summaryrefslogtreecommitdiff
path: root/src/mongo/db
diff options
context:
space:
mode:
authorEric Cox <eric.cox@mongodb.com>2022-06-24 13:52:42 +0000
committerEric Cox <eric.cox@mongodb.com>2022-06-24 13:52:42 +0000
commite41eb06388b603a2575e826d87051eebd38d52f5 (patch)
tree2fd04f7aa3047bacb6b5f81ea802ae51ecd7b844 /src/mongo/db
parente27fb371450c1aecbf3045c13c9a5257560ee615 (diff)
parentd37641e0439f48745a656272a09eb121636ae7a2 (diff)
downloadmongo-e41eb06388b603a2575e826d87051eebd38d52f5.tar.gz
Merge branch 'master' into eric/id-hack-ix-scan-refactor
Diffstat (limited to 'src/mongo/db')
-rw-r--r--src/mongo/db/SConscript91
-rw-r--r--src/mongo/db/auth/README.md99
-rw-r--r--src/mongo/db/auth/SConscript4
-rw-r--r--src/mongo/db/auth/auth_name.h9
-rw-r--r--src/mongo/db/auth/authorization_manager.h2
-rw-r--r--src/mongo/db/auth/authorization_manager_impl.cpp2
-rw-r--r--src/mongo/db/auth/authorization_manager_impl.h2
-rw-r--r--src/mongo/db/auth/authorization_session_impl.cpp7
-rw-r--r--src/mongo/db/auth/authz_manager_external_state.h3
-rw-r--r--src/mongo/db/auth/authz_manager_external_state_d.cpp2
-rw-r--r--src/mongo/db/auth/authz_manager_external_state_local.cpp4
-rw-r--r--src/mongo/db/auth/authz_manager_external_state_local.h2
-rw-r--r--src/mongo/db/auth/authz_manager_external_state_s.h2
-rw-r--r--src/mongo/db/auth/builtin_roles.cpp15
-rw-r--r--src/mongo/db/auth/builtin_roles.h3
-rw-r--r--src/mongo/db/auth/builtin_roles_test.cpp4
-rw-r--r--src/mongo/db/auth/security_token_authentication_guard.cpp (renamed from src/mongo/db/auth/security_token.h)65
-rw-r--r--src/mongo/db/auth/security_token_authentication_guard.h (renamed from src/mongo/db/initialize_snmp.h)35
-rw-r--r--src/mongo/db/auth/validated_tenancy_scope.cpp (renamed from src/mongo/db/auth/security_token.cpp)168
-rw-r--r--src/mongo/db/auth/validated_tenancy_scope.h116
-rw-r--r--src/mongo/db/auth/validated_tenancy_scope_test.cpp177
-rw-r--r--src/mongo/db/catalog/README.md26
-rw-r--r--src/mongo/db/catalog/SConscript2
-rw-r--r--src/mongo/db/catalog/capped_utils.cpp2
-rw-r--r--src/mongo/db/catalog/coll_mod.cpp4
-rw-r--r--src/mongo/db/catalog/collection_catalog.cpp22
-rw-r--r--src/mongo/db/catalog/collection_catalog.h33
-rw-r--r--src/mongo/db/catalog/collection_impl.cpp44
-rw-r--r--src/mongo/db/catalog/collection_impl.h2
-rw-r--r--src/mongo/db/catalog/collection_writer_test.cpp8
-rw-r--r--src/mongo/db/catalog/commit_quorum.idl4
-rw-r--r--src/mongo/db/catalog/commit_quorum_options.h18
-rw-r--r--src/mongo/db/catalog/database_holder.h7
-rw-r--r--src/mongo/db/catalog/database_impl.cpp85
-rw-r--r--src/mongo/db/catalog/database_test.cpp4
-rw-r--r--src/mongo/db/catalog/document_validation.h34
-rw-r--r--src/mongo/db/catalog/index_builds_manager.cpp2
-rw-r--r--src/mongo/db/catalog/index_catalog_impl.cpp11
-rw-r--r--src/mongo/db/catalog/index_key_validate.cpp19
-rw-r--r--src/mongo/db/catalog/index_key_validate_test.cpp37
-rw-r--r--src/mongo/db/catalog/multi_index_block.cpp12
-rw-r--r--src/mongo/db/catalog/multi_index_block_test.cpp4
-rw-r--r--src/mongo/db/catalog/throttle_cursor_test.cpp27
-rw-r--r--src/mongo/db/catalog_raii.cpp75
-rw-r--r--src/mongo/db/catalog_raii.h38
-rw-r--r--src/mongo/db/catalog_raii_test.cpp118
-rw-r--r--src/mongo/db/change_stream_change_collection_manager.cpp194
-rw-r--r--src/mongo/db/change_stream_change_collection_manager.h28
-rw-r--r--src/mongo/db/change_streams_cluster_parameter.cpp62
-rw-r--r--src/mongo/db/change_streams_cluster_parameter.h (renamed from src/mongo/db/initialize_snmp.cpp)26
-rw-r--r--src/mongo/db/change_streams_cluster_parameter.idl64
-rw-r--r--src/mongo/db/change_streams_cluster_parameter_test.cpp78
-rw-r--r--src/mongo/db/cloner.cpp45
-rw-r--r--src/mongo/db/cloner.h2
-rw-r--r--src/mongo/db/commands.cpp39
-rw-r--r--src/mongo/db/commands.h29
-rw-r--r--src/mongo/db/commands/SConscript2
-rw-r--r--src/mongo/db/commands/async_command_execution_test.cpp2
-rw-r--r--src/mongo/db/commands/cqf/cqf_aggregate.cpp33
-rw-r--r--src/mongo/db/commands/cqf/cqf_command_utils.cpp696
-rw-r--r--src/mongo/db/commands/cqf/cqf_command_utils.h53
-rw-r--r--src/mongo/db/commands/create_command.cpp4
-rw-r--r--src/mongo/db/commands/create_indexes.cpp13
-rw-r--r--src/mongo/db/commands/distinct.cpp4
-rw-r--r--src/mongo/db/commands/drop_indexes.cpp6
-rw-r--r--src/mongo/db/commands/find_and_modify.cpp21
-rw-r--r--src/mongo/db/commands/find_cmd.cpp15
-rw-r--r--src/mongo/db/commands/fle_compact_test.cpp9
-rw-r--r--src/mongo/db/commands/get_cluster_parameter_invocation.cpp4
-rw-r--r--src/mongo/db/commands/getmore_cmd.cpp4
-rw-r--r--src/mongo/db/commands/index_filter_commands.cpp2
-rw-r--r--src/mongo/db/commands/killcursors_common.h4
-rw-r--r--src/mongo/db/commands/pipeline_command.cpp4
-rw-r--r--src/mongo/db/commands/plan_cache_commands_test.cpp10
-rw-r--r--src/mongo/db/commands/run_aggregate.cpp30
-rw-r--r--src/mongo/db/commands/set_cluster_parameter_command.cpp6
-rw-r--r--src/mongo/db/commands/set_feature_compatibility_version_command.cpp68
-rw-r--r--src/mongo/db/commands/set_index_commit_quorum.idl5
-rw-r--r--src/mongo/db/commands/set_index_commit_quorum_command.cpp5
-rw-r--r--src/mongo/db/commands/txn_cmds.cpp17
-rw-r--r--src/mongo/db/commands/user_management_commands.cpp9
-rw-r--r--src/mongo/db/commands/write_commands.cpp42
-rw-r--r--src/mongo/db/concurrency/lock_state.cpp3
-rw-r--r--src/mongo/db/create_indexes.idl5
-rw-r--r--src/mongo/db/curop.cpp1
-rw-r--r--src/mongo/db/database_name.h52
-rw-r--r--src/mongo/db/database_name_test.cpp (renamed from src/mongo/db/tenant_database_name_test.cpp)6
-rw-r--r--src/mongo/db/db_raii.cpp8
-rw-r--r--src/mongo/db/db_raii.h9
-rw-r--r--src/mongo/db/db_raii_multi_collection_test.cpp43
-rw-r--r--src/mongo/db/dbdirectclient.cpp5
-rw-r--r--src/mongo/db/dbdirectclient.h3
-rw-r--r--src/mongo/db/dbdirectclient_test.cpp6
-rw-r--r--src/mongo/db/dbhelpers.cpp3
-rw-r--r--src/mongo/db/dbhelpers.h1
-rw-r--r--src/mongo/db/dbmessage.h2
-rw-r--r--src/mongo/db/dollar_tenant_decoration_test.cpp167
-rw-r--r--src/mongo/db/exec/add_fields_projection_executor.cpp32
-rw-r--r--src/mongo/db/exec/batched_delete_stage.cpp10
-rw-r--r--src/mongo/db/exec/bucket_unpacker.cpp56
-rw-r--r--src/mongo/db/exec/bucket_unpacker.h5
-rw-r--r--src/mongo/db/exec/bucket_unpacker_test.cpp51
-rw-r--r--src/mongo/db/exec/collection_scan.cpp49
-rw-r--r--src/mongo/db/exec/collection_scan.h2
-rw-r--r--src/mongo/db/exec/collection_scan_common.h2
-rw-r--r--src/mongo/db/exec/delete_stage.cpp61
-rw-r--r--src/mongo/db/exec/multi_plan.cpp9
-rw-r--r--src/mongo/db/exec/plan_cache_util.cpp12
-rw-r--r--src/mongo/db/exec/plan_cache_util.h20
-rw-r--r--src/mongo/db/exec/sbe/SConscript1
-rw-r--r--src/mongo/db/exec/sbe/abt/abt_lower.cpp15
-rw-r--r--src/mongo/db/exec/sbe/abt/sbe_abt_diff_test.cpp8
-rw-r--r--src/mongo/db/exec/sbe/expressions/expression.cpp17
-rw-r--r--src/mongo/db/exec/sbe/sbe_test.cpp80
-rw-r--r--src/mongo/db/exec/sbe/stages/branch.cpp8
-rw-r--r--src/mongo/db/exec/sbe/stages/branch.h3
-rw-r--r--src/mongo/db/exec/sbe/stages/bson_scan.cpp14
-rw-r--r--src/mongo/db/exec/sbe/stages/bson_scan.h3
-rw-r--r--src/mongo/db/exec/sbe/stages/check_bounds.cpp14
-rw-r--r--src/mongo/db/exec/sbe/stages/check_bounds.h3
-rw-r--r--src/mongo/db/exec/sbe/stages/co_scan.cpp9
-rw-r--r--src/mongo/db/exec/sbe/stages/co_scan.h4
-rw-r--r--src/mongo/db/exec/sbe/stages/column_scan.cpp8
-rw-r--r--src/mongo/db/exec/sbe/stages/column_scan.h3
-rw-r--r--src/mongo/db/exec/sbe/stages/exchange.cpp19
-rw-r--r--src/mongo/db/exec/sbe/stages/exchange.h10
-rw-r--r--src/mongo/db/exec/sbe/stages/filter.h13
-rw-r--r--src/mongo/db/exec/sbe/stages/hash_agg.cpp39
-rw-r--r--src/mongo/db/exec/sbe/stages/hash_agg.h8
-rw-r--r--src/mongo/db/exec/sbe/stages/hash_join.cpp8
-rw-r--r--src/mongo/db/exec/sbe/stages/hash_join.h3
-rw-r--r--src/mongo/db/exec/sbe/stages/hash_lookup.cpp8
-rw-r--r--src/mongo/db/exec/sbe/stages/hash_lookup.h3
-rw-r--r--src/mongo/db/exec/sbe/stages/ix_scan.cpp3
-rw-r--r--src/mongo/db/exec/sbe/stages/ix_scan.h3
-rw-r--r--src/mongo/db/exec/sbe/stages/limit_skip.cpp7
-rw-r--r--src/mongo/db/exec/sbe/stages/limit_skip.h3
-rw-r--r--src/mongo/db/exec/sbe/stages/loop_join.cpp14
-rw-r--r--src/mongo/db/exec/sbe/stages/loop_join.h6
-rw-r--r--src/mongo/db/exec/sbe/stages/makeobj.cpp33
-rw-r--r--src/mongo/db/exec/sbe/stages/makeobj.h16
-rw-r--r--src/mongo/db/exec/sbe/stages/merge_join.cpp8
-rw-r--r--src/mongo/db/exec/sbe/stages/merge_join.h3
-rw-r--r--src/mongo/db/exec/sbe/stages/project.cpp12
-rw-r--r--src/mongo/db/exec/sbe/stages/project.h3
-rw-r--r--src/mongo/db/exec/sbe/stages/scan.cpp23
-rw-r--r--src/mongo/db/exec/sbe/stages/scan.h9
-rw-r--r--src/mongo/db/exec/sbe/stages/sort.cpp8
-rw-r--r--src/mongo/db/exec/sbe/stages/sort.h3
-rw-r--r--src/mongo/db/exec/sbe/stages/sorted_merge.cpp14
-rw-r--r--src/mongo/db/exec/sbe/stages/sorted_merge.h3
-rw-r--r--src/mongo/db/exec/sbe/stages/spool.cpp27
-rw-r--r--src/mongo/db/exec/sbe/stages/spool.h16
-rw-r--r--src/mongo/db/exec/sbe/stages/stages.h29
-rw-r--r--src/mongo/db/exec/sbe/stages/traverse.cpp8
-rw-r--r--src/mongo/db/exec/sbe/stages/traverse.h3
-rw-r--r--src/mongo/db/exec/sbe/stages/union.cpp12
-rw-r--r--src/mongo/db/exec/sbe/stages/union.h3
-rw-r--r--src/mongo/db/exec/sbe/stages/unique.cpp8
-rw-r--r--src/mongo/db/exec/sbe/stages/unique.h5
-rw-r--r--src/mongo/db/exec/sbe/stages/unwind.cpp8
-rw-r--r--src/mongo/db/exec/sbe/stages/unwind.h3
-rw-r--r--src/mongo/db/exec/sbe/util/spilling.cpp3
-rw-r--r--src/mongo/db/exec/sbe/values/columnar.cpp42
-rw-r--r--src/mongo/db/exec/sbe/values/columnar_test.cpp7
-rw-r--r--src/mongo/db/exec/sbe/values/value.cpp3
-rw-r--r--src/mongo/db/exec/sbe/values/value.h13
-rw-r--r--src/mongo/db/exec/sbe/values/value_printer.cpp6
-rw-r--r--src/mongo/db/exec/sbe/vm/vm.cpp74
-rw-r--r--src/mongo/db/exec/sbe/vm/vm.h24
-rw-r--r--src/mongo/db/exec/update_stage.cpp65
-rw-r--r--src/mongo/db/exec/write_stage_common.cpp22
-rw-r--r--src/mongo/db/exec/write_stage_common.h2
-rw-r--r--src/mongo/db/exhaust_cursor_currentop_integration_test.cpp79
-rw-r--r--src/mongo/db/fle_crud.cpp168
-rw-r--r--src/mongo/db/fle_crud.h28
-rw-r--r--src/mongo/db/fle_crud_mongod.cpp8
-rw-r--r--src/mongo/db/fle_crud_test.cpp50
-rw-r--r--src/mongo/db/fle_query_interface_mock.cpp10
-rw-r--r--src/mongo/db/fle_query_interface_mock.h17
-rw-r--r--src/mongo/db/geo/geoparser.cpp121
-rw-r--r--src/mongo/db/geo/hash.cpp35
-rw-r--r--src/mongo/db/geo/hash_test.cpp68
-rw-r--r--src/mongo/db/index/expression_keys_private.cpp5
-rw-r--r--src/mongo/db/index/index_descriptor.h1
-rw-r--r--src/mongo/db/index_build_entry_helpers.cpp7
-rw-r--r--src/mongo/db/index_builds_coordinator.cpp42
-rw-r--r--src/mongo/db/index_builds_coordinator.h12
-rw-r--r--src/mongo/db/index_builds_coordinator_mongod.cpp2
-rw-r--r--src/mongo/db/initialize_server_global_state.cpp18
-rw-r--r--src/mongo/db/initialize_server_global_state.h23
-rw-r--r--src/mongo/db/initialize_server_global_state.idl2
-rw-r--r--src/mongo/db/internal_transactions_feature_flag.idl5
-rw-r--r--src/mongo/db/mongod_main.cpp20
-rw-r--r--src/mongo/db/multitenancy.cpp52
-rw-r--r--src/mongo/db/multitenancy.h8
-rw-r--r--src/mongo/db/namespace_string.cpp15
-rw-r--r--src/mongo/db/namespace_string.h90
-rw-r--r--src/mongo/db/namespace_string_test.cpp27
-rw-r--r--src/mongo/db/op_observer_impl.cpp8
-rw-r--r--src/mongo/db/op_observer_impl_test.cpp48
-rw-r--r--src/mongo/db/ops/SConscript1
-rw-r--r--src/mongo/db/ops/write_ops.cpp39
-rw-r--r--src/mongo/db/ops/write_ops_exec.cpp77
-rw-r--r--src/mongo/db/ops/write_ops_exec.h3
-rw-r--r--src/mongo/db/pipeline/SConscript3
-rw-r--r--src/mongo/db/pipeline/abt/abt_document_source_visitor.cpp31
-rw-r--r--src/mongo/db/pipeline/abt/agg_expression_visitor.cpp38
-rw-r--r--src/mongo/db/pipeline/abt/match_expression_visitor.cpp12
-rw-r--r--src/mongo/db/pipeline/abt/pipeline_test.cpp110
-rw-r--r--src/mongo/db/pipeline/aggregation_context_fixture.h9
-rw-r--r--src/mongo/db/pipeline/change_stream_document_diff_parser.cpp262
-rw-r--r--src/mongo/db/pipeline/change_stream_document_diff_parser.h7
-rw-r--r--src/mongo/db/pipeline/change_stream_document_diff_parser_test.cpp219
-rw-r--r--src/mongo/db/pipeline/change_stream_event_transform.cpp17
-rw-r--r--src/mongo/db/pipeline/change_stream_event_transform_test.cpp140
-rw-r--r--src/mongo/db/pipeline/change_stream_pre_image_helpers.cpp38
-rw-r--r--src/mongo/db/pipeline/dependencies.cpp51
-rw-r--r--src/mongo/db/pipeline/dependencies.h14
-rw-r--r--src/mongo/db/pipeline/dependencies_test.cpp79
-rw-r--r--src/mongo/db/pipeline/dispatch_shard_pipeline_test.cpp6
-rw-r--r--src/mongo/db/pipeline/document_source_change_stream_test.cpp4
-rw-r--r--src/mongo/db/pipeline/document_source_check_resume_token_test.cpp4
-rw-r--r--src/mongo/db/pipeline/document_source_cursor.cpp20
-rw-r--r--src/mongo/db/pipeline/document_source_find_and_modify_image_lookup.cpp1
-rw-r--r--src/mongo/db/pipeline/document_source_find_and_modify_image_lookup_test.cpp5
-rw-r--r--src/mongo/db/pipeline/document_source_lookup_test.cpp39
-rw-r--r--src/mongo/db/pipeline/document_source_union_with_test.cpp136
-rw-r--r--src/mongo/db/pipeline/expression.cpp361
-rw-r--r--src/mongo/db/pipeline/expression.h33
-rw-r--r--src/mongo/db/pipeline/expression_parser.idl (renamed from src/mongo/db/ops/new_write_error_exception_format_feature_flag.idl)34
-rw-r--r--src/mongo/db/pipeline/expression_test.cpp450
-rw-r--r--src/mongo/db/pipeline/expression_visitor.h3
-rw-r--r--src/mongo/db/pipeline/pipeline.h5
-rw-r--r--src/mongo/db/pipeline/pipeline_d.cpp133
-rw-r--r--src/mongo/db/pipeline/pipeline_d.h5
-rw-r--r--src/mongo/db/pipeline/process_interface/common_mongod_process_interface.cpp25
-rw-r--r--src/mongo/db/pipeline/process_interface/mongos_process_interface_test.cpp4
-rw-r--r--src/mongo/db/pipeline/process_interface/standalone_process_interface_test.cpp6
-rw-r--r--src/mongo/db/pipeline/resharding_initial_split_policy_test.cpp4
-rw-r--r--src/mongo/db/pipeline/sharded_union_test.cpp50
-rw-r--r--src/mongo/db/pipeline/visitors/document_source_visitor.h3
-rw-r--r--src/mongo/db/pipeline/visitors/document_source_walker.cpp2
-rw-r--r--src/mongo/db/process_health/config_server_health_observer.cpp4
-rw-r--r--src/mongo/db/process_health/dns_health_observer.cpp21
-rw-r--r--src/mongo/db/process_health/dns_health_observer.h2
-rw-r--r--src/mongo/db/process_health/health_observer_base.cpp47
-rw-r--r--src/mongo/db/process_health/health_observer_base.h2
-rw-r--r--src/mongo/db/process_health/health_observer_mock.h25
-rw-r--r--src/mongo/db/process_health/health_observer_test.cpp44
-rw-r--r--src/mongo/db/process_health/test_health_observer.cpp2
-rw-r--r--src/mongo/db/process_health/test_health_observer.h2
-rw-r--r--src/mongo/db/query/canonical_query.cpp7
-rw-r--r--src/mongo/db/query/canonical_query_encoder.cpp34
-rw-r--r--src/mongo/db/query/canonical_query_encoder.h6
-rw-r--r--src/mongo/db/query/canonical_query_encoder_test.cpp174
-rw-r--r--src/mongo/db/query/classic_stage_builder.cpp2
-rw-r--r--src/mongo/db/query/datetime/date_time_support.cpp3
-rw-r--r--src/mongo/db/query/explain.cpp44
-rw-r--r--src/mongo/db/query/explain.h22
-rw-r--r--src/mongo/db/query/fle/server_rewrite.cpp355
-rw-r--r--src/mongo/db/query/fle/server_rewrite.h69
-rw-r--r--src/mongo/db/query/fle/server_rewrite_test.cpp307
-rw-r--r--src/mongo/db/query/get_executor.cpp40
-rw-r--r--src/mongo/db/query/get_executor.h5
-rw-r--r--src/mongo/db/query/interval_evaluation_tree.h16
-rw-r--r--src/mongo/db/query/optimizer/algebra/algebra_test.cpp17
-rw-r--r--src/mongo/db/query/optimizer/algebra/operator.h115
-rw-r--r--src/mongo/db/query/optimizer/algebra/polyvalue.h154
-rw-r--r--src/mongo/db/query/optimizer/bool_expression.h12
-rw-r--r--src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp39
-rw-r--r--src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp54
-rw-r--r--src/mongo/db/query/optimizer/rewrites/const_eval.cpp2
-rw-r--r--src/mongo/db/query/optimizer/syntax/syntax.h4
-rw-r--r--src/mongo/db/query/optimizer/utils/utils.cpp192
-rw-r--r--src/mongo/db/query/optimizer/utils/utils.h15
-rw-r--r--src/mongo/db/query/plan_cache_key_factory.cpp72
-rw-r--r--src/mongo/db/query/plan_cache_key_factory.h11
-rw-r--r--src/mongo/db/query/plan_executor.cpp4
-rw-r--r--src/mongo/db/query/plan_executor.h9
-rw-r--r--src/mongo/db/query/plan_executor_impl.cpp20
-rw-r--r--src/mongo/db/query/plan_executor_sbe.h5
-rw-r--r--src/mongo/db/query/plan_yield_policy.cpp28
-rw-r--r--src/mongo/db/query/plan_yield_policy.h2
-rw-r--r--src/mongo/db/query/planner_access.cpp39
-rw-r--r--src/mongo/db/query/planner_access.h1
-rw-r--r--src/mongo/db/query/planner_analysis.cpp57
-rw-r--r--src/mongo/db/query/planner_analysis.h1
-rw-r--r--src/mongo/db/query/projection.cpp1
-rw-r--r--src/mongo/db/query/projection.h5
-rw-r--r--src/mongo/db/query/query_feature_flags.idl22
-rw-r--r--src/mongo/db/query/query_knobs.idl18
-rw-r--r--src/mongo/db/query/query_planner.cpp116
-rw-r--r--src/mongo/db/query/query_planner_common.cpp41
-rw-r--r--src/mongo/db/query/query_planner_common.h11
-rw-r--r--src/mongo/db/query/query_planner_params.h21
-rw-r--r--src/mongo/db/query/query_request_helper.cpp1
-rw-r--r--src/mongo/db/query/query_request_helper.h6
-rw-r--r--src/mongo/db/query/query_solution.cpp2
-rw-r--r--src/mongo/db/query/query_solution.h2
-rw-r--r--src/mongo/db/query/sbe_cached_solution_planner.cpp17
-rw-r--r--src/mongo/db/query/sbe_multi_planner.cpp30
-rw-r--r--src/mongo/db/query/sbe_plan_cache.cpp13
-rw-r--r--src/mongo/db/query/sbe_plan_cache.h134
-rw-r--r--src/mongo/db/query/sbe_stage_builder.cpp168
-rw-r--r--src/mongo/db/query/sbe_stage_builder_coll_scan.cpp4
-rw-r--r--src/mongo/db/query/sbe_stage_builder_expression.cpp5
-rw-r--r--src/mongo/db/query/sbe_stage_builder_filter.cpp43
-rw-r--r--src/mongo/db/query/sbe_stage_builder_helpers.cpp89
-rw-r--r--src/mongo/db/query/sbe_stage_builder_helpers.h66
-rw-r--r--src/mongo/db/query/sbe_stage_builder_projection.cpp2
-rw-r--r--src/mongo/db/query/sbe_sub_planner.cpp5
-rw-r--r--src/mongo/db/query/sbe_utils.cpp9
-rw-r--r--src/mongo/db/record_id.h5
-rw-r--r--src/mongo/db/record_id_helpers.cpp48
-rw-r--r--src/mongo/db/record_id_helpers.h2
-rw-r--r--src/mongo/db/repl/SConscript5
-rw-r--r--src/mongo/db/repl/apply_ops.cpp12
-rw-r--r--src/mongo/db/repl/collection_bulk_loader_impl.cpp3
-rw-r--r--src/mongo/db/repl/collection_cloner.cpp45
-rw-r--r--src/mongo/db/repl/collection_cloner.h6
-rw-r--r--src/mongo/db/repl/data_replicator_external_state.h2
-rw-r--r--src/mongo/db/repl/data_replicator_external_state_impl.cpp2
-rw-r--r--src/mongo/db/repl/data_replicator_external_state_impl.h2
-rw-r--r--src/mongo/db/repl/data_replicator_external_state_mock.cpp6
-rw-r--r--src/mongo/db/repl/data_replicator_external_state_mock.h2
-rw-r--r--src/mongo/db/repl/idempotency_test.cpp2
-rw-r--r--src/mongo/db/repl/oplog.cpp4
-rw-r--r--src/mongo/db/repl/oplog_applier_impl.cpp15
-rw-r--r--src/mongo/db/repl/oplog_applier_impl_test.cpp41
-rw-r--r--src/mongo/db/repl/oplog_entry.idl3
-rw-r--r--src/mongo/db/repl/oplog_entry_test.cpp4
-rw-r--r--src/mongo/db/repl/oplog_fetcher.cpp80
-rw-r--r--src/mongo/db/repl/oplog_fetcher.h9
-rw-r--r--src/mongo/db/repl/oplog_fetcher_test.cpp43
-rw-r--r--src/mongo/db/repl/primary_only_service.cpp3
-rw-r--r--src/mongo/db/repl/repl_set_commands.cpp7
-rw-r--r--src/mongo/db/repl/replication_coordinator_external_state_impl.cpp2
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl.cpp81
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl.h29
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp74
-rw-r--r--src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp99
-rw-r--r--src/mongo/db/repl/replication_coordinator_mock.cpp16
-rw-r--r--src/mongo/db/repl/replication_coordinator_mock.h4
-rw-r--r--src/mongo/db/repl/roll_back_local_operations_test.cpp3
-rw-r--r--src/mongo/db/repl/rollback_source_impl.cpp3
-rw-r--r--src/mongo/db/repl/rs_rollback.cpp36
-rw-r--r--src/mongo/db/repl/storage_interface_impl.cpp24
-rw-r--r--src/mongo/db/repl/storage_interface_impl_test.cpp5
-rw-r--r--src/mongo/db/repl/storage_timestamp_test.cpp6
-rw-r--r--src/mongo/db/repl/tenant_collection_cloner.cpp56
-rw-r--r--src/mongo/db/repl/tenant_collection_cloner.h6
-rw-r--r--src/mongo/db/repl/tenant_file_cloner.cpp13
-rw-r--r--src/mongo/db/repl/tenant_file_cloner.h2
-rw-r--r--src/mongo/db/repl/tenant_file_importer_service.cpp62
-rw-r--r--src/mongo/db/repl/tenant_file_importer_service.h92
-rw-r--r--src/mongo/db/repl/tenant_migration_access_blocker_util.cpp4
-rw-r--r--src/mongo/db/repl/tenant_migration_recipient_op_observer.cpp9
-rw-r--r--src/mongo/db/repl/tenant_migration_recipient_service.cpp21
-rw-r--r--src/mongo/db/repl/tenant_oplog_applier_test.cpp117
-rw-r--r--src/mongo/db/repl/topology_coordinator.cpp6
-rw-r--r--src/mongo/db/repl/topology_coordinator.h2
-rw-r--r--src/mongo/db/repl_index_build_state.h2
-rw-r--r--src/mongo/db/s/README.md12
-rw-r--r--src/mongo/db/s/SConscript12
-rw-r--r--src/mongo/db/s/balancer/balance_stats_test.cpp4
-rw-r--r--src/mongo/db/s/balancer/balancer.cpp74
-rw-r--r--src/mongo/db/s/balancer/balancer_chunk_selection_policy_test.cpp10
-rw-r--r--src/mongo/db/s/balancer/balancer_commands_scheduler_impl.cpp2
-rw-r--r--src/mongo/db/s/balancer/balancer_commands_scheduler_test.cpp10
-rw-r--r--src/mongo/db/s/balancer/balancer_defragmentation_policy_test.cpp27
-rw-r--r--src/mongo/db/s/balancer/balancer_policy_test.cpp5
-rw-r--r--src/mongo/db/s/balancer/cluster_chunks_resize_policy_test.cpp3
-rw-r--r--src/mongo/db/s/balancer/type_migration.cpp3
-rw-r--r--src/mongo/db/s/balancer/type_migration_test.cpp17
-rw-r--r--src/mongo/db/s/check_sharding_index_command.cpp8
-rw-r--r--src/mongo/db/s/chunk_splitter.cpp1
-rw-r--r--src/mongo/db/s/collection_metadata_filtering_test.cpp4
-rw-r--r--src/mongo/db/s/collection_metadata_test.cpp6
-rw-r--r--src/mongo/db/s/collection_sharding_runtime_test.cpp12
-rw-r--r--src/mongo/db/s/collmod_coordinator.cpp79
-rw-r--r--src/mongo/db/s/collmod_coordinator.h35
-rw-r--r--src/mongo/db/s/collmod_coordinator_pre60_compatible.cpp264
-rw-r--r--src/mongo/db/s/collmod_coordinator_pre60_compatible.h101
-rw-r--r--src/mongo/db/s/commit_chunk_migration.idl85
-rw-r--r--src/mongo/db/s/compact_structured_encryption_data_coordinator.cpp93
-rw-r--r--src/mongo/db/s/compact_structured_encryption_data_coordinator.h33
-rw-r--r--src/mongo/db/s/config/config_server_test_fixture.cpp25
-rw-r--r--src/mongo/db/s/config/config_server_test_fixture.h11
-rw-r--r--src/mongo/db/s/config/configsvr_collmod_command.cpp4
-rw-r--r--src/mongo/db/s/config/configsvr_commit_chunk_migration_command.cpp109
-rw-r--r--src/mongo/db/s/config/configsvr_configure_collection_balancing.cpp5
-rw-r--r--src/mongo/db/s/config/configsvr_merge_chunks_command.cpp4
-rw-r--r--src/mongo/db/s/config/configsvr_move_chunk_command.cpp12
-rw-r--r--src/mongo/db/s/config/configsvr_remove_chunks_command.cpp4
-rw-r--r--src/mongo/db/s/config/configsvr_remove_tags_command.cpp4
-rw-r--r--src/mongo/db/s/config/configsvr_rename_collection_metadata_command.cpp4
-rw-r--r--src/mongo/db/s/config/configsvr_reshard_collection_cmd.cpp12
-rw-r--r--src/mongo/db/s/config/configsvr_set_cluster_parameter_command.cpp6
-rw-r--r--src/mongo/db/s/config/initial_split_policy.cpp47
-rw-r--r--src/mongo/db/s/config/initial_split_policy_test.cpp6
-rw-r--r--src/mongo/db/s/config/sharding_catalog_manager_add_shard_test.cpp2
-rw-r--r--src/mongo/db/s/config/sharding_catalog_manager_bump_collection_version_and_change_metadata_test.cpp19
-rw-r--r--src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp67
-rw-r--r--src/mongo/db/s/config/sharding_catalog_manager_clear_jumbo_flag_test.cpp10
-rw-r--r--src/mongo/db/s/config/sharding_catalog_manager_commit_chunk_migration_test.cpp76
-rw-r--r--src/mongo/db/s/config/sharding_catalog_manager_ensure_chunk_version_is_greater_than_test.cpp120
-rw-r--r--src/mongo/db/s/config/sharding_catalog_manager_merge_chunks_test.cpp31
-rw-r--r--src/mongo/db/s/config/sharding_catalog_manager_remove_shard_test.cpp16
-rw-r--r--src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp3
-rw-r--r--src/mongo/db/s/config/sharding_catalog_manager_split_chunk_test.cpp34
-rw-r--r--src/mongo/db/s/create_collection_coordinator.cpp148
-rw-r--r--src/mongo/db/s/create_collection_coordinator.h83
-rw-r--r--src/mongo/db/s/create_collection_coordinator_test.cpp133
-rw-r--r--src/mongo/db/s/database_sharding_state.cpp5
-rw-r--r--src/mongo/db/s/drop_collection_coordinator.cpp71
-rw-r--r--src/mongo/db/s/drop_collection_coordinator.h38
-rw-r--r--src/mongo/db/s/drop_database_coordinator.cpp70
-rw-r--r--src/mongo/db/s/drop_database_coordinator.h41
-rw-r--r--src/mongo/db/s/flush_resharding_state_change_command.cpp2
-rw-r--r--src/mongo/db/s/flush_routing_table_cache_updates_command.cpp3
-rw-r--r--src/mongo/db/s/metadata_manager_test.cpp4
-rw-r--r--src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp59
-rw-r--r--src/mongo/db/s/migration_chunk_cloner_source_legacy.h18
-rw-r--r--src/mongo/db/s/migration_chunk_cloner_source_legacy_commands.cpp7
-rw-r--r--src/mongo/db/s/migration_chunk_cloner_source_legacy_test.cpp10
-rw-r--r--src/mongo/db/s/migration_destination_manager.cpp17
-rw-r--r--src/mongo/db/s/migration_destination_manager_legacy_commands.cpp8
-rw-r--r--src/mongo/db/s/migration_source_manager.cpp27
-rw-r--r--src/mongo/db/s/migration_util.cpp22
-rw-r--r--src/mongo/db/s/migration_util_test.cpp10
-rw-r--r--src/mongo/db/s/move_primary_coordinator.cpp28
-rw-r--r--src/mongo/db/s/move_primary_coordinator.h17
-rw-r--r--src/mongo/db/s/move_primary_source_manager.cpp8
-rw-r--r--src/mongo/db/s/op_observer_sharding_test.cpp8
-rw-r--r--src/mongo/db/s/operation_sharding_state_test.cpp10
-rw-r--r--src/mongo/db/s/range_deletion_util_test.cpp4
-rw-r--r--src/mongo/db/s/refine_collection_shard_key_coordinator.cpp48
-rw-r--r--src/mongo/db/s/refine_collection_shard_key_coordinator.h34
-rw-r--r--src/mongo/db/s/rename_collection_coordinator.cpp136
-rw-r--r--src/mongo/db/s/rename_collection_coordinator.h38
-rw-r--r--src/mongo/db/s/rename_collection_participant_service.cpp9
-rw-r--r--src/mongo/db/s/reshard_collection_coordinator.cpp48
-rw-r--r--src/mongo/db/s/reshard_collection_coordinator.h34
-rw-r--r--src/mongo/db/s/resharding/document_source_resharding_ownership_match.cpp3
-rw-r--r--src/mongo/db/s/resharding/resharding_agg_test.cpp34
-rw-r--r--src/mongo/db/s/resharding/resharding_collection_cloner.cpp6
-rw-r--r--src/mongo/db/s/resharding/resharding_collection_cloner.h6
-rw-r--r--src/mongo/db/s/resharding/resharding_coordinator_commit_monitor.cpp8
-rw-r--r--src/mongo/db/s/resharding/resharding_coordinator_commit_monitor.h6
-rw-r--r--src/mongo/db/s/resharding/resharding_coordinator_commit_monitor_test.cpp6
-rw-r--r--src/mongo/db/s/resharding/resharding_coordinator_observer.cpp4
-rw-r--r--src/mongo/db/s/resharding/resharding_coordinator_observer_test.cpp84
-rw-r--r--src/mongo/db/s/resharding/resharding_coordinator_service.cpp175
-rw-r--r--src/mongo/db/s/resharding/resharding_coordinator_service.h14
-rw-r--r--src/mongo/db/s/resharding/resharding_coordinator_service_test.cpp17
-rw-r--r--src/mongo/db/s/resharding/resharding_coordinator_test.cpp35
-rw-r--r--src/mongo/db/s/resharding/resharding_data_replication.cpp20
-rw-r--r--src/mongo/db/s/resharding/resharding_data_replication.h6
-rw-r--r--src/mongo/db/s/resharding/resharding_data_replication_test.cpp11
-rw-r--r--src/mongo/db/s/resharding/resharding_destined_recipient_test.cpp10
-rw-r--r--src/mongo/db/s/resharding/resharding_donor_oplog_iterator.cpp4
-rw-r--r--src/mongo/db/s/resharding/resharding_donor_oplog_iterator_test.cpp4
-rw-r--r--src/mongo/db/s/resharding/resharding_donor_recipient_common.cpp5
-rw-r--r--src/mongo/db/s/resharding/resharding_donor_recipient_common.h4
-rw-r--r--src/mongo/db/s/resharding/resharding_donor_recipient_common_test.cpp112
-rw-r--r--src/mongo/db/s/resharding/resharding_donor_service.cpp46
-rw-r--r--src/mongo/db/s/resharding/resharding_donor_service.h8
-rw-r--r--src/mongo/db/s/resharding/resharding_donor_service_test.cpp20
-rw-r--r--src/mongo/db/s/resharding/resharding_manual_cleanup.cpp5
-rw-r--r--src/mongo/db/s/resharding/resharding_metrics.cpp (renamed from src/mongo/db/s/resharding/resharding_metrics_new.cpp)102
-rw-r--r--src/mongo/db/s/resharding/resharding_metrics.h (renamed from src/mongo/db/s/resharding/resharding_metrics_new.h)46
-rw-r--r--src/mongo/db/s/resharding/resharding_metrics_test.cpp (renamed from src/mongo/db/s/resharding/resharding_metrics_new_test.cpp)38
-rw-r--r--src/mongo/db/s/resharding/resharding_oplog_application.cpp9
-rw-r--r--src/mongo/db/s/resharding/resharding_oplog_application.h1
-rw-r--r--src/mongo/db/s/resharding/resharding_oplog_applier.cpp2
-rw-r--r--src/mongo/db/s/resharding/resharding_oplog_applier.h2
-rw-r--r--src/mongo/db/s/resharding/resharding_oplog_applier_metrics.cpp18
-rw-r--r--src/mongo/db/s/resharding/resharding_oplog_applier_metrics.h6
-rw-r--r--src/mongo/db/s/resharding/resharding_oplog_applier_metrics_test.cpp16
-rw-r--r--src/mongo/db/s/resharding/resharding_oplog_applier_test.cpp25
-rw-r--r--src/mongo/db/s/resharding/resharding_oplog_batch_applier_test.cpp36
-rw-r--r--src/mongo/db/s/resharding/resharding_oplog_crud_application_test.cpp41
-rw-r--r--src/mongo/db/s/resharding/resharding_oplog_fetcher.cpp20
-rw-r--r--src/mongo/db/s/resharding/resharding_oplog_fetcher.h12
-rw-r--r--src/mongo/db/s/resharding/resharding_oplog_fetcher_test.cpp20
-rw-r--r--src/mongo/db/s/resharding/resharding_oplog_session_application_test.cpp10
-rw-r--r--src/mongo/db/s/resharding/resharding_recipient_service.cpp78
-rw-r--r--src/mongo/db/s/resharding/resharding_recipient_service.h10
-rw-r--r--src/mongo/db/s/resharding/resharding_recipient_service_external_state.cpp8
-rw-r--r--src/mongo/db/s/resharding/resharding_recipient_service_external_state.h8
-rw-r--r--src/mongo/db/s/resharding/resharding_recipient_service_external_state_test.cpp10
-rw-r--r--src/mongo/db/s/resharding/resharding_recipient_service_test.cpp31
-rw-r--r--src/mongo/db/s/resharding/resharding_txn_cloner.cpp1
-rw-r--r--src/mongo/db/s/resharding/resharding_txn_cloner_test.cpp20
-rw-r--r--src/mongo/db/s/resharding/resharding_util.cpp4
-rw-r--r--src/mongo/db/s/resharding/resharding_util.h2
-rw-r--r--src/mongo/db/s/resharding/resharding_util_test.cpp4
-rw-r--r--src/mongo/db/s/resharding_test_commands.cpp6
-rw-r--r--src/mongo/db/s/sessions_collection_config_server.cpp6
-rw-r--r--src/mongo/db/s/set_allow_migrations_coordinator.cpp28
-rw-r--r--src/mongo/db/s/set_allow_migrations_coordinator.h16
-rw-r--r--src/mongo/db/s/set_shard_version_command.cpp340
-rw-r--r--src/mongo/db/s/shard_key_index_util.cpp73
-rw-r--r--src/mongo/db/s/shard_key_index_util.h9
-rw-r--r--src/mongo/db/s/shard_key_util.cpp49
-rw-r--r--src/mongo/db/s/shard_key_util.h12
-rw-r--r--src/mongo/db/s/shard_metadata_util.cpp17
-rw-r--r--src/mongo/db/s/shard_metadata_util.h47
-rw-r--r--src/mongo/db/s/shard_metadata_util_test.cpp12
-rw-r--r--src/mongo/db/s/shard_server_catalog_cache_loader.cpp64
-rw-r--r--src/mongo/db/s/shard_server_catalog_cache_loader_test.cpp12
-rw-r--r--src/mongo/db/s/sharding_data_transform_cumulative_metrics.cpp12
-rw-r--r--src/mongo/db/s/sharding_data_transform_cumulative_metrics.h4
-rw-r--r--src/mongo/db/s/sharding_data_transform_cumulative_metrics_test.cpp8
-rw-r--r--src/mongo/db/s/sharding_data_transform_instance_metrics.cpp5
-rw-r--r--src/mongo/db/s/sharding_data_transform_instance_metrics.h4
-rw-r--r--src/mongo/db/s/sharding_ddl_coordinator.h271
-rw-r--r--src/mongo/db/s/sharding_ddl_coordinator.idl4
-rw-r--r--src/mongo/db/s/sharding_ddl_coordinator_service.cpp9
-rw-r--r--src/mongo/db/s/sharding_ddl_util.cpp21
-rw-r--r--src/mongo/db/s/sharding_ddl_util_test.cpp12
-rw-r--r--src/mongo/db/s/sharding_mongod_test_fixture.cpp1
-rw-r--r--src/mongo/db/s/sharding_server_status.cpp10
-rw-r--r--src/mongo/db/s/sharding_util.cpp46
-rw-r--r--src/mongo/db/s/sharding_util.h9
-rw-r--r--src/mongo/db/s/sharding_write_router_bm.cpp2
-rw-r--r--src/mongo/db/s/shardsvr_abort_reshard_collection_command.cpp2
-rw-r--r--src/mongo/db/s/shardsvr_collmod_command.cpp96
-rw-r--r--src/mongo/db/s/shardsvr_collmod_participant_command.cpp4
-rw-r--r--src/mongo/db/s/shardsvr_commit_reshard_collection_command.cpp2
-rw-r--r--src/mongo/db/s/shardsvr_create_collection_command.cpp20
-rw-r--r--src/mongo/db/s/shardsvr_create_collection_participant_command.cpp35
-rw-r--r--src/mongo/db/s/shardsvr_drop_collection_participant_command.cpp36
-rw-r--r--src/mongo/db/s/shardsvr_merge_chunks_command.cpp3
-rw-r--r--src/mongo/db/s/shardsvr_participant_block_command.cpp4
-rw-r--r--src/mongo/db/s/shardsvr_rename_collection_participant_command.cpp71
-rw-r--r--src/mongo/db/s/shardsvr_resharding_operation_time_command.cpp7
-rw-r--r--src/mongo/db/s/shardsvr_set_cluster_parameter_command.cpp4
-rw-r--r--src/mongo/db/s/shardsvr_set_user_write_block_mode_command.cpp4
-rw-r--r--src/mongo/db/s/split_chunk.cpp3
-rw-r--r--src/mongo/db/s/transaction_coordinator_service.cpp4
-rw-r--r--src/mongo/db/s/txn_two_phase_commit_cmds.cpp20
-rw-r--r--src/mongo/db/s/type_shard_collection.cpp20
-rw-r--r--src/mongo/db/s/type_shard_collection.h5
-rw-r--r--src/mongo/db/s/type_shard_collection.idl15
-rw-r--r--src/mongo/db/s/type_shard_collection_test.cpp21
-rw-r--r--src/mongo/db/server_options.h9
-rw-r--r--src/mongo/db/serverless/shard_split_donor_op_observer.cpp187
-rw-r--r--src/mongo/db/serverless/shard_split_donor_op_observer_test.cpp50
-rw-r--r--src/mongo/db/serverless/shard_split_donor_service.cpp510
-rw-r--r--src/mongo/db/serverless/shard_split_donor_service.h24
-rw-r--r--src/mongo/db/serverless/shard_split_donor_service_test.cpp61
-rw-r--r--src/mongo/db/serverless/shard_split_state_machine.idl1
-rw-r--r--src/mongo/db/serverless/shard_split_utils.cpp6
-rw-r--r--src/mongo/db/serverless/shard_split_utils.h2
-rw-r--r--src/mongo/db/service_context_d_test_fixture.cpp2
-rw-r--r--src/mongo/db/service_context_d_test_fixture.h8
-rw-r--r--src/mongo/db/service_entry_point_common.cpp35
-rw-r--r--src/mongo/db/session_catalog_mongod.cpp61
-rw-r--r--src/mongo/db/sessions_collection.cpp2
-rw-r--r--src/mongo/db/sessions_collection_rs.cpp1
-rw-r--r--src/mongo/db/sessions_collection_standalone.cpp1
-rw-r--r--src/mongo/db/stats/counters.h2
-rw-r--r--src/mongo/db/storage/kv/durable_catalog_test.cpp33
-rw-r--r--src/mongo/db/storage/kv/storage_engine_test.cpp2
-rw-r--r--src/mongo/db/storage/record_store_test_oplog.cpp2
-rw-r--r--src/mongo/db/storage/storage_engine_test_fixture.h8
-rw-r--r--src/mongo/db/storage/storage_options.h3
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_init.cpp3
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp11
-rw-r--r--src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp3
-rw-r--r--src/mongo/db/tenant_id.h7
-rw-r--r--src/mongo/db/timeseries/SConscript3
-rw-r--r--src/mongo/db/timeseries/bucket_catalog.cpp473
-rw-r--r--src/mongo/db/timeseries/bucket_catalog.h73
-rw-r--r--src/mongo/db/timeseries/bucket_catalog_helpers.cpp8
-rw-r--r--src/mongo/db/timeseries/bucket_catalog_helpers.h6
-rw-r--r--src/mongo/db/timeseries/bucket_catalog_test.cpp476
-rw-r--r--src/mongo/db/timeseries/bucket_compression.cpp19
-rw-r--r--src/mongo/db/timeseries/bucket_compression.h1
-rw-r--r--src/mongo/db/timeseries/timeseries.idl12
-rw-r--r--src/mongo/db/timeseries/timeseries_commands_conversion_helper.cpp83
-rw-r--r--src/mongo/db/timeseries/timeseries_constants.h1
-rw-r--r--src/mongo/db/timeseries/timeseries_dotted_path_support_test.cpp3
-rw-r--r--src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.cpp16
-rw-r--r--src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.h7
-rw-r--r--src/mongo/db/transaction_api.cpp47
-rw-r--r--src/mongo/db/transaction_api.h20
-rw-r--r--src/mongo/db/transaction_api_test.cpp174
-rw-r--r--src/mongo/db/transaction_participant.cpp255
-rw-r--r--src/mongo/db/transaction_participant.h42
-rw-r--r--src/mongo/db/transaction_participant_test.cpp388
-rw-r--r--src/mongo/db/transaction_validation.cpp58
-rw-r--r--src/mongo/db/transaction_validation.h6
-rw-r--r--src/mongo/db/views/durable_view_catalog.cpp12
596 files changed, 12742 insertions, 7979 deletions
diff --git a/src/mongo/db/SConscript b/src/mongo/db/SConscript
index 379103e36fb..f06e89c7c3d 100644
--- a/src/mongo/db/SConscript
+++ b/src/mongo/db/SConscript
@@ -108,16 +108,6 @@ env.Library(
)
env.Library(
- target='initialize_snmp',
- source=[
- 'initialize_snmp.cpp',
- ],
- LIBDEPS=[
- '$BUILD_DIR/mongo/base',
- ],
-)
-
-env.Library(
target="dbmessage",
source=[
"dbmessage.cpp",
@@ -520,6 +510,14 @@ env.Library(
)
env.Library(
+ target='change_streams_cluster_parameter',
+ source=['change_streams_cluster_parameter.idl', 'change_streams_cluster_parameter.cpp'],
+ LIBDEPS=[
+ '$BUILD_DIR/mongo/idl/cluster_server_parameter',
+ ],
+)
+
+env.Library(
target='change_stream_change_collection_manager',
source=[
'change_stream_change_collection_manager.cpp',
@@ -1078,35 +1076,33 @@ env.Library(
source=[
"op_observer_impl.cpp",
],
- LIBDEPS=[
+ LIBDEPS_PRIVATE=[
'$BUILD_DIR/mongo/base',
'$BUILD_DIR/mongo/db/catalog/collection_catalog',
+ '$BUILD_DIR/mongo/db/catalog/collection_options',
+ '$BUILD_DIR/mongo/db/catalog/commit_quorum_options',
+ '$BUILD_DIR/mongo/db/catalog/database_holder',
+ '$BUILD_DIR/mongo/db/catalog/import_collection_oplog_entry',
+ '$BUILD_DIR/mongo/db/concurrency/exception_util',
+ '$BUILD_DIR/mongo/db/pipeline/change_stream_pre_image_helpers',
'$BUILD_DIR/mongo/db/pipeline/change_stream_preimage',
+ '$BUILD_DIR/mongo/db/repl/image_collection_entry',
+ '$BUILD_DIR/mongo/db/repl/oplog',
+ '$BUILD_DIR/mongo/db/repl/repl_server_parameters',
'$BUILD_DIR/mongo/db/repl/tenant_migration_access_blocker',
+ '$BUILD_DIR/mongo/db/s/sharding_api_d',
'$BUILD_DIR/mongo/db/timeseries/bucket_catalog',
+ '$BUILD_DIR/mongo/db/views/views_mongod',
'$BUILD_DIR/mongo/s/coreshard',
'$BUILD_DIR/mongo/s/grid',
'batched_write_context',
- 'catalog/collection_options',
- 'catalog/database_holder',
- 'op_observer',
- 'op_observer_util',
- 'read_write_concern_defaults',
- 'repl/oplog',
- 's/sharding_api_d',
- 'views/views_mongod',
- ],
- LIBDEPS_PRIVATE=[
- '$BUILD_DIR/mongo/db/catalog/commit_quorum_options',
- '$BUILD_DIR/mongo/db/catalog/import_collection_oplog_entry',
- '$BUILD_DIR/mongo/db/concurrency/exception_util',
- '$BUILD_DIR/mongo/db/pipeline/change_stream_pre_image_helpers',
- '$BUILD_DIR/mongo/db/server_feature_flags',
'dbhelpers',
'internal_transactions_feature_flag',
'multitenancy',
- 'repl/image_collection_entry',
- 'repl/repl_server_parameters',
+ 'op_observer',
+ 'op_observer_util',
+ 'read_write_concern_defaults',
+ 'server_feature_flags',
'transaction',
],
)
@@ -2197,6 +2193,7 @@ env.Library(
],
LIBDEPS_PRIVATE=[
'$BUILD_DIR/mongo/db/catalog/catalog_impl',
+ '$BUILD_DIR/mongo/db/catalog/database_holder',
'$BUILD_DIR/mongo/db/commands/mongod',
'$BUILD_DIR/mongo/db/index/index_access_method_factory',
'$BUILD_DIR/mongo/db/index/index_access_methods',
@@ -2393,6 +2390,7 @@ env.Library(
# NOTE: If you need to add a static or mongo initializer to mongod startup,
# please add that library here, as a private library dependency.
'$BUILD_DIR/mongo/executor/network_interface_factory',
+ '$BUILD_DIR/mongo/logv2/logv2_options',
'$BUILD_DIR/mongo/rpc/rpc',
'$BUILD_DIR/mongo/s/commands/sharded_cluster_sharding_commands',
'$BUILD_DIR/mongo/scripting/scripting_server',
@@ -2441,7 +2439,6 @@ env.Library(
'index/index_access_methods',
'index/index_descriptor',
'index_builds_coordinator_mongod',
- 'initialize_snmp',
'introspect',
'keys_collection_client_direct',
'kill_sessions_local',
@@ -2519,6 +2516,7 @@ env.Library(
'$BUILD_DIR/mongo/client/clientdriver_minimal',
'$BUILD_DIR/mongo/db/change_stream_change_collection_manager',
'$BUILD_DIR/mongo/db/change_stream_options_manager',
+ '$BUILD_DIR/mongo/db/change_streams_cluster_parameter',
'$BUILD_DIR/mongo/db/pipeline/change_stream_expired_pre_image_remover',
'$BUILD_DIR/mongo/idl/cluster_server_parameter',
'$BUILD_DIR/mongo/idl/cluster_server_parameter_op_observer',
@@ -2551,7 +2549,6 @@ env.Library(
'index/index_access_method_factory',
'index/index_access_methods',
'index_builds_coordinator_mongod',
- 'initialize_snmp',
'keys_collection_client_direct',
'kill_sessions',
'kill_sessions_local',
@@ -2692,16 +2689,17 @@ if wiredtiger:
source=[
'cancelable_operation_context_test.cpp',
'catalog_raii_test.cpp',
+ 'change_streams_cluster_parameter_test.cpp',
'client_strand_test.cpp',
'client_context_test.cpp',
'collection_index_usage_tracker_test.cpp',
'commands_test.cpp',
'curop_test.cpp',
+ 'database_name_test.cpp',
'dbdirectclient_test.cpp',
'dbmessage_test.cpp',
'db_raii_test.cpp',
'db_raii_multi_collection_test.cpp',
- 'dollar_tenant_decoration_test.cpp',
"explain_test.cpp",
'field_parser_test.cpp',
'field_ref_set_test.cpp',
@@ -2737,7 +2735,6 @@ if wiredtiger:
'session_catalog_mongod_test.cpp',
'session_catalog_test.cpp',
'startup_warnings_mongod_test.cpp',
- 'tenant_database_name_test.cpp',
'thread_client_test.cpp',
'time_proof_service_test.cpp',
'transaction_api_test.cpp',
@@ -2761,15 +2758,31 @@ if wiredtiger:
'$BUILD_DIR/mongo/crypto/encrypted_field_config',
'$BUILD_DIR/mongo/crypto/fle_crypto',
'$BUILD_DIR/mongo/db/auth/auth',
+ '$BUILD_DIR/mongo/db/auth/authmocks',
'$BUILD_DIR/mongo/db/auth/security_token',
'$BUILD_DIR/mongo/db/catalog/catalog_test_fixture',
+ '$BUILD_DIR/mongo/db/catalog/database_holder',
'$BUILD_DIR/mongo/db/catalog/import_collection_oplog_entry',
'$BUILD_DIR/mongo/db/catalog/index_build_entry_idl',
'$BUILD_DIR/mongo/db/catalog/local_oplog_info',
+ '$BUILD_DIR/mongo/db/change_streams_cluster_parameter',
'$BUILD_DIR/mongo/db/mongohasher',
+ '$BUILD_DIR/mongo/db/pipeline/change_stream_expired_pre_image_remover',
'$BUILD_DIR/mongo/db/query/common_query_enums_and_helpers',
'$BUILD_DIR/mongo/db/query/query_test_service_context',
+ '$BUILD_DIR/mongo/db/repl/image_collection_entry',
+ '$BUILD_DIR/mongo/db/repl/mock_repl_coord_server_fixture',
+ '$BUILD_DIR/mongo/db/repl/oplog_interface_local',
+ '$BUILD_DIR/mongo/db/repl/repl_coordinator_interface',
+ '$BUILD_DIR/mongo/db/repl/repl_server_parameters',
+ '$BUILD_DIR/mongo/db/repl/replica_set_aware_service',
+ '$BUILD_DIR/mongo/db/repl/replmocks',
+ '$BUILD_DIR/mongo/db/repl/storage_interface_impl',
+ '$BUILD_DIR/mongo/db/repl/tenant_migration_access_blocker',
+ '$BUILD_DIR/mongo/db/s/shard_server_test_fixture',
'$BUILD_DIR/mongo/db/s/sharding_api_d',
+ '$BUILD_DIR/mongo/db/stats/fill_locker_info',
+ '$BUILD_DIR/mongo/db/stats/transaction_stats',
'$BUILD_DIR/mongo/db/storage/wiredtiger/storage_wiredtiger',
'$BUILD_DIR/mongo/executor/async_timer_mock',
'$BUILD_DIR/mongo/idl/idl_parser',
@@ -2780,8 +2793,7 @@ if wiredtiger:
'$BUILD_DIR/mongo/util/clock_source_mock',
'$BUILD_DIR/mongo/util/net/network',
'$BUILD_DIR/mongo/util/net/ssl_options_server',
- 'auth/authmocks',
- 'catalog/database_holder',
+ 'batched_write_context',
'catalog_raii',
'collection_index_usage_tracker',
'commands',
@@ -2812,16 +2824,7 @@ if wiredtiger:
'range_arithmetic',
'read_write_concern_defaults_mock',
'record_id_helpers',
- 'repl/image_collection_entry',
- 'repl/mock_repl_coord_server_fixture',
- 'repl/oplog_interface_local',
- 'repl/repl_coordinator_interface',
- 'repl/repl_server_parameters',
- 'repl/replica_set_aware_service',
- 'repl/replmocks',
- 'repl/storage_interface_impl',
'rw_concern_d',
- 's/shard_server_test_fixture',
'server_options_core',
'server_options_servers',
'service_context',
@@ -2836,8 +2839,6 @@ if wiredtiger:
'signed_logical_time',
'snapshot_window_options',
'startup_warnings_mongod',
- 'stats/fill_locker_info',
- 'stats/transaction_stats',
'time_proof_service',
'transaction',
'transaction_api',
diff --git a/src/mongo/db/auth/README.md b/src/mongo/db/auth/README.md
index 87354dded20..2e49ab5c20a 100644
--- a/src/mongo/db/auth/README.md
+++ b/src/mongo/db/auth/README.md
@@ -11,6 +11,7 @@
- [Cluster Authentication](#cluster-authentication)
- [Localhost Auth Bypass](#localhost-auth-bypass)
- [Authorization](#authorization)
+ - [AuthName](#authname) (`UserName` and `RoleName`)
- [Users](#users)
- [User Roles](#user-roles)
- [User Credentials](#user-credentials)
@@ -21,6 +22,9 @@
- [Role Authentication Restrictions](#role-authentication-restrictions)
- [User and Role Management](#user-and-role-management)
- [UMC Transactions](#umc-transactions)
+ - [Privilege](#privilege)
+ - [ResourcePattern](#resourcepattern)
+ - [ActionType](#actiontype)
- [Command Execution](#command-execution)
- [Authorization Caching](#authorization-caching)
- [Authorization Manager External State](#authorization-manager-external-state)
@@ -294,23 +298,39 @@ execute commands.
[Here](https://github.com/mongodb/mongo/blob/r4.4.0/src/mongo/db/auth/authorization_session_impl.cpp#L126)
is the authorization session calling into the authorization manager to acquire a user.
-Clients are expected to authenticate at most one time on a connection, and a
-client which opts into API Version 1 will receive an error if it attempts to
-authenticate more than once. However, legacy clients which have not opted into
-an API Version may authenticate multiple times. If a legacy client
-authenticates as UserA on a database and then authenticates as UserB on the
-same database, its AuthorizationSession will implicitly logout UserA and
-replace its cached User object with that of UserB. Alternatively, if a legacy
-client authenticates as UserA on one database and then authenticates as UserB
-on a second database, its AuthorizationSession will store User objects for both
-UserA and UserB, and will consider itself authorized for the union of the two
-users' privileges. Because modern drivers no longer allow applications to
-authenticate with multiple user identities, this behavior in
-AuthorizationSession is deprecated, and support for it will eventually be
-removed.
+Clients are expected to authenticate at most one time on a connection.
+Attempting to reauthenticate as the currently authenticated user results
+in a warning being emitted to the global log, but the operation succeeds.
+Attempting to authenticate as a new user on an already authenticated connection is an error.
+### AuthName
-### User
+The [AuthName](auth_name.h) template
+provides the generic implementation for `UserName` and `RoleName` implementations.
+Each of these objects is made up of three component pieces of information.
+
+| Field | Accessor | Use |
+| -- | -- | -- |
+| `_name` | `getName()` | The symbolic name associated with the user or role (e.g. 'Alice') |
+| `_db` | `getDB()` | The authentication database associated with the named auth identifier (e.g. 'admin' or 'test') |
+| `_tenant` | `getTenant()` | When used in multitenancy mode, this value retains a `TenantId` for authorization checking. |
+
+[`UserName`](user_name.h) and [`RoleName`](role_name.h) specializations are CRTP defined
+to include additional `getUser()` and `getRole()` accessors which proxy to `getName()`,
+and provide a set of `constexpr StringData` identifiers relating to their type.
+
+#### Serializations
+
+* `getDisplayName()` and `toString()` create a new string of the form `name@db` for use in log messages.
+* `getUnambiguousName()` creates a new string of the form `db.name` for use in generating `_id` fields for authzn documents and generating unique hashes for logical session identifiers.
+
+#### Multitenancy
+
+`AuthName` objects may be associated with a `TenantId` either separately via `AuthName(StringData name, StringData db, boost::optional<TenantId> tenant = boost::none)` or using the compound `DatabaseName` type `AuthName(StringData name, DatabaseName db)`.
+
+When a `TenantId` is associated with an `AuthName`, it will NOT be included in `BSON` or `String` serializations unless explicitly requested with a boolean argument to these functions.
+
+### Users
`User` objects contain authorization information with regards to a specific user in a database. The
`AuthorizationManager` has control over creation, management, and deletion of a `UserHandle` object,
@@ -498,6 +518,48 @@ Authentication restrictions defined on a role have the same meaning as
those defined directly on users. The effective set of `authenticationRestrictions`
imposed on a user is the union of all direct and indirect authentication restrictions.
+### Privilege
+
+A [Privilege](privilege.h) represents a tuple of [ResourcePattern](resource_pattern.h) and
+[set](action_set.h) of [ActionType](action_type.idl)s which describe the resources which
+may be acted upon by a user, and what actions they may perform, respectively.
+
+A [PrivilegeVector](privilege.h) is an alias for `std::vector<Privilege>` and represents
+the full set of privileges across all resource and actiontype combinations for the user or role.
+
+#### ResourcePattern
+
+A resource pattern is a combination of a [MatchType](action_type.idl) with a `NamespaceString` to possibly narrow the scope of that `MatchType`. Most MatchTypes refer to some storage resource, such as a specific collection or database; however, `kMatchClusterResource` refers to an entire host, replica set, or cluster.
+
+| MatchType | As encoded in a privilege doc | Usage |
+| -- | -- | -- |
+| `kMatchNever` | _Inexpressible_ | A base type only used internally to indicate that the privilege specified by the ResourcePattern can not match any real resource |
+| `kMatchClusterResource` | `{ cluster : true }` | Commonly used with host and cluster management actions such as `ActionType::addShard`, `ActionType::setParameter`, or `ActionType::shutdown`. |
+| `kMatchAnyResource` | `{ anyResource: true }` | Matches all storage resources, even [non-normal namespaces](#normal-namespace) such as `db.system.views`. |
+| `kMatchAnyNormalResource` | `{ db: '', collection: '' }` | Matches all [normal](#normal-namespace) storage resources. Used with [builtin role](builtin_roles.cpp) `readWriteAnyDatabase`. |
+| `kMatchDatabaseName` | `{ db: 'dbname', collection: '' }` | Matches all [normal](#normal-namespace) storage resources for a specific named database. Used with [builtin role](builtin_roles.cpp) `readWrite`. |
+| `kMatchCollectionName` | `{ db: '', collection: 'collname' }` | Matches all storage resources, normal or not, which have the exact collection suffix '`collname`'. For example, to provide read-only access to `*.system.js`. |
+| `kMatchExactNamespace` | `{ db: 'dbname', collection: 'collname' }` | Matches the exact namespace '`dbname`.`collname`'. |
+| `kMatchAnySystemBucketResource` | `{ db: '', system_buckets: '' }` | Matches the namespace pattern `*.system.buckets.*`. |
+| `kMatchAnySystemBucketInDBResource` | `{ db: 'dbname', system_buckets: '' }` | Matches the namespace pattern `dbname.system.buckets.*`. |
+| `kMatchAnySystemBucketInAnyDBResource` | `{ db: '', system_buckets: 'suffix' }` | Matches the namespace pattern `*.system.buckets.suffix`. |
+| `kMatchExactSystemBucketResource` | `{ db: 'dbname', system_buckets: 'suffix' }` | Matches the exact namespace `dbname.system.buckets.suffix`. |
+
+##### Normal Namespace
+
+A "normal" resource is a `namespace` which does not match either of the following patterns:
+
+| Namespace pattern | Examples | Usage |
+| -- | -- | -- |
+| `local.replset.*` | `local.replset.initialSyncId` | Namespaces used by Replication to manage per-host state. |
+| `*.system.*` | `admin.system.version` `myDB.system.views` | Collections used by the database to support user collections. |
+
+See also: [NamespaceString::isNormalCollection()](../namespace_string.h)
+
+#### ActionType
+
+An [ActionType](action_type.idl) is a task which a client may be expected to perform. These are combined with [ResourcePattern](#resourcepattern)s to produce a [Privilege](#privilege). Note that not all `ActionType`s make sense with all `ResourcePattern`s (e.g. `ActionType::shutdown` applied to `ResourcePattern` `{ db: 'test', collection: 'my.awesome.collection' }`), however the system will generally not prohibit declaring these combinations.
+
### User and Role Management
`User Management Commands`, sometimes referred to as `UMCs` provide an
@@ -530,6 +592,13 @@ allowing a rollback.
The [UMCTransaction](https://github.com/mongodb/mongo/blob/92cc84b0171942375ccbd2312a052bc7e9f159dd/src/mongo/db/commands/user_management_commands.cpp#L756)
class provides an abstraction around this mechanism.
+#### Multitenancy
+
+When acting in multitenancy mode, each tenant uses distinct storage for their users and roles.
+For example, given a `TenantId` of `"012345678ABCDEF01234567"`, all users for that tenant will
+be found in the `012345678ABCDEF01234567_admin.system.users` collection, and all roles will be
+found in the `012345678ABCDEF01234567_admin.system.roles` collection.
+
### Command Execution
When a client attempts to execute a command, the service entry point calls
diff --git a/src/mongo/db/auth/SConscript b/src/mongo/db/auth/SConscript
index 3df9d6922f2..0f4dcbc61bb 100644
--- a/src/mongo/db/auth/SConscript
+++ b/src/mongo/db/auth/SConscript
@@ -7,8 +7,9 @@ env = env.Clone()
env.Library(
target='security_token',
source=[
- 'security_token.cpp',
+ 'security_token_authentication_guard.cpp',
'security_token.idl',
+ 'validated_tenancy_scope.cpp',
],
LIBDEPS=[
'$BUILD_DIR/mongo/base',
@@ -540,6 +541,7 @@ env.CppUnitTest(
'sasl_scram_test.cpp',
'security_key_test.cpp',
'user_document_parser_test.cpp',
+ 'validated_tenancy_scope_test.cpp',
],
LIBDEPS=[
'$BUILD_DIR/mongo/base',
diff --git a/src/mongo/db/auth/auth_name.h b/src/mongo/db/auth/auth_name.h
index 62fd1d6fa8e..6c5b052e5c7 100644
--- a/src/mongo/db/auth/auth_name.h
+++ b/src/mongo/db/auth/auth_name.h
@@ -39,6 +39,7 @@
#include "mongo/base/string_data.h"
#include "mongo/bson/bsonelement.h"
#include "mongo/bson/bsonobjbuilder.h"
+#include "mongo/db/database_name.h"
#include "mongo/db/tenant_id.h"
#include "mongo/stdx/variant.h"
@@ -71,6 +72,10 @@ public:
_tenant = std::move(tenant);
}
+ template <typename Name>
+ AuthName(Name name, const DatabaseName& dbname)
+ : AuthName(name, dbname.db(), dbname.tenantId()) {}
+
/**
* Parses a string of the form "db.name" into an AuthName object with an optional tenant.
*/
@@ -105,6 +110,10 @@ public:
return _db;
}
+ DatabaseName getDatabaseName() const {
+ return DatabaseName(_tenant, _db);
+ }
+
/**
* Gets the TenantId, if any, associated with this AuthName.
*/
diff --git a/src/mongo/db/auth/authorization_manager.h b/src/mongo/db/auth/authorization_manager.h
index c8072025264..fca637f77d3 100644
--- a/src/mongo/db/auth/authorization_manager.h
+++ b/src/mongo/db/auth/authorization_manager.h
@@ -298,7 +298,7 @@ public:
* Delegates method call to the underlying AuthzManagerExternalState.
*/
virtual Status getRoleDescriptionsForDB(OperationContext* opCtx,
- StringData dbname,
+ const DatabaseName& dbname,
PrivilegeFormat privilegeFormat,
AuthenticationRestrictionsFormat,
bool showBuiltinRoles,
diff --git a/src/mongo/db/auth/authorization_manager_impl.cpp b/src/mongo/db/auth/authorization_manager_impl.cpp
index 5a010ea5f9b..4ecfb5af2df 100644
--- a/src/mongo/db/auth/authorization_manager_impl.cpp
+++ b/src/mongo/db/auth/authorization_manager_impl.cpp
@@ -476,7 +476,7 @@ Status AuthorizationManagerImpl::getRolesAsUserFragment(
Status AuthorizationManagerImpl::getRoleDescriptionsForDB(
OperationContext* opCtx,
- StringData dbname,
+ const DatabaseName& dbname,
PrivilegeFormat privileges,
AuthenticationRestrictionsFormat restrictions,
bool showBuiltinRoles,
diff --git a/src/mongo/db/auth/authorization_manager_impl.h b/src/mongo/db/auth/authorization_manager_impl.h
index 58a8e3e5c51..81f247b2b1d 100644
--- a/src/mongo/db/auth/authorization_manager_impl.h
+++ b/src/mongo/db/auth/authorization_manager_impl.h
@@ -91,7 +91,7 @@ public:
BSONObj* result) override;
Status getRoleDescriptionsForDB(OperationContext* opCtx,
- StringData dbname,
+ const DatabaseName& dbname,
PrivilegeFormat privilegeFormat,
AuthenticationRestrictionsFormat,
bool showBuiltinRoles,
diff --git a/src/mongo/db/auth/authorization_session_impl.cpp b/src/mongo/db/auth/authorization_session_impl.cpp
index 173a19cfd58..5ebc878a07c 100644
--- a/src/mongo/db/auth/authorization_session_impl.cpp
+++ b/src/mongo/db/auth/authorization_session_impl.cpp
@@ -43,7 +43,7 @@
#include "mongo/db/auth/action_type.h"
#include "mongo/db/auth/authz_session_external_state.h"
#include "mongo/db/auth/privilege.h"
-#include "mongo/db/auth/security_token.h"
+#include "mongo/db/auth/validated_tenancy_scope.h"
#include "mongo/db/bson/dotted_path_support.h"
#include "mongo/db/client.h"
#include "mongo/db/namespace_string.h"
@@ -245,14 +245,15 @@ Status AuthorizationSessionImpl::addAndAuthorizeUser(OperationContext* opCtx,
stdx::lock_guard<Client> lk(*opCtx->getClient());
- if (auto token = auth::getSecurityToken(opCtx)) {
+ auto validatedTenancyScope = auth::ValidatedTenancyScope::get(opCtx);
+ if (validatedTenancyScope && validatedTenancyScope->hasAuthenticatedUser()) {
uassert(
6161501,
"Attempt to authorize via security token on connection with established authentication",
_authenticationMode != AuthenticationMode::kConnection);
uassert(6161502,
"Attempt to authorize a user other than that present in the security token",
- token->getAuthenticatedUser() == userName);
+ validatedTenancyScope->authenticatedUser() == userName);
validateSecurityTokenUserPrivileges(user->getPrivileges());
_authenticationMode = AuthenticationMode::kSecurityToken;
} else {
diff --git a/src/mongo/db/auth/authz_manager_external_state.h b/src/mongo/db/auth/authz_manager_external_state.h
index b693d3e0622..96cf71257e0 100644
--- a/src/mongo/db/auth/authz_manager_external_state.h
+++ b/src/mongo/db/auth/authz_manager_external_state.h
@@ -42,6 +42,7 @@
#include "mongo/db/auth/role_name.h"
#include "mongo/db/auth/user.h"
#include "mongo/db/auth/user_name.h"
+#include "mongo/db/database_name.h"
#include "mongo/db/jsobj.h"
namespace mongo {
@@ -161,7 +162,7 @@ public:
* contain a "warnings" array, with std::string messages describing inconsistencies.
*/
virtual Status getRoleDescriptionsForDB(OperationContext* opCtx,
- StringData dbname,
+ const DatabaseName& dbname,
PrivilegeFormat showPrivileges,
AuthenticationRestrictionsFormat,
bool showBuiltinRoles,
diff --git a/src/mongo/db/auth/authz_manager_external_state_d.cpp b/src/mongo/db/auth/authz_manager_external_state_d.cpp
index b602cc1b963..788526abc82 100644
--- a/src/mongo/db/auth/authz_manager_external_state_d.cpp
+++ b/src/mongo/db/auth/authz_manager_external_state_d.cpp
@@ -68,7 +68,7 @@ Status AuthzManagerExternalStateMongod::query(
FindCommandRequest findRequest{collectionName};
findRequest.setFilter(filter);
findRequest.setProjection(projection);
- client.find(std::move(findRequest), ReadPreferenceSetting{}, resultProcessor);
+ client.find(std::move(findRequest), resultProcessor);
return Status::OK();
} catch (const DBException& e) {
return e.toStatus();
diff --git a/src/mongo/db/auth/authz_manager_external_state_local.cpp b/src/mongo/db/auth/authz_manager_external_state_local.cpp
index 2eb9ba9c47d..0228897aad8 100644
--- a/src/mongo/db/auth/authz_manager_external_state_local.cpp
+++ b/src/mongo/db/auth/authz_manager_external_state_local.cpp
@@ -631,7 +631,7 @@ Status AuthzManagerExternalStateLocal::getRolesDescription(
Status AuthzManagerExternalStateLocal::getRoleDescriptionsForDB(
OperationContext* opCtx,
- StringData dbname,
+ const DatabaseName& dbname,
PrivilegeFormat showPrivileges,
AuthenticationRestrictionsFormat showRestrictions,
bool showBuiltinRoles,
@@ -682,7 +682,7 @@ Status AuthzManagerExternalStateLocal::getRoleDescriptionsForDB(
return query(opCtx,
getRolesCollection(getActiveTenant(opCtx)),
- BSON(AuthorizationManager::ROLE_DB_FIELD_NAME << dbname),
+ BSON(AuthorizationManager::ROLE_DB_FIELD_NAME << dbname.db()),
BSONObj(),
[&](const BSONObj& roleDoc) {
try {
diff --git a/src/mongo/db/auth/authz_manager_external_state_local.h b/src/mongo/db/auth/authz_manager_external_state_local.h
index 2d6ae65b235..7c5e690b9d6 100644
--- a/src/mongo/db/auth/authz_manager_external_state_local.h
+++ b/src/mongo/db/auth/authz_manager_external_state_local.h
@@ -74,7 +74,7 @@ public:
AuthenticationRestrictionsFormat,
BSONObj* result) override;
Status getRoleDescriptionsForDB(OperationContext* opCtx,
- StringData dbname,
+ const DatabaseName& dbname,
PrivilegeFormat showPrivileges,
AuthenticationRestrictionsFormat,
bool showBuiltinRoles,
diff --git a/src/mongo/db/auth/authz_manager_external_state_s.h b/src/mongo/db/auth/authz_manager_external_state_s.h
index 58547be92b2..a1ac0feee41 100644
--- a/src/mongo/db/auth/authz_manager_external_state_s.h
+++ b/src/mongo/db/auth/authz_manager_external_state_s.h
@@ -79,7 +79,7 @@ public:
return {ErrorCodes::NotImplemented, "AuthzMongos::getRolesAsUserFragment"};
}
Status getRoleDescriptionsForDB(OperationContext* opCtx,
- StringData dbname,
+ const DatabaseName& dbname,
PrivilegeFormat showPrivileges,
AuthenticationRestrictionsFormat,
bool showBuiltinRoles,
diff --git a/src/mongo/db/auth/builtin_roles.cpp b/src/mongo/db/auth/builtin_roles.cpp
index 2b0c63cb798..a263d1d318c 100644
--- a/src/mongo/db/auth/builtin_roles.cpp
+++ b/src/mongo/db/auth/builtin_roles.cpp
@@ -781,19 +781,19 @@ const std::map<StringData, BuiltinRoleDefinition> kBuiltinRoles({
// $external is a virtual database used for X509, LDAP,
// and other authentication mechanisms and not used for storage.
// Therefore, granting privileges on this database does not make sense.
-bool isValidDB(StringData dbname) {
+bool isValidDB(const DatabaseName& dbname) {
return NamespaceString::validDBName(dbname, NamespaceString::DollarInDbNameBehavior::Allow) &&
- (dbname != NamespaceString::kExternalDb);
+ (dbname.db() != NamespaceString::kExternalDb);
}
} // namespace
-stdx::unordered_set<RoleName> auth::getBuiltinRoleNamesForDB(StringData dbname) {
+stdx::unordered_set<RoleName> auth::getBuiltinRoleNamesForDB(const DatabaseName& dbname) {
if (!isValidDB(dbname)) {
return {};
}
- const bool isAdmin = dbname == ADMIN_DBNAME;
+ const bool isAdmin = dbname.db() == ADMIN_DBNAME;
stdx::unordered_set<RoleName> roleNames;
for (const auto& [role, def] : kBuiltinRoles) {
@@ -808,7 +808,7 @@ bool auth::addPrivilegesForBuiltinRole(const RoleName& roleName, PrivilegeVector
auto role = roleName.getRole();
auto dbname = roleName.getDB();
- if (!isValidDB(dbname)) {
+ if (!isValidDB(roleName.getDatabaseName())) {
return false;
}
@@ -834,8 +834,7 @@ void auth::generateUniversalPrivileges(PrivilegeVector* privileges) {
}
bool auth::isBuiltinRole(const RoleName& role) {
- auto dbname = role.getDB();
- if (!isValidDB(dbname)) {
+ if (!isValidDB(role.getDatabaseName())) {
return false;
}
@@ -844,7 +843,7 @@ bool auth::isBuiltinRole(const RoleName& role) {
return false;
}
- return !it->second.adminOnly() || (dbname == ADMIN_DBNAME);
+ return !it->second.adminOnly() || (role.getDB() == ADMIN_DBNAME);
}
} // namespace mongo
diff --git a/src/mongo/db/auth/builtin_roles.h b/src/mongo/db/auth/builtin_roles.h
index 3665e79b9ba..e20dbdaa86d 100644
--- a/src/mongo/db/auth/builtin_roles.h
+++ b/src/mongo/db/auth/builtin_roles.h
@@ -31,6 +31,7 @@
#include "mongo/db/auth/privilege.h"
#include "mongo/db/auth/role_name.h"
+#include "mongo/db/database_name.h"
#include "mongo/stdx/unordered_set.h"
namespace mongo {
@@ -47,7 +48,7 @@ bool addPrivilegesForBuiltinRole(const RoleName& role, PrivilegeVector* privileg
/**
* Ennumerate all builtin RoleNames for the given database.
*/
-stdx::unordered_set<RoleName> getBuiltinRoleNamesForDB(StringData dbname);
+stdx::unordered_set<RoleName> getBuiltinRoleNamesForDB(const DatabaseName& dbname);
/**
* Adds to "privileges" the necessary privileges to do absolutely anything on the system.
diff --git a/src/mongo/db/auth/builtin_roles_test.cpp b/src/mongo/db/auth/builtin_roles_test.cpp
index 15b5aa932fd..a7662650d21 100644
--- a/src/mongo/db/auth/builtin_roles_test.cpp
+++ b/src/mongo/db/auth/builtin_roles_test.cpp
@@ -75,7 +75,7 @@ TEST(BuiltinRoles, BuiltinRolesOnlyOnAppropriateDatabases) {
}
TEST(BuiltinRoles, getBuiltinRolesForDB) {
- auto adminRoles = auth::getBuiltinRoleNamesForDB("admin");
+ auto adminRoles = auth::getBuiltinRoleNamesForDB({boost::none, "admin"});
ASSERT(adminRoles.contains(RoleName("read", "admin")));
ASSERT(adminRoles.contains(RoleName("readAnyDatabase", "admin")));
for (const auto& role : adminRoles) {
@@ -83,7 +83,7 @@ TEST(BuiltinRoles, getBuiltinRolesForDB) {
ASSERT(auth::isBuiltinRole(role));
}
- auto testRoles = auth::getBuiltinRoleNamesForDB("test");
+ auto testRoles = auth::getBuiltinRoleNamesForDB({boost::none, "test"});
ASSERT(testRoles.contains(RoleName("read", "test")));
ASSERT(!testRoles.contains(RoleName("readAnyDatabase", "test")));
for (const auto& role : testRoles) {
diff --git a/src/mongo/db/auth/security_token.h b/src/mongo/db/auth/security_token_authentication_guard.cpp
index 2e45e63952f..5be6de3dc75 100644
--- a/src/mongo/db/auth/security_token.h
+++ b/src/mongo/db/auth/security_token_authentication_guard.cpp
@@ -1,5 +1,5 @@
/**
- * Copyright (C) 2021-present MongoDB, Inc.
+ * Copyright (C) 2022-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
@@ -27,51 +27,40 @@
* it in the license file.
*/
-#pragma once
-#include <boost/optional.hpp>
+#include "mongo/db/auth/security_token_authentication_guard.h"
-#include "mongo/bson/bsonobj.h"
-#include "mongo/db/auth/security_token_gen.h"
-#include "mongo/db/client.h"
-#include "mongo/db/operation_context.h"
+#include "mongo/db/auth/authorization_session.h"
+#include "mongo/logv2/log.h"
+
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kAccessControl
namespace mongo {
namespace auth {
-class SecurityTokenAuthenticationGuard {
-public:
- SecurityTokenAuthenticationGuard() = delete;
- SecurityTokenAuthenticationGuard(OperationContext* opCtx);
- ~SecurityTokenAuthenticationGuard();
-
-private:
- Client* _client;
-};
-
-/**
- * Takes an unsigned security token as input and applies
- * the temporary signature algorithm to extend it into a full SecurityToken.
- */
-BSONObj signSecurityToken(BSONObj obj);
-
-/**
- * Verify the contents of the provided security token
- * using the temporary signing algorithm,
- */
-SecurityToken verifySecurityToken(BSONObj obj);
+SecurityTokenAuthenticationGuard::SecurityTokenAuthenticationGuard(
+ OperationContext* opCtx, const ValidatedTenancyScope& token) {
+ if (token.hasAuthenticatedUser()) {
+ const auto& userName = token.authenticatedUser();
+ auto* client = opCtx->getClient();
+ uassertStatusOK(AuthorizationSession::get(client)->addAndAuthorizeUser(opCtx, userName));
+ _client = client;
-/**
- * Parse any SecurityToken from the OpMsg and place it as a decoration
- * on OperationContext
- */
-void readSecurityTokenMetadata(OperationContext* opCtx, BSONObj securityToken);
+ LOGV2_DEBUG(5838100,
+ 4,
+ "Authenticated with security token",
+ "token"_attr = token.getOriginalToken());
+ } else {
+ _client = nullptr;
+ }
+}
-/**
- * Retrieve the Security Token associated with this operation context
- */
-using MaybeSecurityToken = boost::optional<SecurityToken>;
-MaybeSecurityToken getSecurityToken(OperationContext* opCtx);
+SecurityTokenAuthenticationGuard::~SecurityTokenAuthenticationGuard() {
+ if (_client) {
+ // SecurityToken based users are "logged out" at the end of their request.
+ AuthorizationSession::get(_client)->logoutSecurityTokenUser(_client);
+ }
+}
} // namespace auth
} // namespace mongo
diff --git a/src/mongo/db/initialize_snmp.h b/src/mongo/db/auth/security_token_authentication_guard.h
index 5fb85193b43..c73e0324e5f 100644
--- a/src/mongo/db/initialize_snmp.h
+++ b/src/mongo/db/auth/security_token_authentication_guard.h
@@ -1,5 +1,5 @@
/**
- * Copyright (C) 2018-present MongoDB, Inc.
+ * Copyright (C) 2022-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
@@ -27,26 +27,29 @@
* it in the license file.
*/
-
#pragma once
-#include <functional>
+#include "mongo/db/auth/validated_tenancy_scope.h"
+#include "mongo/db/client.h"
+#include "mongo/db/operation_context.h"
namespace mongo {
-/**
- * Registers the specified initializer function `init` as the initialization handler for SNMP
- * enterprise modules.
- *
- * NOTE: This function may only be called once.
- * NOTE: This function is not multithread safe.
- */
-void registerSNMPInitializer(std::function<void()> init);
+namespace auth {
/**
- * Performs initialization for SNMP enterprise modules, if present, otherwise does nothing.
- *
- * This will call the function registered by `registerSNMPInitializer`. It is safe to call when no
- * function has been registered.
+ * If ValidatedTenancyScope represents an AuthenticatedUser,
+ * that user will be authenticated against the client until this guard dies.
+ * This is used in ServiceEntryPoint to scope authentication to a single operation.
*/
-void initializeSNMP();
+class SecurityTokenAuthenticationGuard {
+public:
+ SecurityTokenAuthenticationGuard() = delete;
+ SecurityTokenAuthenticationGuard(OperationContext*, const ValidatedTenancyScope&);
+ ~SecurityTokenAuthenticationGuard();
+
+private:
+ Client* _client;
+};
+
+} // namespace auth
} // namespace mongo
diff --git a/src/mongo/db/auth/security_token.cpp b/src/mongo/db/auth/validated_tenancy_scope.cpp
index 586abb92aee..2ab66b3abd3 100644
--- a/src/mongo/db/auth/security_token.cpp
+++ b/src/mongo/db/auth/validated_tenancy_scope.cpp
@@ -1,5 +1,5 @@
/**
- * Copyright (C) 2021-present MongoDB, Inc.
+ * Copyright (C) 2022-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
@@ -27,26 +27,23 @@
* it in the license file.
*/
-
-#include "mongo/db/auth/security_token.h"
-
-#include <boost/optional.hpp>
+#include "mongo/db/auth/validated_tenancy_scope.h"
#include "mongo/base/init.h"
#include "mongo/db/auth/authorization_session.h"
+#include "mongo/db/auth/security_token_gen.h"
+#include "mongo/db/multitenancy.h"
#include "mongo/db/multitenancy_gen.h"
#include "mongo/db/server_feature_flags_gen.h"
-#include "mongo/db/tenant_id.h"
#include "mongo/logv2/log.h"
#include "mongo/logv2/log_detail.h"
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kAccessControl
-
-namespace mongo {
-namespace auth {
+namespace mongo::auth {
namespace {
-const auto securityTokenDecoration = OperationContext::declareDecoration<MaybeSecurityToken>();
+const auto validatedTenancyScopeDecoration =
+ OperationContext::declareDecoration<boost::optional<ValidatedTenancyScope>>();
MONGO_INITIALIZER(SecurityTokenOptionValidate)(InitializerContext*) {
uassert(ErrorCodes::BadValue,
"multitenancySupport may not be specified if featureFlagMongoStore is not enabled",
@@ -54,15 +51,13 @@ MONGO_INITIALIZER(SecurityTokenOptionValidate)(InitializerContext*) {
if (gMultitenancySupport) {
logv2::detail::setGetTenantIDCallback([]() -> boost::optional<TenantId> {
auto* client = Client::getCurrent();
- if (!client)
+ if (!client) {
return boost::none;
+ }
if (auto* opCtx = client->getOperationContext()) {
- auto token = getSecurityToken(opCtx);
- if (token) {
- return token->getAuthenticatedUser().getTenant();
- } else {
- return boost::none;
+ if (auto token = ValidatedTenancyScope::get(opCtx)) {
+ return token->tenantId();
}
}
@@ -72,45 +67,10 @@ MONGO_INITIALIZER(SecurityTokenOptionValidate)(InitializerContext*) {
}
} // namespace
-SecurityTokenAuthenticationGuard::SecurityTokenAuthenticationGuard(OperationContext* opCtx) {
- auto token = getSecurityToken(opCtx);
- if (token == boost::none) {
- _client = nullptr;
- return;
- }
-
- auto client = opCtx->getClient();
- uassertStatusOK(AuthorizationSession::get(client)->addAndAuthorizeUser(
- opCtx, token->getAuthenticatedUser()));
- _client = client;
-}
-
-SecurityTokenAuthenticationGuard::~SecurityTokenAuthenticationGuard() {
- if (_client) {
- // SecurityToken based users are "logged out" at the end of their request.
- AuthorizationSession::get(_client)->logoutSecurityTokenUser(_client);
- }
-}
-
-BSONObj signSecurityToken(BSONObj obj) {
- auto authUserElem = obj[SecurityToken::kAuthenticatedUserFieldName];
- uassert(ErrorCodes::BadValue,
- "Invalid field(s) in token being signed",
- (authUserElem.type() == Object) && (obj.nFields() == 1));
-
- auto authUserObj = authUserElem.Obj();
- ConstDataRange authUserCDR(authUserObj.objdata(), authUserObj.objsize());
-
- // Placeholder algorithm.
- auto sig = SHA256Block::computeHash({authUserCDR});
-
- BSONObjBuilder signedToken(obj);
- signedToken.appendBinData(SecurityToken::kSigFieldName, sig.size(), BinDataGeneral, sig.data());
- return signedToken.obj();
-}
-
-SecurityToken verifySecurityToken(BSONObj obj) {
- uassert(ErrorCodes::BadValue, "Multitenancy not enabled", gMultitenancySupport);
+ValidatedTenancyScope::ValidatedTenancyScope(BSONObj obj, InitTag tag) : _originalToken(obj) {
+ uassert(ErrorCodes::InvalidOptions,
+ "Multitenancy not enabled, refusing to accept securityToken",
+ gMultitenancySupport || (tag == InitTag::kInitForShell));
auto token = SecurityToken::parse({"Security Token"}, obj);
auto authenticatedUser = token.getAuthenticatedUser();
@@ -126,23 +86,97 @@ SecurityToken verifySecurityToken(BSONObj obj) {
auto computed = SHA256Block::computeHash({authUserCDR});
uassert(ErrorCodes::Unauthorized, "Token signature invalid", computed == token.getSig());
- return token;
+
+ _tenantOrUser = std::move(authenticatedUser);
}
-void readSecurityTokenMetadata(OperationContext* opCtx, BSONObj securityToken) try {
- if (securityToken.nFields() == 0) {
- return;
+ValidatedTenancyScope::ValidatedTenancyScope(Client* client, TenantId tenant)
+ : _tenantOrUser(std::move(tenant)) {
+ uassert(ErrorCodes::InvalidOptions,
+ "Multitenancy not enabled, refusing to accept $tenant parameter",
+ gMultitenancySupport);
+
+ uassert(ErrorCodes::Unauthorized,
+ "'$tenant' may only be specified with the useTenant action type",
+ client &&
+ AuthorizationSession::get(client)->isAuthorizedForActionsOnResource(
+ ResourcePattern::forClusterResource(), ActionType::useTenant));
+}
+
+boost::optional<ValidatedTenancyScope> ValidatedTenancyScope::create(Client* client,
+ BSONObj body,
+ BSONObj securityToken) {
+ if (!gMultitenancySupport) {
+ return boost::none;
+ }
+
+ auto dollarTenantElem = body["$tenant"_sd];
+ const bool hasToken = securityToken.nFields() > 0;
+
+ uassert(6545800,
+ "Cannot pass $tenant id if also passing securityToken",
+ dollarTenantElem.eoo() || !hasToken);
+ uassert(ErrorCodes::OperationFailed,
+ "Cannot process $tenant id when no client is available",
+ dollarTenantElem.eoo() || client);
+
+ // TODO SERVER-66822: Re-enable this uassert.
+ // uassert(ErrorCodes::Unauthorized,
+ // "Multitenancy is enabled, $tenant id or securityToken is required.",
+ // dollarTenantElem || securityToken.nFields() > 0);
+
+ if (dollarTenantElem) {
+ return ValidatedTenancyScope(client, TenantId::parseFromBSON(dollarTenantElem));
+ } else if (hasToken) {
+ return ValidatedTenancyScope(securityToken);
+ } else {
+ return boost::none;
+ }
+}
+
+bool ValidatedTenancyScope::hasAuthenticatedUser() const {
+ return stdx::holds_alternative<UserName>(_tenantOrUser);
+}
+
+const UserName& ValidatedTenancyScope::authenticatedUser() const {
+ invariant(hasAuthenticatedUser());
+ return stdx::get<UserName>(_tenantOrUser);
+}
+
+const TenantId& ValidatedTenancyScope::tenantId() const {
+ if (hasAuthenticatedUser()) {
+ return stdx::get<UserName>(_tenantOrUser).getTenant().get();
+ } else {
+ invariant(stdx::holds_alternative<TenantId>(_tenantOrUser));
+ return stdx::get<TenantId>(_tenantOrUser);
}
+}
+
+const boost::optional<ValidatedTenancyScope>& ValidatedTenancyScope::get(OperationContext* opCtx) {
+ return validatedTenancyScopeDecoration(opCtx);
+}
- securityTokenDecoration(opCtx) = verifySecurityToken(securityToken);
- LOGV2_DEBUG(5838100, 4, "Accepted security token", "token"_attr = securityToken);
-} catch (const DBException& ex) {
- uassertStatusOK(ex.toStatus().withContext("Unable to parse Security Token from Metadata"));
+void ValidatedTenancyScope::set(OperationContext* opCtx,
+ boost::optional<ValidatedTenancyScope> token) {
+ validatedTenancyScopeDecoration(opCtx) = std::move(token);
}
-MaybeSecurityToken getSecurityToken(OperationContext* opCtx) {
- return securityTokenDecoration(opCtx);
+ValidatedTenancyScope::ValidatedTenancyScope(BSONObj obj, TokenForTestingTag) {
+ auto authUserElem = obj[SecurityToken::kAuthenticatedUserFieldName];
+ uassert(ErrorCodes::BadValue,
+ "Invalid field(s) in token being signed",
+ (authUserElem.type() == Object) && (obj.nFields() == 1));
+
+ auto authUserObj = authUserElem.Obj();
+ ConstDataRange authUserCDR(authUserObj.objdata(), authUserObj.objsize());
+
+ // Placeholder algorithm.
+ auto sig = SHA256Block::computeHash({authUserCDR});
+
+ BSONObjBuilder signedToken(obj);
+ signedToken.appendBinData(SecurityToken::kSigFieldName, sig.size(), BinDataGeneral, sig.data());
+ _originalToken = signedToken.obj();
+ _tenantOrUser = UserName::parseFromBSONObj(authUserObj);
}
-} // namespace auth
-} // namespace mongo
+} // namespace mongo::auth
diff --git a/src/mongo/db/auth/validated_tenancy_scope.h b/src/mongo/db/auth/validated_tenancy_scope.h
new file mode 100644
index 00000000000..302b3fdac5a
--- /dev/null
+++ b/src/mongo/db/auth/validated_tenancy_scope.h
@@ -0,0 +1,116 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include <boost/optional.hpp>
+
+#include "mongo/bson/bsonobj.h"
+#include "mongo/db/auth/user_name.h"
+#include "mongo/db/tenant_id.h"
+#include "mongo/stdx/variant.h"
+
+namespace mongo {
+
+class Client;
+class OperationContext;
+
+namespace auth {
+
+class ValidatedTenancyScope {
+public:
+ ValidatedTenancyScope() = delete;
+ ValidatedTenancyScope(const ValidatedTenancyScope&) = default;
+
+ // kInitForShell allows parsing a securityToken without multitenancy enabled.
+ // This is required in the shell since we do not enable this setting in non-servers.
+ enum class InitTag {
+ kNormal,
+ kInitForShell,
+ };
+
+ /**
+ * Constructs a ValidatedTenancyScope by parsing a SecurityToken from a BSON object
+ * and verifying its cryptographic signature.
+ */
+ explicit ValidatedTenancyScope(BSONObj securityToken, InitTag tag = InitTag::kNormal);
+
+ /**
+ * Constructs a ValidatedTenancyScope for tenant only by validating that the
+ * current client is permitted to specify a tenant via the $tenant field.
+ */
+ ValidatedTenancyScope(Client* client, TenantId tenant);
+
+ /**
+ * Parses the client provided command body and securityToken for tenantId,
+ * and for securityToken respectively, the authenticatedUser as well.
+ *
+ * Returns boost::none when multitenancy support is not enabled.
+ */
+ static boost::optional<ValidatedTenancyScope> create(Client* client,
+ BSONObj body,
+ BSONObj securityToken);
+
+ bool hasAuthenticatedUser() const;
+ const UserName& authenticatedUser() const;
+ const TenantId& tenantId() const;
+
+ BSONObj getOriginalToken() const {
+ return _originalToken;
+ }
+
+ /**
+ * Get/Set a ValidatedTenancyScope as a decoration on the OperationContext
+ */
+ static const boost::optional<ValidatedTenancyScope>& get(OperationContext* opCtx);
+ static void set(OperationContext* opCtx, boost::optional<ValidatedTenancyScope> token);
+
+ /**
+ * Transitional token generator, do not use outside of test code.
+ */
+ struct TokenForTestingTag {};
+ explicit ValidatedTenancyScope(BSONObj token, TokenForTestingTag);
+
+ /**
+ * Backdoor API for use by FLE Query Analysis to setup a validated tenant without a security
+ * context.
+ */
+ struct TrustedFLEQueryAnalysisTag {};
+ explicit ValidatedTenancyScope(TenantId tenant, TrustedFLEQueryAnalysisTag)
+ : _tenantOrUser(std::move(tenant)) {}
+
+private:
+ // Preserve original token for serializing from MongoQ.
+ BSONObj _originalToken;
+
+ stdx::variant<UserName, TenantId> _tenantOrUser;
+};
+
+} // namespace auth
+} // namespace mongo
diff --git a/src/mongo/db/auth/validated_tenancy_scope_test.cpp b/src/mongo/db/auth/validated_tenancy_scope_test.cpp
new file mode 100644
index 00000000000..f1942f757a6
--- /dev/null
+++ b/src/mongo/db/auth/validated_tenancy_scope_test.cpp
@@ -0,0 +1,177 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/auth/authorization_manager_impl.h"
+#include "mongo/db/auth/authorization_session.h"
+#include "mongo/db/auth/authorization_session_impl.h"
+#include "mongo/db/auth/authz_manager_external_state_mock.h"
+#include "mongo/db/auth/security_token_gen.h"
+#include "mongo/db/auth/validated_tenancy_scope.h"
+#include "mongo/db/multitenancy_gen.h"
+#include "mongo/db/service_context_test_fixture.h"
+#include "mongo/unittest/unittest.h"
+
+namespace mongo {
+
+class AuthorizationSessionImplTestHelper {
+public:
+ /**
+ * Synthesize a user with the useTenant privilege and add them to the authorization session.
+ */
+ static void grantUseTenant(Client& client) {
+ User user(UserName("useTenant", "admin"));
+ user.setPrivileges(
+ {Privilege(ResourcePattern::forClusterResource(), ActionType::useTenant)});
+ auto* as = dynamic_cast<AuthorizationSessionImpl*>(AuthorizationSession::get(client));
+ if (as->_authenticatedUser != boost::none) {
+ as->logoutAllDatabases(&client, "AuthorizationSessionImplTestHelper"_sd);
+ }
+ as->_authenticatedUser = std::move(user);
+ as->_authenticationMode = AuthorizationSession::AuthenticationMode::kConnection;
+ as->_updateInternalAuthorizationState();
+ }
+};
+
+namespace auth {
+namespace {
+
+class ValidatedTenancyScopeTestFixture : public mongo::ScopedGlobalServiceContextForTest,
+ public unittest::Test {
+protected:
+ void setUp() final {
+ auto authzManagerState = std::make_unique<AuthzManagerExternalStateMock>();
+ auto authzManager = std::make_unique<AuthorizationManagerImpl>(
+ getServiceContext(), std::move(authzManagerState));
+ authzManager->setAuthEnabled(true);
+ AuthorizationManager::set(getServiceContext(), std::move(authzManager));
+
+ client = getServiceContext()->makeClient("test");
+ }
+
+ BSONObj makeSecurityToken(const UserName& userName) {
+ constexpr auto authUserFieldName = auth::SecurityToken::kAuthenticatedUserFieldName;
+ auto authUser = userName.toBSON(true /* serialize token */);
+ ASSERT_EQ(authUser["tenant"_sd].type(), jstOID);
+ using VTS = auth::ValidatedTenancyScope;
+ return VTS(BSON(authUserFieldName << authUser), VTS::TokenForTestingTag{})
+ .getOriginalToken();
+ }
+
+ ServiceContext::UniqueClient client;
+};
+
+TEST_F(ValidatedTenancyScopeTestFixture, MultitenancySupportOffWithoutTenantOK) {
+ gMultitenancySupport = false;
+ auto body = BSON("$db"
+ << "foo");
+
+ auto validated = ValidatedTenancyScope::create(client.get(), body, {});
+ ASSERT_TRUE(validated == boost::none);
+}
+
+TEST_F(ValidatedTenancyScopeTestFixture, MultitenancySupportWithTenantOK) {
+ gMultitenancySupport = true;
+
+ auto kOid = OID::gen();
+ auto body = BSON("ping" << 1 << "$tenant" << kOid);
+
+ AuthorizationSessionImplTestHelper::grantUseTenant(*(client.get()));
+ auto validated = ValidatedTenancyScope::create(client.get(), body, {});
+ ASSERT_TRUE(validated != boost::none);
+ ASSERT_TRUE(validated->tenantId() == TenantId(kOid));
+}
+
+TEST_F(ValidatedTenancyScopeTestFixture, MultitenancySupportWithSecurityTokenOK) {
+ gMultitenancySupport = true;
+
+ const TenantId kTenantId(OID::gen());
+ auto body = BSON("ping" << 1);
+ UserName user("user", "admin", kTenantId);
+ auto token = makeSecurityToken(user);
+
+ auto validated = ValidatedTenancyScope::create(client.get(), body, token);
+ ASSERT_TRUE(validated != boost::none);
+ ASSERT_TRUE(validated->tenantId() == kTenantId);
+ ASSERT_TRUE(validated->hasAuthenticatedUser());
+ ASSERT_TRUE(validated->authenticatedUser() == user);
+}
+
+TEST_F(ValidatedTenancyScopeTestFixture, MultitenancySupportOffWithTenantNOK) {
+ gMultitenancySupport = false;
+
+ auto kOid = OID::gen();
+ auto body = BSON("ping" << 1 << "$tenant" << kOid);
+
+ AuthorizationSessionImplTestHelper::grantUseTenant(*(client.get()));
+ ASSERT_THROWS_CODE(ValidatedTenancyScope(client.get(), TenantId(kOid)),
+ DBException,
+ ErrorCodes::InvalidOptions);
+ ASSERT_TRUE(ValidatedTenancyScope::create(client.get(), body, {}) == boost::none);
+}
+
+TEST_F(ValidatedTenancyScopeTestFixture, MultitenancySupportWithTenantNOK) {
+ gMultitenancySupport = true;
+
+ auto kOid = OID::gen();
+ auto body = BSON("ping" << 1 << "$tenant" << kOid);
+
+ ASSERT_THROWS_CODE(
+ ValidatedTenancyScope(client.get(), TenantId(kOid)), DBException, ErrorCodes::Unauthorized);
+ ASSERT_THROWS_CODE(ValidatedTenancyScope::create(client.get(), body, {}),
+ DBException,
+ ErrorCodes::Unauthorized);
+}
+
+// TODO SERVER-66822: Re-enable this test case.
+// TEST_F(ValidatedTenancyScopeTestFixture, MultitenancySupportWithoutTenantAndSecurityTokenNOK) {
+// gMultitenancySupport = true;
+// auto body = BSON("ping" << 1);
+// AuthorizationSessionImplTestHelper::grantUseTenant(*(client.get()));
+// ASSERT_THROWS_CODE(ValidatedTenancyScope::create(client.get(), body, {}), DBException,
+// ErrorCodes::Unauthorized);
+// }
+
+TEST_F(ValidatedTenancyScopeTestFixture, MultitenancySupportWithTenantAndSecurityTokenNOK) {
+ gMultitenancySupport = true;
+
+ auto kOid = OID::gen();
+ auto body = BSON("ping" << 1 << "$tenant" << kOid);
+ UserName user("user", "admin", TenantId(kOid));
+ auto token = makeSecurityToken(user);
+
+ AuthorizationSessionImplTestHelper::grantUseTenant(*(client.get()));
+ ASSERT_THROWS_CODE(
+ ValidatedTenancyScope::create(client.get(), body, token), DBException, 6545800);
+}
+
+} // namespace
+} // namespace auth
+} // namespace mongo
diff --git a/src/mongo/db/catalog/README.md b/src/mongo/db/catalog/README.md
index 71844f54e2d..db4bda42bdd 100644
--- a/src/mongo/db/catalog/README.md
+++ b/src/mongo/db/catalog/README.md
@@ -25,7 +25,10 @@ directory.
For more information on the Storage Engine API, see the [storage/README][].
+For more information on time-series collections, see the [timeseries/README][].
+
[storage/README]: https://github.com/mongodb/mongo/blob/master/src/mongo/db/storage/README.md
+[timeseries/README]: https://github.com/mongodb/mongo/blob/master/src/mongo/db/timeseries/README.md
# The Catalog
@@ -756,11 +759,18 @@ Manual](https://docs.mongodb.com/master/core/index-creation/#index-builds-in-rep
### Commit Quorum
-A primary will not commit an index build until a minimum number of data-bearing nodes have completed
-the index build and are ready to commit. This threshold is called the _commit quorum_.
+The purpose of `commitQuorum` is to ensure secondaries are ready to commit an index build quickly.
+This minimizes replication lag on secondaries: secondaries, on receipt of a `commitIndexBuild` oplog
+entry, will stall oplog application until the local index build can be committed. `commitQuorum`
+delays commit of an index build on the primary node until secondaries are also ready to commit. A
+primary will not commit an index build until a minimum number of data-bearing nodes are ready to
+commit the index build. Index builds can take anywhere from moments to days to complete, so the
+replication lag can be very significant. Note: `commitQuorum` makes no guarantee that indexes on
secondaries are ready for use when the command completes; `writeConcern` must still be used for
+that.
A `commitQuorum` option can be provided to the `createIndexes` command and specifies the number of
-nodes, including itself, a primary must wait to be ready before committing. The `commitQuorum`
+nodes, including itself, for which a primary must wait to be ready before committing. The `commitQuorum`
option accepts the same range of values as the writeConcern `"w"` option. This can be an integer
specifying the number of nodes, `"majority"`, `"votingMembers"`, or a replica set tag. The default value
is `"votingMembers"`, or all voting data-bearing nodes.
@@ -778,6 +788,10 @@ the index build is successful, it will replicate a `commitIndexBuild` oplog entr
Secondaries that were not included in the commit quorum and receive a `commitIndexBuild` oplog entry
will block replication until their index build is complete.
+The `commitQuorum` for a running index build may be changed by the user via the
+[`setIndexCommitQuorum`](https://github.com/mongodb/mongo/blob/v6.0/src/mongo/db/commands/set_index_commit_quorum_command.cpp#L55)
+server command.
+
See
[IndexBuildsCoordinator::_waitForNextIndexBuildActionAndCommit](https://github.com/mongodb/mongo/blob/r4.4.0-rc9/src/mongo/db/index_builds_coordinator_mongod.cpp#L632).
@@ -954,8 +968,6 @@ _Code spelunking starting points:_
* [_The TTLCollectionCache Class_](https://github.com/mongodb/mongo/blob/d88a892d5b18035bd0f5393a42690e705c2007d7/src/mongo/db/ttl_collection_cache.h)
* [_ttl.idl_](https://github.com/mongodb/mongo/blob/d88a892d5b18035bd0f5393a42690e705c2007d7/src/mongo/db/ttl.idl)
-TODO SERVER-66898: Refresh links
-
# Repair
Data corruption has a variety of causes, but can usually be attributed to misconfigured or
@@ -1722,6 +1734,10 @@ The TTL monitor will only delete data from a time-series bucket collection when
time, _id, is past the expiration plus the bucket maximum time span (default 1 hour). This
procedure avoids deleting buckets with data that is not older than the expiration time.
+For more information on time-series collections, see the [timeseries/README][].
+
+[timeseries/README]: https://github.com/mongodb/mongo/blob/master/src/mongo/db/timeseries/README.md
+
## Capped clustered collections
Capped clustered collections are available internally. Unlike regular capped collections, clustered
diff --git a/src/mongo/db/catalog/SConscript b/src/mongo/db/catalog/SConscript
index f36fee396d6..2cae97495a4 100644
--- a/src/mongo/db/catalog/SConscript
+++ b/src/mongo/db/catalog/SConscript
@@ -537,6 +537,7 @@ env.Library(
'$BUILD_DIR/mongo/db/query/query_plan_cache',
'$BUILD_DIR/mongo/db/query/query_planner',
'$BUILD_DIR/mongo/db/update_index_data',
+ 'collection',
],
LIBDEPS_PRIVATE=[
'$BUILD_DIR/mongo/base',
@@ -670,6 +671,7 @@ if wiredtiger:
'collection',
'collection_catalog',
'collection_options',
+ 'collection_query_info',
'collection_validation',
'commit_quorum_options',
'database_holder',
diff --git a/src/mongo/db/catalog/capped_utils.cpp b/src/mongo/db/catalog/capped_utils.cpp
index ba307ee0124..0810970806f 100644
--- a/src/mongo/db/catalog/capped_utils.cpp
+++ b/src/mongo/db/catalog/capped_utils.cpp
@@ -97,7 +97,7 @@ Status emptyCapped(OperationContext* opCtx, const NamespaceString& collectionNam
WriteUnitOfWork wuow(opCtx);
- auto writableCollection = collection.getWritableCollection();
+ auto writableCollection = collection.getWritableCollection(opCtx);
Status status = writableCollection->truncate(opCtx);
if (!status.isOK()) {
return status;
diff --git a/src/mongo/db/catalog/coll_mod.cpp b/src/mongo/db/catalog/coll_mod.cpp
index cfad9d81079..d7c5ff1ee92 100644
--- a/src/mongo/db/catalog/coll_mod.cpp
+++ b/src/mongo/db/catalog/coll_mod.cpp
@@ -177,13 +177,11 @@ StatusWith<std::pair<ParsedCollModRequest, BSONObj>> parseCollModRequest(Operati
}
if (const auto& cappedSize = cmr.getCappedSize()) {
- static constexpr long long minCappedSize = 4096;
auto swCappedSize = CollectionOptions::checkAndAdjustCappedSize(*cappedSize);
if (!swCappedSize.isOK()) {
return swCappedSize.getStatus();
}
- parsed.cappedSize =
- (swCappedSize.getValue() < minCappedSize) ? minCappedSize : swCappedSize.getValue();
+ parsed.cappedSize = swCappedSize.getValue();
oplogEntryBuilder.append(CollMod::kCappedSizeFieldName, *cappedSize);
}
if (const auto& cappedMax = cmr.getCappedMax()) {
diff --git a/src/mongo/db/catalog/collection_catalog.cpp b/src/mongo/db/catalog/collection_catalog.cpp
index cd6cb68f8a5..4c02fb5a1a5 100644
--- a/src/mongo/db/catalog/collection_catalog.cpp
+++ b/src/mongo/db/catalog/collection_catalog.cpp
@@ -475,8 +475,11 @@ Status CollectionCatalog::createView(OperationContext* opCtx,
const BSONArray& pipeline,
const BSONObj& collation,
const ViewsForDatabase::PipelineValidatorFn& pipelineValidator,
- const bool updateDurableViewCatalog) const {
- invariant(opCtx->lockState()->isCollectionLockedForMode(viewName, MODE_IX));
+ const ViewUpsertMode insertViewMode) const {
+ // A view document direct write can occur via the oplog application path, which may only hold a
+ // lock on the collection being updated (the database views collection).
+ invariant(insertViewMode == ViewUpsertMode::kAlreadyDurableView ||
+ opCtx->lockState()->isCollectionLockedForMode(viewName, MODE_IX));
invariant(opCtx->lockState()->isCollectionLockedForMode(
NamespaceString(viewName.db(), NamespaceString::kSystemDotViewsCollectionName), MODE_X));
@@ -514,7 +517,7 @@ Status CollectionCatalog::createView(OperationContext* opCtx,
pipelineValidator,
std::move(collator.getValue()),
ViewsForDatabase{viewsForDb},
- ViewUpsertMode::kCreateView);
+ insertViewMode);
}
return result;
@@ -1404,8 +1407,11 @@ Status CollectionCatalog::_createOrUpdateView(
const ViewsForDatabase::PipelineValidatorFn& pipelineValidator,
std::unique_ptr<CollatorInterface> collator,
ViewsForDatabase&& viewsForDb,
- ViewUpsertMode mode) const {
- invariant(opCtx->lockState()->isCollectionLockedForMode(viewName, MODE_IX));
+ ViewUpsertMode insertViewMode) const {
+ // A view document direct write can occur via the oplog application path, which may only hold a
+ // lock on the collection being updated (the database views collection).
+ invariant(insertViewMode == ViewUpsertMode::kAlreadyDurableView ||
+ opCtx->lockState()->isCollectionLockedForMode(viewName, MODE_IX));
invariant(opCtx->lockState()->isCollectionLockedForMode(
NamespaceString(viewName.db(), NamespaceString::kSystemDotViewsCollectionName), MODE_X));
@@ -1429,20 +1435,20 @@ Status CollectionCatalog::_createOrUpdateView(
// If the view is already in the durable view catalog, we don't need to validate the graph. If
// we need to update the durable view catalog, we need to check that the resulting dependency
// graph is acyclic and within the maximum depth.
- const bool viewGraphNeedsValidation = mode != ViewUpsertMode::kAlreadyDurableView;
+ const bool viewGraphNeedsValidation = insertViewMode != ViewUpsertMode::kAlreadyDurableView;
Status graphStatus =
viewsForDb.upsertIntoGraph(opCtx, view, pipelineValidator, viewGraphNeedsValidation);
if (!graphStatus.isOK()) {
return graphStatus;
}
- if (mode != ViewUpsertMode::kAlreadyDurableView) {
+ if (insertViewMode != ViewUpsertMode::kAlreadyDurableView) {
viewsForDb.durable->upsert(opCtx, viewName, viewDef);
}
viewsForDb.valid = false;
auto res = [&] {
- switch (mode) {
+ switch (insertViewMode) {
case ViewUpsertMode::kCreateView:
case ViewUpsertMode::kAlreadyDurableView:
return viewsForDb.insert(opCtx, viewDef);
diff --git a/src/mongo/db/catalog/collection_catalog.h b/src/mongo/db/catalog/collection_catalog.h
index 3bdbd87540d..dbf3db5956a 100644
--- a/src/mongo/db/catalog/collection_catalog.h
+++ b/src/mongo/db/catalog/collection_catalog.h
@@ -115,6 +115,19 @@ public:
}
};
+ enum class ViewUpsertMode {
+ // Insert all data for that view into the view map, view graph, and durable view catalog.
+ kCreateView,
+
+ // Insert into the view map and view graph without reinserting the view into the durable
+ // view catalog. Skip view graph validation.
+ kAlreadyDurableView,
+
+ // Reload the view map, insert into the view graph (flagging it as needing refresh), and
+ // update the durable view catalog.
+ kUpdateView,
+ };
+
static std::shared_ptr<const CollectionCatalog> get(ServiceContext* svcCtx);
static std::shared_ptr<const CollectionCatalog> get(OperationContext* opCtx);
@@ -147,7 +160,8 @@ public:
*
* Must be in WriteUnitOfWork. View creation rolls back if the unit of work aborts.
*
- * Caller must ensure corresponding database exists.
+ * Caller must ensure corresponding database exists. Expects db.system.views MODE_X lock and
+ * view namespace MODE_IX lock (unless 'insertViewMode' is set to kAlreadyDurableView).
*/
Status createView(OperationContext* opCtx,
const NamespaceString& viewName,
@@ -155,7 +169,7 @@ public:
const BSONArray& pipeline,
const BSONObj& collation,
const ViewsForDatabase::PipelineValidatorFn& pipelineValidator,
- bool updateDurableViewCatalog = true) const;
+ ViewUpsertMode insertViewMode = ViewUpsertMode::kCreateView) const;
/**
* Drop the view named 'viewName'.
@@ -542,19 +556,6 @@ private:
*/
void _replaceViewsForDatabase(const DatabaseName& dbName, ViewsForDatabase&& views);
- enum class ViewUpsertMode {
- // Insert all data for that view into the view map, view graph, and durable view catalog.
- kCreateView,
-
- // Insert into the view map and view graph without reinserting the view into the durable
- // view catalog. Skip view graph validation.
- kAlreadyDurableView,
-
- // Reload the view map, insert into the view graph (flagging it as needing refresh), and
- // update the durable view catalog.
- kUpdateView,
- };
-
/**
* Helper to take care of shared functionality for 'createView(...)' and 'modifyView(...)'.
*/
@@ -565,7 +566,7 @@ private:
const ViewsForDatabase::PipelineValidatorFn& pipelineValidator,
std::unique_ptr<CollatorInterface> collator,
ViewsForDatabase&& viewsForDb,
- ViewUpsertMode mode) const;
+ ViewUpsertMode insertViewMode) const;
/**
* Returns true if this CollectionCatalog instance is part of an ongoing batched catalog write.
diff --git a/src/mongo/db/catalog/collection_impl.cpp b/src/mongo/db/catalog/collection_impl.cpp
index aaf5e7607f2..16560953def 100644
--- a/src/mongo/db/catalog/collection_impl.cpp
+++ b/src/mongo/db/catalog/collection_impl.cpp
@@ -37,7 +37,7 @@
#include "mongo/bson/ordering.h"
#include "mongo/bson/simple_bsonelement_comparator.h"
#include "mongo/bson/simple_bsonobj_comparator.h"
-#include "mongo/db/auth/security_token.h"
+#include "mongo/crypto/fle_crypto.h"
#include "mongo/db/catalog/collection_catalog.h"
#include "mongo/db/catalog/collection_options.h"
#include "mongo/db/catalog/document_validation.h"
@@ -818,7 +818,6 @@ Status CollectionImpl::insertDocumentsForOplog(OperationContext* opCtx,
return status;
}
-
Status CollectionImpl::insertDocuments(OperationContext* opCtx,
const std::vector<InsertStatement>::const_iterator begin,
const std::vector<InsertStatement>::const_iterator end,
@@ -842,8 +841,20 @@ Status CollectionImpl::insertDocuments(OperationContext* opCtx,
}
auto status = _checkValidationAndParseResult(opCtx, it->doc);
- if (!status.isOK())
+ if (!status.isOK()) {
return status;
+ }
+
+ auto& validationSettings = DocumentValidationSettings::get(opCtx);
+
+ if (getCollectionOptions().encryptedFieldConfig &&
+ !validationSettings.isSchemaValidationDisabled() &&
+ !validationSettings.isSafeContentValidationDisabled() &&
+ it->doc.hasField(kSafeContent)) {
+ return Status(ErrorCodes::BadValue,
+ str::stream()
+ << "Cannot insert a document with field name " << kSafeContent);
+ }
}
const SnapshotId sid = opCtx->recoveryUnit()->getSnapshotId();
@@ -1347,6 +1358,17 @@ void CollectionImpl::deleteDocument(OperationContext* opCtx,
}
}
+bool compareSafeContentElem(const BSONObj& oldDoc, const BSONObj& newDoc) {
+ if (newDoc.hasField(kSafeContent) != oldDoc.hasField(kSafeContent)) {
+ return false;
+ }
+ if (!newDoc.hasField(kSafeContent)) {
+ return true;
+ }
+
+ return newDoc.getField(kSafeContent).binaryEqual(oldDoc.getField(kSafeContent));
+}
+
RecordId CollectionImpl::updateDocument(OperationContext* opCtx,
RecordId oldLocation,
const Snapshotted<BSONObj>& oldDoc,
@@ -1371,6 +1393,17 @@ RecordId CollectionImpl::updateDocument(OperationContext* opCtx,
}
}
+ auto& validationSettings = DocumentValidationSettings::get(opCtx);
+ if (getCollectionOptions().encryptedFieldConfig &&
+ !validationSettings.isSchemaValidationDisabled() &&
+ !validationSettings.isSafeContentValidationDisabled()) {
+
+ uassert(ErrorCodes::BadValue,
+ str::stream() << "New document and old document both need to have " << kSafeContent
+ << " field.",
+ compareSafeContentElem(oldDoc.value(), newDoc));
+ }
+
dassert(opCtx->lockState()->isCollectionLockedForMode(ns(), MODE_IX));
invariant(oldDoc.snapshotId() == opCtx->recoveryUnit()->getSnapshotId());
invariant(newDoc.isOwned());
@@ -2166,8 +2199,9 @@ Status CollectionImpl::prepareForIndexBuild(OperationContext* opCtx,
str::stream() << "index " << imd.nameStringData()
<< " is already in current metadata: " << _metadata->toBSON());
- if (getTimeseriesOptions() && feature_flags::gTimeseriesMetricIndexes.isEnabledAndIgnoreFCV() &&
- serverGlobalParams.featureCompatibility.isFCVUpgradingToOrAlreadyLatest() &&
+ if (getTimeseriesOptions() &&
+ feature_flags::gTimeseriesMetricIndexes.isEnabled(
+ serverGlobalParams.featureCompatibility) &&
timeseries::doesBucketsIndexIncludeMeasurement(
opCtx, ns(), *getTimeseriesOptions(), spec->infoObj())) {
invariant(_metadata->timeseriesBucketsMayHaveMixedSchemaData);
diff --git a/src/mongo/db/catalog/collection_impl.h b/src/mongo/db/catalog/collection_impl.h
index 7d5bca73064..e2cc5175989 100644
--- a/src/mongo/db/catalog/collection_impl.h
+++ b/src/mongo/db/catalog/collection_impl.h
@@ -381,7 +381,7 @@ public:
uint64_t n = numRecords(opCtx);
if (n == 0)
- return 5;
+ return 0;
return static_cast<int>(dataSize(opCtx) / n);
}
diff --git a/src/mongo/db/catalog/collection_writer_test.cpp b/src/mongo/db/catalog/collection_writer_test.cpp
index 1f828f38800..a5aad697d59 100644
--- a/src/mongo/db/catalog/collection_writer_test.cpp
+++ b/src/mongo/db/catalog/collection_writer_test.cpp
@@ -101,7 +101,7 @@ TEST_F(CollectionWriterTest, Commit) {
{
AutoGetCollection lock(operationContext(), kNss, MODE_X);
WriteUnitOfWork wuow(operationContext());
- auto writable = writer.getWritableCollection();
+ auto writable = writer.getWritableCollection(operationContext());
// get() and getWritableCollection() should return the same instance
ASSERT_EQ(writer.get().get(), writable);
@@ -128,7 +128,7 @@ TEST_F(CollectionWriterTest, Commit) {
{
AutoGetCollection lock(operationContext(), kNss, MODE_X);
WriteUnitOfWork wuow(operationContext());
- auto writable = writer.getWritableCollection();
+ auto writable = writer.getWritableCollection(operationContext());
ASSERT_EQ(writer.get().get(), writable);
ASSERT_EQ(writable, lookupCollectionFromCatalog().get());
@@ -153,7 +153,7 @@ TEST_F(CollectionWriterTest, Rollback) {
{
AutoGetCollection lock(operationContext(), kNss, MODE_X);
WriteUnitOfWork wuow(operationContext());
- auto writable = writer.getWritableCollection();
+ auto writable = writer.getWritableCollection(operationContext());
ASSERT_EQ(writer.get().get(), writable);
ASSERT_EQ(writable, lookupCollectionFromCatalog().get());
@@ -179,7 +179,7 @@ TEST_F(CollectionWriterTest, CommitAfterDestroy) {
CollectionWriter writer(operationContext(), kNss);
// Request a writable Collection and destroy CollectionWriter before WUOW commits
- writable = writer.getWritableCollection();
+ writable = writer.getWritableCollection(operationContext());
}
wuow.commit();
diff --git a/src/mongo/db/catalog/commit_quorum.idl b/src/mongo/db/catalog/commit_quorum.idl
index 2a67a22fdef..26edd42e9fe 100644
--- a/src/mongo/db/catalog/commit_quorum.idl
+++ b/src/mongo/db/catalog/commit_quorum.idl
@@ -40,8 +40,8 @@ types:
- decimal
- double
- string
- description: "CommitQuorumOptions defines the required quorum for the index builds to
- commit."
+ description: "CommitQuorumOptions defines the replica set membership required to be ready
+ for commit in order for the primary to proceed to commit an index build."
cpp_type: "mongo::CommitQuorumOptions"
serializer: "mongo::CommitQuorumOptions::appendToBuilder"
deserializer: "mongo::CommitQuorumOptions::deserializerForIDL"
diff --git a/src/mongo/db/catalog/commit_quorum_options.h b/src/mongo/db/catalog/commit_quorum_options.h
index 351281131a0..a910e0a0831 100644
--- a/src/mongo/db/catalog/commit_quorum_options.h
+++ b/src/mongo/db/catalog/commit_quorum_options.h
@@ -38,12 +38,15 @@ namespace mongo {
class Status;
/**
+ * 'CommitQuorumOptions' is used to determine when a primary should commit an index build. When the
+ * specified 'quorum' of replica set members is reached, then the primary proceeds to commit the
+ * index. commitQuorum ensures secondaries are ready to commit the index as quickly as possible:
+ * secondary replication will stall on receipt of a commitIndexBuild oplog entry until the
+ * secondary's index build is complete and ready to be committed.
+ *
* The 'CommitQuorumOptions' has the same range of settings as the 'w' field from
* 'WriteConcernOptions'. It can be set to an integer starting from 0 and up, or to a string. The
* string option can be 'majority', 'votingMembers' or a replica set tag.
- *
- * The principal idea behind 'CommitQuorumOptions' is to figure out when an index build should be
- * committed on the replica set based on the number of commit ready members.
*/
class CommitQuorumOptions {
public:
@@ -86,10 +89,15 @@ public:
return (numNodes == rhs.numNodes && mode == rhs.mode) ? true : false;
}
- // Returns the BSON representation of this object.
+ /**
+ * Returns the BSON representation of this object.
+ * E.g. {commitQuorum: "majority"}
+ */
BSONObj toBSON() const;
- // Appends the BSON representation of this object.
+ /**
+ * Appends the commitQuorum value (mode or numNodes) with the given field name "fieldName".
+ */
void appendToBuilder(StringData fieldName, BSONObjBuilder* builder) const;
// The 'commitQuorum' parameter to define the required quorum for the index builds to commit.
diff --git a/src/mongo/db/catalog/database_holder.h b/src/mongo/db/catalog/database_holder.h
index 3a7918acda8..ffb305b2c4c 100644
--- a/src/mongo/db/catalog/database_holder.h
+++ b/src/mongo/db/catalog/database_holder.h
@@ -36,14 +36,12 @@
#include "mongo/db/catalog/collection.h"
#include "mongo/db/catalog/collection_options.h"
#include "mongo/db/database_name.h"
+#include "mongo/s/database_version.h"
namespace mongo {
-class CollectionCatalogEntry;
class Database;
class OperationContext;
-class RecordStore;
-class ViewCatalog;
/**
* Registry of opened databases.
@@ -56,9 +54,8 @@ public:
static DatabaseHolder* get(OperationContext* opCtx);
static void set(ServiceContext* service, std::unique_ptr<DatabaseHolder> databaseHolder);
- virtual ~DatabaseHolder() = default;
-
DatabaseHolder() = default;
+ virtual ~DatabaseHolder() = default;
/**
* Retrieves an already opened database or returns nullptr. Must be called with the database
diff --git a/src/mongo/db/catalog/database_impl.cpp b/src/mongo/db/catalog/database_impl.cpp
index cd743dcac46..e8c9bf6c571 100644
--- a/src/mongo/db/catalog/database_impl.cpp
+++ b/src/mongo/db/catalog/database_impl.cpp
@@ -59,6 +59,7 @@
#include "mongo/db/introspect.h"
#include "mongo/db/op_observer.h"
#include "mongo/db/query/collation/collator_factory_interface.h"
+#include "mongo/db/query/query_knobs_gen.h"
#include "mongo/db/repl/drop_pending_collection_reaper.h"
#include "mongo/db/repl/oplog.h"
#include "mongo/db/repl/replication_coordinator.h"
@@ -93,6 +94,17 @@ MONGO_FAIL_POINT_DEFINE(hangAndFailAfterCreateCollectionReservesOpTime);
MONGO_FAIL_POINT_DEFINE(openCreateCollectionWindowFp);
MONGO_FAIL_POINT_DEFINE(allowSystemViewsDrop);
+// When active, a column index will be created for all new collections. This is used for the column
+// index JS test passthrough suite. Other passthroughs work by overriding javascript methods on the
+// client side, but this approach often requires the drop() function to create the collection. This
+// behavior is confusing, and requires a large number of tests to be re-written to accommodate this
+// passthrough behavior. In case you're wondering, this failpoint approach would not work as well
+// for the sharded collections task, since mongos and the config servers are generally unaware of
+// when a collection is created. There isn't a great server-side hook we can use to auto-shard a
+// collection, and it is more complex technically to drive this process from one shard in the
+// cluster. For column store indexes, we just need to change local state on each mongod.
+MONGO_FAIL_POINT_DEFINE(createColumnIndexOnAllCollections);
+
Status validateDBNameForWindows(StringData dbname) {
const std::vector<std::string> windowsReservedNames = {
"con", "prn", "aux", "nul", "com1", "com2", "com3", "com4", "com5", "com6", "com7",
@@ -132,6 +144,12 @@ void assertMovePrimaryInProgress(OperationContext* opCtx, NamespaceString const&
}
}
+static const BSONObj kColumnStoreSpec = BSON("name"
+ << "$**_columnstore"
+ << "key"
+ << BSON("$**"
+ << "columnstore")
+ << "v" << 2);
} // namespace
Status DatabaseImpl::validateDBName(StringData dbname) {
@@ -176,7 +194,7 @@ Status DatabaseImpl::init(OperationContext* const opCtx) {
// If this is called from the repair path, the collection is already initialized.
if (!collection->isInitialized()) {
WriteUnitOfWork wuow(opCtx);
- collection.getWritableCollection()->init(opCtx);
+ collection.getWritableCollection(opCtx)->init(opCtx);
wuow.commit();
}
}
@@ -450,6 +468,16 @@ Status DatabaseImpl::dropCollection(OperationContext* opCtx,
invariant(nss.db() == _name.db());
+ // Returns true if the supplied namespace 'nss' is a system collection that can be dropped,
+ // false otherwise.
+ auto isDroppableSystemCollection = [](const auto& nss) {
+ return nss.isHealthlog() || nss == NamespaceString::kLogicalSessionsNamespace ||
+ nss == NamespaceString::kKeysCollectionNamespace ||
+ nss.isTemporaryReshardingCollection() || nss.isTimeseriesBucketsCollection() ||
+ nss.isChangeStreamPreImagesCollection() ||
+ nss == NamespaceString::kConfigsvrRestoreNamespace || nss.isChangeCollection();
+ };
+
if (nss.isSystem()) {
if (nss.isSystemDotProfile()) {
if (catalog->getDatabaseProfileLevel(_name) != 0)
@@ -463,11 +491,7 @@ Status DatabaseImpl::dropCollection(OperationContext* opCtx,
<< " when time-series collections are present.",
viewStats && viewStats->userTimeseries == 0);
}
- } else if (!(nss.isHealthlog() || nss == NamespaceString::kLogicalSessionsNamespace ||
- nss == NamespaceString::kKeysCollectionNamespace ||
- nss.isTemporaryReshardingCollection() || nss.isTimeseriesBucketsCollection() ||
- nss.isChangeStreamPreImagesCollection() ||
- nss == NamespaceString::kConfigsvrRestoreNamespace)) {
+ } else if (!isDroppableSystemCollection(nss)) {
return Status(ErrorCodes::IllegalOperation,
str::stream() << "can't drop system collection " << nss);
}
@@ -520,14 +544,14 @@ Status DatabaseImpl::dropCollectionEvenIfSystem(OperationContext* opCtx,
auto opObserver = serviceContext->getOpObserver();
auto isOplogDisabledForNamespace = replCoord->isOplogDisabledFor(opCtx, nss);
if (dropOpTime.isNull() && isOplogDisabledForNamespace) {
- _dropCollectionIndexes(opCtx, nss, collection.getWritableCollection());
+ _dropCollectionIndexes(opCtx, nss, collection.getWritableCollection(opCtx));
opObserver->onDropCollection(opCtx,
nss,
uuid,
numRecords,
OpObserver::CollectionDropType::kOnePhase,
markFromMigrate);
- return _finishDropCollection(opCtx, nss, collection.getWritableCollection());
+ return _finishDropCollection(opCtx, nss, collection.getWritableCollection(opCtx));
}
// Replicated collections should be dropped in two phases.
@@ -536,7 +560,7 @@ Status DatabaseImpl::dropCollectionEvenIfSystem(OperationContext* opCtx,
// storage engine and will no longer be visible at the catalog layer with 3.6-style
// <db>.system.drop.* namespaces.
if (serviceContext->getStorageEngine()->supportsPendingDrops()) {
- _dropCollectionIndexes(opCtx, nss, collection.getWritableCollection());
+ _dropCollectionIndexes(opCtx, nss, collection.getWritableCollection(opCtx));
auto commitTimestamp = opCtx->recoveryUnit()->getCommitTimestamp();
LOGV2(20314,
@@ -572,7 +596,7 @@ Status DatabaseImpl::dropCollectionEvenIfSystem(OperationContext* opCtx,
str::stream() << "OpTime is not null. OpTime: " << opTime.toString());
}
- return _finishDropCollection(opCtx, nss, collection.getWritableCollection());
+ return _finishDropCollection(opCtx, nss, collection.getWritableCollection(opCtx));
}
// Old two-phase drop: Replicated collections will be renamed with a special drop-pending
@@ -706,7 +730,7 @@ Status DatabaseImpl::renameCollection(OperationContext* opCtx,
// Set the namespace of 'collToRename' from within the CollectionCatalog. This is necessary
// because the CollectionCatalog manages the necessary isolation for this Collection until the
// WUOW commits.
- auto writableCollection = collToRename.getWritableCollection();
+ auto writableCollection = collToRename.getWritableCollection(opCtx);
Status status = writableCollection->rename(opCtx, toNss, stayTemp);
if (!status.isOK())
return status;
@@ -884,25 +908,32 @@ Collection* DatabaseImpl::createCollection(OperationContext* opCtx,
BSONObj fullIdIndexSpec;
- if (createIdIndex) {
- if (collection->requiresIdIndex()) {
- if (optionsWithUUID.autoIndexId == CollectionOptions::YES ||
- optionsWithUUID.autoIndexId == CollectionOptions::DEFAULT) {
- IndexCatalog* ic = collection->getIndexCatalog();
- fullIdIndexSpec = uassertStatusOK(ic->createIndexOnEmptyCollection(
- opCtx,
- collection,
- !idIndex.isEmpty() ? idIndex : ic->getDefaultIdIndexSpec(collection)));
- } else {
- // autoIndexId: false is only allowed on unreplicated collections.
- uassert(50001,
- str::stream() << "autoIndexId:false is not allowed for collection " << nss
- << " because it can be replicated",
- !nss.isReplicated());
- }
+ bool createColumnIndex = false;
+ if (createIdIndex && collection->requiresIdIndex()) {
+ if (optionsWithUUID.autoIndexId == CollectionOptions::YES ||
+ optionsWithUUID.autoIndexId == CollectionOptions::DEFAULT) {
+ auto* ic = collection->getIndexCatalog();
+ fullIdIndexSpec = uassertStatusOK(ic->createIndexOnEmptyCollection(
+ opCtx,
+ collection,
+ !idIndex.isEmpty() ? idIndex : ic->getDefaultIdIndexSpec(collection)));
+ createColumnIndex = createColumnIndexOnAllCollections.shouldFail();
+ } else {
+ // autoIndexId: false is only allowed on unreplicated collections.
+ uassert(50001,
+ str::stream() << "autoIndexId:false is not allowed for collection " << nss
+ << " because it can be replicated",
+ !nss.isReplicated());
}
}
+ if (MONGO_unlikely(createColumnIndex)) {
+ invariant(!internalQueryForceClassicEngine.load(),
+ "Column Store Indexes failpoint in use without enabling SBE engine");
+ uassertStatusOK(collection->getIndexCatalog()->createIndexOnEmptyCollection(
+ opCtx, collection, kColumnStoreSpec));
+ }
+
hangBeforeLoggingCreateCollection.pauseWhileSet();
opCtx->getServiceContext()->getOpObserver()->onCreateCollection(
diff --git a/src/mongo/db/catalog/database_test.cpp b/src/mongo/db/catalog/database_test.cpp
index e9da3e319ee..8346d0f1a29 100644
--- a/src/mongo/db/catalog/database_test.cpp
+++ b/src/mongo/db/catalog/database_test.cpp
@@ -355,7 +355,7 @@ TEST_F(DatabaseTest, MakeUniqueCollectionNamespaceReplacesPercentSignsWithRandom
auto nss1 = unittest::assertGet(db->makeUniqueCollectionNamespace(_opCtx.get(), model));
if (!re.FullMatch(nss1.ns())) {
FAIL((StringBuilder() << "First generated namespace \"" << nss1.ns()
- << "\" does not match reqular expression \"" << re.pattern()
+ << "\" does not match regular expression \"" << re.pattern()
<< "\"")
.str());
}
@@ -372,7 +372,7 @@ TEST_F(DatabaseTest, MakeUniqueCollectionNamespaceReplacesPercentSignsWithRandom
auto nss2 = unittest::assertGet(db->makeUniqueCollectionNamespace(_opCtx.get(), model));
if (!re.FullMatch(nss2.ns())) {
FAIL((StringBuilder() << "Second generated namespace \"" << nss2.ns()
- << "\" does not match reqular expression \"" << re.pattern()
+ << "\" does not match regular expression \"" << re.pattern()
<< "\"")
.str());
}
diff --git a/src/mongo/db/catalog/document_validation.h b/src/mongo/db/catalog/document_validation.h
index 875f255c565..47db304d79d 100644
--- a/src/mongo/db/catalog/document_validation.h
+++ b/src/mongo/db/catalog/document_validation.h
@@ -52,7 +52,7 @@ class DocumentValidationSettings {
public:
enum flag : std::uint8_t {
/*
- * Enables document validation (both schema and internal).
+ * Enables document validation (schema, internal, and safeContent).
*/
kEnableValidation = 0x00,
/*
@@ -67,6 +67,12 @@ public:
* doesn't comply with internal validation rules.
*/
kDisableInternalValidation = 0x02,
+ /*
+ * If set, modifications to the safeContent array are allowed. This flag is only
+ * enabled when bypass document validation is enabled or if crudProcessed is true
+ * in the query.
+ */
+ kDisableSafeContentValidation = 0x04,
};
using Flags = std::uint8_t;
@@ -92,6 +98,10 @@ public:
return _flags & kDisableInternalValidation;
}
+ bool isSafeContentValidationDisabled() const {
+ return _flags & kDisableSafeContentValidation;
+ }
+
bool isDocumentValidationEnabled() const {
return _flags == kEnableValidation;
}
@@ -134,11 +144,29 @@ class DisableDocumentSchemaValidationIfTrue {
public:
DisableDocumentSchemaValidationIfTrue(OperationContext* opCtx,
bool shouldDisableSchemaValidation) {
- if (shouldDisableSchemaValidation)
- _documentSchemaValidationDisabler.emplace(opCtx);
+ if (shouldDisableSchemaValidation) {
+ _documentSchemaValidationDisabler.emplace(
+ opCtx, DocumentValidationSettings::kDisableSchemaValidation);
+ }
+ }
+
+private:
+ boost::optional<DisableDocumentValidation> _documentSchemaValidationDisabler;
+};
+
+class DisableSafeContentValidationIfTrue {
+public:
+ DisableSafeContentValidationIfTrue(OperationContext* opCtx,
+ bool shouldDisableSchemaValidation,
+ bool encryptionInformationCrudProcessed) {
+ if (shouldDisableSchemaValidation || encryptionInformationCrudProcessed) {
+ _documentSchemaValidationDisabler.emplace(
+ opCtx, DocumentValidationSettings::kDisableSafeContentValidation);
+ }
}
private:
boost::optional<DisableDocumentValidation> _documentSchemaValidationDisabler;
};
+
} // namespace mongo
diff --git a/src/mongo/db/catalog/index_builds_manager.cpp b/src/mongo/db/catalog/index_builds_manager.cpp
index c38af203930..553f4dcbf0e 100644
--- a/src/mongo/db/catalog/index_builds_manager.cpp
+++ b/src/mongo/db/catalog/index_builds_manager.cpp
@@ -322,7 +322,7 @@ Status IndexBuildsManager::commitIndexBuild(OperationContext* opCtx,
[this, builder, buildUUID, opCtx, &collection, nss, &onCreateEachFn, &onCommitFn] {
WriteUnitOfWork wunit(opCtx);
auto status = builder->commit(
- opCtx, collection.getWritableCollection(), onCreateEachFn, onCommitFn);
+ opCtx, collection.getWritableCollection(opCtx), onCreateEachFn, onCommitFn);
if (!status.isOK()) {
return status;
}
diff --git a/src/mongo/db/catalog/index_catalog_impl.cpp b/src/mongo/db/catalog/index_catalog_impl.cpp
index 74049b34427..0e3679f3f02 100644
--- a/src/mongo/db/catalog/index_catalog_impl.cpp
+++ b/src/mongo/db/catalog/index_catalog_impl.cpp
@@ -118,8 +118,8 @@ Status isSpecOKClusteredIndexCheck(const BSONObj& indexSpec,
auto key = indexSpec.getObjectField("key");
bool keysMatch = clustered_util::matchesClusterKey(key, collInfo);
- bool clusteredOptionPresent =
- indexSpec.hasField("clustered") && indexSpec["clustered"].trueValue();
+ bool clusteredOptionPresent = indexSpec.hasField(IndexDescriptor::kClusteredFieldName) &&
+ indexSpec[IndexDescriptor::kClusteredFieldName].trueValue();
if (clusteredOptionPresent && !keysMatch) {
// The 'clustered' option implies the indexSpec must match the clustered index.
@@ -907,8 +907,11 @@ Status IndexCatalogImpl::_isSpecOk(OperationContext* opCtx,
str::stream() << pluginName
<< " indexes are under development and cannot be used without "
"enabling the feature flag",
- feature_flags::gFeatureFlagColumnstoreIndexes.isEnabled(
- serverGlobalParams.featureCompatibility));
+ // With our testing failpoint we may try to run this code before we've initialized
+ // the FCV.
+ !serverGlobalParams.featureCompatibility.isVersionInitialized() ||
+ feature_flags::gFeatureFlagColumnstoreIndexes.isEnabled(
+ serverGlobalParams.featureCompatibility));
if (auto columnSpecStatus = validateColumnStoreSpec(collection, spec, indexVersion);
!columnSpecStatus.isOK()) {
return columnSpecStatus;
diff --git a/src/mongo/db/catalog/index_key_validate.cpp b/src/mongo/db/catalog/index_key_validate.cpp
index 1abc7b14e03..199b1f5a13d 100644
--- a/src/mongo/db/catalog/index_key_validate.cpp
+++ b/src/mongo/db/catalog/index_key_validate.cpp
@@ -85,7 +85,7 @@ static const std::set<StringData> allowedClusteredIndexFieldNames = {
ClusteredIndexSpec::kVFieldName,
ClusteredIndexSpec::kKeyFieldName,
// This is for indexSpec creation only.
- "clustered",
+ IndexDescriptor::kClusteredFieldName,
};
/**
@@ -268,7 +268,8 @@ BSONObj repairIndexSpec(const NamespaceString& ns,
IndexDescriptor::kUniqueFieldName == fieldName ||
IndexDescriptor::kSparseFieldName == fieldName ||
IndexDescriptor::kDropDuplicatesFieldName == fieldName ||
- IndexDescriptor::kPrepareUniqueFieldName == fieldName || "clustered" == fieldName) &&
+ IndexDescriptor::kPrepareUniqueFieldName == fieldName ||
+ IndexDescriptor::kClusteredFieldName == fieldName) &&
!indexSpecElem.isNumber() && !indexSpecElem.isBoolean() && indexSpecElem.trueValue()) {
LOGV2_WARNING(6444400,
"Fixing boolean field from index spec",
@@ -293,7 +294,7 @@ StatusWith<BSONObj> validateIndexSpec(OperationContext* opCtx, const BSONObj& in
bool hasOriginalSpecField = false;
bool unique = false;
bool prepareUnique = false;
- auto clusteredField = indexSpec["clustered"];
+ auto clusteredField = indexSpec[IndexDescriptor::kClusteredFieldName];
bool apiStrict = opCtx && APIParameters::get(opCtx).getAPIStrict().value_or(false);
auto fieldNamesValidStatus = validateIndexSpecFieldNames(indexSpec);
@@ -500,11 +501,9 @@ StatusWith<BSONObj> validateIndexSpec(OperationContext* opCtx, const BSONObj& in
} else if ((IndexDescriptor::kBackgroundFieldName == indexSpecElemFieldName ||
IndexDescriptor::kUniqueFieldName == indexSpecElemFieldName ||
IndexDescriptor::kSparseFieldName == indexSpecElemFieldName ||
- IndexDescriptor::k2dsphereCoarsestIndexedLevel == indexSpecElemFieldName ||
- IndexDescriptor::k2dsphereFinestIndexedLevel == indexSpecElemFieldName ||
IndexDescriptor::kDropDuplicatesFieldName == indexSpecElemFieldName ||
IndexDescriptor::kPrepareUniqueFieldName == indexSpecElemFieldName ||
- "clustered" == indexSpecElemFieldName)) {
+ IndexDescriptor::kClusteredFieldName == indexSpecElemFieldName)) {
if (!indexSpecElem.isNumber() && !indexSpecElem.isBoolean()) {
return {ErrorCodes::TypeMismatch,
str::stream()
@@ -528,7 +527,9 @@ StatusWith<BSONObj> validateIndexSpec(OperationContext* opCtx, const BSONObj& in
IndexDescriptor::kTextVersionFieldName == indexSpecElemFieldName ||
IndexDescriptor::k2dIndexBitsFieldName == indexSpecElemFieldName ||
IndexDescriptor::k2dIndexMinFieldName == indexSpecElemFieldName ||
- IndexDescriptor::k2dIndexMaxFieldName == indexSpecElemFieldName) &&
+ IndexDescriptor::k2dIndexMaxFieldName == indexSpecElemFieldName ||
+ IndexDescriptor::k2dsphereCoarsestIndexedLevel == indexSpecElemFieldName ||
+ IndexDescriptor::k2dsphereFinestIndexedLevel == indexSpecElemFieldName) &&
!indexSpecElem.isNumber()) {
return {ErrorCodes::TypeMismatch,
str::stream() << "The field '" << indexSpecElemFieldName
@@ -629,7 +630,7 @@ StatusWith<BSONObj> validateIndexSpec(OperationContext* opCtx, const BSONObj& in
}
Status validateIdIndexSpec(const BSONObj& indexSpec) {
- bool isClusteredIndexSpec = indexSpec.hasField("clustered");
+ bool isClusteredIndexSpec = indexSpec.hasField(IndexDescriptor::kClusteredFieldName);
if (!isClusteredIndexSpec) {
// Field names for a 'clustered' index spec have already been validated through
@@ -691,7 +692,7 @@ Status validateIndexSpecFieldNames(const BSONObj& indexSpec) {
return Status::OK();
}
- if (indexSpec.hasField("clustered")) {
+ if (indexSpec.hasField(IndexDescriptor::kClusteredFieldName)) {
return validateClusteredSpecFieldNames(indexSpec);
}
diff --git a/src/mongo/db/catalog/index_key_validate_test.cpp b/src/mongo/db/catalog/index_key_validate_test.cpp
index adfd2e25a5a..e5b9e7b6316 100644
--- a/src/mongo/db/catalog/index_key_validate_test.cpp
+++ b/src/mongo/db/catalog/index_key_validate_test.cpp
@@ -353,5 +353,42 @@ TEST(IndexKeyValidateTest, RepairIndexSpecs) {
"true, force: true}"))));
}
+TEST(IndexKeyValidateTest, GeoIndexSpecs) {
+ ASSERT_OK(index_key_validate::validateIndexSpec(
+ nullptr,
+ fromjson("{'key':{'loc':'2dsphere'},'name':'loc_2dsphere','finestIndexedLevel':17,'"
+ "coarsestIndexedLevel':5}")));
+
+ ASSERT_NOT_OK(index_key_validate::validateIndexSpec(
+ nullptr,
+ fromjson("{'key':{'loc':'2dsphere'},'name':'loc_2dsphere','finestIndexedLevel':'string','"
+ "coarsestIndexedLevel':'string'}")));
+
+ ASSERT_NOT_OK(index_key_validate::validateIndexSpec(
+ nullptr,
+ fromjson("{'key':{'loc':'2dsphere'},'name':'loc_2dsphere','finestIndexedLevel':17,'"
+ "coarsestIndexedLevel':'string'}")));
+
+ ASSERT_NOT_OK(index_key_validate::validateIndexSpec(
+ nullptr,
+ fromjson("{'key':{'loc':'2dsphere'},'name':'loc_2dsphere','finestIndexedLevel':'string','"
+ "coarsestIndexedLevel':5}")));
+
+ ASSERT_NOT_OK(index_key_validate::validateIndexSpec(
+ nullptr,
+ fromjson("{'key':{'loc':'2dsphere'},'name':'loc_2dsphere','finestIndexedLevel':true,'"
+ "coarsestIndexedLevel':true}")));
+
+ ASSERT_NOT_OK(index_key_validate::validateIndexSpec(
+ nullptr,
+ fromjson("{'key':{'loc':'2dsphere'},'name':'loc_2dsphere','finestIndexedLevel':17,'"
+ "coarsestIndexedLevel':true}")));
+
+ ASSERT_NOT_OK(index_key_validate::validateIndexSpec(
+ nullptr,
+ fromjson("{'key':{'loc':'2dsphere'},'name':'loc_2dsphere','finestIndexedLevel':true,'"
+ "coarsestIndexedLevel':5}")));
+}
+
} // namespace
} // namespace mongo
diff --git a/src/mongo/db/catalog/multi_index_block.cpp b/src/mongo/db/catalog/multi_index_block.cpp
index 95e1c8d7f2a..8a66be4f602 100644
--- a/src/mongo/db/catalog/multi_index_block.cpp
+++ b/src/mongo/db/catalog/multi_index_block.cpp
@@ -137,7 +137,7 @@ void MultiIndexBlock::abortIndexBuild(OperationContext* opCtx,
// This cleans up all index builds. Because that may need to write, it is done inside of
// a WUOW. Nothing inside this block can fail, and it is made fatal if it does.
for (size_t i = 0; i < _indexes.size(); i++) {
- _indexes[i].block->fail(opCtx, collection.getWritableCollection());
+ _indexes[i].block->fail(opCtx, collection.getWritableCollection(opCtx));
}
onCleanUp();
@@ -301,16 +301,18 @@ StatusWith<std::vector<BSONObj>> MultiIndexBlock::init(
stateInfoIt != resumeInfoIndexes.end());
stateInfo = *stateInfoIt;
- status = index.block->initForResume(
- opCtx, collection.getWritableCollection(), *stateInfo, resumeInfo->getPhase());
+ status = index.block->initForResume(opCtx,
+ collection.getWritableCollection(opCtx),
+ *stateInfo,
+ resumeInfo->getPhase());
} else {
- status = index.block->init(opCtx, collection.getWritableCollection());
+ status = index.block->init(opCtx, collection.getWritableCollection(opCtx));
}
if (!status.isOK())
return status;
auto indexCatalogEntry =
- index.block->getEntry(opCtx, collection.getWritableCollection());
+ index.block->getEntry(opCtx, collection.getWritableCollection(opCtx));
index.real = indexCatalogEntry->accessMethod();
status = index.real->initializeAsEmpty(opCtx);
if (!status.isOK())
diff --git a/src/mongo/db/catalog/multi_index_block_test.cpp b/src/mongo/db/catalog/multi_index_block_test.cpp
index 33874c6e92a..1315975f5c0 100644
--- a/src/mongo/db/catalog/multi_index_block_test.cpp
+++ b/src/mongo/db/catalog/multi_index_block_test.cpp
@@ -100,7 +100,7 @@ TEST_F(MultiIndexBlockTest, CommitWithoutInsertingDocuments) {
{
WriteUnitOfWork wunit(operationContext());
ASSERT_OK(indexer->commit(operationContext(),
- coll.getWritableCollection(),
+ coll.getWritableCollection(operationContext()),
MultiIndexBlock::kNoopOnCreateEachFn,
MultiIndexBlock::kNoopOnCommitFn));
wunit.commit();
@@ -130,7 +130,7 @@ TEST_F(MultiIndexBlockTest, CommitAfterInsertingSingleDocument) {
{
WriteUnitOfWork wunit(operationContext());
ASSERT_OK(indexer->commit(operationContext(),
- coll.getWritableCollection(),
+ coll.getWritableCollection(operationContext()),
MultiIndexBlock::kNoopOnCreateEachFn,
MultiIndexBlock::kNoopOnCommitFn));
wunit.commit();
diff --git a/src/mongo/db/catalog/throttle_cursor_test.cpp b/src/mongo/db/catalog/throttle_cursor_test.cpp
index 8de3f08fbeb..02999c2a739 100644
--- a/src/mongo/db/catalog/throttle_cursor_test.cpp
+++ b/src/mongo/db/catalog/throttle_cursor_test.cpp
@@ -67,7 +67,6 @@ public:
void setMaxMbPerSec(int maxMbPerSec);
Date_t getTime();
- int64_t getDifferenceInMillis(Date_t start, Date_t end);
SortedDataInterfaceThrottleCursor getIdIndex(const CollectionPtr& coll);
std::unique_ptr<DataThrottle> _dataThrottle;
@@ -114,10 +113,6 @@ Date_t ThrottleCursorTest::getTime() {
return operationContext()->getServiceContext()->getFastClockSource()->now();
}
-int64_t ThrottleCursorTest::getDifferenceInMillis(Date_t start, Date_t end) {
- return end.toMillisSinceEpoch() - start.toMillisSinceEpoch();
-}
-
SortedDataInterfaceThrottleCursor ThrottleCursorTest::getIdIndex(const CollectionPtr& coll) {
const IndexDescriptor* idDesc = coll->getIndexCatalog()->findIdIndex(operationContext());
const IndexCatalogEntry* idEntry = coll->getIndexCatalog()->getEntry(idDesc);
@@ -156,7 +151,7 @@ TEST_F(ThrottleCursorTest, TestSeekableRecordThrottleCursorOff) {
Date_t end = getTime();
ASSERT_EQ(numRecords, 20);
- ASSERT_EQ(getDifferenceInMillis(start, end), kTickDelay * numRecords + kTickDelay);
+ ASSERT_EQ(end - start, Milliseconds(kTickDelay * numRecords + kTickDelay));
}
TEST_F(ThrottleCursorTest, TestSeekableRecordThrottleCursorOn) {
@@ -187,7 +182,7 @@ TEST_F(ThrottleCursorTest, TestSeekableRecordThrottleCursorOn) {
Date_t end = getTime();
ASSERT_EQ(numRecords, 10);
- ASSERT_TRUE(getDifferenceInMillis(start, end) >= 5000);
+ ASSERT_GTE(end - start, Milliseconds(5000));
}
// Using a throttle with a limit of 5MB per second, all operations should take at least 1
@@ -207,7 +202,7 @@ TEST_F(ThrottleCursorTest, TestSeekableRecordThrottleCursorOn) {
Date_t end = getTime();
ASSERT_EQ(numRecords, 10);
- ASSERT_TRUE(getDifferenceInMillis(start, end) >= 1000);
+ ASSERT_GTE(end - start, Milliseconds(1000));
}
}
@@ -239,7 +234,7 @@ TEST_F(ThrottleCursorTestFastClock, TestSeekableRecordThrottleCursorOnLargeDocs1
Date_t end = getTime();
ASSERT_EQ(scanRecords, 0);
- ASSERT_GTE(getDifferenceInMillis(start, end), 10 * 1000);
+ ASSERT_GTE(end - start, Milliseconds(10 * 1000));
}
TEST_F(ThrottleCursorTest, TestSeekableRecordThrottleCursorOnLargeDocs5MBps) {
@@ -270,7 +265,7 @@ TEST_F(ThrottleCursorTest, TestSeekableRecordThrottleCursorOnLargeDocs5MBps) {
Date_t end = getTime();
ASSERT_EQ(scanRecords, 0);
- ASSERT_GTE(getDifferenceInMillis(start, end), 2000);
+ ASSERT_GTE(end - start, Milliseconds(2000));
}
TEST_F(ThrottleCursorTest, TestSortedDataInterfaceThrottleCursorOff) {
@@ -297,7 +292,7 @@ TEST_F(ThrottleCursorTest, TestSortedDataInterfaceThrottleCursorOff) {
Date_t end = getTime();
ASSERT_EQ(numRecords, 10);
- ASSERT_EQ(getDifferenceInMillis(start, end), kTickDelay * numRecords + kTickDelay);
+ ASSERT_EQ(end - start, Milliseconds(kTickDelay * numRecords + kTickDelay));
}
TEST_F(ThrottleCursorTest, TestSortedDataInterfaceThrottleCursorOn) {
@@ -327,7 +322,7 @@ TEST_F(ThrottleCursorTest, TestSortedDataInterfaceThrottleCursorOn) {
Date_t end = getTime();
ASSERT_EQ(numRecords, 10);
- ASSERT_TRUE(getDifferenceInMillis(start, end) >= 5000);
+ ASSERT_GTE(end - start, Milliseconds(5000));
}
// Using a throttle with a limit of 5MB per second, all operations should take at least 1
@@ -347,7 +342,7 @@ TEST_F(ThrottleCursorTest, TestSortedDataInterfaceThrottleCursorOn) {
Date_t end = getTime();
ASSERT_EQ(numRecords, 10);
- ASSERT_TRUE(getDifferenceInMillis(start, end) >= 1000);
+ ASSERT_GTE(end - start, Milliseconds(1000));
}
}
@@ -390,7 +385,7 @@ TEST_F(ThrottleCursorTest, TestMixedCursorsWithSharedThrottleOff) {
Date_t end = getTime();
ASSERT_EQ(numRecords, 30);
- ASSERT_EQ(getDifferenceInMillis(start, end), kTickDelay * numRecords + kTickDelay);
+ ASSERT_EQ(end - start, Milliseconds(kTickDelay * numRecords + kTickDelay));
}
TEST_F(ThrottleCursorTest, TestMixedCursorsWithSharedThrottleOn) {
@@ -425,7 +420,7 @@ TEST_F(ThrottleCursorTest, TestMixedCursorsWithSharedThrottleOn) {
Date_t end = getTime();
ASSERT_EQ(numRecords, 20);
- ASSERT_TRUE(getDifferenceInMillis(start, end) >= 5000);
+ ASSERT_GTE(end - start, Milliseconds(5000));
}
// Using a throttle with a limit of 5MB per second, all operations should take at least 2
@@ -447,7 +442,7 @@ TEST_F(ThrottleCursorTest, TestMixedCursorsWithSharedThrottleOn) {
Date_t end = getTime();
ASSERT_EQ(numRecords, 20);
- ASSERT_TRUE(getDifferenceInMillis(start, end) >= 2000);
+ ASSERT_GTE(end - start, Milliseconds(2000));
}
}
diff --git a/src/mongo/db/catalog_raii.cpp b/src/mongo/db/catalog_raii.cpp
index 4396bf245fb..e376905e45f 100644
--- a/src/mongo/db/catalog_raii.cpp
+++ b/src/mongo/db/catalog_raii.cpp
@@ -144,6 +144,10 @@ void acquireCollectionLocksInResourceIdOrder(
// ResourceId(RESOURCE_COLLECTION, nss.ns()).
temp.insert(catalog->resolveNamespaceStringOrUUID(opCtx, nsOrUUID));
for (const auto& secondaryNssOrUUID : secondaryNssOrUUIDs) {
+ invariant(secondaryNssOrUUID.db() == nsOrUUID.db(),
+ str::stream()
+ << "Unable to acquire locks for collections across different databases ("
+ << secondaryNssOrUUID << " vs " << nsOrUUID << ")");
temp.insert(catalog->resolveNamespaceStringOrUUID(opCtx, secondaryNssOrUUID));
}
@@ -165,29 +169,12 @@ void acquireCollectionLocksInResourceIdOrder(
} // namespace
// TODO SERVER-62918 Pass DatabaseName instead of string for dbName.
-AutoGetDb::AutoGetDb(OperationContext* opCtx,
- StringData dbName,
- LockMode mode,
- Date_t deadline,
- const std::set<StringData>& secondaryDbNames)
+AutoGetDb::AutoGetDb(OperationContext* opCtx, StringData dbName, LockMode mode, Date_t deadline)
: _dbName(dbName), _dbLock(opCtx, dbName, mode, deadline), _db([&] {
const DatabaseName tenantDbName(boost::none, dbName);
auto databaseHolder = DatabaseHolder::get(opCtx);
return databaseHolder->getDb(opCtx, tenantDbName);
}()) {
- // Take the secondary dbs' database locks only: no global or RSTL, as they are already acquired
- // above. Note: no consistent ordering is when acquiring database locks because there are no
- // occasions where multiple strong locks are acquired to make ordering matter (deadlock
- // avoidance).
- for (const auto& secondaryDbName : secondaryDbNames) {
- // The primary database may be repeated in the secondary databases and the primary database
- // should not be locked twice.
- if (secondaryDbName != _dbName) {
- _secondaryDbLocks.emplace_back(
- opCtx, secondaryDbName, MODE_IS, deadline, true /*skipGlobalAndRSTLLocks*/);
- }
- }
-
// The 'primary' database must be version checked for sharding.
auto dss = DatabaseShardingState::get(opCtx, dbName);
auto dssLock = DatabaseShardingState::DSSLock::lockShared(opCtx, dss);
@@ -219,19 +206,9 @@ AutoGetCollection::AutoGetCollection(
const std::vector<NamespaceStringOrUUID>& secondaryNssOrUUIDs) {
invariant(!opCtx->isLockFreeReadsOp());
- // Get a unique list of 'secondary' database names to pass into AutoGetDb below.
- std::set<StringData> secondaryDbNames;
- for (auto& secondaryNssOrUUID : secondaryNssOrUUIDs) {
- secondaryDbNames.emplace(secondaryNssOrUUID.db());
- }
-
// Acquire the global/RSTL and all the database locks (may or may not be multiple
// databases).
- _autoDb.emplace(opCtx,
- !nsOrUUID.dbname().empty() ? nsOrUUID.dbname() : nsOrUUID.nss()->db(),
- isSharedLockMode(modeColl) ? MODE_IS : MODE_IX,
- deadline,
- secondaryDbNames);
+ _autoDb.emplace(opCtx, nsOrUUID.db(), isSharedLockMode(modeColl) ? MODE_IS : MODE_IX, deadline);
// Out of an abundance of caution, force operations to acquire new snapshots after
// acquiring exclusive collection locks. Operations that hold MODE_X locks make an
@@ -246,7 +223,7 @@ AutoGetCollection::AutoGetCollection(
// Acquire the collection locks. If there's only one lock, then it can simply be taken. If
// there are many, however, the locks must be taken in _ascending_ ResourceId order to avoid
// deadlocks across threads.
- if (secondaryDbNames.empty()) {
+ if (secondaryNssOrUUIDs.empty()) {
uassertStatusOK(nsOrUUID.isNssValid());
_collLocks.emplace_back(opCtx, nsOrUUID, modeColl, deadline);
} else {
@@ -478,7 +455,6 @@ struct CollectionWriter::SharedImpl {
CollectionWriter::CollectionWriter(OperationContext* opCtx, const UUID& uuid)
: _collection(&_storedCollection),
- _opCtx(opCtx),
_managed(true),
_sharedImpl(std::make_shared<SharedImpl>(this)) {
@@ -490,7 +466,6 @@ CollectionWriter::CollectionWriter(OperationContext* opCtx, const UUID& uuid)
CollectionWriter::CollectionWriter(OperationContext* opCtx, const NamespaceString& nss)
: _collection(&_storedCollection),
- _opCtx(opCtx),
_managed(true),
_sharedImpl(std::make_shared<SharedImpl>(this)) {
_storedCollection = CollectionCatalog::get(opCtx)->lookupCollectionByNamespace(opCtx, nss);
@@ -502,7 +477,6 @@ CollectionWriter::CollectionWriter(OperationContext* opCtx, const NamespaceStrin
CollectionWriter::CollectionWriter(OperationContext* opCtx, AutoGetCollection& autoCollection)
: _collection(&autoCollection.getCollection()),
- _opCtx(opCtx),
_managed(true),
_sharedImpl(std::make_shared<SharedImpl>(this)) {
_sharedImpl->_writableCollectionInitializer = [&autoCollection, opCtx]() {
@@ -523,7 +497,7 @@ CollectionWriter::~CollectionWriter() {
}
}
-Collection* CollectionWriter::getWritableCollection() {
+Collection* CollectionWriter::getWritableCollection(OperationContext* opCtx) {
// Acquire writable instance lazily if not already available
if (!_writableCollection) {
_writableCollection = _sharedImpl->_writableCollectionInitializer();
@@ -539,7 +513,7 @@ Collection* CollectionWriter::getWritableCollection() {
// and re-clone the Collection if a new write unit of work is opened. Holds the back
// pointer to the CollectionWriter explicitly so we can detect if the instance is
// already destroyed.
- _opCtx->recoveryUnit()->registerChange(
+ opCtx->recoveryUnit()->registerChange(
[shared = _sharedImpl](boost::optional<Timestamp>) {
if (shared->_parent)
shared->_parent->_writableCollection = nullptr;
@@ -599,4 +573,35 @@ AutoGetOplog::AutoGetOplog(OperationContext* opCtx, OplogAccessMode mode, Date_t
_oplog = &_oplogInfo->getCollection();
}
+
+AutoGetChangeCollection::AutoGetChangeCollection(OperationContext* opCtx,
+ AutoGetChangeCollection::AccessMode mode,
+ boost::optional<TenantId> tenantId,
+ Date_t deadline) {
+ auto nss = NamespaceString::makeChangeCollectionNSS(tenantId);
+ if (mode == AccessMode::kWrite) {
+ // The global lock must already be held.
+ invariant(opCtx->lockState()->isWriteLocked());
+
+ // TODO SERVER-66715 avoid taking 'AutoGetCollection' and remove
+ // 'AllowLockAcquisitionOnTimestampedUnitOfWork'.
+ AllowLockAcquisitionOnTimestampedUnitOfWork allowLockAcquisition(opCtx->lockState());
+ _coll.emplace(
+ opCtx, nss, LockMode::MODE_IX, AutoGetCollectionViewMode::kViewsForbidden, deadline);
+ }
+}
+
+const Collection* AutoGetChangeCollection::operator->() const {
+ return _coll ? _coll->getCollection().get() : nullptr;
+}
+
+const CollectionPtr& AutoGetChangeCollection::operator*() const {
+ return _coll->getCollection();
+}
+
+AutoGetChangeCollection::operator bool() const {
+ return _coll && _coll->getCollection().get();
+}
+
+
} // namespace mongo
diff --git a/src/mongo/db/catalog_raii.h b/src/mongo/db/catalog_raii.h
index 3b66a3a7294..2c48422f8fb 100644
--- a/src/mongo/db/catalog_raii.h
+++ b/src/mongo/db/catalog_raii.h
@@ -56,16 +56,10 @@ class AutoGetDb {
AutoGetDb& operator=(const AutoGetDb&) = delete;
public:
- /**
- * Database locks are also acquired for any 'secondaryDbNames' database names provided. Only
- * MODE_IS is supported when 'secondaryDbNames' are provided. It is safe to repeat 'dbName' in
- * 'secondaryDbNames'.
- */
AutoGetDb(OperationContext* opCtx,
StringData dbName,
LockMode mode,
- Date_t deadline = Date_t::max(),
- const std::set<StringData>& secondaryDbNames = {});
+ Date_t deadline = Date_t::max());
AutoGetDb(AutoGetDb&&) = default;
@@ -386,7 +380,7 @@ public:
// Returns writable Collection, any previous Collection that has been returned may be
// invalidated.
- Collection* getWritableCollection();
+ Collection* getWritableCollection(OperationContext* opCtx);
private:
// If this class is instantiated with the constructors that take UUID or nss we need somewhere
@@ -397,7 +391,6 @@ private:
const CollectionPtr* _collection = nullptr;
CollectionPtr _storedCollection;
Collection* _writableCollection = nullptr;
- OperationContext* _opCtx = nullptr;
// Indicates if this instance is managing Collection pointers through commit and rollback.
bool _managed;
@@ -477,4 +470,31 @@ private:
const CollectionPtr* _oplog;
};
+/**
+ * A RAII-style class to acquire lock to a particular tenant's change collection.
+ *
+ * A change collection can be accessed in the following modes:
+ * kWrite - This mode assumes that the global IX lock is already held before writing to the change
+ * collection.
+ */
+class AutoGetChangeCollection {
+public:
+ enum class AccessMode { kWrite };
+
+ AutoGetChangeCollection(OperationContext* opCtx,
+ AccessMode mode,
+ boost::optional<TenantId> tenantId,
+ Date_t deadline = Date_t::max());
+
+ AutoGetChangeCollection(const AutoGetChangeCollection&) = delete;
+ AutoGetChangeCollection& operator=(const AutoGetChangeCollection&) = delete;
+
+ const Collection* operator->() const;
+ const CollectionPtr& operator*() const;
+ explicit operator bool() const;
+
+private:
+ boost::optional<AutoGetCollection> _coll;
+};
+
} // namespace mongo
diff --git a/src/mongo/db/catalog_raii_test.cpp b/src/mongo/db/catalog_raii_test.cpp
index 383a3dddc36..2b86133de66 100644
--- a/src/mongo/db/catalog_raii_test.cpp
+++ b/src/mongo/db/catalog_raii_test.cpp
@@ -27,12 +27,8 @@
* it in the license file.
*/
+#include <boost/optional/optional_io.hpp>
-#include "mongo/platform/basic.h"
-
-#include <string>
-
-#include "boost/optional/optional_io.hpp"
#include "mongo/db/catalog/database_holder_mock.h"
#include "mongo/db/catalog_raii.h"
#include "mongo/db/client.h"
@@ -47,7 +43,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
-
namespace mongo {
namespace {
@@ -66,6 +61,7 @@ public:
const NamespaceString nss = NamespaceString("test", "coll");
const NamespaceString kSecondaryNss1 = NamespaceString("test", "secondaryColl1");
const NamespaceString kSecondaryNss2 = NamespaceString("test", "secondaryColl2");
+ const NamespaceString kSecondaryNss3 = NamespaceString("test", "secondaryColl3");
const NamespaceString kSecondaryNssOtherDb1 = NamespaceString("test2", "secondaryColl1");
const NamespaceString kSecondaryNssOtherDb2 = NamespaceString("test2", "secondaryColl2");
const Milliseconds timeoutMs = Seconds(1);
@@ -125,44 +121,6 @@ TEST_F(CatalogRAIITestFixture, AutoGetDBDeadlineMin) {
Milliseconds(0));
}
-TEST_F(CatalogRAIITestFixture, AutoGetDBMultiDBDeadline) {
- // Take the kSecondaryNssOtherDb1 database MODE_X lock to create a conflict later.
- boost::optional<Lock::DBLock> dbLockXLock;
- dbLockXLock.emplace(client1.second.get(), kSecondaryNssOtherDb1.db(), MODE_X);
- ASSERT(client1.second->lockState()->isDbLockedForMode(kSecondaryNssOtherDb1.db(), MODE_X));
-
- // Trying to acquire the kSecondaryNssOtherDb1 database MODE_IS lock should time out.
- std::set<StringData> secondaryDbNamesConflicting{kSecondaryNss1.db(),
- kSecondaryNss2.db(),
- kSecondaryNssOtherDb1.db(),
- kSecondaryNssOtherDb2.db()};
- failsWithLockTimeout(
- [&] {
- AutoGetDb autoGetDb(client2.second.get(),
- nss.db(),
- MODE_IS,
- Date_t::now() + timeoutMs,
- secondaryDbNamesConflicting);
- },
- timeoutMs);
-
- {
- // Acquiring multiple database locks without the kSecondaryNssOtherDb1 database should work.
- std::set<StringData> secondaryDbNamesNoConflict{kSecondaryNss1.db()};
- AutoGetDb autoGetDbNoConflict(client2.second.get(),
- kSecondaryNss1.db(),
- MODE_IS,
- Date_t::max(),
- secondaryDbNamesNoConflict);
- }
-
- // Lastly, with the MODE_X lock on kSecondaryNssOtherDb1.db() released, the original multi
- // database lock request should work.
- dbLockXLock.reset();
- AutoGetDb autoGetDb(
- client2.second.get(), nss.db(), MODE_IS, Date_t::max(), secondaryDbNamesConflicting);
-}
-
TEST_F(CatalogRAIITestFixture, AutoGetCollectionCollLockDeadline) {
Lock::DBLock dbLock1(client1.second.get(), nss.db(), MODE_IX);
ASSERT(client1.second->lockState()->isDbLockedForMode(nss.db(), MODE_IX));
@@ -337,50 +295,11 @@ TEST_F(CatalogRAIITestFixture, AutoGetCollectionMultiNamespacesMODEIX) {
ASSERT(!opCtx1->lockState()->isLocked()); // Global lock check
}
-TEST_F(CatalogRAIITestFixture, AutoGetCollectionSecondaryNamespacesMultiDb) {
- auto opCtx1 = client1.second.get();
-
- std::vector<NamespaceStringOrUUID> secondaryNamespaces{
- NamespaceStringOrUUID(kSecondaryNss1),
- NamespaceStringOrUUID(kSecondaryNss2),
- NamespaceStringOrUUID(kSecondaryNssOtherDb1),
- NamespaceStringOrUUID(kSecondaryNssOtherDb2)};
- boost::optional<AutoGetCollection> autoGetColl;
- autoGetColl.emplace(opCtx1,
- nss,
- MODE_IS,
- AutoGetCollectionViewMode::kViewsForbidden,
- Date_t::max(),
- secondaryNamespaces);
-
- ASSERT(opCtx1->lockState()->isRSTLLocked());
- ASSERT(opCtx1->lockState()->isReadLocked()); // Global lock check
- ASSERT(opCtx1->lockState()->isDbLockedForMode(nss.db(), MODE_IS));
- ASSERT(opCtx1->lockState()->isDbLockedForMode(kSecondaryNss1.db(), MODE_IS));
- ASSERT(opCtx1->lockState()->isDbLockedForMode(kSecondaryNss2.db(), MODE_IS));
- ASSERT(opCtx1->lockState()->isDbLockedForMode(kSecondaryNssOtherDb1.db(), MODE_IS));
- ASSERT(opCtx1->lockState()->isDbLockedForMode(kSecondaryNssOtherDb2.db(), MODE_IS));
- ASSERT(opCtx1->lockState()->isCollectionLockedForMode(nss, MODE_IS));
- ASSERT(opCtx1->lockState()->isCollectionLockedForMode(kSecondaryNss1, MODE_IS));
- ASSERT(opCtx1->lockState()->isCollectionLockedForMode(kSecondaryNss2, MODE_IS));
- ASSERT(opCtx1->lockState()->isCollectionLockedForMode(kSecondaryNssOtherDb1, MODE_IS));
- ASSERT(opCtx1->lockState()->isCollectionLockedForMode(kSecondaryNssOtherDb2, MODE_IS));
-
- ASSERT(!opCtx1->lockState()->isRSTLExclusive());
- ASSERT(!opCtx1->lockState()->isGlobalLockedRecursively());
- ASSERT(!opCtx1->lockState()->isWriteLocked());
-
- // All the locks should release.
- autoGetColl.reset();
- ASSERT(!opCtx1->lockState()->isLocked()); // Global lock check.
-}
-
TEST_F(CatalogRAIITestFixture, AutoGetDbSecondaryNamespacesSingleDb) {
auto opCtx1 = client1.second.get();
- std::set<StringData> secondaryDbNames{kSecondaryNss1.db(), kSecondaryNss2.db()};
boost::optional<AutoGetDb> autoGetDb;
- autoGetDb.emplace(opCtx1, nss.db(), MODE_IS, Date_t::max(), secondaryDbNames);
+ autoGetDb.emplace(opCtx1, nss.db(), MODE_IS, Date_t::max());
ASSERT(opCtx1->lockState()->isRSTLLocked());
ASSERT(opCtx1->lockState()->isReadLocked()); // Global lock check
@@ -399,33 +318,6 @@ TEST_F(CatalogRAIITestFixture, AutoGetDbSecondaryNamespacesSingleDb) {
ASSERT(!opCtx1->lockState()->isLocked()); // Global lock check.
}
-TEST_F(CatalogRAIITestFixture, AutoGetDbSecondaryNamespacesMultiDb) {
- auto opCtx1 = client1.second.get();
-
- std::set<StringData> secondaryDbNames{kSecondaryNss1.db(),
- kSecondaryNss2.db(),
- kSecondaryNssOtherDb1.db(),
- kSecondaryNssOtherDb2.db()};
- boost::optional<AutoGetDb> autoGetDb;
- autoGetDb.emplace(opCtx1, nss.db(), MODE_IS, Date_t::max(), secondaryDbNames);
-
- ASSERT(opCtx1->lockState()->isReadLocked()); // Global lock check
- ASSERT(opCtx1->lockState()->isRSTLLocked());
- ASSERT(opCtx1->lockState()->isDbLockedForMode(nss.db(), MODE_IS));
- ASSERT(opCtx1->lockState()->isDbLockedForMode(kSecondaryNss1.db(), MODE_IS));
- ASSERT(opCtx1->lockState()->isDbLockedForMode(kSecondaryNss2.db(), MODE_IS));
- ASSERT(opCtx1->lockState()->isDbLockedForMode(kSecondaryNssOtherDb1.db(), MODE_IS));
- ASSERT(opCtx1->lockState()->isDbLockedForMode(kSecondaryNssOtherDb2.db(), MODE_IS));
-
- ASSERT(!opCtx1->lockState()->isRSTLExclusive());
- ASSERT(!opCtx1->lockState()->isGlobalLockedRecursively());
- ASSERT(!opCtx1->lockState()->isWriteLocked());
-
- // All the locks should release.
- autoGetDb.reset();
- ASSERT(!opCtx1->lockState()->isLocked()); // Global lock check.
-}
-
TEST_F(CatalogRAIITestFixture, AutoGetCollectionMultiNssCollLockDeadline) {
// Take a MODE_X collection lock on kSecondaryNss1.
boost::optional<AutoGetCollection> autoGetCollWithXLock;
@@ -437,7 +329,7 @@ TEST_F(CatalogRAIITestFixture, AutoGetCollectionMultiNssCollLockDeadline) {
const std::vector<NamespaceStringOrUUID> secondaryNamespacesConflict{
NamespaceStringOrUUID(kSecondaryNss1),
NamespaceStringOrUUID(kSecondaryNss2),
- NamespaceStringOrUUID(kSecondaryNssOtherDb1)};
+ NamespaceStringOrUUID(kSecondaryNss3)};
failsWithLockTimeout(
[&] {
AutoGetCollection coll(client2.second.get(),
@@ -452,7 +344,7 @@ TEST_F(CatalogRAIITestFixture, AutoGetCollectionMultiNssCollLockDeadline) {
{
// Sanity check that there's no conflict without kSecondaryNss1 that's MODE_X locked.
const std::vector<NamespaceStringOrUUID> secondaryNamespacesNoConflict{
- NamespaceStringOrUUID(kSecondaryNss2), NamespaceStringOrUUID(kSecondaryNssOtherDb1)};
+ NamespaceStringOrUUID(kSecondaryNss2), NamespaceStringOrUUID(kSecondaryNss2)};
AutoGetCollection collNoConflict(client2.second.get(),
nss,
MODE_IS,
diff --git a/src/mongo/db/change_stream_change_collection_manager.cpp b/src/mongo/db/change_stream_change_collection_manager.cpp
index 3311c7be092..d76d197c505 100644
--- a/src/mongo/db/change_stream_change_collection_manager.cpp
+++ b/src/mongo/db/change_stream_change_collection_manager.cpp
@@ -48,11 +48,101 @@ namespace {
const auto getChangeCollectionManager =
ServiceContext::declareDecoration<boost::optional<ChangeStreamChangeCollectionManager>>();
-// TODO: SERVER-65950 create or update the change collection for a particular tenant.
-NamespaceString getTenantChangeCollectionNamespace(boost::optional<TenantId> tenantId) {
- return NamespaceString{NamespaceString::kConfigDb, NamespaceString::kChangeCollectionName};
+/**
+ * Creates a Document object from the supplied oplog entry, performs necessary modifications to it
+ * and then returns it as a BSON object.
+ */
+BSONObj createChangeCollectionEntryFromOplog(const BSONObj& oplogEntry) {
+ Document oplogDoc(oplogEntry);
+ MutableDocument changeCollDoc(oplogDoc);
+ changeCollDoc["_id"] = Value(oplogDoc["ts"]);
+
+ auto readyChangeCollDoc = changeCollDoc.freeze();
+ return readyChangeCollDoc.toBson();
}
+/**
+ * Helper to write insert statements to respective change collections based on tenant ids.
+ */
+class ChangeCollectionsWriter {
+public:
+ /**
+ * Adds the insert statement for the provided tenant that will be written to the change
+ * collection when the 'write()' method is called.
+ */
+ void add(const TenantId& tenantId, InsertStatement insertStatement) {
+ if (_shouldAddEntry(insertStatement)) {
+ _tenantStatementsMap[tenantId].push_back(std::move(insertStatement));
+ }
+ }
+
+ /**
+ * Writes the batch of insert statements for each change collection. Bails out further writes if
+     * a failure is encountered in writing to any change collection.
+ */
+ Status write(OperationContext* opCtx, OpDebug* opDebug) {
+ for (auto&& [tenantId, insertStatements] : _tenantStatementsMap) {
+ AutoGetChangeCollection tenantChangeCollection(
+ opCtx, AutoGetChangeCollection::AccessMode::kWrite, boost::none /* tenantId */);
+
+ // The change collection does not exist for a particular tenant because either the
+ // change collection is not enabled or is in the process of enablement. Ignore this
+ // insert for now.
+ // TODO: SERVER-65950 move this check before inserting to the map
+ // 'tenantToInsertStatements'.
+ if (!tenantChangeCollection) {
+ continue;
+ }
+
+ // Writes to the change collection should not be replicated.
+ repl::UnreplicatedWritesBlock unReplBlock(opCtx);
+
+ Status status = tenantChangeCollection->insertDocuments(opCtx,
+ insertStatements.begin(),
+ insertStatements.end(),
+ opDebug,
+ false /* fromMigrate */);
+ if (!status.isOK()) {
+ return Status(status.code(),
+ str::stream()
+ << "Write to change collection: " << tenantChangeCollection->ns()
+ << "failed, reason: " << status.reason());
+ }
+ }
+
+ return Status::OK();
+ }
+
+private:
+ bool _shouldAddEntry(const InsertStatement& insertStatement) {
+ auto& oplogDoc = insertStatement.doc;
+
+        // TODO SERVER-65950 retrieve tenant from the oplog.
+ // TODO SERVER-67170 avoid inspecting the oplog BSON object.
+
+ if (auto nssFieldElem = oplogDoc[repl::OplogEntry::kNssFieldName];
+ nssFieldElem && nssFieldElem.String() == "config.$cmd"_sd) {
+ if (auto objectFieldElem = oplogDoc[repl::OplogEntry::kObjectFieldName]) {
+ // The oplog entry might be a drop command on the change collection. Check if the
+ // drop request is for the already deleted change collection, as such do not attempt
+ // to write to the change collection if that is the case. This scenario is possible
+ // because 'WriteUnitOfWork' will stage the changes and while committing the staged
+ // 'CollectionImpl::insertDocuments' change the collection object might have already
+ // been deleted.
+ if (auto dropFieldElem = objectFieldElem["drop"_sd]) {
+ return dropFieldElem.String() != NamespaceString::kChangeCollectionName;
+ }
+ }
+ }
+
+ return true;
+ }
+
+    // Maps insert statements for each tenant.
+ stdx::unordered_map<TenantId, std::vector<InsertStatement>, TenantId::Hasher>
+ _tenantStatementsMap;
+};
+
} // namespace
ChangeStreamChangeCollectionManager& ChangeStreamChangeCollectionManager::get(
@@ -69,12 +159,19 @@ void ChangeStreamChangeCollectionManager::create(ServiceContext* service) {
getChangeCollectionManager(service).emplace(service);
}
-bool ChangeStreamChangeCollectionManager::isChangeCollectionEnabled() {
+bool ChangeStreamChangeCollectionManager::isChangeCollectionsModeActive() {
return feature_flags::gFeatureFlagServerlessChangeStreams.isEnabled(
serverGlobalParams.featureCompatibility) &&
gMultitenancySupport;
}
+bool ChangeStreamChangeCollectionManager::hasChangeCollection(
+ OperationContext* opCtx, boost::optional<TenantId> tenantId) const {
+ auto catalog = CollectionCatalog::get(opCtx);
+ return static_cast<bool>(catalog->lookupCollectionByNamespace(
+ opCtx, NamespaceString::makeChangeCollectionNSS(tenantId)));
+}
+
Status ChangeStreamChangeCollectionManager::createChangeCollection(
OperationContext* opCtx, boost::optional<TenantId> tenantId) {
// Make the change collection clustered by '_id'. The '_id' field will have the same value as
@@ -83,8 +180,10 @@ Status ChangeStreamChangeCollectionManager::createChangeCollection(
changeCollectionOptions.clusteredIndex.emplace(clustered_util::makeDefaultClusteredIdIndex());
changeCollectionOptions.capped = true;
- auto status = createCollection(
- opCtx, getTenantChangeCollectionNamespace(tenantId), changeCollectionOptions, BSONObj());
+ auto status = createCollection(opCtx,
+ NamespaceString::makeChangeCollectionNSS(tenantId),
+ changeCollectionOptions,
+ BSONObj());
if (status.code() == ErrorCodes::NamespaceExists) {
return Status::OK();
}
@@ -96,7 +195,7 @@ Status ChangeStreamChangeCollectionManager::dropChangeCollection(
OperationContext* opCtx, boost::optional<TenantId> tenantId) {
DropReply dropReply;
return dropCollection(opCtx,
- getTenantChangeCollectionNamespace(tenantId),
+ NamespaceString::makeChangeCollectionNSS(tenantId),
&dropReply,
DropCollectionSystemCollectionMode::kAllowSystemCollectionDrops);
}
@@ -111,58 +210,57 @@ void ChangeStreamChangeCollectionManager::insertDocumentsToChangeCollection(
// commiting the unit of work.
invariant(opCtx->lockState()->inAWriteUnitOfWork());
- // Maps statements that should be inserted to the change collection for each tenant.
- stdx::unordered_map<TenantId, std::vector<InsertStatement>, TenantId::Hasher>
- tenantToInsertStatements;
+ ChangeCollectionsWriter changeCollectionsWriter;
for (size_t idx = 0; idx < oplogRecords.size(); idx++) {
auto& record = oplogRecords[idx];
auto& ts = oplogTimestamps[idx];
- // Create a mutable document and update the '_id' field with the oplog entry timestamp. The
- // '_id' field will be use to order the change collection documents.
- Document oplogDoc(record.data.toBson());
- MutableDocument changeCollDoc(oplogDoc);
- changeCollDoc["_id"] = Value(ts);
-
// Create an insert statement that should be written at the timestamp 'ts' for a particular
// tenant.
- auto readyChangeCollDoc = changeCollDoc.freeze();
- tenantToInsertStatements[TenantId::kSystemTenantId].push_back(
- InsertStatement{readyChangeCollDoc.toBson(), ts, repl::OpTime::kUninitializedTerm});
+ auto changeCollDoc = createChangeCollectionEntryFromOplog(record.data.toBson());
+
+ // TODO SERVER-65950 replace 'TenantId::kSystemTenantId' with the tenant id.
+ changeCollectionsWriter.add(
+ TenantId::kSystemTenantId,
+ InsertStatement{std::move(changeCollDoc), ts, repl::OpTime::kUninitializedTerm});
}
- for (auto&& [tenantId, insertStatements] : tenantToInsertStatements) {
- // TODO SERVER-66715 avoid taking 'AutoGetCollection' and remove
- // 'AllowLockAcquisitionOnTimestampedUnitOfWork'.
- AllowLockAcquisitionOnTimestampedUnitOfWork allowLockAcquisition(opCtx->lockState());
- AutoGetCollection tenantChangeCollection(
- opCtx, getTenantChangeCollectionNamespace(tenantId), LockMode::MODE_IX);
-
- // The change collection does not exist for a particular tenant because either the change
- // collection is not enabled or is in the process of enablement. Ignore this insert for now.
- // TODO: SERVER-65950 move this check before inserting to the map
- // 'tenantToInsertStatements'.
- if (!tenantChangeCollection) {
- continue;
- }
+ // Write documents to change collections and throw exception in case of any failure.
+ Status status = changeCollectionsWriter.write(opCtx, nullptr /* opDebug */);
+ if (!status.isOK()) {
+ LOGV2_FATAL(
+ 6612300, "Failed to write to change collection", "reason"_attr = status.reason());
+ }
+}
- // Writes to the change collection should not be replicated.
- repl::UnreplicatedWritesBlock unReplBlock(opCtx);
-
- Status status = tenantChangeCollection->insertDocuments(opCtx,
- insertStatements.begin(),
- insertStatements.end(),
- nullptr /* opDebug */,
- false /* fromMigrate */);
- if (!status.isOK()) {
- LOGV2_FATAL(6612300,
- "Write to change collection: {ns} failed: {error}",
- "Write to change collection failed",
- "ns"_attr = tenantChangeCollection->ns().toString(),
- "error"_attr = status.toString());
- }
+Status ChangeStreamChangeCollectionManager::insertDocumentsToChangeCollection(
+ OperationContext* opCtx,
+ std::vector<InsertStatement>::const_iterator beginOplogEntries,
+ std::vector<InsertStatement>::const_iterator endOplogEntries,
+ OpDebug* opDebug) {
+ ChangeCollectionsWriter changeCollectionsWriter;
+
+ // Transform oplog entries to change collections entries and group them by tenant id.
+ for (auto oplogEntryIter = beginOplogEntries; oplogEntryIter != endOplogEntries;
+ oplogEntryIter++) {
+ auto& oplogDoc = oplogEntryIter->doc;
+
+ // The initial seed oplog insertion is not timestamped as such the 'oplogSlot' is not
+ // initialized. The corresponding change collection insertion will not be timestamped.
+ auto oplogSlot = oplogEntryIter->oplogSlot;
+
+ auto changeCollDoc = createChangeCollectionEntryFromOplog(oplogDoc);
+
+ // TODO SERVER-65950 replace 'TenantId::kSystemTenantId' with the tenant id.
+ changeCollectionsWriter.add(TenantId::kSystemTenantId,
+ InsertStatement{std::move(changeCollDoc),
+ oplogSlot.getTimestamp(),
+ oplogSlot.getTerm()});
}
+
+ // Write documents to change collections.
+ return changeCollectionsWriter.write(opCtx, opDebug);
}
} // namespace mongo
diff --git a/src/mongo/db/change_stream_change_collection_manager.h b/src/mongo/db/change_stream_change_collection_manager.h
index 8ecc48b9a5c..f9fe6d6f414 100644
--- a/src/mongo/db/change_stream_change_collection_manager.h
+++ b/src/mongo/db/change_stream_change_collection_manager.h
@@ -31,6 +31,7 @@
#include "mongo/db/catalog/collection_catalog.h"
#include "mongo/db/operation_context.h"
+#include "mongo/db/repl/storage_interface.h"
#include "mongo/db/service_context.h"
namespace mongo {
@@ -63,7 +64,12 @@ public:
* Returns true if change collections are enabled for recording oplog entries, false
* otherwise.
*/
- static bool isChangeCollectionEnabled();
+ static bool isChangeCollectionsModeActive();
+
+ /**
+ * Returns true if the change collection is present for the specified tenant, false otherwise.
+ */
+ bool hasChangeCollection(OperationContext* opCtx, boost::optional<TenantId> tenantId) const;
/**
* Creates a change collection for the specified tenant, if it doesn't exist. Returns Status::OK
@@ -81,9 +87,9 @@ public:
Status dropChangeCollection(OperationContext* opCtx, boost::optional<TenantId> tenantId);
/**
- * Inserts documents to change collections. The parameter 'oplogRecords'
- * is a vector of oplog records and the parameter 'oplogTimestamps' is a vector for respective
- * timestamp for each oplog record.
+ * Inserts documents to change collections. The parameter 'oplogRecords' is a vector of oplog
+ * records and the parameter 'oplogTimestamps' is a vector for respective timestamp for each
+ * oplog record.
*
* The method fetches the tenant-id from the oplog entry, performs necessary modification to the
* document and then write to the tenant's change collection at the specified oplog timestamp.
@@ -96,6 +102,20 @@ public:
void insertDocumentsToChangeCollection(OperationContext* opCtx,
const std::vector<Record>& oplogRecords,
const std::vector<Timestamp>& oplogTimestamps);
+
+
+ /**
+ * Performs a range inserts on respective change collections using the oplog entries as
+ * specified by 'beginOplogEntries' and 'endOplogEntries'.
+ *
+ * Bails out if a failure is encountered in inserting documents to a particular change
+ * collection.
+ */
+ Status insertDocumentsToChangeCollection(
+ OperationContext* opCtx,
+ std::vector<InsertStatement>::const_iterator beginOplogEntries,
+ std::vector<InsertStatement>::const_iterator endOplogEntries,
+ OpDebug* opDebug);
};
} // namespace mongo
diff --git a/src/mongo/db/change_streams_cluster_parameter.cpp b/src/mongo/db/change_streams_cluster_parameter.cpp
new file mode 100644
index 00000000000..c0ac9577f2e
--- /dev/null
+++ b/src/mongo/db/change_streams_cluster_parameter.cpp
@@ -0,0 +1,62 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery
+
+#include "mongo/db/change_streams_cluster_parameter.h"
+
+#include "mongo/base/status.h"
+#include "mongo/db/change_streams_cluster_parameter_gen.h"
+#include "mongo/logv2/log.h"
+namespace mongo {
+
+Status validateChangeStreamsClusterParameter(
+ const ChangeStreamsClusterParameterStorage& clusterParameter) {
+ LOGV2_DEBUG(6594801,
+ 1,
+ "Validating change streams cluster parameter",
+ "enabled"_attr = clusterParameter.getEnabled(),
+ "expireAfterSeconds"_attr = clusterParameter.getExpireAfterSeconds());
+ if (clusterParameter.getEnabled()) {
+ if (clusterParameter.getExpireAfterSeconds() <= 0) {
+ return Status(ErrorCodes::BadValue,
+ "Expected a positive integer for 'expireAfterSeconds' field if 'enabled' "
+ "field is true");
+ }
+ } else {
+ if (clusterParameter.getExpireAfterSeconds() != 0) {
+ return Status(
+ ErrorCodes::BadValue,
+ "Expected a zero value for 'expireAfterSeconds' if 'enabled' field is false");
+ }
+ }
+ return Status::OK();
+}
+
+} // namespace mongo
diff --git a/src/mongo/db/initialize_snmp.cpp b/src/mongo/db/change_streams_cluster_parameter.h
index c623b8c91f7..ebeedaa0e8b 100644
--- a/src/mongo/db/initialize_snmp.cpp
+++ b/src/mongo/db/change_streams_cluster_parameter.h
@@ -1,5 +1,5 @@
/**
- * Copyright (C) 2018-present MongoDB, Inc.
+ * Copyright (C) 2022-present MongoDB, Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the Server Side Public License, version 1,
@@ -27,22 +27,16 @@
* it in the license file.
*/
-#include "mongo/db/initialize_snmp.h"
-#include "mongo/util/assert_util.h"
+#pragma once
+#include "mongo/base/status.h"
namespace mongo {
-namespace {
-bool initSet = false;
-std::function<void()> snmpInitializer = [] {};
-} // namespace
-} // namespace mongo
-void mongo::registerSNMPInitializer(std::function<void()> init) {
- invariant(!initSet);
- snmpInitializer = std::move(init);
- initSet = true;
-}
+class ChangeStreamsClusterParameterStorage;
-void mongo::initializeSNMP() {
- return snmpInitializer();
-}
+/**
+ * Validates 'changeStreams' cluster-wide parameter.
+ */
+Status validateChangeStreamsClusterParameter(
+ const ChangeStreamsClusterParameterStorage& clusterParameter);
+} // namespace mongo
diff --git a/src/mongo/db/change_streams_cluster_parameter.idl b/src/mongo/db/change_streams_cluster_parameter.idl
new file mode 100644
index 00000000000..74563d47752
--- /dev/null
+++ b/src/mongo/db/change_streams_cluster_parameter.idl
@@ -0,0 +1,64 @@
+# Copyright (C) 2022-present MongoDB, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the Server Side Public License, version 1,
+# as published by MongoDB, Inc.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# Server Side Public License for more details.
+#
+# You should have received a copy of the Server Side Public License
+# along with this program. If not, see
+# <http://www.mongodb.com/licensing/server-side-public-license>.
+#
+# As a special exception, the copyright holders give permission to link the
+# code of portions of this program with the OpenSSL library under certain
+# conditions as described in each individual source file and distribute
+# linked combinations including the program with the OpenSSL library. You
+# must comply with the Server Side Public License in all respects for
+# all of the code used other than as permitted herein. If you modify file(s)
+# with this exception, you may extend this exception to your version of the
+# file(s), but you are not obligated to do so. If you do not wish to do so,
+# delete this exception statement from your version. If you delete this
+# exception statement from all source files in the program, then also delete
+# it in the license file.
+#
+
+global:
+ cpp_namespace: "mongo"
+ cpp_includes:
+ - "mongo/db/change_streams_cluster_parameter.h"
+
+imports:
+ - "mongo/idl/basic_types.idl"
+ - "mongo/idl/cluster_server_parameter.idl"
+
+structs:
+ ChangeStreamsClusterParameterStorage:
+ description: "A specification for the 'changeStreams' cluster-wide configuration parameter
+ type."
+ inline_chained_structs: true
+ chained_structs:
+ ClusterServerParameter: clusterServerParameter
+ fields:
+ enabled:
+ description: "Enable or disable change streams."
+ type: bool
+ default: false
+ expireAfterSeconds:
+ description: "The number of seconds to retain the change events. This value will be a
+ non-zero positive value if the change stream is enabled and a zero value if the change
+ stream is disabled."
+ type: safeInt64
+ default: 0
+
+server_parameters:
+ changeStreams:
+ description: "The cluster-wide configuration parameter for the change stream in the serverless."
+ set_at: cluster
+ cpp_vartype: ChangeStreamsClusterParameterStorage
+ cpp_varname: gChangeStreamsClusterParameter
+ validator:
+ callback: validateChangeStreamsClusterParameter
diff --git a/src/mongo/db/change_streams_cluster_parameter_test.cpp b/src/mongo/db/change_streams_cluster_parameter_test.cpp
new file mode 100644
index 00000000000..80ef8d71da7
--- /dev/null
+++ b/src/mongo/db/change_streams_cluster_parameter_test.cpp
@@ -0,0 +1,78 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/change_streams_cluster_parameter.h"
+#include "mongo/db/change_streams_cluster_parameter_gen.h"
+#include "mongo/unittest/unittest.h"
+
+namespace mongo {
+namespace {
+
+
+TEST(ValidateChangeStreamsClusterParameter, EnabledWithSuccess) {
+ ChangeStreamsClusterParameterStorage changeStreamClusterParam;
+ changeStreamClusterParam.setEnabled(true);
+ changeStreamClusterParam.setExpireAfterSeconds(3600);
+ auto result = validateChangeStreamsClusterParameter(changeStreamClusterParam);
+ ASSERT_TRUE(result.isOK());
+}
+
+TEST(ValidateChangeStreamsClusterParameter, EnabledWithNonPositiveExpireAfterSeconds) {
+ ChangeStreamsClusterParameterStorage changeStreamClusterParam;
+ changeStreamClusterParam.setEnabled(true);
+ changeStreamClusterParam.setExpireAfterSeconds(0);
+ auto resultZero = validateChangeStreamsClusterParameter(changeStreamClusterParam);
+ ASSERT_EQ(resultZero.code(), ErrorCodes::BadValue);
+
+ changeStreamClusterParam.setExpireAfterSeconds(-1);
+ auto resultNegative = validateChangeStreamsClusterParameter(changeStreamClusterParam);
+ ASSERT_EQ(resultNegative.code(), ErrorCodes::BadValue);
+}
+
+TEST(ValidateChangeStreamsClusterParameter, DisabledWithSuccess) {
+ ChangeStreamsClusterParameterStorage changeStreamClusterParam;
+ changeStreamClusterParam.setEnabled(false);
+ auto resultDefault = validateChangeStreamsClusterParameter(changeStreamClusterParam);
+ ASSERT_TRUE(resultDefault.isOK());
+
+ changeStreamClusterParam.setExpireAfterSeconds(0);
+ auto resultZero = validateChangeStreamsClusterParameter(changeStreamClusterParam);
+ ASSERT_TRUE(resultDefault.isOK());
+}
+
+TEST(ValidateChangeStreamsClusterParameter, DisabledWithNonZeroExpireAfterSeconds) {
+ ChangeStreamsClusterParameterStorage changeStreamClusterParam;
+ changeStreamClusterParam.setEnabled(false);
+ changeStreamClusterParam.setExpireAfterSeconds(1);
+ auto result = validateChangeStreamsClusterParameter(changeStreamClusterParam);
+ ASSERT_EQ(result.code(), ErrorCodes::BadValue);
+}
+
+} // namespace
+} // namespace mongo
diff --git a/src/mongo/db/cloner.cpp b/src/mongo/db/cloner.cpp
index bc5fc066e03..a6f394b481d 100644
--- a/src/mongo/db/cloner.cpp
+++ b/src/mongo/db/cloner.cpp
@@ -90,11 +90,11 @@ BSONObj Cloner::_getIdIndexSpec(const std::list<BSONObj>& indexSpecs) {
Cloner::Cloner() {}
-struct Cloner::Fun {
- Fun(OperationContext* opCtx, const std::string& dbName)
+struct Cloner::BatchHandler {
+ BatchHandler(OperationContext* opCtx, const std::string& dbName)
: lastLog(0), opCtx(opCtx), _dbName(dbName) {}
- void operator()(DBClientCursorBatchIterator& i) {
+ void operator()(DBClientCursor& cursor) {
boost::optional<Lock::DBLock> dbLock;
dbLock.emplace(opCtx, _dbName, MODE_X);
uassert(ErrorCodes::NotWritablePrimary,
@@ -128,7 +128,7 @@ struct Cloner::Fun {
});
}
- while (i.moreInCurrentBatch()) {
+ while (cursor.moreInCurrentBatch()) {
if (numSeen % 128 == 127) {
time_t now = time(nullptr);
if (now - lastLog >= 60) {
@@ -164,7 +164,7 @@ struct Cloner::Fun {
collection);
}
- BSONObj tmp = i.nextSafe();
+ BSONObj tmp = cursor.nextSafe();
/* assure object is valid. note this will slow us down a little. */
// We allow cloning of collections containing decimal data even if decimal is disabled.
@@ -245,23 +245,24 @@ void Cloner::_copy(OperationContext* opCtx,
logAttrs(nss),
"conn_getServerAddress"_attr = conn->getServerAddress());
- Fun f(opCtx, toDBName);
- f.numSeen = 0;
- f.nss = nss;
- f.from_options = from_opts;
- f.from_id_index = from_id_index;
- f.saveLast = time(nullptr);
-
- int options = QueryOption_NoCursorTimeout | QueryOption_Exhaust;
-
- conn->query_DEPRECATED(std::function<void(DBClientCursorBatchIterator&)>(f),
- nss,
- BSONObj{} /* filter */,
- Query() /* querySettings */,
- nullptr,
- options,
- 0 /* batchSize */,
- repl::ReadConcernArgs::kLocal);
+ BatchHandler batchHandler{opCtx, toDBName};
+ batchHandler.numSeen = 0;
+ batchHandler.nss = nss;
+ batchHandler.from_options = from_opts;
+ batchHandler.from_id_index = from_id_index;
+ batchHandler.saveLast = time(nullptr);
+
+ FindCommandRequest findCmd{nss};
+ findCmd.setNoCursorTimeout(true);
+ findCmd.setReadConcern(repl::ReadConcernArgs::kLocal);
+ auto cursor = conn->find(std::move(findCmd),
+ ReadPreferenceSetting{ReadPreference::SecondaryPreferred},
+ ExhaustMode::kOn);
+
+ // Process the results of the cursor in batches.
+ while (cursor->more()) {
+ batchHandler(*cursor);
+ }
}
void Cloner::_copyIndexes(OperationContext* opCtx,
diff --git a/src/mongo/db/cloner.h b/src/mongo/db/cloner.h
index 8d1d512fe1f..5cbb4d76337 100644
--- a/src/mongo/db/cloner.h
+++ b/src/mongo/db/cloner.h
@@ -104,7 +104,7 @@ private:
const std::list<BSONObj>& from_indexes,
DBClientBase* conn);
- struct Fun;
+ struct BatchHandler;
};
} // namespace mongo
diff --git a/src/mongo/db/commands.cpp b/src/mongo/db/commands.cpp
index 7826963b3c2..6ebe905b732 100644
--- a/src/mongo/db/commands.cpp
+++ b/src/mongo/db/commands.cpp
@@ -108,38 +108,11 @@ bool checkAuthorizationImplPreParse(OperationContext* opCtx,
uassert(ErrorCodes::Unauthorized,
str::stream() << "command " << command->getName() << " requires authentication",
!command->requiresAuth() || authzSession->isAuthenticated() ||
- request.securityToken.nFields());
+ (request.validatedTenancyScope &&
+ request.validatedTenancyScope->hasAuthenticatedUser()));
return false;
}
-// TODO SERVER-65101: Replace this with a property on each command.
-// The command names that are allowed in a multi-document transaction.
-const StringMap<int> txnCmdAllowlist = {{"abortTransaction", 1},
- {"aggregate", 1},
- {"clusterAbortTransaction", 1},
- {"clusterAggregate", 1},
- {"clusterCommitTransaction", 1},
- {"clusterDelete", 1},
- {"clusterFind", 1},
- {"clusterGetMore", 1},
- {"clusterInsert", 1},
- {"clusterUpdate", 1},
- {"commitTransaction", 1},
- {"coordinateCommitTransaction", 1},
- {"create", 1},
- {"createIndexes", 1},
- {"delete", 1},
- {"distinct", 1},
- {"find", 1},
- {"findandmodify", 1},
- {"findAndModify", 1},
- {"getMore", 1},
- {"insert", 1},
- {"killCursors", 1},
- {"prepareTransaction", 1},
- {"testInternalTransactions", 1},
- {"update", 1}};
-
auto getCommandInvocationHooks =
ServiceContext::declareDecoration<std::unique_ptr<CommandInvocationHooks>>();
@@ -577,11 +550,15 @@ void CommandHelpers::canUseTransactions(const NamespaceString& nss,
"http://dochub.mongodb.org/core/transaction-count for a recommended alternative.",
cmdName != "count"_sd);
- auto inTxnAllowlist = txnCmdAllowlist.find(cmdName) != txnCmdAllowlist.cend();
+ auto command = findCommand(cmdName);
+ uassert(ErrorCodes::CommandNotFound,
+ str::stream() << "Encountered unknown command during check if can run in transactions: "
+ << cmdName,
+ command);
uassert(ErrorCodes::OperationNotSupportedInTransaction,
str::stream() << "Cannot run '" << cmdName << "' in a multi-document transaction.",
- inTxnAllowlist);
+ command->allowedInTransactions());
const auto dbName = nss.db();
diff --git a/src/mongo/db/commands.h b/src/mongo/db/commands.h
index 48341c29335..d54d86e7c50 100644
--- a/src/mongo/db/commands.h
+++ b/src/mongo/db/commands.h
@@ -603,6 +603,35 @@ public:
return nullptr;
}
+ /**
+ * Returns true if this command supports apply once semantic when retried.
+ */
+ virtual bool supportsRetryableWrite() const {
+ return false;
+ }
+
+ /**
+ * Returns true if sessions should be checked out when lsid and txnNumber is present in the
+ * request.
+ */
+ virtual bool shouldCheckoutSession() const {
+ return true;
+ }
+
+ /**
+ * Returns true if this is a command related to managing the lifecycle of a transaction.
+ */
+ virtual bool isTransactionCommand() const {
+ return false;
+ }
+
+ /**
+ * Returns true if this command can be run in a transaction.
+ */
+ virtual bool allowedInTransactions() const {
+ return false;
+ }
+
private:
// The full name of the command
const std::string _name;
diff --git a/src/mongo/db/commands/SConscript b/src/mongo/db/commands/SConscript
index cf6f79671bc..a9a5e95533e 100644
--- a/src/mongo/db/commands/SConscript
+++ b/src/mongo/db/commands/SConscript
@@ -326,6 +326,7 @@ env.Library(
source=[
"count_cmd.cpp",
"cqf/cqf_aggregate.cpp",
+ "cqf/cqf_command_utils.cpp",
"create_command.cpp",
"create_indexes.cpp",
"current_op.cpp",
@@ -369,6 +370,7 @@ env.Library(
'$BUILD_DIR/mongo/db/catalog/database_holder',
'$BUILD_DIR/mongo/db/catalog/index_key_validate',
'$BUILD_DIR/mongo/db/catalog/multi_index_block',
+ '$BUILD_DIR/mongo/db/change_stream_change_collection_manager',
'$BUILD_DIR/mongo/db/command_can_run_here',
'$BUILD_DIR/mongo/db/commands',
'$BUILD_DIR/mongo/db/concurrency/exception_util',
diff --git a/src/mongo/db/commands/async_command_execution_test.cpp b/src/mongo/db/commands/async_command_execution_test.cpp
index 7e798b2028b..01bec502118 100644
--- a/src/mongo/db/commands/async_command_execution_test.cpp
+++ b/src/mongo/db/commands/async_command_execution_test.cpp
@@ -72,7 +72,7 @@ struct AsyncCommandExecutionTest::TestState {
// Setup the execution context
rec = std::make_shared<RequestExecutionContext>(opCtx.get(), mockMessage());
rec->setReplyBuilder(makeReplyBuilder(rpc::protocolForMessage(rec->getMessage())));
- rec->setRequest(rpc::opMsgRequestFromAnyProtocol(rec->getMessage()));
+ rec->setRequest(rpc::opMsgRequestFromAnyProtocol(rec->getMessage(), opCtx->getClient()));
rec->setCommand(CommandHelpers::findCommand(rec->getRequest().getCommandName()));
// Setup the invocation
diff --git a/src/mongo/db/commands/cqf/cqf_aggregate.cpp b/src/mongo/db/commands/cqf/cqf_aggregate.cpp
index aabfc99c3a5..516a3f9ca2e 100644
--- a/src/mongo/db/commands/cqf/cqf_aggregate.cpp
+++ b/src/mongo/db/commands/cqf/cqf_aggregate.cpp
@@ -87,15 +87,10 @@ static opt::unordered_map<std::string, optimizer::IndexDefinition> buildIndexSpe
while (indexIterator->more()) {
const IndexCatalogEntry& catalogEntry = *indexIterator->next();
- const bool isMultiKey = catalogEntry.isMultikey(opCtx, collection);
- const MultikeyPaths& multiKeyPaths = catalogEntry.getMultikeyPaths(opCtx, collection);
- uassert(6624251, "Multikey paths cannot be empty.", !multiKeyPaths.empty());
-
const IndexDescriptor& descriptor = *catalogEntry.descriptor();
if (descriptor.hidden() || descriptor.isSparse() ||
descriptor.getIndexType() != IndexType::INDEX_BTREE) {
- // Not supported for now.
- continue;
+ uasserted(ErrorCodes::InternalErrorNotSupported, "Unsupported index type");
}
if (indexHint) {
@@ -111,6 +106,10 @@ static opt::unordered_map<std::string, optimizer::IndexDefinition> buildIndexSpe
}
}
+ const bool isMultiKey = catalogEntry.isMultikey(opCtx, collection);
+ const MultikeyPaths& multiKeyPaths = catalogEntry.getMultikeyPaths(opCtx, collection);
+ uassert(6624251, "Multikey paths cannot be empty.", !multiKeyPaths.empty());
+
// SBE version is base 0.
const int64_t version = static_cast<int>(descriptor.version()) - 1;
@@ -185,12 +184,16 @@ static opt::unordered_map<std::string, optimizer::IndexDefinition> buildIndexSpe
// TODO: simplify expression.
- PartialSchemaReqConversion conversion = convertExprToPartialSchemaReq(exprABT);
- if (!conversion._success || conversion._hasEmptyInterval) {
+ auto conversion = convertExprToPartialSchemaReq(exprABT, true /*isFilterContext*/);
+ if (!conversion || conversion->_hasEmptyInterval) {
// Unsatisfiable partial index filter?
continue;
}
- partialIndexReqMap = std::move(conversion._reqMap);
+ tassert(6624257,
+ "Should not be seeing a partial index filter where we need to over-approximate",
+ !conversion->_retainPredicate);
+
+ partialIndexReqMap = std::move(conversion->_reqMap);
}
// For now we assume distribution is Centralized.
@@ -380,6 +383,18 @@ std::unique_ptr<PlanExecutor, PlanExecutor::Deleter> getSBEExecutorViaCascadesOp
uasserted(6624256,
"For now we can apply hints only for queries involving a single collection");
}
+ // Unsupported command/collection options.
+ uassert(ErrorCodes::InternalErrorNotSupported,
+ "Collection-default collation is not supported",
+ !collection || collection->getCollectionOptions().collation.isEmpty());
+
+ uassert(ErrorCodes::InternalErrorNotSupported,
+ "Clustered collections are not supported",
+ !collection || !collection->isClustered());
+
+ uassert(ErrorCodes::InternalErrorNotSupported,
+ "Timeseries collections are not supported",
+ !collection || !collection->getTimeseriesOptions());
QueryHints queryHints = getHintsFromQueryKnobs();
diff --git a/src/mongo/db/commands/cqf/cqf_command_utils.cpp b/src/mongo/db/commands/cqf/cqf_command_utils.cpp
new file mode 100644
index 00000000000..2edf7a56772
--- /dev/null
+++ b/src/mongo/db/commands/cqf/cqf_command_utils.cpp
@@ -0,0 +1,696 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/db/commands/cqf/cqf_command_utils.h"
+
+#include "mongo/db/exec/add_fields_projection_executor.h"
+#include "mongo/db/exec/exclusion_projection_executor.h"
+#include "mongo/db/exec/inclusion_projection_executor.h"
+#include "mongo/db/exec/sbe/abt/abt_lower.h"
+#include "mongo/db/matcher/expression_always_boolean.h"
+#include "mongo/db/matcher/expression_array.h"
+#include "mongo/db/matcher/expression_expr.h"
+#include "mongo/db/matcher/expression_geo.h"
+#include "mongo/db/matcher/expression_internal_bucket_geo_within.h"
+#include "mongo/db/matcher/expression_internal_expr_comparison.h"
+#include "mongo/db/matcher/expression_leaf.h"
+#include "mongo/db/matcher/expression_text.h"
+#include "mongo/db/matcher/expression_text_noop.h"
+#include "mongo/db/matcher/expression_tree.h"
+#include "mongo/db/matcher/expression_type.h"
+#include "mongo/db/matcher/expression_visitor.h"
+#include "mongo/db/matcher/expression_where.h"
+#include "mongo/db/matcher/expression_where_noop.h"
+#include "mongo/db/matcher/match_expression_walker.h"
+#include "mongo/db/matcher/schema/expression_internal_schema_all_elem_match_from_index.h"
+#include "mongo/db/matcher/schema/expression_internal_schema_allowed_properties.h"
+#include "mongo/db/matcher/schema/expression_internal_schema_cond.h"
+#include "mongo/db/matcher/schema/expression_internal_schema_eq.h"
+#include "mongo/db/matcher/schema/expression_internal_schema_fmod.h"
+#include "mongo/db/matcher/schema/expression_internal_schema_match_array_index.h"
+#include "mongo/db/matcher/schema/expression_internal_schema_max_items.h"
+#include "mongo/db/matcher/schema/expression_internal_schema_max_length.h"
+#include "mongo/db/matcher/schema/expression_internal_schema_max_properties.h"
+#include "mongo/db/matcher/schema/expression_internal_schema_min_items.h"
+#include "mongo/db/matcher/schema/expression_internal_schema_min_length.h"
+#include "mongo/db/matcher/schema/expression_internal_schema_min_properties.h"
+#include "mongo/db/matcher/schema/expression_internal_schema_object_match.h"
+#include "mongo/db/matcher/schema/expression_internal_schema_root_doc_eq.h"
+#include "mongo/db/matcher/schema/expression_internal_schema_unique_items.h"
+#include "mongo/db/matcher/schema/expression_internal_schema_xor.h"
+#include "mongo/db/pipeline/abt/abt_document_source_visitor.h"
+#include "mongo/db/pipeline/abt/agg_expression_visitor.h"
+#include "mongo/db/pipeline/abt/match_expression_visitor.h"
+#include "mongo/db/pipeline/abt/utils.h"
+#include "mongo/db/pipeline/document_source_bucket_auto.h"
+#include "mongo/db/pipeline/document_source_coll_stats.h"
+#include "mongo/db/pipeline/document_source_current_op.h"
+#include "mongo/db/pipeline/document_source_cursor.h"
+#include "mongo/db/pipeline/document_source_exchange.h"
+#include "mongo/db/pipeline/document_source_facet.h"
+#include "mongo/db/pipeline/document_source_geo_near.h"
+#include "mongo/db/pipeline/document_source_geo_near_cursor.h"
+#include "mongo/db/pipeline/document_source_graph_lookup.h"
+#include "mongo/db/pipeline/document_source_group.h"
+#include "mongo/db/pipeline/document_source_index_stats.h"
+#include "mongo/db/pipeline/document_source_internal_inhibit_optimization.h"
+#include "mongo/db/pipeline/document_source_internal_shard_filter.h"
+#include "mongo/db/pipeline/document_source_internal_split_pipeline.h"
+#include "mongo/db/pipeline/document_source_internal_unpack_bucket.h"
+#include "mongo/db/pipeline/document_source_limit.h"
+#include "mongo/db/pipeline/document_source_list_cached_and_active_users.h"
+#include "mongo/db/pipeline/document_source_list_local_sessions.h"
+#include "mongo/db/pipeline/document_source_list_sessions.h"
+#include "mongo/db/pipeline/document_source_lookup.h"
+#include "mongo/db/pipeline/document_source_match.h"
+#include "mongo/db/pipeline/document_source_merge.h"
+#include "mongo/db/pipeline/document_source_operation_metrics.h"
+#include "mongo/db/pipeline/document_source_out.h"
+#include "mongo/db/pipeline/document_source_plan_cache_stats.h"
+#include "mongo/db/pipeline/document_source_queue.h"
+#include "mongo/db/pipeline/document_source_redact.h"
+#include "mongo/db/pipeline/document_source_replace_root.h"
+#include "mongo/db/pipeline/document_source_sample.h"
+#include "mongo/db/pipeline/document_source_sample_from_random_cursor.h"
+#include "mongo/db/pipeline/document_source_sequential_document_cache.h"
+#include "mongo/db/pipeline/document_source_single_document_transformation.h"
+#include "mongo/db/pipeline/document_source_skip.h"
+#include "mongo/db/pipeline/document_source_sort.h"
+#include "mongo/db/pipeline/document_source_tee_consumer.h"
+#include "mongo/db/pipeline/document_source_union_with.h"
+#include "mongo/db/pipeline/document_source_unwind.h"
+#include "mongo/db/pipeline/visitors/document_source_visitor.h"
+#include "mongo/db/pipeline/visitors/document_source_walker.h"
+#include "mongo/db/pipeline/visitors/transformer_interface_walker.h"
+#include "mongo/db/query/query_feature_flags_gen.h"
+#include "mongo/db/query/query_knobs_gen.h"
+#include "mongo/db/query/query_planner_params.h"
+#include "mongo/s/query/document_source_merge_cursors.h"
+
+namespace mongo {
+
+using namespace optimizer;
+
+namespace {
+
+/**
+ * Visitor that is responsible for indicating whether a MatchExpression is eligible for Bonsai by
+ * setting the '_eligible' member variable. Expressions which are "test-only" and not officially
+ * supported should set _eligible to false.
+ */
+class ABTMatchExpressionVisitor : public MatchExpressionConstVisitor {
+public:
+    // 'eligible' is owned by the caller; this visitor only ever clears it to false.
+    // It never sets it back to true, so a single unsupported node poisons the whole tree.
+    ABTMatchExpressionVisitor(bool& eligible) : _eligible(eligible) {}
+
+    // Comparison, $elemMatch, $in and $exists predicates are supported, provided their
+    // path has no numeric components (checked by assertSupportedPathExpression below).
+    void visit(const LTEMatchExpression* expr) override {
+        assertSupportedPathExpression(expr);
+    }
+    void visit(const LTMatchExpression* expr) override {
+        assertSupportedPathExpression(expr);
+    }
+    void visit(const ElemMatchObjectMatchExpression* expr) override {
+        assertSupportedPathExpression(expr);
+    }
+    void visit(const ElemMatchValueMatchExpression* expr) override {
+        assertSupportedPathExpression(expr);
+    }
+    void visit(const EqualityMatchExpression* expr) override {
+        assertSupportedPathExpression(expr);
+    }
+    void visit(const GTEMatchExpression* expr) override {
+        assertSupportedPathExpression(expr);
+    }
+    void visit(const GTMatchExpression* expr) override {
+        assertSupportedPathExpression(expr);
+    }
+    void visit(const InMatchExpression* expr) override {
+        assertSupportedPathExpression(expr);
+
+        // $in over a regex predicate is not supported.
+        if (!expr->getRegexes().empty()) {
+            _eligible = false;
+        }
+    }
+    void visit(const ExistsMatchExpression* expr) override {
+        assertSupportedPathExpression(expr);
+    }
+    // $and/$or carry no predicate of their own; their children are visited separately
+    // by the walker, so there is nothing to check here.
+    void visit(const AndMatchExpression* expr) override {}
+    void visit(const OrMatchExpression* expr) override {}
+
+    // Every expression type below is currently unsupported by the Bonsai optimizer;
+    // visiting any of them marks the query ineligible.
+    void visit(const GeoMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const GeoNearMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const InternalBucketGeoWithinMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const InternalExprEqMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const InternalExprGTMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const InternalExprGTEMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const InternalExprLTMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const InternalExprLTEMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const InternalSchemaAllElemMatchFromIndexMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const InternalSchemaAllowedPropertiesMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const InternalSchemaBinDataEncryptedTypeExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const InternalSchemaBinDataFLE2EncryptedTypeExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const InternalSchemaBinDataSubTypeExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const InternalSchemaCondMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const InternalSchemaEqMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const InternalSchemaFmodMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const InternalSchemaMatchArrayIndexMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const InternalSchemaMaxItemsMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const InternalSchemaMaxLengthMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const InternalSchemaMaxPropertiesMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const InternalSchemaMinItemsMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const InternalSchemaMinLengthMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const InternalSchemaMinPropertiesMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const InternalSchemaObjectMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const InternalSchemaRootDocEqMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const InternalSchemaTypeExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const InternalSchemaUniqueItemsMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const InternalSchemaXorMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const ModMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const NorMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const NotMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const RegexMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const SizeMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const TextMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const TextNoOpMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const TwoDPtInAnnulusExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const WhereMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const WhereNoOpMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const BitsAllClearMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const BitsAllSetMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const BitsAnyClearMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const BitsAnySetMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const TypeMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const AlwaysFalseMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const AlwaysTrueMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+    void visit(const ExprMatchExpression* expr) override {
+        unsupportedExpression(expr);
+    }
+
+private:
+    // Marks the whole expression tree ineligible. 'expr' is unused; it exists only so
+    // every visit() body reads uniformly.
+    void unsupportedExpression(const MatchExpression* expr) {
+        _eligible = false;
+    }
+
+    // Paths with numeric components (e.g. "a.0.b") are ambiguous between field names
+    // and array indexes and are not supported; such paths clear eligibility.
+    void assertSupportedPathExpression(const PathMatchExpression* expr) {
+        if (FieldRef(expr->path()).hasNumericPathComponents())
+            _eligible = false;
+    }
+
+    bool& _eligible;
+};
+
+
+/**
+ * Visitor that clears the caller-owned 'eligible' flag when a projection transformer is
+ * not supported by Bonsai. Only simple inclusion/exclusion projections over paths with no
+ * numeric components are eligible.
+ */
+class ABTTransformerVisitor : public TransformerInterfaceConstVisitor {
+public:
+    ABTTransformerVisitor(bool& eligible) : _eligible(eligible) {}
+
+    void visit(const projection_executor::ExclusionProjectionExecutor* transformer) override {
+        // Exclusions are eligible unless some projected path has a numeric component
+        // (ambiguous between a field name and an array index).
+        std::set<std::string> preservedPaths;
+        transformer->getRoot()->reportProjectedPaths(&preservedPaths);
+
+        for (const std::string& path : preservedPaths) {
+            if (FieldRef(path).hasNumericPathComponents()) {
+                unsupportedTransformer(transformer);
+                return;
+            }
+        }
+    }
+
+    void visit(const projection_executor::InclusionProjectionExecutor* transformer) override {
+        std::set<std::string> computedPaths;
+        StringMap<std::string> renamedPaths;
+        transformer->getRoot()->reportComputedPaths(&computedPaths, &renamedPaths);
+
+        // Non-simple projections are supported under test only.
+        if (computedPaths.size() > 0 || renamedPaths.size() > 0) {
+            unsupportedTransformer(transformer);
+            return;
+        }
+
+        // Same numeric-path-component restriction as for exclusion projections above.
+        std::set<std::string> preservedPaths;
+        transformer->getRoot()->reportProjectedPaths(&preservedPaths);
+
+        for (const std::string& path : preservedPaths) {
+            if (FieldRef(path).hasNumericPathComponents()) {
+                unsupportedTransformer(transformer);
+                return;
+            }
+        }
+    }
+
+    void visit(const projection_executor::AddFieldsProjectionExecutor* transformer) override {
+        unsupportedTransformer(transformer);
+    }
+
+    void visit(const GroupFromFirstDocumentTransformation* transformer) override {
+        unsupportedTransformer(transformer);
+    }
+
+    void visit(const ReplaceRootTransformation* transformer) override {
+        unsupportedTransformer(transformer);
+    }
+
+private:
+    // Note: declared const but still mutates the referenced flag — '_eligible' is a
+    // reference to caller state, not a member value, so this compiles by design.
+    void unsupportedTransformer(const TransformerInterface* transformer) const {
+        _eligible = false;
+    }
+
+    bool& _eligible;
+};
+
+/**
+ * Visitor that is responsible for indicating whether a DocumentSource is eligible for Bonsai by
+ * setting the 'eligible' member variable. Stages which are "test-only" and not officially supported
+ * should set 'eligible' to false.
+ */
+class ABTUnsupportedDocumentSourceVisitor : public DocumentSourceConstVisitor {
+public:
+    // Every stage listed below is currently unsupported by Bonsai; visiting any of them
+    // clears the 'eligible' flag. $match and single-document transformations are the only
+    // stages inspected more deeply (see the two visit overloads near the bottom).
+    void visit(const DocumentSourceInternalUnpackBucket* source) override {
+        unsupportedStage(source);
+    }
+
+    void visit(const DocumentSourceBucketAuto* source) override {
+        unsupportedStage(source);
+    }
+
+    void visit(const DocumentSourceCollStats* source) override {
+        unsupportedStage(source);
+    }
+
+    void visit(const DocumentSourceCurrentOp* source) override {
+        unsupportedStage(source);
+    }
+
+    void visit(const DocumentSourceCursor* source) override {
+        unsupportedStage(source);
+    }
+
+    void visit(const DocumentSourceExchange* source) override {
+        unsupportedStage(source);
+    }
+
+    void visit(const DocumentSourceFacet* source) override {
+        unsupportedStage(source);
+    }
+
+    void visit(const DocumentSourceGeoNear* source) override {
+        unsupportedStage(source);
+    }
+
+    void visit(const DocumentSourceGeoNearCursor* source) override {
+        unsupportedStage(source);
+    }
+
+    void visit(const DocumentSourceGraphLookUp* source) override {
+        unsupportedStage(source);
+    }
+
+    void visit(const DocumentSourceIndexStats* source) override {
+        unsupportedStage(source);
+    }
+
+    void visit(const DocumentSourceInternalShardFilter* source) override {
+        unsupportedStage(source);
+    }
+
+    void visit(const DocumentSourceInternalSplitPipeline* source) override {
+        unsupportedStage(source);
+    }
+
+    void visit(const DocumentSourceListCachedAndActiveUsers* source) override {
+        unsupportedStage(source);
+    }
+
+    void visit(const DocumentSourceListLocalSessions* source) override {
+        unsupportedStage(source);
+    }
+
+    void visit(const DocumentSourceListSessions* source) override {
+        unsupportedStage(source);
+    }
+
+    void visit(const DocumentSourceLookUp* source) override {
+        unsupportedStage(source);
+    }
+
+    void visit(const DocumentSourceMerge* source) override {
+        unsupportedStage(source);
+    }
+
+    void visit(const DocumentSourceMergeCursors* source) override {
+        unsupportedStage(source);
+    }
+
+    void visit(const DocumentSourceOperationMetrics* source) override {
+        unsupportedStage(source);
+    }
+
+    void visit(const DocumentSourceOut* source) override {
+        unsupportedStage(source);
+    }
+
+    void visit(const DocumentSourcePlanCacheStats* source) override {
+        unsupportedStage(source);
+    }
+
+    void visit(const DocumentSourceQueue* source) override {
+        unsupportedStage(source);
+    }
+
+    void visit(const DocumentSourceRedact* source) override {
+        unsupportedStage(source);
+    }
+
+    void visit(const DocumentSourceSample* source) override {
+        unsupportedStage(source);
+    }
+
+    void visit(const DocumentSourceSampleFromRandomCursor* source) override {
+        unsupportedStage(source);
+    }
+
+    void visit(const DocumentSourceSequentialDocumentCache* source) override {
+        unsupportedStage(source);
+    }
+
+    void visit(const DocumentSourceTeeConsumer* source) override {
+        unsupportedStage(source);
+    }
+
+    void visit(const DocumentSourceGroup* source) override {
+        unsupportedStage(source);
+    }
+    void visit(const DocumentSourceLimit* source) override {
+        unsupportedStage(source);
+    }
+    void visit(const DocumentSourceSkip* source) override {
+        unsupportedStage(source);
+    }
+    void visit(const DocumentSourceSort* source) override {
+        unsupportedStage(source);
+    }
+    void visit(const DocumentSourceUnwind* source) override {
+        unsupportedStage(source);
+    }
+    void visit(const DocumentSourceUnionWith* source) override {
+        unsupportedStage(source);
+    }
+
+    void visit(const DocumentSourceInternalInhibitOptimization* source) override {
+        // Can be ignored.
+    }
+
+    void visit(const DocumentSourceMatch* source) override {
+        // Pass a reference to our local 'eligible' variable to allow the visitor to overwrite it.
+        ABTMatchExpressionVisitor visitor(eligible);
+        MatchExpressionWalker walker(nullptr /*preVisitor*/, nullptr /*inVisitor*/, &visitor);
+        tree_walker::walk<true, MatchExpression>(source->getMatchExpression(), &walker);
+    }
+
+    void visit(const DocumentSourceSingleDocumentTransformation* source) override {
+        // Delegate to the transformer visitor, which also writes through to 'eligible'.
+        ABTTransformerVisitor visitor(eligible);
+        TransformerInterfaceWalker walker(&visitor);
+        walker.walk(&source->getTransformer());
+    }
+
+    void unsupportedStage(const DocumentSource* source) {
+        eligible = false;
+    }
+
+    // Result flag: starts true and is only ever cleared; read by callers after walking.
+    bool eligible = true;
+};
+
+/**
+ * Checks eligibility criteria shared by find and aggregate requests: command options,
+ * index types on the collection, and collection options. Returns true only when none of
+ * the three checks finds something Bonsai does not support. A null collection (e.g. a
+ * non-existent namespace) skips the index and collection checks.
+ */
+template <class RequestType>
+bool isEligibleCommon(const RequestType& request,
+                      OperationContext* opCtx,
+                      const CollectionPtr& collection) {
+    // The FindCommandRequest defaults some parameters to BSONObj() instead of boost::none.
+    auto noneOrDefaultEmpty = [&](auto param) {
+        if constexpr (std::is_same_v<decltype(param), boost::optional<BSONObj>>) {
+            return param && !param->isEmpty();
+        } else {
+            return !param.isEmpty();
+        }
+    };
+    // hint, collation, let and legacy runtime constants are all unsupported options.
+    bool unsupportedCmdOption = noneOrDefaultEmpty(request.getHint()) ||
+        noneOrDefaultEmpty(request.getCollation()) || request.getLet() ||
+        request.getLegacyRuntimeConstants();
+
+    // Any ready index that is partial, hidden, sparse, or not a plain B-tree index makes
+    // the collection ineligible.
+    bool unsupportedIndexType = [&]() {
+        if (collection == nullptr)
+            return false;
+
+        const IndexCatalog& indexCatalog = *collection->getIndexCatalog();
+        auto indexIterator =
+            indexCatalog.getIndexIterator(opCtx, IndexCatalog::InclusionPolicy::kReady);
+
+        while (indexIterator->more()) {
+            const IndexDescriptor& descriptor = *indexIterator->next()->descriptor();
+            if (descriptor.isPartial() || descriptor.hidden() || descriptor.isSparse() ||
+                descriptor.getIndexType() != IndexType::INDEX_BTREE) {
+                return true;
+            }
+        }
+        return false;
+    }();
+
+    // Clustered collections, non-default collations and timeseries collections are out.
+    bool unsupportedCollectionType = [&]() {
+        if (collection == nullptr)
+            return false;
+
+        if (collection->isClustered() || !collection->getCollectionOptions().collation.isEmpty() ||
+            collection->getTimeseriesOptions()) {
+            return true;
+        }
+
+        return false;
+    }();
+
+    return !unsupportedCmdOption && !unsupportedIndexType && !unsupportedCollectionType;
+}
+
+/**
+ * Tri-state knob resolution: returns true to force Bonsai on, false to force it off, or
+ * boost::none to defer the decision to the per-query eligibility checks.
+ */
+boost::optional<bool> shouldForceBonsai() {
+    // Without the feature flag set, nothing else matters.
+    if (!feature_flags::gFeatureFlagCommonQueryFramework.isEnabled(
+            serverGlobalParams.featureCompatibility)) {
+        return false;
+    }
+
+    // The "force classic" flag takes precedence over the others.
+    if (internalQueryForceClassicEngine.load()) {
+        return false;
+    }
+
+    // Explicit force flag bypasses per-query eligibility entirely.
+    if (internalQueryForceCommonQueryFramework.load()) {
+        return true;
+    }
+
+    // Cascades optimizer disabled: never use Bonsai.
+    if (!internalQueryEnableCascadesOptimizer.load()) {
+        return false;
+    }
+
+    // Flags allow Bonsai; let the caller's eligibility checks decide.
+    return boost::none;
+}
+
+} // namespace
+
+bool isEligibleForBonsai(const AggregateCommandRequest& request,
+                         const Pipeline& pipeline,
+                         OperationContext* opCtx,
+                         const CollectionPtr& collection) {
+    // Server-wide knobs may force the decision either way before any per-query checks.
+    if (auto forceBonsai = shouldForceBonsai(); forceBonsai.has_value()) {
+        return *forceBonsai;
+    }
+
+    // Aggregate-specific options on top of the common checks: unwrapped read preference,
+    // resharding resume tokens and $exchange are all unsupported.
+    bool commandOptionsEligible = isEligibleCommon(request, opCtx, collection) &&
+        !request.getUnwrappedReadPref() && !request.getRequestReshardingResumeToken().has_value() &&
+        !request.getExchange();
+
+    // Walk every stage of the pipeline; any unsupported stage clears visitor.eligible.
+    ABTUnsupportedDocumentSourceVisitor visitor;
+    DocumentSourceWalker walker(nullptr /*preVisitor*/, &visitor);
+    walker.walk(pipeline);
+    bool eligiblePipeline = visitor.eligible;
+
+    return commandOptionsEligible && eligiblePipeline;
+}
+
+bool isEligibleForBonsai(const FindCommandRequest& request,
+                         const MatchExpression& expression,
+                         OperationContext* opCtx,
+                         const CollectionPtr& collection) {
+    // Server-wide knobs may force the decision either way before any per-query checks.
+    if (auto forceBonsai = shouldForceBonsai(); forceBonsai.has_value()) {
+        return *forceBonsai;
+    }
+
+    bool commandOptionsEligible = isEligibleCommon(request, opCtx, collection);
+
+    // Walk the filter's match expression tree; any unsupported node clears eligibleMatch.
+    bool eligibleMatch = true;
+    ABTMatchExpressionVisitor visitor(eligibleMatch);
+    MatchExpressionWalker walker(nullptr /*preVisitor*/, nullptr /*inVisitor*/, &visitor);
+    tree_walker::walk<true, MatchExpression>(&expression, &walker);
+
+    return commandOptionsEligible && eligibleMatch;
+}
+
+} // namespace mongo
diff --git a/src/mongo/db/commands/cqf/cqf_command_utils.h b/src/mongo/db/commands/cqf/cqf_command_utils.h
new file mode 100644
index 00000000000..a88b0b712d6
--- /dev/null
+++ b/src/mongo/db/commands/cqf/cqf_command_utils.h
@@ -0,0 +1,53 @@
+/**
+ * Copyright (C) 2022-present MongoDB, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the Server Side Public License, version 1,
+ * as published by MongoDB, Inc.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * Server Side Public License for more details.
+ *
+ * You should have received a copy of the Server Side Public License
+ * along with this program. If not, see
+ * <http://www.mongodb.com/licensing/server-side-public-license>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the Server Side Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/db/catalog/collection.h"
+
+namespace mongo {
+
+/**
+ * Returns whether the given Pipeline and aggregate command is eligible to use the bonsai
+ * optimizer.
+ */
+bool isEligibleForBonsai(const AggregateCommandRequest& request,
+ const Pipeline& pipeline,
+ OperationContext* opCtx,
+ const CollectionPtr& collection);
+
+/**
+ * Returns whether the given find command is eligible to use the bonsai optimizer.
+ */
+bool isEligibleForBonsai(const FindCommandRequest& request,
+ const MatchExpression& expression,
+ OperationContext* opCtx,
+ const CollectionPtr& collection);
+
+} // namespace mongo
diff --git a/src/mongo/db/commands/create_command.cpp b/src/mongo/db/commands/create_command.cpp
index e6b3a70efc5..27a871ba489 100644
--- a/src/mongo/db/commands/create_command.cpp
+++ b/src/mongo/db/commands/create_command.cpp
@@ -89,6 +89,10 @@ public:
return kCreateCommandHelp.toString();
}
+ bool allowedInTransactions() const final {
+ return true;
+ }
+
class Invocation final : public InvocationBaseGen {
public:
using InvocationBaseGen::InvocationBaseGen;
diff --git a/src/mongo/db/commands/create_indexes.cpp b/src/mongo/db/commands/create_indexes.cpp
index 9172bbdac17..9b848683cd3 100644
--- a/src/mongo/db/commands/create_indexes.cpp
+++ b/src/mongo/db/commands/create_indexes.cpp
@@ -375,7 +375,7 @@ CreateIndexesReply runCreateIndexesOnNewCollection(
for (const auto& spec : specs) {
uassert(6100900,
"Cannot implicitly create a new collection with createIndex 'clustered' option",
- !spec["clustered"]);
+ !spec[IndexDescriptor::kClusteredFieldName]);
}
// We need to create the collection.
@@ -716,6 +716,13 @@ CreateIndexesReply runCreateIndexesWithCoordinator(OperationContext* opCtx,
* { createIndexes : "bar",
* indexes : [ { ns : "test.bar", key : { x : 1 }, name: "x_1" } ],
* commitQuorum: "majority" }
+ *
+ * commitQuorum specifies which or how many replica set members must be ready to commit before the
+ * primary will commit the index. The same values can be used for commitQuorum as writeConcern, with
+ * the addition of 'votingMembers', the default. It is used to ensure secondaries can commit indexes
+ * quickly, minimizing replication lag (secondaries block replication on receipt of commitIndexBuild
+ * while completing the associated index). Note that commitQuorum is NOT like writeConcern: there is
+ * no guarantee that indexes on secondaries are ready for use after the command returns.
*/
class CmdCreateIndexes : public CreateIndexesCmdVersion1Gen<CmdCreateIndexes> {
public:
@@ -807,6 +814,10 @@ public:
return AllowedOnSecondary::kNever;
}
+ bool allowedInTransactions() const final {
+ return true;
+ }
+
} cmdCreateIndex;
} // namespace
diff --git a/src/mongo/db/commands/distinct.cpp b/src/mongo/db/commands/distinct.cpp
index 09a4350fddd..7b298885cde 100644
--- a/src/mongo/db/commands/distinct.cpp
+++ b/src/mongo/db/commands/distinct.cpp
@@ -134,6 +134,10 @@ public:
hasTerm);
}
+ bool allowedInTransactions() const final {
+ return true;
+ }
+
Status explain(OperationContext* opCtx,
const OpMsgRequest& request,
ExplainOptions::Verbosity verbosity,
diff --git a/src/mongo/db/commands/drop_indexes.cpp b/src/mongo/db/commands/drop_indexes.cpp
index 88326b20e48..e95da5c1378 100644
--- a/src/mongo/db/commands/drop_indexes.cpp
+++ b/src/mongo/db/commands/drop_indexes.cpp
@@ -235,8 +235,8 @@ public:
"Uninitialized");
writeConflictRetry(opCtx, "dropAllIndexes", toReIndexNss.ns(), [&] {
WriteUnitOfWork wunit(opCtx);
- collection.getWritableCollection()->getIndexCatalog()->dropAllIndexes(
- opCtx, collection.getWritableCollection(), true, {});
+ collection.getWritableCollection(opCtx)->getIndexCatalog()->dropAllIndexes(
+ opCtx, collection.getWritableCollection(opCtx), true, {});
swIndexesToRebuild =
indexer->init(opCtx, collection, all, MultiIndexBlock::kNoopOnInitFn);
@@ -263,7 +263,7 @@ public:
writeConflictRetry(opCtx, "commitReIndex", toReIndexNss.ns(), [&] {
WriteUnitOfWork wunit(opCtx);
uassertStatusOK(indexer->commit(opCtx,
- collection.getWritableCollection(),
+ collection.getWritableCollection(opCtx),
MultiIndexBlock::kNoopOnCreateEachFn,
MultiIndexBlock::kNoopOnCommitFn));
wunit.commit();
diff --git a/src/mongo/db/commands/find_and_modify.cpp b/src/mongo/db/commands/find_and_modify.cpp
index 221d053036a..c5bffda7673 100644
--- a/src/mongo/db/commands/find_and_modify.cpp
+++ b/src/mongo/db/commands/find_and_modify.cpp
@@ -280,6 +280,14 @@ public:
CmdFindAndModify::_updateMetrics.collectMetrics(request);
}
+ bool supportsRetryableWrite() const final {
+ return true;
+ }
+
+ bool allowedInTransactions() const final {
+ return true;
+ }
+
class Invocation final : public InvocationBaseGen {
public:
using InvocationBaseGen::InvocationBaseGen;
@@ -638,10 +646,15 @@ write_ops::FindAndModifyCommandReply CmdFindAndModify::Invocation::typedRun(
// Collect metrics.
CmdFindAndModify::collectMetrics(req);
- boost::optional<DisableDocumentValidation> maybeDisableValidation;
- if (req.getBypassDocumentValidation().value_or(false)) {
- maybeDisableValidation.emplace(opCtx);
- }
+ auto disableDocumentValidation = req.getBypassDocumentValidation().value_or(false);
+ auto fleCrudProcessed =
+ write_ops_exec::getFleCrudProcessed(opCtx, req.getEncryptionInformation());
+
+ DisableDocumentSchemaValidationIfTrue docSchemaValidationDisabler(opCtx,
+ disableDocumentValidation);
+
+ DisableSafeContentValidationIfTrue safeContentValidationDisabler(
+ opCtx, disableDocumentValidation, fleCrudProcessed);
const auto inTransaction = opCtx->inMultiDocumentTransaction();
uassert(50781,
diff --git a/src/mongo/db/commands/find_cmd.cpp b/src/mongo/db/commands/find_cmd.cpp
index 5bae30e43c2..eda3c32b291 100644
--- a/src/mongo/db/commands/find_cmd.cpp
+++ b/src/mongo/db/commands/find_cmd.cpp
@@ -36,6 +36,7 @@
#include "mongo/db/client.h"
#include "mongo/db/clientcursor.h"
#include "mongo/db/commands.h"
+#include "mongo/db/commands/cqf/cqf_command_utils.h"
#include "mongo/db/commands/run_aggregate.h"
#include "mongo/db/commands/test_commands_enabled.h"
#include "mongo/db/cursor_manager.h"
@@ -216,6 +217,10 @@ public:
return true;
}
+ bool allowedInTransactions() const final {
+ return true;
+ }
+
class Invocation final : public CommandInvocation {
public:
Invocation(const FindCmd* definition, const OpMsgRequest& request, StringData dbName)
@@ -297,9 +302,8 @@ public:
// If we are running a query against a view, or if we are trying to test the new
// optimizer, redirect this query through the aggregation system.
if (ctx->getView() ||
- (feature_flags::gfeatureFlagCommonQueryFramework.isEnabled(
- serverGlobalParams.featureCompatibility) &&
- internalQueryEnableCascadesOptimizer.load())) {
+ isEligibleForBonsai(
+ cq->getFindCommandRequest(), *cq->root(), opCtx, ctx->getCollection())) {
// Relinquish locks. The aggregation command will re-acquire them.
ctx.reset();
@@ -503,9 +507,8 @@ public:
// If we are running a query against a view, or if we are trying to test the new
// optimizer, redirect this query through the aggregation system.
if (ctx->getView() ||
- (feature_flags::gfeatureFlagCommonQueryFramework.isEnabled(
- serverGlobalParams.featureCompatibility) &&
- internalQueryEnableCascadesOptimizer.load())) {
+ isEligibleForBonsai(
+ cq->getFindCommandRequest(), *cq->root(), opCtx, ctx->getCollection())) {
// Relinquish locks. The aggregation command will re-acquire them.
ctx.reset();
diff --git a/src/mongo/db/commands/fle_compact_test.cpp b/src/mongo/db/commands/fle_compact_test.cpp
index 18c52f548ef..26153aadcc8 100644
--- a/src/mongo/db/commands/fle_compact_test.cpp
+++ b/src/mongo/db/commands/fle_compact_test.cpp
@@ -395,8 +395,13 @@ void FleCompactTest::doSingleInsert(int id, BSONObj encryptedFieldsObj) {
auto efc =
generateEncryptedFieldConfig(encryptedFieldsObj.getFieldNames<std::set<std::string>>());
- uassertStatusOK(processInsert(
- _queryImpl.get(), _namespaces.edcNss, serverPayload, efc, kUninitializedTxnNumber, result));
+ uassertStatusOK(processInsert(_queryImpl.get(),
+ _namespaces.edcNss,
+ serverPayload,
+ efc,
+ kUninitializedTxnNumber,
+ result,
+ false));
}
void FleCompactTest::doSingleDelete(int id, BSONObj encryptedFieldsObj) {
diff --git a/src/mongo/db/commands/get_cluster_parameter_invocation.cpp b/src/mongo/db/commands/get_cluster_parameter_invocation.cpp
index b95acf4896f..7eb4218040e 100644
--- a/src/mongo/db/commands/get_cluster_parameter_invocation.cpp
+++ b/src/mongo/db/commands/get_cluster_parameter_invocation.cpp
@@ -43,10 +43,6 @@ namespace mongo {
std::pair<std::vector<std::string>, std::vector<BSONObj>>
GetClusterParameterInvocation::retrieveRequestedParameters(OperationContext* opCtx,
const CmdBody& cmdBody) {
- uassert(ErrorCodes::IllegalOperation,
- "featureFlagClusterWideConfig not enabled",
- gFeatureFlagClusterWideConfig.isEnabled(serverGlobalParams.featureCompatibility));
-
ServerParameterSet* clusterParameters = ServerParameterSet::getClusterParameterSet();
std::vector<std::string> parameterNames;
std::vector<BSONObj> parameterValues;
diff --git a/src/mongo/db/commands/getmore_cmd.cpp b/src/mongo/db/commands/getmore_cmd.cpp
index eacb27e85ff..3b34751014f 100644
--- a/src/mongo/db/commands/getmore_cmd.cpp
+++ b/src/mongo/db/commands/getmore_cmd.cpp
@@ -317,6 +317,10 @@ public:
return std::make_unique<Invocation>(this, opMsgRequest);
}
+ bool allowedInTransactions() const final {
+ return true;
+ }
+
class Invocation final : public CommandInvocation {
public:
Invocation(Command* cmd, const OpMsgRequest& request)
diff --git a/src/mongo/db/commands/index_filter_commands.cpp b/src/mongo/db/commands/index_filter_commands.cpp
index f040bd9eea3..5deb5ecd339 100644
--- a/src/mongo/db/commands/index_filter_commands.cpp
+++ b/src/mongo/db/commands/index_filter_commands.cpp
@@ -99,7 +99,7 @@ void removePlanCacheEntriesByIndexFilterKeys(const stdx::unordered_set<uint32_t>
sbe::PlanCache* planCache) {
planCache->removeIf([&](const sbe::PlanCacheKey& key, const sbe::PlanCacheEntry& entry) {
return indexFilterKeys.contains(entry.indexFilterKey) &&
- key.getCollectionUuid() == collectionUuid;
+ key.getMainCollectionState().uuid == collectionUuid;
});
}
} // namespace
diff --git a/src/mongo/db/commands/killcursors_common.h b/src/mongo/db/commands/killcursors_common.h
index 06ee9c9335d..f0ccc33d794 100644
--- a/src/mongo/db/commands/killcursors_common.h
+++ b/src/mongo/db/commands/killcursors_common.h
@@ -66,6 +66,10 @@ public:
return true;
}
+ bool allowedInTransactions() const final {
+ return true;
+ }
+
class Invocation : public KCV1Gen::InvocationBaseGen {
public:
using KCV1Gen::InvocationBaseGen::InvocationBaseGen;
diff --git a/src/mongo/db/commands/pipeline_command.cpp b/src/mongo/db/commands/pipeline_command.cpp
index 1d2e0f25059..9f0cb6bd909 100644
--- a/src/mongo/db/commands/pipeline_command.cpp
+++ b/src/mongo/db/commands/pipeline_command.cpp
@@ -207,6 +207,10 @@ public:
return &::mongo::AggregateCommandRequest::kAuthorizationContract;
}
+ bool allowedInTransactions() const final {
+ return true;
+ }
+
} pipelineCmd;
} // namespace
diff --git a/src/mongo/db/commands/plan_cache_commands_test.cpp b/src/mongo/db/commands/plan_cache_commands_test.cpp
index 4ef229c36f4..3495ee127d6 100644
--- a/src/mongo/db/commands/plan_cache_commands_test.cpp
+++ b/src/mongo/db/commands/plan_cache_commands_test.cpp
@@ -40,7 +40,7 @@ namespace {
static const NamespaceString nss{"test.collection"_sd};
-PlanCacheKey makeKey(const CanonicalQuery& cq) {
+PlanCacheKey makeClassicKey(const CanonicalQuery& cq) {
CollectionMock coll(nss);
return plan_cache_key_factory::make<PlanCacheKey>(cq, &coll);
}
@@ -106,7 +106,7 @@ TEST(PlanCacheCommandsTest, CanCanonicalizeWithValidQuery) {
plan_cache_commands::canonicalize(opCtx.get(), nss.ns(), fromjson("{query: {b: 3, a: 4}}"));
ASSERT_OK(statusWithCQ.getStatus());
std::unique_ptr<CanonicalQuery> equivQuery = std::move(statusWithCQ.getValue());
- ASSERT_EQUALS(makeKey(*query), makeKey(*equivQuery));
+ ASSERT_EQUALS(makeClassicKey(*query), makeClassicKey(*equivQuery));
}
TEST(PlanCacheCommandsTest, SortQueryResultsInDifferentPlanCacheKeyFromUnsorted) {
@@ -124,7 +124,7 @@ TEST(PlanCacheCommandsTest, SortQueryResultsInDifferentPlanCacheKeyFromUnsorted)
opCtx.get(), nss.ns(), fromjson("{query: {a: 1, b: 1}, sort: {a: 1, b: 1}}"));
ASSERT_OK(statusWithCQ.getStatus());
std::unique_ptr<CanonicalQuery> sortQuery = std::move(statusWithCQ.getValue());
- ASSERT_NOT_EQUALS(makeKey(*query), makeKey(*sortQuery));
+ ASSERT_NOT_EQUALS(makeClassicKey(*query), makeClassicKey(*sortQuery));
}
// Regression test for SERVER-17158.
@@ -143,7 +143,7 @@ TEST(PlanCacheCommandsTest, SortsAreProperlyDelimitedInPlanCacheKey) {
opCtx.get(), nss.ns(), fromjson("{query: {a: 1, b: 1}, sort: {aab: 1}}"));
ASSERT_OK(statusWithCQ.getStatus());
std::unique_ptr<CanonicalQuery> sortQuery2 = std::move(statusWithCQ.getValue());
- ASSERT_NOT_EQUALS(makeKey(*sortQuery1), makeKey(*sortQuery2));
+ ASSERT_NOT_EQUALS(makeClassicKey(*sortQuery1), makeClassicKey(*sortQuery2));
}
TEST(PlanCacheCommandsTest, ProjectQueryResultsInDifferentPlanCacheKeyFromUnprojected) {
@@ -160,7 +160,7 @@ TEST(PlanCacheCommandsTest, ProjectQueryResultsInDifferentPlanCacheKeyFromUnproj
opCtx.get(), nss.ns(), fromjson("{query: {a: 1, b: 1}, projection: {_id: 0, a: 1}}"));
ASSERT_OK(statusWithCQ.getStatus());
std::unique_ptr<CanonicalQuery> projectionQuery = std::move(statusWithCQ.getValue());
- ASSERT_NOT_EQUALS(makeKey(*query), makeKey(*projectionQuery));
+ ASSERT_NOT_EQUALS(makeClassicKey(*query), makeClassicKey(*projectionQuery));
}
} // namespace
diff --git a/src/mongo/db/commands/run_aggregate.cpp b/src/mongo/db/commands/run_aggregate.cpp
index 6cb0760d139..42053578913 100644
--- a/src/mongo/db/commands/run_aggregate.cpp
+++ b/src/mongo/db/commands/run_aggregate.cpp
@@ -41,7 +41,9 @@
#include "mongo/db/catalog/collection_uuid_mismatch.h"
#include "mongo/db/catalog/database.h"
#include "mongo/db/catalog/database_holder.h"
+#include "mongo/db/change_stream_change_collection_manager.h"
#include "mongo/db/commands/cqf/cqf_aggregate.h"
+#include "mongo/db/commands/cqf/cqf_command_utils.h"
#include "mongo/db/curop.h"
#include "mongo/db/cursor_manager.h"
#include "mongo/db/db_raii.h"
@@ -686,12 +688,8 @@ Status runAggregate(OperationContext* opCtx,
// Determine if this aggregation has foreign collections that the execution subsystem needs
// to be aware of.
- std::vector<NamespaceStringOrUUID> secondaryExecNssList;
-
- // Taking locks over multiple collections is not supported outside of $lookup pushdown.
- if (feature_flags::gFeatureFlagSBELookupPushdown.isEnabledAndIgnoreFCV()) {
- secondaryExecNssList = liteParsedPipeline.getForeignExecutionNamespaces();
- }
+ std::vector<NamespaceStringOrUUID> secondaryExecNssList =
+ liteParsedPipeline.getForeignExecutionNamespaces();
// The collation to use for this aggregation. boost::optional to distinguish between the case
// where the collation has not yet been resolved, and where it has been resolved to nullptr.
@@ -752,9 +750,21 @@ Status runAggregate(OperationContext* opCtx,
<< " is not supported for a change stream",
!request.getCollectionUUID());
- // Replace the execution namespace with that of the oplog.
+ // Replace the execution namespace with the oplog.
nss = NamespaceString::kRsOplogNamespace;
+ // In case of serverless the change stream will be opened on the change collection. We
+ // should first check if the change collection for the particular tenant exists and then
+ // replace the namespace with the change collection.
+ if (ChangeStreamChangeCollectionManager::isChangeCollectionsModeActive()) {
+ auto& changeCollectionManager = ChangeStreamChangeCollectionManager::get(opCtx);
+ uassert(ErrorCodes::ChangeStreamNotEnabled,
+ "Change streams must be enabled before being used.",
+ changeCollectionManager.hasChangeCollection(opCtx, origNss.tenantId()));
+
+ nss = NamespaceString::makeChangeCollectionNSS(origNss.tenantId());
+ }
+
// Upgrade and wait for read concern if necessary.
_adjustChangeStreamReadConcern(opCtx);
@@ -940,9 +950,7 @@ Status runAggregate(OperationContext* opCtx,
constexpr bool alreadyOptimized = true;
pipeline->validateCommon(alreadyOptimized);
- if (feature_flags::gfeatureFlagCommonQueryFramework.isEnabled(
- serverGlobalParams.featureCompatibility) &&
- internalQueryEnableCascadesOptimizer.load()) {
+ if (isEligibleForBonsai(request, *pipeline, opCtx, collections.getMainCollection())) {
uassert(6624344,
"Exchanging is not supported in the Cascades optimizer",
!request.getExchange().has_value());
@@ -1023,7 +1031,7 @@ Status runAggregate(OperationContext* opCtx,
// yet.
invariant(ctx);
Explain::explainStages(explainExecutor,
- ctx->getCollection(),
+ collections,
*(expCtx->explain),
BSON("optimizedPipeline" << true),
cmdObj,
diff --git a/src/mongo/db/commands/set_cluster_parameter_command.cpp b/src/mongo/db/commands/set_cluster_parameter_command.cpp
index 696c6eda751..08ae1b2835e 100644
--- a/src/mongo/db/commands/set_cluster_parameter_command.cpp
+++ b/src/mongo/db/commands/set_cluster_parameter_command.cpp
@@ -75,12 +75,6 @@ public:
"setClusterParameter can only run on mongos in sharded clusters",
(serverGlobalParams.clusterRole == ClusterRole::None));
- FixedFCVRegion fcvRegion(opCtx);
- uassert(
- ErrorCodes::IllegalOperation,
- "Cannot set cluster parameter, gFeatureFlagClusterWideConfig is not enabled",
- gFeatureFlagClusterWideConfig.isEnabled(serverGlobalParams.featureCompatibility));
-
// TODO SERVER-65249: This will eventually be made specific to the parameter being set
// so that some parameters will be able to use setClusterParameter even on standalones.
uassert(ErrorCodes::IllegalOperation,
diff --git a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
index ad2084e2ac7..1f146fa082f 100644
--- a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
+++ b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
@@ -74,7 +74,6 @@
#include "mongo/db/s/resharding/resharding_coordinator_service.h"
#include "mongo/db/s/resharding/resharding_donor_recipient_common.h"
#include "mongo/db/s/sharding_ddl_coordinator_service.h"
-#include "mongo/db/s/sharding_util.h"
#include "mongo/db/s/transaction_coordinator_service.h"
#include "mongo/db/server_feature_flags_gen.h"
#include "mongo/db/server_options.h"
@@ -87,7 +86,6 @@
#include "mongo/logv2/log.h"
#include "mongo/rpc/get_status_from_command_result.h"
#include "mongo/s/pm2423_feature_flags_gen.h"
-#include "mongo/s/pm2583_feature_flags_gen.h"
#include "mongo/s/resharding/resharding_feature_flag_gen.h"
#include "mongo/s/sharding_feature_flags_gen.h"
#include "mongo/stdx/unordered_set.h"
@@ -345,14 +343,10 @@ public:
// Drain moveChunks if the actualVersion relies on the new migration protocol but
// the requestedVersion uses the old one (downgrading).
- if ((feature_flags::gFeatureFlagMigrationRecipientCriticalSection
- .isEnabledOnVersion(actualVersion) &&
- !feature_flags::gFeatureFlagMigrationRecipientCriticalSection
- .isEnabledOnVersion(requestedVersion)) ||
- (feature_flags::gFeatureFlagNewPersistedChunkVersionFormat.isEnabledOnVersion(
- actualVersion) &&
- !feature_flags::gFeatureFlagNewPersistedChunkVersionFormat.isEnabledOnVersion(
- requestedVersion))) {
+ if (feature_flags::gFeatureFlagMigrationRecipientCriticalSection.isEnabledOnVersion(
+ actualVersion) &&
+ !feature_flags::gFeatureFlagMigrationRecipientCriticalSection
+ .isEnabledOnVersion(requestedVersion)) {
drainNewMoveChunks.emplace(opCtx, "setFeatureCompatibilityVersionDowngrade");
// At this point, because we are holding the MigrationBlockingGuard, no new
@@ -388,30 +382,6 @@ public:
!isBlockingUserWrites);
}
- // TODO (SERVER-65572): Remove setClusterParameter serialization and collection
- // drop after this is backported to 6.0.
- if (!gFeatureFlagClusterWideConfig.isEnabledOnVersion(requestedVersion)) {
- if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
- uassert(ErrorCodes::CannotDowngrade,
- "Cannot downgrade while cluster server parameter is being set",
- ConfigsvrCoordinatorService::getService(opCtx)
- ->areAllCoordinatorsOfTypeFinished(
- opCtx, ConfigsvrCoordinatorTypeEnum::kSetClusterParameter));
- }
-
- DropReply dropReply;
- const auto dropStatus = dropCollection(
- opCtx,
- NamespaceString::kClusterParametersNamespace,
- &dropReply,
- DropCollectionSystemCollectionMode::kAllowSystemCollectionDrops);
- uassert(
- dropStatus.code(),
- str::stream() << "Failed to drop the cluster server parameters collection"
- << causedBy(dropStatus.reason()),
- dropStatus.isOK() || dropStatus.code() == ErrorCodes::NamespaceNotFound);
- }
-
FeatureCompatibilityVersion::updateFeatureCompatibilityVersionDocument(
opCtx,
actualVersion,
@@ -430,17 +400,6 @@ public:
clearOrphanCountersFromRangeDeletionTasks(opCtx);
}
- // TODO (SERVER-62325): Remove collMod draining mechanism after 6.0 branching.
- if (actualVersion > requestedVersion &&
- requestedVersion < multiversion::FeatureCompatibilityVersion::kVersion_6_0) {
- // No more collMod coordinators will start because we have already switched
- // the FCV value to kDowngrading. Wait for the ongoing collMod coordinators to
- // finish.
- ShardingDDLCoordinatorService::getService(opCtx)
- ->waitForCoordinatorsOfGivenTypeToComplete(
- opCtx, DDLCoordinatorTypeEnum::kCollMod);
- }
-
// TODO SERVER-65077: Remove FCV check once 6.0 is released
if (actualVersion > requestedVersion &&
!gFeatureFlagFLE2.isEnabledOnVersion(requestedVersion)) {
@@ -481,10 +440,6 @@ public:
actualVersion) &&
feature_flags::gFeatureFlagMigrationRecipientCriticalSection.isEnabledOnVersion(
requestedVersion)) ||
- (!feature_flags::gFeatureFlagNewPersistedChunkVersionFormat.isEnabledOnVersion(
- actualVersion) &&
- feature_flags::gFeatureFlagNewPersistedChunkVersionFormat.isEnabledOnVersion(
- requestedVersion)) ||
orphanTrackingCondition) {
drainOldMoveChunks.emplace(opCtx, "setFeatureCompatibilityVersionUpgrade");
@@ -586,14 +541,6 @@ private:
!feature_flags::gFeatureFlagChangeStreamPreAndPostImages.isEnabledOnVersion(
requestedVersion);
- // TODO SERVER-62693: remove the following scope once 6.0 branches out
- if (requestedVersion == multiversion::GenericFCV::kLastLTS) {
- if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer ||
- serverGlobalParams.clusterRole == ClusterRole::ShardServer) {
- sharding_util::downgradeCollectionBalancingFieldsToPre53(opCtx);
- }
- }
-
// TODO SERVER-65332 remove logic bound to this future object When kLastLTS is 6.0
boost::optional<SharedSemiFuture<void>> chunkResizeAsyncTask;
if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
@@ -731,13 +678,6 @@ private:
}
}
- // TODO SERVER-64720 Remove when 6.0 becomes last LTS
- if (serverGlobalParams.clusterRole == ClusterRole::ShardServer) {
- ShardingDDLCoordinatorService::getService(opCtx)
- ->waitForCoordinatorsOfGivenTypeToComplete(
- opCtx, DDLCoordinatorTypeEnum::kCreateCollection);
- }
-
// TODO SERVER-62338 Remove when 6.0 branches-out
if (serverGlobalParams.clusterRole == ClusterRole::ShardServer &&
!resharding::gFeatureFlagRecoverableShardsvrReshardCollectionCoordinator
diff --git a/src/mongo/db/commands/set_index_commit_quorum.idl b/src/mongo/db/commands/set_index_commit_quorum.idl
index 97cd14c2938..960a6847791 100644
--- a/src/mongo/db/commands/set_index_commit_quorum.idl
+++ b/src/mongo/db/commands/set_index_commit_quorum.idl
@@ -53,5 +53,6 @@ commands:
commitQuorum:
type: CommitQuorum
description: "commitQuorum can be set to the same values as writeConcern.w and
- indicates how many and/or which replica set members are needed for the
- primary to commit the index build."
+ indicates how many and/or which replica set members must be ready to
+ commit the index build before the primary will proceed to commit the
+ index build."
diff --git a/src/mongo/db/commands/set_index_commit_quorum_command.cpp b/src/mongo/db/commands/set_index_commit_quorum_command.cpp
index 35a0c45122b..6eaf7148272 100644
--- a/src/mongo/db/commands/set_index_commit_quorum_command.cpp
+++ b/src/mongo/db/commands/set_index_commit_quorum_command.cpp
@@ -66,7 +66,10 @@ public:
<< " commitQuorum: <string|number|object> option to define the required quorum for"
<< std::endl
<< " the index builds to commit" << std::endl
- << "}";
+ << "}" << std::endl
+ << "This command is useful if the commitQuorum of an active index build is no longer "
+ "possible or desirable (replica set membership has changed), or potential secondary "
+ "replication lag has become a greater concern";
return ss.str();
}
diff --git a/src/mongo/db/commands/txn_cmds.cpp b/src/mongo/db/commands/txn_cmds.cpp
index c06f80f2590..d180b1964a2 100644
--- a/src/mongo/db/commands/txn_cmds.cpp
+++ b/src/mongo/db/commands/txn_cmds.cpp
@@ -78,6 +78,15 @@ public:
std::string help() const final {
return "Commits a transaction";
}
+
+ bool isTransactionCommand() const final {
+ return true;
+ }
+
+ bool allowedInTransactions() const final {
+ return true;
+ }
+
class Invocation final : public InvocationBaseGen {
public:
using InvocationBaseGen::InvocationBaseGen;
@@ -182,6 +191,14 @@ public:
return "Aborts a transaction";
}
+ bool isTransactionCommand() const final {
+ return true;
+ }
+
+ bool allowedInTransactions() const final {
+ return true;
+ }
+
class Invocation final : public InvocationBaseGen {
public:
using InvocationBaseGen::InvocationBaseGen;
diff --git a/src/mongo/db/commands/user_management_commands.cpp b/src/mongo/db/commands/user_management_commands.cpp
index 3045a4e69b9..63bfeb73a03 100644
--- a/src/mongo/db/commands/user_management_commands.cpp
+++ b/src/mongo/db/commands/user_management_commands.cpp
@@ -236,7 +236,7 @@ Status queryAuthzDocument(OperationContext* opCtx,
FindCommandRequest findRequest{collectionName};
findRequest.setFilter(query);
findRequest.setProjection(projection);
- client.find(std::move(findRequest), ReadPreferenceSetting{}, resultProcessor);
+ client.find(std::move(findRequest), resultProcessor);
return Status::OK();
} catch (const DBException& e) {
return e.toStatus();
@@ -1461,8 +1461,11 @@ UsersInfoReply CmdUMCTyped<UsersInfoCommand, UMCInfoParams>::Invocation::typedRu
CommandHelpers::appendSimpleCommandStatus(bodyBuilder, true);
bodyBuilder.doneFast();
auto response = CursorResponse::parseFromBSONThrowing(replyBuilder.releaseBody());
- DBClientCursor cursor(
- &client, response.getNSS(), response.getCursorId(), 0, 0, response.releaseBatch());
+ DBClientCursor cursor(&client,
+ response.getNSS(),
+ response.getCursorId(),
+ false /*isExhaust*/,
+ response.releaseBatch());
while (cursor.more()) {
users.push_back(cursor.next().getOwned());
diff --git a/src/mongo/db/commands/write_commands.cpp b/src/mongo/db/commands/write_commands.cpp
index d76053e34a7..b360c7b1a2c 100644
--- a/src/mongo/db/commands/write_commands.cpp
+++ b/src/mongo/db/commands/write_commands.cpp
@@ -263,6 +263,11 @@ BSONObj makeTimeseriesInsertDocument(std::shared_ptr<BucketCatalog::WriteBatch>
kTimeseriesControlDefaultVersion);
bucketControlBuilder.append(kBucketControlMinFieldName, batch->min());
bucketControlBuilder.append(kBucketControlMaxFieldName, batch->max());
+
+ if (feature_flags::gTimeseriesScalabilityImprovements.isEnabled(
+ serverGlobalParams.featureCompatibility)) {
+ bucketControlBuilder.append(kBucketControlClosedFieldName, false);
+ }
}
if (metadataElem) {
builder.appendAs(metadataElem, kBucketMetaFieldName);
@@ -511,6 +516,13 @@ public:
return false;
}
+ bool supportsRetryableWrite() const final {
+ return true;
+ }
+
+ bool allowedInTransactions() const final {
+ return true;
+ }
class Invocation final : public InvocationBaseGen {
public:
Invocation(OperationContext* opCtx,
@@ -531,7 +543,8 @@ public:
write_ops::InsertCommandReply typedRun(OperationContext* opCtx) final try {
transactionChecks(opCtx, ns());
- if (request().getEncryptionInformation().has_value()) {
+ if (request().getEncryptionInformation().has_value() &&
+ !request().getEncryptionInformation()->getCrudProcessed()) {
write_ops::InsertCommandReply insertReply;
auto batch = processFLEInsert(opCtx, request(), &insertReply);
if (batch == FLEBatchResult::kProcessed) {
@@ -720,8 +733,11 @@ public:
beforeSize = bucketDoc.objsize();
// Reset every time we run to ensure we never use a stale value
compressionStats = {};
- auto compressed = timeseries::compressBucket(
- bucketDoc, closedBucket.timeField, ns(), validateCompression);
+ auto compressed = timeseries::compressBucket(bucketDoc,
+ closedBucket.timeField,
+ ns(),
+ closedBucket.eligibleForReopening,
+ validateCompression);
if (compressed.compressedBucket) {
// If compressed object size is larger than uncompressed, skip compression
// update.
@@ -1386,6 +1402,15 @@ public:
bool shouldAffectCommandCounter() const final {
return false;
}
+
+ bool supportsRetryableWrite() const final {
+ return true;
+ }
+
+ bool allowedInTransactions() const final {
+ return true;
+ }
+
class Invocation final : public InvocationBaseGen {
public:
Invocation(OperationContext* opCtx,
@@ -1458,7 +1483,8 @@ public:
write_ops::UpdateCommandReply updateReply;
OperationSource source = OperationSource::kStandard;
- if (request().getEncryptionInformation().has_value()) {
+ if (request().getEncryptionInformation().has_value() &&
+ !request().getEncryptionInformation().get().getCrudProcessed()) {
return processFLEUpdate(opCtx, request());
}
@@ -1623,6 +1649,14 @@ public:
return false;
}
+ bool supportsRetryableWrite() const final {
+ return true;
+ }
+
+ bool allowedInTransactions() const final {
+ return true;
+ }
+
class Invocation final : public InvocationBaseGen {
public:
Invocation(OperationContext* opCtx,
diff --git a/src/mongo/db/concurrency/lock_state.cpp b/src/mongo/db/concurrency/lock_state.cpp
index 681043076dd..f1da9723284 100644
--- a/src/mongo/db/concurrency/lock_state.cpp
+++ b/src/mongo/db/concurrency/lock_state.cpp
@@ -374,8 +374,7 @@ bool LockerImpl::_acquireTicket(OperationContext* opCtx, LockMode mode, Date_t d
// Acquiring a ticket is a potentially blocking operation. This must not be called after a
// transaction timestamp has been set, indicating this transaction has created an oplog
// hole.
- if (opCtx)
- invariant(!opCtx->recoveryUnit()->isTimestamped());
+ invariant(!opCtx->recoveryUnit()->isTimestamped());
auto waitMode = _uninterruptibleLocksRequested ? TicketHolder::WaitMode::kUninterruptible
: TicketHolder::WaitMode::kInterruptible;
diff --git a/src/mongo/db/create_indexes.idl b/src/mongo/db/create_indexes.idl
index 247e0295edb..f00f287af5a 100644
--- a/src/mongo/db/create_indexes.idl
+++ b/src/mongo/db/create_indexes.idl
@@ -215,7 +215,10 @@ commands:
default: false
unstable: true
commitQuorum:
- description: 'Commit Quorum options'
+ description: "Dictates which or how many replica set members must be ready to commit
+ the index build before the primary will proceed to commit the index.
+ This minimizes replication lag by ensuring secondaries are ready for
+ commit and can quickly apply the commit on a finished index build"
type: CommitQuorum
optional: true
unstable: false
diff --git a/src/mongo/db/curop.cpp b/src/mongo/db/curop.cpp
index 08158554ad9..9fce4d9d7f9 100644
--- a/src/mongo/db/curop.cpp
+++ b/src/mongo/db/curop.cpp
@@ -502,6 +502,7 @@ bool CurOp::completeAndLogOperation(OperationContext* opCtx,
_debug.report(
opCtx, (lockerInfo ? &lockerInfo->stats : nullptr), operationMetricsPtr, &attr);
+ // TODO SERVER-67020 Ensure the ns in attr has the tenantId as the db prefix
LOGV2_OPTIONS(51803, {component}, "Slow query", attr);
_checkForFailpointsAfterCommandLogged();
diff --git a/src/mongo/db/database_name.h b/src/mongo/db/database_name.h
index a4a549eb75a..a44979a2096 100644
--- a/src/mongo/db/database_name.h
+++ b/src/mongo/db/database_name.h
@@ -49,27 +49,22 @@ public:
/**
* Constructs an empty DatabaseName.
*/
- DatabaseName() : _tenantId(boost::none), _dbString(""), _tenantDbString(boost::none){};
+ DatabaseName() = default;
/**
* Constructs a DatabaseName from the given tenantId and database name.
* "dbName" is expected only consist of a db name. It is the caller's responsibility to ensure
* the dbName is a valid db name.
*/
- DatabaseName(boost::optional<TenantId> tenantId, StringData dbString) {
- _tenantId = tenantId;
- _dbString = dbString.toString();
-
- _tenantDbString =
- _tenantId ? boost::make_optional(_tenantId->toString() + "_" + _dbString) : boost::none;
- }
+ DatabaseName(boost::optional<TenantId> tenantId, StringData dbString)
+ : _tenantId(std::move(tenantId)), _dbString(dbString.toString()) {}
/**
* Prefer to use the constructor above.
* TODO SERVER-65456 Remove this constructor.
*/
DatabaseName(StringData dbName, boost::optional<TenantId> tenantId = boost::none)
- : DatabaseName(tenantId, dbName) {}
+ : DatabaseName(std::move(tenantId), dbName) {}
static DatabaseName createSystemTenantDbName(StringData dbString);
@@ -82,28 +77,26 @@ public:
}
const std::string& toString() const {
- if (_tenantDbString)
- return *_tenantDbString;
+ return db();
+ }
+
+ std::string toStringWithTenantId() const {
+ if (_tenantId)
+ return str::stream() << *_tenantId << '_' << _dbString;
- invariant(!_tenantId);
return _dbString;
}
bool equalCaseInsensitive(const DatabaseName& other) const {
- return boost::iequals(toString(), other.toString());
- }
-
- /**
- * Returns -1, 0, or 1 if 'this' is less, equal, or greater than 'other' in
- * lexicographical order.
- */
- int compare(const DatabaseName& other) const {
- return toString().compare(other.toString());
+ return boost::iequals(toStringWithTenantId(), other.toStringWithTenantId());
}
template <typename H>
friend H AbslHashValue(H h, const DatabaseName& obj) {
- return H::combine(std::move(h), obj.toString());
+ if (obj._tenantId) {
+ return H::combine(std::move(h), obj._tenantId.get(), obj._dbString);
+ }
+ return H::combine(std::move(h), obj._dbString);
}
friend auto logAttrs(const DatabaseName& obj) {
@@ -111,9 +104,8 @@ public:
}
private:
- boost::optional<TenantId> _tenantId;
+ boost::optional<TenantId> _tenantId = boost::none;
std::string _dbString;
- boost::optional<std::string> _tenantDbString;
};
inline std::ostream& operator<<(std::ostream& stream, const DatabaseName& tdb) {
@@ -125,7 +117,7 @@ inline StringBuilder& operator<<(StringBuilder& builder, const DatabaseName& tdb
}
inline bool operator==(const DatabaseName& lhs, const DatabaseName& rhs) {
- return lhs.compare(rhs) == 0;
+ return (lhs.tenantId() == rhs.tenantId()) && (lhs.db() == rhs.db());
}
inline bool operator!=(const DatabaseName& lhs, const DatabaseName& rhs) {
@@ -133,11 +125,17 @@ inline bool operator!=(const DatabaseName& lhs, const DatabaseName& rhs) {
}
inline bool operator<(const DatabaseName& lhs, const DatabaseName& rhs) {
- return lhs.compare(rhs) < 0;
+ if (lhs.tenantId() != rhs.tenantId()) {
+ return lhs.tenantId() < rhs.tenantId();
+ }
+ return lhs.db() < rhs.db();
}
inline bool operator>(const DatabaseName& lhs, const DatabaseName& rhs) {
- return rhs < lhs;
+ if (lhs.tenantId() != rhs.tenantId()) {
+ return lhs.tenantId() > rhs.tenantId();
+ }
+ return lhs.db() > rhs.db();
}
inline bool operator<=(const DatabaseName& lhs, const DatabaseName& rhs) {
diff --git a/src/mongo/db/tenant_database_name_test.cpp b/src/mongo/db/database_name_test.cpp
index 15ed7f9cd14..88436c5d3f1 100644
--- a/src/mongo/db/tenant_database_name_test.cpp
+++ b/src/mongo/db/database_name_test.cpp
@@ -48,7 +48,8 @@ TEST(DatabaseNameTest, MultitenancySupportDisabled) {
ASSERT(dbnWithTenant.tenantId());
ASSERT_EQUALS(tenantId, *dbnWithTenant.tenantId());
ASSERT_EQUALS(std::string("a"), dbnWithTenant.db());
- ASSERT_EQUALS(std::string(tenantId.toString() + "_a"), dbnWithTenant.toString());
+ ASSERT_EQUALS(std::string("a"), dbnWithTenant.toString());
+ ASSERT_EQUALS(std::string(tenantId.toString() + "_a"), dbnWithTenant.toStringWithTenantId());
}
TEST(DatabaseNameTest, MultitenancySupportEnabledTenantIDNotRequired) {
@@ -65,7 +66,8 @@ TEST(DatabaseNameTest, MultitenancySupportEnabledTenantIDNotRequired) {
ASSERT(dbnWithTenant.tenantId());
ASSERT_EQUALS(tenantId, *dbnWithTenant.tenantId());
ASSERT_EQUALS(std::string("a"), dbnWithTenant.db());
- ASSERT_EQUALS(std::string(tenantId.toString() + "_a"), dbnWithTenant.toString());
+ ASSERT_EQUALS(std::string("a"), dbnWithTenant.toString());
+ ASSERT_EQUALS(std::string(tenantId.toString() + "_a"), dbnWithTenant.toStringWithTenantId());
}
/*
diff --git a/src/mongo/db/db_raii.cpp b/src/mongo/db/db_raii.cpp
index ddc53c40db1..688577f8e28 100644
--- a/src/mongo/db/db_raii.cpp
+++ b/src/mongo/db/db_raii.cpp
@@ -804,6 +804,14 @@ const CollectionPtr& AutoGetCollectionForReadMaybeLockFree::getCollection() cons
}
}
+bool AutoGetCollectionForReadMaybeLockFree::isAnySecondaryNamespaceAViewOrSharded() const {
+ if (_autoGet) {
+ return _autoGet->isAnySecondaryNamespaceAViewOrSharded();
+ } else {
+ return _autoGetLockFree->isAnySecondaryNamespaceAViewOrSharded();
+ }
+}
+
template <typename AutoGetCollectionForReadType>
AutoGetCollectionForReadCommandBase<AutoGetCollectionForReadType>::
AutoGetCollectionForReadCommandBase(
diff --git a/src/mongo/db/db_raii.h b/src/mongo/db/db_raii.h
index 55e96aea833..63bdf8c621d 100644
--- a/src/mongo/db/db_raii.h
+++ b/src/mongo/db/db_raii.h
@@ -184,10 +184,6 @@ public:
Date_t deadline = Date_t::max(),
const std::vector<NamespaceStringOrUUID>& secondaryNssOrUUIDs = {});
- Database* getDb() const {
- return _autoColl->getDb();
- }
-
/**
* Indicates whether any namespace in 'secondaryNssOrUUIDs' is a view or sharded.
*
@@ -315,6 +311,7 @@ public:
const CollectionPtr& getCollection() const;
const ViewDefinition* getView() const;
const NamespaceString& getNss() const;
+ bool isAnySecondaryNamespaceAViewOrSharded() const;
private:
boost::optional<AutoGetCollectionForRead> _autoGet;
@@ -389,10 +386,6 @@ public:
const std::vector<NamespaceStringOrUUID>& secondaryNssOrUUIDs = {})
: AutoGetCollectionForReadCommandBase(
opCtx, nsOrUUID, viewMode, deadline, logMode, secondaryNssOrUUIDs) {}
-
- Database* getDb() const {
- return _autoCollForRead.getDb();
- }
};
/**
diff --git a/src/mongo/db/db_raii_multi_collection_test.cpp b/src/mongo/db/db_raii_multi_collection_test.cpp
index 1bbf3df0f62..cc2ce8c100f 100644
--- a/src/mongo/db/db_raii_multi_collection_test.cpp
+++ b/src/mongo/db/db_raii_multi_collection_test.cpp
@@ -27,11 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
-#include <string>
-
#include "mongo/db/catalog/catalog_test_fixture.h"
#include "mongo/db/client.h"
#include "mongo/db/concurrency/lock_state.h"
@@ -41,7 +36,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
-
namespace mongo {
namespace {
@@ -82,10 +76,11 @@ public:
const NamespaceString _primaryNss = NamespaceString("db1.primary1");
const NamespaceString _secondaryNss1 = NamespaceString("db1.secondary1");
const NamespaceString _secondaryNss2 = NamespaceString("db1.secondary2");
- const NamespaceString _secondaryNssOtherDbNss = NamespaceString("db2.secondary1");
const std::vector<NamespaceStringOrUUID> _secondaryNssOrUUIDVec = {
NamespaceStringOrUUID(_secondaryNss1), NamespaceStringOrUUID(_secondaryNss2)};
+
+ const NamespaceString _secondaryNssOtherDbNss = NamespaceString("db2.secondary1");
const std::vector<NamespaceStringOrUUID> _secondaryNssOtherDbNssVec = {
NamespaceStringOrUUID(_secondaryNssOtherDbNss)};
@@ -235,34 +230,6 @@ TEST_F(AutoGetCollectionMultiTest, LockFreeMultiDBs) {
_secondaryNssOtherDbNss));
}
-TEST_F(AutoGetCollectionMultiTest, LockedMultiDBs) {
- auto opCtx1 = _client1.second.get();
-
- createCollections(opCtx1);
-
- AutoGetCollectionForRead autoGet(opCtx1,
- NamespaceStringOrUUID(_primaryNss),
- AutoGetCollectionViewMode::kViewsForbidden,
- Date_t::max(),
- _secondaryNssOtherDbNssVec);
-
- auto locker = opCtx1->lockState();
- locker->dump();
- invariant(locker->isLockHeldForMode(resourceIdGlobal, MODE_IS));
- invariant(locker->isDbLockedForMode(_primaryNss.db(), MODE_IS));
- invariant(locker->isDbLockedForMode(_secondaryNssOtherDbNss.db(), MODE_IS));
- // Set 'shouldConflictWithSecondaryBatchApplication' to true so isCollectionLockedForMode()
- // doesn't return true regardless of what locks are held.
- opCtx1->lockState()->setShouldConflictWithSecondaryBatchApplication(true);
- invariant(locker->isCollectionLockedForMode(_primaryNss, MODE_IS));
- invariant(locker->isCollectionLockedForMode(_secondaryNssOtherDbNss, MODE_IS));
-
- const auto& coll = autoGet.getCollection();
- ASSERT(coll);
- ASSERT(CollectionCatalog::get(opCtx1)->lookupCollectionByNamespace(opCtx1,
- _secondaryNssOtherDbNss));
-}
-
TEST_F(AutoGetCollectionMultiTest, LockFreeSecondaryNamespaceNotFoundIsOK) {
auto opCtx1 = _client1.second.get();
@@ -287,7 +254,7 @@ TEST_F(AutoGetCollectionMultiTest, LockedSecondaryNamespaceNotFound) {
NamespaceStringOrUUID(_primaryNss),
AutoGetCollectionViewMode::kViewsForbidden,
Date_t::max(),
- _secondaryNssOrUUIDAllVec);
+ _secondaryNssOrUUIDVec);
auto locker = opCtx1->lockState();
@@ -301,9 +268,9 @@ TEST_F(AutoGetCollectionMultiTest, LockedSecondaryNamespaceNotFound) {
invariant(locker->isDbLockedForMode(_primaryNss.db(), MODE_IS));
invariant(locker->isCollectionLockedForMode(_primaryNss, MODE_IS));
- for (const auto& secondaryNss : _secondaryNamespacesAll) {
+ for (const auto& secondaryNss : _secondaryNssOrUUIDVec) {
invariant(locker->isDbLockedForMode(secondaryNss.db(), MODE_IS));
- invariant(locker->isCollectionLockedForMode(secondaryNss, MODE_IS));
+ invariant(locker->isCollectionLockedForMode(*secondaryNss.nss(), MODE_IS));
}
const auto& coll = autoGet.getCollection();
diff --git a/src/mongo/db/dbdirectclient.cpp b/src/mongo/db/dbdirectclient.cpp
index 1304b97ad27..de53dd33bed 100644
--- a/src/mongo/db/dbdirectclient.cpp
+++ b/src/mongo/db/dbdirectclient.cpp
@@ -148,10 +148,11 @@ void DBDirectClient::say(Message& toSend, bool isRetry, string* actualServer) {
}
std::unique_ptr<DBClientCursor> DBDirectClient::find(FindCommandRequest findRequest,
- const ReadPreferenceSetting& readPref) {
+ const ReadPreferenceSetting& readPref,
+ ExhaustMode exhaustMode) {
invariant(!findRequest.getReadConcern(),
"passing readConcern to DBDirectClient::find() is not supported");
- return DBClientBase::find(std::move(findRequest), readPref);
+ return DBClientBase::find(std::move(findRequest), readPref, exhaustMode);
}
write_ops::FindAndModifyCommandReply DBDirectClient::findAndModify(
diff --git a/src/mongo/db/dbdirectclient.h b/src/mongo/db/dbdirectclient.h
index 7c8e89d0bc2..e47b6b50ec8 100644
--- a/src/mongo/db/dbdirectclient.h
+++ b/src/mongo/db/dbdirectclient.h
@@ -58,7 +58,8 @@ public:
using DBClientBase::update;
std::unique_ptr<DBClientCursor> find(FindCommandRequest findRequest,
- const ReadPreferenceSetting& readPref) override;
+ const ReadPreferenceSetting& readPref,
+ ExhaustMode exhaustMode) override;
write_ops::FindAndModifyCommandReply findAndModify(
const write_ops::FindAndModifyCommandRequest& findAndModify);
diff --git a/src/mongo/db/dbdirectclient_test.cpp b/src/mongo/db/dbdirectclient_test.cpp
index 19ba4c35e86..ab15c3cd7fb 100644
--- a/src/mongo/db/dbdirectclient_test.cpp
+++ b/src/mongo/db/dbdirectclient_test.cpp
@@ -171,9 +171,9 @@ TEST_F(DBDirectClientTest, ExhaustQuery) {
ASSERT_FALSE(insertReply.getWriteErrors());
// The query should work even though exhaust mode is requested.
- int batchSize = 2;
- auto cursor = client.query_DEPRECATED(
- kNs, BSONObj{}, Query{}, 0 /*limit*/, 0 /*skip*/, nullptr, QueryOption_Exhaust, batchSize);
+ FindCommandRequest findCmd{kNs};
+ findCmd.setBatchSize(2);
+ auto cursor = client.find(std::move(findCmd), ReadPreferenceSetting{}, ExhaustMode::kOn);
ASSERT_EQ(cursor->itcount(), numDocs);
}
diff --git a/src/mongo/db/dbhelpers.cpp b/src/mongo/db/dbhelpers.cpp
index 40375b58ddb..4afc53b4840 100644
--- a/src/mongo/db/dbhelpers.cpp
+++ b/src/mongo/db/dbhelpers.cpp
@@ -137,14 +137,11 @@ RecordId Helpers::findOne(OperationContext* opCtx,
}
bool Helpers::findById(OperationContext* opCtx,
- Database* database,
StringData ns,
BSONObj query,
BSONObj& result,
bool* nsFound,
bool* indexFound) {
- invariant(database);
-
// TODO ForRead?
NamespaceString nss{ns};
CollectionPtr collection =
diff --git a/src/mongo/db/dbhelpers.h b/src/mongo/db/dbhelpers.h
index b975bceaf21..ecb7081f29e 100644
--- a/src/mongo/db/dbhelpers.h
+++ b/src/mongo/db/dbhelpers.h
@@ -88,7 +88,6 @@ struct Helpers {
* Returns true if a matching document was found.
*/
static bool findById(OperationContext* opCtx,
- Database* db,
StringData ns,
BSONObj query,
BSONObj& result,
diff --git a/src/mongo/db/dbmessage.h b/src/mongo/db/dbmessage.h
index 1f5472b2272..0b8e8ce84c7 100644
--- a/src/mongo/db/dbmessage.h
+++ b/src/mongo/db/dbmessage.h
@@ -227,7 +227,7 @@ public:
* Indicates whether this message is expected to have a ns.
*/
bool messageShouldHaveNs() const {
- return (_msg.operation() >= dbUpdate) & (_msg.operation() <= dbDelete);
+ return static_cast<int>(_msg.operation() >= dbUpdate) & (_msg.operation() <= dbDelete);
}
/**
diff --git a/src/mongo/db/dollar_tenant_decoration_test.cpp b/src/mongo/db/dollar_tenant_decoration_test.cpp
deleted file mode 100644
index 391250a1791..00000000000
--- a/src/mongo/db/dollar_tenant_decoration_test.cpp
+++ /dev/null
@@ -1,167 +0,0 @@
-/**
- * Copyright (C) 2022-present MongoDB, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the Server Side Public License, version 1,
- * as published by MongoDB, Inc.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * Server Side Public License for more details.
- *
- * You should have received a copy of the Server Side Public License
- * along with this program. If not, see
- * <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the Server Side Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/bson/oid.h"
-#include "mongo/db/auth/authorization_manager_impl.h"
-#include "mongo/db/auth/authorization_session.h"
-#include "mongo/db/auth/authorization_session_impl.h"
-#include "mongo/db/auth/authz_manager_external_state_mock.h"
-#include "mongo/db/auth/security_token.h"
-#include "mongo/db/multitenancy.h"
-#include "mongo/db/multitenancy_gen.h"
-#include "mongo/db/service_context_test_fixture.h"
-#include "mongo/unittest/unittest.h"
-
-namespace mongo {
-
-/**
- * Encapsulation thwarting helper for authorizing a user without
- * having to set up any externalstate mocks or transport layers.
- */
-class AuthorizationSessionImplTestHelper {
-public:
- /**
- * Synthesize a user with the useTenant privilege and add them to the authorization session.
- */
- static void grantUseTenant(OperationContext* opCtx) {
- User user(UserName("useTenant", "admin"));
- user.setPrivileges(
- {Privilege(ResourcePattern::forClusterResource(), ActionType::useTenant)});
- auto* as =
- dynamic_cast<AuthorizationSessionImpl*>(AuthorizationSession::get(opCtx->getClient()));
- if (as->_authenticatedUser != boost::none) {
- as->logoutAllDatabases(opCtx->getClient(), "AuthorizationSessionImplTestHelper"_sd);
- }
- as->_authenticatedUser = std::move(user);
- as->_authenticationMode = AuthorizationSession::AuthenticationMode::kConnection;
- as->_updateInternalAuthorizationState();
- }
-};
-
-namespace {
-
-class DollarTenantDecorationTest : public ScopedGlobalServiceContextForTest, public unittest::Test {
-protected:
- void setUp() final {
- auto authzManagerState = std::make_unique<AuthzManagerExternalStateMock>();
- auto authzManager = std::make_unique<AuthorizationManagerImpl>(
- getServiceContext(), std::move(authzManagerState));
- authzManager->setAuthEnabled(true);
- AuthorizationManager::set(getServiceContext(), std::move(authzManager));
-
- client = getServiceContext()->makeClient("test");
- opCtxPtr = getServiceContext()->makeOperationContext(client.get());
- opCtx = opCtxPtr.get();
- }
-
- BSONObj makeSecurityToken(const UserName& userName) {
- constexpr auto authUserFieldName = auth::SecurityToken::kAuthenticatedUserFieldName;
- auto authUser = userName.toBSON(true /* serialize token */);
- ASSERT_EQ(authUser["tenant"_sd].type(), jstOID);
- return auth::signSecurityToken(BSON(authUserFieldName << authUser));
- }
-
- ServiceContext::UniqueClient client;
- ServiceContext::UniqueOperationContext opCtxPtr;
- OperationContext* opCtx;
-};
-
-TEST_F(DollarTenantDecorationTest, ParseDollarTenantFromRequestSecurityTokenAlreadySet) {
- gMultitenancySupport = true;
-
- // Ensure the security token is set on the opCtx.
- const auto kTenantId = TenantId(OID::gen());
- auto token = makeSecurityToken(UserName("user", "admin", kTenantId));
- auth::readSecurityTokenMetadata(opCtx, token);
- ASSERT(getActiveTenant(opCtx));
- ASSERT_EQ(*getActiveTenant(opCtx), kTenantId);
-
- // Grant authorization to set $tenant.
- AuthorizationSessionImplTestHelper::grantUseTenant(opCtx);
-
- // The dollarTenantDecoration should not be set because the security token is already set.
- const auto kTenantParameter = OID::gen();
- auto opMsgRequest = OpMsgRequest::fromDBAndBody("test", BSON("$tenant" << kTenantParameter));
- ASSERT_THROWS_CODE(
- parseDollarTenantFromRequest(opCtx, opMsgRequest), AssertionException, 6223901);
-
- // getActiveTenant should still return the tenantId in the security token.
- ASSERT(getActiveTenant(opCtx));
- ASSERT_EQ(*getActiveTenant(opCtx), kTenantId);
-}
-
-TEST_F(DollarTenantDecorationTest, ParseDollarTenantFromRequestUnauthorized) {
- gMultitenancySupport = true;
- const auto kOid = OID::gen();
-
- // We are not authenticated at all.
- auto opMsgRequest = OpMsgRequest::fromDBAndBody("test", BSON("$tenant" << kOid));
- ASSERT_THROWS_CODE(parseDollarTenantFromRequest(opCtx, opMsgRequest),
- AssertionException,
- ErrorCodes::Unauthorized);
- ASSERT(!getActiveTenant(opCtx));
-}
-
-TEST_F(DollarTenantDecorationTest, ParseDollarTenantMultitenancySupportDisabled) {
- gMultitenancySupport = false;
- const auto kOid = OID::gen();
-
- // Grant authorization to set $tenant.
- AuthorizationSessionImplTestHelper::grantUseTenant(opCtx);
-
- // TenantId is passed as the '$tenant' parameter. "multitenancySupport" is disabled, so we
- // should throw when attempting to set this tenantId on the opCtx.
- auto opMsgRequestParameter = OpMsgRequest::fromDBAndBody("test", BSON("$tenant" << kOid));
- ASSERT_THROWS_CODE(parseDollarTenantFromRequest(opCtx, opMsgRequestParameter),
- AssertionException,
- ErrorCodes::InvalidOptions);
- ASSERT(!getActiveTenant(opCtx));
-}
-
-TEST_F(DollarTenantDecorationTest, ParseDollarTenantFromRequestSuccess) {
- gMultitenancySupport = true;
- const auto kOid = OID::gen();
-
- // Grant authorization to set $tenant.
- AuthorizationSessionImplTestHelper::grantUseTenant(opCtx);
-
- // The tenantId should be successfully set because "multitenancySupport" is enabled and we're
- // authorized.
- auto opMsgRequest = OpMsgRequest::fromDBAndBody("test", BSON("$tenant" << kOid));
- parseDollarTenantFromRequest(opCtx, opMsgRequest);
-
- auto tenantId = getActiveTenant(opCtx);
- ASSERT(tenantId);
- ASSERT_EQ(tenantId->toString(), kOid.toString());
-}
-
-} // namespace
-} // namespace mongo
diff --git a/src/mongo/db/exec/add_fields_projection_executor.cpp b/src/mongo/db/exec/add_fields_projection_executor.cpp
index 592074b4834..a0fd7f08580 100644
--- a/src/mongo/db/exec/add_fields_projection_executor.cpp
+++ b/src/mongo/db/exec/add_fields_projection_executor.cpp
@@ -92,38 +92,6 @@ private:
// The original object. Used to generate more helpful error messages.
const BSONObj& _rawObj;
- // Custom comparator that orders fieldpath strings by path prefix first, then by field.
- struct PathPrefixComparator {
- static constexpr char dot = '.';
-
- // Returns true if the lhs value should sort before the rhs, false otherwise.
- bool operator()(const std::string& lhs, const std::string& rhs) const {
- for (size_t pos = 0, len = std::min(lhs.size(), rhs.size()); pos < len; ++pos) {
- auto &lchar = lhs[pos], &rchar = rhs[pos];
- if (lchar == rchar) {
- continue;
- }
-
- // Consider the path delimiter '.' as being less than all other characters, so that
- // paths sort directly before any paths they prefix and directly after any paths
- // which prefix them.
- if (lchar == dot) {
- return true;
- } else if (rchar == dot) {
- return false;
- }
-
- // Otherwise, default to normal character comparison.
- return lchar < rchar;
- }
-
- // If we get here, then we have reached the end of lhs and/or rhs and all of their path
- // segments up to this point match. If lhs is shorter than rhs, then lhs prefixes rhs
- // and should sort before it.
- return lhs.size() < rhs.size();
- }
- };
-
// Tracks which paths we've seen to ensure no two paths conflict with each other.
std::set<std::string, PathPrefixComparator> _seenPaths;
};
diff --git a/src/mongo/db/exec/batched_delete_stage.cpp b/src/mongo/db/exec/batched_delete_stage.cpp
index 436aedc5232..588fdfe2b23 100644
--- a/src/mongo/db/exec/batched_delete_stage.cpp
+++ b/src/mongo/db/exec/batched_delete_stage.cpp
@@ -257,6 +257,16 @@ PlanStage::StageState BatchedDeleteStage::_deleteBatch(WorkingSetID* out) {
timeInBatch = _commitBatch(out, &recordsToSkip, &docsDeleted, &bufferOffset);
} catch (const WriteConflictException&) {
return _prepareToRetryDrainAfterWCE(out, recordsToSkip);
+ } catch (const ExceptionFor<ErrorCodes::StaleConfig>& ex) {
+ if (ex->getVersionReceived() == ChunkVersion::IGNORED() && ex->getCriticalSectionSignal()) {
+ // If ChunkVersion is IGNORED and we encountered a critical section, then yield, wait
+ // for critical section to finish and then we'll resume the write from the point we had
+ // left. We do this to prevent large multi-writes from repeatedly failing due to
+ // StaleConfig and exhausting the mongos retry attempts.
+ planExecutorShardingCriticalSectionFuture(opCtx()) = ex->getCriticalSectionSignal();
+ return _prepareToRetryDrainAfterWCE(out, recordsToSkip);
+ }
+ throw;
}
incrementSSSMetricNoOverflow(batchedDeletesSSS.docs, docsDeleted);
diff --git a/src/mongo/db/exec/bucket_unpacker.cpp b/src/mongo/db/exec/bucket_unpacker.cpp
index 0651aae78ee..43ccca4a13a 100644
--- a/src/mongo/db/exec/bucket_unpacker.cpp
+++ b/src/mongo/db/exec/bucket_unpacker.cpp
@@ -240,29 +240,14 @@ std::unique_ptr<MatchExpression> createComparisonPredicate(
policy, matchExpr, "can't handle string comparison with a non-default collation"_sd);
}
- // We must avoid mapping predicates on the meta field onto the control field. These should be
- // mapped to the meta field instead.
- //
- // You might think these were handled earlier, by splitting the match expression into a
- // metadata-only part, and measurement/time-only part. However, splitting a $match into two
- // sequential $matches only works when splitting a conjunction. A predicate like
- // {$or: [ {a: 5}, {meta.b: 5} ]} cannot be split, and can't be metadata-only, so we have to
- // handle it here.
+ // This function only handles time and measurement predicates--not metadata.
if (bucketSpec.metaField() &&
(matchExprPath == bucketSpec.metaField().get() ||
expression::isPathPrefixOf(bucketSpec.metaField().get(), matchExprPath))) {
-
- if (haveComputedMetaField)
- return handleIneligible(policy, matchExpr, "can't handle a computed meta field");
-
- if (!includeMetaField)
- return handleIneligible(policy, matchExpr, "cannot handle an excluded meta field");
-
- auto result = matchExpr->shallowClone();
- expression::applyRenamesToExpression(
- result.get(),
- {{bucketSpec.metaField().get(), timeseries::kBucketMetaFieldName.toString()}});
- return result;
+ tasserted(
+ 6707200,
+ str::stream() << "createComparisonPredicate() does not handle metadata predicates: "
+ << matchExpr);
}
// We must avoid mapping predicates on fields computed via $addFields or a computed $project.
@@ -456,6 +441,33 @@ std::unique_ptr<MatchExpression> BucketSpec::createPredicatesOnBucketLevelField(
tassert(5916304, "BucketSpec::createPredicatesOnBucketLevelField nullptr", matchExpr);
+ // If we have a leaf predicate on a meta field, we can map it to the bucket's meta field.
+ // This includes comparisons such as $eq and $lte, as well as other non-comparison predicates
+ // such as $exists, $mod, or $elemMatch.
+ //
+ // Metadata predicates are partially handled earlier, by splitting the match expression into a
+ // metadata-only part, and measurement/time-only part. However, splitting a $match into two
+ // sequential $matches only works when splitting a conjunction. A predicate like
+ // {$or: [ {a: 5}, {meta.b: 5} ]} can't be split, and can't be metadata-only, so we have to
+ // handle it here.
+ const auto matchExprPath = matchExpr->path();
+ if (!matchExprPath.empty() && bucketSpec.metaField() &&
+ (matchExprPath == bucketSpec.metaField().get() ||
+ expression::isPathPrefixOf(bucketSpec.metaField().get(), matchExprPath))) {
+
+ if (haveComputedMetaField)
+ return handleIneligible(policy, matchExpr, "can't handle a computed meta field");
+
+ if (!includeMetaField)
+ return handleIneligible(policy, matchExpr, "cannot handle an excluded meta field");
+
+ auto result = matchExpr->shallowClone();
+ expression::applyRenamesToExpression(
+ result.get(),
+ {{bucketSpec.metaField().get(), timeseries::kBucketMetaFieldName.toString()}});
+ return result;
+ }
+
if (matchExpr->matchType() == MatchExpression::AND) {
auto nextAnd = static_cast<const AndMatchExpression*>(matchExpr);
auto andMatchExpr = std::make_unique<AndMatchExpression>();
@@ -606,7 +618,7 @@ std::unique_ptr<MatchExpression> BucketSpec::createPredicatesOnBucketLevelField(
return handleIneligible(policy, matchExpr, "can't handle this predicate");
}
-BSONObj BucketSpec::pushdownPredicate(
+std::pair<bool, BSONObj> BucketSpec::pushdownPredicate(
const boost::intrusive_ptr<ExpressionContext>& expCtx,
const TimeseriesOptions& tsOptions,
ExpressionContext::CollationMatchesDefault collationMatchesDefault,
@@ -666,7 +678,7 @@ BSONObj BucketSpec::pushdownPredicate(
metaOnlyPredicate->serialize(&result);
if (bucketMetricPredicate)
bucketMetricPredicate->serialize(&result);
- return result.obj();
+ return std::make_pair(bucketMetricPredicate.get(), result.obj());
}
class BucketUnpacker::UnpackingImpl {
diff --git a/src/mongo/db/exec/bucket_unpacker.h b/src/mongo/db/exec/bucket_unpacker.h
index 287bd9f2540..7e32629407d 100644
--- a/src/mongo/db/exec/bucket_unpacker.h
+++ b/src/mongo/db/exec/bucket_unpacker.h
@@ -167,8 +167,11 @@ public:
*
* When using IneligiblePredicatePolicy::kIgnore, if the predicate can't be pushed down, it
* returns null. When using IneligiblePredicatePolicy::kError it raises a user error.
+ *
+ * Returns a boolean (alongside the bucket-level predicate) describing if the result contains
+ * a metric predicate.
*/
- static BSONObj pushdownPredicate(
+ static std::pair<bool, BSONObj> pushdownPredicate(
const boost::intrusive_ptr<ExpressionContext>& expCtx,
const TimeseriesOptions& tsOptions,
ExpressionContext::CollationMatchesDefault collationMatchesDefault,
diff --git a/src/mongo/db/exec/bucket_unpacker_test.cpp b/src/mongo/db/exec/bucket_unpacker_test.cpp
index 8ee0f4e05f5..ce5065318be 100644
--- a/src/mongo/db/exec/bucket_unpacker_test.cpp
+++ b/src/mongo/db/exec/bucket_unpacker_test.cpp
@@ -220,7 +220,8 @@ TEST_F(BucketUnpackerTest, ExcludeASingleField) {
};
test(bucket);
- test(*timeseries::compressBucket(bucket, "time"_sd, {}, false).compressedBucket);
+ test(*timeseries::compressBucket(bucket, "time"_sd, {}, /*eligibleForReopening=*/false, false)
+ .compressedBucket);
}
TEST_F(BucketUnpackerTest, EmptyIncludeGetsEmptyMeasurements) {
@@ -246,7 +247,8 @@ TEST_F(BucketUnpackerTest, EmptyIncludeGetsEmptyMeasurements) {
};
test(bucket);
- test(*timeseries::compressBucket(bucket, "time"_sd, {}, false).compressedBucket);
+ test(*timeseries::compressBucket(bucket, "time"_sd, {}, /*eligibleForReopening=*/false, false)
+ .compressedBucket);
}
TEST_F(BucketUnpackerTest, EmptyExcludeMaterializesAllFields) {
@@ -274,7 +276,8 @@ TEST_F(BucketUnpackerTest, EmptyExcludeMaterializesAllFields) {
};
test(bucket);
- test(*timeseries::compressBucket(bucket, "time"_sd, {}, false).compressedBucket);
+ test(*timeseries::compressBucket(bucket, "time"_sd, {}, /*eligibleForReopening=*/false, false)
+ .compressedBucket);
}
TEST_F(BucketUnpackerTest, SparseColumnsWhereOneColumnIsExhaustedBeforeTheOther) {
@@ -300,7 +303,8 @@ TEST_F(BucketUnpackerTest, SparseColumnsWhereOneColumnIsExhaustedBeforeTheOther)
};
test(bucket);
- test(*timeseries::compressBucket(bucket, "time"_sd, {}, false).compressedBucket);
+ test(*timeseries::compressBucket(bucket, "time"_sd, {}, /*eligibleForReopening=*/false, false)
+ .compressedBucket);
}
TEST_F(BucketUnpackerTest, UnpackBasicIncludeWithDollarPrefix) {
@@ -329,7 +333,8 @@ TEST_F(BucketUnpackerTest, UnpackBasicIncludeWithDollarPrefix) {
};
test(bucket);
- test(*timeseries::compressBucket(bucket, "time"_sd, {}, false).compressedBucket);
+ test(*timeseries::compressBucket(bucket, "time"_sd, {}, /*eligibleForReopening=*/false, false)
+ .compressedBucket);
}
TEST_F(BucketUnpackerTest, BucketsWithMetadataOnly) {
@@ -354,7 +359,8 @@ TEST_F(BucketUnpackerTest, BucketsWithMetadataOnly) {
};
test(bucket);
- test(*timeseries::compressBucket(bucket, "time"_sd, {}, false).compressedBucket);
+ test(*timeseries::compressBucket(bucket, "time"_sd, {}, /*eligibleForReopening=*/false, false)
+ .compressedBucket);
}
TEST_F(BucketUnpackerTest, UnorderedRowKeysDoesntAffectMaterialization) {
@@ -412,7 +418,8 @@ TEST_F(BucketUnpackerTest, MissingMetaFieldDoesntMaterializeMetadata) {
};
test(bucket);
- test(*timeseries::compressBucket(bucket, "time"_sd, {}, false).compressedBucket);
+ test(*timeseries::compressBucket(bucket, "time"_sd, {}, /*eligibleForReopening=*/false, false)
+ .compressedBucket);
}
TEST_F(BucketUnpackerTest, MissingMetaFieldDoesntMaterializeMetadataUnorderedKeys) {
@@ -467,7 +474,8 @@ TEST_F(BucketUnpackerTest, ExcludedMetaFieldDoesntMaterializeMetadataWhenBucketH
};
test(bucket);
- test(*timeseries::compressBucket(bucket, "time"_sd, {}, false).compressedBucket);
+ test(*timeseries::compressBucket(bucket, "time"_sd, {}, /*eligibleForReopening=*/false, false)
+ .compressedBucket);
}
TEST_F(BucketUnpackerTest, UnpackerResetThrowsOnUndefinedMeta) {
@@ -486,7 +494,8 @@ TEST_F(BucketUnpackerTest, UnpackerResetThrowsOnUndefinedMeta) {
};
test(bucket);
- test(*timeseries::compressBucket(bucket, "time"_sd, {}, false).compressedBucket);
+ test(*timeseries::compressBucket(bucket, "time"_sd, {}, /*eligibleForReopening=*/false, false)
+ .compressedBucket);
}
TEST_F(BucketUnpackerTest, UnpackerResetThrowsOnUnexpectedMeta) {
@@ -506,7 +515,8 @@ TEST_F(BucketUnpackerTest, UnpackerResetThrowsOnUnexpectedMeta) {
};
test(bucket);
- test(*timeseries::compressBucket(bucket, "time"_sd, {}, false).compressedBucket);
+ test(*timeseries::compressBucket(bucket, "time"_sd, {}, /*eligibleForReopening=*/false, false)
+ .compressedBucket);
}
TEST_F(BucketUnpackerTest, NullMetaInBucketMaterializesAsNull) {
@@ -533,7 +543,8 @@ TEST_F(BucketUnpackerTest, NullMetaInBucketMaterializesAsNull) {
};
test(bucket);
- test(*timeseries::compressBucket(bucket, "time"_sd, {}, false).compressedBucket);
+ test(*timeseries::compressBucket(bucket, "time"_sd, {}, /*eligibleForReopening=*/false, false)
+ .compressedBucket);
}
TEST_F(BucketUnpackerTest, GetNextHandlesMissingMetaInBucket) {
@@ -565,7 +576,8 @@ TEST_F(BucketUnpackerTest, GetNextHandlesMissingMetaInBucket) {
};
test(bucket);
- test(*timeseries::compressBucket(bucket, "time"_sd, {}, false).compressedBucket);
+ test(*timeseries::compressBucket(bucket, "time"_sd, {}, /*eligibleForReopening=*/false, false)
+ .compressedBucket);
}
TEST_F(BucketUnpackerTest, EmptyDataRegionInBucketIsTolerated) {
@@ -887,7 +899,8 @@ TEST_F(BucketUnpackerTest, TamperedCompressedCountLess) {
"a:{'0':1, '1':2}, b:{'1':1}}}");
auto compressedBucket =
- timeseries::compressBucket(bucket, "time"_sd, {}, false).compressedBucket;
+ timeseries::compressBucket(bucket, "time"_sd, {}, /*eligibleForReopening=*/false, false)
+ .compressedBucket;
// Reduce the count by one to be 1.
auto modifiedCompressedBucket = modifyCompressedBucketElementCount(*compressedBucket, -1);
@@ -922,7 +935,8 @@ TEST_F(BucketUnpackerTest, TamperedCompressedCountMore) {
"a:{'0':1, '1':2}, b:{'1':1}}}");
auto compressedBucket =
- timeseries::compressBucket(bucket, "time"_sd, {}, false).compressedBucket;
+ timeseries::compressBucket(bucket, "time"_sd, {}, /*eligibleForReopening=*/false, false)
+ .compressedBucket;
// Increase the count by one to be 3.
auto modifiedCompressedBucket = modifyCompressedBucketElementCount(*compressedBucket, 1);
@@ -957,7 +971,8 @@ TEST_F(BucketUnpackerTest, TamperedCompressedCountMissing) {
"a:{'0':1, '1':2}, b:{'1':1}}}");
auto compressedBucket =
- timeseries::compressBucket(bucket, "time"_sd, {}, false).compressedBucket;
+ timeseries::compressBucket(bucket, "time"_sd, {}, /*eligibleForReopening=*/false, false)
+ .compressedBucket;
// Remove the count field
auto modifiedCompressedBucket = modifyCompressedBucketElementCount(*compressedBucket, 0);
@@ -993,7 +1008,8 @@ TEST_F(BucketUnpackerTest, TamperedCompressedElementMismatchDataField) {
"a:{'0':1, '1':2}, b:{'1':1}}}");
auto compressedBucket =
- timeseries::compressBucket(bucket, "time"_sd, {}, false).compressedBucket;
+ timeseries::compressBucket(bucket, "time"_sd, {}, /*eligibleForReopening=*/false, false)
+ .compressedBucket;
// Remove an element in the "a" field.
auto modifiedCompressedBucket =
modifyCompressedBucketRemoveLastInField(*compressedBucket, "a"_sd);
@@ -1028,7 +1044,8 @@ TEST_F(BucketUnpackerTest, TamperedCompressedElementMismatchTimeField) {
"a:{'0':1, '1':2}, b:{'1':1}}}");
auto compressedBucket =
- timeseries::compressBucket(bucket, "time"_sd, {}, false).compressedBucket;
+ timeseries::compressBucket(bucket, "time"_sd, {}, /*eligibleForReopening=*/false, false)
+ .compressedBucket;
// Remove an element in the time field
auto modifiedCompressedBucket =
modifyCompressedBucketRemoveLastInField(*compressedBucket, "time"_sd);
diff --git a/src/mongo/db/exec/collection_scan.cpp b/src/mongo/db/exec/collection_scan.cpp
index b8f0df82dcd..19d8033d3f1 100644
--- a/src/mongo/db/exec/collection_scan.cpp
+++ b/src/mongo/db/exec/collection_scan.cpp
@@ -80,7 +80,7 @@ CollectionScan::CollectionScan(ExpressionContext* expCtx,
// The 'minRecord' and 'maxRecord' parameters are used for a special optimization that
// applies only to forwards scans of the oplog and scans on clustered collections.
invariant(!params.resumeAfterRecordId);
- if (collection->ns().isOplog()) {
+ if (collection->ns().isOplogOrChangeCollection()) {
invariant(params.direction == CollectionScanParams::FORWARD);
} else {
invariant(collection->isClustered());
@@ -109,17 +109,26 @@ CollectionScan::CollectionScan(ExpressionContext* expCtx,
"collection scan bounds",
"min"_attr = (!_params.minRecord) ? "none" : _params.minRecord->toString(),
"max"_attr = (!_params.maxRecord) ? "none" : _params.maxRecord->toString());
- invariant(!_params.shouldTrackLatestOplogTimestamp || collection->ns().isOplog());
-
- if (params.assertTsHasNotFallenOffOplog) {
- invariant(params.shouldTrackLatestOplogTimestamp);
- invariant(params.direction == CollectionScanParams::FORWARD);
+ tassert(6521000,
+ "Expected an oplog or a change collection with 'shouldTrackLatestOplogTimestamp'",
+ !_params.shouldTrackLatestOplogTimestamp ||
+ collection->ns().isOplogOrChangeCollection());
+
+ if (params.assertTsHasNotFallenOff) {
+ tassert(6521001,
+ "Expected 'shouldTrackLatestOplogTimestamp' with 'assertTsHasNotFallenOff'",
+ params.shouldTrackLatestOplogTimestamp);
+ tassert(6521002,
+ "Expected forward collection scan with 'assertTsHasNotFallenOff'",
+ params.direction == CollectionScanParams::FORWARD);
}
if (params.resumeAfterRecordId) {
// The 'resumeAfterRecordId' parameter is used for resumable collection scans, which we
// only support in the forward direction.
- invariant(params.direction == CollectionScanParams::FORWARD);
+ tassert(6521003,
+ "Expected forward collection scan with 'resumeAfterRecordId'",
+ params.direction == CollectionScanParams::FORWARD);
}
}
@@ -227,8 +236,8 @@ PlanStage::StageState CollectionScan::doWork(WorkingSetID* out) {
}
_lastSeenId = record->id;
- if (_params.assertTsHasNotFallenOffOplog) {
- assertTsHasNotFallenOffOplog(*record);
+ if (_params.assertTsHasNotFallenOff) {
+ assertTsHasNotFallenOff(*record);
}
if (_params.shouldTrackLatestOplogTimestamp) {
setLatestOplogEntryTimestamp(*record);
@@ -259,22 +268,28 @@ void CollectionScan::setLatestOplogEntryTimestamp(const Record& record) {
_latestOplogEntryTimestamp = std::max(_latestOplogEntryTimestamp, tsElem.timestamp());
}
-void CollectionScan::assertTsHasNotFallenOffOplog(const Record& record) {
- // If the first entry we see in the oplog is the replset initialization, then it doesn't matter
- // if its timestamp is later than the timestamp that should not have fallen off the oplog; no
- // events earlier can have fallen off this oplog. Otherwise, verify that the timestamp of the
- // first observed oplog entry is earlier than or equal to timestamp that should not have fallen
- // off the oplog.
+void CollectionScan::assertTsHasNotFallenOff(const Record& record) {
auto oplogEntry = uassertStatusOK(repl::OplogEntry::parse(record.data.toBson()));
invariant(_specificStats.docsTested == 0);
+
+ // If the first entry we see in the oplog is the replset initialization, then it doesn't matter
+ // if its timestamp is later than the timestamp that should not have fallen off the oplog; no
+ // events earlier can have fallen off this oplog.
+ // NOTE: A change collection can be created at any moment as such it might not have replset
+ // initialization message, as such this case is not fully applicable for the change collection.
const bool isNewRS =
oplogEntry.getObject().binaryEqual(BSON("msg" << repl::kInitiatingSetMsg)) &&
oplogEntry.getOpType() == repl::OpTypeEnum::kNoop;
+
+ // Verify that the timestamp of the first observed oplog entry is earlier than or equal to
+ // timestamp that should not have fallen off the oplog.
+ const bool tsHasNotFallenOff = oplogEntry.getTimestamp() <= *_params.assertTsHasNotFallenOff;
+
uassert(ErrorCodes::OplogQueryMinTsMissing,
"Specified timestamp has already fallen off the oplog",
- isNewRS || oplogEntry.getTimestamp() <= *_params.assertTsHasNotFallenOffOplog);
+ isNewRS || tsHasNotFallenOff);
// We don't need to check this assertion again after we've confirmed the first oplog event.
- _params.assertTsHasNotFallenOffOplog = boost::none;
+ _params.assertTsHasNotFallenOff = boost::none;
}
namespace {
diff --git a/src/mongo/db/exec/collection_scan.h b/src/mongo/db/exec/collection_scan.h
index f9ce637dbad..a3737635ad6 100644
--- a/src/mongo/db/exec/collection_scan.h
+++ b/src/mongo/db/exec/collection_scan.h
@@ -117,7 +117,7 @@ private:
/**
* Asserts that the minimum timestamp in the query filter has not already fallen off the oplog.
*/
- void assertTsHasNotFallenOffOplog(const Record& record);
+ void assertTsHasNotFallenOff(const Record& record);
// WorkingSet is not owned by us.
WorkingSet* _workingSet;
diff --git a/src/mongo/db/exec/collection_scan_common.h b/src/mongo/db/exec/collection_scan_common.h
index ba5559a4491..a0e550a904d 100644
--- a/src/mongo/db/exec/collection_scan_common.h
+++ b/src/mongo/db/exec/collection_scan_common.h
@@ -98,7 +98,7 @@ struct CollectionScanParams {
bool tailable = false;
// Assert that the specified timestamp has not fallen off the oplog on a forward scan.
- boost::optional<Timestamp> assertTsHasNotFallenOffOplog = boost::none;
+ boost::optional<Timestamp> assertTsHasNotFallenOff = boost::none;
// Should we keep track of the timestamp of the latest oplog entry we've seen? This information
// is needed to merge cursors from the oplog in order of operation time when reading the oplog
diff --git a/src/mongo/db/exec/delete_stage.cpp b/src/mongo/db/exec/delete_stage.cpp
index 75ae33e9dc8..331a2293680 100644
--- a/src/mongo/db/exec/delete_stage.cpp
+++ b/src/mongo/db/exec/delete_stage.cpp
@@ -180,23 +180,38 @@ PlanStage::StageState DeleteStage::doWork(WorkingSetID* out) {
bool writeToOrphan = false;
if (!_params->isExplain && !_params->fromMigrate) {
- const auto action = _preWriteFilter.computeAction(member->doc.value());
- if (action == write_stage_common::PreWriteFilter::Action::kSkip) {
- LOGV2_DEBUG(5983201,
- 3,
- "Skipping delete operation to orphan document to prevent a wrong change "
- "stream event",
- "namespace"_attr = collection()->ns(),
- "record"_attr = member->doc.value());
- return PlanStage::NEED_TIME;
- } else if (action == write_stage_common::PreWriteFilter::Action::kWriteAsFromMigrate) {
- LOGV2_DEBUG(6184700,
- 3,
- "Marking delete operation to orphan document with the fromMigrate flag "
- "to prevent a wrong change stream event",
- "namespace"_attr = collection()->ns(),
- "record"_attr = member->doc.value());
- writeToOrphan = true;
+ try {
+ const auto action = _preWriteFilter.computeAction(member->doc.value());
+ if (action == write_stage_common::PreWriteFilter::Action::kSkip) {
+ LOGV2_DEBUG(
+ 5983201,
+ 3,
+ "Skipping delete operation to orphan document to prevent a wrong change "
+ "stream event",
+ "namespace"_attr = collection()->ns(),
+ "record"_attr = member->doc.value());
+ return PlanStage::NEED_TIME;
+ } else if (action == write_stage_common::PreWriteFilter::Action::kWriteAsFromMigrate) {
+ LOGV2_DEBUG(6184700,
+ 3,
+ "Marking delete operation to orphan document with the fromMigrate flag "
+ "to prevent a wrong change stream event",
+ "namespace"_attr = collection()->ns(),
+ "record"_attr = member->doc.value());
+ writeToOrphan = true;
+ }
+ } catch (const ExceptionFor<ErrorCodes::StaleConfig>& ex) {
+ if (ex->getVersionReceived() == ChunkVersion::IGNORED() &&
+ ex->getCriticalSectionSignal()) {
+ // If ChunkVersion is IGNORED and we encountered a critical section, then yield,
+ // wait for the critical section to finish and then we'll resume the write from the
+ // point we had left. We do this to prevent large multi-writes from repeatedly
+ // failing due to StaleConfig and exhausting the mongos retry attempts.
+ planExecutorShardingCriticalSectionFuture(opCtx()) = ex->getCriticalSectionSignal();
+ memberFreer.dismiss(); // Keep this member around so we can retry deleting it.
+ return prepareToRetryWSM(id, out);
+ }
+ throw;
}
}
@@ -237,6 +252,18 @@ PlanStage::StageState DeleteStage::doWork(WorkingSetID* out) {
} catch (const WriteConflictException&) {
memberFreer.dismiss(); // Keep this member around so we can retry deleting it.
return prepareToRetryWSM(id, out);
+ } catch (const ExceptionFor<ErrorCodes::StaleConfig>& ex) {
+ if (ex->getVersionReceived() == ChunkVersion::IGNORED() &&
+ ex->getCriticalSectionSignal()) {
+ // If ChunkVersion is IGNORED and we encountered a critical section, then yield,
+ // wait for the critical section to finish and then we'll resume the write from the
+ // point we had left. We do this to prevent large multi-writes from repeatedly
+ // failing due to StaleConfig and exhausting the mongos retry attempts.
+ planExecutorShardingCriticalSectionFuture(opCtx()) = ex->getCriticalSectionSignal();
+ memberFreer.dismiss(); // Keep this member around so we can retry deleting it.
+ return prepareToRetryWSM(id, out);
+ }
+ throw;
}
}
_specificStats.docsDeleted += _params->numStatsForDoc ? _params->numStatsForDoc(bsonObjDoc) : 1;
diff --git a/src/mongo/db/exec/multi_plan.cpp b/src/mongo/db/exec/multi_plan.cpp
index 1db8860dc2e..0dbb0c4a405 100644
--- a/src/mongo/db/exec/multi_plan.cpp
+++ b/src/mongo/db/exec/multi_plan.cpp
@@ -46,6 +46,7 @@
#include "mongo/db/query/classic_plan_cache.h"
#include "mongo/db/query/collection_query_info.h"
#include "mongo/db/query/explain.h"
+#include "mongo/db/query/multiple_collection_accessor.h"
#include "mongo/db/query/plan_cache_key_factory.h"
#include "mongo/db/query/plan_ranker.h"
#include "mongo/db/query/plan_ranker_util.h"
@@ -280,8 +281,12 @@ Status MultiPlanStage::pickBestPlan(PlanYieldPolicy* yieldPolicy) {
}
}
- plan_cache_util::updatePlanCache(
- expCtx()->opCtx, collection(), _cachingMode, *_query, std::move(ranking), _candidates);
+ plan_cache_util::updatePlanCache(expCtx()->opCtx,
+ MultipleCollectionAccessor(collection()),
+ _cachingMode,
+ *_query,
+ std::move(ranking),
+ _candidates);
return Status::OK();
}
diff --git a/src/mongo/db/exec/plan_cache_util.cpp b/src/mongo/db/exec/plan_cache_util.cpp
index 85d5c823849..a3fc5ff19d1 100644
--- a/src/mongo/db/exec/plan_cache_util.cpp
+++ b/src/mongo/db/exec/plan_cache_util.cpp
@@ -74,17 +74,17 @@ void logNotCachingNoData(std::string&& solution) {
} // namespace log_detail
void updatePlanCache(OperationContext* opCtx,
- const CollectionPtr& collection,
+ const MultipleCollectionAccessor& collections,
const CanonicalQuery& query,
const QuerySolution& solution,
const sbe::PlanStage& root,
const stage_builder::PlanStageData& data) {
- // TODO SERVER-61507: Integration between lowering parts of aggregation pipeline into the find
- // subsystem and the new SBE cache isn't implemented yet. Remove cq->pipeline().empty() check
- // once it's implemented.
- if (shouldCacheQuery(query) && collection && query.pipeline().empty() &&
+ // TODO SERVER-61507: Remove canUseSbePlanCache check once $group pushdown is
+ // integrated with SBE plan cache.
+ if (shouldCacheQuery(query) && collections.getMainCollection() &&
+ canonical_query_encoder::canUseSbePlanCache(query) &&
feature_flags::gFeatureFlagSbePlanCache.isEnabledAndIgnoreFCV()) {
- auto key = plan_cache_key_factory::make<sbe::PlanCacheKey>(query, collection);
+ auto key = plan_cache_key_factory::make(query, collections);
auto plan = std::make_unique<sbe::CachedSbePlan>(root.clone(), data);
plan->indexFilterApplied = solution.indexFilterApplied;
sbe::getPlanCache(opCtx).setPinned(
diff --git a/src/mongo/db/exec/plan_cache_util.h b/src/mongo/db/exec/plan_cache_util.h
index 630458cbcd4..2fb16d8be89 100644
--- a/src/mongo/db/exec/plan_cache_util.h
+++ b/src/mongo/db/exec/plan_cache_util.h
@@ -32,6 +32,7 @@
#include "mongo/db/exec/plan_stats.h"
#include "mongo/db/query/canonical_query.h"
#include "mongo/db/query/collection_query_info.h"
+#include "mongo/db/query/multiple_collection_accessor.h"
#include "mongo/db/query/plan_cache_debug_info.h"
#include "mongo/db/query/plan_cache_key_factory.h"
#include "mongo/db/query/plan_explainer_factory.h"
@@ -98,7 +99,7 @@ plan_cache_debug_info::DebugInfoSBE buildDebugInfo(const QuerySolution* solution
template <typename PlanStageType, typename ResultType, typename Data>
void updatePlanCache(
OperationContext* opCtx,
- const CollectionPtr& collection,
+ const MultipleCollectionAccessor& collections,
PlanCachingMode cachingMode,
const CanonicalQuery& query,
std::unique_ptr<plan_ranker::PlanRankingDecision> ranking,
@@ -183,6 +184,7 @@ void updatePlanCache(
callbacks{query, buildDebugInfoFn};
winningPlan.solution->cacheData->indexFilterApplied =
winningPlan.solution->indexFilterApplied;
+ auto& collection = collections.getMainCollection();
uassertStatusOK(CollectionQueryInfo::get(collection)
.getPlanCache()
->set(plan_cache_key_factory::make<PlanCacheKey>(query, collection),
@@ -195,10 +197,10 @@ void updatePlanCache(
if (winningPlan.solution->cacheData != nullptr) {
if constexpr (std::is_same_v<PlanStageType, std::unique_ptr<sbe::PlanStage>>) {
- // TODO SERVER-61507: Integration between lowering parts of aggregation pipeline
- // into the find subsystem and the new SBE cache isn't implemented yet.
+ // TODO SERVER-61507: Remove canUseSbePlanCache check once $group pushdown
+ // is integrated with SBE plan cache.
if (feature_flags::gFeatureFlagSbePlanCache.isEnabledAndIgnoreFCV() &&
- query.pipeline().empty()) {
+ canonical_query_encoder::canUseSbePlanCache(query)) {
tassert(6142201,
"The winning CandidatePlan should contain the original plan",
winningPlan.clonedPlan);
@@ -215,16 +217,16 @@ void updatePlanCache(
plan_cache_debug_info::DebugInfoSBE>
callbacks{query, buildDebugInfoFn};
uassertStatusOK(sbe::getPlanCache(opCtx).set(
- plan_cache_key_factory::make<sbe::PlanCacheKey>(query, collection),
+ plan_cache_key_factory::make(query, collections),
std::move(cachedPlan),
*rankingDecision,
opCtx->getServiceContext()->getPreciseClockSource()->now(),
&callbacks,
boost::none /* worksGrowthCoefficient */));
} else {
- // TODO(SERVER-61507, SERVER-64882): Fall back to use the classic plan cache.
- // Remove this branch after "gFeatureFlagSbePlanCache" is removed and lowering
- // parts of pipeline is integrated with SBE cache.
+ // TODO(SERVER-64882, SERVER-61507): Fall back to use the classic plan cache.
+ // Remove this branch after "gFeatureFlagSbePlanCache" is removed and $group
+ // pushdown is integrated with SBE plan cache.
cacheClassicPlan();
}
} else {
@@ -245,7 +247,7 @@ void updatePlanCache(
* the cache, the plan immediately becomes "active".
*/
void updatePlanCache(OperationContext* opCtx,
- const CollectionPtr& collection,
+ const MultipleCollectionAccessor& collections,
const CanonicalQuery& query,
const QuerySolution& solution,
const sbe::PlanStage& root,
diff --git a/src/mongo/db/exec/sbe/SConscript b/src/mongo/db/exec/sbe/SConscript
index 6a2503d7f26..6ee97450f2b 100644
--- a/src/mongo/db/exec/sbe/SConscript
+++ b/src/mongo/db/exec/sbe/SConscript
@@ -223,6 +223,7 @@ env.CppUnitTest(
'$BUILD_DIR/mongo/db/query/collation/collator_interface_mock',
'$BUILD_DIR/mongo/db/service_context_d_test_fixture',
'$BUILD_DIR/mongo/db/service_context_test_fixture',
+ '$BUILD_DIR/mongo/util/pcre_wrapper',
'sbe_plan_stage_test',
],
)
diff --git a/src/mongo/db/exec/sbe/abt/abt_lower.cpp b/src/mongo/db/exec/sbe/abt/abt_lower.cpp
index a706d150d61..25ea76bd8d0 100644
--- a/src/mongo/db/exec/sbe/abt/abt_lower.cpp
+++ b/src/mongo/db/exec/sbe/abt/abt_lower.cpp
@@ -990,9 +990,20 @@ std::unique_ptr<sbe::PlanStage> SBENodeLowering::walk(const IndexScanNode& n, co
generateSlots(fieldProjectionMap, ridSlot, rootSlot, fields, vars);
uassert(6624233, "Cannot deliver root projection in this context", !rootSlot.has_value());
+ std::vector<std::pair<size_t, sbe::value::SlotId>> indexVars;
sbe::IndexKeysInclusionSet indexKeysToInclude;
- for (const std::string& fieldName : fields) {
- indexKeysToInclude.set(decodeIndexKeyName(fieldName), true);
+
+ for (size_t index = 0; index < fields.size(); index++) {
+ const size_t indexFieldPos = decodeIndexKeyName(fields.at(index));
+ indexVars.emplace_back(indexFieldPos, vars.at(index));
+ indexKeysToInclude.set(indexFieldPos, true);
+ }
+
+ // Make sure vars are in sorted order on index field position.
+ std::sort(indexVars.begin(), indexVars.end());
+ vars.clear();
+ for (const auto& [indexFieldPos, slot] : indexVars) {
+ vars.push_back(slot);
}
auto lowerBoundExpr = convertBoundsToExpr(true /*isLower*/, indexDef, interval);
diff --git a/src/mongo/db/exec/sbe/abt/sbe_abt_diff_test.cpp b/src/mongo/db/exec/sbe/abt/sbe_abt_diff_test.cpp
index 252b7ce52b4..34aa0441994 100644
--- a/src/mongo/db/exec/sbe/abt/sbe_abt_diff_test.cpp
+++ b/src/mongo/db/exec/sbe/abt/sbe_abt_diff_test.cpp
@@ -247,6 +247,14 @@ TEST_F(NodeSBE, DiffTest) {
ASSERT_TRUE(compare("[{$match: {'a': {$ne: 2}}}]",
{"{a: 1}", "{a: 2}", "{a: [1, 2]}", "{a: [1]}", "{a: [2]}"}));
+
+
+ ASSERT_TRUE(compare("[{$project: {concat: {$concat: ['$a', ' - ', '$b', ' - ', '$c']}}}]",
+ {"{a: 'a1', b: 'b1', c: 'c1'}"}));
+ ASSERT_TRUE(compare(
+ "[{$project: {res1: {$divide: ['$a', '$b']}, res2: {$divide: ['$c', '$a']}, res3: {$mod: "
+ "['$d', '$b']}, res4: {$abs: '$e'}, res5: {$floor: '$f'}, res6: {$ceil: {$ln: '$d'}}}}]",
+ {"{a: 5, b: 10, c: 20, d: 25, e: -5, f: 2.4}"}));
}
} // namespace
diff --git a/src/mongo/db/exec/sbe/expressions/expression.cpp b/src/mongo/db/exec/sbe/expressions/expression.cpp
index 61812667316..970543b706a 100644
--- a/src/mongo/db/exec/sbe/expressions/expression.cpp
+++ b/src/mongo/db/exec/sbe/expressions/expression.cpp
@@ -582,6 +582,11 @@ static stdx::unordered_map<std::string, InstrFn> kInstrFunctions = {
{"collMin", InstrFn{[](size_t n) { return n == 2; }, &vm::CodeFragment::appendCollMin, true}},
{"collMax", InstrFn{[](size_t n) { return n == 2; }, &vm::CodeFragment::appendCollMax, true}},
{"mod", InstrFn{[](size_t n) { return n == 2; }, &vm::CodeFragment::appendMod, false}},
+ // Note that we do not provide a pointer to a function for appending the 'applyClassicMatcher'
+ // instruction, because it's required that the first argument to applyClassicMatcher be a
+ // constant MatchExpression. This constant is stored as part of the bytecode itself, to avoid
+ // the stack manipulation overhead.
+ {"applyClassicMatcher", InstrFn{[](size_t n) { return n == 2; }, nullptr, false}},
};
} // namespace
@@ -689,6 +694,18 @@ vm::CodeFragment EFunction::compileDirect(CompileCtx& ctx) const {
code.appendTraverseP(bodyPosition);
return code;
+ } else if (_name == "applyClassicMatcher") {
+ tassert(6681400,
+ "First argument to applyClassicMatcher must be constant",
+ _nodes[0]->as<EConstant>());
+ auto [matcherTag, matcherVal] = _nodes[0]->as<EConstant>()->getConstant();
+ tassert(6681409,
+ "First argument to applyClassicMatcher must be a classic matcher",
+ matcherTag == value::TypeTags::classicMatchExpresion);
+
+ code.append(_nodes[1]->compileDirect(ctx));
+ code.appendApplyClassicMatcher(value::getClassicMatchExpressionView(matcherVal));
+ return code;
}
// The order of evaluation is flipped for instruction functions. We may want to change the
diff --git a/src/mongo/db/exec/sbe/sbe_test.cpp b/src/mongo/db/exec/sbe/sbe_test.cpp
index 323368a5334..5a577f02462 100644
--- a/src/mongo/db/exec/sbe/sbe_test.cpp
+++ b/src/mongo/db/exec/sbe/sbe_test.cpp
@@ -31,6 +31,7 @@
#include "mongo/db/exec/sbe/values/value.h"
#include "mongo/db/exec/sbe/vm/vm.h"
#include "mongo/unittest/unittest.h"
+#include "mongo/util/pcre.h"
namespace mongo::sbe {
@@ -421,6 +422,85 @@ TEST(SBEVM, ConvertBinDataToBsonObj) {
namespace {
+// The hex representation of memory addresses in the output of CodeFragment::toString() differs on
+// Linux and Windows machines so 'addrPattern' is used to cover both cases.
+static const std::string kLinuxAddrPattern{"(0x[a-f0-9]+)"};
+static const std::string kWindowsAddrPattern{"([A-F0-9]+)"};
+static const std::string kAddrPattern{"(" + kLinuxAddrPattern + "|" + kWindowsAddrPattern + ")"};
+
+// The beginning of the output from CodeFragment::toString() gives a range of the addresses that
+// 'pcPointer' will traverse.
+static const std::string kPcPointerRangePattern{"(\\[" + kAddrPattern + ")-(" + kAddrPattern +
+ ")\\])"};
+
+/**
+ * Creates a pcre pattern to match the instructions in the output of CodeFragment::toString(). Any
+ * arguments must be passed in a single comma separated string, and no arguments can be represented
+ * using an empty string.
+ */
+std::string instrPattern(std::string op, std::string args) {
+ return "(" + kAddrPattern + ": " + op + "\\(" + args + "\\); )";
+}
+} // namespace
+
+TEST(SBEVM, CodeFragmentToString) {
+ {
+ vm::CodeFragment code;
+ std::string toStringPattern{kPcPointerRangePattern + "( )"};
+
+ code.appendDiv();
+ toStringPattern += instrPattern("div", "");
+ code.appendMul();
+ toStringPattern += instrPattern("mul", "");
+ code.appendAdd();
+ toStringPattern += instrPattern("add", "");
+
+ std::string instrs = code.toString();
+
+ static const pcre::Regex validToStringOutput{toStringPattern};
+
+ ASSERT_TRUE(!!validToStringOutput.matchView(instrs));
+ }
+}
+
+TEST(SBEVM, CodeFragmentToStringArgs) {
+ {
+ vm::CodeFragment code;
+ std::string toStringPattern{kAddrPattern};
+
+ code.appendFillEmpty(vm::Instruction::True);
+ toStringPattern += instrPattern("fillEmptyConst", "k: True");
+ code.appendFillEmpty(vm::Instruction::Null);
+ toStringPattern += instrPattern("fillEmptyConst", "k: Null");
+ code.appendFillEmpty(vm::Instruction::False);
+ toStringPattern += instrPattern("fillEmptyConst", "k: False");
+
+ code.appendTraverseP(0xAA);
+ auto offsetP = 0xAA - code.instrs().size();
+ toStringPattern += instrPattern("traversePConst", "offset: " + std::to_string(offsetP));
+ code.appendTraverseF(0xBB, vm::Instruction::True);
+ auto offsetF = 0xBB - code.instrs().size();
+ toStringPattern +=
+ instrPattern("traverseFConst", "k: True, offset: " + std::to_string(offsetF));
+
+ auto [tag, val] = value::makeNewString("Hello world!");
+ value::ValueGuard guard{tag, val};
+ code.appendGetField(tag, val);
+ toStringPattern += instrPattern("getFieldConst", "value: \"Hello world!\"");
+
+ code.appendAdd();
+ toStringPattern += instrPattern("add", "");
+
+ std::string instrs = code.toString();
+
+ static const pcre::Regex validToStringOutput{toStringPattern};
+
+ ASSERT_TRUE(!!validToStringOutput.matchView(instrs));
+ }
+}
+
+namespace {
+
/**
* Fills bytes after the null terminator in the string with 'pattern'.
*
diff --git a/src/mongo/db/exec/sbe/stages/branch.cpp b/src/mongo/db/exec/sbe/stages/branch.cpp
index bec12b12ee2..adbbd533273 100644
--- a/src/mongo/db/exec/sbe/stages/branch.cpp
+++ b/src/mongo/db/exec/sbe/stages/branch.cpp
@@ -42,8 +42,9 @@ BranchStage::BranchStage(std::unique_ptr<PlanStage> inputThen,
value::SlotVector inputThenVals,
value::SlotVector inputElseVals,
value::SlotVector outputVals,
- PlanNodeId planNodeId)
- : PlanStage("branch"_sd, planNodeId),
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage("branch"_sd, planNodeId, participateInTrialRunTracking),
_filter(std::move(filter)),
_inputThenVals(std::move(inputThenVals)),
_inputElseVals(std::move(inputElseVals)),
@@ -61,7 +62,8 @@ std::unique_ptr<PlanStage> BranchStage::clone() const {
_inputThenVals,
_inputElseVals,
_outputVals,
- _commonStats.nodeId);
+ _commonStats.nodeId,
+ _participateInTrialRunTracking);
}
void BranchStage::prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/branch.h b/src/mongo/db/exec/sbe/stages/branch.h
index 67b5af8a517..df813e762a4 100644
--- a/src/mongo/db/exec/sbe/stages/branch.h
+++ b/src/mongo/db/exec/sbe/stages/branch.h
@@ -52,7 +52,8 @@ public:
value::SlotVector inputThenVals,
value::SlotVector inputElseVals,
value::SlotVector outputVals,
- PlanNodeId planNodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
diff --git a/src/mongo/db/exec/sbe/stages/bson_scan.cpp b/src/mongo/db/exec/sbe/stages/bson_scan.cpp
index c340071ba0e..3a4c3b50512 100644
--- a/src/mongo/db/exec/sbe/stages/bson_scan.cpp
+++ b/src/mongo/db/exec/sbe/stages/bson_scan.cpp
@@ -42,8 +42,9 @@ BSONScanStage::BSONScanStage(const char* bsonBegin,
boost::optional<value::SlotId> recordSlot,
std::vector<std::string> fields,
value::SlotVector vars,
- PlanNodeId planNodeId)
- : PlanStage("bsonscan"_sd, planNodeId),
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage("bsonscan"_sd, planNodeId, participateInTrialRunTracking),
_bsonBegin(bsonBegin),
_bsonEnd(bsonEnd),
_recordSlot(recordSlot),
@@ -52,8 +53,13 @@ BSONScanStage::BSONScanStage(const char* bsonBegin,
_bsonCurrent(bsonBegin) {}
std::unique_ptr<PlanStage> BSONScanStage::clone() const {
- return std::make_unique<BSONScanStage>(
- _bsonBegin, _bsonEnd, _recordSlot, _fields, _vars, _commonStats.nodeId);
+ return std::make_unique<BSONScanStage>(_bsonBegin,
+ _bsonEnd,
+ _recordSlot,
+ _fields,
+ _vars,
+ _commonStats.nodeId,
+ _participateInTrialRunTracking);
}
void BSONScanStage::prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/bson_scan.h b/src/mongo/db/exec/sbe/stages/bson_scan.h
index 7804bcd4149..79238f695a2 100644
--- a/src/mongo/db/exec/sbe/stages/bson_scan.h
+++ b/src/mongo/db/exec/sbe/stages/bson_scan.h
@@ -51,7 +51,8 @@ public:
boost::optional<value::SlotId> recordSlot,
std::vector<std::string> fields,
value::SlotVector vars,
- PlanNodeId planNodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
diff --git a/src/mongo/db/exec/sbe/stages/check_bounds.cpp b/src/mongo/db/exec/sbe/stages/check_bounds.cpp
index 483e9f50260..bc62b089005 100644
--- a/src/mongo/db/exec/sbe/stages/check_bounds.cpp
+++ b/src/mongo/db/exec/sbe/stages/check_bounds.cpp
@@ -39,8 +39,9 @@ CheckBoundsStage::CheckBoundsStage(std::unique_ptr<PlanStage> input,
value::SlotId inKeySlot,
value::SlotId inRecordIdSlot,
value::SlotId outSlot,
- PlanNodeId planNodeId)
- : PlanStage{"chkbounds"_sd, planNodeId},
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage{"chkbounds"_sd, planNodeId, participateInTrialRunTracking},
_params{std::move(params)},
_inKeySlot{inKeySlot},
_inRecordIdSlot{inRecordIdSlot},
@@ -49,8 +50,13 @@ CheckBoundsStage::CheckBoundsStage(std::unique_ptr<PlanStage> input,
}
std::unique_ptr<PlanStage> CheckBoundsStage::clone() const {
- return std::make_unique<CheckBoundsStage>(
- _children[0]->clone(), _params, _inKeySlot, _inRecordIdSlot, _outSlot, _commonStats.nodeId);
+ return std::make_unique<CheckBoundsStage>(_children[0]->clone(),
+ _params,
+ _inKeySlot,
+ _inRecordIdSlot,
+ _outSlot,
+ _commonStats.nodeId,
+ _participateInTrialRunTracking);
}
void CheckBoundsStage::prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/check_bounds.h b/src/mongo/db/exec/sbe/stages/check_bounds.h
index 29f52faa523..dbdf87938f7 100644
--- a/src/mongo/db/exec/sbe/stages/check_bounds.h
+++ b/src/mongo/db/exec/sbe/stages/check_bounds.h
@@ -76,7 +76,8 @@ public:
value::SlotId inKeySlot,
value::SlotId inRecordIdSlot,
value::SlotId outSlot,
- PlanNodeId planNodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
diff --git a/src/mongo/db/exec/sbe/stages/co_scan.cpp b/src/mongo/db/exec/sbe/stages/co_scan.cpp
index 73e89a5e87e..9666d03cf01 100644
--- a/src/mongo/db/exec/sbe/stages/co_scan.cpp
+++ b/src/mongo/db/exec/sbe/stages/co_scan.cpp
@@ -34,11 +34,14 @@
#include "mongo/db/exec/sbe/expressions/expression.h"
namespace mongo::sbe {
-CoScanStage::CoScanStage(PlanNodeId planNodeId, PlanYieldPolicy* yieldPolicy)
- : PlanStage("coscan"_sd, yieldPolicy, planNodeId) {}
+CoScanStage::CoScanStage(PlanNodeId planNodeId,
+ PlanYieldPolicy* yieldPolicy,
+ bool participateInTrialRunTracking)
+ : PlanStage("coscan"_sd, yieldPolicy, planNodeId, participateInTrialRunTracking) {}
std::unique_ptr<PlanStage> CoScanStage::clone() const {
- return std::make_unique<CoScanStage>(_commonStats.nodeId);
+ return std::make_unique<CoScanStage>(
+ _commonStats.nodeId, _yieldPolicy, _participateInTrialRunTracking);
}
void CoScanStage::prepare(CompileCtx& ctx) {}
value::SlotAccessor* CoScanStage::getAccessor(CompileCtx& ctx, value::SlotId slot) {
diff --git a/src/mongo/db/exec/sbe/stages/co_scan.h b/src/mongo/db/exec/sbe/stages/co_scan.h
index 4625b636a14..1f8c8d5404d 100644
--- a/src/mongo/db/exec/sbe/stages/co_scan.h
+++ b/src/mongo/db/exec/sbe/stages/co_scan.h
@@ -42,7 +42,9 @@ namespace mongo::sbe {
*/
class CoScanStage final : public PlanStage {
public:
- explicit CoScanStage(PlanNodeId, PlanYieldPolicy* yieldPolicy = nullptr);
+ explicit CoScanStage(PlanNodeId,
+ PlanYieldPolicy* yieldPolicy = nullptr,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
diff --git a/src/mongo/db/exec/sbe/stages/column_scan.cpp b/src/mongo/db/exec/sbe/stages/column_scan.cpp
index 8058307a916..24f769fa2c7 100644
--- a/src/mongo/db/exec/sbe/stages/column_scan.cpp
+++ b/src/mongo/db/exec/sbe/stages/column_scan.cpp
@@ -59,8 +59,9 @@ ColumnScanStage::ColumnScanStage(UUID collectionUuid,
std::vector<std::unique_ptr<EExpression>> pathExprs,
value::SlotId rowStoreSlot,
PlanYieldPolicy* yieldPolicy,
- PlanNodeId nodeId)
- : PlanStage("columnscan"_sd, yieldPolicy, nodeId),
+ PlanNodeId nodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage("columnscan"_sd, yieldPolicy, nodeId, participateInTrialRunTracking),
_collUuid(collectionUuid),
_columnIndexName(columnIndexName),
_fieldSlots(std::move(fieldSlots)),
@@ -89,7 +90,8 @@ std::unique_ptr<PlanStage> ColumnScanStage::clone() const {
std::move(pathExprs),
_rowStoreSlot,
_yieldPolicy,
- _commonStats.nodeId);
+ _commonStats.nodeId,
+ _participateInTrialRunTracking);
}
void ColumnScanStage::prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/column_scan.h b/src/mongo/db/exec/sbe/stages/column_scan.h
index d00d4641171..1efeef25bca 100644
--- a/src/mongo/db/exec/sbe/stages/column_scan.h
+++ b/src/mongo/db/exec/sbe/stages/column_scan.h
@@ -53,7 +53,8 @@ public:
std::vector<std::unique_ptr<EExpression>> pathExprs,
value::SlotId internalSlot,
PlanYieldPolicy* yieldPolicy,
- PlanNodeId nodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
diff --git a/src/mongo/db/exec/sbe/stages/exchange.cpp b/src/mongo/db/exec/sbe/stages/exchange.cpp
index 8cd7b065559..fdbb6531913 100644
--- a/src/mongo/db/exec/sbe/stages/exchange.cpp
+++ b/src/mongo/db/exec/sbe/stages/exchange.cpp
@@ -171,8 +171,9 @@ ExchangeConsumer::ExchangeConsumer(std::unique_ptr<PlanStage> input,
ExchangePolicy policy,
std::unique_ptr<EExpression> partition,
std::unique_ptr<EExpression> orderLess,
- PlanNodeId planNodeId)
- : PlanStage("exchange"_sd, planNodeId) {
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage("exchange"_sd, planNodeId, participateInTrialRunTracking) {
_children.emplace_back(std::move(input));
_state = std::make_shared<ExchangeState>(
numOfProducers, std::move(fields), policy, std::move(partition), std::move(orderLess));
@@ -186,13 +187,16 @@ ExchangeConsumer::ExchangeConsumer(std::unique_ptr<PlanStage> input,
uassert(5922202, "partition expression must not be present", !_state->partitionExpr());
}
}
-ExchangeConsumer::ExchangeConsumer(std::shared_ptr<ExchangeState> state, PlanNodeId planNodeId)
- : PlanStage("exchange"_sd, planNodeId), _state(state) {
+ExchangeConsumer::ExchangeConsumer(std::shared_ptr<ExchangeState> state,
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage("exchange"_sd, planNodeId, participateInTrialRunTracking), _state(state) {
_tid = _state->addConsumer(this);
_orderPreserving = _state->isOrderPreserving();
}
std::unique_ptr<PlanStage> ExchangeConsumer::clone() const {
- return std::make_unique<ExchangeConsumer>(_state, _commonStats.nodeId);
+ return std::make_unique<ExchangeConsumer>(
+ _state, _commonStats.nodeId, _participateInTrialRunTracking);
}
void ExchangeConsumer::prepare(CompileCtx& ctx) {
for (size_t idx = 0; idx < _state->fields().size(); ++idx) {
@@ -486,8 +490,9 @@ void ExchangeProducer::closePipes() {
ExchangeProducer::ExchangeProducer(std::unique_ptr<PlanStage> input,
std::shared_ptr<ExchangeState> state,
- PlanNodeId planNodeId)
- : PlanStage("exchangep"_sd, planNodeId), _state(state) {
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage("exchangep"_sd, planNodeId, participateInTrialRunTracking), _state(state) {
_children.emplace_back(std::move(input));
_tid = _state->addProducer(this);
diff --git a/src/mongo/db/exec/sbe/stages/exchange.h b/src/mongo/db/exec/sbe/stages/exchange.h
index b94b4968f66..15928cd50fb 100644
--- a/src/mongo/db/exec/sbe/stages/exchange.h
+++ b/src/mongo/db/exec/sbe/stages/exchange.h
@@ -261,9 +261,12 @@ public:
ExchangePolicy policy,
std::unique_ptr<EExpression> partition,
std::unique_ptr<EExpression> orderLess,
- PlanNodeId planNodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
- ExchangeConsumer(std::shared_ptr<ExchangeState> state, PlanNodeId planNodeId);
+ ExchangeConsumer(std::shared_ptr<ExchangeState> state,
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
@@ -311,7 +314,8 @@ class ExchangeProducer final : public PlanStage {
public:
ExchangeProducer(std::unique_ptr<PlanStage> input,
std::shared_ptr<ExchangeState> state,
- PlanNodeId planNodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
static void start(OperationContext* opCtx,
CompileCtx& ctx,
diff --git a/src/mongo/db/exec/sbe/stages/filter.h b/src/mongo/db/exec/sbe/stages/filter.h
index 2120be1c062..059dd1c7ab4 100644
--- a/src/mongo/db/exec/sbe/stages/filter.h
+++ b/src/mongo/db/exec/sbe/stages/filter.h
@@ -58,16 +58,21 @@ class FilterStage final : public PlanStage {
public:
FilterStage(std::unique_ptr<PlanStage> input,
std::unique_ptr<EExpression> filter,
- PlanNodeId planNodeId)
- : PlanStage(IsConst ? "cfilter"_sd : (IsEof ? "efilter" : "filter"_sd), planNodeId),
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true)
+ : PlanStage(IsConst ? "cfilter"_sd : (IsEof ? "efilter" : "filter"_sd),
+ planNodeId,
+ participateInTrialRunTracking),
_filter(std::move(filter)) {
static_assert(!IsEof || !IsConst);
_children.emplace_back(std::move(input));
}
std::unique_ptr<PlanStage> clone() const final {
- return std::make_unique<FilterStage<IsConst, IsEof>>(
- _children[0]->clone(), _filter->clone(), _commonStats.nodeId);
+ return std::make_unique<FilterStage<IsConst, IsEof>>(_children[0]->clone(),
+ _filter->clone(),
+ _commonStats.nodeId,
+ _participateInTrialRunTracking);
}
void prepare(CompileCtx& ctx) final {
diff --git a/src/mongo/db/exec/sbe/stages/hash_agg.cpp b/src/mongo/db/exec/sbe/stages/hash_agg.cpp
index e3fd62cb86b..f930d4b5e95 100644
--- a/src/mongo/db/exec/sbe/stages/hash_agg.cpp
+++ b/src/mongo/db/exec/sbe/stages/hash_agg.cpp
@@ -47,8 +47,9 @@ HashAggStage::HashAggStage(std::unique_ptr<PlanStage> input,
bool optimizedClose,
boost::optional<value::SlotId> collatorSlot,
bool allowDiskUse,
- PlanNodeId planNodeId)
- : PlanStage("group"_sd, planNodeId),
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage("group"_sd, planNodeId, participateInTrialRunTracking),
_gbs(std::move(gbs)),
_aggs(std::move(aggs)),
_collatorSlot(collatorSlot),
@@ -74,7 +75,8 @@ std::unique_ptr<PlanStage> HashAggStage::clone() const {
_optimizedClose,
_collatorSlot,
_allowDiskUse,
- _commonStats.nodeId);
+ _commonStats.nodeId,
+ _participateInTrialRunTracking);
}
void HashAggStage::doSaveState(bool relinquishCursor) {
@@ -354,25 +356,18 @@ void HashAggStage::open(bool reOpen) {
key.reset(idx++, false, tag, val);
}
- if (!_recordStore) {
- // The memory limit hasn't been reached yet, accumulate state in '_ht'.
- auto [it, inserted] = _ht->try_emplace(std::move(key), value::MaterializedRow{0});
- if (inserted) {
- // Copy keys.
- const_cast<value::MaterializedRow&>(it->first).makeOwned();
- // Initialize accumulators.
- it->second.resize(_outAggAccessors.size());
- }
- // Always update the state in the '_ht' for the branch when data hasn't been
- // spilled to disk.
+
+ if (_htIt = _ht->find(key); !_recordStore && _htIt == _ht->end()) {
+ // The memory limit hasn't been reached yet, insert a new key in '_ht' by copying
+ // the key. Note as a future optimization, we should avoid the lookup in the find()
+ // call and the emplace.
+ key.makeOwned();
+ auto [it, _] = _ht->emplace(std::move(key), value::MaterializedRow{0});
+ // Initialize accumulators.
+ it->second.resize(_outAggAccessors.size());
_htIt = it;
- updateAggStateHt = true;
- } else {
- // The memory limit has been reached, accumulate state in '_ht' only if we
- // find the key in '_ht'.
- _htIt = _ht->find(key);
- updateAggStateHt = _htIt != _ht->end();
}
+ updateAggStateHt = _htIt != _ht->end();
if (updateAggStateHt) {
// Accumulate state in '_ht' by pointing the '_outAggAccessors' the
@@ -500,9 +495,9 @@ PlanState HashAggStage::getNext() {
KeyString::TypeBits::fromBuffer(KeyString::Version::kLatestVersion, &valReader);
_aggValueRecordStore = val;
- BufBuilder buf;
+ _aggKeyRSBuffer.reset();
_aggKeyRecordStore = value::MaterializedRow::deserializeFromKeyString(
- decodeKeyString(nextRecord->id, typeBits), &buf);
+ decodeKeyString(nextRecord->id, typeBits), &_aggKeyRSBuffer);
return trackPlanState(PlanState::ADVANCED);
} else {
_rsCursor.reset();
diff --git a/src/mongo/db/exec/sbe/stages/hash_agg.h b/src/mongo/db/exec/sbe/stages/hash_agg.h
index 8c117e8717d..d200c4b9c3d 100644
--- a/src/mongo/db/exec/sbe/stages/hash_agg.h
+++ b/src/mongo/db/exec/sbe/stages/hash_agg.h
@@ -75,7 +75,8 @@ public:
bool optimizedClose,
boost::optional<value::SlotId> collatorSlot,
bool allowDiskUse,
- PlanNodeId planNodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
@@ -184,6 +185,11 @@ private:
std::vector<std::unique_ptr<value::MaterializedSingleRowAccessor>> _outRecordStoreKeyAccessors;
std::vector<std::unique_ptr<value::MaterializedSingleRowAccessor>> _outRecordStoreAggAccessors;
+ // This buffer stores values for the spilled '_aggKeyRecordStore' that's loaded into memory from
+ // the '_recordStore'. Values in the '_aggKeyRecordStore' row are pointers that point to data in
+ // this buffer.
+ BufBuilder _aggKeyRSBuffer;
+
std::vector<value::SlotAccessor*> _seekKeysAccessors;
value::MaterializedRow _seekKeys;
diff --git a/src/mongo/db/exec/sbe/stages/hash_join.cpp b/src/mongo/db/exec/sbe/stages/hash_join.cpp
index 86675029c0e..bad53262acb 100644
--- a/src/mongo/db/exec/sbe/stages/hash_join.cpp
+++ b/src/mongo/db/exec/sbe/stages/hash_join.cpp
@@ -44,8 +44,9 @@ HashJoinStage::HashJoinStage(std::unique_ptr<PlanStage> outer,
value::SlotVector innerCond,
value::SlotVector innerProjects,
boost::optional<value::SlotId> collatorSlot,
- PlanNodeId planNodeId)
- : PlanStage("hj"_sd, planNodeId),
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage("hj"_sd, planNodeId, participateInTrialRunTracking),
_outerCond(std::move(outerCond)),
_outerProjects(std::move(outerProjects)),
_innerCond(std::move(innerCond)),
@@ -68,7 +69,8 @@ std::unique_ptr<PlanStage> HashJoinStage::clone() const {
_innerCond,
_innerProjects,
_collatorSlot,
- _commonStats.nodeId);
+ _commonStats.nodeId,
+ _participateInTrialRunTracking);
}
void HashJoinStage::prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/hash_join.h b/src/mongo/db/exec/sbe/stages/hash_join.h
index ed4781116d9..a3997074db0 100644
--- a/src/mongo/db/exec/sbe/stages/hash_join.h
+++ b/src/mongo/db/exec/sbe/stages/hash_join.h
@@ -66,7 +66,8 @@ public:
value::SlotVector innerCond,
value::SlotVector innerProjects,
boost::optional<value::SlotId> collatorSlot,
- PlanNodeId planNodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
diff --git a/src/mongo/db/exec/sbe/stages/hash_lookup.cpp b/src/mongo/db/exec/sbe/stages/hash_lookup.cpp
index a65f2f8bd89..16e61d68630 100644
--- a/src/mongo/db/exec/sbe/stages/hash_lookup.cpp
+++ b/src/mongo/db/exec/sbe/stages/hash_lookup.cpp
@@ -47,8 +47,9 @@ HashLookupStage::HashLookupStage(std::unique_ptr<PlanStage> outer,
value::SlotVector innerProjects,
value::SlotMap<std::unique_ptr<EExpression>> innerAggs,
boost::optional<value::SlotId> collatorSlot,
- PlanNodeId planNodeId)
- : PlanStage("hash_lookup"_sd, planNodeId),
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage("hash_lookup"_sd, planNodeId, participateInTrialRunTracking),
_outerCond(outerCond),
_innerCond(innerCond),
_innerProjects(innerProjects),
@@ -72,7 +73,8 @@ std::unique_ptr<PlanStage> HashLookupStage::clone() const {
_innerProjects,
std::move(innerAggs),
_collatorSlot,
- _commonStats.nodeId);
+ _commonStats.nodeId,
+ _participateInTrialRunTracking);
}
void HashLookupStage::prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/hash_lookup.h b/src/mongo/db/exec/sbe/stages/hash_lookup.h
index 2e3f0b34816..611c5603606 100644
--- a/src/mongo/db/exec/sbe/stages/hash_lookup.h
+++ b/src/mongo/db/exec/sbe/stages/hash_lookup.h
@@ -86,7 +86,8 @@ public:
value::SlotVector innerProjects,
value::SlotMap<std::unique_ptr<EExpression>> innerAggs,
boost::optional<value::SlotId> collatorSlot,
- PlanNodeId planNodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
diff --git a/src/mongo/db/exec/sbe/stages/ix_scan.cpp b/src/mongo/db/exec/sbe/stages/ix_scan.cpp
index 520e68fe074..1c4a54248dd 100644
--- a/src/mongo/db/exec/sbe/stages/ix_scan.cpp
+++ b/src/mongo/db/exec/sbe/stages/ix_scan.cpp
@@ -81,7 +81,8 @@ std::unique_ptr<PlanStage> IndexScanStage::clone() const {
_seekKeyLow->clone(),
_seekKeyHigh->clone(),
_yieldPolicy,
- _commonStats.nodeId);
+ _commonStats.nodeId,
+ _participateInTrialRunTracking);
}
void IndexScanStage::prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/ix_scan.h b/src/mongo/db/exec/sbe/stages/ix_scan.h
index c57389b3434..3d1ae2eec78 100644
--- a/src/mongo/db/exec/sbe/stages/ix_scan.h
+++ b/src/mongo/db/exec/sbe/stages/ix_scan.h
@@ -85,7 +85,8 @@ public:
std::unique_ptr<EExpression> seekKeyLow,
std::unique_ptr<EExpression> seekKeyHigh,
PlanYieldPolicy* yieldPolicy,
- PlanNodeId nodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
diff --git a/src/mongo/db/exec/sbe/stages/limit_skip.cpp b/src/mongo/db/exec/sbe/stages/limit_skip.cpp
index 359355582ac..8343f56ca96 100644
--- a/src/mongo/db/exec/sbe/stages/limit_skip.cpp
+++ b/src/mongo/db/exec/sbe/stages/limit_skip.cpp
@@ -37,8 +37,9 @@ namespace mongo::sbe {
LimitSkipStage::LimitSkipStage(std::unique_ptr<PlanStage> input,
boost::optional<long long> limit,
boost::optional<long long> skip,
- PlanNodeId planNodeId)
- : PlanStage(!skip ? "limit"_sd : "limitskip"_sd, planNodeId),
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage(!skip ? "limit"_sd : "limitskip"_sd, planNodeId, participateInTrialRunTracking),
_limit(limit),
_skip(skip),
_current(0),
@@ -51,7 +52,7 @@ LimitSkipStage::LimitSkipStage(std::unique_ptr<PlanStage> input,
std::unique_ptr<PlanStage> LimitSkipStage::clone() const {
return std::make_unique<LimitSkipStage>(
- _children[0]->clone(), _limit, _skip, _commonStats.nodeId);
+ _children[0]->clone(), _limit, _skip, _commonStats.nodeId, _participateInTrialRunTracking);
}
void LimitSkipStage::prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/limit_skip.h b/src/mongo/db/exec/sbe/stages/limit_skip.h
index f0f62b34239..7fc366a2174 100644
--- a/src/mongo/db/exec/sbe/stages/limit_skip.h
+++ b/src/mongo/db/exec/sbe/stages/limit_skip.h
@@ -50,7 +50,8 @@ public:
LimitSkipStage(std::unique_ptr<PlanStage> input,
boost::optional<long long> limit,
boost::optional<long long> skip,
- PlanNodeId planNodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
diff --git a/src/mongo/db/exec/sbe/stages/loop_join.cpp b/src/mongo/db/exec/sbe/stages/loop_join.cpp
index 6c49f2e700a..3df5e179a09 100644
--- a/src/mongo/db/exec/sbe/stages/loop_join.cpp
+++ b/src/mongo/db/exec/sbe/stages/loop_join.cpp
@@ -41,7 +41,8 @@ LoopJoinStage::LoopJoinStage(std::unique_ptr<PlanStage> outer,
value::SlotVector outerProjects,
value::SlotVector outerCorrelated,
std::unique_ptr<EExpression> predicate,
- PlanNodeId nodeId)
+ PlanNodeId nodeId,
+ bool participateInTrialRunTracking)
: LoopJoinStage(std::move(outer),
std::move(inner),
std::move(outerProjects),
@@ -49,7 +50,8 @@ LoopJoinStage::LoopJoinStage(std::unique_ptr<PlanStage> outer,
value::SlotVector{},
std::move(predicate),
JoinType::Inner,
- nodeId) {}
+ nodeId,
+ participateInTrialRunTracking) {}
LoopJoinStage::LoopJoinStage(std::unique_ptr<PlanStage> outer,
std::unique_ptr<PlanStage> inner,
@@ -58,8 +60,9 @@ LoopJoinStage::LoopJoinStage(std::unique_ptr<PlanStage> outer,
value::SlotVector innerProjects,
std::unique_ptr<EExpression> predicate,
JoinType joinType,
- PlanNodeId nodeId)
- : PlanStage("nlj"_sd, nodeId),
+ PlanNodeId nodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage("nlj"_sd, nodeId, participateInTrialRunTracking),
_outerProjects(std::move(outerProjects)),
_outerCorrelated(std::move(outerCorrelated)),
_innerProjects(std::move(innerProjects)),
@@ -80,7 +83,8 @@ std::unique_ptr<PlanStage> LoopJoinStage::clone() const {
_innerProjects,
_predicate ? _predicate->clone() : nullptr,
_joinType,
- _commonStats.nodeId);
+ _commonStats.nodeId,
+ _participateInTrialRunTracking);
}
void LoopJoinStage::prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/loop_join.h b/src/mongo/db/exec/sbe/stages/loop_join.h
index 076655bca4c..c69010071fd 100644
--- a/src/mongo/db/exec/sbe/stages/loop_join.h
+++ b/src/mongo/db/exec/sbe/stages/loop_join.h
@@ -63,7 +63,8 @@ public:
value::SlotVector outerProjects,
value::SlotVector outerCorrelated,
std::unique_ptr<EExpression> predicate,
- PlanNodeId nodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
LoopJoinStage(std::unique_ptr<PlanStage> outer,
std::unique_ptr<PlanStage> inner,
@@ -72,7 +73,8 @@ public:
value::SlotVector innerProjects,
std::unique_ptr<EExpression> predicate,
JoinType joinType,
- PlanNodeId nodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
diff --git a/src/mongo/db/exec/sbe/stages/makeobj.cpp b/src/mongo/db/exec/sbe/stages/makeobj.cpp
index e0402934517..0c84fde3083 100644
--- a/src/mongo/db/exec/sbe/stages/makeobj.cpp
+++ b/src/mongo/db/exec/sbe/stages/makeobj.cpp
@@ -46,8 +46,11 @@ MakeObjStageBase<O>::MakeObjStageBase(std::unique_ptr<PlanStage> input,
value::SlotVector projectVars,
bool forceNewObject,
bool returnOldObject,
- PlanNodeId planNodeId)
- : PlanStage(O == MakeObjOutputType::object ? "mkobj"_sd : "mkbson"_sd, planNodeId),
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage(O == MakeObjOutputType::object ? "mkobj"_sd : "mkbson"_sd,
+ planNodeId,
+ participateInTrialRunTracking),
_objSlot(objSlot),
_rootSlot(rootSlot),
_fieldBehavior(fieldBehavior),
@@ -62,6 +65,29 @@ MakeObjStageBase<O>::MakeObjStageBase(std::unique_ptr<PlanStage> input,
}
template <MakeObjOutputType O>
+MakeObjStageBase<O>::MakeObjStageBase(std::unique_ptr<PlanStage> input,
+ value::SlotId objSlot,
+ boost::optional<value::SlotId> rootSlot,
+ boost::optional<FieldBehavior> fieldBehavior,
+ std::set<std::string> fields,
+ std::set<std::string> projectFields,
+ value::SlotVector projectVars,
+ bool forceNewObject,
+ bool returnOldObject,
+ PlanNodeId planNodeId)
+ : MakeObjStageBase<O>::MakeObjStageBase(
+ std::move(input),
+ objSlot,
+ rootSlot,
+ fieldBehavior,
+ std::vector<std::string>(fields.begin(), fields.end()),
+ std::vector<std::string>(projectFields.begin(), projectFields.end()),
+ std::move(projectVars),
+ forceNewObject,
+ returnOldObject,
+ planNodeId) {}
+
+template <MakeObjOutputType O>
std::unique_ptr<PlanStage> MakeObjStageBase<O>::clone() const {
return std::make_unique<MakeObjStageBase<O>>(_children[0]->clone(),
_objSlot,
@@ -72,7 +98,8 @@ std::unique_ptr<PlanStage> MakeObjStageBase<O>::clone() const {
_projectVars,
_forceNewObject,
_returnOldObject,
- _commonStats.nodeId);
+ _commonStats.nodeId,
+ _participateInTrialRunTracking);
}
template <MakeObjOutputType O>
diff --git a/src/mongo/db/exec/sbe/stages/makeobj.h b/src/mongo/db/exec/sbe/stages/makeobj.h
index 1cf0755f1c5..3034470b95a 100644
--- a/src/mongo/db/exec/sbe/stages/makeobj.h
+++ b/src/mongo/db/exec/sbe/stages/makeobj.h
@@ -87,6 +87,22 @@ public:
value::SlotVector projectVars,
bool forceNewObject,
bool returnOldObject,
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
+
+ /**
+ * A convenience constructor that takes a set instead of a vector for 'fields' and
+ * 'projectedFields'.
+ */
+ MakeObjStageBase(std::unique_ptr<PlanStage> input,
+ value::SlotId objSlot,
+ boost::optional<value::SlotId> rootSlot,
+ boost::optional<FieldBehavior> fieldBehavior,
+ std::set<std::string> fields,
+ std::set<std::string> projectFields,
+ value::SlotVector projectVars,
+ bool forceNewObject,
+ bool returnOldObject,
PlanNodeId planNodeId);
std::unique_ptr<PlanStage> clone() const final;
diff --git a/src/mongo/db/exec/sbe/stages/merge_join.cpp b/src/mongo/db/exec/sbe/stages/merge_join.cpp
index 170227e0575..d6f03af7502 100644
--- a/src/mongo/db/exec/sbe/stages/merge_join.cpp
+++ b/src/mongo/db/exec/sbe/stages/merge_join.cpp
@@ -76,8 +76,9 @@ MergeJoinStage::MergeJoinStage(std::unique_ptr<PlanStage> outer,
value::SlotVector innerKeys,
value::SlotVector innerProjects,
std::vector<value::SortDirection> sortDirs,
- PlanNodeId planNodeId)
- : PlanStage("mj"_sd, planNodeId),
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage("mj"_sd, planNodeId, participateInTrialRunTracking),
_outerKeys(std::move(outerKeys)),
_outerProjects(std::move(outerProjects)),
_innerKeys(std::move(innerKeys)),
@@ -104,7 +105,8 @@ std::unique_ptr<PlanStage> MergeJoinStage::clone() const {
_innerKeys,
_innerProjects,
_dirs,
- _commonStats.nodeId);
+ _commonStats.nodeId,
+ _participateInTrialRunTracking);
}
void MergeJoinStage::prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/merge_join.h b/src/mongo/db/exec/sbe/stages/merge_join.h
index b0f61cd677c..ff94784ac0d 100644
--- a/src/mongo/db/exec/sbe/stages/merge_join.h
+++ b/src/mongo/db/exec/sbe/stages/merge_join.h
@@ -62,7 +62,8 @@ public:
value::SlotVector innerKeys,
value::SlotVector innerProjects,
std::vector<value::SortDirection> sortDirs,
- PlanNodeId planNodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
diff --git a/src/mongo/db/exec/sbe/stages/project.cpp b/src/mongo/db/exec/sbe/stages/project.cpp
index 736110bc83a..c534c5c8cdc 100644
--- a/src/mongo/db/exec/sbe/stages/project.cpp
+++ b/src/mongo/db/exec/sbe/stages/project.cpp
@@ -37,8 +37,10 @@ namespace mongo {
namespace sbe {
ProjectStage::ProjectStage(std::unique_ptr<PlanStage> input,
value::SlotMap<std::unique_ptr<EExpression>> projects,
- PlanNodeId nodeId)
- : PlanStage("project"_sd, nodeId), _projects(std::move(projects)) {
+ PlanNodeId nodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage("project"_sd, nodeId, participateInTrialRunTracking),
+ _projects(std::move(projects)) {
_children.emplace_back(std::move(input));
}
@@ -47,8 +49,10 @@ std::unique_ptr<PlanStage> ProjectStage::clone() const {
for (auto& [k, v] : _projects) {
projects.emplace(k, v->clone());
}
- return std::make_unique<ProjectStage>(
- _children[0]->clone(), std::move(projects), _commonStats.nodeId);
+ return std::make_unique<ProjectStage>(_children[0]->clone(),
+ std::move(projects),
+ _commonStats.nodeId,
+ _participateInTrialRunTracking);
}
void ProjectStage::prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/project.h b/src/mongo/db/exec/sbe/stages/project.h
index 1754dd7d2a9..bf4e169c8c9 100644
--- a/src/mongo/db/exec/sbe/stages/project.h
+++ b/src/mongo/db/exec/sbe/stages/project.h
@@ -47,7 +47,8 @@ class ProjectStage final : public PlanStage {
public:
ProjectStage(std::unique_ptr<PlanStage> input,
value::SlotMap<std::unique_ptr<EExpression>> projects,
- PlanNodeId nodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
diff --git a/src/mongo/db/exec/sbe/stages/scan.cpp b/src/mongo/db/exec/sbe/stages/scan.cpp
index 678d3f84ef9..fbbc3a9ae0d 100644
--- a/src/mongo/db/exec/sbe/stages/scan.cpp
+++ b/src/mongo/db/exec/sbe/stages/scan.cpp
@@ -56,8 +56,10 @@ ScanStage::ScanStage(UUID collectionUuid,
PlanYieldPolicy* yieldPolicy,
PlanNodeId nodeId,
ScanCallbacks scanCallbacks,
- bool useRandomCursor)
- : PlanStage(seekKeySlot ? "seek"_sd : "scan"_sd, yieldPolicy, nodeId),
+ bool useRandomCursor,
+ bool participateInTrialRunTracking)
+ : PlanStage(
+ seekKeySlot ? "seek"_sd : "scan"_sd, yieldPolicy, nodeId, participateInTrialRunTracking),
_collUuid(collectionUuid),
_recordSlot(recordSlot),
_recordIdSlot(recordIdSlot),
@@ -98,7 +100,9 @@ std::unique_ptr<PlanStage> ScanStage::clone() const {
_forward,
_yieldPolicy,
_commonStats.nodeId,
- _scanCallbacks);
+ _scanCallbacks,
+ _useRandomCursor,
+ _participateInTrialRunTracking);
}
void ScanStage::prepare(CompileCtx& ctx) {
@@ -592,8 +596,9 @@ ParallelScanStage::ParallelScanStage(UUID collectionUuid,
value::SlotVector vars,
PlanYieldPolicy* yieldPolicy,
PlanNodeId nodeId,
- ScanCallbacks callbacks)
- : PlanStage("pscan"_sd, yieldPolicy, nodeId),
+ ScanCallbacks callbacks,
+ bool participateInTrialRunTracking)
+ : PlanStage("pscan"_sd, yieldPolicy, nodeId, participateInTrialRunTracking),
_collUuid(collectionUuid),
_recordSlot(recordSlot),
_recordIdSlot(recordIdSlot),
@@ -621,8 +626,9 @@ ParallelScanStage::ParallelScanStage(const std::shared_ptr<ParallelState>& state
value::SlotVector vars,
PlanYieldPolicy* yieldPolicy,
PlanNodeId nodeId,
- ScanCallbacks callbacks)
- : PlanStage("pscan"_sd, yieldPolicy, nodeId),
+ ScanCallbacks callbacks,
+ bool participateInTrialRunTracking)
+ : PlanStage("pscan"_sd, yieldPolicy, nodeId, participateInTrialRunTracking),
_collUuid(collectionUuid),
_recordSlot(recordSlot),
_recordIdSlot(recordIdSlot),
@@ -650,7 +656,8 @@ std::unique_ptr<PlanStage> ParallelScanStage::clone() const {
_vars,
_yieldPolicy,
_commonStats.nodeId,
- _scanCallbacks);
+ _scanCallbacks,
+ _participateInTrialRunTracking);
}
void ParallelScanStage::prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/scan.h b/src/mongo/db/exec/sbe/stages/scan.h
index 37462ac5e14..ed138f6302e 100644
--- a/src/mongo/db/exec/sbe/stages/scan.h
+++ b/src/mongo/db/exec/sbe/stages/scan.h
@@ -108,7 +108,8 @@ public:
PlanYieldPolicy* yieldPolicy,
PlanNodeId nodeId,
ScanCallbacks scanCallbacks,
- bool useRandomCursor = false);
+ bool useRandomCursor = false,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
@@ -227,7 +228,8 @@ public:
value::SlotVector vars,
PlanYieldPolicy* yieldPolicy,
PlanNodeId nodeId,
- ScanCallbacks callbacks);
+ ScanCallbacks callbacks,
+ bool participateInTrialRunTracking = true);
ParallelScanStage(const std::shared_ptr<ParallelState>& state,
const UUID& collectionUuid,
@@ -241,7 +243,8 @@ public:
value::SlotVector vars,
PlanYieldPolicy* yieldPolicy,
PlanNodeId nodeId,
- ScanCallbacks callbacks);
+ ScanCallbacks callbacks,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
diff --git a/src/mongo/db/exec/sbe/stages/sort.cpp b/src/mongo/db/exec/sbe/stages/sort.cpp
index 5acf73afe8d..0968b0bea68 100644
--- a/src/mongo/db/exec/sbe/stages/sort.cpp
+++ b/src/mongo/db/exec/sbe/stages/sort.cpp
@@ -55,8 +55,9 @@ SortStage::SortStage(std::unique_ptr<PlanStage> input,
size_t limit,
size_t memoryLimit,
bool allowDiskUse,
- PlanNodeId planNodeId)
- : PlanStage("sort"_sd, planNodeId),
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage("sort"_sd, planNodeId, participateInTrialRunTracking),
_obs(std::move(obs)),
_dirs(std::move(dirs)),
_vals(std::move(vals)),
@@ -80,7 +81,8 @@ std::unique_ptr<PlanStage> SortStage::clone() const {
_specificStats.limit,
_specificStats.maxMemoryUsageBytes,
_allowDiskUse,
- _commonStats.nodeId);
+ _commonStats.nodeId,
+ _participateInTrialRunTracking);
}
void SortStage::prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/sort.h b/src/mongo/db/exec/sbe/stages/sort.h
index 2bfc9e1d9fb..dda9716b75b 100644
--- a/src/mongo/db/exec/sbe/stages/sort.h
+++ b/src/mongo/db/exec/sbe/stages/sort.h
@@ -70,7 +70,8 @@ public:
size_t limit,
size_t memoryLimit,
bool allowDiskUse,
- PlanNodeId planNodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
~SortStage();
diff --git a/src/mongo/db/exec/sbe/stages/sorted_merge.cpp b/src/mongo/db/exec/sbe/stages/sorted_merge.cpp
index f0a648f38ad..39cee407a00 100644
--- a/src/mongo/db/exec/sbe/stages/sorted_merge.cpp
+++ b/src/mongo/db/exec/sbe/stages/sorted_merge.cpp
@@ -41,8 +41,9 @@ SortedMergeStage::SortedMergeStage(PlanStage::Vector inputStages,
std::vector<value::SortDirection> dirs,
std::vector<value::SlotVector> inputVals,
value::SlotVector outputVals,
- PlanNodeId planNodeId)
- : PlanStage("smerge"_sd, planNodeId),
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage("smerge"_sd, planNodeId, participateInTrialRunTracking),
_inputKeys(std::move(inputKeys)),
_dirs(std::move(dirs)),
_inputVals(std::move(inputVals)),
@@ -69,8 +70,13 @@ std::unique_ptr<PlanStage> SortedMergeStage::clone() const {
for (auto& child : _children) {
inputStages.emplace_back(child->clone());
}
- return std::make_unique<SortedMergeStage>(
- std::move(inputStages), _inputKeys, _dirs, _inputVals, _outputVals, _commonStats.nodeId);
+ return std::make_unique<SortedMergeStage>(std::move(inputStages),
+ _inputKeys,
+ _dirs,
+ _inputVals,
+ _outputVals,
+ _commonStats.nodeId,
+ _participateInTrialRunTracking);
}
void SortedMergeStage::prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/sorted_merge.h b/src/mongo/db/exec/sbe/stages/sorted_merge.h
index 3b87e4c8849..436ddfce080 100644
--- a/src/mongo/db/exec/sbe/stages/sorted_merge.h
+++ b/src/mongo/db/exec/sbe/stages/sorted_merge.h
@@ -61,7 +61,8 @@ public:
// Each element of 'inputVals' must be the same size as 'outputVals'.
std::vector<value::SlotVector> inputVals,
value::SlotVector outputVals,
- PlanNodeId planNodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
diff --git a/src/mongo/db/exec/sbe/stages/spool.cpp b/src/mongo/db/exec/sbe/stages/spool.cpp
index 4550f569b09..47ca744962c 100644
--- a/src/mongo/db/exec/sbe/stages/spool.cpp
+++ b/src/mongo/db/exec/sbe/stages/spool.cpp
@@ -35,14 +35,20 @@ namespace mongo::sbe {
SpoolEagerProducerStage::SpoolEagerProducerStage(std::unique_ptr<PlanStage> input,
SpoolId spoolId,
value::SlotVector vals,
- PlanNodeId planNodeId)
- : PlanStage{"espool"_sd, planNodeId}, _spoolId{spoolId}, _vals{std::move(vals)} {
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage{"espool"_sd, planNodeId, participateInTrialRunTracking},
+ _spoolId{spoolId},
+ _vals{std::move(vals)} {
_children.emplace_back(std::move(input));
}
std::unique_ptr<PlanStage> SpoolEagerProducerStage::clone() const {
- return std::make_unique<SpoolEagerProducerStage>(
- _children[0]->clone(), _spoolId, _vals, _commonStats.nodeId);
+ return std::make_unique<SpoolEagerProducerStage>(_children[0]->clone(),
+ _spoolId,
+ _vals,
+ _commonStats.nodeId,
+ _participateInTrialRunTracking);
}
void SpoolEagerProducerStage::prepare(CompileCtx& ctx) {
@@ -171,8 +177,9 @@ SpoolLazyProducerStage::SpoolLazyProducerStage(std::unique_ptr<PlanStage> input,
SpoolId spoolId,
value::SlotVector vals,
std::unique_ptr<EExpression> predicate,
- PlanNodeId planNodeId)
- : PlanStage{"lspool"_sd, planNodeId},
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage{"lspool"_sd, planNodeId, participateInTrialRunTracking},
_spoolId{spoolId},
_vals{std::move(vals)},
_predicate{std::move(predicate)} {
@@ -180,8 +187,12 @@ SpoolLazyProducerStage::SpoolLazyProducerStage(std::unique_ptr<PlanStage> input,
}
std::unique_ptr<PlanStage> SpoolLazyProducerStage::clone() const {
- return std::make_unique<SpoolLazyProducerStage>(
- _children[0]->clone(), _spoolId, _vals, _predicate->clone(), _commonStats.nodeId);
+ return std::make_unique<SpoolLazyProducerStage>(_children[0]->clone(),
+ _spoolId,
+ _vals,
+ _predicate->clone(),
+ _commonStats.nodeId,
+ _participateInTrialRunTracking);
}
void SpoolLazyProducerStage::prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/spool.h b/src/mongo/db/exec/sbe/stages/spool.h
index a2dd6f81657..09a453e0e0e 100644
--- a/src/mongo/db/exec/sbe/stages/spool.h
+++ b/src/mongo/db/exec/sbe/stages/spool.h
@@ -56,7 +56,8 @@ public:
SpoolEagerProducerStage(std::unique_ptr<PlanStage> input,
SpoolId spoolId,
value::SlotVector vals,
- PlanNodeId planNodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
@@ -109,7 +110,8 @@ public:
SpoolId spoolId,
value::SlotVector vals,
std::unique_ptr<EExpression> predicate,
- PlanNodeId planNodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
@@ -165,13 +167,17 @@ private:
template <bool IsStack>
class SpoolConsumerStage final : public PlanStage {
public:
- SpoolConsumerStage(SpoolId spoolId, value::SlotVector vals, PlanNodeId planNodeId)
- : PlanStage{IsStack ? "sspool"_sd : "cspool"_sd, planNodeId},
+ SpoolConsumerStage(SpoolId spoolId,
+ value::SlotVector vals,
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true)
+ : PlanStage{IsStack ? "sspool"_sd : "cspool"_sd, planNodeId, participateInTrialRunTracking},
_spoolId{spoolId},
_vals{std::move(vals)} {}
std::unique_ptr<PlanStage> clone() const {
- return std::make_unique<SpoolConsumerStage<IsStack>>(_spoolId, _vals, _commonStats.nodeId);
+ return std::make_unique<SpoolConsumerStage<IsStack>>(
+ _spoolId, _vals, _commonStats.nodeId, _participateInTrialRunTracking);
}
void prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/stages.h b/src/mongo/db/exec/sbe/stages/stages.h
index 59f6746a005..02dd6ae62fb 100644
--- a/src/mongo/db/exec/sbe/stages/stages.h
+++ b/src/mongo/db/exec/sbe/stages/stages.h
@@ -254,7 +254,9 @@ protected:
template <typename T>
class CanTrackStats {
public:
- CanTrackStats(StringData stageType, PlanNodeId nodeId) : _commonStats(stageType, nodeId) {}
+ CanTrackStats(StringData stageType, PlanNodeId nodeId, bool participateInTrialRunTracking)
+ : _commonStats(stageType, nodeId),
+ _participateInTrialRunTracking(participateInTrialRunTracking) {}
/**
* Returns a tree of stats. If the stage has any children it must propagate the request for
@@ -414,6 +416,12 @@ protected:
CommonStats _commonStats;
+ // Flag which determines whether this node and its children can participate in trial run
+ // tracking. A stage and its children are not eligible for trial run tracking when they are
+ // planned deterministically (that is, the amount of work they perform is independent of
+ // other parts of the tree which are multiplanned).
+ bool _participateInTrialRunTracking{true};
+
private:
/**
* In general, accessors can be accessed only after getNext returns a row. It is most definitely
@@ -422,14 +430,6 @@ private:
* that feature is retired we can then simply revisit all stages and simplify them.
*/
bool _slotsAccessible{false};
-
- /**
- * Flag which determines whether this node and its children can participate in trial run
- * tracking. A stage and its children are not eligible for trial run tracking when they are
- * planned deterministically (that is, the amount of work they perform is independent of
- * other parts of the tree which are multiplanned).
- */
- bool _participateInTrialRunTracking{true};
};
/**
@@ -496,10 +496,15 @@ class PlanStage : public CanSwitchOperationContext<PlanStage>,
public:
using Vector = absl::InlinedVector<std::unique_ptr<PlanStage>, 2>;
- PlanStage(StringData stageType, PlanYieldPolicy* yieldPolicy, PlanNodeId nodeId)
- : CanTrackStats{stageType, nodeId}, CanInterrupt{yieldPolicy} {}
+ PlanStage(StringData stageType,
+ PlanYieldPolicy* yieldPolicy,
+ PlanNodeId nodeId,
+ bool participateInTrialRunTracking)
+ : CanTrackStats{stageType, nodeId, participateInTrialRunTracking},
+ CanInterrupt{yieldPolicy} {}
- PlanStage(StringData stageType, PlanNodeId nodeId) : PlanStage(stageType, nullptr, nodeId) {}
+ PlanStage(StringData stageType, PlanNodeId nodeId, bool participateInTrialRunTracking)
+ : PlanStage(stageType, nullptr, nodeId, participateInTrialRunTracking) {}
virtual ~PlanStage() = default;
diff --git a/src/mongo/db/exec/sbe/stages/traverse.cpp b/src/mongo/db/exec/sbe/stages/traverse.cpp
index d1e0a040b3e..654a1a160fa 100644
--- a/src/mongo/db/exec/sbe/stages/traverse.cpp
+++ b/src/mongo/db/exec/sbe/stages/traverse.cpp
@@ -42,8 +42,9 @@ TraverseStage::TraverseStage(std::unique_ptr<PlanStage> outer,
std::unique_ptr<EExpression> foldExpr,
std::unique_ptr<EExpression> finalExpr,
PlanNodeId planNodeId,
- boost::optional<size_t> nestedArraysDepth)
- : PlanStage("traverse"_sd, planNodeId),
+ boost::optional<size_t> nestedArraysDepth,
+ bool participateInTrialRunTracking)
+ : PlanStage("traverse"_sd, planNodeId, participateInTrialRunTracking),
_inField(inField),
_outField(outField),
_outFieldInner(outFieldInner),
@@ -69,7 +70,8 @@ std::unique_ptr<PlanStage> TraverseStage::clone() const {
_fold ? _fold->clone() : nullptr,
_final ? _final->clone() : nullptr,
_commonStats.nodeId,
- _nestedArraysDepth);
+ _nestedArraysDepth,
+ _participateInTrialRunTracking);
}
void TraverseStage::prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/traverse.h b/src/mongo/db/exec/sbe/stages/traverse.h
index 2b3fee33a47..09e5dc3dfcf 100644
--- a/src/mongo/db/exec/sbe/stages/traverse.h
+++ b/src/mongo/db/exec/sbe/stages/traverse.h
@@ -74,7 +74,8 @@ public:
std::unique_ptr<EExpression> foldExpr,
std::unique_ptr<EExpression> finalExpr,
PlanNodeId planNodeId,
- boost::optional<size_t> nestedArraysDepth);
+ boost::optional<size_t> nestedArraysDepth,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
diff --git a/src/mongo/db/exec/sbe/stages/union.cpp b/src/mongo/db/exec/sbe/stages/union.cpp
index a661e6c579f..2fd6d0b4fc5 100644
--- a/src/mongo/db/exec/sbe/stages/union.cpp
+++ b/src/mongo/db/exec/sbe/stages/union.cpp
@@ -38,8 +38,9 @@ namespace mongo::sbe {
UnionStage::UnionStage(PlanStage::Vector inputStages,
std::vector<value::SlotVector> inputVals,
value::SlotVector outputVals,
- PlanNodeId planNodeId)
- : PlanStage("union"_sd, planNodeId),
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage("union"_sd, planNodeId, participateInTrialRunTracking),
_inputVals{std::move(inputVals)},
_outputVals{std::move(outputVals)} {
_children = std::move(inputStages);
@@ -57,8 +58,11 @@ std::unique_ptr<PlanStage> UnionStage::clone() const {
for (auto& child : _children) {
inputStages.emplace_back(child->clone());
}
- return std::make_unique<UnionStage>(
- std::move(inputStages), _inputVals, _outputVals, _commonStats.nodeId);
+ return std::make_unique<UnionStage>(std::move(inputStages),
+ _inputVals,
+ _outputVals,
+ _commonStats.nodeId,
+ _participateInTrialRunTracking);
}
void UnionStage::prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/union.h b/src/mongo/db/exec/sbe/stages/union.h
index 2ec0ec73df9..b21d5e6caf5 100644
--- a/src/mongo/db/exec/sbe/stages/union.h
+++ b/src/mongo/db/exec/sbe/stages/union.h
@@ -53,7 +53,8 @@ public:
UnionStage(PlanStage::Vector inputStages,
std::vector<value::SlotVector> inputVals,
value::SlotVector outputVals,
- PlanNodeId planNodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
diff --git a/src/mongo/db/exec/sbe/stages/unique.cpp b/src/mongo/db/exec/sbe/stages/unique.cpp
index 355927ff912..c88fa9ab43e 100644
--- a/src/mongo/db/exec/sbe/stages/unique.cpp
+++ b/src/mongo/db/exec/sbe/stages/unique.cpp
@@ -37,13 +37,15 @@ namespace mongo {
namespace sbe {
UniqueStage::UniqueStage(std::unique_ptr<PlanStage> input,
value::SlotVector keys,
- PlanNodeId planNodeId)
- : PlanStage("unique"_sd, planNodeId), _keySlots(keys) {
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage("unique"_sd, planNodeId, participateInTrialRunTracking), _keySlots(keys) {
_children.emplace_back(std::move(input));
}
std::unique_ptr<PlanStage> UniqueStage::clone() const {
- return std::make_unique<UniqueStage>(_children[0]->clone(), _keySlots, _commonStats.nodeId);
+ return std::make_unique<UniqueStage>(
+ _children[0]->clone(), _keySlots, _commonStats.nodeId, _participateInTrialRunTracking);
}
void UniqueStage::prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/unique.h b/src/mongo/db/exec/sbe/stages/unique.h
index 1165743a0cc..c344cd09d24 100644
--- a/src/mongo/db/exec/sbe/stages/unique.h
+++ b/src/mongo/db/exec/sbe/stages/unique.h
@@ -53,7 +53,10 @@ namespace mongo::sbe {
*/
class UniqueStage final : public PlanStage {
public:
- UniqueStage(std::unique_ptr<PlanStage> input, value::SlotVector keys, PlanNodeId planNodeId);
+ UniqueStage(std::unique_ptr<PlanStage> input,
+ value::SlotVector keys,
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
diff --git a/src/mongo/db/exec/sbe/stages/unwind.cpp b/src/mongo/db/exec/sbe/stages/unwind.cpp
index b4c5e225adc..7ad10eecb23 100644
--- a/src/mongo/db/exec/sbe/stages/unwind.cpp
+++ b/src/mongo/db/exec/sbe/stages/unwind.cpp
@@ -40,8 +40,9 @@ UnwindStage::UnwindStage(std::unique_ptr<PlanStage> input,
value::SlotId outField,
value::SlotId outIndex,
bool preserveNullAndEmptyArrays,
- PlanNodeId planNodeId)
- : PlanStage("unwind"_sd, planNodeId),
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking)
+ : PlanStage("unwind"_sd, planNodeId, participateInTrialRunTracking),
_inField(inField),
_outField(outField),
_outIndex(outIndex),
@@ -59,7 +60,8 @@ std::unique_ptr<PlanStage> UnwindStage::clone() const {
_outField,
_outIndex,
_preserveNullAndEmptyArrays,
- _commonStats.nodeId);
+ _commonStats.nodeId,
+ _participateInTrialRunTracking);
}
void UnwindStage::prepare(CompileCtx& ctx) {
diff --git a/src/mongo/db/exec/sbe/stages/unwind.h b/src/mongo/db/exec/sbe/stages/unwind.h
index 049fee4a069..57b28d9c1cf 100644
--- a/src/mongo/db/exec/sbe/stages/unwind.h
+++ b/src/mongo/db/exec/sbe/stages/unwind.h
@@ -52,7 +52,8 @@ public:
value::SlotId outField,
value::SlotId outIndex,
bool preserveNullAndEmptyArrays,
- PlanNodeId planNodeId);
+ PlanNodeId planNodeId,
+ bool participateInTrialRunTracking = true);
std::unique_ptr<PlanStage> clone() const final;
diff --git a/src/mongo/db/exec/sbe/util/spilling.cpp b/src/mongo/db/exec/sbe/util/spilling.cpp
index 45931efec8b..c54f3bfe956 100644
--- a/src/mongo/db/exec/sbe/util/spilling.cpp
+++ b/src/mongo/db/exec/sbe/util/spilling.cpp
@@ -63,8 +63,7 @@ boost::optional<value::MaterializedRow> readFromRecordStore(OperationContext* op
RecordData record;
if (rs->findRecord(opCtx, rid, &record)) {
auto valueReader = BufReader(record.data(), record.size());
- auto val = value::MaterializedRow::deserializeForSorter(valueReader, {});
- return val;
+ return value::MaterializedRow::deserializeForSorter(valueReader, {});
}
return boost::none;
}
diff --git a/src/mongo/db/exec/sbe/values/columnar.cpp b/src/mongo/db/exec/sbe/values/columnar.cpp
index 7490d549803..c1bd51f6b69 100644
--- a/src/mongo/db/exec/sbe/values/columnar.cpp
+++ b/src/mongo/db/exec/sbe/values/columnar.cpp
@@ -237,6 +237,24 @@ void addToObjectNoArrays(value::TypeTags tag,
});
}
+/*
+ * Ensures that the path (stored in 'state') leads to an object and materializes an empty object if
+ * it does not. Assumes that there are no arrays along remaining path (i.e., the components that are
+ * not yet traversed via withNextPathComponent()).
+ *
+ * This function is a no-op when there are no remaining path components.
+ */
+template <class C>
+void materializeObjectNoArrays(AddToDocumentState<C>& state, value::Object& out) {
+ if (state.atLastPathComponent()) {
+ return;
+ }
+
+ state.withNextPathComponent([&](StringData nextPathComponent) {
+ materializeObjectNoArrays(state, *findOrAddObjInObj(nextPathComponent, &out));
+ });
+}
+
template <class C>
void addToObject(value::Object& obj, AddToDocumentState<C>& state);
@@ -268,23 +286,19 @@ void addToArray(value::Array& arr, AddToDocumentState<C>& state) {
for (; insertAt < index; insertAt++) {
invariant(insertAt < arr.size());
- auto [tag, val] = [nextChar, &state]() {
- if (nextChar == '|') {
- return state.extractAndCopyValue();
+ if (nextChar == 'o') {
+ materializeObjectNoArrays(state, *findOrAddObjInArr(insertAt, &arr));
+ } else if (nextChar == '|') {
+ auto [tag, val] = state.extractAndCopyValue();
+ if (state.atLastPathComponent()) {
+ invariant(arr.getAt(insertAt).first == kPlaceHolderType);
+ arr.setAt(insertAt, tag, val);
} else {
- invariant(nextChar == 'o');
- return value::makeNewObject();
+ addToObjectNoArrays(
+ tag, val, state, *findOrAddObjInArr(insertAt, &arr), 0);
}
- }();
- if (state.atLastPathComponent()) {
- // At this point we are inserting a leaf value.
- dassert(arr.getAt(insertAt).first == kPlaceHolderType);
- arr.setAt(insertAt, tag, val);
} else {
- // This is valid on initialized elements when the subobject contains more
- // than one member.
- auto* subObj = findOrAddObjInArr(insertAt, &arr);
- addToObjectNoArrays(tag, val, state, *subObj, 0);
+ MONGO_UNREACHABLE;
}
}
break;
diff --git a/src/mongo/db/exec/sbe/values/columnar_test.cpp b/src/mongo/db/exec/sbe/values/columnar_test.cpp
index 9dc9e7717d0..ebbed88848a 100644
--- a/src/mongo/db/exec/sbe/values/columnar_test.cpp
+++ b/src/mongo/db/exec/sbe/values/columnar_test.cpp
@@ -201,4 +201,11 @@ TEST(ColumnarObjTest, AddNonLeafCellWithArrayInfoToObject) {
std::vector<MockTranslatedCell> cells{makeCellOfIntegers("a.b", "{[o1", {})};
compareMakeObjWithExpected(cells, fromjson("{a: {b: [{}, {}]}}"));
}
+
+TEST(ColumnarObjTest, AddLeafCellThenAddSparseSibling) {
+ std::vector<MockTranslatedCell> cells{makeCellOfIntegers("a.b", "[", {1, 2}),
+ makeCellOfIntegers("a", "[o1", {}),
+ makeCellOfIntegers("a.c", "[1", {3})};
+ compareMakeObjWithExpected(cells, fromjson("{a: [{b: 1}, {b: 2, c: 3}]}"));
+}
} // namespace mongo::sbe
diff --git a/src/mongo/db/exec/sbe/values/value.cpp b/src/mongo/db/exec/sbe/values/value.cpp
index cde56e31ba2..5bbdc40170e 100644
--- a/src/mongo/db/exec/sbe/values/value.cpp
+++ b/src/mongo/db/exec/sbe/values/value.cpp
@@ -326,6 +326,9 @@ void releaseValue(TypeTags tag, Value val) noexcept {
case TypeTags::indexBounds:
delete getIndexBoundsView(val);
break;
+ case TypeTags::classicMatchExpresion:
+ delete getClassicMatchExpressionView(val);
+ break;
default:
break;
}
diff --git a/src/mongo/db/exec/sbe/values/value.h b/src/mongo/db/exec/sbe/values/value.h
index 728a0c16634..d0202b0f1c0 100644
--- a/src/mongo/db/exec/sbe/values/value.h
+++ b/src/mongo/db/exec/sbe/values/value.h
@@ -154,6 +154,9 @@ enum class TypeTags : uint8_t {
// Pointer to a IndexBounds object.
indexBounds,
+
+ // Pointer to a classic engine match expression.
+ classicMatchExpresion,
};
inline constexpr bool isNumber(TypeTags tag) noexcept {
@@ -1249,6 +1252,10 @@ inline IndexBounds* getIndexBoundsView(Value val) noexcept {
return reinterpret_cast<IndexBounds*>(val);
}
+inline MatchExpression* getClassicMatchExpressionView(Value val) noexcept {
+ return reinterpret_cast<MatchExpression*>(val);
+}
+
/**
* Pattern and flags of Regex are stored in BSON as two C strings written one after another.
*
@@ -1450,6 +1457,12 @@ inline std::pair<TypeTags, Value> copyValue(TypeTags tag, Value val) {
return makeCopyCollator(*getCollatorView(val));
case TypeTags::indexBounds:
return makeCopyIndexBounds(*getIndexBoundsView(val));
+ case TypeTags::classicMatchExpresion:
+ // Beware: "shallow cloning" a match expression does not copy the underlying BSON. The
+ // original BSON must remain alive for both the original MatchExpression and the clone.
+ return {TypeTags::classicMatchExpresion,
+ bitcastFrom<const MatchExpression*>(
+ getClassicMatchExpressionView(val)->shallowClone().release())};
default:
break;
}
diff --git a/src/mongo/db/exec/sbe/values/value_printer.cpp b/src/mongo/db/exec/sbe/values/value_printer.cpp
index 78e655114a3..90a43442329 100644
--- a/src/mongo/db/exec/sbe/values/value_printer.cpp
+++ b/src/mongo/db/exec/sbe/values/value_printer.cpp
@@ -156,6 +156,9 @@ void ValuePrinter<T>::writeTagToStream(TypeTags tag) {
case TypeTags::indexBounds:
stream << "indexBounds";
break;
+ case TypeTags::classicMatchExpresion:
+ stream << "classicMatchExpression";
+ break;
default:
stream << "unknown tag";
break;
@@ -472,6 +475,9 @@ void ValuePrinter<T>::writeValueToStream(TypeTags tag, Value val, size_t depth)
getIndexBoundsView(val)->toString(true /* hasNonSimpleCollation */));
stream << ")";
break;
+ case TypeTags::classicMatchExpresion:
+ stream << "ClassicMatcher(" << getClassicMatchExpressionView(val)->toString() << ")";
+ break;
default:
MONGO_UNREACHABLE;
}
diff --git a/src/mongo/db/exec/sbe/vm/vm.cpp b/src/mongo/db/exec/sbe/vm/vm.cpp
index f6a6b35970e..4f9329e7ed6 100644
--- a/src/mongo/db/exec/sbe/vm/vm.cpp
+++ b/src/mongo/db/exec/sbe/vm/vm.cpp
@@ -27,7 +27,6 @@
* it in the license file.
*/
-
#include "mongo/platform/basic.h"
#include "mongo/db/exec/sbe/expressions/expression.h"
@@ -153,6 +152,8 @@ int Instruction::stackOffset[Instruction::Tags::lastInstruction] = {
0, // ret
-1, // fail
+
+ 0, // applyClassicMatcher
};
namespace {
@@ -211,17 +212,13 @@ std::string CodeFragment::toString() const {
case Instruction::cmp3w:
case Instruction::collCmp3w:
case Instruction::fillEmpty:
- case Instruction::fillEmptyConst:
case Instruction::getField:
- case Instruction::getFieldConst:
case Instruction::getElement:
case Instruction::getArraySize:
case Instruction::collComparisonKey:
case Instruction::getFieldOrElement:
case Instruction::traverseP:
- case Instruction::traversePConst:
case Instruction::traverseF:
- case Instruction::traverseFConst:
case Instruction::setField:
case Instruction::aggSum:
case Instruction::aggMin:
@@ -249,9 +246,15 @@ std::string CodeFragment::toString() const {
break;
}
// Instructions with a single integer argument.
+ case Instruction::pushLocalLambda:
+ case Instruction::traversePConst: {
+ auto offset = readFromMemory<int>(pcPointer);
+ pcPointer += sizeof(offset);
+ ss << "offset: " << offset;
+ break;
+ }
case Instruction::pushLocalVal:
- case Instruction::pushMoveLocalVal:
- case Instruction::pushLocalLambda: {
+ case Instruction::pushMoveLocalVal: {
auto arg = readFromMemory<int>(pcPointer);
pcPointer += sizeof(arg);
ss << "arg: " << arg;
@@ -266,6 +269,21 @@ std::string CodeFragment::toString() const {
break;
}
// Instructions with other kinds of arguments.
+ case Instruction::traverseFConst: {
+ auto k = readFromMemory<Instruction::Constants>(pcPointer);
+ pcPointer += sizeof(k);
+ auto offset = readFromMemory<int>(pcPointer);
+ pcPointer += sizeof(offset);
+ ss << "k: " << Instruction::toStringConstants(k) << ", offset: " << offset;
+ break;
+ }
+ case Instruction::fillEmptyConst: {
+ auto k = readFromMemory<Instruction::Constants>(pcPointer);
+ pcPointer += sizeof(k);
+ ss << "k: " << Instruction::toStringConstants(k);
+ break;
+ }
+ case Instruction::getFieldConst:
case Instruction::pushConstVal: {
auto tag = readFromMemory<value::TypeTags>(pcPointer);
pcPointer += sizeof(tag);
@@ -281,6 +299,12 @@ std::string CodeFragment::toString() const {
ss << "accessor: " << static_cast<void*>(accessor);
break;
}
+ case Instruction::applyClassicMatcher: {
+ const auto* matcher = readFromMemory<const MatchExpression*>(pcPointer);
+ pcPointer += sizeof(matcher);
+ ss << "matcher: " << static_cast<const void*>(matcher);
+ break;
+ }
case Instruction::numConvert: {
auto tag = readFromMemory<value::TypeTags>(pcPointer);
pcPointer += sizeof(tag);
@@ -446,6 +470,17 @@ void CodeFragment::appendNumericConvert(value::TypeTags targetTag) {
offset += writeToMemory(offset, targetTag);
}
+void CodeFragment::appendApplyClassicMatcher(const MatchExpression* matcher) {
+ Instruction i;
+ i.tag = Instruction::applyClassicMatcher;
+ adjustStackSimple(i);
+
+ auto offset = allocateSpace(sizeof(Instruction) + sizeof(matcher));
+
+ offset += writeToMemory(offset, i);
+ offset += writeToMemory(offset, matcher);
+}
+
void CodeFragment::appendSub() {
appendSimpleInstruction(Instruction::sub);
}
@@ -5848,6 +5883,31 @@ void ByteCode::runInternal(const CodeFragment* code, int64_t position) {
break;
}
+ case Instruction::applyClassicMatcher: {
+ const auto* matcher = readFromMemory<const MatchExpression*>(pcPointer);
+ pcPointer += sizeof(matcher);
+
+ auto [ownedObj, tagObj, valObj] = getFromStack(0);
+
+ BSONObj bsonObjForMatching;
+ if (tagObj == value::TypeTags::Object) {
+ BSONObjBuilder builder;
+ sbe::bson::convertToBsonObj(builder, sbe::value::getObjectView(valObj));
+ bsonObjForMatching = builder.obj();
+ } else if (tagObj == value::TypeTags::bsonObject) {
+ auto bson = value::getRawPointerView(valObj);
+ bsonObjForMatching = BSONObj(bson);
+ } else {
+ MONGO_UNREACHABLE_TASSERT(6681402);
+ }
+
+ bool res = matcher->matchesBSON(bsonObjForMatching);
+ if (ownedObj) {
+ value::releaseValue(tagObj, valObj);
+ }
+ topStack(false, value::TypeTags::Boolean, value::bitcastFrom<bool>(res));
+ break;
+ }
default:
MONGO_UNREACHABLE;
}
diff --git a/src/mongo/db/exec/sbe/vm/vm.h b/src/mongo/db/exec/sbe/vm/vm.h
index 56d708fe1a1..2fec8265bfd 100644
--- a/src/mongo/db/exec/sbe/vm/vm.h
+++ b/src/mongo/db/exec/sbe/vm/vm.h
@@ -321,6 +321,8 @@ struct Instruction {
fail,
+ applyClassicMatcher, // Instruction which calls into the classic engine MatchExpression.
+
lastInstruction // this is just a marker used to calculate number of instructions
};
@@ -330,6 +332,19 @@ struct Instruction {
False,
};
+ static const char* toStringConstants(Constants k) {
+ switch (k) {
+ case Null:
+ return "Null";
+ case True:
+ return "True";
+ case False:
+ return "False";
+ default:
+ return "unknown";
+ }
+ }
+
// Make sure that values in this arrays are always in-sync with the enum.
static int stackOffset[];
@@ -481,6 +496,8 @@ struct Instruction {
return "ret";
case fail:
return "fail";
+ case applyClassicMatcher:
+ return "applyClassicMatcher";
default:
return "unrecognized";
}
@@ -769,9 +786,13 @@ public:
appendSimpleInstruction(Instruction::fail);
}
void appendNumericConvert(value::TypeTags targetTag);
+ void appendApplyClassicMatcher(const MatchExpression*);
void fixup(int offset);
+ // For printing from an interactive debugger.
+ std::string toString() const;
+
private:
void appendSimpleInstruction(Instruction::Tags tag);
auto allocateSpace(size_t size) {
@@ -784,9 +805,6 @@ private:
void copyCodeAndFixup(CodeFragment&& from);
private:
- // For printing from an interactive debugger.
- std::string toString() const;
-
absl::InlinedVector<uint8_t, 16> _instrs;
/**
diff --git a/src/mongo/db/exec/update_stage.cpp b/src/mongo/db/exec/update_stage.cpp
index 74dc3aaba5a..27304f16289 100644
--- a/src/mongo/db/exec/update_stage.cpp
+++ b/src/mongo/db/exec/update_stage.cpp
@@ -461,24 +461,41 @@ PlanStage::StageState UpdateStage::doWork(WorkingSetID* out) {
bool writeToOrphan = false;
if (!_params.request->explain() && _isUserInitiatedWrite) {
- const auto action = _preWriteFilter.computeAction(member->doc.value());
- if (action == write_stage_common::PreWriteFilter::Action::kSkip) {
- LOGV2_DEBUG(
- 5983200,
- 3,
- "Skipping update operation to orphan document to prevent a wrong change "
- "stream event",
- "namespace"_attr = collection()->ns(),
- "record"_attr = member->doc.value());
- return PlanStage::NEED_TIME;
- } else if (action == write_stage_common::PreWriteFilter::Action::kWriteAsFromMigrate) {
- LOGV2_DEBUG(6184701,
- 3,
- "Marking update operation to orphan document with the fromMigrate flag "
- "to prevent a wrong change stream event",
- "namespace"_attr = collection()->ns(),
- "record"_attr = member->doc.value());
- writeToOrphan = true;
+ try {
+ const auto action = _preWriteFilter.computeAction(member->doc.value());
+ if (action == write_stage_common::PreWriteFilter::Action::kSkip) {
+ LOGV2_DEBUG(
+ 5983200,
+ 3,
+ "Skipping update operation to orphan document to prevent a wrong change "
+ "stream event",
+ "namespace"_attr = collection()->ns(),
+ "record"_attr = member->doc.value());
+ return PlanStage::NEED_TIME;
+ } else if (action ==
+ write_stage_common::PreWriteFilter::Action::kWriteAsFromMigrate) {
+ LOGV2_DEBUG(
+ 6184701,
+ 3,
+ "Marking update operation to orphan document with the fromMigrate flag "
+ "to prevent a wrong change stream event",
+ "namespace"_attr = collection()->ns(),
+ "record"_attr = member->doc.value());
+ writeToOrphan = true;
+ }
+ } catch (const ExceptionFor<ErrorCodes::StaleConfig>& ex) {
+ if (ex->getVersionReceived() == ChunkVersion::IGNORED() &&
+ ex->getCriticalSectionSignal()) {
+ // If ChunkVersion is IGNORED and we encountered a critical section, then yield,
+ // wait for critical section to finish and then we'll resume the write from the
+ // point we had left. We do this to prevent large multi-writes from repeatedly
+ // failing due to StaleConfig and exhausting the mongos retry attempts.
+ planExecutorShardingCriticalSectionFuture(opCtx()) =
+ ex->getCriticalSectionSignal();
+                    memberFreer.dismiss(); // Keep this member around so we can retry updating it.
+ return prepareToRetryWSM(id, out);
+ }
+ throw;
}
}
@@ -508,6 +525,18 @@ PlanStage::StageState UpdateStage::doWork(WorkingSetID* out) {
} catch (const WriteConflictException&) {
memberFreer.dismiss(); // Keep this member around so we can retry updating it.
return prepareToRetryWSM(id, out);
+ } catch (const ExceptionFor<ErrorCodes::StaleConfig>& ex) {
+ if (ex->getVersionReceived() == ChunkVersion::IGNORED() &&
+ ex->getCriticalSectionSignal()) {
+ // If ChunkVersion is IGNORED and we encountered a critical section, then yield,
+ // wait for critical section to finish and then we'll resume the write from the
+ // point we had left. We do this to prevent large multi-writes from repeatedly
+ // failing due to StaleConfig and exhausting the mongos retry attempts.
+ planExecutorShardingCriticalSectionFuture(opCtx()) = ex->getCriticalSectionSignal();
+ memberFreer.dismiss(); // Keep this member around so we can retry updating it.
+ return prepareToRetryWSM(id, out);
+ }
+ throw;
}
// Set member's obj to be the doc we want to return.
diff --git a/src/mongo/db/exec/write_stage_common.cpp b/src/mongo/db/exec/write_stage_common.cpp
index 0a1ed4179aa..3d885d9d50e 100644
--- a/src/mongo/db/exec/write_stage_common.cpp
+++ b/src/mongo/db/exec/write_stage_common.cpp
@@ -46,15 +46,6 @@
namespace mongo {
-namespace {
-
-bool computeIsStandaloneOrPrimary(OperationContext* opCtx) {
- const auto replCoord{repl::ReplicationCoordinator::get(opCtx)};
- return replCoord->canAcceptWritesForDatabase(opCtx, "admin");
-}
-
-} // namespace
-
namespace write_stage_common {
PreWriteFilter::PreWriteFilter(OperationContext* opCtx, NamespaceString nss)
@@ -65,14 +56,23 @@ PreWriteFilter::PreWriteFilter(OperationContext* opCtx, NamespaceString nss)
return fcv.isVersionInitialized() &&
feature_flags::gFeatureFlagNoChangeStreamEventsDueToOrphans.isEnabled(fcv);
}()),
- _isStandaloneOrPrimary(computeIsStandaloneOrPrimary(_opCtx)) {}
+ _skipFiltering([&] {
+ // Always allow writes on replica sets.
+ if (serverGlobalParams.clusterRole == ClusterRole::None) {
+ return true;
+ }
+
+ // Always allow writes on standalone and secondary nodes.
+ const auto replCoord{repl::ReplicationCoordinator::get(opCtx)};
+ return !replCoord->canAcceptWritesForDatabase(opCtx, NamespaceString::kAdminDb);
+ }()) {}
PreWriteFilter::Action PreWriteFilter::computeAction(const Document& doc) {
// Skip the checks if the Filter is not enabled.
if (!_isEnabled)
return Action::kWrite;
- if (!_isStandaloneOrPrimary) {
+ if (_skipFiltering) {
// Secondaries do not apply any filtering logic as the primary already did.
return Action::kWrite;
}
diff --git a/src/mongo/db/exec/write_stage_common.h b/src/mongo/db/exec/write_stage_common.h
index 3eff70da081..5628822efff 100644
--- a/src/mongo/db/exec/write_stage_common.h
+++ b/src/mongo/db/exec/write_stage_common.h
@@ -80,7 +80,7 @@ private:
OperationContext* _opCtx;
NamespaceString _nss;
const bool _isEnabled;
- const bool _isStandaloneOrPrimary;
+ const bool _skipFiltering;
std::unique_ptr<ShardFilterer> _shardFilterer;
};
diff --git a/src/mongo/db/exhaust_cursor_currentop_integration_test.cpp b/src/mongo/db/exhaust_cursor_currentop_integration_test.cpp
index df60b317eb2..faa05c2b63e 100644
--- a/src/mongo/db/exhaust_cursor_currentop_integration_test.cpp
+++ b/src/mongo/db/exhaust_cursor_currentop_integration_test.cpp
@@ -143,42 +143,49 @@ auto startExhaustQuery(
int queryOptions = 0,
Milliseconds awaitDataTimeoutMS = Milliseconds(5000),
const boost::optional<repl::OpTime>& lastKnownCommittedOpTime = boost::none) {
- queryOptions = queryOptions | QueryOption_Exhaust;
- auto queryThread =
- stdx::async(stdx::launch::async,
- [&queryCursor,
- queryConnection,
- queryOptions,
- awaitDataTimeoutMS,
- lastKnownCommittedOpTime] {
- const auto projSpec = BSON("_id" << 0 << "a" << 1);
- // Issue the initial 'find' with a batchSize of 2 and the exhaust flag set.
- // We then iterate through the first batch and confirm that the results are
- // as expected.
- queryCursor = queryConnection->query_DEPRECATED(
- testNSS, BSONObj{}, Query(), 0, 0, &projSpec, queryOptions, 2);
- for (int i = 0; i < 2; ++i) {
- ASSERT_BSONOBJ_EQ(queryCursor->nextSafe(), BSON("a" << i));
- }
- // Having exhausted the two results returned by the initial find, we set the
- // batchSize to 1 and issue a single getMore via DBClientCursor::more().
- // Because the 'exhaust' flag is set, the server will generate a series of
- // internal getMores and stream them back to the client until the cursor is
- // exhausted, without the client sending any further getMore requests. We
- // expect this request to hang at the
- // 'waitWithPinnedCursorDuringGetMoreBatch' failpoint.
- queryCursor->setBatchSize(1);
- if ((queryOptions & QueryOption_CursorTailable) &&
- (queryOptions & QueryOption_AwaitData)) {
- queryCursor->setAwaitDataTimeoutMS(awaitDataTimeoutMS);
- if (lastKnownCommittedOpTime) {
- auto term = lastKnownCommittedOpTime.get().getTerm();
- queryCursor->setCurrentTermAndLastCommittedOpTime(
- term, lastKnownCommittedOpTime);
- }
- }
- ASSERT(queryCursor->more());
- });
+ auto queryThread = stdx::async(
+ stdx::launch::async,
+ [&queryCursor,
+ queryConnection,
+ queryOptions,
+ awaitDataTimeoutMS,
+ lastKnownCommittedOpTime] {
+ const auto projSpec = BSON("_id" << 0 << "a" << 1);
+ // Issue the initial 'find' with a batchSize of 2 and the exhaust flag set.
+ // We then iterate through the first batch and confirm that the results are
+ // as expected.
+ FindCommandRequest findCmd{testNSS};
+ findCmd.setProjection(projSpec);
+ findCmd.setBatchSize(2);
+ if (queryOptions & QueryOption_CursorTailable) {
+ findCmd.setTailable(true);
+ }
+ if (queryOptions & QueryOption_AwaitData) {
+ findCmd.setAwaitData(true);
+ }
+
+ queryCursor = queryConnection->find(findCmd, ReadPreferenceSetting{}, ExhaustMode::kOn);
+ for (int i = 0; i < 2; ++i) {
+ ASSERT_BSONOBJ_EQ(queryCursor->nextSafe(), BSON("a" << i));
+ }
+ // Having exhausted the two results returned by the initial find, we set the
+ // batchSize to 1 and issue a single getMore via DBClientCursor::more().
+ // Because the 'exhaust' flag is set, the server will generate a series of
+ // internal getMores and stream them back to the client until the cursor is
+ // exhausted, without the client sending any further getMore requests. We
+ // expect this request to hang at the
+ // 'waitWithPinnedCursorDuringGetMoreBatch' failpoint.
+ queryCursor->setBatchSize(1);
+ if (findCmd.getTailable() && findCmd.getAwaitData()) {
+ queryCursor->setAwaitDataTimeoutMS(awaitDataTimeoutMS);
+ if (lastKnownCommittedOpTime) {
+ auto term = lastKnownCommittedOpTime.get().getTerm();
+ queryCursor->setCurrentTermAndLastCommittedOpTime(term,
+ lastKnownCommittedOpTime);
+ }
+ }
+ ASSERT(queryCursor->more());
+ });
// Wait until the parallel operation initializes its cursor.
const auto startTime = clock->now();
diff --git a/src/mongo/db/fle_crud.cpp b/src/mongo/db/fle_crud.cpp
index d8f88c80991..c9ee8496652 100644
--- a/src/mongo/db/fle_crud.cpp
+++ b/src/mongo/db/fle_crud.cpp
@@ -46,6 +46,7 @@
#include "mongo/db/query/collation/collator_factory_interface.h"
#include "mongo/db/query/find_command_gen.h"
#include "mongo/db/query/fle/server_rewrite.h"
+#include "mongo/db/repl/repl_client_info.h"
#include "mongo/db/service_context.h"
#include "mongo/db/transaction_api.h"
#include "mongo/idl/idl_parser.h"
@@ -96,21 +97,24 @@ void appendSingleStatusToWriteErrors(const Status& status,
replyBase->setWriteErrors(errors);
}
-void replyToResponse(write_ops::WriteCommandReplyBase* replyBase,
+void replyToResponse(OperationContext* opCtx,
+ write_ops::WriteCommandReplyBase* replyBase,
BatchedCommandResponse* response) {
response->setStatus(Status::OK());
response->setN(replyBase->getN());
- if (replyBase->getElectionId()) {
- response->setElectionId(replyBase->getElectionId().value());
- }
- if (replyBase->getOpTime()) {
- response->setLastOp(replyBase->getOpTime().value());
- }
if (replyBase->getWriteErrors()) {
for (const auto& error : *replyBase->getWriteErrors()) {
response->addToErrDetails(error);
}
}
+
+ // Update the OpTime for the reply to current OpTime
+ //
+ // The OpTime in the reply reflects the OpTime of when the request was run, not when it was
+ // committed. The Transaction API propagates the OpTime from the commit transaction onto the
+ // current thread so grab it from TLS and change the OpTime on the reply.
+ //
+ response->setLastOp(repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp());
}
void responseToReply(const BatchedCommandResponse& response,
@@ -188,16 +192,20 @@ std::pair<FLEBatchResult, write_ops::InsertCommandReply> processInsert(
auto edcNss = insertRequest.getNamespace();
auto ei = insertRequest.getEncryptionInformation().get();
+ bool bypassDocumentValidation =
+ insertRequest.getWriteCommandRequestBase().getBypassDocumentValidation();
+
auto efc = EncryptionInformationHelpers::getAndValidateSchema(edcNss, ei);
auto documents = insertRequest.getDocuments();
// TODO - how to check if a document will be too large???
+
uassert(6371202,
"Only single insert batches are supported in Queryable Encryption",
documents.size() == 1);
auto document = documents[0];
- EDCServerCollection::validateEncryptedFieldInfo(document, efc);
+ EDCServerCollection::validateEncryptedFieldInfo(document, efc, bypassDocumentValidation);
auto serverPayload = std::make_shared<std::vector<EDCServerPayloadInfo>>(
EDCServerCollection::getEncryptedFieldInfo(document));
@@ -221,8 +229,8 @@ std::pair<FLEBatchResult, write_ops::InsertCommandReply> processInsert(
auto swResult = trun->runNoThrow(
opCtx,
- [sharedInsertBlock, reply, ownedDocument](const txn_api::TransactionClient& txnClient,
- ExecutorPtr txnExec) {
+ [sharedInsertBlock, reply, ownedDocument, bypassDocumentValidation](
+ const txn_api::TransactionClient& txnClient, ExecutorPtr txnExec) {
FLEQueryInterfaceImpl queryImpl(txnClient, getGlobalServiceContext());
auto [edcNss2, efc2, serverPayload2, stmtId2] = *sharedInsertBlock.get();
@@ -232,8 +240,13 @@ std::pair<FLEBatchResult, write_ops::InsertCommandReply> processInsert(
fleCrudHangPreInsert.pauseWhileSet();
}
- *reply = uassertStatusOK(processInsert(
- &queryImpl, edcNss2, *serverPayload2.get(), efc2, stmtId2, ownedDocument));
+ *reply = uassertStatusOK(processInsert(&queryImpl,
+ edcNss2,
+ *serverPayload2.get(),
+ efc2,
+ stmtId2,
+ ownedDocument,
+ bypassDocumentValidation));
if (MONGO_unlikely(fleCrudHangInsert.shouldFail())) {
LOGV2(6371903, "Hanging due to fleCrudHangInsert fail point");
@@ -439,7 +452,8 @@ void processFieldsForInsert(FLEQueryInterface* queryImpl,
const NamespaceString& edcNss,
std::vector<EDCServerPayloadInfo>& serverPayload,
const EncryptedFieldConfig& efc,
- int32_t* pStmtId) {
+ int32_t* pStmtId,
+ bool bypassDocumentValidation) {
NamespaceString nssEsc(edcNss.db(), efc.getEscCollection().get());
@@ -507,7 +521,8 @@ void processFieldsForInsert(FLEQueryInterface* queryImpl,
ECOCCollection::generateDocument(payload.fieldPathName,
payload.payload.getEncryptedTokens()),
pStmtId,
- false));
+ false,
+ bypassDocumentValidation));
checkWriteErrors(ecocInsertReply);
}
}
@@ -717,9 +732,11 @@ StatusWith<write_ops::InsertCommandReply> processInsert(
std::vector<EDCServerPayloadInfo>& serverPayload,
const EncryptedFieldConfig& efc,
int32_t stmtId,
- BSONObj document) {
+ BSONObj document,
+ bool bypassDocumentValidation) {
- processFieldsForInsert(queryImpl, edcNss, serverPayload, efc, &stmtId);
+ processFieldsForInsert(
+ queryImpl, edcNss, serverPayload, efc, &stmtId, bypassDocumentValidation);
auto finalDoc = EDCServerCollection::finalizeForInsert(document, serverPayload);
@@ -790,6 +807,9 @@ write_ops::UpdateCommandReply processUpdate(FLEQueryInterface* queryImpl,
auto tokenMap = EncryptionInformationHelpers::getDeleteTokens(edcNss, ei);
const auto updateOpEntry = updateRequest.getUpdates()[0];
+ auto bypassDocumentValidation =
+ updateRequest.getWriteCommandRequestBase().getBypassDocumentValidation();
+
const auto updateModification = updateOpEntry.getU();
int32_t stmtId = getStmtIdForWriteAt(updateRequest, 0);
@@ -797,16 +817,26 @@ write_ops::UpdateCommandReply processUpdate(FLEQueryInterface* queryImpl,
// Step 1 ----
std::vector<EDCServerPayloadInfo> serverPayload;
auto newUpdateOpEntry = updateRequest.getUpdates()[0];
- newUpdateOpEntry.setQ(fle::rewriteEncryptedFilterInsideTxn(
- queryImpl, updateRequest.getDbName(), efc, expCtx, newUpdateOpEntry.getQ()));
+
+ auto highCardinalityModeAllowed = newUpdateOpEntry.getUpsert()
+ ? fle::HighCardinalityModeAllowed::kDisallow
+ : fle::HighCardinalityModeAllowed::kAllow;
+
+ newUpdateOpEntry.setQ(fle::rewriteEncryptedFilterInsideTxn(queryImpl,
+ updateRequest.getDbName(),
+ efc,
+ expCtx,
+ newUpdateOpEntry.getQ(),
+ highCardinalityModeAllowed));
if (updateModification.type() == write_ops::UpdateModification::Type::kModifier) {
auto updateModifier = updateModification.getUpdateModifier();
auto setObject = updateModifier.getObjectField("$set");
- EDCServerCollection::validateEncryptedFieldInfo(setObject, efc);
+ EDCServerCollection::validateEncryptedFieldInfo(setObject, efc, bypassDocumentValidation);
serverPayload = EDCServerCollection::getEncryptedFieldInfo(updateModifier);
- processFieldsForInsert(queryImpl, edcNss, serverPayload, efc, &stmtId);
+ processFieldsForInsert(
+ queryImpl, edcNss, serverPayload, efc, &stmtId, bypassDocumentValidation);
// Step 2 ----
auto pushUpdate = EDCServerCollection::finalizeForUpdate(updateModifier, serverPayload);
@@ -815,10 +845,12 @@ write_ops::UpdateCommandReply processUpdate(FLEQueryInterface* queryImpl,
pushUpdate, write_ops::UpdateModification::ClassicTag(), false));
} else {
auto replacementDocument = updateModification.getUpdateReplacement();
- EDCServerCollection::validateEncryptedFieldInfo(replacementDocument, efc);
+ EDCServerCollection::validateEncryptedFieldInfo(
+ replacementDocument, efc, bypassDocumentValidation);
serverPayload = EDCServerCollection::getEncryptedFieldInfo(replacementDocument);
- processFieldsForInsert(queryImpl, edcNss, serverPayload, efc, &stmtId);
+ processFieldsForInsert(
+ queryImpl, edcNss, serverPayload, efc, &stmtId, bypassDocumentValidation);
// Step 2 ----
auto safeContentReplace =
@@ -833,6 +865,8 @@ write_ops::UpdateCommandReply processUpdate(FLEQueryInterface* queryImpl,
newUpdateRequest.setUpdates({newUpdateOpEntry});
newUpdateRequest.getWriteCommandRequestBase().setStmtIds(boost::none);
newUpdateRequest.getWriteCommandRequestBase().setStmtId(stmtId);
+ newUpdateRequest.getWriteCommandRequestBase().setBypassDocumentValidation(
+ bypassDocumentValidation);
++stmtId;
auto [updateReply, originalDocument] =
@@ -890,6 +924,10 @@ FLEBatchResult processFLEBatch(OperationContext* opCtx,
BatchedCommandResponse* response,
boost::optional<OID> targetEpoch) {
+ if (request.getWriteCommandRequestBase().getEncryptionInformation()->getCrudProcessed()) {
+ return FLEBatchResult::kNotProcessed;
+ }
+
// TODO (SERVER-65077): Remove FCV check once 6.0 is released
uassert(6371209,
"Queryable Encryption is only supported when FCV supports 6.0",
@@ -904,7 +942,7 @@ FLEBatchResult processFLEBatch(OperationContext* opCtx,
return FLEBatchResult::kNotProcessed;
}
- replyToResponse(&insertReply.getWriteCommandReplyBase(), response);
+ replyToResponse(opCtx, &insertReply.getWriteCommandReplyBase(), response);
return FLEBatchResult::kProcessed;
} else if (request.getBatchType() == BatchedCommandRequest::BatchType_Delete) {
@@ -913,7 +951,7 @@ FLEBatchResult processFLEBatch(OperationContext* opCtx,
auto deleteReply = processDelete(opCtx, deleteRequest, &getTransactionWithRetriesForMongoS);
- replyToResponse(&deleteReply.getWriteCommandReplyBase(), response);
+ replyToResponse(opCtx, &deleteReply.getWriteCommandReplyBase(), response);
return FLEBatchResult::kProcessed;
} else if (request.getBatchType() == BatchedCommandRequest::BatchType_Update) {
@@ -922,7 +960,7 @@ FLEBatchResult processFLEBatch(OperationContext* opCtx,
auto updateReply = processUpdate(opCtx, updateRequest, &getTransactionWithRetriesForMongoS);
- replyToResponse(&updateReply.getWriteCommandReplyBase(), response);
+ replyToResponse(opCtx, &updateReply.getWriteCommandReplyBase(), response);
response->setNModified(updateReply.getNModified());
@@ -968,19 +1006,25 @@ std::unique_ptr<BatchedCommandRequest> processFLEBatchExplain(
request.getNS(),
deleteRequest.getEncryptionInformation().get(),
newDeleteOp.getQ(),
- &getTransactionWithRetriesForMongoS));
+ &getTransactionWithRetriesForMongoS,
+ fle::HighCardinalityModeAllowed::kAllow));
deleteRequest.setDeletes({newDeleteOp});
deleteRequest.getWriteCommandRequestBase().setEncryptionInformation(boost::none);
return std::make_unique<BatchedCommandRequest>(deleteRequest);
} else if (request.getBatchType() == BatchedCommandRequest::BatchType_Update) {
auto updateRequest = request.getUpdateRequest();
auto newUpdateOp = updateRequest.getUpdates()[0];
+ auto highCardinalityModeAllowed = newUpdateOp.getUpsert()
+ ? fle::HighCardinalityModeAllowed::kDisallow
+ : fle::HighCardinalityModeAllowed::kAllow;
+
newUpdateOp.setQ(fle::rewriteQuery(opCtx,
getExpCtx(newUpdateOp),
request.getNS(),
updateRequest.getEncryptionInformation().get(),
newUpdateOp.getQ(),
- &getTransactionWithRetriesForMongoS));
+ &getTransactionWithRetriesForMongoS,
+ highCardinalityModeAllowed));
updateRequest.setUpdates({newUpdateOp});
updateRequest.getWriteCommandRequestBase().setEncryptionInformation(boost::none);
return std::make_unique<BatchedCommandRequest>(updateRequest);
@@ -1003,10 +1047,22 @@ write_ops::FindAndModifyCommandReply processFindAndModify(
auto newFindAndModifyRequest = findAndModifyRequest;
+ const auto bypassDocumentValidation =
+ findAndModifyRequest.getBypassDocumentValidation().value_or(false);
+
// Step 0 ----
// Rewrite filter
- newFindAndModifyRequest.setQuery(fle::rewriteEncryptedFilterInsideTxn(
- queryImpl, edcNss.db(), efc, expCtx, findAndModifyRequest.getQuery()));
+ auto highCardinalityModeAllowed = findAndModifyRequest.getUpsert().value_or(false)
+ ? fle::HighCardinalityModeAllowed::kDisallow
+ : fle::HighCardinalityModeAllowed::kAllow;
+
+ newFindAndModifyRequest.setQuery(
+ fle::rewriteEncryptedFilterInsideTxn(queryImpl,
+ edcNss.db(),
+ efc,
+ expCtx,
+ findAndModifyRequest.getQuery(),
+ highCardinalityModeAllowed));
// Make sure not to inherit the command's writeConcern, this should be set at the transaction
// level.
@@ -1023,9 +1079,11 @@ write_ops::FindAndModifyCommandReply processFindAndModify(
if (updateModification.type() == write_ops::UpdateModification::Type::kModifier) {
auto updateModifier = updateModification.getUpdateModifier();
auto setObject = updateModifier.getObjectField("$set");
- EDCServerCollection::validateEncryptedFieldInfo(setObject, efc);
+ EDCServerCollection::validateEncryptedFieldInfo(
+ setObject, efc, bypassDocumentValidation);
serverPayload = EDCServerCollection::getEncryptedFieldInfo(updateModifier);
- processFieldsForInsert(queryImpl, edcNss, serverPayload, efc, &stmtId);
+ processFieldsForInsert(
+ queryImpl, edcNss, serverPayload, efc, &stmtId, bypassDocumentValidation);
auto pushUpdate = EDCServerCollection::finalizeForUpdate(updateModifier, serverPayload);
@@ -1034,10 +1092,12 @@ write_ops::FindAndModifyCommandReply processFindAndModify(
pushUpdate, write_ops::UpdateModification::ClassicTag(), false);
} else {
auto replacementDocument = updateModification.getUpdateReplacement();
- EDCServerCollection::validateEncryptedFieldInfo(replacementDocument, efc);
+ EDCServerCollection::validateEncryptedFieldInfo(
+ replacementDocument, efc, bypassDocumentValidation);
serverPayload = EDCServerCollection::getEncryptedFieldInfo(replacementDocument);
- processFieldsForInsert(queryImpl, edcNss, serverPayload, efc, &stmtId);
+ processFieldsForInsert(
+ queryImpl, edcNss, serverPayload, efc, &stmtId, bypassDocumentValidation);
// Step 2 ----
auto safeContentReplace =
@@ -1129,8 +1189,17 @@ write_ops::FindAndModifyCommandRequest processFindAndModifyExplain(
auto efc = EncryptionInformationHelpers::getAndValidateSchema(edcNss, ei);
auto newFindAndModifyRequest = findAndModifyRequest;
- newFindAndModifyRequest.setQuery(fle::rewriteEncryptedFilterInsideTxn(
- queryImpl, edcNss.db(), efc, expCtx, findAndModifyRequest.getQuery()));
+ auto highCardinalityModeAllowed = findAndModifyRequest.getUpsert().value_or(false)
+ ? fle::HighCardinalityModeAllowed::kDisallow
+ : fle::HighCardinalityModeAllowed::kAllow;
+
+ newFindAndModifyRequest.setQuery(
+ fle::rewriteEncryptedFilterInsideTxn(queryImpl,
+ edcNss.db(),
+ efc,
+ expCtx,
+ findAndModifyRequest.getQuery(),
+ highCardinalityModeAllowed));
newFindAndModifyRequest.setEncryptionInformation(boost::none);
return newFindAndModifyRequest;
@@ -1232,10 +1301,23 @@ uint64_t FLEQueryInterfaceImpl::countDocuments(const NamespaceString& nss) {
}
StatusWith<write_ops::InsertCommandReply> FLEQueryInterfaceImpl::insertDocument(
- const NamespaceString& nss, BSONObj obj, StmtId* pStmtId, bool translateDuplicateKey) {
+ const NamespaceString& nss,
+ BSONObj obj,
+ StmtId* pStmtId,
+ bool translateDuplicateKey,
+ bool bypassDocumentValidation) {
write_ops::InsertCommandRequest insertRequest(nss);
insertRequest.setDocuments({obj});
+ EncryptionInformation encryptionInformation;
+ encryptionInformation.setCrudProcessed(true);
+
+ // We need to set an empty BSON object here for the schema.
+ encryptionInformation.setSchema(BSONObj());
+ insertRequest.getWriteCommandRequestBase().setEncryptionInformation(encryptionInformation);
+ insertRequest.getWriteCommandRequestBase().setBypassDocumentValidation(
+ bypassDocumentValidation);
+
int32_t stmtId = *pStmtId;
if (stmtId != kUninitializedStmtId) {
(*pStmtId)++;
@@ -1320,6 +1402,7 @@ std::pair<write_ops::UpdateCommandReply, BSONObj> FLEQueryInterfaceImpl::updateW
findAndModifyRequest.setLet(
mergeLetAndCVariables(updateRequest.getLet(), updateOpEntry.getC()));
findAndModifyRequest.setStmtId(updateRequest.getStmtId());
+ findAndModifyRequest.setBypassDocumentValidation(updateRequest.getBypassDocumentValidation());
auto ei2 = ei;
ei2.setCrudProcessed(true);
@@ -1361,9 +1444,15 @@ std::pair<write_ops::UpdateCommandReply, BSONObj> FLEQueryInterfaceImpl::updateW
}
write_ops::UpdateCommandReply FLEQueryInterfaceImpl::update(
- const NamespaceString& nss,
- int32_t stmtId,
- const write_ops::UpdateCommandRequest& updateRequest) {
+ const NamespaceString& nss, int32_t stmtId, write_ops::UpdateCommandRequest& updateRequest) {
+
+ invariant(!updateRequest.getWriteCommandRequestBase().getEncryptionInformation());
+
+ EncryptionInformation encryptionInformation;
+ encryptionInformation.setCrudProcessed(true);
+
+ encryptionInformation.setSchema(BSONObj());
+ updateRequest.getWriteCommandRequestBase().setEncryptionInformation(encryptionInformation);
dassert(updateRequest.getStmtIds().value_or(std::vector<int32_t>()).empty());
@@ -1401,7 +1490,6 @@ std::vector<BSONObj> FLEQueryInterfaceImpl::findDocuments(const NamespaceString&
BSONObj filter) {
FindCommandRequest find(nss);
find.setFilter(filter);
- find.setSingleBatch(true);
// Throws on error
return _txnClient.exhaustiveFind(find).get();
diff --git a/src/mongo/db/fle_crud.h b/src/mongo/db/fle_crud.h
index 738e85b8996..7c8d93ae1f9 100644
--- a/src/mongo/db/fle_crud.h
+++ b/src/mongo/db/fle_crud.h
@@ -261,7 +261,11 @@ public:
* FLEStateCollectionContention instead.
*/
virtual StatusWith<write_ops::InsertCommandReply> insertDocument(
- const NamespaceString& nss, BSONObj obj, StmtId* pStmtId, bool translateDuplicateKey) = 0;
+ const NamespaceString& nss,
+ BSONObj obj,
+ StmtId* pStmtId,
+ bool translateDuplicateKey,
+ bool bypassDocumentValidation = false) = 0;
/**
* Delete a single document with the given query.
@@ -294,7 +298,7 @@ public:
virtual write_ops::UpdateCommandReply update(
const NamespaceString& nss,
int32_t stmtId,
- const write_ops::UpdateCommandRequest& updateRequest) = 0;
+ write_ops::UpdateCommandRequest& updateRequest) = 0;
/**
* Do a single findAndModify request.
@@ -325,10 +329,12 @@ public:
uint64_t countDocuments(const NamespaceString& nss) final;
- StatusWith<write_ops::InsertCommandReply> insertDocument(const NamespaceString& nss,
- BSONObj obj,
- int32_t* pStmtId,
- bool translateDuplicateKey) final;
+ StatusWith<write_ops::InsertCommandReply> insertDocument(
+ const NamespaceString& nss,
+ BSONObj obj,
+ int32_t* pStmtId,
+ bool translateDuplicateKey,
+ bool bypassDocumentValidation = false) final;
std::pair<write_ops::DeleteCommandReply, BSONObj> deleteWithPreimage(
const NamespaceString& nss,
@@ -340,10 +346,9 @@ public:
const EncryptionInformation& ei,
const write_ops::UpdateCommandRequest& updateRequest) final;
- write_ops::UpdateCommandReply update(
- const NamespaceString& nss,
- int32_t stmtId,
- const write_ops::UpdateCommandRequest& updateRequest) final;
+ write_ops::UpdateCommandReply update(const NamespaceString& nss,
+ int32_t stmtId,
+ write_ops::UpdateCommandRequest& updateRequest) final;
write_ops::FindAndModifyCommandReply findAndModify(
const NamespaceString& nss,
@@ -408,7 +413,8 @@ StatusWith<write_ops::InsertCommandReply> processInsert(
std::vector<EDCServerPayloadInfo>& serverPayload,
const EncryptedFieldConfig& efc,
int32_t stmtId,
- BSONObj document);
+ BSONObj document,
+ bool bypassDocumentValidation = false);
/**
* Process a FLE delete with the query interface
diff --git a/src/mongo/db/fle_crud_mongod.cpp b/src/mongo/db/fle_crud_mongod.cpp
index 68327133c88..1e488f1f65a 100644
--- a/src/mongo/db/fle_crud_mongod.cpp
+++ b/src/mongo/db/fle_crud_mongod.cpp
@@ -284,7 +284,13 @@ BSONObj processFLEWriteExplainD(OperationContext* opCtx,
const BSONObj& query) {
auto expCtx = make_intrusive<ExpressionContext>(
opCtx, fle::collatorFromBSON(opCtx, collation), nss, runtimeConstants, letParameters);
- return fle::rewriteQuery(opCtx, expCtx, nss, info, query, &getTransactionWithRetriesForMongoD);
+ return fle::rewriteQuery(opCtx,
+ expCtx,
+ nss,
+ info,
+ query,
+ &getTransactionWithRetriesForMongoD,
+ fle::HighCardinalityModeAllowed::kAllow);
}
std::pair<write_ops::FindAndModifyCommandRequest, OpMsgRequest>
diff --git a/src/mongo/db/fle_crud_test.cpp b/src/mongo/db/fle_crud_test.cpp
index 527dd5bca11..0a5d7dfc37c 100644
--- a/src/mongo/db/fle_crud_test.cpp
+++ b/src/mongo/db/fle_crud_test.cpp
@@ -27,6 +27,7 @@
* it in the license file.
*/
+#include "mongo/base/error_codes.h"
#include "mongo/platform/basic.h"
#include <algorithm>
@@ -153,8 +154,12 @@ protected:
void assertDocumentCounts(uint64_t edc, uint64_t esc, uint64_t ecc, uint64_t ecoc);
- void doSingleInsert(int id, BSONElement element);
- void doSingleInsert(int id, BSONObj obj);
+ void testValidateEncryptedFieldInfo(BSONObj obj, bool bypassValidation);
+
+ void testValidateTags(BSONObj obj);
+
+ void doSingleInsert(int id, BSONElement element, bool bypassDocumentValidation = false);
+ void doSingleInsert(int id, BSONObj obj, bool bypassDocumentValidation = false);
void doSingleInsertWithContention(
int id, BSONElement element, int64_t cm, uint64_t cf, EncryptedFieldConfig efc);
@@ -406,7 +411,7 @@ void FleCrudTest::doSingleWideInsert(int id, uint64_t fieldCount, ValueGenerator
auto efc = getTestEncryptedFieldConfig();
- uassertStatusOK(processInsert(_queryImpl.get(), _edcNs, serverPayload, efc, 0, result));
+ uassertStatusOK(processInsert(_queryImpl.get(), _edcNs, serverPayload, efc, 0, result, false));
}
@@ -451,7 +456,16 @@ std::vector<char> generateSinglePlaceholder(BSONElement value, int64_t cm = 0) {
return v;
}
-void FleCrudTest::doSingleInsert(int id, BSONElement element) {
+void FleCrudTest::testValidateEncryptedFieldInfo(BSONObj obj, bool bypassValidation) {
+ auto efc = getTestEncryptedFieldConfig();
+ EDCServerCollection::validateEncryptedFieldInfo(obj, efc, bypassValidation);
+}
+
+void FleCrudTest::testValidateTags(BSONObj obj) {
+ FLEClientCrypto::validateTagsArray(obj);
+}
+
+void FleCrudTest::doSingleInsert(int id, BSONElement element, bool bypassDocumentValidation) {
auto buf = generateSinglePlaceholder(element);
BSONObjBuilder builder;
builder.append("_id", id);
@@ -467,10 +481,10 @@ void FleCrudTest::doSingleInsert(int id, BSONElement element) {
auto efc = getTestEncryptedFieldConfig();
- uassertStatusOK(processInsert(_queryImpl.get(), _edcNs, serverPayload, efc, 0, result));
+ uassertStatusOK(processInsert(_queryImpl.get(), _edcNs, serverPayload, efc, 0, result, false));
}
-void FleCrudTest::doSingleInsert(int id, BSONObj obj) {
+void FleCrudTest::doSingleInsert(int id, BSONObj obj, bool bypassDocumentValidation) {
doSingleInsert(id, obj.firstElement());
}
@@ -490,7 +504,7 @@ void FleCrudTest::doSingleInsertWithContention(
auto serverPayload = EDCServerCollection::getEncryptedFieldInfo(result);
- uassertStatusOK(processInsert(_queryImpl.get(), _edcNs, serverPayload, efc, 0, result));
+ uassertStatusOK(processInsert(_queryImpl.get(), _edcNs, serverPayload, efc, 0, result, false));
}
void FleCrudTest::doSingleInsertWithContention(
@@ -890,7 +904,6 @@ TEST_F(FleCrudTest, UpdateOneSameValue) {
<< "secret"));
}
-
// Update one document with replacement
TEST_F(FleCrudTest, UpdateOneReplace) {
@@ -956,7 +969,16 @@ TEST_F(FleCrudTest, SetSafeContent) {
builder.append("$set", BSON(kSafeContent << "foo"));
auto result = builder.obj();
- ASSERT_THROWS_CODE(doSingleUpdateWithUpdateDoc(1, result), DBException, 6371507);
+ ASSERT_THROWS_CODE(doSingleUpdateWithUpdateDoc(1, result), DBException, 6666200);
+}
+
+// Test that EDCServerCollection::validateEncryptedFieldInfo checks that the
+// safeContent cannot be present in the BSON obj.
+TEST_F(FleCrudTest, testValidateEncryptedFieldConfig) {
+ testValidateEncryptedFieldInfo(BSON(kSafeContent << "secret"), true);
+ ASSERT_THROWS_CODE(testValidateEncryptedFieldInfo(BSON(kSafeContent << "secret"), false),
+ DBException,
+ 6666200);
}
// Update one document via findAndModify
@@ -1038,6 +1060,11 @@ TEST_F(FleCrudTest, FindAndModify_RenameSafeContent) {
ASSERT_THROWS_CODE(doFindAndModify(req), DBException, 6371506);
}
+TEST_F(FleCrudTest, validateTagsTest) {
+ testValidateTags(BSON(kSafeContent << BSON_ARRAY(123)));
+ ASSERT_THROWS_CODE(testValidateTags(BSON(kSafeContent << "foo")), DBException, 6371507);
+}
+
// Mess with __safeContent__ and ensure the update errors
TEST_F(FleCrudTest, FindAndModify_SetSafeContent) {
doSingleInsert(1,
@@ -1056,8 +1083,7 @@ TEST_F(FleCrudTest, FindAndModify_SetSafeContent) {
req.setUpdate(
write_ops::UpdateModification(result, write_ops::UpdateModification::ClassicTag{}, false));
-
- ASSERT_THROWS_CODE(doFindAndModify(req), DBException, 6371507);
+ ASSERT_THROWS_CODE(doFindAndModify(req), DBException, 6666200);
}
TEST_F(FleTagsTest, InsertOne) {
@@ -1199,7 +1225,7 @@ TEST_F(FleTagsTest, MemoryLimit) {
doSingleInsert(10, doc);
// readTags returns 11 tags which does exceed memory limit.
- ASSERT_THROWS_CODE(readTags(doc), DBException, 6401800);
+ ASSERT_THROWS_CODE(readTags(doc), DBException, ErrorCodes::FLEMaxTagLimitExceeded);
doSingleDelete(5);
diff --git a/src/mongo/db/fle_query_interface_mock.cpp b/src/mongo/db/fle_query_interface_mock.cpp
index 2aeb39788dd..b5ca4e1e9cd 100644
--- a/src/mongo/db/fle_query_interface_mock.cpp
+++ b/src/mongo/db/fle_query_interface_mock.cpp
@@ -54,7 +54,11 @@ uint64_t FLEQueryInterfaceMock::countDocuments(const NamespaceString& nss) {
}
StatusWith<write_ops::InsertCommandReply> FLEQueryInterfaceMock::insertDocument(
- const NamespaceString& nss, BSONObj obj, StmtId* pStmtId, bool translateDuplicateKey) {
+ const NamespaceString& nss,
+ BSONObj obj,
+ StmtId* pStmtId,
+ bool translateDuplicateKey,
+ bool bypassDocumentValidation) {
repl::TimestampedBSONObj tb;
tb.obj = obj;
@@ -132,9 +136,7 @@ std::pair<write_ops::UpdateCommandReply, BSONObj> FLEQueryInterfaceMock::updateW
}
write_ops::UpdateCommandReply FLEQueryInterfaceMock::update(
- const NamespaceString& nss,
- int32_t stmtId,
- const write_ops::UpdateCommandRequest& updateRequest) {
+ const NamespaceString& nss, int32_t stmtId, write_ops::UpdateCommandRequest& updateRequest) {
auto [reply, _] = updateWithPreimage(nss, EncryptionInformation(), updateRequest);
return reply;
}
diff --git a/src/mongo/db/fle_query_interface_mock.h b/src/mongo/db/fle_query_interface_mock.h
index 229d2c08dfe..a89fc71ce1e 100644
--- a/src/mongo/db/fle_query_interface_mock.h
+++ b/src/mongo/db/fle_query_interface_mock.h
@@ -47,10 +47,12 @@ public:
uint64_t countDocuments(const NamespaceString& nss) final;
- StatusWith<write_ops::InsertCommandReply> insertDocument(const NamespaceString& nss,
- BSONObj obj,
- StmtId* pStmtId,
- bool translateDuplicateKey) final;
+ StatusWith<write_ops::InsertCommandReply> insertDocument(
+ const NamespaceString& nss,
+ BSONObj obj,
+ StmtId* pStmtId,
+ bool translateDuplicateKey,
+ bool bypassDocumentValidation = false) final;
std::pair<write_ops::DeleteCommandReply, BSONObj> deleteWithPreimage(
const NamespaceString& nss,
@@ -62,10 +64,9 @@ public:
const EncryptionInformation& ei,
const write_ops::UpdateCommandRequest& updateRequest) final;
- write_ops::UpdateCommandReply update(
- const NamespaceString& nss,
- int32_t stmtId,
- const write_ops::UpdateCommandRequest& updateRequest) final;
+ write_ops::UpdateCommandReply update(const NamespaceString& nss,
+ int32_t stmtId,
+ write_ops::UpdateCommandRequest& updateRequest) final;
write_ops::FindAndModifyCommandReply findAndModify(
const NamespaceString& nss,
diff --git a/src/mongo/db/geo/geoparser.cpp b/src/mongo/db/geo/geoparser.cpp
index 57e2fbee611..893d7832b18 100644
--- a/src/mongo/db/geo/geoparser.cpp
+++ b/src/mongo/db/geo/geoparser.cpp
@@ -52,16 +52,21 @@ namespace mongo {
namespace dps = ::mongo::dotted_path_support;
static Status parseFlatPoint(const BSONElement& elem, Point* out, bool allowAddlFields = false) {
- if (!elem.isABSONObj())
- return BAD_VALUE("Point must be an array or object");
+ if (!elem.isABSONObj()) {
+ return BAD_VALUE("Point must be an array or object, instead got type "
+ << typeName(elem.type()));
+ }
+
BSONObjIterator it(elem.Obj());
BSONElement x = it.next();
if (!x.isNumber()) {
- return BAD_VALUE("Point must only contain numeric elements");
+ return BAD_VALUE("Point must only contain numeric elements, instead got type "
+ << typeName(x.type()));
}
BSONElement y = it.next();
if (!y.isNumber()) {
- return BAD_VALUE("Point must only contain numeric elements");
+ return BAD_VALUE("Point must only contain numeric elements, instead got type "
+ << typeName(y.type()));
}
if (!allowAddlFields && it.more()) {
return BAD_VALUE("Point must only contain two numeric elements");
@@ -86,7 +91,7 @@ static Status coordToPoint(double lng, double lat, S2Point* out) {
// We don't rely on drem to clean up non-sane points. We just don't let them become
// spherical.
if (!isValidLngLat(lng, lat))
- return BAD_VALUE("longitude/latitude is out of bounds, lng: " << lng << " lat: " << lat);
+ return BAD_VALUE("Longitude/latitude is out of bounds, lng: " << lng << " lat: " << lat);
// Note that it's (lat, lng) for S2 but (lng, lat) for MongoDB.
S2LatLng ll = S2LatLng::FromDegrees(lat, lng).Normalized();
// This shouldn't happen since we should only have valid lng/lats.
@@ -101,7 +106,8 @@ static Status coordToPoint(double lng, double lat, S2Point* out) {
static Status parseGeoJSONCoordinate(const BSONElement& elem, S2Point* out) {
if (Array != elem.type()) {
- return BAD_VALUE("GeoJSON coordinates must be an array");
+ return BAD_VALUE("GeoJSON coordinates must be an array, instead got type "
+ << typeName(elem.type()));
}
Point p;
// GeoJSON allows extra elements, e.g. altitude.
@@ -116,7 +122,8 @@ static Status parseGeoJSONCoordinate(const BSONElement& elem, S2Point* out) {
// "coordinates": [ [100.0, 0.0], [101.0, 1.0] ]
static Status parseArrayOfCoordinates(const BSONElement& elem, vector<S2Point>* out) {
if (Array != elem.type()) {
- return BAD_VALUE("GeoJSON coordinates must be an array of coordinates");
+ return BAD_VALUE("GeoJSON coordinates must be an array of coordinates, instead got type "
+ << typeName(elem.type()));
}
BSONObjIterator it(elem.Obj());
// Iterate all coordinates in array
@@ -146,7 +153,8 @@ static Status isLoopClosed(const vector<S2Point>& loop, const BSONElement loopEl
}
if (loop[0] != loop[loop.size() - 1]) {
- return BAD_VALUE("Loop is not closed: " << loopElt.toString(false));
+ return BAD_VALUE("Loop is not closed, first vertex does not equal last vertex: "
+ << loopElt.toString(false));
}
return Status::OK();
@@ -156,7 +164,8 @@ static Status parseGeoJSONPolygonCoordinates(const BSONElement& elem,
bool skipValidation,
S2Polygon* out) {
if (Array != elem.type()) {
- return BAD_VALUE("Polygon coordinates must be an array");
+ return BAD_VALUE("Polygon coordinates must be an array, instead got type "
+ << typeName(elem.type()));
}
std::vector<std::unique_ptr<S2Loop>> loops;
@@ -184,8 +193,9 @@ static Status parseGeoJSONPolygonCoordinates(const BSONElement& elem,
// At least 3 vertices.
if (points.size() < 3) {
- return BAD_VALUE(
- "Loop must have at least 3 different vertices: " << coordinateElt.toString(false));
+ return BAD_VALUE("Loop must have at least 3 different vertices, "
+ << points.size() << " unique vertices were provided: "
+ << coordinateElt.toString(false));
}
loops.push_back(std::make_unique<S2Loop>(points));
@@ -266,15 +276,17 @@ static Status parseGeoJSONPolygonCoordinates(const BSONElement& elem,
}
static Status parseBigSimplePolygonCoordinates(const BSONElement& elem, BigSimplePolygon* out) {
- if (Array != elem.type())
- return BAD_VALUE("Coordinates of polygon must be an array");
+ if (Array != elem.type()) {
+ return BAD_VALUE("Coordinates of polygon must be an array, instead got type "
+ << typeName(elem.type()));
+ }
const vector<BSONElement>& coordinates = elem.Array();
// Only one loop is allowed in a BigSimplePolygon
if (coordinates.size() != 1) {
- return BAD_VALUE(
- "Only one simple loop is allowed in a big polygon: " << elem.toString(false));
+ return BAD_VALUE("Only one simple loop is allowed in a big polygon, instead provided "
+ << coordinates.size() << " loops: " << elem.toString(false));
}
vector<S2Point> exteriorVertices;
@@ -297,7 +309,9 @@ static Status parseBigSimplePolygonCoordinates(const BSONElement& elem, BigSimpl
// At least 3 vertices.
if (exteriorVertices.size() < 3) {
- return BAD_VALUE("Loop must have at least 3 different vertices: " << elem.toString(false));
+ return BAD_VALUE("Loop must have at least 3 different vertices, "
+ << exteriorVertices.size()
+ << " unique vertices were provided: " << elem.toString(false));
}
std::unique_ptr<S2Loop> loop(new S2Loop(exteriorVertices));
@@ -326,8 +340,10 @@ static Status parseGeoJSONCRS(const BSONObj& obj, CRS* crs, bool allowStrictSphe
return Status::OK();
}
- if (!crsElt.isABSONObj())
- return BAD_VALUE("GeoJSON CRS must be an object");
+ if (!crsElt.isABSONObj()) {
+ return BAD_VALUE("GeoJSON CRS must be an object, instead got type "
+ << typeName(crsElt.type()));
+ }
BSONObj crsObj = crsElt.embeddedObject();
// "type": "name"
@@ -336,17 +352,22 @@ static Status parseGeoJSONCRS(const BSONObj& obj, CRS* crs, bool allowStrictSphe
// "properties"
BSONElement propertiesElt = crsObj["properties"];
- if (!propertiesElt.isABSONObj())
- return BAD_VALUE("CRS must have field \"properties\" which is an object");
+ if (!propertiesElt.isABSONObj()) {
+ return BAD_VALUE("CRS must have field \"properties\" which is an object, instead got type "
+ << typeName(propertiesElt.type()));
+ }
BSONObj propertiesObj = propertiesElt.embeddedObject();
- if (String != propertiesObj["name"].type())
- return BAD_VALUE("In CRS, \"properties.name\" must be a string");
+ if (String != propertiesObj["name"].type()) {
+ return BAD_VALUE("In CRS, \"properties.name\" must be a string, instead got type "
+ << typeName(propertiesObj["name"].type()));
+ }
+
const string& name = propertiesObj["name"].String();
if (CRS_CRS84 == name || CRS_EPSG_4326 == name) {
*crs = SPHERE;
} else if (CRS_STRICT_WINDING == name) {
if (!allowStrictSphere) {
- return BAD_VALUE("Strict winding order is only supported by polygon");
+ return BAD_VALUE("Strict winding order CRS is only supported by polygon");
}
*crs = STRICT_SPHERE;
} else {
@@ -369,8 +390,8 @@ static Status parseGeoJSONLineCoordinates(const BSONElement& elem,
eraseDuplicatePoints(&vertices);
if (!skipValidation) {
if (vertices.size() < 2)
- return BAD_VALUE(
- "GeoJSON LineString must have at least 2 vertices: " << elem.toString(false));
+ return BAD_VALUE("GeoJSON LineString must have at least 2 vertices, instead got "
+ << vertices.size() << " vertices: " << elem.toString(false));
string err;
if (!S2Polyline::IsValid(vertices, &err))
@@ -384,9 +405,10 @@ static Status parseGeoJSONLineCoordinates(const BSONElement& elem,
// Parse legacy point or GeoJSON point, used by geo near.
// Only stored legacy points allow additional fields.
Status parsePoint(const BSONElement& elem, PointWithCRS* out, bool allowAddlFields) {
- if (!elem.isABSONObj())
- return BAD_VALUE("Point must be an array or object");
-
+ if (!elem.isABSONObj()) {
+ return BAD_VALUE("Point must be an array or object, instead got type "
+ << typeName(elem.type()));
+ }
BSONObj obj = elem.Obj();
// location: [1, 2] or location: {x: 1, y:2}
if (Array == elem.type() || obj.firstElement().isNumber()) {
@@ -439,7 +461,8 @@ Status GeoParser::parseLegacyPolygon(const BSONObj& obj, PolygonWithCRS* out) {
points.push_back(p);
}
if (points.size() < 3)
- return BAD_VALUE("Polygon must have at least 3 points");
+ return BAD_VALUE("Polygon must have at least 3 points, instead got " << points.size()
+ << " vertices");
out->oldPolygon.init(points);
out->crs = FLAT;
return Status::OK();
@@ -461,7 +484,7 @@ Status GeoParser::parseGeoJSONPoint(const BSONObj& obj, PointWithCRS* out) {
// Projection
out->crs = FLAT;
if (!ShapeProjection::supportsProject(*out, SPHERE))
- return BAD_VALUE("longitude/latitude is out of bounds, lng: " << out->oldPoint.x << " lat: "
+ return BAD_VALUE("Longitude/latitude is out of bounds, lng: " << out->oldPoint.x << " lat: "
<< out->oldPoint.y);
ShapeProjection::projectInto(out, SPHERE);
return Status::OK();
@@ -534,8 +557,11 @@ Status GeoParser::parseMultiLine(const BSONObj& obj, bool skipValidation, MultiL
return status;
BSONElement coordElt = dps::extractElementAtPath(obj, GEOJSON_COORDINATES);
- if (Array != coordElt.type())
- return BAD_VALUE("MultiLineString coordinates must be an array");
+ if (Array != coordElt.type()) {
+ return BAD_VALUE("MultiLineString coordinates must be an array, instead got type "
+ << typeName(coordElt.type()));
+ }
+
out->lines.clear();
auto& lines = out->lines;
@@ -564,9 +590,10 @@ Status GeoParser::parseMultiPolygon(const BSONObj& obj,
return status;
BSONElement coordElt = dps::extractElementAtPath(obj, GEOJSON_COORDINATES);
- if (Array != coordElt.type())
- return BAD_VALUE("MultiPolygon coordinates must be an array");
-
+ if (Array != coordElt.type()) {
+ return BAD_VALUE("MultiPolygon coordinates must be an array, instead got type "
+ << typeName(coordElt.type()));
+ }
out->polygons.clear();
auto& polygons = out->polygons;
@@ -597,11 +624,11 @@ Status GeoParser::parseLegacyCenter(const BSONObj& obj, CapWithCRS* out) {
BSONElement radius = objIt.next();
// radius >= 0 and is not NaN
if (!radius.isNumber() || !(radius.number() >= 0))
- return BAD_VALUE("radius must be a non-negative number");
+ return BAD_VALUE("Radius must be a non-negative number: " << radius.toString(false));
// No more
if (objIt.more())
- return BAD_VALUE("Only 2 fields allowed for circular region");
+ return BAD_VALUE("Only 2 fields allowed for circular region, but more were provided");
out->circle.radius = radius.number();
out->crs = FLAT;
@@ -627,13 +654,15 @@ Status GeoParser::parseCenterSphere(const BSONObj& obj, CapWithCRS* out) {
// Radius
BSONElement radiusElt = objIt.next();
// radius >= 0 and is not NaN
- if (!radiusElt.isNumber() || !(radiusElt.number() >= 0))
- return BAD_VALUE("radius must be a non-negative number");
+ if (!radiusElt.isNumber() || !(radiusElt.number() >= 0)) {
+ return BAD_VALUE("Radius must be a non-negative number: " << radiusElt.toString(false));
+ }
+
double radius = radiusElt.number();
// No more elements
if (objIt.more())
- return BAD_VALUE("Only 2 fields allowed for circular region");
+ return BAD_VALUE("Only 2 fields allowed for circular region, but more were provided");
out->cap = S2Cap::FromAxisAngle(centerPoint, S1Angle::Radians(radius));
out->circle.radius = radius;
@@ -656,16 +685,20 @@ Status GeoParser::parseGeometryCollection(const BSONObj& obj,
bool skipValidation,
GeometryCollection* out) {
BSONElement coordElt = dps::extractElementAtPath(obj, GEOJSON_GEOMETRIES);
- if (Array != coordElt.type())
- return BAD_VALUE("GeometryCollection geometries must be an array");
-
+ if (Array != coordElt.type()) {
+ return BAD_VALUE("GeometryCollection geometries must be an array, instead got type "
+ << typeName(coordElt.type()));
+ }
const vector<BSONElement>& geometries = coordElt.Array();
if (0 == geometries.size())
return BAD_VALUE("GeometryCollection geometries must have at least 1 element");
for (size_t i = 0; i < geometries.size(); ++i) {
if (Object != geometries[i].type())
- return BAD_VALUE("Element " << i << " of \"geometries\" is not an object");
+ return BAD_VALUE("Element " << i
+ << " of \"geometries\" must be an object, instead got type "
+ << typeName(geometries[i].type()) << ": "
+ << geometries[i].toString(false));
const BSONObj& geoObj = geometries[i].Obj();
GeoJSONType type = parseGeoJSONType(geoObj);
diff --git a/src/mongo/db/geo/hash.cpp b/src/mongo/db/geo/hash.cpp
index c2b7009a86d..86f47847b2e 100644
--- a/src/mongo/db/geo/hash.cpp
+++ b/src/mongo/db/geo/hash.cpp
@@ -154,16 +154,37 @@ void GeoHash::initFromString(const char* s) {
setBit(i, 1);
}
+namespace {
+// Expands a 32-bit value into a 64-bit value by interleaving a zero bit after each input bit.
+std::uint64_t interleaveWithZeros(std::uint32_t input) {
+ // The following example is an extension to 32-bits of the following bit manipulation for 16-bit
+ // numbers.
+ //
+ // 0000 0000 0000 0000 abcd efgh ijkl mnop
+ // -> 0000 0000 abcd efgh 0000 0000 ijkl mnop
+ // -> 0000 abcd 0000 efgh 0000 ijkl 0000 mnop
+ // -> 00ab 00cd 00ef 00gh 00ij 00kl 00mn 00op
+ // -> 0a0b 0c0d 0e0f 0g0h 0i0j 0k0l 0m0n 0o0p
+ uint64_t word = input;
+ word = (word ^ (word << 16)) & 0x0000ffff0000ffff;
+ word = (word ^ (word << 8)) & 0x00ff00ff00ff00ff;
+ word = (word ^ (word << 4)) & 0x0f0f0f0f0f0f0f0f;
+ word = (word ^ (word << 2)) & 0x3333333333333333;
+ word = (word ^ (word << 1)) & 0x5555555555555555;
+ return word;
+}
+} // namespace
+
GeoHash::GeoHash(unsigned x, unsigned y, unsigned bits) {
verify(bits <= 32);
- _hash = 0;
_bits = bits;
- for (unsigned i = 0; i < bits; i++) {
- if (isBitSet(x, i))
- _hash |= mask64For(i * 2);
- if (isBitSet(y, i))
- _hash |= mask64For((i * 2) + 1);
- }
+ auto interleavedX = interleaveWithZeros(x);
+ auto interleavedY = interleaveWithZeros(y);
+ auto fullHash = (interleavedX << 1) | interleavedY;
+ // Build a mask whose high (bits * 2) bits are 1 and whose remaining low bits are 0, so only the significant bits of the hash are kept.
+ auto bitMask = (std::numeric_limits<std::uint64_t>::max() << (64 - (bits * 2)));
+ fullHash = fullHash & bitMask;
+ _hash = fullHash;
}
GeoHash::GeoHash(const GeoHash& old) {
diff --git a/src/mongo/db/geo/hash_test.cpp b/src/mongo/db/geo/hash_test.cpp
index d73c983bc06..e9c9ed83cab 100644
--- a/src/mongo/db/geo/hash_test.cpp
+++ b/src/mongo/db/geo/hash_test.cpp
@@ -148,6 +148,74 @@ TEST(GeoHash, UnhashFastMatchesUnhashSlow) {
}
}
+TEST(GeoHash, HashAndUnhash) {
+ PseudoRandom random(12345);
+ for (int i = 0; i < 1'000; i++) {
+ auto x = random.nextInt32();
+ auto y = random.nextInt32();
+ auto hash = GeoHash(x, y, 32);
+ unsigned int unhashedX, unhashedY;
+ hash.unhash(&unhashedX, &unhashedY);
+ ASSERT_EQ(x, unhashedX);
+ ASSERT_EQ(y, unhashedY);
+ }
+}
+
+TEST(GeoHash, HashCropsBits) {
+ // The following numbers were generated with this code snippet on Linux and hardcoded.
+ // PseudoRandom random(12345);
+ {
+ auto x = -2067174821;
+ auto y = 1127948890;
+ auto bits = 1;
+ auto hash = GeoHash(x, y, bits);
+ ASSERT_EQ(hash.toString(), "10");
+ }
+ {
+ auto x = -847616485;
+ auto y = -2132331508;
+ auto bits = 3;
+ auto hash = GeoHash(x, y, bits);
+ ASSERT_EQ(hash.toString(), "111000");
+ }
+ {
+ auto x = -818733575;
+ auto y = -721367113;
+ auto bits = 6;
+ auto hash = GeoHash(x, y, bits);
+ ASSERT_EQ(hash.toString(), "111100011011");
+ }
+ {
+ auto x = 1272197554;
+ auto y = 1923758992;
+ auto bits = 15;
+ auto hash = GeoHash(x, y, bits);
+ ASSERT_EQ(hash.toString(), "001101011000111011100110011001");
+ }
+ {
+ auto x = -1516163863;
+ auto y = -158391651;
+ auto bits = 23;
+ auto hash = GeoHash(x, y, bits);
+ ASSERT_EQ(hash.toString(), "1101110100110110110010000101011100001100101001");
+ }
+ {
+ auto x = -1665346465;
+ auto y = 1063852771;
+ auto bits = 30;
+ auto hash = GeoHash(x, y, bits);
+ ASSERT_EQ(hash.toString(), "100001111111010110011110111000011010001101100100011101101010");
+ }
+ {
+ auto x = 327397251;
+ auto y = 471329956;
+ auto bits = 32;
+ auto hash = GeoHash(x, y, bits);
+ ASSERT_EQ(hash.toString(),
+ "0000001101011010100000010001111111011100111110101100010000011010");
+ }
+}
+
TEST(GeoHashConvertor, EdgeLength) {
const double kError = 10E-15;
GeoHashConverter::Parameters params{};
diff --git a/src/mongo/db/index/expression_keys_private.cpp b/src/mongo/db/index/expression_keys_private.cpp
index 845e65c4b65..0e8fb0d6208 100644
--- a/src/mongo/db/index/expression_keys_private.cpp
+++ b/src/mongo/db/index/expression_keys_private.cpp
@@ -433,8 +433,9 @@ void ExpressionKeysPrivate::validateDocumentCommon(const CollectionPtr& collecti
const BSONObj& keyPattern) {
// If we have a timeseries collection, check that indexed metric fields do not have expanded
// array values
- if (auto tsOptions = collection->getTimeseriesOptions();
- tsOptions && feature_flags::gTimeseriesMetricIndexes.isEnabledAndIgnoreFCV()) {
+ if (auto tsOptions = collection->getTimeseriesOptions(); tsOptions &&
+ feature_flags::gTimeseriesMetricIndexes.isEnabled(
+ serverGlobalParams.featureCompatibility)) {
// Each user metric field will be included twice, as both control.min.<field> and
// control.max.<field>, so we'll want to keep track that we've checked data.<field> to avoid
// scanning it twice. The time field can be excluded as it is guaranteed to be a date at
diff --git a/src/mongo/db/index/index_descriptor.h b/src/mongo/db/index/index_descriptor.h
index f4f48cb35b9..115fc8b1d17 100644
--- a/src/mongo/db/index/index_descriptor.h
+++ b/src/mongo/db/index/index_descriptor.h
@@ -89,6 +89,7 @@ public:
static constexpr StringData kWeightsFieldName = "weights"_sd;
static constexpr StringData kOriginalSpecFieldName = "originalSpec"_sd;
static constexpr StringData kPrepareUniqueFieldName = "prepareUnique"_sd;
+ static constexpr StringData kClusteredFieldName = "clustered"_sd;
/**
* infoObj is a copy of the index-describing BSONObj contained in the catalog.
diff --git a/src/mongo/db/index_build_entry_helpers.cpp b/src/mongo/db/index_build_entry_helpers.cpp
index 5c90ce38388..b51ed281b3a 100644
--- a/src/mongo/db/index_build_entry_helpers.cpp
+++ b/src/mongo/db/index_build_entry_helpers.cpp
@@ -289,13 +289,6 @@ StatusWith<IndexBuildEntry> getIndexBuildEntry(OperationContext* opCtx, UUID ind
// build entry from the config db collection.
hangBeforeGettingIndexBuildEntry.pauseWhileSet(Interruptible::notInterruptible());
- if (!collection.getDb()) {
- str::stream ss;
- ss << "Cannot read " << NamespaceString::kIndexBuildEntryNamespace.ns()
- << ". Database not found: " << NamespaceString::kIndexBuildEntryNamespace.db();
- return Status(ErrorCodes::NamespaceNotFound, ss);
- }
-
if (!collection) {
str::stream ss;
ss << "Collection not found: " << NamespaceString::kIndexBuildEntryNamespace.ns();
diff --git a/src/mongo/db/index_builds_coordinator.cpp b/src/mongo/db/index_builds_coordinator.cpp
index d00dcab50d6..5b954baa536 100644
--- a/src/mongo/db/index_builds_coordinator.cpp
+++ b/src/mongo/db/index_builds_coordinator.cpp
@@ -90,8 +90,9 @@ MONGO_FAIL_POINT_DEFINE(hangBeforeBuildingIndexSecond);
MONGO_FAIL_POINT_DEFINE(hangIndexBuildBeforeWaitingUntilMajorityOpTime);
MONGO_FAIL_POINT_DEFINE(failSetUpResumeIndexBuild);
-IndexBuildsCoordinator::ActiveIndexBuildsSSS::ActiveIndexBuildsSSS()
- : ServerStatusSection("activeIndexBuilds"),
+IndexBuildsCoordinator::IndexBuildsSSS::IndexBuildsSSS()
+ : ServerStatusSection("indexBuilds"),
+ registered(0),
scanCollection(0),
drainSideWritesTable(0),
drainSideWritesTablePreCommit(0),
@@ -550,15 +551,15 @@ Status IndexBuildsCoordinator::_startIndexBuildForRecovery(OperationContext* opC
// 1) Drop all unfinished indexes.
// 2) Start, but do not complete the index build process.
WriteUnitOfWork wuow(opCtx);
- auto indexCatalog = collection.getWritableCollection()->getIndexCatalog();
+ auto indexCatalog = collection.getWritableCollection(opCtx)->getIndexCatalog();
for (size_t i = 0; i < indexNames.size(); i++) {
auto descriptor = indexCatalog->findIndexByName(
opCtx, indexNames[i], IndexCatalog::InclusionPolicy::kReady);
if (descriptor) {
- Status s =
- indexCatalog->dropIndex(opCtx, collection.getWritableCollection(), descriptor);
+ Status s = indexCatalog->dropIndex(
+ opCtx, collection.getWritableCollection(opCtx), descriptor);
if (!s.isOK()) {
return s;
}
@@ -598,7 +599,7 @@ Status IndexBuildsCoordinator::_startIndexBuildForRecovery(OperationContext* opC
IndexCatalog::InclusionPolicy::kFrozen);
if (descriptor) {
Status s = indexCatalog->dropUnfinishedIndex(
- opCtx, collection.getWritableCollection(), descriptor);
+ opCtx, collection.getWritableCollection(opCtx), descriptor);
if (!s.isOK()) {
return s;
}
@@ -609,7 +610,7 @@ Status IndexBuildsCoordinator::_startIndexBuildForRecovery(OperationContext* opC
catalog::removeIndex(
opCtx,
indexNames[i],
- collection.getWritableCollection(),
+ collection.getWritableCollection(opCtx),
nullptr /* ident */,
// Unfinished or partially dropped indexes do not need two-phase drop b/c the
// incomplete index will never be recovered. This is an optimization that will
@@ -621,7 +622,7 @@ Status IndexBuildsCoordinator::_startIndexBuildForRecovery(OperationContext* opC
// We need to initialize the collection to rebuild the indexes. The collection may already
// be initialized when rebuilding indexes with rollback-via-refetch.
if (!collection->isInitialized()) {
- collection.getWritableCollection()->init(opCtx);
+ collection.getWritableCollection(opCtx)->init(opCtx);
}
auto dbName = nss.db().toString();
@@ -632,6 +633,7 @@ Status IndexBuildsCoordinator::_startIndexBuildForRecovery(OperationContext* opC
if (!status.isOK()) {
return status;
}
+ indexBuildsSSS.registered.addAndFetch(1);
IndexBuildsManager::SetupOptions options;
options.protocol = protocol;
@@ -703,7 +705,7 @@ Status IndexBuildsCoordinator::_setUpResumeIndexBuild(OperationContext* opCtx,
if (!collection->isInitialized()) {
WriteUnitOfWork wuow(opCtx);
- collection.getWritableCollection()->init(opCtx);
+ collection.getWritableCollection(opCtx)->init(opCtx);
wuow.commit();
}
@@ -715,6 +717,7 @@ Status IndexBuildsCoordinator::_setUpResumeIndexBuild(OperationContext* opCtx,
if (!status.isOK()) {
return status;
}
+ indexBuildsSSS.registered.addAndFetch(1);
IndexBuildsManager::SetupOptions options;
options.protocol = protocol;
@@ -1808,11 +1811,12 @@ void IndexBuildsCoordinator::createIndexesOnEmptyCollection(OperationContext* op
auto opObserver = opCtx->getServiceContext()->getOpObserver();
- auto indexCatalog = collection.getWritableCollection()->getIndexCatalog();
+ auto indexCatalog = collection.getWritableCollection(opCtx)->getIndexCatalog();
// Always run single phase index build for empty collection. And, will be coordinated using
// createIndexes oplog entry.
for (const auto& spec : specs) {
- if (spec.hasField("clustered") && spec.getBoolField("clustered")) {
+ if (spec.hasField(IndexDescriptor::kClusteredFieldName) &&
+ spec.getBoolField(IndexDescriptor::kClusteredFieldName)) {
// The index is already built implicitly.
continue;
}
@@ -1821,7 +1825,7 @@ void IndexBuildsCoordinator::createIndexesOnEmptyCollection(OperationContext* op
// timestamp.
opObserver->onCreateIndex(opCtx, nss, collectionUUID, spec, fromMigrate);
uassertStatusOK(indexCatalog->createIndexOnEmptyCollection(
- opCtx, collection.getWritableCollection(), spec));
+ opCtx, collection.getWritableCollection(opCtx), spec));
}
}
@@ -1963,6 +1967,7 @@ IndexBuildsCoordinator::_filterSpecsAndRegisterBuild(OperationContext* opCtx,
if (!status.isOK()) {
return status;
}
+ indexBuildsSSS.registered.addAndFetch(1);
// The index has been registered on the Coordinator in an unstarted state. Return an
// uninitialized Future so that the caller can set up the index build by calling
@@ -2519,7 +2524,7 @@ void IndexBuildsCoordinator::_scanCollectionAndInsertSortedKeysIntoIndex(
boost::optional<RecordId> resumeAfterRecordId) {
// Collection scan and insert into index.
{
- const ScopedCounter counter{activeIndexBuildsSSS.scanCollection};
+ indexBuildsSSS.scanCollection.addAndFetch(1);
ScopeGuard scopeGuard([&] {
opCtx->recoveryUnit()->setTimestampReadSource(RecoveryUnit::ReadSource::kNoTimestamp);
@@ -2585,7 +2590,7 @@ CollectionPtr IndexBuildsCoordinator::_setUpForScanCollectionAndInsertSortedKeys
*/
void IndexBuildsCoordinator::_insertKeysFromSideTablesWithoutBlockingWrites(
OperationContext* opCtx, std::shared_ptr<ReplIndexBuildState> replState) {
- const ScopedCounter counter{activeIndexBuildsSSS.drainSideWritesTable};
+ indexBuildsSSS.drainSideWritesTable.addAndFetch(1);
// Perform the first drain while holding an intent lock.
const NamespaceStringOrUUID dbAndUUID(replState->dbName, replState->collectionUUID);
@@ -2611,7 +2616,7 @@ void IndexBuildsCoordinator::_insertKeysFromSideTablesBlockingWrites(
OperationContext* opCtx,
std::shared_ptr<ReplIndexBuildState> replState,
const IndexBuildOptions& indexBuildOptions) {
- const ScopedCounter counter{activeIndexBuildsSSS.drainSideWritesTablePreCommit};
+ indexBuildsSSS.drainSideWritesTablePreCommit.addAndFetch(1);
const NamespaceStringOrUUID dbAndUUID(replState->dbName, replState->collectionUUID);
// Perform the second drain while stopping writes on the collection.
{
@@ -2717,7 +2722,7 @@ IndexBuildsCoordinator::CommitResult IndexBuildsCoordinator::_insertKeysFromSide
<< ", collection UUID: " << replState->collectionUUID);
{
- const ScopedCounter counter{activeIndexBuildsSSS.drainSideWritesTableOnCommit};
+ indexBuildsSSS.drainSideWritesTableOnCommit.addAndFetch(1);
// Perform the third and final drain after releasing a shared lock and reacquiring an
// exclusive lock on the collection.
uassertStatusOK(_indexBuildsManager.drainBackgroundWrites(
@@ -2759,8 +2764,7 @@ IndexBuildsCoordinator::CommitResult IndexBuildsCoordinator::_insertKeysFromSide
// can be called for two-phase builds in all replication states except during initial sync
// when this node is not guaranteed to be consistent.
{
- const ScopedCounter counter{
- activeIndexBuildsSSS.processConstraintsViolatonTableOnCommit};
+ indexBuildsSSS.processConstraintsViolatonTableOnCommit.addAndFetch(1);
bool twoPhaseAndNotInitialSyncing =
IndexBuildProtocol::kTwoPhase == replState->protocol &&
!replCoord->getMemberState().startup2();
@@ -2770,7 +2774,7 @@ IndexBuildsCoordinator::CommitResult IndexBuildsCoordinator::_insertKeysFromSide
opCtx, collection.get(), replState->buildUUID));
}
}
- const ScopedCounter counter{activeIndexBuildsSSS.commit};
+ indexBuildsSSS.commit.addAndFetch(1);
// If two phase index builds is enabled, index build will be coordinated using
// startIndexBuild and commitIndexBuild oplog entries.
diff --git a/src/mongo/db/index_builds_coordinator.h b/src/mongo/db/index_builds_coordinator.h
index ee617a0742d..ac8193685ac 100644
--- a/src/mongo/db/index_builds_coordinator.h
+++ b/src/mongo/db/index_builds_coordinator.h
@@ -489,9 +489,9 @@ public:
*/
static int getNumIndexesTotal(OperationContext* opCtx, const CollectionPtr& collection);
- class ActiveIndexBuildsSSS : public ServerStatusSection {
+ class IndexBuildsSSS : public ServerStatusSection {
public:
- ActiveIndexBuildsSSS();
+ IndexBuildsSSS();
bool includeByDefault() const final {
return true;
@@ -504,10 +504,7 @@ public:
BSONObjBuilder indexBuilds;
BSONObjBuilder phases;
- indexBuilds.append(
- "total",
- static_cast<int>(
- IndexBuildsCoordinator::get(opCtx)->activeIndexBuilds.getActiveIndexBuilds()));
+ indexBuilds.append("total", registered.loadRelaxed());
phases.append("scanCollection", scanCollection.loadRelaxed());
phases.append("drainSideWritesTable", drainSideWritesTable.loadRelaxed());
@@ -525,6 +522,7 @@ public:
return indexBuilds.obj();
}
+ AtomicWord<int> registered;
AtomicWord<int> scanCollection;
AtomicWord<int> drainSideWritesTable;
AtomicWord<int> drainSideWritesTablePreCommit;
@@ -532,7 +530,7 @@ public:
AtomicWord<int> drainSideWritesTableOnCommit;
AtomicWord<int> processConstraintsViolatonTableOnCommit;
AtomicWord<int> commit;
- } activeIndexBuildsSSS;
+ } indexBuildsSSS;
private:
/**
diff --git a/src/mongo/db/index_builds_coordinator_mongod.cpp b/src/mongo/db/index_builds_coordinator_mongod.cpp
index fb309720f22..0aa1661bed2 100644
--- a/src/mongo/db/index_builds_coordinator_mongod.cpp
+++ b/src/mongo/db/index_builds_coordinator_mongod.cpp
@@ -709,7 +709,7 @@ void IndexBuildsCoordinatorMongod::_waitForNextIndexBuildActionAndCommit(
<< replState->buildUUID);
auto const nextAction = [&] {
- const ScopedCounter counter{activeIndexBuildsSSS.waitForCommitQuorum};
+ indexBuildsSSS.waitForCommitQuorum.addAndFetch(1);
// Future wait can be interrupted.
return _drainSideWritesUntilNextActionIsAvailable(opCtx, replState);
}();
diff --git a/src/mongo/db/initialize_server_global_state.cpp b/src/mongo/db/initialize_server_global_state.cpp
index e37d6809049..18c2cf45555 100644
--- a/src/mongo/db/initialize_server_global_state.cpp
+++ b/src/mongo/db/initialize_server_global_state.cpp
@@ -64,7 +64,7 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kControl
-namespace mongo {
+namespace mongo::initialize_server_global_state {
#ifndef _WIN32
static void croak(StringData prefix, int savedErr = errno) {
@@ -411,7 +411,7 @@ MONGO_INITIALIZER(RegisterShortCircuitExitHandler)(InitializerContext*) {
uasserted(ErrorCodes::InternalError, "Failed setting short-circuit exit handler.");
}
-bool initializeServerGlobalState(ServiceContext* service, PidFileWrite pidWrite) {
+bool checkSocketPath() {
#ifndef _WIN32
if (!serverGlobalParams.noUnixSocket &&
!boost::filesystem::is_directory(serverGlobalParams.socket)) {
@@ -420,16 +420,14 @@ bool initializeServerGlobalState(ServiceContext* service, PidFileWrite pidWrite)
}
#endif
- if (!serverGlobalParams.pidFile.empty() && pidWrite == PidFileWrite::kWrite) {
- if (!writePidFile(serverGlobalParams.pidFile)) {
- // error message logged in writePidFile
- return false;
- }
- }
-
return true;
}
+bool writePidFile() {
+ return serverGlobalParams.pidFile.empty() ? true
+ : mongo::writePidFile(serverGlobalParams.pidFile);
+}
+
#ifndef _WIN32
namespace {
// Handling for `honorSystemUmask` and `processUmask` setParameters.
@@ -541,4 +539,4 @@ void ProcessUMaskServerParameter::append(OperationContext*,
#endif
}
-} // namespace mongo
+} // namespace mongo::initialize_server_global_state
diff --git a/src/mongo/db/initialize_server_global_state.h b/src/mongo/db/initialize_server_global_state.h
index d9d6db3e17f..cab4a3f86d1 100644
--- a/src/mongo/db/initialize_server_global_state.h
+++ b/src/mongo/db/initialize_server_global_state.h
@@ -29,28 +29,19 @@
#pragma once
-namespace mongo {
+#include "mongo/db/service_context.h"
-class ServiceContext;
+namespace mongo::initialize_server_global_state {
/**
- * Enum which controls whether the pid file is written at startup.
+ * Returns true if the UNIX socket path is usable (it is an existing directory, or UNIX sockets are disabled); logs and returns false otherwise. Always true on Windows.
*/
-enum class PidFileWrite {
- // Open PID file and write PID to disk
- kWrite,
-
- // Do not open or write PID file
- kNoWrite,
-};
+bool checkSocketPath();
/**
- * Perform initialization activity common across all mongo server types.
- *
- * Set up logging, daemonize the process, configure SSL, etc.
+ * Writes the PID file when one is configured and reports success; trivially returns true when no PID file was specified.
*/
-bool initializeServerGlobalState(ServiceContext* service,
- PidFileWrite pidWrite = PidFileWrite::kWrite);
+bool writePidFile();
/**
* Forks and detaches the server, on platforms that support it, if serverGlobalParams.doFork is
@@ -66,4 +57,4 @@ void forkServerOrDie();
*/
void signalForkSuccess();
-} // namespace mongo
+} // namespace mongo::initialize_server_global_state
diff --git a/src/mongo/db/initialize_server_global_state.idl b/src/mongo/db/initialize_server_global_state.idl
index 1e509d9663d..413e81c6bf6 100644
--- a/src/mongo/db/initialize_server_global_state.idl
+++ b/src/mongo/db/initialize_server_global_state.idl
@@ -26,7 +26,7 @@
# it in the license file.
global:
- cpp_namespace: mongo
+ cpp_namespace: mongo::initialize_server_global_state
cpp_includes:
- mongo/logv2/constants.h
diff --git a/src/mongo/db/internal_transactions_feature_flag.idl b/src/mongo/db/internal_transactions_feature_flag.idl
index d0373f56140..bbbb9fa1477 100644
--- a/src/mongo/db/internal_transactions_feature_flag.idl
+++ b/src/mongo/db/internal_transactions_feature_flag.idl
@@ -41,6 +41,11 @@ feature_flags:
default: true
version: 6.0
+ featureFlagAlwaysCreateConfigTransactionsPartialIndexOnStepUp:
+ description: Feature flag to enable always creating the config.transactions partial index on step up to primary even if the collection is not empty.
+ cpp_varname: gFeatureFlagAlwaysCreateConfigTransactionsPartialIndexOnStepUp
+ default: false
+
featureFlagUpdateDocumentShardKeyUsingTransactionApi:
description: Feature flag to enable usage of the transaction api for update findAndModify and update commands that change a document's shard key.
cpp_varname: gFeatureFlagUpdateDocumentShardKeyUsingTransactionApi
diff --git a/src/mongo/db/mongod_main.cpp b/src/mongo/db/mongod_main.cpp
index ccb5ce1a404..531b875918c 100644
--- a/src/mongo/db/mongod_main.cpp
+++ b/src/mongo/db/mongod_main.cpp
@@ -89,7 +89,6 @@
#include "mongo/db/index_builds_coordinator_mongod.h"
#include "mongo/db/index_names.h"
#include "mongo/db/initialize_server_global_state.h"
-#include "mongo/db/initialize_snmp.h"
#include "mongo/db/internal_transactions_reap_service.h"
#include "mongo/db/introspect.h"
#include "mongo/db/json.h"
@@ -543,8 +542,6 @@ ExitCode _initAndListen(ServiceContext* serviceContext, int listenPort) {
startMongoDFTDC();
- initializeSNMP();
-
if (mongodGlobalParams.scriptingEnabled) {
ScriptEngine::setup();
}
@@ -835,6 +832,10 @@ ExitCode _initAndListen(ServiceContext* serviceContext, int listenPort) {
}
}
+ if (!initialize_server_global_state::writePidFile()) {
+ quickExit(EXIT_FAILURE);
+ }
+
// Startup options are written to the audit log at the end of startup so that cluster server
// parameters are guaranteed to have been initialized from disk at this point.
audit::logStartupOptions(Client::getCurrent(), serverGlobalParams.parsedOpts);
@@ -842,7 +843,7 @@ ExitCode _initAndListen(ServiceContext* serviceContext, int listenPort) {
serviceContext->notifyStartupComplete();
#ifndef _WIN32
- mongo::signalForkSuccess();
+ initialize_server_global_state::signalForkSuccess();
#else
if (ntservice::shouldStartService()) {
ntservice::reportStatus(SERVICE_RUNNING);
@@ -894,7 +895,7 @@ ExitCode initService() {
MONGO_INITIALIZER_GENERAL(ForkServer, ("EndStartupOptionHandling"), ("default"))
(InitializerContext* context) {
- mongo::forkServerOrDie();
+ initialize_server_global_state::forkServerOrDie();
}
#ifdef __linux__
@@ -1144,10 +1145,7 @@ void setUpObservers(ServiceContext* serviceContext) {
opObserverRegistry->addObserver(
std::make_unique<repl::PrimaryOnlyServiceOpObserver>(serviceContext));
opObserverRegistry->addObserver(std::make_unique<FcvOpObserver>());
-
- if (gFeatureFlagClusterWideConfig.isEnabledAndIgnoreFCV()) {
- opObserverRegistry->addObserver(std::make_unique<ClusterServerParameterOpObserver>());
- }
+ opObserverRegistry->addObserver(std::make_unique<ClusterServerParameterOpObserver>());
setupFreeMonitoringOpObserver(opObserverRegistry.get());
@@ -1542,7 +1540,7 @@ int mongod_main(int argc, char* argv[]) {
startupConfigActions(std::vector<std::string>(argv, argv + argc));
cmdline_utils::censorArgvArray(argc, argv);
- if (!initializeServerGlobalState(service))
+ if (!initialize_server_global_state::checkSocketPath())
quickExit(EXIT_FAILURE);
// There is no single-threaded guarantee beyond this point.
@@ -1550,7 +1548,7 @@ int mongod_main(int argc, char* argv[]) {
LOGV2(5945603, "Multi threading initialized");
// Per SERVER-7434, startSignalProcessingThread must run after any forks (i.e.
- // initializeServerGlobalState) and before the creation of any other threads
+ // initialize_server_global_state::forkServerOrDie) and before the creation of any other threads
startSignalProcessingThread();
ReadWriteConcernDefaults::create(service, readWriteConcernDefaultsCacheLookupMongoD);
diff --git a/src/mongo/db/multitenancy.cpp b/src/mongo/db/multitenancy.cpp
index 27ced8eee24..f12a4f7b55d 100644
--- a/src/mongo/db/multitenancy.cpp
+++ b/src/mongo/db/multitenancy.cpp
@@ -29,61 +29,17 @@
#include "mongo/db/multitenancy.h"
-#include "mongo/db/auth/authorization_session.h"
-#include "mongo/db/auth/security_token.h"
-#include "mongo/db/multitenancy_gen.h"
+#include "mongo/db/auth/validated_tenancy_scope.h"
#include "mongo/db/tenant_id.h"
-#include "mongo/logv2/log.h"
-
-#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kDefault
-
namespace mongo {
-// Holds the tenantId for the operation if it was provided in the request on the $tenant field only
-// if the tenantId was not also provided in the security token.
-const auto dollarTenantDecoration =
- OperationContext::declareDecoration<boost::optional<mongo::TenantId>>();
-
-void parseDollarTenantFromRequest(OperationContext* opCtx, const OpMsg& request) {
- // The internal security user is allowed to run commands on behalf of a tenant by passing
- // the tenantId in the "$tenant" field.
- auto tenantElem = request.body["$tenant"];
- if (!tenantElem)
- return;
-
- uassert(ErrorCodes::InvalidOptions,
- "Multitenancy not enabled, cannot set $tenant in command body",
- gMultitenancySupport);
-
- uassert(ErrorCodes::Unauthorized,
- "'$tenant' may only be specified with the useTenant action type",
- AuthorizationSession::get(opCtx->getClient())
- ->isAuthorizedForActionsOnResource(ResourcePattern::forClusterResource(),
- ActionType::useTenant));
-
- auto tenantId = TenantId::parseFromBSON(tenantElem);
-
- uassert(6223901,
- str::stream() << "Cannot pass $tenant id if also passing securityToken, securityToken: "
- << auth::getSecurityToken(opCtx)->getAuthenticatedUser().getTenant()
- << " $tenant: " << tenantId,
- !auth::getSecurityToken(opCtx));
-
-
- dollarTenantDecoration(opCtx) = std::move(tenantId);
- LOGV2_DEBUG(
- 6223900, 4, "Setting tenantId from $tenant request parameter", "tenantId"_attr = tenantId);
-}
-
boost::optional<TenantId> getActiveTenant(OperationContext* opCtx) {
- auto token = auth::getSecurityToken(opCtx);
- if (!token) {
- return dollarTenantDecoration(opCtx);
+ if (auto token = auth::ValidatedTenancyScope::get(opCtx)) {
+ return token->tenantId();
}
- invariant(!dollarTenantDecoration(opCtx));
- return token->getAuthenticatedUser().getTenant();
+ return boost::none;
}
} // namespace mongo
diff --git a/src/mongo/db/multitenancy.h b/src/mongo/db/multitenancy.h
index f354d225a50..b028286659d 100644
--- a/src/mongo/db/multitenancy.h
+++ b/src/mongo/db/multitenancy.h
@@ -37,13 +37,7 @@
namespace mongo {
/**
- * Parses the tenantId from the '$tenant' field in the request if it exists and
- * "multitenancySupport" is enabled. Then, sets the parsed tenantId on the opCtx.
- */
-void parseDollarTenantFromRequest(OperationContext* opCtx, const OpMsg& request);
-
-/**
- * Extract the active TenantId for this operation.
+ * Extract the active TenantId for this OperationContext.
*/
boost::optional<TenantId> getActiveTenant(OperationContext* opCtx);
diff --git a/src/mongo/db/namespace_string.cpp b/src/mongo/db/namespace_string.cpp
index 3d74f7a507a..633ab3ce8ce 100644
--- a/src/mongo/db/namespace_string.cpp
+++ b/src/mongo/db/namespace_string.cpp
@@ -95,8 +95,8 @@ const NamespaceString NamespaceString::kTenantMigrationRecipientsNamespace(
const NamespaceString NamespaceString::kTenantMigrationOplogView(
NamespaceString::kLocalDb, "system.tenantMigration.oplogView");
-const NamespaceString NamespaceString::kTenantSplitDonorsNamespace(NamespaceString::kConfigDb,
- "tenantSplitDonors");
+const NamespaceString NamespaceString::kShardSplitDonorsNamespace(NamespaceString::kConfigDb,
+ "shardSplitDonors");
const NamespaceString NamespaceString::kShardConfigCollectionsNamespace(NamespaceString::kConfigDb,
"cache.collections");
@@ -269,6 +269,7 @@ bool NamespaceString::mustBeAppliedInOwnOplogBatch() const {
return isSystemDotViews() || isServerConfigurationCollection() || isPrivilegeCollection() ||
_ns == kDonorReshardingOperationsNamespace.ns() ||
_ns == kForceOplogBatchBoundaryNamespace.ns() ||
+ _ns == kTenantMigrationDonorsNamespace.ns() ||
_ns == kTenantMigrationRecipientsNamespace.ns() || _ns == kConfigsvrShardsNamespace.ns();
}
@@ -286,6 +287,12 @@ NamespaceString NamespaceString::makeCollectionlessAggregateNSS(const DatabaseNa
return nss;
}
+NamespaceString NamespaceString::makeChangeCollectionNSS(
+ const boost::optional<TenantId>& tenantId) {
+ // TODO: SERVER-65950 create namespace for a particular tenant.
+ return NamespaceString{NamespaceString::kConfigDb, NamespaceString::kChangeCollectionName};
+}
+
std::string NamespaceString::getSisterNS(StringData local) const {
verify(local.size() && local[0] != '.');
return db().toString() + "." + local.toString();
@@ -422,6 +429,10 @@ bool NamespaceString::isFLE2StateCollection() const {
coll().endsWith(fle2EcocSuffix));
}
+bool NamespaceString::isOplogOrChangeCollection() const {
+ return isOplog() || isChangeCollection();
+}
+
NamespaceString NamespaceString::makeTimeseriesBucketsNamespace() const {
return {db(), kTimeseriesBucketsCollectionPrefix.toString() + coll()};
}
diff --git a/src/mongo/db/namespace_string.h b/src/mongo/db/namespace_string.h
index 730a2859b91..91dbcd144b8 100644
--- a/src/mongo/db/namespace_string.h
+++ b/src/mongo/db/namespace_string.h
@@ -156,7 +156,7 @@ public:
static const NamespaceString kTenantMigrationOplogView;
// Namespace for storing the persisted state of tenant split donors.
- static const NamespaceString kTenantSplitDonorsNamespace;
+ static const NamespaceString kShardSplitDonorsNamespace;
// Namespace for replica set configuration settings.
static const NamespaceString kSystemReplSetNamespace;
@@ -233,57 +233,50 @@ public:
/**
* Constructs an empty NamespaceString.
*/
- NamespaceString() : _ns(), _dotIndex(std::string::npos), _dbName() {}
+ NamespaceString() = default;
/**
* Constructs a NamespaceString from the fully qualified namespace named in "ns" and the
* tenantId. "ns" is NOT expected to contain the tenantId.
*/
explicit NamespaceString(boost::optional<TenantId> tenantId, StringData ns) {
- _ns = tenantId ? tenantId->toString() + "_" + ns.toString()
- : ns.toString(); // copy to our buffer
- _dotIndex = _ns.find('.');
+ _dotIndex = ns.find(".");
+
uassert(ErrorCodes::InvalidNamespace,
"namespaces cannot have embedded null characters",
- _ns.find('\0') == std::string::npos);
+ ns.find('\0') == std::string::npos);
- auto db = _dotIndex == std::string::npos ? ns : ns.substr(0, ns.find('.'));
- _dbName = DatabaseName(tenantId, db);
+ StringData db = ns.substr(0, _dotIndex);
+ _dbName = DatabaseName(std::move(tenantId), db);
+ _ns = ns.toString();
}
// TODO SERVER-65920 Remove this constructor once all constructor call sites have been updated
// to pass tenantId explicitly
explicit NamespaceString(StringData ns, boost::optional<TenantId> tenantId = boost::none)
- : NamespaceString(tenantId, ns) {}
+ : NamespaceString(std::move(tenantId), ns) {}
/**
* Constructs a NamespaceString for the given database and collection names.
* "dbName" must not contain a ".", and "collectionName" must not start with one.
*/
NamespaceString(DatabaseName dbName, StringData collectionName)
- : _ns(dbName.toString().size() + collectionName.size() + 1, '\0') {
+ : _dbName(std::move(dbName)), _ns(str::stream() << _dbName.db() << '.' << collectionName) {
+ auto db = _dbName.db();
+
uassert(ErrorCodes::InvalidNamespace,
- "'.' is an invalid character in the database name: " + dbName.db(),
- dbName.db().find('.') == std::string::npos);
+ "'.' is an invalid character in the database name: " + db,
+ db.find('.') == std::string::npos);
uassert(ErrorCodes::InvalidNamespace,
"Collection names cannot start with '.': " + collectionName,
collectionName.empty() || collectionName[0] != '.');
- auto db = dbName.toString();
- std::string::iterator it = std::copy(db.begin(), db.end(), _ns.begin());
- *it = '.';
- ++it;
- it = std::copy(collectionName.begin(), collectionName.end(), it);
_dotIndex = db.size();
-
- dassert(it == _ns.end());
dassert(_ns[_dotIndex] == '.');
uassert(ErrorCodes::InvalidNamespace,
"namespaces cannot have embedded null characters",
_ns.find('\0') == std::string::npos);
-
- _dbName = std::move(dbName);
}
/**
@@ -292,14 +285,14 @@ public:
* NOT expected to contain a tenantId.
*/
NamespaceString(boost::optional<TenantId> tenantId, StringData db, StringData collectionName)
- : NamespaceString(DatabaseName(tenantId, db), collectionName) {}
+ : NamespaceString(DatabaseName(std::move(tenantId), db), collectionName) {}
// TODO SERVER-65920 Remove this constructor once all constructor call sites have been updated
// to pass tenantId explicitly
NamespaceString(StringData db,
StringData collectionName,
boost::optional<TenantId> tenantId = boost::none)
- : NamespaceString(DatabaseName(tenantId, db), collectionName) {}
+ : NamespaceString(DatabaseName(std::move(tenantId), db), collectionName) {}
/**
* Constructs a NamespaceString from the string 'ns'. Should only be used when reading a
@@ -314,6 +307,11 @@ public:
static NamespaceString makeCollectionlessAggregateNSS(const DatabaseName& dbName);
/**
+ * Constructs the change collection namespace for the specified tenant.
+ */
+ static NamespaceString makeChangeCollectionNSS(const boost::optional<TenantId>& tenantId);
+
+ /**
* Constructs a NamespaceString representing a listCollections namespace. The format for this
* namespace is "<dbName>.$cmd.listCollections".
*/
@@ -336,7 +334,7 @@ public:
StringData db() const {
// TODO SERVER-65456 Remove this function.
- return StringData(_dbName.toString());
+ return _dbName.db();
}
const DatabaseName& dbName() const {
@@ -357,6 +355,13 @@ public:
return ns();
}
+ std::string toStringWithTenantId() const {
+ if (auto tenantId = _dbName.tenantId())
+ return str::stream() << *tenantId << '_' << ns();
+
+ return ns();
+ }
+
size_t size() const {
return _ns.size();
}
@@ -482,6 +487,11 @@ public:
bool isFLE2StateCollection() const;
/**
+ * Returns true if the namespace is an oplog or a change collection, false otherwise.
+ */
+ bool isOplogOrChangeCollection() const;
+
+ /**
* Returns the time-series buckets namespace for this view.
*/
NamespaceString makeTimeseriesBucketsNamespace() const;
@@ -600,9 +610,14 @@ public:
* contain a $ should be checked explicitly.
* @return if db is an allowed database name
*/
- static bool validDBName(StringData dbString,
+ static bool validDBName(StringData dbName,
DollarInDbNameBehavior behavior = DollarInDbNameBehavior::Disallow);
+ static bool validDBName(const DatabaseName& dbName,
+ DollarInDbNameBehavior behavior = DollarInDbNameBehavior::Disallow) {
+ return validDBName(dbName.db(), behavior);
+ }
+
/**
* Takes a fully qualified namespace (ie dbname.collectionName), and returns true if
* the collection name component of the namespace is valid.
@@ -632,26 +647,35 @@ public:
// Relops among `NamespaceString`.
friend bool operator==(const NamespaceString& a, const NamespaceString& b) {
- return a.ns() == b.ns();
+ return (a.tenantId() == b.tenantId()) && (a.ns() == b.ns());
}
friend bool operator!=(const NamespaceString& a, const NamespaceString& b) {
- return a.ns() != b.ns();
+ return !(a == b);
}
friend bool operator<(const NamespaceString& a, const NamespaceString& b) {
+ if (a.tenantId() != b.tenantId()) {
+ return a.tenantId() < b.tenantId();
+ }
return a.ns() < b.ns();
}
friend bool operator>(const NamespaceString& a, const NamespaceString& b) {
+ if (a.tenantId() != b.tenantId()) {
+ return a.tenantId() > b.tenantId();
+ }
return a.ns() > b.ns();
}
friend bool operator<=(const NamespaceString& a, const NamespaceString& b) {
- return a.ns() <= b.ns();
+ return !(a > b);
}
friend bool operator>=(const NamespaceString& a, const NamespaceString& b) {
- return a.ns() >= b.ns();
+ return !(a < b);
}
template <typename H>
friend H AbslHashValue(H h, const NamespaceString& nss) {
+ if (nss.tenantId()) {
+ return H::combine(std::move(h), nss._dbName.tenantId().get(), nss._ns);
+ }
return H::combine(std::move(h), nss._ns);
}
@@ -660,9 +684,9 @@ public:
}
private:
- std::string _ns;
- size_t _dotIndex = 0;
DatabaseName _dbName;
+ std::string _ns;
+ size_t _dotIndex = std::string::npos;
};
/**
@@ -696,12 +720,14 @@ public:
/**
* Returns database name if this object was initialized with a UUID.
+ *
+ * TODO SERVER-66887 remove this function for better clarity once call sites have been changed
*/
std::string dbname() const {
return _dbname ? _dbname->db() : "";
}
- const boost::optional<DatabaseName>& dbnameWithTenant() const {
+ const boost::optional<DatabaseName>& dbName() const {
return _dbname;
}
diff --git a/src/mongo/db/namespace_string_test.cpp b/src/mongo/db/namespace_string_test.cpp
index 9673481b874..4412b0246f6 100644
--- a/src/mongo/db/namespace_string_test.cpp
+++ b/src/mongo/db/namespace_string_test.cpp
@@ -303,22 +303,25 @@ TEST(NamespaceStringTest, NSSWithTenantId) {
TenantId tenantId(OID::gen());
std::string tenantNsStr = str::stream() << tenantId.toString() << "_foo.bar";
- NamespaceString nss("foo.bar", tenantId);
- ASSERT_EQ(nss.ns(), tenantNsStr);
- ASSERT_EQ(nss.toString(), tenantNsStr);
+ NamespaceString nss(tenantId, "foo.bar");
+ ASSERT_EQ(nss.ns(), "foo.bar");
+ ASSERT_EQ(nss.toString(), "foo.bar");
+ ASSERT_EQ(nss.toStringWithTenantId(), tenantNsStr);
ASSERT(nss.tenantId());
ASSERT_EQ(*nss.tenantId(), tenantId);
DatabaseName dbName(tenantId, "foo");
NamespaceString nss2(dbName, "bar");
- ASSERT_EQ(nss2.ns(), tenantNsStr);
- ASSERT_EQ(nss2.toString(), tenantNsStr);
+ ASSERT_EQ(nss2.ns(), "foo.bar");
+ ASSERT_EQ(nss2.toString(), "foo.bar");
+ ASSERT_EQ(nss2.toStringWithTenantId(), tenantNsStr);
ASSERT(nss2.tenantId());
ASSERT_EQ(*nss2.tenantId(), tenantId);
NamespaceString nss3("foo", "bar", tenantId);
- ASSERT_EQ(nss3.ns(), tenantNsStr);
- ASSERT_EQ(nss3.toString(), tenantNsStr);
+ ASSERT_EQ(nss3.ns(), "foo.bar");
+ ASSERT_EQ(nss3.toString(), "foo.bar");
+ ASSERT_EQ(nss3.toStringWithTenantId(), tenantNsStr);
ASSERT(nss3.tenantId());
ASSERT_EQ(*nss3.tenantId(), tenantId);
}
@@ -327,9 +330,10 @@ TEST(NamespaceStringTest, NSSNoCollectionWithTenantId) {
TenantId tenantId(OID::gen());
std::string tenantNsStr = str::stream() << tenantId.toString() << "_foo";
- NamespaceString nss("foo", tenantId);
- ASSERT_EQ(nss.ns(), tenantNsStr);
- ASSERT_EQ(nss.toString(), tenantNsStr);
+ NamespaceString nss(tenantId, "foo");
+ ASSERT_EQ(nss.ns(), "foo");
+ ASSERT_EQ(nss.toString(), "foo");
+ ASSERT_EQ(nss.toStringWithTenantId(), tenantNsStr);
ASSERT(nss.tenantId());
ASSERT_EQ(*nss.tenantId(), tenantId);
@@ -351,7 +355,8 @@ TEST(NamespaceStringTest, ParseNSSWithTenantId) {
NamespaceString nss =
NamespaceString::parseFromStringExpectTenantIdInMultitenancyMode(tenantNsStr);
- ASSERT_EQ(nss.ns(), tenantNsStr);
+ ASSERT_EQ(nss.ns(), "foo.bar");
+ ASSERT_EQ(nss.toStringWithTenantId(), tenantNsStr);
ASSERT(nss.tenantId());
ASSERT_EQ(*nss.tenantId(), tenantId);
}
diff --git a/src/mongo/db/op_observer_impl.cpp b/src/mongo/db/op_observer_impl.cpp
index e14695db5de..383f5d47e34 100644
--- a/src/mongo/db/op_observer_impl.cpp
+++ b/src/mongo/db/op_observer_impl.cpp
@@ -2288,14 +2288,6 @@ void OpObserverImpl::_onReplicationRollback(OperationContext* opCtx,
fassertFailedNoTrace(50712);
}
- // Force the config server to update its shard registry on next access. Otherwise it may have
- // the stale data that has been just rolled back.
- if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
- if (auto shardRegistry = Grid::get(opCtx)->shardRegistry()) {
- shardRegistry->clearEntries();
- }
- }
-
// Force the default read/write concern cache to reload on next access in case the defaults
// document was rolled back.
ReadWriteConcernDefaults::get(opCtx).invalidate();
diff --git a/src/mongo/db/op_observer_impl_test.cpp b/src/mongo/db/op_observer_impl_test.cpp
index 0fc2f08ae96..4ad6a6baed0 100644
--- a/src/mongo/db/op_observer_impl_test.cpp
+++ b/src/mongo/db/op_observer_impl_test.cpp
@@ -739,9 +739,13 @@ TEST_F(OpObserverTest, SingleStatementInsertTestIncludesTenantId) {
auto oplogEntryObj = getSingleOplogEntry(opCtx.get());
const repl::OplogEntry& entry = assertGet(repl::OplogEntry::parse(oplogEntryObj));
- ASSERT(nss.tenantId().has_value());
+ // TODO SERVER-67155 Check that (nss == entry.getNss()) and uncomment the
+ // line below once the OplogEntry deserializer passes "tid" to the NamespaceString
+ // constructor
+ ASSERT_EQ(NamespaceString(boost::none, nss.ns()), entry.getNss());
+ // ASSERT(nss.tenantId().has_value());
+
ASSERT_EQ(*nss.tenantId(), *entry.getTid());
- ASSERT_EQ(nss, entry.getNss());
ASSERT_EQ(uuid, *entry.getUuid());
}
@@ -772,7 +776,9 @@ TEST_F(OpObserverTest, SingleStatementUpdateTestIncludesTenantId) {
ASSERT(nss.tenantId().has_value());
ASSERT_EQ(*nss.tenantId(), *entry.getTid());
- ASSERT_EQ(nss, entry.getNss());
+ // TODO SERVER-67155 Check that (nss == entry.getNss()) once the OplogEntry deserializer passes
+ // "tid" to the NamespaceString constructor
+ ASSERT_EQ(NamespaceString(boost::none, nss.ns()), entry.getNss());
ASSERT_EQ(uuid, *entry.getUuid());
}
@@ -798,9 +804,11 @@ TEST_F(OpObserverTest, SingleStatementDeleteTestIncludesTenantId) {
auto oplogEntryObj = getSingleOplogEntry(opCtx.get());
const repl::OplogEntry& entry = assertGet(repl::OplogEntry::parse(oplogEntryObj));
- ASSERT(nss.tenantId().has_value());
+ // TODO SERVER-67155 Check that (nss == entry.getNss()) once the OplogEntry deserializer passes
+ // "tid" to the NamespaceString constructor
+ // ASSERT(nss.tenantId().has_value());
+ ASSERT_EQ(NamespaceString(boost::none, nss.ns()), entry.getNss());
ASSERT_EQ(*nss.tenantId(), *entry.getTid());
- ASSERT_EQ(nss, entry.getNss());
ASSERT_EQ(uuid, *entry.getUuid());
}
@@ -2953,9 +2961,13 @@ TEST_F(BatchedWriteOutputsTest, TestApplyOpsInsertDeleteUpdateIncludesTenantId)
const auto innerEntry = innerEntries[0];
ASSERT(innerEntry.getCommandType() == OplogEntry::CommandType::kNotCommand);
ASSERT(innerEntry.getOpType() == repl::OpTypeEnum::kInsert);
- ASSERT(innerEntry.getNss() == _nssWithTid);
- ASSERT(innerEntry.getNss().tenantId().has_value());
- ASSERT(*innerEntry.getNss().tenantId() == *_nssWithTid.tenantId());
+ // TODO SERVER-67155 Check that (innerEntry.getNss() == _nssWithTid) and uncomment the
+ // 2 lines below once the OplogEntry deserializer passes "tid" to the NamespaceString
+ // constructor
+ ASSERT(innerEntry.getNss() == NamespaceString(boost::none, _nssWithTid.ns()));
+ // ASSERT(innerEntry.getNss().tenantId().has_value());
+ // ASSERT(*innerEntry.getNss().tenantId() == *_nssWithTid.tenantId());
+
ASSERT(innerEntry.getTid().has_value());
ASSERT(*innerEntry.getTid() == *_nssWithTid.tenantId());
ASSERT(0 ==
@@ -2967,9 +2979,13 @@ TEST_F(BatchedWriteOutputsTest, TestApplyOpsInsertDeleteUpdateIncludesTenantId)
const auto innerEntry = innerEntries[1];
ASSERT(innerEntry.getCommandType() == OplogEntry::CommandType::kNotCommand);
ASSERT(innerEntry.getOpType() == repl::OpTypeEnum::kDelete);
- ASSERT(innerEntry.getNss() == _nssWithTid);
- ASSERT(innerEntry.getNss().tenantId().has_value());
- ASSERT(*innerEntry.getNss().tenantId() == *_nssWithTid.tenantId());
+ // TODO SERVER-67155 Check that (innerEntry.getNss() == _nssWithTid) and uncomment the
+ // 2 lines below once the OplogEntry deserializer passes "tid" to the NamespaceString
+ // constructor
+ ASSERT(innerEntry.getNss() == NamespaceString(boost::none, _nssWithTid.ns()));
+ // ASSERT(innerEntry.getNss().tenantId().has_value());
+ // ASSERT(*innerEntry.getNss().tenantId() == *_nssWithTid.tenantId());
+
ASSERT(innerEntry.getTid().has_value());
ASSERT(*innerEntry.getTid() == *_nssWithTid.tenantId());
ASSERT(0 == innerEntry.getObject().woCompare(BSON("_id" << 1)));
@@ -2979,9 +2995,13 @@ TEST_F(BatchedWriteOutputsTest, TestApplyOpsInsertDeleteUpdateIncludesTenantId)
const auto innerEntry = innerEntries[2];
ASSERT(innerEntry.getCommandType() == OplogEntry::CommandType::kNotCommand);
ASSERT(innerEntry.getOpType() == repl::OpTypeEnum::kUpdate);
- ASSERT(innerEntry.getNss() == _nssWithTid);
- ASSERT(innerEntry.getNss().tenantId().has_value());
- ASSERT(*innerEntry.getNss().tenantId() == *_nssWithTid.tenantId());
+ // TODO SERVER-67155 Check that (innerEntry.getNss() == _nssWithTid) and uncomment the
+ // 2 lines below once the OplogEntry deserializer passes "tid" to the NamespaceString
+ // constructor
+ ASSERT(innerEntry.getNss() == NamespaceString(boost::none, _nssWithTid.ns()));
+ // ASSERT(innerEntry.getNss().tenantId().has_value());
+ // ASSERT(*innerEntry.getNss().tenantId() == *_nssWithTid.tenantId());
+
ASSERT(innerEntry.getTid().has_value());
ASSERT(*innerEntry.getTid() == *_nssWithTid.tenantId());
ASSERT(0 ==
diff --git a/src/mongo/db/ops/SConscript b/src/mongo/db/ops/SConscript
index 0b736897acc..983698e5060 100644
--- a/src/mongo/db/ops/SConscript
+++ b/src/mongo/db/ops/SConscript
@@ -34,7 +34,6 @@ env.Library(
env.Library(
target='write_ops_parsers',
source=[
- 'new_write_error_exception_format_feature_flag.idl',
'write_ops.cpp',
'write_ops.idl',
],
diff --git a/src/mongo/db/ops/write_ops.cpp b/src/mongo/db/ops/write_ops.cpp
index 54cef4d3d2a..92d0478a541 100644
--- a/src/mongo/db/ops/write_ops.cpp
+++ b/src/mongo/db/ops/write_ops.cpp
@@ -30,7 +30,6 @@
#include "mongo/db/ops/write_ops.h"
#include "mongo/db/dbmessage.h"
-#include "mongo/db/ops/new_write_error_exception_format_feature_flag_gen.h"
#include "mongo/db/pipeline/aggregation_request_helper.h"
#include "mongo/db/update/update_oplog_entry_serialization.h"
#include "mongo/db/update/update_oplog_entry_version.h"
@@ -295,18 +294,6 @@ WriteError WriteError::parse(const BSONObj& obj) {
auto code = ErrorCodes::Error(obj[WriteError::kCodeFieldName].Int());
auto errmsg = obj[WriteError::kErrmsgFieldName].valueStringDataSafe();
- // At least up to FCV 5.x, the write commands operation used to convert StaleConfig errors
- // into StaleShardVersion and store the extra info of StaleConfig in a sub-field called
- // "errInfo".
- //
- // TODO (SERVER-64449): This special parsing should be removed in the stable version
- // following the resolution of this ticket.
- if (code == ErrorCodes::OBSOLETE_StaleShardVersion) {
- return Status(ErrorCodes::StaleConfig,
- std::move(errmsg),
- obj[WriteError::kErrInfoFieldName].Obj());
- }
-
// All remaining errors have the error stored at the same level as the code and errmsg (in
// the same way that Status is serialised as part of regular command response)
return Status(code, std::move(errmsg), obj);
@@ -319,28 +306,10 @@ BSONObj WriteError::serialize() const {
BSONObjBuilder errBuilder;
errBuilder.append(WriteError::kIndexFieldName, _index);
- // At least up to FCV 5.x, the write commands operation used to convert StaleConfig errors into
- // StaleShardVersion and store the extra info of StaleConfig in a sub-field called "errInfo".
- // This logic preserves this for backwards compatibility.
- //
- // TODO (SERVER-64449): This special serialisation should be removed in the stable version
- // following the resolution of this ticket.
- if (_status == ErrorCodes::StaleConfig &&
- !feature_flags::gFeatureFlagNewWriteErrorExceptionFormat.isEnabled(
- serverGlobalParams.featureCompatibility)) {
- errBuilder.append(WriteError::kCodeFieldName,
- int32_t(ErrorCodes::OBSOLETE_StaleShardVersion));
- errBuilder.append(WriteError::kErrmsgFieldName, _status.reason());
- auto extraInfo = _status.extraInfo();
- invariant(extraInfo);
- BSONObjBuilder extraInfoBuilder(errBuilder.subobjStart(WriteError::kErrInfoFieldName));
- extraInfo->serialize(&extraInfoBuilder);
- } else {
- errBuilder.append(WriteError::kCodeFieldName, int32_t(_status.code()));
- errBuilder.append(WriteError::kErrmsgFieldName, _status.reason());
- if (auto extraInfo = _status.extraInfo()) {
- extraInfo->serialize(&errBuilder);
- }
+ errBuilder.append(WriteError::kCodeFieldName, int32_t(_status.code()));
+ errBuilder.append(WriteError::kErrmsgFieldName, _status.reason());
+ if (auto extraInfo = _status.extraInfo()) {
+ extraInfo->serialize(&errBuilder);
}
return errBuilder.obj();
diff --git a/src/mongo/db/ops/write_ops_exec.cpp b/src/mongo/db/ops/write_ops_exec.cpp
index ee703965b72..8e02cf04ec7 100644
--- a/src/mongo/db/ops/write_ops_exec.cpp
+++ b/src/mongo/db/ops/write_ops_exec.cpp
@@ -452,8 +452,13 @@ bool insertBatchAndHandleErrors(OperationContext* opCtx,
opCtx,
wholeOp.getNamespace(),
fixLockModeForSystemDotViewsChanges(wholeOp.getNamespace(), MODE_IX));
- if (*collection)
+ checkCollectionUUIDMismatch(opCtx,
+ wholeOp.getNamespace(),
+ collection->getCollection(),
+ wholeOp.getCollectionUUID());
+ if (*collection) {
break;
+ }
if (source == OperationSource::kTimeseriesInsert) {
assertTimeseriesBucketsCollectionNotFound(wholeOp.getNamespace());
@@ -499,11 +504,6 @@ bool insertBatchAndHandleErrors(OperationContext* opCtx,
if (shouldProceedWithBatchInsert) {
try {
if (!collection->getCollection()->isCapped() && !inTxn && batch.size() > 1) {
- checkCollectionUUIDMismatch(opCtx,
- wholeOp.getNamespace(),
- collection->getCollection(),
- wholeOp.getCollectionUUID());
-
// First try doing it all together. If all goes well, this is all we need to do.
// See Collection::_insertDocuments for why we do all capped inserts one-at-a-time.
lastOpFixer->startingOp();
@@ -546,10 +546,6 @@ bool insertBatchAndHandleErrors(OperationContext* opCtx,
// Transactions are not allowed to operate on capped collections.
uassertStatusOK(
checkIfTransactionOnCappedColl(opCtx, collection->getCollection()));
- checkCollectionUUIDMismatch(opCtx,
- wholeOp.getNamespace(),
- collection->getCollection(),
- wholeOp.getCollectionUUID());
lastOpFixer->startingOp();
insertDocuments(opCtx,
collection->getCollection(),
@@ -604,11 +600,36 @@ SingleWriteResult makeWriteResultForInsertOrDeleteRetry() {
return res;
}
+
+// Returns the flags that determine the type of document validation we want to
+// perform. First item in the tuple determines whether to bypass document validation altogether,
+// second item determines if _safeContent_ array can be modified in an encrypted collection.
+std::tuple<bool, bool> getDocumentValidationFlags(OperationContext* opCtx,
+ const write_ops::WriteCommandRequestBase& req) {
+ auto& encryptionInfo = req.getEncryptionInformation();
+ const bool fleCrudProcessed = getFleCrudProcessed(opCtx, encryptionInfo);
+ return std::make_tuple(req.getBypassDocumentValidation(), fleCrudProcessed);
+}
} // namespace
+bool getFleCrudProcessed(OperationContext* opCtx,
+ const boost::optional<EncryptionInformation>& encryptionInfo) {
+ if (encryptionInfo && encryptionInfo->getCrudProcessed().value_or(false)) {
+ uassert(6666201,
+ "External users cannot have crudProcessed enabled",
+ AuthorizationSession::get(opCtx->getClient())
+ ->isAuthorizedForActionsOnResource(ResourcePattern::forClusterResource(),
+ ActionType::internal));
+
+ return true;
+ }
+ return false;
+}
+
WriteResult performInserts(OperationContext* opCtx,
const write_ops::InsertCommandRequest& wholeOp,
OperationSource source) {
+
// Insert performs its own retries, so we should only be within a WriteUnitOfWork when run in a
// transaction.
auto txnParticipant = TransactionParticipant::get(opCtx);
@@ -643,8 +664,15 @@ WriteResult performInserts(OperationContext* opCtx,
uassertStatusOK(userAllowedWriteNS(opCtx, wholeOp.getNamespace()));
}
- DisableDocumentSchemaValidationIfTrue docSchemaValidationDisabler(
- opCtx, wholeOp.getWriteCommandRequestBase().getBypassDocumentValidation());
+ const auto [disableDocumentValidation, fleCrudProcessed] =
+ getDocumentValidationFlags(opCtx, wholeOp.getWriteCommandRequestBase());
+
+ DisableDocumentSchemaValidationIfTrue docSchemaValidationDisabler(opCtx,
+ disableDocumentValidation);
+
+ DisableSafeContentValidationIfTrue safeContentValidationDisabler(
+ opCtx, disableDocumentValidation, fleCrudProcessed);
+
LastOpFixer lastOpFixer(opCtx, wholeOp.getNamespace());
WriteResult out;
@@ -766,6 +794,7 @@ static SingleWriteResult performSingleUpdateOp(OperationContext* opCtx,
boost::optional<AutoGetCollection> collection;
while (true) {
collection.emplace(opCtx, ns, fixLockModeForSystemDotViewsChanges(ns, MODE_IX));
+ checkCollectionUUIDMismatch(opCtx, ns, collection->getCollection(), opCollectionUUID);
if (*collection) {
break;
}
@@ -830,8 +859,6 @@ static SingleWriteResult performSingleUpdateOp(OperationContext* opCtx,
uassertStatusOK(checkIfTransactionOnCappedColl(opCtx, coll));
}
- checkCollectionUUIDMismatch(opCtx, ns, collection->getCollection(), opCollectionUUID);
-
const ExtensionsCallbackReal extensionsCallback(opCtx, &updateRequest->getNamespaceString());
ParsedUpdate parsedUpdate(opCtx, updateRequest, extensionsCallback, forgoOpCounterIncrements);
uassertStatusOK(parsedUpdate.parseRequest());
@@ -1003,8 +1030,15 @@ WriteResult performUpdates(OperationContext* opCtx,
(txnParticipant && opCtx->inMultiDocumentTransaction()));
uassertStatusOK(userAllowedWriteNS(opCtx, ns));
- DisableDocumentSchemaValidationIfTrue docSchemaValidationDisabler(
- opCtx, wholeOp.getWriteCommandRequestBase().getBypassDocumentValidation());
+ const auto [disableDocumentValidation, fleCrudProcessed] =
+ getDocumentValidationFlags(opCtx, wholeOp.getWriteCommandRequestBase());
+
+ DisableDocumentSchemaValidationIfTrue docSchemaValidationDisabler(opCtx,
+ disableDocumentValidation);
+
+ DisableSafeContentValidationIfTrue safeContentValidationDisabler(
+ opCtx, disableDocumentValidation, fleCrudProcessed);
+
LastOpFixer lastOpFixer(opCtx, ns);
bool containsRetry = false;
@@ -1231,8 +1265,15 @@ WriteResult performDeletes(OperationContext* opCtx,
(txnParticipant && opCtx->inMultiDocumentTransaction()));
uassertStatusOK(userAllowedWriteNS(opCtx, ns));
- DisableDocumentSchemaValidationIfTrue docSchemaValidationDisabler(
- opCtx, wholeOp.getWriteCommandRequestBase().getBypassDocumentValidation());
+ const auto [disableDocumentValidation, fleCrudProcessed] =
+ getDocumentValidationFlags(opCtx, wholeOp.getWriteCommandRequestBase());
+
+ DisableDocumentSchemaValidationIfTrue docSchemaValidationDisabler(opCtx,
+ disableDocumentValidation);
+
+ DisableSafeContentValidationIfTrue safeContentValidationDisabler(
+ opCtx, disableDocumentValidation, fleCrudProcessed);
+
LastOpFixer lastOpFixer(opCtx, ns);
bool containsRetry = false;
diff --git a/src/mongo/db/ops/write_ops_exec.h b/src/mongo/db/ops/write_ops_exec.h
index 548a3034713..3550a51c1ce 100644
--- a/src/mongo/db/ops/write_ops_exec.h
+++ b/src/mongo/db/ops/write_ops_exec.h
@@ -64,6 +64,9 @@ struct WriteResult {
bool canContinue = true;
};
+bool getFleCrudProcessed(OperationContext* opCtx,
+ const boost::optional<EncryptionInformation>& encryptionInfo);
+
/**
* Performs a batch of inserts, updates, or deletes.
*
diff --git a/src/mongo/db/pipeline/SConscript b/src/mongo/db/pipeline/SConscript
index ff2c639db8b..96c7d59a025 100644
--- a/src/mongo/db/pipeline/SConscript
+++ b/src/mongo/db/pipeline/SConscript
@@ -99,6 +99,7 @@ env.Library(
'expression_context.cpp',
'expression_function.cpp',
'expression_js_emit.cpp',
+ 'expression_parser.idl',
'expression_test_api_version.cpp',
'expression_trigonometric.cpp',
'javascript_execution.cpp',
@@ -106,6 +107,7 @@ env.Library(
'variables.cpp',
],
LIBDEPS=[
+ '$BUILD_DIR/mongo/crypto/fle_crypto',
'$BUILD_DIR/mongo/db/bson/dotted_path_support',
'$BUILD_DIR/mongo/db/commands/test_commands_enabled',
'$BUILD_DIR/mongo/db/exec/document_value/document_value',
@@ -128,6 +130,7 @@ env.Library(
LIBDEPS_PRIVATE=[
'$BUILD_DIR/mongo/db/mongohasher',
'$BUILD_DIR/mongo/db/vector_clock',
+ '$BUILD_DIR/mongo/idl/idl_parser',
],
)
diff --git a/src/mongo/db/pipeline/abt/abt_document_source_visitor.cpp b/src/mongo/db/pipeline/abt/abt_document_source_visitor.cpp
index 114f49e1b7e..863769c9f5b 100644
--- a/src/mongo/db/pipeline/abt/abt_document_source_visitor.cpp
+++ b/src/mongo/db/pipeline/abt/abt_document_source_visitor.cpp
@@ -49,6 +49,7 @@
#include "mongo/db/pipeline/document_source_internal_inhibit_optimization.h"
#include "mongo/db/pipeline/document_source_internal_shard_filter.h"
#include "mongo/db/pipeline/document_source_internal_split_pipeline.h"
+#include "mongo/db/pipeline/document_source_internal_unpack_bucket.h"
#include "mongo/db/pipeline/document_source_limit.h"
#include "mongo/db/pipeline/document_source_list_cached_and_active_users.h"
#include "mongo/db/pipeline/document_source_list_local_sessions.h"
@@ -178,11 +179,19 @@ private:
<< static_cast<int>(transformer->getType()) << ")");
}
+ void assertSupportedPath(const std::string& path) {
+ uassert(ErrorCodes::InternalErrorNotSupported,
+ "Projection contains unsupported numeric path component",
+ !FieldRef(path).hasNumericPathComponents());
+ }
+
void processProjectedPaths(const projection_executor::InclusionNode& node) {
std::set<std::string> preservedPaths;
node.reportProjectedPaths(&preservedPaths);
for (const std::string& preservedPathStr : preservedPaths) {
+ assertSupportedPath(preservedPathStr);
+
_builder.integrateFieldPath(FieldPath(preservedPathStr),
[](const bool isLastElement, FieldMapEntry& entry) {
entry._hasLeadingObj = true;
@@ -232,6 +241,8 @@ private:
// Handle general expression projection.
for (const std::string& computedPathStr : computedPaths) {
+ assertSupportedPath(computedPathStr);
+
const FieldPath computedPath(computedPathStr);
auto entry = _ctx.getNode();
@@ -272,6 +283,7 @@ private:
node.reportProjectedPaths(&preservedPaths);
for (const std::string& preservedPathStr : preservedPaths) {
+ assertSupportedPath(preservedPathStr);
_builder.integrateFieldPath(FieldPath(preservedPathStr),
[](const bool isLastElement, FieldMapEntry& entry) {
if (isLastElement) {
@@ -326,6 +338,10 @@ public:
unsupportedStage(source);
}
+ void visit(const DocumentSourceInternalUnpackBucket* source) override {
+ unsupportedStage(source);
+ }
+
void visit(const DocumentSourceGroup* source) override {
const StringMap<boost::intrusive_ptr<Expression>>& idFields = source->getIdFields();
uassert(6624201, "Empty idFields map", !idFields.empty());
@@ -334,6 +350,9 @@ public:
for (const auto& [fieldName, expr] : idFields) {
groupByFieldNames.push_back(fieldName);
}
+ const bool isSingleIdField =
+ groupByFieldNames.size() == 1 && groupByFieldNames.front() == "_id";
+
// Sort in order to generate consistent plans.
std::sort(groupByFieldNames.begin(), groupByFieldNames.end());
@@ -434,11 +453,21 @@ public:
ABT integrationPath = make<PathIdentity>();
for (size_t i = 0; i < groupByFieldNames.size(); i++) {
+ std::string fieldName = std::move(groupByFieldNames.at(i));
+ if (!isSingleIdField) {
+ // Erase '_id.' prefix.
+ fieldName = fieldName.substr(strlen("_id."));
+ }
+
maybeComposePath(integrationPath,
- make<PathField>(std::move(groupByFieldNames.at(i)),
+ make<PathField>(std::move(fieldName),
make<PathConstant>(make<Variable>(
std::move(groupByProjNames.at(i))))));
}
+ if (!isSingleIdField) {
+ integrationPath = make<PathField>("_id", std::move(integrationPath));
+ }
+
for (size_t i = 0; i < aggProjFieldNames.size(); i++) {
maybeComposePath(
integrationPath,
diff --git a/src/mongo/db/pipeline/abt/agg_expression_visitor.cpp b/src/mongo/db/pipeline/abt/agg_expression_visitor.cpp
index 06b7f7113d0..05b9fff8932 100644
--- a/src/mongo/db/pipeline/abt/agg_expression_visitor.cpp
+++ b/src/mongo/db/pipeline/abt/agg_expression_visitor.cpp
@@ -158,15 +158,16 @@ public:
const Operations op = translateCmpOpFn(expr->getOp());
if (op != Operations::Cmp3w) {
- // If we have EvalPaths coming from the left or on the right, add a PathCompare, and
- // keep propagating the path.
- if (auto leftPtr = left.cast<EvalPath>();
- leftPtr != nullptr && leftPtr->getInput() == _ctx.getRootProjVar()) {
+ // If we have simple EvalPaths coming from the left or on the right, add a PathCompare,
+ // and keep propagating the path.
+ if (auto leftPtr = left.cast<EvalPath>(); leftPtr != nullptr &&
+ isSimplePath(leftPtr->getPath()) && leftPtr->getInput() == _ctx.getRootProjVar()) {
addEvalFilterFn(std::move(leftPtr->getPath()), std::move(right), op);
return;
}
- if (auto rightPtr = right.cast<EvalPath>();
- rightPtr != nullptr && rightPtr->getInput() == _ctx.getRootProjVar()) {
+ if (auto rightPtr = right.cast<EvalPath>(); rightPtr != nullptr &&
+ isSimplePath(rightPtr->getPath()) &&
+ rightPtr->getInput() == _ctx.getRootProjVar()) {
addEvalFilterFn(
std::move(rightPtr->getPath()), std::move(left), reverseComparisonOp(op));
return;
@@ -248,8 +249,10 @@ public:
ABT path = translateFieldPath(
fieldPath,
make<PathIdentity>(),
- [](const std::string& fieldName, const bool /*isLastElement*/, ABT input) {
- // No traverse.
+ [](const std::string& fieldName, const bool isLastElement, ABT input) {
+ if (!isLastElement) {
+ input = make<PathTraverse>(std::move(input));
+ }
return make<PathGet>(fieldName, std::move(input));
},
1ul);
@@ -308,7 +311,7 @@ public:
}
void visit(const ExpressionLn* expr) override final {
- unsupportedExpression(expr->getOpName());
+ pushSingleArgFunctionFromTop("ln");
}
void visit(const ExpressionLog* expr) override final {
@@ -319,6 +322,10 @@ public:
unsupportedExpression(expr->getOpName());
}
+ void visit(const ExpressionInternalFLEEqual* expr) override final {
+ unsupportedExpression(expr->getOpName());
+ }
+
void visit(const ExpressionMap* expr) override final {
unsupportedExpression("$map");
}
@@ -328,7 +335,7 @@ public:
}
void visit(const ExpressionMod* expr) override final {
- unsupportedExpression(expr->getOpName());
+ pushMultiArgFunctionFromTop("mod", 2);
}
void visit(const ExpressionMultiply* expr) override final {
@@ -775,6 +782,7 @@ private:
for (size_t i = 0; i < arity; i++) {
ABT child = _ctx.pop();
if (auto filterPtr = child.cast<EvalFilter>(); allFilters && filterPtr != nullptr &&
+ isSimplePath(filterPtr->getPath()) &&
filterPtr->getInput() == _ctx.getRootProjVar()) {
childPaths.push_back(filterPtr->getPath());
} else {
@@ -784,7 +792,7 @@ private:
}
if (allFilters) {
- // If all children are paths, place a path composition.
+ // If all children are simple paths, place a path composition.
ABT result = make<PathIdentity>();
if (isAnd) {
for (ABT& child : childPaths) {
@@ -812,6 +820,8 @@ private:
for (size_t i = 0; i < argCount; i++) {
children.emplace_back(_ctx.pop());
}
+ std::reverse(children.begin(), children.end());
+
_ctx.push<FunctionCall>(functionName, children);
}
@@ -822,14 +832,10 @@ private:
void pushArithmeticBinaryExpr(const Expression* expr, const Operations op) {
const size_t arity = expr->getChildren().size();
_ctx.ensureArity(arity);
- if (arity < 2) {
- // Nothing to do for arity 0 and 1.
- return;
- }
ABT current = _ctx.pop();
for (size_t i = 0; i < arity - 1; i++) {
- current = make<BinaryOp>(op, std::move(current), _ctx.pop());
+ current = make<BinaryOp>(op, _ctx.pop(), std::move(current));
}
_ctx.push(std::move(current));
}
diff --git a/src/mongo/db/pipeline/abt/match_expression_visitor.cpp b/src/mongo/db/pipeline/abt/match_expression_visitor.cpp
index 5eef023db6b..bc0416f658c 100644
--- a/src/mongo/db/pipeline/abt/match_expression_visitor.cpp
+++ b/src/mongo/db/pipeline/abt/match_expression_visitor.cpp
@@ -155,6 +155,8 @@ public:
"$in with regexes is not supported.",
expr->getRegexes().empty());
+ assertSupportedPathExpression(expr);
+
const auto& equalities = expr->getEqualities();
// $in with an empty equalities list matches nothing; replace with constant false.
@@ -406,6 +408,8 @@ private:
template <bool isValueElemMatch>
void generateElemMatch(const ArrayMatchingMatchExpression* expr) {
+ assertSupportedPathExpression(expr);
+
// Returns true if at least one sub-objects matches the condition.
const size_t childCount = expr->numChildren();
@@ -484,7 +488,15 @@ private:
});
}
+ void assertSupportedPathExpression(const PathMatchExpression* expr) {
+ uassert(ErrorCodes::InternalErrorNotSupported,
+ "Expression contains a numeric path component",
+ !FieldRef(expr->path()).hasNumericPathComponents());
+ }
+
void generateSimpleComparison(const ComparisonMatchExpressionBase* expr, const Operations op) {
+ assertSupportedPathExpression(expr);
+
auto [tag, val] = convertFrom(Value(expr->getData()));
const bool isArray = tag == sbe::value::TypeTags::Array;
ABT result = make<PathCompare>(op, make<Constant>(tag, val));
diff --git a/src/mongo/db/pipeline/abt/pipeline_test.cpp b/src/mongo/db/pipeline/abt/pipeline_test.cpp
index e4067947675..694047d6683 100644
--- a/src/mongo/db/pipeline/abt/pipeline_test.cpp
+++ b/src/mongo/db/pipeline/abt/pipeline_test.cpp
@@ -461,7 +461,9 @@ TEST(ABTTranslate, ProjectPaths) {
"| EvalPath []\n"
"| | Variable [scan_0]\n"
"| PathGet [x]\n"
+ "| PathTraverse []\n"
"| PathGet [y]\n"
+ "| PathTraverse []\n"
"| PathGet [z]\n"
"| PathIdentity []\n"
"Scan [collection]\n"
@@ -525,12 +527,13 @@ TEST(ABTTranslate, ProjectInclusion) {
"| BindBlock:\n"
"| [projGetPath_0]\n"
"| BinaryOp [Add]\n"
- "| | EvalPath []\n"
- "| | | Variable [scan_0]\n"
- "| | PathGet [c]\n"
- "| | PathGet [d]\n"
- "| | PathIdentity []\n"
- "| Const [2]\n"
+ "| | Const [2]\n"
+ "| EvalPath []\n"
+ "| | Variable [scan_0]\n"
+ "| PathGet [c]\n"
+ "| PathTraverse []\n"
+ "| PathGet [d]\n"
+ "| PathIdentity []\n"
"Scan [collection]\n"
" BindBlock:\n"
" [scan_0]\n"
@@ -654,9 +657,10 @@ TEST(ABTTranslate, MatchBasic) {
optimized);
}
-TEST(ABTTranslate, MatchPath) {
- ABT translated = translatePipeline("[{$match: {$expr: {$eq: ['$a.b', 1]}}}]");
+TEST(ABTTranslate, MatchPath1) {
+ ABT translated = translatePipeline("[{$match: {$expr: {$eq: ['$a', 1]}}}]");
+ // Demonstrate simple path is converted to EvalFilter.
ASSERT_EXPLAIN_V2(
"Root []\n"
"| | projections: \n"
@@ -667,7 +671,6 @@ TEST(ABTTranslate, MatchPath) {
"| EvalFilter []\n"
"| | Variable [scan_0]\n"
"| PathGet [a]\n"
- "| PathGet [b]\n"
"| PathCompare [Eq]\n"
"| Const [1]\n"
"Scan [collection]\n"
@@ -677,6 +680,34 @@ TEST(ABTTranslate, MatchPath) {
translated);
}
+TEST(ABTTranslate, MatchPath2) {
+ ABT translated = translatePipeline("[{$match: {$expr: {$eq: ['$a.b', 1]}}}]");
+
+ ASSERT_EXPLAIN_V2(
+ "Root []\n"
+ "| | projections: \n"
+ "| | scan_0\n"
+ "| RefBlock: \n"
+ "| Variable [scan_0]\n"
+ "Filter []\n"
+ "| EvalFilter []\n"
+ "| | Variable [scan_0]\n"
+ "| PathConstant []\n"
+ "| BinaryOp [Eq]\n"
+ "| | Const [1]\n"
+ "| EvalPath []\n"
+ "| | Variable [scan_0]\n"
+ "| PathGet [a]\n"
+ "| PathTraverse []\n"
+ "| PathGet [b]\n"
+ "| PathIdentity []\n"
+ "Scan [collection]\n"
+ " BindBlock:\n"
+ " [scan_0]\n"
+ " Source []\n",
+ translated);
+}
+
TEST(ABTTranslate, ElemMatchPath) {
ABT translated = translatePipeline(
"[{$project: {a: {$literal: [1, 2, 3, 4]}}}, {$match: {a: {$elemMatch: {$gte: 2, $lte: "
@@ -776,11 +807,11 @@ TEST(ABTTranslate, MatchProject) {
"| BinaryOp [Add]\n"
"| | EvalPath []\n"
"| | | Variable [scan_0]\n"
- "| | PathGet [a]\n"
+ "| | PathGet [b]\n"
"| | PathIdentity []\n"
"| EvalPath []\n"
"| | Variable [scan_0]\n"
- "| PathGet [b]\n"
+ "| PathGet [a]\n"
"| PathIdentity []\n"
"Scan [collection]\n"
" BindBlock:\n"
@@ -942,11 +973,11 @@ TEST(ABTTranslate, GroupBasic) {
"| BinaryOp [Mult]\n"
"| | EvalPath []\n"
"| | | Variable [scan_0]\n"
- "| | PathGet [b]\n"
+ "| | PathGet [c]\n"
"| | PathIdentity []\n"
"| EvalPath []\n"
"| | Variable [scan_0]\n"
- "| PathGet [c]\n"
+ "| PathGet [b]\n"
"| PathIdentity []\n"
"Evaluation []\n"
"| BindBlock:\n"
@@ -954,6 +985,7 @@ TEST(ABTTranslate, GroupBasic) {
"| EvalPath []\n"
"| | Variable [scan_0]\n"
"| PathGet [a]\n"
+ "| PathTraverse []\n"
"| PathGet [b]\n"
"| PathIdentity []\n"
"Scan [collection]\n"
@@ -1218,6 +1250,7 @@ TEST(ABTTranslate, UnwindAndGroup) {
"| EvalPath []\n"
"| | Variable [embedProj_0]\n"
"| PathGet [a]\n"
+ "| PathTraverse []\n"
"| PathGet [b]\n"
"| PathIdentity []\n"
"Evaluation []\n"
@@ -2039,11 +2072,12 @@ TEST(ABTTranslate, GroupMultiKey) {
"| | PathField [count]\n"
"| | PathConstant []\n"
"| | Variable [count_agg_0]\n"
+ "| PathField [_id]\n"
"| PathComposeM []\n"
- "| | PathField [_id.year]\n"
+ "| | PathField [year]\n"
"| | PathConstant []\n"
"| | Variable [groupByProj_1]\n"
- "| PathField [_id.isin]\n"
+ "| PathField [isin]\n"
"| PathConstant []\n"
"| Variable [groupByProj_0]\n"
"GroupBy []\n"
@@ -2292,13 +2326,15 @@ TEST(ABTTranslate, PartialIndex) {
// The expression matches the pipeline.
// By default the constant is translated as "int32".
- auto conversionResult = convertExprToPartialSchemaReq(make<EvalFilter>(
- make<PathGet>("b",
- make<PathTraverse>(make<PathCompare>(Operations::Eq, Constant::int32(2)))),
- make<Variable>(scanProjName)));
- ASSERT_TRUE(conversionResult._success);
- ASSERT_FALSE(conversionResult._hasEmptyInterval);
- ASSERT_FALSE(conversionResult._retainPredicate);
+ auto conversionResult = convertExprToPartialSchemaReq(
+ make<EvalFilter>(
+ make<PathGet>(
+ "b", make<PathTraverse>(make<PathCompare>(Operations::Eq, Constant::int32(2)))),
+ make<Variable>(scanProjName)),
+ true /*isFilterContext*/);
+ ASSERT_TRUE(conversionResult.has_value());
+ ASSERT_FALSE(conversionResult->_hasEmptyInterval);
+ ASSERT_FALSE(conversionResult->_retainPredicate);
Metadata metadata = {
{{scanDefName,
@@ -2307,7 +2343,7 @@ TEST(ABTTranslate, PartialIndex) {
IndexDefinition{{{makeIndexPath("a"), CollationOp::Ascending}},
true /*multiKey*/,
{DistributionType::Centralized},
- std::move(conversionResult._reqMap)}}}}}}};
+ std::move(conversionResult->_reqMap)}}}}}}};
ABT translated = translatePipeline(
metadata, "[{$match: {'a': 3, 'b': 2}}]", scanProjName, scanDefName, prefixId);
@@ -2360,13 +2396,15 @@ TEST(ABTTranslate, PartialIndexNegative) {
ProjectionName scanProjName = prefixId.getNextId("scan");
// The expression does not match the pipeline.
- auto conversionResult = convertExprToPartialSchemaReq(make<EvalFilter>(
- make<PathGet>("b",
- make<PathTraverse>(make<PathCompare>(Operations::Eq, Constant::int32(2)))),
- make<Variable>(scanProjName)));
- ASSERT_TRUE(conversionResult._success);
- ASSERT_FALSE(conversionResult._hasEmptyInterval);
- ASSERT_FALSE(conversionResult._retainPredicate);
+ auto conversionResult = convertExprToPartialSchemaReq(
+ make<EvalFilter>(
+ make<PathGet>(
+ "b", make<PathTraverse>(make<PathCompare>(Operations::Eq, Constant::int32(2)))),
+ make<Variable>(scanProjName)),
+ true /*isFilterContext*/);
+ ASSERT_TRUE(conversionResult.has_value());
+ ASSERT_FALSE(conversionResult->_hasEmptyInterval);
+ ASSERT_FALSE(conversionResult->_retainPredicate);
Metadata metadata = {
{{scanDefName,
@@ -2375,7 +2413,7 @@ TEST(ABTTranslate, PartialIndexNegative) {
IndexDefinition{{{makeIndexPath("a"), CollationOp::Ascending}},
true /*multiKey*/,
{DistributionType::Centralized},
- std::move(conversionResult._reqMap)}}}}}}};
+ std::move(conversionResult->_reqMap)}}}}}}};
ABT translated = translatePipeline(
metadata, "[{$match: {'a': 3, 'b': 3}}]", scanProjName, scanDefName, prefixId);
@@ -2461,11 +2499,11 @@ TEST(ABTTranslate, CommonExpressionElimination) {
"| BindBlock:\n"
"| [projGetPath_0]\n"
"| BinaryOp [Add]\n"
- "| | EvalPath []\n"
- "| | | Variable [scan_0]\n"
- "| | PathGet [b]\n"
- "| | PathIdentity []\n"
- "| Const [1]\n"
+ "| | Const [1]\n"
+ "| EvalPath []\n"
+ "| | Variable [scan_0]\n"
+ "| PathGet [b]\n"
+ "| PathIdentity []\n"
"Scan [test]\n"
" BindBlock:\n"
" [scan_0]\n"
diff --git a/src/mongo/db/pipeline/aggregation_context_fixture.h b/src/mongo/db/pipeline/aggregation_context_fixture.h
index 76cc01a40c4..e7595382094 100644
--- a/src/mongo/db/pipeline/aggregation_context_fixture.h
+++ b/src/mongo/db/pipeline/aggregation_context_fixture.h
@@ -33,6 +33,7 @@
#include <memory>
#include "mongo/db/concurrency/locker_noop_client_observer.h"
+#include "mongo/db/pipeline/document_source.h"
#include "mongo/db/pipeline/expression_context_for_test.h"
#include "mongo/db/service_context_test_fixture.h"
#include "mongo/unittest/temp_dir.h"
@@ -76,6 +77,14 @@ private:
boost::intrusive_ptr<ExpressionContextForTest> _expCtx;
};
+// A custom deleter that disposes of a DocumentSource when it goes out of scope.
+struct DocumentSourceDeleter {
+ void operator()(DocumentSource* docSource) {
+ docSource->dispose();
+ delete docSource;
+ }
+};
+
class ServerlessAggregationContextFixture : public AggregationContextFixture {
public:
ServerlessAggregationContextFixture()
diff --git a/src/mongo/db/pipeline/change_stream_document_diff_parser.cpp b/src/mongo/db/pipeline/change_stream_document_diff_parser.cpp
index 15b5f8181f9..c847f32872e 100644
--- a/src/mongo/db/pipeline/change_stream_document_diff_parser.cpp
+++ b/src/mongo/db/pipeline/change_stream_document_diff_parser.cpp
@@ -28,6 +28,7 @@
*/
#include "mongo/db/pipeline/change_stream_document_diff_parser.h"
+
#include "mongo/db/field_ref.h"
namespace mongo {
@@ -36,119 +37,203 @@ using doc_diff::Diff;
using doc_diff::DocumentDiffReader;
namespace {
-// If the terminal fieldname in the given FieldRef has an embedded dot, add it into the
-// dottedFieldNames vector.
-void appendIfDottedField(FieldRef* fieldRef, std::vector<Value>* dottedFieldNames) {
- auto fieldName = fieldRef->getPart(fieldRef->numParts() - 1);
- if (fieldName.find('.') != std::string::npos) {
- dottedFieldNames->push_back(Value(fieldName));
+using DeltaUpdateDescription = change_stream_document_diff_parser::DeltaUpdateDescription;
+using FieldNameOrArrayIndex = stdx::variant<StringData, size_t>;
+
+/**
+ * DeltaUpdateDescriptionBuilder is responsible both for tracking the current path as we traverse
+ * the diff, and for populating a DeltaUpdateDescription reflecting the contents of that diff.
+ */
+struct DeltaUpdateDescriptionBuilder {
+ // Adds the specified entry to the 'updateFields' document in the DeltaUpdateDescription.
+ void addToUpdatedFields(FieldNameOrArrayIndex terminalField, Value updatedValue) {
+ DeltaUpdateDescriptionBuilder::TempAppendToPath tmpAppend(*this, terminalField);
+ _updatedFields.addField(_fieldRef.dottedField(), updatedValue);
+ _addToDisambiguatedPathsIfRequired();
+ }
+
+ // Adds the specified entry to the 'removedFields' vector in the DeltaUpdateDescription.
+ void addToRemovedFields(StringData terminalFieldName) {
+ DeltaUpdateDescriptionBuilder::TempAppendToPath tmpAppend(*this, terminalFieldName);
+ _updateDesc.removedFields.push_back(Value(_fieldRef.dottedField()));
+ _addToDisambiguatedPathsIfRequired();
+ }
+
+ // Adds the current path to the 'truncatedArrays' vector in the DeltaUpdateDescription.
+ void addToTruncatedArrays(int newSize) {
+ _updateDesc.truncatedArrays.push_back(
+ Value(Document{{"field", _fieldRef.dottedField()}, {"newSize", newSize}}));
+ _addToDisambiguatedPathsIfRequired();
+ }
+
+ // Called once the diff traversal is complete. Freezes and returns the DeltaUpdateDescription.
+ // It is an error to use the DeltaUpdateDescriptionBuilder again after this method is called.
+ DeltaUpdateDescription&& freezeDeltaUpdateDescription() {
+ _updateDesc.updatedFields = _updatedFields.freeze();
+ _updateDesc.disambiguatedPaths = _disambiguatedPaths.freeze();
+ return std::move(_updateDesc);
}
-}
+
+ // Returns the last field in the current path.
+ StringData lastPart() const {
+ return _fieldRef.getPart(_fieldRef.numParts() - 1);
+ }
+
+ // Returns the number of fields in the current path.
+ FieldIndex numParts() const {
+ return _fieldRef.numParts();
+ }
+
+ // A structure used to add a scope-guarded field to the current path maintained by the builder.
+ // When this object goes out of scope, it will automatically remove the field from the path.
+ struct TempAppendToPath {
+ TempAppendToPath(DeltaUpdateDescriptionBuilder& builder, FieldNameOrArrayIndex field)
+ : _builder(builder) {
+ // Append the specified field to the builder's path.
+ _builder._appendFieldToPath(std::move(field));
+ }
+
+ ~TempAppendToPath() {
+ // Remove the last field from the path when we go out of scope.
+ _builder._removeLastFieldfromPath();
+ }
+
+ private:
+ DeltaUpdateDescriptionBuilder& _builder;
+ };
+
+private:
+ // A structure for tracking path ambiguity information. Maps 1:1 to fields in the FieldRef via
+ // the _pathAmbiguity list. The 'pathIsAmbiguous' bool indicates whether the path as a whole is
+ // ambiguous as of the corresponding field. Once a path is marked as ambiguous, all subsequent
+ // entries must also be marked as ambiguous.
+ struct AmbiguityInfo {
+ bool pathIsAmbiguous = false;
+ BSONType fieldType = BSONType::String;
+ };
+
+ // Append the given field to the path, and update the path ambiguity information accordingly.
+ void _appendFieldToPath(FieldNameOrArrayIndex field) {
+ // Resolve the FieldNameOrArrayIndex to one or the other, and append it to the path.
+ const bool isArrayIndex = stdx::holds_alternative<size_t>(field);
+ _fieldRef.appendPart(isArrayIndex ? std::to_string(stdx::get<size_t>(field))
+ : stdx::get<StringData>(field));
+
+ // Once a path has become ambiguous, it will remain so as new fields are added. If the final
+ // path component is marked ambiguous, retain that value and add the type of the new field.
+ const auto fieldType = (isArrayIndex ? BSONType::NumberInt : BSONType::String);
+ if (!_pathAmbiguity.empty() && _pathAmbiguity.back().pathIsAmbiguous) {
+ _pathAmbiguity.push_back({true /* pathIsAmbiguous */, fieldType});
+ return;
+ }
+ // If the field is a numeric string or contains an embedded dot, it's ambiguous. We record
+ // array indices so that we can reconstruct the path, but the presence of an array index is
+ // not itself sufficient to make the path ambiguous. We don't include numeric fields at the
+ // start of the path because those are unambiguous.
+ const bool isNumeric = (!isArrayIndex && _fieldRef.numParts() > 1 &&
+ FieldRef::isNumericPathComponentStrict(lastPart()));
+ const bool isDotted =
+ (!isArrayIndex && !isNumeric && lastPart().find('.') != std::string::npos);
+
+ // Add to the field list, marking the path as ambiguous if this field is dotted or numeric.
+ _pathAmbiguity.push_back({(isNumeric || isDotted), fieldType});
+ }
+
+ // Remove the last field from the path, along with its entry in the ambiguity list.
+ void _removeLastFieldfromPath() {
+ _fieldRef.removeLastPart();
+ _pathAmbiguity.pop_back();
+ }
+
+ // If this path is marked as ambiguous, add a new entry for it to 'disambiguatedPaths'.
+ void _addToDisambiguatedPathsIfRequired() {
+ // The final entry in _pathAmbiguity will always be marked as ambiguous if any field in the
+ // path is ambiguous. If so, iterate over the list and create a vector of individual fields.
+ if (!_pathAmbiguity.empty() && _pathAmbiguity.back().pathIsAmbiguous) {
+ std::vector<Value> disambiguatedPath;
+ FieldIndex fieldNum = 0;
+ for (const auto& fieldInfo : _pathAmbiguity) {
+ auto fieldVal = _fieldRef.getPart(fieldNum++);
+ disambiguatedPath.push_back(fieldInfo.fieldType == BSONType::NumberInt
+ ? Value(std::stoi(fieldVal.toString()))
+ : Value(fieldVal));
+ }
+ // Add the vector of individual fields into the 'disambiguatedPaths' document. The name
+ // of the field matches the entry in updatedFields, removedFields, or truncatedArrays.
+ _disambiguatedPaths.addField(_fieldRef.dottedField(),
+ Value(std::move(disambiguatedPath)));
+ }
+ }
+
+ friend struct DeltaUpdateDescriptionBuilder::TempAppendToPath;
+
+ // Each element in the _pathAmbiguity list annotates the field at the corresponding index in the
+ // _fieldRef, indicating the type of that field and whether the path is ambiguous at that point.
+ std::list<AmbiguityInfo> _pathAmbiguity;
+ FieldRef _fieldRef;
+
+ DeltaUpdateDescription _updateDesc;
+ MutableDocument _updatedFields;
+ MutableDocument _disambiguatedPaths;
+};
void buildUpdateDescriptionWithDeltaOplog(
stdx::variant<DocumentDiffReader*, ArrayDiffReader*> reader,
- FieldRef* fieldRef,
- MutableDocument* updatedFields,
- std::vector<Value>* removedFields,
- std::vector<Value>* truncatedArrays,
- MutableDocument* arrayIndices,
- MutableDocument* dottedFields) {
+ DeltaUpdateDescriptionBuilder* builder,
+ boost::optional<FieldNameOrArrayIndex> currentSubField) {
+
+ // Append the field name associated with the current level of the diff to the path.
+ boost::optional<DeltaUpdateDescriptionBuilder::TempAppendToPath> tempAppend;
+ if (currentSubField) {
+ tempAppend.emplace(*builder, std::move(*currentSubField));
+ }
stdx::visit(
visit_helper::Overloaded{
[&](DocumentDiffReader* reader) {
- // Used to track dotted fieldnames at the current level of the diff.
- std::vector<Value> currentDottedFieldNames;
-
boost::optional<BSONElement> nextMod;
while ((nextMod = reader->nextUpdate()) || (nextMod = reader->nextInsert())) {
- FieldRef::FieldRefTempAppend tmpAppend(*fieldRef,
- nextMod->fieldNameStringData());
- updatedFields->addField(fieldRef->dottedField(), Value(*nextMod));
- appendIfDottedField(fieldRef, &currentDottedFieldNames);
+ builder->addToUpdatedFields(nextMod->fieldNameStringData(), Value(*nextMod));
}
- boost::optional<StringData> nextDelete;
- while ((nextDelete = reader->nextDelete())) {
- FieldRef::FieldRefTempAppend tmpAppend(*fieldRef, *nextDelete);
- removedFields->push_back(Value(fieldRef->dottedField()));
- appendIfDottedField(fieldRef, &currentDottedFieldNames);
+ while (auto nextDelete = reader->nextDelete()) {
+ builder->addToRemovedFields(*nextDelete);
}
- boost::optional<
- std::pair<StringData, stdx::variant<DocumentDiffReader, ArrayDiffReader>>>
- nextSubDiff;
- while ((nextSubDiff = reader->nextSubDiff())) {
- FieldRef::FieldRefTempAppend tmpAppend(*fieldRef, nextSubDiff->first);
- appendIfDottedField(fieldRef, &currentDottedFieldNames);
-
+ while (auto nextSubDiff = reader->nextSubDiff()) {
stdx::variant<DocumentDiffReader*, ArrayDiffReader*> nextReader;
stdx::visit(visit_helper::Overloaded{[&nextReader](auto& reader) {
nextReader = &reader;
}},
nextSubDiff->second);
- buildUpdateDescriptionWithDeltaOplog(nextReader,
- fieldRef,
- updatedFields,
- removedFields,
- truncatedArrays,
- arrayIndices,
- dottedFields);
- }
-
- // Now that we have iterated through all fields at this level of the diff, add any
- // dotted fieldnames we encountered into the 'dottedFields' output document.
- if (!currentDottedFieldNames.empty()) {
- dottedFields->addField(fieldRef->dottedField(),
- Value(std::move(currentDottedFieldNames)));
+ buildUpdateDescriptionWithDeltaOplog(
+ nextReader, builder, {{nextSubDiff->first}});
}
},
[&](ArrayDiffReader* reader) {
- // ArrayDiffReader can not be the root of the diff object, so 'fieldRef' should not
- // be empty.
- invariant(!fieldRef->empty());
-
- const auto newSize = reader->newSize();
- if (newSize) {
- const int sz = *newSize;
- truncatedArrays->push_back(
- Value(Document{{"field", fieldRef->dottedField()}, {"newSize", sz}}));
+ // Cannot be the root of the diff object, so the current path should not be empty.
+ tassert(6697700, "Invalid diff or parsing error", builder->numParts() > 0);
+
+ // We don't need to add a fieldname, since we already descended into the array diff.
+ if (auto newSize = reader->newSize()) {
+ builder->addToTruncatedArrays(*newSize);
}
- // Used to track the array indices at the current level of the diff.
- std::vector<Value> currentArrayIndices;
for (auto nextMod = reader->next(); nextMod; nextMod = reader->next()) {
- const auto& fieldName = std::to_string(nextMod->first);
- FieldRef::FieldRefTempAppend tmpAppend(*fieldRef, fieldName);
-
- currentArrayIndices.push_back(Value(static_cast<int>(nextMod->first)));
-
stdx::visit(
visit_helper::Overloaded{
[&](BSONElement elem) {
- updatedFields->addField(fieldRef->dottedField(), Value(elem));
+ builder->addToUpdatedFields(nextMod->first, Value(elem));
},
[&](auto& nextReader) {
- buildUpdateDescriptionWithDeltaOplog(&nextReader,
- fieldRef,
- updatedFields,
- removedFields,
- truncatedArrays,
- arrayIndices,
- dottedFields);
+ buildUpdateDescriptionWithDeltaOplog(
+ &nextReader, builder, {{nextMod->first}});
},
},
nextMod->second);
}
-
- // Now that we have iterated through all fields at this level of the diff, add all
- // the array indices we encountered into the 'arrayIndices' output document.
- if (!currentArrayIndices.empty()) {
- arrayIndices->addField(fieldRef->dottedField(),
- Value(std::move(currentArrayIndices)));
- }
},
},
reader);
@@ -160,25 +245,12 @@ void buildUpdateDescriptionWithDeltaOplog(
namespace change_stream_document_diff_parser {
DeltaUpdateDescription parseDiff(const Diff& diff) {
- DeltaUpdateDescription updatedDesc;
- MutableDocument updatedFields;
- MutableDocument dottedFields;
- MutableDocument arrayIndices;
+ DeltaUpdateDescriptionBuilder builder;
DocumentDiffReader docReader(diff);
- stdx::variant<DocumentDiffReader*, ArrayDiffReader*> reader = &docReader;
- FieldRef path;
- buildUpdateDescriptionWithDeltaOplog(reader,
- &path,
- &updatedFields,
- &updatedDesc.removedFields,
- &updatedDesc.truncatedArrays,
- &arrayIndices,
- &dottedFields);
- updatedDesc.updatedFields = updatedFields.freeze();
- updatedDesc.arrayIndices = arrayIndices.freeze();
- updatedDesc.dottedFields = dottedFields.freeze();
-
- return updatedDesc;
+
+ buildUpdateDescriptionWithDeltaOplog(&docReader, &builder, boost::none);
+
+ return builder.freezeDeltaUpdateDescription();
}
} // namespace change_stream_document_diff_parser
diff --git a/src/mongo/db/pipeline/change_stream_document_diff_parser.h b/src/mongo/db/pipeline/change_stream_document_diff_parser.h
index e9a2864fe91..5e871c177e6 100644
--- a/src/mongo/db/pipeline/change_stream_document_diff_parser.h
+++ b/src/mongo/db/pipeline/change_stream_document_diff_parser.h
@@ -36,11 +36,14 @@
namespace mongo {
namespace change_stream_document_diff_parser {
struct DeltaUpdateDescription {
+ DeltaUpdateDescription(const DeltaUpdateDescription& other) = delete;
+ DeltaUpdateDescription(DeltaUpdateDescription&& other) = default;
+ DeltaUpdateDescription() = default;
+
Document updatedFields;
std::vector<Value> removedFields;
std::vector<Value> truncatedArrays;
- Document arrayIndices;
- Document dottedFields;
+ Document disambiguatedPaths;
};
/**
diff --git a/src/mongo/db/pipeline/change_stream_document_diff_parser_test.cpp b/src/mongo/db/pipeline/change_stream_document_diff_parser_test.cpp
index 0f5b3506703..fa2bae02a6f 100644
--- a/src/mongo/db/pipeline/change_stream_document_diff_parser_test.cpp
+++ b/src/mongo/db/pipeline/change_stream_document_diff_parser_test.cpp
@@ -30,7 +30,6 @@
#include "mongo/db/pipeline/change_stream_document_diff_parser.h"
-
#include "mongo/bson/json.h"
#include "mongo/db/exec/document_value/document.h"
#include "mongo/db/exec/document_value/document_value_test_util.h"
@@ -41,110 +40,218 @@
namespace mongo {
namespace {
-
-TEST(ChangeStreamDocumentDiffParserTest, DottedFieldsInsideArrays) {
+TEST(ChangeStreamDocumentDiffParserTest, DisambiguatesDottedFields) {
BSONObj diff = fromjson(
"{"
- " 'sarr.F.i.eld..': {a: true, l: 10,"
- " u0: 1,"
- " u1: {'a.b.c': {'a.b': 1}},"
- " s6: {u: {"
- " 'a.b.d': {'a.b.c': 3},"
- " 'a.b': {d: {'a.b': 1}}"
- " }}"
+ " u: {'a.b': 1},"
+ " 'sc.d.': {"
+ " u: {'e': 1, 'f.g': 1},"
+ " 'sh': {"
+ " u: {'i.j': 1}"
+ " }"
+ " },"
+ " 'sk': {"
+ " u: {'l.m': 1}"
" }"
"}");
auto parsedDiff = change_stream_document_diff_parser::parseDiff(diff);
- ASSERT_DOCUMENT_EQ(parsedDiff.updatedFields,
- Document(fromjson("{'arr.F.i.eld...0': 1, 'arr.F.i.eld...1': {'a.b.c': "
- "{'a.b': 1}}, 'arr.F.i.eld...6.a.b.d': {'a.b.c': 3}, "
- "'arr.F.i.eld...6.a.b': {d: {'a.b': 1}}}")));
+ ASSERT_DOCUMENT_EQ(
+ parsedDiff.updatedFields,
+ Document(fromjson("{'a.b': 1, 'c.d..e': 1, 'c.d..f.g': 1, 'c.d..h.i.j': 1, 'k.l.m': 1}")));
ASSERT_DOCUMENT_EQ(
- parsedDiff.dottedFields,
- Document(fromjson("{'arr.F.i.eld...6': ['a.b.d', 'a.b'], '': ['arr.F.i.eld..']}")));
+ parsedDiff.disambiguatedPaths,
+ Document(fromjson("{'a.b': ['a.b'], 'c.d..e': ['c.d.', 'e'], 'c.d..f.g': ['c.d.', 'f.g'], "
+ "'c.d..h.i.j': ['c.d.', 'h', 'i.j'], 'k.l.m': ['k', 'l.m']}")));
ASSERT(parsedDiff.removedFields.empty());
- ASSERT_EQ(parsedDiff.truncatedArrays.size(), 1);
- ASSERT_VALUE_EQ(parsedDiff.truncatedArrays[0],
- Value(fromjson("{field: 'arr.F.i.eld..', newSize: 10}")));
+ ASSERT(parsedDiff.truncatedArrays.empty());
+}
- ASSERT_DOCUMENT_EQ(parsedDiff.arrayIndices, Document(fromjson("{'arr.F.i.eld..': [0, 1, 6]}")));
+TEST(ChangeStreamDocumentDiffParserTest, DisambiguatesNumericFields) {
+ BSONObj diff = fromjson(
+ "{"
+ " 'sa': {"
+ " u: {'0': 1}"
+ " }"
+ "}");
+
+ auto parsedDiff = change_stream_document_diff_parser::parseDiff(diff);
+
+ ASSERT_DOCUMENT_EQ(parsedDiff.updatedFields, Document(fromjson("{'a.0': 1}")));
+
+ ASSERT_DOCUMENT_EQ(parsedDiff.disambiguatedPaths, Document(fromjson("{'a.0': ['a', '0']}")));
+
+ ASSERT(parsedDiff.removedFields.empty());
+ ASSERT(parsedDiff.truncatedArrays.empty());
}
-TEST(ChangeStreamDocumentDiffParserTest, DottedFieldsInsideObjects) {
+TEST(ChangeStreamDocumentDiffParserTest, DisambiguatesNumericFieldsFromArrayIndices) {
BSONObj diff = fromjson(
"{"
- " 'sobject.F.i.eld..': {"
- " u: {'0.0.0': 1, '1.1.1': {'0.0': 1}},"
- " s6: {'s7.8': {'s9.10': {"
- " u: {'a.b.d': {'a.b.c': 3}}"
- " }}}"
+ " 'sa': {"
+ " 's0': {a: true, u0: 1}"
" }"
"}");
auto parsedDiff = change_stream_document_diff_parser::parseDiff(diff);
- ASSERT_DOCUMENT_EQ(
- parsedDiff.updatedFields,
- Document(fromjson("{'object.F.i.eld...0.0.0': 1, 'object.F.i.eld...1.1.1': {'0.0': 1},"
- "'object.F.i.eld...6.7.8.9.10.a.b.d': {'a.b.c': 3} }")));
+ ASSERT_DOCUMENT_EQ(parsedDiff.updatedFields, Document(fromjson("{'a.0.0': 1}")));
- ASSERT_DOCUMENT_EQ(parsedDiff.dottedFields,
- Document(fromjson("{'object.F.i.eld...6.7.8.9.10': ['a.b.d'], "
- "'object.F.i.eld...6.7.8': ['9.10'], "
- "'object.F.i.eld...6': ['7.8'], "
- "'object.F.i.eld..': ['0.0.0', '1.1.1'],"
- "'': ['object.F.i.eld..']}")));
+ ASSERT_DOCUMENT_EQ(parsedDiff.disambiguatedPaths,
+ Document(fromjson("{'a.0.0': ['a', '0', 0]}")));
ASSERT(parsedDiff.removedFields.empty());
ASSERT(parsedDiff.truncatedArrays.empty());
- ASSERT(parsedDiff.arrayIndices.empty());
}
-TEST(ChangeStreamDocumentDiffParserTest, PathToArrayFields) {
+TEST(ChangeStreamDocumentDiffParserTest, DoesNotDisambiguateNumericFieldAtRootOfDocument) {
BSONObj diff = fromjson(
"{"
- " 'sarr.F.i.eld..': {a: true, l: 10,"
- " u0: 1,"
- " s6: {a: true, s1: {"
- " 's0.0': {a: true, u0: 1}"
- " }}"
+ " u: {'0': 1}"
+ "}");
+
+ auto parsedDiff = change_stream_document_diff_parser::parseDiff(diff);
+
+ ASSERT_DOCUMENT_EQ(parsedDiff.updatedFields, Document(fromjson("{'0': 1}")));
+
+ // A numeric field at the root of the document is unambiguous; it must be a fieldname and cannot
+ // be an array index, since by definition the latter must index a parent field.
+ ASSERT(parsedDiff.disambiguatedPaths.empty());
+
+ ASSERT(parsedDiff.removedFields.empty());
+ ASSERT(parsedDiff.truncatedArrays.empty());
+}
+
+TEST(ChangeStreamDocumentDiffParserTest, DoesNotDisambiguateNumericFieldWithLeadingZeroes) {
+ BSONObj diff = fromjson(
+ "{"
+ " 'sa': {u: {'01': 1}}"
+ "}");
+
+ auto parsedDiff = change_stream_document_diff_parser::parseDiff(diff);
+
+ ASSERT_DOCUMENT_EQ(parsedDiff.updatedFields, Document(fromjson("{'a.01': 1}")));
+
+ // A numeric field with leading zeroes is unambiguous; it must be a fieldname and cannot be an
+ // array index, since array indexes are simple integers that do not have leading zeroes.
+ ASSERT(parsedDiff.disambiguatedPaths.empty());
+
+ ASSERT(parsedDiff.removedFields.empty());
+ ASSERT(parsedDiff.truncatedArrays.empty());
+}
+
+TEST(ChangeStreamDocumentDiffParserTest, DoesNotDisambiguateIfOnlyArrayIndicesPresent) {
+ BSONObj diff = fromjson(
+ "{"
+ " 'sa': {a: true,"
+ " s0: {u: {'b': 1}}"
" }"
"}");
auto parsedDiff = change_stream_document_diff_parser::parseDiff(diff);
- ASSERT_DOCUMENT_EQ(parsedDiff.updatedFields,
- Document(fromjson("{'arr.F.i.eld...0': 1, 'arr.F.i.eld...6.1.0.0.0': 1}")));
+ ASSERT_DOCUMENT_EQ(parsedDiff.updatedFields, Document(fromjson("{'a.0.b': 1}")));
+
+ ASSERT(parsedDiff.disambiguatedPaths.empty());
+ ASSERT(parsedDiff.removedFields.empty());
+ ASSERT(parsedDiff.truncatedArrays.empty());
+}
+
+TEST(ChangeStreamDocumentDiffParserTest, DisambiguatesRemovedFields) {
+ BSONObj diff = fromjson(
+ "{"
+ " d: {'a.b': false},"
+ " 'sc': {"
+ " d: {'0': false}"
+ " }"
+ "}");
+
+ auto parsedDiff = change_stream_document_diff_parser::parseDiff(diff);
+
+ ASSERT(parsedDiff.removedFields.size() == 2);
+ ASSERT_VALUE_EQ(parsedDiff.removedFields[0], Value("a.b"_sd));
+ ASSERT_VALUE_EQ(parsedDiff.removedFields[1], Value("c.0"_sd));
+
+ ASSERT_DOCUMENT_EQ(parsedDiff.disambiguatedPaths,
+ Document(fromjson("{'a.b': ['a.b'], 'c.0': ['c', '0']}")));
+
+ ASSERT(parsedDiff.updatedFields.empty());
+ ASSERT(parsedDiff.truncatedArrays.empty());
+}
- ASSERT_DOCUMENT_EQ(parsedDiff.dottedFields,
- Document(fromjson("{'arr.F.i.eld...6.1': ['0.0'], '': ['arr.F.i.eld..']}")));
+TEST(ChangeStreamDocumentDiffParserTest, DisambiguatesTruncatedArrays) {
+ BSONObj diff = fromjson(
+ "{"
+ " 'sa.b': {a: true, l: 5},"
+ " 'sc': {"
+ " 's0': {a: true, l: 5}"
+ " }"
+ "}");
+
+ auto parsedDiff = change_stream_document_diff_parser::parseDiff(diff);
+
+ ASSERT(parsedDiff.truncatedArrays.size() == 2);
+ ASSERT_VALUE_EQ(parsedDiff.truncatedArrays[0], Value(fromjson("{field: 'a.b', newSize: 5}")));
+ ASSERT_VALUE_EQ(parsedDiff.truncatedArrays[1], Value(fromjson("{field: 'c.0', newSize: 5}")));
+
+ ASSERT_DOCUMENT_EQ(parsedDiff.disambiguatedPaths,
+ Document(fromjson("{'a.b': ['a.b'], 'c.0': ['c', '0']}")));
+ ASSERT(parsedDiff.updatedFields.empty());
ASSERT(parsedDiff.removedFields.empty());
+}
+
+TEST(ChangeStreamDocumentDiffParserTest, DisambiguatesCombinationOfAmbiguousFields) {
+ // Array and numeric field within dotted parent, dotted and numeric fields within array, dotted
+ // field and array within numeric parent.
+ BSONObj diff = fromjson(
+ "{"
+ " 'sa.b': {a: true,"
+ " 's0': {u: {'1': 1}}"
+ " },"
+ " 'sc': {a: true,"
+ " 's0': {u: {'d.e': 1}},"
+ " 's1': {u: {'2': 1}}"
+ " },"
+ " 'sf': {"
+ " 's1': {"
+ " u: {'g.h': 1},"
+ " 's2': {a: true,"
+ " u3: 1,"
+ " s4: {u: {'5': 1}}"
+ " }"
+ " }"
+ " }"
+ "}");
+
+ auto parsedDiff = change_stream_document_diff_parser::parseDiff(diff);
- ASSERT_EQ(parsedDiff.truncatedArrays.size(), 1);
- ASSERT_VALUE_EQ(parsedDiff.truncatedArrays[0],
- Value(fromjson("{field: 'arr.F.i.eld..', newSize: 10}")));
+ ASSERT_DOCUMENT_EQ(parsedDiff.updatedFields,
+ Document(fromjson("{'a.b.0.1': 1, 'c.0.d.e': 1, 'c.1.2': 1, 'f.1.g.h': 1, "
+ "'f.1.2.3': 1, 'f.1.2.4.5': 1}")));
ASSERT_DOCUMENT_EQ(
- parsedDiff.arrayIndices,
- Document(fromjson(
- "{'arr.F.i.eld...6.1.0.0': [0], 'arr.F.i.eld...6': [1], 'arr.F.i.eld..': [0, 6]}")));
+ parsedDiff.disambiguatedPaths,
+ Document(fromjson("{'a.b.0.1': ['a.b', 0, '1'], 'c.0.d.e': ['c', 0, 'd.e'], 'c.1.2': ['c', "
+ "1, '2'], 'f.1.g.h': ['f', '1', 'g.h'], 'f.1.2.3': ['f', '1', '2', 3], "
+ "'f.1.2.4.5': ['f', '1', '2', 4, '5']}")));
+
+ ASSERT(parsedDiff.removedFields.empty());
+ ASSERT(parsedDiff.truncatedArrays.empty());
}
-TEST(ChangeStreamDocumentDiffParserTest, WithDuplicateFieldsInDiff) {
+TEST(ChangeStreamDocumentDiffParserTest, DoesNotFullyDisambiguateWithDuplicateFieldsInDiff) {
BSONObj diff = fromjson("{u: {'a.b' : 2}, sa : {u: {b: 1 }}}");
auto parsedDiff = change_stream_document_diff_parser::parseDiff(diff);
auto expectedUpdateFields = Document{{"a.b", 2}, {"a.b", 1}};
ASSERT_DOCUMENT_EQ(parsedDiff.updatedFields, expectedUpdateFields);
- ASSERT_DOCUMENT_EQ(parsedDiff.dottedFields, Document(fromjson("{'': ['a.b']}")));
+ ASSERT_DOCUMENT_EQ(parsedDiff.disambiguatedPaths, Document(fromjson("{'a.b': ['a.b']}")));
ASSERT(parsedDiff.removedFields.empty());
ASSERT(parsedDiff.truncatedArrays.empty());
- ASSERT(parsedDiff.arrayIndices.empty());
}
} // namespace
} // namespace mongo
diff --git a/src/mongo/db/pipeline/change_stream_event_transform.cpp b/src/mongo/db/pipeline/change_stream_event_transform.cpp
index 7b21e860899..f41ae425423 100644
--- a/src/mongo/db/pipeline/change_stream_event_transform.cpp
+++ b/src/mongo/db/pipeline/change_stream_event_transform.cpp
@@ -213,21 +213,18 @@ Document ChangeStreamDefaultEventTransformation::applyTransformation(const Docum
if (_changeStreamSpec.getShowRawUpdateDescription()) {
updateDescription = input[repl::OplogEntry::kObjectFieldName];
} else {
- const auto populateSpecialFields = _changeStreamSpec.getShowExpandedEvents() &&
+ const auto showDisambiguatedPaths = _changeStreamSpec.getShowExpandedEvents() &&
feature_flags::gFeatureFlagChangeStreamsFurtherEnrichedEvents.isEnabled(
serverGlobalParams.featureCompatibility);
const auto& deltaDesc = change_stream_document_diff_parser::parseDiff(
diffObj.getDocument().toBson());
- updateDescription = Value(
- Document{{"updatedFields", deltaDesc.updatedFields},
- {"removedFields", std::move(deltaDesc.removedFields)},
- {"truncatedArrays", std::move(deltaDesc.truncatedArrays)},
- {"specialFields",
- populateSpecialFields
- ? Value(Document{{"arrayIndices", deltaDesc.arrayIndices},
- {"dottedFields", deltaDesc.dottedFields}})
- : Value()}});
+ updateDescription = Value(Document{
+ {"updatedFields", deltaDesc.updatedFields},
+ {"removedFields", std::move(deltaDesc.removedFields)},
+ {"truncatedArrays", std::move(deltaDesc.truncatedArrays)},
+ {"disambiguatedPaths",
+ showDisambiguatedPaths ? Value(deltaDesc.disambiguatedPaths) : Value()}});
}
} else if (id.missing()) {
operationType = DocumentSourceChangeStream::kUpdateOpType;
diff --git a/src/mongo/db/pipeline/change_stream_event_transform_test.cpp b/src/mongo/db/pipeline/change_stream_event_transform_test.cpp
index 3123c26d160..e0060b7d7c7 100644
--- a/src/mongo/db/pipeline/change_stream_event_transform_test.cpp
+++ b/src/mongo/db/pipeline/change_stream_event_transform_test.cpp
@@ -92,8 +92,7 @@ TEST(ChangeStreamEventTransformTest, TestDefaultUpdateTransform) {
Document{{"updatedFields", Document{{"y", 2}}},
{"removedFields", std::vector<Value>()},
{"truncatedArrays", std::vector<Value>()},
- {"specialFields",
- Document{{"arrayIndices", Document()}, {"dottedFields", Document()}}}},
+ {"disambiguatedPaths", Document{}}},
},
};
@@ -197,25 +196,15 @@ TEST(ChangeStreamEventTransformTest, TestUpdateTransformWithTenantId) {
// in the oplog entry. It should still not be a part of the db name in the change event.
RAIIServerParameterControllerForTest featureFlagController("featureFlagRequireTenantID", true);
- // TODO SERVER-66019 Construct OplogEntry using makeOplogEntry and use the applyTransformation
- // helper defined above. We manually construct the OplogEntry as a BSON object below to avoid
- // including the tenantId as the db prefix in the OplogEntry's "ns" field. Until SERVER-66019 is
- // complete, the tenantId will be included in both the "tid" field and "ns" fields in serialized
- // oplog entries, because serializing NamespaceString currently will include the tenantId.
- auto oplogEntry = BSON("ts" << Timestamp(0, 0) << "t" << 0LL << "op"
- << "u"
- << "ns"
- << "unittests.serverless_change_stream"
- << "tid" << tenantId << "wall" << Date_t() << "ui" << testUuid()
- << "o" << BSON("$v" << 2 << "diff" << BSON("u" << BSON("y" << 2)))
- << "o2" << documentKey.toBson());
-
- DocumentSourceChangeStreamSpec spec;
- spec.setStartAtOperationTime(kDefaultTs);
- ChangeStreamEventTransformer transformer(
- make_intrusive<ExpressionContextForTest>(nssWithTenant), spec);
+ auto oplogEntry = makeOplogEntry(repl::OpTypeEnum::kUpdate, // op type
+ nssWithTenant, // namespace
+ BSON("$v" << 2 << "diff" << BSON("u" << BSON("y" << 2))), // o
+ testUuid(), // uuid
+ boost::none, // fromMigrate
+ documentKey.toBson() // o2
+ );
- changeStreamDoc = transformer.applyTransformation(Document(oplogEntry));
+ changeStreamDoc = applyTransformation(updateField, nssWithTenant);
outputNs = changeStreamDoc[DocumentSourceChangeStream::kNamespaceField].getDocument();
ASSERT_DOCUMENT_EQ(outputNs, expectedNamespace);
@@ -261,30 +250,14 @@ TEST(ChangeStreamEventTransformTest, TestRenameTransformWithTenantId) {
// in the oplog entry. It should still not be a part of the db name in the change event.
RAIIServerParameterControllerForTest featureFlagController("featureFlagRequireTenantID", true);
- // TODO SERVER-66019 Construct OplogEntry using makeOplogEntry and use the applyTransformation
- // helper defined above. We manually construct the OplogEntry as a BSON object below to avoid
- // including the tenantId as the db prefix in the OplogEntry's "ns", "renameCollection", and
- // "to" fields. Until SERVER-66019 is complete, the tenantId will be included in both the "tid"
- // field and these 3 fields in serialized oplog entries, because serializing NamespaceString
- // currently will include the tenantId.
- auto oplogEntry =
- BSON("ts" << Timestamp(0, 0) << "t" << 0LL << "op"
- << "c"
- << "ns"
- << "unittests.$cmd"
- << "tid" << tenantId << "wall" << Date_t() << "ui" << testUuid() << "o"
- << BSON("renameCollection"
- << "unittests.serverless_change_stream"
- << "to"
- << "unittests.rename_coll"));
-
- DocumentSourceChangeStreamSpec spec;
- spec.setStartAtOperationTime(kDefaultTs);
- spec.setShowExpandedEvents(true);
- ChangeStreamEventTransformer transformer(make_intrusive<ExpressionContextForTest>(renameFrom),
- spec);
+ auto oplogEntry = makeOplogEntry(
+ repl::OpTypeEnum::kCommand, // op type
+ renameFrom.getCommandNS(), // namespace
+ BSON("renameCollection" << renameFrom.toString() << "to" << renameTo.toString()), // o
+ testUuid() // uuid
+ );
- changeStreamDoc = transformer.applyTransformation(Document(oplogEntry));
+ changeStreamDoc = applyTransformation(oplogEntry, renameFrom);
renameDoc = Document{
{DocumentSourceChangeStream::kNamespaceField,
changeStreamDoc.getField(DocumentSourceChangeStream::kNamespaceField)},
@@ -322,24 +295,13 @@ TEST(ChangeStreamEventTransformTest, TestDropDatabaseTransformWithTenantId) {
// in the oplog entry. It should still not be a part of the db name in the change event.
RAIIServerParameterControllerForTest featureFlagController("featureFlagRequireTenantID", true);
- // TODO SERVER-66019 Construct OplogEntry using makeOplogEntry and use the applyTransformation
- // helper defined above. We manually construct the OplogEntry as a BSON object below to avoid
- // including the tenantId as the db prefix in the OplogEntry's "ns" field Until SERVER-66019 is
- // complete, the tenantId will be included in both the "tid" and "ns" fields in serialized oplog
- // entries, because serializing NamespaceString currently will include the tenantId.
- auto oplogEntry = BSON("ts" << Timestamp(0, 0) << "t" << 0LL << "op"
- << "c"
- << "ns"
- << "unittests.$cmd"
- << "tid" << tenantId << "wall" << Date_t() << "ui" << testUuid()
- << "o" << BSON("dropDatabase" << 1));
-
- DocumentSourceChangeStreamSpec spec;
- spec.setStartAtOperationTime(kDefaultTs);
- ChangeStreamEventTransformer transformer(make_intrusive<ExpressionContextForTest>(dbToDrop),
- spec);
+ auto oplogEntry = makeOplogEntry(repl::OpTypeEnum::kCommand, // op type
+ dbToDrop.getCommandNS(), // namespace
+ BSON("dropDatabase" << 1), // o
+ testUuid() // uuid
+ );
- changeStreamDoc = transformer.applyTransformation(Document(oplogEntry));
+ changeStreamDoc = applyTransformation(oplogEntry, dbToDrop);
outputNs = changeStreamDoc[DocumentSourceChangeStream::kNamespaceField].getDocument();
ASSERT_DOCUMENT_EQ(outputNs, expectedNamespace);
@@ -372,25 +334,13 @@ TEST(ChangeStreamEventTransformTest, TestCreateTransformWithTenantId) {
// in the oplog entry. It should still not be a part of the db name in the change event.
RAIIServerParameterControllerForTest featureFlagController("featureFlagRequireTenantID", true);
- // TODO SERVER-66019 Construct OplogEntry using makeOplogEntry and use the applyTransformation
- // helper defined above. We manually construct the OplogEntry as a BSON object below to avoid
- // including the tenantId as the db prefix in the OplogEntry's "ns" field Until SERVER-66019 is
- // complete, the tenantId will be included in both the "tid" and "ns" fields in serialized oplog
- // entries, because serializing NamespaceString currently will include the tenantId.
- auto oplogEntry = BSON("ts" << Timestamp(0, 0) << "t" << 0LL << "op"
- << "c"
- << "ns"
- << "unittests.$cmd"
- << "tid" << tenantId << "wall" << Date_t() << "ui" << testUuid()
- << "o" << BSON("create" << nssWithTenant.coll()));
-
- DocumentSourceChangeStreamSpec spec;
- spec.setStartAtOperationTime(kDefaultTs);
- spec.setShowExpandedEvents(true);
- ChangeStreamEventTransformer transformer(
- make_intrusive<ExpressionContextForTest>(nssWithTenant), spec);
+ auto oplogEntry = makeOplogEntry(repl::OpTypeEnum::kCommand, // op type
+ nssWithTenant.getCommandNS(), // namespace
+ BSON("create" << nssWithTenant.coll()), // o
+ testUuid() // uuid
+ );
- changeStreamDoc = transformer.applyTransformation(Document(oplogEntry));
+ changeStreamDoc = applyTransformation(oplogEntry, nssWithTenant);
outputNs = changeStreamDoc[DocumentSourceChangeStream::kNamespaceField].getDocument();
ASSERT_DOCUMENT_EQ(outputNs, expectedNamespace);
@@ -424,37 +374,19 @@ TEST(ChangeStreamEventTransformTest, TestCreateViewTransformWithTenantId) {
ASSERT_DOCUMENT_EQ(outputNs, expectedNamespace);
-
// Now set featureFlagRequireTenantId, so we expect the tenantId to be in a separate "tid" field
// in the oplog entry. It should still not be a part of the db name in the change event.
RAIIServerParameterControllerForTest featureFlagController("featureFlagRequireTenantID", true);
- // TODO SERVER-66019 Construct OplogEntry using makeOplogEntry and use the applyTransformation
- // helper defined above. We manually construct the OplogEntry as a BSON object below to avoid
- // including the tenantId as the db prefix in the OplogEntry's "ns" and "o._id" fields. Until
- // SERVER-66019 is complete, the tenantId will be included in both the "tid" field and these 2
- // fields in serialized oplog entries, because serializing NamespaceString currently will
- // include the tenantId.
- auto oplogEntry = BSON("ts" << Timestamp(0, 0) << "t" << 0LL << "op"
- << "i"
- << "ns"
- << "viewDB.system.views"
- << "tid" << tenantId << "wall" << Date_t() << "ui" << testUuid()
- << "o"
- << BSON("_id"
- << "viewDB.view.name"
- << "viewOn"
- << "baseColl"
- << "pipeline" << viewPipeline));
-
- DocumentSourceChangeStreamSpec spec;
- spec.setStartAtOperationTime(kDefaultTs);
- ChangeStreamEventTransformer transformer(
- make_intrusive<ExpressionContextForTest>(
- NamespaceString::makeCollectionlessAggregateNSS(viewNss.dbName())),
- spec);
+ auto oplogEntry = makeOplogEntry(repl::OpTypeEnum::kInsert, // op type
+ systemViewNss, // namespace
+ BSON("_id" << viewNss.toString() << "viewOn"
+ << "baseColl"
+ << "pipeline" << viewPipeline), // o
+ testUuid());
- changeStreamDoc = transformer.applyTransformation(Document(oplogEntry));
+ changeStreamDoc = applyTransformation(
+ oplogEntry, NamespaceString::makeCollectionlessAggregateNSS(viewNss.dbName()));
outputNs = changeStreamDoc[DocumentSourceChangeStream::kNamespaceField].getDocument();
ASSERT_DOCUMENT_EQ(outputNs, expectedNamespace);
diff --git a/src/mongo/db/pipeline/change_stream_pre_image_helpers.cpp b/src/mongo/db/pipeline/change_stream_pre_image_helpers.cpp
index f153a30818f..e4cbb6032ae 100644
--- a/src/mongo/db/pipeline/change_stream_pre_image_helpers.cpp
+++ b/src/mongo/db/pipeline/change_stream_pre_image_helpers.cpp
@@ -32,35 +32,49 @@
#include "mongo/db/pipeline/change_stream_pre_image_helpers.h"
+#include "mongo/base/error_codes.h"
+#include "mongo/db/catalog/collection.h"
#include "mongo/db/catalog_raii.h"
#include "mongo/db/concurrency/lock_manager_defs.h"
#include "mongo/db/concurrency/locker.h"
-#include "mongo/db/dbhelpers.h"
+#include "mongo/db/curop.h"
#include "mongo/db/namespace_string.h"
+#include "mongo/db/operation_context.h"
#include "mongo/util/assert_util.h"
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery
-
namespace mongo {
void writeToChangeStreamPreImagesCollection(OperationContext* opCtx,
const ChangeStreamPreImage& preImage) {
- const auto collectionNamespace = NamespaceString::kChangeStreamPreImagesNamespace;
+ tassert(6646200,
+ "Expected to be executed in a write unit of work",
+ opCtx->lockState()->inAWriteUnitOfWork());
tassert(5869404,
str::stream() << "Invalid pre-image document applyOpsIndex: "
<< preImage.getId().getApplyOpsIndex(),
preImage.getId().getApplyOpsIndex() >= 0);
- // This lock acquisition can block on a stronger lock held by another operation modifying the
- // pre-images collection. There are no known cases where an operation holding an exclusive lock
- // on the pre-images collection also waits for oplog visibility.
+ // This lock acquisition can block on a stronger lock held by another operation modifying
+ // the pre-images collection. There are no known cases where an operation holding an
+ // exclusive lock on the pre-images collection also waits for oplog visibility.
AllowLockAcquisitionOnTimestampedUnitOfWork allowLockAcquisition(opCtx->lockState());
- AutoGetCollection preimagesCollectionRaii(opCtx, collectionNamespace, LockMode::MODE_IX);
- UpdateResult res = Helpers::upsert(opCtx, collectionNamespace.toString(), preImage.toBSON());
+ AutoGetCollection preImagesCollectionRaii(
+ opCtx, NamespaceString::kChangeStreamPreImagesNamespace, LockMode::MODE_IX);
+ auto& changeStreamPreImagesCollection = preImagesCollectionRaii.getCollection();
+ tassert(6646201,
+ "The change stream pre-images collection is not present",
+ changeStreamPreImagesCollection);
+
+ // Inserts into the change stream pre-images collection are not replicated.
+ repl::UnreplicatedWritesBlock unreplicatedWritesBlock{opCtx};
+ const auto insertionStatus = changeStreamPreImagesCollection->insertDocument(
+ opCtx, InsertStatement{preImage.toBSON()}, &CurOp::get(opCtx)->debug());
tassert(5868601,
- str::stream() << "Failed to insert a new document into the pre-images collection: ts: "
- << preImage.getId().getTs().toString()
- << ", applyOpsIndex: " << preImage.getId().getApplyOpsIndex(),
- !res.existing && !res.upsertedId.isEmpty());
+ str::stream() << "Attempted to insert a duplicate document into the pre-images "
+ "collection. Pre-image id: "
+ << preImage.getId().toBSON().toString(),
+ insertionStatus != ErrorCodes::DuplicateKey);
+ uassertStatusOK(insertionStatus);
}
} // namespace mongo
diff --git a/src/mongo/db/pipeline/dependencies.cpp b/src/mongo/db/pipeline/dependencies.cpp
index 8b60a31637c..d2a5563c7c7 100644
--- a/src/mongo/db/pipeline/dependencies.cpp
+++ b/src/mongo/db/pipeline/dependencies.cpp
@@ -37,6 +37,13 @@
namespace mongo {
+std::list<std::string> DepsTracker::sortedFields() const {
+ // Use a special comparator to put parent fieldpaths before their children.
+ std::list<std::string> sortedFields(fields.begin(), fields.end());
+ sortedFields.sort(PathPrefixComparator());
+ return sortedFields;
+}
+
BSONObj DepsTracker::toProjectionWithoutMetadata(
TruncateToRootLevel truncationBehavior /*= TruncateToRootLevel::no*/) const {
BSONObjBuilder bb;
@@ -52,17 +59,21 @@ BSONObj DepsTracker::toProjectionWithoutMetadata(
return bb.obj();
}
+ // Go through dependency fieldpaths to find the minimal set of projections that cover the
+ // dependencies. For example, the dependencies ["a.b", "a.b.c.g", "c", "c.d", "f"] would be
+ // minimally covered by the projection {"a.b": 1, "c": 1, "f": 1}. The key operation here is
+ // folding dependencies into ancestor dependencies, wherever possible. This is assisted by a
+ // special sort in DepsTracker::sortedFields that treats '.' as the first char and thus places
+ // parent paths directly before their children.
bool idSpecified = false;
std::string last;
- for (const auto& field : fields) {
+ for (const auto& field : sortedFields()) {
if (str::startsWith(field, "_id") && (field.size() == 3 || field[3] == '.')) {
idSpecified = true;
}
if (!last.empty() && str::startsWith(field, last)) {
- // we are including a parent of *it so we don't need to include this field
- // explicitly. This logic relies on on set iterators going in lexicographic order so
- // that a string is always directly before of all fields it prefixes.
+ // We are including a parent of this field, so we can skip this field.
continue;
}
@@ -96,4 +107,36 @@ void DepsTracker::setNeedsMetadata(DocumentMetadataFields::MetaType type, bool r
invariant(required || !_metadataDeps[type]);
_metadataDeps[type] = required;
}
+
+// Returns true if the lhs value should sort before the rhs, false otherwise.
+bool PathPrefixComparator::operator()(const std::string& lhs, const std::string& rhs) const {
+ constexpr char dot = '.';
+
+ for (size_t pos = 0, len = std::min(lhs.size(), rhs.size()); pos < len; ++pos) {
+ // Below, we explicitly choose unsigned char because the usual const char& returned by
+ // operator[] is actually signed on x86 and will incorrectly order unicode characters.
+ unsigned char lchar = lhs[pos], rchar = rhs[pos];
+ if (lchar == rchar) {
+ continue;
+ }
+
+ // Consider the path delimiter '.' as being less than all other characters, so that
+ // paths sort directly before any paths they prefix and directly after any paths
+ // which prefix them.
+ if (lchar == dot) {
+ return true;
+ } else if (rchar == dot) {
+ return false;
+ }
+
+ // Otherwise, default to normal character comparison.
+ return lchar < rchar;
+ }
+
+ // If we get here, then we have reached the end of lhs and/or rhs and all of their path
+ // segments up to this point match. If lhs is shorter than rhs, then lhs prefixes rhs
+ // and should sort before it.
+ return lhs.size() < rhs.size();
+}
+
} // namespace mongo
diff --git a/src/mongo/db/pipeline/dependencies.h b/src/mongo/db/pipeline/dependencies.h
index bda3bf9b243..3c892de8181 100644
--- a/src/mongo/db/pipeline/dependencies.h
+++ b/src/mongo/db/pipeline/dependencies.h
@@ -184,6 +184,11 @@ struct DepsTracker {
}
}
+ /**
+ * Return fieldpaths ordered such that a parent is immediately before its children.
+ */
+ std::list<std::string> sortedFields() const;
+
std::set<std::string> fields; // Names of needed fields in dotted notation.
std::set<Variables::Id> vars; // IDs of referenced variables.
bool needWholeDocument = false; // If true, ignore 'fields'; the whole document is needed.
@@ -201,4 +206,13 @@ private:
// dependency analysis.
QueryMetadataBitSet _metadataDeps;
};
+
+
+/** Custom comparator that orders fieldpath strings by path prefix first, then by field.
+ * This ensures that a parent field is ordered directly before its children.
+ */
+struct PathPrefixComparator {
+ /* Returns true if the lhs value should sort before the rhs, false otherwise. */
+ bool operator()(const std::string& lhs, const std::string& rhs) const;
+};
} // namespace mongo
diff --git a/src/mongo/db/pipeline/dependencies_test.cpp b/src/mongo/db/pipeline/dependencies_test.cpp
index f366ad3ce1d..938130b91bd 100644
--- a/src/mongo/db/pipeline/dependencies_test.cpp
+++ b/src/mongo/db/pipeline/dependencies_test.cpp
@@ -162,6 +162,13 @@ TEST(DependenciesToProjectionTest, ShouldIncludeFieldEvenIfSuffixOfAnotherFieldW
BSON("a" << 1 << "ab" << 1 << "_id" << 0));
}
+TEST(DependenciesToProjectionTest, ExcludeIndirectDescendants) {
+ const char* array[] = {"a.b", "_id", "a.b.c.d.e"};
+ DepsTracker deps;
+ deps.fields = arrayToSet(array);
+ ASSERT_BSONOBJ_EQ(deps.toProjectionWithoutMetadata(), BSON("_id" << 1 << "a.b" << 1));
+}
+
TEST(DependenciesToProjectionTest, ShouldIncludeIdIfNeeded) {
const char* array[] = {"a", "_id"};
DepsTracker deps;
@@ -199,6 +206,27 @@ TEST(DependenciesToProjectionTest, ShouldIncludeFieldPrefixedByIdWhenIdSubfieldI
BSON("_id.a" << 1 << "_id_a" << 1 << "a" << 1));
}
+// SERVER-66418
+TEST(DependenciesToProjectionTest, ChildCoveredByParentWithSpecialChars) {
+ // without "_id"
+ {
+ // This is an important test case because '-' is one of the few chars before '.' in utf-8.
+ const char* array[] = {"a", "a-b", "a.b"};
+ DepsTracker deps;
+ deps.fields = arrayToSet(array);
+ ASSERT_BSONOBJ_EQ(deps.toProjectionWithoutMetadata(),
+ BSON("a" << 1 << "a-b" << 1 << "_id" << 0));
+ }
+ // with "_id"
+ {
+ const char* array[] = {"_id", "a", "a-b", "a.b"};
+ DepsTracker deps;
+ deps.fields = arrayToSet(array);
+ ASSERT_BSONOBJ_EQ(deps.toProjectionWithoutMetadata(),
+ BSON("_id" << 1 << "a" << 1 << "a-b" << 1));
+ }
+}
+
TEST(DependenciesToProjectionTest, ShouldOutputEmptyObjectIfEntireDocumentNeeded) {
const char* array[] = {"a"}; // fields ignored with needWholeDocument
DepsTracker deps;
@@ -259,5 +287,56 @@ TEST(DependenciesToProjectionTest,
ASSERT_TRUE(deps.metadataDeps()[DocumentMetadataFields::kTextScore]);
}
+TEST(DependenciesToProjectionTest, SortFieldPaths) {
+ const char* array[] = {"",
+ "A",
+ "_id",
+ "a",
+ "a.b",
+ "a.b.c",
+ "a.c",
+ // '-' char in utf-8 comes before '.' but our special fieldpath sort
+ // puts '.' first so that children directly follow their parents.
+ "a-b",
+ "a-b.ear",
+ "a-bear",
+ "a-bear.",
+ "a🌲",
+ "b",
+                           "b.a",
+                           "b.aa",
+ "b.🌲d"};
+ DepsTracker deps;
+ deps.fields = arrayToSet(array);
+ // our custom sort will restore the ordering above
+ std::list<std::string> fieldPathSorted = deps.sortedFields();
+ auto itr = fieldPathSorted.begin();
+ for (unsigned long i = 0; i < fieldPathSorted.size(); i++) {
+ ASSERT_EQ(*itr, array[i]);
+ ++itr;
+ }
+}
+
+TEST(DependenciesToProjectionTest, PathLessThan) {
+ auto lessThan = PathPrefixComparator();
+ ASSERT_FALSE(lessThan("a", "a"));
+ ASSERT_TRUE(lessThan("a", "aa"));
+ ASSERT_TRUE(lessThan("a", "b"));
+ ASSERT_TRUE(lessThan("", "a"));
+ ASSERT_TRUE(lessThan("Aa", "aa"));
+ ASSERT_TRUE(lessThan("a.b", "ab"));
+ ASSERT_TRUE(lessThan("a.b", "a-b")); // SERVER-66418
+ ASSERT_TRUE(lessThan("a.b", "a b")); // SERVER-66418
+ // verify the difference from the standard sort
+ ASSERT_TRUE(std::string("a.b") > std::string("a-b"));
+ ASSERT_TRUE(std::string("a.b") > std::string("a b"));
+ // test unicode behavior
+ ASSERT_TRUE(lessThan("a.b", "a🌲"));
+ ASSERT_TRUE(lessThan("a.b", "a🌲b"));
+ ASSERT_TRUE(lessThan("🌲", "🌳")); // U+1F332 < U+1F333
+ ASSERT_TRUE(lessThan("🌲", "🌲.b"));
+ ASSERT_FALSE(lessThan("🌲.b", "🌲"));
+}
+
} // namespace
} // namespace mongo
diff --git a/src/mongo/db/pipeline/dispatch_shard_pipeline_test.cpp b/src/mongo/db/pipeline/dispatch_shard_pipeline_test.cpp
index 069a7e2f0b2..effa178fc32 100644
--- a/src/mongo/db/pipeline/dispatch_shard_pipeline_test.cpp
+++ b/src/mongo/db/pipeline/dispatch_shard_pipeline_test.cpp
@@ -175,7 +175,7 @@ TEST_F(DispatchShardPipelineTest, DispatchShardPipelineDoesNotRetryOnStaleConfig
OID epoch{OID::gen()};
Timestamp timestamp{1, 0};
return createErrorCursorResponse({StaleConfigInfo(kTestAggregateNss,
- ChunkVersion(1, 0, epoch, timestamp),
+ ChunkVersion({epoch, timestamp}, {1, 0}),
boost::none,
ShardId{"0"}),
"Mock error: shard version mismatch"});
@@ -218,7 +218,7 @@ TEST_F(DispatchShardPipelineTest, WrappedDispatchDoesRetryOnStaleConfigError) {
// namespace, then mock out a successful response.
onCommand([&](const executor::RemoteCommandRequest& request) {
return createErrorCursorResponse({StaleConfigInfo(kTestAggregateNss,
- ChunkVersion(2, 0, epoch, timestamp),
+ ChunkVersion({epoch, timestamp}, {2, 0}),
boost::none,
ShardId{"0"}),
"Mock error: shard version mismatch"});
@@ -227,7 +227,7 @@ TEST_F(DispatchShardPipelineTest, WrappedDispatchDoesRetryOnStaleConfigError) {
// Mock the expected config server queries.
const ShardKeyPattern shardKeyPattern(BSON("_id" << 1));
- ChunkVersion version(2, 0, epoch, timestamp);
+ ChunkVersion version({epoch, timestamp}, {2, 0});
ChunkType chunk1(
uuid, {shardKeyPattern.getKeyPattern().globalMin(), BSON("_id" << 0)}, version, {"0"});
diff --git a/src/mongo/db/pipeline/document_source_change_stream_test.cpp b/src/mongo/db/pipeline/document_source_change_stream_test.cpp
index fcd7c965b37..bc1707620e4 100644
--- a/src/mongo/db/pipeline/document_source_change_stream_test.cpp
+++ b/src/mongo/db/pipeline/document_source_change_stream_test.cpp
@@ -1421,7 +1421,7 @@ TEST_F(ChangeStreamStageTest, TransformReshardBegin) {
TEST_F(ChangeStreamStageTest, TransformReshardDoneCatchUpLegacyFormat) {
auto existingUuid = UUID::gen();
auto reshardingUuid = UUID::gen();
- auto temporaryNs = constructTemporaryReshardingNss(nss.db(), existingUuid);
+ auto temporaryNs = resharding::constructTemporaryReshardingNss(nss.db(), existingUuid);
const auto o2FieldInLegacyFormat = BSON("type"
<< "reshardDoneCatchUp"
@@ -1460,7 +1460,7 @@ TEST_F(ChangeStreamStageTest, TransformReshardDoneCatchUpLegacyFormat) {
TEST_F(ChangeStreamStageTest, TransformReshardDoneCatchUp) {
auto existingUuid = UUID::gen();
auto reshardingUuid = UUID::gen();
- auto temporaryNs = constructTemporaryReshardingNss(nss.db(), existingUuid);
+ auto temporaryNs = resharding::constructTemporaryReshardingNss(nss.db(), existingUuid);
ReshardDoneCatchUpChangeEventO2Field o2Field{temporaryNs, reshardingUuid};
auto reshardDoneCatchUp = makeOplogEntry(OpTypeEnum::kNoop,
diff --git a/src/mongo/db/pipeline/document_source_check_resume_token_test.cpp b/src/mongo/db/pipeline/document_source_check_resume_token_test.cpp
index 701c5b495a7..5d250101fa2 100644
--- a/src/mongo/db/pipeline/document_source_check_resume_token_test.cpp
+++ b/src/mongo/db/pipeline/document_source_check_resume_token_test.cpp
@@ -167,7 +167,7 @@ public:
: DocumentSourceMock({}, expCtx), _collectionPtr(&_collection) {
_filterExpr = BSON("ns" << kTestNs);
_filter = MatchExpressionParser::parseAndNormalize(_filterExpr, pExpCtx);
- _params.assertTsHasNotFallenOffOplog = Timestamp(0);
+ _params.assertTsHasNotFallenOff = Timestamp(0);
_params.shouldTrackLatestOplogTimestamp = true;
_params.minRecord = RecordIdBound(RecordId(0));
_params.tailable = true;
@@ -178,7 +178,7 @@ public:
_filterExpr = BSON("ns" << kTestNs << "ts" << BSON("$gte" << resumeToken.clusterTime));
_filter = MatchExpressionParser::parseAndNormalize(_filterExpr, pExpCtx);
_params.minRecord = RecordIdBound(RecordId(resumeToken.clusterTime.asLL()));
- _params.assertTsHasNotFallenOffOplog = resumeToken.clusterTime;
+ _params.assertTsHasNotFallenOff = resumeToken.clusterTime;
}
void push_back(GetNextResult&& result) {
diff --git a/src/mongo/db/pipeline/document_source_cursor.cpp b/src/mongo/db/pipeline/document_source_cursor.cpp
index b98af917d99..c992288a0e4 100644
--- a/src/mongo/db/pipeline/document_source_cursor.cpp
+++ b/src/mongo/db/pipeline/document_source_cursor.cpp
@@ -33,6 +33,7 @@
#include "mongo/db/pipeline/document_source_cursor.h"
#include "mongo/db/catalog/collection.h"
+#include "mongo/db/db_raii.h"
#include "mongo/db/exec/document_value/document.h"
#include "mongo/db/exec/working_set_common.h"
#include "mongo/db/query/collection_query_info.h"
@@ -225,15 +226,20 @@ Value DocumentSourceCursor::serialize(boost::optional<ExplainOptions::Verbosity>
{
auto opCtx = pExpCtx->opCtx;
- auto lockMode = getLockModeForQuery(opCtx, _exec->nss());
- AutoGetDb dbLock(opCtx, _exec->nss().db(), lockMode);
- Lock::CollectionLock collLock(opCtx, _exec->nss(), lockMode);
- auto collection = dbLock.getDb()
- ? CollectionCatalog::get(opCtx)->lookupCollectionByNamespace(opCtx, _exec->nss())
- : nullptr;
+ auto secondaryNssList = _exec->getSecondaryNamespaces();
+ AutoGetCollectionForReadMaybeLockFree readLock(opCtx,
+ _exec->nss(),
+ AutoGetCollectionViewMode::kViewsForbidden,
+ Date_t::max(),
+ secondaryNssList);
+ MultipleCollectionAccessor collections(opCtx,
+ &readLock.getCollection(),
+ readLock.getNss(),
+ readLock.isAnySecondaryNamespaceAViewOrSharded(),
+ secondaryNssList);
Explain::explainStages(_exec.get(),
- collection,
+ collections,
verbosity.get(),
_execStatus,
_winningPlanTrialStats,
diff --git a/src/mongo/db/pipeline/document_source_find_and_modify_image_lookup.cpp b/src/mongo/db/pipeline/document_source_find_and_modify_image_lookup.cpp
index e66c1484519..afe4d7cf6e9 100644
--- a/src/mongo/db/pipeline/document_source_find_and_modify_image_lookup.cpp
+++ b/src/mongo/db/pipeline/document_source_find_and_modify_image_lookup.cpp
@@ -80,6 +80,7 @@ boost::optional<repl::OplogEntry> forgeNoopImageOplogEntry(
"Not forging no-op image oplog entry because no image document found with "
"sessionId",
"sessionId"_attr = sessionId);
+ return boost::none;
}
auto image = repl::ImageEntry::parse(IDLParserErrorContext("image entry"), imageDoc->toBson());
diff --git a/src/mongo/db/pipeline/document_source_find_and_modify_image_lookup_test.cpp b/src/mongo/db/pipeline/document_source_find_and_modify_image_lookup_test.cpp
index a80b87586ad..9068a61ff05 100644
--- a/src/mongo/db/pipeline/document_source_find_and_modify_image_lookup_test.cpp
+++ b/src/mongo/db/pipeline/document_source_find_and_modify_image_lookup_test.cpp
@@ -165,7 +165,7 @@ TEST_F(FindAndModifyImageLookupTest, ShouldNotForgeImageEntryWhenImageDocMissing
const auto stmtId = 1;
const auto opTime = repl::OpTime(Timestamp(2, 1), 1);
const auto oplogEntryBson = makeOplogEntry(opTime,
- repl::OpTypeEnum::kNoop,
+ repl::OpTypeEnum::kUpdate,
NamespaceString("test.foo"),
UUID::gen(),
BSON("a" << 1),
@@ -203,7 +203,7 @@ TEST_F(FindAndModifyImageLookupTest, ShouldNotForgeImageEntryWhenImageDocHasDiff
const auto ts = Timestamp(2, 1);
const auto opTime = repl::OpTime(ts, 1);
const auto oplogEntryBson = makeOplogEntry(opTime,
- repl::OpTypeEnum::kNoop,
+ repl::OpTypeEnum::kUpdate,
NamespaceString("test.foo"),
UUID::gen(),
BSON("a" << 1),
@@ -239,7 +239,6 @@ TEST_F(FindAndModifyImageLookupTest, ShouldNotForgeImageEntryWhenImageDocHasDiff
ASSERT_TRUE(imageLookup->getNext().isEOF());
}
-
TEST_F(FindAndModifyImageLookupTest, ShouldForgeImageEntryWhenMatchingImageDocIsFoundCrudOp) {
std::vector<repl::RetryImageEnum> cases{repl::RetryImageEnum::kPreImage,
repl::RetryImageEnum::kPostImage};
diff --git a/src/mongo/db/pipeline/document_source_lookup_test.cpp b/src/mongo/db/pipeline/document_source_lookup_test.cpp
index 82a0b6bbd61..aae4d7beef5 100644
--- a/src/mongo/db/pipeline/document_source_lookup_test.cpp
+++ b/src/mongo/db/pipeline/document_source_lookup_test.cpp
@@ -82,6 +82,13 @@ public:
}
};
+auto makeLookUpFromBson(BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& expCtx) {
+ auto docSource = DocumentSourceLookUp::createFromBson(elem, expCtx);
+ auto lookup = static_cast<DocumentSourceLookUp*>(docSource.detach());
+ return std::unique_ptr<DocumentSourceLookUp, DocumentSourceDeleter>(lookup,
+ DocumentSourceDeleter());
+}
+
// A 'let' variable defined in a $lookup stage is expected to be available to all sub-pipelines. For
// sub-pipelines below the immediate one, they are passed to via ExpressionContext. This test
// confirms that variables defined in the ExpressionContext are captured by the $lookup stage.
@@ -869,9 +876,7 @@ TEST_F(DocumentSourceLookUpTest, ShouldPropagatePauses) {
{"foreignField", "_id"_sd},
{"as", "foreignDocs"_sd}}}}
.toBson();
- auto parsed = DocumentSourceLookUp::createFromBson(lookupSpec.firstElement(), expCtx);
- auto lookup = static_cast<DocumentSourceLookUp*>(parsed.get());
-
+ auto lookup = makeLookUpFromBson(lookupSpec.firstElement(), expCtx);
lookup->setSource(mockLocalSource.get());
auto next = lookup->getNext();
@@ -890,7 +895,6 @@ TEST_F(DocumentSourceLookUpTest, ShouldPropagatePauses) {
ASSERT_TRUE(lookup->getNext().isEOF());
ASSERT_TRUE(lookup->getNext().isEOF());
- lookup->dispose();
}
TEST_F(DocumentSourceLookUpTest, ShouldPropagatePausesWhileUnwinding) {
@@ -905,6 +909,14 @@ TEST_F(DocumentSourceLookUpTest, ShouldPropagatePausesWhileUnwinding) {
expCtx->mongoProcessInterface =
std::make_shared<MockMongoInterface>(std::move(mockForeignContents));
+ // Mock its input, pausing every other result.
+ auto mockLocalSource =
+ DocumentSourceMock::createForTest({Document{{"foreignId", 0}},
+ DocumentSource::GetNextResult::makePauseExecution(),
+ Document{{"foreignId", 1}},
+ DocumentSource::GetNextResult::makePauseExecution()},
+ expCtx);
+
// Set up the $lookup stage.
auto lookupSpec = Document{{"$lookup",
Document{{"from", fromNs.coll()},
@@ -912,21 +924,13 @@ TEST_F(DocumentSourceLookUpTest, ShouldPropagatePausesWhileUnwinding) {
{"foreignField", "_id"_sd},
{"as", "foreignDoc"_sd}}}}
.toBson();
- auto parsed = DocumentSourceLookUp::createFromBson(lookupSpec.firstElement(), expCtx);
- auto lookup = static_cast<DocumentSourceLookUp*>(parsed.get());
+ auto lookup = makeLookUpFromBson(lookupSpec.firstElement(), expCtx);
const bool preserveNullAndEmptyArrays = false;
const boost::optional<std::string> includeArrayIndex = boost::none;
lookup->setUnwindStage(DocumentSourceUnwind::create(
expCtx, "foreignDoc", preserveNullAndEmptyArrays, includeArrayIndex));
- // Mock its input, pausing every other result.
- auto mockLocalSource =
- DocumentSourceMock::createForTest({Document{{"foreignId", 0}},
- DocumentSource::GetNextResult::makePauseExecution(),
- Document{{"foreignId", 1}},
- DocumentSource::GetNextResult::makePauseExecution()},
- expCtx);
lookup->setSource(mockLocalSource.get());
auto next = lookup->getNext();
@@ -945,7 +949,6 @@ TEST_F(DocumentSourceLookUpTest, ShouldPropagatePausesWhileUnwinding) {
ASSERT_TRUE(lookup->getNext().isEOF());
ASSERT_TRUE(lookup->getNext().isEOF());
- lookup->dispose();
}
TEST_F(DocumentSourceLookUpTest, LookupReportsAsFieldIsModified) {
@@ -961,14 +964,12 @@ TEST_F(DocumentSourceLookUpTest, LookupReportsAsFieldIsModified) {
{"foreignField", "_id"_sd},
{"as", "foreignDocs"_sd}}}}
.toBson();
- auto parsed = DocumentSourceLookUp::createFromBson(lookupSpec.firstElement(), expCtx);
- auto lookup = static_cast<DocumentSourceLookUp*>(parsed.get());
+ auto lookup = makeLookUpFromBson(lookupSpec.firstElement(), expCtx);
auto modifiedPaths = lookup->getModifiedPaths();
ASSERT(modifiedPaths.type == DocumentSource::GetModPathsReturn::Type::kFiniteSet);
ASSERT_EQ(1U, modifiedPaths.paths.size());
ASSERT_EQ(1U, modifiedPaths.paths.count("foreignDocs"));
- lookup->dispose();
}
TEST_F(DocumentSourceLookUpTest, LookupReportsFieldsModifiedByAbsorbedUnwind) {
@@ -984,8 +985,7 @@ TEST_F(DocumentSourceLookUpTest, LookupReportsFieldsModifiedByAbsorbedUnwind) {
{"foreignField", "_id"_sd},
{"as", "foreignDoc"_sd}}}}
.toBson();
- auto parsed = DocumentSourceLookUp::createFromBson(lookupSpec.firstElement(), expCtx);
- auto lookup = static_cast<DocumentSourceLookUp*>(parsed.get());
+ auto lookup = makeLookUpFromBson(lookupSpec.firstElement(), expCtx);
const bool preserveNullAndEmptyArrays = false;
const boost::optional<std::string> includeArrayIndex = std::string("arrIndex");
@@ -997,7 +997,6 @@ TEST_F(DocumentSourceLookUpTest, LookupReportsFieldsModifiedByAbsorbedUnwind) {
ASSERT_EQ(2U, modifiedPaths.paths.size());
ASSERT_EQ(1U, modifiedPaths.paths.count("foreignDoc"));
ASSERT_EQ(1U, modifiedPaths.paths.count("arrIndex"));
- lookup->dispose();
}
BSONObj sequentialCacheStageObj(const StringData status = "kBuilding"_sd,
diff --git a/src/mongo/db/pipeline/document_source_union_with_test.cpp b/src/mongo/db/pipeline/document_source_union_with_test.cpp
index 04f440fa91a..05e0feb7baa 100644
--- a/src/mongo/db/pipeline/document_source_union_with_test.cpp
+++ b/src/mongo/db/pipeline/document_source_union_with_test.cpp
@@ -60,6 +60,19 @@ using MockMongoInterface = StubLookupSingleDocumentProcessInterface;
// This provides access to getExpCtx(), but we'll use a different name for this test suite.
using DocumentSourceUnionWithTest = AggregationContextFixture;
+auto makeUnion(const boost::intrusive_ptr<ExpressionContext>& expCtx,
+ std::unique_ptr<Pipeline, PipelineDeleter> pipeline) {
+ return std::unique_ptr<DocumentSourceUnionWith, DocumentSourceDeleter>(
+ new DocumentSourceUnionWith(expCtx, std::move(pipeline)), DocumentSourceDeleter());
+}
+
+auto makeUnionFromBson(BSONElement elem, const boost::intrusive_ptr<ExpressionContext>& expCtx) {
+ auto docSource = DocumentSourceUnionWith::createFromBson(elem, expCtx);
+ auto unionWith = static_cast<DocumentSourceUnionWith*>(docSource.detach());
+ return std::unique_ptr<DocumentSourceUnionWith, DocumentSourceDeleter>(unionWith,
+ DocumentSourceDeleter());
+}
+
TEST_F(DocumentSourceUnionWithTest, BasicSerialUnions) {
const auto docs = std::array{Document{{"a", 1}}, Document{{"b", 1}}, Document{{"c", 1}}};
const auto mock = DocumentSourceMock::createForTest(docs[0], getExpCtx());
@@ -69,19 +82,19 @@ TEST_F(DocumentSourceUnionWithTest, BasicSerialUnions) {
mockCtxOne->mongoProcessInterface = std::make_unique<MockMongoInterface>(mockDequeOne);
const auto mockCtxTwo = getExpCtx()->copyWith({});
mockCtxTwo->mongoProcessInterface = std::make_unique<MockMongoInterface>(mockDequeTwo);
- auto unionWithOne = DocumentSourceUnionWith(
- mockCtxOne,
- Pipeline::create(std::list<boost::intrusive_ptr<DocumentSource>>{}, getExpCtx()));
- auto unionWithTwo = DocumentSourceUnionWith(
- mockCtxTwo,
- Pipeline::create(std::list<boost::intrusive_ptr<DocumentSource>>{}, getExpCtx()));
- unionWithOne.setSource(mock.get());
- unionWithTwo.setSource(&unionWithOne);
+ auto unionWithOne =
+ makeUnion(mockCtxOne,
+ Pipeline::create(std::list<boost::intrusive_ptr<DocumentSource>>{}, getExpCtx()));
+ auto unionWithTwo =
+ makeUnion(mockCtxTwo,
+ Pipeline::create(std::list<boost::intrusive_ptr<DocumentSource>>{}, getExpCtx()));
+ unionWithOne->setSource(mock.get());
+ unionWithTwo->setSource(unionWithOne.get());
auto comparator = DocumentComparator();
auto results = comparator.makeUnorderedDocumentSet();
for (auto& doc [[maybe_unused]] : docs) {
- auto next = unionWithTwo.getNext();
+ auto next = unionWithTwo->getNext();
ASSERT_TRUE(next.isAdvanced());
const auto [ignored, inserted] = results.insert(next.releaseDocument());
ASSERT_TRUE(inserted);
@@ -89,12 +102,9 @@ TEST_F(DocumentSourceUnionWithTest, BasicSerialUnions) {
for (const auto& doc : docs)
ASSERT_TRUE(results.find(doc) != results.end());
- ASSERT_TRUE(unionWithTwo.getNext().isEOF());
- ASSERT_TRUE(unionWithTwo.getNext().isEOF());
- ASSERT_TRUE(unionWithTwo.getNext().isEOF());
-
- unionWithOne.dispose();
- unionWithTwo.dispose();
+ ASSERT_TRUE(unionWithTwo->getNext().isEOF());
+ ASSERT_TRUE(unionWithTwo->getNext().isEOF());
+ ASSERT_TRUE(unionWithTwo->getNext().isEOF());
}
TEST_F(DocumentSourceUnionWithTest, BasicNestedUnions) {
@@ -109,16 +119,16 @@ TEST_F(DocumentSourceUnionWithTest, BasicNestedUnions) {
auto unionWithOne = make_intrusive<DocumentSourceUnionWith>(
mockCtxOne,
Pipeline::create(std::list<boost::intrusive_ptr<DocumentSource>>{}, getExpCtx()));
- auto unionWithTwo = DocumentSourceUnionWith(
- mockCtxTwo,
- Pipeline::create(std::list<boost::intrusive_ptr<DocumentSource>>{unionWithOne},
- getExpCtx()));
- unionWithTwo.setSource(mock.get());
+ auto unionWithTwo =
+ makeUnion(mockCtxTwo,
+ Pipeline::create(std::list<boost::intrusive_ptr<DocumentSource>>{unionWithOne},
+ getExpCtx()));
+ unionWithTwo->setSource(mock.get());
auto comparator = DocumentComparator();
auto results = comparator.makeUnorderedDocumentSet();
for (auto& doc [[maybe_unused]] : docs) {
- auto next = unionWithTwo.getNext();
+ auto next = unionWithTwo->getNext();
ASSERT_TRUE(next.isAdvanced());
const auto [ignored, inserted] = results.insert(next.releaseDocument());
ASSERT_TRUE(inserted);
@@ -126,11 +136,9 @@ TEST_F(DocumentSourceUnionWithTest, BasicNestedUnions) {
for (const auto& doc : docs)
ASSERT_TRUE(results.find(doc) != results.end());
- ASSERT_TRUE(unionWithTwo.getNext().isEOF());
- ASSERT_TRUE(unionWithTwo.getNext().isEOF());
- ASSERT_TRUE(unionWithTwo.getNext().isEOF());
-
- unionWithTwo.dispose();
+ ASSERT_TRUE(unionWithTwo->getNext().isEOF());
+ ASSERT_TRUE(unionWithTwo->getNext().isEOF());
+ ASSERT_TRUE(unionWithTwo->getNext().isEOF());
}
TEST_F(DocumentSourceUnionWithTest, UnionsWithNonEmptySubPipelines) {
@@ -145,19 +153,19 @@ TEST_F(DocumentSourceUnionWithTest, UnionsWithNonEmptySubPipelines) {
mockCtxTwo->mongoProcessInterface = std::make_unique<MockMongoInterface>(mockDequeTwo);
const auto filter = DocumentSourceMatch::create(BSON("d" << 1), mockCtxOne);
const auto proj = DocumentSourceAddFields::create(BSON("d" << 1), mockCtxTwo);
- auto unionWithOne = DocumentSourceUnionWith(
+ auto unionWithOne = makeUnion(
mockCtxOne,
Pipeline::create(std::list<boost::intrusive_ptr<DocumentSource>>{filter}, getExpCtx()));
- auto unionWithTwo = DocumentSourceUnionWith(
+ auto unionWithTwo = makeUnion(
mockCtxTwo,
Pipeline::create(std::list<boost::intrusive_ptr<DocumentSource>>{proj}, getExpCtx()));
- unionWithOne.setSource(mock.get());
- unionWithTwo.setSource(&unionWithOne);
+ unionWithOne->setSource(mock.get());
+ unionWithTwo->setSource(unionWithOne.get());
auto comparator = DocumentComparator();
auto results = comparator.makeUnorderedDocumentSet();
for (auto& doc [[maybe_unused]] : outputDocs) {
- auto next = unionWithTwo.getNext();
+ auto next = unionWithTwo->getNext();
ASSERT_TRUE(next.isAdvanced());
const auto [ignored, inserted] = results.insert(next.releaseDocument());
ASSERT_TRUE(inserted);
@@ -165,12 +173,9 @@ TEST_F(DocumentSourceUnionWithTest, UnionsWithNonEmptySubPipelines) {
for (const auto& doc : outputDocs)
ASSERT_TRUE(results.find(doc) != results.end());
- ASSERT_TRUE(unionWithTwo.getNext().isEOF());
- ASSERT_TRUE(unionWithTwo.getNext().isEOF());
- ASSERT_TRUE(unionWithTwo.getNext().isEOF());
-
- unionWithOne.dispose();
- unionWithTwo.dispose();
+ ASSERT_TRUE(unionWithTwo->getNext().isEOF());
+ ASSERT_TRUE(unionWithTwo->getNext().isEOF());
+ ASSERT_TRUE(unionWithTwo->getNext().isEOF());
}
TEST_F(DocumentSourceUnionWithTest, SerializeAndParseWithPipeline) {
@@ -315,26 +320,23 @@ TEST_F(DocumentSourceUnionWithTest, PropagatePauses) {
mockCtxOne->mongoProcessInterface = std::make_unique<MockMongoInterface>(mockDequeOne);
const auto mockCtxTwo = getExpCtx()->copyWith({});
mockCtxTwo->mongoProcessInterface = std::make_unique<MockMongoInterface>(mockDequeTwo);
- auto unionWithOne = DocumentSourceUnionWith(
- mockCtxOne,
- Pipeline::create(std::list<boost::intrusive_ptr<DocumentSource>>{}, getExpCtx()));
- auto unionWithTwo = DocumentSourceUnionWith(
- mockCtxTwo,
- Pipeline::create(std::list<boost::intrusive_ptr<DocumentSource>>{}, getExpCtx()));
- unionWithOne.setSource(mock.get());
- unionWithTwo.setSource(&unionWithOne);
-
- ASSERT_TRUE(unionWithTwo.getNext().isAdvanced());
- ASSERT_TRUE(unionWithTwo.getNext().isPaused());
- ASSERT_TRUE(unionWithTwo.getNext().isAdvanced());
- ASSERT_TRUE(unionWithTwo.getNext().isPaused());
-
- ASSERT_TRUE(unionWithTwo.getNext().isEOF());
- ASSERT_TRUE(unionWithTwo.getNext().isEOF());
- ASSERT_TRUE(unionWithTwo.getNext().isEOF());
-
- unionWithOne.dispose();
- unionWithTwo.dispose();
+ auto unionWithOne =
+ makeUnion(mockCtxOne,
+ Pipeline::create(std::list<boost::intrusive_ptr<DocumentSource>>{}, getExpCtx()));
+ auto unionWithTwo =
+ makeUnion(mockCtxTwo,
+ Pipeline::create(std::list<boost::intrusive_ptr<DocumentSource>>{}, getExpCtx()));
+ unionWithOne->setSource(mock.get());
+ unionWithTwo->setSource(unionWithOne.get());
+
+ ASSERT_TRUE(unionWithTwo->getNext().isAdvanced());
+ ASSERT_TRUE(unionWithTwo->getNext().isPaused());
+ ASSERT_TRUE(unionWithTwo->getNext().isAdvanced());
+ ASSERT_TRUE(unionWithTwo->getNext().isPaused());
+
+ ASSERT_TRUE(unionWithTwo->getNext().isEOF());
+ ASSERT_TRUE(unionWithTwo->getNext().isEOF());
+ ASSERT_TRUE(unionWithTwo->getNext().isEOF());
}
TEST_F(DocumentSourceUnionWithTest, ReturnEOFAfterBeingDisposed) {
@@ -406,10 +408,10 @@ TEST_F(DocumentSourceUnionWithTest, RespectsViewDefinition) {
expCtx->mongoProcessInterface =
std::make_shared<MockMongoInterface>(std::move(mockForeignContents));
- auto bson = BSON("$unionWith" << nsToUnionWith.coll());
- auto unionWith = DocumentSourceUnionWith::createFromBson(bson.firstElement(), expCtx);
const auto localMock =
DocumentSourceMock::createForTest({Document{{"_id"_sd, "local"_sd}}}, getExpCtx());
+ auto bson = BSON("$unionWith" << nsToUnionWith.coll());
+ auto unionWith = makeUnionFromBson(bson.firstElement(), expCtx);
unionWith->setSource(localMock.get());
auto result = unionWith->getNext();
@@ -421,8 +423,6 @@ TEST_F(DocumentSourceUnionWithTest, RespectsViewDefinition) {
ASSERT_DOCUMENT_EQ(result.getDocument(), (Document{{"_id"_sd, 2}}));
ASSERT_TRUE(unionWith->getNext().isEOF());
-
- unionWith->dispose();
}
TEST_F(DocumentSourceUnionWithTest, ConcatenatesViewDefinitionToPipeline) {
@@ -445,7 +445,7 @@ TEST_F(DocumentSourceUnionWithTest, ConcatenatesViewDefinitionToPipeline) {
"coll" << viewNsToUnionWith.coll() << "pipeline"
<< BSON_ARRAY(fromjson(
"{$set: {originalId: '$_id', _id: {$add: [1, '$_id']}}}"))));
- auto unionWith = DocumentSourceUnionWith::createFromBson(bson.firstElement(), expCtx);
+ auto unionWith = makeUnionFromBson(bson.firstElement(), expCtx);
unionWith->setSource(localMock.get());
auto result = unionWith->getNext();
@@ -459,8 +459,6 @@ TEST_F(DocumentSourceUnionWithTest, ConcatenatesViewDefinitionToPipeline) {
ASSERT_DOCUMENT_EQ(result.getDocument(), (Document{{"_id"_sd, 3}, {"originalId"_sd, 2}}));
ASSERT_TRUE(unionWith->getNext().isEOF());
-
- unionWith->dispose();
}
TEST_F(DocumentSourceUnionWithTest, RejectUnionWhenDepthLimitIsExceeded) {
@@ -482,9 +480,9 @@ TEST_F(DocumentSourceUnionWithTest, RejectUnionWhenDepthLimitIsExceeded) {
}
TEST_F(DocumentSourceUnionWithTest, ConstraintsWithoutPipelineAreCorrect) {
- auto emptyUnion = DocumentSourceUnionWith(
- getExpCtx(),
- Pipeline::create(std::list<boost::intrusive_ptr<DocumentSource>>{}, getExpCtx()));
+ auto emptyUnion =
+ makeUnion(getExpCtx(),
+ Pipeline::create(std::list<boost::intrusive_ptr<DocumentSource>>{}, getExpCtx()));
StageConstraints defaultConstraints(StageConstraints::StreamType::kStreaming,
StageConstraints::PositionRequirement::kNone,
StageConstraints::HostTypeRequirement::kAnyShard,
@@ -493,9 +491,7 @@ TEST_F(DocumentSourceUnionWithTest, ConstraintsWithoutPipelineAreCorrect) {
StageConstraints::TransactionRequirement::kNotAllowed,
StageConstraints::LookupRequirement::kAllowed,
StageConstraints::UnionRequirement::kAllowed);
- ASSERT_TRUE(emptyUnion.constraints(Pipeline::SplitState::kUnsplit) == defaultConstraints);
-
- emptyUnion.dispose();
+ ASSERT_TRUE(emptyUnion->constraints(Pipeline::SplitState::kUnsplit) == defaultConstraints);
}
TEST_F(DocumentSourceUnionWithTest, ConstraintsWithMixedSubPipelineAreCorrect) {
diff --git a/src/mongo/db/pipeline/expression.cpp b/src/mongo/db/pipeline/expression.cpp
index 8237dbdfeae..464d2ad6953 100644
--- a/src/mongo/db/pipeline/expression.cpp
+++ b/src/mongo/db/pipeline/expression.cpp
@@ -39,6 +39,9 @@
#include <utility>
#include <vector>
+#include "mongo/bson/bsonmisc.h"
+#include "mongo/bson/bsontypes.h"
+#include "mongo/crypto/fle_crypto.h"
#include "mongo/db/bson/dotted_path_support.h"
#include "mongo/db/commands/feature_compatibility_version_documentation.h"
#include "mongo/db/exec/document_value/document.h"
@@ -46,6 +49,7 @@
#include "mongo/db/hasher.h"
#include "mongo/db/jsobj.h"
#include "mongo/db/pipeline/expression_context.h"
+#include "mongo/db/pipeline/expression_parser_gen.h"
#include "mongo/db/pipeline/variable_validation.h"
#include "mongo/db/query/datetime/date_time_support.h"
#include "mongo/db/query/sort_pattern.h"
@@ -304,111 +308,173 @@ const char* ExpressionAbs::getOpName() const {
/* ------------------------- ExpressionAdd ----------------------------- */
-StatusWith<Value> ExpressionAdd::apply(Value lhs, Value rhs) {
- BSONType diffType = Value::getWidestNumeric(rhs.getType(), lhs.getType());
+namespace {
- if (diffType == NumberDecimal) {
- Decimal128 left = lhs.coerceToDecimal();
- Decimal128 right = rhs.coerceToDecimal();
- return Value(left.add(right));
- } else if (diffType == NumberDouble) {
- double right = rhs.coerceToDouble();
- double left = lhs.coerceToDouble();
- return Value(left + right);
- } else if (diffType == NumberLong) {
- long long result;
+/**
+ * We'll try to return the narrowest possible result value while avoiding overflow or implicit use
+ * of decimal types. To do that, compute separate sums for long, double and decimal values, and
+ * track the current widest type. The long sum will be converted to double when the first double
+ * value is seen or when long arithmetic would overflow.
+ */
+class AddState {
+ long long longTotal = 0;
+ double doubleTotal = 0;
+ Decimal128 decimalTotal;
+ BSONType widestType = NumberInt;
+ bool isDate = false;
- // If there is an overflow, convert the values to doubles.
- if (overflow::add(lhs.coerceToLong(), rhs.coerceToLong(), &result)) {
- return Value(lhs.coerceToDouble() + rhs.coerceToDouble());
+public:
+ /**
+ * Update the internal state with another operand. It is up to the caller to validate that the
+ * operand is of a proper type.
+ */
+ void operator+=(const Value& operand) {
+ auto oldWidestType = widestType;
+ // Dates are represented by the long number of milliseconds since the unix epoch, so we can
+ // treat them as regular numeric values for the purposes of addition after making sure that
+ // only one date is present in the operand list.
+ Value valToAdd;
+ if (operand.getType() == Date) {
+ uassert(16612, "only one date allowed in an $add expression", !isDate);
+ isDate = true;
+ valToAdd = Value(operand.getDate().toMillisSinceEpoch());
+ } else {
+ widestType = Value::getWidestNumeric(widestType, operand.getType());
+ valToAdd = operand;
}
- return Value(result);
- } else if (diffType == NumberInt) {
- long long right = rhs.coerceToLong();
- long long left = lhs.coerceToLong();
- return Value::createIntOrLong(left + right);
- } else if (lhs.nullish() || rhs.nullish()) {
- return Value(BSONNULL);
- } else {
- return Status(ErrorCodes::TypeMismatch,
- str::stream() << "cannot $add a" << typeName(rhs.getType()) << " from a "
- << typeName(lhs.getType()));
- }
-}
-
-Value ExpressionAdd::evaluate(const Document& root, Variables* variables) const {
- // We'll try to return the narrowest possible result value while avoiding overflow, loss
- // of precision due to intermediate rounding or implicit use of decimal types. To do that,
- // compute a compensated sum for non-decimal values and a separate decimal sum for decimal
- // values, and track the current narrowest type.
- DoubleDoubleSummation nonDecimalTotal;
- Decimal128 decimalTotal;
- BSONType totalType = NumberInt;
- bool haveDate = false;
- const size_t n = _children.size();
- for (size_t i = 0; i < n; ++i) {
- Value val = _children[i]->evaluate(root, variables);
+ // If this operation widens the return type, perform any necessary type conversions.
+ if (oldWidestType != widestType) {
+ switch (widestType) {
+ case NumberLong:
+ // Int -> Long is handled by the same sum.
+ break;
+ case NumberDouble:
+ // Int/Long -> Double converts the existing longTotal to a doubleTotal.
+ doubleTotal = longTotal;
+ break;
+ case NumberDecimal:
+ // Convert the right total to NumberDecimal by looking at the old widest type.
+ switch (oldWidestType) {
+ case NumberInt:
+ case NumberLong:
+ decimalTotal = Decimal128(longTotal);
+ break;
+ case NumberDouble:
+ decimalTotal = Decimal128(doubleTotal, Decimal128::kRoundTo34Digits);
+ break;
+ default:
+ MONGO_UNREACHABLE;
+ }
+ break;
+ default:
+ MONGO_UNREACHABLE;
+ }
+ }
- switch (val.getType()) {
- case NumberDecimal:
- decimalTotal = decimalTotal.add(val.getDecimal());
- totalType = NumberDecimal;
- break;
- case NumberDouble:
- nonDecimalTotal.addDouble(val.getDouble());
- if (totalType != NumberDecimal)
- totalType = NumberDouble;
- break;
+ // Perform the add operation.
+ switch (widestType) {
+ case NumberInt:
case NumberLong:
- nonDecimalTotal.addLong(val.getLong());
- if (totalType == NumberInt)
- totalType = NumberLong;
+ // If the long long arithmetic overflows, promote the result to a NumberDouble and
+ // start incrementing the doubleTotal.
+ long long newLongTotal;
+ if (overflow::add(longTotal, valToAdd.coerceToLong(), &newLongTotal)) {
+ widestType = NumberDouble;
+ doubleTotal = longTotal + valToAdd.coerceToDouble();
+ } else {
+ longTotal = newLongTotal;
+ }
break;
- case NumberInt:
- nonDecimalTotal.addDouble(val.getInt());
+ case NumberDouble:
+ doubleTotal += valToAdd.coerceToDouble();
break;
- case Date:
- uassert(16612, "only one date allowed in an $add expression", !haveDate);
- haveDate = true;
- nonDecimalTotal.addLong(val.getDate().toMillisSinceEpoch());
+ case NumberDecimal:
+ decimalTotal = decimalTotal.add(valToAdd.coerceToDecimal());
break;
default:
- uassert(16554,
- str::stream() << "$add only supports numeric or date types, not "
- << typeName(val.getType()),
- val.nullish());
- return Value(BSONNULL);
+ uasserted(ErrorCodes::TypeMismatch,
+ str::stream() << "$add only supports numeric or date types, not "
+ << typeName(valToAdd.getType()));
}
}
- if (haveDate) {
- int64_t longTotal;
- if (totalType == NumberDecimal) {
- longTotal = decimalTotal.add(nonDecimalTotal.getDecimal()).toLong();
+ Value getValue() const {
+ // If one of the operands was a date, then convert the result to a date.
+ if (isDate) {
+ switch (widestType) {
+ case NumberInt:
+ case NumberLong:
+ return Value(Date_t::fromMillisSinceEpoch(longTotal));
+ case NumberDouble:
+ using limits = std::numeric_limits<long long>;
+ uassert(ErrorCodes::Overflow,
+ "date overflow in $add",
+ // The upper bound is exclusive because it rounds up when it is cast to
+ // a double.
+ doubleTotal >= limits::min() &&
+ doubleTotal < static_cast<double>(limits::max()));
+ return Value(Date_t::fromMillisSinceEpoch(llround(doubleTotal)));
+ case NumberDecimal:
+ // Decimal dates are not checked for overflow.
+ return Value(Date_t::fromMillisSinceEpoch(decimalTotal.toLong()));
+ default:
+ MONGO_UNREACHABLE;
+ }
} else {
- uassert(ErrorCodes::Overflow, "date overflow in $add", nonDecimalTotal.fitsLong());
- longTotal = nonDecimalTotal.getLong();
+ switch (widestType) {
+ case NumberInt:
+ return Value::createIntOrLong(longTotal);
+ case NumberLong:
+ return Value(longTotal);
+ case NumberDouble:
+ return Value(doubleTotal);
+ case NumberDecimal:
+ return Value(decimalTotal);
+ default:
+ MONGO_UNREACHABLE;
+ }
}
- return Value(Date_t::fromMillisSinceEpoch(longTotal));
}
- switch (totalType) {
- case NumberDecimal:
- return Value(decimalTotal.add(nonDecimalTotal.getDecimal()));
- case NumberLong:
- dassert(nonDecimalTotal.isInteger());
- if (nonDecimalTotal.fitsLong())
- return Value(nonDecimalTotal.getLong());
- [[fallthrough]];
- case NumberInt:
- if (nonDecimalTotal.fitsLong())
- return Value::createIntOrLong(nonDecimalTotal.getLong());
- [[fallthrough]];
- case NumberDouble:
- return Value(nonDecimalTotal.getDouble());
- default:
- massert(16417, "$add resulted in a non-numeric type", false);
+};
+
+Status checkAddOperandType(Value val) {
+ if (!val.numeric() && val.getType() != Date) {
+ return Status(ErrorCodes::TypeMismatch,
+ str::stream() << "$add only supports numeric or date types, not "
+ << typeName(val.getType()));
}
+
+ return Status::OK();
+}
+} // namespace
+
+StatusWith<Value> ExpressionAdd::apply(Value lhs, Value rhs) {
+ if (lhs.nullish())
+ return Value(BSONNULL);
+ if (Status s = checkAddOperandType(lhs); !s.isOK())
+ return s;
+ if (rhs.nullish())
+ return Value(BSONNULL);
+ if (Status s = checkAddOperandType(rhs); !s.isOK())
+ return s;
+
+ AddState state;
+ state += lhs;
+ state += rhs;
+ return state.getValue();
+}
+
+Value ExpressionAdd::evaluate(const Document& root, Variables* variables) const {
+ AddState state;
+ for (auto&& child : _children) {
+ Value val = child->evaluate(root, variables);
+ if (val.nullish())
+ return Value(BSONNULL);
+ uassertStatusOK(checkAddOperandType(val));
+ state += val;
+ }
+ return state.getValue();
}
REGISTER_STABLE_EXPRESSION(add, ExpressionAdd::parse);
@@ -3253,7 +3319,7 @@ Value ExpressionMultiply::evaluate(const Document& root, Variables* variables) c
if (val.nullish())
return Value(BSONNULL);
uassertStatusOK(checkMultiplyNumeric(val));
- state *= child->evaluate(root, variables);
+ state *= val;
}
return state.getValue();
}
@@ -3742,6 +3808,123 @@ const char* ExpressionLog10::getOpName() const {
return "$log10";
}
+/* ----------------------- ExpressionInternalFLEEqual ---------------------------- */
+constexpr auto kInternalFleEq = "$_internalFleEq"_sd;
+
+ExpressionInternalFLEEqual::ExpressionInternalFLEEqual(ExpressionContext* const expCtx,
+ boost::intrusive_ptr<Expression> field,
+ ConstDataRange serverToken,
+ int64_t contentionFactor,
+ ConstDataRange edcToken)
+ : Expression(expCtx, {std::move(field)}),
+ _serverToken(PrfBlockfromCDR(serverToken)),
+ _edcToken(PrfBlockfromCDR(edcToken)),
+ _contentionFactor(contentionFactor) {
+ expCtx->sbeCompatible = false;
+
+ auto tokens =
+ EDCServerCollection::generateEDCTokens(ConstDataRange(_edcToken), _contentionFactor);
+
+ for (auto& token : tokens) {
+ _cachedEDCTokens.insert(std::move(token.data));
+ }
+}
+
+void ExpressionInternalFLEEqual::_doAddDependencies(DepsTracker* deps) const {
+ for (auto&& operand : _children) {
+ operand->addDependencies(deps);
+ }
+}
+
+REGISTER_EXPRESSION_WITH_MIN_VERSION(_internalFleEq,
+ ExpressionInternalFLEEqual::parse,
+ AllowedWithApiStrict::kAlways,
+ AllowedWithClientType::kAny,
+ multiversion::FeatureCompatibilityVersion::kVersion_6_0);
+
+intrusive_ptr<Expression> ExpressionInternalFLEEqual::parse(ExpressionContext* const expCtx,
+ BSONElement expr,
+ const VariablesParseState& vps) {
+
+ IDLParserErrorContext ctx(kInternalFleEq);
+ auto fleEq = InternalFleEqStruct::parse(ctx, expr.Obj());
+
+ auto fieldExpr = Expression::parseOperand(expCtx, fleEq.getField().getElement(), vps);
+
+ auto serverTokenPair = fromEncryptedConstDataRange(fleEq.getServerEncryptionToken());
+
+ uassert(6672405,
+ "Invalid server token",
+ serverTokenPair.first == EncryptedBinDataType::kFLE2TransientRaw &&
+ serverTokenPair.second.length() == sizeof(PrfBlock));
+
+ auto edcTokenPair = fromEncryptedConstDataRange(fleEq.getEdcDerivedToken());
+
+ uassert(6672406,
+ "Invalid edc token",
+ edcTokenPair.first == EncryptedBinDataType::kFLE2TransientRaw &&
+ edcTokenPair.second.length() == sizeof(PrfBlock));
+
+
+ auto cf = fleEq.getMaxCounter();
+ uassert(6672408, "Contention factor must be between 0 and 10000", cf >= 0 && cf < 10000);
+
+ return new ExpressionInternalFLEEqual(expCtx,
+ std::move(fieldExpr),
+ serverTokenPair.second,
+ fleEq.getMaxCounter(),
+ edcTokenPair.second);
+}
+
+Value toValue(const std::array<std::uint8_t, 32>& buf) {
+ auto vec = toEncryptedVector(EncryptedBinDataType::kFLE2TransientRaw, buf);
+ return Value(BSONBinData(vec.data(), vec.size(), BinDataType::Encrypt));
+}
+
+Value ExpressionInternalFLEEqual::serialize(bool explain) const {
+ return Value(Document{{kInternalFleEq,
+ Document{{"field", _children[0]->serialize(explain)},
+ {"edc", toValue(_edcToken)},
+ {"counter", Value(static_cast<long long>(_contentionFactor))},
+ {"server", toValue(_serverToken)}}}});
+}
+
+Value ExpressionInternalFLEEqual::evaluate(const Document& root, Variables* variables) const {
+ // Inputs
+ // 1. Value for FLE2IndexedEqualityEncryptedValue field
+
+ Value fieldValue = _children[0]->evaluate(root, variables);
+
+ if (fieldValue.nullish()) {
+ return Value(BSONNULL);
+ }
+
+ if (fieldValue.getType() != BinData) {
+ return Value(false);
+ }
+
+ auto fieldValuePair = fromEncryptedBinData(fieldValue);
+
+ uassert(6672407,
+ "Invalid encrypted indexed field",
+ fieldValuePair.first == EncryptedBinDataType::kFLE2EqualityIndexedValue);
+
+ // Value matches if
+ // 1. Decrypt field is successful
+ // 2. EDC_u Token is in GenTokens(EDC Token, ContentionFactor)
+ //
+ auto swIndexed =
+ EDCServerCollection::decryptAndParse(ConstDataRange(_serverToken), fieldValuePair.second);
+ uassertStatusOK(swIndexed);
+ auto indexed = swIndexed.getValue();
+
+ return Value(_cachedEDCTokens.count(indexed.edc.data) == 1);
+}
+
+const char* ExpressionInternalFLEEqual::getOpName() const {
+ return kInternalFleEq.rawData();
+}
+
/* ------------------------ ExpressionNary ----------------------------- */
/**
diff --git a/src/mongo/db/pipeline/expression.h b/src/mongo/db/pipeline/expression.h
index ff53eaedf3e..4b5745bb2b6 100644
--- a/src/mongo/db/pipeline/expression.h
+++ b/src/mongo/db/pipeline/expression.h
@@ -29,6 +29,7 @@
#pragma once
+#include "mongo/base/data_range.h"
#include "mongo/platform/basic.h"
#include <algorithm>
@@ -2197,6 +2198,38 @@ public:
}
};
+class ExpressionInternalFLEEqual final : public Expression {
+public:
+ ExpressionInternalFLEEqual(ExpressionContext* expCtx,
+ boost::intrusive_ptr<Expression> field,
+ ConstDataRange serverToken,
+ int64_t contentionFactor,
+ ConstDataRange edcToken);
+ Value serialize(bool explain) const final;
+
+ Value evaluate(const Document& root, Variables* variables) const final;
+ const char* getOpName() const;
+
+ static boost::intrusive_ptr<Expression> parse(ExpressionContext* expCtx,
+ BSONElement expr,
+ const VariablesParseState& vps);
+ void _doAddDependencies(DepsTracker* deps) const final;
+
+ void acceptVisitor(ExpressionMutableVisitor* visitor) final {
+ return visitor->visit(this);
+ }
+
+ void acceptVisitor(ExpressionConstVisitor* visitor) const final {
+ return visitor->visit(this);
+ }
+
+private:
+ std::array<std::uint8_t, 32> _serverToken;
+ std::array<std::uint8_t, 32> _edcToken;
+ int64_t _contentionFactor;
+ stdx::unordered_set<std::array<std::uint8_t, 32>> _cachedEDCTokens;
+};
+
class ExpressionMap final : public Expression {
public:
ExpressionMap(
diff --git a/src/mongo/db/ops/new_write_error_exception_format_feature_flag.idl b/src/mongo/db/pipeline/expression_parser.idl
index f5fb71095b0..9f1cde70856 100644
--- a/src/mongo/db/ops/new_write_error_exception_format_feature_flag.idl
+++ b/src/mongo/db/pipeline/expression_parser.idl
@@ -24,18 +24,34 @@
# delete this exception statement from your version. If you delete this
# exception statement from all source files in the program, then also delete
# it in the license file.
-#
global:
- cpp_namespace: "mongo::feature_flags"
+ cpp_namespace: "mongo"
imports:
- "mongo/idl/basic_types.idl"
-feature_flags:
- featureFlagNewWriteErrorExceptionFormat:
- description: Feature flag for enabling the new write error format which avoids serialising
- StaleShardVersion with the information of StaleConfig.
- cpp_varname: gFeatureFlagNewWriteErrorExceptionFormat
- default: true
- version: 6.0
+structs:
+
+ InternalFleEqStruct:
+ description: "Struct for $_internalFleEq"
+ strict: true
+ fields:
+ field:
+ description: "Expression"
+ type: IDLAnyType
+ cpp_name: field
+ edc:
+ description: "EDCDerivedFromDataToken"
+ type: bindata_encrypt
+ cpp_name: edcDerivedToken
+ server:
+ description: "ServerDataEncryptionLevel1Token"
+ type: bindata_encrypt
+ cpp_name: serverEncryptionToken
+ counter:
+ description: "Queryable Encryption max counter"
+ type: long
+ cpp_name: maxCounter
+
+
diff --git a/src/mongo/db/pipeline/expression_test.cpp b/src/mongo/db/pipeline/expression_test.cpp
index b33f3bc893c..314062c3f03 100644
--- a/src/mongo/db/pipeline/expression_test.cpp
+++ b/src/mongo/db/pipeline/expression_test.cpp
@@ -30,6 +30,8 @@
#include "mongo/platform/basic.h"
+#include <climits>
+
#include "mongo/bson/bsonmisc.h"
#include "mongo/config.h"
#include "mongo/db/exec/document_value/document.h"
@@ -47,6 +49,8 @@
#include "mongo/idl/server_parameter_test_util.h"
#include "mongo/logv2/log.h"
#include "mongo/unittest/unittest.h"
+#include "mongo/util/summation.h"
+#include "mongo/util/time_support.h"
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
@@ -171,6 +175,7 @@ void parseAndVerifyResults(
ASSERT_VALUE_EQ(expr->evaluate({}, &expCtx.variables), expected);
}
+
/* ------------------------- ExpressionArrayToObject -------------------------- */
TEST(ExpressionArrayToObjectTest, KVFormatSimple) {
@@ -3717,4 +3722,449 @@ TEST(ExpressionCondTest, ConstantCondShouldOptimizeWithNonConstantBranches) {
ASSERT_BSONOBJ_BINARY_EQ(expectedResult, expressionToBson(optimizedExprCond));
}
+TEST(ExpressionAddTest, Integers) {
+ assertExpectedResults("$add",
+ {
+ // Empty case.
+ {{}, 0},
+ // Singleton case.
+ {{1}, 1},
+ // Integer addition.
+ {{1, 2, 3}, 6},
+ // Adding negative numbers
+ {{6, -3, 2}, 5},
+ // Getting a negative result
+ {{-6, -3, 2}, -7},
+ // Min/max ints are not promoted to longs.
+ {{INT_MAX}, INT_MAX},
+ {{INT_MAX, -1}, Value(INT_MAX - 1)},
+ {{INT_MIN}, INT_MIN},
+ {{INT_MIN, 1}, Value(INT_MIN + 1)},
+ // Integer overflow is promoted to a long.
+ {{INT_MAX, 1}, Value((long long)INT_MAX + 1LL)},
+ {{INT_MIN, -1}, Value((long long)INT_MIN - 1LL)},
+ });
+}
+
+
+TEST(ExpressionAddTest, Longs) {
+ assertExpectedResults(
+ "$add",
+ {
+ // Singleton case.
+ {{1LL}, 1LL},
+ // Long addition.
+ {{1LL, 2LL, 3LL}, 6LL},
+ // Adding negative numbers
+ {{6LL, -3LL, 2LL}, 5LL},
+ // Getting a negative result
+ {{-6LL, -3LL, 2LL}, -7LL},
+ // Confirm that NumberLong is wider than NumberInt, and the output
+ // will be a long if any operand is a long.
+ {{1LL, 2, 3LL}, 6LL},
+ {{1LL, 2, 3}, 6LL},
+ {{1, 2, 3LL}, 6LL},
+ {{1, 2LL, 3LL}, 6LL},
+ {{6, -3LL, 2}, 5LL},
+ {{-6LL, -3, 2}, -7LL},
+ // Min/max longs are not promoted to double.
+ {{LLONG_MAX}, LLONG_MAX},
+ {{LLONG_MAX, -1LL}, Value(LLONG_MAX - 1LL)},
+ {{LLONG_MIN}, LLONG_MIN},
+ {{LLONG_MIN, 1LL}, Value(LLONG_MIN + 1LL)},
+ // Long overflow is promoted to a double.
+ {{LLONG_MAX, 1LL}, Value((double)LLONG_MAX + 1.0)},
+ // The result is "incorrect" here due to floating-point rounding errors.
+ {{LLONG_MIN, -1LL}, Value((double)LLONG_MIN)},
+ });
+}
+
+TEST(ExpressionAddTest, Doubles) {
+ assertExpectedResults("$add",
+ {
+ // Singleton case.
+ {{1.0}, 1.0},
+ // Double addition.
+ {{1.0, 2.0, 3.0}, 6.0},
+ // Adding negative numbers
+ {{6.0, -3.0, 2.0}, 5.0},
+ // Getting a negative result
+ {{-6.0, -3.0, 2.0}, -7.0},
+ // Confirm that doubles are wider than ints and longs, and the output
+ // will be a double if any operand is a double.
+ {{1, 2, 3.0}, 6.0},
+ {{1LL, 2LL, 3.0}, 6.0},
+ {{3.0, 2, 1LL}, 6.0},
+ {{3, 2.0, 1LL}, 6.0},
+ {{-3, 2.0, 1LL}, 0.0},
+ {{-6LL, 2LL, 3.0}, -1.0},
+ {{-6.0, 2LL, 3.0}, -1.0},
+ // Confirm floating point arithmetic has rounding errors.
+ {{0.1, 0.2}, 0.30000000000000004},
+ });
+}
+
+TEST(ExpressionAddTest, Decimals) {
+ assertExpectedResults(
+ "$add",
+ {
+ // Singleton case.
+ {{Decimal128(1)}, Decimal128(1)},
+ // Decimal addition.
+ {{Decimal128(1.0), Decimal128(2.0), Decimal128(3.0)}, Decimal128(6.0)},
+ {{Decimal128(-6.0), Decimal128(2.0), Decimal128(3.0)}, Decimal128(-1.0)},
+            // Confirm that decimals are wider than all other types, and the output
+            // will be a decimal if any operand is a decimal.
+ {{Decimal128(1), 2LL, 3}, Decimal128(6.0)},
+ {{Decimal128(3), 2.0, 1LL}, Decimal128(6.0)},
+ {{Decimal128(3), 2, 1.0}, Decimal128(6.0)},
+ {{1, 2, Decimal128(3.0)}, Decimal128(6.0)},
+ {{1LL, Decimal128(2.0), 3.0}, Decimal128(6.0)},
+ {{1.0, 2.0, Decimal128(3.0)}, Decimal128(6.0)},
+ {{1, Decimal128(2.0), 3.0}, Decimal128(6.0)},
+ {{1LL, Decimal128(2.0), 3.0, 2}, Decimal128(8.0)},
+ {{1LL, Decimal128(2.0), 3, 2.0}, Decimal128(8.0)},
+ {{1, Decimal128(2.0), 3LL, 2.0}, Decimal128(8.0)},
+ {{3.0, Decimal128(0.0), 2, 1LL}, Decimal128(6.0)},
+ {{1, 3LL, 2.0, Decimal128(2.0)}, Decimal128(8.0)},
+ {{3.0, 2, 1LL, Decimal128(0.0)}, Decimal128(6.0)},
+ {{Decimal128(-6.0), 2.0, 3LL}, Decimal128(-1.0)},
+ });
+}
+
+TEST(ExpressionAddTest, DatesNonDecimal) {
+ assertExpectedResults(
+ "$add",
+ {
+ {{1, 2, 3, Date_t::fromMillisSinceEpoch(100)}, Date_t::fromMillisSinceEpoch(106)},
+ {{1LL, 2LL, 3LL, Value(Date_t::fromMillisSinceEpoch(100))},
+ Date_t::fromMillisSinceEpoch(106)},
+ {{1.0, 2.0, 3.0, Value(Date_t::fromMillisSinceEpoch(100))},
+ Date_t::fromMillisSinceEpoch(106)},
+ {{1.0, 2.0, Value(Date_t::fromMillisSinceEpoch(100)), 3.0},
+ Date_t::fromMillisSinceEpoch(106)},
+ {{1.0, 2.2, 3.5, Value(Date_t::fromMillisSinceEpoch(100))},
+ Date_t::fromMillisSinceEpoch(107)},
+ {{1, 2.2, 3.5, Value(Date_t::fromMillisSinceEpoch(100))},
+ Date_t::fromMillisSinceEpoch(107)},
+ {{1, Date_t::fromMillisSinceEpoch(100), 2.2, 3.5}, Date_t::fromMillisSinceEpoch(107)},
+ {{Date_t::fromMillisSinceEpoch(100), 1, 2.2, 3.5}, Date_t::fromMillisSinceEpoch(107)},
+ {{-6, Date_t::fromMillisSinceEpoch(100)}, Date_t::fromMillisSinceEpoch(94)},
+ {{-200, Date_t::fromMillisSinceEpoch(100)}, Date_t::fromMillisSinceEpoch(-100)},
+ {{1, 2, 3, Date_t::fromMillisSinceEpoch(-100)}, Date_t::fromMillisSinceEpoch(-94)},
+ });
+}
+
+TEST(ExpressionAddTest, DatesDecimal) {
+ assertExpectedResults(
+ "$add",
+ {
+ {{1, Decimal128(2), 3, Date_t::fromMillisSinceEpoch(100)},
+ Date_t::fromMillisSinceEpoch(106)},
+ {{1LL, 2LL, Decimal128(3LL), Value(Date_t::fromMillisSinceEpoch(100))},
+ Date_t::fromMillisSinceEpoch(106)},
+ {{1, Decimal128(2.2), 3.5, Value(Date_t::fromMillisSinceEpoch(100))},
+ Date_t::fromMillisSinceEpoch(107)},
+ {{1, Decimal128(2.2), Decimal128(3.5), Value(Date_t::fromMillisSinceEpoch(100))},
+ Date_t::fromMillisSinceEpoch(107)},
+ {{1.0, Decimal128(2.2), Decimal128(3.5), Value(Date_t::fromMillisSinceEpoch(100))},
+ Date_t::fromMillisSinceEpoch(107)},
+ {{Decimal128(-6), Date_t::fromMillisSinceEpoch(100)}, Date_t::fromMillisSinceEpoch(94)},
+ {{Decimal128(-200), Date_t::fromMillisSinceEpoch(100)},
+ Date_t::fromMillisSinceEpoch(-100)},
+ {{1, Decimal128(2), 3, Date_t::fromMillisSinceEpoch(-100)},
+ Date_t::fromMillisSinceEpoch(-94)},
+ });
+}
+
+TEST(ExpressionAddTest, Assertions) {
+ // Date addition must fit in a NumberLong from a double.
+ ASSERT_THROWS_CODE(
+ evaluateExpression("$add", {Date_t::fromMillisSinceEpoch(100), (double)LLONG_MAX}),
+ AssertionException,
+ ErrorCodes::Overflow);
+
+ // Only one date allowed in an $add expression.
+ ASSERT_THROWS_CODE(
+ evaluateExpression(
+ "$add", {Date_t::fromMillisSinceEpoch(100), 1, Date_t::fromMillisSinceEpoch(100)}),
+ AssertionException,
+ 16612);
+
+ // Only numeric types are allowed in a $add.
+ ASSERT_THROWS_CODE(evaluateExpression("$add", {1, 2, "not numeric!"_sd, 3}),
+ AssertionException,
+ ErrorCodes::TypeMismatch);
+}
+
+
+TEST(ExpressionAddTest, VerifyNoDoubleDoubleSummation) {
+ // Confirm that we're not using DoubleDoubleSummation for $add expression with a set of double
+ // values from mongo/util/summation_test.cpp.
+ std::vector<ImplicitValue> doubleValues = {
+ 1.4831356930199802e-05, -3.121724665346865, 3041897608700.073,
+ 1001318343149.7166, -1714.6229586696593, 1731390114894580.8,
+ 6.256645803154374e-08, -107144114533844.25, -0.08839485091750919,
+ -265119153.02185738, -0.02450615965231944, 0.0002684331017079073,
+ 32079040427.68358, -0.04733295911845742, 0.061381859083076085,
+ -25329.59126796951, -0.0009567520620034965, -1553879364344.9932,
+ -2.1101077525869814e-08, -298421079729.5547, 0.03182394834273594,
+ 22.201944843278916, -33.35667991109125, 11496013.960449915,
+ -40652595.33210472, 3.8496066090328163, 2.5074042398147304e-08,
+ -0.02208724071782122, -134211.37290639878, 0.17640433666616578,
+ 4.463787499171126, 9.959669945399718, 129265976.35224283,
+ 1.5865526187526546e-07, -4746011.710555799, -712048598925.0789,
+ 582214206210.4034, 0.025236204812875362, 530078170.91147506,
+ -14.865307666195053, 1.6727994895185032e-05, -113386276.03121366,
+ -6.135827207137054, 10644945799901.145, -100848907797.1582,
+ 2.2404406961625282e-08, 1.315662618424494e-09, -0.832190208349044,
+ -9.779323414999364, -546522170658.2997};
+ double straightSum = 0.0;
+ DoubleDoubleSummation compensatedSum;
+ for (auto x : doubleValues) {
+ compensatedSum.addDouble(x.getDouble());
+ straightSum += x.getDouble();
+ }
+ ASSERT_NE(straightSum, compensatedSum.getDouble());
+
+ Value result = evaluateExpression("$add", doubleValues);
+ ASSERT_VALUE_EQ(result, Value(straightSum));
+ ASSERT_VALUE_NE(result, Value(compensatedSum.getDouble()));
+}
+TEST(ExpressionFLETest, BadInputs) {
+
+ auto expCtx = ExpressionContextForTest();
+ auto vps = expCtx.variablesParseState;
+ {
+ auto expr = fromjson("{$_internalFleEq: 12}");
+ ASSERT_THROWS_CODE(ExpressionInternalFLEEqual::parse(&expCtx, expr.firstElement(), vps),
+ DBException,
+ 10065);
+ }
+}
+
+// Test we return true if it matches
+TEST(ExpressionFLETest, TestBinData) {
+ auto expCtx = ExpressionContextForTest();
+ auto vps = expCtx.variablesParseState;
+
+ {
+ auto expr = fromjson(R"({$_internalFleEq: {
+ field: {
+ "$binary": {
+ "base64":
+ "BxI0VngSNJh2EjQSNFZ4kBIQ0JE8aMUFkPk5sSTVqfdNNfjqUfQQ1Uoj0BBcthrWoe9wyU3cN6zmWaQBPJ97t0ZPbecnMsU736yXre6cBO4Zdt/wThtY+v5+7vFgNnWpgRP0e+vam6QPmLvbBrO0LdsvAPTGW4yqwnzCIXCoEg7QPGfbfAXKPDTNenBfRlawiblmTOhO/6ljKotWsMp22q/rpHrn9IEIeJmecwuuPIJ7EA+XYQ3hOKVccYf2ogoK73+8xD/Vul83Qvr84Q8afc4QUMVs8A==",
+ "subType": "6"
+ }
+ },
+ server: {
+ "$binary": {
+ "base64": "COuac/eRLYakKX6B0vZ1r3QodOQFfjqJD+xlGiPu4/Ps",
+ "subType": "6"
+ }
+ },
+ counter: {
+ "$numberLong": "3"
+ },
+ edc: {
+ "$binary": {
+ "base64": "CEWSmQID7SfwyAUI3ZkSFkATKryDQfnxXEOGad5d4Rsg",
+ "subType": "6"
+ }
+ } } })");
+ auto exprFle = ExpressionInternalFLEEqual::parse(&expCtx, expr.firstElement(), vps);
+
+ ASSERT_VALUE_EQ(exprFle->evaluate({}, &expCtx.variables), Value(true));
+ }
+
+    // Negative: Use wrong edc token
+ {
+ auto expr = fromjson(R"({$_internalFleEq: {
+ field: {
+ "$binary": {
+ "base64":
+ "BxI0VngSNJh2EjQSNFZ4kBIQ0JE8aMUFkPk5sSTVqfdNNfjqUfQQ1Uoj0BBcthrWoe9wyU3cN6zmWaQBPJ97t0ZPbecnMsU736yXre6cBO4Zdt/wThtY+v5+7vFgNnWpgRP0e+vam6QPmLvbBrO0LdsvAPTGW4yqwnzCIXCoEg7QPGfbfAXKPDTNenBfRlawiblmTOhO/6ljKotWsMp22q/rpHrn9IEIeJmecwuuPIJ7EA+XYQ3hOKVccYf2ogoK73+8xD/Vul83Qvr84Q8afc4QUMVs8A==",
+ "subType": "6"
+ }
+ },
+ server: {
+ "$binary": {
+ "base64": "COuac/eRLYakKX6B0vZ1r3QodOQFfjqJD+xlGiPu4/Ps",
+ "subType": "6"
+ }
+ },
+ counter: {
+ "$numberLong": "3"
+ },
+ edc: {
+ "$binary": {
+ "base64": "CEWSMQID7SFWYAUI3ZKSFKATKRYDQFNXXEOGAD5D4RSG",
+ "subType": "6"
+ }
+ } } })");
+ auto exprFle = ExpressionInternalFLEEqual::parse(&expCtx, expr.firstElement(), vps);
+
+ ASSERT_VALUE_EQ(exprFle->evaluate({}, &expCtx.variables), Value(false));
+ }
+
+    // Negative: Use wrong server token
+ {
+ auto expr = fromjson(R"({$_internalFleEq: {
+ field: {
+ "$binary": {
+ "base64":
+ "BxI0VngSNJh2EjQSNFZ4kBIQ0JE8aMUFkPk5sSTVqfdNNfjqUfQQ1Uoj0BBcthrWoe9wyU3cN6zmWaQBPJ97t0ZPbecnMsU736yXre6cBO4Zdt/wThtY+v5+7vFgNnWpgRP0e+vam6QPmLvbBrO0LdsvAPTGW4yqwnzCIXCoEg7QPGfbfAXKPDTNenBfRlawiblmTOhO/6ljKotWsMp22q/rpHrn9IEIeJmecwuuPIJ7EA+XYQ3hOKVccYf2ogoK73+8xD/Vul83Qvr84Q8afc4QUMVs8A==",
+ "subType": "6"
+ }
+ },
+ server: {
+ "$binary": {
+ "base64": "COUAC/ERLYAKKX6B0VZ1R3QODOQFFJQJD+XLGIPU4/PS",
+ "subType": "6"
+ }
+ },
+ counter: {
+ "$numberLong": "3"
+ },
+ edc: {
+ "$binary": {
+ "base64": "CEWSmQID7SfwyAUI3ZkSFkATKryDQfnxXEOGad5d4Rsg",
+ "subType": "6"
+ }
+ } } })");
+ auto exprFle = ExpressionInternalFLEEqual::parse(&expCtx, expr.firstElement(), vps);
+
+ ASSERT_THROWS_CODE(
+ exprFle->evaluate({}, &expCtx.variables), DBException, ErrorCodes::Overflow);
+ }
+}
+
+TEST(ExpressionFLETest, TestBinData_ContentionFactor) {
+ auto expCtx = ExpressionContextForTest();
+ auto vps = expCtx.variablesParseState;
+
+ // Use the wrong contention factor - 0
+ {
+ auto expr = fromjson(R"({$_internalFleEq: {
+ field: {
+ "$binary": {
+ "base64":
+ "BxI0VngSNJh2EjQSNFZ4kBIQ5+Wa5+SZafJeRUDGdLNx+i2ADDkyV2qA90Xcve7FqltoDm1PllSSgUS4fYtw3XDjzoNZrFFg8LfG2wH0HYbLMswv681KJpmEw7+RXy4CcPVFgoRFt24N13p7jT+pqu2oQAHAoxYTy/TsiAyY4RnAMiXYGg3hWz4AO/WxHNSyq6B6kX5d7x/hrXvppsZDc2Pmhd+c5xmovlv5RPj7wnNld13kYcMluztjNswiCH05hM/kp2/P7kw30iVnbz0SZxn1FjjCug==",
+ "subType": "6"
+ }
+ },
+ server: {
+ "$binary": {
+ "base64": "COuac/eRLYakKX6B0vZ1r3QodOQFfjqJD+xlGiPu4/Ps",
+ "subType": "6"
+ }
+ },
+ counter: {
+ "$numberLong": "0"
+ },
+ edc: {
+ "$binary": {
+ "base64": "CEWSmQID7SfwyAUI3ZkSFkATKryDQfnxXEOGad5d4Rsg",
+ "subType": "6"
+ }
+ } } })");
+ auto exprFle = ExpressionInternalFLEEqual::parse(&expCtx, expr.firstElement(), vps);
+
+ ASSERT_VALUE_EQ(exprFle->evaluate({}, &expCtx.variables), Value(false));
+ }
+
+ // Use the right contention factor - 50
+ {
+ auto expr = fromjson(R"({$_internalFleEq: {
+ field: {
+ "$binary": {
+ "base64":
+"BxI0VngSNJh2EjQSNFZ4kBIQ5+Wa5+SZafJeRUDGdLNx+i2ADDkyV2qA90Xcve7FqltoDm1PllSSgUS4fYtw3XDjzoNZrFFg8LfG2wH0HYbLMswv681KJpmEw7+RXy4CcPVFgoRFt24N13p7jT+pqu2oQAHAoxYTy/TsiAyY4RnAMiXYGg3hWz4AO/WxHNSyq6B6kX5d7x/hrXvppsZDc2Pmhd+c5xmovlv5RPj7wnNld13kYcMluztjNswiCH05hM/kp2/P7kw30iVnbz0SZxn1FjjCug==",
+ "subType": "6"
+ }
+ },
+ server: {
+ "$binary": {
+ "base64": "COuac/eRLYakKX6B0vZ1r3QodOQFfjqJD+xlGiPu4/Ps",
+ "subType": "6"
+ }
+ },
+ counter: {
+ "$numberLong": "50"
+ },
+ edc: {
+ "$binary": {
+ "base64": "CEWSmQID7SfwyAUI3ZkSFkATKryDQfnxXEOGad5d4Rsg",
+ "subType": "6"
+ }
+ } } })");
+ auto exprFle = ExpressionInternalFLEEqual::parse(&expCtx, expr.firstElement(), vps);
+
+ ASSERT_VALUE_EQ(exprFle->evaluate({}, &expCtx.variables), Value(true));
+ }
+}
+
+TEST(ExpressionFLETest, TestBinData_RoundTrip) {
+ auto expCtx = ExpressionContextForTest();
+ auto vps = expCtx.variablesParseState;
+
+ auto expr = fromjson(R"({$_internalFleEq: {
+ field: {
+ "$binary": {
+ "base64":
+ "BxI0VngSNJh2EjQSNFZ4kBIQ0JE8aMUFkPk5sSTVqfdNNfjqUfQQ1Uoj0BBcthrWoe9wyU3cN6zmWaQBPJ97t0ZPbecnMsU736yXre6cBO4Zdt/wThtY+v5+7vFgNnWpgRP0e+vam6QPmLvbBrO0LdsvAPTGW4yqwnzCIXCoEg7QPGfbfAXKPDTNenBfRlawiblmTOhO/6ljKotWsMp22q/rpHrn9IEIeJmecwuuPIJ7EA+XYQ3hOKVccYf2ogoK73+8xD/Vul83Qvr84Q8afc4QUMVs8A==",
+ "subType": "6"
+ }
+ },
+ server: {
+ "$binary": {
+ "base64": "COuac/eRLYakKX6B0vZ1r3QodOQFfjqJD+xlGiPu4/Ps",
+ "subType": "6"
+ }
+ },
+ counter: {
+ "$numberLong": "3"
+ },
+ edc: {
+ "$binary": {
+ "base64": "CEWSmQID7SfwyAUI3ZkSFkATKryDQfnxXEOGad5d4Rsg",
+ "subType": "6"
+ }
+ } } })");
+ auto exprFle = ExpressionInternalFLEEqual::parse(&expCtx, expr.firstElement(), vps);
+
+ ASSERT_VALUE_EQ(exprFle->evaluate({}, &expCtx.variables), Value(true));
+
+ // Verify it round trips
+ auto value = exprFle->serialize(false);
+
+ auto roundTripExpr = fromjson(R"({$_internalFleEq: {
+ field: {
+ "$const" : { "$binary": {
+ "base64":
+ "BxI0VngSNJh2EjQSNFZ4kBIQ0JE8aMUFkPk5sSTVqfdNNfjqUfQQ1Uoj0BBcthrWoe9wyU3cN6zmWaQBPJ97t0ZPbecnMsU736yXre6cBO4Zdt/wThtY+v5+7vFgNnWpgRP0e+vam6QPmLvbBrO0LdsvAPTGW4yqwnzCIXCoEg7QPGfbfAXKPDTNenBfRlawiblmTOhO/6ljKotWsMp22q/rpHrn9IEIeJmecwuuPIJ7EA+XYQ3hOKVccYf2ogoK73+8xD/Vul83Qvr84Q8afc4QUMVs8A==",
+ "subType": "6"
+ }}
+ },
+ edc: {
+ "$binary": {
+ "base64": "CEWSmQID7SfwyAUI3ZkSFkATKryDQfnxXEOGad5d4Rsg",
+ "subType": "6"
+ }
+ },
+ counter: {
+ "$numberLong": "3"
+ },
+ server: {
+ "$binary": {
+ "base64": "COuac/eRLYakKX6B0vZ1r3QodOQFfjqJD+xlGiPu4/Ps",
+ "subType": "6"
+ }
+ }
+ } })");
+
+
+ ASSERT_BSONOBJ_EQ(value.getDocument().toBson(), roundTripExpr);
+}
+
} // namespace ExpressionTests
diff --git a/src/mongo/db/pipeline/expression_visitor.h b/src/mongo/db/pipeline/expression_visitor.h
index 46ad3ee6295..6b7c4fc4cdd 100644
--- a/src/mongo/db/pipeline/expression_visitor.h
+++ b/src/mongo/db/pipeline/expression_visitor.h
@@ -153,6 +153,7 @@ class ExpressionHyperbolicSine;
class ExpressionInternalFindSlice;
class ExpressionInternalFindPositional;
class ExpressionInternalFindElemMatch;
+class ExpressionInternalFLEEqual;
class ExpressionInternalJsEmit;
class ExpressionFunction;
class ExpressionDegreesToRadians;
@@ -245,6 +246,7 @@ public:
virtual void visit(expression_walker::MaybeConstPtr<IsConst, ExpressionLn>) = 0;
virtual void visit(expression_walker::MaybeConstPtr<IsConst, ExpressionLog>) = 0;
virtual void visit(expression_walker::MaybeConstPtr<IsConst, ExpressionLog10>) = 0;
+ virtual void visit(expression_walker::MaybeConstPtr<IsConst, ExpressionInternalFLEEqual>) = 0;
virtual void visit(expression_walker::MaybeConstPtr<IsConst, ExpressionMap>) = 0;
virtual void visit(expression_walker::MaybeConstPtr<IsConst, ExpressionMeta>) = 0;
virtual void visit(expression_walker::MaybeConstPtr<IsConst, ExpressionMod>) = 0;
@@ -424,6 +426,7 @@ struct SelectiveConstExpressionVisitorBase : public ExpressionConstVisitor {
void visit(const ExpressionLn*) override {}
void visit(const ExpressionLog*) override {}
void visit(const ExpressionLog10*) override {}
+ void visit(const ExpressionInternalFLEEqual*) override {}
void visit(const ExpressionMap*) override {}
void visit(const ExpressionMeta*) override {}
void visit(const ExpressionMod*) override {}
diff --git a/src/mongo/db/pipeline/pipeline.h b/src/mongo/db/pipeline/pipeline.h
index 96cf6426be3..976b344e4f9 100644
--- a/src/mongo/db/pipeline/pipeline.h
+++ b/src/mongo/db/pipeline/pipeline.h
@@ -291,11 +291,6 @@ public:
static std::vector<Value> serializeContainer(
const SourceContainer& container, boost::optional<ExplainOptions::Verbosity> = boost::none);
- /**
- * Serializes the pipeline into BSON for explain/debug logging purposes.
- */
- std::vector<BSONObj> serializeToBSONForDebug() const;
-
// The initial source is special since it varies between mongos and mongod.
void addInitialSource(boost::intrusive_ptr<DocumentSource> source);
diff --git a/src/mongo/db/pipeline/pipeline_d.cpp b/src/mongo/db/pipeline/pipeline_d.cpp
index 3169859560f..adbca1bf973 100644
--- a/src/mongo/db/pipeline/pipeline_d.cpp
+++ b/src/mongo/db/pipeline/pipeline_d.cpp
@@ -34,6 +34,7 @@
#include "mongo/db/pipeline/pipeline_d.h"
#include "mongo/base/exact_cast.h"
+#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/bson/simple_bsonobj_comparator.h"
#include "mongo/db/catalog/collection.h"
#include "mongo/db/catalog/database.h"
@@ -84,9 +85,11 @@
#include "mongo/db/query/query_feature_flags_gen.h"
#include "mongo/db/query/query_knobs_gen.h"
#include "mongo/db/query/query_planner.h"
+#include "mongo/db/query/query_planner_params.h"
#include "mongo/db/query/sort_pattern.h"
#include "mongo/db/query/stage_types.h"
#include "mongo/db/s/collection_sharding_state.h"
+#include "mongo/db/server_options.h"
#include "mongo/db/service_context.h"
#include "mongo/db/stats/top.h"
#include "mongo/db/storage/record_store.h"
@@ -115,13 +118,11 @@ namespace {
* Group stages are extracted from the pipeline when all of the following conditions are met:
* - When the 'internalQueryForceClassicEngine' feature flag is 'false'.
* - When the 'internalQuerySlotBasedExecutionDisableGroupPushdown' query knob is 'false'.
- * - When the 'featureFlagSBEGroupPushdown' feature flag is 'true'.
* - When the DocumentSourceGroup has 'doingMerge=false'.
*
* Lookup stages are extracted from the pipeline when all of the following conditions are met:
* - When the 'internalQueryForceClassicEngine' feature flag is 'false'.
* - When the 'internalQuerySlotBasedExecutionDisableLookupPushdown' query knob is 'false'.
- * - When the 'featureFlagSBELookupPushdown' feature flag is 'true'.
* - The $lookup uses only the 'localField'/'foreignField' syntax (no pipelines).
* - The foreign collection is neither sharded nor a view.
*/
@@ -146,12 +147,6 @@ std::vector<std::unique_ptr<InnerPipelineStageInterface>> extractSbeCompatibleSt
auto&& sources = pipeline->getSources();
- const auto disallowGroupPushdown =
- !(serverGlobalParams.featureCompatibility.isVersionInitialized() &&
- feature_flags::gFeatureFlagSBEGroupPushdown.isEnabled(
- serverGlobalParams.featureCompatibility)) ||
- internalQuerySlotBasedExecutionDisableGroupPushdown.load();
-
bool isMainCollectionSharded = false;
if (const auto& mainColl = collections.getMainCollection()) {
isMainCollectionSharded = mainColl.isSharded();
@@ -165,7 +160,6 @@ std::vector<std::unique_ptr<InnerPipelineStageInterface>> extractSbeCompatibleSt
// sharded and which ones aren't. As such, if any secondary collection is a view or is sharded,
// no $lookup will be eligible for pushdown.
const bool disallowLookupPushdown =
- !feature_flags::gFeatureFlagSBELookupPushdown.isEnabledAndIgnoreFCV() ||
internalQuerySlotBasedExecutionDisableLookupPushdown.load() || isMainCollectionSharded ||
collections.isAnySecondaryNamespaceAViewOrSharded();
@@ -175,7 +169,7 @@ std::vector<std::unique_ptr<InnerPipelineStageInterface>> extractSbeCompatibleSt
// $group pushdown logic.
if (auto groupStage = dynamic_cast<DocumentSourceGroup*>(itr->get())) {
- if (disallowGroupPushdown) {
+ if (internalQuerySlotBasedExecutionDisableGroupPushdown.load()) {
break;
}
@@ -244,7 +238,7 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> attemptToGetExe
SkipThenLimit skipThenLimit,
boost::optional<std::string> groupIdForDistinctScan,
const AggregateCommandRequest* aggRequest,
- const size_t plannerOpts,
+ const QueryPlannerParams& plannerOpts,
const MatchExpressionParser::AllowedFeatureSet& matcherFeatures,
Pipeline* pipeline) {
auto findCommand = std::make_unique<FindCommandRequest>(nss);
@@ -313,7 +307,7 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> attemptToGetExe
// index would produce one result for '1' and another for '2', which would be incorrect.
auto distinctExecutor =
getExecutorDistinct(&collections.getMainCollection(),
- plannerOpts | QueryPlannerParams::STRICT_DISTINCT_ONLY,
+ plannerOpts.options | QueryPlannerParams::STRICT_DISTINCT_ONLY,
&parsedDistinct);
if (!distinctExecutor.isOK()) {
return distinctExecutor.getStatus().withContext(
@@ -1111,6 +1105,41 @@ bool PipelineD::sortAndKeyPatternPartAgreeAndOnMeta(const BucketUnpacker& bucket
return (keyPatternFieldPath.tail() == sortFieldPath.tail());
}
+boost::optional<TraversalPreference> createTimeSeriesTraversalPreference(
+ DocumentSourceInternalUnpackBucket* unpack, DocumentSourceSort* sort) {
+ const auto metaField = unpack->bucketUnpacker().getMetaField();
+ BSONObjBuilder builder;
+ // Reverse the sort pattern so we can look for indexes that match.
+ for (const auto& sortPart : sort->getSortKeyPattern()) {
+ if (!sortPart.fieldPath) {
+ return boost::none;
+ }
+ const int reversedDirection = sortPart.isAscending ? -1 : 1;
+ const auto& path = sortPart.fieldPath->fullPath();
+ if (metaField.has_value() &&
+ (expression::isPathPrefixOf(*metaField, path) || *metaField == path)) {
+ std::string rewrittenField =
+ std::string{timeseries::kBucketMetaFieldName} + path.substr(metaField->size());
+ builder.append(rewrittenField, reversedDirection);
+ } else if (path == unpack->bucketUnpacker().getTimeField()) {
+ if (reversedDirection == 1) {
+ builder.append(unpack->bucketUnpacker().getMinField(path), reversedDirection);
+ } else {
+ builder.append(unpack->bucketUnpacker().getMaxField(path), reversedDirection);
+ }
+ } else {
+ // The field wasn't meta or time, so no direction preference should be made.
+ return boost::none;
+ }
+ }
+
+ TraversalPreference traversalPreference;
+ traversalPreference.sortPattern = builder.obj();
+ traversalPreference.clusterField = unpack->getMinTimeField();
+ traversalPreference.direction = -1;
+ return traversalPreference;
+}
+
std::pair<PipelineD::AttachExecutorCallback, std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>>
PipelineD::buildInnerQueryExecutorGeneric(const MultipleCollectionAccessor& collections,
const NamespaceString& nss,
@@ -1166,6 +1195,19 @@ PipelineD::buildInnerQueryExecutorGeneric(const MultipleCollectionAccessor& coll
? DepsTracker::kDefaultUnavailableMetadata & ~DepsTracker::kOnlyTextScore
: DepsTracker::kDefaultUnavailableMetadata;
+ // If this is a query on a time-series collection then it may be eligible for a post-planning
+ // sort optimization. We check eligibility and perform the rewrite here.
+ auto [unpack, sort] = findUnpackThenSort(pipeline->_sources);
+ QueryPlannerParams plannerOpts;
+ if (serverGlobalParams.featureCompatibility.isVersionInitialized() &&
+ serverGlobalParams.featureCompatibility.isGreaterThanOrEqualTo(
+ multiversion::FeatureCompatibilityVersion::kVersion_6_0) &&
+ feature_flags::gFeatureFlagBucketUnpackWithSort.isEnabled(
+ serverGlobalParams.featureCompatibility) &&
+ unpack && sort) {
+ plannerOpts.traversalPreference = createTimeSeriesTraversalPreference(unpack, sort);
+ }
+
// Create the PlanExecutor.
bool shouldProduceEmptyDocs = false;
auto exec = uassertStatusOK(prepareExecutor(expCtx,
@@ -1179,11 +1221,11 @@ PipelineD::buildInnerQueryExecutorGeneric(const MultipleCollectionAccessor& coll
skipThenLimit,
aggRequest,
Pipeline::kAllowedMatcherFeatures,
- &shouldProduceEmptyDocs));
+ &shouldProduceEmptyDocs,
+ std::move(plannerOpts)));
// If this is a query on a time-series collection then it may be eligible for a post-planning
// sort optimization. We check eligibility and perform the rewrite here.
- auto [unpack, sort] = findUnpackThenSort(pipeline->_sources);
if (serverGlobalParams.featureCompatibility.isVersionInitialized() &&
serverGlobalParams.featureCompatibility.isGreaterThanOrEqualTo(
multiversion::FeatureCompatibilityVersion::kVersion_6_0) &&
@@ -1192,7 +1234,6 @@ PipelineD::buildInnerQueryExecutorGeneric(const MultipleCollectionAccessor& coll
unpack && sort) {
auto execImpl = dynamic_cast<PlanExecutorImpl*>(exec.get());
if (execImpl) {
-
// Get source stage
PlanStage* rootStage = execImpl->getRootStage();
while (rootStage &&
@@ -1205,28 +1246,27 @@ PipelineD::buildInnerQueryExecutorGeneric(const MultipleCollectionAccessor& coll
case STAGE_SHARDING_FILTER:
rootStage = rootStage->child().get();
break;
- case STAGE_MULTI_PLAN:
- if (auto mps = static_cast<MultiPlanStage*>(rootStage)) {
- if (mps->bestPlanChosen() && mps->bestPlanIdx()) {
- rootStage = (mps->getChildren())[*(mps->bestPlanIdx())].get();
- } else {
- rootStage = nullptr;
- tasserted(6655801,
- "Expected multiplanner to have selected a bestPlan.");
- }
+ case STAGE_MULTI_PLAN: {
+ auto mps = static_cast<MultiPlanStage*>(rootStage);
+ if (mps->bestPlanChosen() && mps->bestPlanIdx()) {
+ rootStage = (mps->getChildren())[*(mps->bestPlanIdx())].get();
+ } else {
+ rootStage = nullptr;
+ tasserted(6655801,
+ "Expected multiplanner to have selected a bestPlan.");
}
break;
- case STAGE_CACHED_PLAN:
- if (auto cp = static_cast<CachedPlanStage*>(rootStage)) {
- if (cp->bestPlanChosen()) {
- rootStage = rootStage->child().get();
- } else {
- rootStage = nullptr;
- tasserted(6655802,
- "Expected cached plan to have selected a bestPlan.");
- }
+ }
+ case STAGE_CACHED_PLAN: {
+ auto cp = static_cast<CachedPlanStage*>(rootStage);
+ if (cp->bestPlanChosen()) {
+ rootStage = rootStage->child().get();
+ } else {
+ rootStage = nullptr;
+ tasserted(6655802, "Expected cached plan to have selected a bestPlan.");
}
break;
+ }
default:
rootStage = nullptr;
}
@@ -1358,8 +1398,9 @@ PipelineD::buildInnerQueryExecutorGeneric(const MultipleCollectionAccessor& coll
sort->getSortKeyPattern(),
(indexOrderedByMinTime ? DocumentSourceSort::kMin
: DocumentSourceSort::kMax),
- ((indexOrderedByMinTime) ? unpack->getBucketMaxSpanSeconds()
- : -unpack->getBucketMaxSpanSeconds()) *
+ static_cast<long long>((indexOrderedByMinTime)
+ ? unpack->getBucketMaxSpanSeconds()
+ : -unpack->getBucketMaxSpanSeconds()) *
1000,
sort->getLimit(),
expCtx));
@@ -1399,7 +1440,9 @@ PipelineD::buildInnerQueryExecutorGeneric(const MultipleCollectionAccessor& coll
// This produces {$const: maxBucketSpanSeconds}
make_intrusive<ExpressionConstant>(
expCtx.get(),
- Value{unpack->getBucketMaxSpanSeconds() * 1000}))),
+ Value{static_cast<long long>(
+ unpack->getBucketMaxSpanSeconds()) *
+ 1000}))),
expCtx);
pipeline->_sources.insert(
unpackIter,
@@ -1513,24 +1556,22 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> PipelineD::prep
SkipThenLimit skipThenLimit,
const AggregateCommandRequest* aggRequest,
const MatchExpressionParser::AllowedFeatureSet& matcherFeatures,
- bool* hasNoRequirements) {
+ bool* hasNoRequirements,
+ QueryPlannerParams plannerOpts) {
invariant(hasNoRequirements);
- // Any data returned from the inner executor must be owned.
- size_t plannerOpts = QueryPlannerParams::DEFAULT;
-
bool isChangeStream =
pipeline->peekFront() && pipeline->peekFront()->constraints().isChangeStreamStage();
if (isChangeStream) {
invariant(expCtx->tailableMode == TailableModeEnum::kTailableAndAwaitData);
- plannerOpts |= (QueryPlannerParams::TRACK_LATEST_OPLOG_TS |
- QueryPlannerParams::ASSERT_MIN_TS_HAS_NOT_FALLEN_OFF_OPLOG);
+ plannerOpts.options |= (QueryPlannerParams::TRACK_LATEST_OPLOG_TS |
+ QueryPlannerParams::ASSERT_MIN_TS_HAS_NOT_FALLEN_OFF_OPLOG);
}
// The $_requestReshardingResumeToken parameter is only valid for an oplog scan.
if (aggRequest && aggRequest->getRequestReshardingResumeToken()) {
- plannerOpts |= (QueryPlannerParams::TRACK_LATEST_OPLOG_TS |
- QueryPlannerParams::ASSERT_MIN_TS_HAS_NOT_FALLEN_OFF_OPLOG);
+ plannerOpts.options |= (QueryPlannerParams::TRACK_LATEST_OPLOG_TS |
+ QueryPlannerParams::ASSERT_MIN_TS_HAS_NOT_FALLEN_OFF_OPLOG);
}
// If there is a sort stage eligible for pushdown, serialize its SortPattern to a BSONObj. The
@@ -1570,7 +1611,7 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> PipelineD::prep
if (*hasNoRequirements) {
// This query might be eligible for count optimizations, since the remaining stages in the
// pipeline don't actually need to read any data produced by the query execution layer.
- plannerOpts |= QueryPlannerParams::IS_COUNT;
+ plannerOpts.options |= QueryPlannerParams::IS_COUNT;
} else {
// Build a BSONObj representing a projection eligible for pushdown. If there is an inclusion
// projection at the front of the pipeline, it will be removed and handled by the PlanStage
@@ -1588,7 +1629,7 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> PipelineD::prep
// top-k sort, which both sorts and limits.)
bool allowExpressions = !sortStage && !skipThenLimit.getSkip() && !skipThenLimit.getLimit();
projObj = buildProjectionForPushdown(deps, pipeline, allowExpressions);
- plannerOpts |= QueryPlannerParams::RETURN_OWNED_DATA;
+ plannerOpts.options |= QueryPlannerParams::RETURN_OWNED_DATA;
}
if (rewrittenGroupStage) {
diff --git a/src/mongo/db/pipeline/pipeline_d.h b/src/mongo/db/pipeline/pipeline_d.h
index cd40bc33b8b..c109e75b1b8 100644
--- a/src/mongo/db/pipeline/pipeline_d.h
+++ b/src/mongo/db/pipeline/pipeline_d.h
@@ -30,6 +30,7 @@
#pragma once
#include "mongo/db/exec/bucket_unpacker.h"
+#include "mongo/db/query/query_planner_params.h"
#include <boost/intrusive_ptr.hpp>
#include <memory>
@@ -44,6 +45,7 @@
#include "mongo/db/query/collation/collator_factory_interface.h"
#include "mongo/db/query/multiple_collection_accessor.h"
#include "mongo/db/query/plan_executor.h"
+#include "mongo/db/query/query_planner.h"
namespace mongo {
class Collection;
@@ -202,7 +204,8 @@ private:
SkipThenLimit skipThenLimit,
const AggregateCommandRequest* aggRequest,
const MatchExpressionParser::AllowedFeatureSet& matcherFeatures,
- bool* hasNoRequirements);
+ bool* hasNoRequirements,
+ QueryPlannerParams plannerOpts = QueryPlannerParams{});
/**
* Build a PlanExecutor and prepare a callback to create a special DocumentSourceGeoNearCursor
diff --git a/src/mongo/db/pipeline/process_interface/common_mongod_process_interface.cpp b/src/mongo/db/pipeline/process_interface/common_mongod_process_interface.cpp
index b1d0090aa56..2fecb18bebe 100644
--- a/src/mongo/db/pipeline/process_interface/common_mongod_process_interface.cpp
+++ b/src/mongo/db/pipeline/process_interface/common_mongod_process_interface.cpp
@@ -365,12 +365,6 @@ Status CommonMongodProcessInterface::appendQueryExecStats(OperationContext* opCt
const NamespaceString& nss,
BSONObjBuilder* builder) const {
AutoGetCollectionForReadCommand collection(opCtx, nss);
-
- if (!collection.getDb()) {
- return {ErrorCodes::NamespaceNotFound,
- str::stream() << "Database [" << nss.db().toString() << "] not found."};
- }
-
if (!collection) {
return {ErrorCodes::NamespaceNotFound,
str::stream() << "Collection [" << nss.toString() << "] not found."};
@@ -398,9 +392,6 @@ BSONObj CommonMongodProcessInterface::getCollectionOptionsLocally(OperationConte
const NamespaceString& nss) {
AutoGetCollectionForReadCommand collection(opCtx, nss);
BSONObj collectionOptions = {};
- if (!collection.getDb()) {
- return collectionOptions;
- }
if (!collection) {
return collectionOptions;
}
@@ -436,14 +427,8 @@ CommonMongodProcessInterface::attachCursorSourceToPipelineForLocalRead(Pipeline*
// Reparse 'pipeline' to discover whether there are secondary namespaces that we need to lock
// when constructing our query executor.
- std::vector<NamespaceStringOrUUID> secondaryNamespaces = [&]() {
- if (feature_flags::gFeatureFlagSBELookupPushdown.isEnabledAndIgnoreFCV()) {
- auto lpp = LiteParsedPipeline(expCtx->ns, pipeline->serializeToBson());
- return lpp.getForeignExecutionNamespaces();
- } else {
- return std::vector<NamespaceStringOrUUID>{};
- }
- }();
+ auto lpp = LiteParsedPipeline(expCtx->ns, pipeline->serializeToBson());
+ std::vector<NamespaceStringOrUUID> secondaryNamespaces = lpp.getForeignExecutionNamespaces();
autoColl.emplace(expCtx->opCtx,
nsOrUUID,
@@ -574,7 +559,8 @@ std::vector<BSONObj> CommonMongodProcessInterface::getMatchingPlanCacheEntryStat
collVersion = collQueryInfo.getPlanCacheInvalidatorVersion()](
const sbe::PlanCacheKey& key) {
// Only fetch plan cache entries with keys matching given UUID and collectionVersion.
- return uuid == key.getCollectionUuid() && collVersion == key.getCollectionVersion();
+ return uuid == key.getMainCollectionState().uuid &&
+ collVersion == key.getMainCollectionState().version;
};
auto planCacheEntriesSBE =
@@ -883,8 +869,7 @@ boost::optional<Document> CommonMongodProcessInterface::lookupSingleDocumentLoca
const Document& documentKey) {
AutoGetCollectionForRead autoColl(expCtx->opCtx, nss);
BSONObj document;
- if (!Helpers::findById(
- expCtx->opCtx, autoColl.getDb(), nss.ns(), documentKey.toBson(), document)) {
+ if (!Helpers::findById(expCtx->opCtx, nss.ns(), documentKey.toBson(), document)) {
return boost::none;
}
return Document(document).getOwned();
diff --git a/src/mongo/db/pipeline/process_interface/mongos_process_interface_test.cpp b/src/mongo/db/pipeline/process_interface/mongos_process_interface_test.cpp
index 83b35a0c9fc..30de3fbfb1f 100644
--- a/src/mongo/db/pipeline/process_interface/mongos_process_interface_test.cpp
+++ b/src/mongo/db/pipeline/process_interface/mongos_process_interface_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/db/pipeline/aggregation_context_fixture.h"
#include "mongo/db/pipeline/process_interface/mongos_process_interface.h"
#include "mongo/unittest/unittest.h"
@@ -63,7 +61,7 @@ public:
TEST_F(MongosProcessInterfaceTest, FailsToEnsureFieldsUniqueIfTargetCollectionVersionIsSpecified) {
auto expCtx = getExpCtx();
auto targetCollectionVersion =
- boost::make_optional(ChunkVersion(0, 0, OID::gen(), Timestamp(1, 1)));
+ boost::make_optional(ChunkVersion({OID::gen(), Timestamp(1, 1)}, {0, 0}));
auto processInterface = makeProcessInterface();
ASSERT_THROWS_CODE(processInterface->ensureFieldsUniqueOrResolveDocumentKey(
diff --git a/src/mongo/db/pipeline/process_interface/standalone_process_interface_test.cpp b/src/mongo/db/pipeline/process_interface/standalone_process_interface_test.cpp
index a0e9bd5e572..a8ca2a48896 100644
--- a/src/mongo/db/pipeline/process_interface/standalone_process_interface_test.cpp
+++ b/src/mongo/db/pipeline/process_interface/standalone_process_interface_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/db/pipeline/aggregation_context_fixture.h"
#include "mongo/db/pipeline/process_interface/standalone_process_interface.h"
#include "mongo/unittest/unittest.h"
@@ -67,7 +65,7 @@ TEST_F(ProcessInterfaceStandaloneTest,
FailsToEnsureFieldsUniqueIfTargetCollectionVersionIsSpecifiedOnMongos) {
auto expCtx = getExpCtx();
auto targetCollectionVersion =
- boost::make_optional(ChunkVersion(0, 0, OID::gen(), Timestamp(1, 1)));
+ boost::make_optional(ChunkVersion({OID::gen(), Timestamp(1, 1)}, {0, 0}));
auto processInterface = makeProcessInterface();
// Test that 'targetCollectionVersion' is not accepted if not from mongos.
@@ -90,7 +88,7 @@ TEST_F(ProcessInterfaceStandaloneTest,
TEST_F(ProcessInterfaceStandaloneTest, FailsToEnsureFieldsUniqueIfJoinFieldsAreNotSentFromMongos) {
auto expCtx = getExpCtx();
auto targetCollectionVersion =
- boost::make_optional(ChunkVersion(0, 0, OID::gen(), Timestamp(1, 1)));
+ boost::make_optional(ChunkVersion({OID::gen(), Timestamp(1, 1)}, {0, 0}));
auto processInterface = makeProcessInterface();
expCtx->fromMongos = true;
diff --git a/src/mongo/db/pipeline/resharding_initial_split_policy_test.cpp b/src/mongo/db/pipeline/resharding_initial_split_policy_test.cpp
index bc17e9d0133..2df79a991d4 100644
--- a/src/mongo/db/pipeline/resharding_initial_split_policy_test.cpp
+++ b/src/mongo/db/pipeline/resharding_initial_split_policy_test.cpp
@@ -27,9 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include "mongo/db/catalog/collection_catalog.h"
#include "mongo/db/pipeline/document_source_mock.h"
#include "mongo/db/pipeline/sharded_agg_helpers.h"
@@ -39,7 +36,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
-
namespace mongo {
namespace {
diff --git a/src/mongo/db/pipeline/sharded_union_test.cpp b/src/mongo/db/pipeline/sharded_union_test.cpp
index 79863fc7f14..a8d15b8dbbe 100644
--- a/src/mongo/db/pipeline/sharded_union_test.cpp
+++ b/src/mongo/db/pipeline/sharded_union_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/db/exec/document_value/document_value_test_util.h"
#include "mongo/db/pipeline/document_source_group.h"
#include "mongo/db/pipeline/document_source_match.h"
@@ -163,10 +161,12 @@ TEST_F(ShardedUnionTest, RetriesSubPipelineOnStaleConfigError) {
onCommand([&](const executor::RemoteCommandRequest& request) {
OID epoch{OID::gen()};
Timestamp timestamp{1, 0};
- return createErrorCursorResponse(Status{
- StaleConfigInfo(
- kTestAggregateNss, ChunkVersion(1, 0, epoch, timestamp), boost::none, ShardId{"0"}),
- "Mock error: shard version mismatch"});
+ return createErrorCursorResponse(
+ Status{StaleConfigInfo(kTestAggregateNss,
+ ChunkVersion({epoch, timestamp}, {1, 0}),
+ boost::none,
+ ShardId{"0"}),
+ "Mock error: shard version mismatch"});
});
// Mock the expected config server queries.
@@ -175,7 +175,7 @@ TEST_F(ShardedUnionTest, RetriesSubPipelineOnStaleConfigError) {
const Timestamp timestamp(1, 1);
const ShardKeyPattern shardKeyPattern(BSON("_id" << 1));
- ChunkVersion version(1, 0, epoch, timestamp);
+ ChunkVersion version({epoch, timestamp}, {1, 0});
ChunkType chunk1(cm.getUUID(),
{shardKeyPattern.getKeyPattern().globalMin(), BSON("_id" << 0)},
@@ -246,10 +246,12 @@ TEST_F(ShardedUnionTest, CorrectlySplitsSubPipelineIfRefreshedDistributionRequir
OID epoch{OID::gen()};
Timestamp timestamp{1, 0};
- return createErrorCursorResponse(Status{
- StaleConfigInfo(
- kTestAggregateNss, ChunkVersion(1, 0, epoch, timestamp), boost::none, ShardId{"0"}),
- "Mock error: shard version mismatch"});
+ return createErrorCursorResponse(
+ Status{StaleConfigInfo(kTestAggregateNss,
+ ChunkVersion({epoch, timestamp}, {1, 0}),
+ boost::none,
+ ShardId{"0"}),
+ "Mock error: shard version mismatch"});
});
// Mock the expected config server queries. Update the distribution as if a chunk [0, 10] was
@@ -259,7 +261,7 @@ TEST_F(ShardedUnionTest, CorrectlySplitsSubPipelineIfRefreshedDistributionRequir
const Timestamp timestamp(1, 1);
const ShardKeyPattern shardKeyPattern(BSON("_id" << 1));
- ChunkVersion version(1, 0, epoch, timestamp);
+ ChunkVersion version({epoch, timestamp}, {1, 0});
ChunkType chunk1(cm.getUUID(),
{shardKeyPattern.getKeyPattern().globalMin(), BSON("_id" << 0)},
@@ -337,23 +339,27 @@ TEST_F(ShardedUnionTest, AvoidsSplittingSubPipelineIfRefreshedDistributionDoesNo
Timestamp timestamp{1, 1};
onCommand([&](const executor::RemoteCommandRequest& request) {
- return createErrorCursorResponse(Status{
- StaleConfigInfo(
- kTestAggregateNss, ChunkVersion(1, 0, epoch, timestamp), boost::none, ShardId{"0"}),
- "Mock error: shard version mismatch"});
+ return createErrorCursorResponse(
+ Status{StaleConfigInfo(kTestAggregateNss,
+ ChunkVersion({epoch, timestamp}, {1, 0}),
+ boost::none,
+ ShardId{"0"}),
+ "Mock error: shard version mismatch"});
});
onCommand([&](const executor::RemoteCommandRequest& request) {
- return createErrorCursorResponse(Status{
- StaleConfigInfo(
- kTestAggregateNss, ChunkVersion(1, 0, epoch, timestamp), boost::none, ShardId{"0"}),
- "Mock error: shard version mismatch"});
+ return createErrorCursorResponse(
+ Status{StaleConfigInfo(kTestAggregateNss,
+ ChunkVersion({epoch, timestamp}, {1, 0}),
+ boost::none,
+ ShardId{"0"}),
+ "Mock error: shard version mismatch"});
});
// Mock the expected config server queries. Update the distribution so that all chunks are on
// the same shard.
const UUID uuid = UUID::gen();
const ShardKeyPattern shardKeyPattern(BSON("_id" << 1));
- ChunkVersion version(1, 0, epoch, timestamp);
+ ChunkVersion version({epoch, timestamp}, {1, 0});
ChunkType chunk1(
cm.getUUID(),
{shardKeyPattern.getKeyPattern().globalMin(), shardKeyPattern.getKeyPattern().globalMax()},
@@ -412,7 +418,7 @@ TEST_F(ShardedUnionTest, IncorporatesViewDefinitionAndRetriesWhenViewErrorReceiv
const ShardKeyPattern shardKeyPattern(BSON("_id" << 1));
const Timestamp timestamp(1, 1);
- ChunkVersion version(1, 0, epoch, timestamp);
+ ChunkVersion version({epoch, timestamp}, {1, 0});
ChunkType chunk1(cm.getUUID(),
{shardKeyPattern.getKeyPattern().globalMin(), BSON("_id" << 0)},
diff --git a/src/mongo/db/pipeline/visitors/document_source_visitor.h b/src/mongo/db/pipeline/visitors/document_source_visitor.h
index a0158147e38..1db827e9dfb 100644
--- a/src/mongo/db/pipeline/visitors/document_source_visitor.h
+++ b/src/mongo/db/pipeline/visitors/document_source_visitor.h
@@ -47,6 +47,7 @@ class DocumentSourceIndexStats;
class DocumentSourceInternalInhibitOptimization;
class DocumentSourceInternalShardFilter;
class DocumentSourceInternalSplitPipeline;
+class DocumentSourceInternalUnpackBucket;
class DocumentSourceLimit;
class DocumentSourceListCachedAndActiveUsers;
class DocumentSourceListLocalSessions;
@@ -98,6 +99,8 @@ public:
tree_walker::MaybeConstPtr<IsConst, DocumentSourceInternalShardFilter> source) = 0;
virtual void visit(
tree_walker::MaybeConstPtr<IsConst, DocumentSourceInternalSplitPipeline> source) = 0;
+ virtual void visit(
+ tree_walker::MaybeConstPtr<IsConst, DocumentSourceInternalUnpackBucket> source) = 0;
virtual void visit(tree_walker::MaybeConstPtr<IsConst, DocumentSourceLimit> source) = 0;
virtual void visit(
tree_walker::MaybeConstPtr<IsConst, DocumentSourceListCachedAndActiveUsers> source) = 0;
diff --git a/src/mongo/db/pipeline/visitors/document_source_walker.cpp b/src/mongo/db/pipeline/visitors/document_source_walker.cpp
index b0ea004cae9..0fb3dba9967 100644
--- a/src/mongo/db/pipeline/visitors/document_source_walker.cpp
+++ b/src/mongo/db/pipeline/visitors/document_source_walker.cpp
@@ -44,6 +44,7 @@
#include "mongo/db/pipeline/document_source_internal_inhibit_optimization.h"
#include "mongo/db/pipeline/document_source_internal_shard_filter.h"
#include "mongo/db/pipeline/document_source_internal_split_pipeline.h"
+#include "mongo/db/pipeline/document_source_internal_unpack_bucket.h"
#include "mongo/db/pipeline/document_source_limit.h"
#include "mongo/db/pipeline/document_source_list_cached_and_active_users.h"
#include "mongo/db/pipeline/document_source_list_local_sessions.h"
@@ -108,6 +109,7 @@ void DocumentSourceWalker::walk(const Pipeline& pipeline) {
visitHelper<DocumentSourceInternalInhibitOptimization>(ds) ||
visitHelper<DocumentSourceInternalShardFilter>(ds) ||
visitHelper<DocumentSourceInternalSplitPipeline>(ds) ||
+ visitHelper<DocumentSourceInternalUnpackBucket>(ds) ||
visitHelper<DocumentSourceLimit>(ds) ||
visitHelper<DocumentSourceListCachedAndActiveUsers>(ds) ||
visitHelper<DocumentSourceListLocalSessions>(ds) ||
diff --git a/src/mongo/db/process_health/config_server_health_observer.cpp b/src/mongo/db/process_health/config_server_health_observer.cpp
index 5e8a9ada390..bf011d28472 100644
--- a/src/mongo/db/process_health/config_server_health_observer.cpp
+++ b/src/mongo/db/process_health/config_server_health_observer.cpp
@@ -87,7 +87,7 @@ public:
* previous one is filled, thus synchronization can be relaxed.
*/
Future<HealthCheckStatus> periodicCheckImpl(
- PeriodicHealthCheckContext&& periodicCheckContext) noexcept override;
+ PeriodicHealthCheckContext&& periodicCheckContext) override;
private:
// Collects the results of one check.
@@ -146,7 +146,7 @@ ConfigServerHealthObserver::ConfigServerHealthObserver(ServiceContext* svcCtx)
: HealthObserverBase(svcCtx) {}
Future<HealthCheckStatus> ConfigServerHealthObserver::periodicCheckImpl(
- PeriodicHealthCheckContext&& periodicCheckContext) noexcept {
+ PeriodicHealthCheckContext&& periodicCheckContext) {
// The chain is not capturing 'this' for the case the network call outlives the observer.
return _checkImpl(std::move(periodicCheckContext))
.then([type = getType()](CheckResult result) mutable -> Future<HealthCheckStatus> {
diff --git a/src/mongo/db/process_health/dns_health_observer.cpp b/src/mongo/db/process_health/dns_health_observer.cpp
index 6f41e5e2785..ff6611d10ac 100644
--- a/src/mongo/db/process_health/dns_health_observer.cpp
+++ b/src/mongo/db/process_health/dns_health_observer.cpp
@@ -47,9 +47,10 @@ namespace process_health {
MONGO_FAIL_POINT_DEFINE(dnsHealthObserverFp);
Future<HealthCheckStatus> DnsHealthObserver::periodicCheckImpl(
- PeriodicHealthCheckContext&& periodicCheckContext) noexcept {
+ PeriodicHealthCheckContext&& periodicCheckContext) {
LOGV2_DEBUG(5938401, 2, "DNS health observer executing");
+
auto makeFailedHealthCheckFuture = [this](const Status& status) {
return Future<HealthCheckStatus>::makeReady(
makeSimpleFailedStatus(Severity::kFailure, {status}));
@@ -101,13 +102,17 @@ Future<HealthCheckStatus> DnsHealthObserver::periodicCheckImpl(
auto status = periodicCheckContext.taskExecutor->scheduleWork(
[this, servers, promise = std::move(completionPf.promise)](
const executor::TaskExecutor::CallbackArgs& cbArgs) mutable {
- auto statusWith =
- getHostFQDNs(servers.front().host(), HostnameCanonicalizationMode::kForward);
- if (statusWith.isOK() && !statusWith.getValue().empty()) {
- promise.emplaceValue(makeHealthyStatus());
- } else {
- promise.emplaceValue(
- makeSimpleFailedStatus(Severity::kFailure, {statusWith.getStatus()}));
+ try {
+ auto statusWith =
+ getHostFQDNs(servers.front().host(), HostnameCanonicalizationMode::kForward);
+ if (statusWith.isOK() && !statusWith.getValue().empty()) {
+ promise.emplaceValue(makeHealthyStatus());
+ } else {
+ promise.emplaceValue(
+ makeSimpleFailedStatus(Severity::kFailure, {statusWith.getStatus()}));
+ }
+ } catch (const DBException& e) {
+ promise.emplaceValue(makeSimpleFailedStatus(Severity::kFailure, {e.toStatus()}));
}
});
diff --git a/src/mongo/db/process_health/dns_health_observer.h b/src/mongo/db/process_health/dns_health_observer.h
index 2640c9024f7..11f54ad01bd 100644
--- a/src/mongo/db/process_health/dns_health_observer.h
+++ b/src/mongo/db/process_health/dns_health_observer.h
@@ -56,7 +56,7 @@ protected:
}
Future<HealthCheckStatus> periodicCheckImpl(
- PeriodicHealthCheckContext&& periodicCheckContext) noexcept override;
+ PeriodicHealthCheckContext&& periodicCheckContext) override;
private:
mutable PseudoRandom _random;
diff --git a/src/mongo/db/process_health/health_observer_base.cpp b/src/mongo/db/process_health/health_observer_base.cpp
index 243a9cf7937..01d90bf3db6 100644
--- a/src/mongo/db/process_health/health_observer_base.cpp
+++ b/src/mongo/db/process_health/health_observer_base.cpp
@@ -56,25 +56,38 @@ SharedSemiFuture<HealthCheckStatus> HealthObserverBase::periodicCheck(
_currentlyRunningHealthCheck = true;
}
+ Future<HealthCheckStatus> healthCheckResult;
+
+ try {
+ healthCheckResult = periodicCheckImpl({token, taskExecutor});
+ } catch (const DBException& e) {
+ LOGV2_DEBUG(6728001,
+ 2,
+ "Health observer failed due to an exception",
+ "observerType"_attr = getType(),
+ "errorCode"_attr = e.code(),
+ "reason"_attr = e.reason());
+
+ healthCheckResult = makeSimpleFailedStatus(Severity::kFailure, {e.toStatus()});
+ }
+
_deadlineFuture = DeadlineFuture<HealthCheckStatus>::create(
taskExecutor,
- periodicCheckImpl({token, taskExecutor})
- .onCompletion([this](StatusWith<HealthCheckStatus> status) {
- const auto now = _svcCtx->getPreciseClockSource()->now();
-
- auto lk = stdx::lock_guard(_mutex);
- ++_completedChecksCount;
- invariant(_currentlyRunningHealthCheck);
- _currentlyRunningHealthCheck = false;
- _lastTimeCheckCompleted = now;
-
- if (!status.isOK() ||
- !HealthCheckStatus::isResolved(status.getValue().getSeverity())) {
- ++_completedChecksWithFaultCount;
- }
-
- return status;
- }),
+ std::move(healthCheckResult).onCompletion([this](StatusWith<HealthCheckStatus> status) {
+ const auto now = _svcCtx->getPreciseClockSource()->now();
+
+ auto lk = stdx::lock_guard(_mutex);
+ ++_completedChecksCount;
+ invariant(_currentlyRunningHealthCheck);
+ _currentlyRunningHealthCheck = false;
+ _lastTimeCheckCompleted = now;
+
+ if (!status.isOK() || !HealthCheckStatus::isResolved(status.getValue().getSeverity())) {
+ ++_completedChecksWithFaultCount;
+ }
+
+ return status;
+ }),
getObserverTimeout());
return _deadlineFuture->get();
diff --git a/src/mongo/db/process_health/health_observer_base.h b/src/mongo/db/process_health/health_observer_base.h
index 18f24eb8540..ef7900f640f 100644
--- a/src/mongo/db/process_health/health_observer_base.h
+++ b/src/mongo/db/process_health/health_observer_base.h
@@ -91,7 +91,7 @@ protected:
* @return The result of a complete health check
*/
virtual Future<HealthCheckStatus> periodicCheckImpl(
- PeriodicHealthCheckContext&& periodicCheckContext) noexcept = 0;
+ PeriodicHealthCheckContext&& periodicCheckContext) = 0;
HealthObserverLivenessStats getStatsLocked(WithLock) const;
diff --git a/src/mongo/db/process_health/health_observer_mock.h b/src/mongo/db/process_health/health_observer_mock.h
index e7a500bdf8c..b44fd35a368 100644
--- a/src/mongo/db/process_health/health_observer_mock.h
+++ b/src/mongo/db/process_health/health_observer_mock.h
@@ -40,8 +40,11 @@ namespace mongo {
namespace process_health {
/**
- * Mocked health observer is using a test callback to fetch the next
- * fault severity value every time the periodic check is invoked.
+ * Mocked health observer has two modes of operation (depending on constructor called):
+ * 1. Passing a callback that runs on an executor and returns a severity
+ * 2. Passing an implementation of periodicCheckImpl
+ *
+ * See unit test HealthCheckThrowingExceptionMakesFailedStatus for an example of the second mode.
*/
class HealthObserverMock : public HealthObserverBase {
public:
@@ -54,6 +57,16 @@ public:
_getSeverityCallback(getSeverityCallback),
_observerTimeout(observerTimeout) {}
+ HealthObserverMock(
+ FaultFacetType mockType,
+ ServiceContext* svcCtx,
+ std::function<Future<HealthCheckStatus>(PeriodicHealthCheckContext&&)> periodicCheckImpl,
+ Milliseconds observerTimeout)
+ : HealthObserverBase(svcCtx),
+ _mockType(mockType),
+ _periodicCheckImpl(periodicCheckImpl),
+ _observerTimeout(observerTimeout) {}
+
virtual ~HealthObserverMock() = default;
bool isConfigured() const override {
@@ -70,7 +83,11 @@ protected:
}
Future<HealthCheckStatus> periodicCheckImpl(
- PeriodicHealthCheckContext&& periodicCheckContext) noexcept override {
+ PeriodicHealthCheckContext&& periodicCheckContext) override {
+
+ if (_periodicCheckImpl.has_value()) {
+ return (*_periodicCheckImpl)(std::move(periodicCheckContext));
+ }
auto completionPf = makePromiseFuture<HealthCheckStatus>();
@@ -99,6 +116,8 @@ protected:
private:
const FaultFacetType _mockType;
std::function<Severity()> _getSeverityCallback;
+ boost::optional<std::function<Future<HealthCheckStatus>(PeriodicHealthCheckContext&&)>>
+ _periodicCheckImpl;
const Milliseconds _observerTimeout;
};
diff --git a/src/mongo/db/process_health/health_observer_test.cpp b/src/mongo/db/process_health/health_observer_test.cpp
index 66f5c8a6e99..cd79db11ebc 100644
--- a/src/mongo/db/process_health/health_observer_test.cpp
+++ b/src/mongo/db/process_health/health_observer_test.cpp
@@ -46,6 +46,7 @@ namespace process_health {
// Using the common fault manager test suite.
using test::FaultManagerTest;
+using PeriodicHealthCheckContext = HealthObserverBase::PeriodicHealthCheckContext;
namespace {
// Tests that the mock observer is registered properly.
@@ -254,6 +255,49 @@ TEST_F(FaultManagerTest, SchedulingDuplicateHealthChecksRejected) {
LOGV2(6418205, "Total completed checks count", "count"_attr = totalCompletedCount);
}
+TEST_F(FaultManagerTest, HealthCheckThrowingExceptionMakesFailedStatus) {
+ resetManager(std::make_unique<FaultManagerConfig>());
+
+ FaultFacetType facetType = FaultFacetType::kMock1;
+ AtomicWord<bool> shouldThrow{false};
+
+ std::string logMsg = "Failed due to exception";
+
+ auto periodicCheckImpl =
+ [facetType, &shouldThrow, logMsg](
+ PeriodicHealthCheckContext&& periodicHealthCheckCtx) -> Future<HealthCheckStatus> {
+ if (shouldThrow.load()) {
+ uasserted(ErrorCodes::InternalError, logMsg);
+ }
+ auto completionPf = makePromiseFuture<HealthCheckStatus>();
+ completionPf.promise.emplaceValue(HealthCheckStatus(facetType, Severity::kOk, "success"));
+ return std::move(completionPf.future);
+ };
+
+ HealthObserverRegistration::registerObserverFactory(
+ [facetType, periodicCheckImpl](ServiceContext* svcCtx) {
+ return std::make_unique<HealthObserverMock>(
+ facetType, svcCtx, periodicCheckImpl, Milliseconds(Seconds(30)));
+ });
+
+ assertSoon([this] { return (manager().getFaultState() == FaultState::kStartupCheck); });
+
+ auto initialHealthCheckFuture = manager().startPeriodicHealthChecks();
+ assertSoon([this] { return (manager().getFaultState() == FaultState::kOk); });
+
+ auto observer = manager().getHealthObserversTest().front();
+ ASSERT_EQ(observer->getStats().completedChecksWithFaultCount, 0);
+
+ shouldThrow.store(true);
+ assertSoon([this] { return (manager().getFaultState() == FaultState::kTransientFault); });
+
+ ASSERT_EQ(manager().currentFault()->toBSON()["facets"]["mock1"]["description"].String(),
+ "InternalError: Failed due to exception ");
+
+ ASSERT_GTE(observer->getStats().completedChecksWithFaultCount, 1);
+ resetManager();
+}
+
} // namespace
} // namespace process_health
} // namespace mongo
diff --git a/src/mongo/db/process_health/test_health_observer.cpp b/src/mongo/db/process_health/test_health_observer.cpp
index 70572c48851..01224117baa 100644
--- a/src/mongo/db/process_health/test_health_observer.cpp
+++ b/src/mongo/db/process_health/test_health_observer.cpp
@@ -43,7 +43,7 @@ MONGO_FAIL_POINT_DEFINE(testHealthObserver);
MONGO_FAIL_POINT_DEFINE(badConfigTestHealthObserver);
MONGO_FAIL_POINT_DEFINE(statusFailureTestHealthObserver);
Future<HealthCheckStatus> TestHealthObserver::periodicCheckImpl(
- PeriodicHealthCheckContext&& periodicCheckContext) noexcept {
+ PeriodicHealthCheckContext&& periodicCheckContext) {
LOGV2_DEBUG(5936801, 2, "Test health observer executing");
hangTestHealthObserver.pauseWhileSet();
diff --git a/src/mongo/db/process_health/test_health_observer.h b/src/mongo/db/process_health/test_health_observer.h
index 428d57f8e9d..0c23df7fb42 100644
--- a/src/mongo/db/process_health/test_health_observer.h
+++ b/src/mongo/db/process_health/test_health_observer.h
@@ -50,7 +50,7 @@ protected:
}
Future<HealthCheckStatus> periodicCheckImpl(
- PeriodicHealthCheckContext&& periodicCheckContext) noexcept override;
+ PeriodicHealthCheckContext&& periodicCheckContext) override;
bool isConfigured() const override;
};
diff --git a/src/mongo/db/query/canonical_query.cpp b/src/mongo/db/query/canonical_query.cpp
index c4d2de8fcb0..6449c5241fc 100644
--- a/src/mongo/db/query/canonical_query.cpp
+++ b/src/mongo/db/query/canonical_query.cpp
@@ -538,10 +538,11 @@ std::string CanonicalQuery::toStringShort() const {
}
CanonicalQuery::QueryShapeString CanonicalQuery::encodeKey() const {
- // TODO SERVER-61507: remove '_pipeline.empty()' check. Canonical queries with pushed down
- // $group/$lookup stages are not SBE-compatible until SERVER-61507 is complete.
+ // TODO SERVER-61507: remove 'canUseSbePlanCache' check. Canonical queries with pushed
+ // down $group stages are not compatible with the SBE plan cache until SERVER-61507 is complete.
return (feature_flags::gFeatureFlagSbePlanCache.isEnabledAndIgnoreFCV() &&
- !_forceClassicEngine && _sbeCompatible && _pipeline.empty())
+ !_forceClassicEngine && _sbeCompatible &&
+ canonical_query_encoder::canUseSbePlanCache(*this))
? canonical_query_encoder::encodeSBE(*this)
: canonical_query_encoder::encode(*this);
}
diff --git a/src/mongo/db/query/canonical_query_encoder.cpp b/src/mongo/db/query/canonical_query_encoder.cpp
index 2013c8a635e..11b1a99479a 100644
--- a/src/mongo/db/query/canonical_query_encoder.cpp
+++ b/src/mongo/db/query/canonical_query_encoder.cpp
@@ -40,6 +40,7 @@
#include "mongo/db/matcher/expression_text_noop.h"
#include "mongo/db/matcher/expression_where.h"
#include "mongo/db/matcher/expression_where_noop.h"
+#include "mongo/db/pipeline/document_source_lookup.h"
#include "mongo/db/query/analyze_regex.h"
#include "mongo/db/query/projection.h"
#include "mongo/db/query/query_feature_flags_gen.h"
@@ -86,6 +87,7 @@ const char kEncodeProjectionRequirementSeparator = '-';
const char kEncodeRegexFlagsSeparator = '/';
const char kEncodeSortSection = '~';
const char kEncodeEngineSection = '@';
+const char kEncodePipelineSection = '^';
// These special bytes are used in the encoding of auto-parameterized match expressions in the SBE
// plan cache key.
@@ -135,6 +137,7 @@ void encodeUserString(StringData s, BuilderType* builder) {
case kEncodeEngineSection:
case kEncodeParamMarker:
case kEncodeConstantLiteralMarker:
+ case kEncodePipelineSection:
case '\\':
if constexpr (hasAppendChar<BuilderType>) {
builder->appendChar('\\');
@@ -431,6 +434,26 @@ void encodeCollation(const CollatorInterface* collation, StringBuilder* keyBuild
// not be stable between versions.
}
+void encodePipeline(const std::vector<std::unique_ptr<InnerPipelineStageInterface>>& pipeline,
+ BufBuilder* bufBuilder) {
+ bufBuilder->appendChar(kEncodePipelineSection);
+ for (auto& stage : pipeline) {
+ std::vector<Value> serializedArray;
+ if (auto lookupStage = dynamic_cast<DocumentSourceLookUp*>(stage->documentSource())) {
+ lookupStage->serializeToArray(serializedArray, boost::none);
+ tassert(6443201,
+ "$lookup stage isn't serialized to a single bson object",
+ serializedArray.size() == 1 && serializedArray[0].getType() == Object);
+ const auto bson = serializedArray[0].getDocument().toBson();
+ bufBuilder->appendBuf(bson.objdata(), bson.objsize());
+ } else {
+ tasserted(6443200,
+ str::stream() << "Pipeline stage cannot be encoded in plan cache key: "
+ << stage->documentSource()->getSourceName());
+ }
+ }
+}
+
template <class RegexIterator>
void encodeRegexFlagsForMatch(RegexIterator first, RegexIterator last, StringBuilder* keyBuilder) {
// We sort the flags, so that queries with the same regex flags in different orders will have
@@ -1085,6 +1108,8 @@ std::string encodeSBE(const CanonicalQuery& cq) {
encodeFindCommandRequest(cq.getFindCommandRequest(), &bufBuilder);
+ encodePipeline(cq.pipeline(), &bufBuilder);
+
return base64::encode(StringData(bufBuilder.buf(), bufBuilder.len()));
}
@@ -1106,5 +1131,14 @@ CanonicalQuery::IndexFilterKey encodeForIndexFilters(const CanonicalQuery& cq) {
uint32_t computeHash(StringData key) {
return SimpleStringDataComparator::kInstance.hash(key);
}
+
+bool canUseSbePlanCache(const CanonicalQuery& cq) {
+ for (auto& stage : cq.pipeline()) {
+ if (StringData{stage->documentSource()->getSourceName()} != "$lookup") {
+ return false;
+ }
+ }
+ return true;
+}
} // namespace canonical_query_encoder
} // namespace mongo
diff --git a/src/mongo/db/query/canonical_query_encoder.h b/src/mongo/db/query/canonical_query_encoder.h
index 3164ddbec67..4bfbb68c2f2 100644
--- a/src/mongo/db/query/canonical_query_encoder.h
+++ b/src/mongo/db/query/canonical_query_encoder.h
@@ -68,5 +68,11 @@ CanonicalQuery::IndexFilterKey encodeForIndexFilters(const CanonicalQuery& cq);
* Returns a hash of the given key (produced from either a QueryShapeString or a PlanCacheKey).
*/
uint32_t computeHash(StringData key);
+
+/**
+ * Returns whether a plan generated from this query can be stored in the SBE plan cache.
+ */
+bool canUseSbePlanCache(const CanonicalQuery& cq);
+
} // namespace canonical_query_encoder
} // namespace mongo
diff --git a/src/mongo/db/query/canonical_query_encoder_test.cpp b/src/mongo/db/query/canonical_query_encoder_test.cpp
index 486b4f2d14f..3394e048be8 100644
--- a/src/mongo/db/query/canonical_query_encoder_test.cpp
+++ b/src/mongo/db/query/canonical_query_encoder_test.cpp
@@ -29,10 +29,11 @@
#include "mongo/db/query/canonical_query_encoder.h"
-#include "mongo/db/catalog/collection_mock.h"
#include "mongo/db/jsobj.h"
#include "mongo/db/json.h"
+#include "mongo/db/pipeline/document_source.h"
#include "mongo/db/pipeline/expression_context_for_test.h"
+#include "mongo/db/pipeline/inner_pipeline_stage_impl.h"
#include "mongo/db/query/canonical_query.h"
#include "mongo/db/query/plan_cache_key_factory.h"
#include "mongo/db/query/query_test_service_context.h"
@@ -46,10 +47,17 @@ namespace {
using std::unique_ptr;
static const NamespaceString nss("testdb.testcoll");
+static const NamespaceString foreignNss("testdb.foreigncoll");
-PlanCacheKey makeKey(const CanonicalQuery& cq) {
- CollectionMock coll(nss);
- return plan_cache_key_factory::make<PlanCacheKey>(cq, &coll);
+std::vector<std::unique_ptr<InnerPipelineStageInterface>> parsePipeline(
+ const boost::intrusive_ptr<ExpressionContext> expCtx, const std::vector<BSONObj>& rawPipeline) {
+ auto pipeline = Pipeline::parse(rawPipeline, expCtx);
+
+ std::vector<std::unique_ptr<InnerPipelineStageInterface>> stages;
+ for (auto&& source : pipeline->getSources()) {
+ stages.emplace_back(std::make_unique<InnerPipelineStageImpl>(source));
+ }
+ return stages;
}
/**
@@ -59,7 +67,8 @@ unique_ptr<CanonicalQuery> canonicalize(BSONObj query,
BSONObj sort,
BSONObj proj,
BSONObj collation,
- std::unique_ptr<FindCommandRequest> findCommand = nullptr) {
+ std::unique_ptr<FindCommandRequest> findCommand = nullptr,
+ std::vector<BSONObj> pipelineObj = {}) {
QueryTestServiceContext serviceContext;
auto opCtx = serviceContext.makeOperationContext();
@@ -70,14 +79,26 @@ unique_ptr<CanonicalQuery> canonicalize(BSONObj query,
findCommand->setSort(sort.getOwned());
findCommand->setProjection(proj.getOwned());
findCommand->setCollation(collation.getOwned());
- const boost::intrusive_ptr<ExpressionContext> expCtx;
+
+ const auto expCtx = make_intrusive<ExpressionContextForTest>(opCtx.get(), nss);
+ expCtx->addResolvedNamespaces({foreignNss});
+ if (!findCommand->getCollation().isEmpty()) {
+ auto statusWithCollator = CollatorFactoryInterface::get(opCtx->getServiceContext())
+ ->makeFromBSON(findCommand->getCollation());
+ ASSERT_OK(statusWithCollator.getStatus());
+ expCtx->setCollator(std::move(statusWithCollator.getValue()));
+ }
+ auto pipeline = parsePipeline(expCtx, pipelineObj);
+
auto statusWithCQ =
CanonicalQuery::canonicalize(opCtx.get(),
std::move(findCommand),
false,
expCtx,
ExtensionsCallbackNoop(),
- MatchExpressionParser::kAllowAllSpecialFeatures);
+ MatchExpressionParser::kAllowAllSpecialFeatures,
+ ProjectionPolicies::findProjectionPolicies(),
+ std::move(pipeline));
ASSERT_OK(statusWithCQ.getStatus());
return std::move(statusWithCQ.getValue());
}
@@ -115,13 +136,14 @@ void testComputeSBEKey(BSONObj query,
BSONObj sort,
BSONObj proj,
std::string expectedStr,
- std::unique_ptr<FindCommandRequest> findCommand = nullptr) {
+ std::unique_ptr<FindCommandRequest> findCommand = nullptr,
+ std::vector<BSONObj> pipelineObj = {}) {
BSONObj collation;
unique_ptr<CanonicalQuery> cq(
- canonicalize(query, sort, proj, collation, std::move(findCommand)));
+ canonicalize(query, sort, proj, collation, std::move(findCommand), std::move(pipelineObj)));
cq->setSbeCompatible(true);
- auto key = makeKey(*cq);
- ASSERT_EQUALS(key.toString(), expectedStr);
+ const auto key = canonical_query_encoder::encodeSBE(*cq);
+ ASSERT_EQUALS(key, expectedStr);
}
void testComputeKey(const char* queryStr,
@@ -135,12 +157,14 @@ void testComputeSBEKey(const char* queryStr,
const char* sortStr,
const char* projStr,
std::string expectedStr,
- std::unique_ptr<FindCommandRequest> findCommand = nullptr) {
+ std::unique_ptr<FindCommandRequest> findCommand = nullptr,
+ std::vector<BSONObj> pipelineObj = {}) {
testComputeSBEKey(fromjson(queryStr),
fromjson(sortStr),
fromjson(projStr),
expectedStr,
- std::move(findCommand));
+ std::move(findCommand),
+ std::move(pipelineObj));
}
TEST(CanonicalQueryEncoderTest, ComputeKey) {
@@ -262,8 +286,6 @@ TEST(CanonicalQueryEncoderTest, ComputeKeyEscaped) {
// Cache keys for $geoWithin queries with legacy and GeoJSON coordinates should
// not be the same.
TEST(CanonicalQueryEncoderTest, ComputeKeyGeoWithin) {
- PlanCache planCache(5000);
-
// Legacy coordinates.
unique_ptr<CanonicalQuery> cqLegacy(
canonicalize("{a: {$geoWithin: "
@@ -273,7 +295,8 @@ TEST(CanonicalQueryEncoderTest, ComputeKeyGeoWithin) {
canonicalize("{a: {$geoWithin: "
"{$geometry: {type: 'Polygon', coordinates: "
"[[[0, 0], [0, 90], [90, 0], [0, 0]]]}}}}"));
- ASSERT_NOT_EQUALS(makeKey(*cqLegacy), makeKey(*cqNew));
+ ASSERT_NOT_EQUALS(canonical_query_encoder::encode(*cqLegacy),
+ canonical_query_encoder::encode(*cqNew));
}
// GEO_NEAR cache keys should include information on geometry and CRS in addition
@@ -395,85 +418,87 @@ TEST(CanonicalQueryEncoderTest, ComputeKeySBE) {
// SBE must be enabled in order to generate SBE plan cache keys.
RAIIServerParameterControllerForTest controllerSBE("internalQueryForceClassicEngine", false);
- // TODO SERVER-61314: Remove when featureFlagSbePlanCache is removed.
RAIIServerParameterControllerForTest controllerSBEPlanCache("featureFlagSbePlanCache", true);
- testComputeSBEKey("{}", "{}", "{}", "YW4ABQAAAAAAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAAA=");
+ testComputeSBEKey("{}", "{}", "{}", "YW4ABQAAAAAAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAABe");
testComputeSBEKey(
"{$or: [{a: 1}, {b: 2}]}",
"{}",
"{}",
- "b3IAW2VxAGE/AAAAACxlcQBiPwEAAABdBQAAAAAAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAAA=");
+ "b3IAW2VxAGE/AAAAACxlcQBiPwEAAABdBQAAAAAAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAABe");
testComputeSBEKey(
- "{a: 1}", "{}", "{}", "ZXEAYT8AAAAABQAAAAAAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAAA=");
+ "{a: 1}", "{}", "{}", "ZXEAYT8AAAAABQAAAAAAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAABe");
testComputeSBEKey(
- "{b: 1}", "{}", "{}", "ZXEAYj8AAAAABQAAAAAAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAAA=");
+ "{b: 1}", "{}", "{}", "ZXEAYj8AAAAABQAAAAAAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAABe");
testComputeSBEKey(
"{a: 1, b: 1, c: 1}",
"{}",
"{}",
- "YW4AW2VxAGE/AAAAACxlcQBiPwEAAAAsZXEAYz8CAAAAXQUAAAAAAAAAAAAAAABubm5uBQAAAAAFAAAAAAUAAAAA");
+ "YW4AW2VxAGE/"
+ "AAAAACxlcQBiPwEAAAAsZXEAYz8CAAAAXQUAAAAAAAAAAAAAAABubm5uBQAAAAAFAAAAAAUAAAAAXg==");
// With sort
- testComputeSBEKey("{}", "{a: 1}", "{}", "YW4ABQAAAAB+YWEAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAAA=");
+ testComputeSBEKey("{}", "{a: 1}", "{}", "YW4ABQAAAAB+YWEAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAABe");
testComputeSBEKey(
- "{}", "{a: -1}", "{}", "YW4ABQAAAAB+ZGEAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAAA=");
+ "{}", "{a: -1}", "{}", "YW4ABQAAAAB+ZGEAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAABe");
testComputeSBEKey(
- "{a: 1}", "{a: 1}", "{}", "ZXEAYT8AAAAABQAAAAB+YWEAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAAA=");
+ "{a: 1}", "{a: 1}", "{}", "ZXEAYT8AAAAABQAAAAB+YWEAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAABe");
// With projection
testComputeSBEKey("{a: 1}",
"{a: 1}",
"{a: 1}",
- "ZXEAYT8AAAAADAAAABBhAAEAAAAAfmFhAAAAAAAAAABubm5uBQAAAAAFAAAAAAUAAAAA");
- testComputeSBEKey(
- "{}", "{a: 1}", "{a: 1}", "YW4ADAAAABBhAAEAAAAAfmFhAAAAAAAAAABubm5uBQAAAAAFAAAAAAUAAAAA");
+ "ZXEAYT8AAAAADAAAABBhAAEAAAAAfmFhAAAAAAAAAABubm5uBQAAAAAFAAAAAAUAAAAAXg==");
+ testComputeSBEKey("{}",
+ "{a: 1}",
+ "{a: 1}",
+ "YW4ADAAAABBhAAEAAAAAfmFhAAAAAAAAAABubm5uBQAAAAAFAAAAAAUAAAAAXg==");
testComputeSBEKey("{}",
"{a: 1}",
"{a: 1, b: [{$const: 1}]}",
"YW4AKAAAABBhAAEAAAAEYgAZAAAAAzAAEQAAABAkY29uc3QAAQAAAAAAAH5hYQAAAAAAAAAAbm5u"
- "bgUAAAAABQAAAAAFAAAAAA==");
+ "bgUAAAAABQAAAAAFAAAAAF4=");
testComputeSBEKey(
- "{}", "{}", "{a: 1}", "YW4ADAAAABBhAAEAAAAAAAAAAAAAAABubm5uBQAAAAAFAAAAAAUAAAAA");
+ "{}", "{}", "{a: 1}", "YW4ADAAAABBhAAEAAAAAAAAAAAAAAABubm5uBQAAAAAFAAAAAAUAAAAAXg==");
testComputeSBEKey(
- "{}", "{}", "{a: true}", "YW4ACQAAAAhhAAEAAAAAAAAAAABubm5uBQAAAAAFAAAAAAUAAAAA");
+ "{}", "{}", "{a: true}", "YW4ACQAAAAhhAAEAAAAAAAAAAABubm5uBQAAAAAFAAAAAAUAAAAAXg==");
testComputeSBEKey(
- "{}", "{}", "{a: false}", "YW4ACQAAAAhhAAAAAAAAAAAAAABubm5uBQAAAAAFAAAAAAUAAAAA");
+ "{}", "{}", "{a: false}", "YW4ACQAAAAhhAAAAAAAAAAAAAABubm5uBQAAAAAFAAAAAAUAAAAAXg==");
// With FindCommandRequest
auto findCommand = std::make_unique<FindCommandRequest>(nss);
testComputeSBEKey("{a: 1}",
"{a: 1}",
"{}",
- "ZXEAYT8AAAAABQAAAAB+YWEAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAAA=",
+ "ZXEAYT8AAAAABQAAAAB+YWEAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAABe",
std::move(findCommand));
findCommand = std::make_unique<FindCommandRequest>(nss);
findCommand->setAllowDiskUse(true);
testComputeSBEKey("{a: 1}",
"{a: 1}",
"{}",
- "ZXEAYT8AAAAABQAAAAB+YWEAAAAAAAAAAHRubm4FAAAAAAUAAAAABQAAAAA=",
+ "ZXEAYT8AAAAABQAAAAB+YWEAAAAAAAAAAHRubm4FAAAAAAUAAAAABQAAAABe",
std::move(findCommand));
findCommand = std::make_unique<FindCommandRequest>(nss);
findCommand->setAllowDiskUse(false);
testComputeSBEKey("{a: 1}",
"{a: 1}",
"{}",
- "ZXEAYT8AAAAABQAAAAB+YWEAAAAAAAAAAGZubm4FAAAAAAUAAAAABQAAAAA=",
+ "ZXEAYT8AAAAABQAAAAB+YWEAAAAAAAAAAGZubm4FAAAAAAUAAAAABQAAAABe",
std::move(findCommand));
findCommand = std::make_unique<FindCommandRequest>(nss);
findCommand->setReturnKey(true);
testComputeSBEKey("{a: 1}",
"{a: 1}",
"{}",
- "ZXEAYT8AAAAABQAAAAB+YWEAAAAAAAAAAG50bm4FAAAAAAUAAAAABQAAAAA=",
+ "ZXEAYT8AAAAABQAAAAB+YWEAAAAAAAAAAG50bm4FAAAAAAUAAAAABQAAAABe",
std::move(findCommand));
findCommand = std::make_unique<FindCommandRequest>(nss);
findCommand->setRequestResumeToken(false);
testComputeSBEKey("{a: 1}",
"{a: 1}",
"{}",
- "ZXEAYT8AAAAABQAAAAB+YWEAAAAAAAAAAG5uZm4FAAAAAAUAAAAABQAAAAA=",
+ "ZXEAYT8AAAAABQAAAAB+YWEAAAAAAAAAAG5uZm4FAAAAAAUAAAAABQAAAABe",
std::move(findCommand));
findCommand = std::make_unique<FindCommandRequest>(nss);
@@ -481,7 +506,7 @@ TEST(CanonicalQueryEncoderTest, ComputeKeySBE) {
testComputeSBEKey("{a: 1}",
"{a: 1}",
"{}",
- "ZXEAYT8AAAAABQAAAAB+YWEKAAAAAAAAAAAAAABubm5uBQAAAAAFAAAAAAUAAAAA",
+ "ZXEAYT8AAAAABQAAAAB+YWEKAAAAAAAAAAAAAABubm5uBQAAAAAFAAAAAAUAAAAAXg==",
std::move(findCommand));
findCommand = std::make_unique<FindCommandRequest>(nss);
@@ -489,7 +514,7 @@ TEST(CanonicalQueryEncoderTest, ComputeKeySBE) {
testComputeSBEKey("{a: 1}",
"{a: 1}",
"{}",
- "ZXEAYT8AAAAABQAAAAB+YWEAAAAACgAAAAAAAABubm5uBQAAAAAFAAAAAAUAAAAA",
+ "ZXEAYT8AAAAABQAAAAB+YWEAAAAACgAAAAAAAABubm5uBQAAAAAFAAAAAAUAAAAAXg==",
std::move(findCommand));
findCommand = std::make_unique<FindCommandRequest>(nss);
@@ -497,14 +522,14 @@ TEST(CanonicalQueryEncoderTest, ComputeKeySBE) {
testComputeSBEKey("{a: 1}",
"{a: 1}",
"{}",
- "ZXEAYT8AAAAABQAAAAB+YWEAAAAAAAAAAG5ubm4FAAAAAAwAAAAQYQABAAAAAAUAAAAA",
+ "ZXEAYT8AAAAABQAAAAB+YWEAAAAAAAAAAG5ubm4FAAAAAAwAAAAQYQABAAAAAAUAAAAAXg==",
std::move(findCommand));
findCommand = std::make_unique<FindCommandRequest>(nss);
findCommand->setMax(mongo::fromjson("{ a : 1 }"));
testComputeSBEKey("{a: 1}",
"{a: 1}",
"{}",
- "ZXEAYT8AAAAABQAAAAB+YWEAAAAAAAAAAG5ubm4FAAAAAAUAAAAADAAAABBhAAEAAAAA",
+ "ZXEAYT8AAAAABQAAAAB+YWEAAAAAAAAAAG5ubm4FAAAAAAUAAAAADAAAABBhAAEAAAAAXg==",
std::move(findCommand));
findCommand = std::make_unique<FindCommandRequest>(nss);
findCommand->setRequestResumeToken(true);
@@ -515,9 +540,74 @@ TEST(CanonicalQueryEncoderTest, ComputeKeySBE) {
"{a: 1}",
"{}",
"{}",
- "ZXEAYT8AAAAABQAAAAAAAAAAAAAAAG5udG4YAAAAEiRyZWNvcmRJZAABAAAAAAAAAAAFAAAAAAUAAAAA",
+ "ZXEAYT8AAAAABQAAAAAAAAAAAAAAAG5udG4YAAAAEiRyZWNvcmRJZAABAAAAAAAAAAAFAAAAAAUAAAAAXg==",
std::move(findCommand));
}
+TEST(CanonicalQueryEncoderTest, ComputeKeySBEWithPipeline) {
+ // SBE must be enabled in order to generate SBE plan cache keys.
+ RAIIServerParameterControllerForTest controllerSBE("internalQueryForceClassicEngine", false);
+
+ RAIIServerParameterControllerForTest controllerSBEPlanCache("featureFlagSbePlanCache", true);
+
+ auto getLookupBson = [](StringData localField, StringData foreignField, StringData asField) {
+ return BSON("$lookup" << BSON("from" << foreignNss.coll() << "localField" << localField
+ << "foreignField" << foreignField << "as" << asField));
+ };
+
+ // No pipeline stage.
+ testComputeSBEKey("{a: 1}",
+ "{}",
+ "{}",
+ "ZXEAYT8AAAAABQAAAAAAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAABe",
+ nullptr,
+ {});
+
+ // Different $lookup stage options.
+ testComputeSBEKey(
+ "{a: 1}",
+ "{}",
+ "{}",
+ "ZXEAYT8AAAAABQAAAAAAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAABeWgAAAAMkbG9va3VwAEwAAAACZnJvbQAMAA"
+ "AAZm9yZWlnbmNvbGwAAmFzAAMAAABhcwACbG9jYWxGaWVsZAACAAAAYQACZm9yZWlnbkZpZWxkAAIAAABiAAAA",
+ nullptr,
+ {getLookupBson("a", "b", "as")});
+ testComputeSBEKey("{a: 1}",
+ "{}",
+ "{}",
+ "ZXEAYT8AAAAABQAAAAAAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAABeWwAAAAMkbG9va3VwAE0A"
+ "AAACZnJvbQAMAAAAZm9yZWlnbmNvbGwAAmFzAAMAAABhcwACbG9jYWxGaWVsZAADAAAAYTEAAmZv"
+ "cmVpZ25GaWVsZAACAAAAYgAAAA==",
+ nullptr,
+ {getLookupBson("a1", "b", "as")});
+ testComputeSBEKey("{a: 1}",
+ "{}",
+ "{}",
+ "ZXEAYT8AAAAABQAAAAAAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAABeWwAAAAMkbG9va3VwAE0A"
+ "AAACZnJvbQAMAAAAZm9yZWlnbmNvbGwAAmFzAAMAAABhcwACbG9jYWxGaWVsZAACAAAAYQACZm9y"
+ "ZWlnbkZpZWxkAAMAAABiMQAAAA==",
+ nullptr,
+ {getLookupBson("a", "b1", "as")});
+ testComputeSBEKey("{a: 1}",
+ "{}",
+ "{}",
+ "ZXEAYT8AAAAABQAAAAAAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAABeWwAAAAMkbG9va3VwAE0A"
+ "AAACZnJvbQAMAAAAZm9yZWlnbmNvbGwAAmFzAAQAAABhczEAAmxvY2FsRmllbGQAAgAAAGEAAmZv"
+ "cmVpZ25GaWVsZAACAAAAYgAAAA==",
+ nullptr,
+ {getLookupBson("a", "b", "as1")});
+
+ // Multiple $lookup stages.
+ testComputeSBEKey("{a: 1}",
+ "{}",
+ "{}",
+ "ZXEAYT8AAAAABQAAAAAAAAAAAAAAAG5ubm4FAAAAAAUAAAAABQAAAABeWgAAAAMkbG9va3VwAEwA"
+ "AAACZnJvbQAMAAAAZm9yZWlnbmNvbGwAAmFzAAMAAABhcwACbG9jYWxGaWVsZAACAAAAYQACZm9y"
+ "ZWlnbkZpZWxkAAIAAABiAAAAXQAAAAMkbG9va3VwAE8AAAACZnJvbQAMAAAAZm9yZWlnbmNvbGwA"
+ "AmFzAAQAAABhczEAAmxvY2FsRmllbGQAAwAAAGExAAJmb3JlaWduRmllbGQAAwAAAGIxAAAA",
+ nullptr,
+ {getLookupBson("a", "b", "as"), getLookupBson("a1", "b1", "as1")});
+}
+
} // namespace
} // namespace mongo
diff --git a/src/mongo/db/query/classic_stage_builder.cpp b/src/mongo/db/query/classic_stage_builder.cpp
index cc1915510c1..4404e2ab6da 100644
--- a/src/mongo/db/query/classic_stage_builder.cpp
+++ b/src/mongo/db/query/classic_stage_builder.cpp
@@ -79,7 +79,7 @@ std::unique_ptr<PlanStage> ClassicStageBuilder::build(const QuerySolutionNode* r
CollectionScanParams params;
params.tailable = csn->tailable;
params.shouldTrackLatestOplogTimestamp = csn->shouldTrackLatestOplogTimestamp;
- params.assertTsHasNotFallenOffOplog = csn->assertTsHasNotFallenOffOplog;
+ params.assertTsHasNotFallenOff = csn->assertTsHasNotFallenOff;
params.direction = (csn->direction == 1) ? CollectionScanParams::FORWARD
: CollectionScanParams::BACKWARD;
params.shouldWaitForOplogVisibility = csn->shouldWaitForOplogVisibility;
diff --git a/src/mongo/db/query/datetime/date_time_support.cpp b/src/mongo/db/query/datetime/date_time_support.cpp
index 09badabd4a0..439c1f028d2 100644
--- a/src/mongo/db/query/datetime/date_time_support.cpp
+++ b/src/mongo/db/query/datetime/date_time_support.cpp
@@ -76,6 +76,8 @@ long long seconds(Date_t date) {
// Format specifier map when parsing a date from a string with a required format.
//
const std::vector<timelib_format_specifier> kDateFromStringFormatMap = {
+ {'b', TIMELIB_FORMAT_TEXTUAL_MONTH_3_LETTER},
+ {'B', TIMELIB_FORMAT_TEXTUAL_MONTH_FULL},
{'d', TIMELIB_FORMAT_DAY_TWO_DIGIT},
{'G', TIMELIB_FORMAT_YEAR_ISO},
{'H', TIMELIB_FORMAT_HOUR_TWO_DIGIT_24_MAX},
@@ -775,6 +777,7 @@ static const StringMap<DayOfWeek> dayOfWeekNameToDayOfWeekMap{
{"sunday", DayOfWeek::sunday},
{"sun", DayOfWeek::sunday},
};
+
} // namespace
long long dateDiff(Date_t startDate,
diff --git a/src/mongo/db/query/explain.cpp b/src/mongo/db/query/explain.cpp
index 414badb8332..568c3da9fe0 100644
--- a/src/mongo/db/query/explain.cpp
+++ b/src/mongo/db/query/explain.cpp
@@ -48,6 +48,7 @@
#include "mongo/db/query/collection_query_info.h"
#include "mongo/db/query/explain_common.h"
#include "mongo/db/query/get_executor.h"
+#include "mongo/db/query/multiple_collection_accessor.h"
#include "mongo/db/query/plan_cache_key_factory.h"
#include "mongo/db/query/plan_executor.h"
#include "mongo/db/query/plan_executor_impl.h"
@@ -79,7 +80,7 @@ namespace {
* - 'out' is a builder for the explain output.
*/
void generatePlannerInfo(PlanExecutor* exec,
- const CollectionPtr& collection,
+ const MultipleCollectionAccessor& collections,
BSONObj extraInfo,
BSONObjBuilder* out) {
BSONObjBuilder plannerBob(out->subobjStart("queryPlanner"));
@@ -91,22 +92,23 @@ void generatePlannerInfo(PlanExecutor* exec,
bool indexFilterSet = false;
boost::optional<uint32_t> queryHash;
boost::optional<uint32_t> planCacheKeyHash;
- if (collection && exec->getCanonicalQuery()) {
+ const auto& mainCollection = collections.getMainCollection();
+ if (mainCollection && exec->getCanonicalQuery()) {
const QuerySettings* querySettings =
- QuerySettingsDecoration::get(collection->getSharedDecorations());
+ QuerySettingsDecoration::get(mainCollection->getSharedDecorations());
if (exec->getCanonicalQuery()->isSbeCompatible() &&
feature_flags::gFeatureFlagSbePlanCache.isEnabledAndIgnoreFCV() &&
!exec->getCanonicalQuery()->getForceClassicEngine() &&
- // TODO(SERVER-61507): Remove pipeline check once lowered pipelines are integrated with
- // SBE plan cache.
- exec->getCanonicalQuery()->pipeline().empty()) {
- const auto planCacheKeyInfo = plan_cache_key_factory::make<sbe::PlanCacheKey>(
- *exec->getCanonicalQuery(), collection);
+ // TODO SERVER-61507: remove canUseSbePlanCache check when $group pushdown is
+ // integrated with SBE plan cache.
+ canonical_query_encoder::canUseSbePlanCache(*exec->getCanonicalQuery())) {
+ const auto planCacheKeyInfo =
+ plan_cache_key_factory::make(*exec->getCanonicalQuery(), collections);
planCacheKeyHash = planCacheKeyInfo.planCacheKeyHash();
queryHash = planCacheKeyInfo.queryHash();
} else {
- const auto planCacheKeyInfo =
- plan_cache_key_factory::make<PlanCacheKey>(*exec->getCanonicalQuery(), collection);
+ const auto planCacheKeyInfo = plan_cache_key_factory::make<PlanCacheKey>(
+ *exec->getCanonicalQuery(), mainCollection);
planCacheKeyHash = planCacheKeyInfo.planCacheKeyHash();
queryHash = planCacheKeyInfo.queryHash();
}
@@ -310,7 +312,7 @@ void appendBasicPlanCacheEntryInfoToBSON(const EntryType& entry, BSONObjBuilder*
} // namespace
void Explain::explainStages(PlanExecutor* exec,
- const CollectionPtr& collection,
+ const MultipleCollectionAccessor& collections,
ExplainOptions::Verbosity verbosity,
Status executePlanStatus,
boost::optional<PlanExplainer::PlanStatsDetails> winningPlanTrialStats,
@@ -325,7 +327,7 @@ void Explain::explainStages(PlanExecutor* exec,
out->appendElements(explainVersionToBson(explainer.getVersion()));
if (verbosity >= ExplainOptions::Verbosity::kQueryPlanner) {
- generatePlannerInfo(exec, collection, extraInfo, out);
+ generatePlannerInfo(exec, collections, extraInfo, out);
}
if (verbosity >= ExplainOptions::Verbosity::kExecStats) {
@@ -364,7 +366,7 @@ void Explain::explainPipeline(PlanExecutor* exec,
}
void Explain::explainStages(PlanExecutor* exec,
- const CollectionPtr& collection,
+ const MultipleCollectionAccessor& collections,
ExplainOptions::Verbosity verbosity,
BSONObj extraInfo,
const BSONObj& command,
@@ -372,9 +374,10 @@ void Explain::explainStages(PlanExecutor* exec,
auto&& explainer = exec->getPlanExplainer();
auto winningPlanTrialStats = explainer.getWinningPlanTrialStats();
Status executePlanStatus = Status::OK();
- const CollectionPtr* collectionPtr = &collection;
+ const MultipleCollectionAccessor* collectionsPtr = &collections;
// If we need execution stats, then run the plan in order to gather the stats.
+ const MultipleCollectionAccessor emptyCollections;
if (verbosity >= ExplainOptions::Verbosity::kExecStats) {
try {
executePlan(exec);
@@ -386,12 +389,12 @@ void Explain::explainStages(PlanExecutor* exec,
// then the collection may no longer be valid. We conservatively set our collection pointer
// to null in case it is invalid.
if (!executePlanStatus.isOK() && executePlanStatus != ErrorCodes::NoQueryExecutionPlans) {
- collectionPtr = &CollectionPtr::null;
+ collectionsPtr = &emptyCollections;
}
}
explainStages(exec,
- *collectionPtr,
+ *collectionsPtr,
verbosity,
executePlanStatus,
winningPlanTrialStats,
@@ -403,6 +406,15 @@ void Explain::explainStages(PlanExecutor* exec,
explain_common::generateServerParameters(out);
}
+void Explain::explainStages(PlanExecutor* exec,
+ const CollectionPtr& collection,
+ ExplainOptions::Verbosity verbosity,
+ BSONObj extraInfo,
+ const BSONObj& command,
+ BSONObjBuilder* out) {
+ explainStages(exec, MultipleCollectionAccessor(collection), verbosity, extraInfo, command, out);
+}
+
void Explain::planCacheEntryToBSON(const PlanCacheEntry& entry, BSONObjBuilder* out) {
out->append("version", "1");
diff --git a/src/mongo/db/query/explain.h b/src/mongo/db/query/explain.h
index d41dd3a1725..1dcabdeb7e3 100644
--- a/src/mongo/db/query/explain.h
+++ b/src/mongo/db/query/explain.h
@@ -39,6 +39,7 @@ namespace mongo {
class Collection;
class CollectionPtr;
+class MultipleCollectionAccessor;
class OperationContext;
class PlanExecutorPipeline;
struct PlanSummaryStats;
@@ -77,15 +78,26 @@ public:
BSONObj extraInfo,
const BSONObj& command,
BSONObjBuilder* out);
+
+ /**
+ * Similar to the above function, but takes in multiple collections instead to support
+ * aggregation that involves multiple collections (e.g. $lookup).
+ */
+ static void explainStages(PlanExecutor* exec,
+ const MultipleCollectionAccessor& collections,
+ ExplainOptions::Verbosity verbosity,
+ BSONObj extraInfo,
+ const BSONObj& command,
+ BSONObjBuilder* out);
+
/**
* Adds "queryPlanner" and "executionStats" (if requested in verbosity) fields to 'out'. Unlike
* the other overload of explainStages() above, this one does not add the "serverInfo" section.
*
* - 'exec' is the stage tree for the operation being explained.
- * - 'collection' is the relevant collection. During this call it may be required to execute the
- * plan to collect statistics. If the PlanExecutor uses 'kLockExternally' lock policy, the
- * caller should hold at least an IS lock on the collection the that the query runs on, even if
- * 'collection' parameter is nullptr.
+ * - 'collections' are the relevant main and secondary collections (e.g. for $lookup). If the
+ * PlanExecutor uses 'kLockExternally' lock policy, the caller should hold the necessary db_raii
+ * object on the involved collections.
* - 'verbosity' is the verbosity level of the explain.
* - 'extraInfo' specifies additional information to include into the output.
* - 'executePlanStatus' is the status returned after executing the query (Status::OK if the
@@ -97,7 +109,7 @@ public:
*/
static void explainStages(
PlanExecutor* exec,
- const CollectionPtr& collection,
+ const MultipleCollectionAccessor& collections,
ExplainOptions::Verbosity verbosity,
Status executePlanStatus,
boost::optional<PlanExplainer::PlanStatsDetails> winningPlanTrialStats,
diff --git a/src/mongo/db/query/fle/server_rewrite.cpp b/src/mongo/db/query/fle/server_rewrite.cpp
index f4f02bcb383..2aeb99a4061 100644
--- a/src/mongo/db/query/fle/server_rewrite.cpp
+++ b/src/mongo/db/query/fle/server_rewrite.cpp
@@ -32,6 +32,7 @@
#include <memory>
+#include "mongo/bson/bsonmisc.h"
#include "mongo/bson/bsonobj.h"
#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/bson/bsontypes.h"
@@ -48,9 +49,14 @@
#include "mongo/db/pipeline/expression.h"
#include "mongo/db/query/collation/collator_factory_interface.h"
#include "mongo/db/service_context.h"
+#include "mongo/logv2/log.h"
#include "mongo/s/grid.h"
#include "mongo/s/transaction_router_resource_yielder.h"
#include "mongo/util/assert_util.h"
+#include "mongo/util/intrusive_counter.h"
+
+
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery
namespace mongo::fle {
@@ -68,6 +74,56 @@ std::unique_ptr<CollatorInterface> collatorFromBSON(OperationContext* opCtx,
}
namespace {
+template <typename PayloadT>
+boost::intrusive_ptr<ExpressionInternalFLEEqual> generateFleEqualMatch(StringData path,
+ const PayloadT& ffp,
+ ExpressionContext* expCtx) {
+    // Generate { $_internalFleEq: { field: "$field_name", server: f_3, counter: cm, edc: k_EDC } }
+ auto tokens = ParsedFindPayload(ffp);
+
+ uassert(6672401,
+ "Missing required field server encryption token in find payload",
+ tokens.serverToken.has_value());
+
+ return make_intrusive<ExpressionInternalFLEEqual>(
+ expCtx,
+ ExpressionFieldPath::createPathFromString(
+ expCtx, path.toString(), expCtx->variablesParseState),
+ tokens.serverToken.get().data,
+ tokens.maxCounter.value_or(0LL),
+ tokens.edcToken.data);
+}
+
+
+template <typename PayloadT>
+std::unique_ptr<ExpressionInternalFLEEqual> generateFleEqualMatchUnique(StringData path,
+ const PayloadT& ffp,
+ ExpressionContext* expCtx) {
+    // Generate { $_internalFleEq: { field: "$field_name", server: f_3, counter: cm, edc: k_EDC } }
+ auto tokens = ParsedFindPayload(ffp);
+
+ uassert(6672419,
+ "Missing required field server encryption token in find payload",
+ tokens.serverToken.has_value());
+
+ return std::make_unique<ExpressionInternalFLEEqual>(
+ expCtx,
+ ExpressionFieldPath::createPathFromString(
+ expCtx, path.toString(), expCtx->variablesParseState),
+ tokens.serverToken.get().data,
+ tokens.maxCounter.value_or(0LL),
+ tokens.edcToken.data);
+}
+
+std::unique_ptr<MatchExpression> generateFleEqualMatchAndExpr(StringData path,
+ const BSONElement ffp,
+ ExpressionContext* expCtx) {
+ auto fleEqualMatch = generateFleEqualMatch(path, ffp, expCtx);
+
+ return std::make_unique<ExprMatchExpression>(fleEqualMatch, expCtx);
+}
+
+
/**
* This section defines a mapping from DocumentSources to the dispatch function to appropriately
* handle FLE rewriting for that stage. This should be kept in line with code on the client-side
@@ -128,7 +184,8 @@ public:
* The final output will look like
* {$or: [{$in: [tag0, "$__safeContent__"]}, {$in: [tag1, "$__safeContent__"]}, ...]}.
*/
- std::unique_ptr<Expression> rewriteComparisonsToEncryptedField(
+ std::unique_ptr<Expression> rewriteInToEncryptedField(
+ const Expression* leftExpr,
const std::vector<boost::intrusive_ptr<Expression>>& equalitiesList) {
size_t numFFPs = 0;
std::vector<boost::intrusive_ptr<Expression>> orListElems;
@@ -140,11 +197,122 @@ public:
continue;
}
- // ... rewrite the payload to a list of tags...
numFFPs++;
+ }
+ }
+
+ // Finally, construct an $or of all of the $ins.
+ if (numFFPs == 0) {
+ return nullptr;
+ }
+
+ uassert(
+ 6334102,
+ "If any elements in an comparison expression are encrypted, then all elements should "
+ "be encrypted.",
+ numFFPs == equalitiesList.size());
+
+ auto leftFieldPath = dynamic_cast<const ExpressionFieldPath*>(leftExpr);
+ uassert(6672417,
+ "$in is only supported with Queryable Encryption when the first argument is a "
+ "field path",
+ leftFieldPath != nullptr);
+
+ if (!queryRewriter->isForceHighCardinality()) {
+ try {
+ for (auto& equality : equalitiesList) {
+ // For each expression representing a FleFindPayload...
+ if (auto constChild = dynamic_cast<ExpressionConstant*>(equality.get())) {
+ // ... rewrite the payload to a list of tags...
+ auto tags = queryRewriter->rewritePayloadAsTags(constChild->getValue());
+ for (auto&& tagElt : tags) {
+ // ... and for each tag, construct expression {$in: [tag,
+ // "$__safeContent__"]}.
+ std::vector<boost::intrusive_ptr<Expression>> inVec{
+ ExpressionConstant::create(queryRewriter->expCtx(), tagElt),
+ ExpressionFieldPath::createPathFromString(
+ queryRewriter->expCtx(),
+ kSafeContent,
+ queryRewriter->expCtx()->variablesParseState)};
+ orListElems.push_back(make_intrusive<ExpressionIn>(
+ queryRewriter->expCtx(), std::move(inVec)));
+ }
+ }
+ }
+
+ didRewrite = true;
+
+ return std::make_unique<ExpressionOr>(queryRewriter->expCtx(),
+ std::move(orListElems));
+ } catch (const ExceptionFor<ErrorCodes::FLEMaxTagLimitExceeded>& ex) {
+ LOGV2_DEBUG(6672403,
+ 2,
+ "FLE Max tag limit hit during aggregation $in rewrite",
+ "__error__"_attr = ex.what());
+
+ if (queryRewriter->getHighCardinalityMode() !=
+ FLEQueryRewriter::HighCardinalityMode::kUseIfNeeded) {
+ throw;
+ }
+
+ // fall through
+ }
+ }
+
+ for (auto& equality : equalitiesList) {
+ if (auto constChild = dynamic_cast<ExpressionConstant*>(equality.get())) {
+ auto fleEqExpr = generateFleEqualMatch(
+ leftFieldPath->getFieldPathWithoutCurrentPrefix().fullPath(),
+ constChild->getValue(),
+ queryRewriter->expCtx());
+ orListElems.push_back(fleEqExpr);
+ }
+ }
+
+ didRewrite = true;
+ return std::make_unique<ExpressionOr>(queryRewriter->expCtx(), std::move(orListElems));
+ }
+
+ // Rewrite a [$eq : [$fieldpath, constant]] or [$eq: [constant, $fieldpath]]
+ // to _internalFleEq: {field: $fieldpath, edc: edcToken, counter: N, server: serverToken}
+ std::unique_ptr<Expression> rewriteComparisonsToEncryptedField(
+ const std::vector<boost::intrusive_ptr<Expression>>& equalitiesList) {
+
+ auto leftConstant = dynamic_cast<ExpressionConstant*>(equalitiesList[0].get());
+ auto rightConstant = dynamic_cast<ExpressionConstant*>(equalitiesList[1].get());
+
+ bool isLeftFFP = leftConstant && queryRewriter->isFleFindPayload(leftConstant->getValue());
+ bool isRightFFP =
+ rightConstant && queryRewriter->isFleFindPayload(rightConstant->getValue());
+
+ uassert(6334100,
+ "Cannot compare two encrypted constants to each other",
+ !(isLeftFFP && isRightFFP));
+
+ // No FLE Find Payload
+ if (!isLeftFFP && !isRightFFP) {
+ return nullptr;
+ }
+
+ auto leftFieldPath = dynamic_cast<ExpressionFieldPath*>(equalitiesList[0].get());
+ auto rightFieldPath = dynamic_cast<ExpressionFieldPath*>(equalitiesList[1].get());
+
+ uassert(
+ 6672413,
+ "Queryable Encryption only supports comparisons between a field path and a constant",
+ leftFieldPath || rightFieldPath);
+
+ auto fieldPath = leftFieldPath ? leftFieldPath : rightFieldPath;
+ auto constChild = isLeftFFP ? leftConstant : rightConstant;
+
+ if (!queryRewriter->isForceHighCardinality()) {
+ try {
+ std::vector<boost::intrusive_ptr<Expression>> orListElems;
+
auto tags = queryRewriter->rewritePayloadAsTags(constChild->getValue());
for (auto&& tagElt : tags) {
- // ... and for each tag, construct expression {$in: [tag, "$__safeContent__"]}.
+ // ... and for each tag, construct expression {$in: [tag,
+ // "$__safeContent__"]}.
std::vector<boost::intrusive_ptr<Expression>> inVec{
ExpressionConstant::create(queryRewriter->expCtx(), tagElt),
ExpressionFieldPath::createPathFromString(
@@ -154,21 +322,33 @@ public:
orListElems.push_back(
make_intrusive<ExpressionIn>(queryRewriter->expCtx(), std::move(inVec)));
}
+
+ didRewrite = true;
+ return std::make_unique<ExpressionOr>(queryRewriter->expCtx(),
+ std::move(orListElems));
+
+ } catch (const ExceptionFor<ErrorCodes::FLEMaxTagLimitExceeded>& ex) {
+ LOGV2_DEBUG(6672409,
+ 2,
+ "FLE Max tag limit hit during query $in rewrite",
+ "__error__"_attr = ex.what());
+
+ if (queryRewriter->getHighCardinalityMode() !=
+ FLEQueryRewriter::HighCardinalityMode::kUseIfNeeded) {
+ throw;
+ }
+
+ // fall through
}
}
- // Finally, construct an $or of all of the $ins.
- if (numFFPs == 0) {
- return nullptr;
- }
- uassert(
- 6334102,
- "If any elements in an comparison expression are encrypted, then all elements should "
- "be encrypted.",
- numFFPs == equalitiesList.size());
+ auto fleEqExpr =
+ generateFleEqualMatchUnique(fieldPath->getFieldPathWithoutCurrentPrefix().fullPath(),
+ constChild->getValue(),
+ queryRewriter->expCtx());
didRewrite = true;
- return std::make_unique<ExpressionOr>(queryRewriter->expCtx(), std::move(orListElems));
+ return fleEqExpr;
}
std::unique_ptr<Expression> postVisit(Expression* exp) {
@@ -177,30 +357,28 @@ public:
// ignored when rewrites are done; there is no extra information in that child that
// doesn't exist in the FFPs in the $in list.
if (auto inList = dynamic_cast<ExpressionArray*>(inExpr->getOperandList()[1].get())) {
- return rewriteComparisonsToEncryptedField(inList->getChildren());
+ return rewriteInToEncryptedField(inExpr->getOperandList()[0].get(),
+ inList->getChildren());
}
} else if (auto eqExpr = dynamic_cast<ExpressionCompare*>(exp); eqExpr &&
(eqExpr->getOp() == ExpressionCompare::EQ ||
eqExpr->getOp() == ExpressionCompare::NE)) {
// Rewrite an $eq comparing an encrypted field and an encrypted constant to an $or.
- // Either child may be the constant, so try rewriting both.
- auto or0 = rewriteComparisonsToEncryptedField({eqExpr->getChildren()[0]});
- auto or1 = rewriteComparisonsToEncryptedField({eqExpr->getChildren()[1]});
- uassert(6334100, "Cannot compare two encrypted constants to each other", !or0 || !or1);
+ auto newExpr = rewriteComparisonsToEncryptedField(eqExpr->getChildren());
// Neither child is an encrypted constant, and no rewriting needs to be done.
- if (!or0 && !or1) {
+ if (!newExpr) {
return nullptr;
}
// Exactly one child was an encrypted constant. The other child can be ignored; there is
// no extra information in that child that doesn't exist in the FFP.
if (eqExpr->getOp() == ExpressionCompare::NE) {
- std::vector<boost::intrusive_ptr<Expression>> notChild{(or0 ? or0 : or1).release()};
+ std::vector<boost::intrusive_ptr<Expression>> notChild{newExpr.release()};
return std::make_unique<ExpressionNot>(queryRewriter->expCtx(),
std::move(notChild));
}
- return std::move(or0 ? or0 : or1);
+ return newExpr;
}
return nullptr;
@@ -213,11 +391,14 @@ public:
BSONObj rewriteEncryptedFilter(const FLEStateCollectionReader& escReader,
const FLEStateCollectionReader& eccReader,
boost::intrusive_ptr<ExpressionContext> expCtx,
- BSONObj filter) {
+ BSONObj filter,
+ HighCardinalityModeAllowed mode) {
+
if (auto rewritten =
- FLEQueryRewriter(expCtx, escReader, eccReader).rewriteMatchExpression(filter)) {
+ FLEQueryRewriter(expCtx, escReader, eccReader, mode).rewriteMatchExpression(filter)) {
return rewritten.get();
}
+
return filter;
}
@@ -273,16 +454,18 @@ public:
FilterRewrite(boost::intrusive_ptr<ExpressionContext> expCtx,
const NamespaceString& nss,
const EncryptionInformation& encryptInfo,
- const BSONObj toRewrite)
- : RewriteBase(expCtx, nss, encryptInfo), userFilter(toRewrite) {}
+ const BSONObj toRewrite,
+ HighCardinalityModeAllowed mode)
+ : RewriteBase(expCtx, nss, encryptInfo), userFilter(toRewrite), _mode(mode) {}
~FilterRewrite(){};
void doRewrite(FLEStateCollectionReader& escReader, FLEStateCollectionReader& eccReader) final {
- rewrittenFilter = rewriteEncryptedFilter(escReader, eccReader, expCtx, userFilter);
+ rewrittenFilter = rewriteEncryptedFilter(escReader, eccReader, expCtx, userFilter, _mode);
}
const BSONObj userFilter;
BSONObj rewrittenFilter;
+ HighCardinalityModeAllowed _mode;
};
// This helper executes the rewrite(s) inside a transaction. The transaction runs in a separate
@@ -324,7 +507,8 @@ BSONObj rewriteEncryptedFilterInsideTxn(FLEQueryInterface* queryImpl,
StringData db,
const EncryptedFieldConfig& efc,
boost::intrusive_ptr<ExpressionContext> expCtx,
- BSONObj filter) {
+ BSONObj filter,
+ HighCardinalityModeAllowed mode) {
auto makeCollectionReader = [&](FLEQueryInterface* queryImpl, const StringData& coll) {
NamespaceString nss(db, coll);
auto docCount = queryImpl->countDocuments(nss);
@@ -332,7 +516,8 @@ BSONObj rewriteEncryptedFilterInsideTxn(FLEQueryInterface* queryImpl,
};
auto escReader = makeCollectionReader(queryImpl, efc.getEscCollection().get());
auto eccReader = makeCollectionReader(queryImpl, efc.getEccCollection().get());
- return rewriteEncryptedFilter(escReader, eccReader, expCtx, filter);
+
+ return rewriteEncryptedFilter(escReader, eccReader, expCtx, filter, mode);
}
BSONObj rewriteQuery(OperationContext* opCtx,
@@ -340,8 +525,9 @@ BSONObj rewriteQuery(OperationContext* opCtx,
const NamespaceString& nss,
const EncryptionInformation& info,
BSONObj filter,
- GetTxnCallback getTransaction) {
- auto sharedBlock = std::make_shared<FilterRewrite>(expCtx, nss, info, filter);
+ GetTxnCallback getTransaction,
+ HighCardinalityModeAllowed mode) {
+ auto sharedBlock = std::make_shared<FilterRewrite>(expCtx, nss, info, filter, mode);
doFLERewriteInTxn(opCtx, sharedBlock, getTransaction);
return sharedBlock->rewrittenFilter.getOwned();
}
@@ -365,7 +551,8 @@ void processFindCommand(OperationContext* opCtx,
nss,
findCommand->getEncryptionInformation().get(),
findCommand->getFilter().getOwned(),
- getTransaction));
+ getTransaction,
+ HighCardinalityModeAllowed::kAllow));
// The presence of encryptionInformation is a signal that this is a FLE request that requires
// special processing. Once we've rewritten the query, it's no longer a "special" FLE query, but
// a normal query that can be executed by the query system like any other, so remove
@@ -389,7 +576,8 @@ void processCountCommand(OperationContext* opCtx,
nss,
countCommand->getEncryptionInformation().get(),
countCommand->getQuery().getOwned(),
- getTxn));
+ getTxn,
+ HighCardinalityModeAllowed::kAllow));
// The presence of encryptionInformation is a signal that this is a FLE request that requires
// special processing. Once we've rewritten the query, it's no longer a "special" FLE query, but
// a normal query that can be executed by the query system like any other, so remove
@@ -504,59 +692,112 @@ std::vector<Value> FLEQueryRewriter::rewritePayloadAsTags(Value fleFindPayload)
return tagVec;
}
-std::unique_ptr<InMatchExpression> FLEQueryRewriter::rewriteEq(
- const EqualityMatchExpression* expr) {
+
+std::unique_ptr<MatchExpression> FLEQueryRewriter::rewriteEq(const EqualityMatchExpression* expr) {
auto ffp = expr->getData();
if (!isFleFindPayload(ffp)) {
return nullptr;
}
- auto obj = rewritePayloadAsTags(ffp);
-
- auto tags = std::vector<BSONElement>();
- obj.elems(tags);
+ if (_mode != HighCardinalityMode::kForceAlways) {
+ try {
+ auto obj = rewritePayloadAsTags(ffp);
+
+ auto tags = std::vector<BSONElement>();
+ obj.elems(tags);
+
+ auto inExpr = std::make_unique<InMatchExpression>(kSafeContent);
+ inExpr->setBackingBSON(std::move(obj));
+ auto status = inExpr->setEqualities(std::move(tags));
+ uassertStatusOK(status);
+ _rewroteLastExpression = true;
+ return inExpr;
+ } catch (const ExceptionFor<ErrorCodes::FLEMaxTagLimitExceeded>& ex) {
+ LOGV2_DEBUG(6672410,
+ 2,
+ "FLE Max tag limit hit during query $eq rewrite",
+ "__error__"_attr = ex.what());
+
+ if (_mode != HighCardinalityMode::kUseIfNeeded) {
+ throw;
+ }
- auto inExpr = std::make_unique<InMatchExpression>(kSafeContent);
- inExpr->setBackingBSON(std::move(obj));
- auto status = inExpr->setEqualities(std::move(tags));
- uassertStatusOK(status);
+ // fall through
+ }
+ }
+ auto exprMatch = generateFleEqualMatchAndExpr(expr->path(), ffp, _expCtx.get());
_rewroteLastExpression = true;
- return inExpr;
+ return exprMatch;
}
-std::unique_ptr<InMatchExpression> FLEQueryRewriter::rewriteIn(const InMatchExpression* expr) {
- auto backingBSONBuilder = BSONArrayBuilder();
+std::unique_ptr<MatchExpression> FLEQueryRewriter::rewriteIn(const InMatchExpression* expr) {
size_t numFFPs = 0;
for (auto& eq : expr->getEqualities()) {
if (isFleFindPayload(eq)) {
- auto obj = rewritePayloadAsTags(eq);
++numFFPs;
- for (auto&& elt : obj) {
- backingBSONBuilder.append(elt);
- }
}
}
+
if (numFFPs == 0) {
return nullptr;
}
+
// All elements in an encrypted $in expression should be FFPs.
uassert(
6329400,
"If any elements in a $in expression are encrypted, then all elements should be encrypted.",
numFFPs == expr->getEqualities().size());
- auto backingBSON = backingBSONBuilder.arr();
- auto allTags = std::vector<BSONElement>();
- backingBSON.elems(allTags);
+ if (_mode != HighCardinalityMode::kForceAlways) {
+
+ try {
+ auto backingBSONBuilder = BSONArrayBuilder();
+
+ for (auto& eq : expr->getEqualities()) {
+ auto obj = rewritePayloadAsTags(eq);
+ for (auto&& elt : obj) {
+ backingBSONBuilder.append(elt);
+ }
+ }
- auto inExpr = std::make_unique<InMatchExpression>(kSafeContent);
- inExpr->setBackingBSON(std::move(backingBSON));
- auto status = inExpr->setEqualities(std::move(allTags));
- uassertStatusOK(status);
+ auto backingBSON = backingBSONBuilder.arr();
+ auto allTags = std::vector<BSONElement>();
+ backingBSON.elems(allTags);
+
+ auto inExpr = std::make_unique<InMatchExpression>(kSafeContent);
+ inExpr->setBackingBSON(std::move(backingBSON));
+ auto status = inExpr->setEqualities(std::move(allTags));
+ uassertStatusOK(status);
+
+ _rewroteLastExpression = true;
+ return inExpr;
+
+ } catch (const ExceptionFor<ErrorCodes::FLEMaxTagLimitExceeded>& ex) {
+ LOGV2_DEBUG(6672411,
+ 2,
+ "FLE Max tag limit hit during query $in rewrite",
+ "__error__"_attr = ex.what());
+
+ if (_mode != HighCardinalityMode::kUseIfNeeded) {
+ throw;
+ }
+
+ // fall through
+ }
+ }
+
+ std::vector<std::unique_ptr<MatchExpression>> matches;
+ matches.reserve(numFFPs);
+
+ for (auto& eq : expr->getEqualities()) {
+ auto exprMatch = generateFleEqualMatchAndExpr(expr->path(), eq, _expCtx.get());
+ matches.push_back(std::move(exprMatch));
+ }
+ auto orExpr = std::make_unique<OrMatchExpression>(std::move(matches));
_rewroteLastExpression = true;
- return inExpr;
+ return orExpr;
}
} // namespace mongo::fle
diff --git a/src/mongo/db/query/fle/server_rewrite.h b/src/mongo/db/query/fle/server_rewrite.h
index ed84ea283c5..bf02eeebd4e 100644
--- a/src/mongo/db/query/fle/server_rewrite.h
+++ b/src/mongo/db/query/fle/server_rewrite.h
@@ -31,7 +31,7 @@
#include <memory>
-#include "boost/smart_ptr/intrusive_ptr.hpp"
+#include <boost/smart_ptr/intrusive_ptr.hpp>
#include "mongo/bson/bsonobj.h"
#include "mongo/crypto/fle_crypto.h"
@@ -47,6 +47,14 @@ class FLEQueryInterface;
namespace fle {
/**
+ * Low Selectivity rewrites use $expr which is not supported in all commands such as upserts.
+ */
+enum class HighCardinalityModeAllowed {
+ kAllow,
+ kDisallow,
+};
+
+/**
* Make a collator object from its BSON representation. Useful when creating ExpressionContext
* objects for parsing MatchExpressions as part of the server-side rewrite.
*/
@@ -62,7 +70,8 @@ BSONObj rewriteQuery(OperationContext* opCtx,
const NamespaceString& nss,
const EncryptionInformation& info,
BSONObj filter,
- GetTxnCallback getTransaction);
+ GetTxnCallback getTransaction,
+ HighCardinalityModeAllowed mode);
/**
* Process a find command with encryptionInformation in-place, rewriting the filter condition so
@@ -100,11 +109,13 @@ std::unique_ptr<Pipeline, PipelineDeleter> processPipeline(
* from inside an existing transaction using a FLEQueryInterface constructed from a
* transaction client.
*/
-BSONObj rewriteEncryptedFilterInsideTxn(FLEQueryInterface* queryImpl,
- StringData db,
- const EncryptedFieldConfig& efc,
- boost::intrusive_ptr<ExpressionContext> expCtx,
- BSONObj filter);
+BSONObj rewriteEncryptedFilterInsideTxn(
+ FLEQueryInterface* queryImpl,
+ StringData db,
+ const EncryptedFieldConfig& efc,
+ boost::intrusive_ptr<ExpressionContext> expCtx,
+ BSONObj filter,
+ HighCardinalityModeAllowed mode = HighCardinalityModeAllowed::kDisallow);
/**
* Class which handles rewriting filter MatchExpressions for FLE2. The functionality is encapsulated
@@ -116,14 +127,37 @@ BSONObj rewriteEncryptedFilterInsideTxn(FLEQueryInterface* queryImpl,
*/
class FLEQueryRewriter {
public:
+ enum class HighCardinalityMode {
+ // Always use high cardinality filters, used by tests
+ kForceAlways,
+
+ // Use high cardinality mode if $in rewrites do not fit in the
+ // internalQueryFLERewriteMemoryLimit memory limit
+ kUseIfNeeded,
+
+ // Do not rewrite into high cardinality filter, throw exceptions instead
+ // Some contexts like upsert do not support $expr
+ kDisallow,
+ };
+
/**
* Takes in references to collection readers for the ESC and ECC that are used during tag
* computation.
*/
FLEQueryRewriter(boost::intrusive_ptr<ExpressionContext> expCtx,
const FLEStateCollectionReader& escReader,
- const FLEStateCollectionReader& eccReader)
+ const FLEStateCollectionReader& eccReader,
+ HighCardinalityModeAllowed mode = HighCardinalityModeAllowed::kAllow)
: _expCtx(expCtx), _escReader(&escReader), _eccReader(&eccReader) {
+
+ if (internalQueryFLEAlwaysUseHighCardinalityMode.load()) {
+ _mode = HighCardinalityMode::kForceAlways;
+ }
+
+ if (mode == HighCardinalityModeAllowed::kDisallow) {
+ _mode = HighCardinalityMode::kDisallow;
+ }
+
// This isn't the "real" query so we don't want to increment Expression
// counters here.
_expCtx->stopExpressionCounters();
@@ -184,6 +218,18 @@ public:
return _expCtx.get();
}
+ bool isForceHighCardinality() const {
+ return _mode == HighCardinalityMode::kForceAlways;
+ }
+
+ void setForceHighCardinalityForTest() {
+ _mode = HighCardinalityMode::kForceAlways;
+ }
+
+ HighCardinalityMode getHighCardinalityMode() const {
+ return _mode;
+ }
+
protected:
// This constructor should only be used for mocks in testing.
FLEQueryRewriter(boost::intrusive_ptr<ExpressionContext> expCtx)
@@ -196,8 +242,8 @@ private:
std::unique_ptr<MatchExpression> _rewrite(MatchExpression* me);
virtual BSONObj rewritePayloadAsTags(BSONElement fleFindPayload) const;
- std::unique_ptr<InMatchExpression> rewriteEq(const EqualityMatchExpression* expr);
- std::unique_ptr<InMatchExpression> rewriteIn(const InMatchExpression* expr);
+ std::unique_ptr<MatchExpression> rewriteEq(const EqualityMatchExpression* expr);
+ std::unique_ptr<MatchExpression> rewriteIn(const InMatchExpression* expr);
boost::intrusive_ptr<ExpressionContext> _expCtx;
@@ -208,6 +254,9 @@ private:
// True if the last Expression or MatchExpression processed by this rewriter was rewritten.
bool _rewroteLastExpression = false;
+
+ // Controls how query rewriter rewrites the query
+ HighCardinalityMode _mode{HighCardinalityMode::kUseIfNeeded};
};
diff --git a/src/mongo/db/query/fle/server_rewrite_test.cpp b/src/mongo/db/query/fle/server_rewrite_test.cpp
index cb81656dcb6..034de8f0aa9 100644
--- a/src/mongo/db/query/fle/server_rewrite_test.cpp
+++ b/src/mongo/db/query/fle/server_rewrite_test.cpp
@@ -31,7 +31,9 @@
#include <memory>
#include "mongo/bson/bsonelement.h"
+#include "mongo/bson/bsonmisc.h"
#include "mongo/bson/bsonobjbuilder.h"
+#include "mongo/bson/bsontypes.h"
#include "mongo/db/matcher/expression_leaf.h"
#include "mongo/db/pipeline/expression_context_for_test.h"
#include "mongo/db/query/fle/server_rewrite.h"
@@ -42,9 +44,19 @@
namespace mongo {
namespace {
-class MockFLEQueryRewriter : public fle::FLEQueryRewriter {
+class BasicMockFLEQueryRewriter : public fle::FLEQueryRewriter {
public:
- MockFLEQueryRewriter() : fle::FLEQueryRewriter(new ExpressionContextForTest()), _tags() {}
+ BasicMockFLEQueryRewriter() : fle::FLEQueryRewriter(new ExpressionContextForTest()) {}
+
+ BSONObj rewriteMatchExpressionForTest(const BSONObj& obj) {
+ auto res = rewriteMatchExpression(obj);
+ return res ? res.get() : obj;
+ }
+};
+
+class MockFLEQueryRewriter : public BasicMockFLEQueryRewriter {
+public:
+ MockFLEQueryRewriter() : _tags() {}
bool isFleFindPayload(const BSONElement& fleFindPayload) const override {
return _encryptedFields.find(fleFindPayload.fieldNameStringData()) !=
@@ -56,11 +68,6 @@ public:
_tags[fieldvalue] = tags;
}
- BSONObj rewriteMatchExpressionForTest(const BSONObj& obj) {
- auto res = rewriteMatchExpression(obj);
- return res ? res.get() : obj;
- }
-
private:
BSONObj rewritePayloadAsTags(BSONElement fleFindPayload) const override {
ASSERT(fleFindPayload.isNumber()); // Only accept numbers as mock FFPs.
@@ -72,6 +79,7 @@ private:
std::map<std::pair<StringData, int>, BSONObj> _tags;
std::set<StringData> _encryptedFields;
};
+
class FLEServerRewriteTest : public unittest::Test {
public:
FLEServerRewriteTest() {}
@@ -361,5 +369,290 @@ TEST_F(FLEServerRewriteTest, ComparisonToObjectIgnored) {
}
}
+template <typename T>
+std::vector<uint8_t> toEncryptedVector(EncryptedBinDataType dt, T t) {
+ BSONObj obj = t.toBSON();
+
+ std::vector<uint8_t> buf(obj.objsize() + 1);
+ buf[0] = static_cast<uint8_t>(dt);
+
+ std::copy(obj.objdata(), obj.objdata() + obj.objsize(), buf.data() + 1);
+
+ return buf;
+}
+
+template <typename T>
+void toEncryptedBinData(StringData field, EncryptedBinDataType dt, T t, BSONObjBuilder* builder) {
+ auto buf = toEncryptedVector(dt, t);
+
+ builder->appendBinData(field, buf.size(), BinDataType::Encrypt, buf.data());
+}
+
+constexpr auto kIndexKeyId = "12345678-1234-9876-1234-123456789012"_sd;
+constexpr auto kUserKeyId = "ABCDEFAB-1234-9876-1234-123456789012"_sd;
+static UUID indexKeyId = uassertStatusOK(UUID::parse(kIndexKeyId.toString()));
+static UUID userKeyId = uassertStatusOK(UUID::parse(kUserKeyId.toString()));
+
+std::vector<char> testValue = {0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19};
+std::vector<char> testValue2 = {0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29};
+
+const FLEIndexKey& getIndexKey() {
+ static std::string indexVec = hexblob::decode(
+ "7dbfebc619aa68a659f64b8e23ccd21644ac326cb74a26840c3d2420176c40ae088294d00ad6cae9684237b21b754cf503f085c25cd320bf035c3417416e1e6fe3d9219f79586582112740b2add88e1030d91926ae8afc13ee575cfb8bb965b7"_sd);
+ static FLEIndexKey indexKey(KeyMaterial(indexVec.begin(), indexVec.end()));
+ return indexKey;
+}
+
+const FLEUserKey& getUserKey() {
+ static std::string userVec = hexblob::decode(
+ "a7ddbc4c8be00d51f68d9d8e485f351c8edc8d2206b24d8e0e1816d005fbe520e489125047d647b0d8684bfbdbf09c304085ed086aba6c2b2b1677ccc91ced8847a733bf5e5682c84b3ee7969e4a5fe0e0c21e5e3ee190595a55f83147d8de2a"_sd);
+ static FLEUserKey userKey(KeyMaterial(userVec.begin(), userVec.end()));
+ return userKey;
+}
+
+
+BSONObj generateFFP(StringData path, int value) {
+ auto indexKey = getIndexKey();
+ FLEIndexKeyAndId indexKeyAndId(indexKey.data, indexKeyId);
+ auto userKey = getUserKey();
+ FLEUserKeyAndId userKeyAndId(userKey.data, indexKeyId);
+
+ BSONObj doc = BSON("value" << value);
+ auto element = doc.firstElement();
+ auto fpp = FLEClientCrypto::serializeFindPayload(indexKeyAndId, userKeyAndId, element, 0);
+
+ BSONObjBuilder builder;
+ toEncryptedBinData(path, EncryptedBinDataType::kFLE2FindEqualityPayload, fpp, &builder);
+ return builder.obj();
+}
+
+class FLEServerHighCardRewriteTest : public unittest::Test {
+public:
+ FLEServerHighCardRewriteTest() {}
+
+ void setUp() override {}
+
+ void tearDown() override {}
+
+protected:
+ BasicMockFLEQueryRewriter _mock;
+};
+
+
+TEST_F(FLEServerHighCardRewriteTest, HighCard_TopLevel_Equality) {
+ _mock.setForceHighCardinalityForTest();
+
+ auto match = generateFFP("ssn", 1);
+ auto expected = fromjson(R"({
+ "$expr": {
+ "$_internalFleEq": {
+ "field": "$ssn",
+ "edc": {
+ "$binary": {
+ "base64": "CEWSmQID7SfwyAUI3ZkSFkATKryDQfnxXEOGad5d4Rsg",
+ "subType": "6"
+ }
+ },
+ "counter": {
+ "$numberLong": "0"
+ },
+ "server": {
+ "$binary": {
+ "base64": "COuac/eRLYakKX6B0vZ1r3QodOQFfjqJD+xlGiPu4/Ps",
+ "subType": "6"
+ }
+ }
+ }
+ }
+})");
+
+ auto actual = _mock.rewriteMatchExpressionForTest(match);
+ ASSERT_BSONOBJ_EQ(actual, expected);
+}
+
+
+TEST_F(FLEServerHighCardRewriteTest, HighCard_TopLevel_In) {
+ _mock.setForceHighCardinalityForTest();
+
+ auto ffp1 = generateFFP("ssn", 1);
+ auto ffp2 = generateFFP("ssn", 2);
+ auto ffp3 = generateFFP("ssn", 3);
+ auto expected = fromjson(R"({
+ "$or": [
+ {
+ "$expr": {
+ "$_internalFleEq": {
+ "field": "$ssn",
+ "edc": {
+ "$binary": {
+ "base64": "CEWSmQID7SfwyAUI3ZkSFkATKryDQfnxXEOGad5d4Rsg",
+ "subType": "6"
+ }
+ },
+ "counter": {
+ "$numberLong": "0"
+ },
+ "server": {
+ "$binary": {
+ "base64": "COuac/eRLYakKX6B0vZ1r3QodOQFfjqJD+xlGiPu4/Ps",
+ "subType": "6"
+ }
+ }
+ }
+ }
+ },
+ {
+ "$expr": {
+ "$_internalFleEq": {
+ "field": "$ssn",
+ "edc": {
+ "$binary": {
+ "base64": "CLpCo6rNuYMVT+6n1HCX15MNrVYDNqf6udO46ayo43Sw",
+ "subType": "6"
+ }
+ },
+ "counter": {
+ "$numberLong": "0"
+ },
+ "server": {
+ "$binary": {
+ "base64": "COuac/eRLYakKX6B0vZ1r3QodOQFfjqJD+xlGiPu4/Ps",
+ "subType": "6"
+ }
+ }
+ }
+ }
+ },
+ {
+ "$expr": {
+ "$_internalFleEq": {
+ "field": "$ssn",
+ "edc": {
+ "$binary": {
+ "base64": "CPi44oCQHnNDeRqHsNLzbdCeHt2DK/wCly0g2dxU5fqN",
+ "subType": "6"
+ }
+ },
+ "counter": {
+ "$numberLong": "0"
+ },
+ "server": {
+ "$binary": {
+ "base64": "COuac/eRLYakKX6B0vZ1r3QodOQFfjqJD+xlGiPu4/Ps",
+ "subType": "6"
+ }
+ }
+ }
+ }
+ }
+ ]
+})");
+
+ auto match =
+ BSON("ssn" << BSON("$in" << BSON_ARRAY(ffp1.firstElement()
+ << ffp2.firstElement() << ffp3.firstElement())));
+
+ auto actual = _mock.rewriteMatchExpressionForTest(match);
+ ASSERT_BSONOBJ_EQ(actual, expected);
+}
+
+
+TEST_F(FLEServerHighCardRewriteTest, HighCard_TopLevel_Expr) {
+
+ _mock.setForceHighCardinalityForTest();
+
+ auto ffp = generateFFP("$ssn", 1);
+ int len;
+ auto v = ffp.firstElement().binDataClean(len);
+ auto match = BSON("$expr" << BSON("$eq" << BSON_ARRAY(ffp.firstElement().fieldName()
+ << BSONBinData(v, len, Encrypt))));
+
+ auto expected = fromjson(R"({ "$expr": {
+ "$_internalFleEq": {
+ "field": "$ssn",
+ "edc": {
+ "$binary": {
+ "base64": "CEWSmQID7SfwyAUI3ZkSFkATKryDQfnxXEOGad5d4Rsg",
+ "subType": "6"
+ }
+ },
+ "counter": {
+ "$numberLong": "0"
+ },
+ "server": {
+ "$binary": {
+ "base64": "COuac/eRLYakKX6B0vZ1r3QodOQFfjqJD+xlGiPu4/Ps",
+ "subType": "6"
+ }
+ }
+ }
+ }
+ })");
+
+ auto actual = _mock.rewriteMatchExpressionForTest(match);
+ ASSERT_BSONOBJ_EQ(actual, expected);
+}
+
+TEST_F(FLEServerHighCardRewriteTest, HighCard_TopLevel_Expr_In) {
+
+ _mock.setForceHighCardinalityForTest();
+
+ auto ffp = generateFFP("$ssn", 1);
+ int len;
+ auto v = ffp.firstElement().binDataClean(len);
+
+ auto ffp2 = generateFFP("$ssn", 1);
+ int len2;
+ auto v2 = ffp2.firstElement().binDataClean(len2);
+
+ auto match = BSON(
+ "$expr" << BSON("$in" << BSON_ARRAY(ffp.firstElement().fieldName()
+ << BSON_ARRAY(BSONBinData(v, len, Encrypt)
+ << BSONBinData(v2, len2, Encrypt)))));
+
+ auto expected = fromjson(R"({ "$expr": { "$or" : [ {
+ "$_internalFleEq": {
+ "field": "$ssn",
+ "edc": {
+ "$binary": {
+ "base64": "CEWSmQID7SfwyAUI3ZkSFkATKryDQfnxXEOGad5d4Rsg",
+ "subType": "6"
+ }
+ },
+ "counter": {
+ "$numberLong": "0"
+ },
+ "server": {
+ "$binary": {
+ "base64": "COuac/eRLYakKX6B0vZ1r3QodOQFfjqJD+xlGiPu4/Ps",
+ "subType": "6"
+ }
+ }
+ }},
+ {
+ "$_internalFleEq": {
+ "field": "$ssn",
+ "edc": {
+ "$binary": {
+ "base64": "CEWSmQID7SfwyAUI3ZkSFkATKryDQfnxXEOGad5d4Rsg",
+ "subType": "6"
+ }
+ },
+ "counter": {
+ "$numberLong": "0"
+ },
+ "server": {
+ "$binary": {
+ "base64": "COuac/eRLYakKX6B0vZ1r3QodOQFfjqJD+xlGiPu4/Ps",
+ "subType": "6"
+ }
+ }
+ }}
+ ]}})");
+
+ auto actual = _mock.rewriteMatchExpressionForTest(match);
+ ASSERT_BSONOBJ_EQ(actual, expected);
+}
+
} // namespace
} // namespace mongo
diff --git a/src/mongo/db/query/get_executor.cpp b/src/mongo/db/query/get_executor.cpp
index 5c22beab210..6c77f43ae1a 100644
--- a/src/mongo/db/query/get_executor.cpp
+++ b/src/mongo/db/query/get_executor.cpp
@@ -647,7 +647,7 @@ public:
_cq->setCollator(mainColl->getDefaultCollator()->clone());
}
- auto planCacheKey = plan_cache_key_factory::make<KeyType>(*_cq, mainColl);
+ auto planCacheKey = buildPlanCacheKey();
// Fill in some opDebug information, unless it has already been filled by an outer pipeline.
OpDebug& opDebug = CurOp::get(_opCtx)->debug();
if (!opDebug.queryHash) {
@@ -743,6 +743,11 @@ protected:
virtual PlanStageType buildExecutableTree(const QuerySolution& solution) const = 0;
/**
+ * Constructs the plan cache key.
+ */
+ virtual KeyType buildPlanCacheKey() const = 0;
+
+ /**
* Either constructs a PlanStage tree from a cached plan (if exists in the plan cache), or
* constructs a "id hack" PlanStage tree. Returns nullptr if no cached plan or id hack plan can
* be constructed.
@@ -879,6 +884,10 @@ protected:
return result;
}
+ PlanCacheKey buildPlanCacheKey() const {
+ return plan_cache_key_factory::make<PlanCacheKey>(*_cq, _collection);
+ }
+
std::unique_ptr<ClassicPrepareExecutionResult> buildCachedPlan(
const PlanCacheKey& planCacheKey) final {
initializePlannerParamsIfNeeded();
@@ -1083,13 +1092,17 @@ protected:
return result;
}
+ sbe::PlanCacheKey buildPlanCacheKey() const {
+ return plan_cache_key_factory::make(*_cq, _collections);
+ }
+
std::unique_ptr<SlotBasedPrepareExecutionResult> buildCachedPlan(
const sbe::PlanCacheKey& planCacheKey) final {
if (shouldCacheQuery(*_cq)) {
- // TODO SERVER-61507: remove _cq->pipeline().empty() check when $group pushdown is
+ // TODO SERVER-61507: remove canUseSbePlanCache check when $group pushdown is
// integrated with SBE plan cache.
if (!feature_flags::gFeatureFlagSbePlanCache.isEnabledAndIgnoreFCV() ||
- !_cq->pipeline().empty()) {
+ !canonical_query_encoder::canUseSbePlanCache(*_cq)) {
// If the feature flag is off, we first try to build an "id hack" plan because the
// id hack plans are not cached in the classic cache. We then fall back to use the
// classic plan cache.
@@ -1346,18 +1359,19 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> getSlotBasedExe
// No need for runtime planning, just use the constructed plan stage tree.
invariant(solutions.size() == 1);
invariant(roots.size() == 1);
- if (!cq->pipeline().empty()) {
- // Need to extend the solution with the agg pipeline and rebuild the execution tree.
- solutions[0] = QueryPlanner::extendWithAggPipeline(
- *cq,
- std::move(solutions[0]),
- fillOutSecondaryCollectionsInformation(opCtx, collections, cq.get()));
- roots[0] = helper.buildExecutableTree(*(solutions[0]));
- }
auto&& [root, data] = roots[0];
+
if (!planningResult->recoveredPinnedCacheEntry()) {
- plan_cache_util::updatePlanCache(
- opCtx, collections.getMainCollection(), *cq, *solutions[0], *root, data);
+ if (!cq->pipeline().empty()) {
+ // Need to extend the solution with the agg pipeline and rebuild the execution tree.
+ solutions[0] = QueryPlanner::extendWithAggPipeline(
+ *cq,
+ std::move(solutions[0]),
+ fillOutSecondaryCollectionsInformation(opCtx, collections, cq.get()));
+ roots[0] = helper.buildExecutableTree(*(solutions[0]));
+ }
+
+ plan_cache_util::updatePlanCache(opCtx, collections, *cq, *solutions[0], *root, data);
}
// Prepare the SBE tree for execution.
diff --git a/src/mongo/db/query/get_executor.h b/src/mongo/db/query/get_executor.h
index 20ca265bcbb..e913108679c 100644
--- a/src/mongo/db/query/get_executor.h
+++ b/src/mongo/db/query/get_executor.h
@@ -42,6 +42,7 @@
#include "mongo/db/query/multiple_collection_accessor.h"
#include "mongo/db/query/parsed_distinct.h"
#include "mongo/db/query/plan_executor.h"
+#include "mongo/db/query/query_planner.h"
#include "mongo/db/query/query_planner_params.h"
#include "mongo/db/query/query_settings.h"
#include "mongo/db/query/query_solution.h"
@@ -157,7 +158,7 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> getExecutor(
std::unique_ptr<CanonicalQuery> canonicalQuery,
std::function<void(CanonicalQuery*)> extractAndAttachPipelineStages,
PlanYieldPolicy::YieldPolicy yieldPolicy,
- size_t plannerOptions = 0);
+ const QueryPlannerParams& plannerOptions);
StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> getExecutor(
OperationContext* opCtx,
@@ -192,7 +193,7 @@ StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> getExecutorFind
std::unique_ptr<CanonicalQuery> canonicalQuery,
std::function<void(CanonicalQuery*)> extractAndAttachPipelineStages,
bool permitYield = false,
- size_t plannerOptions = QueryPlannerParams::DEFAULT);
+ QueryPlannerParams plannerOptions = QueryPlannerParams{});
StatusWith<std::unique_ptr<PlanExecutor, PlanExecutor::Deleter>> getExecutorFind(
OperationContext* opCtx,
diff --git a/src/mongo/db/query/interval_evaluation_tree.h b/src/mongo/db/query/interval_evaluation_tree.h
index 5b48ee0ae11..cc432129357 100644
--- a/src/mongo/db/query/interval_evaluation_tree.h
+++ b/src/mongo/db/query/interval_evaluation_tree.h
@@ -55,7 +55,7 @@ using IET =
* ConstNode is a node that represents an interval with constant bounds, such as (MinKey,
* MaxKey).
*/
-class ConstNode : public optimizer::algebra::OpSpecificArity<IET, ConstNode, 0> {
+class ConstNode : public optimizer::algebra::OpSpecificArity<IET, 0> {
public:
explicit ConstNode(const OrderedIntervalList& oil) : oil{oil} {}
@@ -66,7 +66,7 @@ public:
* EvalNode is a node that evaluates an interval from a simple predicate such as {$gt: p1} where
* p1 is a parameter value known at runtime.
*/
-class EvalNode : public optimizer::algebra::OpSpecificArity<IET, EvalNode, 0> {
+class EvalNode : public optimizer::algebra::OpSpecificArity<IET, 0> {
public:
using InputParamId = MatchExpression::InputParamId;
@@ -89,9 +89,9 @@ private:
/**
* IntersectNode is a node that represents an intersection of two intervals.
*/
-class IntersectNode : public optimizer::algebra::OpSpecificArity<IET, IntersectNode, 2> {
+class IntersectNode : public optimizer::algebra::OpSpecificArity<IET, 2> {
public:
- using Base = optimizer::algebra::OpSpecificArity<IET, IntersectNode, 2>;
+ using Base = optimizer::algebra::OpSpecificArity<IET, 2>;
IntersectNode(IET lhs, IET rhs) : Base(std::move(lhs), std::move(rhs)) {}
};
@@ -99,9 +99,9 @@ public:
/**
* UnionNode is a node that represents a union of two intervals.
*/
-class UnionNode : public optimizer::algebra::OpSpecificArity<IET, UnionNode, 2> {
+class UnionNode : public optimizer::algebra::OpSpecificArity<IET, 2> {
public:
- using Base = optimizer::algebra::OpSpecificArity<IET, UnionNode, 2>;
+ using Base = optimizer::algebra::OpSpecificArity<IET, 2>;
UnionNode(IET lhs, IET rhs) : Base(std::move(lhs), std::move(rhs)) {}
};
@@ -109,9 +109,9 @@ public:
/**
* ComplementNode is a node that complements its child.
*/
-class ComplementNode : public optimizer::algebra::OpSpecificArity<IET, ComplementNode, 1> {
+class ComplementNode : public optimizer::algebra::OpSpecificArity<IET, 1> {
public:
- using Base = optimizer::algebra::OpSpecificArity<IET, ComplementNode, 1>;
+ using Base = optimizer::algebra::OpSpecificArity<IET, 1>;
ComplementNode(IET child) : Base(std::move(child)) {}
};
diff --git a/src/mongo/db/query/optimizer/algebra/algebra_test.cpp b/src/mongo/db/query/optimizer/algebra/algebra_test.cpp
index 48e668a6e32..013c5a67568 100644
--- a/src/mongo/db/query/optimizer/algebra/algebra_test.cpp
+++ b/src/mongo/db/query/optimizer/algebra/algebra_test.cpp
@@ -44,41 +44,40 @@ using Tree = PolyValue<Leaf, BinaryNode, NaryNode, AtLeastBinaryNode>;
/**
* A leaf in the tree. Just contains data - in this case a double.
*/
-class Leaf : public OpSpecificArity<Tree, Leaf, 0> {
+class Leaf : public OpSpecificArity<Tree, 0> {
public:
Leaf(double x) : x(x) {}
+
double x;
};
/**
* An inner node in the tree with exactly two children.
*/
-class BinaryNode : public OpSpecificArity<Tree, BinaryNode, 2> {
+class BinaryNode : public OpSpecificArity<Tree, 2> {
public:
BinaryNode(Tree left, Tree right)
- : OpSpecificArity<Tree, BinaryNode, 2>(std::move(left), std::move(right)) {}
+ : OpSpecificArity<Tree, 2>(std::move(left), std::move(right)) {}
};
/**
* An inner node in the tree with any number of children, zero or greater.
*/
-class NaryNode : public OpSpecificDynamicArity<Tree, NaryNode, 0> {
+class NaryNode : public OpSpecificDynamicArity<Tree, 0> {
public:
- NaryNode(std::vector<Tree> children)
- : OpSpecificDynamicArity<Tree, NaryNode, 0>(std::move(children)) {}
+ NaryNode(std::vector<Tree> children) : OpSpecificDynamicArity<Tree, 0>(std::move(children)) {}
};
/**
* An inner node in the tree with 2 or more nodes.
*/
-class AtLeastBinaryNode : public OpSpecificDynamicArity<Tree, AtLeastBinaryNode, 2> {
+class AtLeastBinaryNode : public OpSpecificDynamicArity<Tree, 2> {
public:
/**
* Notice the required number of nodes are given as separate arguments from the vector.
*/
AtLeastBinaryNode(std::vector<Tree> children, Tree left, Tree right)
- : OpSpecificDynamicArity<Tree, AtLeastBinaryNode, 2>(
- std::move(children), std::move(left), std::move(right)) {}
+ : OpSpecificDynamicArity<Tree, 2>(std::move(children), std::move(left), std::move(right)) {}
};
/**
diff --git a/src/mongo/db/query/optimizer/algebra/operator.h b/src/mongo/db/query/optimizer/algebra/operator.h
index fb6dbc4d474..aa6220b53f1 100644
--- a/src/mongo/db/query/optimizer/algebra/operator.h
+++ b/src/mongo/db/query/optimizer/algebra/operator.h
@@ -29,67 +29,66 @@
#pragma once
+#include <stddef.h>
+#include <utility>
#include <vector>
-#include "mongo/db/query/optimizer/algebra/polyvalue.h"
+#include "mongo/util/concepts.h"
namespace mongo::optimizer {
namespace algebra {
+/**
+ * Concrete storage for 'S' items of type 'T'. This class is an alias for a static array, useful in
+ * a tree representation to store a node's children.
+ */
template <typename T, int S>
struct OpNodeStorage {
- T _nodes[S];
-
template <typename... Ts>
OpNodeStorage(Ts&&... vals) : _nodes{std::forward<Ts>(vals)...} {}
+
+protected:
+ T _nodes[S];
};
+/**
+ * Stub for nodes with no children.
+ */
template <typename T>
struct OpNodeStorage<T, 0> {};
-/*=====-----
- *
- * Arity of operator can be:
- * 1. statically known - A, A, A, ...
- * 2. dynamic prefix with optional statically know - vector<A>, A, A, A, ...
- *
- * Denotations map A to some B.
- * So static arity <A,A,A> is mapped to <B,B,B>.
- * Similarly, arity <vector<A>,A> is mapped to <vector<B>,B>
- *
- * There is a wrinkle when B is a reference (if allowed at all)
- * Arity <vector<A>, A, A> is mapped to <vector<B>&, B&, B&> - note that the reference is lifted
- * outside of the vector.
- *
+/**
+ * Nodes which have a specific arity (number of children) should derive from this class. The 'Slot'
+ * determines the generic type to hold for each child.
*/
-template <typename Slot, typename Derived, int Arity>
+template <typename Slot, int Arity>
class OpSpecificArity : public OpNodeStorage<Slot, Arity> {
using Base = OpNodeStorage<Slot, Arity>;
public:
- template <typename... Ts>
- OpSpecificArity(Ts&&... vals) : Base({std::forward<Ts>(vals)...}) {
- static_assert(sizeof...(Ts) == Arity, "constructor paramaters do not match");
- }
+ TEMPLATE(typename... Ts)
+ REQUIRES(sizeof...(Ts) == Arity)
+ OpSpecificArity(Ts&&... vals) : Base({std::forward<Ts>(vals)...}) {}
- template <int I, std::enable_if_t<(I >= 0 && I < Arity), int> = 0>
+ TEMPLATE(int I)
+ REQUIRES(I >= 0 && I < Arity)
auto& get() noexcept {
return this->_nodes[I];
}
- template <int I, std::enable_if_t<(I >= 0 && I < Arity), int> = 0>
+ TEMPLATE(int I)
+ REQUIRES(I >= 0 && I < Arity)
const auto& get() const noexcept {
return this->_nodes[I];
}
};
-/*=====-----
- *
- * Operator with dynamic arity
- *
+
+/**
+ * Nodes which have a known, minimum arity but may optionally contain more children.
*/
-template <typename Slot, typename Derived, int Arity>
-class OpSpecificDynamicArity : public OpSpecificArity<Slot, Derived, Arity> {
- using Base = OpSpecificArity<Slot, Derived, Arity>;
+template <typename Slot, int Arity>
+class OpSpecificDynamicArity : public OpSpecificArity<Slot, Arity> {
+ using Base = OpSpecificArity<Slot, Arity>;
std::vector<Slot> _dyNodes;
@@ -106,10 +105,8 @@ public:
}
};
-/*=====-----
- *
- * Semantic transport interface
- *
+/**
+ * Semantic transport interface.
*/
namespace detail {
template <typename D, typename T, typename... Args>
@@ -132,25 +129,35 @@ inline constexpr auto has_prepare_v =
has_prepare<void, call_prepare_slot_t, N, D, T, Args...>,
has_prepare<void, call_prepare_t, D, T, Args...>>::value;
-template <typename Slot, typename Derived, int Arity>
-inline constexpr int get_arity(const OpSpecificArity<Slot, Derived, Arity>*) {
+template <typename Slot, int Arity>
+inline constexpr int get_arity(const OpSpecificArity<Slot, Arity>*) {
return Arity;
}
-template <typename Slot, typename Derived, int Arity>
-inline constexpr bool is_dynamic(const OpSpecificArity<Slot, Derived, Arity>*) {
+template <typename Slot, int Arity>
+inline constexpr bool is_dynamic(const OpSpecificArity<Slot, Arity>*) {
return false;
}
-template <typename Slot, typename Derived, int Arity>
-inline constexpr bool is_dynamic(const OpSpecificDynamicArity<Slot, Derived, Arity>*) {
+template <typename Slot, int Arity>
+inline constexpr bool is_dynamic(const OpSpecificDynamicArity<Slot, Arity>*) {
return true;
}
template <typename T>
using OpConcreteType = typename std::remove_reference_t<T>::template get_t<0>;
+
} // namespace detail
+/**
+ * A transporter is similar to a tree walker that utilizes knowledge of the underlying Operator
+ * types to visit each node of an Operator tree in a bottom-up fashion. The Domain class
+ * 'D' is used as a callback mechanism by matching the relevant 'transport' overload with
+ * the particular node type and children results.
+ *
+ * The caller may optionally supply 'withSlot' to include a reference to the base PolyValue type as
+ * a first argument to the transport callbacks.
+ */
template <typename D, bool withSlot>
class OpTransporter {
D& _domain;
@@ -271,6 +278,12 @@ public:
}
};
+/**
+ * Walker for the Operator* types. Accepts a domain 'D' of 'walk' callback overloads.
+ *
+ * The caller may optionally supply 'withSlot' to include a reference to base PolyValue as a first
+ * argument to the walk callbacks.
+ */
template <typename D, bool withSlot>
class OpWalker {
D& _domain;
@@ -327,11 +340,31 @@ public:
}
};
+/**
+ * Post-order traversal over the tree given by 'node', with domain D of 'transport' callbacks for
+ * each node type. The domain may optionally contain 'prepare' method overloads to pre-visit a node
+ * before traversing its children.
+ *
+ * This method also allows propagating results from the traversal implicitly via the return type of
+ * the methods in D. For instance, to return an integer after traversal and a node which has two
+ * children, the signature would look something like this:
+ *
+ * int transport(const NodeType&, int childResult0, int childResult1)
+ *
+ */
template <bool withSlot = false, typename D, typename N, typename... Args>
auto transport(N&& node, D& domain, Args&&... args) {
return node.visit(OpTransporter<D, withSlot>{domain}, std::forward<Args>(args)...);
}
+/**
+ * Visits 'node' by invoking the appropriate 'walk' overload in domain D. The 'walk' methods should
+ * accept the node as the first argument and its children as subsequent arguments with generic type
+ * N.
+ *
+ * Note that this method does not actually traverse the tree given in 'node'; the caller is
+ * responsible for manually walking.
+ */
template <bool withSlot = false, typename D, typename N, typename... Args>
auto walk(N&& node, D& domain, Args&&... args) {
return node.visit(OpWalker<D, withSlot>{domain}, std::forward<Args>(args)...);
diff --git a/src/mongo/db/query/optimizer/algebra/polyvalue.h b/src/mongo/db/query/optimizer/algebra/polyvalue.h
index 63f5965c50c..185080d916c 100644
--- a/src/mongo/db/query/optimizer/algebra/polyvalue.h
+++ b/src/mongo/db/query/optimizer/algebra/polyvalue.h
@@ -33,32 +33,30 @@
#include <stdexcept>
#include <type_traits>
-namespace mongo::optimizer {
-namespace algebra {
+#include "mongo/util/assert_util.h"
+
+namespace mongo::optimizer::algebra {
namespace detail {
template <typename T, typename... Args>
inline constexpr bool is_one_of_v = std::disjunction_v<std::is_same<T, Args>...>;
-template <typename T, typename... Args>
-inline constexpr bool is_one_of_f() {
- return is_one_of_v<T, Args...>;
-}
-
template <typename... Args>
struct is_unique_t : std::true_type {};
template <typename H, typename... T>
struct is_unique_t<H, T...>
- : std::bool_constant<!is_one_of_f<H, T...>() && is_unique_t<T...>::value> {};
+ : std::bool_constant<!is_one_of_v<H, T...> && is_unique_t<T...>::value> {};
template <typename... Args>
inline constexpr bool is_unique_v = is_unique_t<Args...>::value;
-// Given the type T find its index in Ts
+/**
+ * Given the type T find its index in Ts.
+ */
template <typename T, typename... Ts>
static inline constexpr int find_index() {
- static_assert(detail::is_unique_v<Ts...>, "Types must be unique");
+ static_assert(is_unique_v<Ts...>, "Types must be unique");
constexpr bool matchVector[] = {std::is_same<T, Ts>::value...};
for (int index = 0; index < static_cast<int>(sizeof...(Ts)); ++index) {
@@ -85,35 +83,10 @@ using get_type_by_index = typename get_type_by_index_impl<I, Ts...>::type;
} // namespace detail
-/*=====-----
- *
- * The overload trick to construct visitors from lambdas.
- *
- */
-template <class... Ts>
-struct overload : Ts... {
- using Ts::operator()...;
-};
-template <class... Ts>
-overload(Ts...)->overload<Ts...>;
-
-/*=====-----
- *
- * Forward declarations
- *
- */
-template <typename... Ts>
-class PolyValue;
-
-template <typename T, typename... Ts>
-class ControlBlockVTable;
-
-/*=====-----
- *
+/**
* The base control block that PolyValue holds.
*
- * It does not contain anything else by the runtime tag.
- *
+ * It does not contain anything else except for the runtime tag.
*/
template <typename... Ts>
class ControlBlock {
@@ -128,13 +101,10 @@ public:
}
};
-/*=====-----
- *
+/**
* The concrete control block VTable generator.
*
- * It must be empty ad PolyValue derives from the generators
- * and we want EBO to kick in.
- *
+ * It must be empty as PolyValue derives from the generators and we want EBO to kick in.
*/
template <typename T, typename... Ts>
class ControlBlockVTable {
@@ -144,13 +114,9 @@ protected:
using AbstractType = ControlBlock<Ts...>;
- /*=====-----
- *
- * The concrete control block for every type T of Ts.
- *
- * It derives from the ControlBlock. All methods are private and only
- * the friend class ControlBlockVTable can call them.
- *
+ /**
+ * The concrete control block for every type T of Ts. Derives from a ControlBlock which holds
+ * the runtime type tag for T.
*/
class ConcreteType : public AbstractType {
T _t;
@@ -222,18 +188,21 @@ public:
}
}
- template <typename V, typename N, typename... Args>
- static auto visit(V&& v, N& holder, AbstractType* block, Args&&... args) {
- return v(holder, *cast<T>(block), std::forward<Args>(args)...);
+ template <typename Callback, typename N, typename... Args>
+ static auto visit(Callback&& cb, N& holder, AbstractType* block, Args&&... args) {
+ return cb(holder, *cast<T>(block), std::forward<Args>(args)...);
}
- template <typename V, typename N, typename... Args>
- static auto visitConst(V&& v, const N& holder, const AbstractType* block, Args&&... args) {
- return v(holder, *castConst<T>(block), std::forward<Args>(args)...);
+ template <typename Callback, typename N, typename... Args>
+ static auto visitConst(Callback&& cb,
+ const N& holder,
+ const AbstractType* block,
+ Args&&... args) {
+ return cb(holder, *castConst<T>(block), std::forward<Args>(args)...);
}
};
-/*=====-----
+/**
*
* This is a variation on variant and polymorphic value theme.
*
@@ -257,6 +226,9 @@ private:
static_assert(std::conjunction_v<std::is_empty<ControlBlockVTable<Ts, Ts...>>...>,
"VTable base classes must be empty");
+ // Static array that allows lookup into methods on ControlBlockVTable using the PolyValue tag.
+ static constexpr std::array cloneTbl = {&ControlBlockVTable<Ts, Ts...>::clone...};
+
ControlBlock<Ts...>* _object{nullptr};
PolyValue(ControlBlock<Ts...>* object) noexcept : _object(object) {}
@@ -266,9 +238,7 @@ private:
}
static void check(const ControlBlock<Ts...>* object) {
- if (!object) {
- throw std::logic_error("PolyValue is empty");
- }
+ tassert(6232700, "PolyValue is empty", object != nullptr);
}
static void destroy(ControlBlock<Ts...>* object) noexcept {
@@ -336,35 +306,38 @@ private:
return tag();
}
-
- template <typename V, typename... Args>
- auto visit(V&& v, Args&&... args) {
+ template <typename Callback, typename... Args>
+ auto visit(Callback&& cb, Args&&... args) {
// unfortunately gcc rejects much nicer code, clang and msvc accept
// static constexpr std::array visitTbl = { &ControlBlockVTable<Ts, Ts...>::template
// visit<V>... };
using FunPtrType = decltype(
- &ControlBlockVTable<get_t<0>, Ts...>::template visit<V, Reference, Args...>);
+ &ControlBlockVTable<get_t<0>, Ts...>::template visit<Callback, Reference, Args...>);
static constexpr FunPtrType visitTbl[] = {
- &ControlBlockVTable<Ts, Ts...>::template visit<V, Reference, Args...>...};
+ &ControlBlockVTable<Ts, Ts...>::template visit<Callback, Reference, Args...>...};
check(_object);
- return visitTbl[tag()](std::forward<V>(v), *this, _object, std::forward<Args>(args)...);
+ return visitTbl[tag()](
+ std::forward<Callback>(cb), *this, _object, std::forward<Args>(args)...);
}
- template <typename V, typename... Args>
- auto visit(V&& v, Args&&... args) const {
+ template <typename Callback, typename... Args>
+ auto visit(Callback&& cb, Args&&... args) const {
// unfortunately gcc rejects much nicer code, clang and msvc accept
// static constexpr std::array visitTbl = { &ControlBlockVTable<Ts, Ts...>::template
// visitConst<V>... };
using FunPtrType = decltype(
- &ControlBlockVTable<get_t<0>, Ts...>::template visitConst<V, Reference, Args...>);
+ &ControlBlockVTable<get_t<0>,
+ Ts...>::template visitConst<Callback, Reference, Args...>);
static constexpr FunPtrType visitTbl[] = {
- &ControlBlockVTable<Ts, Ts...>::template visitConst<V, Reference, Args...>...};
+ &ControlBlockVTable<Ts,
+ Ts...>::template visitConst<Callback, Reference, Args...>...};
check(_object);
- return visitTbl[tag()](std::forward<V>(v), *this, _object, std::forward<Args>(args)...);
+ return visitTbl[tag()](
+ std::forward<Callback>(cb), *this, _object, std::forward<Args>(args)...);
}
template <typename T>
@@ -420,21 +393,18 @@ public:
key_type tagOf() const {
check(_object);
-
return tag();
}
PolyValue() = delete;
PolyValue(const PolyValue& other) {
- static constexpr std::array cloneTbl = {&ControlBlockVTable<Ts, Ts...>::clone...};
if (other._object) {
_object = cloneTbl[other.tag()](other._object);
}
}
PolyValue(const Reference& other) {
- static constexpr std::array cloneTbl = {&ControlBlockVTable<Ts, Ts...>::clone...};
if (other._object) {
_object = cloneTbl[other.tag()](other._object);
}
@@ -463,34 +433,37 @@ public:
template <int I>
using get_t = detail::get_type_by_index<I, Ts...>;
- template <typename V, typename... Args>
- auto visit(V&& v, Args&&... args) {
+ template <typename Callback, typename... Args>
+ auto visit(Callback&& cb, Args&&... args) {
// unfortunately gcc rejects much nicer code, clang and msvc accept
// static constexpr std::array visitTbl = { &ControlBlockVTable<Ts, Ts...>::template
// visit<V>... };
- using FunPtrType =
- decltype(&ControlBlockVTable<get_t<0>, Ts...>::template visit<V, PolyValue, Args...>);
+ using FunPtrType = decltype(
+ &ControlBlockVTable<get_t<0>, Ts...>::template visit<Callback, PolyValue, Args...>);
static constexpr FunPtrType visitTbl[] = {
- &ControlBlockVTable<Ts, Ts...>::template visit<V, PolyValue, Args...>...};
+ &ControlBlockVTable<Ts, Ts...>::template visit<Callback, PolyValue, Args...>...};
check(_object);
- return visitTbl[tag()](std::forward<V>(v), *this, _object, std::forward<Args>(args)...);
+ return visitTbl[tag()](
+ std::forward<Callback>(cb), *this, _object, std::forward<Args>(args)...);
}
- template <typename V, typename... Args>
- auto visit(V&& v, Args&&... args) const {
+ template <typename Callback, typename... Args>
+ auto visit(Callback&& cb, Args&&... args) const {
// unfortunately gcc rejects much nicer code, clang and msvc accept
// static constexpr std::array visitTbl = { &ControlBlockVTable<Ts, Ts...>::template
// visitConst<V>... };
- using FunPtrType = decltype(
- &ControlBlockVTable<get_t<0>, Ts...>::template visitConst<V, PolyValue, Args...>);
+ using FunPtrType =
+ decltype(&ControlBlockVTable<get_t<0>,
+ Ts...>::template visitConst<Callback, PolyValue, Args...>);
static constexpr FunPtrType visitTbl[] = {
- &ControlBlockVTable<Ts, Ts...>::template visitConst<V, PolyValue, Args...>...};
+ &ControlBlockVTable<Ts, Ts...>::template visitConst<Callback, PolyValue, Args...>...};
check(_object);
- return visitTbl[tag()](std::forward<V>(v), *this, _object, std::forward<Args>(args)...);
+ return visitTbl[tag()](
+ std::forward<Callback>(cb), *this, _object, std::forward<Args>(args)...);
}
template <typename T>
@@ -517,13 +490,13 @@ public:
}
bool operator==(const PolyValue& rhs) const noexcept {
- static constexpr std::array cmp = {ControlBlockVTable<Ts, Ts...>::compareEq...};
- return cmp[tag()](_object, rhs._object);
+ static constexpr std::array cmpTbl = {ControlBlockVTable<Ts, Ts...>::compareEq...};
+ return cmpTbl[tag()](_object, rhs._object);
}
bool operator==(const Reference& rhs) const noexcept {
- static constexpr std::array cmp = {ControlBlockVTable<Ts, Ts...>::compareEq...};
- return cmp[tag()](_object, rhs._object);
+ static constexpr std::array cmpTbl = {ControlBlockVTable<Ts, Ts...>::compareEq...};
+ return cmpTbl[tag()](_object, rhs._object);
}
auto ref() {
@@ -537,5 +510,4 @@ public:
}
};
-} // namespace algebra
-} // namespace mongo::optimizer
+} // namespace mongo::optimizer::algebra
diff --git a/src/mongo/db/query/optimizer/bool_expression.h b/src/mongo/db/query/optimizer/bool_expression.h
index bf00f907504..b4bdf0a6a11 100644
--- a/src/mongo/db/query/optimizer/bool_expression.h
+++ b/src/mongo/db/query/optimizer/bool_expression.h
@@ -53,8 +53,8 @@ struct BoolExpr {
using NodeVector = std::vector<Node>;
- class Atom final : public algebra::OpSpecificArity<Node, Atom, 0> {
- using Base = algebra::OpSpecificArity<Node, Atom, 0>;
+ class Atom final : public algebra::OpSpecificArity<Node, 0> {
+ using Base = algebra::OpSpecificArity<Node, 0>;
public:
Atom(T expr) : Base(), _expr(std::move(expr)) {}
@@ -74,8 +74,8 @@ struct BoolExpr {
T _expr;
};
- class Conjunction final : public algebra::OpSpecificDynamicArity<Node, Conjunction, 0> {
- using Base = algebra::OpSpecificDynamicArity<Node, Conjunction, 0>;
+ class Conjunction final : public algebra::OpSpecificDynamicArity<Node, 0> {
+ using Base = algebra::OpSpecificDynamicArity<Node, 0>;
public:
Conjunction(NodeVector children) : Base(std::move(children)) {
@@ -87,8 +87,8 @@ struct BoolExpr {
}
};
- class Disjunction final : public algebra::OpSpecificDynamicArity<Node, Disjunction, 0> {
- using Base = algebra::OpSpecificDynamicArity<Node, Disjunction, 0>;
+ class Disjunction final : public algebra::OpSpecificDynamicArity<Node, 0> {
+ using Base = algebra::OpSpecificDynamicArity<Node, 0>;
public:
Disjunction(NodeVector children) : Base(std::move(children)) {
diff --git a/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp b/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp
index fd6bf9e1e40..4ecaf2c0795 100644
--- a/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp
+++ b/src/mongo/db/query/optimizer/cascades/logical_rewriter.cpp
@@ -624,16 +624,17 @@ static void convertFilterToSargableNode(ABT::reference_type node,
return;
}
- PartialSchemaReqConversion conversion = convertExprToPartialSchemaReq(filterNode.getFilter());
- if (!conversion._success) {
+ auto conversion =
+ convertExprToPartialSchemaReq(filterNode.getFilter(), true /*isFilterContext*/);
+ if (!conversion) {
return;
}
- if (conversion._hasEmptyInterval) {
+ if (conversion->_hasEmptyInterval) {
addEmptyValueScanNode(ctx);
return;
}
- for (const auto& entry : conversion._reqMap) {
+ for (const auto& entry : conversion->_reqMap) {
uassert(6624111,
"Filter partial schema requirement must contain a variable name.",
!entry.first._projectionName.empty());
@@ -648,29 +649,29 @@ static void convertFilterToSargableNode(ABT::reference_type node,
// If in substitution mode, disallow retaining original predicate. If in exploration mode, only
// allow retaining the original predicate and if we have at least one index available.
if constexpr (isSubstitution) {
- if (conversion._retainPredicate) {
+ if (conversion->_retainPredicate) {
return;
}
- } else if (!conversion._retainPredicate || scanDef.getIndexDefs().empty()) {
+ } else if (!conversion->_retainPredicate || scanDef.getIndexDefs().empty()) {
return;
}
bool hasEmptyInterval = false;
auto candidateIndexMap = computeCandidateIndexMap(ctx.getPrefixId(),
indexingAvailability.getScanProjection(),
- conversion._reqMap,
+ conversion->_reqMap,
scanDef,
hasEmptyInterval);
if (hasEmptyInterval) {
addEmptyValueScanNode(ctx);
} else {
- ABT sargableNode = make<SargableNode>(std::move(conversion._reqMap),
+ ABT sargableNode = make<SargableNode>(std::move(conversion->_reqMap),
std::move(candidateIndexMap),
IndexReqTarget::Complete,
filterNode.getChild());
- if (conversion._retainPredicate) {
+ if (conversion->_retainPredicate) {
const GroupIdType childGroupId =
filterNode.getChild().cast<MemoLogicalDelegatorNode>()->getGroupId();
if (childGroupId == indexingAvailability.getScanGroupId()) {
@@ -813,22 +814,24 @@ struct SubstituteConvert<EvaluationNode> {
}
// We still want to extract sargable nodes from EvalNode to use for PhysicalScans.
- PartialSchemaReqConversion conversion =
- convertExprToPartialSchemaReq(evalNode.getProjection());
+ auto conversion =
+ convertExprToPartialSchemaReq(evalNode.getProjection(), false /*isFilterContext*/);
+ if (!conversion) {
+ return;
+ }
uassert(6624165,
"Should not be getting retainPredicate set for EvalNodes",
- !conversion._retainPredicate);
-
- if (!conversion._success || conversion._reqMap.size() != 1) {
+ !conversion->_retainPredicate);
+ if (conversion->_reqMap.size() != 1) {
// For evaluation nodes we expect to create a single entry.
return;
}
- if (conversion._hasEmptyInterval) {
+ if (conversion->_hasEmptyInterval) {
addEmptyValueScanNode(ctx);
return;
}
- for (auto& entry : conversion._reqMap) {
+ for (auto& entry : conversion->_reqMap) {
PartialSchemaRequirement& req = entry.second;
req.setBoundProjectionName(evalNode.getProjectionName());
@@ -842,12 +845,12 @@ struct SubstituteConvert<EvaluationNode> {
bool hasEmptyInterval = false;
auto candidateIndexMap = computeCandidateIndexMap(
- ctx.getPrefixId(), scanProjName, conversion._reqMap, scanDef, hasEmptyInterval);
+ ctx.getPrefixId(), scanProjName, conversion->_reqMap, scanDef, hasEmptyInterval);
if (hasEmptyInterval) {
addEmptyValueScanNode(ctx);
} else {
- ABT newNode = make<SargableNode>(std::move(conversion._reqMap),
+ ABT newNode = make<SargableNode>(std::move(conversion->_reqMap),
std::move(candidateIndexMap),
IndexReqTarget::Complete,
evalNode.getChild());
diff --git a/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp b/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp
index 6f6f6c743ed..58cbf9dee2b 100644
--- a/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp
+++ b/src/mongo/db/query/optimizer/physical_rewriter_optimizer_test.cpp
@@ -4310,13 +4310,15 @@ TEST(PhysRewriter, PartialIndex1) {
// TODO: Test cases where partial filter bound is a range which subsumes the query
// requirement
// TODO: (e.g. half open interval)
- auto conversionResult = convertExprToPartialSchemaReq(make<EvalFilter>(
- make<PathGet>("b",
- make<PathTraverse>(make<PathCompare>(Operations::Eq, Constant::int64(2)))),
- make<Variable>("root")));
- ASSERT_TRUE(conversionResult._success);
- ASSERT_FALSE(conversionResult._hasEmptyInterval);
- ASSERT_FALSE(conversionResult._retainPredicate);
+ auto conversionResult = convertExprToPartialSchemaReq(
+ make<EvalFilter>(
+ make<PathGet>(
+ "b", make<PathTraverse>(make<PathCompare>(Operations::Eq, Constant::int64(2)))),
+ make<Variable>("root")),
+ true /*isFilterContext*/);
+ ASSERT_TRUE(conversionResult.has_value());
+ ASSERT_FALSE(conversionResult->_hasEmptyInterval);
+ ASSERT_FALSE(conversionResult->_retainPredicate);
OptPhaseManager phaseManager(
{OptPhaseManager::OptPhase::MemoSubstitutionPhase,
@@ -4329,7 +4331,7 @@ TEST(PhysRewriter, PartialIndex1) {
IndexDefinition{{{makeIndexPath("a"), CollationOp::Ascending}},
true /*isMultiKey*/,
{DistributionType::Centralized},
- std::move(conversionResult._reqMap)}}}}}}},
+ std::move(conversionResult->_reqMap)}}}}}}},
{true /*debugMode*/, 2 /*debugLevel*/, DebugInfo::kIterationLimitForTests});
ABT optimized = rootNode;
@@ -4387,13 +4389,15 @@ TEST(PhysRewriter, PartialIndex2) {
ABT rootNode =
make<RootNode>(ProjectionRequirement{ProjectionNameVector{"root"}}, std::move(filterANode));
- auto conversionResult = convertExprToPartialSchemaReq(make<EvalFilter>(
- make<PathGet>("a",
- make<PathTraverse>(make<PathCompare>(Operations::Eq, Constant::int64(3)))),
- make<Variable>("root")));
- ASSERT_TRUE(conversionResult._success);
- ASSERT_FALSE(conversionResult._hasEmptyInterval);
- ASSERT_FALSE(conversionResult._retainPredicate);
+ auto conversionResult = convertExprToPartialSchemaReq(
+ make<EvalFilter>(
+ make<PathGet>(
+ "a", make<PathTraverse>(make<PathCompare>(Operations::Eq, Constant::int64(3)))),
+ make<Variable>("root")),
+ true /*isFilterContext*/);
+ ASSERT_TRUE(conversionResult.has_value());
+ ASSERT_FALSE(conversionResult->_hasEmptyInterval);
+ ASSERT_FALSE(conversionResult->_retainPredicate);
OptPhaseManager phaseManager(
{OptPhaseManager::OptPhase::MemoSubstitutionPhase,
@@ -4406,7 +4410,7 @@ TEST(PhysRewriter, PartialIndex2) {
IndexDefinition{{{makeIndexPath("a"), CollationOp::Ascending}},
true /*isMultiKey*/,
{DistributionType::Centralized},
- std::move(conversionResult._reqMap)}}}}}}},
+ std::move(conversionResult->_reqMap)}}}}}}},
{true /*debugMode*/, 2 /*debugLevel*/, DebugInfo::kIterationLimitForTests});
ABT optimized = rootNode;
@@ -4462,13 +4466,15 @@ TEST(PhysRewriter, PartialIndexReject) {
ABT rootNode =
make<RootNode>(ProjectionRequirement{ProjectionNameVector{"root"}}, std::move(filterBNode));
- auto conversionResult = convertExprToPartialSchemaReq(make<EvalFilter>(
- make<PathGet>("b",
- make<PathTraverse>(make<PathCompare>(Operations::Eq, Constant::int64(4)))),
- make<Variable>("root")));
- ASSERT_TRUE(conversionResult._success);
- ASSERT_FALSE(conversionResult._hasEmptyInterval);
- ASSERT_FALSE(conversionResult._retainPredicate);
+ auto conversionResult = convertExprToPartialSchemaReq(
+ make<EvalFilter>(
+ make<PathGet>(
+ "b", make<PathTraverse>(make<PathCompare>(Operations::Eq, Constant::int64(4)))),
+ make<Variable>("root")),
+ true /*isFilterContext*/);
+ ASSERT_TRUE(conversionResult.has_value());
+ ASSERT_FALSE(conversionResult->_hasEmptyInterval);
+ ASSERT_FALSE(conversionResult->_retainPredicate);
OptPhaseManager phaseManager(
{OptPhaseManager::OptPhase::MemoSubstitutionPhase,
@@ -4481,7 +4487,7 @@ TEST(PhysRewriter, PartialIndexReject) {
IndexDefinition{{{makeIndexPath("a"), CollationOp::Ascending}},
true /*isMultiKey*/,
{DistributionType::Centralized},
- std::move(conversionResult._reqMap)}}}}}}},
+ std::move(conversionResult->_reqMap)}}}}}}},
{true /*debugMode*/, 2 /*debugLevel*/, DebugInfo::kIterationLimitForTests});
ABT optimized = rootNode;
diff --git a/src/mongo/db/query/optimizer/rewrites/const_eval.cpp b/src/mongo/db/query/optimizer/rewrites/const_eval.cpp
index 89bfe74551f..0278e20700e 100644
--- a/src/mongo/db/query/optimizer/rewrites/const_eval.cpp
+++ b/src/mongo/db/query/optimizer/rewrites/const_eval.cpp
@@ -86,7 +86,7 @@ void ConstEval::removeUnusedEvalNodes() {
// TODO: consider caching.
// TODO: consider deriving IndexingAvailability.
if (!_disableSargableInlining ||
- !convertExprToPartialSchemaReq(k->getProjection())._success) {
+ !convertExprToPartialSchemaReq(k->getProjection(), false /*isFilterContext*/)) {
// Schedule node inlining as there is exactly one reference.
_singleRef.emplace(v.front());
_changed = true;
diff --git a/src/mongo/db/query/optimizer/syntax/syntax.h b/src/mongo/db/query/optimizer/syntax/syntax.h
index 7c46df654d8..0abfb54a0f1 100644
--- a/src/mongo/db/query/optimizer/syntax/syntax.h
+++ b/src/mongo/db/query/optimizer/syntax/syntax.h
@@ -93,10 +93,10 @@ using ABT = algebra::PolyValue<Blackhole,
ExpressionBinder>;
template <typename Derived, size_t Arity>
-using Operator = algebra::OpSpecificArity<ABT, Derived, Arity>;
+using Operator = algebra::OpSpecificArity<ABT, Arity>;
template <typename Derived, size_t Arity>
-using OperatorDynamic = algebra::OpSpecificDynamicArity<ABT, Derived, Arity>;
+using OperatorDynamic = algebra::OpSpecificDynamicArity<ABT, Arity>;
template <typename Derived>
using OperatorDynamicHomogenous = OperatorDynamic<Derived, 0>;
diff --git a/src/mongo/db/query/optimizer/utils/utils.cpp b/src/mongo/db/query/optimizer/utils/utils.cpp
index 322ae174570..da4be863228 100644
--- a/src/mongo/db/query/optimizer/utils/utils.cpp
+++ b/src/mongo/db/query/optimizer/utils/utils.cpp
@@ -62,9 +62,12 @@ std::vector<ABT::reference_type> collectComposed(const ABT& n) {
return {n.ref()};
}
-FieldNameType getSimpleField(const ABT& node) {
- const PathGet* pathGet = node.cast<PathGet>();
- return pathGet != nullptr ? pathGet->name() : "";
+bool isSimplePath(const ABT& node) {
+ if (auto getPtr = node.cast<PathGet>();
+ getPtr != nullptr && getPtr->getPath().is<PathIdentity>()) {
+ return true;
+ }
+ return false;
}
std::string PrefixId::getNextId(const std::string& key) {
@@ -337,18 +340,8 @@ VariableNameSetType collectVariableReferences(const ABT& n) {
return NodeVariableTracker::collect(n);
}
-PartialSchemaReqConversion::PartialSchemaReqConversion()
- : _success(false),
- _bound(),
- _reqMap(),
- _hasIntersected(false),
- _hasTraversed(false),
- _hasEmptyInterval(false),
- _retainPredicate(false) {}
-
PartialSchemaReqConversion::PartialSchemaReqConversion(PartialSchemaRequirements reqMap)
- : _success(true),
- _bound(),
+ : _bound(),
_reqMap(std::move(reqMap)),
_hasIntersected(false),
_hasTraversed(false),
@@ -356,8 +349,7 @@ PartialSchemaReqConversion::PartialSchemaReqConversion(PartialSchemaRequirements
_retainPredicate(false) {}
PartialSchemaReqConversion::PartialSchemaReqConversion(ABT bound)
- : _success(true),
- _bound(std::move(bound)),
+ : _bound(std::move(bound)),
_reqMap(),
_hasIntersected(false),
_hasTraversed(false),
@@ -369,23 +361,24 @@ PartialSchemaReqConversion::PartialSchemaReqConversion(ABT bound)
*/
class PartialSchemaReqConverter {
public:
- PartialSchemaReqConverter() = default;
+ using ResultType = boost::optional<PartialSchemaReqConversion>;
+
+ PartialSchemaReqConverter(const bool isFilterContext) : _isFilterContext(isFilterContext) {}
- PartialSchemaReqConversion handleEvalPathAndEvalFilter(PartialSchemaReqConversion pathResult,
- PartialSchemaReqConversion inputResult) {
- if (!pathResult._success || !inputResult._success) {
+ ResultType handleEvalPathAndEvalFilter(ResultType pathResult, ResultType inputResult) {
+ if (!pathResult || !inputResult) {
return {};
}
- if (pathResult._bound.has_value() || !inputResult._bound.has_value() ||
- !inputResult._reqMap.empty()) {
+ if (pathResult->_bound.has_value() || !inputResult->_bound.has_value() ||
+ !inputResult->_reqMap.empty()) {
return {};
}
- if (auto boundPtr = inputResult._bound->cast<Variable>(); boundPtr != nullptr) {
+ if (auto boundPtr = inputResult->_bound->cast<Variable>(); boundPtr != nullptr) {
const ProjectionName& boundVarName = boundPtr->name();
PartialSchemaRequirements newMap;
- for (auto& [key, req] : pathResult._reqMap) {
+ for (auto& [key, req] : pathResult->_reqMap) {
if (!key._projectionName.empty()) {
return {};
}
@@ -393,40 +386,40 @@ public:
}
PartialSchemaReqConversion result{std::move(newMap)};
- result._hasEmptyInterval = pathResult._hasEmptyInterval;
- result._retainPredicate = pathResult._retainPredicate;
+ result._hasEmptyInterval = pathResult->_hasEmptyInterval;
+ result._retainPredicate = pathResult->_retainPredicate;
return result;
}
return {};
}
- PartialSchemaReqConversion transport(const ABT& n,
- const EvalPath& evalPath,
- PartialSchemaReqConversion pathResult,
- PartialSchemaReqConversion inputResult) {
+ ResultType transport(const ABT& n,
+ const EvalPath& evalPath,
+ ResultType pathResult,
+ ResultType inputResult) {
return handleEvalPathAndEvalFilter(std::move(pathResult), std::move(inputResult));
}
- PartialSchemaReqConversion transport(const ABT& n,
- const EvalFilter& evalFilter,
- PartialSchemaReqConversion pathResult,
- PartialSchemaReqConversion inputResult) {
+ ResultType transport(const ABT& n,
+ const EvalFilter& evalFilter,
+ ResultType pathResult,
+ ResultType inputResult) {
return handleEvalPathAndEvalFilter(std::move(pathResult), std::move(inputResult));
}
- static PartialSchemaReqConversion handleComposition(const bool isMultiplicative,
- PartialSchemaReqConversion leftResult,
- PartialSchemaReqConversion rightResult) {
- if (!leftResult._success || !rightResult._success) {
+ static ResultType handleComposition(const bool isMultiplicative,
+ ResultType leftResult,
+ ResultType rightResult) {
+ if (!leftResult || !rightResult) {
return {};
}
- if (leftResult._bound.has_value() || rightResult._bound.has_value()) {
+ if (leftResult->_bound.has_value() || rightResult->_bound.has_value()) {
return {};
}
- auto& leftReqMap = leftResult._reqMap;
- auto& rightReqMap = rightResult._reqMap;
+ auto& leftReqMap = leftResult->_reqMap;
+ auto& rightReqMap = rightResult->_reqMap;
if (isMultiplicative) {
{
ProjectionRenames projectionRenames;
@@ -438,7 +431,7 @@ public:
}
}
- if (!leftResult._hasTraversed && !rightResult._hasTraversed) {
+ if (!leftResult->_hasTraversed && !rightResult->_hasTraversed) {
// Intersect intervals only if we have not traversed. E.g. (-inf, 90) ^ (70, +inf)
// becomes (70, 90).
for (auto& [key, req] : leftReqMap) {
@@ -446,7 +439,7 @@ public:
if (newIntervals) {
req.getIntervals() = std::move(newIntervals.get());
} else {
- leftResult._hasEmptyInterval = true;
+ leftResult->_hasEmptyInterval = true;
break;
}
}
@@ -455,7 +448,7 @@ public:
return {};
}
- leftResult._hasIntersected = true;
+ leftResult->_hasIntersected = true;
return leftResult;
}
@@ -534,32 +527,40 @@ public:
rightPath.is<PathIdentity>()) {
// leftPath = Id, rightPath = Traverse Id.
combineIntervalsDNF(false /*intersect*/, leftIntervals, newInterval);
- leftResult._retainPredicate = true;
+ leftResult->_retainPredicate = true;
return leftResult;
} else if (const auto rightTraversePtr = rightPath.cast<PathTraverse>();
rightTraversePtr != nullptr && rightTraversePtr->getPath().is<PathIdentity>() &&
leftPath.is<PathIdentity>()) {
// leftPath = Traverse Id, rightPath = Id.
combineIntervalsDNF(false /*intersect*/, rightIntervals, newInterval);
- rightResult._retainPredicate = true;
+ rightResult->_retainPredicate = true;
return rightResult;
}
return {};
}
- PartialSchemaReqConversion transport(const ABT& n,
- const PathComposeM& pathComposeM,
- PartialSchemaReqConversion leftResult,
- PartialSchemaReqConversion rightResult) {
+ ResultType transport(const ABT& n,
+ const PathComposeM& pathComposeM,
+ ResultType leftResult,
+ ResultType rightResult) {
+ if (!_isFilterContext) {
+ return {};
+ }
+
return handleComposition(
true /*isMultiplicative*/, std::move(leftResult), std::move(rightResult));
}
- PartialSchemaReqConversion transport(const ABT& n,
- const PathComposeA& pathComposeA,
- PartialSchemaReqConversion leftResult,
- PartialSchemaReqConversion rightResult) {
+ ResultType transport(const ABT& n,
+ const PathComposeA& pathComposeA,
+ ResultType leftResult,
+ ResultType rightResult) {
+ if (!_isFilterContext) {
+ return {};
+ }
+
const auto& path1 = pathComposeA.getPath1();
const auto& path2 = pathComposeA.getPath2();
const auto& eqNull = make<PathCompare>(Operations::Eq, Constant::null());
@@ -571,9 +572,9 @@ public:
auto intervalExpr = IntervalReqExpr::makeSingularDNF(IntervalRequirement{
{true /*inclusive*/, Constant::null()}, {true /*inclusive*/, Constant::null()}});
- return {PartialSchemaRequirements{
+ return {{PartialSchemaRequirements{
{PartialSchemaKey{},
- PartialSchemaRequirement{"" /*boundProjectionName*/, std::move(intervalExpr)}}}};
+ PartialSchemaRequirement{"" /*boundProjectionName*/, std::move(intervalExpr)}}}}};
}
return handleComposition(
@@ -581,19 +582,18 @@ public:
}
template <class T>
- static PartialSchemaReqConversion handleGetAndTraverse(const ABT& n,
- PartialSchemaReqConversion inputResult) {
- if (!inputResult._success) {
+ static ResultType handleGetAndTraverse(const ABT& n, ResultType inputResult) {
+ if (!inputResult) {
return {};
}
- if (inputResult._bound.has_value()) {
+ if (inputResult->_bound.has_value()) {
return {};
}
// New map has keys with appended paths.
PartialSchemaRequirements newMap;
- for (auto& entry : inputResult._reqMap) {
+ for (auto& entry : inputResult->_reqMap) {
if (!entry.first._projectionName.empty()) {
return {};
}
@@ -608,41 +608,39 @@ public:
newMap.emplace(PartialSchemaKey{"", std::move(path)}, std::move(entry.second));
}
- inputResult._reqMap = std::move(newMap);
+ inputResult->_reqMap = std::move(newMap);
return inputResult;
}
- PartialSchemaReqConversion transport(const ABT& n,
- const PathGet& pathGet,
- PartialSchemaReqConversion inputResult) {
+ ResultType transport(const ABT& n, const PathGet& pathGet, ResultType inputResult) {
return handleGetAndTraverse<PathGet>(n, std::move(inputResult));
}
- PartialSchemaReqConversion transport(const ABT& n,
- const PathTraverse& pathTraverse,
- PartialSchemaReqConversion inputResult) {
- if (inputResult._reqMap.size() > 1) {
+ ResultType transport(const ABT& n, const PathTraverse& pathTraverse, ResultType inputResult) {
+ if (!inputResult) {
+ return {};
+ }
+ if (inputResult->_reqMap.size() > 1) {
// Cannot append traverse if we have more than one requirement.
return {};
}
- PartialSchemaReqConversion result =
- handleGetAndTraverse<PathTraverse>(n, std::move(inputResult));
- result._hasTraversed = true;
+ auto result = handleGetAndTraverse<PathTraverse>(n, std::move(inputResult));
+ if (result) {
+ result->_hasTraversed = true;
+ }
return result;
}
- PartialSchemaReqConversion transport(const ABT& n,
- const PathCompare& pathCompare,
- PartialSchemaReqConversion inputResult) {
- if (!inputResult._success) {
+ ResultType transport(const ABT& n, const PathCompare& pathCompare, ResultType inputResult) {
+ if (!inputResult) {
return {};
}
- if (!inputResult._bound.has_value() || !inputResult._reqMap.empty()) {
+ if (!inputResult->_bound.has_value() || !inputResult->_reqMap.empty()) {
return {};
}
- const auto& bound = inputResult._bound;
+ const auto& bound = inputResult->_bound;
bool lowBoundInclusive = false;
boost::optional<ABT> lowBound;
bool highBoundInclusive = false;
@@ -678,51 +676,53 @@ public:
auto intervalExpr = IntervalReqExpr::makeSingularDNF(IntervalRequirement{
{lowBoundInclusive, std::move(lowBound)}, {highBoundInclusive, std::move(highBound)}});
- return {PartialSchemaRequirements{
+ return {{PartialSchemaRequirements{
{PartialSchemaKey{},
- PartialSchemaRequirement{"" /*boundProjectionName*/, std::move(intervalExpr)}}}};
+ PartialSchemaRequirement{"" /*boundProjectionName*/, std::move(intervalExpr)}}}}};
}
- PartialSchemaReqConversion transport(const ABT& n, const PathIdentity& pathIdentity) {
- return {PartialSchemaRequirements{{{}, {}}}};
+ ResultType transport(const ABT& n, const PathIdentity& pathIdentity) {
+ return {{PartialSchemaRequirements{{{}, {}}}}};
}
- PartialSchemaReqConversion transport(const ABT& n, const Constant& c) {
+ ResultType transport(const ABT& n, const Constant& c) {
if (c.isNull()) {
// Cannot create bounds with just NULL.
return {};
}
- return {n};
+ return {{n}};
}
template <typename T, typename... Ts>
- PartialSchemaReqConversion transport(const ABT& n, const T& node, Ts&&...) {
+ ResultType transport(const ABT& n, const T& node, Ts&&...) {
if constexpr (std::is_base_of_v<ExpressionSyntaxSort, T>) {
// We allow expressions to participate in bounds.
- return {n};
+ return {{n}};
}
// General case. Reject conversion.
return {};
}
- PartialSchemaReqConversion convert(const ABT& input) {
+ ResultType convert(const ABT& input) {
return algebra::transport<true>(input, *this);
}
+
+private:
+ const bool _isFilterContext;
};
-PartialSchemaReqConversion convertExprToPartialSchemaReq(const ABT& expr) {
- PartialSchemaReqConverter converter;
- PartialSchemaReqConversion result = converter.convert(expr);
- if (result._reqMap.empty()) {
- result._success = false;
- return result;
+boost::optional<PartialSchemaReqConversion> convertExprToPartialSchemaReq(
+ const ABT& expr, const bool isFilterContext) {
+ PartialSchemaReqConverter converter(isFilterContext);
+ auto result = converter.convert(expr);
+ if (!result || result->_reqMap.empty()) {
+ return {};
}
- for (const auto& entry : result._reqMap) {
+ for (const auto& entry : result->_reqMap) {
if (entry.first.emptyPath() && isIntervalReqFullyOpenDNF(entry.second.getIntervals())) {
// We need to determine either path or interval (or both).
- result._success = false;
- return result;
+ return {};
}
}
return result;
diff --git a/src/mongo/db/query/optimizer/utils/utils.h b/src/mongo/db/query/optimizer/utils/utils.h
index 42845f0ce95..d3164d10db6 100644
--- a/src/mongo/db/query/optimizer/utils/utils.h
+++ b/src/mongo/db/query/optimizer/utils/utils.h
@@ -65,10 +65,9 @@ size_t roundUpToNextPow2(size_t v, size_t maxPower);
std::vector<ABT::reference_type> collectComposed(const ABT& n);
/**
- * Returns the path represented by 'node' as a simple dotted string. Returns an empty string if
- * 'node' is not a path.
+ * Returns true if the path represented by 'node' is of the form PathGet "field" PathId
*/
-FieldNameType getSimpleField(const ABT& node);
+bool isSimplePath(const ABT& node);
template <class Element = PathComposeM>
inline void maybeComposePath(ABT& composition, ABT child) {
@@ -155,13 +154,9 @@ private:
};
struct PartialSchemaReqConversion {
- PartialSchemaReqConversion();
PartialSchemaReqConversion(PartialSchemaRequirements reqMap);
PartialSchemaReqConversion(ABT bound);
- // Is our current bottom-up conversion successful. If not shortcut to top.
- bool _success;
-
// If set, contains a Constant or Variable bound of an (yet unknown) interval.
boost::optional<ABT> _bound;
@@ -186,9 +181,11 @@ struct PartialSchemaReqConversion {
/**
* Takes an expression that comes from an Filter or Evaluation node, and attempt to convert
* to a PartialSchemaReqConversion. This is done independent of the availability of indexes.
- * Essentially this means to extract intervals over paths whenever possible.
+ * Essentially this means to extract intervals over paths whenever possible. If the conversion is
+ * not possible, an empty result is returned.
*/
-PartialSchemaReqConversion convertExprToPartialSchemaReq(const ABT& expr);
+boost::optional<PartialSchemaReqConversion> convertExprToPartialSchemaReq(const ABT& expr,
+ bool isFilterContext);
bool intersectPartialSchemaReq(PartialSchemaRequirements& target,
const PartialSchemaRequirements& source,
diff --git a/src/mongo/db/query/plan_cache_key_factory.cpp b/src/mongo/db/query/plan_cache_key_factory.cpp
index 6b154b29105..b330fa5ccd6 100644
--- a/src/mongo/db/query/plan_cache_key_factory.cpp
+++ b/src/mongo/db/query/plan_cache_key_factory.cpp
@@ -89,12 +89,6 @@ PlanCacheKeyInfo makePlanCacheKeyInfo(const CanonicalQuery& query,
return PlanCacheKeyInfo(shapeString, indexabilityKeyBuilder.str());
}
-PlanCacheKey make(const CanonicalQuery& query,
- const CollectionPtr& collection,
- PlanCacheKeyTag<PlanCacheKey>) {
- return {makePlanCacheKeyInfo(query, collection)};
-}
-
namespace {
/**
* Returns the highest index commit timestamp associated with an index on 'collection' that is
@@ -129,24 +123,62 @@ boost::optional<Timestamp> computeNewestVisibleIndexTimestamp(OperationContext*
return currentNewestVisible.isNull() ? boost::optional<Timestamp>{} : currentNewestVisible;
}
+
+sbe::PlanCacheKeyCollectionState computeCollectionState(OperationContext* opCtx,
+ const CollectionPtr& collection,
+ bool isSecondaryColl) {
+ boost::optional<sbe::PlanCacheKeyShardingEpoch> keyShardingEpoch;
+ // We don't version secondary collections in the current shard versioning protocol. Also, since
+ // currently we only push down $lookup to SBE when secondary collections (and main collection)
+ // are unsharded, it's OK to not encode the sharding information here.
+ if (!isSecondaryColl) {
+ const auto shardVersion{
+ OperationShardingState::get(opCtx).getShardVersion(collection->ns())};
+ if (shardVersion) {
+ keyShardingEpoch =
+ sbe::PlanCacheKeyShardingEpoch{shardVersion->epoch(), shardVersion->getTimestamp()};
+ }
+ }
+ return {collection->uuid(),
+ CollectionQueryInfo::get(collection).getPlanCacheInvalidatorVersion(),
+ plan_cache_detail::computeNewestVisibleIndexTimestamp(opCtx, collection),
+ keyShardingEpoch};
+}
} // namespace
+PlanCacheKey make(const CanonicalQuery& query,
+ const CollectionPtr& collection,
+ PlanCacheKeyTag<PlanCacheKey> tag) {
+ return {plan_cache_detail::makePlanCacheKeyInfo(query, collection)};
+}
+
sbe::PlanCacheKey make(const CanonicalQuery& query,
const CollectionPtr& collection,
- PlanCacheKeyTag<sbe::PlanCacheKey>) {
- OperationContext* opCtx = query.getOpCtx();
- auto collectionVersion = CollectionQueryInfo::get(collection).getPlanCacheInvalidatorVersion();
- const auto shardVersion{OperationShardingState::get(opCtx).getShardVersion(collection->ns())};
- const auto keyShardingEpoch = shardVersion
- ? boost::make_optional(
- sbe::PlanCacheKeyShardingEpoch{shardVersion->epoch(), shardVersion->getTimestamp()})
- : boost::none;
-
- return {makePlanCacheKeyInfo(query, collection),
- collection->uuid(),
- collectionVersion,
- computeNewestVisibleIndexTimestamp(opCtx, collection),
- keyShardingEpoch};
+ PlanCacheKeyTag<sbe::PlanCacheKey> tag) {
+ return plan_cache_key_factory::make(query, MultipleCollectionAccessor(collection));
}
} // namespace plan_cache_detail
+
+namespace plan_cache_key_factory {
+sbe::PlanCacheKey make(const CanonicalQuery& query, const MultipleCollectionAccessor& collections) {
+ OperationContext* opCtx = query.getOpCtx();
+ auto mainCollectionState = plan_cache_detail::computeCollectionState(
+ opCtx, collections.getMainCollection(), false /* isSecondaryColl */);
+ std::vector<sbe::PlanCacheKeyCollectionState> secondaryCollectionStates;
+ secondaryCollectionStates.reserve(collections.getSecondaryCollections().size());
+ // We always use the collection order saved in MultipleCollectionAccessor to populate the plan
+ // cache key; the secondary collections are ordered by namespace.
+ for (auto& [_, collection] : collections.getSecondaryCollections()) {
+ if (collection) {
+ secondaryCollectionStates.emplace_back(plan_cache_detail::computeCollectionState(
+ opCtx, collection, true /* isSecondaryColl */));
+ }
+ }
+
+ return {plan_cache_detail::makePlanCacheKeyInfo(query, collections.getMainCollection()),
+ std::move(mainCollectionState),
+ std::move(secondaryCollectionStates)};
+}
+} // namespace plan_cache_key_factory
+
} // namespace mongo
diff --git a/src/mongo/db/query/plan_cache_key_factory.h b/src/mongo/db/query/plan_cache_key_factory.h
index 8d811793211..663297093c7 100644
--- a/src/mongo/db/query/plan_cache_key_factory.h
+++ b/src/mongo/db/query/plan_cache_key_factory.h
@@ -52,14 +52,14 @@ template <typename KeyType>
struct PlanCacheKeyTag {};
/**
- * Creates a key for the classic plan cache from the canonical query and collection instances.
+ * Creates a key for the classic plan cache from the canonical query and a single collection.
*/
PlanCacheKey make(const CanonicalQuery& query,
const CollectionPtr& collection,
PlanCacheKeyTag<PlanCacheKey> tag);
/**
- * Creates a key for the SBE plan cache from the canonical query and collection instances.
+ * Similar to above, but for the SBE plan cache key.
*/
sbe::PlanCacheKey make(const CanonicalQuery& query,
const CollectionPtr& collection,
@@ -77,5 +77,12 @@ template <typename Key>
Key make(const CanonicalQuery& query, const CollectionPtr& collection) {
return plan_cache_detail::make(query, collection, plan_cache_detail::PlanCacheKeyTag<Key>{});
}
+
+/**
+ * Similar to above, a factory helper to make an SBE plan cache key, but used for agg queries that
+ * might involve multiple collections.
+ */
+sbe::PlanCacheKey make(const CanonicalQuery& query, const MultipleCollectionAccessor& collections);
+
} // namespace plan_cache_key_factory
} // namespace mongo
diff --git a/src/mongo/db/query/plan_executor.cpp b/src/mongo/db/query/plan_executor.cpp
index ee41d15d84c..99b2fd8fefa 100644
--- a/src/mongo/db/query/plan_executor.cpp
+++ b/src/mongo/db/query/plan_executor.cpp
@@ -38,6 +38,10 @@ namespace {
MONGO_FAIL_POINT_DEFINE(planExecutorAlwaysFails);
} // namespace
+const OperationContext::Decoration<boost::optional<SharedSemiFuture<void>>>
+ planExecutorShardingCriticalSectionFuture =
+ OperationContext::declareDecoration<boost::optional<SharedSemiFuture<void>>>();
+
std::string PlanExecutor::stateToStr(ExecState execState) {
switch (execState) {
case PlanExecutor::ADVANCED:
diff --git a/src/mongo/db/query/plan_executor.h b/src/mongo/db/query/plan_executor.h
index bf7799dd3b3..a94e87648dd 100644
--- a/src/mongo/db/query/plan_executor.h
+++ b/src/mongo/db/query/plan_executor.h
@@ -56,6 +56,15 @@ class RecordId;
extern const OperationContext::Decoration<repl::OpTime> clientsLastKnownCommittedOpTime;
/**
+ * If a plan yielded because it encountered a sharding critical section,
+ * 'planExecutorShardingCriticalSectionFuture' will be set to a future that becomes ready when the
+ * critical section ends. This future can be waited on to hold off resuming the plan execution while
+ * the critical section is still active.
+ */
+extern const OperationContext::Decoration<boost::optional<SharedSemiFuture<void>>>
+ planExecutorShardingCriticalSectionFuture;
+
+/**
* A PlanExecutor is the abstraction that knows how to crank a tree of stages into execution.
* The executor is usually part of a larger abstraction that is interacting with the cache
* and/or the query optimizer.
diff --git a/src/mongo/db/query/plan_executor_impl.cpp b/src/mongo/db/query/plan_executor_impl.cpp
index 76559f3d003..808b0800d23 100644
--- a/src/mongo/db/query/plan_executor_impl.cpp
+++ b/src/mongo/db/query/plan_executor_impl.cpp
@@ -60,6 +60,7 @@
#include "mongo/db/query/plan_yield_policy_impl.h"
#include "mongo/db/query/yield_policy_callbacks_impl.h"
#include "mongo/db/repl/replication_coordinator.h"
+#include "mongo/db/s/operation_sharding_state.h"
#include "mongo/db/service_context.h"
#include "mongo/logv2/log.h"
#include "mongo/util/fail_point.h"
@@ -361,8 +362,25 @@ PlanExecutor::ExecState PlanExecutorImpl::_getNextImpl(Snapshotted<Document>* ob
// 2) some stage requested a yield, or
// 3) we need to yield and retry due to a WriteConflictException.
// In all cases, the actual yielding happens here.
+
+ const auto whileYieldingFn = [&]() {
+ // If we yielded because we encountered a sharding critical section, wait for the
+ // critical section to end before continuing. By waiting for the critical section to be
+ // exited we avoid busy spinning immediately and encountering the same critical section
+ // again. It is important that this wait happens after having released the lock
+ // hierarchy -- otherwise deadlocks could happen, or the very least, locks would be
+ // unnecessarily held while waiting.
+ const auto& shardingCriticalSection = planExecutorShardingCriticalSectionFuture(_opCtx);
+ if (shardingCriticalSection) {
+ OperationShardingState::waitForCriticalSectionToComplete(_opCtx,
+ *shardingCriticalSection)
+ .ignore();
+ planExecutorShardingCriticalSectionFuture(_opCtx).reset();
+ }
+ };
+
if (_yieldPolicy->shouldYieldOrInterrupt(_opCtx)) {
- uassertStatusOK(_yieldPolicy->yieldOrInterrupt(_opCtx));
+ uassertStatusOK(_yieldPolicy->yieldOrInterrupt(_opCtx, whileYieldingFn));
}
WorkingSetID id = WorkingSet::INVALID_ID;
diff --git a/src/mongo/db/query/plan_executor_sbe.h b/src/mongo/db/query/plan_executor_sbe.h
index f906d48d843..71b894c9f60 100644
--- a/src/mongo/db/query/plan_executor_sbe.h
+++ b/src/mongo/db/query/plan_executor_sbe.h
@@ -128,9 +128,8 @@ public:
BSONObj getPostBatchResumeToken() const override;
/**
- * Even though the leaves of '_root' will acquire AutoGet objects, the caller must acquire a top
- * level AutoGet object outside of this PlanExecutor in order to open a storage transaction and
- * establish a consistent view of the catalog.
+ * The caller must acquire a top level AutoGet object outside of this PlanExecutor in order to
+ * open a storage transaction and establish a consistent view of the catalog.
*/
LockPolicy lockPolicy() const override {
return LockPolicy::kLockExternally;
diff --git a/src/mongo/db/query/plan_yield_policy.cpp b/src/mongo/db/query/plan_yield_policy.cpp
index 545460f083e..dd660ef657e 100644
--- a/src/mongo/db/query/plan_yield_policy.cpp
+++ b/src/mongo/db/query/plan_yield_policy.cpp
@@ -90,7 +90,7 @@ Status PlanYieldPolicy::yieldOrInterrupt(OperationContext* opCtx,
for (int attempt = 1; true; attempt++) {
try {
- // Saving and restoring can modifies '_yieldable', so we make a copy before we start.
+ // Saving and restoring can modify '_yieldable', so we make a copy before we start.
const Yieldable* yieldable = _yieldable;
try {
@@ -122,7 +122,8 @@ Status PlanYieldPolicy::yieldOrInterrupt(OperationContext* opCtx,
invariant(!opCtx->isLockFreeReadsOp());
opCtx->recoveryUnit()->abandonSnapshot();
} else {
- performYield(opCtx, yieldable, whileYieldingFn);
+ invariant(yieldable);
+ performYield(opCtx, *yieldable, whileYieldingFn);
}
restoreState(opCtx, yieldable);
@@ -144,7 +145,7 @@ Status PlanYieldPolicy::yieldOrInterrupt(OperationContext* opCtx,
}
void PlanYieldPolicy::performYield(OperationContext* opCtx,
- const Yieldable* yieldable,
+ const Yieldable& yieldable,
std::function<void()> whileYieldingFn) {
// Things have to happen here in a specific order:
// * Release 'yieldable'.
@@ -162,9 +163,7 @@ void PlanYieldPolicy::performYield(OperationContext* opCtx,
// Since the locks are not recursively held, this is a top level operation and we can safely
// clear the 'yieldable' state before unlocking and then re-establish it after re-locking.
- if (yieldable) {
- yieldable->yield();
- }
+ yieldable.yield();
Locker::LockSnapshot snapshot;
auto unlocked = locker->saveLockStateAndUnlock(&snapshot);
@@ -179,9 +178,7 @@ void PlanYieldPolicy::performYield(OperationContext* opCtx,
if (!unlocked) {
// Nothing was unlocked. Recursively held locks are not the only reason locks cannot be
// released. Restore the 'yieldable' state before returning.
- if (yieldable) {
- yieldable->restore();
- }
+ yieldable.restore();
return;
}
@@ -199,15 +196,10 @@ void PlanYieldPolicy::performYield(OperationContext* opCtx,
locker->restoreLockState(opCtx, snapshot);
- // A yield has occurred, but there still may not be a 'yieldable'. This is true, for example,
- // when executing a getMore for the slot-based execution engine. SBE uses the "locks internally"
- // lock policy, and therefore the getMore code path does not acquire any db_raii object. As a
- // result, there is no db_raii object to restore here when executing a getMore against a cursor
- // using SBE.
- if (yieldable) {
- // Yieldable restore may set a new read source if necessary.
- yieldable->restore();
- }
+ // A yield has occurred, but there still may not be a 'yieldable' if the PlanExecutor
+ // has a 'locks internally' lock policy.
+ // Yieldable restore may set a new read source if necessary.
+ yieldable.restore();
}
} // namespace mongo
diff --git a/src/mongo/db/query/plan_yield_policy.h b/src/mongo/db/query/plan_yield_policy.h
index 9d44ba8e903..4cc060abf3a 100644
--- a/src/mongo/db/query/plan_yield_policy.h
+++ b/src/mongo/db/query/plan_yield_policy.h
@@ -301,7 +301,7 @@ private:
* storage engine snapshot.
*/
void performYield(OperationContext* opCtx,
- const Yieldable* yieldable,
+ const Yieldable& yieldable,
std::function<void()> whileYieldingFn);
const YieldPolicy _policy;
diff --git a/src/mongo/db/query/planner_access.cpp b/src/mongo/db/query/planner_access.cpp
index f283979cc5c..c226061c03b 100644
--- a/src/mongo/db/query/planner_access.cpp
+++ b/src/mongo/db/query/planner_access.cpp
@@ -374,7 +374,9 @@ std::unique_ptr<QuerySolutionNode> QueryPlannerAccess::makeCollectionScan(
const BSONObj& hint = query.getFindCommandRequest().getHint();
if (!hint.isEmpty()) {
BSONElement natural = hint[query_request_helper::kNaturalSortField];
- if (natural) {
+ // If we have a natural hint and a time series traversal preference, let the traversal
+ // preference decide what order to scan, so that we can avoid a blocking sort.
+ if (natural && !params.traversalPreference) {
// If the hint is {$natural: +-1} this changes the direction of the collection scan.
csn->direction = natural.safeNumberInt() >= 0 ? 1 : -1;
}
@@ -384,8 +386,8 @@ std::unique_ptr<QuerySolutionNode> QueryPlannerAccess::makeCollectionScan(
// the collection scan to return timestamp-based tokens. Otherwise, we should
// return generic RecordId-based tokens.
if (query.getFindCommandRequest().getRequestResumeToken()) {
- csn->shouldTrackLatestOplogTimestamp = query.nss().isOplog();
- csn->requestResumeToken = !query.nss().isOplog();
+ csn->shouldTrackLatestOplogTimestamp = query.nss().isOplogOrChangeCollection();
+ csn->requestResumeToken = !query.nss().isOplogOrChangeCollection();
}
// Extract and assign the RecordId from the 'resumeAfter' token, if present.
@@ -397,26 +399,31 @@ std::unique_ptr<QuerySolutionNode> QueryPlannerAccess::makeCollectionScan(
const bool assertMinTsHasNotFallenOffOplog =
params.options & QueryPlannerParams::ASSERT_MIN_TS_HAS_NOT_FALLEN_OFF_OPLOG;
- if (query.nss().isOplog() && csn->direction == 1) {
+ if (query.nss().isOplogOrChangeCollection() && csn->direction == 1) {
+ // Takes Timestamp 'ts' as input, transforms it to the RecordIdBound and assigns it to the
+ // output parameter 'recordId'. The RecordId format for the change collection is a string,
+ * whereas the RecordId format for the oplog is a long integer. The timestamp should be
+ // converted to the required format before assigning it to the 'recordId'.
+ auto assignRecordIdFromTimestamp = [&](auto& ts, auto* recordId) {
+ auto keyFormat = query.nss().isChangeCollection() ? KeyFormat::String : KeyFormat::Long;
+ auto status = record_id_helpers::keyForOptime(ts, keyFormat);
+ if (status.isOK()) {
+ *recordId = RecordIdBound(status.getValue());
+ }
+ };
+
// Optimizes the start and end location parameters for a collection scan for an oplog
// collection. Not compatible with $_resumeAfter so we do not optimize in that case.
if (resumeAfterObj.isEmpty()) {
auto [minTs, maxTs] = extractTsRange(query.root());
if (minTs) {
- StatusWith<RecordId> goal = record_id_helpers::keyForOptime(*minTs);
- if (goal.isOK()) {
- csn->minRecord = RecordIdBound(goal.getValue());
- }
-
+ assignRecordIdFromTimestamp(*minTs, &csn->minRecord);
if (assertMinTsHasNotFallenOffOplog) {
- csn->assertTsHasNotFallenOffOplog = *minTs;
+ csn->assertTsHasNotFallenOff = *minTs;
}
}
if (maxTs) {
- StatusWith<RecordId> goal = record_id_helpers::keyForOptime(*maxTs);
- if (goal.isOK()) {
- csn->maxRecord = RecordIdBound(goal.getValue());
- }
+ assignRecordIdFromTimestamp(*maxTs, &csn->maxRecord);
}
}
@@ -433,9 +440,9 @@ std::unique_ptr<QuerySolutionNode> QueryPlannerAccess::makeCollectionScan(
// specify a minimum timestamp. This is not a valid request, so we throw InvalidOptions.
if (assertMinTsHasNotFallenOffOplog) {
uassert(ErrorCodes::InvalidOptions,
- str::stream() << "assertTsHasNotFallenOffOplog cannot be applied to a query "
+ str::stream() << "assertTsHasNotFallenOff cannot be applied to a query "
"which does not imply a minimum 'ts' value ",
- csn->assertTsHasNotFallenOffOplog);
+ csn->assertTsHasNotFallenOff);
}
auto queryCollator = query.getCollator();
diff --git a/src/mongo/db/query/planner_access.h b/src/mongo/db/query/planner_access.h
index 3a133aae486..6ea44830415 100644
--- a/src/mongo/db/query/planner_access.h
+++ b/src/mongo/db/query/planner_access.h
@@ -35,6 +35,7 @@
#include "mongo/db/query/index_bounds_builder.h"
#include "mongo/db/query/index_tag.h"
#include "mongo/db/query/interval_evaluation_tree.h"
+#include "mongo/db/query/query_planner.h"
#include "mongo/db/query/query_planner_params.h"
#include "mongo/db/query/query_solution.h"
diff --git a/src/mongo/db/query/planner_analysis.cpp b/src/mongo/db/query/planner_analysis.cpp
index 9f3460a1cc6..921b79dc70b 100644
--- a/src/mongo/db/query/planner_analysis.cpp
+++ b/src/mongo/db/query/planner_analysis.cpp
@@ -873,6 +873,41 @@ bool QueryPlannerAnalysis::explodeForSort(const CanonicalQuery& query,
return true;
}
+// This function is used to check if the given index pattern and direction in the traversal
+// preference can be used to satisfy the given sort pattern (specifically for time series
+// collections).
+bool sortMatchesTraversalPreference(const TraversalPreference& traversalPreference,
+ const BSONObj& indexPattern) {
+ BSONObjIterator sortIter(traversalPreference.sortPattern);
+ BSONObjIterator indexIter(indexPattern);
+ while (sortIter.more() && indexIter.more()) {
+ BSONElement sortPart = sortIter.next();
+ BSONElement indexPart = indexIter.next();
+
+ if (!sortPart.isNumber() || !indexPart.isNumber()) {
+ return false;
+ }
+
+ // If the field doesn't match or the directions don't match, we return false.
+ if (strcmp(sortPart.fieldName(), indexPart.fieldName()) != 0 ||
+ (sortPart.safeNumberInt() > 0) != (indexPart.safeNumberInt() > 0)) {
+ return false;
+ }
+ }
+
+ if (!indexIter.more() && sortIter.more()) {
+ // The sort still has more, so it cannot be a prefix of the index.
+ return false;
+ }
+ return true;
+}
+
+bool isShardedCollScan(QuerySolutionNode* solnRoot) {
+ return solnRoot->getType() == StageType::STAGE_SHARDING_FILTER &&
+ solnRoot->children.size() == 1 &&
+ solnRoot->children[0]->getType() == StageType::STAGE_COLLSCAN;
+}
+
// static
std::unique_ptr<QuerySolutionNode> QueryPlannerAnalysis::analyzeSort(
const CanonicalQuery& query,
@@ -882,6 +917,28 @@ std::unique_ptr<QuerySolutionNode> QueryPlannerAnalysis::analyzeSort(
*blockingSortOut = false;
const FindCommandRequest& findCommand = query.getFindCommandRequest();
+ if (params.traversalPreference) {
+ // If we've been passed a traversal preference, we might want to reverse the order we scan
+ // the data to avoid a blocking sort later in the pipeline.
+ auto providedSorts = solnRoot->providedSorts();
+
+ BSONObj solnSortPattern;
+ if (solnRoot->getType() == StageType::STAGE_COLLSCAN || isShardedCollScan(solnRoot.get())) {
+ BSONObjBuilder builder;
+ builder.append(params.traversalPreference->clusterField, 1);
+ solnSortPattern = builder.obj();
+ } else {
+ solnSortPattern = providedSorts.getBaseSortPattern();
+ }
+
+ if (sortMatchesTraversalPreference(params.traversalPreference.get(), solnSortPattern) &&
+ QueryPlannerCommon::scanDirectionsEqual(solnRoot.get(),
+ -params.traversalPreference->direction)) {
+ QueryPlannerCommon::reverseScans(solnRoot.get(), true);
+ return solnRoot;
+ }
+ }
+
const BSONObj& sortObj = findCommand.getSort();
if (sortObj.isEmpty()) {
diff --git a/src/mongo/db/query/planner_analysis.h b/src/mongo/db/query/planner_analysis.h
index d7473336384..b4b8979c29d 100644
--- a/src/mongo/db/query/planner_analysis.h
+++ b/src/mongo/db/query/planner_analysis.h
@@ -30,6 +30,7 @@
#pragma once
#include "mongo/db/query/canonical_query.h"
+#include "mongo/db/query/query_planner.h"
#include "mongo/db/query/query_planner_params.h"
#include "mongo/db/query/query_solution.h"
diff --git a/src/mongo/db/query/projection.cpp b/src/mongo/db/query/projection.cpp
index 3c93367ae8c..af5fedfe780 100644
--- a/src/mongo/db/query/projection.cpp
+++ b/src/mongo/db/query/projection.cpp
@@ -104,6 +104,7 @@ public:
void visit(const ProjectionElemMatchASTNode* node) final {
_deps->requiresDocument = true;
_deps->hasExpressions = true;
+ _deps->containsElemMatch = true;
}
void visit(const ExpressionASTNode* node) final {
diff --git a/src/mongo/db/query/projection.h b/src/mongo/db/query/projection.h
index 9987dd641db..914567e87ed 100644
--- a/src/mongo/db/query/projection.h
+++ b/src/mongo/db/query/projection.h
@@ -46,6 +46,7 @@ struct ProjectionDependencies {
// Whether the entire document is required to do the projection.
bool requiresDocument = false;
bool hasExpressions = false;
+ bool containsElemMatch = false;
// Which fields are necessary to perform the projection, or boost::none if all are required.
boost::optional<std::set<std::string>> requiredFields;
@@ -137,6 +138,10 @@ public:
_deps.metadataRequested.none() && !_deps.requiresDocument && !_deps.hasExpressions;
}
+ bool containsElemMatch() const {
+ return _deps.containsElemMatch;
+ }
+
private:
ProjectionPathASTNode _root;
ProjectType _type;
diff --git a/src/mongo/db/query/query_feature_flags.idl b/src/mongo/db/query/query_feature_flags.idl
index eb6ae4782fd..61e50906c87 100644
--- a/src/mongo/db/query/query_feature_flags.idl
+++ b/src/mongo/db/query/query_feature_flags.idl
@@ -48,12 +48,6 @@ feature_flags:
default: true
version: 5.1
- featureFlagSBEGroupPushdown:
- description: "Feature flag for allowing SBE $group pushdown"
- cpp_varname: gFeatureFlagSBEGroupPushdown
- default: true
- version: 5.2
-
featureFlagExactTopNAccumulator:
description: "Feature flag for allowing use of topN family of accumulators"
cpp_varname: gFeatureFlagExactTopNAccumulator
@@ -115,7 +109,7 @@ feature_flags:
featureFlagCommonQueryFramework:
description: "Feature flag for allowing use of Cascades-based query optimizer"
- cpp_varname: gfeatureFlagCommonQueryFramework
+ cpp_varname: gFeatureFlagCommonQueryFramework
default: false
featureFlagLastPointQuery:
@@ -124,12 +118,6 @@ feature_flags:
default: true
version: 6.0
- featureFlagSBELookupPushdown:
- description: "Feature flag for allowing SBE $lookup pushdown"
- cpp_varname: gFeatureFlagSBELookupPushdown
- default: true
- version: 6.0
-
featureFlagSearchShardedFacets:
description: "Enable use of $$SEARCH_META on sharded collections"
cpp_varname: gFeatureFlagSearchShardedFacets
@@ -155,5 +143,9 @@ feature_flags:
featureFlagSbeFull:
description: "Feature flag to enable using SBE for a larger number of queries"
cpp_varname: gFeatureFlagSbeFull
- default: true
- version: 6.0
+ default: false
+
+ featureFlagTimeSeriesChangeStreams:
+ description: "Feature flag for $changeStream support for time series"
+ cpp_varname: gFeatureFlagTimeSeriesChangeStreams
+ default: false
diff --git a/src/mongo/db/query/query_knobs.idl b/src/mongo/db/query/query_knobs.idl
index 18851f0ddb9..53c1e5e7617 100644
--- a/src/mongo/db/query/query_knobs.idl
+++ b/src/mongo/db/query/query_knobs.idl
@@ -728,7 +728,7 @@ server_parameters:
set_at: [ startup, runtime ]
cpp_varname: "internalQueryEnableCascadesOptimizer"
cpp_vartype: AtomicWord<bool>
- default: false
+ default: true
internalCascadesOptimizerDisableScan:
description: "Disable full collection scans in the Cascades optimizer."
@@ -780,6 +780,14 @@ server_parameters:
cpp_vartype: AtomicWord<bool>
default: false
+ internalQueryForceCommonQueryFramework:
+ description: "Set to always use the bonsai optimizer, regardless of the query."
+ set_at: [ startup, runtime ]
+ cpp_varname: "internalQueryForceCommonQueryFramework"
+ cpp_vartype: AtomicWord<bool>
+ test_only: true
+ default: false
+
internalQueryCollectionMaxNoOfDocumentsToChooseHashJoin:
description: "Up to what number of documents do we choose the hash join algorithm when $lookup
is translated to a SBE plan."
@@ -863,6 +871,14 @@ server_parameters:
gt: 0
lt: 16777216
+ internalQueryFLEAlwaysUseHighCardinalityMode:
+ description: "Boolean flag to force FLE to always use low selectivity mode"
+ set_at: [ startup, runtime ]
+ cpp_varname: "internalQueryFLEAlwaysUseHighCardinalityMode"
+ cpp_vartype: AtomicWord<bool>
+ default:
+ expr: false
+
# Note for adding additional query knobs:
#
# When adding a new query knob, you should consider whether or not you need to add an 'on_update'
diff --git a/src/mongo/db/query/query_planner.cpp b/src/mongo/db/query/query_planner.cpp
index de727ab6190..0a4f40fea4a 100644
--- a/src/mongo/db/query/query_planner.cpp
+++ b/src/mongo/db/query/query_planner.cpp
@@ -30,15 +30,15 @@
#include "mongo/platform/basic.h"
-#include "mongo/db/query/query_planner.h"
-
#include <boost/optional.hpp>
#include <vector>
#include "mongo/base/string_data.h"
+#include "mongo/bson/bsonobj.h"
#include "mongo/bson/simple_bsonelement_comparator.h"
#include "mongo/db/bson/dotted_path_support.h"
#include "mongo/db/catalog/clustered_collection_util.h"
+#include "mongo/db/exec/bucket_unpacker.h"
#include "mongo/db/index/wildcard_key_generator.h"
#include "mongo/db/index_names.h"
#include "mongo/db/matcher/expression_algo.h"
@@ -50,14 +50,19 @@
#include "mongo/db/query/classic_plan_cache.h"
#include "mongo/db/query/collation/collation_index_key.h"
#include "mongo/db/query/collation/collator_interface.h"
+#include "mongo/db/query/internal_plans.h"
#include "mongo/db/query/plan_cache.h"
#include "mongo/db/query/plan_enumerator.h"
#include "mongo/db/query/planner_access.h"
#include "mongo/db/query/planner_analysis.h"
#include "mongo/db/query/planner_ixselect.h"
+#include "mongo/db/query/projection_parser.h"
+#include "mongo/db/query/query_knobs_gen.h"
+#include "mongo/db/query/query_planner.h"
#include "mongo/db/query/query_planner_common.h"
#include "mongo/db/query/query_solution.h"
#include "mongo/logv2/log.h"
+#include "mongo/util/assert_util_core.h"
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kQuery
@@ -168,8 +173,8 @@ bool hintMatchesClusterKey(const boost::optional<ClusteredCollectionInfo>& clust
}
/**
- * Returns the dependencies for the CanoncialQuery, split by those needed to answer the filter, and
- * those needed for "everything else" which is the project and sort.
+ * Returns the dependencies for the CanonicalQuery, split by those needed to answer the filter,
+ * and those needed for "everything else" which is the project and sort.
*/
std::pair<DepsTracker, DepsTracker> computeDeps(const QueryPlannerParams& params,
const CanonicalQuery& query) {
@@ -189,8 +194,8 @@ std::pair<DepsTracker, DepsTracker> computeDeps(const QueryPlannerParams& params
outputDeps.fields.emplace(field.fieldNameStringData());
}
}
- // There's no known way a sort would depend on the whole document, and we already verified that
- // the projection doesn't depend on the whole document.
+ // There's no known way a sort would depend on the whole document, and we already verified
+ // that the projection doesn't depend on the whole document.
tassert(6430503, "Unexpectedly required entire object", !outputDeps.needWholeDocument);
return {std::move(filterDeps), std::move(outputDeps)};
}
@@ -285,8 +290,8 @@ string optionString(size_t options) {
ss << "DEFAULT ";
}
while (options) {
- // The expression (x & (x - 1)) yields x with the lowest bit cleared. Then the exclusive-or
- // of the result with the original yields the lowest bit by itself.
+ // The expression (x & (x - 1)) yields x with the lowest bit cleared. Then the
+ // exclusive-or of the result with the original yields the lowest bit by itself.
size_t new_options = options & (options - 1);
QueryPlannerParams::Options opt = QueryPlannerParams::Options(new_options ^ options);
options = new_options;
@@ -477,12 +482,16 @@ std::unique_ptr<QuerySolution> buildCollscanSoln(const CanonicalQuery& query,
return QueryPlannerAnalysis::analyzeDataAccess(query, params, std::move(solnRoot));
}
-std::unique_ptr<QuerySolution> buildWholeIXSoln(const IndexEntry& index,
- const CanonicalQuery& query,
- const QueryPlannerParams& params,
- int direction = 1) {
+std::unique_ptr<QuerySolution> buildWholeIXSoln(
+ const IndexEntry& index,
+ const CanonicalQuery& query,
+ const QueryPlannerParams& params,
+ const boost::optional<int>& direction = boost::none) {
+ tassert(6499400,
+ "Cannot pass both an explicit direction and a traversal preference",
+ !(direction.has_value() && params.traversalPreference));
std::unique_ptr<QuerySolutionNode> solnRoot(
- QueryPlannerAccess::scanWholeIndex(index, query, params, direction));
+ QueryPlannerAccess::scanWholeIndex(index, query, params, direction.value_or(1)));
return QueryPlannerAnalysis::analyzeDataAccess(query, params, std::move(solnRoot));
}
@@ -702,7 +711,8 @@ StatusWith<std::unique_ptr<QuerySolution>> QueryPlanner::planFromCache(
return s;
}
- // The MatchExpression tree is in canonical order. We must order the nodes for access planning.
+ // The MatchExpression tree is in canonical order. We must order the nodes for access
+ // planning.
prepareForAccessPlanning(clone.get());
LOGV2_DEBUG(20965, 5, "Tagged tree", "tree"_attr = redact(clone->debugString()));
@@ -733,8 +743,8 @@ StatusWith<std::unique_ptr<QuerySolution>> QueryPlanner::planFromCache(
StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan(
const CanonicalQuery& query, const QueryPlannerParams& params) {
- // It's a little silly to ask for a count and for owned data. This could indicate a bug earlier
- // on.
+ // It's a little silly to ask for a count and for owned data. This could indicate a bug
+ // earlier on.
tassert(5397500,
"Count and owned data requested",
!((params.options & QueryPlannerParams::IS_COUNT) &&
@@ -780,10 +790,10 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan(
const BSONObj& hintObj = query.getFindCommandRequest().getHint();
const auto naturalHint = hintObj[query_request_helper::kNaturalSortField];
if (naturalHint || hintMatchesClusterKey(params.clusteredInfo, hintObj)) {
- // The hint can be {$natural: +/-1}. If this happens, output a collscan. We expect any
- // $natural sort to have been normalized to a $natural hint upstream. Additionally, if
- // the hint matches the collection's cluster key, we also output a collscan utilizing
- // the cluster key.
+ // The hint can be {$natural: +/-1}. If this happens, output a collscan. We expect
+ // any $natural sort to have been normalized to a $natural hint upstream.
+ // Additionally, if the hint matches the collection's cluster key, we also output a
+ // collscan utilizing the cluster key.
if (naturalHint) {
// Perform validation specific to $natural.
@@ -804,8 +814,8 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan(
const auto clusterKey = params.clusteredInfo->getIndexSpec().getKey();
- // Check if the query collator is compatible with the collection collator for the
- // provided min and max values.
+ // Check if the query collator is compatible with the collection collator for
+ // the provided min and max values.
if ((!minObj.isEmpty() &&
!indexCompatibleMaxMin(minObj,
query.getCollator(),
@@ -846,17 +856,17 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan(
}
}
- // Hints require us to only consider the hinted index. If index filters in the query settings
- // were used to override the allowed indices for planning, we should not use the hinted index
- // requested in the query.
+ // Hints require us to only consider the hinted index. If index filters in the query
+ // settings were used to override the allowed indices for planning, we should not use the
+ // hinted index requested in the query.
BSONObj hintedIndex;
if (!params.indexFiltersApplied) {
hintedIndex = query.getFindCommandRequest().getHint();
}
- // Either the list of indices passed in by the caller, or the list of indices filtered according
- // to the hint. This list is later expanded in order to allow the planner to handle wildcard
- // indexes.
+ // Either the list of indices passed in by the caller, or the list of indices filtered
+ // according to the hint. This list is later expanded in order to allow the planner to
+ // handle wildcard indexes.
std::vector<IndexEntry> fullIndexList;
// Will hold a copy of the index entry chosen by the hint.
@@ -896,7 +906,8 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan(
} else {
relevantIndices = fullIndexList;
- // Relevant indices should only ever exceed a size of 1 when there is a hint in the case of
+ // Relevant indices should only ever exceed a size of 1 when there is a hint in the case
+ // of
// $** index.
if (relevantIndices.size() > 1) {
for (auto&& entry : relevantIndices) {
@@ -931,13 +942,13 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan(
invariant(*hintedIndexEntry == fullIndexList.front());
// In order to be fully compatible, the min has to be less than the max according to the
- // index key pattern ordering. The first step in verifying this is "finish" the min and max
- // by replacing empty objects and stripping field names.
+ // index key pattern ordering. The first step in verifying this is "finish" the min and
+ // max by replacing empty objects and stripping field names.
BSONObj finishedMinObj = finishMinObj(*hintedIndexEntry, minObj, maxObj);
BSONObj finishedMaxObj = finishMaxObj(*hintedIndexEntry, minObj, maxObj);
- // Now we have the final min and max. This index is only relevant for the min/max query if
- // min < max.
+ // Now we have the final min and max. This index is only relevant for the min/max query
+ // if min < max.
if (finishedMinObj.woCompare(finishedMaxObj, hintedIndexEntry->keyPattern, false) >= 0) {
return Status(ErrorCodes::Error(51175),
"The value provided for min() does not come before the value provided "
@@ -1069,9 +1080,9 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan(
"About to build solntree from tagged tree",
"tree"_attr = redact(nextTaggedTree->debugString()));
- // Store the plan cache index tree before calling prepareForAccessingPlanning(), so that
- // the PlanCacheIndexTree has the same sort as the MatchExpression used to generate the
- // plan cache key.
+        // Store the plan cache index tree before calling prepareForAccessPlanning(), so
+ // that the PlanCacheIndexTree has the same sort as the MatchExpression used to
+ // generate the plan cache key.
std::unique_ptr<MatchExpression> clone(nextTaggedTree->shallowClone());
std::unique_ptr<PlanCacheIndexTree> cacheData;
auto statusWithCacheData = cacheDataFromTaggedTree(clone.get(), relevantIndices);
@@ -1084,8 +1095,8 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan(
cacheData = std::move(statusWithCacheData.getValue());
}
- // We have already cached the tree in canonical order, so now we can order the nodes for
- // access planning.
+ // We have already cached the tree in canonical order, so now we can order the nodes
+ // for access planning.
prepareForAccessPlanning(nextTaggedTree.get());
// This can fail if enumeration makes a mistake.
@@ -1134,7 +1145,8 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan(
// An index was hinted. If there are any solutions, they use the hinted index. If not, we
// scan the entire index to provide results and output that as our plan. This is the
- // desired behavior when an index is hinted that is not relevant to the query. In the case that
+ // desired behavior when an index is hinted that is not relevant to the query. In the case
+ // that
// $** index is hinted, we do not want this behavior.
if (!hintedIndex.isEmpty() && relevantIndices.size() == 1) {
if (out.size() > 0) {
@@ -1145,6 +1157,7 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan(
ErrorCodes::NoQueryExecutionPlans,
"$hint: refusing to build whole-index solution, because it's a wildcard index");
}
+
// Return hinted index solution if found.
auto soln = buildWholeIXSoln(relevantIndices.front(), query, params);
if (!soln) {
@@ -1177,8 +1190,9 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan(
if (!usingIndexToSort) {
for (size_t i = 0; i < fullIndexList.size(); ++i) {
const IndexEntry& index = fullIndexList[i];
- // Only a regular index or the non-hashed prefix of a compound hashed index can be
- // used to provide a sort. In addition, the index needs to be a non-sparse index.
+ // Only a regular index or the non-hashed prefix of a compound hashed index can
+ // be used to provide a sort. In addition, the index needs to be a non-sparse
+ // index.
//
// TODO: Sparse indexes can't normally provide a sort, because non-indexed
// documents could potentially be missing from the result set. However, if the
@@ -1198,14 +1212,14 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan(
continue;
}
- // If the index collation differs from the query collation, the index should not be
- // used to provide a sort, because strings will be ordered incorrectly.
+ // If the index collation differs from the query collation, the index should not
+ // be used to provide a sort, because strings will be ordered incorrectly.
if (!CollatorInterface::collatorsMatch(index.collator, query.getCollator())) {
continue;
}
- // Partial indexes can only be used to provide a sort only if the query predicate is
- // compatible.
+            // Partial indexes can be used to provide a sort only if the query
+            // predicate is compatible.
if (index.filterExpr && !expression::isSubsetOf(query.root(), index.filterExpr)) {
continue;
}
@@ -1264,10 +1278,10 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan(
if (direction != 0) {
auto soln = buildCollscanSoln(query, isTailable, params, direction);
if (soln) {
- LOGV2_DEBUG(
- 6082401,
- 5,
- "Planner: outputting soln that uses clustered index to provide sort");
+ LOGV2_DEBUG(6082401,
+ 5,
+ "Planner: outputting soln that uses clustered index to "
+ "provide sort");
SolutionCacheData* scd = new SolutionCacheData();
scd->solnType = SolutionCacheData::COLLSCAN_SOLN;
scd->wholeIXSolnDir = direction;
@@ -1280,8 +1294,8 @@ StatusWith<std::vector<std::unique_ptr<QuerySolution>>> QueryPlanner::plan(
}
}
- // If a projection exists, there may be an index that allows for a covered plan, even if none
- // were considered earlier.
+ // If a projection exists, there may be an index that allows for a covered plan, even if
+ // none were considered earlier.
const auto projection = query.getProj();
if (params.options & QueryPlannerParams::GENERATE_COVERED_IXSCANS && out.size() == 0 &&
query.getQueryObj().isEmpty() && projection && !projection->requiresDocument()) {
diff --git a/src/mongo/db/query/query_planner_common.cpp b/src/mongo/db/query/query_planner_common.cpp
index 013c2b88378..b6c3253fd28 100644
--- a/src/mongo/db/query/query_planner_common.cpp
+++ b/src/mongo/db/query/query_planner_common.cpp
@@ -33,7 +33,10 @@
#include "mongo/base/exact_cast.h"
#include "mongo/db/query/projection_ast_path_tracking_visitor.h"
#include "mongo/db/query/query_planner_common.h"
+#include "mongo/db/query/query_solution.h"
+#include "mongo/db/query/stage_types.h"
#include "mongo/db/query/tree_walker.h"
+#include "mongo/logv2/log.h"
#include "mongo/logv2/redaction.h"
#include "mongo/util/assert_util.h"
@@ -42,7 +45,38 @@
namespace mongo {
-void QueryPlannerCommon::reverseScans(QuerySolutionNode* node) {
+bool QueryPlannerCommon::scanDirectionsEqual(QuerySolutionNode* node, int direction) {
+ StageType type = node->getType();
+
+ boost::optional<int> scanDir;
+ if (STAGE_IXSCAN == type) {
+ IndexScanNode* isn = static_cast<IndexScanNode*>(node);
+ scanDir = isn->direction;
+ } else if (STAGE_DISTINCT_SCAN == type) {
+ DistinctNode* dn = static_cast<DistinctNode*>(node);
+ scanDir = dn->direction;
+ } else if (STAGE_COLLSCAN == type) {
+ CollectionScanNode* collScan = static_cast<CollectionScanNode*>(node);
+ scanDir = collScan->direction;
+ } else {
+ // We shouldn't encounter a sort stage.
+ invariant(!isSortStageType(type));
+ }
+
+ // If we found something with a direction, and the direction doesn't match, we return false.
+ if (scanDir && scanDir != direction) {
+ return false;
+ }
+
+ for (size_t i = 0; i < node->children.size(); ++i) {
+ if (!scanDirectionsEqual(node->children[i].get(), direction)) {
+ return false;
+ }
+ }
+ return true;
+}
+
+void QueryPlannerCommon::reverseScans(QuerySolutionNode* node, bool reverseCollScans) {
StageType type = node->getType();
if (STAGE_IXSCAN == type) {
@@ -72,6 +106,9 @@ void QueryPlannerCommon::reverseScans(QuerySolutionNode* node) {
// reverse direction of comparison for merge
MergeSortNode* msn = static_cast<MergeSortNode*>(node);
msn->sort = reverseSortObj(msn->sort);
+ } else if (reverseCollScans && STAGE_COLLSCAN == type) {
+ CollectionScanNode* collScan = static_cast<CollectionScanNode*>(node);
+ collScan->direction *= -1;
} else {
// Reversing scans is done in order to determine whether or not we need to add an explicit
// SORT stage. There shouldn't already be one present in the plan.
@@ -79,7 +116,7 @@ void QueryPlannerCommon::reverseScans(QuerySolutionNode* node) {
}
for (size_t i = 0; i < node->children.size(); ++i) {
- reverseScans(node->children[i].get());
+ reverseScans(node->children[i].get(), reverseCollScans);
}
}
diff --git a/src/mongo/db/query/query_planner_common.h b/src/mongo/db/query/query_planner_common.h
index 3c3bb88936c..6d441155b54 100644
--- a/src/mongo/db/query/query_planner_common.h
+++ b/src/mongo/db/query/query_planner_common.h
@@ -79,10 +79,17 @@ public:
}
/**
+ * Traverses the tree rooted at 'node'. Tests scan directions recursively to see if they are
+ * equal to the given direction argument. Returns true if they are and false otherwise.
+ */
+ static bool scanDirectionsEqual(QuerySolutionNode* node, int direction);
+
+ /**
* Traverses the tree rooted at 'node'. For every STAGE_IXSCAN encountered, reverse
- * the scan direction and index bounds.
+     * the scan direction and index bounds. If 'reverseCollScans' is true, STAGE_COLLSCAN
+     * is reversed as well.
*/
- static void reverseScans(QuerySolutionNode* node);
+ static void reverseScans(QuerySolutionNode* node, bool reverseCollScans = false);
/**
* Extracts all field names for the sortKey meta-projection and stores them in the returned
diff --git a/src/mongo/db/query/query_planner_params.h b/src/mongo/db/query/query_planner_params.h
index af643a632c8..3a7dc82c79f 100644
--- a/src/mongo/db/query/query_planner_params.h
+++ b/src/mongo/db/query/query_planner_params.h
@@ -58,9 +58,24 @@ struct SecondaryCollectionInfo {
long long storageSizeBytes{0};
};
+
+// This holds information about the internal traversal preference used for time series. If we choose
+// an index that involves fields we're interested in, we prefer a specific direction to avoid a
+// blocking sort.
+struct TraversalPreference {
+ // If we end up with an index that provides {sortPattern}, we prefer to scan it in direction
+ // {direction}.
+ BSONObj sortPattern;
+ int direction;
+ // Cluster key for the collection this query accesses (for time-series it's control.min.time).
+ // If a collection scan is chosen, this will be compared against the sortPattern to see if we
+ // can satisfy the traversal preference.
+ std::string clusterField;
+};
+
struct QueryPlannerParams {
- QueryPlannerParams()
- : options(DEFAULT),
+ QueryPlannerParams(size_t options = DEFAULT)
+ : options(options),
indexFiltersApplied(false),
maxIndexedSolutions(internalQueryPlannerMaxIndexedSolutions.load()),
clusteredCollectionCollator(nullptr) {}
@@ -178,6 +193,8 @@ struct QueryPlannerParams {
// List of information about any secondary collections that can be executed against.
std::map<NamespaceString, SecondaryCollectionInfo> secondaryCollectionsInfo;
+
+ boost::optional<TraversalPreference> traversalPreference = boost::none;
};
} // namespace mongo
diff --git a/src/mongo/db/query/query_request_helper.cpp b/src/mongo/db/query/query_request_helper.cpp
index 53e8092de75..370a9a1137e 100644
--- a/src/mongo/db/query/query_request_helper.cpp
+++ b/src/mongo/db/query/query_request_helper.cpp
@@ -36,7 +36,6 @@
#include "mongo/base/status.h"
#include "mongo/base/status_with.h"
#include "mongo/bson/simple_bsonobj_comparator.h"
-#include "mongo/client/query.h"
#include "mongo/db/commands/test_commands_enabled.h"
#include "mongo/db/dbmessage.h"
diff --git a/src/mongo/db/query/query_request_helper.h b/src/mongo/db/query/query_request_helper.h
index 4d3ec6143c8..4edad47e067 100644
--- a/src/mongo/db/query/query_request_helper.h
+++ b/src/mongo/db/query/query_request_helper.h
@@ -40,15 +40,13 @@
namespace mongo {
-class QueryMessage;
class Status;
-class Query;
template <typename T>
class StatusWith;
/**
- * Parses the QueryMessage or find command received from the user and makes the various fields
- * more easily accessible.
+ * Parses the find command received from the user and makes the various fields more easily
+ * accessible.
*/
namespace query_request_helper {
diff --git a/src/mongo/db/query/query_solution.cpp b/src/mongo/db/query/query_solution.cpp
index 893fef833e0..b62b54c386c 100644
--- a/src/mongo/db/query/query_solution.cpp
+++ b/src/mongo/db/query/query_solution.cpp
@@ -332,7 +332,7 @@ std::unique_ptr<QuerySolutionNode> CollectionScanNode::clone() const {
copy->tailable = this->tailable;
copy->direction = this->direction;
copy->shouldTrackLatestOplogTimestamp = this->shouldTrackLatestOplogTimestamp;
- copy->assertTsHasNotFallenOffOplog = this->assertTsHasNotFallenOffOplog;
+ copy->assertTsHasNotFallenOff = this->assertTsHasNotFallenOff;
copy->shouldWaitForOplogVisibility = this->shouldWaitForOplogVisibility;
copy->clusteredIndex = this->clusteredIndex;
copy->hasCompatibleCollation = this->hasCompatibleCollation;
diff --git a/src/mongo/db/query/query_solution.h b/src/mongo/db/query/query_solution.h
index 455c5aabcaa..27a4ff33977 100644
--- a/src/mongo/db/query/query_solution.h
+++ b/src/mongo/db/query/query_solution.h
@@ -489,7 +489,7 @@ struct CollectionScanNode : public QuerySolutionNodeWithSortSet {
bool shouldTrackLatestOplogTimestamp = false;
// Assert that the specified timestamp has not fallen off the oplog.
- boost::optional<Timestamp> assertTsHasNotFallenOffOplog = boost::none;
+ boost::optional<Timestamp> assertTsHasNotFallenOff = boost::none;
int direction{1};
diff --git a/src/mongo/db/query/sbe_cached_solution_planner.cpp b/src/mongo/db/query/sbe_cached_solution_planner.cpp
index 5f1b8f008d6..0ecd5ba50f5 100644
--- a/src/mongo/db/query/sbe_cached_solution_planner.cpp
+++ b/src/mongo/db/query/sbe_cached_solution_planner.cpp
@@ -53,10 +53,17 @@ CandidatePlans CachedSolutionPlanner::plan(
// If the cached plan is accepted we'd like to keep the results from the trials even if there
// are parts of agg pipelines being lowered into SBE, so we run the trial with the extended
- // plan. This works because TrialRunTracker, attached to HashAgg stage, tracks as "results" the
- // results of its child stage. Thus, we can use the number of reads the plan was cached with
- // during multiplanning even though multiplanning ran trials of pre-extended plans.
- if (!_cq.pipeline().empty()) {
+ // plan. This works because TrialRunTracker, attached to HashAgg stage in $group queries, tracks
+ // as "results" the results of its child stage. For $lookup queries, the TrialRunTracker will
+ // only track the number of reads from the local side. Thus, we can use the number of reads the
+ // plan was cached with during multiplanning even though multiplanning ran trials of
+ // pre-extended plans.
+ //
+ // TODO SERVER-61507: Remove canUseSbePlanCache check once $group pushdown is integrated with
+ // SBE plan cache.
+ if (!_cq.pipeline().empty() &&
+ !(feature_flags::gFeatureFlagSbePlanCache.isEnabledAndIgnoreFCV() &&
+ canonical_query_encoder::canUseSbePlanCache(_cq))) {
_yieldPolicy->clearRegisteredPlans();
auto secondaryCollectionsInfo =
fillOutSecondaryCollectionsInformation(_opCtx, _collections, &_cq);
@@ -184,7 +191,7 @@ CandidatePlans CachedSolutionPlanner::replan(bool shouldCache, std::string reaso
cache->deactivate(plan_cache_key_factory::make<mongo::PlanCacheKey>(_cq, mainColl));
if (feature_flags::gFeatureFlagSbePlanCache.isEnabledAndIgnoreFCV()) {
auto&& sbePlanCache = sbe::getPlanCache(_opCtx);
- sbePlanCache.deactivate(plan_cache_key_factory::make<sbe::PlanCacheKey>(_cq, mainColl));
+ sbePlanCache.deactivate(plan_cache_key_factory::make(_cq, _collections));
}
}
diff --git a/src/mongo/db/query/sbe_multi_planner.cpp b/src/mongo/db/query/sbe_multi_planner.cpp
index b9966e74683..c4ba4f7efad 100644
--- a/src/mongo/db/query/sbe_multi_planner.cpp
+++ b/src/mongo/db/query/sbe_multi_planner.cpp
@@ -130,13 +130,13 @@ CandidatePlans MultiPlanner::finalizeExecutionPlans(
winner.root->open(false);
}
- // Writes a cache entry for the winning plan to the plan cache if possible.
- plan_cache_util::updatePlanCache(_opCtx,
- _collections.getMainCollection(),
- _cachingMode,
- _cq,
- std::move(decision),
- candidates);
+ // If there is a pushed down pipeline that cannot use SBE plan cache, then write a cache entry
+ // before extending the pipeline.
+ // TODO SERVER-61507: Remove this block once $group pushdown is integrated with SBE plan cache.
+ if (!canonical_query_encoder::canUseSbePlanCache(_cq)) {
+ plan_cache_util::updatePlanCache(
+ _opCtx, _collections, _cachingMode, _cq, std::move(decision), candidates);
+ }
// Extend the winning candidate with the agg pipeline and rebuild the execution tree. Because
// the trial was done with find-only part of the query, we cannot reuse the results. The
@@ -152,10 +152,16 @@ CandidatePlans MultiPlanner::finalizeExecutionPlans(
// The winner might have been replanned. So, pass through the replanning reason to the new
// plan.
data.replanReason = std::move(winner.data.replanReason);
+
+ // We need to clone the plan here for the plan cache to use. The clone will be stored in the
+ // cache prior to preparation, whereas the original copy of the tree will be prepared and
+ // used to execute this query.
+ auto clonedPlan = std::make_pair(rootStage->clone(), stage_builder::PlanStageData(data));
stage_builder::prepareSlotBasedExecutableTree(
_opCtx, rootStage.get(), &data, _cq, _collections, _yieldPolicy);
candidates[winnerIdx] = sbe::plan_ranker::CandidatePlan{
std::move(solution), std::move(rootStage), std::move(data)};
+ candidates[winnerIdx].clonedPlan.emplace(std::move(clonedPlan));
candidates[winnerIdx].root->open(false);
if (_cq.getExplain()) {
@@ -173,6 +179,16 @@ CandidatePlans MultiPlanner::finalizeExecutionPlans(
}
}
+ // If pipeline can use SBE plan cache or there is no pushed down pipeline, then write a cache
+ // entry after extending the pipeline.
+ // TODO SERVER-61507: Remove canUseSbePlanCache check once $group pushdown is
+ // integrated with SBE plan cache.
+ if (canonical_query_encoder::canUseSbePlanCache(_cq)) {
+ // Writes a cache entry for the winning plan to the plan cache if possible.
+ plan_cache_util::updatePlanCache(
+ _opCtx, _collections, _cachingMode, _cq, std::move(decision), candidates);
+ }
+
return {std::move(candidates), winnerIdx};
}
} // namespace mongo::sbe
diff --git a/src/mongo/db/query/sbe_plan_cache.cpp b/src/mongo/db/query/sbe_plan_cache.cpp
index 0d7a90e9ed5..bbd6db6418a 100644
--- a/src/mongo/db/query/sbe_plan_cache.cpp
+++ b/src/mongo/db/query/sbe_plan_cache.cpp
@@ -160,8 +160,17 @@ void clearPlanCacheEntriesWith(ServiceContext* serviceCtx,
sbe::getPlanCache(serviceCtx)
.removeIf([&collectionUuid, collectionVersion](const PlanCacheKey& key,
const sbe::PlanCacheEntry& entry) {
- return key.getCollectionVersion() == collectionVersion &&
- key.getCollectionUuid() == collectionUuid;
+ if (key.getMainCollectionState().version == collectionVersion &&
+ key.getMainCollectionState().uuid == collectionUuid) {
+ return true;
+ }
+ for (auto& collectionState : key.getSecondaryCollectionStates()) {
+ if (collectionState.version == collectionVersion &&
+ collectionState.uuid == collectionUuid) {
+ return true;
+ }
+ }
+ return false;
});
LOGV2_DEBUG(6006600,
diff --git a/src/mongo/db/query/sbe_plan_cache.h b/src/mongo/db/query/sbe_plan_cache.h
index 6e7853fa817..b33488ade0f 100644
--- a/src/mongo/db/query/sbe_plan_cache.h
+++ b/src/mongo/db/query/sbe_plan_cache.h
@@ -56,35 +56,91 @@ struct PlanCacheKeyShardingEpoch {
Timestamp ts;
};
+struct PlanCacheKeyCollectionState {
+ bool operator==(const PlanCacheKeyCollectionState& other) const {
+ return other.uuid == uuid && other.version == version &&
+ other.newestVisibleIndexTimestamp == newestVisibleIndexTimestamp &&
+ other.shardVersion == shardVersion;
+ }
+
+ size_t hashCode() const {
+ size_t hash = UUID::Hash{}(uuid);
+ boost::hash_combine(hash, version);
+ if (newestVisibleIndexTimestamp) {
+ boost::hash_combine(hash, newestVisibleIndexTimestamp->asULL());
+ }
+ if (shardVersion) {
+ shardVersion->epoch.hash_combine(hash);
+ boost::hash_combine(hash, shardVersion->ts.asULL());
+ }
+ return hash;
+ }
+
+ UUID uuid;
+
+ // There is a special collection versioning scheme associated with the SBE plan cache. Whenever
+ // an action against a collection is made which should invalidate the plan cache entries for the
+ // collection -- in particular index builds and drops -- the version number is incremented.
+ // Readers specify the version number that they are reading at so that they only pick up cache
+ // entries with the right set of indexes.
+ //
+ // We also clean up all cache entries for a particular (collectionUuid, versionNumber) pair when
+ // all readers seeing this version of the collection have drained.
+ size_t version;
+
+ // The '_collectionVersion' is not currently sufficient in order to ensure that the indexes
+ // visible to the reader are consistent with the indexes present in the cache entry. The reason
+ // is that all readers see the latest copy-on-write version of the 'Collection' object, even
+ // though they are allowed to read at an older timestamp, potentially at a time before an index
+ // build completed.
+ //
+ // To solve this problem, we incorporate the timestamp of the newest index visible to the reader
+ // into the plan cache key. This ensures that the set of indexes visible to the reader match
+ // those present in the plan cache entry, preventing a situation where the plan cache entry
+ // reflects a newer version of the index catalog than the one visible to the reader.
+ //
+ // In the future, this could instead be solved with point-in-time catalog lookups.
+ boost::optional<Timestamp> newestVisibleIndexTimestamp;
+
+ // Ensures that a cached SBE plan cannot be reused if the collection has since become sharded or
+ // changed its shard key. The cached plan may no longer be valid after sharding or shard key
+ // refining since the structure of the plan depends on whether the collection is sharded, and if
+ // sharded depends on the shard key.
+ const boost::optional<PlanCacheKeyShardingEpoch> shardVersion;
+};
+
/**
* Represents the "key" used in the PlanCache mapping from query shape -> query plan.
*/
class PlanCacheKey {
public:
PlanCacheKey(PlanCacheKeyInfo&& info,
- UUID collectionUuid,
- size_t collectionVersion,
- boost::optional<Timestamp> newestVisibleIndexTimestamp,
- boost::optional<PlanCacheKeyShardingEpoch> shardVersion)
+ PlanCacheKeyCollectionState mainCollectionState,
+ std::vector<PlanCacheKeyCollectionState> secondaryCollectionStates)
: _info{std::move(info)},
- _collectionUuid{collectionUuid},
- _collectionVersion{collectionVersion},
- _newestVisibleIndexTimestamp{newestVisibleIndexTimestamp},
- _shardVersion{shardVersion} {}
+ _mainCollectionState{std::move(mainCollectionState)},
+ _secondaryCollectionStates{std::move(secondaryCollectionStates)} {
+ // For secondary collections, we don't encode shard version in the key since we don't shard
+ // version these collections. This is OK because we only push down $lookup queries to SBE
+ // when involved collections are unsharded.
+ for (const auto& collState : _secondaryCollectionStates) {
+ tassert(6443202,
+ "Secondary collections should not encode shard version in plan cache key",
+ collState.shardVersion == boost::none);
+ }
+ }
- const UUID& getCollectionUuid() const {
- return _collectionUuid;
+ const PlanCacheKeyCollectionState& getMainCollectionState() const {
+ return _mainCollectionState;
}
- size_t getCollectionVersion() const {
- return _collectionVersion;
+ const std::vector<PlanCacheKeyCollectionState>& getSecondaryCollectionStates() const {
+ return _secondaryCollectionStates;
}
bool operator==(const PlanCacheKey& other) const {
- return other._collectionVersion == _collectionVersion &&
- other._collectionUuid == _collectionUuid &&
- other._newestVisibleIndexTimestamp == _newestVisibleIndexTimestamp &&
- other._info == _info && other._shardVersion == _shardVersion;
+ return other._info == _info && other._mainCollectionState == _mainCollectionState &&
+ other._secondaryCollectionStates == _secondaryCollectionStates;
}
bool operator!=(const PlanCacheKey& other) const {
@@ -97,14 +153,9 @@ public:
uint32_t planCacheKeyHash() const {
size_t hash = _info.planCacheKeyHash();
- boost::hash_combine(hash, UUID::Hash{}(_collectionUuid));
- boost::hash_combine(hash, _collectionVersion);
- if (_newestVisibleIndexTimestamp) {
- boost::hash_combine(hash, _newestVisibleIndexTimestamp->asULL());
- }
- if (_shardVersion) {
- _shardVersion->epoch.hash_combine(hash);
- boost::hash_combine(hash, _shardVersion->ts.asULL());
+ boost::hash_combine(hash, _mainCollectionState.hashCode());
+ for (auto& collectionState : _secondaryCollectionStates) {
+ boost::hash_combine(hash, collectionState.hashCode());
}
return hash;
}
@@ -117,37 +168,12 @@ private:
// Contains the actual encoding of the query shape as well as the index discriminators.
const PlanCacheKeyInfo _info;
- const UUID _collectionUuid;
-
- // There is a special collection versioning scheme associated with the SBE plan cache. Whenever
- // an action against a collection is made which should invalidate the plan cache entries for the
- // collection -- in particular index builds and drops -- the version number is incremented.
- // Readers specify the version number that they are reading at so that they only pick up cache
- // entries with the right set of indexes.
- //
- // We also clean up all cache entries for a particular (collectionUuid, versionNumber) pair when
- // all readers seeing this version of the collection have drained.
- const size_t _collectionVersion;
-
- // The '_collectionVersion' is not currently sufficient in order to ensure that the indexes
- // visible to the reader are consistent with the indexes present in the cache entry. The reason
- // is that all readers see the latest copy-on-write version of the 'Collection' object, even
- // though they are allowed to read at an older timestamp, potentially at a time before an index
- // build completed.
- //
- // To solve this problem, we incorporate the timestamp of the newest index visible to the reader
- // into the plan cache key. This ensures that the set of indexes visible to the reader match
- // those present in the plan cache entry, preventing a situation where the plan cache entry
- // reflects a newer version of the index catalog than the one visible to the reader.
- //
- // In the future, this could instead be solved with point-in-time catalog lookups.
- const boost::optional<Timestamp> _newestVisibleIndexTimestamp;
+ const PlanCacheKeyCollectionState _mainCollectionState;
- // Ensures that a cached SBE plan cannot be reused if the collection has since become sharded or
- // changed its shard key. The cached plan may no longer be valid after sharding or shard key
- // refining since the structure of the plan depends on whether the collection is sharded, and if
- // sharded depends on the shard key.
- const boost::optional<PlanCacheKeyShardingEpoch> _shardVersion;
+ // To make sure the plan cache key matches, the secondary collection states need to be passed
+ // in a defined order. Currently, we use the collection order stored in
+ // MultipleCollectionAccessor, which is ordered by the collection namespaces.
+ const std::vector<PlanCacheKeyCollectionState> _secondaryCollectionStates;
};
class PlanCacheKeyHasher {
diff --git a/src/mongo/db/query/sbe_stage_builder.cpp b/src/mongo/db/query/sbe_stage_builder.cpp
index 99384dc11fb..63ccb11ca66 100644
--- a/src/mongo/db/query/sbe_stage_builder.cpp
+++ b/src/mongo/db/query/sbe_stage_builder.cpp
@@ -85,61 +85,6 @@
namespace mongo::stage_builder {
namespace {
/**
- * Tree representing index key pattern or a subset of it.
- *
- * For example, the key pattern {a.b: 1, x: 1, a.c: 1} would look like:
- *
- * <root>
- * / |
- * a x
- * / \
- * b c
- *
- * This tree is used for building SBE subtrees to re-hydrate index keys and for covered projections.
- */
-struct IndexKeyPatternTreeNode {
- IndexKeyPatternTreeNode* emplace(StringData fieldComponent) {
- auto newNode = std::make_unique<IndexKeyPatternTreeNode>();
- const auto newNodeRaw = newNode.get();
- children.emplace(fieldComponent, std::move(newNode));
- childrenOrder.push_back(fieldComponent.toString());
-
- return newNodeRaw;
- }
-
- /**
- * Returns leaf node matching field path. If the field path provided resolves to a non-leaf
- * node, null will be returned.
- *
- * For example, if tree was built for key pattern {a: 1, a.b: 1}, this method will return
- * nullptr for field path "a". On the other hand, this method will return corresponding node for
- * field path "a.b".
- */
- IndexKeyPatternTreeNode* findLeafNode(const FieldRef& fieldRef, size_t currentIndex = 0) {
- if (currentIndex == fieldRef.numParts()) {
- if (children.empty()) {
- return this;
- }
- return nullptr;
- }
-
- auto currentPart = fieldRef.getPart(currentIndex);
- if (auto it = children.find(currentPart); it != children.end()) {
- return it->second->findLeafNode(fieldRef, currentIndex + 1);
- } else {
- return nullptr;
- }
- }
-
- StringMap<std::unique_ptr<IndexKeyPatternTreeNode>> children;
- std::vector<std::string> childrenOrder;
-
- // Which slot the index key for this component is stored in. May be boost::none for non-leaf
- // nodes.
- boost::optional<sbe::value::SlotId> indexKeySlot;
-};
-
-/**
* For covered projections, each of the projection field paths represent respective index key. To
* rehydrate index keys into the result object, we first need to convert projection AST into
* 'IndexKeyPatternTreeNode' structure. Context structure and visitors below are used for this
@@ -246,94 +191,6 @@ public:
};
/**
- * Given a key pattern and an array of slots of equal size, builds an IndexKeyPatternTreeNode
- * representing the mapping between key pattern component and slot.
- *
- * Note that this will "short circuit" in cases where the index key pattern contains two components
- * where one is a subpath of the other. For example with the key pattern {a:1, a.b: 1}, the "a.b"
- * component will not be represented in the output tree. For the purpose of rehydrating index keys,
- * this is fine (and actually preferable).
- */
-std::unique_ptr<IndexKeyPatternTreeNode> buildKeyPatternTree(const BSONObj& keyPattern,
- const sbe::value::SlotVector& slots) {
- size_t i = 0;
-
- auto root = std::make_unique<IndexKeyPatternTreeNode>();
- for (auto&& elem : keyPattern) {
- auto* node = root.get();
- bool skipElem = false;
-
- FieldRef fr(elem.fieldNameStringData());
- for (FieldIndex j = 0; j < fr.numParts(); ++j) {
- const auto part = fr.getPart(j);
- if (auto it = node->children.find(part); it != node->children.end()) {
- node = it->second.get();
- if (node->indexKeySlot) {
- // We're processing the a sub-path of a path that's already indexed. We can
- // bail out here since we won't use the sub-path when reconstructing the
- // object.
- skipElem = true;
- break;
- }
- } else {
- node = node->emplace(part);
- }
- }
-
- if (!skipElem) {
- node->indexKeySlot = slots[i];
- }
-
- ++i;
- }
-
- return root;
-}
-
-/**
- * Given a root IndexKeyPatternTreeNode, this function will construct an SBE expression for
- * producing a partial object from an index key.
- *
- * For example, given the index key pattern {a.b: 1, x: 1, a.c: 1} and the index key
- * {"": 1, "": 2, "": 3}, the SBE expression would produce the object {a: {b:1, c: 3}, x: 2}.
- */
-std::unique_ptr<sbe::EExpression> buildNewObjExpr(const IndexKeyPatternTreeNode* kpTree) {
-
- sbe::EExpression::Vector args;
- for (auto&& fieldName : kpTree->childrenOrder) {
- auto it = kpTree->children.find(fieldName);
-
- args.emplace_back(makeConstant(fieldName));
- if (it->second->indexKeySlot) {
- args.emplace_back(makeVariable(*it->second->indexKeySlot));
- } else {
- // The reason this is in an else branch is that in the case where we have an index key
- // like {a.b: ..., a: ...}, we've already made the logic for reconstructing the 'a'
- // portion, so the 'a.b' subtree can be skipped.
- args.push_back(buildNewObjExpr(it->second.get()));
- }
- }
-
- return sbe::makeE<sbe::EFunction>("newObj", std::move(args));
-}
-
-/**
- * Given a stage, and index key pattern a corresponding array of slot IDs, this function
- * add a ProjectStage to the tree which rehydrates the index key and stores the result in
- * 'resultSlot.'
- */
-std::unique_ptr<sbe::PlanStage> rehydrateIndexKey(std::unique_ptr<sbe::PlanStage> stage,
- const BSONObj& indexKeyPattern,
- PlanNodeId nodeId,
- const sbe::value::SlotVector& indexKeySlots,
- sbe::value::SlotId resultSlot) {
- auto kpTree = buildKeyPatternTree(indexKeyPattern, indexKeySlots);
- auto keyExpr = buildNewObjExpr(kpTree.get());
-
- return sbe::makeProjectStage(std::move(stage), nodeId, resultSlot, std::move(keyExpr));
-}
-
-/**
* Generates an EOF plan. Note that even though this plan will return nothing, it will still define
* the slots specified by 'reqs'.
*/
@@ -1635,19 +1492,16 @@ SlotBasedStageBuilder::buildProjectionSimple(const QuerySolutionNode* root,
const auto childResult = outputs.get(kResult);
outputs.set(kResult, _slotIdGenerator.generate());
- inputStage = sbe::makeS<sbe::MakeBsonObjStage>(
- std::move(inputStage),
- outputs.get(kResult),
- childResult,
- sbe::MakeBsonObjStage::FieldBehavior::keep,
- // TODO SERVER-67039 take a set instead of a vector here.
- std::vector<std::string>{pn->proj.getRequiredFields().begin(),
- pn->proj.getRequiredFields().end()},
- std::vector<std::string>{},
- sbe::value::SlotVector{},
- true,
- false,
- root->nodeId());
+ inputStage = sbe::makeS<sbe::MakeBsonObjStage>(std::move(inputStage),
+ outputs.get(kResult),
+ childResult,
+ sbe::MakeBsonObjStage::FieldBehavior::keep,
+ pn->proj.getRequiredFields(),
+ std::set<std::string>{},
+ sbe::value::SlotVector{},
+ true,
+ false,
+ root->nodeId());
return {std::move(inputStage), std::move(outputs)};
}
@@ -2948,7 +2802,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> SlotBasedStageBuilder
boost::none,
boost::none,
std::vector<std::string>{},
- projectFields,
+ std::move(projectFields),
fieldSlots,
true,
false,
diff --git a/src/mongo/db/query/sbe_stage_builder_coll_scan.cpp b/src/mongo/db/query/sbe_stage_builder_coll_scan.cpp
index f3011ec2bac..35c752f7dcb 100644
--- a/src/mongo/db/query/sbe_stage_builder_coll_scan.cpp
+++ b/src/mongo/db/query/sbe_stage_builder_coll_scan.cpp
@@ -328,7 +328,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateOptimizedOplo
// replica set initialization message. If this fails, then we throw
// ErrorCodes::OplogQueryMinTsMissing. We avoid doing this check on the resumable branch of a
// tailable scan; it only needs to be done once, when the initial branch is run.
- if (csn->assertTsHasNotFallenOffOplog && !isTailableResumeBranch) {
+ if (csn->assertTsHasNotFallenOff && !isTailableResumeBranch) {
invariant(csn->shouldTrackLatestOplogTimestamp);
// There should always be a 'tsSlot' already allocated on the RuntimeEnvironment for the
@@ -388,7 +388,7 @@ std::pair<std::unique_ptr<sbe::PlanStage>, PlanStageSlots> generateOptimizedOplo
makeBinaryOp(sbe::EPrimBinary::lessEq,
makeVariable(minTsSlot),
makeConstant(sbe::value::TypeTags::Timestamp,
- csn->assertTsHasNotFallenOffOplog->asULL())),
+ csn->assertTsHasNotFallenOff->asULL())),
makeBinaryOp(
sbe::EPrimBinary::logicAnd,
makeBinaryOp(sbe::EPrimBinary::eq,
diff --git a/src/mongo/db/query/sbe_stage_builder_expression.cpp b/src/mongo/db/query/sbe_stage_builder_expression.cpp
index 89541a241bc..cbeac015678 100644
--- a/src/mongo/db/query/sbe_stage_builder_expression.cpp
+++ b/src/mongo/db/query/sbe_stage_builder_expression.cpp
@@ -377,6 +377,7 @@ public:
void visit(const ExpressionLn* expr) final {}
void visit(const ExpressionLog* expr) final {}
void visit(const ExpressionLog10* expr) final {}
+ void visit(const ExpressionInternalFLEEqual* expr) final {}
void visit(const ExpressionMap* expr) final {}
void visit(const ExpressionMeta* expr) final {}
void visit(const ExpressionMod* expr) final {}
@@ -609,6 +610,7 @@ public:
void visit(const ExpressionLn* expr) final {}
void visit(const ExpressionLog* expr) final {}
void visit(const ExpressionLog10* expr) final {}
+ void visit(const ExpressionInternalFLEEqual* expr) final {}
void visit(const ExpressionMap* expr) final {}
void visit(const ExpressionMeta* expr) final {}
void visit(const ExpressionMod* expr) final {}
@@ -2317,6 +2319,9 @@ public:
_context->pushExpr(
sbe::makeE<sbe::ELocalBind>(frameId, std::move(binds), std::move(log10Expr)));
}
+ void visit(const ExpressionInternalFLEEqual* expr) final {
+ unsupportedExpression("$_internalFleEq");
+ }
void visit(const ExpressionMap* expr) final {
unsupportedExpression("$map");
}
diff --git a/src/mongo/db/query/sbe_stage_builder_filter.cpp b/src/mongo/db/query/sbe_stage_builder_filter.cpp
index 80dedd6d89c..5005ddef8a5 100644
--- a/src/mongo/db/query/sbe_stage_builder_filter.cpp
+++ b/src/mongo/db/query/sbe_stage_builder_filter.cpp
@@ -2060,6 +2060,23 @@ std::pair<boost::optional<sbe::value::SlotId>, EvalStage> generateFilter(
return {boost::none, std::move(stage)};
}
+ // We only use the classic matcher path (aka "franken matcher") when the plan cache is off,
+ // because embedding the classic matcher into the query execution tree is not compatible with
+ // auto parameterization. All of the constants used in the filter are in the MatchExpression
+ // itself, rather than in slots.
+ if (!feature_flags::gFeatureFlagSbePlanCache.isEnabledAndIgnoreFCV()) {
+ tassert(6681403, "trackIndex=true not supported for classic matcher in SBE", !trackIndex);
+
+ auto expr = makeFunction("applyClassicMatcher",
+ makeConstant(sbe::value::TypeTags::classicMatchExpresion,
+ sbe::value::bitcastFrom<const MatchExpression*>(
+ root->shallowClone().release())),
+ makeVariable(inputSlot));
+
+ auto filterStage = makeFilter<false>(std::move(stage), std::move(expr), planNodeId);
+ return {boost::none, std::move(filterStage)};
+ }
+
auto stateHelper = makeFilterStateHelper(trackIndex);
MatchExpressionVisitorContext context{
state, std::move(stage), inputSlot, root, planNodeId, *stateHelper};
@@ -2068,7 +2085,6 @@ std::pair<boost::optional<sbe::value::SlotId>, EvalStage> generateFilter(
MatchExpressionPostVisitor postVisitor{&context};
MatchExpressionWalker walker{&preVisitor, &inVisitor, &postVisitor};
tree_walker::walk<true, MatchExpression>(root, &walker);
-
auto [resultSlot, resultStage] = context.done();
return {resultSlot, std::move(resultStage)};
}
@@ -2085,8 +2101,29 @@ EvalStage generateIndexFilter(StageBuilderState& state,
return stage;
}
- // Index filters never need to track the index of a matching element in the array as they cannot
- // be used with a positional projection.
+ // We only use the classic matcher path (aka "franken matcher") when the plan cache is off,
+ // because embedding the classic matcher into the query execution tree is not compatible with
+ // auto parameterization. All of the constants used in the filter are in the MatchExpression
+ // itself, rather than in slots.
+ if (!feature_flags::gFeatureFlagSbePlanCache.isEnabledAndIgnoreFCV()) {
+ BSONObjBuilder keyPatternBuilder;
+ for (auto& field : keyFields) {
+ keyPatternBuilder.append(field, 1);
+ }
+ auto keyPatternTree = buildKeyPatternTree(keyPatternBuilder.obj(), keySlots);
+ auto mkObjExpr = buildNewObjExpr(keyPatternTree.get());
+
+ auto expr = makeFunction("applyClassicMatcher",
+ makeConstant(sbe::value::TypeTags::classicMatchExpresion,
+ sbe::value::bitcastFrom<const MatchExpression*>(
+ root->shallowClone().release())),
+ std::move(mkObjExpr));
+
+ return makeFilter<false>(std::move(stage), std::move(expr), planNodeId);
+ }
+
+ // Covered filters never need to track the index of a matching element in the array as they
+ // cannot be used with a positional projection.
const bool trackIndex = false;
auto stateHelper = makeFilterStateHelper(trackIndex);
MatchExpressionVisitorContext context{state,
diff --git a/src/mongo/db/query/sbe_stage_builder_helpers.cpp b/src/mongo/db/query/sbe_stage_builder_helpers.cpp
index e881a9ab4eb..cb61aa76a3d 100644
--- a/src/mongo/db/query/sbe_stage_builder_helpers.cpp
+++ b/src/mongo/db/query/sbe_stage_builder_helpers.cpp
@@ -1015,4 +1015,93 @@ sbe::value::SlotId StageBuilderState::registerInputParamSlot(
return slotId;
}
+
+/**
+ * Given a key pattern and an array of slots of equal size, builds an IndexKeyPatternTreeNode
+ * representing the mapping between key pattern component and slot.
+ *
+ * Note that this will "short circuit" in cases where the index key pattern contains two components
+ * where one is a subpath of the other. For example with the key pattern {a:1, a.b: 1}, the "a.b"
+ * component will not be represented in the output tree. For the purpose of rehydrating index keys,
+ * this is fine (and actually preferable).
+ */
+std::unique_ptr<IndexKeyPatternTreeNode> buildKeyPatternTree(const BSONObj& keyPattern,
+ const sbe::value::SlotVector& slots) {
+ size_t i = 0;
+
+ auto root = std::make_unique<IndexKeyPatternTreeNode>();
+ for (auto&& elem : keyPattern) {
+ auto* node = root.get();
+ bool skipElem = false;
+
+ FieldRef fr(elem.fieldNameStringData());
+ for (FieldIndex j = 0; j < fr.numParts(); ++j) {
+ const auto part = fr.getPart(j);
+ if (auto it = node->children.find(part); it != node->children.end()) {
+ node = it->second.get();
+ if (node->indexKeySlot) {
+                // We're processing a sub-path of a path that's already indexed. We can
+ // bail out here since we won't use the sub-path when reconstructing the
+ // object.
+ skipElem = true;
+ break;
+ }
+ } else {
+ node = node->emplace(part);
+ }
+ }
+
+ if (!skipElem) {
+ node->indexKeySlot = slots[i];
+ }
+
+ ++i;
+ }
+
+ return root;
+}
+
+/**
+ * Given a root IndexKeyPatternTreeNode, this function will construct an SBE expression for
+ * producing a partial object from an index key.
+ *
+ * For example, given the index key pattern {a.b: 1, x: 1, a.c: 1} and the index key
+ * {"": 1, "": 2, "": 3}, the SBE expression would produce the object {a: {b:1, c: 3}, x: 2}.
+ */
+std::unique_ptr<sbe::EExpression> buildNewObjExpr(const IndexKeyPatternTreeNode* kpTree) {
+
+ sbe::EExpression::Vector args;
+ for (auto&& fieldName : kpTree->childrenOrder) {
+ auto it = kpTree->children.find(fieldName);
+
+ args.emplace_back(makeConstant(fieldName));
+ if (it->second->indexKeySlot) {
+ args.emplace_back(makeVariable(*it->second->indexKeySlot));
+ } else {
+ // The reason this is in an else branch is that in the case where we have an index key
+ // like {a.b: ..., a: ...}, we've already made the logic for reconstructing the 'a'
+ // portion, so the 'a.b' subtree can be skipped.
+ args.push_back(buildNewObjExpr(it->second.get()));
+ }
+ }
+
+ return sbe::makeE<sbe::EFunction>("newObj", std::move(args));
+}
+
+/**
+ * Given a stage, an index key pattern, and a corresponding array of slot IDs, this function
+ * adds a ProjectStage to the tree which rehydrates the index key and stores the result in
+ * 'resultSlot'.
+ */
+std::unique_ptr<sbe::PlanStage> rehydrateIndexKey(std::unique_ptr<sbe::PlanStage> stage,
+ const BSONObj& indexKeyPattern,
+ PlanNodeId nodeId,
+ const sbe::value::SlotVector& indexKeySlots,
+ sbe::value::SlotId resultSlot) {
+ auto kpTree = buildKeyPatternTree(indexKeyPattern, indexKeySlots);
+ auto keyExpr = buildNewObjExpr(kpTree.get());
+
+ return sbe::makeProjectStage(std::move(stage), nodeId, resultSlot, std::move(keyExpr));
+}
+
} // namespace mongo::stage_builder
diff --git a/src/mongo/db/query/sbe_stage_builder_helpers.h b/src/mongo/db/query/sbe_stage_builder_helpers.h
index ce718023632..05cf73896e0 100644
--- a/src/mongo/db/query/sbe_stage_builder_helpers.h
+++ b/src/mongo/db/query/sbe_stage_builder_helpers.h
@@ -39,6 +39,7 @@
#include "mongo/db/exec/sbe/stages/makeobj.h"
#include "mongo/db/exec/sbe/stages/project.h"
#include "mongo/db/pipeline/expression.h"
+#include "mongo/db/query/projection_ast.h"
#include "mongo/db/query/sbe_stage_builder_eval_frame.h"
#include "mongo/db/query/stage_types.h"
@@ -948,4 +949,69 @@ struct StageBuilderState {
stdx::unordered_map<std::string /*field path*/, EvalExpr> preGeneratedExprs;
};
+/**
+ * Tree representing index key pattern or a subset of it.
+ *
+ * For example, the key pattern {a.b: 1, x: 1, a.c: 1} would look like:
+ *
+ * <root>
+ * / |
+ * a x
+ * / \
+ * b c
+ *
+ * This tree is used for building SBE subtrees to re-hydrate index keys and for covered projections.
+ */
+struct IndexKeyPatternTreeNode {
+ IndexKeyPatternTreeNode* emplace(StringData fieldComponent) {
+ auto newNode = std::make_unique<IndexKeyPatternTreeNode>();
+ const auto newNodeRaw = newNode.get();
+ children.emplace(fieldComponent, std::move(newNode));
+ childrenOrder.push_back(fieldComponent.toString());
+
+ return newNodeRaw;
+ }
+
+ /**
+ * Returns leaf node matching field path. If the field path provided resolves to a non-leaf
+ * node, null will be returned.
+ *
+ * For example, if tree was built for key pattern {a: 1, a.b: 1}, this method will return
+ * nullptr for field path "a". On the other hand, this method will return corresponding node for
+ * field path "a.b".
+ */
+ IndexKeyPatternTreeNode* findLeafNode(const FieldRef& fieldRef, size_t currentIndex = 0) {
+ if (currentIndex == fieldRef.numParts()) {
+ if (children.empty()) {
+ return this;
+ }
+ return nullptr;
+ }
+
+ auto currentPart = fieldRef.getPart(currentIndex);
+ if (auto it = children.find(currentPart); it != children.end()) {
+ return it->second->findLeafNode(fieldRef, currentIndex + 1);
+ } else {
+ return nullptr;
+ }
+ }
+
+ StringMap<std::unique_ptr<IndexKeyPatternTreeNode>> children;
+ std::vector<std::string> childrenOrder;
+
+ // Which slot the index key for this component is stored in. May be boost::none for non-leaf
+ // nodes.
+ boost::optional<sbe::value::SlotId> indexKeySlot;
+};
+
+std::unique_ptr<IndexKeyPatternTreeNode> buildKeyPatternTree(const BSONObj& keyPattern,
+ const sbe::value::SlotVector& slots);
+std::unique_ptr<sbe::EExpression> buildNewObjExpr(const IndexKeyPatternTreeNode* kpTree);
+
+std::unique_ptr<sbe::PlanStage> rehydrateIndexKey(std::unique_ptr<sbe::PlanStage> stage,
+ const BSONObj& indexKeyPattern,
+ PlanNodeId nodeId,
+ const sbe::value::SlotVector& indexKeySlots,
+ sbe::value::SlotId resultSlot);
+
} // namespace mongo::stage_builder
diff --git a/src/mongo/db/query/sbe_stage_builder_projection.cpp b/src/mongo/db/query/sbe_stage_builder_projection.cpp
index e4e222bdff8..bff2daf2f78 100644
--- a/src/mongo/db/query/sbe_stage_builder_projection.cpp
+++ b/src/mongo/db/query/sbe_stage_builder_projection.cpp
@@ -413,7 +413,7 @@ public:
childLevelResultSlot,
childLevelInputSlot,
sbe::MakeBsonObjStage::FieldBehavior::keep,
- keepFields,
+ std::move(keepFields),
std::move(projectFields),
std::move(projectSlots),
true,
diff --git a/src/mongo/db/query/sbe_sub_planner.cpp b/src/mongo/db/query/sbe_sub_planner.cpp
index e5e714ad3aa..c6ce37cb434 100644
--- a/src/mongo/db/query/sbe_sub_planner.cpp
+++ b/src/mongo/db/query/sbe_sub_planner.cpp
@@ -116,8 +116,9 @@ CandidatePlans SubPlanner::plan(
// TODO SERVER-61507: do it unconditionally when $group pushdown is integrated with the SBE plan
// cache.
- if (_cq.pipeline().empty()) {
- plan_cache_util::updatePlanCache(_opCtx, mainColl, _cq, *compositeSolution, *root, data);
+ if (canonical_query_encoder::canUseSbePlanCache(_cq)) {
+ plan_cache_util::updatePlanCache(
+ _opCtx, _collections, _cq, *compositeSolution, *root, data);
}
return {makeVector(plan_ranker::CandidatePlan{
diff --git a/src/mongo/db/query/sbe_utils.cpp b/src/mongo/db/query/sbe_utils.cpp
index 043027f1e89..4284646b510 100644
--- a/src/mongo/db/query/sbe_utils.cpp
+++ b/src/mongo/db/query/sbe_utils.cpp
@@ -238,6 +238,7 @@ bool isQuerySbeCompatible(const CollectionPtr* collection,
const bool allExpressionsSupported = expCtx && expCtx->sbeCompatible;
const bool isNotCount = !(plannerOptions & QueryPlannerParams::IS_COUNT);
const bool isNotOplog = !cq->nss().isOplog();
+ const bool isNotChangeCollection = !cq->nss().isChangeCollection();
const bool doesNotContainMetadataRequirements = cq->metadataDeps().none();
const bool doesNotSortOnMetaOrPathWithNumericComponents =
!sortPattern || std::all_of(sortPattern->begin(), sortPattern->end(), [](auto&& part) {
@@ -253,9 +254,15 @@ bool isQuerySbeCompatible(const CollectionPtr* collection,
const bool isQueryNotAgainstClusteredCollection =
!(collection->get() && collection->get()->isClustered());
+ const bool doesNotRequireMatchDetails =
+ !cq->getProj() || !cq->getProj()->requiresMatchDetails();
+
+ const bool doesNotHaveElemMatchProject = !cq->getProj() || !cq->getProj()->containsElemMatch();
+
return allExpressionsSupported && isNotCount && doesNotContainMetadataRequirements &&
isQueryNotAgainstTimeseriesCollection && isQueryNotAgainstClusteredCollection &&
- doesNotSortOnMetaOrPathWithNumericComponents && isNotOplog;
+ doesNotSortOnMetaOrPathWithNumericComponents && isNotOplog && doesNotRequireMatchDetails &&
+ doesNotHaveElemMatchProject && isNotChangeCollection;
}
bool validateInputParamsBindings(
diff --git a/src/mongo/db/record_id.h b/src/mongo/db/record_id.h
index 544518ad544..2302630b37b 100644
--- a/src/mongo/db/record_id.h
+++ b/src/mongo/db/record_id.h
@@ -311,11 +311,6 @@ public:
int size;
auto str = elem.binData(size);
return RecordId(str, size);
- } else if (elem.type() == BSONType::String) {
- // Support old format for upgrades during resumable index builds.
- // TODO SERVER-62369: Remove when we branch out 6.0.
- auto str = hexblob::decode(elem.String());
- return RecordId(str.c_str(), str.size());
} else {
uasserted(ErrorCodes::BadValue,
fmt::format("Could not deserialize RecordId with type {}", elem.type()));
diff --git a/src/mongo/db/record_id_helpers.cpp b/src/mongo/db/record_id_helpers.cpp
index e9147666da8..bf313976a3b 100644
--- a/src/mongo/db/record_id_helpers.cpp
+++ b/src/mongo/db/record_id_helpers.cpp
@@ -48,23 +48,35 @@
namespace mongo {
namespace record_id_helpers {
-StatusWith<RecordId> keyForOptime(const Timestamp& opTime) {
- // Make sure secs and inc wouldn't be negative if treated as signed. This ensures that they
- // don't sort differently when put in a RecordId. It also avoids issues with Null/Invalid
- // RecordIds
- if (opTime.getSecs() > uint32_t(std::numeric_limits<int32_t>::max()))
- return {ErrorCodes::BadValue, "ts secs too high"};
-
- if (opTime.getInc() > uint32_t(std::numeric_limits<int32_t>::max()))
- return {ErrorCodes::BadValue, "ts inc too high"};
-
- const auto out = RecordId(opTime.getSecs(), opTime.getInc());
- if (out <= RecordId::minLong())
- return {ErrorCodes::BadValue, "ts too low"};
- if (out >= RecordId::maxLong())
- return {ErrorCodes::BadValue, "ts too high"};
-
- return out;
+StatusWith<RecordId> keyForOptime(const Timestamp& opTime, const KeyFormat keyFormat) {
+ switch (keyFormat) {
+ case KeyFormat::Long: {
+ // Make sure secs and inc wouldn't be negative if treated as signed. This ensures that
+ // they don't sort differently when put in a RecordId. It also avoids issues with
+ // Null/Invalid RecordIds
+ if (opTime.getSecs() > uint32_t(std::numeric_limits<int32_t>::max()))
+ return {ErrorCodes::BadValue, "ts secs too high"};
+
+ if (opTime.getInc() > uint32_t(std::numeric_limits<int32_t>::max()))
+ return {ErrorCodes::BadValue, "ts inc too high"};
+
+ const auto out = RecordId(opTime.getSecs(), opTime.getInc());
+ if (out <= RecordId::minLong())
+ return {ErrorCodes::BadValue, "ts too low"};
+ if (out >= RecordId::maxLong())
+ return {ErrorCodes::BadValue, "ts too high"};
+
+ return out;
+ }
+ case KeyFormat::String: {
+ KeyString::Builder keyBuilder(KeyString::Version::kLatestVersion);
+ keyBuilder.appendTimestamp(opTime);
+ return RecordId(keyBuilder.getBuffer(), keyBuilder.getSize());
+ }
+ default: { MONGO_UNREACHABLE_TASSERT(6521004); }
+ }
+
+ MONGO_UNREACHABLE_TASSERT(6521005);
}
@@ -84,7 +96,7 @@ StatusWith<RecordId> extractKeyOptime(const char* data, int len) {
if (elem.type() != bsonTimestamp)
return {ErrorCodes::BadValue, "ts must be a Timestamp"};
- return keyForOptime(elem.timestamp());
+ return keyForOptime(elem.timestamp(), KeyFormat::Long);
}
StatusWith<RecordId> keyForDoc(const BSONObj& doc,
diff --git a/src/mongo/db/record_id_helpers.h b/src/mongo/db/record_id_helpers.h
index 378466df45a..b957b30cce6 100644
--- a/src/mongo/db/record_id_helpers.h
+++ b/src/mongo/db/record_id_helpers.h
@@ -46,7 +46,7 @@ namespace record_id_helpers {
* Converts Timestamp to a RecordId in an unspecified manor that is safe to use as the key to
* in a RecordStore.
*/
-StatusWith<RecordId> keyForOptime(const Timestamp& opTime);
+StatusWith<RecordId> keyForOptime(const Timestamp& opTime, KeyFormat keyFormat);
/**
* For clustered collections, converts various values into a RecordId.
diff --git a/src/mongo/db/repl/SConscript b/src/mongo/db/repl/SConscript
index e9bcecfbdbf..962477568b3 100644
--- a/src/mongo/db/repl/SConscript
+++ b/src/mongo/db/repl/SConscript
@@ -261,6 +261,7 @@ env.Library(
'$BUILD_DIR/mongo/db/catalog/catalog_helpers',
'$BUILD_DIR/mongo/db/catalog/database_holder',
'$BUILD_DIR/mongo/db/catalog/multi_index_block',
+ '$BUILD_DIR/mongo/db/change_stream_change_collection_manager',
'$BUILD_DIR/mongo/db/common',
'$BUILD_DIR/mongo/db/concurrency/exception_util',
'$BUILD_DIR/mongo/db/dbhelpers',
@@ -529,9 +530,11 @@ env.Library(
'roll_back_local_operations',
],
LIBDEPS_PRIVATE=[
+ '$BUILD_DIR/mongo/db/catalog/database_holder',
'$BUILD_DIR/mongo/db/catalog/import_collection_oplog_entry',
'$BUILD_DIR/mongo/db/index_builds_coordinator_interface',
'$BUILD_DIR/mongo/db/multitenancy',
+ '$BUILD_DIR/mongo/db/repl/tenant_migration_access_blocker',
'$BUILD_DIR/mongo/db/s/sharding_runtime_d',
'$BUILD_DIR/mongo/db/storage/historical_ident_tracker',
'$BUILD_DIR/mongo/idl/server_parameter',
@@ -619,6 +622,7 @@ env.Library(
'storage_interface',
],
LIBDEPS_PRIVATE=[
+ '$BUILD_DIR/mongo/db/change_stream_change_collection_manager',
'$BUILD_DIR/mongo/db/commands/mongod_fsync',
'$BUILD_DIR/mongo/db/concurrency/exception_util',
'$BUILD_DIR/mongo/db/storage/storage_control',
@@ -1705,6 +1709,7 @@ if wiredtiger:
'$BUILD_DIR/mongo/db/logical_time',
'$BUILD_DIR/mongo/db/multitenancy',
'$BUILD_DIR/mongo/db/op_observer_impl',
+ '$BUILD_DIR/mongo/db/pipeline/change_stream_expired_pre_image_remover',
'$BUILD_DIR/mongo/db/query/command_request_response',
'$BUILD_DIR/mongo/db/s/sharding_runtime_d',
'$BUILD_DIR/mongo/db/service_context_d_test_fixture',
diff --git a/src/mongo/db/repl/apply_ops.cpp b/src/mongo/db/repl/apply_ops.cpp
index 972c1fb2580..4887982c95c 100644
--- a/src/mongo/db/repl/apply_ops.cpp
+++ b/src/mongo/db/repl/apply_ops.cpp
@@ -28,11 +28,10 @@
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/db/repl/apply_ops.h"
#include "mongo/bson/util/bson_extract.h"
+#include "mongo/client/client_deprecated.h"
#include "mongo/db/catalog/collection.h"
#include "mongo/db/catalog/database.h"
#include "mongo/db/catalog/database_holder.h"
@@ -297,12 +296,11 @@ Status _checkPrecondition(OperationContext* opCtx,
DBDirectClient db(opCtx);
// The preconditions come in "q: {{query: {...}, orderby: ..., etc.}}" format. This format
// is no longer used either internally or over the wire in other contexts. We are using a
- // legacy API from 'DBDirectClient' in order to parse this format and convert it into the
+ // legacy API from 'client_deprecated' in order to parse this format and convert it into the
// corresponding find command.
- auto preconditionQuery = Query::fromBSONDeprecated(preCondition["q"].Obj());
- auto cursor =
- db.query_DEPRECATED(nss, preconditionQuery.getFilter(), preconditionQuery, 1 /*limit*/);
- BSONObj realres = cursor->more() ? cursor->nextSafe() : BSONObj{};
+ FindCommandRequest findCmd{nss};
+ client_deprecated::initFindFromLegacyOptions(preCondition["q"].Obj(), 0, &findCmd);
+ BSONObj realres = db.findOne(std::move(findCmd));
// Get collection default collation.
auto databaseHolder = DatabaseHolder::get(opCtx);
diff --git a/src/mongo/db/repl/collection_bulk_loader_impl.cpp b/src/mongo/db/repl/collection_bulk_loader_impl.cpp
index 574607ea257..f000e93150c 100644
--- a/src/mongo/db/repl/collection_bulk_loader_impl.cpp
+++ b/src/mongo/db/repl/collection_bulk_loader_impl.cpp
@@ -95,7 +95,8 @@ Status CollectionBulkLoaderImpl::init(const std::vector<BSONObj>& secondaryIndex
UnreplicatedWritesBlock uwb(_opCtx.get());
// This enforces the buildIndexes setting in the replica set configuration.
CollectionWriter collWriter(_opCtx.get(), *_collection);
- auto indexCatalog = collWriter.getWritableCollection()->getIndexCatalog();
+ auto indexCatalog =
+ collWriter.getWritableCollection(_opCtx.get())->getIndexCatalog();
auto specs = indexCatalog->removeExistingIndexesNoChecks(
_opCtx.get(), collWriter.get(), secondaryIndexSpecs);
if (specs.size()) {
diff --git a/src/mongo/db/repl/collection_cloner.cpp b/src/mongo/db/repl/collection_cloner.cpp
index bde00eef906..e380fbe6238 100644
--- a/src/mongo/db/repl/collection_cloner.cpp
+++ b/src/mongo/db/repl/collection_cloner.cpp
@@ -317,38 +317,43 @@ BaseCloner::AfterStageBehavior CollectionCloner::setupIndexBuildersForUnfinished
}
void CollectionCloner::runQuery() {
- // Non-resumable query.
- Query query;
+ FindCommandRequest findCmd{_sourceDbAndUuid};
if (_resumeToken) {
// Resume the query from where we left off.
LOGV2_DEBUG(21133, 1, "Collection cloner will resume the last successful query");
- query.requestResumeToken(true).resumeAfter(_resumeToken.get());
+ findCmd.setRequestResumeToken(true);
+ findCmd.setResumeAfter(_resumeToken.get());
} else {
// New attempt at a resumable query.
LOGV2_DEBUG(21134, 1, "Collection cloner will run a new query");
- query.requestResumeToken(true);
+ findCmd.setRequestResumeToken(true);
}
- query.hint(BSON("$natural" << 1));
+
+ findCmd.setHint(BSON("$natural" << 1));
+ findCmd.setNoCursorTimeout(true);
+ findCmd.setReadConcern(ReadConcernArgs::kLocal);
+ if (_collectionClonerBatchSize) {
+ findCmd.setBatchSize(_collectionClonerBatchSize);
+ }
+
+ ExhaustMode exhaustMode = collectionClonerUsesExhaust ? ExhaustMode::kOn : ExhaustMode::kOff;
// We reset this every time we retry or resume a query.
// We distinguish the first batch from the rest so that we only store the remote cursor id
// the first time we get it.
_firstBatchOfQueryRound = true;
- getClient()->query_DEPRECATED(
- [this](DBClientCursorBatchIterator& iter) { handleNextBatch(iter); },
- _sourceDbAndUuid,
- BSONObj{},
- query,
- nullptr /* fieldsToReturn */,
- QueryOption_NoCursorTimeout | QueryOption_SecondaryOk |
- (collectionClonerUsesExhaust ? QueryOption_Exhaust : 0),
- _collectionClonerBatchSize,
- ReadConcernArgs::kLocal);
+ auto cursor = getClient()->find(
+ std::move(findCmd), ReadPreferenceSetting{ReadPreference::SecondaryPreferred}, exhaustMode);
+
+ // Process the results of the cursor one batch at a time.
+ while (cursor->more()) {
+ handleNextBatch(*cursor);
+ }
}
-void CollectionCloner::handleNextBatch(DBClientCursorBatchIterator& iter) {
+void CollectionCloner::handleNextBatch(DBClientCursor& cursor) {
{
stdx::lock_guard<InitialSyncSharedData> lk(*getSharedData());
if (!getSharedData()->getStatus(lk).isOK()) {
@@ -370,15 +375,15 @@ void CollectionCloner::handleNextBatch(DBClientCursorBatchIterator& iter) {
if (_firstBatchOfQueryRound) {
// Store the cursorId of the remote cursor.
- _remoteCursorId = iter.getCursorId();
+ _remoteCursorId = cursor.getCursorId();
}
_firstBatchOfQueryRound = false;
{
stdx::lock_guard<Latch> lk(_mutex);
_stats.receivedBatches++;
- while (iter.moreInCurrentBatch()) {
- _documentsToInsert.emplace_back(iter.nextSafe());
+ while (cursor.moreInCurrentBatch()) {
+ _documentsToInsert.emplace_back(cursor.nextSafe());
}
}
@@ -394,7 +399,7 @@ void CollectionCloner::handleNextBatch(DBClientCursorBatchIterator& iter) {
}
// Store the resume token for this batch.
- _resumeToken = iter.getPostBatchResumeToken();
+ _resumeToken = cursor.getPostBatchResumeToken();
initialSyncHangCollectionClonerAfterHandlingBatchResponse.executeIf(
[&](const BSONObj&) {
diff --git a/src/mongo/db/repl/collection_cloner.h b/src/mongo/db/repl/collection_cloner.h
index 80d8a9d72bc..085c6abdb3f 100644
--- a/src/mongo/db/repl/collection_cloner.h
+++ b/src/mongo/db/repl/collection_cloner.h
@@ -207,10 +207,10 @@ private:
AfterStageBehavior setupIndexBuildersForUnfinishedIndexesStage();
/**
- * Put all results from a query batch into a buffer to be inserted, and schedule
- * it to be inserted.
+ * Put all results from a query batch into a buffer to be inserted, and schedule it to be
+ * inserted.
*/
- void handleNextBatch(DBClientCursorBatchIterator& iter);
+ void handleNextBatch(DBClientCursor& cursor);
/**
* Called whenever there is a new batch of documents ready from the DBClientConnection.
diff --git a/src/mongo/db/repl/data_replicator_external_state.h b/src/mongo/db/repl/data_replicator_external_state.h
index 87826b0f199..219b5a7ec31 100644
--- a/src/mongo/db/repl/data_replicator_external_state.h
+++ b/src/mongo/db/repl/data_replicator_external_state.h
@@ -90,7 +90,7 @@ public:
* Forwards the parsed metadata in the query results to the replication system.
*/
virtual void processMetadata(const rpc::ReplSetMetadata& replMetadata,
- rpc::OplogQueryMetadata oqMetadata) = 0;
+ const rpc::OplogQueryMetadata& oqMetadata) = 0;
/**
* Evaluates quality of sync source. Accepts the current sync source; the last optime on this
diff --git a/src/mongo/db/repl/data_replicator_external_state_impl.cpp b/src/mongo/db/repl/data_replicator_external_state_impl.cpp
index 8c43a013e9a..330cdf51305 100644
--- a/src/mongo/db/repl/data_replicator_external_state_impl.cpp
+++ b/src/mongo/db/repl/data_replicator_external_state_impl.cpp
@@ -84,7 +84,7 @@ OpTimeWithTerm DataReplicatorExternalStateImpl::getCurrentTermAndLastCommittedOp
}
void DataReplicatorExternalStateImpl::processMetadata(const rpc::ReplSetMetadata& replMetadata,
- rpc::OplogQueryMetadata oqMetadata) {
+ const rpc::OplogQueryMetadata& oqMetadata) {
OpTimeAndWallTime newCommitPoint = oqMetadata.getLastOpCommitted();
const bool fromSyncSource = true;
diff --git a/src/mongo/db/repl/data_replicator_external_state_impl.h b/src/mongo/db/repl/data_replicator_external_state_impl.h
index c408c484dc9..284cea32b41 100644
--- a/src/mongo/db/repl/data_replicator_external_state_impl.h
+++ b/src/mongo/db/repl/data_replicator_external_state_impl.h
@@ -53,7 +53,7 @@ public:
OpTimeWithTerm getCurrentTermAndLastCommittedOpTime() override;
void processMetadata(const rpc::ReplSetMetadata& replMetadata,
- rpc::OplogQueryMetadata oqMetadata) override;
+ const rpc::OplogQueryMetadata& oqMetadata) override;
ChangeSyncSourceAction shouldStopFetching(const HostAndPort& source,
const rpc::ReplSetMetadata& replMetadata,
diff --git a/src/mongo/db/repl/data_replicator_external_state_mock.cpp b/src/mongo/db/repl/data_replicator_external_state_mock.cpp
index ddcfc701ca6..0ee71071f03 100644
--- a/src/mongo/db/repl/data_replicator_external_state_mock.cpp
+++ b/src/mongo/db/repl/data_replicator_external_state_mock.cpp
@@ -87,9 +87,9 @@ OpTimeWithTerm DataReplicatorExternalStateMock::getCurrentTermAndLastCommittedOp
}
void DataReplicatorExternalStateMock::processMetadata(const rpc::ReplSetMetadata& replMetadata,
- rpc::OplogQueryMetadata oqMetadata) {
- replMetadataProcessed = replMetadata;
- oqMetadataProcessed = oqMetadata;
+ const rpc::OplogQueryMetadata& oqMetadata) {
+ replMetadataProcessed = rpc::ReplSetMetadata(replMetadata);
+ oqMetadataProcessed = rpc::OplogQueryMetadata(oqMetadata);
metadataWasProcessed = true;
}
diff --git a/src/mongo/db/repl/data_replicator_external_state_mock.h b/src/mongo/db/repl/data_replicator_external_state_mock.h
index 535ee513102..7ec17591a44 100644
--- a/src/mongo/db/repl/data_replicator_external_state_mock.h
+++ b/src/mongo/db/repl/data_replicator_external_state_mock.h
@@ -50,7 +50,7 @@ public:
OpTimeWithTerm getCurrentTermAndLastCommittedOpTime() override;
void processMetadata(const rpc::ReplSetMetadata& metadata,
- rpc::OplogQueryMetadata oqMetadata) override;
+ const rpc::OplogQueryMetadata& oqMetadata) override;
ChangeSyncSourceAction shouldStopFetching(const HostAndPort& source,
const rpc::ReplSetMetadata& replMetadata,
diff --git a/src/mongo/db/repl/idempotency_test.cpp b/src/mongo/db/repl/idempotency_test.cpp
index 9e94154f1c0..69777fdbc55 100644
--- a/src/mongo/db/repl/idempotency_test.cpp
+++ b/src/mongo/db/repl/idempotency_test.cpp
@@ -131,7 +131,7 @@ BSONObj RandomizedIdempotencyTest::canonicalizeDocumentForDataHash(const BSONObj
BSONObj RandomizedIdempotencyTest::getDoc() {
AutoGetCollectionForReadCommand autoColl(_opCtx.get(), nss);
BSONObj doc;
- Helpers::findById(_opCtx.get(), autoColl.getDb(), nss.ns(), kDocIdQuery, doc);
+ Helpers::findById(_opCtx.get(), nss.ns(), kDocIdQuery, doc);
return doc.getOwned();
}
diff --git a/src/mongo/db/repl/oplog.cpp b/src/mongo/db/repl/oplog.cpp
index 0908b06213a..7ffffbbf2c1 100644
--- a/src/mongo/db/repl/oplog.cpp
+++ b/src/mongo/db/repl/oplog.cpp
@@ -389,7 +389,7 @@ void _logOpsInner(OperationContext* opCtx,
}
// Insert the oplog records to the respective tenants change collections.
- if (ChangeStreamChangeCollectionManager::isChangeCollectionEnabled()) {
+ if (ChangeStreamChangeCollectionManager::isChangeCollectionsModeActive()) {
ChangeStreamChangeCollectionManager::get(opCtx).insertDocumentsToChangeCollection(
opCtx, *records, timestamps);
}
@@ -1578,7 +1578,7 @@ Status applyOperation_inlock(OperationContext* opCtx,
invariant(op.getObject2());
auto&& documentId = *op.getObject2();
auto documentFound = Helpers::findById(
- opCtx, db, collection->ns().ns(), documentId, changeStreamPreImage);
+ opCtx, collection->ns().ns(), documentId, changeStreamPreImage);
invariant(documentFound);
}
diff --git a/src/mongo/db/repl/oplog_applier_impl.cpp b/src/mongo/db/repl/oplog_applier_impl.cpp
index e9ca22da35c..575035711e0 100644
--- a/src/mongo/db/repl/oplog_applier_impl.cpp
+++ b/src/mongo/db/repl/oplog_applier_impl.cpp
@@ -623,8 +623,6 @@ void OplogApplierImpl::_deriveOpsAndFillWriterVectors(
LogicalSessionIdMap<std::vector<OplogEntry*>> partialTxnOps;
CachedCollectionProperties collPropertiesCache;
- // Used to serialize writes to the tenant migrations donor and recipient namespaces.
- boost::optional<uint32_t> tenantMigrationsWriterId;
for (auto&& op : *ops) {
// If the operation's optime is before or the same as the beginApplyingOpTime we don't want
// to apply it, so don't include it in writerVectors.
@@ -706,19 +704,6 @@ void OplogApplierImpl::_deriveOpsAndFillWriterVectors(
continue;
}
- // Writes to the tenant migration namespaces must be serialized to preserve the order of
- // migration and access blocker states.
- if (op.getNss() == NamespaceString::kTenantMigrationDonorsNamespace ||
- op.getNss() == NamespaceString::kTenantMigrationRecipientsNamespace) {
- auto writerId = OplogApplierUtils::addToWriterVector(
- opCtx, &op, writerVectors, &collPropertiesCache, tenantMigrationsWriterId);
- if (!tenantMigrationsWriterId) {
- tenantMigrationsWriterId.emplace(writerId);
- } else {
- invariant(writerId == *tenantMigrationsWriterId);
- }
- continue;
- }
OplogApplierUtils::addToWriterVector(opCtx, &op, writerVectors, &collPropertiesCache);
}
}
diff --git a/src/mongo/db/repl/oplog_applier_impl_test.cpp b/src/mongo/db/repl/oplog_applier_impl_test.cpp
index 5784b645cc5..b734004bb28 100644
--- a/src/mongo/db/repl/oplog_applier_impl_test.cpp
+++ b/src/mongo/db/repl/oplog_applier_impl_test.cpp
@@ -2644,42 +2644,6 @@ TEST_F(OplogApplierImplWithSlowAutoAdvancingClockTest, DoNotLogNonSlowOpApplicat
ASSERT_EQUALS(0, countTextFormatLogLinesContaining(expected.str()));
}
-TEST_F(OplogApplierImplTest, SerializeOplogApplicationOfWritesToTenantMigrationNamespaces) {
- auto writerPool = makeReplWriterPool();
- NoopOplogApplierObserver observer;
- TrackOpsAppliedApplier oplogApplier(
- nullptr, // executor
- nullptr, // oplogBuffer
- &observer,
- ReplicationCoordinator::get(_opCtx.get()),
- getConsistencyMarkers(),
- getStorageInterface(),
- repl::OplogApplier::Options(repl::OplogApplication::Mode::kSecondary),
- writerPool.get());
-
- const auto donorNss = NamespaceString::kTenantMigrationDonorsNamespace;
- const auto recipientNss = NamespaceString::kTenantMigrationRecipientsNamespace;
-
- std::vector<OplogEntry> opsToApply;
- opsToApply.push_back(
- makeDeleteDocumentOplogEntry({Timestamp(Seconds(2), 0), 1LL}, donorNss, BSON("_id" << 2)));
- opsToApply.push_back(makeInsertDocumentOplogEntry(
- {Timestamp(Seconds(3), 0), 1LL}, recipientNss, BSON("_id" << 3)));
- opsToApply.push_back(makeDeleteDocumentOplogEntry(
- {Timestamp(Seconds(4), 0), 1LL}, recipientNss, BSON("_id" << 3)));
- opsToApply.push_back(
- makeInsertDocumentOplogEntry({Timestamp(Seconds(5), 0), 1LL}, donorNss, BSON("_id" << 4)));
-
- ASSERT_OK(oplogApplier.applyOplogBatch(_opCtx.get(), opsToApply));
- const auto applied = oplogApplier.getOperationsApplied();
- ASSERT_EQ(4U, applied.size());
- ASSERT_BSONOBJ_EQ(opsToApply[0].getEntry().toBSON(), applied[0].getEntry().toBSON());
- ASSERT_BSONOBJ_EQ(opsToApply[1].getEntry().toBSON(), applied[1].getEntry().toBSON());
- ASSERT_BSONOBJ_EQ(opsToApply[2].getEntry().toBSON(), applied[2].getEntry().toBSON());
- ASSERT_BSONOBJ_EQ(opsToApply[3].getEntry().toBSON(), applied[3].getEntry().toBSON());
-}
-
-
class OplogApplierImplTxnTableTest : public OplogApplierImplTest {
public:
void setUp() override {
@@ -3319,10 +3283,7 @@ TEST_F(IdempotencyTest, EmptyCappedNamespaceNotFound) {
ASSERT_OK(runOpInitialSync(emptyCappedOp));
AutoGetCollectionForReadCommand autoColl(_opCtx.get(), nss);
-
- // Ensure that autoColl.getCollection() and autoColl.getDb() are both null.
- ASSERT_FALSE(autoColl.getCollection());
- ASSERT_FALSE(autoColl.getDb());
+ ASSERT_FALSE(autoColl);
}
TEST_F(IdempotencyTest, UpdateTwoFields) {
diff --git a/src/mongo/db/repl/oplog_entry.idl b/src/mongo/db/repl/oplog_entry.idl
index 7c1ba09f320..987f5806cbf 100644
--- a/src/mongo/db/repl/oplog_entry.idl
+++ b/src/mongo/db/repl/oplog_entry.idl
@@ -59,6 +59,9 @@ enums:
kPostImage: "postImage"
structs:
+ # TODO SERVER-67155 Ensure the tenantId is included in the serialized "ns" field when
+ # multitenancySupport is on but featureFlagRequireTenantId is off. Currently it will not be
+ # included in either place
DurableReplOperation:
description: "A document that represents an operation. Should never be used directly in
server code. Instead, create an instance of ReplOperation."
diff --git a/src/mongo/db/repl/oplog_entry_test.cpp b/src/mongo/db/repl/oplog_entry_test.cpp
index ae5039be724..4bcc4adfeb0 100644
--- a/src/mongo/db/repl/oplog_entry_test.cpp
+++ b/src/mongo/db/repl/oplog_entry_test.cpp
@@ -150,7 +150,9 @@ TEST(OplogEntryTest, InsertIncludesTidField) {
ASSERT(entry.getTid());
ASSERT_EQ(*entry.getTid(), tid);
- ASSERT_EQ(entry.getNss(), nss);
+ // TODO SERVER-66708 Check that (entry.getNss() == nss) once the OplogEntry deserializer
+ // passes "tid" to the NamespaceString constructor
+ ASSERT_EQ(entry.getNss(), NamespaceString(boost::none, nss.ns()));
ASSERT_BSONOBJ_EQ(entry.getIdElement().wrap("_id"), BSON("_id" << docId));
ASSERT_BSONOBJ_EQ(entry.getOperationToApply(), doc);
}
diff --git a/src/mongo/db/repl/oplog_fetcher.cpp b/src/mongo/db/repl/oplog_fetcher.cpp
index d50917d7fd7..6ec6c9778de 100644
--- a/src/mongo/db/repl/oplog_fetcher.cpp
+++ b/src/mongo/db/repl/oplog_fetcher.cpp
@@ -265,12 +265,8 @@ OpTime OplogFetcher::getLastOpTimeFetched_forTest() const {
return _getLastOpTimeFetched();
}
-BSONObj OplogFetcher::getFindQueryFilter_forTest() const {
- return _makeFindQueryFilter();
-}
-
-Query OplogFetcher::getFindQuerySettings_forTest(long long findTimeout) const {
- return _makeFindQuerySettings(findTimeout);
+FindCommandRequest OplogFetcher::makeFindCmdRequest_forTest(long long findTimeout) const {
+ return _makeFindCmdRequest(findTimeout);
}
Milliseconds OplogFetcher::getAwaitDataTimeout_forTest() const {
@@ -584,46 +580,56 @@ AggregateCommandRequest OplogFetcher::_makeAggregateCommandRequest(long long max
return aggRequest;
}
-BSONObj OplogFetcher::_makeFindQueryFilter() const {
- BSONObjBuilder queryBob;
-
- auto lastOpTimeFetched = _getLastOpTimeFetched();
- BSONObjBuilder filterBob;
- filterBob.append("ts", BSON("$gte" << lastOpTimeFetched.getTimestamp()));
- // Handle caller-provided filter.
- if (!_config.queryFilter.isEmpty()) {
- filterBob.append(
- "$or",
- BSON_ARRAY(_config.queryFilter << BSON("ts" << lastOpTimeFetched.getTimestamp())));
+FindCommandRequest OplogFetcher::_makeFindCmdRequest(long long findTimeout) const {
+ FindCommandRequest findCmd{_nss};
+
+ // Construct the find command's filter and set it on the 'FindCommandRequest'.
+ {
+ BSONObjBuilder queryBob;
+
+ auto lastOpTimeFetched = _getLastOpTimeFetched();
+ BSONObjBuilder filterBob;
+ filterBob.append("ts", BSON("$gte" << lastOpTimeFetched.getTimestamp()));
+ // Handle caller-provided filter.
+ if (!_config.queryFilter.isEmpty()) {
+ filterBob.append(
+ "$or",
+ BSON_ARRAY(_config.queryFilter << BSON("ts" << lastOpTimeFetched.getTimestamp())));
+ }
+ findCmd.setFilter(filterBob.obj());
+ }
+
+ findCmd.setTailable(true);
+ findCmd.setAwaitData(true);
+ findCmd.setMaxTimeMS(findTimeout);
+
+ if (_config.batchSize) {
+ findCmd.setBatchSize(_config.batchSize);
}
- return filterBob.obj();
-}
-Query OplogFetcher::_makeFindQuerySettings(long long findTimeout) const {
- Query query = Query().maxTimeMS(findTimeout);
if (_config.requestResumeToken) {
- query.hint(BSON("$natural" << 1)).requestResumeToken(true);
+ findCmd.setHint(BSON("$natural" << 1));
+ findCmd.setRequestResumeToken(true);
}
auto lastCommittedWithCurrentTerm =
_dataReplicatorExternalState->getCurrentTermAndLastCommittedOpTime();
auto term = lastCommittedWithCurrentTerm.value;
if (term != OpTime::kUninitializedTerm) {
- query.term(term);
+ findCmd.setTerm(term);
}
if (_config.queryReadConcern.isEmpty()) {
// This ensures that the sync source waits for all earlier oplog writes to be visible.
// Since Timestamp(0, 0) isn't allowed, Timestamp(0, 1) is the minimal we can use.
- query.readConcern(BSON("level"
- << "local"
- << "afterClusterTime" << Timestamp(0, 1)));
+ findCmd.setReadConcern(BSON("level"
+ << "local"
+ << "afterClusterTime" << Timestamp(0, 1)));
} else {
// Caller-provided read concern.
- query.appendElements(_config.queryReadConcern.toBSON());
+ findCmd.setReadConcern(_config.queryReadConcern.toBSONInner());
}
-
- return query;
+ return findCmd;
}
Status OplogFetcher::_createNewCursor(bool initialFind) {
@@ -651,17 +657,9 @@ Status OplogFetcher::_createNewCursor(bool initialFind) {
}
_cursor = std::move(ret.getValue());
} else {
+ auto findCmd = _makeFindCmdRequest(maxTimeMs);
_cursor = std::make_unique<DBClientCursor>(
- _conn.get(),
- _nss,
- _makeFindQueryFilter(),
- _makeFindQuerySettings(maxTimeMs),
- 0 /* limit */,
- 0 /* nToSkip */,
- nullptr /* fieldsToReturn */,
- QueryOption_CursorTailable | QueryOption_AwaitData |
- (oplogFetcherUsesExhaust ? QueryOption_Exhaust : 0),
- _config.batchSize);
+ _conn.get(), std::move(findCmd), ReadPreferenceSetting{}, oplogFetcherUsesExhaust);
}
_firstBatch = true;
@@ -817,7 +815,7 @@ Status OplogFetcher::_onSuccessfulBatch(const Documents& documents) {
"metadata"_attr = _metadataObj);
return oqMetadataResult.getStatus();
}
- auto oqMetadata = oqMetadataResult.getValue();
+ const auto& oqMetadata = oqMetadataResult.getValue();
if (_firstBatch) {
auto status =
@@ -884,7 +882,7 @@ Status OplogFetcher::_onSuccessfulBatch(const Documents& documents) {
"metadata"_attr = _metadataObj);
return metadataResult.getStatus();
}
- auto replSetMetadata = metadataResult.getValue();
+ const auto& replSetMetadata = metadataResult.getValue();
// Determine if we should stop syncing from our current sync source.
auto changeSyncSourceAction = _dataReplicatorExternalState->shouldStopFetching(
diff --git a/src/mongo/db/repl/oplog_fetcher.h b/src/mongo/db/repl/oplog_fetcher.h
index 01a4347669b..2147eb9ebde 100644
--- a/src/mongo/db/repl/oplog_fetcher.h
+++ b/src/mongo/db/repl/oplog_fetcher.h
@@ -275,8 +275,7 @@ public:
/**
* Returns the `find` query run on the sync source's oplog.
*/
- BSONObj getFindQueryFilter_forTest() const;
- Query getFindQuerySettings_forTest(long long findTimeout) const;
+ FindCommandRequest makeFindCmdRequest_forTest(long long findTimeout) const;
/**
* Returns the OpTime of the last oplog entry fetched and processed.
@@ -387,11 +386,9 @@ private:
/**
* This function will create the `find` query to issue to the sync source. It is provided with
- * whether this is the initial attempt to create the `find` query to determine what the find
- * timeout should be.
+ * the value to use as the "maxTimeMS" for the find command.
*/
- BSONObj _makeFindQueryFilter() const;
- Query _makeFindQuerySettings(long long findTimeout) const;
+ FindCommandRequest _makeFindCmdRequest(long long findTimeout) const;
/**
* Gets the next batch from the exhaust cursor.
diff --git a/src/mongo/db/repl/oplog_fetcher_test.cpp b/src/mongo/db/repl/oplog_fetcher_test.cpp
index e98039a0f8a..adc09da1300 100644
--- a/src/mongo/db/repl/oplog_fetcher_test.cpp
+++ b/src/mongo/db/repl/oplog_fetcher_test.cpp
@@ -806,19 +806,25 @@ TEST_F(OplogFetcherTest,
auto oplogFetcher = makeOplogFetcher();
auto findTimeout = durationCount<Milliseconds>(oplogFetcher->getInitialFindMaxTime_forTest());
- auto filter = oplogFetcher->getFindQueryFilter_forTest();
+ auto findCmdRequest = oplogFetcher->makeFindCmdRequest_forTest(findTimeout);
+
+ auto filter = findCmdRequest.getFilter();
ASSERT_BSONOBJ_EQ(BSON("ts" << BSON("$gte" << lastFetched.getTimestamp())), filter);
- auto queryObj =
- (oplogFetcher->getFindQuerySettings_forTest(findTimeout)).getFullSettingsDeprecated();
- ASSERT_EQUALS(60000, queryObj.getIntField("$maxTimeMS"));
+ auto maxTimeMS = findCmdRequest.getMaxTimeMS();
+ ASSERT(maxTimeMS);
+ ASSERT_EQUALS(60000, *maxTimeMS);
- ASSERT_EQUALS(mongo::BSONType::Object, queryObj["readConcern"].type());
+ auto readConcern = findCmdRequest.getReadConcern();
+ ASSERT(readConcern);
ASSERT_BSONOBJ_EQ(BSON("level"
<< "local"
<< "afterClusterTime" << Timestamp(0, 1)),
- queryObj["readConcern"].Obj());
- ASSERT_EQUALS(dataReplicatorExternalState->currentTerm, queryObj["term"].numberLong());
+ *readConcern);
+
+ auto term = findCmdRequest.getTerm();
+ ASSERT(term);
+ ASSERT_EQUALS(dataReplicatorExternalState->currentTerm, *term);
}
TEST_F(OplogFetcherTest,
@@ -826,21 +832,26 @@ TEST_F(OplogFetcherTest,
dataReplicatorExternalState->currentTerm = OpTime::kUninitializedTerm;
auto oplogFetcher = makeOplogFetcher();
- auto filter = oplogFetcher->getFindQueryFilter_forTest();
- ASSERT_BSONOBJ_EQ(BSON("ts" << BSON("$gte" << lastFetched.getTimestamp())), filter);
-
// Test that the correct maxTimeMS is set if we are retrying the 'find' query.
auto findTimeout = durationCount<Milliseconds>(oplogFetcher->getRetriedFindMaxTime_forTest());
- auto queryObj =
- (oplogFetcher->getFindQuerySettings_forTest(findTimeout)).getFullSettingsDeprecated();
- ASSERT_EQUALS(2000, queryObj.getIntField("$maxTimeMS"));
+ auto findCmdRequest = oplogFetcher->makeFindCmdRequest_forTest(findTimeout);
- ASSERT_EQUALS(mongo::BSONType::Object, queryObj["readConcern"].type());
+ auto filter = findCmdRequest.getFilter();
+ ASSERT_BSONOBJ_EQ(BSON("ts" << BSON("$gte" << lastFetched.getTimestamp())), filter);
+
+ auto maxTimeMS = findCmdRequest.getMaxTimeMS();
+ ASSERT(maxTimeMS);
+ ASSERT_EQUALS(2000, *maxTimeMS);
+
+ auto readConcern = findCmdRequest.getReadConcern();
+ ASSERT(readConcern);
ASSERT_BSONOBJ_EQ(BSON("level"
<< "local"
<< "afterClusterTime" << Timestamp(0, 1)),
- queryObj["readConcern"].Obj());
- ASSERT_FALSE(queryObj.hasField("term"));
+ *readConcern);
+
+ auto term = findCmdRequest.getTerm();
+ ASSERT(!term);
}
TEST_F(
diff --git a/src/mongo/db/repl/primary_only_service.cpp b/src/mongo/db/repl/primary_only_service.cpp
index cb79c007ced..dbe696ecce7 100644
--- a/src/mongo/db/repl/primary_only_service.cpp
+++ b/src/mongo/db/repl/primary_only_service.cpp
@@ -362,6 +362,9 @@ void PrimaryOnlyService::onStepUp(const OpTime& stepUpOpTime) {
instance.second.waitForCompletion();
}
+ savedInstances.clear();
+ newThenOldScopedExecutor.reset();
+
PrimaryOnlyServiceHangBeforeLaunchingStepUpLogic.pauseWhileSet();
// Now wait for the first write of the new term to be majority committed, so that we know
diff --git a/src/mongo/db/repl/repl_set_commands.cpp b/src/mongo/db/repl/repl_set_commands.cpp
index 5823d880c14..7f35d2cfb31 100644
--- a/src/mongo/db/repl/repl_set_commands.cpp
+++ b/src/mongo/db/repl/repl_set_commands.cpp
@@ -528,6 +528,11 @@ public:
"primary.)\n"
"http://dochub.mongodb.org/core/replicasetcommands";
}
+
+ bool shouldCheckoutSession() const final {
+ return false;
+ }
+
CmdReplSetStepDown()
: ReplSetCommand("replSetStepDown"),
_stepDownCmdsWithForceExecutedMetric("commands.replSetStepDownWithForce.total",
@@ -685,7 +690,7 @@ public:
if (metadataResult.isOK()) {
// New style update position command has metadata, which may inform the
// upstream of a higher term.
- auto metadata = metadataResult.getValue();
+ const auto& metadata = metadataResult.getValue();
replCoord->processReplSetMetadata(metadata);
}
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
index 7cbc79f9aed..5d450af12d7 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
@@ -560,7 +560,7 @@ OpTime ReplicationCoordinatorExternalStateImpl::onTransitionToPrimary(OperationC
// TODO: SERVER-65948 move the change collection creation logic from here to the PM-2502 hooks.
// The change collection will be created when the change stream is enabled.
- if (ChangeStreamChangeCollectionManager::isChangeCollectionEnabled()) {
+ if (ChangeStreamChangeCollectionManager::isChangeCollectionsModeActive()) {
auto status = ChangeStreamChangeCollectionManager::get(opCtx).createChangeCollection(
opCtx, boost::none);
if (!status.isOK()) {
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index c2f2aa1ad08..fe769df7572 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -1340,7 +1340,6 @@ void ReplicationCoordinatorImpl::signalDrainComplete(OperationContext* opCtx,
_updateMemberStateFromTopologyCoordinator(lk);
LOGV2(21331, "Transition to primary complete; database writes are now permitted");
- _drainFinishedCond.notify_all();
_externalState->startNoopWriter(_getMyLastAppliedOpTime_inlock());
}
@@ -1830,8 +1829,9 @@ Status ReplicationCoordinatorImpl::setLastDurableOptime_forTest(long long cfgVer
const UpdatePositionArgs::UpdateInfo update(
OpTime(), Date_t(), opTime, wallTime, cfgVer, memberId);
- const auto status = _setLastOptime(lock, update);
- return status;
+ const auto statusWithOpTime = _setLastOptimeForMember(lock, update);
+ _updateStateAfterRemoteOpTimeUpdates(lock, statusWithOpTime.getValue());
+ return statusWithOpTime.getStatus();
}
Status ReplicationCoordinatorImpl::setLastAppliedOptime_forTest(long long cfgVer,
@@ -1847,25 +1847,29 @@ Status ReplicationCoordinatorImpl::setLastAppliedOptime_forTest(long long cfgVer
const UpdatePositionArgs::UpdateInfo update(
opTime, wallTime, OpTime(), Date_t(), cfgVer, memberId);
- const auto status = _setLastOptime(lock, update);
- return status;
+ const auto statusWithOpTime = _setLastOptimeForMember(lock, update);
+ _updateStateAfterRemoteOpTimeUpdates(lock, statusWithOpTime.getValue());
+ return statusWithOpTime.getStatus();
}
-Status ReplicationCoordinatorImpl::_setLastOptime(WithLock lk,
- const UpdatePositionArgs::UpdateInfo& args) {
- auto result = _topCoord->setLastOptime(args, _replExecutor->now());
+StatusWith<OpTime> ReplicationCoordinatorImpl::_setLastOptimeForMember(
+ WithLock lk, const UpdatePositionArgs::UpdateInfo& args) {
+ auto result = _topCoord->setLastOptimeForMember(args, _replExecutor->now());
if (!result.isOK())
return result.getStatus();
const bool advancedOpTime = result.getValue();
+ _rescheduleLivenessUpdate_inlock(args.memberId);
+ return advancedOpTime ? std::max(args.appliedOpTime, args.durableOpTime) : OpTime();
+}
+
+void ReplicationCoordinatorImpl::_updateStateAfterRemoteOpTimeUpdates(
+ WithLock lk, const OpTime& maxRemoteOpTime) {
// Only update committed optime if the remote optimes increased.
- if (advancedOpTime) {
+ if (!maxRemoteOpTime.isNull()) {
_updateLastCommittedOpTimeAndWallTime(lk);
        // Wake up replication waiters on optime changes.
- _wakeReadyWaiters(lk, std::max(args.appliedOpTime, args.durableOpTime));
+ _wakeReadyWaiters(lk, maxRemoteOpTime);
}
-
- _rescheduleLivenessUpdate_inlock(args.memberId);
- return Status::OK();
}
bool ReplicationCoordinatorImpl::isCommitQuorumSatisfied(
@@ -4415,7 +4419,7 @@ void ReplicationCoordinatorImpl::_errorOnPromisesIfHorizonChanged(WithLock lk,
HelloMetrics::get(opCtx)->resetNumAwaitingTopologyChanges();
}
- if (oldIndex >= 0 && newIndex >= 0) {
+ if (oldIndex >= 0) {
invariant(_sniToValidConfigPromiseMap.empty());
const auto oldHorizonMappings = oldConfig.getMemberAt(oldIndex).getHorizonMappings();
@@ -5079,18 +5083,22 @@ void ReplicationCoordinatorImpl::_wakeReadyWaiters(WithLock lk, boost::optional<
Status ReplicationCoordinatorImpl::processReplSetUpdatePosition(const UpdatePositionArgs& updates) {
stdx::unique_lock<Latch> lock(_mutex);
Status status = Status::OK();
- bool somethingChanged = false;
+ bool gotValidUpdate = false;
+ OpTime maxRemoteOpTime;
for (UpdatePositionArgs::UpdateIterator update = updates.updatesBegin();
update != updates.updatesEnd();
++update) {
- status = _setLastOptime(lock, *update);
- if (!status.isOK()) {
+ auto statusWithOpTime = _setLastOptimeForMember(lock, *update);
+ if (!statusWithOpTime.isOK()) {
+ status = statusWithOpTime.getStatus();
break;
}
- somethingChanged = true;
+ maxRemoteOpTime = std::max(maxRemoteOpTime, statusWithOpTime.getValue());
+ gotValidUpdate = true;
}
+ _updateStateAfterRemoteOpTimeUpdates(lock, maxRemoteOpTime);
- if (somethingChanged && !_getMemberState_inlock().primary()) {
+ if (gotValidUpdate && !_getMemberState_inlock().primary()) {
lock.unlock();
// Must do this outside _mutex
_externalState->forwardSecondaryProgress();
@@ -5716,28 +5724,27 @@ void ReplicationCoordinatorImpl::prepareReplMetadata(const BSONObj& metadataRequ
invariant(-1 != rbid);
}
- stdx::lock_guard<Latch> lk(_mutex);
+ boost::optional<rpc::ReplSetMetadata> replSetMetadata;
+ boost::optional<rpc::OplogQueryMetadata> oplogQueryMetadata;
+ {
+ stdx::lock_guard<Latch> lk(_mutex);
- if (hasReplSetMetadata) {
- _prepareReplSetMetadata_inlock(lastOpTimeFromClient, builder);
- }
+ if (hasReplSetMetadata) {
+ OpTime lastVisibleOpTime =
+ std::max(lastOpTimeFromClient, _getCurrentCommittedSnapshotOpTime_inlock());
+ replSetMetadata = _topCoord->prepareReplSetMetadata(lastVisibleOpTime);
+ }
- if (hasOplogQueryMetadata) {
- _prepareOplogQueryMetadata_inlock(rbid, builder);
+ if (hasOplogQueryMetadata) {
+ oplogQueryMetadata = _topCoord->prepareOplogQueryMetadata(rbid);
+ }
}
-}
-
-void ReplicationCoordinatorImpl::_prepareReplSetMetadata_inlock(const OpTime& lastOpTimeFromClient,
- BSONObjBuilder* builder) const {
- OpTime lastVisibleOpTime =
- std::max(lastOpTimeFromClient, _getCurrentCommittedSnapshotOpTime_inlock());
- auto metadata = _topCoord->prepareReplSetMetadata(lastVisibleOpTime);
- metadata.writeToMetadata(builder).transitional_ignore();
-}
-void ReplicationCoordinatorImpl::_prepareOplogQueryMetadata_inlock(int rbid,
- BSONObjBuilder* builder) const {
- _topCoord->prepareOplogQueryMetadata(rbid).writeToMetadata(builder).transitional_ignore();
+ // Do BSON serialization outside lock.
+ if (replSetMetadata)
+ invariantStatusOK(replSetMetadata->writeToMetadata(builder));
+ if (oplogQueryMetadata)
+ invariantStatusOK(oplogQueryMetadata->writeToMetadata(builder));
}
bool ReplicationCoordinatorImpl::getWriteConcernMajorityShouldJournal() {
diff --git a/src/mongo/db/repl/replication_coordinator_impl.h b/src/mongo/db/repl/replication_coordinator_impl.h
index a6dc8fe9066..9ac44fdc62e 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_impl.h
@@ -469,7 +469,7 @@ public:
executor::TaskExecutor::CallbackHandle getCatchupTakeoverCbh_forTest() const;
/**
- * Simple wrappers around _setLastOptime to make it easier to test.
+ * Simple wrappers around _setLastOptimeForMember to make it easier to test.
*/
Status setLastAppliedOptime_forTest(long long cfgVer,
long long memberId,
@@ -1099,8 +1099,19 @@ private:
* This is only valid to call on replica sets.
* "configVersion" will be populated with our config version if it and the configVersion
* of "args" differ.
+ *
+ * If either applied or durable optime has changed, returns the later of the two (even if
+ * that's not the one which changed). Otherwise returns a null optime.
+ */
+ StatusWith<OpTime> _setLastOptimeForMember(WithLock lk,
+ const UpdatePositionArgs::UpdateInfo& args);
+
+ /**
+ * Helper for processReplSetUpdatePosition, companion to _setLastOptimeForMember above. Updates
+ * replication coordinator state and notifies waiters after remote optime updates. Must be
+ * called within the same critical section as _setLastOptimeForMember.
*/
- Status _setLastOptime(WithLock lk, const UpdatePositionArgs::UpdateInfo& args);
+ void _updateStateAfterRemoteOpTimeUpdates(WithLock lk, const OpTime& maxRemoteOpTime);
/**
* This function will report our position externally (like upstream) if necessary.
@@ -1463,17 +1474,6 @@ private:
EventHandle _processReplSetMetadata_inlock(const rpc::ReplSetMetadata& replMetadata);
/**
- * Prepares a metadata object for ReplSetMetadata.
- */
- void _prepareReplSetMetadata_inlock(const OpTime& lastOpTimeFromClient,
- BSONObjBuilder* builder) const;
-
- /**
- * Prepares a metadata object for OplogQueryMetadata.
- */
- void _prepareOplogQueryMetadata_inlock(int rbid, BSONObjBuilder* builder) const;
-
- /**
* Blesses a snapshot to be used for new committed reads.
*
* Returns true if the value was updated to `newCommittedSnapshot`.
@@ -1719,9 +1719,6 @@ private:
// Current ReplicaSet state.
MemberState _memberState; // (M)
- // Used to signal threads waiting for changes to _memberState.
- stdx::condition_variable _drainFinishedCond; // (M)
-
ReplicationCoordinator::ApplierState _applierState = ApplierState::Running; // (M)
// Used to signal threads waiting for changes to _rsConfigState.
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
index cfb8b355366..1392cceb923 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat.cpp
@@ -661,51 +661,51 @@ void ReplicationCoordinatorImpl::_scheduleHeartbeatReconfig(WithLock lk,
std::tuple<StatusWith<ReplSetConfig>, bool> ReplicationCoordinatorImpl::_resolveConfigToApply(
const ReplSetConfig& config) {
+ if (!_settings.isServerless() || !config.isSplitConfig()) {
+ return {config, false};
+ }
+
stdx::unique_lock<Latch> lk(_mutex);
- if (config.isSplitConfig()) {
- if (!_rsConfig.isInitialized()) {
- // Unlock the lock because isSelf performs network I/O.
- lk.unlock();
+ if (!_rsConfig.isInitialized()) {
+ // Unlock the lock because isSelf performs network I/O.
+ lk.unlock();
- // If this node is listed in the members of incoming config, accept the config.
- const auto foundSelfInMembers =
- std::any_of(config.membersBegin(),
- config.membersEnd(),
- [externalState = _externalState.get()](const MemberConfig& config) {
- return externalState->isSelf(config.getHostAndPort(),
- getGlobalServiceContext());
- });
-
- if (foundSelfInMembers) {
- return {config, false};
- }
+ // If this node is listed in the members of incoming config, accept the config.
+ const auto foundSelfInMembers = std::any_of(
+ config.membersBegin(),
+ config.membersEnd(),
+ [externalState = _externalState.get()](const MemberConfig& config) {
+ return externalState->isSelf(config.getHostAndPort(), getGlobalServiceContext());
+ });
- return {Status(ErrorCodes::NotYetInitialized,
- "Cannot apply a split config if the current config is uninitialized"),
- false};
+ if (foundSelfInMembers) {
+ return {config, false};
}
- auto recipientConfig = config.getRecipientConfig();
- const auto& selfMember = _rsConfig.getMemberAt(_selfIndex);
- if (recipientConfig->findMemberByHostAndPort(selfMember.getHostAndPort())) {
- if (selfMember.getNumVotes() > 0) {
- return {
- Status(ErrorCodes::BadValue, "Cannot apply recipient config to a voting node"),
- false};
- }
+ return {Status(ErrorCodes::NotYetInitialized,
+ "Cannot apply a split config if the current config is uninitialized"),
+ false};
+ }
- if (_rsConfig.getReplSetName() == recipientConfig->getReplSetName()) {
- return {Status(ErrorCodes::InvalidReplicaSetConfig,
- "Cannot apply recipient config since current config and recipient "
- "config have the same set name."),
- false};
- }
+ auto recipientConfig = config.getRecipientConfig();
+ const auto& selfMember = _rsConfig.getMemberAt(_selfIndex);
+ if (recipientConfig->findMemberByHostAndPort(selfMember.getHostAndPort())) {
+ if (selfMember.getNumVotes() > 0) {
+ return {Status(ErrorCodes::BadValue, "Cannot apply recipient config to a voting node"),
+ false};
+ }
- auto mutableConfig = recipientConfig->getMutable();
- mutableConfig.setConfigVersion(1);
- mutableConfig.setConfigTerm(1);
- return {ReplSetConfig(std::move(mutableConfig)), true};
+ if (_rsConfig.getReplSetName() == recipientConfig->getReplSetName()) {
+ return {Status(ErrorCodes::InvalidReplicaSetConfig,
+ "Cannot apply recipient config since current config and recipient "
+ "config have the same set name."),
+ false};
}
+
+ auto mutableConfig = recipientConfig->getMutable();
+ mutableConfig.setConfigVersion(1);
+ mutableConfig.setConfigTerm(1);
+ return {ReplSetConfig(std::move(mutableConfig)), true};
}
return {config, false};
diff --git a/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp b/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp
index 5203980b575..e619276b129 100644
--- a/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl_heartbeat_v1_test.cpp
@@ -58,6 +58,7 @@ namespace {
using executor::NetworkInterfaceMock;
using executor::RemoteCommandRequest;
using executor::RemoteCommandResponse;
+using InNetworkGuard = NetworkInterfaceMock::InNetworkGuard;
TEST(ReplSetHeartbeatArgs, AcceptsUnknownField) {
ReplSetHeartbeatArgsV1 hbArgs;
@@ -116,7 +117,8 @@ protected:
void processResponseFromPrimary(const ReplSetConfig& config,
long long version = -2,
- long long term = OpTime::kInitialTerm);
+ long long term = OpTime::kInitialTerm,
+ const HostAndPort& target = HostAndPort{"h1", 1});
};
void ReplCoordHBV1Test::assertMemberState(const MemberState expected, std::string msg) {
@@ -160,13 +162,14 @@ ReplCoordHBV1Test::performSyncToFinishReconfigHeartbeat() {
void ReplCoordHBV1Test::processResponseFromPrimary(const ReplSetConfig& config,
long long version,
- long long term) {
+ long long term,
+ const HostAndPort& target) {
NetworkInterfaceMock* net = getNet();
const Date_t startDate = getNet()->now();
NetworkInterfaceMock::NetworkOperationIterator noi = net->getNextReadyRequest();
const RemoteCommandRequest& request = noi->getRequest();
- ASSERT_EQUALS(HostAndPort("h1", 1), request.target);
+ ASSERT_EQUALS(target, request.target);
ReplSetHeartbeatArgsV1 hbArgs;
ASSERT_OK(hbArgs.initialize(request.cmdObj));
ASSERT_EQUALS("mySet", hbArgs.getSetName());
@@ -266,6 +269,85 @@ TEST_F(ReplCoordHBV1Test,
ASSERT_TRUE(getExternalState()->threadsStarted());
}
+TEST_F(ReplCoordHBV1Test, RejectSplitConfigWhenNotInServerlessMode) {
+ auto severityGuard = unittest::MinimumLoggedSeverityGuard{logv2::LogComponent::kDefault,
+ logv2::LogSeverity::Debug(3)};
+
+ // Start up with three nodes, and assume the role of "node2" as a secondary. Notably, the local
+ // node is NOT started in serverless mode. "node2" is configured as having no votes, no
+ // priority, so that we can pass validation for accepting a split config.
+ assertStartSuccess(BSON("_id"
+ << "mySet"
+ << "protocolVersion" << 1 << "version" << 2 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345"
+ << "votes" << 0 << "priority" << 0)
+ << BSON("_id" << 3 << "host"
+ << "node3:12345"))),
+ HostAndPort("node2", 12345));
+ ASSERT_OK(getReplCoord()->setFollowerMode(MemberState::RS_SECONDARY));
+ getReplCoord()->updateTerm_forTest(1, nullptr);
+ ASSERT_EQ(getReplCoord()->getTerm(), 1);
+    // Respond to the initial heartbeat requests sent to the other two nodes.
+ for (int j = 0; j < 2; ++j) {
+ replyToReceivedHeartbeatV1();
+ }
+
+ // Verify that there are no further heartbeat requests, since the heartbeat requests should be
+ // scheduled for the future.
+ {
+ InNetworkGuard guard(getNet());
+ assertMemberState(MemberState::RS_SECONDARY);
+ ASSERT_FALSE(getNet()->hasReadyRequests());
+ }
+
+ ReplSetConfig splitConfig =
+ assertMakeRSConfig(BSON("_id"
+ << "mySet"
+ << "version" << 3 << "term" << 1 << "protocolVersion" << 1
+ << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345")
+ << BSON("_id" << 3 << "host"
+ << "node3:12345"))
+ << "recipientConfig"
+ << BSON("_id"
+ << "recipientSet"
+ << "version" << 1 << "term" << 1 << "members"
+ << BSON_ARRAY(BSON("_id" << 1 << "host"
+ << "node1:12345")
+ << BSON("_id" << 2 << "host"
+ << "node2:12345")
+ << BSON("_id" << 3 << "host"
+ << "node3:12345")))));
+
+ // Accept a heartbeat from `node1` which has a split config. The split config lists this node
+    // ("node2") in the recipient member list, but a node not started in serverless mode should not
+ // accept and install the recipient config.
+ receiveHeartbeatFrom(splitConfig, 1, HostAndPort("node1", 12345));
+
+ {
+ InNetworkGuard guard(getNet());
+ processResponseFromPrimary(splitConfig, 2, 1, HostAndPort{"node1", 12345});
+ assertMemberState(MemberState::RS_SECONDARY);
+ OperationContextNoop opCtx;
+ auto storedConfig = ReplSetConfig::parse(
+ unittest::assertGet(getExternalState()->loadLocalConfigDocument(&opCtx)));
+ ASSERT_OK(storedConfig.validate());
+
+ // Verify that the recipient config was not accepted. A successfully applied splitConfig
+ // will install at version and term {1, 1}.
+ ASSERT_EQUALS(ConfigVersionAndTerm(3, 1), storedConfig.getConfigVersionAndTerm());
+ ASSERT_EQUALS("mySet", storedConfig.getReplSetName());
+ }
+
+ ASSERT_TRUE(getExternalState()->threadsStarted());
+}
+
TEST_F(ReplCoordHBV1Test, NodeRejectsSplitConfigWhenNotInitialized) {
ReplSetConfig rsConfig =
assertMakeRSConfig(BSON("_id"
@@ -556,6 +638,10 @@ TEST_F(
class ReplCoordHBV1SplitConfigTest : public ReplCoordHBV1Test {
public:
void startUp(const std::string& hostAndPort) {
+ ReplSettings settings;
+ settings.setServerlessMode();
+ init(settings);
+
BSONObj configBson =
BSON("_id" << _donorSetName << "version" << _configVersion << "term" << _configTerm
<< "members" << _members << "protocolVersion" << 1);
@@ -740,7 +826,6 @@ TEST_F(ReplCoordHBV1SplitConfigTest, RecipientNodeApplyConfig) {
validateNextRequest("", _recipientSetName, 1, 1);
}
-using InNetworkGuard = NetworkInterfaceMock::InNetworkGuard;
TEST_F(ReplCoordHBV1SplitConfigTest, RejectMismatchedSetNameInHeartbeatResponse) {
startUp(_recipientSecondaryNode);
@@ -813,9 +898,9 @@ TEST_F(ReplCoordHBV1SplitConfigTest, RecipientNodeNonZeroVotes) {
getNet()->runReadyNetworkOperations();
// The node rejected the config as it's a voting node and its version has not changed.
- ASSERT_EQ(getReplCoord()->getConfigVersion(), _configVersion);
- ASSERT_EQ(getReplCoord()->getConfigTerm(), _configTerm);
- ASSERT_EQ(getReplCoord()->getSettings().ourSetName(), _donorSetName);
+ auto config = getReplCoord()->getConfig();
+ ASSERT_EQ(config.getConfigVersionAndTerm(), ConfigVersionAndTerm(_configVersion, _configTerm));
+ ASSERT_EQ(config.getReplSetName(), _donorSetName);
}
class ReplCoordHBV1ReconfigTest : public ReplCoordHBV1Test {
diff --git a/src/mongo/db/repl/replication_coordinator_mock.cpp b/src/mongo/db/repl/replication_coordinator_mock.cpp
index bbe14690c7a..31a307a96b0 100644
--- a/src/mongo/db/repl/replication_coordinator_mock.cpp
+++ b/src/mongo/db/repl/replication_coordinator_mock.cpp
@@ -236,11 +236,11 @@ void ReplicationCoordinatorMock::setMyHeartbeatMessage(const std::string& msg) {
}
void ReplicationCoordinatorMock::_setMyLastAppliedOpTimeAndWallTime(
- const OpTimeAndWallTime& opTimeAndWallTime) {
+ WithLock lk, const OpTimeAndWallTime& opTimeAndWallTime) {
_myLastAppliedOpTime = opTimeAndWallTime.opTime;
_myLastAppliedWallTime = opTimeAndWallTime.wallTime;
- setCurrentCommittedSnapshotOpTime(opTimeAndWallTime.opTime);
+ _setCurrentCommittedSnapshotOpTime(lk, opTimeAndWallTime.opTime);
if (auto storageEngine = _service->getStorageEngine()) {
if (auto snapshotManager = storageEngine->getSnapshotManager()) {
@@ -253,7 +253,7 @@ void ReplicationCoordinatorMock::setMyLastAppliedOpTimeAndWallTime(
const OpTimeAndWallTime& opTimeAndWallTime) {
stdx::lock_guard<Mutex> lk(_mutex);
- _setMyLastAppliedOpTimeAndWallTime(opTimeAndWallTime);
+ _setMyLastAppliedOpTimeAndWallTime(lk, opTimeAndWallTime);
}
void ReplicationCoordinatorMock::setMyLastDurableOpTimeAndWallTime(
@@ -269,7 +269,7 @@ void ReplicationCoordinatorMock::setMyLastAppliedOpTimeAndWallTimeForward(
stdx::lock_guard<Mutex> lk(_mutex);
if (opTimeAndWallTime.opTime > _myLastAppliedOpTime) {
- _setMyLastAppliedOpTimeAndWallTime(opTimeAndWallTime);
+ _setMyLastAppliedOpTimeAndWallTime(lk, opTimeAndWallTime);
}
}
@@ -657,11 +657,17 @@ Status ReplicationCoordinatorMock::updateTerm(OperationContext* opCtx, long long
void ReplicationCoordinatorMock::clearCommittedSnapshot() {}
-void ReplicationCoordinatorMock::setCurrentCommittedSnapshotOpTime(OpTime time) {
+void ReplicationCoordinatorMock::_setCurrentCommittedSnapshotOpTime(WithLock lk, OpTime time) {
_currentCommittedSnapshotOpTime = time;
}
+void ReplicationCoordinatorMock::setCurrentCommittedSnapshotOpTime(OpTime time) {
+ stdx::lock_guard<Mutex> lk(_mutex);
+ _setCurrentCommittedSnapshotOpTime(lk, time);
+}
+
OpTime ReplicationCoordinatorMock::getCurrentCommittedSnapshotOpTime() const {
+ stdx::lock_guard<Mutex> lk(_mutex);
return _currentCommittedSnapshotOpTime;
}
diff --git a/src/mongo/db/repl/replication_coordinator_mock.h b/src/mongo/db/repl/replication_coordinator_mock.h
index 3ac7686ea34..dbe7b28ef83 100644
--- a/src/mongo/db/repl/replication_coordinator_mock.h
+++ b/src/mongo/db/repl/replication_coordinator_mock.h
@@ -422,7 +422,9 @@ public:
virtual WriteConcernTagChanges* getWriteConcernTagChanges() override;
private:
- void _setMyLastAppliedOpTimeAndWallTime(const OpTimeAndWallTime& opTimeAndWallTime);
+ void _setMyLastAppliedOpTimeAndWallTime(WithLock lk,
+ const OpTimeAndWallTime& opTimeAndWallTime);
+ void _setCurrentCommittedSnapshotOpTime(WithLock lk, OpTime time);
ServiceContext* const _service;
ReplSettings _settings;
diff --git a/src/mongo/db/repl/roll_back_local_operations_test.cpp b/src/mongo/db/repl/roll_back_local_operations_test.cpp
index b71765e33d3..70421f959e1 100644
--- a/src/mongo/db/repl/roll_back_local_operations_test.cpp
+++ b/src/mongo/db/repl/roll_back_local_operations_test.cpp
@@ -321,7 +321,8 @@ public:
DBClientConnectionForTest(int numInitFailures) : _initFailuresLeft(numInitFailures) {}
std::unique_ptr<DBClientCursor> find(FindCommandRequest findRequest,
- const ReadPreferenceSetting& readPref) override {
+ const ReadPreferenceSetting& readPref,
+ ExhaustMode exhaustMode) override {
if (_initFailuresLeft > 0) {
_initFailuresLeft--;
LOGV2(21657,
diff --git a/src/mongo/db/repl/rollback_source_impl.cpp b/src/mongo/db/repl/rollback_source_impl.cpp
index 9c56b0ff21e..8b427be197c 100644
--- a/src/mongo/db/repl/rollback_source_impl.cpp
+++ b/src/mongo/db/repl/rollback_source_impl.cpp
@@ -94,7 +94,8 @@ std::pair<BSONObj, NamespaceString> RollbackSourceImpl::findOneByUUID(const std:
auto cursor =
std::make_unique<DBClientCursor>(_getConnection(),
std::move(findRequest),
- ReadPreferenceSetting{ReadPreference::SecondaryPreferred});
+ ReadPreferenceSetting{ReadPreference::SecondaryPreferred},
+ false /*isExhaust*/);
uassert(6138500, "find one by UUID failed", cursor->init());
BSONObj result = cursor->more() ? cursor->nextSafe() : BSONObj{};
NamespaceString nss = cursor->getNamespaceString();
diff --git a/src/mongo/db/repl/rs_rollback.cpp b/src/mongo/db/repl/rs_rollback.cpp
index e527aa204eb..8777903803c 100644
--- a/src/mongo/db/repl/rs_rollback.cpp
+++ b/src/mongo/db/repl/rs_rollback.cpp
@@ -949,7 +949,7 @@ void rollbackCreateIndexes(OperationContext* opCtx, UUID uuid, std::set<std::str
"indexName"_attr = indexName);
WriteUnitOfWork wuow(opCtx);
- dropIndex(opCtx, collection.getWritableCollection(), indexName, *nss);
+ dropIndex(opCtx, collection.getWritableCollection(opCtx), indexName, *nss);
wuow.commit();
LOGV2_DEBUG(21673,
@@ -1634,12 +1634,12 @@ void rollback_internal::syncFixUp(OperationContext* opCtx,
WriteUnitOfWork wuow(opCtx);
// Set collection to whatever temp status is on the sync source.
- collection.getWritableCollection()->setIsTemp(opCtx, options.temp);
+ collection.getWritableCollection(opCtx)->setIsTemp(opCtx, options.temp);
// Set any document validation options. We update the validator fields without
// parsing/validation, since we fetched the options object directly from the sync
// source, and we should set our validation options to match it exactly.
- auto validatorStatus = collection.getWritableCollection()->updateValidator(
+ auto validatorStatus = collection.getWritableCollection(opCtx)->updateValidator(
opCtx, options.validator, options.validationLevel, options.validationAction);
if (!validatorStatus.isOK()) {
throw RSFatalException(str::stream()
@@ -1811,16 +1811,16 @@ void rollback_internal::syncFixUp(OperationContext* opCtx,
// RecordId loc = Helpers::findById(nsd, pattern);
if (!loc.isNull()) {
try {
- writeConflictRetry(opCtx,
- "cappedTruncateAfter",
- collection->ns().ns(),
- [&] {
- WriteUnitOfWork wunit(opCtx);
- collection.getWritableCollection()
- ->cappedTruncateAfter(
- opCtx, loc, true);
- wunit.commit();
- });
+ writeConflictRetry(
+ opCtx,
+ "cappedTruncateAfter",
+ collection->ns().ns(),
+ [&] {
+ WriteUnitOfWork wunit(opCtx);
+ collection.getWritableCollection(opCtx)
+ ->cappedTruncateAfter(opCtx, loc, true);
+ wunit.commit();
+ });
} catch (const DBException& e) {
if (e.code() == 13415) {
// hack: need to just make cappedTruncate do this...
@@ -1828,7 +1828,7 @@ void rollback_internal::syncFixUp(OperationContext* opCtx,
opCtx, "truncate", collection->ns().ns(), [&] {
WriteUnitOfWork wunit(opCtx);
uassertStatusOK(
- collection.getWritableCollection()
+ collection.getWritableCollection(opCtx)
->truncate(opCtx));
wunit.commit();
});
@@ -2012,14 +2012,6 @@ void rollback_internal::syncFixUp(OperationContext* opCtx,
validator->resetKeyManagerCache();
}
- // Force the config server to update its shard registry on next access. Otherwise it may have
- // the stale data that has been just rolled back.
- if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
- if (auto shardRegistry = Grid::get(opCtx)->shardRegistry()) {
- shardRegistry->clearEntries();
- }
- }
-
// Force the default read/write concern cache to reload on next access in case the defaults
// document was rolled back.
ReadWriteConcernDefaults::get(opCtx).invalidate();
diff --git a/src/mongo/db/repl/storage_interface_impl.cpp b/src/mongo/db/repl/storage_interface_impl.cpp
index 1a90e3a57c8..22d7c7648e4 100644
--- a/src/mongo/db/repl/storage_interface_impl.cpp
+++ b/src/mongo/db/repl/storage_interface_impl.cpp
@@ -50,6 +50,7 @@
#include "mongo/db/catalog/database_holder.h"
#include "mongo/db/catalog/document_validation.h"
#include "mongo/db/catalog/index_catalog.h"
+#include "mongo/db/change_stream_change_collection_manager.h"
#include "mongo/db/client.h"
#include "mongo/db/concurrency/d_concurrency.h"
#include "mongo/db/concurrency/exception_util.h"
@@ -323,12 +324,6 @@ template <typename AutoGetCollectionType>
StatusWith<const CollectionPtr*> getCollection(const AutoGetCollectionType& autoGetCollection,
const NamespaceStringOrUUID& nsOrUUID,
const std::string& message) {
- if (!autoGetCollection.getDb()) {
- StringData dbName = nsOrUUID.nss() ? nsOrUUID.nss()->db() : nsOrUUID.dbname();
- return {ErrorCodes::NamespaceNotFound,
- str::stream() << "Database [" << dbName << "] not found. " << message};
- }
-
const auto& collection = autoGetCollection.getCollection();
if (!collection) {
return {ErrorCodes::NamespaceNotFound,
@@ -347,6 +342,8 @@ Status insertDocumentsSingleBatch(OperationContext* opCtx,
boost::optional<AutoGetOplog> autoOplog;
const CollectionPtr* collection;
+ bool shouldWriteToChangeCollections = false;
+
auto nss = nsOrUUID.nss();
if (nss && nss->isOplog()) {
// Simplify locking rules for oplog collection.
@@ -355,6 +352,9 @@ Status insertDocumentsSingleBatch(OperationContext* opCtx,
if (!*collection) {
return {ErrorCodes::NamespaceNotFound, "Oplog collection does not exist"};
}
+
+ shouldWriteToChangeCollections =
+ ChangeStreamChangeCollectionManager::isChangeCollectionsModeActive();
} else {
autoColl.emplace(opCtx, nsOrUUID, MODE_IX);
auto collectionResult = getCollection(
@@ -371,6 +371,18 @@ Status insertDocumentsSingleBatch(OperationContext* opCtx,
if (!status.isOK()) {
return status;
}
+
+    // Insert oplog entries to change collections if we are running in serverless mode and the 'nss'
+ // is 'local.oplog.rs'.
+ if (shouldWriteToChangeCollections) {
+ auto& changeCollectionManager = ChangeStreamChangeCollectionManager::get(opCtx);
+ status = changeCollectionManager.insertDocumentsToChangeCollection(
+ opCtx, begin, end, nullOpDebug);
+ if (!status.isOK()) {
+ return status;
+ }
+ }
+
wunit.commit();
return Status::OK();
diff --git a/src/mongo/db/repl/storage_interface_impl_test.cpp b/src/mongo/db/repl/storage_interface_impl_test.cpp
index 3c942ed7361..14362a56821 100644
--- a/src/mongo/db/repl/storage_interface_impl_test.cpp
+++ b/src/mongo/db/repl/storage_interface_impl_test.cpp
@@ -2684,7 +2684,6 @@ TEST_F(StorageInterfaceImplTest,
auto doc = BSON("_id" << 0 << "x" << 1);
auto status = storage.upsertById(opCtx, nss, doc["_id"], doc);
ASSERT_EQUALS(ErrorCodes::NamespaceNotFound, status);
- ASSERT_EQUALS("Database [nosuchdb] not found. Unable to update document.", status.reason());
}
TEST_F(StorageInterfaceImplTest,
@@ -2879,10 +2878,6 @@ TEST_F(StorageInterfaceImplTest, DeleteByFilterReturnsNamespaceNotFoundWhenDatab
auto filter = BSON("x" << 1);
auto status = storage.deleteByFilter(opCtx, nss, filter);
ASSERT_EQUALS(ErrorCodes::NamespaceNotFound, status);
- ASSERT_EQUALS(std::string(str::stream()
- << "Database [nosuchdb] not found. Unable to delete documents in "
- << nss.ns() << " using filter " << filter),
- status.reason());
}
TEST_F(StorageInterfaceImplTest, DeleteByFilterReturnsBadValueWhenFilterContainsUnknownOperator) {
diff --git a/src/mongo/db/repl/storage_timestamp_test.cpp b/src/mongo/db/repl/storage_timestamp_test.cpp
index cc0f88d0779..fb9325c1978 100644
--- a/src/mongo/db/repl/storage_timestamp_test.cpp
+++ b/src/mongo/db/repl/storage_timestamp_test.cpp
@@ -162,7 +162,7 @@ Status createIndexFromSpec(OperationContext* opCtx,
}
WriteUnitOfWork wunit(opCtx);
ASSERT_OK(indexer.commit(opCtx,
- collection.getWritableCollection(),
+ collection.getWritableCollection(opCtx),
MultiIndexBlock::kNoopOnCreateEachFn,
MultiIndexBlock::kNoopOnCommitFn));
LogicalTime indexTs = clock->tickClusterTime(1);
@@ -394,7 +394,7 @@ public:
// Timestamping index completion. Primaries write an oplog entry.
ASSERT_OK(
indexer.commit(_opCtx,
- coll.getWritableCollection(),
+ coll.getWritableCollection(_opCtx),
[&](const BSONObj& indexSpec) {
_opCtx->getServiceContext()->getOpObserver()->onCreateIndex(
_opCtx, coll->ns(), coll->uuid(), indexSpec, false);
@@ -2787,7 +2787,7 @@ TEST_F(StorageTimestampTest, IndexBuildsResolveErrorsDuringStateChangeToPrimary)
WriteUnitOfWork wuow(_opCtx);
ASSERT_OK(
indexer.commit(_opCtx,
- collection.getWritableCollection(),
+ collection.getWritableCollection(_opCtx),
[&](const BSONObj& indexSpec) {
_opCtx->getServiceContext()->getOpObserver()->onCreateIndex(
_opCtx, collection->ns(), collection->uuid(), indexSpec, false);
diff --git a/src/mongo/db/repl/tenant_collection_cloner.cpp b/src/mongo/db/repl/tenant_collection_cloner.cpp
index 9e6d5f7e02a..165538954bd 100644
--- a/src/mongo/db/repl/tenant_collection_cloner.cpp
+++ b/src/mongo/db/repl/tenant_collection_cloner.cpp
@@ -474,36 +474,42 @@ BaseCloner::AfterStageBehavior TenantCollectionCloner::queryStage() {
}
void TenantCollectionCloner::runQuery() {
- const BSONObj& filter = _lastDocId.isEmpty()
- ? BSONObj{} // Use $expr and the aggregation version of $gt to avoid type bracketing.
- : BSON("$expr" << BSON("$gt" << BSON_ARRAY("$_id" << _lastDocId["_id"])));
-
- auto query = _collectionOptions.clusteredIndex
- // RecordIds are _id values and has no separate _id index
- ? Query().hint(BSON("$natural" << 1))
- : Query().hint(BSON("_id" << 1));
-
-
- // Any errors that are thrown here (including NamespaceNotFound) will be handled on the stage
- // level.
- getClient()->query_DEPRECATED(
- [this](DBClientCursorBatchIterator& iter) { handleNextBatch(iter); },
- _sourceDbAndUuid,
- filter,
- query,
- nullptr /* fieldsToReturn */,
- QueryOption_NoCursorTimeout | QueryOption_SecondaryOk |
- (collectionClonerUsesExhaust ? QueryOption_Exhaust : 0),
- _collectionClonerBatchSize,
- ReadConcernArgs(ReadConcernLevel::kMajorityReadConcern).toBSONInner());
+ FindCommandRequest findCmd{_sourceDbAndUuid};
+
+ findCmd.setFilter(
+ _lastDocId.isEmpty()
+ ? BSONObj{} // Use $expr and the aggregation version of $gt to avoid type bracketing.
+ : BSON("$expr" << BSON("$gt" << BSON_ARRAY("$_id" << _lastDocId["_id"]))));
+
+ if (_collectionOptions.clusteredIndex) {
+ findCmd.setHint(BSON("$natural" << 1));
+ } else {
+ findCmd.setHint(BSON("_id" << 1));
+ }
+
+ findCmd.setNoCursorTimeout(true);
+ findCmd.setReadConcern(ReadConcernArgs(ReadConcernLevel::kMajorityReadConcern).toBSONInner());
+ if (_collectionClonerBatchSize) {
+ findCmd.setBatchSize(_collectionClonerBatchSize);
+ }
+
+ ExhaustMode exhaustMode = collectionClonerUsesExhaust ? ExhaustMode::kOn : ExhaustMode::kOff;
+
+ auto cursor = getClient()->find(
+ std::move(findCmd), ReadPreferenceSetting{ReadPreference::SecondaryPreferred}, exhaustMode);
+
+ // Process the results of the cursor one batch at a time.
+ while (cursor->more()) {
+ handleNextBatch(*cursor);
+ }
}
-void TenantCollectionCloner::handleNextBatch(DBClientCursorBatchIterator& iter) {
+void TenantCollectionCloner::handleNextBatch(DBClientCursor& cursor) {
{
stdx::lock_guard<Latch> lk(_mutex);
_stats.receivedBatches++;
- while (iter.moreInCurrentBatch()) {
- _documentsToInsert.emplace_back(iter.nextSafe());
+ while (cursor.moreInCurrentBatch()) {
+ _documentsToInsert.emplace_back(cursor.nextSafe());
}
}
diff --git a/src/mongo/db/repl/tenant_collection_cloner.h b/src/mongo/db/repl/tenant_collection_cloner.h
index b9c22928917..12bd9bbb832 100644
--- a/src/mongo/db/repl/tenant_collection_cloner.h
+++ b/src/mongo/db/repl/tenant_collection_cloner.h
@@ -209,10 +209,10 @@ private:
AfterStageBehavior queryStage();
/**
- * Put all results from a query batch into a buffer to be inserted, and schedule
- * it to be inserted.
+ * Put all results from a query batch into a buffer to be inserted, and schedule it to be
+ * inserted.
*/
- void handleNextBatch(DBClientCursorBatchIterator& iter);
+ void handleNextBatch(DBClientCursor& cursor);
/**
* Called whenever there is a new batch of documents ready from the DBClientConnection.
diff --git a/src/mongo/db/repl/tenant_file_cloner.cpp b/src/mongo/db/repl/tenant_file_cloner.cpp
index 83ae3c65fc8..b909039eed1 100644
--- a/src/mongo/db/repl/tenant_file_cloner.cpp
+++ b/src/mongo/db/repl/tenant_file_cloner.cpp
@@ -188,8 +188,7 @@ void TenantFileCloner::runQuery() {
getClient(), std::move(aggRequest), true /* secondaryOk */, useExhaust));
try {
while (cursor->more()) {
- DBClientCursorBatchIterator iter(*cursor);
- handleNextBatch(iter);
+ handleNextBatch(*cursor);
}
} catch (const DBException& e) {
// We cannot continue after an error when processing exhaust cursors. Instead we must
@@ -207,7 +206,7 @@ void TenantFileCloner::runQuery() {
}
}
-void TenantFileCloner::handleNextBatch(DBClientCursorBatchIterator& iter) {
+void TenantFileCloner::handleNextBatch(DBClientCursor& cursor) {
LOGV2_DEBUG(6113307,
3,
"TenantFileCloner handleNextBatch",
@@ -215,7 +214,7 @@ void TenantFileCloner::handleNextBatch(DBClientCursorBatchIterator& iter) {
"backupId"_attr = _backupId,
"remoteFile"_attr = _remoteFileName,
"fileOffset"_attr = getFileOffset(),
- "moreInCurrentBatch"_attr = iter.moreInCurrentBatch());
+ "moreInCurrentBatch"_attr = cursor.moreInCurrentBatch());
{
stdx::lock_guard<TenantMigrationSharedData> lk(*getSharedData());
if (!getSharedData()->getStatus(lk).isOK()) {
@@ -225,11 +224,11 @@ void TenantFileCloner::handleNextBatch(DBClientCursorBatchIterator& iter) {
str::stream() << message << ": " << getSharedData()->getStatus(lk));
}
}
- while (iter.moreInCurrentBatch()) {
+ while (cursor.moreInCurrentBatch()) {
stdx::lock_guard<Latch> lk(_mutex);
_stats.receivedBatches++;
- while (iter.moreInCurrentBatch()) {
- _dataToWrite.emplace_back(iter.nextSafe());
+ while (cursor.moreInCurrentBatch()) {
+ _dataToWrite.emplace_back(cursor.nextSafe());
}
}
diff --git a/src/mongo/db/repl/tenant_file_cloner.h b/src/mongo/db/repl/tenant_file_cloner.h
index 90e37946224..27ff89fbc3a 100644
--- a/src/mongo/db/repl/tenant_file_cloner.h
+++ b/src/mongo/db/repl/tenant_file_cloner.h
@@ -160,7 +160,7 @@ private:
/**
* Put all results from a query batch into a buffer, and schedule it to be written to disk.
*/
- void handleNextBatch(DBClientCursorBatchIterator& iter);
+ void handleNextBatch(DBClientCursor& cursor);
/**
* Called whenever there is a new batch of documents ready from the DBClientConnection.
diff --git a/src/mongo/db/repl/tenant_file_importer_service.cpp b/src/mongo/db/repl/tenant_file_importer_service.cpp
index 85d95d7e22d..af565c3c713 100644
--- a/src/mongo/db/repl/tenant_file_importer_service.cpp
+++ b/src/mongo/db/repl/tenant_file_importer_service.cpp
@@ -118,14 +118,21 @@ TenantFileImporterService* TenantFileImporterService::get(ServiceContext* servic
void TenantFileImporterService::startMigration(const UUID& migrationId,
const StringData& donorConnectionString) {
stdx::lock_guard lk(_mutex);
+ if (migrationId == _migrationId && _state >= State::kStarted && _state < State::kInterrupted) {
+ return;
+ }
+
_reset(lk);
_migrationId = migrationId;
_donorConnectionString = donorConnectionString.toString();
- _eventQueue = std::make_unique<Queue>();
- _state.setState(ImporterState::State::kStarted);
+ _eventQueue = std::make_shared<Queue>();
+ _state = State::kStarted;
- _thread = std::make_unique<stdx::thread>([this] {
+ _thread = std::make_unique<stdx::thread>([this, migrationId] {
Client::initThread("TenantFileImporterService");
+ LOGV2_INFO(6378904,
+ "TenantFileImporterService starting worker thread",
+ "migrationId"_attr = migrationId.toString());
auto opCtx = cc().makeOperationContext();
_handleEvents(opCtx.get());
});
@@ -134,48 +141,55 @@ void TenantFileImporterService::startMigration(const UUID& migrationId,
void TenantFileImporterService::learnedFilename(const UUID& migrationId,
const BSONObj& metadataDoc) {
stdx::lock_guard lk(_mutex);
+ if (migrationId == _migrationId && _state >= State::kLearnedAllFilenames) {
+ return;
+ }
+
tassert(8423347,
"Called learnedFilename with migrationId {}, but {} is active"_format(
migrationId.toString(), _migrationId ? _migrationId->toString() : "no migration"),
migrationId == _migrationId);
- _state.setState(ImporterState::State::kLearnedFilename);
+ _state = State::kLearnedFilename;
ImporterEvent event{ImporterEvent::Type::kLearnedFileName, migrationId};
event.metadataDoc = metadataDoc.getOwned();
+ invariant(_eventQueue);
auto success = _eventQueue->tryPush(std::move(event));
- uassert(6378904,
+ uassert(6378903,
"TenantFileImporterService failed to push '{}' event without blocking"_format(
- _state.toString()),
+ stateToString(_state)),
success);
}
void TenantFileImporterService::learnedAllFilenames(const UUID& migrationId) {
stdx::lock_guard lk(_mutex);
+ if (migrationId == _migrationId && _state >= State::kLearnedAllFilenames) {
+ return;
+ }
+
tassert(8423345,
"Called learnedAllFilenames with migrationId {}, but {} is active"_format(
migrationId.toString(), _migrationId ? _migrationId->toString() : "no migration"),
migrationId == _migrationId);
- _state.setState(ImporterState::State::kLearnedAllFilenames);
+ _state = State::kLearnedAllFilenames;
+ invariant(_eventQueue);
auto success = _eventQueue->tryPush({ImporterEvent::Type::kLearnedAllFilenames, migrationId});
- uassert(6378905,
+ uassert(6378902,
"TenantFileImporterService failed to push '{}' event without blocking"_format(
- _state.toString()),
+ stateToString(_state)),
success);
}
void TenantFileImporterService::interrupt(const UUID& migrationId) {
stdx::lock_guard lk(_mutex);
- if (!_migrationId) {
- return;
- }
if (migrationId != _migrationId) {
LOGV2_WARNING(
- 6378907,
+ 6378901,
"Called interrupt with migrationId {migrationId}, but {activeMigrationId} is active",
"migrationId"_attr = migrationId.toString(),
- "activeMigrationId"_attr = _migrationId->toString());
+ "activeMigrationId"_attr = _migrationId ? _migrationId->toString() : "no migration");
return;
}
_interrupt(lk);
@@ -195,8 +209,11 @@ void TenantFileImporterService::_handleEvents(OperationContext* opCtx) {
std::string donorConnectionString;
boost::optional<UUID> migrationId;
+ std::shared_ptr<Queue> eventQueueRef;
{
stdx::lock_guard lk(_mutex);
+ invariant(_eventQueue);
+ eventQueueRef = _eventQueue;
donorConnectionString = _donorConnectionString;
migrationId = _migrationId;
}
@@ -206,9 +223,9 @@ void TenantFileImporterService::_handleEvents(OperationContext* opCtx) {
opCtx->checkForInterrupt();
try {
- event = _eventQueue->pop(opCtx);
+ event = eventQueueRef->pop(opCtx);
} catch (const ExceptionFor<ErrorCodes::ProducerConsumerQueueEndClosed>& err) {
- LOGV2_WARNING(6378908, "Event queue was interrupted", "error"_attr = err);
+ LOGV2_WARNING(6378900, "Event queue was interrupted", "error"_attr = err);
break;
}
@@ -259,7 +276,7 @@ void TenantFileImporterService::_voteImportedFiles(OperationContext* opCtx) {
}
void TenantFileImporterService::_interrupt(WithLock) {
- if (_state.is(ImporterState::State::kInterrupted)) {
+ if (_state == State::kInterrupted) {
return;
}
@@ -276,11 +293,16 @@ void TenantFileImporterService::_interrupt(WithLock) {
// _opCtx->markKilled(ErrorCodes::Interrupted);
}
- _state.setState(ImporterState::State::kInterrupted);
+ _state = State::kInterrupted;
}
void TenantFileImporterService::_reset(WithLock) {
- _migrationId.reset();
+ if (_migrationId) {
+ LOGV2_INFO(6378905,
+ "TenantFileImporterService resetting migration",
+ "migrationId"_attr = _migrationId->toString());
+ _migrationId.reset();
+ }
if (_thread && _thread->joinable()) {
_thread->join();
@@ -292,6 +314,6 @@ void TenantFileImporterService::_reset(WithLock) {
}
// TODO SERVER-66907: how should we be resetting _opCtx?
- _state.setState(ImporterState::State::kUninitialized);
+ _state = State::kUninitialized;
}
} // namespace mongo::repl
diff --git a/src/mongo/db/repl/tenant_file_importer_service.h b/src/mongo/db/repl/tenant_file_importer_service.h
index 9a27af816da..d7188f9a0e6 100644
--- a/src/mongo/db/repl/tenant_file_importer_service.h
+++ b/src/mongo/db/repl/tenant_file_importer_service.h
@@ -82,75 +82,35 @@ private:
boost::optional<UUID> _migrationId;
std::string _donorConnectionString;
Mutex _mutex = MONGO_MAKE_LATCH("TenantFileImporterService::_mutex");
- class ImporterState {
- public:
- enum class State {
- kUninitialized,
- kStarted,
- kLearnedFilename,
- kLearnedAllFilenames,
- kInterrupted
- };
- void setState(State nextState) {
- tassert(6114403,
- str::stream() << "current state: " << toString(_state)
- << ", new state: " << toString(nextState),
- isValidTransition(nextState));
- _state = nextState;
- }
-
- bool is(State state) const {
- return _state == state;
- }
-
- StringData toString() const {
- return toString(_state);
- }
- private:
- static StringData toString(State value) {
- switch (value) {
- case State::kUninitialized:
- return "uninitialized";
- case State::kStarted:
- return "started";
- case State::kLearnedFilename:
- return "learned filename";
- case State::kLearnedAllFilenames:
- return "learned all filenames";
- case State::kInterrupted:
- return "interrupted";
- }
- MONGO_UNREACHABLE;
- return StringData();
- }
+ // Explicit State enum ordering defined here because we rely on comparison
+ // operators for state checking in various TenantFileImporterService methods.
+ enum class State {
+ kUninitialized = 0,
+ kStarted = 1,
+ kLearnedFilename = 2,
+ kLearnedAllFilenames = 3,
+ kInterrupted = 4
+ };
- bool isValidTransition(State newState) {
- if (_state == newState) {
- return true;
- }
-
- switch (_state) {
- case State::kUninitialized:
- return newState == State::kStarted || newState == State::kInterrupted;
- case State::kStarted:
- return newState == State::kInterrupted || newState == State::kLearnedFilename ||
- newState == State::kLearnedAllFilenames;
- case State::kLearnedFilename:
- return newState == State::kInterrupted || newState == State::kLearnedFilename ||
- newState == State::kLearnedAllFilenames;
- case State::kLearnedAllFilenames:
- return newState == State::kInterrupted;
- case State::kInterrupted:
- return newState == State::kUninitialized || newState == State::kStarted;
- }
- MONGO_UNREACHABLE;
+ static StringData stateToString(State state) {
+ switch (state) {
+ case State::kUninitialized:
+ return "uninitialized";
+ case State::kStarted:
+ return "started";
+ case State::kLearnedFilename:
+ return "learned filename";
+ case State::kLearnedAllFilenames:
+ return "learned all filenames";
+ case State::kInterrupted:
+ return "interrupted";
}
+ MONGO_UNREACHABLE;
+ return StringData();
+ }
- State _state = State::kUninitialized;
- };
-
- ImporterState _state;
+ State _state;
struct ImporterEvent {
enum class Type { kNone, kLearnedFileName, kLearnedAllFilenames };
@@ -166,6 +126,6 @@ private:
MultiProducerSingleConsumerQueue<ImporterEvent,
producer_consumer_queue_detail::DefaultCostFunction>;
- std::unique_ptr<Queue> _eventQueue;
+ std::shared_ptr<Queue> _eventQueue;
};
} // namespace mongo::repl
diff --git a/src/mongo/db/repl/tenant_migration_access_blocker_util.cpp b/src/mongo/db/repl/tenant_migration_access_blocker_util.cpp
index 53e7b24f135..fc693f64c20 100644
--- a/src/mongo/db/repl/tenant_migration_access_blocker_util.cpp
+++ b/src/mongo/db/repl/tenant_migration_access_blocker_util.cpp
@@ -437,7 +437,7 @@ void recoverTenantMigrationAccessBlockers(OperationContext* opCtx) {
// Recover TenantMigrationDonorAccessBlockers for ShardSplit.
PersistentTaskStore<ShardSplitDonorDocument> shardSplitDonorStore(
- NamespaceString::kTenantSplitDonorsNamespace);
+ NamespaceString::kShardSplitDonorsNamespace);
shardSplitDonorStore.forEach(opCtx, {}, [&](const ShardSplitDonorDocument& doc) {
// Skip creating a TenantMigrationDonorAccessBlocker for terminal shard split that have been
@@ -462,6 +462,8 @@ void recoverTenantMigrationAccessBlockers(OperationContext* opCtx) {
.add(tenantId.toString(), mtab);
switch (doc.getState()) {
+ case ShardSplitDonorStateEnum::kAbortingIndexBuilds:
+ break;
case ShardSplitDonorStateEnum::kBlocking:
invariant(doc.getBlockTimestamp());
mtab->startBlockingWrites();
diff --git a/src/mongo/db/repl/tenant_migration_recipient_op_observer.cpp b/src/mongo/db/repl/tenant_migration_recipient_op_observer.cpp
index 34700086793..4cfdb60b43c 100644
--- a/src/mongo/db/repl/tenant_migration_recipient_op_observer.cpp
+++ b/src/mongo/db/repl/tenant_migration_recipient_op_observer.cpp
@@ -282,11 +282,11 @@ void TenantMigrationRecipientOpObserver::onDelete(OperationContext* opCtx,
if (nss == NamespaceString::kTenantMigrationRecipientsNamespace &&
!tenant_migration_access_blocker::inRecoveryMode(opCtx)) {
if (tenantIdToDeleteDecoration(opCtx)) {
+ auto tenantId = tenantIdToDeleteDecoration(opCtx).get();
LOGV2_INFO(8423337, "Removing expired 'multitenant migration' migration");
- opCtx->recoveryUnit()->onCommit([opCtx](boost::optional<Timestamp>) {
+ opCtx->recoveryUnit()->onCommit([opCtx, tenantId](boost::optional<Timestamp>) {
TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext())
- .remove(tenantIdToDeleteDecoration(opCtx).get(),
- TenantMigrationAccessBlocker::BlockerType::kRecipient);
+ .remove(tenantId, TenantMigrationAccessBlocker::BlockerType::kRecipient);
});
}
@@ -297,8 +297,7 @@ void TenantMigrationRecipientOpObserver::onDelete(OperationContext* opCtx,
"migrationId"_attr = migrationId);
opCtx->recoveryUnit()->onCommit([opCtx, migrationId](boost::optional<Timestamp>) {
TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext())
- .removeRecipientAccessBlockersForMigration(
- migrationIdToDeleteDecoration(opCtx).get());
+ .removeRecipientAccessBlockersForMigration(migrationId);
repl::TenantFileImporterService::get(opCtx->getServiceContext())
->interrupt(migrationId);
});
diff --git a/src/mongo/db/repl/tenant_migration_recipient_service.cpp b/src/mongo/db/repl/tenant_migration_recipient_service.cpp
index facaf190ab8..f355b7a3ac6 100644
--- a/src/mongo/db/repl/tenant_migration_recipient_service.cpp
+++ b/src/mongo/db/repl/tenant_migration_recipient_service.cpp
@@ -43,6 +43,7 @@
#include "mongo/db/commands/tenant_migration_donor_cmds_gen.h"
#include "mongo/db/commands/test_commands_enabled.h"
#include "mongo/db/concurrency/exception_util.h"
+#include "mongo/db/concurrency/replication_state_transition_lock_guard.h"
#include "mongo/db/db_raii.h"
#include "mongo/db/dbdirectclient.h"
#include "mongo/db/namespace_string.h"
@@ -213,7 +214,7 @@ public:
// Tenant migration does not require the metadata from the oplog query.
void processMetadata(const rpc::ReplSetMetadata& replMetadata,
- rpc::OplogQueryMetadata oqMetadata) final {}
+ const rpc::OplogQueryMetadata& oqMetadata) final {}
// Tenant migration does not change sync source depending on metadata.
ChangeSyncSourceAction shouldStopFetching(const HostAndPort& source,
@@ -2516,7 +2517,8 @@ void TenantMigrationRecipientService::Instance::_startOplogApplier() {
}
void TenantMigrationRecipientService::Instance::_setup() {
- auto opCtx = cc().makeOperationContext();
+ auto uniqueOpCtx = cc().makeOperationContext();
+ auto opCtx = uniqueOpCtx.get();
{
stdx::lock_guard lk(_mutex);
// Do not set the internal states if the migration is already interrupted.
@@ -2543,12 +2545,23 @@ void TenantMigrationRecipientService::Instance::_setup() {
_sharedData = std::make_unique<TenantMigrationSharedData>(
getGlobalServiceContext()->getFastClockSource(), getMigrationUUID(), resumePhase);
- _createOplogBuffer(lk, opCtx.get());
+ _createOplogBuffer(lk, opCtx);
}
// Start the oplog buffer outside the mutex to avoid deadlock on a concurrent stepdown.
try {
- _donorOplogBuffer->startup(opCtx.get());
+ // It is illegal to start the replicated donor buffer when the node is not primary.
+ // So ensure we are primary before trying to startup the oplog buffer.
+ repl::ReplicationStateTransitionLockGuard rstl(opCtx, MODE_IX);
+
+ auto oplogBufferNS = getOplogBufferNs(getMigrationUUID());
+ if (!repl::ReplicationCoordinator::get(opCtx)->canAcceptWritesForDatabase(
+ opCtx, oplogBufferNS.db())) {
+ uassertStatusOK(
+ Status(ErrorCodes::NotWritablePrimary, "Recipient node is no longer a primary."));
+ }
+
+ _donorOplogBuffer->startup(opCtx);
} catch (DBException& ex) {
ex.addContext("Failed to create oplog buffer collection.");
throw;
diff --git a/src/mongo/db/repl/tenant_oplog_applier_test.cpp b/src/mongo/db/repl/tenant_oplog_applier_test.cpp
index 4215b04043a..864960d84d7 100644
--- a/src/mongo/db/repl/tenant_oplog_applier_test.cpp
+++ b/src/mongo/db/repl/tenant_oplog_applier_test.cpp
@@ -201,10 +201,14 @@ private:
logv2::LogComponent::kTenantMigration, logv2::LogSeverity::Debug(1)};
};
+// TODO SERVER-67155 Remove all calls to DatabaseName::toStringWithTenantId() once the OplogEntry
+// deserializer passes "tid" to the NamespaceString constructor
TEST_F(TenantOplogApplierTest, NoOpsForSingleBatch) {
std::vector<OplogEntry> srcOps;
- srcOps.push_back(makeInsertOplogEntry(1, NamespaceString(_dbName, "foo"), UUID::gen()));
- srcOps.push_back(makeInsertOplogEntry(2, NamespaceString(_dbName, "bar"), UUID::gen()));
+ srcOps.push_back(makeInsertOplogEntry(
+ 1, NamespaceString(_dbName.toStringWithTenantId(), "foo"), UUID::gen()));
+ srcOps.push_back(makeInsertOplogEntry(
+ 2, NamespaceString(_dbName.toStringWithTenantId(), "bar"), UUID::gen()));
pushOps(srcOps);
auto writerPool = makeTenantMigrationWriterPool();
@@ -235,7 +239,8 @@ TEST_F(TenantOplogApplierTest, NoOpsForLargeBatch) {
std::vector<OplogEntry> srcOps;
// This should be big enough to use several threads to do the writing
for (int i = 0; i < 64; i++) {
- srcOps.push_back(makeInsertOplogEntry(i + 1, NamespaceString(_dbName, "foo"), UUID::gen()));
+ srcOps.push_back(makeInsertOplogEntry(
+ i + 1, NamespaceString(_dbName.toStringWithTenantId(), "foo"), UUID::gen()));
}
pushOps(srcOps);
@@ -266,10 +271,14 @@ TEST_F(TenantOplogApplierTest, NoOpsForLargeBatch) {
TEST_F(TenantOplogApplierTest, NoOpsForMultipleBatches) {
std::vector<OplogEntry> srcOps;
- srcOps.push_back(makeInsertOplogEntry(1, NamespaceString(_dbName, "foo"), UUID::gen()));
- srcOps.push_back(makeInsertOplogEntry(2, NamespaceString(_dbName, "bar"), UUID::gen()));
- srcOps.push_back(makeInsertOplogEntry(3, NamespaceString(_dbName, "baz"), UUID::gen()));
- srcOps.push_back(makeInsertOplogEntry(4, NamespaceString(_dbName, "bif"), UUID::gen()));
+ srcOps.push_back(makeInsertOplogEntry(
+ 1, NamespaceString(_dbName.toStringWithTenantId(), "foo"), UUID::gen()));
+ srcOps.push_back(makeInsertOplogEntry(
+ 2, NamespaceString(_dbName.toStringWithTenantId(), "bar"), UUID::gen()));
+ srcOps.push_back(makeInsertOplogEntry(
+ 3, NamespaceString(_dbName.toStringWithTenantId(), "baz"), UUID::gen()));
+ srcOps.push_back(makeInsertOplogEntry(
+ 4, NamespaceString(_dbName.toStringWithTenantId(), "bif"), UUID::gen()));
auto writerPool = makeTenantMigrationWriterPool();
@@ -305,14 +314,20 @@ TEST_F(TenantOplogApplierTest, NoOpsForMultipleBatches) {
TEST_F(TenantOplogApplierTest, NoOpsForLargeTransaction) {
std::vector<OplogEntry> innerOps1;
- innerOps1.push_back(makeInsertOplogEntry(11, NamespaceString(_dbName, "bar"), UUID::gen()));
- innerOps1.push_back(makeInsertOplogEntry(12, NamespaceString(_dbName, "bar"), UUID::gen()));
+ innerOps1.push_back(makeInsertOplogEntry(
+ 11, NamespaceString(_dbName.toStringWithTenantId(), "bar"), UUID::gen()));
+ innerOps1.push_back(makeInsertOplogEntry(
+ 12, NamespaceString(_dbName.toStringWithTenantId(), "bar"), UUID::gen()));
std::vector<OplogEntry> innerOps2;
- innerOps2.push_back(makeInsertOplogEntry(21, NamespaceString(_dbName, "bar"), UUID::gen()));
- innerOps2.push_back(makeInsertOplogEntry(22, NamespaceString(_dbName, "bar"), UUID::gen()));
+ innerOps2.push_back(makeInsertOplogEntry(
+ 21, NamespaceString(_dbName.toStringWithTenantId(), "bar"), UUID::gen()));
+ innerOps2.push_back(makeInsertOplogEntry(
+ 22, NamespaceString(_dbName.toStringWithTenantId(), "bar"), UUID::gen()));
std::vector<OplogEntry> innerOps3;
- innerOps3.push_back(makeInsertOplogEntry(31, NamespaceString(_dbName, "bar"), UUID::gen()));
- innerOps3.push_back(makeInsertOplogEntry(32, NamespaceString(_dbName, "bar"), UUID::gen()));
+ innerOps3.push_back(makeInsertOplogEntry(
+ 31, NamespaceString(_dbName.toStringWithTenantId(), "bar"), UUID::gen()));
+ innerOps3.push_back(makeInsertOplogEntry(
+ 32, NamespaceString(_dbName.toStringWithTenantId(), "bar"), UUID::gen()));
// Makes entries with ts from range [2, 5).
std::vector<OplogEntry> srcOps = makeMultiEntryTransactionOplogEntries(
@@ -353,7 +368,7 @@ TEST_F(TenantOplogApplierTest, CommitUnpreparedTransaction_DataPartiallyApplied)
client.createIndexes(NamespaceString::kSessionTransactionsTableNamespace.ns(),
{MongoDSessionCatalog::getConfigTxnPartialIndexSpec()});
}
- NamespaceString nss(_dbName, "bar");
+ NamespaceString nss(_dbName.toStringWithTenantId(), "bar");
auto uuid = createCollectionWithUuid(_opCtx.get(), nss);
auto lsid = makeLogicalSessionId(_opCtx.get());
TxnNumber txnNum(0);
@@ -411,7 +426,8 @@ TEST_F(TenantOplogApplierTest, CommitUnpreparedTransaction_DataPartiallyApplied)
}
TEST_F(TenantOplogApplierTest, ApplyInsert_DatabaseMissing) {
- auto entry = makeInsertOplogEntry(1, NamespaceString(_dbName, "bar"), UUID::gen());
+ auto entry = makeInsertOplogEntry(
+ 1, NamespaceString(_dbName.toStringWithTenantId(), "bar"), UUID::gen());
bool onInsertsCalled = false;
_opObserver->onInsertsFn = [&](OperationContext* opCtx,
const NamespaceString&,
@@ -439,7 +455,8 @@ TEST_F(TenantOplogApplierTest, ApplyInsert_DatabaseMissing) {
TEST_F(TenantOplogApplierTest, ApplyInsert_CollectionMissing) {
createDatabase(_opCtx.get(), _dbName.toString());
- auto entry = makeInsertOplogEntry(1, NamespaceString(_dbName, "bar"), UUID::gen());
+ auto entry = makeInsertOplogEntry(
+ 1, NamespaceString(_dbName.toStringWithTenantId(), "bar"), UUID::gen());
bool onInsertsCalled = false;
_opObserver->onInsertsFn = [&](OperationContext* opCtx,
const NamespaceString&,
@@ -466,7 +483,7 @@ TEST_F(TenantOplogApplierTest, ApplyInsert_CollectionMissing) {
}
TEST_F(TenantOplogApplierTest, ApplyInsert_InsertExisting) {
- NamespaceString nss(_dbName, "bar");
+ NamespaceString nss(_dbName.toStringWithTenantId(), "bar");
auto uuid = createCollectionWithUuid(_opCtx.get(), nss);
ASSERT_OK(getStorageInterface()->insertDocument(_opCtx.get(),
nss,
@@ -504,7 +521,7 @@ TEST_F(TenantOplogApplierTest, ApplyInsert_InsertExisting) {
}
TEST_F(TenantOplogApplierTest, ApplyInsert_UniqueKey_InsertExisting) {
- NamespaceString nss(_dbName, "bar");
+ NamespaceString nss(_dbName.toStringWithTenantId(), "bar");
auto uuid = createCollectionWithUuid(_opCtx.get(), nss);
// Create unique key index on the collection.
@@ -545,7 +562,7 @@ TEST_F(TenantOplogApplierTest, ApplyInsert_UniqueKey_InsertExisting) {
}
TEST_F(TenantOplogApplierTest, ApplyInsert_Success) {
- NamespaceString nss(_dbName, "bar");
+ NamespaceString nss(_dbName.toStringWithTenantId(), "bar");
auto uuid = createCollectionWithUuid(_opCtx.get(), nss);
auto entry = makeInsertOplogEntry(1, nss, uuid);
bool onInsertsCalled = false;
@@ -553,7 +570,9 @@ TEST_F(TenantOplogApplierTest, ApplyInsert_Success) {
[&](OperationContext* opCtx, const NamespaceString& nss, const std::vector<BSONObj>& docs) {
ASSERT_FALSE(onInsertsCalled);
onInsertsCalled = true;
- ASSERT_EQUALS(nss.db(), _dbName.toString());
+ // TODO Check that (nss.dbName() == _dbName) once the OplogEntry deserializer passes
+ // "tid" to the NamespaceString constructor
+ ASSERT_EQUALS(nss.dbName().db(), _dbName.toStringWithTenantId());
ASSERT_EQUALS(nss.coll(), "bar");
ASSERT_EQUALS(1, docs.size());
ASSERT_BSONOBJ_EQ(docs[0], entry.getObject());
@@ -581,9 +600,9 @@ TEST_F(TenantOplogApplierTest, ApplyInsert_Success) {
TEST_F(TenantOplogApplierTest, ApplyInserts_Grouped) {
// TODO(SERVER-50256): remove nss_workaround, which is used to work around a bug where
// the first operation assigned to a worker cannot be grouped.
- NamespaceString nss_workaround(_dbName, "a");
- NamespaceString nss1(_dbName, "bar");
- NamespaceString nss2(_dbName, "baz");
+ NamespaceString nss_workaround(_dbName.toStringWithTenantId(), "a");
+ NamespaceString nss1(_dbName.toStringWithTenantId(), "bar");
+ NamespaceString nss2(_dbName.toStringWithTenantId(), "baz");
auto uuid1 = createCollectionWithUuid(_opCtx.get(), nss1);
auto uuid2 = createCollectionWithUuid(_opCtx.get(), nss2);
std::vector<OplogEntry> entries;
@@ -641,7 +660,7 @@ TEST_F(TenantOplogApplierTest, ApplyInserts_Grouped) {
}
TEST_F(TenantOplogApplierTest, ApplyUpdate_MissingDocument) {
- NamespaceString nss(_dbName, "bar");
+ NamespaceString nss(_dbName.toStringWithTenantId(), "bar");
auto uuid = createCollectionWithUuid(_opCtx.get(), nss);
auto entry = makeOplogEntry(
repl::OpTypeEnum::kUpdate, nss, uuid, BSON("$set" << BSON("a" << 1)), BSON("_id" << 0));
@@ -676,7 +695,7 @@ TEST_F(TenantOplogApplierTest, ApplyUpdate_MissingDocument) {
}
TEST_F(TenantOplogApplierTest, ApplyUpdate_Success) {
- NamespaceString nss(_dbName, "bar");
+ NamespaceString nss(_dbName.toStringWithTenantId(), "bar");
auto uuid = createCollectionWithUuid(_opCtx.get(), nss);
ASSERT_OK(getStorageInterface()->insertDocument(_opCtx.get(), nss, {BSON("_id" << 0)}, 0));
auto entry = makeOplogEntry(
@@ -708,7 +727,8 @@ TEST_F(TenantOplogApplierTest, ApplyUpdate_Success) {
}
TEST_F(TenantOplogApplierTest, ApplyDelete_DatabaseMissing) {
- auto entry = makeOplogEntry(OpTypeEnum::kDelete, NamespaceString(_dbName, "bar"), UUID::gen());
+ auto entry = makeOplogEntry(
+ OpTypeEnum::kDelete, NamespaceString(_dbName.toStringWithTenantId(), "bar"), UUID::gen());
bool onDeleteCalled = false;
_opObserver->onDeleteFn = [&](OperationContext* opCtx,
const NamespaceString&,
@@ -738,7 +758,8 @@ TEST_F(TenantOplogApplierTest, ApplyDelete_DatabaseMissing) {
TEST_F(TenantOplogApplierTest, ApplyDelete_CollectionMissing) {
createDatabase(_opCtx.get(), _dbName.toString());
- auto entry = makeOplogEntry(OpTypeEnum::kDelete, NamespaceString(_dbName, "bar"), UUID::gen());
+ auto entry = makeOplogEntry(
+ OpTypeEnum::kDelete, NamespaceString(_dbName.toStringWithTenantId(), "bar"), UUID::gen());
bool onDeleteCalled = false;
_opObserver->onDeleteFn = [&](OperationContext* opCtx,
const NamespaceString&,
@@ -767,7 +788,7 @@ TEST_F(TenantOplogApplierTest, ApplyDelete_CollectionMissing) {
}
TEST_F(TenantOplogApplierTest, ApplyDelete_DocumentMissing) {
- NamespaceString nss(_dbName, "bar");
+ NamespaceString nss(_dbName.toStringWithTenantId(), "bar");
auto uuid = createCollectionWithUuid(_opCtx.get(), nss);
auto entry = makeOplogEntry(OpTypeEnum::kDelete, nss, uuid, BSON("_id" << 0));
bool onDeleteCalled = false;
@@ -798,7 +819,7 @@ TEST_F(TenantOplogApplierTest, ApplyDelete_DocumentMissing) {
}
TEST_F(TenantOplogApplierTest, ApplyDelete_Success) {
- NamespaceString nss(_dbName, "bar");
+ NamespaceString nss(_dbName.toStringWithTenantId(), "bar");
auto uuid = createCollectionWithUuid(_opCtx.get(), nss);
ASSERT_OK(getStorageInterface()->insertDocument(_opCtx.get(), nss, {BSON("_id" << 0)}, 0));
auto entry = makeOplogEntry(OpTypeEnum::kDelete, nss, uuid, BSON("_id" << 0));
@@ -814,7 +835,9 @@ TEST_F(TenantOplogApplierTest, ApplyDelete_Success) {
ASSERT_TRUE(opCtx->lockState()->isCollectionLockedForMode(nss, MODE_IX));
ASSERT_TRUE(opCtx->writesAreReplicated());
ASSERT_FALSE(args.fromMigrate);
- ASSERT_EQUALS(nss.db(), _dbName.toString());
+ // TODO SERVER-66708 Check that (nss.dbName() == _dbName) once the OplogEntry deserializer
+ // passes "tid" to the NamespaceString constructor
+ ASSERT_EQUALS(nss.dbName().db(), _dbName.toStringWithTenantId());
ASSERT_EQUALS(nss.coll(), "bar");
ASSERT_EQUALS(uuid, observer_uuid);
};
@@ -839,7 +862,7 @@ TEST_F(TenantOplogApplierTest, ApplyDelete_Success) {
}
TEST_F(TenantOplogApplierTest, ApplyCreateCollCommand_CollExisting) {
- NamespaceString nss(_dbName, "bar");
+ NamespaceString nss(_dbName.toStringWithTenantId(), "bar");
auto uuid = createCollectionWithUuid(_opCtx.get(), nss);
auto op = BSON("op"
<< "c"
@@ -874,8 +897,8 @@ TEST_F(TenantOplogApplierTest, ApplyCreateCollCommand_CollExisting) {
}
TEST_F(TenantOplogApplierTest, ApplyRenameCollCommand_CollExisting) {
- NamespaceString nss1(_dbName, "foo");
- NamespaceString nss2(_dbName, "bar");
+ NamespaceString nss1(_dbName.toStringWithTenantId(), "foo");
+ NamespaceString nss2(_dbName.toStringWithTenantId(), "bar");
auto uuid = createCollectionWithUuid(_opCtx.get(), nss2);
auto op =
BSON("op"
@@ -914,7 +937,7 @@ TEST_F(TenantOplogApplierTest, ApplyRenameCollCommand_CollExisting) {
}
TEST_F(TenantOplogApplierTest, ApplyCreateCollCommand_Success) {
- NamespaceString nss(_dbName, "t");
+ NamespaceString nss(_dbName.toStringWithTenantId(), "t");
auto op =
BSON("op"
<< "c"
@@ -954,7 +977,7 @@ TEST_F(TenantOplogApplierTest, ApplyCreateCollCommand_Success) {
}
TEST_F(TenantOplogApplierTest, ApplyCreateIndexesCommand_Success) {
- NamespaceString nss(_dbName, "t");
+ NamespaceString nss(_dbName.toStringWithTenantId(), "t");
auto uuid = createCollectionWithUuid(_opCtx.get(), nss);
auto op =
BSON("op"
@@ -1001,7 +1024,7 @@ TEST_F(TenantOplogApplierTest, ApplyCreateIndexesCommand_Success) {
}
TEST_F(TenantOplogApplierTest, ApplyStartIndexBuildCommand_Failure) {
- NamespaceString nss(_dbName, "t");
+ NamespaceString nss(_dbName.toStringWithTenantId(), "t");
auto uuid = createCollectionWithUuid(_opCtx.get(), nss);
auto op = BSON("op"
<< "c"
@@ -1066,7 +1089,7 @@ TEST_F(TenantOplogApplierTest, ApplyCreateCollCommand_WrongNSS) {
}
TEST_F(TenantOplogApplierTest, ApplyDropIndexesCommand_IndexNotFound) {
- NamespaceString nss(_dbName, "bar");
+ NamespaceString nss(_dbName.toStringWithTenantId(), "bar");
auto uuid = createCollectionWithUuid(_opCtx.get(), nss);
auto op = BSON("op"
<< "c"
@@ -1104,7 +1127,7 @@ TEST_F(TenantOplogApplierTest, ApplyDropIndexesCommand_IndexNotFound) {
}
TEST_F(TenantOplogApplierTest, ApplyCollModCommand_IndexNotFound) {
- NamespaceString nss(_dbName, "bar");
+ NamespaceString nss(_dbName.toStringWithTenantId(), "bar");
auto uuid = createCollectionWithUuid(_opCtx.get(), nss);
auto op = BSON("op"
<< "c"
@@ -1148,7 +1171,7 @@ TEST_F(TenantOplogApplierTest, ApplyCollModCommand_IndexNotFound) {
TEST_F(TenantOplogApplierTest, ApplyCollModCommand_CollectionMissing) {
createDatabase(_opCtx.get(), _dbName.toString());
- NamespaceString nss(_dbName, "bar");
+ NamespaceString nss(_dbName.toStringWithTenantId(), "bar");
UUID uuid(UUID::gen());
auto op = BSON("op"
<< "c"
@@ -1312,7 +1335,8 @@ TEST_F(TenantOplogApplierTest, ApplyResumeTokenNoop_Success) {
TEST_F(TenantOplogApplierTest, ApplyInsertThenResumeTokenNoopInDifferentBatch_Success) {
std::vector<OplogEntry> srcOps;
- srcOps.push_back(makeInsertOplogEntry(1, NamespaceString(_dbName, "foo"), UUID::gen()));
+ srcOps.push_back(makeInsertOplogEntry(
+ 1, NamespaceString(_dbName.toStringWithTenantId(), "foo"), UUID::gen()));
srcOps.push_back(makeNoopOplogEntry(2, TenantMigrationRecipientService::kNoopMsg));
pushOps(srcOps);
auto writerPool = makeTenantMigrationWriterPool();
@@ -1349,7 +1373,8 @@ TEST_F(TenantOplogApplierTest, ApplyInsertThenResumeTokenNoopInDifferentBatch_Su
TEST_F(TenantOplogApplierTest, ApplyResumeTokenNoopThenInsertInSameBatch_Success) {
std::vector<OplogEntry> srcOps;
srcOps.push_back(makeNoopOplogEntry(1, TenantMigrationRecipientService::kNoopMsg));
- srcOps.push_back(makeInsertOplogEntry(2, NamespaceString(_dbName, "foo"), UUID::gen()));
+ srcOps.push_back(makeInsertOplogEntry(
+ 2, NamespaceString(_dbName.toStringWithTenantId(), "foo"), UUID::gen()));
pushOps(srcOps);
auto writerPool = makeTenantMigrationWriterPool();
@@ -1380,7 +1405,8 @@ TEST_F(TenantOplogApplierTest, ApplyResumeTokenNoopThenInsertInSameBatch_Success
TEST_F(TenantOplogApplierTest, ApplyResumeTokenInsertThenNoopSameTimestamp_Success) {
std::vector<OplogEntry> srcOps;
- srcOps.push_back(makeInsertOplogEntry(1, NamespaceString(_dbName, "foo"), UUID::gen()));
+ srcOps.push_back(makeInsertOplogEntry(
+ 1, NamespaceString(_dbName.toStringWithTenantId(), "foo"), UUID::gen()));
srcOps.push_back(makeNoopOplogEntry(1, TenantMigrationRecipientService::kNoopMsg));
pushOps(srcOps);
ASSERT_EQ(srcOps[0].getOpTime(), srcOps[1].getOpTime());
@@ -1413,7 +1439,8 @@ TEST_F(TenantOplogApplierTest, ApplyResumeTokenInsertThenNoopSameTimestamp_Succe
TEST_F(TenantOplogApplierTest, ApplyResumeTokenInsertThenNoop_Success) {
std::vector<OplogEntry> srcOps;
- srcOps.push_back(makeInsertOplogEntry(1, NamespaceString(_dbName, "foo"), UUID::gen()));
+ srcOps.push_back(makeInsertOplogEntry(
+ 1, NamespaceString(_dbName.toStringWithTenantId(), "foo"), UUID::gen()));
srcOps.push_back(makeNoopOplogEntry(2, TenantMigrationRecipientService::kNoopMsg));
pushOps(srcOps);
auto writerPool = makeTenantMigrationWriterPool();
@@ -1445,8 +1472,8 @@ TEST_F(TenantOplogApplierTest, ApplyResumeTokenInsertThenNoop_Success) {
TEST_F(TenantOplogApplierTest, ApplyInsert_MultiKeyIndex) {
createCollectionWithUuid(_opCtx.get(), NamespaceString::kSessionTransactionsTableNamespace);
- NamespaceString indexedNss(_dbName, "indexedColl");
- NamespaceString nonIndexedNss(_dbName, "nonIndexedColl");
+ NamespaceString indexedNss(_dbName.toStringWithTenantId(), "indexedColl");
+ NamespaceString nonIndexedNss(_dbName.toStringWithTenantId(), "nonIndexedColl");
auto indexedCollUUID = createCollectionWithUuid(_opCtx.get(), indexedNss);
createCollection(_opCtx.get(), nonIndexedNss, CollectionOptions());
diff --git a/src/mongo/db/repl/topology_coordinator.cpp b/src/mongo/db/repl/topology_coordinator.cpp
index 7f30b7b113d..c72bb2ddfb3 100644
--- a/src/mongo/db/repl/topology_coordinator.cpp
+++ b/src/mongo/db/repl/topology_coordinator.cpp
@@ -1364,14 +1364,14 @@ void TopologyCoordinator::setMyLastDurableOpTimeAndWallTime(OpTimeAndWallTime op
myMemberData.setLastDurableOpTimeAndWallTime(opTimeAndWallTime, now);
}
-StatusWith<bool> TopologyCoordinator::setLastOptime(const UpdatePositionArgs::UpdateInfo& args,
- Date_t now) {
+StatusWith<bool> TopologyCoordinator::setLastOptimeForMember(
+ const UpdatePositionArgs::UpdateInfo& args, Date_t now) {
if (_selfIndex == -1) {
// Ignore updates when we're in state REMOVED.
return Status(ErrorCodes::NotPrimaryOrSecondary,
"Received replSetUpdatePosition command but we are in state REMOVED");
}
- invariant(_rsConfig.isInitialized()); // Can only use setLastOptime in replSet mode.
+ invariant(_rsConfig.isInitialized()); // Can only use setLastOptimeForMember in replSet mode.
MemberId memberId;
try {
diff --git a/src/mongo/db/repl/topology_coordinator.h b/src/mongo/db/repl/topology_coordinator.h
index fb9f7a196f7..3285a5b4825 100644
--- a/src/mongo/db/repl/topology_coordinator.h
+++ b/src/mongo/db/repl/topology_coordinator.h
@@ -585,7 +585,7 @@ public:
* Returns a Status if the position could not be set, false if the last optimes for the node
* did not change, or true if either the last applied or last durable optime did change.
*/
- StatusWith<bool> setLastOptime(const UpdatePositionArgs::UpdateInfo& args, Date_t now);
+ StatusWith<bool> setLastOptimeForMember(const UpdatePositionArgs::UpdateInfo& args, Date_t now);
/**
* Sets the latest optime committed in the previous config to the current lastCommitted optime.
diff --git a/src/mongo/db/repl_index_build_state.h b/src/mongo/db/repl_index_build_state.h
index 16e1bbeb34c..fadcc67896b 100644
--- a/src/mongo/db/repl_index_build_state.h
+++ b/src/mongo/db/repl_index_build_state.h
@@ -302,8 +302,6 @@ public:
/**
* Called when commit quorum is satisfied.
- * Invokes 'onCommitQuorumSatisfied' if state is successfully transitioned to commit quorum
- * satisfied.
*/
void setCommitQuorumSatisfied(OperationContext* opCtx);
diff --git a/src/mongo/db/s/README.md b/src/mongo/db/s/README.md
index b7d8bdff562..f3e67bce8b8 100644
--- a/src/mongo/db/s/README.md
+++ b/src/mongo/db/s/README.md
@@ -752,10 +752,14 @@ operations. The metadata is reaped if the cluster does not receive a new operati
session for a reasonably long time (the default is 30 minutes).
A logical session is identified by its "logical session id," or `lsid`. An `lsid` is a combination
-of two pieces of information:
+of up to four pieces of information:
1. `id` - A globally unique id (UUID) generated by the mongo shell, driver, or the `startSession` server command
1. `uid` (user id) - The identification information for the logged-in user (if authentication is enabled)
+1. `txnNumber` - An optional parameter set only for internal transactions spawned from retryable writes. Strictly-increasing counter set by the transaction API to match the txnNumber of the corresponding retryable write.
+1. `txnUUID` - An optional parameter set only for internal transactions spawned inside client sessions. The txnUUID is a globally unique id generated by the transaction API.
+
+A logical session with a `txnNumber` and `txnUUID` is considered a child of the session with matching `id` and `uid` values. There may be multiple child sessions per parent session, and checking out a child/parents session checks out the other and updates the `lastUsedTime` of both. Killing a parent session also kills all of its child sessions.
The order of operations in the logical session that need to durably store metadata is defined by an
integer counter, called the `txnNumber`. When the cluster receives a retryable write or transaction
@@ -848,8 +852,12 @@ and to check the session back in upon completion. When a session is checked out,
until it is checked back in, forcing other operations to wait for the ongoing operation to complete
or yield the session.
+Checking out an internal/child session additionally checks out its parent session (the session with the same `id` and `uid` value in the lsid, but without a `txnNumber` or `txnUUID` value), and vice versa.
+
The runtime state for a session consists of the last checkout time and operation, the number of operations
-waiting to check out the session, and the number of kills requested. The last checkout time is used by
+waiting to check out the session, and the number of kills requested. Retryable internal sessions are reaped from the logical session catalog [eagerly](https://github.com/mongodb/mongo/blob/67e37f8e806a6a5d402e20eee4b3097e2b11f820/src/mongo/db/session_catalog.cpp#L342), meaning that if a transaction session with a higher transaction number has successfully started, sessions with lower txnNumbers are removed from the session catalog and inserted into an in-memory buffer by the [InternalTransactionsReapService](https://github.com/mongodb/mongo/blob/67e37f8e806a6a5d402e20eee4b3097e2b11f820/src/mongo/db/internal_transactions_reap_service.h#L42) until a configurable threshold is met (1000 by default), after which they are deleted from the transactions table (`config.transactions`) and `config.image_collection` all at once. Eager reaping is best-effort, in that the in-memory buffer is cleared on stepdown or restart. Any missed sessions will be reaped once the session expires or their `config.transactions` entries have not been written to for `TransactionRecordMinimumLifetimeMinutes` minutes.
+
+The last checkout time is used by
the [periodic job inside the logical session cache](#periodic-cleanup-of-the-session-catalog-and-transactions-table)
to determine when a session should be reaped from the session catalog, whereas the number of
operations waiting to check out a session is used to block reaping of sessions that are still in
diff --git a/src/mongo/db/s/SConscript b/src/mongo/db/s/SConscript
index 415bd49e852..96f4e84813a 100644
--- a/src/mongo/db/s/SConscript
+++ b/src/mongo/db/s/SConscript
@@ -55,6 +55,7 @@ env.Library(
'collection_critical_section_document.idl',
'collection_sharding_runtime.cpp',
'collection_sharding_state_factory_shard.cpp',
+ 'commit_chunk_migration.idl',
'config_server_op_observer.cpp',
'global_index_metrics.cpp',
'metadata_manager.cpp',
@@ -96,7 +97,7 @@ env.Library(
'resharding/resharding_future_util.cpp',
'resharding/resharding_manual_cleanup.cpp',
'resharding/resharding_metrics_helpers.cpp',
- 'resharding/resharding_metrics_new.cpp',
+ 'resharding/resharding_metrics.cpp',
'resharding/resharding_op_observer.cpp',
'resharding/resharding_oplog_applier.cpp',
'resharding/resharding_oplog_applier_metrics.cpp',
@@ -137,6 +138,7 @@ env.Library(
'type_shard_collection.idl',
],
LIBDEPS=[
+ '$BUILD_DIR/mongo/client/remote_command_targeter',
'$BUILD_DIR/mongo/db/catalog/multi_index_block',
'$BUILD_DIR/mongo/db/client_metadata_propagation_egress_hook',
'$BUILD_DIR/mongo/db/commands/mongod_fcv',
@@ -173,6 +175,7 @@ env.Library(
'$BUILD_DIR/mongo/db/repl/image_collection_entry',
'$BUILD_DIR/mongo/db/rs_local_client',
'$BUILD_DIR/mongo/db/session_catalog',
+ '$BUILD_DIR/mongo/db/timeseries/bucket_catalog',
'$BUILD_DIR/mongo/idl/server_parameter',
'$BUILD_DIR/mongo/util/future_util',
],
@@ -334,7 +337,6 @@ env.Library(
'cluster_pipeline_cmd_d.cpp',
'cluster_write_cmd_d.cpp',
'collmod_coordinator_document.idl',
- 'collmod_coordinator_pre60_compatible.cpp',
'collmod_coordinator.cpp',
'compact_structured_encryption_data_coordinator.cpp',
'compact_structured_encryption_data_coordinator.idl',
@@ -402,7 +404,6 @@ env.Library(
'resharding_test_commands.idl',
'set_allow_migrations_coordinator_document.idl',
'set_allow_migrations_coordinator.cpp',
- 'set_shard_version_command.cpp',
'sharded_collmod.idl',
'sharded_index_consistency_server_status.cpp',
'sharded_rename_collection.idl',
@@ -550,7 +551,6 @@ env.CppUnitTest(
'collection_metadata_filtering_test.cpp',
'collection_metadata_test.cpp',
'collection_sharding_runtime_test.cpp',
- 'create_collection_coordinator_test.cpp',
'database_sharding_state_test.cpp',
'dist_lock_catalog_mock.cpp',
'dist_lock_catalog_replset_test.cpp',
@@ -574,7 +574,7 @@ env.CppUnitTest(
'resharding/resharding_donor_oplog_iterator_test.cpp',
'resharding/resharding_donor_recipient_common_test.cpp',
'resharding/resharding_donor_service_test.cpp',
- 'resharding/resharding_metrics_new_test.cpp',
+ 'resharding/resharding_metrics_test.cpp',
'resharding/resharding_oplog_applier_test.cpp',
'resharding/resharding_oplog_applier_metrics_test.cpp',
'resharding/resharding_oplog_batch_applier_test.cpp',
@@ -618,6 +618,8 @@ env.CppUnitTest(
'$BUILD_DIR/mongo/db/exec/document_value/document_value_test_util',
'$BUILD_DIR/mongo/db/keys_collection_client_direct',
'$BUILD_DIR/mongo/db/logical_session_cache_impl',
+ '$BUILD_DIR/mongo/db/op_observer',
+ '$BUILD_DIR/mongo/db/op_observer_util',
'$BUILD_DIR/mongo/db/ops/write_ops_exec',
'$BUILD_DIR/mongo/db/pipeline/document_source_mock',
'$BUILD_DIR/mongo/db/pipeline/expression_context',
diff --git a/src/mongo/db/s/balancer/balance_stats_test.cpp b/src/mongo/db/s/balancer/balance_stats_test.cpp
index 9381e0a2da6..aa7b056ae34 100644
--- a/src/mongo/db/s/balancer/balance_stats_test.cpp
+++ b/src/mongo/db/s/balancer/balance_stats_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/bson/oid.h"
#include "mongo/db/namespace_string.h"
#include "mongo/db/s/balancer/balance_stats.h"
@@ -79,7 +77,7 @@ private:
const Timestamp _timestamp{Timestamp(1, 1)};
const ShardId _shardPrimary{"dummyShardPrimary"};
const DatabaseVersion _dbVersion{UUID::gen(), _timestamp};
- ChunkVersion _nextVersion{1, 0, _epoch, _timestamp};
+ ChunkVersion _nextVersion{{_epoch, _timestamp}, {1, 0}};
};
TEST_F(BalanceStatsTest, SingleChunkNoZones) {
diff --git a/src/mongo/db/s/balancer/balancer.cpp b/src/mongo/db/s/balancer/balancer.cpp
index 2ec66bc8ffd..fc2c42a59c1 100644
--- a/src/mongo/db/s/balancer/balancer.cpp
+++ b/src/mongo/db/s/balancer/balancer.cpp
@@ -80,13 +80,13 @@ namespace {
MONGO_FAIL_POINT_DEFINE(overrideBalanceRoundInterval);
-const Seconds kBalanceRoundDefaultInterval(10);
+const Milliseconds kBalanceRoundDefaultInterval(10 * 1000);
// Sleep between balancer rounds in the case where the last round found some chunks which needed to
// be balanced. This value should be set sufficiently low so that imbalanced clusters will quickly
// reach balanced state, but setting it too low may cause CRUD operations to start failing due to
// not being able to establish a stable shard version.
-const Seconds kShortBalanceRoundInterval(1);
+const Milliseconds kBalancerMigrationsThrottling(1 * 1000);
/**
* Balancer status response
@@ -293,11 +293,11 @@ void Balancer::initiateBalancer(OperationContext* opCtx) {
void Balancer::interruptBalancer() {
stdx::lock_guard<Latch> scopedLock(_mutex);
- if (_state != kRunning)
+ if (_state != kRunning) {
return;
+ }
_state = kStopping;
- _thread.detach();
// Interrupt the balancer thread if it has been started. We are guaranteed that the operation
// context of that thread is still alive, because we hold the balancer mutex.
@@ -312,8 +312,10 @@ void Balancer::interruptBalancer() {
void Balancer::waitForBalancerToStop() {
stdx::unique_lock<Latch> scopedLock(_mutex);
-
_joinCond.wait(scopedLock, [this] { return _state == kStopped; });
+ if (_thread.joinable()) {
+ _thread.join();
+ }
}
void Balancer::joinCurrentRound(OperationContext* opCtx) {
@@ -612,12 +614,12 @@ void Balancer::_consumeActionStreamLoop() {
void Balancer::_mainThread() {
ON_BLOCK_EXIT([this] {
- stdx::lock_guard<Latch> scopedLock(_mutex);
-
- _state = kStopped;
+ {
+ stdx::lock_guard<Latch> scopedLock(_mutex);
+ _state = kStopped;
+ LOGV2_DEBUG(21855, 1, "Balancer thread terminated");
+ }
_joinCond.notify_all();
-
- LOGV2_DEBUG(21855, 1, "Balancer thread terminated");
});
Client::initThread("Balancer");
@@ -664,6 +666,7 @@ void Balancer::_mainThread() {
LOGV2(6036606, "Balancer worker thread initialised. Entering main loop.");
// Main balancer loop
+ auto lastMigrationTime = Date_t::fromMillisSinceEpoch(0);
while (!_stopRequested()) {
BalanceRoundDetails roundDetails;
@@ -691,6 +694,14 @@ void Balancer::_mainThread() {
continue;
}
+ boost::optional<Milliseconds> forcedBalancerRoundInterval(boost::none);
+ overrideBalanceRoundInterval.execute([&](const BSONObj& data) {
+ forcedBalancerRoundInterval = Milliseconds(data["intervalMs"].numberInt());
+ LOGV2(21864,
+ "overrideBalanceRoundInterval: using customized balancing interval",
+ "balancerInterval"_attr = *forcedBalancerRoundInterval);
+ });
+
// The current configuration is allowing the balancer to perform operations.
// Unblock the secondary thread if needed.
_defragmentationCondVar.notify_all();
@@ -739,9 +750,20 @@ void Balancer::_mainThread() {
if (chunksToRebalance.empty() && chunksToDefragment.empty()) {
LOGV2_DEBUG(21862, 1, "No need to move any chunk");
_balancedLastTime = 0;
+ LOGV2_DEBUG(21863, 1, "End balancing round");
+ _endRound(opCtx.get(),
+ forcedBalancerRoundInterval ? *forcedBalancerRoundInterval
+ : kBalanceRoundDefaultInterval);
} else {
+ auto timeSinceLastMigration = Date_t::now() - lastMigrationTime;
+ _sleepFor(opCtx.get(),
+ forcedBalancerRoundInterval
+ ? *forcedBalancerRoundInterval - timeSinceLastMigration
+ : kBalancerMigrationsThrottling - timeSinceLastMigration);
+
_balancedLastTime =
_moveChunks(opCtx.get(), chunksToRebalance, chunksToDefragment);
+ lastMigrationTime = Date_t::now();
roundDetails.setSucceeded(
static_cast<int>(chunksToRebalance.size() + chunksToDefragment.size()),
@@ -750,24 +772,13 @@ void Balancer::_mainThread() {
ShardingLogging::get(opCtx.get())
->logAction(opCtx.get(), "balancer.round", "", roundDetails.toBSON())
.ignore();
- }
- LOGV2_DEBUG(21863, 1, "End balancing round");
+ LOGV2_DEBUG(6679500, 1, "End balancing round");
+ // Migration throttling of kBalancerMigrationsThrottling will be applied before
+ // the next call to _moveChunks, so don't sleep here.
+ _endRound(opCtx.get(), Milliseconds(0));
+ }
}
-
- Milliseconds balancerInterval =
- _balancedLastTime ? kShortBalanceRoundInterval : kBalanceRoundDefaultInterval;
-
- overrideBalanceRoundInterval.execute([&](const BSONObj& data) {
- balancerInterval = Milliseconds(data["intervalMs"].numberInt());
- LOGV2(21864,
- "overrideBalanceRoundInterval: using shorter balancing interval: "
- "{balancerInterval}",
- "overrideBalanceRoundInterval: using shorter balancing interval",
- "balancerInterval"_attr = balancerInterval);
- });
-
- _endRound(opCtx.get(), balancerInterval);
} catch (const DBException& e) {
LOGV2(21865,
"caught exception while doing balance: {error}",
@@ -976,15 +987,6 @@ int Balancer::_moveChunks(OperationContext* opCtx,
return coll.getMaxChunkSizeBytes().value_or(balancerConfig->getMaxChunkSizeBytes());
}();
- if (serverGlobalParams.featureCompatibility.isLessThan(
- multiversion::FeatureCompatibilityVersion::kVersion_6_0)) {
- // TODO SERVER-65322 only use `moveRange` once v6.0 branches out
- MoveChunkSettings settings(maxChunkSizeBytes,
- balancerConfig->getSecondaryThrottle(),
- balancerConfig->waitForDelete());
- return _commandScheduler->requestMoveChunk(opCtx, migrateInfo, settings);
- }
-
MoveRangeRequestBase requestBase(migrateInfo.to);
requestBase.setWaitForDelete(balancerConfig->waitForDelete());
requestBase.setMin(migrateInfo.minKey);
@@ -1086,7 +1088,7 @@ SharedSemiFuture<void> Balancer::applyLegacyChunkSizeConstraintsOnClusterData(
NamespaceString::kLogicalSessionsNamespace,
0,
boost::none /*defragmentCollection*/,
- boost::none /*enableAutoSplitter*/);
+ false /*enableAutoSplitter*/);
} catch (const ExceptionFor<ErrorCodes::NamespaceNotSharded>&) {
// config.system.collections does not appear in config.collections; continue.
}
diff --git a/src/mongo/db/s/balancer/balancer_chunk_selection_policy_test.cpp b/src/mongo/db/s/balancer/balancer_chunk_selection_policy_test.cpp
index bf22d67619e..8b50d3d002f 100644
--- a/src/mongo/db/s/balancer/balancer_chunk_selection_policy_test.cpp
+++ b/src/mongo/db/s/balancer/balancer_chunk_selection_policy_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/db/commands.h"
#include "mongo/db/s/balancer/balancer_chunk_selection_policy_impl.h"
#include "mongo/db/s/balancer/cluster_statistics_impl.h"
@@ -133,7 +131,7 @@ TEST_F(BalancerChunkSelectionTest, TagRangesOverlap) {
// Set up a database and a sharded collection in the metadata.
const auto collUUID = UUID::gen();
- ChunkVersion version(2, 0, OID::gen(), Timestamp(42));
+ ChunkVersion version({OID::gen(), Timestamp(42)}, {2, 0});
setUpDatabase(kDbName, kShardId0);
setUpCollection(kNamespace, collUUID, version);
@@ -192,7 +190,7 @@ TEST_F(BalancerChunkSelectionTest, TagRangeMaxNotAlignedWithChunkMax) {
// Set up a database and a sharded collection in the metadata.
const auto collUUID = UUID::gen();
- ChunkVersion version(2, 0, OID::gen(), Timestamp(42));
+ ChunkVersion version({OID::gen(), Timestamp(42)}, {2, 0});
setUpDatabase(kDbName, kShardId0);
setUpCollection(kNamespace, collUUID, version);
@@ -251,7 +249,7 @@ TEST_F(BalancerChunkSelectionTest, ShardedTimeseriesCollectionsCanBeAutoSplitted
// Set up a database and a sharded collection in the metadata.
const auto collUUID = UUID::gen();
- ChunkVersion version(2, 0, OID::gen(), Timestamp(42));
+ ChunkVersion version({OID::gen(), Timestamp(42)}, {2, 0});
setUpDatabase(kDbName, kShardId0);
TypeCollectionTimeseriesFields tsFields;
@@ -302,7 +300,7 @@ TEST_F(BalancerChunkSelectionTest, ShardedTimeseriesCollectionsCanBeBalanced) {
// Set up a database and a sharded collection in the metadata.
const auto collUUID = UUID::gen();
- ChunkVersion version(2, 0, OID::gen(), Timestamp(42));
+ ChunkVersion version({OID::gen(), Timestamp(42)}, {2, 0});
setUpDatabase(kDbName, kShardId0);
TypeCollectionTimeseriesFields tsFields;
diff --git a/src/mongo/db/s/balancer/balancer_commands_scheduler_impl.cpp b/src/mongo/db/s/balancer/balancer_commands_scheduler_impl.cpp
index e78ae862393..7ebe9dac42c 100644
--- a/src/mongo/db/s/balancer/balancer_commands_scheduler_impl.cpp
+++ b/src/mongo/db/s/balancer/balancer_commands_scheduler_impl.cpp
@@ -155,7 +155,7 @@ std::vector<RequestData> rebuildRequestsFromRecoveryInfo(
DBDirectClient dbClient(opCtx);
try {
FindCommandRequest findRequest{MigrationType::ConfigNS};
- dbClient.find(std::move(findRequest), ReadPreferenceSetting{}, documentProcessor);
+ dbClient.find(std::move(findRequest), documentProcessor);
} catch (const DBException& e) {
LOGV2_ERROR(5847215, "Failed to fetch requests to recover", "error"_attr = redact(e));
}
diff --git a/src/mongo/db/s/balancer/balancer_commands_scheduler_test.cpp b/src/mongo/db/s/balancer/balancer_commands_scheduler_test.cpp
index 678e5f63f9f..72e86413aa9 100644
--- a/src/mongo/db/s/balancer/balancer_commands_scheduler_test.cpp
+++ b/src/mongo/db/s/balancer/balancer_commands_scheduler_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/client/remote_command_targeter_mock.h"
#include "mongo/db/s/balancer/balancer_commands_scheduler.h"
#include "mongo/db/s/balancer/balancer_commands_scheduler_impl.h"
@@ -65,7 +63,7 @@ public:
chunk.setMax(BSON("x" << min + 10));
chunk.setJumbo(false);
chunk.setShard(shardId);
- chunk.setVersion(ChunkVersion(1, 1, OID::gen(), Timestamp(10)));
+ chunk.setVersion(ChunkVersion({OID::gen(), Timestamp(10)}, {1, 1}));
return chunk;
}
@@ -76,7 +74,7 @@ public:
kUuid,
BSON("x" << min),
BSON("x" << min + 10),
- ChunkVersion(1, 1, OID::gen(), Timestamp(10)),
+ ChunkVersion({OID::gen(), Timestamp(10)}, {1, 1}),
MoveChunkRequest::ForceJumbo::kDoNotForce);
}
@@ -234,7 +232,7 @@ TEST_F(BalancerCommandsSchedulerTest, SuccessfulMergeChunkCommand) {
_scheduler.start(operationContext(), getMigrationRecoveryDefaultValues());
ChunkRange range(BSON("x" << 0), BSON("x" << 20));
- ChunkVersion version(1, 1, OID::gen(), Timestamp(10));
+ ChunkVersion version({OID::gen(), Timestamp(10)}, {1, 1});
auto futureResponse =
_scheduler.requestMergeChunks(operationContext(), kNss, kShardId0, range, version);
ASSERT_OK(futureResponse.getNoThrow());
@@ -246,7 +244,7 @@ TEST_F(BalancerCommandsSchedulerTest, MergeChunkNonexistentShard) {
auto remoteResponsesFuture = setRemoteResponses();
_scheduler.start(operationContext(), getMigrationRecoveryDefaultValues());
ChunkRange range(BSON("x" << 0), BSON("x" << 20));
- ChunkVersion version(1, 1, OID::gen(), Timestamp(10));
+ ChunkVersion version({OID::gen(), Timestamp(10)}, {1, 1});
auto futureResponse = _scheduler.requestMergeChunks(
operationContext(), kNss, ShardId("nonexistent"), range, version);
auto shardNotFoundError = Status{ErrorCodes::ShardNotFound, "Shard nonexistent not found"};
diff --git a/src/mongo/db/s/balancer/balancer_defragmentation_policy_test.cpp b/src/mongo/db/s/balancer/balancer_defragmentation_policy_test.cpp
index c42f7e86cd7..d1f431b4082 100644
--- a/src/mongo/db/s/balancer/balancer_defragmentation_policy_test.cpp
+++ b/src/mongo/db/s/balancer/balancer_defragmentation_policy_test.cpp
@@ -47,7 +47,7 @@ protected:
const ShardId kShardId1 = ShardId("shard1");
const ShardId kShardId2 = ShardId("shard2");
const ShardId kShardId3 = ShardId("shard3");
- const ChunkVersion kCollectionVersion = ChunkVersion(1, 1, OID::gen(), Timestamp(10));
+ const ChunkVersion kCollectionVersion = ChunkVersion({OID::gen(), Timestamp(10)}, {1, 1});
const KeyPattern kShardKeyPattern = KeyPattern(BSON("x" << 1));
const BSONObj kKeyAtMin = BSONObjBuilder().appendMinKey("x").obj();
const BSONObj kKeyAtZero = BSON("x" << 0);
@@ -494,7 +494,8 @@ TEST_F(BalancerDefragmentationPolicyTest, TestPhaseOneAllConsecutive) {
ChunkType chunk(
kUuid,
ChunkRange(minKey, maxKey),
- ChunkVersion(1, i, kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()),
+ ChunkVersion({kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()},
+ {1, uint32_t(i)}),
kShardId0);
chunkList.push_back(chunk);
}
@@ -504,7 +505,8 @@ TEST_F(BalancerDefragmentationPolicyTest, TestPhaseOneAllConsecutive) {
ChunkType chunk(
kUuid,
ChunkRange(minKey, maxKey),
- ChunkVersion(1, i, kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()),
+ ChunkVersion({kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()},
+ {1, uint32_t(i)}),
kShardId1);
chunkList.push_back(chunk);
}
@@ -543,7 +545,8 @@ TEST_F(BalancerDefragmentationPolicyTest, PhaseOneNotConsecutive) {
ChunkType chunk(
kUuid,
ChunkRange(minKey, maxKey),
- ChunkVersion(1, i, kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()),
+ ChunkVersion({kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()},
+ {1, uint32_t(i)}),
chosenShard);
chunkList.push_back(chunk);
}
@@ -620,13 +623,13 @@ TEST_F(BalancerDefragmentationPolicyTest, TestPhaseTwoChunkCanBeMovedAndMergedWi
ChunkType biggestChunk(
kUuid,
ChunkRange(kKeyAtMin, kKeyAtZero),
- ChunkVersion(1, 0, kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()),
+ ChunkVersion({kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()}, {1, 0}),
kShardId0);
biggestChunk.setEstimatedSizeBytes(2048);
ChunkType smallestChunk(
kUuid,
ChunkRange(kKeyAtZero, kKeyAtMax),
- ChunkVersion(1, 1, kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()),
+ ChunkVersion({kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()}, {1, 1}),
kShardId1);
smallestChunk.setEstimatedSizeBytes(1024);
@@ -682,42 +685,42 @@ TEST_F(BalancerDefragmentationPolicyTest,
ChunkType firstChunkOnShard0(
kUuid,
ChunkRange(kKeyAtMin, kKeyAtZero),
- ChunkVersion(1, 0, kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()),
+ ChunkVersion({kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()}, {1, 0}),
kShardId0);
firstChunkOnShard0.setEstimatedSizeBytes(1);
ChunkType firstChunkOnShard1(
kUuid,
ChunkRange(kKeyAtZero, kKeyAtTen),
- ChunkVersion(1, 1, kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()),
+ ChunkVersion({kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()}, {1, 1}),
kShardId1);
firstChunkOnShard1.setEstimatedSizeBytes(1);
ChunkType chunkOnShard2(
kUuid,
ChunkRange(kKeyAtTen, kKeyAtTwenty),
- ChunkVersion(1, 2, kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()),
+ ChunkVersion({kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()}, {1, 2}),
kShardId2);
chunkOnShard2.setEstimatedSizeBytes(1);
ChunkType chunkOnShard3(
kUuid,
ChunkRange(kKeyAtTwenty, kKeyAtThirty),
- ChunkVersion(1, 3, kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()),
+ ChunkVersion({kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()}, {1, 3}),
kShardId3);
chunkOnShard3.setEstimatedSizeBytes(1);
ChunkType secondChunkOnShard0(
kUuid,
ChunkRange(kKeyAtThirty, kKeyAtForty),
- ChunkVersion(1, 4, kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()),
+ ChunkVersion({kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()}, {1, 4}),
kShardId0);
secondChunkOnShard0.setEstimatedSizeBytes(1);
ChunkType secondChunkOnShard1(
kUuid,
ChunkRange(kKeyAtForty, kKeyAtMax),
- ChunkVersion(1, 5, kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()),
+ ChunkVersion({kCollectionVersion.epoch(), kCollectionVersion.getTimestamp()}, {1, 5}),
kShardId1);
secondChunkOnShard1.setEstimatedSizeBytes(1);
diff --git a/src/mongo/db/s/balancer/balancer_policy_test.cpp b/src/mongo/db/s/balancer/balancer_policy_test.cpp
index fb98d610b00..be3532fee56 100644
--- a/src/mongo/db/s/balancer/balancer_policy_test.cpp
+++ b/src/mongo/db/s/balancer/balancer_policy_test.cpp
@@ -27,9 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include "mongo/db/keypattern.h"
#include "mongo/db/s/balancer/balancer_policy.h"
#include "mongo/platform/random.h"
@@ -79,7 +76,7 @@ std::pair<ShardStatisticsVector, ShardToChunksMap> generateCluster(
int64_t currentChunk = 0;
- ChunkVersion chunkVersion(1, 0, OID::gen(), Timestamp(1, 1));
+ ChunkVersion chunkVersion({OID::gen(), Timestamp(1, 1)}, {1, 0});
const UUID uuid = UUID::gen();
const KeyPattern shardKeyPattern(BSON("x" << 1));
diff --git a/src/mongo/db/s/balancer/cluster_chunks_resize_policy_test.cpp b/src/mongo/db/s/balancer/cluster_chunks_resize_policy_test.cpp
index 607e57dab44..94b6e874cbf 100644
--- a/src/mongo/db/s/balancer/cluster_chunks_resize_policy_test.cpp
+++ b/src/mongo/db/s/balancer/cluster_chunks_resize_policy_test.cpp
@@ -30,6 +30,7 @@
#include "mongo/db/dbdirectclient.h"
#include "mongo/db/s/balancer/cluster_chunks_resize_policy_impl.h"
#include "mongo/db/s/config/config_server_test_fixture.h"
+
namespace mongo {
namespace {
@@ -37,7 +38,7 @@ class ClusterChunksResizePolicyTest : public ConfigServerTestFixture {
protected:
const NamespaceString kNss{"testDb.testColl"};
const UUID kUuid = UUID::gen();
- const ChunkVersion kCollectionVersion = ChunkVersion(1, 1, OID::gen(), Timestamp(10));
+ const ChunkVersion kCollectionVersion = ChunkVersion({OID::gen(), Timestamp(10)}, {1, 1});
const ShardId kShardId0 = ShardId("shard0");
const ShardId kShardId1 = ShardId("shard1");
diff --git a/src/mongo/db/s/balancer/type_migration.cpp b/src/mongo/db/s/balancer/type_migration.cpp
index 1aac063b940..a47fdff6197 100644
--- a/src/mongo/db/s/balancer/type_migration.cpp
+++ b/src/mongo/db/s/balancer/type_migration.cpp
@@ -113,8 +113,7 @@ StatusWith<MigrationType> MigrationType::fromBSON(const BSONObj& source) {
}
try {
- auto chunkVersionStatus =
- ChunkVersion::fromBSONPositionalOrNewerFormat(source[chunkVersion.name()]);
+ auto chunkVersionStatus = ChunkVersion::parse(source[chunkVersion.name()]);
migrationType._chunkVersion = chunkVersionStatus;
} catch (const DBException& ex) {
return ex.toStatus();
diff --git a/src/mongo/db/s/balancer/type_migration_test.cpp b/src/mongo/db/s/balancer/type_migration_test.cpp
index f605983fe2c..610e150c963 100644
--- a/src/mongo/db/s/balancer/type_migration_test.cpp
+++ b/src/mongo/db/s/balancer/type_migration_test.cpp
@@ -27,12 +27,9 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/db/jsobj.h"
#include "mongo/db/s/balancer/type_migration.h"
#include "mongo/s/catalog/type_chunk.h"
-
#include "mongo/unittest/unittest.h"
namespace mongo {
@@ -48,7 +45,7 @@ const ShardId kToShard("shard0001");
const bool kWaitForDelete{true};
TEST(MigrationTypeTest, FromAndToBSONWithoutOptionalFields) {
- const ChunkVersion version(1, 2, OID::gen(), Timestamp(1, 1));
+ const ChunkVersion version({OID::gen(), Timestamp(1, 1)}, {1, 2});
BSONObjBuilder builder;
builder.append(MigrationType::ns(), kNs);
@@ -68,7 +65,7 @@ TEST(MigrationTypeTest, FromAndToBSONWithoutOptionalFields) {
}
TEST(MigrationTypeTest, FromAndToBSONWitOptionalFields) {
- const ChunkVersion version(1, 2, OID::gen(), Timestamp(1, 1));
+ const ChunkVersion version({OID::gen(), Timestamp(1, 1)}, {1, 2});
const auto secondaryThrottle =
MigrationSecondaryThrottleOptions::createWithWriteConcern(WriteConcernOptions(
"majority", WriteConcernOptions::SyncMode::JOURNAL, Milliseconds(60000)));
@@ -94,7 +91,7 @@ TEST(MigrationTypeTest, FromAndToBSONWitOptionalFields) {
}
TEST(MigrationTypeTest, MissingRequiredNamespaceField) {
- const ChunkVersion version(1, 2, OID::gen(), Timestamp(1, 1));
+ const ChunkVersion version({OID::gen(), Timestamp(1, 1)}, {1, 2});
BSONObjBuilder builder;
builder.append(MigrationType::min(), kMin);
@@ -111,7 +108,7 @@ TEST(MigrationTypeTest, MissingRequiredNamespaceField) {
}
TEST(MigrationTypeTest, MissingRequiredMinField) {
- const ChunkVersion version(1, 2, OID::gen(), Timestamp(1, 1));
+ const ChunkVersion version({OID::gen(), Timestamp(1, 1)}, {1, 2});
BSONObjBuilder builder;
builder.append(MigrationType::ns(), kNs);
@@ -128,7 +125,7 @@ TEST(MigrationTypeTest, MissingRequiredMinField) {
}
TEST(MigrationTypeTest, MissingRequiredMaxField) {
- const ChunkVersion version(1, 2, OID::gen(), Timestamp(1, 1));
+ const ChunkVersion version({OID::gen(), Timestamp(1, 1)}, {1, 2});
BSONObjBuilder builder;
builder.append(MigrationType::ns(), kNs);
@@ -145,7 +142,7 @@ TEST(MigrationTypeTest, MissingRequiredMaxField) {
}
TEST(MigrationTypeTest, MissingRequiredFromShardField) {
- const ChunkVersion version(1, 2, OID::gen(), Timestamp(1, 1));
+ const ChunkVersion version({OID::gen(), Timestamp(1, 1)}, {1, 2});
BSONObjBuilder builder;
builder.append(MigrationType::ns(), kNs);
@@ -162,7 +159,7 @@ TEST(MigrationTypeTest, MissingRequiredFromShardField) {
}
TEST(MigrationTypeTest, MissingRequiredToShardField) {
- const ChunkVersion version(1, 2, OID::gen(), Timestamp(1, 1));
+ const ChunkVersion version({OID::gen(), Timestamp(1, 1)}, {1, 2});
BSONObjBuilder builder;
builder.append(MigrationType::ns(), kNs);
diff --git a/src/mongo/db/s/check_sharding_index_command.cpp b/src/mongo/db/s/check_sharding_index_command.cpp
index 1422dc7c4c8..004c23b2d31 100644
--- a/src/mongo/db/s/check_sharding_index_command.cpp
+++ b/src/mongo/db/s/check_sharding_index_command.cpp
@@ -27,7 +27,6 @@
* it in the license file.
*/
-
#include "mongo/platform/basic.h"
#include "mongo/db/auth/action_type.h"
@@ -40,7 +39,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
-
namespace mongo {
namespace {
@@ -96,13 +94,15 @@ public:
return false;
}
+ std::string tmpErrMsg = "couldn't find valid index for shard key";
auto shardKeyIdx = findShardKeyPrefixedIndex(opCtx,
*collection,
collection->getIndexCatalog(),
keyPattern,
- /*requireSingleKey=*/true);
+ /*requireSingleKey=*/true,
+ &tmpErrMsg);
if (!shardKeyIdx) {
- errmsg = "couldn't find valid index for shard key";
+ errmsg = tmpErrMsg;
return false;
}
diff --git a/src/mongo/db/s/chunk_splitter.cpp b/src/mongo/db/s/chunk_splitter.cpp
index e0fb5839a09..043b0139b20 100644
--- a/src/mongo/db/s/chunk_splitter.cpp
+++ b/src/mongo/db/s/chunk_splitter.cpp
@@ -31,7 +31,6 @@
#include "mongo/db/s/chunk_splitter.h"
#include "mongo/client/dbclient_cursor.h"
-#include "mongo/client/query.h"
#include "mongo/db/client.h"
#include "mongo/db/dbdirectclient.h"
#include "mongo/db/namespace_string.h"
diff --git a/src/mongo/db/s/collection_metadata_filtering_test.cpp b/src/mongo/db/s/collection_metadata_filtering_test.cpp
index e2e3081b436..74dc6a9e655 100644
--- a/src/mongo/db/s/collection_metadata_filtering_test.cpp
+++ b/src/mongo/db/s/collection_metadata_filtering_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/db/catalog_raii.h"
#include "mongo/db/s/collection_sharding_runtime.h"
#include "mongo/db/s/operation_sharding_state.h"
@@ -79,7 +77,7 @@ protected:
boost::none,
true,
[&] {
- ChunkVersion version(1, 0, epoch, Timestamp(1, 1));
+ ChunkVersion version({epoch, Timestamp(1, 1)}, {1, 0});
ChunkType chunk1(uuid,
{shardKeyPattern.getKeyPattern().globalMin(), BSON("_id" << -100)},
diff --git a/src/mongo/db/s/collection_metadata_test.cpp b/src/mongo/db/s/collection_metadata_test.cpp
index 8f789549796..4084fe8e9e2 100644
--- a/src/mongo/db/s/collection_metadata_test.cpp
+++ b/src/mongo/db/s/collection_metadata_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/base/status.h"
#include "mongo/db/range_arithmetic.h"
#include "mongo/db/s/collection_metadata.h"
@@ -62,7 +60,7 @@ CollectionMetadata makeCollectionMetadataImpl(
std::vector<ChunkType> allChunks;
auto nextMinKey = shardKeyPattern.globalMin();
- ChunkVersion version{1, 0, epoch, timestamp};
+ ChunkVersion version({epoch, timestamp}, {1, 0});
for (const auto& myNextChunk : thisShardsChunks) {
if (SimpleBSONObjComparator::kInstance.evaluate(nextMinKey < myNextChunk.first)) {
// Need to add a chunk to the other shard from nextMinKey to myNextChunk.first.
@@ -125,7 +123,7 @@ protected:
reshardingFields.setRecipientFields(std::move(recipientFields));
} else if (state == CoordinatorStateEnum::kBlockingWrites) {
TypeCollectionDonorFields donorFields{
- constructTemporaryReshardingNss(kNss.db(), existingUuid),
+ resharding::constructTemporaryReshardingNss(kNss.db(), existingUuid),
KeyPattern{BSON("newKey" << 1)},
{kThisShard, kOtherShard}};
reshardingFields.setDonorFields(std::move(donorFields));
diff --git a/src/mongo/db/s/collection_sharding_runtime_test.cpp b/src/mongo/db/s/collection_sharding_runtime_test.cpp
index dcee5b73ac0..c6985aa5742 100644
--- a/src/mongo/db/s/collection_sharding_runtime_test.cpp
+++ b/src/mongo/db/s/collection_sharding_runtime_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "boost/optional/optional_io.hpp"
#include "mongo/db/catalog/create_collection.h"
#include "mongo/db/catalog_raii.h"
@@ -63,7 +61,7 @@ protected:
const Timestamp timestamp(1, 1);
auto range = ChunkRange(BSON(kShardKey << MINKEY), BSON(kShardKey << MAXKEY));
auto chunk = ChunkType(
- uuid, std::move(range), ChunkVersion(1, 0, epoch, timestamp), ShardId("other"));
+ uuid, std::move(range), ChunkVersion({epoch, timestamp}, {1, 0}), ShardId("other"));
ChunkManager cm(ShardId("0"),
DatabaseVersion(UUID::gen(), timestamp),
makeStandaloneRoutingTableHistory(
@@ -218,8 +216,8 @@ TEST_F(CollectionShardingRuntimeTest, ReturnUnshardedMetadataInServerlessMode) {
ScopedSetShardRole scopedSetShardRole2{
opCtx,
NamespaceString::kLogicalSessionsNamespace,
- ChunkVersion(1, 0, OID::gen(), Timestamp(1, 1)), /* shardVersion */
- boost::none /* databaseVersion */
+ ChunkVersion({OID::gen(), Timestamp(1, 1)}, {1, 0}), /* shardVersion */
+ boost::none /* databaseVersion */
};
CollectionShardingRuntime csrLogicalSession(
@@ -324,11 +322,11 @@ public:
const Timestamp& timestamp) {
auto range1 = ChunkRange(BSON(kShardKey << MINKEY), BSON(kShardKey << 5));
ChunkType chunk1(
- uuid, range1, ChunkVersion(1, 0, epoch, timestamp), kShardList[0].getName());
+ uuid, range1, ChunkVersion({epoch, timestamp}, {1, 0}), kShardList[0].getName());
auto range2 = ChunkRange(BSON(kShardKey << 5), BSON(kShardKey << MAXKEY));
ChunkType chunk2(
- uuid, range2, ChunkVersion(1, 1, epoch, timestamp), kShardList[0].getName());
+ uuid, range2, ChunkVersion({epoch, timestamp}, {1, 1}), kShardList[0].getName());
return {chunk1, chunk2};
}
diff --git a/src/mongo/db/s/collmod_coordinator.cpp b/src/mongo/db/s/collmod_coordinator.cpp
index ebf179c21c8..50e92b41571 100644
--- a/src/mongo/db/s/collmod_coordinator.cpp
+++ b/src/mongo/db/s/collmod_coordinator.cpp
@@ -77,10 +77,7 @@ bool hasTimeSeriesGranularityUpdate(const CollModRequest& request) {
CollModCoordinator::CollModCoordinator(ShardingDDLCoordinatorService* service,
const BSONObj& initialState)
- : ShardingDDLCoordinator(service, initialState),
- _initialState{initialState.getOwned()},
- _doc{CollModCoordinatorDocument::parse(IDLParserErrorContext("CollModCoordinatorDocument"),
- _initialState)},
+ : RecoverableShardingDDLCoordinator(service, "CollModCoordinator", initialState),
_request{_doc.getCollModRequest()} {}
void CollModCoordinator::checkIfOptionsConflict(const BSONObj& doc) const {
@@ -96,54 +93,9 @@ void CollModCoordinator::checkIfOptionsConflict(const BSONObj& doc) const {
SimpleBSONObjComparator::kInstance.evaluate(selfReq == otherReq));
}
-boost::optional<BSONObj> CollModCoordinator::reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept {
-
- BSONObjBuilder cmdBob;
- if (const auto& optComment = getForwardableOpMetadata().getComment()) {
- cmdBob.append(optComment.get().firstElement());
- }
-
- const auto currPhase = [&]() {
- stdx::lock_guard l{_docMutex};
- return _doc.getPhase();
- }();
-
- cmdBob.appendElements(_request.toBSON());
- BSONObjBuilder bob;
- bob.append("type", "op");
- bob.append("desc", "CollModCoordinator");
- bob.append("op", "command");
- bob.append("ns", nss().toString());
- bob.append("command", cmdBob.obj());
- bob.append("currentPhase", currPhase);
- bob.append("active", true);
- return bob.obj();
-}
-
-void CollModCoordinator::_enterPhase(Phase newPhase) {
- StateDoc newDoc(_doc);
- newDoc.setPhase(newPhase);
-
- LOGV2_DEBUG(6069401,
- 2,
- "CollMod coordinator phase transition",
- "namespace"_attr = nss(),
- "newPhase"_attr = CollModCoordinatorPhase_serializer(newDoc.getPhase()),
- "oldPhase"_attr = CollModCoordinatorPhase_serializer(_doc.getPhase()));
-
- if (_doc.getPhase() == Phase::kUnset) {
- newDoc = _insertStateDocument(std::move(newDoc));
- } else {
- newDoc = _updateStateDocument(cc().makeOperationContext().get(), std::move(newDoc));
- }
-
- {
- stdx::unique_lock ul{_docMutex};
- _doc = std::move(newDoc);
- }
-}
+void CollModCoordinator::appendCommandInfo(BSONObjBuilder* cmdInfoBuilder) const {
+ cmdInfoBuilder->appendElements(_request.toBSON());
+};
void CollModCoordinator::_performNoopRetryableWriteOnParticipants(
OperationContext* opCtx, const std::shared_ptr<executor::TaskExecutor>& executor) {
@@ -154,9 +106,9 @@ void CollModCoordinator::_performNoopRetryableWriteOnParticipants(
return participants;
}();
- _doc = _updateSession(opCtx, _doc);
+ _updateSession(opCtx);
sharding_ddl_util::performNoopRetryableWriteOnShards(
- opCtx, shardsAndConfigsvr, getCurrentSession(_doc), executor);
+ opCtx, shardsAndConfigsvr, getCurrentSession(), executor);
}
void CollModCoordinator::_saveCollectionInfoOnCoordinatorIfNecessary(OperationContext* opCtx) {
@@ -229,14 +181,15 @@ ExecutorFuture<void> CollModCoordinator::_runImpl(
auto* opCtx = opCtxHolder.get();
getForwardableOpMetadata().setOn(opCtx);
- _doc = _updateSession(opCtx, _doc);
+ _updateSession(opCtx);
_saveCollectionInfoOnCoordinatorIfNecessary(opCtx);
if (_collInfo->isSharded) {
- _doc.setCollUUID(
- sharding_ddl_util::getCollectionUUID(opCtx, nss(), true /* allowViews */));
- sharding_ddl_util::stopMigrations(opCtx, nss(), _doc.getCollUUID());
+ _doc.setCollUUID(sharding_ddl_util::getCollectionUUID(
+ opCtx, _collInfo->nsForTargeting, true /* allowViews */));
+ sharding_ddl_util::stopMigrations(
+ opCtx, _collInfo->nsForTargeting, _doc.getCollUUID());
}
_saveShardingInfoOnCoordinatorIfNecessary(opCtx);
@@ -258,7 +211,7 @@ ExecutorFuture<void> CollModCoordinator::_runImpl(
auto* opCtx = opCtxHolder.get();
getForwardableOpMetadata().setOn(opCtx);
- _doc = _updateSession(opCtx, _doc);
+ _updateSession(opCtx);
_saveCollectionInfoOnCoordinatorIfNecessary(opCtx);
_saveShardingInfoOnCoordinatorIfNecessary(opCtx);
@@ -285,7 +238,7 @@ ExecutorFuture<void> CollModCoordinator::_runImpl(
auto* opCtx = opCtxHolder.get();
getForwardableOpMetadata().setOn(opCtx);
- _doc = _updateSession(opCtx, _doc);
+ _updateSession(opCtx);
_saveCollectionInfoOnCoordinatorIfNecessary(opCtx);
_saveShardingInfoOnCoordinatorIfNecessary(opCtx);
@@ -335,7 +288,8 @@ ExecutorFuture<void> CollModCoordinator::_runImpl(
CommandHelpers::appendSimpleCommandStatus(builder, ok, errmsg);
}
_result = builder.obj();
- sharding_ddl_util::resumeMigrations(opCtx, nss(), _doc.getCollUUID());
+ sharding_ddl_util::resumeMigrations(
+ opCtx, _collInfo->nsForTargeting, _doc.getCollUUID());
} else {
CollMod cmd(nss());
cmd.setCollModRequest(_request);
@@ -370,7 +324,8 @@ ExecutorFuture<void> CollModCoordinator::_runImpl(
auto* opCtx = opCtxHolder.get();
getForwardableOpMetadata().setOn(opCtx);
- sharding_ddl_util::resumeMigrations(opCtx, nss(), _doc.getCollUUID());
+ sharding_ddl_util::resumeMigrations(
+ opCtx, _collInfo->nsForTargeting, _doc.getCollUUID());
}
}
return status;
diff --git a/src/mongo/db/s/collmod_coordinator.h b/src/mongo/db/s/collmod_coordinator.h
index b85b6b16d5a..4b65502f78d 100644
--- a/src/mongo/db/s/collmod_coordinator.h
+++ b/src/mongo/db/s/collmod_coordinator.h
@@ -35,7 +35,9 @@
namespace mongo {
-class CollModCoordinator final : public ShardingDDLCoordinator {
+class CollModCoordinator final
+ : public RecoverableShardingDDLCoordinator<CollModCoordinatorDocument,
+ CollModCoordinatorPhaseEnum> {
public:
using StateDoc = CollModCoordinatorDocument;
using Phase = CollModCoordinatorPhaseEnum;
@@ -44,9 +46,7 @@ public:
void checkIfOptionsConflict(const BSONObj& doc) const override;
- boost::optional<BSONObj> reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept override;
+ void appendCommandInfo(BSONObjBuilder* cmdInfoBuilder) const override;
/**
* Waits for the termination of the parent DDLCoordinator (so all the resources are liberated)
@@ -74,32 +74,13 @@ private:
std::vector<ShardId> shardsOwningChunks;
};
- ShardingDDLCoordinatorMetadata const& metadata() const override {
- return _doc.getShardingDDLCoordinatorMetadata();
+ StringData serializePhase(const Phase& phase) const override {
+ return CollModCoordinatorPhase_serializer(phase);
}
ExecutorFuture<void> _runImpl(std::shared_ptr<executor::ScopedTaskExecutor> executor,
const CancellationToken& token) noexcept override;
- template <typename Func>
- auto _executePhase(const Phase& newPhase, Func&& func) {
- return [=] {
- const auto& currPhase = _doc.getPhase();
-
- if (currPhase > newPhase) {
- // Do not execute this phase if we already reached a subsequent one.
- return;
- }
- if (currPhase < newPhase) {
- // Persist the new phase if this is the first time we are executing it.
- _enterPhase(newPhase);
- }
- return func();
- };
- }
-
- void _enterPhase(Phase newPhase);
-
void _performNoopRetryableWriteOnParticipants(
OperationContext* opCtx, const std::shared_ptr<executor::TaskExecutor>& executor);
@@ -107,10 +88,6 @@ private:
void _saveShardingInfoOnCoordinatorIfNecessary(OperationContext* opCtx);
- BSONObj _initialState;
- mutable Mutex _docMutex = MONGO_MAKE_LATCH("CollModCoordinator::_docMutex");
- CollModCoordinatorDocument _doc;
-
const mongo::CollModRequest _request;
boost::optional<BSONObj> _result;
diff --git a/src/mongo/db/s/collmod_coordinator_pre60_compatible.cpp b/src/mongo/db/s/collmod_coordinator_pre60_compatible.cpp
deleted file mode 100644
index 37005996f3a..00000000000
--- a/src/mongo/db/s/collmod_coordinator_pre60_compatible.cpp
+++ /dev/null
@@ -1,264 +0,0 @@
-/**
- * Copyright (C) 2021-present MongoDB, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the Server Side Public License, version 1,
- * as published by MongoDB, Inc.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * Server Side Public License for more details.
- *
- * You should have received a copy of the Server Side Public License
- * along with this program. If not, see
- * <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the Server Side Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-
-#include "mongo/db/s/collmod_coordinator_pre60_compatible.h"
-
-#include "mongo/db/catalog/collection_catalog.h"
-#include "mongo/db/catalog/database_holder.h"
-#include "mongo/db/coll_mod_gen.h"
-#include "mongo/db/db_raii.h"
-#include "mongo/db/ops/insert.h"
-#include "mongo/db/s/sharded_collmod_gen.h"
-#include "mongo/db/s/sharding_ddl_util.h"
-#include "mongo/db/s/sharding_state.h"
-#include "mongo/db/timeseries/catalog_helper.h"
-#include "mongo/db/timeseries/timeseries_collmod.h"
-#include "mongo/idl/idl_parser.h"
-#include "mongo/logv2/log.h"
-#include "mongo/s/async_requests_sender.h"
-#include "mongo/s/cluster_commands_helpers.h"
-#include "mongo/s/grid.h"
-
-#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
-
-
-namespace mongo {
-
-namespace {
-
-bool isShardedColl(OperationContext* opCtx, const NamespaceString& nss) {
- try {
- auto coll = Grid::get(opCtx)->catalogClient()->getCollection(opCtx, nss);
- return true;
- } catch (const ExceptionFor<ErrorCodes::NamespaceNotFound>&) {
- // The collection is not sharded or doesn't exist.
- return false;
- }
-}
-
-bool hasTimeSeriesGranularityUpdate(const CollModRequest& request) {
- return request.getTimeseries() && request.getTimeseries()->getGranularity();
-}
-
-} // namespace
-
-CollModCoordinatorPre60Compatible::CollModCoordinatorPre60Compatible(
- ShardingDDLCoordinatorService* service, const BSONObj& initialState)
- : ShardingDDLCoordinator(service, initialState) {
- _initialState = initialState.getOwned();
- _doc = CollModCoordinatorDocument::parse(IDLParserErrorContext("CollModCoordinatorDocument"),
- _initialState);
-}
-
-void CollModCoordinatorPre60Compatible::checkIfOptionsConflict(const BSONObj& doc) const {
- const auto otherDoc =
- CollModCoordinatorDocument::parse(IDLParserErrorContext("CollModCoordinatorDocument"), doc);
-
- const auto& selfReq = _doc.getCollModRequest().toBSON();
- const auto& otherReq = otherDoc.getCollModRequest().toBSON();
-
- uassert(ErrorCodes::ConflictingOperationInProgress,
- str::stream() << "Another collMod for namespace " << nss()
- << " is being executed with different parameters: " << selfReq,
- SimpleBSONObjComparator::kInstance.evaluate(selfReq == otherReq));
-}
-
-boost::optional<BSONObj> CollModCoordinatorPre60Compatible::reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept {
-
- BSONObjBuilder cmdBob;
- if (const auto& optComment = getForwardableOpMetadata().getComment()) {
- cmdBob.append(optComment.get().firstElement());
- }
-
- const auto currPhase = [&]() {
- stdx::lock_guard l{_docMutex};
- return _doc.getPhase();
- }();
-
- cmdBob.appendElements(_doc.getCollModRequest().toBSON());
- BSONObjBuilder bob;
- bob.append("type", "op");
- bob.append("desc", "CollModCoordinator");
- bob.append("op", "command");
- bob.append("ns", nss().toString());
- bob.append("command", cmdBob.obj());
- bob.append("currentPhase", currPhase);
- bob.append("active", true);
- return bob.obj();
-}
-
-void CollModCoordinatorPre60Compatible::_enterPhase(Phase newPhase) {
- StateDoc newDoc(_doc);
- newDoc.setPhase(newPhase);
-
- LOGV2_DEBUG(6482601,
- 2,
- "CollMod coordinator phase transition",
- "namespace"_attr = nss(),
- "newPhase"_attr = CollModCoordinatorPhase_serializer(newDoc.getPhase()),
- "oldPhase"_attr = CollModCoordinatorPhase_serializer(_doc.getPhase()));
-
- if (_doc.getPhase() == Phase::kUnset) {
- newDoc = _insertStateDocument(std::move(newDoc));
- } else {
- newDoc = _updateStateDocument(cc().makeOperationContext().get(), std::move(newDoc));
- }
-
- {
- stdx::unique_lock ul{_docMutex};
- _doc = std::move(newDoc);
- }
-}
-
-void CollModCoordinatorPre60Compatible::_performNoopRetryableWriteOnParticipants(
- OperationContext* opCtx, const std::shared_ptr<executor::TaskExecutor>& executor) {
- auto shardsAndConfigsvr = [&] {
- const auto shardRegistry = Grid::get(opCtx)->shardRegistry();
- auto participants = shardRegistry->getAllShardIds(opCtx);
- participants.emplace_back(shardRegistry->getConfigShard()->getId());
- return participants;
- }();
-
- _doc = _updateSession(opCtx, _doc);
- sharding_ddl_util::performNoopRetryableWriteOnShards(
- opCtx, shardsAndConfigsvr, getCurrentSession(_doc), executor);
-}
-
-ExecutorFuture<void> CollModCoordinatorPre60Compatible::_runImpl(
- std::shared_ptr<executor::ScopedTaskExecutor> executor,
- const CancellationToken& token) noexcept {
- return ExecutorFuture<void>(**executor)
- .then(_executePhase(
- Phase::kUpdateShards,
- [this, executor = executor, anchor = shared_from_this()] {
- auto opCtxHolder = cc().makeOperationContext();
- auto* opCtx = opCtxHolder.get();
- getForwardableOpMetadata().setOn(opCtx);
-
- const auto isTimeSeries = timeseries::getTimeseriesOptions(
- opCtx, nss(), !nss().isTimeseriesBucketsCollection());
- const auto collNss = isTimeSeries && !nss().isTimeseriesBucketsCollection()
- ? nss().makeTimeseriesBucketsNamespace()
- : nss();
- const auto isSharded = isShardedColl(opCtx, collNss);
-
- if (isSharded) {
- // Updating granularity on sharded time-series collections is not allowed.
- if (isTimeSeries) {
- uassert(
- ErrorCodes::NotImplemented,
- str::stream()
- << "Cannot update granularity of a sharded time-series collection.",
- !hasTimeSeriesGranularityUpdate(_doc.getCollModRequest()));
- }
- _doc.setCollUUID(
- sharding_ddl_util::getCollectionUUID(opCtx, nss(), true /* allowViews */));
-
- sharding_ddl_util::stopMigrations(opCtx, nss(), _doc.getCollUUID());
-
- if (!_firstExecution) {
- _performNoopRetryableWriteOnParticipants(opCtx, **executor);
- }
-
- _doc = _updateSession(opCtx, _doc);
- const OperationSessionInfo osi = getCurrentSession(_doc);
-
- const auto chunkManager = uassertStatusOK(
- Grid::get(opCtx)->catalogCache()->getCollectionRoutingInfoWithRefresh(
- opCtx, collNss));
- std::unique_ptr<CollatorInterface> collator;
- const auto expCtx =
- make_intrusive<ExpressionContext>(opCtx, std::move(collator), collNss);
- std::set<ShardId> participants;
- chunkManager.getShardIdsForQuery(
- expCtx, {} /* query */, {} /* collation */, &participants);
-
- ShardsvrCollModParticipant request(nss(), _doc.getCollModRequest());
- const auto cmdObj =
- CommandHelpers::appendMajorityWriteConcern(request.toBSON({}));
- const auto& responses = sharding_ddl_util::sendAuthenticatedCommandToShards(
- opCtx,
- nss().db(),
- cmdObj.addFields(osi.toBSON()),
- {std::make_move_iterator(participants.begin()),
- std::make_move_iterator(participants.end())},
- **executor);
- BSONObjBuilder builder;
- std::string errmsg;
- auto ok = appendRawResponses(opCtx, &errmsg, &builder, responses).responseOK;
- if (!errmsg.empty()) {
- CommandHelpers::appendSimpleCommandStatus(builder, ok, errmsg);
- }
- _result = builder.obj();
- sharding_ddl_util::resumeMigrations(opCtx, nss(), _doc.getCollUUID());
- } else {
- CollMod cmd(nss());
- cmd.setCollModRequest(_doc.getCollModRequest());
- BSONObjBuilder collModResBuilder;
- uassertStatusOK(timeseries::processCollModCommandWithTimeSeriesTranslation(
- opCtx, nss(), cmd, true, &collModResBuilder));
- auto collModRes = collModResBuilder.obj();
-
- const auto dbInfo = uassertStatusOK(
- Grid::get(opCtx)->catalogCache()->getDatabase(opCtx, nss().db()));
- const auto shard = uassertStatusOK(
- Grid::get(opCtx)->shardRegistry()->getShard(opCtx, dbInfo->getPrimary()));
- BSONObjBuilder builder;
- builder.appendElements(collModRes);
- BSONObjBuilder subBuilder(builder.subobjStart("raw"));
- subBuilder.append(shard->getConnString().toString(), collModRes);
- subBuilder.doneFast();
- _result = builder.obj();
- }
- }))
- .onError([this, anchor = shared_from_this()](const Status& status) {
- if (!status.isA<ErrorCategory::NotPrimaryError>() &&
- !status.isA<ErrorCategory::ShutdownError>()) {
- LOGV2_ERROR(6482602,
- "Error running collMod",
- "namespace"_attr = nss(),
- "error"_attr = redact(status));
- // If we have the collection UUID set, this error happened in a sharded collection,
- // we should restore the migrations.
- if (_doc.getCollUUID()) {
- auto opCtxHolder = cc().makeOperationContext();
- auto* opCtx = opCtxHolder.get();
- getForwardableOpMetadata().setOn(opCtx);
-
- sharding_ddl_util::resumeMigrations(opCtx, nss(), _doc.getCollUUID());
- }
- }
- return status;
- });
-}
-
-} // namespace mongo
diff --git a/src/mongo/db/s/collmod_coordinator_pre60_compatible.h b/src/mongo/db/s/collmod_coordinator_pre60_compatible.h
deleted file mode 100644
index a8de0c67f53..00000000000
--- a/src/mongo/db/s/collmod_coordinator_pre60_compatible.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/**
- * Copyright (C) 2021-present MongoDB, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the Server Side Public License, version 1,
- * as published by MongoDB, Inc.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * Server Side Public License for more details.
- *
- * You should have received a copy of the Server Side Public License
- * along with this program. If not, see
- * <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the Server Side Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#pragma once
-
-#include "mongo/db/s/collmod_coordinator_document_gen.h"
-#include "mongo/db/s/sharding_ddl_coordinator.h"
-#include "mongo/s/request_types/sharded_ddl_commands_gen.h"
-#include "mongo/stdx/mutex.h"
-
-namespace mongo {
-
-class CollModCoordinatorPre60Compatible final : public ShardingDDLCoordinator {
-public:
- using StateDoc = CollModCoordinatorDocument;
- using Phase = CollModCoordinatorPhaseEnum;
-
- CollModCoordinatorPre60Compatible(ShardingDDLCoordinatorService* service,
- const BSONObj& initialState);
-
- void checkIfOptionsConflict(const BSONObj& doc) const override;
-
- boost::optional<BSONObj> reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept override;
-
- /**
- * Waits for the termination of the parent DDLCoordinator (so all the resources are liberated)
- * and then return the result.
- */
- BSONObj getResult(OperationContext* opCtx) {
- getCompletionFuture().get(opCtx);
- invariant(_result.is_initialized());
- return *_result;
- }
-
-private:
- ShardingDDLCoordinatorMetadata const& metadata() const override {
- stdx::lock_guard l{_docMutex};
- return _doc.getShardingDDLCoordinatorMetadata();
- }
-
- ExecutorFuture<void> _runImpl(std::shared_ptr<executor::ScopedTaskExecutor> executor,
- const CancellationToken& token) noexcept override;
-
- template <typename Func>
- auto _executePhase(const Phase& newPhase, Func&& func) {
- return [=] {
- const auto& currPhase = _doc.getPhase();
-
- if (currPhase > newPhase) {
- // Do not execute this phase if we already reached a subsequent one.
- return;
- }
- if (currPhase < newPhase) {
- // Persist the new phase if this is the first time we are executing it.
- _enterPhase(newPhase);
- }
- return func();
- };
- }
-
- void _enterPhase(Phase newPhase);
-
- void _performNoopRetryableWriteOnParticipants(
- OperationContext* opCtx, const std::shared_ptr<executor::TaskExecutor>& executor);
-
- BSONObj _initialState;
- mutable Mutex _docMutex = MONGO_MAKE_LATCH("CollModCoordinatorPre60Compatible::_docMutex");
- CollModCoordinatorDocument _doc;
-
- boost::optional<BSONObj> _result;
-};
-
-} // namespace mongo
diff --git a/src/mongo/db/s/commit_chunk_migration.idl b/src/mongo/db/s/commit_chunk_migration.idl
new file mode 100644
index 00000000000..6484623cd5c
--- /dev/null
+++ b/src/mongo/db/s/commit_chunk_migration.idl
@@ -0,0 +1,85 @@
+
+ # Copyright (C) 2019-present MongoDB, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the Server Side Public License, version 1,
+# as published by MongoDB, Inc.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# Server Side Public License for more details.
+#
+# You should have received a copy of the Server Side Public License
+# along with this program. If not, see
+# <http://www.mongodb.com/licensing/server-side-public-license>.
+#
+# As a special exception, the copyright holders give permission to link the
+# code of portions of this program with the OpenSSL library under certain
+# conditions as described in each individual source file and distribute
+# linked combinations including the program with the OpenSSL library. You
+# must comply with the Server Side Public License in all respects for
+# all of the code used other than as permitted herein. If you modify file(s)
+# with this exception, you may extend this exception to your version of the
+# file(s), but you are not obligated to do so. If you do not wish to do so,
+# delete this exception statement from your version. If you delete this
+# exception statement from all source files in the program, then also delete
+# it in the license file.
+#
+
+
+global:
+ cpp_namespace: "mongo"
+
+imports:
+ - "mongo/idl/basic_types.idl"
+ - "mongo/s/sharding_types.idl"
+ - "mongo/s/chunk_version.idl"
+
+structs:
+ ConfigSvrCommitChunkMigrationResponse:
+ description: "Response of the _configsvrCommitChunkMigration command."
+ strict: false
+ fields:
+ shardVersion:
+ type: ChunkVersion
+ description: "Collection version at the end of the migration."
+
+ MigratedChunkType:
+ description: "ChunkType describing a migrated chunk"
+ strict: false
+ fields:
+ lastmod : ChunkVersion
+ min: object
+ max: object
+
+commands:
+ _configsvrCommitChunkMigration:
+ command_name: _configsvrCommitChunkMigration
+ cpp_name: CommitChunkMigrationRequest
+ description: "internal _configsvrCommitChunkMigration command for config server"
+ namespace: type
+ api_version: ""
+ type: namespacestring
+ strict: false
+ reply_type: ConfigSvrCommitChunkMigrationResponse
+ fields:
+ fromShard:
+ type: shard_id
+ description: "from shard name"
+
+ toShard:
+ type: shard_id
+ description: "to shard name"
+
+ migratedChunk:
+ type: MigratedChunkType
+ description: "ChunkType describing a migrated chunk"
+
+ fromShardCollectionVersion:
+ type: ChunkVersion
+ description: "{ shardVersionField: <version> }"
+
+ validAfter:
+ type: timestamp
+ description: "The time after which this chunk is at the new shard" \ No newline at end of file
diff --git a/src/mongo/db/s/compact_structured_encryption_data_coordinator.cpp b/src/mongo/db/s/compact_structured_encryption_data_coordinator.cpp
index 04048f7946b..69c67d89dcb 100644
--- a/src/mongo/db/s/compact_structured_encryption_data_coordinator.cpp
+++ b/src/mongo/db/s/compact_structured_encryption_data_coordinator.cpp
@@ -187,94 +187,35 @@ void doDropOperation(const CompactStructuredEncryptionDataState& state) {
boost::optional<BSONObj> CompactStructuredEncryptionDataCoordinator::reportForCurrentOp(
MongoProcessInterface::CurrentOpConnectionsMode connMode,
MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept {
- BSONObjBuilder bob;
-
- CompactStructuredEncryptionDataPhaseEnum currPhase;
- std::string nss;
- std::string escNss;
- std::string eccNss;
- std::string ecoNss;
- std::string ecocNss;
- std::string ecocRenameUuid;
- std::string ecocUiid;
- std::string ecocRenameNss;
- {
- stdx::lock_guard l{_docMutex};
- currPhase = _doc.getPhase();
- nss = _doc.getId().getNss().ns();
- escNss = _doc.getEscNss().ns();
- eccNss = _doc.getEccNss().ns();
- ecoNss = _doc.getEcocNss().ns();
- ecocNss = _doc.getEcocNss().ns();
- ecocRenameUuid =
- _doc.getEcocRenameUuid() ? _doc.getEcocRenameUuid().value().toString() : "none";
- ecocUiid = _doc.getEcocUuid() ? _doc.getEcocUuid().value().toString() : "none";
- ecocRenameNss = _doc.getEcocRenameNss().ns();
- }
-
- bob.append("type", "op");
- bob.append("desc", "CompactStructuredEncryptionDataCoordinator");
- bob.append("op", "command");
- bob.append("nss", nss);
- bob.append("escNss", escNss);
- bob.append("eccNss", eccNss);
- bob.append("ecocNss", ecocNss);
- bob.append("ecocUuid", ecocUiid);
- bob.append("ecocRenameNss", ecocRenameNss);
- bob.append("ecocRenameUuid", ecocRenameUuid);
- bob.append("currentPhase", currPhase);
- bob.append("active", true);
+ auto bob = basicReportBuilder();
+
+ stdx::lock_guard lg{_docMutex};
+ bob.append("escNss", _doc.getEscNss().ns());
+ bob.append("eccNss", _doc.getEccNss().ns());
+ bob.append("ecocNss", _doc.getEcocNss().ns());
+ bob.append("ecocUuid", _doc.getEcocUuid() ? _doc.getEcocUuid().value().toString() : "none");
+ bob.append("ecocRenameNss", _doc.getEcocRenameNss().ns());
+ bob.append("ecocRenameUuid",
+ _doc.getEcocRenameUuid() ? _doc.getEcocRenameUuid().value().toString() : "none");
return bob.obj();
}
-void CompactStructuredEncryptionDataCoordinator::_enterPhase(Phase newPhase) {
- StateDoc doc(_doc);
- doc.setPhase(newPhase);
-
- LOGV2_DEBUG(6350490,
- 2,
- "Transitioning phase for CompactStructuredEncryptionDataCoordinator",
- "nss"_attr = _doc.getId().getNss().ns(),
- "escNss"_attr = _doc.getEscNss().ns(),
- "eccNss"_attr = _doc.getEccNss().ns(),
- "ecocNss"_attr = _doc.getEcocNss().ns(),
- "ecocUuid"_attr = _doc.getEcocUuid(),
- "ecocRenameNss"_attr = _doc.getEcocRenameNss().ns(),
- "ecocRenameUuid"_attr = _doc.getEcocRenameUuid(),
- "skipCompact"_attr = _doc.getSkipCompact(),
- "compactionTokens"_attr = _doc.getCompactionTokens(),
- "oldPhase"_attr = CompactStructuredEncryptionDataPhase_serializer(_doc.getPhase()),
- "newPhase"_attr = CompactStructuredEncryptionDataPhase_serializer(newPhase));
-
- if (_doc.getPhase() == Phase::kUnset) {
- doc = _insertStateDocument(std::move(doc));
- } else {
- auto opCtx = cc().makeOperationContext();
- doc = _updateStateDocument(opCtx.get(), std::move(doc));
- }
-
- {
- stdx::unique_lock ul{_docMutex};
- _doc = std::move(doc);
- }
-}
-
ExecutorFuture<void> CompactStructuredEncryptionDataCoordinator::_runImpl(
std::shared_ptr<executor::ScopedTaskExecutor> executor,
const CancellationToken& token) noexcept {
return ExecutorFuture<void>(**executor)
.then(_executePhase(Phase::kRenameEcocForCompact,
- [this, anchor = shared_from_this()](const auto& state) {
- doRenameOperation(state, &_skipCompact, &_ecocRenameUuid);
+ [this, anchor = shared_from_this()]() {
+ doRenameOperation(_doc, &_skipCompact, &_ecocRenameUuid);
stdx::unique_lock ul{_docMutex};
_doc.setSkipCompact(_skipCompact);
_doc.setEcocRenameUuid(_ecocRenameUuid);
}))
- .then(_executePhase(Phase::kCompactStructuredEncryptionData,
- [this, anchor = shared_from_this()](const auto& state) {
- _response = doCompactOperation(state);
- }))
- .then(_executePhase(Phase::kDropTempCollection, doDropOperation));
+ .then(_executePhase(
+ Phase::kCompactStructuredEncryptionData,
+ [this, anchor = shared_from_this()]() { _response = doCompactOperation(_doc); }))
+ .then(_executePhase(Phase::kDropTempCollection,
+ [this, anchor = shared_from_this()] { doDropOperation(_doc); }));
}
} // namespace mongo
diff --git a/src/mongo/db/s/compact_structured_encryption_data_coordinator.h b/src/mongo/db/s/compact_structured_encryption_data_coordinator.h
index 4b8ffd33441..b030e19910a 100644
--- a/src/mongo/db/s/compact_structured_encryption_data_coordinator.h
+++ b/src/mongo/db/s/compact_structured_encryption_data_coordinator.h
@@ -40,7 +40,9 @@
namespace mongo {
-class CompactStructuredEncryptionDataCoordinator final : public ShardingDDLCoordinator {
+class CompactStructuredEncryptionDataCoordinator final
+ : public RecoverableShardingDDLCoordinator<CompactStructuredEncryptionDataState,
+ CompactStructuredEncryptionDataPhaseEnum> {
public:
static constexpr auto kStateContext = "CompactStructuredEncryptionDataState"_sd;
using StateDoc = CompactStructuredEncryptionDataState;
@@ -48,7 +50,8 @@ public:
CompactStructuredEncryptionDataCoordinator(ShardingDDLCoordinatorService* service,
const BSONObj& doc)
- : ShardingDDLCoordinator(service, doc), _doc(StateDoc::parse({kStateContext}, doc)) {}
+ : RecoverableShardingDDLCoordinator(
+ service, "CompactStructuredEncryptionDataCoordinator", doc) {}
boost::optional<BSONObj> reportForCurrentOp(
MongoProcessInterface::CurrentOpConnectionsMode connMode,
@@ -63,36 +66,14 @@ public:
void checkIfOptionsConflict(const BSONObj& doc) const final {}
private:
- void _enterPhase(Phase newPhase);
-
- template <typename Func>
- auto _executePhase(const Phase& newPhase, Func&& func) {
- return [=] {
- const auto& currPhase = _doc.getPhase();
- if (currPhase > newPhase) {
- return;
- }
- if (currPhase < newPhase) {
- _enterPhase(newPhase);
- }
-
- return func(_doc);
- };
- }
-
-private:
- ShardingDDLCoordinatorMetadata const& metadata() const final {
- return _doc.getShardingDDLCoordinatorMetadata();
+ StringData serializePhase(const Phase& phase) const override {
+ return CompactStructuredEncryptionDataPhase_serializer(phase);
}
ExecutorFuture<void> _runImpl(std::shared_ptr<executor::ScopedTaskExecutor> executor,
const CancellationToken& token) noexcept final;
private:
- mutable Mutex _docMutex =
- MONGO_MAKE_LATCH("CompactStructuredEncryptionDataCoordinator::_docMutex");
- StateDoc _doc;
-
boost::optional<CompactStructuredEncryptionDataCommandReply> _response;
bool _skipCompact{false};
boost::optional<UUID> _ecocRenameUuid;
diff --git a/src/mongo/db/s/config/config_server_test_fixture.cpp b/src/mongo/db/s/config/config_server_test_fixture.cpp
index 198371cf17a..d697528a86d 100644
--- a/src/mongo/db/s/config/config_server_test_fixture.cpp
+++ b/src/mongo/db/s/config/config_server_test_fixture.cpp
@@ -70,7 +70,6 @@
#include "mongo/s/config_server_catalog_cache_loader.h"
#include "mongo/s/database_version.h"
#include "mongo/s/query/cluster_cursor_manager.h"
-#include "mongo/s/request_types/set_shard_version_request.h"
#include "mongo/s/shard_id.h"
#include "mongo/s/write_ops/batched_command_response.h"
#include "mongo/util/clock_source_mock.h"
@@ -452,30 +451,6 @@ std::vector<KeysCollectionDocument> ConfigServerTestFixture::getKeys(OperationCo
return keys;
}
-void ConfigServerTestFixture::expectSetShardVersion(
- const HostAndPort& expectedHost,
- const ShardType& expectedShard,
- const NamespaceString& expectedNs,
- boost::optional<ChunkVersion> expectedChunkVersion) {
- onCommand([&](const RemoteCommandRequest& request) {
- ASSERT_EQ(expectedHost, request.target);
- ASSERT_BSONOBJ_EQ(rpc::makeEmptyMetadata(),
- rpc::TrackingMetadata::removeTrackingData(request.metadata));
-
- SetShardVersionRequest ssv =
- assertGet(SetShardVersionRequest::parseFromBSON(request.cmdObj));
-
- ASSERT(ssv.isAuthoritative());
- ASSERT_EQ(expectedNs.toString(), ssv.getNS().ns());
-
- if (expectedChunkVersion) {
- ASSERT_EQ(*expectedChunkVersion, ssv.getNSVersion());
- }
-
- return BSON("ok" << true);
- });
-}
-
void ConfigServerTestFixture::setupOpObservers() {
auto opObserverRegistry =
checked_cast<OpObserverRegistry*>(getServiceContext()->getOpObserver());
diff --git a/src/mongo/db/s/config/config_server_test_fixture.h b/src/mongo/db/s/config/config_server_test_fixture.h
index 05ed2b55a67..bd2a41b41a0 100644
--- a/src/mongo/db/s/config/config_server_test_fixture.h
+++ b/src/mongo/db/s/config/config_server_test_fixture.h
@@ -166,17 +166,6 @@ protected:
StatusWith<std::vector<BSONObj>> getIndexes(OperationContext* opCtx, const NamespaceString& ns);
/**
- * Expects a setShardVersion command to be executed on the specified shard.
- *
- * The expectedChunkVersion is optional, because in some cases it may not be possible to know
- * the OID of a ChunkVersion generated by some internal code. (See SERVER-29451).
- */
- void expectSetShardVersion(const HostAndPort& expectedHost,
- const ShardType& expectedShard,
- const NamespaceString& expectedNs,
- boost::optional<ChunkVersion> expectedChunkVersion);
-
- /**
* Returns the stored raw pointer to the addShard TaskExecutor's NetworkInterface.
*/
executor::NetworkInterfaceMock* networkForAddShard() const;
diff --git a/src/mongo/db/s/config/configsvr_collmod_command.cpp b/src/mongo/db/s/config/configsvr_collmod_command.cpp
index e4bda1b9995..6d224756002 100644
--- a/src/mongo/db/s/config/configsvr_collmod_command.cpp
+++ b/src/mongo/db/s/config/configsvr_collmod_command.cpp
@@ -66,6 +66,10 @@ public:
return Command::AllowedOnSecondary::kNever;
}
+ bool supportsRetryableWrite() const final {
+ return true;
+ }
+
class Invocation final : public InvocationBase {
public:
using InvocationBase::InvocationBase;
diff --git a/src/mongo/db/s/config/configsvr_commit_chunk_migration_command.cpp b/src/mongo/db/s/config/configsvr_commit_chunk_migration_command.cpp
index 9dcff9c96d0..a50f499662f 100644
--- a/src/mongo/db/s/config/configsvr_commit_chunk_migration_command.cpp
+++ b/src/mongo/db/s/config/configsvr_commit_chunk_migration_command.cpp
@@ -31,12 +31,14 @@
#include "mongo/platform/basic.h"
#include "mongo/base/status_with.h"
+#include "mongo/bson/util/bson_extract.h"
#include "mongo/db/auth/authorization_session.h"
#include "mongo/db/commands.h"
#include "mongo/db/concurrency/d_concurrency.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/repl/read_concern_args.h"
#include "mongo/db/s/chunk_move_write_concern_options.h"
+#include "mongo/db/s/commit_chunk_migration_gen.h"
#include "mongo/db/s/config/sharding_catalog_manager.h"
#include "mongo/db/s/sharding_state.h"
#include "mongo/rpc/get_status_from_command_result.h"
@@ -44,7 +46,6 @@
#include "mongo/s/chunk_version.h"
#include "mongo/s/client/shard_registry.h"
#include "mongo/s/grid.h"
-#include "mongo/s/request_types/commit_chunk_migration_request_type.h"
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
@@ -79,9 +80,23 @@ namespace {
* }
*
*/
-class ConfigSvrCommitChunkMigrationCommand : public BasicCommand {
+
+
+ChunkType toChunkType(const MigratedChunkType& migratedChunk) {
+
+ ChunkType chunk;
+ chunk.setMin(migratedChunk.getMin());
+ chunk.setMax(migratedChunk.getMax());
+ chunk.setVersion(migratedChunk.getLastmod());
+ return chunk;
+}
+
+
+class ConfigSvrCommitChunkMigrationCommand
+ : public TypedCommand<ConfigSvrCommitChunkMigrationCommand> {
public:
- ConfigSvrCommitChunkMigrationCommand() : BasicCommand("_configsvrCommitChunkMigration") {}
+ using Request = CommitChunkMigrationRequest;
+ using Response = ConfigSvrCommitChunkMigrationResponse;
bool skipApiVersionCheck() const override {
// Internal command (server to server).
@@ -100,51 +115,57 @@ public:
return true;
}
- virtual bool supportsWriteConcern(const BSONObj& cmd) const override {
- return true;
- }
+ class Invocation : public InvocationBase {
+ public:
+ using InvocationBase::InvocationBase;
+
+ ConfigSvrCommitChunkMigrationResponse typedRun(OperationContext* opCtx) {
+
+ uassert(ErrorCodes::IllegalOperation,
+ "_configsvrClearJumboFlag can only be run on config servers",
+ serverGlobalParams.clusterRole == ClusterRole::ConfigServer);
+
+ // Set the operation context read concern level to local for reads into the config
+ // database.
+ repl::ReadConcernArgs::get(opCtx) =
+ repl::ReadConcernArgs(repl::ReadConcernLevel::kLocalReadConcern);
+
+ const NamespaceString nss = ns();
+ auto migratedChunk = toChunkType(request().getMigratedChunk());
- Status checkAuthForCommand(Client* client,
- const std::string& dbname,
- const BSONObj& cmdObj) const override {
- if (!AuthorizationSession::get(client)->isAuthorizedForActionsOnResource(
- ResourcePattern::forClusterResource(), ActionType::internal)) {
- return Status(ErrorCodes::Unauthorized, "Unauthorized");
+ StatusWith<BSONObj> chunkVersionResponse =
+ ShardingCatalogManager::get(opCtx)->commitChunkMigration(
+ opCtx,
+ nss,
+ migratedChunk,
+ request().getFromShardCollectionVersion().epoch(),
+ request().getFromShardCollectionVersion().getTimestamp(),
+ request().getFromShard(),
+ request().getToShard(),
+ request().getValidAfter());
+
+ auto chunkVersionObj = uassertStatusOK(chunkVersionResponse);
+
+ return Response{ChunkVersion::parse(chunkVersionObj[ChunkVersion::kShardVersionField])};
}
- return Status::OK();
- }
- std::string parseNs(const std::string& dbname, const BSONObj& cmdObj) const override {
- return CommandHelpers::parseNsFullyQualified(cmdObj);
- }
+ private:
+ bool supportsWriteConcern() const override {
+ return true;
+ }
- bool run(OperationContext* opCtx,
- const std::string& dbName,
- const BSONObj& cmdObj,
- BSONObjBuilder& result) override {
-
- // Set the operation context read concern level to local for reads into the config database.
- repl::ReadConcernArgs::get(opCtx) =
- repl::ReadConcernArgs(repl::ReadConcernLevel::kLocalReadConcern);
-
- const NamespaceString nss = NamespaceString(parseNs(dbName, cmdObj));
-
- auto commitRequest =
- uassertStatusOK(CommitChunkMigrationRequest::createFromCommand(nss, cmdObj));
-
- StatusWith<BSONObj> response = ShardingCatalogManager::get(opCtx)->commitChunkMigration(
- opCtx,
- nss,
- commitRequest.getMigratedChunk(),
- commitRequest.getCollectionEpoch(),
- commitRequest.getCollectionTimestamp(),
- commitRequest.getFromShard(),
- commitRequest.getToShard(),
- commitRequest.getValidAfter());
- uassertStatusOK(response.getStatus());
- result.appendElements(response.getValue());
- return true;
- }
+ NamespaceString ns() const override {
+ return request().getCommandParameter();
+ }
+
+ void doCheckAuthorization(OperationContext* opCtx) const override {
+ uassert(ErrorCodes::Unauthorized,
+ "Unauthorized",
+ AuthorizationSession::get(opCtx->getClient())
+ ->isAuthorizedForActionsOnResource(ResourcePattern::forClusterResource(),
+ ActionType::internal));
+ }
+ };
} configsvrCommitChunkMigrationCommand;
diff --git a/src/mongo/db/s/config/configsvr_configure_collection_balancing.cpp b/src/mongo/db/s/config/configsvr_configure_collection_balancing.cpp
index 136af191f6d..8769cbe9b53 100644
--- a/src/mongo/db/s/config/configsvr_configure_collection_balancing.cpp
+++ b/src/mongo/db/s/config/configsvr_configure_collection_balancing.cpp
@@ -66,11 +66,6 @@ public:
str::stream() << Request::kCommandName << " can only be run on config servers",
serverGlobalParams.clusterRole == ClusterRole::ConfigServer);
- uassert(8423309,
- str::stream() << Request::kCommandName << " command not supported",
- mongo::feature_flags::gPerCollBalancingSettings.isEnabled(
- serverGlobalParams.featureCompatibility));
-
const NamespaceString& nss = ns();
uassert(ErrorCodes::InvalidNamespace,
diff --git a/src/mongo/db/s/config/configsvr_merge_chunks_command.cpp b/src/mongo/db/s/config/configsvr_merge_chunks_command.cpp
index ea2823dcdf0..db155fa6bea 100644
--- a/src/mongo/db/s/config/configsvr_merge_chunks_command.cpp
+++ b/src/mongo/db/s/config/configsvr_merge_chunks_command.cpp
@@ -96,8 +96,8 @@ public:
request().getChunkRange(),
request().getShard(),
request().getValidAfter()));
- return ConfigSvrMergeResponse{ChunkVersion::fromBSONPositionalOrNewerFormat(
- shardAndCollVers[ChunkVersion::kShardVersionField])};
+ return ConfigSvrMergeResponse{
+ ChunkVersion::parse(shardAndCollVers[ChunkVersion::kShardVersionField])};
}
private:
diff --git a/src/mongo/db/s/config/configsvr_move_chunk_command.cpp b/src/mongo/db/s/config/configsvr_move_chunk_command.cpp
index 41d1679b4c9..cfa02c94711 100644
--- a/src/mongo/db/s/config/configsvr_move_chunk_command.cpp
+++ b/src/mongo/db/s/config/configsvr_move_chunk_command.cpp
@@ -96,20 +96,10 @@ public:
repl::ReadConcernArgs::get(opCtx) =
repl::ReadConcernArgs(repl::ReadConcernLevel::kLocalReadConcern);
- auto request = uassertStatusOK(
- BalanceChunkRequest::parseFromConfigCommand(cmdObj, false /* requireUUID */));
+ auto request = uassertStatusOK(BalanceChunkRequest::parseFromConfigCommand(cmdObj));
const auto& nss = request.getNss();
- // In case of mixed binaries including v5.0, the collection UUID field may not be attached
- // to the chunk.
- if (!request.getChunk().hasCollectionUUID_UNSAFE()) {
- // TODO (SERVER-60792): Remove the following logic after v6.0 branches out.
- const auto& collection = Grid::get(opCtx)->catalogClient()->getCollection(
- opCtx, nss, repl::ReadConcernLevel::kLocalReadConcern);
- request.setCollectionUUID(collection.getUuid()); // Set collection UUID on chunk member
- }
-
if (request.hasToShardId()) {
uassertStatusOK(Balancer::get(opCtx)->moveSingleChunk(opCtx,
nss,
diff --git a/src/mongo/db/s/config/configsvr_remove_chunks_command.cpp b/src/mongo/db/s/config/configsvr_remove_chunks_command.cpp
index c6ceb8a4ca2..da6ec5ed2b9 100644
--- a/src/mongo/db/s/config/configsvr_remove_chunks_command.cpp
+++ b/src/mongo/db/s/config/configsvr_remove_chunks_command.cpp
@@ -149,6 +149,10 @@ public:
AllowedOnSecondary secondaryAllowed(ServiceContext*) const override {
return AllowedOnSecondary::kNever;
}
+
+ bool supportsRetryableWrite() const final {
+ return true;
+ }
} configsvrRemoveChunksCmd;
} // namespace
diff --git a/src/mongo/db/s/config/configsvr_remove_tags_command.cpp b/src/mongo/db/s/config/configsvr_remove_tags_command.cpp
index f880d9be4bf..7333b0036dc 100644
--- a/src/mongo/db/s/config/configsvr_remove_tags_command.cpp
+++ b/src/mongo/db/s/config/configsvr_remove_tags_command.cpp
@@ -144,6 +144,10 @@ public:
AllowedOnSecondary secondaryAllowed(ServiceContext*) const override {
return AllowedOnSecondary::kNever;
}
+
+ bool supportsRetryableWrite() const final {
+ return true;
+ }
} configsvrRemoveTagsCmd;
} // namespace
diff --git a/src/mongo/db/s/config/configsvr_rename_collection_metadata_command.cpp b/src/mongo/db/s/config/configsvr_rename_collection_metadata_command.cpp
index 438a7d3227a..df59c5135ea 100644
--- a/src/mongo/db/s/config/configsvr_rename_collection_metadata_command.cpp
+++ b/src/mongo/db/s/config/configsvr_rename_collection_metadata_command.cpp
@@ -68,6 +68,10 @@ public:
return AllowedOnSecondary::kNever;
}
+ bool supportsRetryableWrite() const final {
+ return true;
+ }
+
class Invocation final : public InvocationBase {
public:
using InvocationBase::InvocationBase;
diff --git a/src/mongo/db/s/config/configsvr_reshard_collection_cmd.cpp b/src/mongo/db/s/config/configsvr_reshard_collection_cmd.cpp
index 7f284e2c642..1a094c7db5f 100644
--- a/src/mongo/db/s/config/configsvr_reshard_collection_cmd.cpp
+++ b/src/mongo/db/s/config/configsvr_reshard_collection_cmd.cpp
@@ -63,8 +63,9 @@ getExistingInstanceToJoin(OperationContext* opCtx,
const NamespaceString& nss,
const BSONObj& newShardKey) {
auto instances =
- getReshardingStateMachines<ReshardingCoordinatorService,
- ReshardingCoordinatorService::ReshardingCoordinator>(opCtx, nss);
+ resharding::getReshardingStateMachines<ReshardingCoordinatorService,
+ ReshardingCoordinatorService::ReshardingCoordinator>(
+ opCtx, nss);
for (const auto& instance : instances) {
if (SimpleBSONObjComparator::kInstance.evaluate(
instance->getMetadata().getReshardingKey().toBSON() == newShardKey)) {
@@ -139,7 +140,7 @@ public:
"Must specify only one of _presetReshardedChunks or numInitialChunks",
!(bool(request().getNumInitialChunks())));
- validateReshardedChunks(
+ resharding::validateReshardedChunks(
*presetChunks, opCtx, ShardKeyPattern(request().getKey()).getKeyPattern());
}
@@ -183,11 +184,12 @@ public:
return boost::none;
}
- auto tempReshardingNss = constructTemporaryReshardingNss(nss.db(), cm.getUUID());
+ auto tempReshardingNss =
+ resharding::constructTemporaryReshardingNss(nss.db(), cm.getUUID());
if (auto zones = request().getZones()) {
- checkForOverlappingZones(*zones);
+ resharding::checkForOverlappingZones(*zones);
}
auto coordinatorDoc =
diff --git a/src/mongo/db/s/config/configsvr_set_cluster_parameter_command.cpp b/src/mongo/db/s/config/configsvr_set_cluster_parameter_command.cpp
index 31a20120586..3b2a6c883df 100644
--- a/src/mongo/db/s/config/configsvr_set_cluster_parameter_command.cpp
+++ b/src/mongo/db/s/config/configsvr_set_cluster_parameter_command.cpp
@@ -62,12 +62,6 @@ public:
serverGlobalParams.clusterRole == ClusterRole::ConfigServer);
const auto coordinatorCompletionFuture = [&]() -> SharedSemiFuture<void> {
- FixedFCVRegion fcvRegion(opCtx);
- uassert(ErrorCodes::IllegalOperation,
- "featureFlagClusterWideConfig not enabled",
- gFeatureFlagClusterWideConfig.isEnabled(
- serverGlobalParams.featureCompatibility));
-
// Validate parameter before creating coordinator.
{
BSONObj cmdParamObj = request().getCommandParameter();
diff --git a/src/mongo/db/s/config/initial_split_policy.cpp b/src/mongo/db/s/config/initial_split_policy.cpp
index 1be2dd486fb..0b2ab1b0474 100644
--- a/src/mongo/db/s/config/initial_split_policy.cpp
+++ b/src/mongo/db/s/config/initial_split_policy.cpp
@@ -27,9 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include "mongo/db/s/config/initial_split_policy.h"
#include "mongo/client/read_preference.h"
@@ -50,7 +47,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
-
namespace mongo {
namespace {
@@ -73,17 +69,11 @@ void appendChunk(const SplitPolicyParams& params,
const BSONObj& min,
const BSONObj& max,
ChunkVersion* version,
- const Timestamp& creationTimestamp,
const ShardId& shardId,
std::vector<ChunkType>* chunks) {
- chunks->emplace_back(
- params.collectionUUID,
- ChunkRange(min, max),
- ChunkVersion(
- version->majorVersion(), version->minorVersion(), version->epoch(), creationTimestamp),
- shardId);
+ chunks->emplace_back(params.collectionUUID, ChunkRange(min, max), *version, shardId);
auto& chunk = chunks->back();
- chunk.setHistory({ChunkHistory(creationTimestamp, shardId)});
+ chunk.setHistory({ChunkHistory(version->getTimestamp(), shardId)});
version->incMinor();
}
@@ -238,7 +228,7 @@ InitialSplitPolicy::ShardCollectionConfig InitialSplitPolicy::generateShardColle
finalSplitPoints.push_back(splitPoint);
}
- ChunkVersion version(1, 0, OID::gen(), validAfter);
+ ChunkVersion version({OID::gen(), validAfter}, {1, 0});
const auto& keyPattern(shardKeyPattern.getKeyPattern());
std::vector<ChunkType> chunks;
@@ -254,7 +244,7 @@ InitialSplitPolicy::ShardCollectionConfig InitialSplitPolicy::generateShardColle
? params.primaryShardId
: allShardIds[(i / numContiguousChunksPerShard) % allShardIds.size()];
- appendChunk(params, min, max, &version, validAfter, shardId, &chunks);
+ appendChunk(params, min, max, &version, shardId, &chunks);
}
return {std::move(chunks)};
@@ -327,14 +317,13 @@ InitialSplitPolicy::ShardCollectionConfig SingleChunkOnPrimarySplitPolicy::creat
const auto currentTime = VectorClock::get(opCtx)->getTime();
const auto validAfter = currentTime.clusterTime().asTimestamp();
- ChunkVersion version(1, 0, OID::gen(), validAfter);
+ ChunkVersion version({OID::gen(), validAfter}, {1, 0});
const auto& keyPattern = shardKeyPattern.getKeyPattern();
std::vector<ChunkType> chunks;
appendChunk(params,
keyPattern.globalMin(),
keyPattern.globalMax(),
&version,
- validAfter,
params.primaryShardId,
&chunks);
@@ -421,19 +410,14 @@ InitialSplitPolicy::ShardCollectionConfig AbstractTagsBasedSplitPolicy::createFi
return shardIds[indx++ % shardIds.size()];
};
- ChunkVersion version(1, 0, OID::gen(), validAfter);
+ ChunkVersion version({OID::gen(), validAfter}, {1, 0});
auto lastChunkMax = keyPattern.globalMin();
std::vector<ChunkType> chunks;
for (const auto& tag : _tags) {
// Create a chunk for the hole [lastChunkMax, tag.getMinKey)
if (tag.getMinKey().woCompare(lastChunkMax) > 0) {
- appendChunk(params,
- lastChunkMax,
- tag.getMinKey(),
- &version,
- validAfter,
- nextShardIdForHole(),
- &chunks);
+ appendChunk(
+ params, lastChunkMax, tag.getMinKey(), &version, nextShardIdForHole(), &chunks);
}
// Create chunk for the actual tag - [tag.getMinKey, tag.getMaxKey)
const auto it = tagToShards.find(tag.getTag());
@@ -470,7 +454,7 @@ InitialSplitPolicy::ShardCollectionConfig AbstractTagsBasedSplitPolicy::createFi
const BSONObj max = (splitPointIdx == splitInfo.splitPoints.size())
? tag.getMaxKey()
: splitInfo.splitPoints[splitPointIdx];
- appendChunk(params, min, max, &version, validAfter, targetShard, &chunks);
+ appendChunk(params, min, max, &version, targetShard, &chunks);
}
}
lastChunkMax = tag.getMaxKey();
@@ -478,13 +462,8 @@ InitialSplitPolicy::ShardCollectionConfig AbstractTagsBasedSplitPolicy::createFi
// Create a chunk for the hole [lastChunkMax, MaxKey]
if (lastChunkMax.woCompare(keyPattern.globalMax()) < 0) {
- appendChunk(params,
- lastChunkMax,
- keyPattern.globalMax(),
- &version,
- validAfter,
- nextShardIdForHole(),
- &chunks);
+ appendChunk(
+ params, lastChunkMax, keyPattern.globalMax(), &version, nextShardIdForHole(), &chunks);
}
return {std::move(chunks)};
@@ -765,13 +744,13 @@ InitialSplitPolicy::ShardCollectionConfig ReshardingSplitPolicy::createFirstChun
const auto currentTime = VectorClock::get(opCtx)->getTime();
const auto validAfter = currentTime.clusterTime().asTimestamp();
- ChunkVersion version(1, 0, OID::gen(), validAfter);
+ ChunkVersion version({OID::gen(), validAfter}, {1, 0});
splitPoints.insert(keyPattern.globalMax());
for (const auto& splitPoint : splitPoints) {
auto bestShard = selectBestShard(
chunkDistribution, zoneInfo, zoneToShardMap, {lastChunkMax, splitPoint});
- appendChunk(params, lastChunkMax, splitPoint, &version, validAfter, bestShard, &chunks);
+ appendChunk(params, lastChunkMax, splitPoint, &version, bestShard, &chunks);
lastChunkMax = splitPoint;
chunkDistribution[bestShard]++;
diff --git a/src/mongo/db/s/config/initial_split_policy_test.cpp b/src/mongo/db/s/config/initial_split_policy_test.cpp
index 2eea0b6905f..9fc9a5576d0 100644
--- a/src/mongo/db/s/config/initial_split_policy_test.cpp
+++ b/src/mongo/db/s/config/initial_split_policy_test.cpp
@@ -27,9 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include "mongo/bson/json.h"
#include "mongo/db/s/config/config_server_test_fixture.h"
#include "mongo/db/s/config/initial_split_policy.h"
@@ -40,7 +37,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
-
namespace mongo {
namespace {
@@ -208,7 +204,7 @@ public:
std::vector<ChunkType> chunks;
for (unsigned long i = 0; i < chunkRanges.size(); ++i) {
- ChunkVersion version(1, 0, OID::gen(), Timestamp(1, 1));
+ ChunkVersion version({OID::gen(), Timestamp(1, 1)}, {1, 0});
ChunkType chunk(_uuid, chunkRanges[i], version, shardIds[i]);
chunk.setHistory({ChunkHistory(timeStamp, shardIds[i])});
chunks.push_back(chunk);
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_add_shard_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_add_shard_test.cpp
index 0a45a9d3a6d..bfef69bcb9f 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_add_shard_test.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_add_shard_test.cpp
@@ -175,8 +175,6 @@ protected:
}
void expectClusterParametersRequest(const HostAndPort& target) {
- if (!gFeatureFlagClusterWideConfig.isEnabled(serverGlobalParams.featureCompatibility))
- return;
auto clusterParameterDocs = uassertStatusOK(getConfigShard()->exhaustiveFindOnConfig(
operationContext(),
ReadPreferenceSetting(ReadPreference::PrimaryOnly),
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_bump_collection_version_and_change_metadata_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_bump_collection_version_and_change_metadata_test.cpp
index a4abd0ff45b..fbb502f933b 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_bump_collection_version_and_change_metadata_test.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_bump_collection_version_and_change_metadata_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/db/concurrency/exception_util.h"
#include "mongo/db/dbdirectclient.h"
#include "mongo/db/logical_session_cache_noop.h"
@@ -43,7 +41,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
-
namespace mongo {
namespace {
@@ -121,17 +118,17 @@ TEST_F(ShardingCatalogManagerBumpCollectionVersionAndChangeMetadataTest,
const auto collUUID = UUID::gen();
const auto shard0Chunk0 = generateChunkType(collUUID,
- ChunkVersion(10, 1, collEpoch, collTimestamp),
+ ChunkVersion({collEpoch, collTimestamp}, {10, 1}),
kShard0.getName(),
BSON("a" << 1),
BSON("a" << 10));
const auto shard0Chunk1 = generateChunkType(collUUID,
- ChunkVersion(11, 2, collEpoch, collTimestamp),
+ ChunkVersion({collEpoch, collTimestamp}, {11, 2}),
kShard0.getName(),
BSON("a" << 11),
BSON("a" << 20));
const auto shard1Chunk0 = generateChunkType(collUUID,
- ChunkVersion(8, 1, collEpoch, collTimestamp),
+ ChunkVersion({collEpoch, collTimestamp}, {8, 1}),
kShard1.getName(),
BSON("a" << 21),
BSON("a" << 100));
@@ -157,7 +154,7 @@ TEST_F(ShardingCatalogManagerBumpCollectionVersionAndChangeMetadataTest, NoChunk
const auto collUUID = UUID::gen();
const auto shard0Chunk0 = generateChunkType(collUUID,
- ChunkVersion(10, 1, collEpoch, collTimestamp),
+ ChunkVersion({collEpoch, collTimestamp}, {10, 1}),
kShard0.getName(),
BSON("a" << 1),
BSON("a" << 10));
@@ -182,12 +179,12 @@ TEST_F(ShardingCatalogManagerBumpCollectionVersionAndChangeMetadataTest,
const auto collUUID = UUID::gen();
const auto shard0Chunk0 = generateChunkType(collUUID,
- ChunkVersion(10, 1, collEpoch, collTimestamp),
+ ChunkVersion({collEpoch, collTimestamp}, {10, 1}),
kShard0.getName(),
BSON("a" << 1),
BSON("a" << 10));
const auto shard1Chunk0 = generateChunkType(collUUID,
- ChunkVersion(11, 2, collEpoch, collTimestamp),
+ ChunkVersion({collEpoch, collTimestamp}, {11, 2}),
kShard1.getName(),
BSON("a" << 11),
BSON("a" << 20));
@@ -244,12 +241,12 @@ TEST_F(ShardingCatalogManagerBumpCollectionVersionAndChangeMetadataTest,
const auto collUUID = UUID::gen();
const auto shard0Chunk0 = generateChunkType(collUUID,
- ChunkVersion(10, 1, collEpoch, collTimestamp),
+ ChunkVersion({collEpoch, collTimestamp}, {10, 1}),
kShard0.getName(),
BSON("a" << 1),
BSON("a" << 10));
const auto shard1Chunk0 = generateChunkType(collUUID,
- ChunkVersion(11, 2, collEpoch, collTimestamp),
+ ChunkVersion({collEpoch, collTimestamp}, {11, 2}),
kShard1.getName(),
BSON("a" << 11),
BSON("a" << 20));
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp b/src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp
index 0cf64cc5288..f461f1ae0a5 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp
@@ -27,9 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include "mongo/db/s/config/sharding_catalog_manager.h"
#include "mongo/base/status_with.h"
@@ -67,7 +64,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
-
namespace mongo {
namespace {
@@ -87,7 +83,7 @@ void appendShortVersion(BufBuilder* out, const ChunkType& chunk) {
bb.append(ChunkType::min(), chunk.getMin());
bb.append(ChunkType::max(), chunk.getMax());
if (chunk.isVersionSet()) {
- chunk.getVersion().appendLegacyWithField(&bb, ChunkType::lastmod());
+ chunk.getVersion().serializeToBSON(ChunkType::lastmod(), &bb);
}
bb.done();
}
@@ -268,7 +264,8 @@ ChunkVersion getShardVersion(OperationContext* opCtx,
if (swDonorShardVersion.getStatus().code() == 50577) {
// The query to find 'nss' chunks belonging to the donor shard didn't return any chunks,
// meaning the last chunk for fromShard was donated. Gracefully handle the error.
- return ChunkVersion(0, 0, collectionVersion.epoch(), collectionVersion.getTimestamp());
+ return ChunkVersion({collectionVersion.epoch(), collectionVersion.getTimestamp()},
+ {0, 0});
} else {
// Bubble up any other error
uassertStatusOK(swDonorShardVersion);
@@ -391,10 +388,9 @@ void ShardingCatalogManager::bumpMajorVersionOneChunkPerShard(
TxnNumber txnNumber,
const std::vector<ShardId>& shardIds) {
auto curCollectionVersion = uassertStatusOK(getCollectionVersion(opCtx, nss));
- ChunkVersion targetChunkVersion(curCollectionVersion.majorVersion() + 1,
- 0,
- curCollectionVersion.epoch(),
- curCollectionVersion.getTimestamp());
+ ChunkVersion targetChunkVersion(
+ {curCollectionVersion.epoch(), curCollectionVersion.getTimestamp()},
+ {curCollectionVersion.majorVersion() + 1, 0});
auto const configShard = Grid::get(opCtx)->shardRegistry()->getConfigShard();
auto findCollResponse = uassertStatusOK(
@@ -684,7 +680,7 @@ StatusWith<BSONObj> ShardingCatalogManager::commitChunkSplit(
BSONObjBuilder b(logDetail.subobjStart("before"));
b.append(ChunkType::min(), range.getMin());
b.append(ChunkType::max(), range.getMax());
- collVersion.appendLegacyWithField(&b, ChunkType::lastmod());
+ collVersion.serializeToBSON(ChunkType::lastmod(), &b);
}
if (splitChunkResult.newChunks->size() == 2) {
@@ -960,8 +956,8 @@ StatusWith<BSONObj> ShardingCatalogManager::commitChunksMerge(
b.append(chunkToMerge.toConfigBSON());
}
}
- initialVersion.appendLegacyWithField(&logDetail, "prevShardVersion");
- mergeVersion.appendLegacyWithField(&logDetail, "mergedVersion");
+ initialVersion.serializeToBSON("prevShardVersion", &logDetail);
+ mergeVersion.serializeToBSON("mergedVersion", &logDetail);
logDetail.append("owningShard", shardId);
ShardingLogging::get(opCtx)->logChange(
@@ -1127,10 +1123,9 @@ StatusWith<BSONObj> ShardingCatalogManager::commitChunkMigration(
newMigratedChunk->setMin(migratedChunk.getMin());
newMigratedChunk->setMax(migratedChunk.getMax());
newMigratedChunk->setShard(toShard);
- newMigratedChunk->setVersion(ChunkVersion(currentCollectionVersion.majorVersion() + 1,
- minVersionIncrement++,
- currentCollectionVersion.epoch(),
- currentCollectionVersion.getTimestamp()));
+ newMigratedChunk->setVersion(
+ ChunkVersion({currentCollectionVersion.epoch(), currentCollectionVersion.getTimestamp()},
+ {currentCollectionVersion.majorVersion() + 1, minVersionIncrement++}));
// Copy the complete history.
auto newHistory = currentChunk.getHistory();
@@ -1186,10 +1181,9 @@ StatusWith<BSONObj> ShardingCatalogManager::commitChunkMigration(
ChunkType leftSplitChunk = currentChunk;
leftSplitChunk.setName(OID::gen());
leftSplitChunk.setMax(movedChunkMin);
- leftSplitChunk.setVersion(ChunkVersion(movedChunkVersion.majorVersion(),
- minVersionIncrement++,
- movedChunkVersion.epoch(),
- movedChunkVersion.getTimestamp()));
+ leftSplitChunk.setVersion(
+ ChunkVersion({movedChunkVersion.epoch(), movedChunkVersion.getTimestamp()},
+ {movedChunkVersion.majorVersion(), minVersionIncrement++}));
newSplitChunks->emplace_back(std::move(leftSplitChunk));
}
@@ -1199,10 +1193,9 @@ StatusWith<BSONObj> ShardingCatalogManager::commitChunkMigration(
ChunkType rightSplitChunk = currentChunk;
rightSplitChunk.setName(OID::gen());
rightSplitChunk.setMin(movedChunkMax);
- rightSplitChunk.setVersion(ChunkVersion(movedChunkVersion.majorVersion(),
- minVersionIncrement++,
- movedChunkVersion.epoch(),
- movedChunkVersion.getTimestamp()));
+ rightSplitChunk.setVersion(
+ ChunkVersion({movedChunkVersion.epoch(), movedChunkVersion.getTimestamp()},
+ {movedChunkVersion.majorVersion(), minVersionIncrement++}));
newSplitChunks->emplace_back(std::move(rightSplitChunk));
}
}
@@ -1218,10 +1211,9 @@ StatusWith<BSONObj> ShardingCatalogManager::commitChunkMigration(
newControlChunk = std::make_shared<ChunkType>(origControlChunk);
// Setting control chunk's minor version to 1 on the donor shard.
- newControlChunk->setVersion(ChunkVersion(currentCollectionVersion.majorVersion() + 1,
- minVersionIncrement++,
- currentCollectionVersion.epoch(),
- currentCollectionVersion.getTimestamp()));
+ newControlChunk->setVersion(ChunkVersion(
+ {currentCollectionVersion.epoch(), currentCollectionVersion.getTimestamp()},
+ {currentCollectionVersion.majorVersion() + 1, minVersionIncrement++}));
}
_commitChunkMigrationInTransaction(
@@ -1232,7 +1224,7 @@ StatusWith<BSONObj> ShardingCatalogManager::commitChunkMigration(
// We migrated the last chunk from the donor shard.
newMigratedChunk->getVersion().serializeToBSON(kCollectionVersionField, &response);
const ChunkVersion donorShardVersion(
- 0, 0, currentCollectionVersion.epoch(), currentCollectionVersion.getTimestamp());
+ {currentCollectionVersion.epoch(), currentCollectionVersion.getTimestamp()}, {0, 0});
donorShardVersion.serializeToBSON(ChunkVersion::kShardVersionField, &response);
} else {
newControlChunk->getVersion().serializeToBSON(kCollectionVersionField, &response);
@@ -1349,8 +1341,8 @@ void ShardingCatalogManager::upgradeChunksHistory(OperationContext* opCtx,
}();
// Bump the major version in order to be guaranteed to trigger refresh on every shard
- ChunkVersion newCollectionVersion(
- collVersion.majorVersion() + 1, 0, collVersion.epoch(), collVersion.getTimestamp());
+ ChunkVersion newCollectionVersion({collVersion.epoch(), collVersion.getTimestamp()},
+ {collVersion.majorVersion() + 1, 0});
std::set<ShardId> changedShardIds;
for (const auto& chunk : allChunksVector) {
auto upgradeChunk = uassertStatusOK(
@@ -1491,10 +1483,9 @@ void ShardingCatalogManager::clearJumboFlag(OperationContext* opCtx,
<< chunk.toString() << ").",
currentCollectionVersion.epoch() == collectionEpoch);
- ChunkVersion newVersion(currentCollectionVersion.majorVersion() + 1,
- 0,
- currentCollectionVersion.epoch(),
- currentCollectionVersion.getTimestamp());
+ ChunkVersion newVersion(
+ {currentCollectionVersion.epoch(), currentCollectionVersion.getTimestamp()},
+ {currentCollectionVersion.majorVersion() + 1, 0});
BSONObj chunkQuery(BSON(ChunkType::min(chunk.getMin())
<< ChunkType::max(chunk.getMax()) << ChunkType::collectionUUID
@@ -1653,8 +1644,8 @@ void ShardingCatalogManager::ensureChunkVersionIsGreaterThan(OperationContext* o
// Generate a new version for the chunk by incrementing the collectionVersion's major
// version.
auto newChunk = matchingChunk;
- newChunk.setVersion(ChunkVersion(
- highestChunk.getVersion().majorVersion() + 1, 0, coll.getEpoch(), coll.getTimestamp()));
+ newChunk.setVersion(ChunkVersion({coll.getEpoch(), coll.getTimestamp()},
+ {highestChunk.getVersion().majorVersion() + 1, 0}));
// Update the chunk, if it still exists, to have the bumped version.
earlyReturnBeforeDoingWriteGuard.dismiss();
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_clear_jumbo_flag_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_clear_jumbo_flag_test.cpp
index 762961eaac3..9f883997a3d 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_clear_jumbo_flag_test.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_clear_jumbo_flag_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/bson/bsonobj.h"
#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/client/read_preference.h"
@@ -72,7 +70,7 @@ protected:
ChunkType chunk;
chunk.setName(OID::gen());
chunk.setCollectionUUID(collUuid);
- chunk.setVersion({12, 7, epoch, timestamp});
+ chunk.setVersion(ChunkVersion({epoch, timestamp}, {12, 7}));
chunk.setShard(_shardName);
chunk.setMin(jumboChunk().getMin());
chunk.setMax(jumboChunk().getMax());
@@ -81,7 +79,7 @@ protected:
ChunkType otherChunk;
otherChunk.setName(OID::gen());
otherChunk.setCollectionUUID(collUuid);
- otherChunk.setVersion({14, 7, epoch, timestamp});
+ otherChunk.setVersion(ChunkVersion({epoch, timestamp}, {14, 7}));
otherChunk.setShard(_shardName);
otherChunk.setMin(nonJumboChunk().getMin());
otherChunk.setMax(nonJumboChunk().getMax());
@@ -107,7 +105,7 @@ TEST_F(ClearJumboFlagTest, ClearJumboShouldBumpVersion) {
operationContext(), collUuid, jumboChunk().getMin(), collEpoch, collTimestamp));
ASSERT_FALSE(chunkDoc.getJumbo());
auto chunkVersion = chunkDoc.getVersion();
- ASSERT_EQ(ChunkVersion(15, 0, collEpoch, collTimestamp), chunkVersion);
+ ASSERT_EQ(ChunkVersion({collEpoch, collTimestamp}, {15, 0}), chunkVersion);
};
test(_nss2, Timestamp(42));
@@ -125,7 +123,7 @@ TEST_F(ClearJumboFlagTest, ClearJumboShouldNotBumpVersionIfChunkNotJumbo) {
auto chunkDoc = uassertStatusOK(getChunkDoc(
operationContext(), collUuid, nonJumboChunk().getMin(), collEpoch, collTimestamp));
ASSERT_FALSE(chunkDoc.getJumbo());
- ASSERT_EQ(ChunkVersion(14, 7, collEpoch, collTimestamp), chunkDoc.getVersion());
+ ASSERT_EQ(ChunkVersion({collEpoch, collTimestamp}, {14, 7}), chunkDoc.getVersion());
};
test(_nss2, Timestamp(42));
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_commit_chunk_migration_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_commit_chunk_migration_test.cpp
index 235954c5d5d..fc8a55a9635 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_commit_chunk_migration_test.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_commit_chunk_migration_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/bson/bsonobj.h"
#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/client/read_preference.h"
@@ -49,7 +47,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
-
namespace mongo {
namespace {
@@ -104,7 +101,7 @@ TEST_F(CommitChunkMigrate, ChunksUpdatedCorrectly) {
ChunkType migratedChunk, controlChunk;
{
- ChunkVersion origVersion(12, 7, collEpoch, collTimestamp);
+ ChunkVersion origVersion({collEpoch, collTimestamp}, {12, 7});
migratedChunk.setName(OID::gen());
migratedChunk.setCollectionUUID(collUUID);
@@ -140,15 +137,14 @@ TEST_F(CommitChunkMigrate, ChunksUpdatedCorrectly) {
validAfter));
// Verify the versions returned match expected values.
- auto mver = ChunkVersion::fromBSONPositionalOrNewerFormat(versions["shardVersion"]);
- ASSERT_EQ(ChunkVersion(migratedChunk.getVersion().majorVersion() + 1,
- 1,
- migratedChunk.getVersion().epoch(),
- migratedChunk.getVersion().getTimestamp()),
+ auto mver = ChunkVersion::parse(versions["shardVersion"]);
+ ASSERT_EQ(ChunkVersion(
+ {migratedChunk.getVersion().epoch(), migratedChunk.getVersion().getTimestamp()},
+ {migratedChunk.getVersion().majorVersion() + 1, 1}),
mver);
// Verify that a collection version is returned
- auto cver = ChunkVersion::fromBSONPositionalOrNewerFormat(versions["collectionVersion"]);
+ auto cver = ChunkVersion::parse(versions["collectionVersion"]);
ASSERT_TRUE(mver.isOlderOrEqualThan(cver));
// Verify the chunks ended up in the right shards.
@@ -188,8 +184,8 @@ TEST_F(CommitChunkMigrate, ChunksUpdatedCorrectlyWithoutControlChunk) {
setupShards({shard0, shard1});
- int origMajorVersion = 15;
- auto const origVersion = ChunkVersion(origMajorVersion, 4, collEpoch, collTimestamp);
+ uint32_t origMajorVersion = 15;
+ auto const origVersion = ChunkVersion({collEpoch, collTimestamp}, {origMajorVersion, 4});
ChunkType chunk0;
chunk0.setName(OID::gen());
@@ -222,12 +218,12 @@ TEST_F(CommitChunkMigrate, ChunksUpdatedCorrectlyWithoutControlChunk) {
// Verify the version returned matches expected value.
BSONObj versions = resultBSON.getValue();
- auto mver = ChunkVersion::fromBSONPositionalOrNewerFormat(versions["shardVersion"]);
- ASSERT_EQ(ChunkVersion(0, 0, origVersion.epoch(), origVersion.getTimestamp()), mver);
+ auto mver = ChunkVersion::parse(versions["shardVersion"]);
+ ASSERT_EQ(ChunkVersion({origVersion.epoch(), origVersion.getTimestamp()}, {0, 0}), mver);
// Verify that a collection version is returned
- auto cver = ChunkVersion::fromBSONPositionalOrNewerFormat(versions["collectionVersion"]);
- ASSERT_EQ(ChunkVersion(origMajorVersion + 1, 0, collEpoch, collTimestamp), cver);
+ auto cver = ChunkVersion::parse(versions["collectionVersion"]);
+ ASSERT_EQ(ChunkVersion({collEpoch, collTimestamp}, {origMajorVersion + 1, 0}), cver);
// Verify the chunk ended up in the right shard.
auto chunkDoc0 =
@@ -253,8 +249,8 @@ TEST_F(CommitChunkMigrate, CheckCorrectOpsCommandNoCtlTrimHistory) {
setupShards({shard0, shard1});
- int origMajorVersion = 15;
- auto const origVersion = ChunkVersion(origMajorVersion, 4, collEpoch, collTimestamp);
+ uint32_t origMajorVersion = 15;
+ auto const origVersion = ChunkVersion({collEpoch, collTimestamp}, {origMajorVersion, 4});
ChunkType chunk0;
chunk0.setName(OID::gen());
@@ -288,8 +284,8 @@ TEST_F(CommitChunkMigrate, CheckCorrectOpsCommandNoCtlTrimHistory) {
// Verify the version returned matches expected value.
BSONObj versions = resultBSON.getValue();
- auto mver = ChunkVersion::fromBSONPositionalOrNewerFormat(versions["shardVersion"]);
- ASSERT_EQ(ChunkVersion(0, 0, origVersion.epoch(), origVersion.getTimestamp()), mver);
+ auto mver = ChunkVersion::parse(versions["shardVersion"]);
+ ASSERT_EQ(ChunkVersion({origVersion.epoch(), origVersion.getTimestamp()}, {0, 0}), mver);
// Verify the chunk ended up in the right shard.
auto chunkDoc0 =
@@ -314,9 +310,8 @@ TEST_F(CommitChunkMigrate, RejectOutOfOrderHistory) {
setupShards({shard0, shard1});
- int origMajorVersion = 15;
- auto const origVersion =
- ChunkVersion(origMajorVersion, 4, OID::gen(), Timestamp(42) /* timestamp */);
+ uint32_t origMajorVersion = 15;
+ auto const origVersion = ChunkVersion({OID::gen(), Timestamp(42)}, {origMajorVersion, 4});
ChunkType chunk0;
chunk0.setName(OID::gen());
@@ -362,9 +357,8 @@ TEST_F(CommitChunkMigrate, RejectWrongCollectionEpoch0) {
setupShards({shard0, shard1});
- int origMajorVersion = 12;
- auto const origVersion =
- ChunkVersion(origMajorVersion, 7, OID::gen(), Timestamp(42) /* timestamp */);
+ uint32_t origMajorVersion = 12;
+ auto const origVersion = ChunkVersion({OID::gen(), Timestamp(42)}, {origMajorVersion, 7});
ChunkType chunk0;
chunk0.setName(OID::gen());
@@ -418,11 +412,9 @@ TEST_F(CommitChunkMigrate, RejectWrongCollectionEpoch1) {
setupShards({shard0, shard1});
- int origMajorVersion = 12;
- auto const origVersion =
- ChunkVersion(origMajorVersion, 7, OID::gen(), Timestamp(42) /* timestamp */);
- auto const otherVersion =
- ChunkVersion(origMajorVersion, 7, OID::gen(), Timestamp(42) /* timestamp */);
+ uint32_t origMajorVersion = 12;
+ auto const origVersion = ChunkVersion({OID::gen(), Timestamp(42)}, {origMajorVersion, 7});
+ auto const otherVersion = ChunkVersion({OID::gen(), Timestamp(42)}, {origMajorVersion, 7});
ChunkType chunk0;
chunk0.setName(OID::gen());
@@ -479,8 +471,8 @@ TEST_F(CommitChunkMigrate, CommitWithLastChunkOnShardShouldNotAffectOtherChunks)
setupShards({shard0, shard1});
- int origMajorVersion = 12;
- auto const origVersion = ChunkVersion(origMajorVersion, 7, collEpoch, collTimestamp);
+ uint32_t origMajorVersion = 12;
+ auto const origVersion = ChunkVersion({collEpoch, collTimestamp}, {origMajorVersion, 7});
ChunkType chunk0;
chunk0.setName(OID::gen());
@@ -525,8 +517,8 @@ TEST_F(CommitChunkMigrate, CommitWithLastChunkOnShardShouldNotAffectOtherChunks)
// Verify the versions returned match expected values.
BSONObj versions = resultBSON.getValue();
- auto mver = ChunkVersion::fromBSONPositionalOrNewerFormat(versions["shardVersion"]);
- ASSERT_EQ(ChunkVersion(0, 0, origVersion.epoch(), origVersion.getTimestamp()), mver);
+ auto mver = ChunkVersion::parse(versions["shardVersion"]);
+ ASSERT_EQ(ChunkVersion({origVersion.epoch(), origVersion.getTimestamp()}, {0, 0}), mver);
// Verify the chunks ended up in the right shards.
auto chunkDoc0 =
@@ -560,7 +552,7 @@ TEST_F(CommitChunkMigrate, RejectMissingChunkVersion) {
setupShards({shard0, shard1});
- ChunkVersion origVersion(12, 7, OID::gen(), Timestamp(42) /* timestamp */);
+ ChunkVersion origVersion({OID::gen(), Timestamp(42)}, {12, 7});
// Create migrate chunk with no chunk version set.
ChunkType migratedChunk;
@@ -610,7 +602,7 @@ TEST_F(CommitChunkMigrate, RejectOlderChunkVersion) {
setupShards({shard0, shard1});
auto epoch = OID::gen();
- ChunkVersion origVersion(12, 7, epoch, Timestamp(42) /* timestamp */);
+ ChunkVersion origVersion({epoch, Timestamp(42)}, {12, 7});
ChunkType migratedChunk;
migratedChunk.setName(OID::gen());
@@ -621,7 +613,7 @@ TEST_F(CommitChunkMigrate, RejectOlderChunkVersion) {
migratedChunk.setMin(BSON("a" << 1));
migratedChunk.setMax(BSON("a" << 10));
- ChunkVersion currentChunkVersion(14, 7, epoch, Timestamp(42) /* timestamp */);
+ ChunkVersion currentChunkVersion({epoch, Timestamp(42)}, {14, 7});
ChunkType currentChunk;
currentChunk.setName(OID::gen());
@@ -662,7 +654,7 @@ TEST_F(CommitChunkMigrate, RejectMismatchedEpoch) {
setupShards({shard0, shard1});
- ChunkVersion origVersion(12, 7, OID::gen(), Timestamp(42) /* timestamp */);
+ ChunkVersion origVersion({OID::gen(), Timestamp(42)}, {12, 7});
ChunkType migratedChunk;
migratedChunk.setName(OID::gen());
@@ -673,7 +665,7 @@ TEST_F(CommitChunkMigrate, RejectMismatchedEpoch) {
migratedChunk.setMin(BSON("a" << 1));
migratedChunk.setMax(BSON("a" << 10));
- ChunkVersion currentChunkVersion(12, 7, OID::gen(), Timestamp(42) /* timestamp */);
+ ChunkVersion currentChunkVersion({OID::gen(), Timestamp(42)}, {12, 7});
ChunkType currentChunk;
currentChunk.setName(OID::gen());
@@ -730,7 +722,7 @@ public:
void setupCollectionWithNChunks(int numberOfChunks) {
invariant(numberOfChunks > 0);
- int currentMajorVersion = 1;
+ uint32_t currentMajorVersion = 1;
int historyTimestampSecond = 100;
std::vector<ChunkHistory> history;
@@ -745,7 +737,7 @@ public:
const auto max = chunksMin.at(i + 1); // Max key of the chunk being created
const auto shardId = _shardIds.at(i % 2); // Shard owning the chunk
ChunkVersion version =
- ChunkVersion(currentMajorVersion++, 0, _collEpoch, _collTimestamp);
+ ChunkVersion({_collEpoch, _collTimestamp}, {currentMajorVersion++, 0});
history.insert(history.begin(),
{ChunkHistory(Timestamp(historyTimestampSecond++, 0), shardId)});
ChunkType chunk = createChunk(_collUUID, min, max, version, shardId, history);
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_ensure_chunk_version_is_greater_than_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_ensure_chunk_version_is_greater_than_test.cpp
index 20e8b2ecc6a..8921d0c2e8b 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_ensure_chunk_version_is_greater_than_test.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_ensure_chunk_version_is_greater_than_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/db/s/config/config_server_test_fixture.h"
#include "mongo/db/s/config/sharding_catalog_manager.h"
@@ -95,7 +93,7 @@ TEST_F(EnsureChunkVersionIsGreaterThanTest, IfNoCollectionFoundReturnsSuccess) {
const auto requestedChunkType =
generateChunkType(_nss,
_collUuid,
- ChunkVersion(10, 2, OID::gen(), Timestamp(1, 1)),
+ ChunkVersion({OID::gen(), Timestamp(1, 1)}, {10, 2}),
ShardId(_shardName),
BSON("a" << 1),
BSON("a" << 10));
@@ -112,12 +110,13 @@ TEST_F(EnsureChunkVersionIsGreaterThanTest, IfNoChunkWithMatchingMinKeyFoundRetu
const auto collEpoch = OID::gen();
const auto collTimestamp = Timestamp(42);
- const auto requestedChunkType = generateChunkType(_nss,
- _collUuid,
- ChunkVersion(10, 2, collEpoch, collTimestamp),
- ShardId(_shardName),
- BSON("a" << 1),
- BSON("a" << 10));
+ const auto requestedChunkType =
+ generateChunkType(_nss,
+ _collUuid,
+ ChunkVersion({collEpoch, collTimestamp}, {10, 2}),
+ ShardId(_shardName),
+ BSON("a" << 1),
+ BSON("a" << 10));
ChunkType existingChunkType = requestedChunkType;
// Min key is different.
@@ -140,12 +139,13 @@ TEST_F(EnsureChunkVersionIsGreaterThanTest, IfNoChunkWithMatchingMaxKeyFoundRetu
const auto collEpoch = OID::gen();
const auto collTimestamp = Timestamp(42);
- const auto requestedChunkType = generateChunkType(_nss,
- _collUuid,
- ChunkVersion(10, 2, collEpoch, collTimestamp),
- ShardId(_shardName),
- BSON("a" << 1),
- BSON("a" << 10));
+ const auto requestedChunkType =
+ generateChunkType(_nss,
+ _collUuid,
+ ChunkVersion({collEpoch, collTimestamp}, {10, 2}),
+ ShardId(_shardName),
+ BSON("a" << 1),
+ BSON("a" << 10));
ChunkType existingChunkType = requestedChunkType;
// Max key is different.
@@ -169,20 +169,22 @@ TEST_F(EnsureChunkVersionIsGreaterThanTest,
const auto collEpoch = OID::gen();
const auto collTimestamp = Timestamp(42);
- const auto requestedChunkType = generateChunkType(_nss,
- _collUuid,
- ChunkVersion(10, 2, collEpoch, collTimestamp),
- ShardId(_shardName),
- BSON("a" << 1),
- BSON("a" << 10));
+ const auto requestedChunkType =
+ generateChunkType(_nss,
+ _collUuid,
+ ChunkVersion({collEpoch, collTimestamp}, {10, 2}),
+ ShardId(_shardName),
+ BSON("a" << 1),
+ BSON("a" << 10));
const auto existingChunkType = requestedChunkType;
- const auto highestChunkType = generateChunkType(_nss,
- _collUuid,
- ChunkVersion(20, 3, collEpoch, collTimestamp),
- ShardId("shard0001"),
- BSON("a" << 11),
- BSON("a" << 20));
+ const auto highestChunkType =
+ generateChunkType(_nss,
+ _collUuid,
+ ChunkVersion({collEpoch, collTimestamp}, {20, 3}),
+ ShardId("shard0001"),
+ BSON("a" << 11),
+ BSON("a" << 20));
setupCollection(_nss, _keyPattern, {existingChunkType, highestChunkType});
ShardingCatalogManager::get(operationContext())
@@ -195,8 +197,8 @@ TEST_F(EnsureChunkVersionIsGreaterThanTest,
assertChunkVersionWasBumpedTo(
existingChunkType,
getChunkDoc(operationContext(), existingChunkType.getMin(), collEpoch, collTimestamp),
- ChunkVersion(
- highestChunkType.getVersion().majorVersion() + 1, 0, collEpoch, collTimestamp));
+ ChunkVersion({collEpoch, collTimestamp},
+ {highestChunkType.getVersion().majorVersion() + 1, 0}));
}
TEST_F(EnsureChunkVersionIsGreaterThanTest,
@@ -204,20 +206,22 @@ TEST_F(EnsureChunkVersionIsGreaterThanTest,
const auto collEpoch = OID::gen();
const auto collTimestamp = Timestamp(42);
- const auto requestedChunkType = generateChunkType(_nss,
- _collUuid,
- ChunkVersion(10, 2, collEpoch, collTimestamp),
- ShardId(_shardName),
- BSON("a" << 1),
- BSON("a" << 10));
+ const auto requestedChunkType =
+ generateChunkType(_nss,
+ _collUuid,
+ ChunkVersion({collEpoch, collTimestamp}, {10, 2}),
+ ShardId(_shardName),
+ BSON("a" << 1),
+ BSON("a" << 10));
const auto existingChunkType = requestedChunkType;
- const auto highestChunkType = generateChunkType(_nss,
- _collUuid,
- ChunkVersion(20, 3, collEpoch, collTimestamp),
- ShardId("shard0001"),
- BSON("a" << 11),
- BSON("a" << 20));
+ const auto highestChunkType =
+ generateChunkType(_nss,
+ _collUuid,
+ ChunkVersion({collEpoch, collTimestamp}, {20, 3}),
+ ShardId("shard0001"),
+ BSON("a" << 11),
+ BSON("a" << 20));
setupCollection(_nss, _keyPattern, {existingChunkType, highestChunkType});
ShardingCatalogManager::get(operationContext())
@@ -230,8 +234,8 @@ TEST_F(EnsureChunkVersionIsGreaterThanTest,
assertChunkVersionWasBumpedTo(
existingChunkType,
getChunkDoc(operationContext(), existingChunkType.getMin(), collEpoch, collTimestamp),
- ChunkVersion(
- highestChunkType.getVersion().majorVersion() + 1, 0, collEpoch, collTimestamp));
+ ChunkVersion({collEpoch, collTimestamp},
+ {highestChunkType.getVersion().majorVersion() + 1, 0}));
}
TEST_F(
@@ -240,15 +244,16 @@ TEST_F(
const auto collEpoch = OID::gen();
const auto collTimestamp = Timestamp(42);
- const auto requestedChunkType = generateChunkType(_nss,
- _collUuid,
- ChunkVersion(10, 2, collEpoch, collTimestamp),
- ShardId(_shardName),
- BSON("a" << 1),
- BSON("a" << 10));
+ const auto requestedChunkType =
+ generateChunkType(_nss,
+ _collUuid,
+ ChunkVersion({collEpoch, collTimestamp}, {10, 2}),
+ ShardId(_shardName),
+ BSON("a" << 1),
+ BSON("a" << 10));
ChunkType existingChunkType = requestedChunkType;
- existingChunkType.setVersion(ChunkVersion(11, 1, collEpoch, collTimestamp));
+ existingChunkType.setVersion(ChunkVersion({collEpoch, collTimestamp}, {11, 1}));
setupCollection(_nss, _keyPattern, {existingChunkType});
ShardingCatalogManager::get(operationContext())
@@ -269,15 +274,16 @@ TEST_F(
const auto collEpoch = OID::gen();
const auto collTimestamp = Timestamp(42);
- const auto requestedChunkType = generateChunkType(_nss,
- _collUuid,
- ChunkVersion(10, 2, collEpoch, collTimestamp),
- ShardId(_shardName),
- BSON("a" << 1),
- BSON("a" << 10));
+ const auto requestedChunkType =
+ generateChunkType(_nss,
+ _collUuid,
+ ChunkVersion({collEpoch, collTimestamp}, {10, 2}),
+ ShardId(_shardName),
+ BSON("a" << 1),
+ BSON("a" << 10));
ChunkType existingChunkType = requestedChunkType;
- existingChunkType.setVersion(ChunkVersion(11, 1, collEpoch, collTimestamp));
+ existingChunkType.setVersion(ChunkVersion({collEpoch, collTimestamp}, {11, 1}));
setupCollection(_nss, _keyPattern, {existingChunkType});
ShardingCatalogManager::get(operationContext())
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_merge_chunks_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_merge_chunks_test.cpp
index 3b5951cd82e..9d7e68c9a93 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_merge_chunks_test.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_merge_chunks_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/client/read_preference.h"
#include "mongo/db/dbdirectclient.h"
#include "mongo/db/logical_session_cache_noop.h"
@@ -85,7 +83,7 @@ TEST_F(MergeChunkTest, MergeExistingChunksCorrectlyShouldSucceed) {
chunk.setName(OID::gen());
chunk.setCollectionUUID(collUuid);
- auto origVersion = ChunkVersion(1, 0, collEpoch, collTimestamp);
+ auto origVersion = ChunkVersion({collEpoch, collTimestamp}, {1, 0});
chunk.setVersion(origVersion);
chunk.setShard(_shardId);
@@ -119,17 +117,16 @@ TEST_F(MergeChunkTest, MergeExistingChunksCorrectlyShouldSucceed) {
_shardId,
validAfter));
- auto collVersion = ChunkVersion::fromBSONPositionalOrNewerFormat(versions["collectionVersion"]);
- auto shardVersion = ChunkVersion::fromBSONPositionalOrNewerFormat(versions["shardVersion"]);
+ auto collVersion = ChunkVersion::parse(versions["collectionVersion"]);
+ auto shardVersion = ChunkVersion::parse(versions["shardVersion"]);
ASSERT_TRUE(origVersion.isOlderThan(shardVersion));
ASSERT_EQ(collVersion, shardVersion);
// Check for increment on mergedChunk's minor version
- auto expectedShardVersion = ChunkVersion(origVersion.majorVersion(),
- origVersion.minorVersion() + 1,
- origVersion.epoch(),
- origVersion.getTimestamp());
+ auto expectedShardVersion =
+ ChunkVersion({origVersion.epoch(), origVersion.getTimestamp()},
+ {origVersion.majorVersion(), origVersion.minorVersion() + 1});
ASSERT_EQ(expectedShardVersion, shardVersion);
@@ -170,7 +167,7 @@ TEST_F(MergeChunkTest, MergeSeveralChunksCorrectlyShouldSucceed) {
chunk.setName(OID::gen());
chunk.setCollectionUUID(collUuid);
- auto origVersion = ChunkVersion(1, 0, collEpoch, collTimestamp);
+ auto origVersion = ChunkVersion({collEpoch, collTimestamp}, {1, 0});
chunk.setVersion(origVersion);
chunk.setShard(_shardId);
@@ -251,7 +248,7 @@ TEST_F(MergeChunkTest, NewMergeShouldClaimHighestVersion) {
otherChunk.setName(OID::gen());
otherChunk.setCollectionUUID(collUuid);
- auto origVersion = ChunkVersion(1, 2, collEpoch, collTimestamp);
+ auto origVersion = ChunkVersion({collEpoch, collTimestamp}, {1, 2});
chunk.setVersion(origVersion);
chunk.setShard(_shardId);
@@ -273,7 +270,7 @@ TEST_F(MergeChunkTest, NewMergeShouldClaimHighestVersion) {
ChunkRange rangeToBeMerged(chunk.getMin(), chunk2.getMax());
// Set up other chunk with competing version
- auto competingVersion = ChunkVersion(2, 1, collEpoch, collTimestamp);
+ auto competingVersion = ChunkVersion({collEpoch, collTimestamp}, {2, 1});
otherChunk.setVersion(competingVersion);
otherChunk.setShard(_shardId);
otherChunk.setMin(BSON("a" << 10));
@@ -334,7 +331,7 @@ TEST_F(MergeChunkTest, MergeLeavesOtherChunksAlone) {
chunk.setName(OID::gen());
chunk.setCollectionUUID(collUuid);
- auto origVersion = ChunkVersion(1, 2, collEpoch, collTimestamp);
+ auto origVersion = ChunkVersion({collEpoch, collTimestamp}, {1, 2});
chunk.setVersion(origVersion);
chunk.setShard(shardId);
@@ -415,7 +412,7 @@ TEST_F(MergeChunkTest, NonExistingNamespace) {
ChunkType chunk;
chunk.setCollectionUUID(UUID::gen());
- auto origVersion = ChunkVersion(1, 0, collEpoch, collTimestamp);
+ auto origVersion = ChunkVersion({collEpoch, collTimestamp}, {1, 0});
chunk.setVersion(origVersion);
// Construct chunk to be merged
@@ -457,7 +454,7 @@ TEST_F(MergeChunkTest, NonMatchingUUIDsOfChunkAndRequestErrors) {
ChunkType chunk;
chunk.setCollectionUUID(collUuid);
- auto origVersion = ChunkVersion(1, 0, collEpoch, collTimestamp);
+ auto origVersion = ChunkVersion({collEpoch, collTimestamp}, {1, 0});
chunk.setVersion(origVersion);
chunk.setShard(_shardId);
@@ -503,7 +500,7 @@ TEST_F(MergeChunkTest, MergeAlreadyHappenedSucceeds) {
ChunkRange rangeToBeMerged(chunkMin, chunkMax);
// Store a chunk that matches the range that will be requested
- auto mergedVersion = ChunkVersion(1, 0, collEpoch, collTimestamp);
+ auto mergedVersion = ChunkVersion({collEpoch, collTimestamp}, {1, 0});
mergedVersion.incMinor();
ChunkType mergedChunk;
mergedChunk.setVersion(mergedVersion);
@@ -559,7 +556,7 @@ TEST_F(MergeChunkTest, MergingChunksWithDollarPrefixShouldSucceed) {
chunk1.setCollectionUUID(collUuid);
- auto origVersion = ChunkVersion(1, 0, collEpoch, collTimestamp);
+ auto origVersion = ChunkVersion({collEpoch, collTimestamp}, {1, 0});
chunk1.setVersion(origVersion);
chunk1.setShard(_shardId);
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_remove_shard_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_remove_shard_test.cpp
index 32544cacc7b..b54338947b1 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_remove_shard_test.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_remove_shard_test.cpp
@@ -27,9 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include <string>
#include <vector>
@@ -58,7 +55,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
-
namespace mongo {
namespace {
@@ -227,15 +223,15 @@ TEST_F(RemoveShardTest, RemoveShardStillDrainingChunksRemaining) {
const auto timestamp = Timestamp(1);
ChunkType chunk1(uuid,
ChunkRange(BSON("_id" << 0), BSON("_id" << 20)),
- ChunkVersion(1, 1, epoch, timestamp),
+ ChunkVersion({epoch, timestamp}, {1, 1}),
shard1.getName());
ChunkType chunk2(uuid,
ChunkRange(BSON("_id" << 21), BSON("_id" << 50)),
- ChunkVersion(1, 2, epoch, timestamp),
+ ChunkVersion({epoch, timestamp}, {1, 2}),
shard1.getName());
ChunkType chunk3(uuid,
ChunkRange(BSON("_id" << 51), BSON("_id" << 1000)),
- ChunkVersion(1, 3, epoch, timestamp),
+ ChunkVersion({epoch, timestamp}, {1, 3}),
shard1.getName());
chunk3.setJumbo(true);
@@ -314,15 +310,15 @@ TEST_F(RemoveShardTest, RemoveShardCompletion) {
Timestamp timestamp = Timestamp(1);
ChunkType chunk1(uuid,
ChunkRange(BSON("_id" << 0), BSON("_id" << 20)),
- ChunkVersion(1, 1, epoch, timestamp),
+ ChunkVersion({epoch, timestamp}, {1, 1}),
shard1.getName());
ChunkType chunk2(uuid,
ChunkRange(BSON("_id" << 21), BSON("_id" << 50)),
- ChunkVersion(1, 2, epoch, timestamp),
+ ChunkVersion({epoch, timestamp}, {1, 2}),
shard1.getName());
ChunkType chunk3(uuid,
ChunkRange(BSON("_id" << 51), BSON("_id" << 1000)),
- ChunkVersion(1, 3, epoch, timestamp),
+ ChunkVersion({epoch, timestamp}, {1, 3}),
shard1.getName());
std::vector<ChunkType> chunks{chunk1, chunk2, chunk3};
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp b/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp
index 7de1d4c3efe..947ec9fb3c2 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp
@@ -1201,9 +1201,6 @@ void ShardingCatalogManager::_pushClusterParametersToNewShard(
void ShardingCatalogManager::_standardizeClusterParameters(OperationContext* opCtx,
RemoteCommandTargeter* targeter) {
- if (!gFeatureFlagClusterWideConfig.isEnabled(serverGlobalParams.featureCompatibility))
- return;
-
auto clusterParameterDocs =
uassertStatusOK(Grid::get(opCtx)->shardRegistry()->getConfigShard()->exhaustiveFindOnConfig(
opCtx,
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_split_chunk_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_split_chunk_test.cpp
index 1cc5f1c677d..9b9e48cfe0b 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_split_chunk_test.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_split_chunk_test.cpp
@@ -80,7 +80,7 @@ TEST_F(SplitChunkTest, SplitExistingChunkCorrectlyShouldSucceed) {
chunk.setName(OID::gen());
chunk.setCollectionUUID(collUuid);
- auto origVersion = ChunkVersion(1, 0, collEpoch, collTimestamp);
+ auto origVersion = ChunkVersion({collEpoch, collTimestamp}, {1, 0});
chunk.setVersion(origVersion);
chunk.setShard(ShardId(_shardName));
@@ -105,16 +105,16 @@ TEST_F(SplitChunkTest, SplitExistingChunkCorrectlyShouldSucceed) {
splitPoints,
"shard0000",
false /* fromChunkSplitter*/));
- auto collVersion =
- ChunkVersion::fromBSONPositionalOrNewerFormat(versions["collectionVersion"]);
- auto shardVersion = ChunkVersion::fromBSONPositionalOrNewerFormat(versions["shardVersion"]);
+ auto collVersion = ChunkVersion::parse(versions["collectionVersion"]);
+ auto shardVersion = ChunkVersion::parse(versions["shardVersion"]);
ASSERT_TRUE(origVersion.isOlderThan(shardVersion));
ASSERT_EQ(collVersion, shardVersion);
// Check for increment on mergedChunk's minor version
- auto expectedShardVersion = ChunkVersion(
- origVersion.majorVersion(), origVersion.minorVersion() + 2, collEpoch, collTimestamp);
+ auto expectedShardVersion =
+ ChunkVersion({collEpoch, collTimestamp},
+ {origVersion.majorVersion(), origVersion.minorVersion() + 2});
ASSERT_EQ(expectedShardVersion, shardVersion);
ASSERT_EQ(shardVersion, collVersion);
@@ -164,7 +164,7 @@ TEST_F(SplitChunkTest, MultipleSplitsOnExistingChunkShouldSucceed) {
chunk.setName(OID::gen());
chunk.setCollectionUUID(collUuid);
- auto origVersion = ChunkVersion(1, 0, collEpoch, collTimestamp);
+ auto origVersion = ChunkVersion({collEpoch, collTimestamp}, {1, 0});
chunk.setVersion(origVersion);
chunk.setShard(ShardId(_shardName));
@@ -256,7 +256,7 @@ TEST_F(SplitChunkTest, NewSplitShouldClaimHighestVersion) {
chunk2.setCollectionUUID(collUuid);
// set up first chunk
- auto origVersion = ChunkVersion(1, 2, collEpoch, collTimestamp);
+ auto origVersion = ChunkVersion({collEpoch, collTimestamp}, {1, 2});
chunk.setVersion(origVersion);
chunk.setShard(ShardId(_shardName));
@@ -270,7 +270,7 @@ TEST_F(SplitChunkTest, NewSplitShouldClaimHighestVersion) {
splitPoints.push_back(chunkSplitPoint);
// set up second chunk (chunk2)
- auto competingVersion = ChunkVersion(2, 1, collEpoch, collTimestamp);
+ auto competingVersion = ChunkVersion({collEpoch, collTimestamp}, {2, 1});
chunk2.setVersion(competingVersion);
chunk2.setShard(ShardId(_shardName));
chunk2.setMin(BSON("a" << 10));
@@ -324,7 +324,7 @@ TEST_F(SplitChunkTest, PreConditionFailErrors) {
chunk.setName(OID::gen());
chunk.setCollectionUUID(UUID::gen());
- auto origVersion = ChunkVersion(1, 0, collEpoch, collTimestamp);
+ auto origVersion = ChunkVersion({collEpoch, collTimestamp}, {1, 0});
chunk.setVersion(origVersion);
chunk.setShard(ShardId(_shardName));
@@ -362,7 +362,7 @@ TEST_F(SplitChunkTest, NonExisingNamespaceErrors) {
ChunkType chunk;
chunk.setCollectionUUID(UUID::gen());
- auto origVersion = ChunkVersion(1, 0, collEpoch, collTimestamp);
+ auto origVersion = ChunkVersion({collEpoch, collTimestamp}, {1, 0});
chunk.setVersion(origVersion);
chunk.setShard(ShardId(_shardName));
@@ -398,7 +398,7 @@ TEST_F(SplitChunkTest, NonMatchingEpochsOfChunkAndRequestErrors) {
ChunkType chunk;
chunk.setCollectionUUID(UUID::gen());
- auto origVersion = ChunkVersion(1, 0, collEpoch, collTimestamp);
+ auto origVersion = ChunkVersion({collEpoch, collTimestamp}, {1, 0});
chunk.setVersion(origVersion);
chunk.setShard(ShardId(_shardName));
@@ -434,7 +434,7 @@ TEST_F(SplitChunkTest, SplitPointsOutOfOrderShouldFail) {
chunk.setName(OID::gen());
chunk.setCollectionUUID(UUID::gen());
- auto origVersion = ChunkVersion(1, 0, collEpoch, collTimestamp);
+ auto origVersion = ChunkVersion({collEpoch, collTimestamp}, {1, 0});
chunk.setVersion(origVersion);
chunk.setShard(ShardId(_shardName));
@@ -470,7 +470,7 @@ TEST_F(SplitChunkTest, SplitPointsOutOfRangeAtMinShouldFail) {
ChunkType chunk;
chunk.setCollectionUUID(UUID::gen());
- auto origVersion = ChunkVersion(1, 0, collEpoch, collTimestamp);
+ auto origVersion = ChunkVersion({collEpoch, collTimestamp}, {1, 0});
chunk.setVersion(origVersion);
chunk.setShard(ShardId(_shardName));
@@ -507,7 +507,7 @@ TEST_F(SplitChunkTest, SplitPointsOutOfRangeAtMaxShouldFail) {
chunk.setName(OID::gen());
chunk.setCollectionUUID(UUID::gen());
- auto origVersion = ChunkVersion(1, 0, collEpoch, collTimestamp);
+ auto origVersion = ChunkVersion({collEpoch, collTimestamp}, {1, 0});
chunk.setVersion(origVersion);
chunk.setShard(ShardId(_shardName));
@@ -543,7 +543,7 @@ TEST_F(SplitChunkTest, SplitPointsWithDollarPrefixShouldFail) {
ChunkType chunk;
chunk.setCollectionUUID(UUID::gen());
- auto origVersion = ChunkVersion(1, 0, collEpoch, collTimestamp);
+ auto origVersion = ChunkVersion({collEpoch, collTimestamp}, {1, 0});
chunk.setVersion(origVersion);
chunk.setShard(ShardId(_shardName));
@@ -588,7 +588,7 @@ TEST_F(SplitChunkTest, CantCommitSplitFromChunkSplitterDuringDefragmentation) {
chunk.setName(OID::gen());
chunk.setCollectionUUID(collUuid);
- auto version = ChunkVersion(1, 0, collEpoch, collTimestamp);
+ auto version = ChunkVersion({collEpoch, collTimestamp}, {1, 0});
chunk.setVersion(version);
chunk.setShard(ShardId(_shardName));
diff --git a/src/mongo/db/s/create_collection_coordinator.cpp b/src/mongo/db/s/create_collection_coordinator.cpp
index a5b499cfe81..ccbad667d35 100644
--- a/src/mongo/db/s/create_collection_coordinator.cpp
+++ b/src/mongo/db/s/create_collection_coordinator.cpp
@@ -359,39 +359,8 @@ void broadcastDropCollection(OperationContext* opCtx,
} // namespace
-CreateCollectionCoordinator::CreateCollectionCoordinator(ShardingDDLCoordinatorService* service,
- const BSONObj& initialState)
- : ShardingDDLCoordinator(service, initialState),
- _doc(CreateCollectionCoordinatorDocument::parse(
- IDLParserErrorContext("CreateCollectionCoordinatorDocument"), initialState)),
- _request(_doc.getCreateCollectionRequest()),
- _critSecReason(BSON("command"
- << "createCollection"
- << "ns" << nss().toString())) {}
-
-boost::optional<BSONObj> CreateCollectionCoordinator::reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept {
- BSONObjBuilder cmdBob;
- if (const auto& optComment = getForwardableOpMetadata().getComment()) {
- cmdBob.append(optComment.get().firstElement());
- }
- cmdBob.appendElements(_request.toBSON());
-
- const auto currPhase = [&]() {
- stdx::lock_guard l{_docMutex};
- return _doc.getPhase();
- }();
-
- BSONObjBuilder bob;
- bob.append("type", "op");
- bob.append("desc", "CreateCollectionCoordinator");
- bob.append("op", "command");
- bob.append("ns", nss().toString());
- bob.append("command", cmdBob.obj());
- bob.append("currentPhase", currPhase);
- bob.append("active", true);
- return bob.obj();
+void CreateCollectionCoordinator::appendCommandInfo(BSONObjBuilder* cmdInfoBuilder) const {
+ cmdInfoBuilder->appendElements(_request.toBSON());
}
void CreateCollectionCoordinator::checkIfOptionsConflict(const BSONObj& doc) const {
@@ -435,9 +404,9 @@ ExecutorFuture<void> CreateCollectionCoordinator::_runImpl(
// Additionally we want to perform a majority write on the CSRS to ensure that
// all the subsequent reads will see all the writes performed from a previous
// execution of this coordinator.
- _doc = _updateSession(opCtx, _doc);
+ _updateSession(opCtx);
_performNoopRetryableWriteOnAllShardsAndConfigsvr(
- opCtx, getCurrentSession(_doc), **executor);
+ opCtx, getCurrentSession(), **executor);
}
// Log the start of the event only if we're not recovering.
@@ -461,7 +430,7 @@ ExecutorFuture<void> CreateCollectionCoordinator::_runImpl(
->releaseRecoverableCriticalSection(
opCtx,
nss(),
- _getCriticalSectionReason(),
+ _critSecReason,
ShardingCatalogClient::kMajorityWriteConcern);
_result = createCollectionResponseOpt;
@@ -474,10 +443,7 @@ ExecutorFuture<void> CreateCollectionCoordinator::_runImpl(
// presence of a stepdown.
RecoverableCriticalSectionService::get(opCtx)
->acquireRecoverableCriticalSectionBlockWrites(
- opCtx,
- nss(),
- _getCriticalSectionReason(),
- ShardingCatalogClient::kMajorityWriteConcern);
+ opCtx, nss(), _critSecReason, ShardingCatalogClient::kMajorityWriteConcern);
if (!_firstExecution) {
auto uuid = sharding_ddl_util::getCollectionUUID(opCtx, nss());
@@ -489,12 +455,11 @@ ExecutorFuture<void> CreateCollectionCoordinator::_runImpl(
"Removing partial changes from previous run",
"namespace"_attr = nss());
- _doc = _updateSession(opCtx, _doc);
- cleanupPartialChunksFromPreviousAttempt(
- opCtx, *uuid, getCurrentSession(_doc));
+ _updateSession(opCtx);
+ cleanupPartialChunksFromPreviousAttempt(opCtx, *uuid, getCurrentSession());
- _doc = _updateSession(opCtx, _doc);
- broadcastDropCollection(opCtx, nss(), **executor, getCurrentSession(_doc));
+ _updateSession(opCtx);
+ broadcastDropCollection(opCtx, nss(), **executor, getCurrentSession());
}
}
@@ -517,28 +482,18 @@ ExecutorFuture<void> CreateCollectionCoordinator::_runImpl(
->promoteRecoverableCriticalSectionToBlockAlsoReads(
opCtx,
nss(),
- _getCriticalSectionReason(),
+ _critSecReason,
ShardingCatalogClient::kMajorityWriteConcern);
- _doc = _updateSession(opCtx, _doc);
- try {
- _createCollectionOnNonPrimaryShards(opCtx, getCurrentSession(_doc));
- } catch (const ExceptionFor<ErrorCodes::NotARetryableWriteCommand>&) {
- // Older 5.0 binaries don't support running the
- // _shardsvrCreateCollectionParticipant command as a retryable write yet. In
- // that case, retry without attaching session info.
- _createCollectionOnNonPrimaryShards(opCtx, boost::none);
- }
+ _updateSession(opCtx);
+ _createCollectionOnNonPrimaryShards(opCtx, getCurrentSession());
_commit(opCtx);
}
// End of the critical section, from now on, read and writes are permitted.
RecoverableCriticalSectionService::get(opCtx)->releaseRecoverableCriticalSection(
- opCtx,
- nss(),
- _getCriticalSectionReason(),
- ShardingCatalogClient::kMajorityWriteConcern);
+ opCtx, nss(), _critSecReason, ShardingCatalogClient::kMajorityWriteConcern);
// Slow path. Create chunks (which might incur in an index scan) and commit must be
// done outside of the critical section to prevent writes from stalling in unsharded
@@ -566,10 +521,7 @@ ExecutorFuture<void> CreateCollectionCoordinator::_runImpl(
auto* opCtx = opCtxHolder.get();
RecoverableCriticalSectionService::get(opCtx)->releaseRecoverableCriticalSection(
- opCtx,
- nss(),
- _getCriticalSectionReason(),
- ShardingCatalogClient::kMajorityWriteConcern);
+ opCtx, nss(), _critSecReason, ShardingCatalogClient::kMajorityWriteConcern);
}
return status;
});
@@ -751,7 +703,7 @@ void CreateCollectionCoordinator::_createChunks(OperationContext* opCtx) {
}
void CreateCollectionCoordinator::_createCollectionOnNonPrimaryShards(
- OperationContext* opCtx, const boost::optional<OperationSessionInfo>& osi) {
+ OperationContext* opCtx, const OperationSessionInfo& osi) {
LOGV2_DEBUG(5277905,
2,
"Create collection _createCollectionOnNonPrimaryShards",
@@ -778,10 +730,9 @@ void CreateCollectionCoordinator::_createCollectionOnNonPrimaryShards(
createCollectionParticipantRequest.setIdIndex(idIndex);
createCollectionParticipantRequest.setIndexes(indexes);
- requests.emplace_back(
- chunkShardId,
- CommandHelpers::appendMajorityWriteConcern(
- createCollectionParticipantRequest.toBSON(osi ? osi->toBSON() : BSONObj())));
+ requests.emplace_back(chunkShardId,
+ CommandHelpers::appendMajorityWriteConcern(
+ createCollectionParticipantRequest.toBSON(osi.toBSON())));
initializedShards.emplace(chunkShardId);
}
@@ -817,8 +768,8 @@ void CreateCollectionCoordinator::_commit(OperationContext* opCtx) {
LOGV2_DEBUG(5277906, 2, "Create collection _commit", "namespace"_attr = nss());
// Upsert Chunks.
- _doc = _updateSession(opCtx, _doc);
- insertChunks(opCtx, _initialChunks->chunks, getCurrentSession(_doc));
+ _updateSession(opCtx);
+ insertChunks(opCtx, _initialChunks->chunks, getCurrentSession());
CollectionType coll(nss(),
_initialChunks->collVersion().epoch(),
@@ -841,9 +792,9 @@ void CreateCollectionCoordinator::_commit(OperationContext* opCtx) {
coll.setUnique(*_request.getUnique());
}
- _doc = _updateSession(opCtx, _doc);
+ _updateSession(opCtx);
try {
- insertCollectionEntry(opCtx, nss(), coll, getCurrentSession(_doc));
+ insertCollectionEntry(opCtx, nss(), coll, getCurrentSession());
notifyChangeStreamsOnShardCollection(opCtx, nss(), *_collectionUUID, _request.toBSON());
@@ -927,57 +878,4 @@ void CreateCollectionCoordinator::_logEndCreateCollection(OperationContext* opCt
opCtx, "shardCollection.end", nss().ns(), collectionDetail.obj());
}
-// Phase change API.
-
-void CreateCollectionCoordinator::_enterPhase(Phase newPhase) {
- CoordDoc newDoc(_doc);
- newDoc.setPhase(newPhase);
-
- LOGV2_DEBUG(5565600,
- 2,
- "Create collection coordinator phase transition",
- "namespace"_attr = nss(),
- "newPhase"_attr = CreateCollectionCoordinatorPhase_serializer(newDoc.getPhase()),
- "oldPhase"_attr = CreateCollectionCoordinatorPhase_serializer(_doc.getPhase()));
-
- if (_doc.getPhase() == Phase::kUnset) {
- newDoc = _insertStateDocument(std::move(newDoc));
- } else {
- newDoc = _updateStateDocument(cc().makeOperationContext().get(), std::move(newDoc));
- }
-
- {
- stdx::unique_lock ul{_docMutex};
- _doc = std::move(newDoc);
- }
-}
-
-const BSONObj CreateCollectionCoordinatorDocumentPre60Compatible::kPre60IncompatibleFields =
- BSON(CreateCollectionRequest::kCollectionUUIDFieldName
- << 1 << CreateCollectionRequest::kImplicitlyCreateIndexFieldName << 1
- << CreateCollectionRequest::kEnforceUniquenessCheckFieldName << 1);
-
-void CreateCollectionCoordinatorDocumentPre60Compatible::serialize(BSONObjBuilder* builder) const {
- BSONObjBuilder internalBuilder;
- CreateCollectionCoordinatorDocument::serialize(&internalBuilder);
- internalBuilder.asTempObj().filterFieldsUndotted(builder, kPre60IncompatibleFields, false);
-}
-
-BSONObj CreateCollectionCoordinatorDocumentPre60Compatible::toBSON() const {
- BSONObjBuilder builder;
- serialize(&builder);
- return builder.obj();
-}
-
-CreateCollectionCoordinatorPre60Compatible::CreateCollectionCoordinatorPre60Compatible(
- ShardingDDLCoordinatorService* service, const BSONObj& initialState)
- : CreateCollectionCoordinator(service, initialState),
- _critSecReason(
- BSON("command"
- << "createCollection"
- << "ns" << nss().toString() << "request"
- << _request.toBSON().filterFieldsUndotted(
- CreateCollectionCoordinatorDocumentPre60Compatible::kPre60IncompatibleFields,
- false))) {}
-
} // namespace mongo
diff --git a/src/mongo/db/s/create_collection_coordinator.h b/src/mongo/db/s/create_collection_coordinator.h
index 565972afcb1..a1f8bbea4e8 100644
--- a/src/mongo/db/s/create_collection_coordinator.h
+++ b/src/mongo/db/s/create_collection_coordinator.h
@@ -39,21 +39,26 @@
namespace mongo {
-class CreateCollectionCoordinator : public ShardingDDLCoordinator {
+class CreateCollectionCoordinator
+ : public RecoverableShardingDDLCoordinator<CreateCollectionCoordinatorDocument,
+ CreateCollectionCoordinatorPhaseEnum> {
public:
using CoordDoc = CreateCollectionCoordinatorDocument;
using Phase = CreateCollectionCoordinatorPhaseEnum;
- CreateCollectionCoordinator(ShardingDDLCoordinatorService* service,
- const BSONObj& initialState);
+ CreateCollectionCoordinator(ShardingDDLCoordinatorService* service, const BSONObj& initialState)
+ : RecoverableShardingDDLCoordinator(service, "CreateCollectionCoordinator", initialState),
+ _request(_doc.getCreateCollectionRequest()),
+ _critSecReason(BSON("command"
+ << "createCollection"
+ << "ns" << nss().toString())) {}
+
~CreateCollectionCoordinator() = default;
void checkIfOptionsConflict(const BSONObj& coorDoc) const override;
- boost::optional<BSONObj> reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept override;
+ void appendCommandInfo(BSONObjBuilder* cmdInfoBuilder) const override;
/**
* Waits for the termination of the parent DDLCoordinator (so all the resources are liberated)
@@ -66,38 +71,16 @@ public:
}
protected:
- mutable Mutex _docMutex = MONGO_MAKE_LATCH("CreateCollectionCoordinator::_docMutex");
- CoordDoc _doc;
-
const mongo::CreateCollectionRequest _request;
private:
- ShardingDDLCoordinatorMetadata const& metadata() const override {
- return _doc.getShardingDDLCoordinatorMetadata();
+ StringData serializePhase(const Phase& phase) const override {
+ return CreateCollectionCoordinatorPhase_serializer(phase);
}
ExecutorFuture<void> _runImpl(std::shared_ptr<executor::ScopedTaskExecutor> executor,
const CancellationToken& token) noexcept override;
- template <typename Func>
- auto _executePhase(const Phase& newPhase, Func&& func) {
- return [=] {
- const auto& currPhase = _doc.getPhase();
-
- if (currPhase > newPhase) {
- // Do not execute this phase if we already reached a subsequent one.
- return;
- }
- if (currPhase < newPhase) {
- // Persist the new phase if this is the first time we are executing it.
- _enterPhase(newPhase);
- }
- return func();
- };
- };
-
- void _enterPhase(Phase newState);
-
/**
* Performs all required checks before holding the critical sections.
*/
@@ -128,7 +111,7 @@ private:
* participant shards.
*/
void _createCollectionOnNonPrimaryShards(OperationContext* opCtx,
- const boost::optional<OperationSessionInfo>& osi);
+ const OperationSessionInfo& osi);
/**
* Does the following writes:
@@ -147,16 +130,6 @@ private:
*/
void _logEndCreateCollection(OperationContext* opCtx);
- /**
- * Returns the BSONObj used as critical section reason
- *
- * TODO SERVER-64720 remove this function, directly access _critSecReason
- *
- */
- virtual const BSONObj& _getCriticalSectionReason() const {
- return _critSecReason;
- };
-
const BSONObj _critSecReason;
// The shard key of the collection, static for the duration of the coordinator and reflects the
@@ -177,32 +150,4 @@ private:
boost::optional<bool> _collectionEmpty;
};
-class CreateCollectionCoordinatorDocumentPre60Compatible final
- : public CreateCollectionCoordinatorDocument {
- // TODO SERVER-64720 remove once 6.0 becomes last LTS
-public:
- using CreateCollectionCoordinatorDocument::CreateCollectionCoordinatorDocument;
-
- static const BSONObj kPre60IncompatibleFields;
- void serialize(BSONObjBuilder* builder) const;
- BSONObj toBSON() const;
-};
-
-class CreateCollectionCoordinatorPre60Compatible final : public CreateCollectionCoordinator {
- // TODO SERVER-64720 remove once 6.0 becomes last LTS
-public:
- using CreateCollectionCoordinator::CreateCollectionCoordinator;
- using CoordDoc = CreateCollectionCoordinatorDocumentPre60Compatible;
-
- CreateCollectionCoordinatorPre60Compatible(ShardingDDLCoordinatorService* service,
- const BSONObj& initialState);
-
- virtual const BSONObj& _getCriticalSectionReason() const override {
- return _critSecReason;
- };
-
-private:
- const BSONObj _critSecReason;
-};
-
} // namespace mongo
diff --git a/src/mongo/db/s/create_collection_coordinator_test.cpp b/src/mongo/db/s/create_collection_coordinator_test.cpp
deleted file mode 100644
index 772ac8933a1..00000000000
--- a/src/mongo/db/s/create_collection_coordinator_test.cpp
+++ /dev/null
@@ -1,133 +0,0 @@
-/**
- * Copyright (C) 2022-present MongoDB, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the Server Side Public License, version 1,
- * as published by MongoDB, Inc.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * Server Side Public License for more details.
- *
- * You should have received a copy of the Server Side Public License
- * along with this program. If not, see
- * <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the Server Side Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-#include "mongo/unittest/unittest.h"
-
-#include "mongo/db/s/create_collection_coordinator.h"
-
-namespace mongo {
-namespace {
-
-static const auto kShardKey = BSON("x" << 1);
-static const NamespaceString kNs{"db.test"};
-
-TEST(CreateCollectionCoordinator, pre60CompatibleGetters) {
- const auto kUUID = UUID::gen();
-
- auto req = [&] {
- CreateCollectionRequest creq;
- creq.setShardKey(kShardKey.getOwned());
- creq.setCollectionUUID(kUUID);
- creq.setImplicitlyCreateIndex(false);
- creq.setEnforceUniquenessCheck(false);
- return creq;
- };
-
- auto pre60CompatDoc = [&] {
- auto doc = CreateCollectionCoordinatorDocumentPre60Compatible();
- doc.setShardingDDLCoordinatorMetadata(
- {{kNs, DDLCoordinatorTypeEnum::kCreateCollectionPre60Compatible}});
- doc.setCreateCollectionRequest(req());
- return doc;
- }();
-
- auto latestDoc = [&] {
- auto doc = CreateCollectionCoordinatorDocument();
- doc.setShardingDDLCoordinatorMetadata({{kNs, DDLCoordinatorTypeEnum::kCreateCollection}});
- doc.setCreateCollectionRequest(req());
- return doc;
- }();
-
- ASSERT(pre60CompatDoc.getShardKey());
- ASSERT(latestDoc.getShardKey());
- ASSERT_BSONOBJ_EQ(*pre60CompatDoc.getShardKey(), *latestDoc.getShardKey());
- ASSERT(pre60CompatDoc.getCollectionUUID());
- ASSERT(latestDoc.getCollectionUUID());
- ASSERT_EQ(*pre60CompatDoc.getCollectionUUID(), *latestDoc.getCollectionUUID());
- ASSERT_EQ(pre60CompatDoc.getImplicitlyCreateIndex(), latestDoc.getImplicitlyCreateIndex());
- ASSERT_EQ(pre60CompatDoc.getEnforceUniquenessCheck(), latestDoc.getEnforceUniquenessCheck());
-}
-
-TEST(CreateCollectionCoordinator, pre60CompatibleSerialization) {
- auto req = [&] {
- CreateCollectionRequest creq;
- creq.setShardKey(kShardKey.getOwned());
- creq.setCollectionUUID(UUID::gen());
- creq.setImplicitlyCreateIndex(false);
- creq.setEnforceUniquenessCheck(false);
- return creq;
- };
-
- auto pre60CompatDoc = [&] {
- auto doc = CreateCollectionCoordinatorDocumentPre60Compatible();
- doc.setShardingDDLCoordinatorMetadata(
- {{kNs, DDLCoordinatorTypeEnum::kCreateCollectionPre60Compatible}});
- doc.setCreateCollectionRequest(req());
- return doc;
- }();
-
- BSONObjBuilder builder;
- pre60CompatDoc.serialize(&builder);
- auto serialized = builder.asTempObj();
-
- ASSERT_BSONOBJ_EQ(
- BSONObj{},
- serialized.extractFieldsUndotted(
- CreateCollectionCoordinatorDocumentPre60Compatible::kPre60IncompatibleFields));
-}
-
-TEST(CreateCollectionCoordinator, pre60CompatibleToBSON) {
-
- auto req = [&] {
- CreateCollectionRequest creq;
- creq.setShardKey(kShardKey.getOwned());
- creq.setCollectionUUID(UUID::gen());
- creq.setImplicitlyCreateIndex(false);
- creq.setEnforceUniquenessCheck(false);
- return creq;
- };
-
- auto pre60CompatDoc = [&] {
- auto doc = CreateCollectionCoordinatorDocumentPre60Compatible();
- doc.setShardingDDLCoordinatorMetadata(
- {{kNs, DDLCoordinatorTypeEnum::kCreateCollectionPre60Compatible}});
- doc.setCreateCollectionRequest(req());
- return doc;
- }();
-
- auto serialized = pre60CompatDoc.toBSON();
-
- ASSERT_BSONOBJ_EQ(
- BSONObj{},
- serialized.extractFieldsUndotted(
- CreateCollectionCoordinatorDocumentPre60Compatible::kPre60IncompatibleFields));
-}
-
-} // namespace
-} // namespace mongo
diff --git a/src/mongo/db/s/database_sharding_state.cpp b/src/mongo/db/s/database_sharding_state.cpp
index ad18b8b9526..776b23857d0 100644
--- a/src/mongo/db/s/database_sharding_state.cpp
+++ b/src/mongo/db/s/database_sharding_state.cpp
@@ -185,9 +185,12 @@ void DatabaseShardingState::checkDbVersion(OperationContext* opCtx, DSSLock&) co
auto criticalSectionSignal = _critSec.getSignal(
opCtx->lockState()->isWriteLocked() ? ShardingMigrationCriticalSection::kWrite
: ShardingMigrationCriticalSection::kRead);
+ const std::string reason =
+ _critSec.getReason() ? _critSec.getReason()->toString() : "unknown";
uassert(
StaleDbRoutingVersion(_dbName, *clientDbVersion, boost::none, criticalSectionSignal),
- str::stream() << "movePrimary commit in progress for " << _dbName,
+ str::stream() << "The critical section for " << _dbName
+ << " is acquired with reason: " << reason,
!criticalSectionSignal);
}
diff --git a/src/mongo/db/s/drop_collection_coordinator.cpp b/src/mongo/db/s/drop_collection_coordinator.cpp
index 7dabcae0f16..fa1e2f4b84e 100644
--- a/src/mongo/db/s/drop_collection_coordinator.cpp
+++ b/src/mongo/db/s/drop_collection_coordinator.cpp
@@ -47,37 +47,6 @@
namespace mongo {
-DropCollectionCoordinator::DropCollectionCoordinator(ShardingDDLCoordinatorService* service,
- const BSONObj& initialState)
- : ShardingDDLCoordinator(service, initialState),
- _doc(DropCollectionCoordinatorDocument::parse(
- IDLParserErrorContext("DropCollectionCoordinatorDocument"), initialState)) {}
-
-boost::optional<BSONObj> DropCollectionCoordinator::reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept {
-
- BSONObjBuilder cmdBob;
- if (const auto& optComment = getForwardableOpMetadata().getComment()) {
- cmdBob.append(optComment.get().firstElement());
- }
-
- const auto currPhase = [&]() {
- stdx::lock_guard l{_docMutex};
- return _doc.getPhase();
- }();
-
- BSONObjBuilder bob;
- bob.append("type", "op");
- bob.append("desc", "DropCollectionCoordinator");
- bob.append("op", "command");
- bob.append("ns", nss().toString());
- bob.append("command", cmdBob.obj());
- bob.append("currentPhase", currPhase);
- bob.append("active", true);
- return bob.obj();
-}
-
DropReply DropCollectionCoordinator::dropCollectionLocally(OperationContext* opCtx,
const NamespaceString& nss) {
{
@@ -101,29 +70,6 @@ DropReply DropCollectionCoordinator::dropCollectionLocally(OperationContext* opC
return result;
}
-void DropCollectionCoordinator::_enterPhase(Phase newPhase) {
- StateDoc newDoc(_doc);
- newDoc.setPhase(newPhase);
-
- LOGV2_DEBUG(5390501,
- 2,
- "Drop collection coordinator phase transition",
- "namespace"_attr = nss(),
- "newPhase"_attr = DropCollectionCoordinatorPhase_serializer(newDoc.getPhase()),
- "oldPhase"_attr = DropCollectionCoordinatorPhase_serializer(_doc.getPhase()));
-
- if (_doc.getPhase() == Phase::kUnset) {
- newDoc = _insertStateDocument(std::move(newDoc));
- } else {
- newDoc = _updateStateDocument(cc().makeOperationContext().get(), std::move(newDoc));
- }
-
- {
- stdx::unique_lock ul{_docMutex};
- _doc = std::move(newDoc);
- }
-}
-
ExecutorFuture<void> DropCollectionCoordinator::_runImpl(
std::shared_ptr<executor::ScopedTaskExecutor> executor,
const CancellationToken& token) noexcept {
@@ -161,7 +107,7 @@ ExecutorFuture<void> DropCollectionCoordinator::_runImpl(
// Persist the collection info before sticking to using it's uuid. This ensures this
// node is still the RS primary, so it was also the primary at the moment we read
// the collection metadata.
- _doc = _updateStateDocument(opCtx, StateDoc(_doc));
+ _updateStateDocument(opCtx, StateDoc(_doc));
if (_doc.getCollInfo()) {
sharding_ddl_util::stopMigrations(opCtx, nss(), _doc.getCollInfo()->getUuid());
@@ -178,9 +124,9 @@ ExecutorFuture<void> DropCollectionCoordinator::_runImpl(
// Perform a noop write on the participants in order to advance the txnNumber
// for this coordinator's lsid so that requests with older txnNumbers can no
// longer execute.
- _doc = _updateSession(opCtx, _doc);
+ _updateSession(opCtx);
_performNoopRetryableWriteOnAllShardsAndConfigsvr(
- opCtx, getCurrentSession(_doc), **executor);
+ opCtx, getCurrentSession(), **executor);
}
const auto collIsSharded = bool(_doc.getCollInfo());
@@ -199,12 +145,11 @@ ExecutorFuture<void> DropCollectionCoordinator::_runImpl(
}
// Remove tags even if the collection is not sharded or didn't exist
- _doc = _updateSession(opCtx, _doc);
- sharding_ddl_util::removeTagsMetadataFromConfig(
- opCtx, nss(), getCurrentSession(_doc));
+ _updateSession(opCtx);
+ sharding_ddl_util::removeTagsMetadataFromConfig(opCtx, nss(), getCurrentSession());
// get a Lsid and an incremented txnNumber. Ensures we are the primary
- _doc = _updateSession(opCtx, _doc);
+ _updateSession(opCtx);
const auto primaryShardId = ShardingState::get(opCtx)->shardId();
@@ -217,13 +162,13 @@ ExecutorFuture<void> DropCollectionCoordinator::_runImpl(
participants.end());
sharding_ddl_util::sendDropCollectionParticipantCommandToShards(
- opCtx, nss(), participants, **executor, getCurrentSession(_doc));
+ opCtx, nss(), participants, **executor, getCurrentSession());
// The sharded collection must be dropped on the primary shard after it has been
// dropped on all of the other shards to ensure it can only be re-created as
// unsharded with a higher optime than all of the drops.
sharding_ddl_util::sendDropCollectionParticipantCommandToShards(
- opCtx, nss(), {primaryShardId}, **executor, getCurrentSession(_doc));
+ opCtx, nss(), {primaryShardId}, **executor, getCurrentSession());
ShardingLogging::get(opCtx)->logChange(opCtx, "dropCollection", nss().ns());
LOGV2(5390503, "Collection dropped", "namespace"_attr = nss());
diff --git a/src/mongo/db/s/drop_collection_coordinator.h b/src/mongo/db/s/drop_collection_coordinator.h
index 140013e41e1..46b37d2a415 100644
--- a/src/mongo/db/s/drop_collection_coordinator.h
+++ b/src/mongo/db/s/drop_collection_coordinator.h
@@ -35,20 +35,20 @@
#include "mongo/db/s/sharding_ddl_coordinator.h"
namespace mongo {
-class DropCollectionCoordinator final : public ShardingDDLCoordinator {
+class DropCollectionCoordinator final
+ : public RecoverableShardingDDLCoordinator<DropCollectionCoordinatorDocument,
+ DropCollectionCoordinatorPhaseEnum> {
public:
using StateDoc = DropCollectionCoordinatorDocument;
using Phase = DropCollectionCoordinatorPhaseEnum;
- DropCollectionCoordinator(ShardingDDLCoordinatorService* service, const BSONObj& initialState);
+ DropCollectionCoordinator(ShardingDDLCoordinatorService* service, const BSONObj& initialState)
+ : RecoverableShardingDDLCoordinator(service, "DropCollectionCoordinator", initialState) {}
+
~DropCollectionCoordinator() = default;
void checkIfOptionsConflict(const BSONObj& doc) const override {}
- boost::optional<BSONObj> reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept override;
-
/**
* Locally drops a collection, cleans its CollectionShardingRuntime metadata and refreshes the
* catalog cache.
@@ -56,34 +56,12 @@ public:
static DropReply dropCollectionLocally(OperationContext* opCtx, const NamespaceString& nss);
private:
- ShardingDDLCoordinatorMetadata const& metadata() const override {
- return _doc.getShardingDDLCoordinatorMetadata();
+ StringData serializePhase(const Phase& phase) const override {
+ return DropCollectionCoordinatorPhase_serializer(phase);
}
ExecutorFuture<void> _runImpl(std::shared_ptr<executor::ScopedTaskExecutor> executor,
const CancellationToken& token) noexcept override;
-
- template <typename Func>
- auto _executePhase(const Phase& newPhase, Func&& func) {
- return [=] {
- const auto& currPhase = _doc.getPhase();
-
- if (currPhase > newPhase) {
- // Do not execute this phase if we already reached a subsequent one.
- return;
- }
- if (currPhase < newPhase) {
- // Persist the new phase if this is the first time we are executing it.
- _enterPhase(newPhase);
- }
- return func();
- };
- }
-
- void _enterPhase(Phase newPhase);
-
- mutable Mutex _docMutex = MONGO_MAKE_LATCH("DropCollectionCoordinator::_docMutex");
- DropCollectionCoordinatorDocument _doc;
};
} // namespace mongo
diff --git a/src/mongo/db/s/drop_database_coordinator.cpp b/src/mongo/db/s/drop_database_coordinator.cpp
index b49e36d302c..54b8ef1108e 100644
--- a/src/mongo/db/s/drop_database_coordinator.cpp
+++ b/src/mongo/db/s/drop_database_coordinator.cpp
@@ -122,11 +122,11 @@ void DropDatabaseCoordinator::_dropShardedCollection(
sharding_ddl_util::removeCollAndChunksMetadataFromConfig(
opCtx, coll, ShardingCatalogClient::kMajorityWriteConcern);
- _doc = _updateSession(opCtx, _doc);
- sharding_ddl_util::removeTagsMetadataFromConfig(opCtx, nss, getCurrentSession(_doc));
+ _updateSession(opCtx);
+ sharding_ddl_util::removeTagsMetadataFromConfig(opCtx, nss, getCurrentSession());
const auto primaryShardId = ShardingState::get(opCtx)->shardId();
- _doc = _updateSession(opCtx, _doc);
+ _updateSession(opCtx);
// We need to send the drop to all the shards because both movePrimary and
// moveChunk leave garbage behind for sharded collections.
@@ -135,67 +135,13 @@ void DropDatabaseCoordinator::_dropShardedCollection(
participants.erase(std::remove(participants.begin(), participants.end(), primaryShardId),
participants.end());
sharding_ddl_util::sendDropCollectionParticipantCommandToShards(
- opCtx, nss, participants, **executor, getCurrentSession(_doc));
+ opCtx, nss, participants, **executor, getCurrentSession());
// The sharded collection must be dropped on the primary shard after it has been dropped on all
// of the other shards to ensure it can only be re-created as unsharded with a higher optime
// than all of the drops.
sharding_ddl_util::sendDropCollectionParticipantCommandToShards(
- opCtx, nss, {primaryShardId}, **executor, getCurrentSession(_doc));
-}
-
-DropDatabaseCoordinator::DropDatabaseCoordinator(ShardingDDLCoordinatorService* service,
- const BSONObj& initialState)
- : ShardingDDLCoordinator(service, initialState),
- _doc(DropDatabaseCoordinatorDocument::parse(
- IDLParserErrorContext("DropDatabaseCoordinatorDocument"), initialState)),
- _dbName(nss().db()) {}
-
-boost::optional<BSONObj> DropDatabaseCoordinator::reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept {
- BSONObjBuilder cmdBob;
- if (const auto& optComment = getForwardableOpMetadata().getComment()) {
- cmdBob.append(optComment.get().firstElement());
- }
-
- const auto currPhase = [&]() {
- stdx::lock_guard l{_docMutex};
- return _doc.getPhase();
- }();
-
- BSONObjBuilder bob;
- bob.append("type", "op");
- bob.append("desc", "DropDatabaseCoordinator");
- bob.append("op", "command");
- bob.append("ns", nss().toString());
- bob.append("command", cmdBob.obj());
- bob.append("currentPhase", currPhase);
- bob.append("active", true);
- return bob.obj();
-}
-
-void DropDatabaseCoordinator::_enterPhase(Phase newPhase) {
- StateDoc newDoc(_doc);
- newDoc.setPhase(newPhase);
-
- LOGV2_DEBUG(5494501,
- 2,
- "Drop database coordinator phase transition",
- "db"_attr = _dbName,
- "newPhase"_attr = DropDatabaseCoordinatorPhase_serializer(newDoc.getPhase()),
- "oldPhase"_attr = DropDatabaseCoordinatorPhase_serializer(_doc.getPhase()));
-
- if (_doc.getPhase() == Phase::kUnset) {
- newDoc = _insertStateDocument(std::move(newDoc));
- } else {
- newDoc = _updateStateDocument(cc().makeOperationContext().get(), std::move(newDoc));
- }
-
- {
- stdx::unique_lock ul{_docMutex};
- _doc = std::move(newDoc);
- }
+ opCtx, nss, {primaryShardId}, **executor, getCurrentSession());
}
void DropDatabaseCoordinator::_clearDatabaseInfoOnPrimary(OperationContext* opCtx) {
@@ -238,9 +184,9 @@ ExecutorFuture<void> DropDatabaseCoordinator::_runImpl(
// Perform a noop write on the participants in order to advance the txnNumber
// for this coordinator's lsid so that requests with older txnNumbers can no
// longer execute.
- _doc = _updateSession(opCtx, _doc);
+ _updateSession(opCtx);
_performNoopRetryableWriteOnAllShardsAndConfigsvr(
- opCtx, getCurrentSession(_doc), **executor);
+ opCtx, getCurrentSession(), **executor);
}
ShardingLogging::get(opCtx)->logChange(opCtx, "dropDatabase.start", _dbName);
@@ -284,7 +230,7 @@ ExecutorFuture<void> DropDatabaseCoordinator::_runImpl(
auto newStateDoc = _doc;
newStateDoc.setCollInfo(coll);
- _doc = _updateStateDocument(opCtx, std::move(newStateDoc));
+ _updateStateDocument(opCtx, std::move(newStateDoc));
_dropShardedCollection(opCtx, coll, executor);
}
diff --git a/src/mongo/db/s/drop_database_coordinator.h b/src/mongo/db/s/drop_database_coordinator.h
index 47d63310a19..f70ea2981cb 100644
--- a/src/mongo/db/s/drop_database_coordinator.h
+++ b/src/mongo/db/s/drop_database_coordinator.h
@@ -34,48 +34,29 @@
namespace mongo {
-class DropDatabaseCoordinator final : public ShardingDDLCoordinator {
+class DropDatabaseCoordinator final
+ : public RecoverableShardingDDLCoordinator<DropDatabaseCoordinatorDocument,
+ DropDatabaseCoordinatorPhaseEnum> {
+
public:
using StateDoc = DropDatabaseCoordinatorDocument;
using Phase = DropDatabaseCoordinatorPhaseEnum;
- DropDatabaseCoordinator(ShardingDDLCoordinatorService* service, const BSONObj& initialState);
+ DropDatabaseCoordinator(ShardingDDLCoordinatorService* service, const BSONObj& initialState)
+ : RecoverableShardingDDLCoordinator(service, "DropDatabaseCoordinator", initialState),
+ _dbName(nss().db()) {}
~DropDatabaseCoordinator() = default;
void checkIfOptionsConflict(const BSONObj& doc) const override {}
- boost::optional<BSONObj> reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept override;
-
private:
- ShardingDDLCoordinatorMetadata const& metadata() const override {
- stdx::lock_guard l{_docMutex};
- return _doc.getShardingDDLCoordinatorMetadata();
+ StringData serializePhase(const Phase& phase) const override {
+ return DropDatabaseCoordinatorPhase_serializer(phase);
}
ExecutorFuture<void> _runImpl(std::shared_ptr<executor::ScopedTaskExecutor> executor,
const CancellationToken& token) noexcept override;
- template <typename Func>
- auto _executePhase(const Phase& newPhase, Func&& func) {
- return [=] {
- const auto& currPhase = _doc.getPhase();
-
- if (currPhase > newPhase) {
- // Do not execute this phase if we already reached a subsequent one.
- return;
- }
- if (currPhase < newPhase) {
- // Persist the new phase if this is the first time we are executing it.
- _enterPhase(newPhase);
- }
- return func();
- };
- }
-
- void _enterPhase(Phase newPhase);
-
void _dropShardedCollection(OperationContext* opCtx,
const CollectionType& coll,
std::shared_ptr<executor::ScopedTaskExecutor> executor);
@@ -84,10 +65,6 @@ private:
void _clearDatabaseInfoOnSecondaries(OperationContext* opCtx);
- mutable Mutex _docMutex = MONGO_MAKE_LATCH("DropDatabaseCoordinator::_docMutex");
- DropDatabaseCoordinatorDocument _doc;
-
-
StringData _dbName;
};
diff --git a/src/mongo/db/s/flush_resharding_state_change_command.cpp b/src/mongo/db/s/flush_resharding_state_change_command.cpp
index 85f0c42cff0..95439564643 100644
--- a/src/mongo/db/s/flush_resharding_state_change_command.cpp
+++ b/src/mongo/db/s/flush_resharding_state_change_command.cpp
@@ -131,7 +131,7 @@ public:
.getAsync([](auto) {});
// Ensure the command isn't run on a stale primary.
- doNoopWrite(opCtx, "_flushReshardingStateChange no-op", ns());
+ resharding::doNoopWrite(opCtx, "_flushReshardingStateChange no-op", ns());
}
};
} _flushReshardingStateChange;
diff --git a/src/mongo/db/s/flush_routing_table_cache_updates_command.cpp b/src/mongo/db/s/flush_routing_table_cache_updates_command.cpp
index 9ea3f94af97..f85f73c0ef4 100644
--- a/src/mongo/db/s/flush_routing_table_cache_updates_command.cpp
+++ b/src/mongo/db/s/flush_routing_table_cache_updates_command.cpp
@@ -117,7 +117,8 @@ public:
boost::optional<SharedSemiFuture<void>> criticalSectionSignal;
{
- AutoGetCollection autoColl(opCtx, ns(), MODE_IS);
+ AutoGetCollection autoColl(
+ opCtx, ns(), MODE_IS, AutoGetCollectionViewMode::kViewsPermitted);
// If the primary is in the critical section, secondaries must wait for the commit
// to finish on the primary in case a secondary's caller has an afterClusterTime
diff --git a/src/mongo/db/s/metadata_manager_test.cpp b/src/mongo/db/s/metadata_manager_test.cpp
index 2f39ef09147..a842e4cfe03 100644
--- a/src/mongo/db/s/metadata_manager_test.cpp
+++ b/src/mongo/db/s/metadata_manager_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include <boost/optional.hpp>
#include "mongo/bson/bsonobjbuilder.h"
@@ -93,7 +91,7 @@ protected:
boost::none,
boost::none /* chunkSizeBytes */,
true,
- {ChunkType{uuid, range, ChunkVersion(1, 0, epoch, Timestamp(1, 1)), kOtherShard}});
+ {ChunkType{uuid, range, ChunkVersion({epoch, Timestamp(1, 1)}, {1, 0}), kOtherShard}});
return CollectionMetadata(ChunkManager(kThisShard,
DatabaseVersion(UUID::gen(), Timestamp(1, 1)),
diff --git a/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp b/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp
index 462baee5069..1a76fd9a5a5 100644
--- a/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp
+++ b/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp
@@ -300,12 +300,12 @@ Status MigrationChunkClonerSourceLegacy::startClone(OperationContext* opCtx,
opCtx->recoveryUnit()->setPrepareConflictBehavior(
PrepareConflictBehavior::kIgnoreConflicts);
- auto storeCurrentLocsStatus = _storeCurrentLocs(opCtx);
- if (storeCurrentLocsStatus == ErrorCodes::ChunkTooBig && _forceJumbo) {
+ auto storeCurrentRecordIdStatus = _storeCurrentRecordId(opCtx);
+ if (storeCurrentRecordIdStatus == ErrorCodes::ChunkTooBig && _forceJumbo) {
stdx::lock_guard<Latch> sl(_mutex);
_jumboChunkCloneState.emplace();
- } else if (!storeCurrentLocsStatus.isOK()) {
- return storeCurrentLocsStatus;
+ } else if (!storeCurrentRecordIdStatus.isOK()) {
+ return storeCurrentRecordIdStatus;
}
}
@@ -381,7 +381,7 @@ StatusWith<BSONObj> MigrationChunkClonerSourceLegacy::commitClone(OperationConte
}
} else {
invariant(PlanExecutor::IS_EOF == _jumboChunkCloneState->clonerState);
- invariant(_cloneLocs.empty());
+ invariant(_cloneRecordIds.empty());
}
}
@@ -680,17 +680,16 @@ void MigrationChunkClonerSourceLegacy::_nextCloneBatchFromIndexScan(OperationCon
_jumboChunkCloneState->clonerExec->detachFromOperationContext();
}
-void MigrationChunkClonerSourceLegacy::_nextCloneBatchFromCloneLocs(OperationContext* opCtx,
- const CollectionPtr& collection,
- BSONArrayBuilder* arrBuilder) {
+void MigrationChunkClonerSourceLegacy::_nextCloneBatchFromCloneRecordIds(
+ OperationContext* opCtx, const CollectionPtr& collection, BSONArrayBuilder* arrBuilder) {
ElapsedTracker tracker(opCtx->getServiceContext()->getFastClockSource(),
internalQueryExecYieldIterations.load(),
Milliseconds(internalQueryExecYieldPeriodMS.load()));
stdx::unique_lock<Latch> lk(_mutex);
- auto iter = _cloneLocs.begin();
+ auto iter = _cloneRecordIds.begin();
- for (; iter != _cloneLocs.end(); ++iter) {
+ for (; iter != _cloneRecordIds.end(); ++iter) {
// We must always make progress in this method by at least one document because empty
// return indicates there is no more initial clone data.
if (arrBuilder->arrSize() && tracker.intervalHasElapsed()) {
@@ -718,7 +717,7 @@ void MigrationChunkClonerSourceLegacy::_nextCloneBatchFromCloneLocs(OperationCon
lk.lock();
}
- _cloneLocs.erase(_cloneLocs.begin(), iter);
+ _cloneRecordIds.erase(_cloneRecordIds.begin(), iter);
}
uint64_t MigrationChunkClonerSourceLegacy::getCloneBatchBufferAllocationSize() {
@@ -727,7 +726,7 @@ uint64_t MigrationChunkClonerSourceLegacy::getCloneBatchBufferAllocationSize() {
return static_cast<uint64_t>(BSONObjMaxUserSize);
return std::min(static_cast<uint64_t>(BSONObjMaxUserSize),
- _averageObjectSizeForCloneLocs * _cloneLocs.size());
+ _averageObjectSizeForCloneRecordIds * _cloneRecordIds.size());
}
Status MigrationChunkClonerSourceLegacy::nextCloneBatch(OperationContext* opCtx,
@@ -735,8 +734,8 @@ Status MigrationChunkClonerSourceLegacy::nextCloneBatch(OperationContext* opCtx,
BSONArrayBuilder* arrBuilder) {
dassert(opCtx->lockState()->isCollectionLockedForMode(nss(), MODE_IS));
- // If this chunk is too large to store records in _cloneLocs and the command args specify to
- // attempt to move it, scan the collection directly.
+ // If this chunk is too large to store records in _cloneRecordIds and the command args specify
+ // to attempt to move it, scan the collection directly.
if (_jumboChunkCloneState && _forceJumbo) {
try {
_nextCloneBatchFromIndexScan(opCtx, collection, arrBuilder);
@@ -746,12 +745,11 @@ Status MigrationChunkClonerSourceLegacy::nextCloneBatch(OperationContext* opCtx,
}
}
- _nextCloneBatchFromCloneLocs(opCtx, collection, arrBuilder);
+ _nextCloneBatchFromCloneRecordIds(opCtx, collection, arrBuilder);
return Status::OK();
}
Status MigrationChunkClonerSourceLegacy::nextModsBatch(OperationContext* opCtx,
- Database* db,
BSONObjBuilder* builder) {
dassert(opCtx->lockState()->isCollectionLockedForMode(nss(), MODE_IS));
@@ -761,7 +759,7 @@ Status MigrationChunkClonerSourceLegacy::nextModsBatch(OperationContext* opCtx,
{
// All clone data must have been drained before starting to fetch the incremental changes.
stdx::unique_lock<Latch> lk(_mutex);
- invariant(_cloneLocs.empty());
+ invariant(_cloneRecordIds.empty());
// The "snapshot" for delete and update list must be taken under a single lock. This is to
// ensure that we will preserve the causal order of writes. Always consume the delete
@@ -784,8 +782,8 @@ Status MigrationChunkClonerSourceLegacy::nextModsBatch(OperationContext* opCtx,
if (deleteList.empty()) {
BSONArrayBuilder arrUpd(builder->subarrayStart("reload"));
- auto findByIdWrapper = [opCtx, db, ns](BSONObj idDoc, BSONObj* fullDoc) {
- return Helpers::findById(opCtx, db, ns, idDoc, *fullDoc);
+ auto findByIdWrapper = [opCtx, ns](BSONObj idDoc, BSONObj* fullDoc) {
+ return Helpers::findById(opCtx, ns, idDoc, *fullDoc);
};
totalDocSize = xferMods(&arrUpd, &updateList, totalDocSize, findByIdWrapper);
arrUpd.done();
@@ -874,7 +872,7 @@ MigrationChunkClonerSourceLegacy::_getIndexScanExecutor(
if (!shardKeyIdx) {
return {ErrorCodes::IndexNotFound,
str::stream() << "can't find index with prefix " << _shardKeyPattern.toBSON()
- << " in storeCurrentLocs for " << nss().ns()};
+ << " in storeCurrentRecordId for " << nss().ns()};
}
// Assume both min and max non-empty, append MinKey's to make them fit chosen index
@@ -896,7 +894,7 @@ MigrationChunkClonerSourceLegacy::_getIndexScanExecutor(
scanOption);
}
-Status MigrationChunkClonerSourceLegacy::_storeCurrentLocs(OperationContext* opCtx) {
+Status MigrationChunkClonerSourceLegacy::_storeCurrentRecordId(OperationContext* opCtx) {
AutoGetCollection collection(opCtx, nss(), MODE_IS);
if (!collection) {
return {ErrorCodes::NamespaceNotFound,
@@ -948,14 +946,14 @@ Status MigrationChunkClonerSourceLegacy::_storeCurrentLocs(OperationContext* opC
if (!isLargeChunk) {
stdx::lock_guard<Latch> lk(_mutex);
- _cloneLocs.insert(recordId);
+ _cloneRecordIds.insert(recordId);
}
if (++recCount > maxRecsWhenFull) {
isLargeChunk = true;
if (_forceJumbo) {
- _cloneLocs.clear();
+ _cloneRecordIds.clear();
break;
}
}
@@ -975,7 +973,7 @@ Status MigrationChunkClonerSourceLegacy::_storeCurrentLocs(OperationContext* opC
const auto idIdx = collection->getIndexCatalog()->findIdIndex(opCtx)->getEntry();
if (!idIdx) {
return {ErrorCodes::IndexNotFound,
- str::stream() << "can't find index '_id' in storeCurrentLocs for "
+ str::stream() << "can't find index '_id' in storeCurrentRecordId for "
<< nss().ns()};
}
averageObjectIdSize = idIdx->accessMethod()->getSpaceUsedBytes(opCtx) / totalRecs;
@@ -992,7 +990,7 @@ Status MigrationChunkClonerSourceLegacy::_storeCurrentLocs(OperationContext* opC
}
stdx::lock_guard<Latch> lk(_mutex);
- _averageObjectSizeForCloneLocs = collectionAverageObjectSize + defaultObjectIdSize;
+ _averageObjectSizeForCloneRecordIds = collectionAverageObjectSize + defaultObjectIdSize;
_averageObjectIdSize = std::max(averageObjectIdSize, defaultObjectIdSize);
return Status::OK();
}
@@ -1057,9 +1055,9 @@ Status MigrationChunkClonerSourceLegacy::_checkRecipientCloningStatus(OperationC
stdx::lock_guard<Latch> sl(_mutex);
- const std::size_t cloneLocsRemaining = _cloneLocs.size();
+ const std::size_t cloneRecordIdsRemaining = _cloneRecordIds.size();
int64_t untransferredModsSizeBytes = _untransferredDeletesCounter * _averageObjectIdSize +
- _untransferredUpsertsCounter * _averageObjectSizeForCloneLocs;
+ _untransferredUpsertsCounter * _averageObjectSizeForCloneRecordIds;
if (_forceJumbo && _jumboChunkCloneState) {
LOGV2(21992,
@@ -1079,13 +1077,13 @@ Status MigrationChunkClonerSourceLegacy::_checkRecipientCloningStatus(OperationC
"moveChunk data transfer progress",
"response"_attr = redact(res),
"memoryUsedBytes"_attr = _memoryUsed,
- "docsRemainingToClone"_attr = cloneLocsRemaining,
+ "docsRemainingToClone"_attr = cloneRecordIdsRemaining,
"untransferredModsSizeBytes"_attr = untransferredModsSizeBytes);
}
if (res["state"].String() == "steady" && sessionCatalogSourceInCatchupPhase &&
estimateUntransferredSessionsSize == 0) {
- if (cloneLocsRemaining != 0 ||
+ if (cloneRecordIdsRemaining != 0 ||
(_jumboChunkCloneState && _forceJumbo &&
PlanExecutor::IS_EOF != _jumboChunkCloneState->clonerState)) {
return {ErrorCodes::OperationIncomplete,
@@ -1124,7 +1122,8 @@ Status MigrationChunkClonerSourceLegacy::_checkRecipientCloningStatus(OperationC
"moveChunk data transfer within threshold to allow write blocking",
"_untransferredUpsertsCounter"_attr = _untransferredUpsertsCounter,
"_untransferredDeletesCounter"_attr = _untransferredDeletesCounter,
- "_averageObjectSizeForCloneLocs"_attr = _averageObjectSizeForCloneLocs,
+ "_averageObjectSizeForCloneRecordIds"_attr =
+ _averageObjectSizeForCloneRecordIds,
"_averageObjectIdSize"_attr = _averageObjectIdSize,
"untransferredModsSizeBytes"_attr = untransferredModsSizeBytes,
"untransferredSessionDataInBytes"_attr = estimateUntransferredSessionsSize,
diff --git a/src/mongo/db/s/migration_chunk_cloner_source_legacy.h b/src/mongo/db/s/migration_chunk_cloner_source_legacy.h
index 8c15fa7a0cb..1912c947ad9 100644
--- a/src/mongo/db/s/migration_chunk_cloner_source_legacy.h
+++ b/src/mongo/db/s/migration_chunk_cloner_source_legacy.h
@@ -174,7 +174,7 @@ public:
*
* NOTE: Must be called with the collection lock held in at least IS mode.
*/
- Status nextModsBatch(OperationContext* opCtx, Database* db, BSONObjBuilder* builder);
+ Status nextModsBatch(OperationContext* opCtx, BSONObjBuilder* builder);
/**
* Appends to 'arrBuilder' oplog entries which wrote to the currently migrated chunk and contain
@@ -248,17 +248,17 @@ private:
const CollectionPtr& collection,
BSONArrayBuilder* arrBuilder);
- void _nextCloneBatchFromCloneLocs(OperationContext* opCtx,
- const CollectionPtr& collection,
- BSONArrayBuilder* arrBuilder);
+ void _nextCloneBatchFromCloneRecordIds(OperationContext* opCtx,
+ const CollectionPtr& collection,
+ BSONArrayBuilder* arrBuilder);
/**
- * Get the disklocs that belong to the chunk migrated and sort them in _cloneLocs (to avoid
- * seeking disk later).
+ * Get the recordIds that belong to the chunk migrated and sort them in _cloneRecordIds (to
+ * avoid seeking disk later).
*
* Returns OK or any error status otherwise.
*/
- Status _storeCurrentLocs(OperationContext* opCtx);
+ Status _storeCurrentRecordId(OperationContext* opCtx);
/**
* Adds the OpTime to the list of OpTimes for oplog entries that we should consider migrating as
@@ -349,11 +349,11 @@ private:
State _state{kNew};
// List of record ids that needs to be transferred (initial clone)
- std::set<RecordId> _cloneLocs;
+ std::set<RecordId> _cloneRecordIds;
// The estimated average object size during the clone phase. Used for buffer size
// pre-allocation (initial clone).
- uint64_t _averageObjectSizeForCloneLocs{0};
+ uint64_t _averageObjectSizeForCloneRecordIds{0};
// The estimated average object _id size during the clone phase.
uint64_t _averageObjectIdSize{0};
diff --git a/src/mongo/db/s/migration_chunk_cloner_source_legacy_commands.cpp b/src/mongo/db/s/migration_chunk_cloner_source_legacy_commands.cpp
index d4c7593370c..8be0acd90df 100644
--- a/src/mongo/db/s/migration_chunk_cloner_source_legacy_commands.cpp
+++ b/src/mongo/db/s/migration_chunk_cloner_source_legacy_commands.cpp
@@ -103,11 +103,6 @@ public:
_autoColl = boost::none;
}
- Database* getDb() const {
- invariant(_autoColl);
- return _autoColl->getDb();
- }
-
const CollectionPtr& getColl() const {
invariant(_autoColl);
return _autoColl->getCollection();
@@ -235,7 +230,7 @@ public:
AutoGetActiveCloner autoCloner(opCtx, migrationSessionId, true);
- uassertStatusOK(autoCloner.getCloner()->nextModsBatch(opCtx, autoCloner.getDb(), &result));
+ uassertStatusOK(autoCloner.getCloner()->nextModsBatch(opCtx, &result));
return true;
}
diff --git a/src/mongo/db/s/migration_chunk_cloner_source_legacy_test.cpp b/src/mongo/db/s/migration_chunk_cloner_source_legacy_test.cpp
index 91e1b4a21bc..dc1eb4579e5 100644
--- a/src/mongo/db/s/migration_chunk_cloner_source_legacy_test.cpp
+++ b/src/mongo/db/s/migration_chunk_cloner_source_legacy_test.cpp
@@ -179,7 +179,7 @@ protected:
true,
{ChunkType{uuid,
ChunkRange{BSON(kShardKey << MINKEY), BSON(kShardKey << MAXKEY)},
- ChunkVersion(1, 0, epoch, timestamp),
+ ChunkVersion({epoch, timestamp}, {1, 0}),
ShardId("dummyShardId")}});
AutoGetDb autoDb(operationContext(), kNss.db(), MODE_IX);
@@ -355,7 +355,7 @@ TEST_F(MigrationChunkClonerSourceLegacyTest, CorrectDocumentsFetched) {
{
BSONObjBuilder modsBuilder;
- ASSERT_OK(cloner.nextModsBatch(operationContext(), autoColl.getDb(), &modsBuilder));
+ ASSERT_OK(cloner.nextModsBatch(operationContext(), &modsBuilder));
const auto modsObj = modsBuilder.obj();
ASSERT_EQ(2U, modsObj["reload"].Array().size());
@@ -455,7 +455,7 @@ TEST_F(MigrationChunkClonerSourceLegacyTest, RemoveDuplicateDocuments) {
AutoGetCollection autoColl(operationContext(), kNss, MODE_IS);
{
BSONObjBuilder modsBuilder;
- ASSERT_OK(cloner.nextModsBatch(operationContext(), autoColl.getDb(), &modsBuilder));
+ ASSERT_OK(cloner.nextModsBatch(operationContext(), &modsBuilder));
const auto modsObj = modsBuilder.obj();
ASSERT_EQ(1U, modsObj["reload"].Array().size());
@@ -522,7 +522,7 @@ TEST_F(MigrationChunkClonerSourceLegacyTest, OneLargeDocumentTransferMods) {
AutoGetCollection autoColl(operationContext(), kNss, MODE_IS);
{
BSONObjBuilder modsBuilder;
- ASSERT_OK(cloner.nextModsBatch(operationContext(), autoColl.getDb(), &modsBuilder));
+ ASSERT_OK(cloner.nextModsBatch(operationContext(), &modsBuilder));
const auto modsObj = modsBuilder.obj();
ASSERT_EQ(1, modsObj["reload"].Array().size());
@@ -600,7 +600,7 @@ TEST_F(MigrationChunkClonerSourceLegacyTest, ManySmallDocumentsTransferMods) {
AutoGetCollection autoColl(operationContext(), kNss, MODE_IS);
{
BSONObjBuilder modsBuilder;
- ASSERT_OK(cloner.nextModsBatch(operationContext(), autoColl.getDb(), &modsBuilder));
+ ASSERT_OK(cloner.nextModsBatch(operationContext(), &modsBuilder));
const auto modsObj = modsBuilder.obj();
ASSERT_EQ(modsObj["reload"].Array().size(), numDocuments);
}
diff --git a/src/mongo/db/s/migration_destination_manager.cpp b/src/mongo/db/s/migration_destination_manager.cpp
index 09a39686779..fa303126008 100644
--- a/src/mongo/db/s/migration_destination_manager.cpp
+++ b/src/mongo/db/s/migration_destination_manager.cpp
@@ -201,11 +201,10 @@ bool willOverrideLocalId(OperationContext* opCtx,
BSONObj min,
BSONObj max,
BSONObj shardKeyPattern,
- Database* db,
BSONObj remoteDoc,
BSONObj* localDoc) {
*localDoc = BSONObj();
- if (Helpers::findById(opCtx, db, nss.ns(), remoteDoc, *localDoc)) {
+ if (Helpers::findById(opCtx, nss.ns(), remoteDoc, *localDoc)) {
return !isInRange(*localDoc, min, max, shardKeyPattern);
}
@@ -819,7 +818,7 @@ MigrationDestinationManager::IndexesAndIdIndex MigrationDestinationManager::getC
Milliseconds(-1)));
for (auto&& spec : indexes.docs) {
- if (spec["clustered"]) {
+ if (spec[IndexDescriptor::kClusteredFieldName]) {
// The 'clustered' index is implicitly created upon clustered collection creation.
} else {
donorIndexSpecs.push_back(spec);
@@ -1774,7 +1773,7 @@ bool MigrationDestinationManager::_applyMigrateOp(OperationContext* opCtx, const
// Do not apply delete if doc does not belong to the chunk being migrated
BSONObj fullObj;
- if (Helpers::findById(opCtx, autoColl.getDb(), _nss.ns(), id, fullObj)) {
+ if (Helpers::findById(opCtx, _nss.ns(), id, fullObj)) {
if (!isInRange(fullObj, _min, _max, _shardKeyPattern)) {
if (MONGO_unlikely(failMigrationReceivedOutOfRangeOperation.shouldFail())) {
MONGO_UNREACHABLE;
@@ -1823,14 +1822,8 @@ bool MigrationDestinationManager::_applyMigrateOp(OperationContext* opCtx, const
}
BSONObj localDoc;
- if (willOverrideLocalId(opCtx,
- _nss,
- _min,
- _max,
- _shardKeyPattern,
- autoColl.getDb(),
- updatedDoc,
- &localDoc)) {
+ if (willOverrideLocalId(
+ opCtx, _nss, _min, _max, _shardKeyPattern, updatedDoc, &localDoc)) {
// Exception will abort migration cleanly
LOGV2_ERROR_OPTIONS(
16977,
diff --git a/src/mongo/db/s/migration_destination_manager_legacy_commands.cpp b/src/mongo/db/s/migration_destination_manager_legacy_commands.cpp
index 4b0d94e98bc..7ee89b9f26c 100644
--- a/src/mongo/db/s/migration_destination_manager_legacy_commands.cpp
+++ b/src/mongo/db/s/migration_destination_manager_legacy_commands.cpp
@@ -91,6 +91,14 @@ public:
out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
}
+ bool supportsRetryableWrite() const final {
+ return true;
+ }
+
+ bool shouldCheckoutSession() const final {
+ return false;
+ }
+
bool errmsgRun(OperationContext* opCtx,
const std::string& dbname,
const BSONObj& cmdObj,
diff --git a/src/mongo/db/s/migration_source_manager.cpp b/src/mongo/db/s/migration_source_manager.cpp
index 1a6909e629f..a0fc3e650ee 100644
--- a/src/mongo/db/s/migration_source_manager.cpp
+++ b/src/mongo/db/s/migration_source_manager.cpp
@@ -39,6 +39,7 @@
#include "mongo/db/read_concern.h"
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/s/auto_split_vector.h"
+#include "mongo/db/s/commit_chunk_migration_gen.h"
#include "mongo/db/s/migration_chunk_cloner_source_legacy.h"
#include "mongo/db/s/migration_coordinator.h"
#include "mongo/db/s/migration_util.h"
@@ -59,8 +60,6 @@
#include "mongo/s/catalog_cache_loader.h"
#include "mongo/s/grid.h"
#include "mongo/s/pm2423_feature_flags_gen.h"
-#include "mongo/s/request_types/commit_chunk_migration_request_type.h"
-#include "mongo/s/request_types/set_shard_version_request.h"
#include "mongo/s/shard_key_pattern.h"
#include "mongo/util/duration.h"
#include "mongo/util/elapsed_tracker.h"
@@ -93,12 +92,10 @@ void refreshRecipientRoutingTable(OperationContext* opCtx,
const NamespaceString& nss,
const HostAndPort& toShardHost,
const ChunkVersion& newCollVersion) {
- SetShardVersionRequest ssv(nss, newCollVersion, false);
-
const executor::RemoteCommandRequest request(
toShardHost,
NamespaceString::kAdminDb.toString(),
- ssv.toBSON(),
+ BSON("_flushRoutingTableCacheUpdates" << nss.ns()),
ReadPreferenceSetting{ReadPreference::PrimaryOnly}.toContainingBSON(),
opCtx,
executor::RemoteCommandRequest::kNoTimeout);
@@ -560,20 +557,18 @@ void MigrationSourceManager::commitChunkMetadataOnConfig() {
{
const auto metadata = _getCurrentMetadataAndCheckEpoch();
- ChunkType migratedChunkType;
- migratedChunkType.setMin(*_args.getMin());
- migratedChunkType.setMax(*_args.getMax());
- migratedChunkType.setVersion(*_chunkVersion);
+ auto migratedChunk = MigratedChunkType(*_chunkVersion, *_args.getMin(), *_args.getMax());
const auto currentTime = VectorClock::get(_opCtx)->getTime();
- CommitChunkMigrationRequest::appendAsCommand(&builder,
- nss(),
- _args.getFromShard(),
- _args.getToShard(),
- migratedChunkType,
- metadata.getCollVersion(),
- currentTime.clusterTime().asTimestamp());
+ CommitChunkMigrationRequest request(nss(),
+ _args.getFromShard(),
+ _args.getToShard(),
+ migratedChunk,
+ metadata.getCollVersion(),
+ currentTime.clusterTime().asTimestamp());
+
+ request.serialize({}, &builder);
builder.append(kWriteConcernField, kMajorityWriteConcern.toBSON());
}
diff --git a/src/mongo/db/s/migration_util.cpp b/src/mongo/db/s/migration_util.cpp
index bd8600c1518..b877424f86a 100644
--- a/src/mongo/db/s/migration_util.cpp
+++ b/src/mongo/db/s/migration_util.cpp
@@ -37,7 +37,6 @@
#include "mongo/base/error_codes.h"
#include "mongo/bson/bsonobj.h"
#include "mongo/bson/bsonobjbuilder.h"
-#include "mongo/client/query.h"
#include "mongo/db/catalog/collection_catalog_helper.h"
#include "mongo/db/catalog_raii.h"
#include "mongo/db/commands.h"
@@ -520,14 +519,27 @@ void resubmitRangeDeletionsOnStepUp(ServiceContext* serviceContext) {
FindCommandRequest findCommand(NamespaceString::kRangeDeletionNamespace);
findCommand.setFilter(BSON(RangeDeletionTask::kProcessingFieldName << true));
auto cursor = client.find(std::move(findCommand));
- if (cursor->more()) {
- return migrationutil::submitRangeDeletionTask(
+
+ auto retFuture = ExecutorFuture<void>(getMigrationUtilExecutor(serviceContext));
+
+ int rangeDeletionsMarkedAsProcessing = 0;
+ while (cursor->more()) {
+ retFuture = migrationutil::submitRangeDeletionTask(
opCtx.get(),
RangeDeletionTask::parse(IDLParserErrorContext("rangeDeletionRecovery"),
cursor->next()));
- } else {
- return ExecutorFuture<void>(getMigrationUtilExecutor(serviceContext));
+ rangeDeletionsMarkedAsProcessing++;
}
+
+ if (rangeDeletionsMarkedAsProcessing > 1) {
+ LOGV2_WARNING(
+ 6695800,
+ "Rescheduling several range deletions marked as processing. Orphans count "
+ "may be off while they are not drained",
+ "numRangeDeletionsMarkedAsProcessing"_attr = rangeDeletionsMarkedAsProcessing);
+ }
+
+ return retFuture;
})
.then([serviceContext] {
ThreadClient tc("ResubmitRangeDeletions", serviceContext);
diff --git a/src/mongo/db/s/migration_util_test.cpp b/src/mongo/db/s/migration_util_test.cpp
index 8e6f02043da..90a1e9016a1 100644
--- a/src/mongo/db/s/migration_util_test.cpp
+++ b/src/mongo/db/s/migration_util_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/client/remote_command_targeter_factory_mock.h"
#include "mongo/client/remote_command_targeter_mock.h"
#include "mongo/db/catalog_raii.h"
@@ -591,7 +589,7 @@ TEST_F(SubmitRangeDeletionTaskTest, SucceedsIfFilteringMetadataUUIDMatchesTaskUU
_mockCatalogCacheLoader->setDatabaseRefreshReturnValue(kDefaultDatabaseType);
_mockCatalogCacheLoader->setCollectionRefreshReturnValue(coll);
_mockCatalogCacheLoader->setChunkRefreshReturnValue(
- makeChangedChunks(ChunkVersion(1, 0, kEpoch, kDefaultTimestamp)));
+ makeChangedChunks(ChunkVersion({kEpoch, kDefaultTimestamp}, {1, 0})));
_mockCatalogClient->setCollections({coll});
forceShardFilteringMetadataRefresh(opCtx, kTestNss);
@@ -619,7 +617,7 @@ TEST_F(
_mockCatalogCacheLoader->setDatabaseRefreshReturnValue(kDefaultDatabaseType);
_mockCatalogCacheLoader->setCollectionRefreshReturnValue(coll);
_mockCatalogCacheLoader->setChunkRefreshReturnValue(
- makeChangedChunks(ChunkVersion(1, 0, kEpoch, kDefaultTimestamp)));
+ makeChangedChunks(ChunkVersion({kEpoch, kDefaultTimestamp}, {1, 0})));
_mockCatalogClient->setCollections({coll});
auto metadata = makeShardedMetadata(opCtx, collectionUUID);
@@ -654,7 +652,7 @@ TEST_F(SubmitRangeDeletionTaskTest,
auto matchingColl = makeCollectionType(collectionUUID, kEpoch, kDefaultTimestamp);
_mockCatalogCacheLoader->setCollectionRefreshReturnValue(matchingColl);
_mockCatalogCacheLoader->setChunkRefreshReturnValue(
- makeChangedChunks(ChunkVersion(10, 0, kEpoch, kDefaultTimestamp)));
+ makeChangedChunks(ChunkVersion({kEpoch, kDefaultTimestamp}, {10, 0})));
_mockCatalogClient->setCollections({matchingColl});
auto metadata = makeShardedMetadata(opCtx, collectionUUID);
@@ -684,7 +682,7 @@ TEST_F(SubmitRangeDeletionTaskTest,
_mockCatalogCacheLoader->setDatabaseRefreshReturnValue(kDefaultDatabaseType);
_mockCatalogCacheLoader->setCollectionRefreshReturnValue(otherColl);
_mockCatalogCacheLoader->setChunkRefreshReturnValue(
- makeChangedChunks(ChunkVersion(1, 0, otherEpoch, otherTimestamp)));
+ makeChangedChunks(ChunkVersion({otherEpoch, otherTimestamp}, {1, 0})));
_mockCatalogClient->setCollections({otherColl});
// The task should not have been submitted, and the task's entry should have been removed from
diff --git a/src/mongo/db/s/move_primary_coordinator.cpp b/src/mongo/db/s/move_primary_coordinator.cpp
index c7fb0d9e44d..863a4c17b9d 100644
--- a/src/mongo/db/s/move_primary_coordinator.cpp
+++ b/src/mongo/db/s/move_primary_coordinator.cpp
@@ -46,30 +46,10 @@
namespace mongo {
-MovePrimaryCoordinator::MovePrimaryCoordinator(ShardingDDLCoordinatorService* service,
- const BSONObj& initialState)
- : ShardingDDLCoordinator(service, initialState),
- _doc(MovePrimaryCoordinatorDocument::parse(
- IDLParserErrorContext("MovePrimaryCoordinatorDocument"), initialState)) {}
-
-boost::optional<BSONObj> MovePrimaryCoordinator::reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept {
- BSONObjBuilder cmdBob;
- if (const auto& optComment = getForwardableOpMetadata().getComment()) {
- cmdBob.append(optComment.get().firstElement());
- }
- cmdBob.append("request", BSON(_doc.kToShardIdFieldName << _doc.getToShardId()));
-
- BSONObjBuilder bob;
- bob.append("type", "op");
- bob.append("desc", "MovePrimaryCoordinator");
- bob.append("op", "command");
- bob.append("ns", nss().toString());
- bob.append("command", cmdBob.obj());
- bob.append("active", true);
- return bob.obj();
-}
+void MovePrimaryCoordinator::appendCommandInfo(BSONObjBuilder* cmdInfoBuilder) const {
+ stdx::lock_guard lk{_docMutex};
+ cmdInfoBuilder->append("request", BSON(_doc.kToShardIdFieldName << _doc.getToShardId()));
+};
void MovePrimaryCoordinator::checkIfOptionsConflict(const BSONObj& doc) const {
// If we have two shard collections on the same namespace, then the arguments must be the same.
diff --git a/src/mongo/db/s/move_primary_coordinator.h b/src/mongo/db/s/move_primary_coordinator.h
index 2e501419255..80a1586e0a4 100644
--- a/src/mongo/db/s/move_primary_coordinator.h
+++ b/src/mongo/db/s/move_primary_coordinator.h
@@ -35,30 +35,25 @@
namespace mongo {
-class MovePrimaryCoordinator final : public ShardingDDLCoordinator {
+class MovePrimaryCoordinator final
+ : public ShardingDDLCoordinatorImpl<MovePrimaryCoordinatorDocument> {
public:
- MovePrimaryCoordinator(ShardingDDLCoordinatorService* service, const BSONObj& initialState);
+ MovePrimaryCoordinator(ShardingDDLCoordinatorService* service, const BSONObj& initialState)
+ : ShardingDDLCoordinatorImpl(service, "MovePrimaryCoordinator", initialState) {}
+
~MovePrimaryCoordinator() = default;
void checkIfOptionsConflict(const BSONObj& coorDoc) const override;
- boost::optional<BSONObj> reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept override;
+ void appendCommandInfo(BSONObjBuilder* cmdInfoBuilder) const override;
bool canAlwaysStartWhenUserWritesAreDisabled() const override {
return true;
}
private:
- ShardingDDLCoordinatorMetadata const& metadata() const override {
- return _doc.getShardingDDLCoordinatorMetadata();
- }
-
ExecutorFuture<void> _runImpl(std::shared_ptr<executor::ScopedTaskExecutor> executor,
const CancellationToken& token) noexcept override;
-
- MovePrimaryCoordinatorDocument _doc;
};
} // namespace mongo
diff --git a/src/mongo/db/s/move_primary_source_manager.cpp b/src/mongo/db/s/move_primary_source_manager.cpp
index b4382f21e0f..aaca2f82bbb 100644
--- a/src/mongo/db/s/move_primary_source_manager.cpp
+++ b/src/mongo/db/s/move_primary_source_manager.cpp
@@ -27,9 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include "mongo/db/s/move_primary_source_manager.h"
#include "mongo/client/connpool.h"
@@ -50,14 +47,11 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
-
namespace mongo {
MONGO_FAIL_POINT_DEFINE(hangInCloneStage);
MONGO_FAIL_POINT_DEFINE(hangInCleanStaleDataStage);
-using namespace shardmetadatautil;
-
MovePrimarySourceManager::MovePrimarySourceManager(OperationContext* opCtx,
ShardMovePrimary requestArgs,
StringData dbname,
@@ -192,7 +186,7 @@ Status MovePrimarySourceManager::enterCriticalSection(OperationContext* opCtx) {
// time inclusive of the move primary config commit update from accessing secondary data.
// Note: this write must occur after the critSec flag is set, to ensure the secondary refresh
// will stall behind the flag.
- Status signalStatus = updateShardDatabasesEntry(
+ Status signalStatus = shardmetadatautil::updateShardDatabasesEntry(
opCtx,
BSON(ShardDatabaseType::kNameFieldName << getNss().toString()),
BSONObj(),
diff --git a/src/mongo/db/s/op_observer_sharding_test.cpp b/src/mongo/db/s/op_observer_sharding_test.cpp
index 1ef41426f26..2c4859ba782 100644
--- a/src/mongo/db/s/op_observer_sharding_test.cpp
+++ b/src/mongo/db/s/op_observer_sharding_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/db/catalog_raii.h"
#include "mongo/db/op_observer_util.h"
#include "mongo/db/s/collection_sharding_runtime.h"
@@ -59,8 +57,10 @@ protected:
const UUID uuid = UUID::gen();
const OID epoch = OID::gen();
auto range = ChunkRange(BSON("key" << MINKEY), BSON("key" << MAXKEY));
- auto chunk = ChunkType(
- uuid, std::move(range), ChunkVersion(1, 0, epoch, Timestamp(1, 1)), ShardId("other"));
+ auto chunk = ChunkType(uuid,
+ std::move(range),
+ ChunkVersion({epoch, Timestamp(1, 1)}, {1, 0}),
+ ShardId("other"));
auto rt = RoutingTableHistory::makeNew(kTestNss,
uuid,
KeyPattern(keyPattern),
diff --git a/src/mongo/db/s/operation_sharding_state_test.cpp b/src/mongo/db/s/operation_sharding_state_test.cpp
index 0c4732b51ab..9c275398f85 100644
--- a/src/mongo/db/s/operation_sharding_state_test.cpp
+++ b/src/mongo/db/s/operation_sharding_state_test.cpp
@@ -47,7 +47,7 @@ TEST_F(OperationShardingStateTest, ScopedSetShardRoleDbVersion) {
}
TEST_F(OperationShardingStateTest, ScopedSetShardRoleShardVersion) {
- ChunkVersion shardVersion(1, 0, OID::gen(), Timestamp(1, 0));
+ ChunkVersion shardVersion({OID::gen(), Timestamp(1, 0)}, {1, 0});
ScopedSetShardRole scopedSetShardRole(operationContext(), kNss, shardVersion, boost::none);
auto& oss = OperationShardingState::get(operationContext());
@@ -58,13 +58,13 @@ TEST_F(OperationShardingStateTest, ScopedSetShardRoleChangeShardVersionSameNames
auto& oss = OperationShardingState::get(operationContext());
{
- ChunkVersion shardVersion1(1, 0, OID::gen(), Timestamp(10, 0));
+ ChunkVersion shardVersion1({OID::gen(), Timestamp(10, 0)}, {1, 0});
ScopedSetShardRole scopedSetShardRole1(
operationContext(), kNss, shardVersion1, boost::none);
ASSERT_EQ(shardVersion1, *oss.getShardVersion(kNss));
}
{
- ChunkVersion shardVersion2(1, 0, OID::gen(), Timestamp(20, 0));
+ ChunkVersion shardVersion2({OID::gen(), Timestamp(20, 0)}, {1, 0});
ScopedSetShardRole scopedSetShardRole2(
operationContext(), kNss, shardVersion2, boost::none);
ASSERT_EQ(shardVersion2, *oss.getShardVersion(kNss));
@@ -72,8 +72,8 @@ TEST_F(OperationShardingStateTest, ScopedSetShardRoleChangeShardVersionSameNames
}
TEST_F(OperationShardingStateTest, ScopedSetShardRoleRecursiveShardVersionDifferentNamespaces) {
- ChunkVersion shardVersion1(1, 0, OID::gen(), Timestamp(10, 0));
- ChunkVersion shardVersion2(1, 0, OID::gen(), Timestamp(20, 0));
+ ChunkVersion shardVersion1({OID::gen(), Timestamp(10, 0)}, {1, 0});
+ ChunkVersion shardVersion2({OID::gen(), Timestamp(20, 0)}, {1, 0});
ScopedSetShardRole scopedSetShardRole1(operationContext(), kNss, shardVersion1, boost::none);
ScopedSetShardRole scopedSetShardRole2(
diff --git a/src/mongo/db/s/range_deletion_util_test.cpp b/src/mongo/db/s/range_deletion_util_test.cpp
index 6efd33ce9d6..567d50748df 100644
--- a/src/mongo/db/s/range_deletion_util_test.cpp
+++ b/src/mongo/db/s/range_deletion_util_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/db/catalog/create_collection.h"
#include "mongo/db/db_raii.h"
#include "mongo/db/dbdirectclient.h"
@@ -113,7 +111,7 @@ public:
true,
{ChunkType{uuid,
ChunkRange{BSON(kShardKey << MINKEY), BSON(kShardKey << MAXKEY)},
- ChunkVersion(1, 0, epoch, Timestamp(1, 1)),
+ ChunkVersion({epoch, Timestamp(1, 1)}, {1, 0}),
ShardId("dummyShardId")}});
ChunkManager cm(ShardId("dummyShardId"),
DatabaseVersion(UUID::gen(), Timestamp(1, 1)),
diff --git a/src/mongo/db/s/refine_collection_shard_key_coordinator.cpp b/src/mongo/db/s/refine_collection_shard_key_coordinator.cpp
index a5a01e7f309..b8d981bb4ce 100644
--- a/src/mongo/db/s/refine_collection_shard_key_coordinator.cpp
+++ b/src/mongo/db/s/refine_collection_shard_key_coordinator.cpp
@@ -86,9 +86,8 @@ void notifyChangeStreamsOnRefineCollectionShardKeyComplete(OperationContext* opC
RefineCollectionShardKeyCoordinator::RefineCollectionShardKeyCoordinator(
ShardingDDLCoordinatorService* service, const BSONObj& initialState)
- : ShardingDDLCoordinator(service, initialState),
- _doc(RefineCollectionShardKeyCoordinatorDocument::parse(
- IDLParserErrorContext("RefineCollectionShardKeyCoordinatorDocument"), initialState)),
+ : RecoverableShardingDDLCoordinator(
+ service, "RefineCollectionShardKeyCoordinator", initialState),
_request(_doc.getRefineCollectionShardKeyRequest()),
_newShardKey(_doc.getNewShardKey()) {}
@@ -104,47 +103,8 @@ void RefineCollectionShardKeyCoordinator::checkIfOptionsConflict(const BSONObj&
_request.toBSON() == otherDoc.getRefineCollectionShardKeyRequest().toBSON()));
}
-boost::optional<BSONObj> RefineCollectionShardKeyCoordinator::reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept {
- BSONObjBuilder cmdBob;
- if (const auto& optComment = getForwardableOpMetadata().getComment()) {
- cmdBob.append(optComment.get().firstElement());
- }
- cmdBob.appendElements(_request.toBSON());
-
- BSONObjBuilder bob;
- bob.append("type", "op");
- bob.append("desc", "RefineCollectionShardKeyCoordinator");
- bob.append("op", "command");
- bob.append("ns", nss().toString());
- bob.append("command", cmdBob.obj());
- bob.append("active", true);
- return bob.obj();
-}
-
-void RefineCollectionShardKeyCoordinator::_enterPhase(Phase newPhase) {
- StateDoc newDoc(_doc);
- newDoc.setPhase(newPhase);
-
- LOGV2_DEBUG(
- 6233200,
- 2,
- "Refine collection shard key coordinator phase transition",
- "namespace"_attr = nss(),
- "newPhase"_attr = RefineCollectionShardKeyCoordinatorPhase_serializer(newDoc.getPhase()),
- "oldPhase"_attr = RefineCollectionShardKeyCoordinatorPhase_serializer(_doc.getPhase()));
-
- if (_doc.getPhase() == Phase::kUnset) {
- newDoc = _insertStateDocument(std::move(newDoc));
- } else {
- newDoc = _updateStateDocument(cc().makeOperationContext().get(), std::move(newDoc));
- }
-
- {
- stdx::unique_lock ul{_docMutex};
- _doc = std::move(newDoc);
- }
+void RefineCollectionShardKeyCoordinator::appendCommandInfo(BSONObjBuilder* cmdInfoBuilder) const {
+ cmdInfoBuilder->appendElements(_request.toBSON());
}
ExecutorFuture<void> RefineCollectionShardKeyCoordinator::_runImpl(
diff --git a/src/mongo/db/s/refine_collection_shard_key_coordinator.h b/src/mongo/db/s/refine_collection_shard_key_coordinator.h
index c2e70a0d067..c461383e876 100644
--- a/src/mongo/db/s/refine_collection_shard_key_coordinator.h
+++ b/src/mongo/db/s/refine_collection_shard_key_coordinator.h
@@ -35,7 +35,9 @@
namespace mongo {
-class RefineCollectionShardKeyCoordinator : public ShardingDDLCoordinator {
+class RefineCollectionShardKeyCoordinator
+ : public RecoverableShardingDDLCoordinator<RefineCollectionShardKeyCoordinatorDocument,
+ RefineCollectionShardKeyCoordinatorPhaseEnum> {
public:
using StateDoc = RefineCollectionShardKeyCoordinatorDocument;
using Phase = RefineCollectionShardKeyCoordinatorPhaseEnum;
@@ -45,40 +47,16 @@ public:
void checkIfOptionsConflict(const BSONObj& coorDoc) const override;
- boost::optional<BSONObj> reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept override;
+ void appendCommandInfo(BSONObjBuilder* cmdInfoBuilder) const override;
private:
- ShardingDDLCoordinatorMetadata const& metadata() const override {
- return _doc.getShardingDDLCoordinatorMetadata();
+ StringData serializePhase(const Phase& phase) const override {
+ return RefineCollectionShardKeyCoordinatorPhase_serializer(phase);
}
ExecutorFuture<void> _runImpl(std::shared_ptr<executor::ScopedTaskExecutor> executor,
const CancellationToken& token) noexcept override;
- template <typename Func>
- auto _executePhase(const Phase& newPhase, Func&& func) {
- return [=] {
- const auto& currPhase = _doc.getPhase();
-
- if (currPhase > newPhase) {
- // Do not execute this phase if we already reached a subsequent one.
- return;
- }
- if (currPhase < newPhase) {
- // Persist the new phase if this is the first time we are executing it.
- _enterPhase(newPhase);
- }
- return func();
- };
- }
-
- void _enterPhase(Phase newPhase);
-
- mutable Mutex _docMutex = MONGO_MAKE_LATCH("RefineCollectionShardKeyCoordinator::_docMutex");
- RefineCollectionShardKeyCoordinatorDocument _doc;
-
const mongo::RefineCollectionShardKeyRequest _request;
const KeyPattern _newShardKey;
diff --git a/src/mongo/db/s/rename_collection_coordinator.cpp b/src/mongo/db/s/rename_collection_coordinator.cpp
index 789f8ade994..64680e96cc2 100644
--- a/src/mongo/db/s/rename_collection_coordinator.cpp
+++ b/src/mongo/db/s/rename_collection_coordinator.cpp
@@ -90,9 +90,7 @@ boost::optional<UUID> getCollectionUUID(OperationContext* opCtx,
RenameCollectionCoordinator::RenameCollectionCoordinator(ShardingDDLCoordinatorService* service,
const BSONObj& initialState)
- : ShardingDDLCoordinator(service, initialState),
- _doc(RenameCollectionCoordinatorDocument::parse(
- IDLParserErrorContext("RenameCollectionCoordinatorDocument"), initialState)),
+ : RecoverableShardingDDLCoordinator(service, "RenameCollectionCoordinator", initialState),
_request(_doc.getRenameCollectionRequest()) {}
void RenameCollectionCoordinator::checkIfOptionsConflict(const BSONObj& doc) const {
@@ -113,54 +111,8 @@ std::vector<StringData> RenameCollectionCoordinator::_acquireAdditionalLocks(
return {_request.getTo().ns()};
}
-boost::optional<BSONObj> RenameCollectionCoordinator::reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept {
-
- BSONObjBuilder cmdBob;
- if (const auto& optComment = getForwardableOpMetadata().getComment()) {
- cmdBob.append(optComment.get().firstElement());
- }
- cmdBob.appendElements(_request.toBSON());
-
- const auto currPhase = [&]() {
- stdx::lock_guard l{_docMutex};
- return _doc.getPhase();
- }();
-
- BSONObjBuilder bob;
- bob.append("type", "op");
- bob.append("desc", "RenameCollectionCoordinator");
- bob.append("op", "command");
- bob.append("ns", nss().toString());
- bob.append("command", cmdBob.obj());
- bob.append("currentPhase", currPhase);
- bob.append("active", true);
- return bob.obj();
-}
-
-void RenameCollectionCoordinator::_enterPhase(Phase newPhase) {
- StateDoc newDoc(_doc);
- newDoc.setPhase(newPhase);
-
- LOGV2_DEBUG(5460501,
- 2,
- "Rename collection coordinator phase transition",
- "fromNs"_attr = nss(),
- "toNs"_attr = _request.getTo(),
- "newPhase"_attr = RenameCollectionCoordinatorPhase_serializer(newDoc.getPhase()),
- "oldPhase"_attr = RenameCollectionCoordinatorPhase_serializer(_doc.getPhase()));
-
- if (_doc.getPhase() == Phase::kUnset) {
- newDoc = _insertStateDocument(std::move(newDoc));
- } else {
- newDoc = _updateStateDocument(cc().makeOperationContext().get(), std::move(newDoc));
- }
-
- {
- stdx::unique_lock ul{_docMutex};
- _doc = std::move(newDoc);
- }
+void RenameCollectionCoordinator::appendCommandInfo(BSONObjBuilder* cmdInfoBuilder) const {
+ cmdInfoBuilder->appendElements(_request.toBSON());
}
ExecutorFuture<void> RenameCollectionCoordinator::_runImpl(
@@ -275,15 +227,15 @@ ExecutorFuture<void> RenameCollectionCoordinator::_runImpl(
getForwardableOpMetadata().setOn(opCtx);
if (!_firstExecution) {
- _doc = _updateSession(opCtx, _doc);
+ _updateSession(opCtx);
_performNoopRetryableWriteOnAllShardsAndConfigsvr(
- opCtx, getCurrentSession(_doc), **executor);
+ opCtx, getCurrentSession(), **executor);
}
const auto& fromNss = nss();
- _doc = _updateSession(opCtx, _doc);
- const OperationSessionInfo osi = getCurrentSession(_doc);
+ _updateSession(opCtx);
+ const OperationSessionInfo osi = getCurrentSession();
// On participant shards:
// - Block CRUD on source and target collection in case at least one
@@ -303,20 +255,8 @@ ExecutorFuture<void> RenameCollectionCoordinator::_runImpl(
const auto cmdObj = CommandHelpers::appendMajorityWriteConcern(
renameCollParticipantRequest.toBSON({}));
- try {
- sharding_ddl_util::sendAuthenticatedCommandToShards(
- opCtx,
- fromNss.db(),
- cmdObj.addFields(osi.toBSON()),
- participants,
- **executor);
-
- } catch (const ExceptionFor<ErrorCodes::NotARetryableWriteCommand>&) {
- // Older 5.0 binaries don't support running the command as a
- // retryable write yet. In that case, retry without attaching session info.
- sharding_ddl_util::sendAuthenticatedCommandToShards(
- opCtx, fromNss.db(), cmdObj, participants, **executor);
- }
+ sharding_ddl_util::sendAuthenticatedCommandToShards(
+ opCtx, fromNss.db(), cmdObj.addFields(osi.toBSON()), participants, **executor);
}))
.then(_executePhase(
Phase::kRenameMetadata,
@@ -325,10 +265,13 @@ ExecutorFuture<void> RenameCollectionCoordinator::_runImpl(
auto* opCtx = opCtxHolder.get();
getForwardableOpMetadata().setOn(opCtx);
+ // For an unsharded collection the CSRS server can not verify the targetUUID.
+ // Use the session ID + txnNumber to ensure no stale requests get through.
+ _updateSession(opCtx);
+
if (!_firstExecution) {
- _doc = _updateSession(opCtx, _doc);
_performNoopRetryableWriteOnAllShardsAndConfigsvr(
- opCtx, getCurrentSession(_doc), **executor);
+ opCtx, getCurrentSession(), **executor);
}
ConfigsvrRenameCollectionMetadata req(nss(), _request.getTo());
@@ -336,28 +279,12 @@ ExecutorFuture<void> RenameCollectionCoordinator::_runImpl(
const auto cmdObj = CommandHelpers::appendMajorityWriteConcern(req.toBSON({}));
const auto& configShard = Grid::get(opCtx)->shardRegistry()->getConfigShard();
- // For an unsharded collection the CSRS server can not verify the targetUUID.
- // Use the session ID + txnNumber to ensure no stale requests get through.
- _doc = _updateSession(opCtx, _doc);
- const OperationSessionInfo osi = getCurrentSession(_doc);
-
- try {
- uassertStatusOK(Shard::CommandResponse::getEffectiveStatus(
- configShard->runCommand(opCtx,
- ReadPreferenceSetting(ReadPreference::PrimaryOnly),
- "admin",
- cmdObj.addFields(osi.toBSON()),
- Shard::RetryPolicy::kIdempotent)));
- } catch (const ExceptionFor<ErrorCodes::NotARetryableWriteCommand>&) {
- // Older 5.0 binaries don't support running the command as a
- // retryable write yet. In that case, retry without attaching session info.
- uassertStatusOK(Shard::CommandResponse::getEffectiveStatus(
- configShard->runCommand(opCtx,
- ReadPreferenceSetting(ReadPreference::PrimaryOnly),
- "admin",
- cmdObj,
- Shard::RetryPolicy::kIdempotent)));
- }
+ uassertStatusOK(Shard::CommandResponse::getEffectiveStatus(
+ configShard->runCommand(opCtx,
+ ReadPreferenceSetting(ReadPreference::PrimaryOnly),
+ "admin",
+ cmdObj.addFields(getCurrentSession().toBSON()),
+ Shard::RetryPolicy::kIdempotent)));
}))
.then(_executePhase(
Phase::kUnblockCRUD,
@@ -367,9 +294,9 @@ ExecutorFuture<void> RenameCollectionCoordinator::_runImpl(
getForwardableOpMetadata().setOn(opCtx);
if (!_firstExecution) {
- _doc = _updateSession(opCtx, _doc);
+ _updateSession(opCtx);
_performNoopRetryableWriteOnAllShardsAndConfigsvr(
- opCtx, getCurrentSession(_doc), **executor);
+ opCtx, getCurrentSession(), **executor);
}
const auto& fromNss = nss();
@@ -383,22 +310,11 @@ ExecutorFuture<void> RenameCollectionCoordinator::_runImpl(
unblockParticipantRequest.toBSON({}));
auto participants = Grid::get(opCtx)->shardRegistry()->getAllShardIds(opCtx);
- _doc = _updateSession(opCtx, _doc);
- const OperationSessionInfo osi = getCurrentSession(_doc);
+ _updateSession(opCtx);
+ const OperationSessionInfo osi = getCurrentSession();
- try {
- sharding_ddl_util::sendAuthenticatedCommandToShards(
- opCtx,
- fromNss.db(),
- cmdObj.addFields(osi.toBSON()),
- participants,
- **executor);
- } catch (const ExceptionFor<ErrorCodes::NotARetryableWriteCommand>&) {
- // Older 5.0 binaries don't support running the command as a
- // retryable write yet. In that case, retry without attaching session info.
- sharding_ddl_util::sendAuthenticatedCommandToShards(
- opCtx, fromNss.db(), cmdObj, participants, **executor);
- }
+ sharding_ddl_util::sendAuthenticatedCommandToShards(
+ opCtx, fromNss.db(), cmdObj.addFields(osi.toBSON()), participants, **executor);
}))
.then(_executePhase(Phase::kSetResponse,
[this, anchor = shared_from_this()] {
diff --git a/src/mongo/db/s/rename_collection_coordinator.h b/src/mongo/db/s/rename_collection_coordinator.h
index af395745001..32621bb6ea4 100644
--- a/src/mongo/db/s/rename_collection_coordinator.h
+++ b/src/mongo/db/s/rename_collection_coordinator.h
@@ -35,7 +35,9 @@
namespace mongo {
-class RenameCollectionCoordinator final : public ShardingDDLCoordinator {
+class RenameCollectionCoordinator final
+ : public RecoverableShardingDDLCoordinator<RenameCollectionCoordinatorDocument,
+ RenameCollectionCoordinatorPhaseEnum> {
public:
using StateDoc = RenameCollectionCoordinatorDocument;
using Phase = RenameCollectionCoordinatorPhaseEnum;
@@ -45,9 +47,7 @@ public:
void checkIfOptionsConflict(const BSONObj& doc) const override;
- boost::optional<BSONObj> reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept override;
+ void appendCommandInfo(BSONObjBuilder* cmdInfoBuilder) const override;
/**
* Waits for the rename to complete and returns the collection version.
@@ -59,41 +59,19 @@ public:
}
private:
+ StringData serializePhase(const Phase& phase) const override {
+ return RenameCollectionCoordinatorPhase_serializer(phase);
+ }
+
bool _mustAlwaysMakeProgress() override {
return _doc.getPhase() >= Phase::kFreezeMigrations;
};
- ShardingDDLCoordinatorMetadata const& metadata() const override {
- return _doc.getShardingDDLCoordinatorMetadata();
- }
-
ExecutorFuture<void> _runImpl(std::shared_ptr<executor::ScopedTaskExecutor> executor,
const CancellationToken& token) noexcept override;
std::vector<StringData> _acquireAdditionalLocks(OperationContext* opCtx) override;
- template <typename Func>
- auto _executePhase(const Phase& newPhase, Func&& func) {
- return [=] {
- const auto& currPhase = _doc.getPhase();
-
- if (currPhase > newPhase) {
- // Do not execute this phase if we already reached a subsequent one.
- return;
- }
- if (currPhase < newPhase) {
- // Persist the new phase if this is the first time we are executing it.
- _enterPhase(newPhase);
- }
- return func();
- };
- }
-
- void _enterPhase(Phase newPhase);
-
- mutable Mutex _docMutex = MONGO_MAKE_LATCH("RenameCollectionCoordinator::_docMutex");
- RenameCollectionCoordinatorDocument _doc;
-
boost::optional<RenameCollectionResponse> _response;
const RenameCollectionRequest _request;
};
diff --git a/src/mongo/db/s/rename_collection_participant_service.cpp b/src/mongo/db/s/rename_collection_participant_service.cpp
index bf48f41b581..64419c6c5e4 100644
--- a/src/mongo/db/s/rename_collection_participant_service.cpp
+++ b/src/mongo/db/s/rename_collection_participant_service.cpp
@@ -27,8 +27,7 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
+#include "mongo/db/s/rename_collection_participant_service.h"
#include "mongo/base/checked_cast.h"
#include "mongo/db/catalog/collection_catalog.h"
@@ -40,8 +39,6 @@
#include "mongo/db/s/operation_sharding_state.h"
#include "mongo/db/s/range_deletion_util.h"
#include "mongo/db/s/recoverable_critical_section_service.h"
-#include "mongo/db/s/rename_collection_participant_service.h"
-#include "mongo/db/s/shard_metadata_util.h"
#include "mongo/logv2/log.h"
#include "mongo/s/catalog/sharding_catalog_client.h"
#include "mongo/s/grid.h"
@@ -49,9 +46,7 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
-
namespace mongo {
-
namespace {
const Backoff kExponentialBackoff(Seconds(1), Milliseconds::max());
@@ -76,7 +71,6 @@ void dropCollectionLocally(OperationContext* opCtx, const NamespaceString& nss)
"collectionExisted"_attr = knownNss);
}
-/* Clear the CollectionShardingRuntime entry for the specified namespace */
void clearFilteringMetadata(OperationContext* opCtx, const NamespaceString& nss) {
UninterruptibleLockGuard noInterrupt(opCtx->lockState());
Lock::DBLock dbLock(opCtx, nss.db(), MODE_IX);
@@ -135,6 +129,7 @@ void renameOrDropTarget(OperationContext* opCtx,
deleteRangeDeletionTasksForRename(opCtx, fromNss, toNss);
}
}
+
} // namespace
RenameCollectionParticipantService* RenameCollectionParticipantService::getService(
diff --git a/src/mongo/db/s/reshard_collection_coordinator.cpp b/src/mongo/db/s/reshard_collection_coordinator.cpp
index 30ff299e538..c0c9648e0a4 100644
--- a/src/mongo/db/s/reshard_collection_coordinator.cpp
+++ b/src/mongo/db/s/reshard_collection_coordinator.cpp
@@ -107,10 +107,7 @@ ReshardCollectionCoordinator::ReshardCollectionCoordinator(ShardingDDLCoordinato
ReshardCollectionCoordinator::ReshardCollectionCoordinator(ShardingDDLCoordinatorService* service,
const BSONObj& initialState,
bool persistCoordinatorDocument)
- : ShardingDDLCoordinator(service, initialState),
- _initialState(initialState.getOwned()),
- _doc(ReshardCollectionCoordinatorDocument::parse(
- IDLParserErrorContext("ReshardCollectionCoordinatorDocument"), _initialState)),
+ : RecoverableShardingDDLCoordinator(service, "ReshardCollectionCoordinator", initialState),
_request(_doc.getReshardCollectionRequest()),
_persistCoordinatorDocument(persistCoordinatorDocument) {}
@@ -125,50 +122,15 @@ void ReshardCollectionCoordinator::checkIfOptionsConflict(const BSONObj& doc) co
_request.toBSON() == otherDoc.getReshardCollectionRequest().toBSON()));
}
-boost::optional<BSONObj> ReshardCollectionCoordinator::reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept {
- BSONObjBuilder cmdBob;
- if (const auto& optComment = getForwardableOpMetadata().getComment()) {
- cmdBob.append(optComment.get().firstElement());
- }
- cmdBob.appendElements(_request.toBSON());
-
- BSONObjBuilder bob;
- bob.append("type", "op");
- bob.append("desc", "ReshardCollectionCoordinator");
- bob.append("op", "command");
- bob.append("ns", nss().toString());
- bob.append("command", cmdBob.obj());
- bob.append("active", true);
- return bob.obj();
+void ReshardCollectionCoordinator::appendCommandInfo(BSONObjBuilder* cmdInfoBuilder) const {
+ cmdInfoBuilder->appendElements(_request.toBSON());
}
void ReshardCollectionCoordinator::_enterPhase(Phase newPhase) {
if (!_persistCoordinatorDocument) {
return;
}
-
- StateDoc newDoc(_doc);
- newDoc.setPhase(newPhase);
-
- LOGV2_DEBUG(6206400,
- 2,
- "Reshard collection coordinator phase transition",
- "namespace"_attr = nss(),
- "newPhase"_attr = ReshardCollectionCoordinatorPhase_serializer(newDoc.getPhase()),
- "oldPhase"_attr = ReshardCollectionCoordinatorPhase_serializer(_doc.getPhase()));
-
- if (_doc.getPhase() == Phase::kUnset) {
- newDoc = _insertStateDocument(std::move(newDoc));
- } else {
- newDoc = _updateStateDocument(cc().makeOperationContext().get(), std::move(newDoc));
- }
-
- {
- stdx::unique_lock ul{_docMutex};
- _doc = std::move(newDoc);
- }
+ RecoverableShardingDDLCoordinator::_enterPhase(newPhase);
}
ExecutorFuture<void> ReshardCollectionCoordinator::_runImpl(
@@ -196,7 +158,7 @@ ExecutorFuture<void> ReshardCollectionCoordinator::_runImpl(
StateDoc newDoc(_doc);
newDoc.setOldShardKey(cmOld.getShardKeyPattern().getKeyPattern().toBSON());
newDoc.setOldCollectionUUID(cmOld.getUUID());
- _doc = _updateStateDocument(opCtx, std::move(newDoc));
+ _updateStateDocument(opCtx, std::move(newDoc));
} else {
_doc.setOldShardKey(cmOld.getShardKeyPattern().getKeyPattern().toBSON());
_doc.setOldCollectionUUID(cmOld.getUUID());
diff --git a/src/mongo/db/s/reshard_collection_coordinator.h b/src/mongo/db/s/reshard_collection_coordinator.h
index 54d98ee03d1..085c183dc55 100644
--- a/src/mongo/db/s/reshard_collection_coordinator.h
+++ b/src/mongo/db/s/reshard_collection_coordinator.h
@@ -34,7 +34,9 @@
#include "mongo/util/future.h"
namespace mongo {
-class ReshardCollectionCoordinator : public ShardingDDLCoordinator {
+class ReshardCollectionCoordinator
+ : public RecoverableShardingDDLCoordinator<ReshardCollectionCoordinatorDocument,
+ ReshardCollectionCoordinatorPhaseEnum> {
public:
using StateDoc = ReshardCollectionCoordinatorDocument;
using Phase = ReshardCollectionCoordinatorPhaseEnum;
@@ -44,9 +46,7 @@ public:
void checkIfOptionsConflict(const BSONObj& coorDoc) const override;
- boost::optional<BSONObj> reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept override;
+ void appendCommandInfo(BSONObjBuilder* cmdInfoBuilder) const override;
protected:
ReshardCollectionCoordinator(ShardingDDLCoordinatorService* service,
@@ -54,37 +54,15 @@ protected:
bool persistCoordinatorDocument);
private:
- ShardingDDLCoordinatorMetadata const& metadata() const override {
- stdx::lock_guard l{_docMutex};
- return _doc.getShardingDDLCoordinatorMetadata();
+ StringData serializePhase(const Phase& phase) const override {
+ return ReshardCollectionCoordinatorPhase_serializer(phase);
}
ExecutorFuture<void> _runImpl(std::shared_ptr<executor::ScopedTaskExecutor> executor,
const CancellationToken& token) noexcept override;
- template <typename Func>
- auto _executePhase(const Phase& newPhase, Func&& func) {
- return [=] {
- const auto& currPhase = _doc.getPhase();
-
- if (currPhase > newPhase) {
- // Do not execute this phase if we already reached a subsequent one.
- return;
- }
- if (currPhase < newPhase) {
- // Persist the new phase if this is the first time we are executing it.
- _enterPhase(newPhase);
- }
- return func();
- };
- }
-
void _enterPhase(Phase newPhase);
- const BSONObj _initialState;
- mutable Mutex _docMutex = MONGO_MAKE_LATCH("ReshardCollectionCoordinator::_docMutex");
- ReshardCollectionCoordinatorDocument _doc;
-
const mongo::ReshardCollectionRequest _request;
const bool _persistCoordinatorDocument; // TODO: SERVER-62338 remove this then 6.0 branches out
diff --git a/src/mongo/db/s/resharding/document_source_resharding_ownership_match.cpp b/src/mongo/db/s/resharding/document_source_resharding_ownership_match.cpp
index aaeb6180654..dc3176cf3e8 100644
--- a/src/mongo/db/s/resharding/document_source_resharding_ownership_match.cpp
+++ b/src/mongo/db/s/resharding/document_source_resharding_ownership_match.cpp
@@ -117,7 +117,8 @@ DocumentSource::GetModPathsReturn DocumentSourceReshardingOwnershipMatch::getMod
DocumentSource::GetNextResult DocumentSourceReshardingOwnershipMatch::doGetNext() {
if (!_tempReshardingChunkMgr) {
// TODO: Actually propagate the temporary resharding namespace from the recipient.
- auto tempReshardingNss = constructTemporaryReshardingNss(pExpCtx->ns.db(), *pExpCtx->uuid);
+ auto tempReshardingNss =
+ resharding::constructTemporaryReshardingNss(pExpCtx->ns.db(), *pExpCtx->uuid);
auto* catalogCache = Grid::get(pExpCtx->opCtx)->catalogCache();
_tempReshardingChunkMgr =
diff --git a/src/mongo/db/s/resharding/resharding_agg_test.cpp b/src/mongo/db/s/resharding/resharding_agg_test.cpp
index ce8d110e5ab..c49467f79f9 100644
--- a/src/mongo/db/s/resharding/resharding_agg_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_agg_test.cpp
@@ -362,7 +362,7 @@ protected:
expCtx->ns = kRemoteOplogNss;
expCtx->mongoProcessInterface = std::make_shared<MockMongoInterface>(pipelineSource);
- auto pipeline = createOplogFetchingPipelineForResharding(
+ auto pipeline = resharding::createOplogFetchingPipelineForResharding(
expCtx,
ReshardingDonorOplogId(Timestamp::min(), Timestamp::min()),
_reshardingCollUUID,
@@ -524,13 +524,14 @@ TEST_F(ReshardingAggTest, VerifyPipelineOutputHasOplogSchema) {
expCtx->ns = kRemoteOplogNss;
expCtx->mongoProcessInterface = std::make_shared<MockMongoInterface>(pipelineSource);
- std::unique_ptr<Pipeline, PipelineDeleter> pipeline = createOplogFetchingPipelineForResharding(
- expCtx,
- // Use the test to also exercise the stages for resuming. The timestamp passed in is
- // excluded from the results.
- ReshardingDonorOplogId(insertOplog.getTimestamp(), insertOplog.getTimestamp()),
- _reshardingCollUUID,
- {_destinedRecipient});
+ std::unique_ptr<Pipeline, PipelineDeleter> pipeline =
+ resharding::createOplogFetchingPipelineForResharding(
+ expCtx,
+ // Use the test to also exercise the stages for resuming. The timestamp passed in is
+ // excluded from the results.
+ ReshardingDonorOplogId(insertOplog.getTimestamp(), insertOplog.getTimestamp()),
+ _reshardingCollUUID,
+ {_destinedRecipient});
auto bsonPipeline = pipeline->serializeToBson();
if (debug) {
std::cout << "Pipeline stages:" << std::endl;
@@ -624,11 +625,12 @@ TEST_F(ReshardingAggTest, VerifyPipelinePreparedTxn) {
expCtx->ns = kRemoteOplogNss;
expCtx->mongoProcessInterface = std::make_shared<MockMongoInterface>(pipelineSource);
- std::unique_ptr<Pipeline, PipelineDeleter> pipeline = createOplogFetchingPipelineForResharding(
- expCtx,
- ReshardingDonorOplogId(Timestamp::min(), Timestamp::min()),
- _reshardingCollUUID,
- {_destinedRecipient});
+ std::unique_ptr<Pipeline, PipelineDeleter> pipeline =
+ resharding::createOplogFetchingPipelineForResharding(
+ expCtx,
+ ReshardingDonorOplogId(Timestamp::min(), Timestamp::min()),
+ _reshardingCollUUID,
+ {_destinedRecipient});
if (debug) {
std::cout << "Pipeline stages:" << std::endl;
// This is can be changed to process a prefix of the pipeline for debugging.
@@ -1476,7 +1478,7 @@ TEST_F(ReshardingAggWithStorageTest, RetryableFindAndModifyWithImageLookup) {
expCtx->mongoProcessInterface = std::move(mockMongoInterface);
}
- auto pipeline = createOplogFetchingPipelineForResharding(
+ auto pipeline = resharding::createOplogFetchingPipelineForResharding(
expCtx, ReshardingDonorOplogId(Timestamp::min(), Timestamp::min()), kCrudUUID, kMyShardId);
pipeline->addInitialSource(DocumentSourceMock::createForTest(pipelineSource, expCtx));
@@ -1578,8 +1580,8 @@ TEST_F(ReshardingAggWithStorageTest,
expCtx->mongoProcessInterface = std::move(mockMongoInterface);
}
- auto pipeline =
- createOplogFetchingPipelineForResharding(expCtx, startAt, kCrudUUID, kMyShardId);
+ auto pipeline = resharding::createOplogFetchingPipelineForResharding(
+ expCtx, startAt, kCrudUUID, kMyShardId);
pipeline->addInitialSource(DocumentSourceMock::createForTest(pipelineSource, expCtx));
return pipeline;
};
diff --git a/src/mongo/db/s/resharding/resharding_collection_cloner.cpp b/src/mongo/db/s/resharding/resharding_collection_cloner.cpp
index a80bfbb88ec..8bd04ebfe37 100644
--- a/src/mongo/db/s/resharding/resharding_collection_cloner.cpp
+++ b/src/mongo/db/s/resharding/resharding_collection_cloner.cpp
@@ -50,7 +50,7 @@
#include "mongo/db/s/resharding/document_source_resharding_ownership_match.h"
#include "mongo/db/s/resharding/resharding_data_copy_util.h"
#include "mongo/db/s/resharding/resharding_future_util.h"
-#include "mongo/db/s/resharding/resharding_metrics_new.h"
+#include "mongo/db/s/resharding/resharding_metrics.h"
#include "mongo/db/s/resharding/resharding_server_parameters_gen.h"
#include "mongo/db/s/resharding/resharding_util.h"
#include "mongo/db/service_context.h"
@@ -80,7 +80,7 @@ bool collectionHasSimpleCollation(OperationContext* opCtx, const NamespaceString
} // namespace
-ReshardingCollectionCloner::ReshardingCollectionCloner(ReshardingMetricsNew* metrics,
+ReshardingCollectionCloner::ReshardingCollectionCloner(ReshardingMetrics* metrics,
ShardKeyPattern newShardKeyPattern,
NamespaceString sourceNss,
const UUID& sourceUUID,
@@ -109,7 +109,7 @@ std::unique_ptr<Pipeline, PipelineDeleter> ReshardingCollectionCloner::makePipel
resolvedNamespaces[_sourceNss.coll()] = {_sourceNss, std::vector<BSONObj>{}};
// Assume that the config.cache.chunks collection isn't a view either.
- auto tempNss = constructTemporaryReshardingNss(_sourceNss.db(), _sourceUUID);
+ auto tempNss = resharding::constructTemporaryReshardingNss(_sourceNss.db(), _sourceUUID);
auto tempCacheChunksNss =
NamespaceString(NamespaceString::kConfigDb, "cache.chunks." + tempNss.ns());
resolvedNamespaces[tempCacheChunksNss.coll()] = {tempCacheChunksNss, std::vector<BSONObj>{}};
diff --git a/src/mongo/db/s/resharding/resharding_collection_cloner.h b/src/mongo/db/s/resharding/resharding_collection_cloner.h
index e24b03c76b6..97e28a4fce5 100644
--- a/src/mongo/db/s/resharding/resharding_collection_cloner.h
+++ b/src/mongo/db/s/resharding/resharding_collection_cloner.h
@@ -52,7 +52,7 @@ class TaskExecutor;
class OperationContext;
class MongoProcessInterface;
-class ReshardingMetricsNew;
+class ReshardingMetrics;
class ServiceContext;
/**
@@ -61,7 +61,7 @@ class ServiceContext;
*/
class ReshardingCollectionCloner {
public:
- ReshardingCollectionCloner(ReshardingMetricsNew* metrics,
+ ReshardingCollectionCloner(ReshardingMetrics* metrics,
ShardKeyPattern newShardKeyPattern,
NamespaceString sourceNss,
const UUID& sourceUUID,
@@ -99,7 +99,7 @@ private:
std::unique_ptr<Pipeline, PipelineDeleter> _restartPipeline(OperationContext* opCtx);
- ReshardingMetricsNew* _metrics;
+ ReshardingMetrics* _metrics;
const ShardKeyPattern _newShardKeyPattern;
const NamespaceString _sourceNss;
const UUID _sourceUUID;
diff --git a/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor.cpp b/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor.cpp
index ae6b61fb314..61eb1a620c4 100644
--- a/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor.cpp
+++ b/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor.cpp
@@ -88,13 +88,13 @@ boost::optional<Milliseconds> extractOperationRemainingTime(const BSONObj& obj)
} // namespace
CoordinatorCommitMonitor::CoordinatorCommitMonitor(
- std::shared_ptr<ReshardingMetricsNew> metricsNew,
+ std::shared_ptr<ReshardingMetrics> metrics,
NamespaceString ns,
std::vector<ShardId> recipientShards,
CoordinatorCommitMonitor::TaskExecutorPtr executor,
CancellationToken cancelToken,
Milliseconds maxDelayBetweenQueries)
- : _metricsNew{std::move(metricsNew)},
+ : _metrics{std::move(metrics)},
_ns(std::move(ns)),
_recipientShards(std::move(recipientShards)),
_executor(std::move(executor)),
@@ -209,8 +209,8 @@ ExecutorFuture<void> CoordinatorCommitMonitor::_makeFuture() const {
return RemainingOperationTimes{Milliseconds(0), Milliseconds::max()};
})
.then([this, anchor = shared_from_this()](RemainingOperationTimes remainingTimes) {
- _metricsNew->setCoordinatorHighEstimateRemainingTimeMillis(remainingTimes.max);
- _metricsNew->setCoordinatorLowEstimateRemainingTimeMillis(remainingTimes.min);
+ _metrics->setCoordinatorHighEstimateRemainingTimeMillis(remainingTimes.max);
+ _metrics->setCoordinatorLowEstimateRemainingTimeMillis(remainingTimes.min);
// Check if all recipient shards are within the commit threshold.
if (remainingTimes.max <= _threshold)
diff --git a/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor.h b/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor.h
index fb9f55d614f..aa3ff0727e9 100644
--- a/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor.h
+++ b/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor.h
@@ -33,7 +33,7 @@
#include <vector>
#include "mongo/db/namespace_string.h"
-#include "mongo/db/s/resharding/resharding_metrics_new.h"
+#include "mongo/db/s/resharding/resharding_metrics.h"
#include "mongo/executor/task_executor.h"
#include "mongo/s/shard_id.h"
#include "mongo/util/cancellation.h"
@@ -69,7 +69,7 @@ public:
Milliseconds max;
};
- CoordinatorCommitMonitor(std::shared_ptr<ReshardingMetricsNew> metricsNew,
+ CoordinatorCommitMonitor(std::shared_ptr<ReshardingMetrics> metrics,
NamespaceString ns,
std::vector<ShardId> recipientShards,
TaskExecutorPtr executor,
@@ -95,7 +95,7 @@ private:
static constexpr auto kDiagnosticLogLevel = 0;
static constexpr auto kMaxDelayBetweenQueries = Seconds(30);
- std::shared_ptr<ReshardingMetricsNew> _metricsNew;
+ std::shared_ptr<ReshardingMetrics> _metrics;
const NamespaceString _ns;
const std::vector<ShardId> _recipientShards;
const TaskExecutorPtr _executor;
diff --git a/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor_test.cpp b/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor_test.cpp
index 1cc717b7aec..d8740053c80 100644
--- a/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_coordinator_commit_monitor_test.cpp
@@ -40,7 +40,7 @@
#include "mongo/db/namespace_string.h"
#include "mongo/db/s/config/config_server_test_fixture.h"
#include "mongo/db/s/resharding/resharding_coordinator_commit_monitor.h"
-#include "mongo/db/s/resharding/resharding_metrics_new.h"
+#include "mongo/db/s/resharding/resharding_metrics.h"
#include "mongo/db/s/resharding/resharding_server_parameters_gen.h"
#include "mongo/executor/thread_pool_task_executor_test_fixture.h"
#include "mongo/logv2/log.h"
@@ -109,7 +109,7 @@ private:
boost::optional<Callback> _runOnMockingNextResponse;
ShardingDataTransformCumulativeMetrics _cumulativeMetrics{"dummyForTest"};
- std::shared_ptr<ReshardingMetricsNew> _metrics;
+ std::shared_ptr<ReshardingMetrics> _metrics;
};
auto makeExecutor() {
@@ -151,7 +151,7 @@ void CoordinatorCommitMonitorTest::setUp() {
_cancellationSource = std::make_unique<CancellationSource>();
auto clockSource = getServiceContext()->getFastClockSource();
- _metrics = std::make_shared<ReshardingMetricsNew>(
+ _metrics = std::make_shared<ReshardingMetrics>(
UUID::gen(),
BSON("y" << 1),
_ns,
diff --git a/src/mongo/db/s/resharding/resharding_coordinator_observer.cpp b/src/mongo/db/s/resharding/resharding_coordinator_observer.cpp
index 5f78cac592c..da457d8eab3 100644
--- a/src/mongo/db/s/resharding/resharding_coordinator_observer.cpp
+++ b/src/mongo/db/s/resharding/resharding_coordinator_observer.cpp
@@ -112,7 +112,7 @@ bool stateTransistionsComplete(WithLock lk,
template <class TParticipant>
Status getStatusFromAbortReasonWithShardInfo(const TParticipant& participant,
StringData participantType) {
- return getStatusFromAbortReason(participant.getMutableState())
+ return resharding::getStatusFromAbortReason(participant.getMutableState())
.withContext("{} shard {} reached an unrecoverable error"_format(
participantType, participant.getId().toString()));
}
@@ -128,7 +128,7 @@ boost::optional<Status> getAbortReasonIfExists(
if (updatedStateDoc.getAbortReason()) {
// Note: the absence of context specifying which shard the abortReason originates from
// implies the abortReason originates from the coordinator.
- return getStatusFromAbortReason(updatedStateDoc);
+ return resharding::getStatusFromAbortReason(updatedStateDoc);
}
for (const auto& donorShard : updatedStateDoc.getDonorShards()) {
diff --git a/src/mongo/db/s/resharding/resharding_coordinator_observer_test.cpp b/src/mongo/db/s/resharding/resharding_coordinator_observer_test.cpp
index 0f3803ab04e..bd893bd6dee 100644
--- a/src/mongo/db/s/resharding/resharding_coordinator_observer_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_coordinator_observer_test.cpp
@@ -53,7 +53,7 @@ protected:
auto coordinatorDoc = ReshardingCoordinatorDocument();
coordinatorDoc.setRecipientShards(std::move(recipients));
coordinatorDoc.setDonorShards(std::move(donors));
- emplaceTruncatedAbortReasonIfExists(coordinatorDoc, abortReason);
+ resharding::emplaceTruncatedAbortReasonIfExists(coordinatorDoc, abortReason);
return coordinatorDoc;
}
@@ -62,9 +62,9 @@ protected:
boost::optional<Timestamp> timestamp = boost::none,
boost::optional<Status> abortReason = boost::none) {
// The mock state here is simulating only one donor shard having errored locally.
- return {makeDonorShard(ShardId{"s1"}, donorState, timestamp),
- makeDonorShard(ShardId{"s2"}, donorState, timestamp, abortReason),
- makeDonorShard(ShardId{"s3"}, donorState, timestamp)};
+ return {resharding::makeDonorShard(ShardId{"s1"}, donorState, timestamp),
+ resharding::makeDonorShard(ShardId{"s2"}, donorState, timestamp, abortReason),
+ resharding::makeDonorShard(ShardId{"s3"}, donorState, timestamp)};
}
std::vector<RecipientShardEntry> makeMockRecipientsInState(
@@ -72,9 +72,9 @@ protected:
boost::optional<Timestamp> timestamp = boost::none,
boost::optional<Status> abortReason = boost::none) {
// The mock state here is simulating only one donor shard having errored locally.
- return {makeRecipientShard(ShardId{"s1"}, recipientState),
- makeRecipientShard(ShardId{"s2"}, recipientState, abortReason),
- makeRecipientShard(ShardId{"s3"}, recipientState)};
+ return {resharding::makeRecipientShard(ShardId{"s1"}, recipientState),
+ resharding::makeRecipientShard(ShardId{"s2"}, recipientState, abortReason),
+ resharding::makeRecipientShard(ShardId{"s3"}, recipientState)};
}
};
@@ -85,15 +85,15 @@ TEST_F(ReshardingCoordinatorObserverTest, onReshardingParticipantTransitionSucce
auto donorShards = makeMockDonorsInState(DonorStateEnum::kDonatingInitialData, Timestamp(1, 1));
std::vector<RecipientShardEntry> recipientShards0{
- makeRecipientShard(ShardId{"s1"}, RecipientStateEnum::kCloning),
- makeRecipientShard(ShardId{"s2"}, RecipientStateEnum::kApplying)};
+ resharding::makeRecipientShard(ShardId{"s1"}, RecipientStateEnum::kCloning),
+ resharding::makeRecipientShard(ShardId{"s2"}, RecipientStateEnum::kApplying)};
auto coordinatorDoc0 = makeCoordinatorDocWithRecipientsAndDonors(recipientShards0, donorShards);
reshardingObserver->onReshardingParticipantTransition(coordinatorDoc0);
ASSERT_FALSE(fut.isReady());
std::vector<RecipientShardEntry> recipientShards1{
- makeRecipientShard(ShardId{"s1"}, RecipientStateEnum::kApplying),
- makeRecipientShard(ShardId{"s2"}, RecipientStateEnum::kApplying)};
+ resharding::makeRecipientShard(ShardId{"s1"}, RecipientStateEnum::kApplying),
+ resharding::makeRecipientShard(ShardId{"s2"}, RecipientStateEnum::kApplying)};
auto coordinatorDoc1 = makeCoordinatorDocWithRecipientsAndDonors(recipientShards1, donorShards);
reshardingObserver->onReshardingParticipantTransition(coordinatorDoc1);
ASSERT_TRUE(fut.isReady());
@@ -110,25 +110,25 @@ TEST_F(ReshardingCoordinatorObserverTest, onReshardingParticipantTransitionTwoOu
auto donorShards = makeMockDonorsInState(DonorStateEnum::kDonatingInitialData, Timestamp(1, 1));
std::vector<RecipientShardEntry> recipientShards0{
- {makeRecipientShard(ShardId{"s1"}, RecipientStateEnum::kCloning)},
- {makeRecipientShard(ShardId{"s2"}, RecipientStateEnum::kApplying)},
- {makeRecipientShard(ShardId{"s3"}, RecipientStateEnum::kApplying)}};
+ {resharding::makeRecipientShard(ShardId{"s1"}, RecipientStateEnum::kCloning)},
+ {resharding::makeRecipientShard(ShardId{"s2"}, RecipientStateEnum::kApplying)},
+ {resharding::makeRecipientShard(ShardId{"s3"}, RecipientStateEnum::kApplying)}};
auto coordinatorDoc0 = makeCoordinatorDocWithRecipientsAndDonors(recipientShards0, donorShards);
reshardingObserver->onReshardingParticipantTransition(coordinatorDoc0);
ASSERT_FALSE(fut.isReady());
std::vector<RecipientShardEntry> recipientShards1{
- {makeRecipientShard(ShardId{"s1"}, RecipientStateEnum::kCloning)},
- {makeRecipientShard(ShardId{"s2"}, RecipientStateEnum::kApplying)},
- {makeRecipientShard(ShardId{"s3"}, RecipientStateEnum::kCloning)}};
+ {resharding::makeRecipientShard(ShardId{"s1"}, RecipientStateEnum::kCloning)},
+ {resharding::makeRecipientShard(ShardId{"s2"}, RecipientStateEnum::kApplying)},
+ {resharding::makeRecipientShard(ShardId{"s3"}, RecipientStateEnum::kCloning)}};
auto coordinatorDoc1 = makeCoordinatorDocWithRecipientsAndDonors(recipientShards1, donorShards);
reshardingObserver->onReshardingParticipantTransition(coordinatorDoc1);
ASSERT_FALSE(fut.isReady());
std::vector<RecipientShardEntry> recipientShards2{
- {makeRecipientShard(ShardId{"s1"}, RecipientStateEnum::kApplying)},
- {makeRecipientShard(ShardId{"s2"}, RecipientStateEnum::kApplying)},
- {makeRecipientShard(ShardId{"s3"}, RecipientStateEnum::kApplying)}};
+ {resharding::makeRecipientShard(ShardId{"s1"}, RecipientStateEnum::kApplying)},
+ {resharding::makeRecipientShard(ShardId{"s2"}, RecipientStateEnum::kApplying)},
+ {resharding::makeRecipientShard(ShardId{"s3"}, RecipientStateEnum::kApplying)}};
auto coordinatorDoc2 = makeCoordinatorDocWithRecipientsAndDonors(recipientShards2, donorShards);
reshardingObserver->onReshardingParticipantTransition(coordinatorDoc2);
ASSERT_TRUE(fut.isReady());
@@ -145,11 +145,11 @@ TEST_F(ReshardingCoordinatorObserverTest, participantReportsError) {
auto donorShards = makeMockDonorsInState(DonorStateEnum::kDonatingInitialData, Timestamp(1, 1));
std::vector<RecipientShardEntry> recipientShards{
- {makeRecipientShard(ShardId{"s1"}, RecipientStateEnum::kCloning)},
- {makeRecipientShard(ShardId{"s2"},
- RecipientStateEnum::kError,
- Status{ErrorCodes::InternalError, "We gotta abort"})},
- {makeRecipientShard(ShardId{"s3"}, RecipientStateEnum::kApplying)}};
+ {resharding::makeRecipientShard(ShardId{"s1"}, RecipientStateEnum::kCloning)},
+ {resharding::makeRecipientShard(ShardId{"s2"},
+ RecipientStateEnum::kError,
+ Status{ErrorCodes::InternalError, "We gotta abort"})},
+ {resharding::makeRecipientShard(ShardId{"s3"}, RecipientStateEnum::kApplying)}};
auto coordinatorDoc = makeCoordinatorDocWithRecipientsAndDonors(recipientShards, donorShards);
reshardingObserver->onReshardingParticipantTransition(coordinatorDoc);
auto resp = fut.getNoThrow();
@@ -173,9 +173,11 @@ TEST_F(ReshardingCoordinatorObserverTest, participantsDoneAborting) {
// donor who hasn't seen there was an error yet.
auto recipientShards = makeMockRecipientsInState(RecipientStateEnum::kDone, Timestamp(1, 1));
std::vector<DonorShardEntry> donorShards0{
- {makeDonorShard(ShardId{"s1"}, DonorStateEnum::kDone, Timestamp(1, 1), abortReason)},
- {makeDonorShard(ShardId{"s2"}, DonorStateEnum::kDonatingOplogEntries, Timestamp(1, 1))},
- {makeDonorShard(ShardId{"s3"}, DonorStateEnum::kDone, Timestamp(1, 1))}};
+ {resharding::makeDonorShard(
+ ShardId{"s1"}, DonorStateEnum::kDone, Timestamp(1, 1), abortReason)},
+ {resharding::makeDonorShard(
+ ShardId{"s2"}, DonorStateEnum::kDonatingOplogEntries, Timestamp(1, 1))},
+ {resharding::makeDonorShard(ShardId{"s3"}, DonorStateEnum::kDone, Timestamp(1, 1))}};
auto coordinatorDoc0 =
makeCoordinatorDocWithRecipientsAndDonors(recipientShards, donorShards0, abortReason);
reshardingObserver->onReshardingParticipantTransition(coordinatorDoc0);
@@ -183,9 +185,10 @@ TEST_F(ReshardingCoordinatorObserverTest, participantsDoneAborting) {
// All participants are done.
std::vector<DonorShardEntry> donorShards1{
- {makeDonorShard(ShardId{"s1"}, DonorStateEnum::kDone, Timestamp(1, 1), abortReason)},
- {makeDonorShard(ShardId{"s2"}, DonorStateEnum::kDone, Timestamp(1, 1))},
- {makeDonorShard(ShardId{"s3"}, DonorStateEnum::kDone, Timestamp(1, 1))}};
+ {resharding::makeDonorShard(
+ ShardId{"s1"}, DonorStateEnum::kDone, Timestamp(1, 1), abortReason)},
+ {resharding::makeDonorShard(ShardId{"s2"}, DonorStateEnum::kDone, Timestamp(1, 1))},
+ {resharding::makeDonorShard(ShardId{"s3"}, DonorStateEnum::kDone, Timestamp(1, 1))}};
auto coordinatorDoc1 =
makeCoordinatorDocWithRecipientsAndDonors(recipientShards, donorShards1, abortReason);
reshardingObserver->onReshardingParticipantTransition(coordinatorDoc1);
@@ -206,15 +209,15 @@ TEST_F(ReshardingCoordinatorObserverTest, onReshardingRecipientsOutOfSync) {
auto donorShards = makeMockDonorsInState(DonorStateEnum::kDonatingInitialData, Timestamp(1, 1));
std::vector<RecipientShardEntry> recipientShards0{
- makeRecipientShard(ShardId{"s1"}, RecipientStateEnum::kUnused),
- makeRecipientShard(ShardId{"s2"}, RecipientStateEnum::kStrictConsistency)};
+ resharding::makeRecipientShard(ShardId{"s1"}, RecipientStateEnum::kUnused),
+ resharding::makeRecipientShard(ShardId{"s2"}, RecipientStateEnum::kStrictConsistency)};
auto coordinatorDoc0 = makeCoordinatorDocWithRecipientsAndDonors(recipientShards0, donorShards);
reshardingObserver->onReshardingParticipantTransition(coordinatorDoc0);
ASSERT_FALSE(fut.isReady());
std::vector<RecipientShardEntry> recipientShards1{
- makeRecipientShard(ShardId{"s1"}, RecipientStateEnum::kApplying),
- makeRecipientShard(ShardId{"s2"}, RecipientStateEnum::kStrictConsistency)};
+ resharding::makeRecipientShard(ShardId{"s1"}, RecipientStateEnum::kApplying),
+ resharding::makeRecipientShard(ShardId{"s2"}, RecipientStateEnum::kStrictConsistency)};
auto coordinatorDoc1 = makeCoordinatorDocWithRecipientsAndDonors(recipientShards1, donorShards);
reshardingObserver->onReshardingParticipantTransition(coordinatorDoc1);
ASSERT_TRUE(fut.isReady());
@@ -231,15 +234,18 @@ TEST_F(ReshardingCoordinatorObserverTest, onDonorsReportedMinFetchTimestamp) {
auto recipientShards = makeMockRecipientsInState(RecipientStateEnum::kUnused);
std::vector<DonorShardEntry> donorShards0{
- {makeDonorShard(ShardId{"s1"}, DonorStateEnum::kDonatingInitialData, Timestamp(1, 1))},
- {makeDonorShard(ShardId{"s2"}, DonorStateEnum::kPreparingToDonate)}};
+ {resharding::makeDonorShard(
+ ShardId{"s1"}, DonorStateEnum::kDonatingInitialData, Timestamp(1, 1))},
+ {resharding::makeDonorShard(ShardId{"s2"}, DonorStateEnum::kPreparingToDonate)}};
auto coordinatorDoc0 = makeCoordinatorDocWithRecipientsAndDonors(recipientShards, donorShards0);
reshardingObserver->onReshardingParticipantTransition(coordinatorDoc0);
ASSERT_FALSE(fut.isReady());
std::vector<DonorShardEntry> donorShards1{
- {makeDonorShard(ShardId{"s1"}, DonorStateEnum::kDonatingInitialData, Timestamp(1, 1))},
- {makeDonorShard(ShardId{"s2"}, DonorStateEnum::kDonatingInitialData, Timestamp(1, 1))}};
+ {resharding::makeDonorShard(
+ ShardId{"s1"}, DonorStateEnum::kDonatingInitialData, Timestamp(1, 1))},
+ {resharding::makeDonorShard(
+ ShardId{"s2"}, DonorStateEnum::kDonatingInitialData, Timestamp(1, 1))}};
auto coordinatorDoc1 = makeCoordinatorDocWithRecipientsAndDonors(recipientShards, donorShards1);
reshardingObserver->onReshardingParticipantTransition(coordinatorDoc1);
ASSERT_TRUE(fut.isReady());
diff --git a/src/mongo/db/s/resharding/resharding_coordinator_service.cpp b/src/mongo/db/s/resharding/resharding_coordinator_service.cpp
index cbd6232a5d1..9aa5ed7c223 100644
--- a/src/mongo/db/s/resharding/resharding_coordinator_service.cpp
+++ b/src/mongo/db/s/resharding/resharding_coordinator_service.cpp
@@ -27,9 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include "mongo/db/s/resharding/resharding_coordinator_service.h"
#include "mongo/base/string_data.h"
@@ -79,7 +76,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kResharding
-
namespace mongo {
namespace {
@@ -150,12 +146,12 @@ using resharding_metrics::getIntervalStartFieldName;
using DocT = ReshardingCoordinatorDocument;
const auto metricsPrefix = resharding_metrics::getMetricsPrefix<DocT>();
-void buildStateDocumentCloneMetricsForUpdate(BSONObjBuilder& bob, ReshardingMetricsNew* metrics) {
+void buildStateDocumentCloneMetricsForUpdate(BSONObjBuilder& bob, ReshardingMetrics* metrics) {
bob.append(getIntervalStartFieldName<DocT>(ReshardingRecipientMetrics::kDocumentCopyFieldName),
metrics->getCopyingBegin());
}
-void buildStateDocumentApplyMetricsForUpdate(BSONObjBuilder& bob, ReshardingMetricsNew* metrics) {
+void buildStateDocumentApplyMetricsForUpdate(BSONObjBuilder& bob, ReshardingMetrics* metrics) {
bob.append(getIntervalEndFieldName<DocT>(ReshardingRecipientMetrics::kDocumentCopyFieldName),
metrics->getCopyingEnd());
bob.append(
@@ -164,14 +160,14 @@ void buildStateDocumentApplyMetricsForUpdate(BSONObjBuilder& bob, ReshardingMetr
}
void buildStateDocumentBlockingWritesMetricsForUpdate(BSONObjBuilder& bob,
- ReshardingMetricsNew* metrics) {
+ ReshardingMetrics* metrics) {
bob.append(
getIntervalEndFieldName<DocT>(ReshardingRecipientMetrics::kOplogApplicationFieldName),
metrics->getApplyingEnd());
}
void buildStateDocumentMetricsForUpdate(BSONObjBuilder& bob,
- ReshardingMetricsNew* metrics,
+ ReshardingMetrics* metrics,
CoordinatorStateEnum newState) {
switch (newState) {
case CoordinatorStateEnum::kCloning:
@@ -189,7 +185,7 @@ void buildStateDocumentMetricsForUpdate(BSONObjBuilder& bob,
}
void writeToCoordinatorStateNss(OperationContext* opCtx,
- ReshardingMetricsNew* metrics,
+ ReshardingMetrics* metrics,
const ReshardingCoordinatorDocument& coordinatorDoc,
TxnNumber txnNumber) {
BatchedCommandRequest request([&] {
@@ -295,9 +291,9 @@ TypeCollectionRecipientFields constructRecipientFields(
coordinatorDoc.getSourceNss(),
resharding::gReshardingMinimumOperationDurationMillis.load());
- emplaceCloneTimestampIfExists(recipientFields, coordinatorDoc.getCloneTimestamp());
- emplaceApproxBytesToCopyIfExists(recipientFields,
- coordinatorDoc.getReshardingApproxCopySizeStruct());
+ resharding::emplaceCloneTimestampIfExists(recipientFields, coordinatorDoc.getCloneTimestamp());
+ resharding::emplaceApproxBytesToCopyIfExists(
+ recipientFields, coordinatorDoc.getReshardingApproxCopySizeStruct());
return recipientFields;
}
@@ -323,10 +319,10 @@ BSONObj createReshardingFieldsUpdateForOriginalNss(
<< CollectionType::kAllowMigrationsFieldName << false));
}
case CoordinatorStateEnum::kPreparingToDonate: {
- TypeCollectionDonorFields donorFields(
- coordinatorDoc.getTempReshardingNss(),
- coordinatorDoc.getReshardingKey(),
- extractShardIdsFromParticipantEntries(coordinatorDoc.getRecipientShards()));
+ TypeCollectionDonorFields donorFields(coordinatorDoc.getTempReshardingNss(),
+ coordinatorDoc.getReshardingKey(),
+ resharding::extractShardIdsFromParticipantEntries(
+ coordinatorDoc.getRecipientShards()));
BSONObjBuilder updateBuilder;
{
@@ -394,7 +390,7 @@ BSONObj createReshardingFieldsUpdateForOriginalNss(
// If the abortReason exists, include it in the update.
setBuilder.append("reshardingFields.abortReason", *abortReason);
- auto abortStatus = getStatusFromAbortReason(coordinatorDoc);
+ auto abortStatus = resharding::getStatusFromAbortReason(coordinatorDoc);
setBuilder.append("reshardingFields.userCanceled",
abortStatus == ErrorCodes::ReshardCollectionAborted);
}
@@ -504,7 +500,7 @@ void writeToConfigCollectionsForTempNss(OperationContext* opCtx,
if (auto abortReason = coordinatorDoc.getAbortReason()) {
setBuilder.append("reshardingFields.abortReason", *abortReason);
- auto abortStatus = getStatusFromAbortReason(coordinatorDoc);
+ auto abortStatus = resharding::getStatusFromAbortReason(coordinatorDoc);
setBuilder.append("reshardingFields.userCanceled",
abortStatus == ErrorCodes::ReshardCollectionAborted);
}
@@ -608,8 +604,8 @@ BSONObj makeFlushRoutingTableCacheUpdatesCmd(const NamespaceString& nss) {
BSON(WriteConcernOptions::kWriteConcernField << kMajorityWriteConcern.toBSON()));
}
-ReshardingMetricsNew::CoordinatorState toMetricsState(CoordinatorStateEnum state) {
- return ReshardingMetricsNew::CoordinatorState(state);
+ReshardingMetrics::CoordinatorState toMetricsState(CoordinatorStateEnum state) {
+ return ReshardingMetrics::CoordinatorState(state);
}
} // namespace
@@ -664,7 +660,7 @@ void cleanupSourceConfigCollections(OperationContext* opCtx,
}
void writeDecisionPersistedState(OperationContext* opCtx,
- ReshardingMetricsNew* metrics,
+ ReshardingMetrics* metrics,
const ReshardingCoordinatorDocument& coordinatorDoc,
OID newCollectionEpoch,
Timestamp newCollectionTimestamp) {
@@ -691,7 +687,7 @@ void writeDecisionPersistedState(OperationContext* opCtx,
}
void insertCoordDocAndChangeOrigCollEntry(OperationContext* opCtx,
- ReshardingMetricsNew* metrics,
+ ReshardingMetrics* metrics,
const ReshardingCoordinatorDocument& coordinatorDoc) {
ShardingCatalogManager::get(opCtx)->bumpCollectionVersionAndChangeMetadataInTxn(
opCtx,
@@ -741,7 +737,7 @@ void insertCoordDocAndChangeOrigCollEntry(OperationContext* opCtx,
void writeParticipantShardsAndTempCollInfo(
OperationContext* opCtx,
- ReshardingMetricsNew* metrics,
+ ReshardingMetrics* metrics,
const ReshardingCoordinatorDocument& updatedCoordinatorDoc,
std::vector<ChunkType> initialChunks,
std::vector<BSONObj> zones) {
@@ -770,7 +766,7 @@ void writeParticipantShardsAndTempCollInfo(
void writeStateTransitionAndCatalogUpdatesThenBumpShardVersions(
OperationContext* opCtx,
- ReshardingMetricsNew* metrics,
+ ReshardingMetrics* metrics,
const ReshardingCoordinatorDocument& coordinatorDoc) {
// Run updates to config.reshardingOperations and config.collections in a transaction
auto nextState = coordinatorDoc.getState();
@@ -804,7 +800,7 @@ void writeStateTransitionAndCatalogUpdatesThenBumpShardVersions(
}
void removeCoordinatorDocAndReshardingFields(OperationContext* opCtx,
- ReshardingMetricsNew* metrics,
+ ReshardingMetrics* metrics,
const ReshardingCoordinatorDocument& coordinatorDoc,
boost::optional<Status> abortReason) {
// If the coordinator needs to abort and isn't in kInitializing, additional collections need to
@@ -857,7 +853,7 @@ ChunkVersion ReshardingCoordinatorExternalState::calculateChunkVersionForInitial
OperationContext* opCtx) {
const auto now = VectorClock::get(opCtx)->getTime();
const auto timestamp = now.clusterTime().asTimestamp();
- return ChunkVersion(1, 0, OID::gen(), timestamp);
+ return ChunkVersion({OID::gen(), timestamp}, {1, 0});
}
std::vector<DonorShardEntry> constructDonorShardEntries(const std::set<ShardId>& donorShardIds) {
@@ -1036,7 +1032,7 @@ ReshardingCoordinatorService::ReshardingCoordinator::ReshardingCoordinator(
: PrimaryOnlyService::TypedInstance<ReshardingCoordinator>(),
_id(coordinatorDoc.getReshardingUUID().toBSON()),
_coordinatorService(coordinatorService),
- _metricsNew{ReshardingMetricsNew::initializeFrom(coordinatorDoc, getGlobalServiceContext())},
+ _metrics{ReshardingMetrics::initializeFrom(coordinatorDoc, getGlobalServiceContext())},
_metadata(coordinatorDoc.getCommonReshardingMetadata()),
_coordinatorDoc(coordinatorDoc),
_markKilledExecutor(std::make_shared<ThreadPool>([] {
@@ -1055,7 +1051,7 @@ ReshardingCoordinatorService::ReshardingCoordinator::ReshardingCoordinator(
_reshardingCoordinatorObserver->onReshardingParticipantTransition(coordinatorDoc);
}
- _metricsNew->onStateTransition(boost::none, toMetricsState(coordinatorDoc.getState()));
+ _metrics->onStateTransition(boost::none, toMetricsState(coordinatorDoc.getState()));
}
void ReshardingCoordinatorService::ReshardingCoordinator::installCoordinatorDoc(
@@ -1080,8 +1076,8 @@ void ReshardingCoordinatorService::ReshardingCoordinator::installCoordinatorDoc(
const auto previousState = _coordinatorDoc.getState();
_coordinatorDoc = doc;
- _metricsNew->onStateTransition(toMetricsState(previousState),
- toMetricsState(_coordinatorDoc.getState()));
+ _metrics->onStateTransition(toMetricsState(previousState),
+ toMetricsState(_coordinatorDoc.getState()));
ShardingLogging::get(opCtx)->logChange(opCtx,
"resharding.coordinator.transition",
@@ -1090,7 +1086,7 @@ void ReshardingCoordinatorService::ReshardingCoordinator::installCoordinatorDoc(
kMajorityWriteConcern);
}
-void markCompleted(const Status& status, ReshardingMetricsNew* metrics) {
+void markCompleted(const Status& status, ReshardingMetrics* metrics) {
if (status.isOK()) {
metrics->onSuccess();
} else if (status == ErrorCodes::ReshardCollectionAborted) {
@@ -1320,7 +1316,7 @@ ReshardingCoordinatorService::ReshardingCoordinator::_commitAndFinishReshardOper
})
.then([this, executor] { return _awaitAllParticipantShardsDone(executor); })
.then([this, executor] {
- _metricsNew->onCriticalSectionEnd();
+ _metrics->onCriticalSectionEnd();
// Best-effort attempt to trigger a refresh on the participant shards so
// they see the collection metadata without reshardingFields and no longer
@@ -1403,6 +1399,14 @@ SemiFuture<void> ReshardingCoordinatorService::ReshardingCoordinator::run(
.onCompletion([outerStatus](Status) { return outerStatus; });
})
.onCompletion([this, self = shared_from_this()](Status status) {
+ _metrics->onStateTransition(toMetricsState(_coordinatorDoc.getState()), boost::none);
+
+ // Destroy metrics early so it's lifetime will not be tied to the lifetime of this
+ // state machine. This is because we have future callbacks copy shared pointers to this
+ // state machine that causes it to live longer than expected and potentially overlap
+ // with a newer instance when stepping up.
+ _metrics.reset();
+
if (!status.isOK()) {
{
auto lg = stdx::lock_guard(_fulfillmentMutex);
@@ -1416,8 +1420,6 @@ SemiFuture<void> ReshardingCoordinatorService::ReshardingCoordinator::run(
}
_reshardingCoordinatorObserver->interrupt(status);
}
-
- _metricsNew->onStateTransition(toMetricsState(_coordinatorDoc.getState()), boost::none);
})
.semi();
}
@@ -1432,12 +1434,12 @@ ExecutorFuture<void> ReshardingCoordinatorService::ReshardingCoordinator::_onAbo
auto opCtx = _cancelableOpCtxFactory->makeOperationContext(&cc());
// Notify metrics as the operation is now complete for external observers.
- markCompleted(status, _metricsNew.get());
+ markCompleted(status, _metrics.get());
// The temporary collection and its corresponding entries were never created. Only
// the coordinator document and reshardingFields require cleanup.
resharding::removeCoordinatorDocAndReshardingFields(
- opCtx.get(), _metricsNew.get(), _coordinatorDoc, status);
+ opCtx.get(), _metrics.get(), _coordinatorDoc, status);
return status;
})
.onTransientError([](const Status& retryStatus) {
@@ -1506,7 +1508,7 @@ void ReshardingCoordinatorService::ReshardingCoordinator::abort() {
boost::optional<BSONObj> ReshardingCoordinatorService::ReshardingCoordinator::reportForCurrentOp(
MongoProcessInterface::CurrentOpConnectionsMode,
MongoProcessInterface::CurrentOpSessionsMode) noexcept {
- return _metricsNew->reportForCurrentOp();
+ return _metrics->reportForCurrentOp();
}
std::shared_ptr<ReshardingCoordinatorObserver>
@@ -1561,13 +1563,13 @@ void ReshardingCoordinatorService::ReshardingCoordinator::_insertCoordDocAndChan
ReshardingCoordinatorDocument updatedCoordinatorDoc = _coordinatorDoc;
updatedCoordinatorDoc.setState(CoordinatorStateEnum::kInitializing);
resharding::insertCoordDocAndChangeOrigCollEntry(
- opCtx.get(), _metricsNew.get(), updatedCoordinatorDoc);
+ opCtx.get(), _metrics.get(), updatedCoordinatorDoc);
installCoordinatorDoc(opCtx.get(), updatedCoordinatorDoc);
{
// Note: don't put blocking or interruptible code in this block.
_coordinatorDocWrittenPromise.emplaceValue();
- _metricsNew->onStarted();
+ _metrics->onStarted();
}
pauseBeforeInsertCoordinatorDoc.pauseWhileSet();
@@ -1592,14 +1594,14 @@ void ReshardingCoordinatorService::ReshardingCoordinator::
// the possibility of the document reaching the BSONObj size constraint.
std::vector<BSONObj> zones;
if (updatedCoordinatorDoc.getZones()) {
- zones = buildTagsDocsFromZones(updatedCoordinatorDoc.getTempReshardingNss(),
- *updatedCoordinatorDoc.getZones());
+ zones = resharding::buildTagsDocsFromZones(updatedCoordinatorDoc.getTempReshardingNss(),
+ *updatedCoordinatorDoc.getZones());
}
updatedCoordinatorDoc.setPresetReshardedChunks(boost::none);
updatedCoordinatorDoc.setZones(boost::none);
resharding::writeParticipantShardsAndTempCollInfo(opCtx.get(),
- _metricsNew.get(),
+ _metrics.get(),
updatedCoordinatorDoc,
std::move(shardsAndChunks.initialChunks),
std::move(zones));
@@ -1652,14 +1654,14 @@ ReshardingCoordinatorService::ReshardingCoordinator::_awaitAllDonorsReadyToDonat
opCtx.get(), _ctHolder->getAbortToken());
}
- auto highestMinFetchTimestamp =
- getHighestMinFetchTimestamp(coordinatorDocChangedOnDisk.getDonorShards());
+ auto highestMinFetchTimestamp = resharding::getHighestMinFetchTimestamp(
+ coordinatorDocChangedOnDisk.getDonorShards());
_updateCoordinatorDocStateAndCatalogEntries(
CoordinatorStateEnum::kCloning,
coordinatorDocChangedOnDisk,
highestMinFetchTimestamp,
computeApproxCopySize(coordinatorDocChangedOnDisk));
- _metricsNew->onCopyingBegin();
+ _metrics->onCopyingBegin();
})
.then([this] { return _waitForMajority(_ctHolder->getAbortToken()); });
}
@@ -1678,8 +1680,8 @@ ReshardingCoordinatorService::ReshardingCoordinator::_awaitAllRecipientsFinished
.then([this](ReshardingCoordinatorDocument coordinatorDocChangedOnDisk) {
this->_updateCoordinatorDocStateAndCatalogEntries(CoordinatorStateEnum::kApplying,
coordinatorDocChangedOnDisk);
- _metricsNew->onCopyingEnd();
- _metricsNew->onApplyingBegin();
+ _metrics->onCopyingEnd();
+ _metrics->onApplyingBegin();
})
.then([this] { return _waitForMajority(_ctHolder->getAbortToken()); });
}
@@ -1691,9 +1693,9 @@ void ReshardingCoordinatorService::ReshardingCoordinator::_startCommitMonitor(
}
_commitMonitor = std::make_shared<resharding::CoordinatorCommitMonitor>(
- _metricsNew,
+ _metrics,
_coordinatorDoc.getSourceNss(),
- extractShardIdsFromParticipantEntries(_coordinatorDoc.getRecipientShards()),
+ resharding::extractShardIdsFromParticipantEntries(_coordinatorDoc.getRecipientShards()),
**executor,
_ctHolder->getCommitMonitorToken());
@@ -1718,9 +1720,22 @@ ReshardingCoordinatorService::ReshardingCoordinator::_awaitAllRecipientsFinished
_startCommitMonitor(executor);
LOGV2(5391602, "Resharding operation waiting for an okay to enter critical section");
- return future_util::withCancellation(_canEnterCritical.getFuture(),
- _ctHolder->getAbortToken())
+
+ // The _reshardingCoordinatorObserver->awaitAllRecipientsInStrictConsistency() future is
+ // used for reporting recipient shard errors encountered during the Applying phase and
+ // in turn aborting the resharding operation.
+ // For all other cases, the _canEnterCritical.getFuture() resolves first and the
+ // operation can then proceed to entering the critical section depending on the status
+ // returned.
+ return future_util::withCancellation(
+ whenAny(
+ _canEnterCritical.getFuture().thenRunOn(**executor),
+ _reshardingCoordinatorObserver->awaitAllRecipientsInStrictConsistency()
+ .thenRunOn(**executor)
+ .ignoreValue()),
+ _ctHolder->getAbortToken())
.thenRunOn(**executor)
+ .then([](auto result) { return result.result; })
.onCompletion([this](Status status) {
_ctHolder->cancelCommitMonitor();
if (status.isOK()) {
@@ -1738,8 +1753,8 @@ ReshardingCoordinatorService::ReshardingCoordinator::_awaitAllRecipientsFinished
this->_updateCoordinatorDocStateAndCatalogEntries(CoordinatorStateEnum::kBlockingWrites,
_coordinatorDoc);
- _metricsNew->onApplyingEnd();
- _metricsNew->onCriticalSectionBegin();
+ _metrics->onApplyingEnd();
+ _metrics->onCriticalSectionBegin();
})
.then([this] { return _waitForMajority(_ctHolder->getAbortToken()); })
.thenRunOn(**executor)
@@ -1805,7 +1820,7 @@ Future<void> ReshardingCoordinatorService::ReshardingCoordinator::_commit(
resharding::writeDecisionPersistedState(opCtx.get(),
- _metricsNew.get(),
+ _metrics.get(),
updatedCoordinatorDoc,
std::move(newCollectionEpoch),
std::move(newCollectionTimestamp));
@@ -1836,7 +1851,7 @@ ReshardingCoordinatorService::ReshardingCoordinator::_awaitAllParticipantShardsD
boost::optional<Status> abortReason;
if (coordinatorDoc.getAbortReason()) {
- abortReason = getStatusFromAbortReason(coordinatorDoc);
+ abortReason = resharding::getStatusFromAbortReason(coordinatorDoc);
}
if (!abortReason) {
@@ -1849,40 +1864,18 @@ ReshardingCoordinatorService::ReshardingCoordinator::_awaitAllParticipantShardsD
const auto cmdObj =
ShardsvrDropCollectionIfUUIDNotMatchingRequest(nss, notMatchingThisUUID)
.toBSON({});
-
- try {
- sharding_ddl_util::sendAuthenticatedCommandToShards(
- opCtx.get(), nss.db(), cmdObj, allShardIds, **executor);
- } catch (const DBException& ex) {
- if (ex.code() == ErrorCodes::CommandNotFound) {
- // TODO SERVER-60531 get rid of the catch logic
- // Cleanup failed because at least one shard could is using a binary
- // not supporting the ShardsvrDropCollectionIfUUIDNotMatching command.
- LOGV2_INFO(5423100,
- "Resharding coordinator couldn't guarantee older incarnations "
- "of the collection were dropped. A chunk migration to a shard "
- "with an older incarnation of the collection will fail",
- "namespace"_attr = nss.ns());
- } else if (opCtx->checkForInterruptNoAssert().isOK()) {
- LOGV2_INFO(
- 5423101,
- "Resharding coordinator failed while trying to drop possible older "
- "incarnations of the collection. A chunk migration to a shard with "
- "an older incarnation of the collection will fail",
- "namespace"_attr = nss.ns(),
- "error"_attr = redact(ex.toStatus()));
- }
- }
+ _reshardingCoordinatorExternalState->sendCommandToShards(
+ opCtx.get(), nss.db(), cmdObj, allShardIds, **executor);
}
reshardingPauseCoordinatorBeforeRemovingStateDoc.pauseWhileSetAndNotCanceled(
opCtx.get(), _ctHolder->getStepdownToken());
// Notify metrics as the operation is now complete for external observers.
- markCompleted(abortReason ? *abortReason : Status::OK(), _metricsNew.get());
+ markCompleted(abortReason ? *abortReason : Status::OK(), _metrics.get());
resharding::removeCoordinatorDocAndReshardingFields(
- opCtx.get(), _metricsNew.get(), coordinatorDoc, abortReason);
+ opCtx.get(), _metrics.get(), coordinatorDoc, abortReason);
});
}
@@ -1896,13 +1889,13 @@ void ReshardingCoordinatorService::ReshardingCoordinator::
// Build new state doc for coordinator state update
ReshardingCoordinatorDocument updatedCoordinatorDoc = coordinatorDoc;
updatedCoordinatorDoc.setState(nextState);
- emplaceApproxBytesToCopyIfExists(updatedCoordinatorDoc, std::move(approxCopySize));
- emplaceCloneTimestampIfExists(updatedCoordinatorDoc, std::move(cloneTimestamp));
- emplaceTruncatedAbortReasonIfExists(updatedCoordinatorDoc, abortReason);
+ resharding::emplaceApproxBytesToCopyIfExists(updatedCoordinatorDoc, std::move(approxCopySize));
+ resharding::emplaceCloneTimestampIfExists(updatedCoordinatorDoc, std::move(cloneTimestamp));
+ resharding::emplaceTruncatedAbortReasonIfExists(updatedCoordinatorDoc, abortReason);
auto opCtx = _cancelableOpCtxFactory->makeOperationContext(&cc());
resharding::writeStateTransitionAndCatalogUpdatesThenBumpShardVersions(
- opCtx.get(), _metricsNew.get(), updatedCoordinatorDoc);
+ opCtx.get(), _metrics.get(), updatedCoordinatorDoc);
// Update in-memory coordinator doc
installCoordinatorDoc(opCtx.get(), updatedCoordinatorDoc);
@@ -1911,9 +1904,10 @@ void ReshardingCoordinatorService::ReshardingCoordinator::
void ReshardingCoordinatorService::ReshardingCoordinator::_sendCommandToAllParticipants(
const std::shared_ptr<executor::ScopedTaskExecutor>& executor, const BSONObj& command) {
auto opCtx = _cancelableOpCtxFactory->makeOperationContext(&cc());
- auto donorShardIds = extractShardIdsFromParticipantEntries(_coordinatorDoc.getDonorShards());
+ auto donorShardIds =
+ resharding::extractShardIdsFromParticipantEntries(_coordinatorDoc.getDonorShards());
auto recipientShardIds =
- extractShardIdsFromParticipantEntries(_coordinatorDoc.getRecipientShards());
+ resharding::extractShardIdsFromParticipantEntries(_coordinatorDoc.getRecipientShards());
std::set<ShardId> participantShardIds{donorShardIds.begin(), donorShardIds.end()};
participantShardIds.insert(recipientShardIds.begin(), recipientShardIds.end());
@@ -1929,7 +1923,7 @@ void ReshardingCoordinatorService::ReshardingCoordinator::_sendCommandToAllRecip
const std::shared_ptr<executor::ScopedTaskExecutor>& executor, const BSONObj& command) {
auto opCtx = _cancelableOpCtxFactory->makeOperationContext(&cc());
auto recipientShardIds =
- extractShardIdsFromParticipantEntries(_coordinatorDoc.getRecipientShards());
+ resharding::extractShardIdsFromParticipantEntries(_coordinatorDoc.getRecipientShards());
_reshardingCoordinatorExternalState->sendCommandToShards(
opCtx.get(),
@@ -1942,7 +1936,8 @@ void ReshardingCoordinatorService::ReshardingCoordinator::_sendCommandToAllRecip
void ReshardingCoordinatorService::ReshardingCoordinator::_sendCommandToAllDonors(
const std::shared_ptr<executor::ScopedTaskExecutor>& executor, const BSONObj& command) {
auto opCtx = _cancelableOpCtxFactory->makeOperationContext(&cc());
- auto donorShardIds = extractShardIdsFromParticipantEntries(_coordinatorDoc.getDonorShards());
+ auto donorShardIds =
+ resharding::extractShardIdsFromParticipantEntries(_coordinatorDoc.getDonorShards());
_reshardingCoordinatorExternalState->sendCommandToShards(
opCtx.get(),
@@ -2036,7 +2031,7 @@ void ReshardingCoordinatorService::ReshardingCoordinator::_updateChunkImbalanceM
auto imbalanceCount =
getMaxChunkImbalanceCount(routingInfo, allShardsWithOpTime.value, zoneInfo);
- _metricsNew->setLastOpEndingChunkImbalance(imbalanceCount);
+ _metrics->setLastOpEndingChunkImbalance(imbalanceCount);
} catch (const DBException& ex) {
LOGV2_WARNING(5543000,
"Encountered error while trying to update resharding chunk imbalance metrics",
diff --git a/src/mongo/db/s/resharding/resharding_coordinator_service.h b/src/mongo/db/s/resharding/resharding_coordinator_service.h
index d24c23f6b68..6f0eb95c79a 100644
--- a/src/mongo/db/s/resharding/resharding_coordinator_service.h
+++ b/src/mongo/db/s/resharding/resharding_coordinator_service.h
@@ -33,7 +33,7 @@
#include "mongo/db/repl/primary_only_service.h"
#include "mongo/db/s/resharding/coordinator_document_gen.h"
#include "mongo/db/s/resharding/resharding_coordinator_observer.h"
-#include "mongo/db/s/resharding/resharding_metrics_new.h"
+#include "mongo/db/s/resharding/resharding_metrics.h"
#include "mongo/platform/mutex.h"
#include "mongo/s/catalog/type_chunk.h"
#include "mongo/s/catalog/type_collection.h"
@@ -55,28 +55,28 @@ void cleanupSourceConfigCollections(OperationContext* opCtx,
const ReshardingCoordinatorDocument& coordinatorDoc);
void writeDecisionPersistedState(OperationContext* opCtx,
- ReshardingMetricsNew* metrics,
+ ReshardingMetrics* metrics,
const ReshardingCoordinatorDocument& coordinatorDoc,
OID newCollectionEpoch,
Timestamp newCollectionTimestamp);
void insertCoordDocAndChangeOrigCollEntry(OperationContext* opCtx,
- ReshardingMetricsNew* metrics,
+ ReshardingMetrics* metrics,
const ReshardingCoordinatorDocument& coordinatorDoc);
void writeParticipantShardsAndTempCollInfo(OperationContext* opCtx,
- ReshardingMetricsNew* metrics,
+ ReshardingMetrics* metrics,
const ReshardingCoordinatorDocument& coordinatorDoc,
std::vector<ChunkType> initialChunks,
std::vector<BSONObj> zones);
void writeStateTransitionAndCatalogUpdatesThenBumpShardVersions(
OperationContext* opCtx,
- ReshardingMetricsNew* metrics,
+ ReshardingMetrics* metrics,
const ReshardingCoordinatorDocument& coordinatorDoc);
void removeCoordinatorDocAndReshardingFields(OperationContext* opCtx,
- ReshardingMetricsNew* metrics,
+ ReshardingMetrics* metrics,
const ReshardingCoordinatorDocument& coordinatorDoc,
boost::optional<Status> abortReason = boost::none);
} // namespace resharding
@@ -513,7 +513,7 @@ private:
// The primary-only service instance corresponding to the coordinator instance. Not owned.
const ReshardingCoordinatorService* const _coordinatorService;
- std::shared_ptr<ReshardingMetricsNew> _metricsNew;
+ std::shared_ptr<ReshardingMetrics> _metrics;
// The in-memory representation of the immutable portion of the document in
// config.reshardingOperations.
diff --git a/src/mongo/db/s/resharding/resharding_coordinator_service_test.cpp b/src/mongo/db/s/resharding/resharding_coordinator_service_test.cpp
index dc16d5fe271..1fc380093bf 100644
--- a/src/mongo/db/s/resharding/resharding_coordinator_service_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_coordinator_service_test.cpp
@@ -27,9 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include <boost/optional.hpp>
#include <functional>
@@ -59,7 +56,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
-
namespace mongo {
namespace {
@@ -203,7 +199,7 @@ public:
{DonorShardEntry(ShardId("shard0000"), {})},
{RecipientShardEntry(ShardId("shard0001"), {})});
doc.setCommonReshardingMetadata(meta);
- emplaceCloneTimestampIfExists(doc, cloneTimestamp);
+ resharding::emplaceCloneTimestampIfExists(doc, cloneTimestamp);
return doc;
}
@@ -372,10 +368,11 @@ public:
TypeCollectionReshardingFields reshardingFields(coordinatorDoc.getReshardingUUID());
reshardingFields.setState(coordinatorDoc.getState());
- reshardingFields.setDonorFields(TypeCollectionDonorFields(
- coordinatorDoc.getTempReshardingNss(),
- coordinatorDoc.getReshardingKey(),
- extractShardIdsFromParticipantEntries(coordinatorDoc.getRecipientShards())));
+ reshardingFields.setDonorFields(
+ TypeCollectionDonorFields(coordinatorDoc.getTempReshardingNss(),
+ coordinatorDoc.getReshardingKey(),
+ resharding::extractShardIdsFromParticipantEntries(
+ coordinatorDoc.getRecipientShards())));
auto originalNssCatalogEntry = makeOriginalCollectionCatalogEntry(
coordinatorDoc,
@@ -414,7 +411,7 @@ public:
_newShardKey.isShardKey(shardKey.toBSON()) ? _newChunkRanges : _oldChunkRanges;
// Create two chunks, one on each shard with the given namespace and epoch
- ChunkVersion version(1, 0, epoch, timestamp);
+ ChunkVersion version({epoch, timestamp}, {1, 0});
ChunkType chunk1(uuid, chunkRanges[0], version, ShardId("shard0000"));
chunk1.setName(ids[0]);
version.incMinor();
diff --git a/src/mongo/db/s/resharding/resharding_coordinator_test.cpp b/src/mongo/db/s/resharding/resharding_coordinator_test.cpp
index da56d0d8cb5..35ffa75b31a 100644
--- a/src/mongo/db/s/resharding/resharding_coordinator_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_coordinator_test.cpp
@@ -27,10 +27,6 @@
* it in the license file.
*/
-#include "mongo/db/s/resharding/coordinator_document_gen.h"
-
-#include "mongo/platform/basic.h"
-
#include <boost/optional.hpp>
#include "mongo/client/remote_command_targeter_mock.h"
@@ -40,6 +36,7 @@
#include "mongo/db/repl/storage_interface_mock.h"
#include "mongo/db/s/config/config_server_test_fixture.h"
#include "mongo/db/s/config/index_on_config.h"
+#include "mongo/db/s/resharding/coordinator_document_gen.h"
#include "mongo/db/s/resharding/resharding_coordinator_service.h"
#include "mongo/db/s/resharding/resharding_util.h"
#include "mongo/db/s/transaction_coordinator_service.h"
@@ -52,7 +49,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
-
namespace mongo {
namespace {
@@ -85,13 +81,12 @@ protected:
TransactionCoordinatorService::get(operationContext())
->onShardingInitialization(operationContext(), true);
- _metrics =
- ReshardingMetricsNew::makeInstance(_originalUUID,
- _newShardKey.toBSON(),
- _originalNss,
- ReshardingMetricsNew::Role::kCoordinator,
- getServiceContext()->getFastClockSource()->now(),
- getServiceContext());
+ _metrics = ReshardingMetrics::makeInstance(_originalUUID,
+ _newShardKey.toBSON(),
+ _originalNss,
+ ReshardingMetrics::Role::kCoordinator,
+ getServiceContext()->getFastClockSource()->now(),
+ getServiceContext());
}
void tearDown() override {
@@ -180,7 +175,7 @@ protected:
_newShardKey.isShardKey(shardKey.toBSON()) ? _newChunkRanges : _oldChunkRanges;
// Create two chunks, one on each shard with the given namespace and epoch
- ChunkVersion version(1, 0, epoch, Timestamp(1, 2));
+ ChunkVersion version({epoch, Timestamp(1, 2)}, {1, 0});
ChunkType chunk1(uuid, chunkRanges[0], version, ShardId("shard0000"));
chunk1.setName(ids[0]);
version.incMinor();
@@ -227,7 +222,7 @@ protected:
client.insert(CollectionType::ConfigNS.ns(), originalNssCatalogEntry.toBSON());
auto tempNssCatalogEntry = createTempReshardingCollectionType(
- opCtx, coordinatorDoc, ChunkVersion(1, 1, OID::gen(), Timestamp(1, 2)), BSONObj());
+ opCtx, coordinatorDoc, ChunkVersion({OID::gen(), Timestamp(1, 2)}, {1, 1}), BSONObj());
client.insert(CollectionType::ConfigNS.ns(), tempNssCatalogEntry.toBSON());
return coordinatorDoc;
@@ -519,11 +514,11 @@ protected:
// collection should have been removed.
boost::optional<CollectionType> expectedTempCollType = boost::none;
if (expectedCoordinatorDoc.getState() < CoordinatorStateEnum::kCommitting) {
- expectedTempCollType =
- createTempReshardingCollectionType(opCtx,
- expectedCoordinatorDoc,
- ChunkVersion(1, 1, OID::gen(), Timestamp(1, 2)),
- BSONObj());
+ expectedTempCollType = createTempReshardingCollectionType(
+ opCtx,
+ expectedCoordinatorDoc,
+ ChunkVersion({OID::gen(), Timestamp(1, 2)}, {1, 1}),
+ BSONObj());
// It's necessary to add the userCanceled field because the call into
// createTempReshardingCollectionType assumes that the collection entry is
@@ -723,7 +718,7 @@ protected:
ShardKeyPattern _oldShardKey = ShardKeyPattern(BSON("oldSK" << 1));
ShardKeyPattern _newShardKey = ShardKeyPattern(BSON("newSK" << 1));
- std::unique_ptr<ReshardingMetricsNew> _metrics;
+ std::unique_ptr<ReshardingMetrics> _metrics;
const std::vector<ChunkRange> _oldChunkRanges = {
ChunkRange(_oldShardKey.getKeyPattern().globalMin(), BSON("oldSK" << 12345)),
diff --git a/src/mongo/db/s/resharding/resharding_data_replication.cpp b/src/mongo/db/s/resharding/resharding_data_replication.cpp
index 4143c8c0c76..ff7bc064939 100644
--- a/src/mongo/db/s/resharding/resharding_data_replication.cpp
+++ b/src/mongo/db/s/resharding/resharding_data_replication.cpp
@@ -81,12 +81,12 @@ void ensureFulfilledPromise(SharedPromise<void>& sp, Status error) {
} // namespace
std::unique_ptr<ReshardingCollectionCloner> ReshardingDataReplication::_makeCollectionCloner(
- ReshardingMetricsNew* metricsNew,
+ ReshardingMetrics* metrics,
const CommonReshardingMetadata& metadata,
const ShardId& myShardId,
Timestamp cloneTimestamp) {
return std::make_unique<ReshardingCollectionCloner>(
- metricsNew,
+ metrics,
ShardKeyPattern{metadata.getReshardingKey()},
metadata.getSourceNss(),
metadata.getSourceUUID(),
@@ -112,7 +112,7 @@ std::vector<std::unique_ptr<ReshardingTxnCloner>> ReshardingDataReplication::_ma
std::vector<std::unique_ptr<ReshardingOplogFetcher>> ReshardingDataReplication::_makeOplogFetchers(
OperationContext* opCtx,
- ReshardingMetricsNew* metricsNew,
+ ReshardingMetrics* metrics,
const CommonReshardingMetadata& metadata,
const std::vector<DonorShardFetchTimestamp>& donorShards,
const ShardId& myShardId) {
@@ -121,14 +121,14 @@ std::vector<std::unique_ptr<ReshardingOplogFetcher>> ReshardingDataReplication::
for (const auto& donor : donorShards) {
auto oplogBufferNss =
- getLocalOplogBufferNamespace(metadata.getSourceUUID(), donor.getShardId());
+ resharding::getLocalOplogBufferNamespace(metadata.getSourceUUID(), donor.getShardId());
auto minFetchTimestamp = *donor.getMinFetchTimestamp();
auto idToResumeFrom = getOplogFetcherResumeId(
opCtx, metadata.getReshardingUUID(), oplogBufferNss, minFetchTimestamp);
invariant((idToResumeFrom >= ReshardingDonorOplogId{minFetchTimestamp, minFetchTimestamp}));
oplogFetchers.emplace_back(std::make_unique<ReshardingOplogFetcher>(
- std::make_unique<ReshardingOplogFetcher::Env>(opCtx->getServiceContext(), metricsNew),
+ std::make_unique<ReshardingOplogFetcher::Env>(opCtx->getServiceContext(), metrics),
metadata.getReshardingUUID(),
metadata.getSourceUUID(),
// The recipient fetches oplog entries from the donor starting from the largest _id
@@ -182,7 +182,7 @@ std::vector<std::unique_ptr<ReshardingOplogApplier>> ReshardingDataReplication::
invariant((idToResumeFrom >= ReshardingDonorOplogId{minFetchTimestamp, minFetchTimestamp}));
const auto& oplogBufferNss =
- getLocalOplogBufferNamespace(metadata.getSourceUUID(), donorShardId);
+ resharding::getLocalOplogBufferNamespace(metadata.getSourceUUID(), donorShardId);
auto applierMetrics = (*applierMetricsMap)[donorShardId].get();
oplogAppliers.emplace_back(std::make_unique<ReshardingOplogApplier>(
@@ -206,7 +206,7 @@ std::vector<std::unique_ptr<ReshardingOplogApplier>> ReshardingDataReplication::
std::unique_ptr<ReshardingDataReplicationInterface> ReshardingDataReplication::make(
OperationContext* opCtx,
- ReshardingMetricsNew* metricsNew,
+ ReshardingMetrics* metrics,
ReshardingApplierMetricsMap* applierMetricsMap,
CommonReshardingMetadata metadata,
const std::vector<DonorShardFetchTimestamp>& donorShards,
@@ -218,11 +218,11 @@ std::unique_ptr<ReshardingDataReplicationInterface> ReshardingDataReplication::m
std::vector<std::unique_ptr<ReshardingTxnCloner>> txnCloners;
if (!cloningDone) {
- collectionCloner = _makeCollectionCloner(metricsNew, metadata, myShardId, cloneTimestamp);
+ collectionCloner = _makeCollectionCloner(metrics, metadata, myShardId, cloneTimestamp);
txnCloners = _makeTxnCloners(metadata, donorShards);
}
- auto oplogFetchers = _makeOplogFetchers(opCtx, metricsNew, metadata, donorShards, myShardId);
+ auto oplogFetchers = _makeOplogFetchers(opCtx, metrics, metadata, donorShards, myShardId);
auto oplogFetcherExecutor = _makeOplogFetcherExecutor(donorShards.size());
@@ -456,7 +456,7 @@ ReshardingDonorOplogId ReshardingDataReplication::getOplogFetcherResumeId(
if (highestOplogBufferId) {
auto oplogEntry = repl::OplogEntry{highestOplogBufferId->toBson()};
- if (isFinalOplog(oplogEntry, reshardingUUID)) {
+ if (resharding::isFinalOplog(oplogEntry, reshardingUUID)) {
return ReshardingOplogFetcher::kFinalOpAlreadyFetched;
}
diff --git a/src/mongo/db/s/resharding/resharding_data_replication.h b/src/mongo/db/s/resharding/resharding_data_replication.h
index f8348646758..2e44a5d2a21 100644
--- a/src/mongo/db/s/resharding/resharding_data_replication.h
+++ b/src/mongo/db/s/resharding/resharding_data_replication.h
@@ -140,7 +140,7 @@ private:
public:
static std::unique_ptr<ReshardingDataReplicationInterface> make(
OperationContext* opCtx,
- ReshardingMetricsNew* metricsNew,
+ ReshardingMetrics* metrics,
ReshardingApplierMetricsMap* applierMetricsMap,
CommonReshardingMetadata metadata,
const std::vector<DonorShardFetchTimestamp>& donorShards,
@@ -196,7 +196,7 @@ public:
private:
static std::unique_ptr<ReshardingCollectionCloner> _makeCollectionCloner(
- ReshardingMetricsNew* metricsNew,
+ ReshardingMetrics* metrics,
const CommonReshardingMetadata& metadata,
const ShardId& myShardId,
Timestamp cloneTimestamp);
@@ -207,7 +207,7 @@ private:
static std::vector<std::unique_ptr<ReshardingOplogFetcher>> _makeOplogFetchers(
OperationContext* opCtx,
- ReshardingMetricsNew* metricsNew,
+ ReshardingMetrics* metrics,
const CommonReshardingMetadata& metadata,
const std::vector<DonorShardFetchTimestamp>& donorShards,
const ShardId& myShardId);
diff --git a/src/mongo/db/s/resharding/resharding_data_replication_test.cpp b/src/mongo/db/s/resharding/resharding_data_replication_test.cpp
index f71ce9f0356..f5f588ac948 100644
--- a/src/mongo/db/s/resharding/resharding_data_replication_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_data_replication_test.cpp
@@ -27,12 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
-#include <memory>
-#include <vector>
-
#include "mongo/bson/bsonmisc.h"
#include "mongo/db/persistent_task_store.h"
#include "mongo/db/query/collation/collator_factory_mock.h"
@@ -50,7 +44,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
-
namespace mongo {
namespace {
@@ -78,7 +71,7 @@ public:
std::vector<ChunkType> chunks = {ChunkType{
_sourceUUID,
ChunkRange{BSON(_currentShardKey << MINKEY), BSON(_currentShardKey << MAXKEY)},
- ChunkVersion(100, 0, epoch, Timestamp(1, 1)),
+ ChunkVersion({epoch, Timestamp(1, 1)}, {100, 0}),
_myDonorId}};
auto rt = RoutingTableHistory::makeNew(_sourceNss,
@@ -193,7 +186,7 @@ TEST_F(ReshardingDataReplicationTest, GetOplogFetcherResumeId) {
auto opCtx = makeOperationContext();
const auto reshardingUUID = UUID::gen();
- auto oplogBufferNss = getLocalOplogBufferNamespace(reshardingUUID, {"shard0"});
+ auto oplogBufferNss = resharding::getLocalOplogBufferNamespace(reshardingUUID, {"shard0"});
const auto minFetchTimestamp = Timestamp{10, 0};
const auto oplogId1 = ReshardingDonorOplogId{{20, 0}, {18, 0}};
diff --git a/src/mongo/db/s/resharding/resharding_destined_recipient_test.cpp b/src/mongo/db/s/resharding/resharding_destined_recipient_test.cpp
index d95f0fdc23e..632b387a817 100644
--- a/src/mongo/db/s/resharding/resharding_destined_recipient_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_destined_recipient_test.cpp
@@ -27,9 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include "mongo/db/catalog/create_collection.h"
#include "mongo/db/catalog_raii.h"
#include "mongo/db/dbdirectclient.h"
@@ -55,7 +52,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
-
namespace mongo {
namespace {
@@ -164,11 +160,11 @@ protected:
const std::string& shardKey) {
auto range1 = ChunkRange(BSON(shardKey << MINKEY), BSON(shardKey << 5));
ChunkType chunk1(
- uuid, range1, ChunkVersion(1, 0, epoch, timestamp), kShardList[0].getName());
+ uuid, range1, ChunkVersion({epoch, timestamp}, {1, 0}), kShardList[0].getName());
auto range2 = ChunkRange(BSON(shardKey << 5), BSON(shardKey << MAXKEY));
ChunkType chunk2(
- uuid, range2, ChunkVersion(1, 0, epoch, timestamp), kShardList[1].getName());
+ uuid, range2, ChunkVersion({epoch, timestamp}, {1, 0}), kShardList[1].getName());
return {chunk1, chunk2};
}
@@ -199,7 +195,7 @@ protected:
ReshardingEnv env(CollectionCatalog::get(opCtx)->lookupUUIDByNSS(opCtx, kNss).value());
env.destShard = kShardList[1].getName();
- env.version = ChunkVersion(1, 0, OID::gen(), Timestamp(1, 1));
+ env.version = ChunkVersion({OID::gen(), Timestamp(1, 1)}, {1, 0});
env.tempNss =
NamespaceString(kNss.db(),
fmt::format("{}{}",
diff --git a/src/mongo/db/s/resharding/resharding_donor_oplog_iterator.cpp b/src/mongo/db/s/resharding/resharding_donor_oplog_iterator.cpp
index 5213b170753..0a9027deea2 100644
--- a/src/mongo/db/s/resharding/resharding_donor_oplog_iterator.cpp
+++ b/src/mongo/db/s/resharding/resharding_donor_oplog_iterator.cpp
@@ -129,7 +129,7 @@ std::vector<repl::OplogEntry> ReshardingDonorOplogIterator::_fillBatch(Pipeline&
numBytes += obj.objsize();
- if (isFinalOplog(entry)) {
+ if (resharding::isFinalOplog(entry)) {
// The ReshardingOplogFetcher should never insert documents after the reshardFinalOp
// entry. We defensively check each oplog entry for being the reshardFinalOp and confirm
// the pipeline has been exhausted.
@@ -185,7 +185,7 @@ ExecutorFuture<std::vector<repl::OplogEntry>> ReshardingDonorOplogIterator::getN
const auto& lastEntryInBatch = batch.back();
_resumeToken = getId(lastEntryInBatch);
- if (isFinalOplog(lastEntryInBatch)) {
+ if (resharding::isFinalOplog(lastEntryInBatch)) {
_hasSeenFinalOplogEntry = true;
// Skip returning the final oplog entry because it is known to be a no-op.
batch.pop_back();
diff --git a/src/mongo/db/s/resharding/resharding_donor_oplog_iterator_test.cpp b/src/mongo/db/s/resharding/resharding_donor_oplog_iterator_test.cpp
index 26b7646283f..a0491b06e7c 100644
--- a/src/mongo/db/s/resharding/resharding_donor_oplog_iterator_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_donor_oplog_iterator_test.cpp
@@ -95,7 +95,7 @@ public:
const BSONObj oField(BSON("msg"
<< "Created temporary resharding collection"));
const BSONObj o2Field(
- BSON("type" << kReshardFinalOpLogType << "reshardingUUID" << UUID::gen()));
+ BSON("type" << resharding::kReshardFinalOpLogType << "reshardingUUID" << UUID::gen()));
return makeOplog(_crudNss, _uuid, repl::OpTypeEnum::kNoop, oField, o2Field, oplogId);
}
@@ -103,7 +103,7 @@ public:
ReshardingDonorOplogId oplogId(ts, ts);
const BSONObj oField(BSON("msg"
<< "Latest oplog ts from donor's cursor response"));
- const BSONObj o2Field(BSON("type" << kReshardProgressMark));
+ const BSONObj o2Field(BSON("type" << resharding::kReshardProgressMark));
return makeOplog(_crudNss, _uuid, repl::OpTypeEnum::kNoop, oField, o2Field, oplogId);
}
diff --git a/src/mongo/db/s/resharding/resharding_donor_recipient_common.cpp b/src/mongo/db/s/resharding/resharding_donor_recipient_common.cpp
index 43d91e83b97..27157f82b66 100644
--- a/src/mongo/db/s/resharding/resharding_donor_recipient_common.cpp
+++ b/src/mongo/db/s/resharding/resharding_donor_recipient_common.cpp
@@ -332,7 +332,12 @@ void clearFilteringMetadata(OperationContext* opCtx, bool scheduleAsyncRefresh)
return true;
});
}
+ clearFilteringMetadata(opCtx, namespacesToRefresh, scheduleAsyncRefresh);
+}
+void clearFilteringMetadata(OperationContext* opCtx,
+ stdx::unordered_set<NamespaceString> namespacesToRefresh,
+ bool scheduleAsyncRefresh) {
for (const auto& nss : namespacesToRefresh) {
AutoGetCollection autoColl(opCtx, nss, MODE_IX);
CollectionShardingRuntime::get(opCtx, nss)->clearFilteringMetadata(opCtx);
diff --git a/src/mongo/db/s/resharding/resharding_donor_recipient_common.h b/src/mongo/db/s/resharding/resharding_donor_recipient_common.h
index 2efba26f659..10be195c586 100644
--- a/src/mongo/db/s/resharding/resharding_donor_recipient_common.h
+++ b/src/mongo/db/s/resharding/resharding_donor_recipient_common.h
@@ -77,6 +77,10 @@ void processReshardingFieldsForCollection(OperationContext* opCtx,
void clearFilteringMetadata(OperationContext* opCtx, bool scheduleAsyncRefresh);
+void clearFilteringMetadata(OperationContext* opCtx,
+ stdx::unordered_set<NamespaceString> namespacesToRefresh,
+ bool scheduleAsyncRefresh);
+
void refreshShardVersion(OperationContext* opCtx, const NamespaceString& nss);
} // namespace resharding
diff --git a/src/mongo/db/s/resharding/resharding_donor_recipient_common_test.cpp b/src/mongo/db/s/resharding/resharding_donor_recipient_common_test.cpp
index 3fccff9812c..e5bd8defdbd 100644
--- a/src/mongo/db/s/resharding/resharding_donor_recipient_common_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_donor_recipient_common_test.cpp
@@ -58,7 +58,7 @@ public:
const NamespaceString kOriginalNss = NamespaceString("db", "foo");
const NamespaceString kTemporaryReshardingNss =
- constructTemporaryReshardingNss("db", kExistingUUID);
+ resharding::constructTemporaryReshardingNss("db", kExistingUUID);
const std::string kOriginalShardKey = "oldKey";
const BSONObj kOriginalShardKeyPattern = BSON(kOriginalShardKey << 1);
const std::string kReshardingKey = "newKey";
@@ -111,8 +111,10 @@ protected:
const OID& epoch,
const ShardId& shardThatChunkExistsOn) {
auto range = ChunkRange(BSON(shardKey << MINKEY), BSON(shardKey << MAXKEY));
- auto chunk = ChunkType(
- uuid, std::move(range), ChunkVersion(1, 0, epoch, timestamp), shardThatChunkExistsOn);
+ auto chunk = ChunkType(uuid,
+ std::move(range),
+ ChunkVersion({epoch, timestamp}, {1, 0}),
+ shardThatChunkExistsOn);
ChunkManager cm(kThisShard.getShardId(),
DatabaseVersion(uuid, timestamp),
makeStandaloneRoutingTableHistory(
@@ -133,17 +135,18 @@ protected:
return CollectionMetadata(std::move(cm), kThisShard.getShardId());
}
- ReshardingDonorDocument makeDonorStateDoc() {
+ ReshardingDonorDocument makeDonorStateDoc(NamespaceString sourceNss,
+ NamespaceString tempReshardingNss,
+ BSONObj reshardingKey,
+ std::vector<mongo::ShardId> recipientShards) {
DonorShardContext donorCtx;
donorCtx.setState(DonorStateEnum::kPreparingToDonate);
- ReshardingDonorDocument doc(std::move(donorCtx),
- {kThisShard.getShardId(), kOtherShard.getShardId()});
+ ReshardingDonorDocument doc(std::move(donorCtx), recipientShards);
- NamespaceString sourceNss = kOriginalNss;
auto sourceUUID = UUID::gen();
auto commonMetadata = CommonReshardingMetadata(
- UUID::gen(), sourceNss, sourceUUID, kTemporaryReshardingNss, kReshardingKeyPattern);
+ UUID::gen(), sourceNss, sourceUUID, tempReshardingNss, reshardingKey);
doc.setCommonReshardingMetadata(std::move(commonMetadata));
return doc;
@@ -194,7 +197,7 @@ protected:
const boost::optional<Timestamp>& cloneTimestamp = boost::none) {
auto recipientFields =
TypeCollectionRecipientFields(donorShards, existingUUID, originalNss, 5000);
- emplaceCloneTimestampIfExists(recipientFields, cloneTimestamp);
+ resharding::emplaceCloneTimestampIfExists(recipientFields, cloneTimestamp);
fields.setRecipientFields(std::move(recipientFields));
}
@@ -262,6 +265,19 @@ protected:
ASSERT(donorShardMap.empty());
}
+ void addFilteringMetadata(OperationContext* opCtx, NamespaceString sourceNss, ShardId shardId) {
+ AutoGetCollection autoColl(opCtx, sourceNss, LockMode::MODE_IS);
+ const auto metadata{makeShardedMetadataForOriginalCollection(opCtx, shardId)};
+ ScopedSetShardRole scopedSetShardRole{opCtx,
+ sourceNss,
+ metadata.getShardVersion() /* shardVersion */,
+ boost::none /* databaseVersion */};
+
+ auto csr = CollectionShardingRuntime::get(opCtx, sourceNss);
+ csr->setFilteringMetadata(opCtx, metadata);
+ ASSERT(csr->getCurrentMetadataIfKnown());
+ }
+
private:
DonorShardFetchTimestamp makeDonorShardFetchTimestamp(
ShardId shardId, boost::optional<Timestamp> fetchTimestamp) {
@@ -553,34 +569,10 @@ TEST_F(ReshardingDonorRecipientCommonInternalsTest, ClearReshardingFilteringMeta
}
// Add filtering metadata for the collection being resharded.
- {
- AutoGetCollection autoColl(opCtx, kOriginalNss, LockMode::MODE_IS);
- const auto metadata{
- makeShardedMetadataForOriginalCollection(opCtx, kThisShard.getShardId())};
- ScopedSetShardRole scopedSetShardRole{opCtx,
- kOriginalNss,
- metadata.getShardVersion() /* shardVersion */,
- boost::none /* databaseVersion */};
-
- auto csr = CollectionShardingRuntime::get(opCtx, kOriginalNss);
- csr->setFilteringMetadata(opCtx, metadata);
- ASSERT(csr->getCurrentMetadataIfKnown());
- }
+ addFilteringMetadata(opCtx, kOriginalNss, kThisShard.getShardId());
// Add filtering metadata for the temporary resharding namespace.
- {
- AutoGetCollection autoColl(opCtx, kTemporaryReshardingNss, LockMode::MODE_IS);
- const auto metadata{makeShardedMetadataForTemporaryReshardingCollection(
- opCtx, kThisShard.getShardId())};
- ScopedSetShardRole scopedSetShardRole{opCtx,
- kTemporaryReshardingNss,
- metadata.getShardVersion() /* shardVersion */,
- boost::none /* databaseVersion */};
-
- auto csr = CollectionShardingRuntime::get(opCtx, kTemporaryReshardingNss);
- csr->setFilteringMetadata(opCtx, metadata);
- ASSERT(csr->getCurrentMetadataIfKnown());
- }
+ addFilteringMetadata(opCtx, kTemporaryReshardingNss, kThisShard.getShardId());
// Prior to adding a resharding document, assert that attempting to clear filtering does
// nothing.
@@ -595,7 +587,11 @@ TEST_F(ReshardingDonorRecipientCommonInternalsTest, ClearReshardingFilteringMeta
doSetupFunc();
// Add a resharding donor document that targets the namespaces involved in resharding.
- ReshardingDonorDocument donorDoc = makeDonorStateDoc();
+ ReshardingDonorDocument donorDoc =
+ makeDonorStateDoc(kOriginalNss,
+ kTemporaryReshardingNss,
+ kReshardingKeyPattern,
+ {kThisShard.getShardId(), kOtherShard.getShardId()});
ReshardingDonorService::DonorStateMachine::insertStateDocument(opCtx, donorDoc);
// Clear the filtering metadata (without scheduling a refresh) and assert the metadata is gone.
@@ -622,5 +618,49 @@ TEST_F(ReshardingDonorRecipientCommonInternalsTest, ClearReshardingFilteringMeta
}
}
+TEST_F(ReshardingDonorRecipientCommonInternalsTest, ClearReshardingFilteringMetaDataForActiveOp) {
+ OperationContext* opCtx = operationContext();
+ NamespaceString sourceNss1 = NamespaceString("db", "one");
+ NamespaceString tempReshardingNss1 =
+ resharding::constructTemporaryReshardingNss(sourceNss1.db(), UUID::gen());
+ NamespaceString sourceNss2 = NamespaceString("db", "two");
+ NamespaceString tempReshardingNss2 =
+ resharding::constructTemporaryReshardingNss(sourceNss2.db(), UUID::gen());
+ ShardId shardId1 = ShardId{"recipient1"};
+ ShardId shardId2 = ShardId{"recipient2"};
+ ReshardingDonorDocument doc1 =
+ makeDonorStateDoc(sourceNss1, tempReshardingNss1, BSON("newKey1" << 1), {shardId1});
+ ReshardingDonorDocument doc2 =
+ makeDonorStateDoc(sourceNss2, tempReshardingNss2, BSON("newKey2" << 1), {shardId2});
+
+ ReshardingDonorService::DonorStateMachine::insertStateDocument(opCtx, doc1);
+ ReshardingDonorService::DonorStateMachine::insertStateDocument(opCtx, doc2);
+
+ // Add filtering metadata for the collection being resharded.
+ addFilteringMetadata(opCtx, sourceNss1, {shardId1});
+ addFilteringMetadata(opCtx, sourceNss2, {shardId2});
+
+ // Add filtering metadata for the temporary resharding namespace.
+ addFilteringMetadata(opCtx, tempReshardingNss1, {shardId1});
+ addFilteringMetadata(opCtx, tempReshardingNss2, {shardId2});
+
+ // Clear the filtering metadata (without scheduling a refresh) for only a single operation's
+ // related namespaces
+ resharding::clearFilteringMetadata(opCtx, {sourceNss1, tempReshardingNss1}, false);
+
+ for (auto const& nss : {sourceNss1, tempReshardingNss1}) {
+ AutoGetCollection autoColl(opCtx, nss, LockMode::MODE_IS);
+ auto csr = CollectionShardingRuntime::get(opCtx, nss);
+ ASSERT(csr->getCurrentMetadataIfKnown() == boost::none);
+ }
+
+ // Assert that the filtering metadata is not cleared for the other operation
+ for (auto const& nss : {sourceNss2, tempReshardingNss2}) {
+ AutoGetCollection autoColl(opCtx, nss, LockMode::MODE_IS);
+ auto csr = CollectionShardingRuntime::get(opCtx, nss);
+ ASSERT(csr->getCurrentMetadataIfKnown() != boost::none);
+ }
+}
+
} // namespace
} // namespace mongo
diff --git a/src/mongo/db/s/resharding/resharding_donor_service.cpp b/src/mongo/db/s/resharding/resharding_donor_service.cpp
index 7f870033a1f..40b1f17f179 100644
--- a/src/mongo/db/s/resharding/resharding_donor_service.cpp
+++ b/src/mongo/db/s/resharding/resharding_donor_service.cpp
@@ -178,13 +178,17 @@ public:
}
}
- void clearFilteringMetadata(OperationContext* opCtx) {
- resharding::clearFilteringMetadata(opCtx, true /* scheduleAsyncRefresh */);
+ void clearFilteringMetadata(OperationContext* opCtx,
+ const NamespaceString& sourceNss,
+ const NamespaceString& tempReshardingNss) {
+ stdx::unordered_set<NamespaceString> namespacesToRefresh{sourceNss, tempReshardingNss};
+ resharding::clearFilteringMetadata(
+ opCtx, namespacesToRefresh, true /* scheduleAsyncRefresh */);
}
};
-ReshardingMetricsNew::DonorState toMetricsState(DonorStateEnum state) {
- return ReshardingMetricsNew::DonorState(state);
+ReshardingMetrics::DonorState toMetricsState(DonorStateEnum state) {
+ return ReshardingMetrics::DonorState(state);
}
} // namespace
@@ -209,7 +213,7 @@ ReshardingDonorService::DonorStateMachine::DonorStateMachine(
std::unique_ptr<DonorStateMachineExternalState> externalState)
: repl::PrimaryOnlyService::TypedInstance<DonorStateMachine>(),
_donorService(donorService),
- _metricsNew{ReshardingMetricsNew::initializeFrom(donorDoc, getGlobalServiceContext())},
+ _metrics{ReshardingMetrics::initializeFrom(donorDoc, getGlobalServiceContext())},
_metadata{donorDoc.getCommonReshardingMetadata()},
_recipientShardIds{donorDoc.getRecipientShards()},
_donorCtx{donorDoc.getMutableState()},
@@ -233,7 +237,7 @@ ReshardingDonorService::DonorStateMachine::DonorStateMachine(
}()) {
invariant(_externalState);
- _metricsNew->onStateTransition(boost::none, toMetricsState(_donorCtx.getState()));
+ _metrics->onStateTransition(boost::none, toMetricsState(_donorCtx.getState()));
}
ExecutorFuture<void> ReshardingDonorService::DonorStateMachine::_runUntilBlockingWritesOrErrored(
@@ -375,8 +379,8 @@ ExecutorFuture<void> ReshardingDonorService::DonorStateMachine::_finishReshardin
{
auto opCtx = _cancelableOpCtxFactory->makeOperationContext(&cc());
-
- _externalState->clearFilteringMetadata(opCtx.get());
+ _externalState->clearFilteringMetadata(
+ opCtx.get(), _metadata.getSourceNss(), _metadata.getTempReshardingNss());
RecoverableCriticalSectionService::get(opCtx.get())
->releaseRecoverableCriticalSection(
@@ -385,7 +389,7 @@ ExecutorFuture<void> ReshardingDonorService::DonorStateMachine::_finishReshardin
_critSecReason,
ShardingCatalogClient::kLocalWriteConcern);
- _metricsNew->onCriticalSectionEnd();
+ _metrics->onCriticalSectionEnd();
}
auto opCtx = _cancelableOpCtxFactory->makeOperationContext(&cc());
@@ -410,6 +414,14 @@ ExecutorFuture<void> ReshardingDonorService::DonorStateMachine::_finishReshardin
Status ReshardingDonorService::DonorStateMachine::_runMandatoryCleanup(
Status status, const CancellationToken& stepdownToken) {
+ _metrics->onStateTransition(toMetricsState(_donorCtx.getState()), boost::none);
+
+ // Destroy metrics early so its lifetime will not be tied to the lifetime of this state
+ // machine. This is because we have future callbacks that copy shared pointers to this state
+ // machine, causing it to live longer than expected and potentially overlap with a newer
+ // instance when stepping up.
+ _metrics.reset();
+
if (!status.isOK()) {
// If the stepdownToken was triggered, it takes priority in order to make sure that
// the promise is set with an error that can be retried with. If it ran into an
@@ -427,8 +439,6 @@ Status ReshardingDonorService::DonorStateMachine::_runMandatoryCleanup(
ensureFulfilledPromise(lk, _completionPromise, statusForPromise);
}
- _metricsNew->onStateTransition(toMetricsState(_donorCtx.getState()), boost::none);
-
return status;
}
@@ -493,7 +503,7 @@ void ReshardingDonorService::DonorStateMachine::interrupt(Status status) {}
boost::optional<BSONObj> ReshardingDonorService::DonorStateMachine::reportForCurrentOp(
MongoProcessInterface::CurrentOpConnectionsMode connMode,
MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept {
- return _metricsNew->reportForCurrentOp();
+ return _metrics->reportForCurrentOp();
}
void ReshardingDonorService::DonorStateMachine::onReshardingFieldsChanges(
@@ -521,11 +531,11 @@ void ReshardingDonorService::DonorStateMachine::onReshardingFieldsChanges(
}
void ReshardingDonorService::DonorStateMachine::onWriteDuringCriticalSection() {
- _metricsNew->onWriteDuringCriticalSection();
+ _metrics->onWriteDuringCriticalSection();
}
void ReshardingDonorService::DonorStateMachine::onReadDuringCriticalSection() {
- _metricsNew->onReadDuringCriticalSection();
+ _metrics->onReadDuringCriticalSection();
}
SharedSemiFuture<void> ReshardingDonorService::DonorStateMachine::awaitCriticalSectionAcquired() {
@@ -690,7 +700,7 @@ void ReshardingDonorService::DonorStateMachine::
_critSecReason,
ShardingCatalogClient::kLocalWriteConcern);
- _metricsNew->onCriticalSectionBegin();
+ _metrics->onCriticalSectionBegin();
}
{
@@ -711,7 +721,7 @@ void ReshardingDonorService::DonorStateMachine::
oplog.setObject(
BSON("msg" << fmt::format("Writes to {} are temporarily blocked for resharding.",
_metadata.getSourceNss().toString())));
- oplog.setObject2(BSON("type" << kReshardFinalOpLogType << "reshardingUUID"
+ oplog.setObject2(BSON("type" << resharding::kReshardFinalOpLogType << "reshardingUUID"
<< _metadata.getReshardingUUID()));
oplog.setOpTime(OplogSlot());
oplog.setWallClockTime(opCtx->getServiceContext()->getFastClockSource()->now());
@@ -828,7 +838,7 @@ void ReshardingDonorService::DonorStateMachine::_transitionState(DonorShardConte
_updateDonorDocument(std::move(newDonorCtx));
- _metricsNew->onStateTransition(toMetricsState(oldState), toMetricsState(newState));
+ _metrics->onStateTransition(toMetricsState(oldState), toMetricsState(newState));
LOGV2_INFO(5279505,
"Transitioned resharding donor state",
@@ -852,7 +862,7 @@ void ReshardingDonorService::DonorStateMachine::_transitionToDonatingInitialData
void ReshardingDonorService::DonorStateMachine::_transitionToError(Status abortReason) {
auto newDonorCtx = _donorCtx;
newDonorCtx.setState(DonorStateEnum::kError);
- emplaceTruncatedAbortReasonIfExists(newDonorCtx, abortReason);
+ resharding::emplaceTruncatedAbortReasonIfExists(newDonorCtx, abortReason);
_transitionState(std::move(newDonorCtx));
}
diff --git a/src/mongo/db/s/resharding/resharding_donor_service.h b/src/mongo/db/s/resharding/resharding_donor_service.h
index f2f4d99d2e8..3f3d88965db 100644
--- a/src/mongo/db/s/resharding/resharding_donor_service.h
+++ b/src/mongo/db/s/resharding/resharding_donor_service.h
@@ -32,7 +32,7 @@
#include "mongo/db/cancelable_operation_context.h"
#include "mongo/db/repl/primary_only_service.h"
#include "mongo/db/s/resharding/donor_document_gen.h"
-#include "mongo/db/s/resharding/resharding_metrics_new.h"
+#include "mongo/db/s/resharding/resharding_metrics.h"
#include "mongo/s/resharding/type_collection_fields_gen.h"
namespace mongo {
@@ -218,7 +218,7 @@ private:
// The primary-only service instance corresponding to the donor instance. Not owned.
const ReshardingDonorService* const _donorService;
- std::unique_ptr<ReshardingMetricsNew> _metricsNew;
+ std::unique_ptr<ReshardingMetrics> _metrics;
// The in-memory representation of the immutable portion of the document in
// config.localReshardingOperations.donor.
@@ -297,7 +297,9 @@ public:
const BSONObj& query,
const BSONObj& update) = 0;
- virtual void clearFilteringMetadata(OperationContext* opCtx) = 0;
+ virtual void clearFilteringMetadata(OperationContext* opCtx,
+ const NamespaceString& sourceNss,
+ const NamespaceString& tempReshardingNss) = 0;
};
} // namespace mongo
diff --git a/src/mongo/db/s/resharding/resharding_donor_service_test.cpp b/src/mongo/db/s/resharding/resharding_donor_service_test.cpp
index 0f40919d14d..4d83cfe5e44 100644
--- a/src/mongo/db/s/resharding/resharding_donor_service_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_donor_service_test.cpp
@@ -85,7 +85,9 @@ public:
const BSONObj& query,
const BSONObj& update) override {}
- void clearFilteringMetadata(OperationContext* opCtx) override {}
+ void clearFilteringMetadata(OperationContext* opCtx,
+ const NamespaceString& sourceNss,
+ const NamespaceString& tempReshardingNss) override {}
};
class DonorOpObserverForTest : public OpObserverForTest {
@@ -148,12 +150,12 @@ public:
NamespaceString sourceNss("sourcedb.sourcecollection");
auto sourceUUID = UUID::gen();
- auto commonMetadata =
- CommonReshardingMetadata(UUID::gen(),
- sourceNss,
- sourceUUID,
- constructTemporaryReshardingNss(sourceNss.db(), sourceUUID),
- BSON("newKey" << 1));
+ auto commonMetadata = CommonReshardingMetadata(
+ UUID::gen(),
+ sourceNss,
+ sourceUUID,
+ resharding::constructTemporaryReshardingNss(sourceNss.db(), sourceUUID),
+ BSON("newKey" << 1));
commonMetadata.setStartTime(getServiceContext()->getFastClockSource()->now());
doc.setCommonReshardingMetadata(std::move(commonMetadata));
@@ -348,7 +350,7 @@ TEST_F(ReshardingDonorServiceTest, WritesFinalReshardOpOplogEntriesWhileWritesBl
DBDirectClient client(opCtx.get());
FindCommandRequest findRequest{NamespaceString::kRsOplogNamespace};
- findRequest.setFilter(BSON("o2.type" << kReshardFinalOpLogType));
+ findRequest.setFilter(BSON("o2.type" << resharding::kReshardFinalOpLogType));
auto cursor = client.find(std::move(findRequest));
ASSERT_TRUE(cursor->more()) << "Found no oplog entries for source collection";
@@ -710,7 +712,7 @@ TEST_F(ReshardingDonorServiceTest, TruncatesXLErrorOnDonorDocument) {
// to the primitive truncation algorithm - Check that the total size is less than
// kReshardErrorMaxBytes + a couple additional bytes to provide a buffer for the field
// name sizes.
- int maxReshardErrorBytesCeiling = kReshardErrorMaxBytes + 200;
+ int maxReshardErrorBytesCeiling = resharding::kReshardErrorMaxBytes + 200;
ASSERT_LT(persistedAbortReasonBSON->objsize(), maxReshardErrorBytesCeiling);
ASSERT_EQ(persistedAbortReasonBSON->getIntField("code"),
ErrorCodes::ReshardCollectionTruncatedError);
diff --git a/src/mongo/db/s/resharding/resharding_manual_cleanup.cpp b/src/mongo/db/s/resharding/resharding_manual_cleanup.cpp
index 9c2b78385fa..74911c8518f 100644
--- a/src/mongo/db/s/resharding/resharding_manual_cleanup.cpp
+++ b/src/mongo/db/s/resharding/resharding_manual_cleanup.cpp
@@ -48,8 +48,9 @@ namespace {
std::vector<ShardId> getAllParticipantsFromCoordDoc(const ReshardingCoordinatorDocument& doc) {
std::vector<ShardId> participants;
- auto donorShards = extractShardIdsFromParticipantEntriesAsSet(doc.getDonorShards());
- auto recipientShards = extractShardIdsFromParticipantEntriesAsSet(doc.getRecipientShards());
+ auto donorShards = resharding::extractShardIdsFromParticipantEntriesAsSet(doc.getDonorShards());
+ auto recipientShards =
+ resharding::extractShardIdsFromParticipantEntriesAsSet(doc.getRecipientShards());
std::set_union(donorShards.begin(),
donorShards.end(),
recipientShards.begin(),
diff --git a/src/mongo/db/s/resharding/resharding_metrics_new.cpp b/src/mongo/db/s/resharding/resharding_metrics.cpp
index e07468ad1b9..610ef970475 100644
--- a/src/mongo/db/s/resharding/resharding_metrics_new.cpp
+++ b/src/mongo/db/s/resharding/resharding_metrics.cpp
@@ -26,15 +26,15 @@
* exception statement from all source files in the program, then also delete
* it in the license file.
*/
-#include "mongo/db/s/resharding/resharding_metrics_new.h"
+#include "mongo/db/s/resharding/resharding_metrics.h"
#include "mongo/db/exec/document_value/document.h"
namespace mongo {
namespace {
-inline ReshardingMetricsNew::State getDefaultState(ReshardingMetricsNew::Role role) {
- using Role = ReshardingMetricsNew::Role;
+inline ReshardingMetrics::State getDefaultState(ReshardingMetrics::Role role) {
+ using Role = ReshardingMetrics::Role;
switch (role) {
case Role::kCoordinator:
return CoordinatorStateEnum::kUnused;
@@ -70,14 +70,13 @@ Date_t readStartTime(const CommonReshardingMetadata& metadata, ClockSource* fall
} // namespace
-ReshardingMetricsNew::ReshardingMetricsNew(
- UUID instanceId,
- BSONObj shardKey,
- NamespaceString nss,
- Role role,
- Date_t startTime,
- ClockSource* clockSource,
- ShardingDataTransformCumulativeMetrics* cumulativeMetrics)
+ReshardingMetrics::ReshardingMetrics(UUID instanceId,
+ BSONObj shardKey,
+ NamespaceString nss,
+ Role role,
+ Date_t startTime,
+ ClockSource* clockSource,
+ ShardingDataTransformCumulativeMetrics* cumulativeMetrics)
: ShardingDataTransformInstanceMetrics{std::move(instanceId),
createOriginalCommand(nss, std::move(shardKey)),
nss,
@@ -87,44 +86,42 @@ ReshardingMetricsNew::ReshardingMetricsNew(
cumulativeMetrics},
_state{getDefaultState(role)} {}
-ReshardingMetricsNew::ReshardingMetricsNew(
- const CommonReshardingMetadata& metadata,
- Role role,
- ClockSource* clockSource,
- ShardingDataTransformCumulativeMetrics* cumulativeMetrics)
- : ReshardingMetricsNew{metadata.getReshardingUUID(),
- metadata.getReshardingKey().toBSON(),
- metadata.getSourceNss(),
- role,
- readStartTime(metadata, clockSource),
- clockSource,
- cumulativeMetrics} {}
-
-std::string ReshardingMetricsNew::createOperationDescription() const noexcept {
+ReshardingMetrics::ReshardingMetrics(const CommonReshardingMetadata& metadata,
+ Role role,
+ ClockSource* clockSource,
+ ShardingDataTransformCumulativeMetrics* cumulativeMetrics)
+ : ReshardingMetrics{metadata.getReshardingUUID(),
+ metadata.getReshardingKey().toBSON(),
+ metadata.getSourceNss(),
+ role,
+ readStartTime(metadata, clockSource),
+ clockSource,
+ cumulativeMetrics} {}
+
+std::string ReshardingMetrics::createOperationDescription() const noexcept {
return fmt::format("ReshardingMetrics{}Service {}",
ShardingDataTransformMetrics::getRoleName(_role),
_instanceId.toString());
}
-std::unique_ptr<ReshardingMetricsNew> ReshardingMetricsNew::makeInstance(
- UUID instanceId,
- BSONObj shardKey,
- NamespaceString nss,
- Role role,
- Date_t startTime,
- ServiceContext* serviceContext) {
+std::unique_ptr<ReshardingMetrics> ReshardingMetrics::makeInstance(UUID instanceId,
+ BSONObj shardKey,
+ NamespaceString nss,
+ Role role,
+ Date_t startTime,
+ ServiceContext* serviceContext) {
auto cumulativeMetrics =
ShardingDataTransformCumulativeMetrics::getForResharding(serviceContext);
- return std::make_unique<ReshardingMetricsNew>(instanceId,
- createOriginalCommand(nss, std::move(shardKey)),
- std::move(nss),
- role,
- startTime,
- serviceContext->getFastClockSource(),
- cumulativeMetrics);
+ return std::make_unique<ReshardingMetrics>(instanceId,
+ createOriginalCommand(nss, std::move(shardKey)),
+ std::move(nss),
+ role,
+ startTime,
+ serviceContext->getFastClockSource(),
+ cumulativeMetrics);
}
-StringData ReshardingMetricsNew::getStateString() const noexcept {
+StringData ReshardingMetrics::getStateString() const noexcept {
return stdx::visit(
visit_helper::Overloaded{
[](CoordinatorStateEnum state) { return CoordinatorState_serializer(state); },
@@ -133,7 +130,7 @@ StringData ReshardingMetricsNew::getStateString() const noexcept {
_state.load());
}
-void ReshardingMetricsNew::accumulateFrom(const ReshardingOplogApplierProgress& progressDoc) {
+void ReshardingMetrics::accumulateFrom(const ReshardingOplogApplierProgress& progressDoc) {
invariant(_role == Role::kRecipient);
accumulateValues(progressDoc.getInsertsApplied(),
@@ -142,7 +139,7 @@ void ReshardingMetricsNew::accumulateFrom(const ReshardingOplogApplierProgress&
progressDoc.getWritesToStashCollections());
}
-void ReshardingMetricsNew::restoreRecipientSpecificFields(
+void ReshardingMetrics::restoreRecipientSpecificFields(
const ReshardingRecipientDocument& document) {
auto metrics = document.getMetrics();
if (!metrics) {
@@ -161,14 +158,14 @@ void ReshardingMetricsNew::restoreRecipientSpecificFields(
restorePhaseDurationFields(document);
}
-void ReshardingMetricsNew::restoreCoordinatorSpecificFields(
+void ReshardingMetrics::restoreCoordinatorSpecificFields(
const ReshardingCoordinatorDocument& document) {
restorePhaseDurationFields(document);
}
-ReshardingMetricsNew::DonorState::DonorState(DonorStateEnum enumVal) : _enumVal(enumVal) {}
+ReshardingMetrics::DonorState::DonorState(DonorStateEnum enumVal) : _enumVal(enumVal) {}
-ShardingDataTransformCumulativeMetrics::DonorStateEnum ReshardingMetricsNew::DonorState::toMetrics()
+ShardingDataTransformCumulativeMetrics::DonorStateEnum ReshardingMetrics::DonorState::toMetrics()
const {
using MetricsEnum = ShardingDataTransformCumulativeMetrics::DonorStateEnum;
@@ -204,15 +201,14 @@ ShardingDataTransformCumulativeMetrics::DonorStateEnum ReshardingMetricsNew::Don
}
}
-DonorStateEnum ReshardingMetricsNew::DonorState::getState() const {
+DonorStateEnum ReshardingMetrics::DonorState::getState() const {
return _enumVal;
}
-ReshardingMetricsNew::RecipientState::RecipientState(RecipientStateEnum enumVal)
- : _enumVal(enumVal) {}
+ReshardingMetrics::RecipientState::RecipientState(RecipientStateEnum enumVal) : _enumVal(enumVal) {}
ShardingDataTransformCumulativeMetrics::RecipientStateEnum
-ReshardingMetricsNew::RecipientState::toMetrics() const {
+ReshardingMetrics::RecipientState::toMetrics() const {
using MetricsEnum = ShardingDataTransformCumulativeMetrics::RecipientStateEnum;
switch (_enumVal) {
@@ -248,15 +244,15 @@ ReshardingMetricsNew::RecipientState::toMetrics() const {
}
}
-RecipientStateEnum ReshardingMetricsNew::RecipientState::getState() const {
+RecipientStateEnum ReshardingMetrics::RecipientState::getState() const {
return _enumVal;
}
-ReshardingMetricsNew::CoordinatorState::CoordinatorState(CoordinatorStateEnum enumVal)
+ReshardingMetrics::CoordinatorState::CoordinatorState(CoordinatorStateEnum enumVal)
: _enumVal(enumVal) {}
ShardingDataTransformCumulativeMetrics::CoordinatorStateEnum
-ReshardingMetricsNew::CoordinatorState::toMetrics() const {
+ReshardingMetrics::CoordinatorState::toMetrics() const {
switch (_enumVal) {
case CoordinatorStateEnum::kUnused:
return ShardingDataTransformCumulativeMetrics::CoordinatorStateEnum::kUnused;
@@ -292,7 +288,7 @@ ReshardingMetricsNew::CoordinatorState::toMetrics() const {
}
}
-CoordinatorStateEnum ReshardingMetricsNew::CoordinatorState::getState() const {
+CoordinatorStateEnum ReshardingMetrics::CoordinatorState::getState() const {
return _enumVal;
}
diff --git a/src/mongo/db/s/resharding/resharding_metrics_new.h b/src/mongo/db/s/resharding/resharding_metrics.h
index b8e96698b0d..a1faa5a96da 100644
--- a/src/mongo/db/s/resharding/resharding_metrics_new.h
+++ b/src/mongo/db/s/resharding/resharding_metrics.h
@@ -38,7 +38,7 @@
namespace mongo {
-class ReshardingMetricsNew : public ShardingDataTransformInstanceMetrics {
+class ReshardingMetrics : public ShardingDataTransformInstanceMetrics {
public:
using State = stdx::variant<CoordinatorStateEnum, RecipientStateEnum, DonorStateEnum>;
@@ -78,24 +78,24 @@ public:
CoordinatorStateEnum _enumVal;
};
- ReshardingMetricsNew(UUID instanceId,
- BSONObj shardKey,
- NamespaceString nss,
- Role role,
- Date_t startTime,
- ClockSource* clockSource,
- ShardingDataTransformCumulativeMetrics* cumulativeMetrics);
- ReshardingMetricsNew(const CommonReshardingMetadata& metadata,
- Role role,
- ClockSource* clockSource,
- ShardingDataTransformCumulativeMetrics* cumulativeMetrics);
-
- static std::unique_ptr<ReshardingMetricsNew> makeInstance(UUID instanceId,
- BSONObj shardKey,
- NamespaceString nss,
- Role role,
- Date_t startTime,
- ServiceContext* serviceContext);
+ ReshardingMetrics(UUID instanceId,
+ BSONObj shardKey,
+ NamespaceString nss,
+ Role role,
+ Date_t startTime,
+ ClockSource* clockSource,
+ ShardingDataTransformCumulativeMetrics* cumulativeMetrics);
+ ReshardingMetrics(const CommonReshardingMetadata& metadata,
+ Role role,
+ ClockSource* clockSource,
+ ShardingDataTransformCumulativeMetrics* cumulativeMetrics);
+
+ static std::unique_ptr<ReshardingMetrics> makeInstance(UUID instanceId,
+ BSONObj shardKey,
+ NamespaceString nss,
+ Role role,
+ Date_t startTime,
+ ServiceContext* serviceContext);
template <typename T>
static auto initializeFrom(const T& document,
@@ -103,10 +103,10 @@ public:
ShardingDataTransformCumulativeMetrics* cumulativeMetrics) {
static_assert(resharding_metrics::isStateDocument<T>);
auto result =
- std::make_unique<ReshardingMetricsNew>(document.getCommonReshardingMetadata(),
- resharding_metrics::getRoleForStateDocument<T>(),
- clockSource,
- cumulativeMetrics);
+ std::make_unique<ReshardingMetrics>(document.getCommonReshardingMetadata(),
+ resharding_metrics::getRoleForStateDocument<T>(),
+ clockSource,
+ cumulativeMetrics);
result->setState(resharding_metrics::getState(document));
result->restoreRoleSpecificFields(document);
return result;
diff --git a/src/mongo/db/s/resharding/resharding_metrics_new_test.cpp b/src/mongo/db/s/resharding/resharding_metrics_test.cpp
index 82bcba56d43..e57581cf8dd 100644
--- a/src/mongo/db/s/resharding/resharding_metrics_new_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_metrics_test.cpp
@@ -30,7 +30,7 @@
#include "mongo/platform/basic.h"
-#include "mongo/db/s/resharding/resharding_metrics_new.h"
+#include "mongo/db/s/resharding/resharding_metrics.h"
#include "mongo/db/s/resharding/resharding_service_test_helpers.h"
#include "mongo/db/s/resharding/resharding_util.h"
#include "mongo/db/s/sharding_data_transform_cumulative_metrics.h"
@@ -49,16 +49,16 @@ const auto kShardKey = BSON("newKey" << 1);
class ReshardingMetricsTest : public ShardingDataTransformMetricsTestFixture {
public:
- std::unique_ptr<ReshardingMetricsNew> createInstanceMetrics(ClockSource* clockSource,
- UUID instanceId = UUID::gen(),
- Role role = Role::kDonor) {
- return std::make_unique<ReshardingMetricsNew>(instanceId,
- BSON("y" << 1),
- kTestNamespace,
- role,
- clockSource->now(),
- clockSource,
- &_cumulativeMetrics);
+ std::unique_ptr<ReshardingMetrics> createInstanceMetrics(ClockSource* clockSource,
+ UUID instanceId = UUID::gen(),
+ Role role = Role::kDonor) {
+ return std::make_unique<ReshardingMetrics>(instanceId,
+ BSON("y" << 1),
+ kTestNamespace,
+ role,
+ clockSource->now(),
+ clockSource,
+ &_cumulativeMetrics);
}
const UUID& getSourceCollectionId() {
@@ -69,7 +69,7 @@ public:
template <typename T>
BSONObj getReportFromStateDocument(T document) {
auto metrics =
- ReshardingMetricsNew::initializeFrom(document, getClockSource(), &_cumulativeMetrics);
+ ReshardingMetrics::initializeFrom(document, getClockSource(), &_cumulativeMetrics);
return metrics->reportForCurrentOp();
}
@@ -98,12 +98,12 @@ public:
}
CommonReshardingMetadata createCommonReshardingMetadata(const UUID& operationId) {
- CommonReshardingMetadata metadata{
- operationId,
- kTestNamespace,
- getSourceCollectionId(),
- constructTemporaryReshardingNss(kTestNamespace.db(), getSourceCollectionId()),
- kShardKey};
+ CommonReshardingMetadata metadata{operationId,
+ kTestNamespace,
+ getSourceCollectionId(),
+ resharding::constructTemporaryReshardingNss(
+ kTestNamespace.db(), getSourceCollectionId()),
+ kShardKey};
metadata.setStartTime(getClockSource()->now() - kRunningTime);
return metadata;
}
@@ -169,7 +169,7 @@ public:
doc.setMetrics(metricsDoc);
auto metrics =
- ReshardingMetricsNew::initializeFrom(doc, getClockSource(), &_cumulativeMetrics);
+ ReshardingMetrics::initializeFrom(doc, getClockSource(), &_cumulativeMetrics);
clock->advance(kInterval);
auto report = metrics->reportForCurrentOp();
diff --git a/src/mongo/db/s/resharding/resharding_oplog_application.cpp b/src/mongo/db/s/resharding/resharding_oplog_application.cpp
index 1478a3ec30c..9a643ef819e 100644
--- a/src/mongo/db/s/resharding/resharding_oplog_application.cpp
+++ b/src/mongo/db/s/resharding/resharding_oplog_application.cpp
@@ -252,7 +252,7 @@ void ReshardingOplogApplicationRules::_applyInsert_inlock(OperationContext* opCt
// First, query the conflict stash collection using [op _id] as the query. If a doc exists,
// apply rule #1 and run a replacement update on the stash collection.
- auto stashCollDoc = _queryStashCollById(opCtx, db, stashColl, idQuery);
+ auto stashCollDoc = _queryStashCollById(opCtx, stashColl, idQuery);
if (!stashCollDoc.isEmpty()) {
auto request = UpdateRequest();
request.setNamespaceString(_myStashNss);
@@ -348,7 +348,7 @@ void ReshardingOplogApplicationRules::_applyUpdate_inlock(OperationContext* opCt
// First, query the conflict stash collection using [op _id] as the query. If a doc exists,
// apply rule #1 and update the doc from the stash collection.
- auto stashCollDoc = _queryStashCollById(opCtx, db, stashColl, idQuery);
+ auto stashCollDoc = _queryStashCollById(opCtx, stashColl, idQuery);
if (!stashCollDoc.isEmpty()) {
auto request = UpdateRequest();
request.setNamespaceString(_myStashNss);
@@ -430,7 +430,7 @@ void ReshardingOplogApplicationRules::_applyDelete_inlock(OperationContext* opCt
// First, query the conflict stash collection using [op _id] as the query. If a doc exists,
// apply rule #1 and delete the doc from the stash collection.
- auto stashCollDoc = _queryStashCollById(opCtx, db, stashColl, idQuery);
+ auto stashCollDoc = _queryStashCollById(opCtx, stashColl, idQuery);
if (!stashCollDoc.isEmpty()) {
auto nDeleted = deleteObjects(opCtx, stashColl, _myStashNss, idQuery, true /* justOne */);
invariant(nDeleted != 0);
@@ -543,7 +543,6 @@ void ReshardingOplogApplicationRules::_applyDelete_inlock(OperationContext* opCt
}
BSONObj ReshardingOplogApplicationRules::_queryStashCollById(OperationContext* opCtx,
- Database* db,
const CollectionPtr& coll,
const BSONObj& idQuery) const {
const IndexCatalog* indexCatalog = coll->getIndexCatalog();
@@ -552,7 +551,7 @@ BSONObj ReshardingOplogApplicationRules::_queryStashCollById(OperationContext* o
indexCatalog->haveIdIndex(opCtx));
BSONObj result;
- Helpers::findById(opCtx, db, _myStashNss.ns(), idQuery, result);
+ Helpers::findById(opCtx, _myStashNss.ns(), idQuery, result);
return result;
}
} // namespace mongo
diff --git a/src/mongo/db/s/resharding/resharding_oplog_application.h b/src/mongo/db/s/resharding/resharding_oplog_application.h
index b8bd3942b40..4e00a62a269 100644
--- a/src/mongo/db/s/resharding/resharding_oplog_application.h
+++ b/src/mongo/db/s/resharding/resharding_oplog_application.h
@@ -96,7 +96,6 @@ private:
// Queries '_stashNss' using 'idQuery'.
BSONObj _queryStashCollById(OperationContext* opCtx,
- Database* db,
const CollectionPtr& coll,
const BSONObj& idQuery) const;
diff --git a/src/mongo/db/s/resharding/resharding_oplog_applier.cpp b/src/mongo/db/s/resharding/resharding_oplog_applier.cpp
index cf449c4c00c..d9edf786371 100644
--- a/src/mongo/db/s/resharding/resharding_oplog_applier.cpp
+++ b/src/mongo/db/s/resharding/resharding_oplog_applier.cpp
@@ -271,7 +271,7 @@ NamespaceString ReshardingOplogApplier::ensureStashCollectionExists(
const UUID& existingUUID,
const ShardId& donorShardId,
const CollectionOptions& options) {
- auto nss = getLocalConflictStashNamespace(existingUUID, donorShardId);
+ auto nss = resharding::getLocalConflictStashNamespace(existingUUID, donorShardId);
resharding::data_copy::ensureCollectionExists(opCtx, nss, options);
return nss;
diff --git a/src/mongo/db/s/resharding/resharding_oplog_applier.h b/src/mongo/db/s/resharding/resharding_oplog_applier.h
index 56a7e9d3a0a..f1df65219cc 100644
--- a/src/mongo/db/s/resharding/resharding_oplog_applier.h
+++ b/src/mongo/db/s/resharding/resharding_oplog_applier.h
@@ -36,7 +36,7 @@
#include "mongo/db/repl/oplog_entry.h"
#include "mongo/db/s/resharding/donor_oplog_id_gen.h"
#include "mongo/db/s/resharding/resharding_donor_oplog_iterator.h"
-#include "mongo/db/s/resharding/resharding_metrics_new.h"
+#include "mongo/db/s/resharding/resharding_metrics.h"
#include "mongo/db/s/resharding/resharding_oplog_application.h"
#include "mongo/db/s/resharding/resharding_oplog_applier_progress_gen.h"
#include "mongo/db/s/resharding/resharding_oplog_batch_applier.h"
diff --git a/src/mongo/db/s/resharding/resharding_oplog_applier_metrics.cpp b/src/mongo/db/s/resharding/resharding_oplog_applier_metrics.cpp
index 31bb6ca8dd6..7a474b7edf1 100644
--- a/src/mongo/db/s/resharding/resharding_oplog_applier_metrics.cpp
+++ b/src/mongo/db/s/resharding/resharding_oplog_applier_metrics.cpp
@@ -34,8 +34,8 @@
namespace mongo {
ReshardingOplogApplierMetrics::ReshardingOplogApplierMetrics(
- ReshardingMetricsNew* metricsNew, boost::optional<ReshardingOplogApplierProgress> progressDoc)
- : _metricsNew(metricsNew) {
+ ReshardingMetrics* metrics, boost::optional<ReshardingOplogApplierProgress> progressDoc)
+ : _metrics(metrics) {
if (progressDoc) {
_insertsApplied = progressDoc->getInsertsApplied();
_updatesApplied = progressDoc->getUpdatesApplied();
@@ -46,35 +46,35 @@ ReshardingOplogApplierMetrics::ReshardingOplogApplierMetrics(
void ReshardingOplogApplierMetrics::onInsertApplied() {
_insertsApplied++;
- _metricsNew->onInsertApplied();
+ _metrics->onInsertApplied();
}
void ReshardingOplogApplierMetrics::onUpdateApplied() {
_updatesApplied++;
- _metricsNew->onUpdateApplied();
+ _metrics->onUpdateApplied();
}
void ReshardingOplogApplierMetrics::onDeleteApplied() {
_deletesApplied++;
- _metricsNew->onDeleteApplied();
+ _metrics->onDeleteApplied();
}
void ReshardingOplogApplierMetrics::onBatchRetrievedDuringOplogApplying(Milliseconds elapsed) {
- _metricsNew->onBatchRetrievedDuringOplogApplying(elapsed);
+ _metrics->onBatchRetrievedDuringOplogApplying(elapsed);
}
void ReshardingOplogApplierMetrics::onOplogLocalBatchApplied(Milliseconds elapsed) {
- _metricsNew->onOplogLocalBatchApplied(elapsed);
+ _metrics->onOplogLocalBatchApplied(elapsed);
}
void ReshardingOplogApplierMetrics::onOplogEntriesApplied(int64_t numEntries) {
_oplogEntriesApplied += numEntries;
- _metricsNew->onOplogEntriesApplied(numEntries);
+ _metrics->onOplogEntriesApplied(numEntries);
}
void ReshardingOplogApplierMetrics::onWriteToStashCollections() {
_writesToStashCollections++;
- _metricsNew->onWriteToStashedCollections();
+ _metrics->onWriteToStashedCollections();
}
int64_t ReshardingOplogApplierMetrics::getInsertsApplied() const {
diff --git a/src/mongo/db/s/resharding/resharding_oplog_applier_metrics.h b/src/mongo/db/s/resharding/resharding_oplog_applier_metrics.h
index 28830da1bfc..14347ce0b6b 100644
--- a/src/mongo/db/s/resharding/resharding_oplog_applier_metrics.h
+++ b/src/mongo/db/s/resharding/resharding_oplog_applier_metrics.h
@@ -29,7 +29,7 @@
#pragma once
-#include "mongo/db/s/resharding/resharding_metrics_new.h"
+#include "mongo/db/s/resharding/resharding_metrics.h"
#include "mongo/db/s/resharding/resharding_oplog_applier_progress_gen.h"
#include "mongo/util/duration.h"
@@ -40,7 +40,7 @@ namespace mongo {
*/
class ReshardingOplogApplierMetrics {
public:
- ReshardingOplogApplierMetrics(ReshardingMetricsNew* metricsNew,
+ ReshardingOplogApplierMetrics(ReshardingMetrics* metrics,
boost::optional<ReshardingOplogApplierProgress> progressDoc);
void onInsertApplied();
@@ -59,7 +59,7 @@ public:
int64_t getWritesToStashCollections() const;
private:
- ReshardingMetricsNew* _metricsNew;
+ ReshardingMetrics* _metrics;
int64_t _insertsApplied{0};
int64_t _updatesApplied{0};
int64_t _deletesApplied{0};
diff --git a/src/mongo/db/s/resharding/resharding_oplog_applier_metrics_test.cpp b/src/mongo/db/s/resharding/resharding_oplog_applier_metrics_test.cpp
index 44ea5efb842..7c04439713a 100644
--- a/src/mongo/db/s/resharding/resharding_oplog_applier_metrics_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_oplog_applier_metrics_test.cpp
@@ -42,14 +42,14 @@ namespace {
class ReshardingOplogApplierMetricsTest : public ShardingDataTransformMetricsTestFixture {
public:
- std::unique_ptr<ReshardingMetricsNew> createInstanceMetrics() {
- return std::make_unique<ReshardingMetricsNew>(UUID::gen(),
- kTestCommand,
- kTestNamespace,
- ReshardingMetricsNew::Role::kRecipient,
- getClockSource()->now(),
- getClockSource(),
- &_cumulativeMetrics);
+ std::unique_ptr<ReshardingMetrics> createInstanceMetrics() {
+ return std::make_unique<ReshardingMetrics>(UUID::gen(),
+ kTestCommand,
+ kTestNamespace,
+ ReshardingMetrics::Role::kRecipient,
+ getClockSource()->now(),
+ getClockSource(),
+ &_cumulativeMetrics);
}
};
diff --git a/src/mongo/db/s/resharding/resharding_oplog_applier_test.cpp b/src/mongo/db/s/resharding/resharding_oplog_applier_test.cpp
index 0e3f5a87504..d2313684ff9 100644
--- a/src/mongo/db/s/resharding/resharding_oplog_applier_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_oplog_applier_test.cpp
@@ -27,9 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include <fmt/format.h>
#include "mongo/db/cancelable_operation_context.h"
@@ -64,7 +61,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
-
namespace mongo {
namespace {
@@ -158,13 +154,12 @@ public:
_cm = createChunkManagerForOriginalColl();
- _metrics =
- ReshardingMetricsNew::makeInstance(kCrudUUID,
- BSON("y" << 1),
- kCrudNs,
- ReshardingMetricsNew::Role::kRecipient,
- getServiceContext()->getFastClockSource()->now(),
- getServiceContext());
+ _metrics = ReshardingMetrics::makeInstance(kCrudUUID,
+ BSON("y" << 1),
+ kCrudNs,
+ ReshardingMetrics::Role::kRecipient,
+ getServiceContext()->getFastClockSource()->now(),
+ getServiceContext());
_applierMetrics =
std::make_unique<ReshardingOplogApplierMetrics>(_metrics.get(), boost::none);
@@ -195,17 +190,17 @@ public:
kCrudUUID,
ChunkRange{BSON(kOriginalShardKey << MINKEY),
BSON(kOriginalShardKey << -std::numeric_limits<double>::infinity())},
- ChunkVersion(1, 0, epoch, Timestamp(1, 1)),
+ ChunkVersion({epoch, Timestamp(1, 1)}, {1, 0}),
_sourceId.getShardId()},
ChunkType{
kCrudUUID,
ChunkRange{BSON(kOriginalShardKey << -std::numeric_limits<double>::infinity()),
BSON(kOriginalShardKey << 0)},
- ChunkVersion(1, 0, epoch, Timestamp(1, 1)),
+ ChunkVersion({epoch, Timestamp(1, 1)}, {1, 0}),
kOtherShardId},
ChunkType{kCrudUUID,
ChunkRange{BSON(kOriginalShardKey << 0), BSON(kOriginalShardKey << MAXKEY)},
- ChunkVersion(1, 0, epoch, Timestamp(1, 1)),
+ ChunkVersion({epoch, Timestamp(1, 1)}, {1, 0}),
_sourceId.getShardId()}};
auto rt = RoutingTableHistory::makeNew(kCrudNs,
@@ -363,7 +358,7 @@ protected:
boost::optional<ChunkManager> _cm;
const ReshardingSourceId _sourceId{UUID::gen(), kMyShardId};
- std::unique_ptr<ReshardingMetricsNew> _metrics;
+ std::unique_ptr<ReshardingMetrics> _metrics;
std::unique_ptr<ReshardingOplogApplierMetrics> _applierMetrics;
std::shared_ptr<executor::ThreadPoolTaskExecutor> _executor;
diff --git a/src/mongo/db/s/resharding/resharding_oplog_batch_applier_test.cpp b/src/mongo/db/s/resharding/resharding_oplog_batch_applier_test.cpp
index f8af8d80998..ca596e65e16 100644
--- a/src/mongo/db/s/resharding/resharding_oplog_batch_applier_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_oplog_batch_applier_test.cpp
@@ -27,9 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include <boost/optional/optional_io.hpp>
#include <memory>
#include <vector>
@@ -46,7 +43,7 @@
#include "mongo/db/repl/storage_interface_impl.h"
#include "mongo/db/s/op_observer_sharding_impl.h"
#include "mongo/db/s/resharding/resharding_data_copy_util.h"
-#include "mongo/db/s/resharding/resharding_metrics_new.h"
+#include "mongo/db/s/resharding/resharding_metrics.h"
#include "mongo/db/s/resharding/resharding_oplog_application.h"
#include "mongo/db/s/resharding/resharding_oplog_batch_applier.h"
#include "mongo/db/s/resharding/resharding_oplog_session_application.h"
@@ -66,7 +63,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
-
namespace mongo {
namespace {
@@ -111,15 +107,15 @@ public:
opCtx.get(), nss, CollectionOptions{});
}
- _metricsNew =
- ReshardingMetricsNew::makeInstance(UUID::gen(),
- BSON("y" << 1),
- _outputNss,
- ShardingDataTransformMetrics::Role::kRecipient,
- serviceContext->getFastClockSource()->now(),
- serviceContext);
+ _metrics =
+ ReshardingMetrics::makeInstance(UUID::gen(),
+ BSON("y" << 1),
+ _outputNss,
+ ShardingDataTransformMetrics::Role::kRecipient,
+ serviceContext->getFastClockSource()->now(),
+ serviceContext);
_applierMetrics =
- std::make_unique<ReshardingOplogApplierMetrics>(_metricsNew.get(), boost::none);
+ std::make_unique<ReshardingOplogApplierMetrics>(_metrics.get(), boost::none);
_crudApplication = std::make_unique<ReshardingOplogApplicationRules>(
_outputNss,
std::vector<NamespaceString>{_myStashNss, _otherStashNss},
@@ -318,7 +314,7 @@ private:
std::vector<ChunkType> chunks = {ChunkType{
_sourceUUID,
ChunkRange{BSON(_currentShardKey << MINKEY), BSON(_currentShardKey << MAXKEY)},
- ChunkVersion(100, 0, epoch, Timestamp(1, 1)),
+ ChunkVersion({epoch, Timestamp(1, 1)}, {100, 0}),
_myDonorId}};
auto rt = RoutingTableHistory::makeNew(_sourceNss,
@@ -356,13 +352,15 @@ private:
const ShardId _otherDonorId{"otherDonorId"};
const NamespaceString _outputNss =
- constructTemporaryReshardingNss(_sourceNss.db(), _sourceUUID);
- const NamespaceString _myStashNss = getLocalConflictStashNamespace(_sourceUUID, _myDonorId);
+ resharding::constructTemporaryReshardingNss(_sourceNss.db(), _sourceUUID);
+ const NamespaceString _myStashNss =
+ resharding::getLocalConflictStashNamespace(_sourceUUID, _myDonorId);
const NamespaceString _otherStashNss =
- getLocalConflictStashNamespace(_sourceUUID, _otherDonorId);
- const NamespaceString _myOplogBufferNss = getLocalOplogBufferNamespace(_sourceUUID, _myDonorId);
+ resharding::getLocalConflictStashNamespace(_sourceUUID, _otherDonorId);
+ const NamespaceString _myOplogBufferNss =
+ resharding::getLocalOplogBufferNamespace(_sourceUUID, _myDonorId);
- std::unique_ptr<ReshardingMetricsNew> _metricsNew;
+ std::unique_ptr<ReshardingMetrics> _metrics;
std::unique_ptr<ReshardingOplogApplierMetrics> _applierMetrics;
std::unique_ptr<ReshardingOplogApplicationRules> _crudApplication;
diff --git a/src/mongo/db/s/resharding/resharding_oplog_crud_application_test.cpp b/src/mongo/db/s/resharding/resharding_oplog_crud_application_test.cpp
index 4114100a5bc..9c09f5ebcf0 100644
--- a/src/mongo/db/s/resharding/resharding_oplog_crud_application_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_oplog_crud_application_test.cpp
@@ -27,12 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
-#include <memory>
-#include <vector>
-
#include "mongo/bson/bsonmisc.h"
#include "mongo/db/catalog/collection_options.h"
#include "mongo/db/catalog_raii.h"
@@ -47,7 +41,7 @@
#include "mongo/db/s/collection_sharding_runtime.h"
#include "mongo/db/s/op_observer_sharding_impl.h"
#include "mongo/db/s/resharding/resharding_data_copy_util.h"
-#include "mongo/db/s/resharding/resharding_metrics_new.h"
+#include "mongo/db/s/resharding/resharding_metrics.h"
#include "mongo/db/s/resharding/resharding_oplog_application.h"
#include "mongo/db/s/resharding/resharding_util.h"
#include "mongo/db/s/sharding_state.h"
@@ -112,15 +106,15 @@ public:
CollectionMetadata(makeChunkManagerForOutputCollection(), _myDonorId));
}
- _metricsNew =
- ReshardingMetricsNew::makeInstance(_sourceUUID,
- BSON(_newShardKey << 1),
- _outputNss,
- ShardingDataTransformMetrics::Role::kRecipient,
- serviceContext->getFastClockSource()->now(),
- serviceContext);
+ _metrics =
+ ReshardingMetrics::makeInstance(_sourceUUID,
+ BSON(_newShardKey << 1),
+ _outputNss,
+ ShardingDataTransformMetrics::Role::kRecipient,
+ serviceContext->getFastClockSource()->now(),
+ serviceContext);
_oplogApplierMetrics =
- std::make_unique<ReshardingOplogApplierMetrics>(_metricsNew.get(), boost::none);
+ std::make_unique<ReshardingOplogApplierMetrics>(_metrics.get(), boost::none);
_applier = std::make_unique<ReshardingOplogApplicationRules>(
_outputNss,
std::vector<NamespaceString>{_myStashNss, _otherStashNss},
@@ -289,16 +283,16 @@ private:
_sourceUUID,
ChunkRange{BSON(_currentShardKey << MINKEY),
BSON(_currentShardKey << -std::numeric_limits<double>::infinity())},
- ChunkVersion(100, 0, epoch, Timestamp(1, 1)),
+ ChunkVersion({epoch, Timestamp(1, 1)}, {100, 0}),
_myDonorId},
ChunkType{_sourceUUID,
ChunkRange{BSON(_currentShardKey << -std::numeric_limits<double>::infinity()),
BSON(_currentShardKey << 0)},
- ChunkVersion(100, 1, epoch, Timestamp(1, 1)),
+ ChunkVersion({epoch, Timestamp(1, 1)}, {100, 1}),
_otherDonorId},
ChunkType{_sourceUUID,
ChunkRange{BSON(_currentShardKey << 0), BSON(_currentShardKey << MAXKEY)},
- ChunkVersion(100, 2, epoch, Timestamp(1, 1)),
+ ChunkVersion({epoch, Timestamp(1, 1)}, {100, 2}),
_myDonorId}};
return makeChunkManager(
@@ -311,7 +305,7 @@ private:
std::vector<ChunkType> chunks = {
ChunkType{outputUuid,
ChunkRange{BSON(_newShardKey << MINKEY), BSON(_newShardKey << MAXKEY)},
- ChunkVersion(100, 0, epoch, Timestamp(1, 1)),
+ ChunkVersion({epoch, Timestamp(1, 1)}, {100, 0}),
_myDonorId}};
return makeChunkManager(
@@ -335,13 +329,14 @@ private:
const ShardId _otherDonorId{"otherDonorId"};
const NamespaceString _outputNss =
- constructTemporaryReshardingNss(_sourceNss.db(), _sourceUUID);
- const NamespaceString _myStashNss = getLocalConflictStashNamespace(_sourceUUID, _myDonorId);
+ resharding::constructTemporaryReshardingNss(_sourceNss.db(), _sourceUUID);
+ const NamespaceString _myStashNss =
+ resharding::getLocalConflictStashNamespace(_sourceUUID, _myDonorId);
const NamespaceString _otherStashNss =
- getLocalConflictStashNamespace(_sourceUUID, _otherDonorId);
+ resharding::getLocalConflictStashNamespace(_sourceUUID, _otherDonorId);
std::unique_ptr<ReshardingOplogApplicationRules> _applier;
- std::unique_ptr<ReshardingMetricsNew> _metricsNew;
+ std::unique_ptr<ReshardingMetrics> _metrics;
std::unique_ptr<ReshardingOplogApplierMetrics> _oplogApplierMetrics;
};
diff --git a/src/mongo/db/s/resharding/resharding_oplog_fetcher.cpp b/src/mongo/db/s/resharding/resharding_oplog_fetcher.cpp
index 41f87420e70..ac62a1cee4d 100644
--- a/src/mongo/db/s/resharding/resharding_oplog_fetcher.cpp
+++ b/src/mongo/db/s/resharding/resharding_oplog_fetcher.cpp
@@ -45,7 +45,7 @@
#include "mongo/db/pipeline/aggregate_command_gen.h"
#include "mongo/db/repl/read_concern_args.h"
#include "mongo/db/repl/read_concern_level.h"
-#include "mongo/db/s/resharding/resharding_metrics_new.h"
+#include "mongo/db/s/resharding/resharding_metrics.h"
#include "mongo/db/s/resharding/resharding_util.h"
#include "mongo/db/s/sharding_data_transform_cumulative_metrics.h"
#include "mongo/db/storage/write_unit_of_work.h"
@@ -272,9 +272,9 @@ AggregateCommandRequest ReshardingOplogFetcher::_makeAggregateCommandRequest(
auto opCtx = opCtxRaii.get();
auto expCtx = _makeExpressionContext(opCtx);
- auto serializedPipeline =
- createOplogFetchingPipelineForResharding(expCtx, _startAt, _collUUID, _recipientShard)
- ->serializeToBson();
+ auto serializedPipeline = resharding::createOplogFetchingPipelineForResharding(
+ expCtx, _startAt, _collUUID, _recipientShard)
+ ->serializeToBson();
AggregateCommandRequest aggRequest(NamespaceString::kRsOplogNamespace,
std::move(serializedPipeline));
@@ -326,8 +326,8 @@ bool ReshardingOplogFetcher::consume(Client* client,
[this, &batchesProcessed, &moreToCome, &opCtxRaii, &batchFetchTimer, factory](
const std::vector<BSONObj>& batch,
const boost::optional<BSONObj>& postBatchResumeToken) {
- _env->metricsNew()->onOplogEntriesFetched(batch.size(),
- Milliseconds(batchFetchTimer.millis()));
+ _env->metrics()->onOplogEntriesFetched(batch.size(),
+ Milliseconds(batchFetchTimer.millis()));
ThreadClient client(fmt::format("ReshardingFetcher-{}-{}",
_reshardingUUID.toString(),
@@ -354,7 +354,7 @@ bool ReshardingOplogFetcher::consume(Client* client,
uassertStatusOK(toWriteTo->insertDocument(opCtx, InsertStatement{doc}, nullptr));
wuow.commit();
- _env->metricsNew()->onLocalInsertDuringOplogFetching(
+ _env->metrics()->onLocalInsertDuringOplogFetching(
Milliseconds(insertTimer.millis()));
++_numOplogEntriesCopied;
@@ -368,7 +368,7 @@ bool ReshardingOplogFetcher::consume(Client* client,
_onInsertFuture = std::move(f);
}
- if (isFinalOplog(nextOplog, _reshardingUUID)) {
+ if (resharding::isFinalOplog(nextOplog, _reshardingUUID)) {
moreToCome = false;
return false;
}
@@ -392,7 +392,7 @@ bool ReshardingOplogFetcher::consume(Client* client,
oplog.set_id(Value(startAt.toBSON()));
oplog.setObject(BSON("msg"
<< "Latest oplog ts from donor's cursor response"));
- oplog.setObject2(BSON("type" << kReshardProgressMark));
+ oplog.setObject2(BSON("type" << resharding::kReshardProgressMark));
oplog.setOpTime(OplogSlot());
oplog.setWallClockTime(opCtx->getServiceContext()->getFastClockSource()->now());
@@ -402,7 +402,7 @@ bool ReshardingOplogFetcher::consume(Client* client,
// Also include synthetic oplog in the fetched count so it can match up with the
// total oplog applied count in the end.
- _env->metricsNew()->onOplogEntriesFetched(1, Milliseconds(0));
+ _env->metrics()->onOplogEntriesFetched(1, Milliseconds(0));
auto [p, f] = makePromiseFuture<void>();
{
diff --git a/src/mongo/db/s/resharding/resharding_oplog_fetcher.h b/src/mongo/db/s/resharding/resharding_oplog_fetcher.h
index 5772c6bdfaa..37f5090f0e2 100644
--- a/src/mongo/db/s/resharding/resharding_oplog_fetcher.h
+++ b/src/mongo/db/s/resharding/resharding_oplog_fetcher.h
@@ -50,25 +50,25 @@
namespace mongo {
-class ReshardingMetricsNew;
+class ReshardingMetrics;
class ReshardingOplogFetcher : public resharding::OnInsertAwaitable {
public:
class Env {
public:
- Env(ServiceContext* service, ReshardingMetricsNew* metricsNew)
- : _service(service), _metricsNew(metricsNew) {}
+ Env(ServiceContext* service, ReshardingMetrics* metrics)
+ : _service(service), _metrics(metrics) {}
ServiceContext* service() const {
return _service;
}
- ReshardingMetricsNew* metricsNew() const {
- return _metricsNew;
+ ReshardingMetrics* metrics() const {
+ return _metrics;
}
private:
ServiceContext* _service;
- ReshardingMetricsNew* _metricsNew;
+ ReshardingMetrics* _metrics;
};
// Special value to use for startAt to indicate there are no more oplog entries needing to be
diff --git a/src/mongo/db/s/resharding/resharding_oplog_fetcher_test.cpp b/src/mongo/db/s/resharding/resharding_oplog_fetcher_test.cpp
index 17624acced9..68523519f41 100644
--- a/src/mongo/db/s/resharding/resharding_oplog_fetcher_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_oplog_fetcher_test.cpp
@@ -45,7 +45,7 @@
#include "mongo/db/repl/storage_interface_impl.h"
#include "mongo/db/repl/wait_for_majority_service.h"
#include "mongo/db/s/operation_sharding_state.h"
-#include "mongo/db/s/resharding/resharding_metrics_new.h"
+#include "mongo/db/s/resharding/resharding_metrics.h"
#include "mongo/db/s/resharding/resharding_oplog_fetcher.h"
#include "mongo/db/s/resharding/resharding_util.h"
#include "mongo/db/s/shard_server_test_fixture.h"
@@ -98,13 +98,12 @@ public:
OldClientContext ctx(_opCtx, NamespaceString::kRsOplogNamespace.ns());
}
- _metrics =
- ReshardingMetricsNew::makeInstance(_reshardingUUID,
- BSON("y" << 1),
- NamespaceString{""},
- ReshardingMetricsNew::Role::kRecipient,
- getServiceContext()->getFastClockSource()->now(),
- getServiceContext());
+ _metrics = ReshardingMetrics::makeInstance(_reshardingUUID,
+ BSON("y" << 1),
+ NamespaceString{""},
+ ReshardingMetrics::Role::kRecipient,
+ getServiceContext()->getFastClockSource()->now(),
+ getServiceContext());
for (const auto& shardId : kTwoShardIdList) {
auto shardTargeter = RemoteCommandTargeterMock::get(
@@ -299,7 +298,8 @@ public:
BSON(
"msg" << fmt::format("Writes to {} are temporarily blocked for resharding.",
dataColl.getCollection()->ns().toString())),
- BSON("type" << kReshardFinalOpLogType << "reshardingUUID" << _reshardingUUID),
+ BSON("type" << resharding::kReshardFinalOpLogType << "reshardingUUID"
+ << _reshardingUUID),
boost::none,
boost::none,
boost::none,
@@ -343,7 +343,7 @@ protected:
Timestamp _fetchTimestamp;
ShardId _donorShard;
ShardId _destinationShard;
- std::unique_ptr<ReshardingMetricsNew> _metrics;
+ std::unique_ptr<ReshardingMetrics> _metrics;
private:
static HostAndPort makeHostAndPort(const ShardId& shardId) {
diff --git a/src/mongo/db/s/resharding/resharding_oplog_session_application_test.cpp b/src/mongo/db/s/resharding/resharding_oplog_session_application_test.cpp
index bd3d602f3a7..5be42b0c30d 100644
--- a/src/mongo/db/s/resharding/resharding_oplog_session_application_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_oplog_session_application_test.cpp
@@ -705,6 +705,16 @@ TEST_F(ReshardingOplogSessionApplicationTest,
TxnNumber internalTxnTxnNumber = 1;
StmtId stmtId = 2;
+ // Make two in progress transactions so the one started by resharding must block.
+ {
+ auto newClientOwned = getServiceContext()->makeClient("newClient");
+ AlternativeClientRegion acr(newClientOwned);
+ auto newOpCtx = cc().makeOperationContext();
+ makeInProgressTxn(newOpCtx.get(),
+ makeLogicalSessionIdWithTxnNumberAndUUIDForTest(retryableWriteLsid,
+ retryableWriteTxnNumber),
+ internalTxnTxnNumber);
+ }
{
auto opCtx = makeOperationContext();
makeInProgressTxn(opCtx.get(), internalTxnLsid, internalTxnTxnNumber);
diff --git a/src/mongo/db/s/resharding/resharding_recipient_service.cpp b/src/mongo/db/s/resharding/resharding_recipient_service.cpp
index baaf64fc5e3..5b66d19e8bd 100644
--- a/src/mongo/db/s/resharding/resharding_recipient_service.cpp
+++ b/src/mongo/db/s/resharding/resharding_recipient_service.cpp
@@ -120,12 +120,12 @@ using resharding_metrics::getIntervalStartFieldName;
using DocT = ReshardingRecipientDocument;
const auto metricsPrefix = resharding_metrics::getMetricsPrefix<DocT>();
-void buildStateDocumentCloneMetricsForUpdate(BSONObjBuilder& bob, ReshardingMetricsNew* metrics) {
+void buildStateDocumentCloneMetricsForUpdate(BSONObjBuilder& bob, ReshardingMetrics* metrics) {
bob.append(getIntervalStartFieldName<DocT>(ReshardingRecipientMetrics::kDocumentCopyFieldName),
metrics->getCopyingBegin());
}
-void buildStateDocumentApplyMetricsForUpdate(BSONObjBuilder& bob, ReshardingMetricsNew* metrics) {
+void buildStateDocumentApplyMetricsForUpdate(BSONObjBuilder& bob, ReshardingMetrics* metrics) {
bob.append(getIntervalEndFieldName<DocT>(ReshardingRecipientMetrics::kDocumentCopyFieldName),
metrics->getCopyingEnd());
bob.append(
@@ -138,14 +138,14 @@ void buildStateDocumentApplyMetricsForUpdate(BSONObjBuilder& bob, ReshardingMetr
}
void buildStateDocumentStrictConsistencyMetricsForUpdate(BSONObjBuilder& bob,
- ReshardingMetricsNew* metrics) {
+ ReshardingMetrics* metrics) {
bob.append(
getIntervalEndFieldName<DocT>(ReshardingRecipientMetrics::kOplogApplicationFieldName),
metrics->getApplyingEnd());
}
void buildStateDocumentMetricsForUpdate(BSONObjBuilder& bob,
- ReshardingMetricsNew* metrics,
+ ReshardingMetrics* metrics,
RecipientStateEnum newState) {
switch (newState) {
case RecipientStateEnum::kCloning:
@@ -162,8 +162,8 @@ void buildStateDocumentMetricsForUpdate(BSONObjBuilder& bob,
}
}
-ReshardingMetricsNew::RecipientState toMetricsState(RecipientStateEnum state) {
- return ReshardingMetricsNew::RecipientState(state);
+ReshardingMetrics::RecipientState toMetricsState(RecipientStateEnum state) {
+ return ReshardingMetrics::RecipientState(state);
}
} // namespace
@@ -190,7 +190,7 @@ ReshardingRecipientService::RecipientStateMachine::RecipientStateMachine(
ReshardingDataReplicationFactory dataReplicationFactory)
: repl::PrimaryOnlyService::TypedInstance<RecipientStateMachine>(),
_recipientService{recipientService},
- _metricsNew{ReshardingMetricsNew::initializeFrom(recipientDoc, getGlobalServiceContext())},
+ _metrics{ReshardingMetrics::initializeFrom(recipientDoc, getGlobalServiceContext())},
_metadata{recipientDoc.getCommonReshardingMetadata()},
_minimumOperationDuration{Milliseconds{recipientDoc.getMinimumOperationDurationMillis()}},
_recipientCtx{recipientDoc.getMutableState()},
@@ -219,7 +219,7 @@ ReshardingRecipientService::RecipientStateMachine::RecipientStateMachine(
}()) {
invariant(_externalState);
- _metricsNew->onStateTransition(boost::none, toMetricsState(_recipientCtx.getState()));
+ _metrics->onStateTransition(boost::none, toMetricsState(_recipientCtx.getState()));
}
ExecutorFuture<void>
@@ -370,7 +370,9 @@ ExecutorFuture<void> ReshardingRecipientService::RecipientStateMachine::_finishR
if (!_isAlsoDonor) {
auto opCtx = factory.makeOperationContext(&cc());
- _externalState->clearFilteringMetadata(opCtx.get());
+ _externalState->clearFilteringMetadata(opCtx.get(),
+ _metadata.getSourceNss(),
+ _metadata.getTempReshardingNss());
RecoverableCriticalSectionService::get(opCtx.get())
->releaseRecoverableCriticalSection(
@@ -417,7 +419,13 @@ ExecutorFuture<void> ReshardingRecipientService::RecipientStateMachine::_runMand
self = shared_from_this(),
outerStatus = status,
isCanceled = stepdownToken.isCanceled()](Status dataReplicationHaltStatus) {
- _metricsNew->onStateTransition(toMetricsState(_recipientCtx.getState()), boost::none);
+ _metrics->onStateTransition(toMetricsState(_recipientCtx.getState()), boost::none);
+
+        // Destroy metrics early so its lifetime will not be tied to the lifetime of this
+ // state machine. This is because we have future callbacks copy shared pointers to this
+ // state machine that causes it to live longer than expected and potentially overlap
+ // with a newer instance when stepping up.
+ _metrics.reset();
// If the stepdownToken was triggered, it takes priority in order to make sure that
// the promise is set with an error that the coordinator can retry with. If it ran into
@@ -432,7 +440,6 @@ ExecutorFuture<void> ReshardingRecipientService::RecipientStateMachine::_runMand
// replication errors because resharding is known to have failed already.
stdx::lock_guard<Latch> lk(_mutex);
ensureFulfilledPromise(lk, _completionPromise, outerStatus);
-
return outerStatus;
});
}
@@ -504,7 +511,7 @@ void ReshardingRecipientService::RecipientStateMachine::interrupt(Status status)
boost::optional<BSONObj> ReshardingRecipientService::RecipientStateMachine::reportForCurrentOp(
MongoProcessInterface::CurrentOpConnectionsMode,
MongoProcessInterface::CurrentOpSessionsMode) noexcept {
- return _metricsNew->reportForCurrentOp();
+ return _metrics->reportForCurrentOp();
}
void ReshardingRecipientService::RecipientStateMachine::onReshardingFieldsChanges(
@@ -550,8 +557,8 @@ ExecutorFuture<void> ReshardingRecipientService::RecipientStateMachine::
ReshardingRecipientService::RecipientStateMachine::CloneDetails cloneDetails) {
_transitionToCreatingCollection(
cloneDetails, (*executor)->now() + _minimumOperationDuration, factory);
- _metricsNew->setDocumentsToCopyCounts(cloneDetails.approxDocumentsToCopy,
- cloneDetails.approxBytesToCopy);
+ _metrics->setDocumentsToCopyCounts(cloneDetails.approxDocumentsToCopy,
+ cloneDetails.approxBytesToCopy);
});
}
@@ -616,7 +623,7 @@ ReshardingRecipientService::RecipientStateMachine::_makeDataReplication(Operatio
for (const auto& donor : _donorShards) {
_applierMetricsMap.emplace(
donor.getShardId(),
- std::make_unique<ReshardingOplogApplierMetrics>(_metricsNew.get(), boost::none));
+ std::make_unique<ReshardingOplogApplierMetrics>(_metrics.get(), boost::none));
}
} else {
invariant(_applierMetricsMap.size() == _donorShards.size(),
@@ -625,7 +632,7 @@ ReshardingRecipientService::RecipientStateMachine::_makeDataReplication(Operatio
}
return _dataReplicationFactory(opCtx,
- _metricsNew.get(),
+ _metrics.get(),
&_applierMetricsMap,
_metadata,
_donorShards,
@@ -726,8 +733,8 @@ ExecutorFuture<void> ReshardingRecipientService::RecipientStateMachine::
.then([this, &factory] {
auto opCtx = factory.makeOperationContext(&cc());
for (const auto& donor : _donorShards) {
- auto stashNss =
- getLocalConflictStashNamespace(_metadata.getSourceUUID(), donor.getShardId());
+ auto stashNss = resharding::getLocalConflictStashNamespace(
+ _metadata.getSourceUUID(), donor.getShardId());
AutoGetCollection stashColl(opCtx.get(), stashNss, MODE_IS);
uassert(5356800,
"Resharding completed with non-empty stash collections",
@@ -846,7 +853,7 @@ void ReshardingRecipientService::RecipientStateMachine::_transitionState(
_updateRecipientDocument(
std::move(newRecipientCtx), std::move(cloneDetails), std::move(configStartTime), factory);
- _metricsNew->onStateTransition(toMetricsState(oldState), toMetricsState(newState));
+ _metrics->onStateTransition(toMetricsState(oldState), toMetricsState(newState));
LOGV2_INFO(5279506,
"Transitioned resharding recipient state",
@@ -871,7 +878,7 @@ void ReshardingRecipientService::RecipientStateMachine::_transitionToCreatingCol
void ReshardingRecipientService::RecipientStateMachine::_transitionToCloning(
const CancelableOperationContextFactory& factory) {
- _metricsNew->onCopyingBegin();
+ _metrics->onCopyingBegin();
auto newRecipientCtx = _recipientCtx;
newRecipientCtx.setState(RecipientStateEnum::kCloning);
_transitionState(std::move(newRecipientCtx), boost::none, boost::none, factory);
@@ -883,8 +890,8 @@ void ReshardingRecipientService::RecipientStateMachine::_transitionToApplying(
newRecipientCtx.setState(RecipientStateEnum::kApplying);
_transitionState(std::move(newRecipientCtx), boost::none, boost::none, factory);
- _metricsNew->onCopyingEnd();
- _metricsNew->onApplyingBegin();
+ _metrics->onCopyingEnd();
+ _metrics->onApplyingBegin();
}
void ReshardingRecipientService::RecipientStateMachine::_transitionToStrictConsistency(
@@ -893,14 +900,14 @@ void ReshardingRecipientService::RecipientStateMachine::_transitionToStrictConsi
newRecipientCtx.setState(RecipientStateEnum::kStrictConsistency);
_transitionState(std::move(newRecipientCtx), boost::none, boost::none, factory);
- _metricsNew->onApplyingEnd();
+ _metrics->onApplyingEnd();
}
void ReshardingRecipientService::RecipientStateMachine::_transitionToError(
Status abortReason, const CancelableOperationContextFactory& factory) {
auto newRecipientCtx = _recipientCtx;
newRecipientCtx.setState(RecipientStateEnum::kError);
- emplaceTruncatedAbortReasonIfExists(newRecipientCtx, abortReason);
+ resharding::emplaceTruncatedAbortReasonIfExists(newRecipientCtx, abortReason);
_transitionState(std::move(newRecipientCtx), boost::none, boost::none, factory);
}
@@ -1052,8 +1059,7 @@ void ReshardingRecipientService::RecipientStateMachine::_updateRecipientDocument
*configStartTime);
}
- buildStateDocumentMetricsForUpdate(
- setBuilder, _metricsNew.get(), newRecipientCtx.getState());
+ buildStateDocumentMetricsForUpdate(setBuilder, _metrics.get(), newRecipientCtx.getState());
setBuilder.doneFast();
}
@@ -1156,7 +1162,7 @@ void ReshardingRecipientService::RecipientStateMachine::_restoreMetrics(
// metrics section of the recipient state document and restored during metrics
// initialization. This is so that applied oplog entries that add or remove documents do
// not affect the cloning metrics.
- _metricsNew->restoreDocumentsCopied(documentCountCopied, documentBytesCopied);
+ _metrics->restoreDocumentsCopied(documentCountCopied, documentBytesCopied);
}
}
@@ -1167,10 +1173,10 @@ void ReshardingRecipientService::RecipientStateMachine::_restoreMetrics(
progressDocList;
for (const auto& donor : _donorShards) {
{
- AutoGetCollection oplogBufferColl(
- opCtx.get(),
- getLocalOplogBufferNamespace(_metadata.getSourceUUID(), donor.getShardId()),
- MODE_IS);
+ AutoGetCollection oplogBufferColl(opCtx.get(),
+ resharding::getLocalOplogBufferNamespace(
+ _metadata.getSourceUUID(), donor.getShardId()),
+ MODE_IS);
if (oplogBufferColl) {
oplogEntriesFetched += oplogBufferColl->numRecords(opCtx.get());
}
@@ -1208,19 +1214,19 @@ void ReshardingRecipientService::RecipientStateMachine::_restoreMetrics(
if (!progressDoc) {
_applierMetricsMap.emplace(
shardId,
- std::make_unique<ReshardingOplogApplierMetrics>(_metricsNew.get(), boost::none));
+ std::make_unique<ReshardingOplogApplierMetrics>(_metrics.get(), boost::none));
continue;
}
- _metricsNew->accumulateFrom(*progressDoc);
+ _metrics->accumulateFrom(*progressDoc);
auto applierMetrics =
- std::make_unique<ReshardingOplogApplierMetrics>(_metricsNew.get(), progressDoc);
+ std::make_unique<ReshardingOplogApplierMetrics>(_metrics.get(), progressDoc);
_applierMetricsMap.emplace(shardId, std::move(applierMetrics));
}
- _metricsNew->restoreOplogEntriesFetched(oplogEntriesFetched);
- _metricsNew->restoreOplogEntriesApplied(oplogEntriesApplied);
+ _metrics->restoreOplogEntriesFetched(oplogEntriesFetched);
+ _metrics->restoreOplogEntriesApplied(oplogEntriesApplied);
}
CancellationToken ReshardingRecipientService::RecipientStateMachine::_initAbortSource(
diff --git a/src/mongo/db/s/resharding/resharding_recipient_service.h b/src/mongo/db/s/resharding/resharding_recipient_service.h
index fc41ba0e9ee..5dab490b96f 100644
--- a/src/mongo/db/s/resharding/resharding_recipient_service.h
+++ b/src/mongo/db/s/resharding/resharding_recipient_service.h
@@ -33,7 +33,7 @@
#include "mongo/db/s/resharding/recipient_document_gen.h"
#include "mongo/db/s/resharding/resharding_data_replication.h"
#include "mongo/db/s/resharding/resharding_future_util.h"
-#include "mongo/db/s/resharding/resharding_metrics_new.h"
+#include "mongo/db/s/resharding/resharding_metrics.h"
#include "mongo/db/s/resharding/resharding_oplog_applier_metrics.h"
#include "mongo/db/s/resharding/resharding_util.h"
#include "mongo/s/resharding/type_collection_fields_gen.h"
@@ -163,9 +163,9 @@ public:
return _metadata;
}
- inline const ReshardingMetricsNew& getMetrics() const {
- invariant(_metricsNew);
- return *_metricsNew;
+ inline const ReshardingMetrics& getMetrics() const {
+ invariant(_metrics);
+ return *_metrics;
}
boost::optional<BSONObj> reportForCurrentOp(
@@ -289,7 +289,7 @@ private:
// The primary-only service instance corresponding to the recipient instance. Not owned.
const ReshardingRecipientService* const _recipientService;
- std::unique_ptr<ReshardingMetricsNew> _metricsNew;
+ std::unique_ptr<ReshardingMetrics> _metrics;
ReshardingApplierMetricsMap _applierMetricsMap;
// The in-memory representation of the immutable portion of the document in
diff --git a/src/mongo/db/s/resharding/resharding_recipient_service_external_state.cpp b/src/mongo/db/s/resharding/resharding_recipient_service_external_state.cpp
index 3e929815454..222a2c6f86a 100644
--- a/src/mongo/db/s/resharding/resharding_recipient_service_external_state.cpp
+++ b/src/mongo/db/s/resharding/resharding_recipient_service_external_state.cpp
@@ -186,8 +186,12 @@ void RecipientStateMachineExternalStateImpl::updateCoordinatorDocument(Operation
}
}
-void RecipientStateMachineExternalStateImpl::clearFilteringMetadata(OperationContext* opCtx) {
- resharding::clearFilteringMetadata(opCtx, true /* scheduleAsyncRefresh */);
+void RecipientStateMachineExternalStateImpl::clearFilteringMetadata(
+ OperationContext* opCtx,
+ const NamespaceString& sourceNss,
+ const NamespaceString& tempReshardingNss) {
+ stdx::unordered_set<NamespaceString> namespacesToRefresh{sourceNss, tempReshardingNss};
+ resharding::clearFilteringMetadata(opCtx, namespacesToRefresh, true /* scheduleAsyncRefresh */);
}
} // namespace mongo
diff --git a/src/mongo/db/s/resharding/resharding_recipient_service_external_state.h b/src/mongo/db/s/resharding/resharding_recipient_service_external_state.h
index c1597da7f7c..0a2749a66fc 100644
--- a/src/mongo/db/s/resharding/resharding_recipient_service_external_state.h
+++ b/src/mongo/db/s/resharding/resharding_recipient_service_external_state.h
@@ -90,7 +90,9 @@ public:
const BSONObj& query,
const BSONObj& update) = 0;
- virtual void clearFilteringMetadata(OperationContext* opCtx) = 0;
+ virtual void clearFilteringMetadata(OperationContext* opCtx,
+ const NamespaceString& sourceNss,
+ const NamespaceString& tempReshardingNss) = 0;
/**
* Creates the temporary resharding collection locally.
@@ -137,7 +139,9 @@ public:
const BSONObj& query,
const BSONObj& update) override;
- void clearFilteringMetadata(OperationContext* opCtx) override;
+ void clearFilteringMetadata(OperationContext* opCtx,
+ const NamespaceString& sourceNss,
+ const NamespaceString& tempReshardingNss) override;
private:
template <typename Callable>
diff --git a/src/mongo/db/s/resharding/resharding_recipient_service_external_state_test.cpp b/src/mongo/db/s/resharding/resharding_recipient_service_external_state_test.cpp
index c4e193e6897..62776bba466 100644
--- a/src/mongo/db/s/resharding/resharding_recipient_service_external_state_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_recipient_service_external_state_test.cpp
@@ -27,9 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include "mongo/bson/unordered_fields_bsonobj_comparator.h"
#include "mongo/db/catalog_raii.h"
#include "mongo/db/dbdirectclient.h"
@@ -48,9 +45,6 @@
#include "mongo/s/database_version.h"
#include "mongo/s/stale_exception.h"
-#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kTest
-
-
namespace mongo {
namespace {
@@ -168,7 +162,7 @@ public:
reshardingFields.setRecipientFields(recipientFields);
coll.setReshardingFields(reshardingFields);
- ChunkVersion version(1, 0, epoch, timestamp);
+ ChunkVersion version({epoch, timestamp}, {1, 0});
ChunkType chunk(uuid,
{skey.getKeyPattern().globalMin(), skey.getKeyPattern().globalMax()},
@@ -193,7 +187,7 @@ public:
CollectionType coll(
origNss, epoch, timestamp, Date_t::now(), uuid, skey.getKeyPattern());
- ChunkVersion version(2, 0, epoch, timestamp);
+ ChunkVersion version({epoch, timestamp}, {2, 0});
ChunkType chunk(uuid,
{skey.getKeyPattern().globalMin(), skey.getKeyPattern().globalMax()},
diff --git a/src/mongo/db/s/resharding/resharding_recipient_service_test.cpp b/src/mongo/db/s/resharding/resharding_recipient_service_test.cpp
index 78316aacca7..4e6a5489f71 100644
--- a/src/mongo/db/s/resharding/resharding_recipient_service_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_recipient_service_test.cpp
@@ -83,7 +83,7 @@ public:
std::vector<ChunkType> chunks = {ChunkType{
_sourceUUID,
ChunkRange{BSON(_currentShardKey << MINKEY), BSON(_currentShardKey << MAXKEY)},
- ChunkVersion(100, 0, epoch, Timestamp(1, 1)),
+ ChunkVersion({epoch, Timestamp(1, 1)}, {100, 0}),
_someDonorId}};
auto rt = RoutingTableHistory::makeNew(_sourceNss,
@@ -136,7 +136,9 @@ public:
const BSONObj& query,
const BSONObj& update) override {}
- void clearFilteringMetadata(OperationContext* opCtx) override {}
+ void clearFilteringMetadata(OperationContext* opCtx,
+ const NamespaceString& sourceNss,
+ const NamespaceString& tempReshardingNss) override {}
private:
RoutingTableHistoryValueHandle _makeStandaloneRoutingTableHistory(RoutingTableHistory rt) {
@@ -250,12 +252,12 @@ public:
NamespaceString sourceNss("sourcedb", "sourcecollection");
auto sourceUUID = UUID::gen();
- auto commonMetadata =
- CommonReshardingMetadata(UUID::gen(),
- sourceNss,
- sourceUUID,
- constructTemporaryReshardingNss(sourceNss.db(), sourceUUID),
- newShardKeyPattern());
+ auto commonMetadata = CommonReshardingMetadata(
+ UUID::gen(),
+ sourceNss,
+ sourceUUID,
+ resharding::constructTemporaryReshardingNss(sourceNss.db(), sourceUUID),
+ newShardKeyPattern());
commonMetadata.setStartTime(getServiceContext()->getFastClockSource()->now());
doc.setCommonReshardingMetadata(std::move(commonMetadata));
@@ -625,7 +627,8 @@ TEST_F(ReshardingRecipientServiceTest, WritesNoopOplogEntryOnReshardDoneCatchUp)
ErrorCodes::InterruptedDueToReplStateChange);
DBDirectClient client(opCtx.get());
- NamespaceString sourceNss = constructTemporaryReshardingNss("sourcedb", doc.getSourceUUID());
+ NamespaceString sourceNss =
+ resharding::constructTemporaryReshardingNss("sourcedb", doc.getSourceUUID());
FindCommandRequest findRequest{NamespaceString::kRsOplogNamespace};
findRequest.setFilter(
@@ -671,7 +674,8 @@ TEST_F(ReshardingRecipientServiceTest, WritesNoopOplogEntryForImplicitShardColle
ErrorCodes::InterruptedDueToReplStateChange);
DBDirectClient client(opCtx.get());
- NamespaceString sourceNss = constructTemporaryReshardingNss("sourcedb", doc.getSourceUUID());
+ NamespaceString sourceNss =
+ resharding::constructTemporaryReshardingNss("sourcedb", doc.getSourceUUID());
FindCommandRequest findRequest{NamespaceString::kRsOplogNamespace};
findRequest.setFilter(
@@ -739,7 +743,7 @@ TEST_F(ReshardingRecipientServiceTest, TruncatesXLErrorOnRecipientDocument) {
// to the primitive truncation algorithm - Check that the total size is less than
// kReshardErrorMaxBytes + a couple additional bytes to provide a buffer for the field
// name sizes.
- int maxReshardErrorBytesCeiling = kReshardErrorMaxBytes + 200;
+ int maxReshardErrorBytesCeiling = resharding::kReshardErrorMaxBytes + 200;
ASSERT_LT(persistedAbortReasonBSON->objsize(), maxReshardErrorBytesCeiling);
ASSERT_EQ(persistedAbortReasonBSON->getIntField("code"),
ErrorCodes::ReshardCollectionTruncatedError);
@@ -815,7 +819,8 @@ TEST_F(ReshardingRecipientServiceTest, RestoreMetricsAfterStepUp) {
for (const auto& donor : donorShards) {
// Setup oplogBuffer collection.
ReshardingDonorOplogId donorOplogId{{20, i}, {19, 0}};
- insertFn(getLocalOplogBufferNamespace(doc.getSourceUUID(), donor.getShardId()),
+ insertFn(resharding::getLocalOplogBufferNamespace(doc.getSourceUUID(),
+ donor.getShardId()),
InsertStatement{BSON("_id" << donorOplogId.toBSON())});
++i;
@@ -923,7 +928,7 @@ TEST_F(ReshardingRecipientServiceTest, RestoreMetricsAfterStepUpWithMissingProgr
// Setup oplogBuffer collection.
ReshardingDonorOplogId donorOplogId{{20, i}, {19, 0}};
- insertFn(getLocalOplogBufferNamespace(doc.getSourceUUID(), donor.getShardId()),
+ insertFn(resharding::getLocalOplogBufferNamespace(doc.getSourceUUID(), donor.getShardId()),
InsertStatement{BSON("_id" << donorOplogId.toBSON())});
// Setup reshardingApplierProgress collection.
diff --git a/src/mongo/db/s/resharding/resharding_txn_cloner.cpp b/src/mongo/db/s/resharding/resharding_txn_cloner.cpp
index 478c0272c7d..1f074af6f75 100644
--- a/src/mongo/db/s/resharding/resharding_txn_cloner.cpp
+++ b/src/mongo/db/s/resharding/resharding_txn_cloner.cpp
@@ -35,7 +35,6 @@
#include <vector>
#include "mongo/bson/bsonmisc.h"
-#include "mongo/client/query.h"
#include "mongo/client/read_preference.h"
#include "mongo/db/client.h"
#include "mongo/db/concurrency/d_concurrency.h"
diff --git a/src/mongo/db/s/resharding/resharding_txn_cloner_test.cpp b/src/mongo/db/s/resharding/resharding_txn_cloner_test.cpp
index 1e22bc5a4a7..24045678550 100644
--- a/src/mongo/db/s/resharding/resharding_txn_cloner_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_txn_cloner_test.cpp
@@ -1004,6 +1004,16 @@ TEST_F(ReshardingTxnClonerTest,
retryableWriteTxnNumber);
TxnNumber internalTxnTxnNumber = 1;
+ // Make two in progress transactions so the one started by resharding must block.
+ {
+ auto newClientOwned = getServiceContext()->makeClient("newClient");
+ AlternativeClientRegion acr(newClientOwned);
+ auto newOpCtx = cc().makeOperationContext();
+ makeInProgressTxn(newOpCtx.get(),
+ makeLogicalSessionIdWithTxnNumberAndUUIDForTest(retryableWriteLsid,
+ retryableWriteTxnNumber),
+ internalTxnTxnNumber);
+ }
makeInProgressTxn(operationContext(), internalTxnLsid, internalTxnTxnNumber);
auto lastOplogTs = getLatestOplogTimestamp(operationContext());
@@ -1096,6 +1106,16 @@ TEST_F(ReshardingTxnClonerTest, CancelableWhileWaitingOnInProgressInternalTxnFor
retryableWriteTxnNumber);
TxnNumber internalTxnTxnNumber = 1;
+ // Make two in progress transactions so the one started by resharding must block.
+ {
+ auto newClientOwned = getServiceContext()->makeClient("newClient");
+ AlternativeClientRegion acr(newClientOwned);
+ auto newOpCtx = cc().makeOperationContext();
+ makeInProgressTxn(newOpCtx.get(),
+ makeLogicalSessionIdWithTxnNumberAndUUIDForTest(retryableWriteLsid,
+ retryableWriteTxnNumber),
+ internalTxnTxnNumber);
+ }
makeInProgressTxn(operationContext(), internalTxnLsid, internalTxnTxnNumber);
ON_BLOCK_EXIT([&] { abortTxn(operationContext(), internalTxnLsid, internalTxnTxnNumber); });
diff --git a/src/mongo/db/s/resharding/resharding_util.cpp b/src/mongo/db/s/resharding/resharding_util.cpp
index d9a04064d3c..873fc7ce5d5 100644
--- a/src/mongo/db/s/resharding/resharding_util.cpp
+++ b/src/mongo/db/s/resharding/resharding_util.cpp
@@ -48,7 +48,7 @@
#include "mongo/db/s/collection_sharding_state.h"
#include "mongo/db/s/resharding/document_source_resharding_add_resume_id.h"
#include "mongo/db/s/resharding/document_source_resharding_iterate_transaction.h"
-#include "mongo/db/s/resharding/resharding_metrics_new.h"
+#include "mongo/db/s/resharding/resharding_metrics.h"
#include "mongo/db/s/sharding_state.h"
#include "mongo/db/storage/write_unit_of_work.h"
#include "mongo/logv2/log.h"
@@ -63,6 +63,7 @@
namespace mongo {
+namespace resharding {
namespace {
/**
@@ -414,4 +415,5 @@ boost::optional<Milliseconds> estimateRemainingRecipientTime(bool applyingBegan,
return {};
}
+} // namespace resharding
} // namespace mongo
diff --git a/src/mongo/db/s/resharding/resharding_util.h b/src/mongo/db/s/resharding/resharding_util.h
index 194381e7e78..0d8aacbe3f7 100644
--- a/src/mongo/db/s/resharding/resharding_util.h
+++ b/src/mongo/db/s/resharding/resharding_util.h
@@ -50,6 +50,7 @@
#include "mongo/util/str.h"
namespace mongo {
+namespace resharding {
constexpr auto kReshardFinalOpLogType = "reshardFinalOp"_sd;
constexpr auto kReshardProgressMark = "reshardProgressMark"_sd;
@@ -324,5 +325,6 @@ std::vector<std::shared_ptr<Instance>> getReshardingStateMachines(OperationConte
return result;
}
+} // namespace resharding
} // namespace mongo
diff --git a/src/mongo/db/s/resharding/resharding_util_test.cpp b/src/mongo/db/s/resharding/resharding_util_test.cpp
index 5fd40fd86b7..12e5e15ddcd 100644
--- a/src/mongo/db/s/resharding/resharding_util_test.cpp
+++ b/src/mongo/db/s/resharding/resharding_util_test.cpp
@@ -52,6 +52,7 @@
namespace mongo {
+namespace resharding {
namespace {
class ReshardingUtilTest : public ConfigServerTestFixture {
@@ -309,4 +310,7 @@ TEST_F(ReshardingTxnCloningPipelineTest, TxnPipelineAfterID) {
}
} // namespace
+
+} // namespace resharding
+
} // namespace mongo
diff --git a/src/mongo/db/s/resharding_test_commands.cpp b/src/mongo/db/s/resharding_test_commands.cpp
index 61fa835829f..74688928784 100644
--- a/src/mongo/db/s/resharding_test_commands.cpp
+++ b/src/mongo/db/s/resharding_test_commands.cpp
@@ -37,7 +37,7 @@
#include "mongo/db/commands.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/s/resharding/resharding_collection_cloner.h"
-#include "mongo/db/s/resharding/resharding_metrics_new.h"
+#include "mongo/db/s/resharding/resharding_metrics.h"
#include "mongo/db/s/resharding_test_commands_gen.h"
#include "mongo/db/vector_clock_metadata_hook.h"
#include "mongo/executor/network_interface_factory.h"
@@ -79,11 +79,11 @@ public:
}
};
- auto metrics = ReshardingMetricsNew::makeInstance(
+ auto metrics = ReshardingMetrics::makeInstance(
request().getUuid(),
request().getShardKey(),
ns(),
- ReshardingMetricsNew::Role::kRecipient,
+ ReshardingMetrics::Role::kRecipient,
opCtx->getServiceContext()->getFastClockSource()->now(),
opCtx->getServiceContext());
diff --git a/src/mongo/db/s/sessions_collection_config_server.cpp b/src/mongo/db/s/sessions_collection_config_server.cpp
index 60c72dcab47..4376166a365 100644
--- a/src/mongo/db/s/sessions_collection_config_server.cpp
+++ b/src/mongo/db/s/sessions_collection_config_server.cpp
@@ -125,8 +125,10 @@ void SessionsCollectionConfigServer::setupSessionsCollection(OperationContext* o
auto filterQuery =
BSON("_id" << NamespaceString::kLogicalSessionsNamespace.ns()
<< CollectionType::kMaxChunkSizeBytesFieldName << BSON("$exists" << false));
- auto updateQuery =
- BSON("$set" << BSON(CollectionType::kMaxChunkSizeBytesFieldName << kMaxChunkSizeBytes));
+ auto updateQuery = BSON("$set" << BSON(CollectionType::kMaxChunkSizeBytesFieldName
+ << kMaxChunkSizeBytes
+ << CollectionType::kNoAutoSplitFieldName << true));
+
uassertStatusOK(Grid::get(opCtx)->catalogClient()->updateConfigDocument(
opCtx,
CollectionType::ConfigNS,
diff --git a/src/mongo/db/s/set_allow_migrations_coordinator.cpp b/src/mongo/db/s/set_allow_migrations_coordinator.cpp
index 3395aa7f465..d8cb15afb2e 100644
--- a/src/mongo/db/s/set_allow_migrations_coordinator.cpp
+++ b/src/mongo/db/s/set_allow_migrations_coordinator.cpp
@@ -50,14 +50,6 @@ bool isCollectionSharded(OperationContext* opCtx, const NamespaceString& nss) {
}
}
-SetAllowMigrationsCoordinator::SetAllowMigrationsCoordinator(ShardingDDLCoordinatorService* service,
- const BSONObj& initialState)
- : ShardingDDLCoordinator(service, initialState),
- _doc(SetAllowMigrationsCoordinatorDocument::parse(
- IDLParserErrorContext("SetAllowMigrationsCoordinatorDocument"), initialState)),
- _allowMigrations(_doc.getAllowMigrations()) {}
-
-
void SetAllowMigrationsCoordinator::checkIfOptionsConflict(const BSONObj& doc) const {
// If we have two set allow migrations on the same namespace, then the arguments must be the
// same.
@@ -72,23 +64,9 @@ void SetAllowMigrationsCoordinator::checkIfOptionsConflict(const BSONObj& doc) c
otherDoc.getSetAllowMigrationsRequest().toBSON()));
}
-boost::optional<BSONObj> SetAllowMigrationsCoordinator::reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept {
- BSONObjBuilder cmdBob;
- if (const auto& optComment = getForwardableOpMetadata().getComment()) {
- cmdBob.append(optComment.get().firstElement());
- }
- cmdBob.appendElements(_doc.getSetAllowMigrationsRequest().toBSON());
-
- BSONObjBuilder bob;
- bob.append("type", "op");
- bob.append("desc", "SetAllowMigrationsCoordinator");
- bob.append("op", "command");
- bob.append("ns", nss().toString());
- bob.append("command", cmdBob.obj());
- bob.append("active", true);
- return bob.obj();
+void SetAllowMigrationsCoordinator::appendCommandInfo(BSONObjBuilder* cmdInfoBuilder) const {
+ stdx::lock_guard lk{_docMutex};
+ cmdInfoBuilder->appendElements(_doc.getSetAllowMigrationsRequest().toBSON());
}
ExecutorFuture<void> SetAllowMigrationsCoordinator::_runImpl(
diff --git a/src/mongo/db/s/set_allow_migrations_coordinator.h b/src/mongo/db/s/set_allow_migrations_coordinator.h
index cf8e14348d7..78d2e03696a 100644
--- a/src/mongo/db/s/set_allow_migrations_coordinator.h
+++ b/src/mongo/db/s/set_allow_migrations_coordinator.h
@@ -38,31 +38,27 @@
namespace mongo {
-class SetAllowMigrationsCoordinator final : public ShardingDDLCoordinator {
+class SetAllowMigrationsCoordinator final
+ : public ShardingDDLCoordinatorImpl<SetAllowMigrationsCoordinatorDocument> {
public:
SetAllowMigrationsCoordinator(ShardingDDLCoordinatorService* service,
- const BSONObj& initialState);
+ const BSONObj& initialState)
+ : ShardingDDLCoordinatorImpl(service, "SetAllowMigrationsCoordinator", initialState),
+ _allowMigrations(_doc.getAllowMigrations()) {}
void checkIfOptionsConflict(const BSONObj& coorDoc) const override;
- boost::optional<BSONObj> reportForCurrentOp(
- MongoProcessInterface::CurrentOpConnectionsMode connMode,
- MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept override;
+ void appendCommandInfo(BSONObjBuilder* cmdInfoBuilder) const override;
bool canAlwaysStartWhenUserWritesAreDisabled() const override {
return true;
}
private:
- ShardingDDLCoordinatorMetadata const& metadata() const override {
- return _doc.getShardingDDLCoordinatorMetadata();
- }
-
ExecutorFuture<void> _runImpl(std::shared_ptr<executor::ScopedTaskExecutor> executor,
const CancellationToken& token) noexcept override;
- SetAllowMigrationsCoordinatorDocument _doc;
const bool _allowMigrations;
};
} // namespace mongo
diff --git a/src/mongo/db/s/set_shard_version_command.cpp b/src/mongo/db/s/set_shard_version_command.cpp
deleted file mode 100644
index 0c8e2da5037..00000000000
--- a/src/mongo/db/s/set_shard_version_command.cpp
+++ /dev/null
@@ -1,340 +0,0 @@
-/**
- * Copyright (C) 2018-present MongoDB, Inc.
- *
- * This program is free software: you can redistribute it and/or modify
- * it under the terms of the Server Side Public License, version 1,
- * as published by MongoDB, Inc.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * Server Side Public License for more details.
- *
- * You should have received a copy of the Server Side Public License
- * along with this program. If not, see
- * <http://www.mongodb.com/licensing/server-side-public-license>.
- *
- * As a special exception, the copyright holders give permission to link the
- * code of portions of this program with the OpenSSL library under certain
- * conditions as described in each individual source file and distribute
- * linked combinations including the program with the OpenSSL library. You
- * must comply with the Server Side Public License in all respects for
- * all of the code used other than as permitted herein. If you modify file(s)
- * with this exception, you may extend this exception to your version of the
- * file(s), but you are not obligated to do so. If you do not wish to do so,
- * delete this exception statement from your version. If you delete this
- * exception statement from all source files in the program, then also delete
- * it in the license file.
- */
-
-
-#include "mongo/platform/basic.h"
-
-#include "mongo/db/auth/action_set.h"
-#include "mongo/db/auth/action_type.h"
-#include "mongo/db/auth/authorization_session.h"
-#include "mongo/db/auth/privilege.h"
-#include "mongo/db/catalog/collection_catalog.h"
-#include "mongo/db/catalog_raii.h"
-#include "mongo/db/client.h"
-#include "mongo/db/commands.h"
-#include "mongo/db/not_primary_error_tracker.h"
-#include "mongo/db/operation_context.h"
-#include "mongo/db/repl/replication_coordinator.h"
-#include "mongo/db/s/collection_sharding_runtime.h"
-#include "mongo/db/s/shard_filtering_metadata_refresh.h"
-#include "mongo/db/s/sharding_state.h"
-#include "mongo/logv2/log.h"
-#include "mongo/s/client/shard_registry.h"
-#include "mongo/s/grid.h"
-#include "mongo/s/request_types/set_shard_version_request.h"
-#include "mongo/util/str.h"
-
-#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
-
-
-namespace mongo {
-namespace {
-
-class SetShardVersion : public ErrmsgCommandDeprecated {
-public:
- SetShardVersion() : ErrmsgCommandDeprecated("setShardVersion") {}
-
- std::string help() const override {
- return "internal";
- }
-
- bool adminOnly() const override {
- return true;
- }
-
- AllowedOnSecondary secondaryAllowed(ServiceContext*) const override {
- return AllowedOnSecondary::kAlways;
- }
-
- virtual bool supportsWriteConcern(const BSONObj& cmd) const override {
- return false;
- }
-
- void addRequiredPrivileges(const std::string& dbname,
- const BSONObj& cmdObj,
- std::vector<Privilege>* out) const override {
- ActionSet actions;
- actions.addAction(ActionType::internal);
- out->push_back(Privilege(ResourcePattern::forClusterResource(), actions));
- }
-
- bool errmsgRun(OperationContext* opCtx,
- const std::string&,
- const BSONObj& cmdObj,
- std::string& errmsg,
- BSONObjBuilder& result) {
- uassert(ErrorCodes::IllegalOperation,
- "can't issue setShardVersion from 'eval'",
- !opCtx->getClient()->isInDirectClient());
-
- auto const shardingState = ShardingState::get(opCtx);
- uassertStatusOK(shardingState->canAcceptShardedCommands());
-
- // Steps
- // 1. Set the `authoritative` variable from the command object.
- //
- // 2. Validate all command parameters against the info in our ShardingState, and return an
- // error if they do not match.
- //
- // 3. If the sent shardVersion is compatible with our shardVersion, return.
- //
- // 4. If the sent shardVersion indicates a drop, jump to step 6.
- //
- // 5. If the sent shardVersion is staler than ours, return a stale config error.
- //
- // 6. If the sent shardVersion is newer than ours (or indicates a drop), reload our metadata
- // and compare the sent shardVersion with what we reloaded. If the sent shardVersion is
- // staler than what we reloaded, return a stale config error, as in step 5.
-
- // Step 1
-
- Client* client = opCtx->getClient();
- NotPrimaryErrorTracker::get(client).disable();
-
- const bool authoritative = cmdObj.getBoolField("authoritative");
-
- // Step 2
-
- // Validate namespace parameter.
- const NamespaceString nss(cmdObj["setShardVersion"].String());
- uassert(ErrorCodes::InvalidNamespace,
- str::stream() << "Invalid namespace " << nss.ns(),
- nss.isValid());
-
- // Validate chunk version parameter.
- auto requestedVersion = ChunkVersion::parse(cmdObj[SetShardVersionRequest::kVersion]);
-
- // Step 3
-
- {
- boost::optional<AutoGetDb> autoDb;
- autoDb.emplace(opCtx, nss.db(), MODE_IS);
-
- // Secondary nodes cannot support set shard version
- uassert(ErrorCodes::NotWritablePrimary,
- str::stream() << "setShardVersion with collection version is only supported "
- "against primary nodes, but it was received for namespace "
- << nss.ns(),
- repl::ReplicationCoordinator::get(opCtx)->canAcceptWritesForDatabase(opCtx,
- nss.db()));
-
- boost::optional<Lock::CollectionLock> collLock;
- collLock.emplace(opCtx, nss, MODE_IS);
-
- // Views do not require a shard version check. We do not care about invalid system views
- // for this check, only to validate if a view already exists for this namespace.
- if (autoDb->getDb() &&
- !CollectionCatalog::get(opCtx)->lookupCollectionByNamespace(opCtx, nss) &&
- CollectionCatalog::get(opCtx)->lookupViewWithoutValidatingDurable(opCtx, nss)) {
- return true;
- }
-
- auto* const csr = CollectionShardingRuntime::get(opCtx, nss);
- const ChunkVersion collectionShardVersion = [&] {
- auto optMetadata = csr->getCurrentMetadataIfKnown();
- return (optMetadata && optMetadata->isSharded()) ? optMetadata->getShardVersion()
- : ChunkVersion::UNSHARDED();
- }();
-
- if (requestedVersion.isWriteCompatibleWith(collectionShardVersion)) {
- return true;
- }
-
- // Step 4
-
- const bool isDropRequested =
- !requestedVersion.isSet() && collectionShardVersion.isSet();
-
- if (isDropRequested) {
- if (!authoritative) {
- result.appendBool("need_authoritative", true);
- result.append("ns", nss.ns());
- collectionShardVersion.appendLegacyWithField(&result, "globalVersion");
- errmsg = "dropping needs to be authoritative";
- return false;
- }
-
- // Fall through to metadata reload below
- } else {
- // Not Dropping
-
- // Step 5
-
- const auto kTenSeconds = Milliseconds(10000);
-
- if (requestedVersion.isOlderThan(collectionShardVersion)) {
- auto critSecSignal = csr->getCriticalSectionSignal(
- opCtx, ShardingMigrationCriticalSection::kWrite);
- if (critSecSignal) {
- collLock.reset();
- autoDb.reset();
- LOGV2(22056, "waiting till out of critical section");
- auto deadline = opCtx->getServiceContext()->getFastClockSource()->now() +
- std::min(opCtx->getRemainingMaxTimeMillis(), kTenSeconds);
-
- opCtx->runWithDeadline(deadline, ErrorCodes::ExceededTimeLimit, [&] {
- critSecSignal->wait(opCtx);
- });
- }
-
- errmsg = str::stream() << "shard global version for collection is higher "
- << "than trying to set to '" << nss.ns() << "'";
- result.append("ns", nss.ns());
- requestedVersion.appendLegacyWithField(&result, "version");
- collectionShardVersion.appendLegacyWithField(&result, "globalVersion");
- result.appendBool("reloadConfig", true);
- return false;
- }
-
- if (!collectionShardVersion.isSet() && !authoritative) {
- // Needed b/c when the last chunk is moved off a shard, the version gets reset
- // to zero, which should require a reload.
- auto critSecSignal = csr->getCriticalSectionSignal(
- opCtx, ShardingMigrationCriticalSection::kWrite);
- if (critSecSignal) {
- collLock.reset();
- autoDb.reset();
- LOGV2(22057, "waiting till out of critical section");
-
- auto deadline = opCtx->getServiceContext()->getFastClockSource()->now() +
- std::min(opCtx->getRemainingMaxTimeMillis(), kTenSeconds);
-
- opCtx->runWithDeadline(deadline, ErrorCodes::ExceededTimeLimit, [&] {
- critSecSignal->wait(opCtx);
- });
- }
-
- // need authoritative for first look
- result.append("ns", nss.ns());
- result.appendBool("need_authoritative", true);
- errmsg = str::stream() << "first time for collection '" << nss.ns() << "'";
- return false;
- }
-
- // Fall through to metadata reload below
- }
- }
-
- // Step 6
-
- const auto status = [&] {
- try {
- // TODO (SERVER-50812) remove this if-else: just call onShardVersionMismatch
- if (requestedVersion == ChunkVersion::UNSHARDED()) {
- forceShardFilteringMetadataRefresh(opCtx, nss);
- } else {
- onShardVersionMismatch(opCtx, nss, requestedVersion);
- }
- } catch (const DBException& ex) {
- return ex.toStatus();
- }
- return Status::OK();
- }();
-
- {
- // Avoid using AutoGetCollection() as it returns the InvalidViewDefinition error code
- // if an invalid view is in the 'system.views' collection.
- AutoGetDb autoDb(opCtx, nss.db(), MODE_IS);
- Lock::CollectionLock collLock(opCtx, nss, MODE_IS);
-
- const ChunkVersion currVersion = [&] {
- auto* const csr = CollectionShardingRuntime::get(opCtx, nss);
- auto optMetadata = csr->getCurrentMetadataIfKnown();
- return (optMetadata && optMetadata->isSharded()) ? optMetadata->getShardVersion()
- : ChunkVersion::UNSHARDED();
- }();
-
- if (!status.isOK()) {
- // The reload itself was interrupted or confused here
- LOGV2_WARNING(
- 22058,
- "Could not refresh metadata for the namespace {namespace} with the requested "
- "shard version {requestedShardVersion}; the current shard version is "
- "{currentShardVersion}: {error}",
- "Could not refresh metadata",
- "namespace"_attr = nss.ns(),
- "requestedShardVersion"_attr = requestedVersion,
- "currentShardVersion"_attr = currVersion,
- "error"_attr = redact(status));
-
- result.append("ns", nss.ns());
- status.serializeErrorToBSON(&result);
- requestedVersion.appendLegacyWithField(&result, "version");
- currVersion.appendLegacyWithField(&result, "globalVersion");
- result.appendBool("reloadConfig", true);
-
- return false;
- } else if (!requestedVersion.isWriteCompatibleWith(currVersion)) {
- // We reloaded a version that doesn't match the version mongos was trying to
- // set.
- static Occasionally sampler;
- if (sampler.tick()) {
- LOGV2_WARNING(
- 22059,
- "Requested shard version differs from the authoritative (current) shard "
- "version for the namespace {namespace}; the requested version is "
- "{requestedShardVersion}, but the current version is "
- "{currentShardVersion}",
- "Requested shard version differs from the authoritative (current) shard "
- "version for this namespace",
- "namespace"_attr = nss.ns(),
- "requestedShardVersion"_attr = requestedVersion,
- "currentShardVersion"_attr = currVersion);
- }
-
- // WARNING: the exact fields below are important for compatibility with mongos
- // version reload.
-
- result.append("ns", nss.ns());
- currVersion.appendLegacyWithField(&result, "globalVersion");
-
- // If this was a reset of a collection or the last chunk moved out, inform mongos to
- // do a full reload.
- if (currVersion.epoch() != requestedVersion.epoch() || !currVersion.isSet()) {
- result.appendBool("reloadConfig", true);
- // Zero-version also needed to trigger full mongos reload, sadly
- // TODO: Make this saner, and less impactful (full reload on last chunk is bad)
- ChunkVersion::UNSHARDED().appendLegacyWithField(&result, "version");
- // For debugging
- requestedVersion.appendLegacyWithField(&result, "origVersion");
- } else {
- requestedVersion.appendLegacyWithField(&result, "version");
- }
-
- return false;
- }
- }
-
- return true;
- }
-
-} setShardVersionCmd;
-
-} // namespace
-} // namespace mongo
diff --git a/src/mongo/db/s/shard_key_index_util.cpp b/src/mongo/db/s/shard_key_index_util.cpp
index 9b3b6371a4a..1cdd4f99008 100644
--- a/src/mongo/db/s/shard_key_index_util.cpp
+++ b/src/mongo/db/s/shard_key_index_util.cpp
@@ -48,7 +48,8 @@ boost::optional<ShardKeyIndex> _findShardKeyPrefixedIndex(
const IndexCatalog* indexCatalog,
const boost::optional<std::string>& excludeName,
const BSONObj& shardKey,
- bool requireSingleKey) {
+ bool requireSingleKey,
+ std::string* errMsg = nullptr) {
if (collection->isClustered() &&
clustered_util::matchesClusterKey(shardKey, collection->getClusteredInfo())) {
auto clusteredIndexSpec = collection->getClusteredInfo()->getIndexSpec();
@@ -67,7 +68,8 @@ boost::optional<ShardKeyIndex> _findShardKeyPrefixedIndex(
continue;
}
- if (isCompatibleWithShardKey(opCtx, collection, indexEntry, shardKey, requireSingleKey)) {
+ if (isCompatibleWithShardKey(
+ opCtx, collection, indexEntry, shardKey, requireSingleKey, errMsg)) {
if (!indexEntry->isMultikey(opCtx, collection)) {
return ShardKeyIndex(indexDescriptor);
}
@@ -108,26 +110,72 @@ bool isCompatibleWithShardKey(OperationContext* opCtx,
const CollectionPtr& collection,
const IndexCatalogEntry* indexEntry,
const BSONObj& shardKey,
- bool requireSingleKey) {
+ bool requireSingleKey,
+ std::string* errMsg) {
+ // Return a descriptive error for each index that shares a prefix with shardKey but
+ // cannot be used for sharding.
+ const int kErrorPartial = 0x01;
+ const int kErrorSparse = 0x02;
+ const int kErrorMultikey = 0x04;
+ const int kErrorCollation = 0x08;
+ const int kErrorNotPrefix = 0x10;
+ int reasons = 0;
+
auto desc = indexEntry->descriptor();
bool hasSimpleCollation = desc->collation().isEmpty();
- if (desc->isPartial() || desc->isSparse()) {
- return false;
+ if (desc->isPartial()) {
+ reasons |= kErrorPartial;
+ }
+
+ if (desc->isSparse()) {
+ reasons |= kErrorSparse;
}
if (!shardKey.isPrefixOf(desc->keyPattern(), SimpleBSONElementComparator::kInstance)) {
- return false;
+ reasons |= kErrorNotPrefix;
}
- if (!indexEntry->isMultikey(opCtx, collection) && hasSimpleCollation) {
- return true;
+    if (reasons == 0) {  // i.e. not partial, not sparse, and the shard key IS a prefix of the key:
+ if (!indexEntry->isMultikey(opCtx, collection)) {
+ if (hasSimpleCollation) {
+ return true;
+ }
+ } else {
+ reasons |= kErrorMultikey;
+ }
+ if (!requireSingleKey && hasSimpleCollation) {
+ return true;
+ }
}
- if (!requireSingleKey && hasSimpleCollation) {
- return true;
+ if (!hasSimpleCollation) {
+ reasons |= kErrorCollation;
}
+ if (errMsg && reasons != 0) {
+ std::string errors = "Index " + indexEntry->descriptor()->indexName() +
+ " cannot be used for sharding because:";
+ if (reasons & kErrorPartial) {
+ errors += " Index key is partial.";
+ }
+ if (reasons & kErrorSparse) {
+ errors += " Index key is sparse.";
+ }
+ if (reasons & kErrorMultikey) {
+ errors += " Index key is multikey.";
+ }
+ if (reasons & kErrorCollation) {
+ errors += " Index has a non-simple collation.";
+ }
+ if (reasons & kErrorNotPrefix) {
+ errors += " Shard key is not a prefix of index key.";
+ }
+ if (!errMsg->empty()) {
+ *errMsg += "\n";
+ }
+ *errMsg += errors;
+ }
return false;
}
@@ -145,9 +193,10 @@ boost::optional<ShardKeyIndex> findShardKeyPrefixedIndex(OperationContext* opCtx
const CollectionPtr& collection,
const IndexCatalog* indexCatalog,
const BSONObj& shardKey,
- bool requireSingleKey) {
+ bool requireSingleKey,
+ std::string* errMsg) {
return _findShardKeyPrefixedIndex(
- opCtx, collection, indexCatalog, boost::none, shardKey, requireSingleKey);
+ opCtx, collection, indexCatalog, boost::none, shardKey, requireSingleKey, errMsg);
}
} // namespace mongo
diff --git a/src/mongo/db/s/shard_key_index_util.h b/src/mongo/db/s/shard_key_index_util.h
index 515523b0803..c474363d8ac 100644
--- a/src/mongo/db/s/shard_key_index_util.h
+++ b/src/mongo/db/s/shard_key_index_util.h
@@ -67,12 +67,16 @@ private:
/**
* Returns true if the given index is compatible with the shard key pattern.
+ *
+ * If return value is false and errMsg is non-null, the reasons that the existing index is
+ * incompatible will be appended to errMsg.
*/
bool isCompatibleWithShardKey(OperationContext* opCtx,
const CollectionPtr& collection,
const IndexCatalogEntry* indexEntry,
const BSONObj& shardKey,
- bool requireSingleKey);
+ bool requireSingleKey,
+ std::string* errMsg = nullptr);
/**
* Returns an index suitable for shard key range scans if it exists.
@@ -89,7 +93,8 @@ boost::optional<ShardKeyIndex> findShardKeyPrefixedIndex(OperationContext* opCtx
const CollectionPtr& collection,
const IndexCatalog* indexCatalog,
const BSONObj& shardKey,
- bool requireSingleKey);
+ bool requireSingleKey,
+ std::string* errMsg = nullptr);
/**
* Returns true if the given index name is the last remaining index that is compatible with the
diff --git a/src/mongo/db/s/shard_key_util.cpp b/src/mongo/db/s/shard_key_util.cpp
index 5a0acaeb2a4..a0363a907d4 100644
--- a/src/mongo/db/s/shard_key_util.cpp
+++ b/src/mongo/db/s/shard_key_util.cpp
@@ -107,7 +107,8 @@ bool validShardKeyIndexExists(OperationContext* opCtx,
const ShardKeyPattern& shardKeyPattern,
const boost::optional<BSONObj>& defaultCollation,
bool requiresUnique,
- const ShardKeyValidationBehaviors& behaviors) {
+ const ShardKeyValidationBehaviors& behaviors,
+ std::string* errMsg) {
auto indexes = behaviors.loadIndexes(nss);
// 1. Verify consistency with existing unique indexes
@@ -124,7 +125,9 @@ bool validShardKeyIndexExists(OperationContext* opCtx,
// 2. Check for a useful index
bool hasUsefulIndexForKey = false;
+ std::string allReasons;
for (const auto& idx : indexes) {
+ std::string reasons;
BSONObj currentKey = idx["key"].embeddedObject();
// Check 2.i. and 2.ii.
if (!idx["sparse"].trueValue() && idx["filter"].eoo() && idx["collation"].eoo() &&
@@ -143,6 +146,19 @@ bool validShardKeyIndexExists(OperationContext* opCtx,
idx["seed"].numberInt() == BSONElementHasher::DEFAULT_HASH_SEED);
hasUsefulIndexForKey = true;
}
+ if (idx["sparse"].trueValue()) {
+ reasons += " Index key is sparse.";
+ }
+ if (idx["filter"].ok()) {
+ reasons += " Index key is partial.";
+ }
+ if (idx["collation"].ok()) {
+ reasons += " Index has a non-simple collation.";
+ }
+        if (!reasons.empty()) {
+            allReasons +=
+                " Index " + idx["name"] + " cannot be used for sharding because [" + reasons + " ]";
+        }
}
// 3. If proposed key is required to be unique, additionally check for exact match.
@@ -173,6 +189,10 @@ bool validShardKeyIndexExists(OperationContext* opCtx,
}
}
+ if (errMsg && !allReasons.empty()) {
+ *errMsg += allReasons;
+ }
+
if (hasUsefulIndexForKey) {
// Check 2.iii Make sure that there is a useful, non-multikey index available.
behaviors.verifyUsefulNonMultiKeyIndex(nss, shardKeyPattern.toBSON());
@@ -188,17 +208,19 @@ bool validateShardKeyIndexExistsOrCreateIfPossible(OperationContext* opCtx,
bool unique,
bool enforceUniquenessCheck,
const ShardKeyValidationBehaviors& behaviors) {
+ std::string errMsg;
if (validShardKeyIndexExists(opCtx,
nss,
shardKeyPattern,
defaultCollation,
unique && enforceUniquenessCheck,
- behaviors)) {
+ behaviors,
+ &errMsg)) {
return false;
}
// 4. If no useful index, verify we can create one.
- behaviors.verifyCanCreateShardKeyIndex(nss);
+ behaviors.verifyCanCreateShardKeyIndex(nss, &errMsg);
// 5. If no useful index exists and we can create one, create one on proposedKey. Only need
// to call ensureIndex on primary shard, since indexes get copied to receiving shard
@@ -271,11 +293,12 @@ void ValidationBehaviorsShardCollection::verifyUsefulNonMultiKeyIndex(
uassert(ErrorCodes::InvalidOptions, res["errmsg"].str(), success);
}
-void ValidationBehaviorsShardCollection::verifyCanCreateShardKeyIndex(
- const NamespaceString& nss) const {
+void ValidationBehaviorsShardCollection::verifyCanCreateShardKeyIndex(const NamespaceString& nss,
+ std::string* errMsg) const {
uassert(ErrorCodes::InvalidOptions,
- "Please create an index that starts with the proposed shard key before "
- "sharding the collection",
+ str::stream() << "Please create an index that starts with the proposed shard key before"
+ " sharding the collection. "
+ << *errMsg,
_localClient->findOne(nss, BSONObj{}).isEmpty());
}
@@ -334,11 +357,13 @@ void ValidationBehaviorsRefineShardKey::verifyUsefulNonMultiKeyIndex(
uassertStatusOK(checkShardingIndexRes.commandStatus);
}
-void ValidationBehaviorsRefineShardKey::verifyCanCreateShardKeyIndex(
- const NamespaceString& nss) const {
- uasserted(ErrorCodes::InvalidOptions,
- "Please create an index that starts with the proposed shard key before "
- "refining the shard key of the collection");
+void ValidationBehaviorsRefineShardKey::verifyCanCreateShardKeyIndex(const NamespaceString& nss,
+ std::string* errMsg) const {
+ uasserted(
+ ErrorCodes::InvalidOptions,
+        str::stream() << "Please create an index that starts with the proposed shard key before"
+                         " refining the shard key of the collection. "
+                      << *errMsg);
}
void ValidationBehaviorsRefineShardKey::createShardKeyIndex(
diff --git a/src/mongo/db/s/shard_key_util.h b/src/mongo/db/s/shard_key_util.h
index 5d20a013bef..55905e7beb7 100644
--- a/src/mongo/db/s/shard_key_util.h
+++ b/src/mongo/db/s/shard_key_util.h
@@ -51,7 +51,8 @@ public:
virtual void verifyUsefulNonMultiKeyIndex(const NamespaceString& nss,
const BSONObj& proposedKey) const = 0;
- virtual void verifyCanCreateShardKeyIndex(const NamespaceString& nss) const = 0;
+ virtual void verifyCanCreateShardKeyIndex(const NamespaceString& nss,
+ std::string* errMsg) const = 0;
virtual void createShardKeyIndex(const NamespaceString& nss,
const BSONObj& proposedKey,
@@ -72,7 +73,8 @@ public:
void verifyUsefulNonMultiKeyIndex(const NamespaceString& nss,
const BSONObj& proposedKey) const override;
- void verifyCanCreateShardKeyIndex(const NamespaceString& nss) const override;
+ void verifyCanCreateShardKeyIndex(const NamespaceString& nss,
+ std::string* errMsg) const override;
void createShardKeyIndex(const NamespaceString& nss,
const BSONObj& proposedKey,
@@ -95,7 +97,8 @@ public:
void verifyUsefulNonMultiKeyIndex(const NamespaceString& nss,
const BSONObj& proposedKey) const override;
- void verifyCanCreateShardKeyIndex(const NamespaceString& nss) const override;
+ void verifyCanCreateShardKeyIndex(const NamespaceString& nss,
+ std::string* errMsg) const override;
void createShardKeyIndex(const NamespaceString& nss,
const BSONObj& proposedKey,
@@ -165,7 +168,8 @@ bool validShardKeyIndexExists(OperationContext* opCtx,
const ShardKeyPattern& shardKeyPattern,
const boost::optional<BSONObj>& defaultCollation,
bool requiresUnique,
- const ShardKeyValidationBehaviors& behaviors);
+ const ShardKeyValidationBehaviors& behaviors,
+ std::string* errMsg = nullptr);
void validateShardKeyIsNotEncrypted(OperationContext* opCtx,
const NamespaceString& nss,
diff --git a/src/mongo/db/s/shard_metadata_util.cpp b/src/mongo/db/s/shard_metadata_util.cpp
index 1651cfc167e..e52a5e28d1a 100644
--- a/src/mongo/db/s/shard_metadata_util.cpp
+++ b/src/mongo/db/s/shard_metadata_util.cpp
@@ -27,13 +27,8 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include "mongo/db/s/shard_metadata_util.h"
-#include <memory>
-
#include "mongo/db/dbdirectclient.h"
#include "mongo/db/ops/write_ops.h"
#include "mongo/db/s/type_shard_collection.h"
@@ -49,7 +44,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
-
namespace mongo {
namespace shardmetadatautil {
namespace {
@@ -105,8 +99,9 @@ Status unsetPersistedRefreshFlags(OperationContext* opCtx,
// Set 'refreshing' to false and update the last refreshed collection version.
BSONObjBuilder updateBuilder;
updateBuilder.append(ShardCollectionType::kRefreshingFieldName, false);
- updateBuilder.appendTimestamp(ShardCollectionType::kLastRefreshedCollectionVersionFieldName,
- refreshedVersion.toLong());
+ updateBuilder.appendTimestamp(
+ ShardCollectionType::kLastRefreshedCollectionMajorMinorVersionFieldName,
+ refreshedVersion.toLong());
return updateShardCollectionsEntry(opCtx,
BSON(ShardCollectionType::kNssFieldName << nss.ns()),
@@ -141,12 +136,11 @@ StatusWith<RefreshState> getPersistedRefreshFlags(OperationContext* opCtx,
entry.getRefreshing() ? *entry.getRefreshing() : true,
entry.getLastRefreshedCollectionVersion()
? *entry.getLastRefreshedCollectionVersion()
- : ChunkVersion(0, 0, entry.getEpoch(), entry.getTimestamp())};
+ : ChunkVersion({entry.getEpoch(), entry.getTimestamp()}, {0, 0})};
}
StatusWith<ShardCollectionType> readShardCollectionsEntry(OperationContext* opCtx,
const NamespaceString& nss) {
-
try {
DBDirectClient client(opCtx);
FindCommandRequest findRequest{NamespaceString::kShardConfigCollectionsNamespace};
@@ -211,7 +205,8 @@ Status updateShardCollectionsEntry(OperationContext* opCtx,
if (upsert) {
// If upserting, this should be an update from the config server that does not have shard
// refresh / migration inc signal information.
- invariant(!update.hasField(ShardCollectionType::kLastRefreshedCollectionVersionFieldName));
+ invariant(!update.hasField(
+ ShardCollectionType::kLastRefreshedCollectionMajorMinorVersionFieldName));
}
try {
diff --git a/src/mongo/db/s/shard_metadata_util.h b/src/mongo/db/s/shard_metadata_util.h
index 52f043a0b9a..a23efa4b577 100644
--- a/src/mongo/db/s/shard_metadata_util.h
+++ b/src/mongo/db/s/shard_metadata_util.h
@@ -32,7 +32,7 @@
#include <string>
#include <vector>
-#include "mongo/base/status.h"
+#include "mongo/base/status_with.h"
#include "mongo/bson/bsonobj.h"
#include "mongo/bson/oid.h"
#include "mongo/s/chunk_version.h"
@@ -40,17 +40,11 @@
namespace mongo {
class ChunkType;
-class CollectionMetadata;
class NamespaceString;
class OperationContext;
class ShardCollectionType;
class ShardDatabaseType;
-template <typename T>
-class StatusWith;
-/**
- * Function helpers to locally, using a DBDirectClient, read and write sharding metadata on a shard.
- */
namespace shardmetadatautil {
/**
@@ -62,25 +56,6 @@ struct QueryAndSort {
};
/**
- * Subset of the shard's collections collection document that relates to refresh state.
- */
-struct RefreshState {
- bool operator==(const RefreshState& other) const;
-
- std::string toString() const;
-
- // The current generation of the collection.
- CollectionGeneration generation;
-
- // Whether a refresh is currently in progress.
- bool refreshing;
-
- // The collection version after the last complete refresh. Indicates change if refreshing has
- // started and finished since last loaded.
- ChunkVersion lastRefreshedCollectionVersion;
-};
-
-/**
* Returns the query needed to find incremental changes to the chunks collection on a shard server.
*
* The query has to find all the chunks $gte the current max version. Currently, any splits, merges
@@ -115,6 +90,26 @@ Status unsetPersistedRefreshFlags(OperationContext* opCtx,
const ChunkVersion& refreshedVersion);
/**
+ * Represents a subset of a collection's config.cache.collections entry that relates to refresh
+ * state.
+ */
+struct RefreshState {
+ bool operator==(const RefreshState& other) const;
+
+ std::string toString() const;
+
+ // The current generation of the collection.
+ CollectionGeneration generation;
+
+ // Whether a refresh is currently in progress.
+ bool refreshing;
+
+ // The collection version after the last complete refresh. Indicates change if refreshing has
+ // started and finished since last loaded.
+ ChunkVersion lastRefreshedCollectionVersion;
+};
+
+/**
* Reads the persisted refresh signal for 'nss' and returns those settings.
*/
StatusWith<RefreshState> getPersistedRefreshFlags(OperationContext* opCtx,
diff --git a/src/mongo/db/s/shard_metadata_util_test.cpp b/src/mongo/db/s/shard_metadata_util_test.cpp
index 6bad5d66ac1..af35cf373e8 100644
--- a/src/mongo/db/s/shard_metadata_util_test.cpp
+++ b/src/mongo/db/s/shard_metadata_util_test.cpp
@@ -27,14 +27,10 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
-#include "mongo/db/s/shard_metadata_util.h"
-
-#include "mongo/base/status.h"
#include "mongo/client/remote_command_targeter_mock.h"
#include "mongo/db/commands.h"
#include "mongo/db/dbdirectclient.h"
+#include "mongo/db/s/shard_metadata_util.h"
#include "mongo/db/s/shard_server_test_fixture.h"
#include "mongo/db/s/type_shard_collection.h"
#include "mongo/rpc/get_status_from_command_result.h"
@@ -159,7 +155,7 @@ struct ShardMetadataUtilTest : public ShardServerTestFixture {
}
}
- ChunkVersion maxCollVersion{0, 0, OID::gen(), Timestamp(1, 1)};
+ ChunkVersion maxCollVersion{{OID::gen(), Timestamp(1, 1)}, {0, 0}};
const KeyPattern keyPattern{BSON("a" << 1)};
const BSONObj defaultCollation{BSON("locale"
<< "fr_CA")};
@@ -216,7 +212,7 @@ TEST_F(ShardMetadataUtilTest, PersistedRefreshSignalStartAndFinish) {
ASSERT(state.generation.isSameCollection(maxCollVersion));
ASSERT_EQUALS(state.refreshing, true);
ASSERT_EQUALS(state.lastRefreshedCollectionVersion,
- ChunkVersion(0, 0, maxCollVersion.epoch(), maxCollVersion.getTimestamp()));
+ ChunkVersion({maxCollVersion.epoch(), maxCollVersion.getTimestamp()}, {0, 0}));
// Signal refresh finish
ASSERT_OK(unsetPersistedRefreshFlags(operationContext(), kNss, maxCollVersion));
@@ -235,7 +231,7 @@ TEST_F(ShardMetadataUtilTest, WriteAndReadChunks) {
// read all the chunks
QueryAndSort allChunkDiff = createShardChunkDiffQuery(
- ChunkVersion(0, 0, maxCollVersion.epoch(), maxCollVersion.getTimestamp()));
+ ChunkVersion({maxCollVersion.epoch(), maxCollVersion.getTimestamp()}, {0, 0}));
std::vector<ChunkType> readChunks = assertGet(readShardChunks(operationContext(),
kNss,
allChunkDiff.query,
diff --git a/src/mongo/db/s/shard_server_catalog_cache_loader.cpp b/src/mongo/db/s/shard_server_catalog_cache_loader.cpp
index e344e20b5e6..93a685475d4 100644
--- a/src/mongo/db/s/shard_server_catalog_cache_loader.cpp
+++ b/src/mongo/db/s/shard_server_catalog_cache_loader.cpp
@@ -27,17 +27,8 @@
* it in the license file.
*/
-
-#define LOGV2_FOR_CATALOG_REFRESH(ID, DLEVEL, MESSAGE, ...) \
- LOGV2_DEBUG_OPTIONS( \
- ID, DLEVEL, {logv2::LogComponent::kShardingCatalogRefresh}, MESSAGE, ##__VA_ARGS__)
-
-#include "mongo/platform/basic.h"
-
#include "mongo/db/s/shard_server_catalog_cache_loader.h"
-#include <memory>
-
#include "mongo/db/catalog/rename_collection.h"
#include "mongo/db/client.h"
#include "mongo/db/db_raii.h"
@@ -57,7 +48,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
-
namespace mongo {
using namespace shardmetadatautil;
@@ -66,7 +56,6 @@ using CollectionAndChangedChunks = CatalogCacheLoader::CollectionAndChangedChunk
namespace {
-MONGO_FAIL_POINT_DEFINE(hangPersistCollectionAndChangedChunksAfterDropChunks);
MONGO_FAIL_POINT_DEFINE(hangCollectionFlush);
AtomicWord<unsigned long long> taskIdGenerator{0};
@@ -85,11 +74,6 @@ void dropChunksIfEpochChanged(OperationContext* opCtx,
// Drop the 'config.cache.chunks.<ns>' collection
dropChunks(opCtx, nss);
- if (MONGO_unlikely(hangPersistCollectionAndChangedChunksAfterDropChunks.shouldFail())) {
- LOGV2(22093, "Hit hangPersistCollectionAndChangedChunksAfterDropChunks failpoint");
- hangPersistCollectionAndChangedChunksAfterDropChunks.pauseWhileSet(opCtx);
- }
-
LOGV2(5990400,
"Dropped persisted chunk metadata due to epoch change",
"namespace"_attr = nss,
@@ -131,7 +115,6 @@ Status persistCollectionAndChangedChunks(OperationContext* opCtx,
return status;
}
- // Update the chunk metadata.
try {
dropChunksIfEpochChanged(opCtx, maxLoaderVersion, collAndChunks.epoch, nss);
} catch (const DBException& ex) {
@@ -211,13 +194,13 @@ ChunkVersion getPersistedMaxChunkVersion(OperationContext* opCtx, const Namespac
return ChunkVersion::UNSHARDED();
}
- auto statusWithChunk = shardmetadatautil::readShardChunks(opCtx,
- nss,
- BSONObj(),
- BSON(ChunkType::lastmod() << -1),
- 1LL,
- cachedCollection.getEpoch(),
- cachedCollection.getTimestamp());
+ auto statusWithChunk = readShardChunks(opCtx,
+ nss,
+ BSONObj(),
+ BSON(ChunkType::lastmod() << -1),
+ 1LL,
+ cachedCollection.getEpoch(),
+ cachedCollection.getTimestamp());
uassertStatusOKWithContext(
statusWithChunk,
str::stream() << "Failed to read highest version persisted chunk for collection '"
@@ -247,11 +230,9 @@ CollectionAndChangedChunks getPersistedMetadataSinceVersion(OperationContext* op
// If the epochs are the same we can safely take the timestamp from the shard coll entry.
ChunkVersion startingVersion = version.isSameCollection({shardCollectionEntry.getEpoch(),
shardCollectionEntry.getTimestamp()})
- ? ChunkVersion(version.majorVersion(),
- version.minorVersion(),
- version.epoch(),
- shardCollectionEntry.getTimestamp())
- : ChunkVersion(0, 0, shardCollectionEntry.getEpoch(), shardCollectionEntry.getTimestamp());
+ ? version
+ : ChunkVersion({shardCollectionEntry.getEpoch(), shardCollectionEntry.getTimestamp()},
+ {0, 0});
QueryAndSort diff = createShardChunkDiffQuery(startingVersion);
@@ -647,7 +628,14 @@ StatusWith<CollectionAndChangedChunks> ShardServerCatalogCacheLoader::_runSecond
const NamespaceString& nss,
const ChunkVersion& catalogCacheSinceVersion) {
+ Timer t;
forcePrimaryCollectionRefreshAndWaitForReplication(opCtx, nss);
+ LOGV2_FOR_CATALOG_REFRESH(5965800,
+ 2,
+ "Cache loader on secondary successfully waited for primary refresh "
+ "and replication of collection",
+ "namespace"_attr = nss,
+ "duration"_attr = Milliseconds(t.millis()));
// Read the local metadata.
@@ -776,7 +764,14 @@ ShardServerCatalogCacheLoader::_schedulePrimaryGetChunksSince(
StatusWith<DatabaseType> ShardServerCatalogCacheLoader::_runSecondaryGetDatabase(
OperationContext* opCtx, StringData dbName) {
+ Timer t;
forcePrimaryDatabaseRefreshAndWaitForReplication(opCtx, dbName);
+ LOGV2_FOR_CATALOG_REFRESH(5965801,
+ 2,
+ "Cache loader on secondary successfully waited for primary refresh "
+ "and replication of database",
+ "db"_attr = dbName,
+ "duration"_attr = Milliseconds(t.millis()));
return readShardDatabasesEntry(opCtx, dbName);
}
@@ -1280,16 +1275,7 @@ ShardServerCatalogCacheLoader::CollAndChunkTask::CollAndChunkTask(
if (statusWithCollectionAndChangedChunks.isOK()) {
collectionAndChangedChunks = std::move(statusWithCollectionAndChangedChunks.getValue());
invariant(!collectionAndChangedChunks->changedChunks.empty());
- const auto highestVersion = collectionAndChangedChunks->changedChunks.back().getVersion();
- // Note that due to the way Phase 1 of the FCV upgrade writes timestamps to chunks
- // (non-atomically), it is possible that chunks exist with timestamps, but the
- // corresponding config.collections entry doesn't. In this case, the chunks timestamp
- // should be ignored when computing the max query version and we should use the
- // timestamp that comes from config.collections.
- maxQueryVersion = ChunkVersion(highestVersion.majorVersion(),
- highestVersion.minorVersion(),
- highestVersion.epoch(),
- collectionAndChangedChunks->timestamp);
+ maxQueryVersion = collectionAndChangedChunks->changedChunks.back().getVersion();
} else {
invariant(statusWithCollectionAndChangedChunks == ErrorCodes::NamespaceNotFound);
dropped = true;
diff --git a/src/mongo/db/s/shard_server_catalog_cache_loader_test.cpp b/src/mongo/db/s/shard_server_catalog_cache_loader_test.cpp
index 9f2f1ddf8d0..a111b9bf592 100644
--- a/src/mongo/db/s/shard_server_catalog_cache_loader_test.cpp
+++ b/src/mongo/db/s/shard_server_catalog_cache_loader_test.cpp
@@ -27,8 +27,6 @@
* it in the license file.
*/
-#include "mongo/platform/basic.h"
-
#include <boost/optional/optional_io.hpp>
#include "mongo/db/s/shard_server_catalog_cache_loader.h"
@@ -203,7 +201,7 @@ CollectionType ShardServerCatalogCacheLoaderTest::makeCollectionType(
std::pair<CollectionType, vector<ChunkType>>
ShardServerCatalogCacheLoaderTest::setUpChunkLoaderWithFiveChunks() {
- ChunkVersion collectionVersion(1, 0, OID::gen(), Timestamp(1, 1));
+ ChunkVersion collectionVersion({OID::gen(), Timestamp(1, 1)}, {1, 0});
CollectionType collectionType = makeCollectionType(collectionVersion);
vector<ChunkType> chunks = makeFiveChunks(collectionVersion);
@@ -371,7 +369,7 @@ TEST_F(ShardServerCatalogCacheLoaderTest, PrimaryLoadFromShardedAndFindNewEpoch)
// Then refresh again and find that the collection has been dropped and recreated.
- ChunkVersion collVersionWithNewEpoch(1, 0, OID::gen(), Timestamp(2, 0));
+ ChunkVersion collVersionWithNewEpoch({OID::gen(), Timestamp(2, 0)}, {1, 0});
CollectionType collectionTypeWithNewEpoch = makeCollectionType(collVersionWithNewEpoch);
vector<ChunkType> chunksWithNewEpoch = makeFiveChunks(collVersionWithNewEpoch);
_remoteLoaderMock->setCollectionRefreshReturnValue(collectionTypeWithNewEpoch);
@@ -398,7 +396,7 @@ TEST_F(ShardServerCatalogCacheLoaderTest, PrimaryLoadFromShardedAndFindMixedChun
// Then refresh again and retrieve chunks from the config server that have mixed epoches, like
// as if the chunks read yielded around a drop and recreate of the collection.
- ChunkVersion collVersionWithNewEpoch(1, 0, OID::gen(), Timestamp(2, 0));
+ ChunkVersion collVersionWithNewEpoch({OID::gen(), Timestamp(2, 0)}, {1, 0});
CollectionType collectionTypeWithNewEpoch = makeCollectionType(collVersionWithNewEpoch);
vector<ChunkType> chunksWithNewEpoch = makeFiveChunks(collVersionWithNewEpoch);
vector<ChunkType> mixedChunks;
@@ -441,7 +439,7 @@ TEST_F(ShardServerCatalogCacheLoaderTest, PrimaryLoadFromShardedAndFindMixedChun
}
TEST_F(ShardServerCatalogCacheLoaderTest, TimeseriesFieldsAreProperlyPropagatedOnSSCCL) {
- ChunkVersion collectionVersion(1, 0, OID::gen(), Timestamp(1, 1));
+ ChunkVersion collectionVersion({OID::gen(), Timestamp(1, 1)}, {1, 0});
CollectionType collectionType = makeCollectionType(collectionVersion);
vector<ChunkType> chunks = makeFiveChunks(collectionVersion);
@@ -483,7 +481,7 @@ TEST_F(ShardServerCatalogCacheLoaderTest, TimeseriesFieldsAreProperlyPropagatedO
}
void ShardServerCatalogCacheLoaderTest::refreshCollectionEpochOnRemoteLoader() {
- ChunkVersion collectionVersion(1, 2, OID::gen(), Timestamp(1, 1));
+ ChunkVersion collectionVersion({OID::gen(), Timestamp(1, 1)}, {1, 2});
CollectionType collectionType = makeCollectionType(collectionVersion);
vector<ChunkType> chunks = makeFiveChunks(collectionVersion);
_remoteLoaderMock->setCollectionRefreshReturnValue(collectionType);
diff --git a/src/mongo/db/s/sharding_data_transform_cumulative_metrics.cpp b/src/mongo/db/s/sharding_data_transform_cumulative_metrics.cpp
index add2ac6f728..dca4b07d7cf 100644
--- a/src/mongo/db/s/sharding_data_transform_cumulative_metrics.cpp
+++ b/src/mongo/db/s/sharding_data_transform_cumulative_metrics.cpp
@@ -50,8 +50,8 @@ constexpr auto kCountFailed = "countFailed";
constexpr auto kCountCanceled = "countCanceled";
constexpr auto kLastOpEndingChunkImbalance = "lastOpEndingChunkImbalance";
constexpr auto kActive = "active";
-constexpr auto kDocumentsProcessed = "documentsProcessed";
-constexpr auto kBytesWritten = "bytesWritten";
+constexpr auto kDocumentsCopied = "documentsCopied";
+constexpr auto kBytesCopied = "bytesCopied";
constexpr auto kOplogEntriesFetched = "oplogEntriesFetched";
constexpr auto kOplogEntriesApplied = "oplogEntriesApplied";
constexpr auto kInsertsApplied = "insertsApplied";
@@ -240,8 +240,8 @@ void ShardingDataTransformCumulativeMetrics::reportForServerStatus(BSONObjBuilde
void ShardingDataTransformCumulativeMetrics::reportActive(BSONObjBuilder* bob) const {
BSONObjBuilder s(bob->subobjStart(kActive));
- s.append(kDocumentsProcessed, _documentsProcessed.load());
- s.append(kBytesWritten, _bytesWritten.load());
+ s.append(kDocumentsCopied, _documentsCopied.load());
+ s.append(kBytesCopied, _bytesCopied.load());
s.append(kOplogEntriesFetched, _oplogEntriesFetched.load());
s.append(kOplogEntriesApplied, _oplogEntriesApplied.load());
s.append(kInsertsApplied, _insertsApplied.load());
@@ -422,8 +422,8 @@ const char* ShardingDataTransformCumulativeMetrics::fieldNameFor(
void ShardingDataTransformCumulativeMetrics::onInsertsDuringCloning(
int64_t count, int64_t bytes, const Milliseconds& elapsedTime) {
_collectionCloningTotalLocalBatchInserts.fetchAndAdd(1);
- _documentsProcessed.fetchAndAdd(count);
- _bytesWritten.fetchAndAdd(bytes);
+ _documentsCopied.fetchAndAdd(count);
+ _bytesCopied.fetchAndAdd(bytes);
_collectionCloningTotalLocalInsertTimeMillis.fetchAndAdd(
durationCount<Milliseconds>(elapsedTime));
}
diff --git a/src/mongo/db/s/sharding_data_transform_cumulative_metrics.h b/src/mongo/db/s/sharding_data_transform_cumulative_metrics.h
index dfd8c989628..5e6949cf001 100644
--- a/src/mongo/db/s/sharding_data_transform_cumulative_metrics.h
+++ b/src/mongo/db/s/sharding_data_transform_cumulative_metrics.h
@@ -191,8 +191,8 @@ private:
AtomicWord<int64_t> _totalBatchRetrievedDuringCloneMillis{0};
AtomicWord<int64_t> _oplogBatchApplied{0};
AtomicWord<int64_t> _oplogBatchAppliedMillis{0};
- AtomicWord<int64_t> _documentsProcessed{0};
- AtomicWord<int64_t> _bytesWritten{0};
+ AtomicWord<int64_t> _documentsCopied{0};
+ AtomicWord<int64_t> _bytesCopied{0};
AtomicWord<int64_t> _lastOpEndingChunkImbalance{0};
AtomicWord<int64_t> _readsDuringCriticalSection{0};
diff --git a/src/mongo/db/s/sharding_data_transform_cumulative_metrics_test.cpp b/src/mongo/db/s/sharding_data_transform_cumulative_metrics_test.cpp
index 99a221b10ba..5d6603c954c 100644
--- a/src/mongo/db/s/sharding_data_transform_cumulative_metrics_test.cpp
+++ b/src/mongo/db/s/sharding_data_transform_cumulative_metrics_test.cpp
@@ -330,8 +330,8 @@ TEST_F(ShardingDataTransformCumulativeMetricsTest, ReportContainsInsertsDuringCl
ASSERT_EQ(latencySection.getIntField("collectionCloningTotalLocalInsertTimeMillis"), 0);
auto activeSection = getActiveSection(_cumulativeMetrics);
- ASSERT_EQ(activeSection.getIntField("documentsProcessed"), 0);
- ASSERT_EQ(activeSection.getIntField("bytesWritten"), 0);
+ ASSERT_EQ(activeSection.getIntField("documentsCopied"), 0);
+ ASSERT_EQ(activeSection.getIntField("bytesCopied"), 0);
_cumulativeMetrics.onInsertsDuringCloning(140, 20763, Milliseconds(15));
@@ -340,8 +340,8 @@ TEST_F(ShardingDataTransformCumulativeMetricsTest, ReportContainsInsertsDuringCl
ASSERT_EQ(latencySection.getIntField("collectionCloningTotalLocalInsertTimeMillis"), 15);
activeSection = getActiveSection(_cumulativeMetrics);
- ASSERT_EQ(activeSection.getIntField("documentsProcessed"), 140);
- ASSERT_EQ(activeSection.getIntField("bytesWritten"), 20763);
+ ASSERT_EQ(activeSection.getIntField("documentsCopied"), 140);
+ ASSERT_EQ(activeSection.getIntField("bytesCopied"), 20763);
}
TEST_F(ShardingDataTransformCumulativeMetricsTest, ReportContainsInsertsDuringFetching) {
diff --git a/src/mongo/db/s/sharding_data_transform_instance_metrics.cpp b/src/mongo/db/s/sharding_data_transform_instance_metrics.cpp
index e74155e374b..807195c689d 100644
--- a/src/mongo/db/s/sharding_data_transform_instance_metrics.cpp
+++ b/src/mongo/db/s/sharding_data_transform_instance_metrics.cpp
@@ -85,11 +85,11 @@ ShardingDataTransformInstanceMetrics::ShardingDataTransformInstanceMetrics(
_originalCommand{std::move(originalCommand)},
_sourceNs{std::move(sourceNs)},
_role{role},
+ _startTime{startTime},
_clockSource{clockSource},
_observer{std::move(observer)},
_cumulativeMetrics{cumulativeMetrics},
_deregister{_cumulativeMetrics->registerInstanceMetrics(_observer.get())},
- _startTime{startTime},
_copyingStartTime{kNoDate},
_copyingEndTime{kNoDate},
_approxDocumentsToCopy{0},
@@ -118,7 +118,8 @@ ShardingDataTransformInstanceMetrics::~ShardingDataTransformInstanceMetrics() {
Milliseconds ShardingDataTransformInstanceMetrics::getHighEstimateRemainingTimeMillis() const {
switch (_role) {
case Role::kRecipient: {
- auto estimate = estimateRemainingRecipientTime(_applyingStartTime.load() != kNoDate,
+ auto estimate =
+ resharding::estimateRemainingRecipientTime(_applyingStartTime.load() != kNoDate,
_bytesCopied.load(),
_approxBytesToCopy.load(),
getCopyingElapsedTimeSecs(),
diff --git a/src/mongo/db/s/sharding_data_transform_instance_metrics.h b/src/mongo/db/s/sharding_data_transform_instance_metrics.h
index 6c508bbafd8..dbf81eabffb 100644
--- a/src/mongo/db/s/sharding_data_transform_instance_metrics.h
+++ b/src/mongo/db/s/sharding_data_transform_instance_metrics.h
@@ -164,13 +164,13 @@ protected:
"allShardsHighestRemainingOperationTimeEstimatedSecs";
private:
+ const Date_t _startTime;
+
ClockSource* _clockSource;
ObserverPtr _observer;
ShardingDataTransformCumulativeMetrics* _cumulativeMetrics;
ShardingDataTransformCumulativeMetrics::DeregistrationFunction _deregister;
- const Date_t _startTime;
-
AtomicWord<Date_t> _copyingStartTime;
AtomicWord<Date_t> _copyingEndTime;
AtomicWord<int32_t> _approxDocumentsToCopy;
diff --git a/src/mongo/db/s/sharding_ddl_coordinator.h b/src/mongo/db/s/sharding_ddl_coordinator.h
index 5972c7ce9e6..51dcc023f60 100644
--- a/src/mongo/db/s/sharding_ddl_coordinator.h
+++ b/src/mongo/db/s/sharding_ddl_coordinator.h
@@ -40,8 +40,11 @@
#include "mongo/db/s/sharding_ddl_coordinator_gen.h"
#include "mongo/db/s/sharding_ddl_coordinator_service.h"
#include "mongo/executor/task_executor.h"
+#include "mongo/logv2/log.h"
#include "mongo/util/future.h"
+#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
+
namespace mongo {
ShardingDDLCoordinatorMetadata extractShardingDDLCoordinatorMetadata(const BSONObj& coorDoc);
@@ -117,74 +120,6 @@ protected:
virtual ShardingDDLCoordinatorMetadata const& metadata() const = 0;
- template <typename StateDoc>
- StateDoc _insertStateDocument(StateDoc&& newDoc) {
- auto copyMetadata = newDoc.getShardingDDLCoordinatorMetadata();
- copyMetadata.setRecoveredFromDisk(true);
- newDoc.setShardingDDLCoordinatorMetadata(copyMetadata);
-
- auto opCtx = cc().makeOperationContext();
- PersistentTaskStore<StateDoc> store(NamespaceString::kShardingDDLCoordinatorsNamespace);
- try {
- store.add(opCtx.get(), newDoc, WriteConcerns::kMajorityWriteConcernNoTimeout);
- } catch (const ExceptionFor<ErrorCodes::DuplicateKey>&) {
- // A series of step-up and step-down events can cause a node to try and insert the
- // document when it has already been persisted locally, but we must still wait for
- // majority commit.
- const auto replCoord = repl::ReplicationCoordinator::get(opCtx.get());
- const auto lastLocalOpTime = replCoord->getMyLastAppliedOpTime();
- WaitForMajorityService::get(opCtx->getServiceContext())
- .waitUntilMajority(lastLocalOpTime, opCtx.get()->getCancellationToken())
- .get(opCtx.get());
- }
-
- return std::move(newDoc);
- }
-
- template <typename StateDoc>
- StateDoc _updateStateDocument(OperationContext* opCtx, StateDoc&& newDoc) {
- PersistentTaskStore<StateDoc> store(NamespaceString::kShardingDDLCoordinatorsNamespace);
- invariant(newDoc.getShardingDDLCoordinatorMetadata().getRecoveredFromDisk());
- store.update(opCtx,
- BSON(StateDoc::kIdFieldName << newDoc.getId().toBSON()),
- newDoc.toBSON(),
- WriteConcerns::kMajorityWriteConcernNoTimeout);
- return std::move(newDoc);
- }
-
- // lazily acqiure Logical Session ID and a txn number
- template <typename StateDoc>
- StateDoc _updateSession(OperationContext* opCtx, StateDoc const& doc) {
- auto newShardingDDLCoordinatorMetadata = doc.getShardingDDLCoordinatorMetadata();
-
- auto optSession = newShardingDDLCoordinatorMetadata.getSession();
- if (optSession) {
- auto txnNumber = optSession->getTxnNumber();
- optSession->setTxnNumber(++txnNumber);
- newShardingDDLCoordinatorMetadata.setSession(optSession);
- } else {
- auto session = InternalSessionPool::get(opCtx)->acquireSystemSession();
- newShardingDDLCoordinatorMetadata.setSession(
- ShardingDDLSession(session.getSessionId(), session.getTxnNumber()));
- }
-
- StateDoc newDoc(doc);
- newDoc.setShardingDDLCoordinatorMetadata(std::move(newShardingDDLCoordinatorMetadata));
- return _updateStateDocument(opCtx, std::move(newDoc));
- }
-
- template <typename StateDoc>
- OperationSessionInfo getCurrentSession(StateDoc const& doc) const {
- invariant(doc.getShardingDDLCoordinatorMetadata().getSession());
- ShardingDDLSession shardingDDLSession =
- *doc.getShardingDDLCoordinatorMetadata().getSession();
-
- OperationSessionInfo osi;
- osi.setSessionId(shardingDDLSession.getLsid());
- osi.setTxnNumber(shardingDDLSession.getTxnNumber());
- return osi;
- }
-
/*
* Performs a noop write on all shards and the configsvr using the sessionId and txnNumber
* specified in 'osi'.
@@ -237,4 +172,204 @@ private:
std::stack<DistLockManager::ScopedLock> _scopedLocks;
};
+template <class StateDoc>
+class ShardingDDLCoordinatorImpl : public ShardingDDLCoordinator {
+public:
+ boost::optional<BSONObj> reportForCurrentOp(
+ MongoProcessInterface::CurrentOpConnectionsMode connMode,
+ MongoProcessInterface::CurrentOpSessionsMode sessionMode) noexcept override {
+ return basicReportBuilder().obj();
+ }
+
+protected:
+ ShardingDDLCoordinatorImpl(ShardingDDLCoordinatorService* service,
+ const std::string& name,
+ const BSONObj& initialStateDoc)
+ : ShardingDDLCoordinator(service, initialStateDoc),
+ _coordinatorName(name),
+ _initialState(initialStateDoc.getOwned()),
+ _doc(StateDoc::parse(IDLParserErrorContext("CoordinatorDocument"), _initialState)) {}
+
+ ShardingDDLCoordinatorMetadata const& metadata() const override {
+ return _doc.getShardingDDLCoordinatorMetadata();
+ }
+
+
+ virtual void appendCommandInfo(BSONObjBuilder* cmdInfoBuilder) const {};
+
+ virtual BSONObjBuilder basicReportBuilder() const noexcept {
+ BSONObjBuilder bob;
+
+ // Append static info
+ bob.append("type", "op");
+ bob.append("ns", nss().toString());
+ bob.append("desc", _coordinatorName);
+ bob.append("op", "command");
+ bob.append("active", true);
+
+ // Create command description
+ BSONObjBuilder cmdInfoBuilder;
+ {
+ stdx::lock_guard lk{_docMutex};
+ if (const auto& optComment = getForwardableOpMetadata().getComment()) {
+ cmdInfoBuilder.append(optComment.get().firstElement());
+ }
+ }
+ appendCommandInfo(&cmdInfoBuilder);
+ bob.append("command", cmdInfoBuilder.obj());
+
+ return bob;
+ }
+
+ const std::string _coordinatorName;
+ const BSONObj _initialState;
+ mutable Mutex _docMutex = MONGO_MAKE_LATCH("ShardingDDLCoordinator::_docMutex");
+ StateDoc _doc;
+};
+
+template <class StateDoc, class Phase>
+class RecoverableShardingDDLCoordinator : public ShardingDDLCoordinatorImpl<StateDoc> {
+protected:
+ using ShardingDDLCoordinatorImpl<StateDoc>::_doc;
+ using ShardingDDLCoordinatorImpl<StateDoc>::_docMutex;
+
+ RecoverableShardingDDLCoordinator(ShardingDDLCoordinatorService* service,
+ const std::string& name,
+ const BSONObj& initialStateDoc)
+ : ShardingDDLCoordinatorImpl<StateDoc>(service, name, initialStateDoc) {}
+
+ virtual StringData serializePhase(const Phase& phase) const = 0;
+
+ template <typename Func>
+ auto _executePhase(const Phase& newPhase, Func&& func) {
+ return [=] {
+ const auto& currPhase = _doc.getPhase();
+
+ if (currPhase > newPhase) {
+ // Do not execute this phase if we already reached a subsequent one.
+ return;
+ }
+ if (currPhase < newPhase) {
+ // Persist the new phase if this is the first time we are executing it.
+ _enterPhase(newPhase);
+ }
+ return func();
+ };
+ }
+
+ void _enterPhase(const Phase& newPhase) {
+ auto newDoc = [&] {
+ stdx::lock_guard lk{_docMutex};
+ return _doc;
+ }();
+
+ newDoc.setPhase(newPhase);
+
+ LOGV2_DEBUG(5390501,
+ 2,
+ "DDL coordinator phase transition",
+ "coordinatorId"_attr = _doc.getId(),
+ "newPhase"_attr = serializePhase(newDoc.getPhase()),
+ "oldPhase"_attr = serializePhase(_doc.getPhase()));
+
+ auto opCtx = cc().makeOperationContext();
+
+ if (_doc.getPhase() == Phase::kUnset) {
+ _insertStateDocument(opCtx.get(), std::move(newDoc));
+ } else {
+ _updateStateDocument(opCtx.get(), std::move(newDoc));
+ }
+ }
+
+ BSONObjBuilder basicReportBuilder() const noexcept override {
+ auto baseReportBuilder = ShardingDDLCoordinatorImpl<StateDoc>::basicReportBuilder();
+
+ const auto currPhase = [&]() {
+ stdx::lock_guard l{_docMutex};
+ return _doc.getPhase();
+ }();
+
+ baseReportBuilder.append("currentPhase", serializePhase(currPhase));
+ return baseReportBuilder;
+ }
+
+ void _insertStateDocument(OperationContext* opCtx, StateDoc&& newDoc) {
+ auto copyMetadata = newDoc.getShardingDDLCoordinatorMetadata();
+ copyMetadata.setRecoveredFromDisk(true);
+ newDoc.setShardingDDLCoordinatorMetadata(copyMetadata);
+
+ PersistentTaskStore<StateDoc> store(NamespaceString::kShardingDDLCoordinatorsNamespace);
+ try {
+ store.add(opCtx, newDoc, WriteConcerns::kMajorityWriteConcernNoTimeout);
+ } catch (const ExceptionFor<ErrorCodes::DuplicateKey>&) {
+ // A series of step-up and step-down events can cause a node to try and insert the
+ // document when it has already been persisted locally, but we must still wait for
+ // majority commit.
+ const auto replCoord = repl::ReplicationCoordinator::get(opCtx);
+ const auto lastLocalOpTime = replCoord->getMyLastAppliedOpTime();
+ WaitForMajorityService::get(opCtx->getServiceContext())
+ .waitUntilMajority(lastLocalOpTime, opCtx->getCancellationToken())
+ .get(opCtx);
+ }
+
+ {
+ stdx::lock_guard lk{_docMutex};
+ _doc = std::move(newDoc);
+ }
+ }
+
+ void _updateStateDocument(OperationContext* opCtx, StateDoc&& newDoc) {
+ PersistentTaskStore<StateDoc> store(NamespaceString::kShardingDDLCoordinatorsNamespace);
+ invariant(newDoc.getShardingDDLCoordinatorMetadata().getRecoveredFromDisk());
+ store.update(opCtx,
+ BSON(StateDoc::kIdFieldName << newDoc.getId().toBSON()),
+ newDoc.toBSON(),
+ WriteConcerns::kMajorityWriteConcernNoTimeout);
+
+ {
+ stdx::lock_guard lk{_docMutex};
+ _doc = std::move(newDoc);
+ }
+ }
+
+ // lazily acquire Logical Session ID and a txn number
+ void _updateSession(OperationContext* opCtx) {
+ auto newDoc = [&] {
+ stdx::lock_guard lk{_docMutex};
+ return _doc;
+ }();
+ auto newShardingDDLCoordinatorMetadata = newDoc.getShardingDDLCoordinatorMetadata();
+
+ auto optSession = newShardingDDLCoordinatorMetadata.getSession();
+ if (optSession) {
+ auto txnNumber = optSession->getTxnNumber();
+ optSession->setTxnNumber(++txnNumber);
+ newShardingDDLCoordinatorMetadata.setSession(optSession);
+ } else {
+ auto session = InternalSessionPool::get(opCtx)->acquireSystemSession();
+ newShardingDDLCoordinatorMetadata.setSession(
+ ShardingDDLSession(session.getSessionId(), session.getTxnNumber()));
+ }
+
+ newDoc.setShardingDDLCoordinatorMetadata(std::move(newShardingDDLCoordinatorMetadata));
+ _updateStateDocument(opCtx, std::move(newDoc));
+ }
+
+ OperationSessionInfo getCurrentSession() const {
+ auto optSession = [&] {
+ stdx::lock_guard lk{_docMutex};
+ return _doc.getShardingDDLCoordinatorMetadata().getSession();
+ }();
+
+ invariant(optSession);
+
+ OperationSessionInfo osi;
+ osi.setSessionId(optSession->getLsid());
+ osi.setTxnNumber(optSession->getTxnNumber());
+ return osi;
+ }
+};
+
+#undef MONGO_LOGV2_DEFAULT_COMPONENT
+
} // namespace mongo
diff --git a/src/mongo/db/s/sharding_ddl_coordinator.idl b/src/mongo/db/s/sharding_ddl_coordinator.idl
index ce42c66a6e4..3a6b35e3eb4 100644
--- a/src/mongo/db/s/sharding_ddl_coordinator.idl
+++ b/src/mongo/db/s/sharding_ddl_coordinator.idl
@@ -47,13 +47,9 @@ enums:
kDropDatabase: "dropDatabase"
kDropCollection: "dropCollection"
kRenameCollection: "renameCollection"
- # TODO SERVER-64720 remove once 6.0 becomes last LTS
- kCreateCollectionPre60Compatible: "createCollection"
kCreateCollection: "createCollection_V2"
kRefineCollectionShardKey: "refineCollectionShardKey"
kSetAllowMigrations: "setAllowMigrations"
- # TODO (SERVER-62325): Remove pre60 compatible collMod coordinator after 6.0 branching.
- kCollModPre60Compatible: "collMod"
kCollMod: "collMod_V2"
kReshardCollection: "reshardCollection"
kReshardCollectionNoResilient: "reshardCollectionNoResilient"
diff --git a/src/mongo/db/s/sharding_ddl_coordinator_service.cpp b/src/mongo/db/s/sharding_ddl_coordinator_service.cpp
index 4073c70fc58..f4494ace7eb 100644
--- a/src/mongo/db/s/sharding_ddl_coordinator_service.cpp
+++ b/src/mongo/db/s/sharding_ddl_coordinator_service.cpp
@@ -38,7 +38,6 @@
#include "mongo/db/pipeline/document_source_count.h"
#include "mongo/db/pipeline/expression_context.h"
#include "mongo/db/s/collmod_coordinator.h"
-#include "mongo/db/s/collmod_coordinator_pre60_compatible.h"
#include "mongo/db/s/compact_structured_encryption_data_coordinator.h"
#include "mongo/db/s/create_collection_coordinator.h"
#include "mongo/db/s/database_sharding_state.h"
@@ -76,10 +75,6 @@ std::shared_ptr<ShardingDDLCoordinator> constructShardingDDLCoordinatorInstance(
break;
case DDLCoordinatorTypeEnum::kRenameCollection:
return std::make_shared<RenameCollectionCoordinator>(service, std::move(initialState));
- case DDLCoordinatorTypeEnum::kCreateCollectionPre60Compatible:
- return std::make_shared<CreateCollectionCoordinatorPre60Compatible>(
- service, std::move(initialState));
- break;
case DDLCoordinatorTypeEnum::kCreateCollection:
return std::make_shared<CreateCollectionCoordinator>(service, std::move(initialState));
break;
@@ -94,10 +89,6 @@ std::shared_ptr<ShardingDDLCoordinator> constructShardingDDLCoordinatorInstance(
case DDLCoordinatorTypeEnum::kCollMod:
return std::make_shared<CollModCoordinator>(service, std::move(initialState));
break;
- case DDLCoordinatorTypeEnum::kCollModPre60Compatible:
- return std::make_shared<CollModCoordinatorPre60Compatible>(service,
- std::move(initialState));
- break;
case DDLCoordinatorTypeEnum::kReshardCollection:
return std::make_shared<ReshardCollectionCoordinator>(service, std::move(initialState));
break;
diff --git a/src/mongo/db/s/sharding_ddl_util.cpp b/src/mongo/db/s/sharding_ddl_util.cpp
index 8b5c2113adf..89eb4107f60 100644
--- a/src/mongo/db/s/sharding_ddl_util.cpp
+++ b/src/mongo/db/s/sharding_ddl_util.cpp
@@ -340,14 +340,7 @@ void shardedRenameMetadata(OperationContext* opCtx,
auto now = VectorClock::get(opCtx)->getTime();
auto newTimestamp = now.clusterTime().asTimestamp();
fromCollType.setTimestamp(newTimestamp);
- {
- // Only bump the epoch if the whole cluster is in FCV 5.0, so chunks do not contain epochs.
- FixedFCVRegion fixedFCVRegion(opCtx);
- if (serverGlobalParams.featureCompatibility.isGreaterThanOrEqualTo(
- multiversion::FeatureCompatibilityVersion::kFullyDowngradedTo_5_0)) {
- fromCollType.setEpoch(OID::gen());
- }
- }
+ fromCollType.setEpoch(OID::gen());
// Insert the TO collection entry
uassertStatusOK(catalogClient->insertConfigDocument(
@@ -506,16 +499,8 @@ void sendDropCollectionParticipantCommandToShards(OperationContext* opCtx,
const auto cmdObj =
CommandHelpers::appendMajorityWriteConcern(dropCollectionParticipant.toBSON({}));
- try {
- sharding_ddl_util::sendAuthenticatedCommandToShards(
- opCtx, nss.db(), cmdObj.addFields(osi.toBSON()), shardIds, executor);
- } catch (const ExceptionFor<ErrorCodes::NotARetryableWriteCommand>&) {
- // Older 5.0 binaries don't support running the _shardsvrDropCollectionParticipant
- // command as a retryable write yet. In that case, retry without attaching session
- // info.
- sharding_ddl_util::sendAuthenticatedCommandToShards(
- opCtx, nss.db(), cmdObj, shardIds, executor);
- }
+ sharding_ddl_util::sendAuthenticatedCommandToShards(
+ opCtx, nss.db(), cmdObj.addFields(osi.toBSON()), shardIds, executor);
}
} // namespace sharding_ddl_util
diff --git a/src/mongo/db/s/sharding_ddl_util_test.cpp b/src/mongo/db/s/sharding_ddl_util_test.cpp
index fd4e3905980..2ff3925c53e 100644
--- a/src/mongo/db/s/sharding_ddl_util_test.cpp
+++ b/src/mongo/db/s/sharding_ddl_util_test.cpp
@@ -27,9 +27,6 @@
* it in the license file.
*/
-
-#include "mongo/platform/basic.h"
-
#include "mongo/db/dbdirectclient.h"
#include "mongo/db/logical_session_cache_noop.h"
#include "mongo/db/namespace_string.h"
@@ -47,7 +44,6 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
-
namespace mongo {
namespace {
@@ -119,7 +115,7 @@ TEST_F(ShardingDDLUtilTest, ShardedRenameMetadata) {
const int nChunks = 10;
std::vector<ChunkType> chunks;
for (int i = 0; i < nChunks; i++) {
- ChunkVersion chunkVersion(1, i, fromEpoch, collTimestamp);
+ ChunkVersion chunkVersion({fromEpoch, collTimestamp}, {1, uint32_t(i)});
ChunkType chunk;
chunk.setName(OID::gen());
chunk.setCollectionUUID(collUUID);
@@ -138,7 +134,7 @@ TEST_F(ShardingDDLUtilTest, ShardedRenameMetadata) {
const auto toEpoch = OID::gen();
const auto toUUID = UUID::gen();
for (int i = 0; i < nChunks; i++) {
- ChunkVersion chunkVersion(1, i, toEpoch, Timestamp(2));
+ ChunkVersion chunkVersion({toEpoch, Timestamp(2)}, {1, uint32_t(i)});
ChunkType chunk;
chunk.setName(OID::gen());
chunk.setCollectionUUID(toUUID);
@@ -215,7 +211,7 @@ TEST_F(ShardingDDLUtilTest, RenamePreconditionsAreMet) {
opCtx, false /* sourceIsSharded */, kToNss, false /* dropTarget */);
// Initialize a chunk
- ChunkVersion chunkVersion(1, 1, OID::gen(), Timestamp(2, 1));
+ ChunkVersion chunkVersion({OID::gen(), Timestamp(2, 1)}, {1, 1});
ChunkType chunk;
chunk.setName(OID::gen());
chunk.setCollectionUUID(UUID::gen());
@@ -256,7 +252,7 @@ TEST_F(ShardingDDLUtilTest, RenamePreconditionsTargetCollectionExists) {
auto opCtx = operationContext();
// Initialize a chunk
- ChunkVersion chunkVersion(1, 1, OID::gen(), Timestamp(2, 1));
+ ChunkVersion chunkVersion({OID::gen(), Timestamp(2, 1)}, {1, 1});
ChunkType chunk;
chunk.setName(OID::gen());
chunk.setCollectionUUID(UUID::gen());
diff --git a/src/mongo/db/s/sharding_mongod_test_fixture.cpp b/src/mongo/db/s/sharding_mongod_test_fixture.cpp
index a05fddaa213..c7b078c89e0 100644
--- a/src/mongo/db/s/sharding_mongod_test_fixture.cpp
+++ b/src/mongo/db/s/sharding_mongod_test_fixture.cpp
@@ -71,7 +71,6 @@
#include "mongo/s/client/shard_remote.h"
#include "mongo/s/grid.h"
#include "mongo/s/query/cluster_cursor_manager.h"
-#include "mongo/s/request_types/set_shard_version_request.h"
#include "mongo/util/clock_source_mock.h"
#include "mongo/util/tick_source_mock.h"
diff --git a/src/mongo/db/s/sharding_server_status.cpp b/src/mongo/db/s/sharding_server_status.cpp
index 8d560454382..82de4cfc5c9 100644
--- a/src/mongo/db/s/sharding_server_status.cpp
+++ b/src/mongo/db/s/sharding_server_status.cpp
@@ -73,14 +73,20 @@ public:
result.append("configsvrConnectionString",
shardRegistry->getConfigServerConnectionString().toString());
+ const auto vcTime = VectorClock::get(opCtx)->getTime();
+
const auto configOpTime = [&]() {
- const auto vcTime = VectorClock::get(opCtx)->getTime();
const auto vcConfigTimeTs = vcTime.configTime().asTimestamp();
return mongo::repl::OpTime(vcConfigTimeTs, mongo::repl::OpTime::kUninitializedTerm);
}();
-
configOpTime.append(&result, "lastSeenConfigServerOpTime");
+ const auto topologyOpTime = [&]() {
+ const auto vcTopologyTimeTs = vcTime.topologyTime().asTimestamp();
+ return mongo::repl::OpTime(vcTopologyTimeTs, mongo::repl::OpTime::kUninitializedTerm);
+ }();
+ topologyOpTime.append(&result, "lastSeenTopologyOpTime");
+
const long long maxChunkSizeInBytes =
grid->getBalancerConfiguration()->getMaxChunkSizeBytes();
result.append("maxChunkSizeInBytes", maxChunkSizeInBytes);
diff --git a/src/mongo/db/s/sharding_util.cpp b/src/mongo/db/s/sharding_util.cpp
index fde594f35cb..c082038d714 100644
--- a/src/mongo/db/s/sharding_util.cpp
+++ b/src/mongo/db/s/sharding_util.cpp
@@ -28,18 +28,12 @@
*/
-#include "mongo/platform/basic.h"
-
#include "mongo/db/s/sharding_util.h"
#include <fmt/format.h>
#include "mongo/db/commands.h"
-#include "mongo/db/dbdirectclient.h"
-#include "mongo/db/repl/repl_client_info.h"
-#include "mongo/db/s/type_shard_collection.h"
#include "mongo/logv2/log.h"
-#include "mongo/s/catalog/type_collection.h"
#include "mongo/s/request_types/flush_routing_table_cache_updates_gen.h"
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
@@ -113,45 +107,5 @@ std::vector<AsyncRequestsSender::Response> sendCommandToShards(
return responses;
}
-void downgradeCollectionBalancingFieldsToPre53(OperationContext* opCtx) {
- const NamespaceString collNss = [&]() {
- if (serverGlobalParams.clusterRole == ClusterRole::ShardServer) {
- return NamespaceString::kShardConfigCollectionsNamespace;
- } else if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
- return CollectionType::ConfigNS;
- }
- MONGO_UNREACHABLE;
- }();
-
- write_ops::UpdateCommandRequest updateOp(collNss);
- updateOp.setUpdates({[&] {
- write_ops::UpdateOpEntry entry;
- BSONObjBuilder updateCmd;
- BSONObjBuilder unsetBuilder(updateCmd.subobjStart("$unset"));
- unsetBuilder.append(CollectionType::kMaxChunkSizeBytesFieldName, 0);
- if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
- unsetBuilder.append(CollectionType::kNoAutoSplitFieldName, 0);
- } else {
- unsetBuilder.append(ShardCollectionTypeBase::kAllowAutoSplitFieldName, 0);
- }
- unsetBuilder.doneFast();
- entry.setQ({});
- const BSONObj update = updateCmd.obj();
- entry.setU(write_ops::UpdateModification::parseFromClassicUpdate(update));
- entry.setUpsert(false);
- entry.setMulti(true);
- return entry;
- }()});
-
- DBDirectClient client(opCtx);
- client.update(updateOp);
-
- const WriteConcernOptions majorityWC{
- WriteConcernOptions::kMajority, WriteConcernOptions::SyncMode::UNSET, Seconds(0)};
- WriteConcernResult ignoreResult;
- auto latestOpTime = repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp();
- uassertStatusOK(waitForWriteConcern(opCtx, latestOpTime, majorityWC, &ignoreResult));
-}
-
} // namespace sharding_util
} // namespace mongo
diff --git a/src/mongo/db/s/sharding_util.h b/src/mongo/db/s/sharding_util.h
index c5021b4d46f..783c6703138 100644
--- a/src/mongo/db/s/sharding_util.h
+++ b/src/mongo/db/s/sharding_util.h
@@ -61,14 +61,5 @@ std::vector<AsyncRequestsSender::Response> sendCommandToShards(
const std::shared_ptr<executor::TaskExecutor>& executor,
bool throwOnError = true);
-/**
- * Unset the `noAutosplit` and `maxChunkSizeBytes` fields from:
- * - `config.collections` on the CSRS
- * - `config.cache.collections` on shards
- *
- * TODO SERVER-62693 remove this method and all its usages once 6.0 branches out
- */
-void downgradeCollectionBalancingFieldsToPre53(OperationContext* opCtx);
-
} // namespace sharding_util
} // namespace mongo
diff --git a/src/mongo/db/s/sharding_write_router_bm.cpp b/src/mongo/db/s/sharding_write_router_bm.cpp
index 7a47c6eed21..6d20ad82215 100644
--- a/src/mongo/db/s/sharding_write_router_bm.cpp
+++ b/src/mongo/db/s/sharding_write_router_bm.cpp
@@ -103,7 +103,7 @@ std::pair<std::vector<mongo::ChunkType>, mongo::ChunkManager> createChunks(
for (uint32_t i = 0; i < nChunks; ++i) {
chunks.emplace_back(collIdentifier,
getRangeForChunk(i, nChunks),
- ChunkVersion{i + 1, 0, collEpoch, collTimestamp},
+ ChunkVersion({collEpoch, collTimestamp}, {i + 1, 0}),
pessimalShardSelector(i, nShards, nChunks));
}
diff --git a/src/mongo/db/s/shardsvr_abort_reshard_collection_command.cpp b/src/mongo/db/s/shardsvr_abort_reshard_collection_command.cpp
index 4e95395faaa..f0918cc5766 100644
--- a/src/mongo/db/s/shardsvr_abort_reshard_collection_command.cpp
+++ b/src/mongo/db/s/shardsvr_abort_reshard_collection_command.cpp
@@ -99,7 +99,7 @@ public:
// If abort actually went through, the resharding documents should be cleaned up.
// If they still exists, it could be because that it was interrupted or it is no
// longer primary.
- doNoopWrite(opCtx, "_shardsvrAbortReshardCollection no-op", ns());
+ resharding::doNoopWrite(opCtx, "_shardsvrAbortReshardCollection no-op", ns());
PersistentTaskStore<CommonReshardingMetadata> donorReshardingOpStore(
NamespaceString::kDonorReshardingOperationsNamespace);
uassert(5563802,
diff --git a/src/mongo/db/s/shardsvr_collmod_command.cpp b/src/mongo/db/s/shardsvr_collmod_command.cpp
index f0564913aa1..3df3e521579 100644
--- a/src/mongo/db/s/shardsvr_collmod_command.cpp
+++ b/src/mongo/db/s/shardsvr_collmod_command.cpp
@@ -33,19 +33,12 @@
#include "mongo/db/coll_mod_gen.h"
#include "mongo/db/coll_mod_reply_validation.h"
#include "mongo/db/commands.h"
-#include "mongo/db/commands/feature_compatibility_version.h"
#include "mongo/db/curop.h"
#include "mongo/db/s/collmod_coordinator.h"
-#include "mongo/db/s/collmod_coordinator_pre60_compatible.h"
#include "mongo/db/s/sharding_state.h"
#include "mongo/db/s/sharding_util.h"
#include "mongo/db/timeseries/catalog_helper.h"
-#include "mongo/db/timeseries/timeseries_commands_conversion_helper.h"
#include "mongo/logv2/log.h"
-#include "mongo/s/chunk_manager_targeter.h"
-#include "mongo/s/cluster_commands_helpers.h"
-#include "mongo/s/grid.h"
-#include "mongo/util/fail_point.h"
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kSharding
@@ -53,9 +46,6 @@
namespace mongo {
namespace {
-MONGO_FAIL_POINT_DEFINE(collModPrimaryDispatching);
-MONGO_FAIL_POINT_DEFINE(collModCoordinatorPre60Compatible);
-
class ShardsvrCollModCommand final : public BasicCommandWithRequestParser<ShardsvrCollModCommand> {
public:
using Request = ShardsvrCollMod;
@@ -112,29 +102,6 @@ public:
CurOp::get(opCtx)->raiseDbProfileLevel(
CollectionCatalog::get(opCtx)->getDatabaseProfileLevel(cmd.getNamespace().dbName()));
- boost::optional<FixedFCVRegion> fcvRegion;
- fcvRegion.emplace(opCtx);
-
- if (MONGO_unlikely(collModPrimaryDispatching.shouldFail())) {
- return runWithDispatchingCommands(opCtx, result, cmd);
- } else if (MONGO_unlikely(collModCoordinatorPre60Compatible.shouldFail())) {
- return runWithDDLCoordinatorPre60Compatible(opCtx, result, cmd, fcvRegion);
- }
-
- if (fcvRegion.get()->isLessThan(multiversion::FeatureCompatibilityVersion::kVersion_5_3)) {
- return runWithDispatchingCommands(opCtx, result, cmd);
- } else if (fcvRegion.get()->isLessThan(
- multiversion::FeatureCompatibilityVersion::kVersion_6_0)) {
- return runWithDDLCoordinatorPre60Compatible(opCtx, result, cmd, fcvRegion);
- } else {
- return runWithDDLCoordinator(opCtx, result, cmd, fcvRegion);
- }
- }
-
- bool runWithDDLCoordinator(OperationContext* opCtx,
- BSONObjBuilder& result,
- const ShardsvrCollMod& cmd,
- boost::optional<FixedFCVRegion>& fcvRegion) {
auto coordinatorDoc = CollModCoordinatorDocument();
coordinatorDoc.setCollModRequest(cmd.getCollModRequest());
coordinatorDoc.setShardingDDLCoordinatorMetadata(
@@ -142,73 +109,10 @@ public:
auto service = ShardingDDLCoordinatorService::getService(opCtx);
auto collModCoordinator = checked_pointer_cast<CollModCoordinator>(
service->getOrCreateInstance(opCtx, coordinatorDoc.toBSON()));
- fcvRegion = boost::none;
- result.appendElements(collModCoordinator->getResult(opCtx));
- return true;
- }
-
- bool runWithDDLCoordinatorPre60Compatible(OperationContext* opCtx,
- BSONObjBuilder& result,
- const ShardsvrCollMod& cmd,
- boost::optional<FixedFCVRegion>& fcvRegion) {
- auto coordinatorDoc = CollModCoordinatorDocument();
- coordinatorDoc.setCollModRequest(cmd.getCollModRequest());
- coordinatorDoc.setShardingDDLCoordinatorMetadata(
- {{cmd.getNamespace(), DDLCoordinatorTypeEnum::kCollModPre60Compatible}});
- auto service = ShardingDDLCoordinatorService::getService(opCtx);
- auto collModCoordinator = checked_pointer_cast<CollModCoordinatorPre60Compatible>(
- service->getOrCreateInstance(opCtx, coordinatorDoc.toBSON()));
- fcvRegion = boost::none;
result.appendElements(collModCoordinator->getResult(opCtx));
return true;
}
- bool runWithDispatchingCommands(OperationContext* opCtx,
- BSONObjBuilder& result,
- const ShardsvrCollMod& cmd) {
- const auto& nss = cmd.getNamespace();
- auto collModCmd = CollMod(nss);
- collModCmd.setCollModRequest(cmd.getCollModRequest());
- auto collModCmdObj = collModCmd.toBSON({});
-
- const auto targeter = ChunkManagerTargeter(opCtx, nss);
- const auto& routingInfo = targeter.getRoutingInfo();
- if (targeter.timeseriesNamespaceNeedsRewrite(nss)) {
- collModCmdObj =
- timeseries::makeTimeseriesCommand(collModCmdObj,
- nss,
- CollMod::kCommandName,
- CollMod::kIsTimeseriesNamespaceFieldName);
- }
-
- std::set<ShardId> participants;
- if (routingInfo.isSharded()) {
- std::unique_ptr<CollatorInterface> collator;
- const auto expCtx =
- make_intrusive<ExpressionContext>(opCtx, std::move(collator), targeter.getNS());
- routingInfo.getShardIdsForQuery(
- expCtx, {} /* query */, {} /* collation */, &participants);
- } else {
- participants.insert(routingInfo.dbPrimary());
- }
-
- auto executor = Grid::get(opCtx)->getExecutorPool()->getFixedExecutor();
- const auto& responses = sharding_util::sendCommandToShards(
- opCtx,
- targeter.getNS().db(),
- CommandHelpers::appendMajorityWriteConcern(collModCmdObj, opCtx->getWriteConcern()),
- {std::make_move_iterator(participants.begin()),
- std::make_move_iterator(participants.end())},
- executor);
-
- std::string errmsg;
- auto ok = appendRawResponses(opCtx, &errmsg, &result, std::move(responses)).responseOK;
- if (!errmsg.empty()) {
- CommandHelpers::appendSimpleCommandStatus(result, ok, errmsg);
- }
- return ok;
- }
-
void validateResult(const BSONObj& resultObj) final {
StringDataSet ignorableFields({"raw", "ok", "errmsg"});
auto reply = Response::parse(IDLParserErrorContext("CollModReply"),
diff --git a/src/mongo/db/s/shardsvr_collmod_participant_command.cpp b/src/mongo/db/s/shardsvr_collmod_participant_command.cpp
index b321236caf1..2a7e78886b2 100644
--- a/src/mongo/db/s/shardsvr_collmod_participant_command.cpp
+++ b/src/mongo/db/s/shardsvr_collmod_participant_command.cpp
@@ -69,6 +69,10 @@ public:
return Command::AllowedOnSecondary::kNever;
}
+ bool supportsRetryableWrite() const final {
+ return true;
+ }
+
class Invocation final : public InvocationBase {
public:
using InvocationBase::InvocationBase;
diff --git a/src/mongo/db/s/shardsvr_commit_reshard_collection_command.cpp b/src/mongo/db/s/shardsvr_commit_reshard_collection_command.cpp
index f4240c1eb0a..3d9be030fcb 100644
--- a/src/mongo/db/s/shardsvr_commit_reshard_collection_command.cpp
+++ b/src/mongo/db/s/shardsvr_commit_reshard_collection_command.cpp
@@ -107,7 +107,7 @@ public:
// If commit actually went through, the resharding documents will be cleaned up. If
// documents still exist, it could be because that commit was interrupted or that the
// underlying replica set node is no longer primary.
- doNoopWrite(opCtx, "_shardsvrCommitReshardCollection no-op", ns());
+ resharding::doNoopWrite(opCtx, "_shardsvrCommitReshardCollection no-op", ns());
PersistentTaskStore<CommonReshardingMetadata> donorReshardingOpStore(
NamespaceString::kDonorReshardingOperationsNamespace);
uassert(5795302,
diff --git a/src/mongo/db/s/shardsvr_create_collection_command.cpp b/src/mongo/db/s/shardsvr_create_collection_command.cpp
index bcc2e17a9fd..3769e253b7b 100644
--- a/src/mongo/db/s/shardsvr_create_collection_command.cpp
+++ b/src/mongo/db/s/shardsvr_create_collection_command.cpp
@@ -144,21 +144,11 @@ public:
FixedFCVRegion fixedFcvRegion(opCtx);
auto coordinatorDoc = [&] {
- if (serverGlobalParams.featureCompatibility.isLessThan(
- multiversion::FeatureCompatibilityVersion::kVersion_6_0)) {
- auto doc = CreateCollectionCoordinatorDocumentPre60Compatible();
- doc.setShardingDDLCoordinatorMetadata(
- {{std::move(nss),
- DDLCoordinatorTypeEnum::kCreateCollectionPre60Compatible}});
- doc.setCreateCollectionRequest(std::move(createCmdRequest));
- return doc.toBSON();
- } else {
- auto doc = CreateCollectionCoordinatorDocument();
- doc.setShardingDDLCoordinatorMetadata(
- {{std::move(nss), DDLCoordinatorTypeEnum::kCreateCollection}});
- doc.setCreateCollectionRequest(std::move(createCmdRequest));
- return doc.toBSON();
- }
+ auto doc = CreateCollectionCoordinatorDocument();
+ doc.setShardingDDLCoordinatorMetadata(
+ {{std::move(nss), DDLCoordinatorTypeEnum::kCreateCollection}});
+ doc.setCreateCollectionRequest(std::move(createCmdRequest));
+ return doc.toBSON();
}();
auto service = ShardingDDLCoordinatorService::getService(opCtx);
diff --git a/src/mongo/db/s/shardsvr_create_collection_participant_command.cpp b/src/mongo/db/s/shardsvr_create_collection_participant_command.cpp
index fd7c8217403..4157f1145f8 100644
--- a/src/mongo/db/s/shardsvr_create_collection_participant_command.cpp
+++ b/src/mongo/db/s/shardsvr_create_collection_participant_command.cpp
@@ -65,6 +65,10 @@ public:
return AllowedOnSecondary::kNever;
}
+ bool supportsRetryableWrite() const final {
+ return true;
+ }
+
class Invocation final : public InvocationBase {
public:
using InvocationBase::InvocationBase;
@@ -76,6 +80,11 @@ public:
CommandHelpers::uassertCommandRunWithMajority(Request::kCommandName,
opCtx->getWriteConcern());
+ const auto txnParticipant = TransactionParticipant::get(opCtx);
+ uassert(6077300,
+ str::stream() << Request::kCommandName << " must be run as a retryable write",
+ txnParticipant);
+
opCtx->setAlwaysInterruptAtStepDownOrUp_UNSAFE();
MigrationDestinationManager::cloneCollectionIndexesAndOptions(
@@ -86,23 +95,15 @@ public:
request().getIdIndex(),
request().getOptions()});
- // The txnParticipant will only be missing when the command was sent from a coordinator
- // running an old 5.0.0 binary that didn't attach a sessionId & txnNumber.
- // TODO SERVER-60773: Once 6.0 has branched out, txnParticipant must always exist. Add a
- // uassert for that.
- auto txnParticipant = TransactionParticipant::get(opCtx);
- if (txnParticipant) {
- // Since no write that generated a retryable write oplog entry with this sessionId
- // and txnNumber happened, we need to make a dummy write so that the session gets
- // durably persisted on the oplog. This must be the last operation done on this
- // command.
- DBDirectClient client(opCtx);
- client.update(NamespaceString::kServerConfigurationNamespace.ns(),
- BSON("_id" << Request::kCommandName),
- BSON("$inc" << BSON("count" << 1)),
- true /* upsert */,
- false /* multi */);
- }
+ // Since no write that generated a retryable write oplog entry with this sessionId and
+ // txnNumber happened, we need to make a dummy write so that the session gets durably
+ // persisted on the oplog. This must be the last operation done on this command.
+ DBDirectClient client(opCtx);
+ client.update(NamespaceString::kServerConfigurationNamespace.ns(),
+ BSON("_id" << Request::kCommandName),
+ BSON("$inc" << BSON("count" << 1)),
+ true /* upsert */,
+ false /* multi */);
}
private:
diff --git a/src/mongo/db/s/shardsvr_drop_collection_participant_command.cpp b/src/mongo/db/s/shardsvr_drop_collection_participant_command.cpp
index 658c894a209..31c19139c38 100644
--- a/src/mongo/db/s/shardsvr_drop_collection_participant_command.cpp
+++ b/src/mongo/db/s/shardsvr_drop_collection_participant_command.cpp
@@ -64,6 +64,10 @@ public:
"directly. Participates in droping a collection.";
}
+ bool supportsRetryableWrite() const final {
+ return true;
+ }
+
using Request = ShardsvrDropCollectionParticipant;
class Invocation final : public InvocationBase {
@@ -75,6 +79,11 @@ public:
CommandHelpers::uassertCommandRunWithMajority(Request::kCommandName,
opCtx->getWriteConcern());
+ const auto txnParticipant = TransactionParticipant::get(opCtx);
+ uassert(6077301,
+ str::stream() << Request::kCommandName << " must be run as a retryable write",
+ txnParticipant);
+
opCtx->setAlwaysInterruptAtStepDownOrUp_UNSAFE();
try {
@@ -86,23 +95,16 @@ public:
"namespace"_attr = ns());
}
- // The txnParticipant will only be missing when the command was sent from a coordinator
- // running an old 5.0.0 binary that didn't attach a sessionId & txnNumber.
- // TODO SERVER-60773: Once 6.0 has branched out, txnParticipant must always exist. Add a
- // uassert for that.
- auto txnParticipant = TransactionParticipant::get(opCtx);
- if (txnParticipant) {
- // Since no write that generated a retryable write oplog entry with this sessionId
- // and txnNumber happened, we need to make a dummy write so that the session gets
- // durably persisted on the oplog. This must be the last operation done on this
- // command.
- DBDirectClient client(opCtx);
- client.update(NamespaceString::kServerConfigurationNamespace.ns(),
- BSON("_id" << Request::kCommandName),
- BSON("$inc" << BSON("count" << 1)),
- true /* upsert */,
- false /* multi */);
- }
+
+ // Since no write that generated a retryable write oplog entry with this sessionId and
+ // txnNumber happened, we need to make a dummy write so that the session gets durably
+ // persisted on the oplog. This must be the last operation done on this command.
+ DBDirectClient client(opCtx);
+ client.update(NamespaceString::kServerConfigurationNamespace.ns(),
+ BSON("_id" << Request::kCommandName),
+ BSON("$inc" << BSON("count" << 1)),
+ true /* upsert */,
+ false /* multi */);
}
private:
diff --git a/src/mongo/db/s/shardsvr_merge_chunks_command.cpp b/src/mongo/db/s/shardsvr_merge_chunks_command.cpp
index c3971e7afd6..8b3892a907b 100644
--- a/src/mongo/db/s/shardsvr_merge_chunks_command.cpp
+++ b/src/mongo/db/s/shardsvr_merge_chunks_command.cpp
@@ -149,8 +149,7 @@ void mergeChunks(OperationContext* opCtx,
auto shardVersionReceived = [&]() -> boost::optional<ChunkVersion> {
// Old versions might not have the shardVersion field
if (cmdResponse.response[ChunkVersion::kShardVersionField]) {
- return ChunkVersion::fromBSONPositionalOrNewerFormat(
- cmdResponse.response[ChunkVersion::kShardVersionField]);
+ return ChunkVersion::parse(cmdResponse.response[ChunkVersion::kShardVersionField]);
}
return boost::none;
}();
diff --git a/src/mongo/db/s/shardsvr_participant_block_command.cpp b/src/mongo/db/s/shardsvr_participant_block_command.cpp
index 9ff5f58127c..c6774bd7bec 100644
--- a/src/mongo/db/s/shardsvr_participant_block_command.cpp
+++ b/src/mongo/db/s/shardsvr_participant_block_command.cpp
@@ -62,6 +62,10 @@ public:
return Command::AllowedOnSecondary::kNever;
}
+ bool supportsRetryableWrite() const final {
+ return true;
+ }
+
class Invocation final : public InvocationBase {
public:
using InvocationBase::InvocationBase;
diff --git a/src/mongo/db/s/shardsvr_rename_collection_participant_command.cpp b/src/mongo/db/s/shardsvr_rename_collection_participant_command.cpp
index 73a182754e5..16d75a2bfb9 100644
--- a/src/mongo/db/s/shardsvr_rename_collection_participant_command.cpp
+++ b/src/mongo/db/s/shardsvr_rename_collection_participant_command.cpp
@@ -68,6 +68,10 @@ public:
return AllowedOnSecondary::kNever;
}
+ bool supportsRetryableWrite() const final {
+ return true;
+ }
+
class Invocation final : public InvocationBase {
public:
using InvocationBase::InvocationBase;
@@ -76,6 +80,11 @@ public:
CommandHelpers::uassertCommandRunWithMajority(Request::kCommandName,
opCtx->getWriteConcern());
+ const auto txnParticipant = TransactionParticipant::get(opCtx);
+ uassert(6077302,
+ str::stream() << Request::kCommandName << " must be run as a retryable write",
+ txnParticipant);
+
auto const shardingState = ShardingState::get(opCtx);
uassertStatusOK(shardingState->canAcceptShardedCommands());
auto const& req = request();
@@ -100,23 +109,15 @@ public:
renameCollectionParticipant->getBlockCRUDAndRenameCompletionFuture().get(opCtx);
- // The txnParticipant will only be missing when the command was sent from a coordinator
- // running an old 5.0.0 binary that didn't attach a sessionId & txnNumber.
- // TODO SERVER-60773: Once 6.0 has branched out, txnParticipant must always exist. Add a
- // uassert for that.
- auto txnParticipant = TransactionParticipant::get(opCtx);
- if (txnParticipant) {
- // Since no write that generated a retryable write oplog entry with this sessionId
- // and txnNumber happened, we need to make a dummy write so that the session gets
- // durably persisted on the oplog. This must be the last operation done on this
- // command.
- DBDirectClient client(opCtx);
- client.update(NamespaceString::kServerConfigurationNamespace.ns(),
- BSON("_id" << Request::kCommandName),
- BSON("$inc" << BSON("count" << 1)),
- true /* upsert */,
- false /* multi */);
- }
+ // Since no write that generated a retryable write oplog entry with this sessionId and
+ // txnNumber happened, we need to make a dummy write so that the session gets durably
+ // persisted on the oplog. This must be the last operation done on this command.
+ DBDirectClient client(opCtx);
+ client.update(NamespaceString::kServerConfigurationNamespace.ns(),
+ BSON("_id" << Request::kCommandName),
+ BSON("$inc" << BSON("count" << 1)),
+ true /* upsert */,
+ false /* multi */);
}
private:
@@ -162,6 +163,10 @@ public:
return AllowedOnSecondary::kNever;
}
+ bool supportsRetryableWrite() const final {
+ return true;
+ }
+
class Invocation final : public InvocationBase {
public:
using InvocationBase::InvocationBase;
@@ -170,6 +175,11 @@ public:
CommandHelpers::uassertCommandRunWithMajority(Request::kCommandName,
opCtx->getWriteConcern());
+ const auto txnParticipant = TransactionParticipant::get(opCtx);
+ uassert(6077303,
+ str::stream() << Request::kCommandName << " must be run as a retryable write",
+ txnParticipant);
+
auto const shardingState = ShardingState::get(opCtx);
uassertStatusOK(shardingState->canAcceptShardedCommands());
@@ -187,23 +197,16 @@ public:
optRenameCollectionParticipant.get()->getUnblockCrudFuture().get(opCtx);
}
- // The txnParticipant will only be missing when the command was sent from a coordinator
- // running an old 5.0.0 binary that didn't attach a sessionId & txnNumber.
- // TODO SERVER-60773: Once 6.0 has branched out, txnParticipant must always exist. Add a
- // uassert for that.
- auto txnParticipant = TransactionParticipant::get(opCtx);
- if (txnParticipant) {
- // Since no write that generated a retryable write oplog entry with this sessionId
- // and txnNumber happened, we need to make a dummy write so that the session gets
- // durably persisted on the oplog. This must be the last operation done on this
- // command.
- DBDirectClient client(opCtx);
- client.update(NamespaceString::kServerConfigurationNamespace.ns(),
- BSON("_id" << Request::kCommandName),
- BSON("$inc" << BSON("count" << 1)),
- true /* upsert */,
- false /* multi */);
- }
+ // Since no write that generated a retryable write oplog entry with this sessionId
+ // and txnNumber happened, we need to make a dummy write so that the session gets
+ // durably persisted on the oplog. This must be the last operation done on this
+ // command.
+ DBDirectClient client(opCtx);
+ client.update(NamespaceString::kServerConfigurationNamespace.ns(),
+ BSON("_id" << Request::kCommandName),
+ BSON("$inc" << BSON("count" << 1)),
+ true /* upsert */,
+ false /* multi */);
}
private:
diff --git a/src/mongo/db/s/shardsvr_resharding_operation_time_command.cpp b/src/mongo/db/s/shardsvr_resharding_operation_time_command.cpp
index 4c3e05a7879..56bf7b644f3 100644
--- a/src/mongo/db/s/shardsvr_resharding_operation_time_command.cpp
+++ b/src/mongo/db/s/shardsvr_resharding_operation_time_command.cpp
@@ -108,10 +108,9 @@ public:
}
Response typedRun(OperationContext* opCtx) {
- auto instances =
- getReshardingStateMachines<ReshardingRecipientService,
- ReshardingRecipientService::RecipientStateMachine>(opCtx,
- ns());
+ auto instances = resharding::getReshardingStateMachines<
+ ReshardingRecipientService,
+ ReshardingRecipientService::RecipientStateMachine>(opCtx, ns());
if (instances.empty()) {
return Response{boost::none, boost::none};
}
diff --git a/src/mongo/db/s/shardsvr_set_cluster_parameter_command.cpp b/src/mongo/db/s/shardsvr_set_cluster_parameter_command.cpp
index d5d2593bdf2..e8ed9e14277 100644
--- a/src/mongo/db/s/shardsvr_set_cluster_parameter_command.cpp
+++ b/src/mongo/db/s/shardsvr_set_cluster_parameter_command.cpp
@@ -127,6 +127,10 @@ public:
AllowedOnSecondary secondaryAllowed(ServiceContext*) const override {
return AllowedOnSecondary::kNever;
}
+
+ bool supportsRetryableWrite() const final {
+ return true;
+ }
} shardsvrSetClusterParameterCmd;
} // namespace
diff --git a/src/mongo/db/s/shardsvr_set_user_write_block_mode_command.cpp b/src/mongo/db/s/shardsvr_set_user_write_block_mode_command.cpp
index 49bdc1b90bb..ceecece4027 100644
--- a/src/mongo/db/s/shardsvr_set_user_write_block_mode_command.cpp
+++ b/src/mongo/db/s/shardsvr_set_user_write_block_mode_command.cpp
@@ -198,6 +198,10 @@ public:
AllowedOnSecondary secondaryAllowed(ServiceContext*) const override {
return AllowedOnSecondary::kNever;
}
+
+ bool supportsRetryableWrite() const final {
+ return true;
+ }
} shardsvrSetUserWriteBlockModeCmd;
} // namespace
diff --git a/src/mongo/db/s/split_chunk.cpp b/src/mongo/db/s/split_chunk.cpp
index 062f5b47752..964871d3740 100644
--- a/src/mongo/db/s/split_chunk.cpp
+++ b/src/mongo/db/s/split_chunk.cpp
@@ -255,8 +255,7 @@ StatusWith<boost::optional<ChunkRange>> splitChunk(
boost::optional<ChunkVersion> shardVersionReceived = [&]() -> boost::optional<ChunkVersion> {
// old versions might not have the shardVersion field
if (cmdResponse.response[ChunkVersion::kShardVersionField]) {
- return ChunkVersion::fromBSONPositionalOrNewerFormat(
- cmdResponse.response[ChunkVersion::kShardVersionField]);
+ return ChunkVersion::parse(cmdResponse.response[ChunkVersion::kShardVersionField]);
}
return boost::none;
}();
diff --git a/src/mongo/db/s/transaction_coordinator_service.cpp b/src/mongo/db/s/transaction_coordinator_service.cpp
index 41b758cffec..c317922c251 100644
--- a/src/mongo/db/s/transaction_coordinator_service.cpp
+++ b/src/mongo/db/s/transaction_coordinator_service.cpp
@@ -379,6 +379,10 @@ TransactionCoordinatorService::getAllRemovalFuturesForCoordinatorsForInternalTra
std::shared_ptr<CatalogAndScheduler> cas = _getCatalogAndScheduler(opCtx);
auto& catalog = cas->catalog;
+ // On step up, we want to wait until the catalog has recovered all active transaction
+ // coordinators before getting the removal futures.
+ cas->recoveryTaskCompleted->get(opCtx);
+
auto predicate = [](const LogicalSessionId lsid,
const TxnNumberAndRetryCounter txnNumberAndRetryCounter,
const std::shared_ptr<TransactionCoordinator> transactionCoordinator) {
diff --git a/src/mongo/db/s/txn_two_phase_commit_cmds.cpp b/src/mongo/db/s/txn_two_phase_commit_cmds.cpp
index e60c2ad339e..dd4b94aae1c 100644
--- a/src/mongo/db/s/txn_two_phase_commit_cmds.cpp
+++ b/src/mongo/db/s/txn_two_phase_commit_cmds.cpp
@@ -59,6 +59,14 @@ public:
return true;
}
+ bool isTransactionCommand() const final {
+ return true;
+ }
+
+ bool allowedInTransactions() const final {
+ return true;
+ }
+
class PrepareTimestamp {
public:
PrepareTimestamp(Timestamp timestamp) : _timestamp(std::move(timestamp)) {}
@@ -383,6 +391,18 @@ public:
return AllowedOnSecondary::kNever;
}
+ bool isTransactionCommand() const final {
+ return true;
+ }
+
+ bool shouldCheckoutSession() const final {
+ return false;
+ }
+
+ bool allowedInTransactions() const final {
+ return true;
+ }
+
} coordinateCommitTransactionCmd;
} // namespace
diff --git a/src/mongo/db/s/type_shard_collection.cpp b/src/mongo/db/s/type_shard_collection.cpp
index 4dbb0b1c2f8..2628297e0d7 100644
--- a/src/mongo/db/s/type_shard_collection.cpp
+++ b/src/mongo/db/s/type_shard_collection.cpp
@@ -52,15 +52,6 @@ ShardCollectionType::ShardCollectionType(const BSONObj& obj) {
uassert(ErrorCodes::ShardKeyNotFound,
str::stream() << "Empty shard key. Failed to parse: " << obj.toString(),
!getKeyPattern().toBSON().isEmpty());
-
- // Last refreshed collection version is stored as a timestamp in the BSON representation of
- // shard collection type for legacy reasons. We therefore explicitly convert this timestamp, if
- // it exists, into a chunk version.
- if (getLastRefreshedCollectionVersion()) {
- ChunkVersion version = *getLastRefreshedCollectionVersion();
- setLastRefreshedCollectionVersion(ChunkVersion(
- version.majorVersion(), version.minorVersion(), getEpoch(), getTimestamp()));
- }
}
BSONObj ShardCollectionType::toBSON() const {
@@ -83,4 +74,15 @@ void ShardCollectionType::setAllowMigrations(bool allowMigrations) {
setPre50CompatibleAllowMigrations(false);
}
+boost::optional<ChunkVersion> ShardCollectionType::getLastRefreshedCollectionVersion() const {
+ // Last refreshed collection version is stored as a timestamp in the BSON representation of
+ // shard collection type for legacy reasons. We therefore explicitly convert this timestamp, if
+ // it exists, into a chunk version.
+ if (!getLastRefreshedCollectionMajorMinorVersion())
+ return boost::none;
+
+ Timestamp majorMinor = *getLastRefreshedCollectionMajorMinorVersion();
+ return ChunkVersion({getEpoch(), getTimestamp()}, {majorMinor.getSecs(), majorMinor.getInc()});
+}
+
} // namespace mongo
diff --git a/src/mongo/db/s/type_shard_collection.h b/src/mongo/db/s/type_shard_collection.h
index 8180358174a..de6e56eb784 100644
--- a/src/mongo/db/s/type_shard_collection.h
+++ b/src/mongo/db/s/type_shard_collection.h
@@ -42,7 +42,7 @@ public:
using ShardCollectionTypeBase::kEnterCriticalSectionCounterFieldName;
using ShardCollectionTypeBase::kEpochFieldName;
using ShardCollectionTypeBase::kKeyPatternFieldName;
- using ShardCollectionTypeBase::kLastRefreshedCollectionVersionFieldName;
+ using ShardCollectionTypeBase::kLastRefreshedCollectionMajorMinorVersionFieldName;
using ShardCollectionTypeBase::kNssFieldName;
using ShardCollectionTypeBase::kRefreshingFieldName;
using ShardCollectionTypeBase::kReshardingFieldsFieldName;
@@ -57,7 +57,6 @@ public:
using ShardCollectionTypeBase::getEnterCriticalSectionCounter;
using ShardCollectionTypeBase::getEpoch;
using ShardCollectionTypeBase::getKeyPattern;
- using ShardCollectionTypeBase::getLastRefreshedCollectionVersion;
using ShardCollectionTypeBase::getMaxChunkSizeBytes;
using ShardCollectionTypeBase::getNss;
using ShardCollectionTypeBase::getRefreshing;
@@ -94,6 +93,8 @@ public:
return getPre50CompatibleAllowMigrations().value_or(true);
}
void setAllowMigrations(bool allowMigrations);
+
+ boost::optional<ChunkVersion> getLastRefreshedCollectionVersion() const;
};
} // namespace mongo
diff --git a/src/mongo/db/s/type_shard_collection.idl b/src/mongo/db/s/type_shard_collection.idl
index 051a6de35d3..d56b231e302 100644
--- a/src/mongo/db/s/type_shard_collection.idl
+++ b/src/mongo/db/s/type_shard_collection.idl
@@ -80,15 +80,6 @@ imports:
- "mongo/s/resharding/type_collection_fields.idl"
- "mongo/s/type_collection_common_types.idl"
-types:
- ChunkVersionLegacy:
- bson_serialization_type: any
- description: "An object representing a chunk version for a collection. Ignores the
- component in the chunk version for legacy reasons."
- cpp_type: ChunkVersion
- serializer: ChunkVersion::serialiseMajorMinorVersionOnlyForShardCollectionType
- deserializer: ChunkVersion::parseMajorMinorVersionOnlyFromShardCollectionType
-
structs:
ShardCollectionTypeBase:
description: "Represents the layout and contents of documents contained in the shard
@@ -102,11 +93,12 @@ structs:
optional: false
epoch:
type: objectid
+ optional: false
description: "Uniquely identifies this instance of the collection, in case of
drop/create or shard key refine."
- optional: false
timestamp:
type: timestamp
+ optional: false
description: "Uniquely identifies this incarnation of the collection. Only changes
in case of drop and create, or shard key refine.
This field will store the ClusterTime of the Config Server when the
@@ -141,7 +133,8 @@ structs:
chunk metadata."
optional: true
lastRefreshedCollectionVersion:
- type: ChunkVersionLegacy
+ type: timestamp
+ cpp_name: lastRefreshedCollectionMajorMinorVersion
description: "Set by primaries and used by shard secondaries to safely refresh chunk
metadata. Indicates the collection version of the last complete chunk
metadata refresh, and is used to indicate if a refresh occurred if the
diff --git a/src/mongo/db/s/type_shard_collection_test.cpp b/src/mongo/db/s/type_shard_collection_test.cpp
index 59a85b1e13c..f21418cc206 100644
--- a/src/mongo/db/s/type_shard_collection_test.cpp
+++ b/src/mongo/db/s/type_shard_collection_test.cpp
@@ -67,25 +67,12 @@ TEST(ShardCollectionType, FromBSONEpochMatchesLastRefreshedCollectionVersionWhen
<< ShardCollectionType::kUuidFieldName << UUID::gen()
<< ShardCollectionType::kKeyPatternFieldName << kKeyPattern
<< ShardCollectionType::kUniqueFieldName << true
- << ShardCollectionType::kLastRefreshedCollectionVersionFieldName << Timestamp(1, 1)));
- ASSERT_EQ(epoch, shardCollType.getLastRefreshedCollectionVersion()->epoch());
- ASSERT_EQ(timestamp, shardCollType.getLastRefreshedCollectionVersion()->getTimestamp());
-}
-
-TEST(ShardCollectionType, FromBSONEpochMatchesLastRefreshedCollectionVersionWhenDate) {
- OID epoch = OID::gen();
- Timestamp timestamp(1, 1);
-
- ShardCollectionType shardCollType(
- BSON(ShardCollectionType::kNssFieldName
- << kNss.ns() << ShardCollectionType::kEpochFieldName << epoch
- << ShardCollectionType::kUuidFieldName << UUID::gen()
- << ShardCollectionType::kTimestampFieldName << timestamp
- << ShardCollectionType::kKeyPatternFieldName << kKeyPattern
- << ShardCollectionType::kUniqueFieldName << true
- << ShardCollectionType::kLastRefreshedCollectionVersionFieldName << Date_t()));
+ << ShardCollectionType::kLastRefreshedCollectionMajorMinorVersionFieldName
+ << Timestamp(123, 45)));
ASSERT_EQ(epoch, shardCollType.getLastRefreshedCollectionVersion()->epoch());
ASSERT_EQ(timestamp, shardCollType.getLastRefreshedCollectionVersion()->getTimestamp());
+ ASSERT_EQ(Timestamp(123, 45),
+ Timestamp(shardCollType.getLastRefreshedCollectionVersion()->toLong()));
}
TEST(ShardCollectionType, ToBSONEmptyDefaultCollationNotIncluded) {
diff --git a/src/mongo/db/server_options.h b/src/mongo/db/server_options.h
index 643fe7a46a1..25a69ffa995 100644
--- a/src/mongo/db/server_options.h
+++ b/src/mongo/db/server_options.h
@@ -201,15 +201,6 @@ struct ServerGlobalParams {
version != multiversion::GenericFCV::kLastLTS;
}
- bool isFCVUpgradingToOrAlreadyLatest() const {
- auto currentVersion = getVersion();
-
- // (Generic FCV reference): This FCV reference should exist across LTS binary versions.
- return currentVersion == multiversion::GenericFCV::kUpgradingFromLastLTSToLatest ||
- isGreaterThanOrEqualTo(
- multiversion::GenericFCV::kUpgradingFromLastContinuousToLatest);
- }
-
bool isFCVDowngradingOrAlreadyDowngradedFromLatest() const {
auto currentVersion = getVersion();
diff --git a/src/mongo/db/serverless/shard_split_donor_op_observer.cpp b/src/mongo/db/serverless/shard_split_donor_op_observer.cpp
index ce1d0e55ddf..9de2da2e33d 100644
--- a/src/mongo/db/serverless/shard_split_donor_op_observer.cpp
+++ b/src/mongo/db/serverless/shard_split_donor_op_observer.cpp
@@ -42,6 +42,10 @@ bool isSecondary(const OperationContext* opCtx) {
return !opCtx->writesAreReplicated();
}
+bool isPrimary(const OperationContext* opCtx) {
+ return opCtx->writesAreReplicated();
+}
+
const auto tenantIdsToDeleteDecoration =
OperationContext::declareDecoration<boost::optional<std::vector<std::string>>>();
@@ -50,6 +54,13 @@ ShardSplitDonorDocument parseAndValidateDonorDocument(const BSONObj& doc) {
ShardSplitDonorDocument::parse(IDLParserErrorContext("donorStateDoc"), doc);
const std::string errmsg = "Invalid donor state doc, {}: {}";
+ if (donorStateDoc.getExpireAt()) {
+ uassert(ErrorCodes::BadValue,
+ "Contains 'expireAt' but the split has not committed or aborted",
+ donorStateDoc.getState() == ShardSplitDonorStateEnum::kCommitted ||
+ donorStateDoc.getState() == ShardSplitDonorStateEnum::kAborted);
+ }
+
switch (donorStateDoc.getState()) {
case ShardSplitDonorStateEnum::kUninitialized:
uassert(ErrorCodes::BadValue,
@@ -68,6 +79,12 @@ ShardSplitDonorDocument parseAndValidateDonorDocument(const BSONObj& doc) {
doc.toString()),
!donorStateDoc.getAbortReason());
break;
+ case ShardSplitDonorStateEnum::kAbortingIndexBuilds:
+ uassert(ErrorCodes::BadValue,
+ errmsg,
+ !donorStateDoc.getBlockTimestamp() && !donorStateDoc.getCommitOrAbortOpTime() &&
+ !donorStateDoc.getAbortReason());
+ break;
case ShardSplitDonorStateEnum::kBlocking:
uassert(ErrorCodes::BadValue,
fmt::format(errmsg,
@@ -125,54 +142,61 @@ ShardSplitDonorDocument parseAndValidateDonorDocument(const BSONObj& doc) {
* Initializes the TenantMigrationDonorAccessBlocker for the tenant migration denoted by the given
* state doc.
*/
-void onBlockerInitialization(OperationContext* opCtx,
- const ShardSplitDonorDocument& donorStateDoc) {
- invariant(donorStateDoc.getState() == ShardSplitDonorStateEnum::kBlocking);
- invariant(donorStateDoc.getBlockTimestamp());
-
- auto optionalTenants = donorStateDoc.getTenantIds();
- invariant(optionalTenants);
-
- const auto& tenantIds = optionalTenants.get();
+void onTransitionToAbortingIndexBuilds(OperationContext* opCtx,
+ const ShardSplitDonorDocument& donorStateDoc) {
+ invariant(donorStateDoc.getState() == ShardSplitDonorStateEnum::kAbortingIndexBuilds);
+ invariant(donorStateDoc.getTenantIds());
+ invariant(donorStateDoc.getRecipientConnectionString());
+
+ auto tenantIds = *donorStateDoc.getTenantIds();
+ auto recipientConnectionString = *donorStateDoc.getRecipientConnectionString();
+ for (const auto& tenantId : tenantIds) {
+ auto mtab = std::make_shared<TenantMigrationDonorAccessBlocker>(
+ opCtx->getServiceContext(),
+ donorStateDoc.getId(),
+ tenantId.toString(),
+ MigrationProtocolEnum::kMultitenantMigrations,
+ recipientConnectionString.toString());
+
+ TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext()).add(tenantId, mtab);
+ }
- // The primary create and sets the tenant access blocker to blocking within the
- // ShardSplitDonorService.
- if (isSecondary(opCtx)) {
- auto recipientConnectionString = [stateDoc = donorStateDoc]() {
- if (stateDoc.getRecipientConnectionString()) {
- return *stateDoc.getRecipientConnectionString();
+ if (isPrimary(opCtx)) {
+ // onRollback is not registered on secondaries since secondaries should not fail to
+ // apply the write.
+ opCtx->recoveryUnit()->onRollback([opCtx, tenantIds] {
+ for (const auto& tenantId : tenantIds) {
+ TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext())
+ .remove(tenantId, TenantMigrationAccessBlocker::BlockerType::kDonor);
}
-
- auto recipientTagName = stateDoc.getRecipientTagName();
- invariant(recipientTagName);
- auto recipientSetName = stateDoc.getRecipientSetName();
- invariant(recipientSetName);
- auto config = repl::ReplicationCoordinator::get(cc().getServiceContext())->getConfig();
- return serverless::makeRecipientConnectionString(
- config, *recipientTagName, *recipientSetName);
- }();
-
- for (const auto& tenantId : tenantIds) {
- auto mtab = std::make_shared<TenantMigrationDonorAccessBlocker>(
- opCtx->getServiceContext(),
- donorStateDoc.getId(),
- tenantId.toString(),
- MigrationProtocolEnum::kMultitenantMigrations,
- recipientConnectionString.toString());
-
- TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext())
- .add(tenantId, mtab);
-
- // No rollback handler is necessary as the write should not fail on secondaries.
- mtab->startBlockingWrites();
- }
+ });
}
+}
- for (const auto& tenantId : tenantIds) {
+/**
+ * Transitions the TenantMigrationDonorAccessBlocker to the blocking state.
+ */
+void onTransitionToBlocking(OperationContext* opCtx, const ShardSplitDonorDocument& donorStateDoc) {
+ invariant(donorStateDoc.getState() == ShardSplitDonorStateEnum::kBlocking);
+ invariant(donorStateDoc.getBlockTimestamp());
+ invariant(donorStateDoc.getTenantIds());
+
+ auto tenantIds = *donorStateDoc.getTenantIds();
+ for (auto tenantId : tenantIds) {
auto mtab = tenant_migration_access_blocker::getTenantMigrationDonorAccessBlocker(
opCtx->getServiceContext(), tenantId);
invariant(mtab);
+ if (isSecondary(opCtx)) {
+ // A primary calls startBlockingWrites on the TenantMigrationDonorAccessBlocker before
+ // reserving the OpTime for the "start blocking" write, so only secondaries call
+ // startBlockingWrites on the TenantMigrationDonorAccessBlocker in the op observer.
+ mtab->startBlockingWrites();
+ }
+
+ // Both primaries and secondaries call startBlockingReadsAfter in the op observer, since
+ // startBlockingReadsAfter just needs to be called before the "start blocking" write's oplog
+ // hole is filled.
mtab->startBlockingReadsAfter(donorStateDoc.getBlockTimestamp().get());
}
}
@@ -206,9 +230,9 @@ void onTransitionToAborted(OperationContext* opCtx, const ShardSplitDonorDocumen
auto tenants = donorStateDoc.getTenantIds();
if (!tenants) {
- // The only case where there can be no tenants is when the instance is created by the abort
- // command. In that case, no tenant migration blockers are created and the state will go
- // straight to abort.
+ // The only case where there can be no tenants is when the instance is created by the
+ // abort command. In that case, no tenant migration blockers are created and the state
+ // will go straight to abort.
invariant(donorStateDoc.getState() == ShardSplitDonorStateEnum::kUninitialized);
return;
}
@@ -242,34 +266,35 @@ public:
_opCtx->getServiceContext(), tenantId);
if (!mtab) {
- // The state doc and TenantMigrationDonorAccessBlocker for this migration
- // were removed immediately after expireAt was set. This is unlikely to
- // occur in production where the garbage collection delay should be
- // sufficiently large.
+ // The state doc and TenantMigrationDonorAccessBlocker for this
+ // migration were removed immediately after expireAt was set. This is
+ // unlikely to occur in production where the garbage collection delay
+ // should be sufficiently large.
continue;
}
- if (!_opCtx->writesAreReplicated()) {
- // Setting expireAt implies that the TenantMigrationDonorAccessBlocker for
- // this migration will be removed shortly after this. However, a lagged
- // secondary might not manage to advance its majority commit point past the
- // migration commit or abort opTime and consequently transition out of the
- // blocking state before the TenantMigrationDonorAccessBlocker is removed.
- // When this occurs, blocked reads or writes will be left waiting for the
- // migration decision indefinitely. To avoid that, notify the
- // TenantMigrationDonorAccessBlocker here that the commit or abort opTime
- // has been majority committed (guaranteed to be true since by design the
- // donor never marks its state doc as garbage collectable before the
- // migration decision is majority committed).
+ if (isSecondary(_opCtx)) {
+ // Setting expireAt implies that the TenantMigrationDonorAccessBlocker
+ // for this migration will be removed shortly after this. However, a
+ // lagged secondary might not manage to advance its majority commit
+ // point past the migration commit or abort opTime and consequently
+ // transition out of the blocking state before the
+ // TenantMigrationDonorAccessBlocker is removed. When this occurs,
+ // blocked reads or writes will be left waiting for the migration
+ // decision indefinitely. To avoid that, notify the
+ // TenantMigrationDonorAccessBlocker here that the commit or abort
+ // opTime has been majority committed (guaranteed to be true since by
+ // design the donor never marks its state doc as garbage collectable
+ // before the migration decision is majority committed).
mtab->onMajorityCommitPointUpdate(
_donorStateDoc.getCommitOrAbortOpTime().get());
}
if (_donorStateDoc.getState() == ShardSplitDonorStateEnum::kAborted) {
invariant(mtab->inStateAborted());
- // The migration durably aborted and is now marked as garbage collectable,
- // remove its TenantMigrationDonorAccessBlocker right away to allow
- // back-to-back migration retries.
+ // The migration durably aborted and is now marked as garbage
+ // collectable, remove its TenantMigrationDonorAccessBlocker right away
+ // to allow back-to-back migration retries.
TenantMigrationAccessBlockerRegistry::get(_opCtx->getServiceContext())
.remove(tenantId, TenantMigrationAccessBlocker::BlockerType::kDonor);
}
@@ -305,7 +330,7 @@ void ShardSplitDonorOpObserver::onInserts(OperationContext* opCtx,
std::vector<InsertStatement>::const_iterator first,
std::vector<InsertStatement>::const_iterator last,
bool fromMigrate) {
- if (nss != NamespaceString::kTenantSplitDonorsNamespace ||
+ if (nss != NamespaceString::kShardSplitDonorsNamespace ||
tenant_migration_access_blocker::inRecoveryMode(opCtx)) {
return;
}
@@ -313,45 +338,41 @@ void ShardSplitDonorOpObserver::onInserts(OperationContext* opCtx,
for (auto it = first; it != last; it++) {
auto donorStateDoc = parseAndValidateDonorDocument(it->doc);
switch (donorStateDoc.getState()) {
- case ShardSplitDonorStateEnum::kBlocking:
- onBlockerInitialization(opCtx, donorStateDoc);
+ case ShardSplitDonorStateEnum::kAbortingIndexBuilds:
+ onTransitionToAbortingIndexBuilds(opCtx, donorStateDoc);
break;
case ShardSplitDonorStateEnum::kAborted:
// If the operation starts aborted, do not do anything.
break;
- case ShardSplitDonorStateEnum::kUninitialized:
- case ShardSplitDonorStateEnum::kCommitted:
- uasserted(ErrorCodes::IllegalOperation,
- "cannot insert a donor's state doc with 'state' other than 'kAborted' or "
- "'kBlocking'");
- break;
default:
- MONGO_UNREACHABLE;
+ uasserted(ErrorCodes::IllegalOperation,
+ "Cannot insert donor's state document with state other than 'aborted' or "
+ "'aborting index builds'.");
}
}
}
void ShardSplitDonorOpObserver::onUpdate(OperationContext* opCtx,
const OplogUpdateEntryArgs& args) {
- if (args.nss != NamespaceString::kTenantSplitDonorsNamespace ||
+ if (args.nss != NamespaceString::kShardSplitDonorsNamespace ||
tenant_migration_access_blocker::inRecoveryMode(opCtx)) {
return;
}
auto donorStateDoc = parseAndValidateDonorDocument(args.updateArgs->updatedDoc);
switch (donorStateDoc.getState()) {
+ case ShardSplitDonorStateEnum::kBlocking:
+ onTransitionToBlocking(opCtx, donorStateDoc);
+ break;
case ShardSplitDonorStateEnum::kCommitted:
case ShardSplitDonorStateEnum::kAborted:
opCtx->recoveryUnit()->registerChange(
std::make_unique<TenantMigrationDonorCommitOrAbortHandler>(opCtx, donorStateDoc));
break;
- case ShardSplitDonorStateEnum::kBlocking:
- uasserted(ErrorCodes::IllegalOperation,
- "The state document should be inserted as blocking and never transition to "
- "blocking");
- break;
default:
- MONGO_UNREACHABLE;
+ uasserted(ErrorCodes::IllegalOperation,
+ "Cannot update donor's state document with state other than 'aborted', "
+ "'committed', or 'aborted'");
}
}
@@ -359,13 +380,12 @@ void ShardSplitDonorOpObserver::aboutToDelete(OperationContext* opCtx,
NamespaceString const& nss,
const UUID& uuid,
BSONObj const& doc) {
- if (nss != NamespaceString::kTenantSplitDonorsNamespace ||
+ if (nss != NamespaceString::kShardSplitDonorsNamespace ||
tenant_migration_access_blocker::inRecoveryMode(opCtx)) {
return;
}
auto donorStateDoc = parseAndValidateDonorDocument(doc);
-
uassert(ErrorCodes::IllegalOperation,
str::stream() << "cannot delete a donor's state document " << doc
<< " since it has not been marked as garbage collectable and is not a"
@@ -390,8 +410,7 @@ void ShardSplitDonorOpObserver::onDelete(OperationContext* opCtx,
const UUID& uuid,
StmtId stmtId,
const OplogDeleteEntryArgs& args) {
- if (nss != NamespaceString::kTenantSplitDonorsNamespace ||
- !tenantIdsToDeleteDecoration(opCtx) ||
+ if (nss != NamespaceString::kShardSplitDonorsNamespace || !tenantIdsToDeleteDecoration(opCtx) ||
tenant_migration_access_blocker::inRecoveryMode(opCtx)) {
return;
}
@@ -414,7 +433,7 @@ repl::OpTime ShardSplitDonorOpObserver::onDropCollection(OperationContext* opCtx
const UUID& uuid,
std::uint64_t numRecords,
const CollectionDropType dropType) {
- if (collectionName == NamespaceString::kTenantSplitDonorsNamespace) {
+ if (collectionName == NamespaceString::kShardSplitDonorsNamespace) {
opCtx->recoveryUnit()->onCommit([opCtx](boost::optional<Timestamp>) {
TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext())
.removeAll(TenantMigrationAccessBlocker::BlockerType::kDonor);
diff --git a/src/mongo/db/serverless/shard_split_donor_op_observer_test.cpp b/src/mongo/db/serverless/shard_split_donor_op_observer_test.cpp
index c52868126e6..6f2e376de47 100644
--- a/src/mongo/db/serverless/shard_split_donor_op_observer_test.cpp
+++ b/src/mongo/db/serverless/shard_split_donor_op_observer_test.cpp
@@ -35,6 +35,7 @@
#include "mongo/db/serverless/shard_split_donor_op_observer.h"
#include "mongo/db/serverless/shard_split_state_machine_gen.h"
#include "mongo/db/serverless/shard_split_test_utils.h"
+#include "mongo/db/serverless/shard_split_utils.h"
#include "mongo/db/service_context_d_test_fixture.h"
#include "mongo/dbtests/mock/mock_replica_set.h"
@@ -129,7 +130,8 @@ protected:
std::vector<std::shared_ptr<TenantMigrationDonorAccessBlocker>>
createBlockersAndStartBlockingWrites(const std::vector<std::string>& tenants,
OperationContext* opCtx,
- const std::string& connectionStr) {
+ const std::string& connectionStr,
+ bool isSecondary = false) {
auto uuid = UUID::gen();
std::vector<std::shared_ptr<TenantMigrationDonorAccessBlocker>> blockers;
for (const auto& tenant : tenants) {
@@ -141,7 +143,10 @@ protected:
_connectionStr);
blockers.push_back(mtab);
- mtab->startBlockingWrites();
+ if (!isSecondary) {
+ mtab->startBlockingWrites();
+ }
+
TenantMigrationAccessBlockerRegistry::get(opCtx->getServiceContext()).add(tenant, mtab);
}
@@ -160,7 +165,7 @@ protected:
MockReplicaSet("donorSet", 3, true /* hasPrimary */, true /* dollarPrefixHosts */);
MockReplicaSet _recipientReplSet =
MockReplicaSet("recipientSet", 3, true /* hasPrimary */, true /* dollarPrefixHosts */);
- const NamespaceString _nss = NamespaceString::kTenantSplitDonorsNamespace;
+ const NamespaceString _nss = NamespaceString::kShardSplitDonorsNamespace;
std::vector<std::string> _tenantIds = {"tenant1", "tenantAB"};
std::string _connectionStr = _replSet.getConnectionString();
UUID _uuid = UUID::gen();
@@ -253,7 +258,30 @@ TEST_F(ShardSplitDonorOpObserverTest, InsertValidAbortedDocument) {
}
}
-TEST_F(ShardSplitDonorOpObserverTest, InsertBlockingDocumentPrimary) {
+TEST_F(ShardSplitDonorOpObserverTest, InsertAbortingIndexDocumentPrimary) {
+ test::shard_split::reconfigToAddRecipientNodes(
+ getServiceContext(), _recipientTagName, _replSet.getHosts(), _recipientReplSet.getHosts());
+
+ auto stateDocument = defaultStateDocument();
+ stateDocument.setState(ShardSplitDonorStateEnum::kAbortingIndexBuilds);
+ stateDocument.setRecipientConnectionString(mongo::serverless::makeRecipientConnectionString(
+ repl::ReplicationCoordinator::get(_opCtx.get())->getConfig(),
+ _recipientTagName,
+ _recipientSetName));
+
+ auto mtabVerifier = [opCtx = _opCtx.get()](std::shared_ptr<TenantMigrationAccessBlocker> mtab) {
+ ASSERT_TRUE(mtab);
+ // The OpObserver does not set the mtab to blocking for primaries.
+ ASSERT_OK(mtab->checkIfCanWrite(Timestamp(1, 1)));
+ ASSERT_OK(mtab->checkIfCanWrite(Timestamp(1, 3)));
+ ASSERT_OK(mtab->checkIfLinearizableReadWasAllowed(opCtx));
+ ASSERT_EQ(mtab->checkIfCanBuildIndex().code(), ErrorCodes::TenantMigrationConflict);
+ };
+
+ runInsertTestCase(stateDocument, _tenantIds, mtabVerifier);
+}
+
+TEST_F(ShardSplitDonorOpObserverTest, UpdateBlockingDocumentPrimary) {
test::shard_split::reconfigToAddRecipientNodes(
getServiceContext(), _recipientTagName, _replSet.getHosts(), _recipientReplSet.getHosts());
@@ -274,15 +302,16 @@ TEST_F(ShardSplitDonorOpObserverTest, InsertBlockingDocumentPrimary) {
ASSERT_EQ(mtab->checkIfCanBuildIndex().code(), ErrorCodes::TenantMigrationConflict);
};
- runInsertTestCase(stateDocument, _tenantIds, mtabVerifier);
+ runUpdateTestCase(stateDocument, _tenantIds, mtabVerifier);
}
-TEST_F(ShardSplitDonorOpObserverTest, InsertBlockingDocumentSecondary) {
+TEST_F(ShardSplitDonorOpObserverTest, UpdateBlockingDocumentSecondary) {
test::shard_split::reconfigToAddRecipientNodes(
getServiceContext(), _recipientTagName, _replSet.getHosts(), _recipientReplSet.getHosts());
// This indicates the instance is secondary for the OpObserver.
repl::UnreplicatedWritesBlock setSecondary(_opCtx.get());
+ createBlockersAndStartBlockingWrites(_tenantIds, _opCtx.get(), _connectionStr, true);
auto stateDocument = defaultStateDocument();
stateDocument.setState(ShardSplitDonorStateEnum::kBlocking);
@@ -299,18 +328,15 @@ TEST_F(ShardSplitDonorOpObserverTest, InsertBlockingDocumentSecondary) {
ASSERT_EQ(mtab->checkIfCanBuildIndex().code(), ErrorCodes::TenantMigrationConflict);
};
- runInsertTestCase(stateDocument, _tenantIds, mtabVerifier);
+ runUpdateTestCase(stateDocument, _tenantIds, mtabVerifier);
}
-
-TEST_F(ShardSplitDonorOpObserverTest, TransitionToBlockingFail) {
+TEST_F(ShardSplitDonorOpObserverTest, TransitionToAbortingIndexBuildsFail) {
// This indicates the instance is secondary for the OpObserver.
repl::UnreplicatedWritesBlock setSecondary(_opCtx.get());
auto stateDocument = defaultStateDocument();
- stateDocument.setState(ShardSplitDonorStateEnum::kBlocking);
- stateDocument.setBlockTimestamp(Timestamp(1, 1));
-
+ stateDocument.setState(ShardSplitDonorStateEnum::kAbortingIndexBuilds);
CollectionUpdateArgs updateArgs;
updateArgs.stmtIds = {};
diff --git a/src/mongo/db/serverless/shard_split_donor_service.cpp b/src/mongo/db/serverless/shard_split_donor_service.cpp
index f37a9416f5e..deb78f1779b 100644
--- a/src/mongo/db/serverless/shard_split_donor_service.cpp
+++ b/src/mongo/db/serverless/shard_split_donor_service.cpp
@@ -68,54 +68,16 @@ MONGO_FAIL_POINT_DEFINE(pauseShardSplitAfterMarkingStateGarbageCollectable);
MONGO_FAIL_POINT_DEFINE(pauseShardSplitBeforeSplitConfigRemoval);
MONGO_FAIL_POINT_DEFINE(skipShardSplitRecipientCleanup);
MONGO_FAIL_POINT_DEFINE(pauseShardSplitBeforeLeavingBlockingState);
+MONGO_FAIL_POINT_DEFINE(pauseShardSplitAfterUpdatingToCommittedState);
+MONGO_FAIL_POINT_DEFINE(pauseShardSplitBeforeSendingStepUpToRecipients);
+MONGO_FAIL_POINT_DEFINE(pauseShardSplitAfterReceivingAbortCmd);
const Backoff kExponentialBackoff(Seconds(1), Milliseconds::max());
-bool shouldStopInsertingDonorStateDoc(Status status) {
- return status.isOK() || status == ErrorCodes::ConflictingOperationInProgress;
-}
-
-void setStateDocTimestamps(WithLock,
- ShardSplitDonorStateEnum nextState,
- repl::OpTime time,
- ShardSplitDonorDocument& stateDoc) {
- switch (nextState) {
- case ShardSplitDonorStateEnum::kUninitialized:
- break;
- case ShardSplitDonorStateEnum::kBlocking:
- stateDoc.setBlockTimestamp(time.getTimestamp());
- break;
- case ShardSplitDonorStateEnum::kAborted:
- stateDoc.setCommitOrAbortOpTime(time);
- break;
- case ShardSplitDonorStateEnum::kCommitted:
- stateDoc.setCommitOrAbortOpTime(time);
- break;
- default:
- MONGO_UNREACHABLE;
- }
-}
-
bool isAbortedDocumentPersistent(WithLock, ShardSplitDonorDocument& stateDoc) {
return !!stateDoc.getAbortReason();
}
-void setMtabToBlockingForTenants(ServiceContext* context,
- OperationContext* opCtx,
- const std::vector<StringData>& tenantIds) {
- // Start blocking writes before getting an oplog slot to guarantee no
- // writes to the tenant's data can commit with a timestamp after the
- // block timestamp.
- for (const auto& tenantId : tenantIds) {
- auto mtab = tenant_migration_access_blocker::getTenantMigrationDonorAccessBlocker(context,
- tenantId);
- invariant(mtab);
- mtab->startBlockingWrites();
-
- opCtx->recoveryUnit()->onRollback([mtab] { mtab->rollBackStartBlocking(); });
- }
-}
-
void checkForTokenInterrupt(const CancellationToken& token) {
uassert(ErrorCodes::CallbackCanceled, "Donor service interrupted", !token.isCanceled());
}
@@ -304,11 +266,14 @@ ShardSplitDonorService::DonorStateMachine::DonorStateMachine(
void ShardSplitDonorService::DonorStateMachine::tryAbort() {
LOGV2(6086502, "Received 'abortShardSplit' command.", "id"_attr = _migrationId);
- stdx::lock_guard<Latch> lg(_mutex);
- _abortRequested = true;
- if (_abortSource) {
- _abortSource->cancel();
+ {
+ stdx::lock_guard<Latch> lg(_mutex);
+ _abortRequested = true;
+ if (_abortSource) {
+ _abortSource->cancel();
+ }
}
+ pauseShardSplitAfterReceivingAbortCmd.pauseWhileSet();
}
void ShardSplitDonorService::DonorStateMachine::tryForget() {
@@ -417,19 +382,16 @@ SemiFuture<void> ShardSplitDonorService::DonorStateMachine::run(
// Note we do not use the abort split token here because the abortShardSplit
// command waits for a decision to be persisted which will not happen if
// inserting the initial state document fails.
- if (MONGO_unlikely(pauseShardSplitBeforeBlockingState.shouldFail())) {
- pauseShardSplitBeforeBlockingState.pauseWhileSet();
- }
- return _enterBlockingOrAbortedState(executor, primaryToken, abortToken);
+ return _enterAbortIndexBuildsOrAbortedState(executor, primaryToken, abortToken);
+ })
+ .then([this, executor, abortToken] {
+ // Start tracking the abortToken for killing operation contexts
+ _cancelableOpCtxFactory.emplace(abortToken, _markKilledExecutor);
+ return _abortIndexBuildsAndEnterBlockingState(executor, abortToken);
})
.then([this, executor, abortToken, criticalSectionTimer] {
criticalSectionTimer->reset();
- checkForTokenInterrupt(abortToken);
- _cancelableOpCtxFactory.emplace(abortToken, _markKilledExecutor);
- _abortIndexBuilds(abortToken);
- })
- .then([this, executor, abortToken] {
auto opCtx = _cancelableOpCtxFactory->makeOperationContext(&cc());
pauseShardSplitAfterBlocking.pauseWhileSet(opCtx.get());
@@ -440,7 +402,12 @@ SemiFuture<void> ShardSplitDonorService::DonorStateMachine::run(
return _applySplitConfigToDonor(executor, abortToken);
})
.then([this, executor, abortToken] {
- return _waitForRecipientToAcceptSplitAndTriggerElection(executor, abortToken);
+ return _waitForRecipientToAcceptSplit(executor, abortToken);
+ })
+ .then([this, executor, primaryToken] {
+ // only cancel operations on stepdown from here out
+ _cancelableOpCtxFactory.emplace(primaryToken, _markKilledExecutor);
+ return _triggerElectionAndEnterCommitedState(executor, primaryToken);
})
// anchor ensures the instance will still exists even if the primary stepped down
.onCompletion([this,
@@ -572,6 +539,143 @@ bool ShardSplitDonorService::DonorStateMachine::_hasInstalledSplitConfig(WithLoc
config.getRecipientConfig()->getReplSetName() == *_stateDoc.getRecipientSetName();
}
+ConnectionString ShardSplitDonorService::DonorStateMachine::_setupAcceptanceMonitoring(
+ WithLock lock, const CancellationToken& abortToken) {
+ auto recipientConnectionString = [stateDoc = _stateDoc]() {
+ if (stateDoc.getRecipientConnectionString()) {
+ return *stateDoc.getRecipientConnectionString();
+ }
+
+ auto recipientTagName = stateDoc.getRecipientTagName();
+ invariant(recipientTagName);
+ auto recipientSetName = stateDoc.getRecipientSetName();
+ invariant(recipientSetName);
+ auto config = repl::ReplicationCoordinator::get(cc().getServiceContext())->getConfig();
+ return serverless::makeRecipientConnectionString(
+ config, *recipientTagName, *recipientSetName);
+ }();
+
+ // Always start the replica set monitor if we haven't reached a decision yet
+ _splitAcceptancePromise.setWith([&]() -> Future<void> {
+ if (_stateDoc.getState() > ShardSplitDonorStateEnum::kBlocking ||
+ MONGO_unlikely(skipShardSplitWaitForSplitAcceptance.shouldFail())) {
+ return SemiFuture<void>::makeReady().unsafeToInlineFuture();
+ }
+
+ // Optionally select a task executor for unit testing
+ auto executor = _splitAcceptanceTaskExecutorForTest
+ ? *_splitAcceptanceTaskExecutorForTest
+ : _shardSplitService->getInstanceCleanupExecutor();
+
+ LOGV2(6142508,
+ "Monitoring recipient nodes for split acceptance.",
+ "id"_attr = _migrationId,
+ "recipientConnectionString"_attr = recipientConnectionString);
+
+ return detail::makeRecipientAcceptSplitFuture(
+ executor, abortToken, recipientConnectionString, _migrationId)
+ .unsafeToInlineFuture();
+ });
+
+ return recipientConnectionString;
+}
+
+ExecutorFuture<void>
+ShardSplitDonorService::DonorStateMachine::_enterAbortIndexBuildsOrAbortedState(
+ const ScopedTaskExecutorPtr& executor,
+ const CancellationToken& primaryToken,
+ const CancellationToken& abortToken) {
+ ShardSplitDonorStateEnum nextState;
+ {
+ stdx::lock_guard<Latch> lg(_mutex);
+ if (_stateDoc.getState() == ShardSplitDonorStateEnum::kAborted) {
+ if (isAbortedDocumentPersistent(lg, _stateDoc)) {
+                // Node has stepped up and created an instance using a document in abort state. No
+ // need to write the document as it already exists.
+ return ExecutorFuture(**executor);
+ }
+
+ _abortReason =
+ Status(ErrorCodes::TenantMigrationAborted, "Aborted due to 'abortShardSplit'.");
+ BSONObjBuilder bob;
+ _abortReason->serializeErrorToBSON(&bob);
+ _stateDoc.setAbortReason(bob.obj());
+ _stateDoc.setExpireAt(_serviceContext->getFastClockSource()->now() +
+ Milliseconds{repl::shardSplitGarbageCollectionDelayMS.load()});
+ nextState = ShardSplitDonorStateEnum::kAborted;
+
+ LOGV2(6670500, "Entering 'aborted' state.", "id"_attr = _stateDoc.getId());
+ } else {
+ // Always set up acceptance monitoring.
+ auto recipientConnectionString = _setupAcceptanceMonitoring(lg, abortToken);
+
+ if (_stateDoc.getState() > ShardSplitDonorStateEnum::kUninitialized) {
+ // Node has stepped up and resumed a shard split. No need to write the document as
+ // it already exists.
+ return ExecutorFuture(**executor);
+ }
+
+ _stateDoc.setRecipientConnectionString(recipientConnectionString);
+ nextState = ShardSplitDonorStateEnum::kAbortingIndexBuilds;
+
+ LOGV2(
+ 6670501, "Entering 'aborting index builds' state.", "id"_attr = _stateDoc.getId());
+ }
+ }
+
+ return _updateStateDocument(executor, primaryToken, nextState)
+ .then([this, executor, primaryToken](repl::OpTime opTime) {
+ return _waitForMajorityWriteConcern(executor, std::move(opTime), primaryToken);
+ })
+ .then([this, executor, nextState]() {
+ uassert(ErrorCodes::TenantMigrationAborted,
+ "Shard split operation aborted.",
+ nextState != ShardSplitDonorStateEnum::kAborted);
+ });
+}
+
+ExecutorFuture<void>
+ShardSplitDonorService::DonorStateMachine::_abortIndexBuildsAndEnterBlockingState(
+ const ScopedTaskExecutorPtr& executor, const CancellationToken& abortToken) {
+ checkForTokenInterrupt(abortToken);
+
+ boost::optional<std::vector<StringData>> tenantIds;
+ {
+ stdx::lock_guard<Latch> lg(_mutex);
+ if (_stateDoc.getState() > ShardSplitDonorStateEnum::kAbortingIndexBuilds) {
+ return ExecutorFuture(**executor);
+ }
+
+ tenantIds = _stateDoc.getTenantIds();
+ invariant(tenantIds);
+ }
+
+ LOGV2(6436100, "Aborting index builds for shard split.", "id"_attr = _migrationId);
+
+ // Abort any in-progress index builds. No new index builds can start while we are doing this
+ // because the mtab prevents it.
+ auto opCtx = _cancelableOpCtxFactory->makeOperationContext(&cc());
+ auto* indexBuildsCoordinator = IndexBuildsCoordinator::get(opCtx.get());
+ for (const auto& tenantId : *tenantIds) {
+ indexBuildsCoordinator->abortTenantIndexBuilds(
+ opCtx.get(), MigrationProtocolEnum::kMultitenantMigrations, tenantId, "shard split");
+ }
+
+ if (MONGO_unlikely(pauseShardSplitBeforeBlockingState.shouldFail())) {
+ pauseShardSplitBeforeBlockingState.pauseWhileSet();
+ }
+
+ {
+ stdx::lock_guard<Latch> lg(_mutex);
+ LOGV2(8423358, "Entering 'blocking' state.", "id"_attr = _stateDoc.getId());
+ }
+
+ return _updateStateDocument(executor, abortToken, ShardSplitDonorStateEnum::kBlocking)
+ .then([this, self = shared_from_this(), executor, abortToken](repl::OpTime opTime) {
+ return _waitForMajorityWriteConcern(executor, std::move(opTime), abortToken);
+ });
+}
+
ExecutorFuture<void>
ShardSplitDonorService::DonorStateMachine::_waitForRecipientToReachBlockTimestamp(
const ScopedTaskExecutorPtr& executor, const CancellationToken& abortToken) {
@@ -661,7 +765,7 @@ ExecutorFuture<void> sendStepUpToRecipient(const HostAndPort recipient,
return AsyncTry([executor, recipient, token] {
executor::RemoteCommandRequest request(
recipient, "admin", BSON("replSetStepUp" << 1 << "skipDryRun" << true), nullptr);
-
+ pauseShardSplitBeforeSendingStepUpToRecipients.pauseWhileSet();
return executor->scheduleRemoteCommand(request, token)
.then([](const auto& response) {
return getStatusFromCommandResult(response.data);
@@ -676,10 +780,26 @@ ExecutorFuture<void> sendStepUpToRecipient(const HostAndPort recipient,
.on(executor, token);
}
-ExecutorFuture<void>
-ShardSplitDonorService::DonorStateMachine::_waitForRecipientToAcceptSplitAndTriggerElection(
+ExecutorFuture<void> ShardSplitDonorService::DonorStateMachine::_waitForRecipientToAcceptSplit(
const ScopedTaskExecutorPtr& executor, const CancellationToken& abortToken) {
+
checkForTokenInterrupt(abortToken);
+ {
+ stdx::lock_guard<Latch> lg(_mutex);
+ if (_stateDoc.getState() > ShardSplitDonorStateEnum::kBlocking) {
+ return ExecutorFuture(**executor);
+ }
+ }
+
+ LOGV2(6142501, "Waiting for recipient to accept the split.", "id"_attr = _migrationId);
+
+ return ExecutorFuture(**executor).then([&]() { return _splitAcceptancePromise.getFuture(); });
+}
+
+ExecutorFuture<void>
+ShardSplitDonorService::DonorStateMachine::_triggerElectionAndEnterCommitedState(
+ const ScopedTaskExecutorPtr& executor, const CancellationToken& primaryToken) {
+ checkForTokenInterrupt(primaryToken);
std::vector<HostAndPort> recipients;
{
@@ -699,10 +819,7 @@ ShardSplitDonorService::DonorStateMachine::_waitForRecipientToAcceptSplitAndTrig
auto remoteCommandExecutor =
_splitAcceptanceTaskExecutorForTest ? *_splitAcceptanceTaskExecutorForTest : **executor;
- LOGV2(6142501, "Waiting for recipient to accept the split.", "id"_attr = _migrationId);
-
return ExecutorFuture(**executor)
- .then([&]() { return _splitAcceptancePromise.getFuture(); })
.then([this] {
auto opCtx = _cancelableOpCtxFactory->makeOperationContext(&cc());
if (MONGO_unlikely(pauseShardSplitBeforeLeavingBlockingState.shouldFail())) {
@@ -723,7 +840,7 @@ ShardSplitDonorService::DonorStateMachine::_waitForRecipientToAcceptSplitAndTrig
uasserted(ErrorCodes::InternalError, "simulate a shard split error");
}
})
- .then([this, recipients, abortToken, remoteCommandExecutor] {
+ .then([this, recipients, primaryToken, remoteCommandExecutor] {
LOGV2(6493901,
"Triggering an election after recipient has accepted the split.",
"id"_attr = _migrationId);
@@ -733,14 +850,16 @@ ShardSplitDonorService::DonorStateMachine::_waitForRecipientToAcceptSplitAndTrig
// succeed). Selecting a random node has a 2/3 chance to succeed for replSetStepUp. If
// the first command fail, we know this node is the most out-of-date. Therefore we
// select the next node and we know the first node selected will vote for the second.
- return sendStepUpToRecipient(recipients[0], remoteCommandExecutor, abortToken)
- .onCompletion([this, recipients, remoteCommandExecutor, abortToken](Status status) {
- if (status.isOK()) {
- return ExecutorFuture<void>(remoteCommandExecutor, status);
- }
+ return sendStepUpToRecipient(recipients[0], remoteCommandExecutor, primaryToken)
+ .onCompletion(
+ [this, recipients, remoteCommandExecutor, primaryToken](Status status) {
+ if (status.isOK()) {
+ return ExecutorFuture<void>(remoteCommandExecutor, status);
+ }
- return sendStepUpToRecipient(recipients[1], remoteCommandExecutor, abortToken);
- })
+ return sendStepUpToRecipient(
+ recipients[1], remoteCommandExecutor, primaryToken);
+ })
.onCompletion([this](Status replSetStepUpStatus) {
if (!replSetStepUpStatus.isOK()) {
LOGV2(6493904,
@@ -756,180 +875,93 @@ ShardSplitDonorService::DonorStateMachine::_waitForRecipientToAcceptSplitAndTrig
});
})
.thenRunOn(**executor)
- .then([this, executor, abortToken]() {
+ .then([this, executor, primaryToken]() {
LOGV2(6142503, "Entering 'committed' state.", "id"_attr = _stateDoc.getId());
+ auto opCtx = _cancelableOpCtxFactory->makeOperationContext(&cc());
+ pauseShardSplitAfterUpdatingToCommittedState.pauseWhileSet(opCtx.get());
- return _updateStateDocument(executor, abortToken, ShardSplitDonorStateEnum::kCommitted)
- .then([this, executor, abortToken](repl::OpTime opTime) {
- return _waitForMajorityWriteConcern(executor, std::move(opTime), abortToken);
+ return _updateStateDocument(
+ executor, primaryToken, ShardSplitDonorStateEnum::kCommitted)
+ .then([this, executor, primaryToken](repl::OpTime opTime) {
+ return _waitForMajorityWriteConcern(executor, std::move(opTime), primaryToken);
});
});
}
-ExecutorFuture<void> ShardSplitDonorService::DonorStateMachine::_enterBlockingOrAbortedState(
- const ScopedTaskExecutorPtr& executor,
- const CancellationToken& primaryToken,
- const CancellationToken& abortToken) {
- ShardSplitDonorStateEnum nextState;
- {
- stdx::lock_guard<Latch> lg(_mutex);
- if (_stateDoc.getState() == ShardSplitDonorStateEnum::kAborted) {
- if (isAbortedDocumentPersistent(lg, _stateDoc)) {
- // Node has step up and created an instance using a document in abort state. No
- // need to write the document as it already exists.
- return ExecutorFuture(**executor);
- }
-
- _abortReason =
- Status(ErrorCodes::TenantMigrationAborted, "Aborted due to 'abortShardSplit'.");
- BSONObjBuilder bob;
- _abortReason->serializeErrorToBSON(&bob);
- _stateDoc.setAbortReason(bob.obj());
- _stateDoc.setExpireAt(_serviceContext->getFastClockSource()->now() +
- Milliseconds{repl::shardSplitGarbageCollectionDelayMS.load()});
- nextState = ShardSplitDonorStateEnum::kAborted;
-
- LOGV2(8423355, "Entering 'aborted' state.", "id"_attr = _stateDoc.getId());
- } else {
- auto recipientConnectionString = [stateDoc = _stateDoc]() {
- if (stateDoc.getRecipientConnectionString()) {
- return *stateDoc.getRecipientConnectionString();
- }
-
- auto recipientTagName = stateDoc.getRecipientTagName();
- invariant(recipientTagName);
- auto recipientSetName = stateDoc.getRecipientSetName();
- invariant(recipientSetName);
- auto config =
- repl::ReplicationCoordinator::get(cc().getServiceContext())->getConfig();
- return serverless::makeRecipientConnectionString(
- config, *recipientTagName, *recipientSetName);
- }();
-
- // Always start the replica set monitor if we haven't reached a decision yet
- _splitAcceptancePromise.setWith([&]() -> Future<void> {
- if (_stateDoc.getState() > ShardSplitDonorStateEnum::kBlocking ||
- MONGO_unlikely(skipShardSplitWaitForSplitAcceptance.shouldFail())) {
- return SemiFuture<void>::makeReady().unsafeToInlineFuture();
- }
-
- // Optionally select a task executor for unit testing
- auto executor = _splitAcceptanceTaskExecutorForTest
- ? *_splitAcceptanceTaskExecutorForTest
- : _shardSplitService->getInstanceCleanupExecutor();
-
- LOGV2(6142508,
- "Monitoring recipient nodes for split acceptance.",
- "id"_attr = _migrationId,
- "recipientConnectionString"_attr = recipientConnectionString);
-
- return detail::makeRecipientAcceptSplitFuture(
- executor, abortToken, recipientConnectionString, _migrationId)
- .unsafeToInlineFuture();
- });
-
- if (_stateDoc.getState() > ShardSplitDonorStateEnum::kUninitialized) {
- // Node has step up and resumed a shard split. No need to write the document as
- // it already exists.
- return ExecutorFuture(**executor);
- }
-
- // Otherwise, record the recipient connection string
- _stateDoc.setRecipientConnectionString(recipientConnectionString);
- _stateDoc.setState(ShardSplitDonorStateEnum::kBlocking);
- nextState = ShardSplitDonorStateEnum::kBlocking;
-
- LOGV2(8423358, "Entering 'blocking' state.", "id"_attr = _stateDoc.getId());
- }
- }
-
- return AsyncTry([this, nextState, uuid = _migrationId]() {
- auto opCtxHolder = _cancelableOpCtxFactory->makeOperationContext(&cc());
- auto opCtx = opCtxHolder.get();
-
- AutoGetCollection collection(opCtx, _stateDocumentsNS, MODE_IX);
-
- writeConflictRetry(
- opCtx, "ShardSplitDonorInsertStateDoc", _stateDocumentsNS.ns(), [&] {
- const auto filter = BSON(ShardSplitDonorDocument::kIdFieldName << uuid);
- const auto getUpdatedStateDocBson = [&]() {
- stdx::lock_guard<Latch> lg(_mutex);
- return _stateDoc.toBSON();
- };
-
- WriteUnitOfWork wuow(opCtx);
- if (nextState == ShardSplitDonorStateEnum::kBlocking) {
- stdx::lock_guard<Latch> lg(_mutex);
-
- insertTenantAccessBlocker(lg, opCtx, _stateDoc);
-
- auto tenantIds = _stateDoc.getTenantIds();
- invariant(tenantIds);
- setMtabToBlockingForTenants(_serviceContext, opCtx, tenantIds.get());
- }
-
- // Reserve an opTime for the write.
- auto oplogSlot = LocalOplogInfo::get(opCtx)->getNextOpTimes(opCtx, 1U)[0];
- setStateDocTimestamps(
- stdx::lock_guard<Latch>{_mutex}, nextState, oplogSlot, _stateDoc);
-
- auto updateResult = Helpers::upsert(opCtx,
- _stateDocumentsNS.ns(),
- filter,
- getUpdatedStateDocBson(),
- /*fromMigrate=*/false);
-
-
- // We only want to insert, not modify, document
- invariant(updateResult.numMatched == 0);
- wuow.commit();
- });
-
- return repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp();
- })
- .until([](StatusWith<repl::OpTime> swOpTime) {
- return shouldStopInsertingDonorStateDoc(swOpTime.getStatus());
- })
- .withBackoffBetweenIterations(kExponentialBackoff)
- .on(**executor, primaryToken)
- .then([this, executor, primaryToken](repl::OpTime opTime) {
- return _waitForMajorityWriteConcern(executor, std::move(opTime), primaryToken);
- })
- .then([this, executor, nextState]() {
- uassert(ErrorCodes::TenantMigrationAborted,
- "Shard split operation aborted.",
- nextState != ShardSplitDonorStateEnum::kAborted);
- });
-}
-
ExecutorFuture<repl::OpTime> ShardSplitDonorService::DonorStateMachine::_updateStateDocument(
const ScopedTaskExecutorPtr& executor,
const CancellationToken& token,
ShardSplitDonorStateEnum nextState) {
- auto tenantIds = [&]() {
+ auto [tenantIds, isInsert] = [&]() {
stdx::lock_guard<Latch> lg(_mutex);
- _stateDoc.setState(nextState);
-
- return _stateDoc.getTenantIds();
+ auto isInsert = _stateDoc.getState() == ShardSplitDonorStateEnum::kUninitialized ||
+ _stateDoc.getState() == ShardSplitDonorStateEnum::kAborted;
+ return std::make_pair(_stateDoc.getTenantIds(), isInsert);
}();
- return AsyncTry([this, tenantIds = std::move(tenantIds), uuid = _migrationId, nextState] {
+ return AsyncTry([this,
+ tenantIds = std::move(tenantIds),
+ isInsert = isInsert,
+ uuid = _migrationId,
+ nextState] {
auto opCtxHolder = _cancelableOpCtxFactory->makeOperationContext(&cc());
auto opCtx = opCtxHolder.get();
AutoGetCollection collection(opCtx, _stateDocumentsNS, MODE_IX);
- uassert(ErrorCodes::NamespaceNotFound,
- str::stream() << _stateDocumentsNS.ns() << " does not exist",
- collection);
+
+ if (!isInsert) {
+ uassert(ErrorCodes::NamespaceNotFound,
+ str::stream() << _stateDocumentsNS.ns() << " does not exist",
+ collection);
+ }
writeConflictRetry(
- opCtx, "ShardSplitDonorUpdateStateDoc", _stateDocumentsNS.ns(), [&] {
+ opCtx, "ShardSplitDonorUpdateStateDoc", _stateDocumentsNS.ns(), [&]() {
WriteUnitOfWork wuow(opCtx);
+ if (nextState == ShardSplitDonorStateEnum::kBlocking) {
+ // Start blocking writes before getting an oplog slot to guarantee no
+ // writes to the tenant's data can commit with a timestamp after the
+ // block timestamp.
+ for (const auto& tenantId : *tenantIds) {
+ auto mtab = tenant_migration_access_blocker::
+ getTenantMigrationDonorAccessBlocker(_serviceContext, tenantId);
+ invariant(mtab);
+ mtab->startBlockingWrites();
+
+ opCtx->recoveryUnit()->onRollback(
+ [mtab] { mtab->rollBackStartBlocking(); });
+ }
+ }
+
// Reserve an opTime for the write.
auto oplogSlot = LocalOplogInfo::get(opCtx)->getNextOpTimes(opCtx, 1U)[0];
- setStateDocTimestamps(
- stdx::lock_guard<Latch>{_mutex}, nextState, oplogSlot, _stateDoc);
+ {
+ stdx::lock_guard<Latch> lg(_mutex);
+ _stateDoc.setState(nextState);
+ switch (nextState) {
+ case ShardSplitDonorStateEnum::kUninitialized:
+ case ShardSplitDonorStateEnum::kAbortingIndexBuilds:
+ break;
+ case ShardSplitDonorStateEnum::kBlocking:
+ _stateDoc.setBlockTimestamp(oplogSlot.getTimestamp());
+ break;
+ case ShardSplitDonorStateEnum::kCommitted:
+ _stateDoc.setCommitOrAbortOpTime(oplogSlot);
+ break;
+ case ShardSplitDonorStateEnum::kAborted: {
+ _stateDoc.setCommitOrAbortOpTime(oplogSlot);
+
+ invariant(_abortReason);
+ BSONObjBuilder bob;
+ _abortReason.get().serializeErrorToBSON(&bob);
+ _stateDoc.setAbortReason(bob.obj());
+ break;
+ }
+ default:
+ MONGO_UNREACHABLE;
+ }
+ }
const auto filter = BSON(ShardSplitDonorDocument::kIdFieldName << uuid);
const auto updatedStateDocBson = [&]() {
@@ -942,15 +974,19 @@ ExecutorFuture<repl::OpTime> ShardSplitDonorService::DonorStateMachine::_updateS
updatedStateDocBson,
/*fromMigrate=*/false);
- invariant(updateResult.numDocsModified == 1);
+ if (isInsert) {
+ invariant(!updateResult.existing);
+ invariant(!updateResult.upsertedId.isEmpty());
+ } else {
+ invariant(updateResult.numDocsModified == 1);
+ }
+
wuow.commit();
});
return repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp();
})
- .until([](StatusWith<repl::OpTime> swOpTime) {
- return shouldStopInsertingDonorStateDoc(swOpTime.getStatus());
- })
+ .until([](StatusWith<repl::OpTime> swOpTime) { return swOpTime.getStatus().isOK(); })
.withBackoffBetweenIterations(kExponentialBackoff)
.on(**executor, token);
}
@@ -1148,30 +1184,4 @@ ExecutorFuture<void> ShardSplitDonorService::DonorStateMachine::_cleanRecipientS
.on(**executor, primaryToken)
.ignoreValue();
}
-
-void ShardSplitDonorService::DonorStateMachine::_abortIndexBuilds(
- const CancellationToken& abortToken) {
- checkForTokenInterrupt(abortToken);
-
- boost::optional<std::vector<StringData>> tenantIds;
- {
- stdx::lock_guard<Latch> lg(_mutex);
- if (_stateDoc.getState() > ShardSplitDonorStateEnum::kBlocking) {
- return;
- }
- tenantIds = _stateDoc.getTenantIds();
- invariant(tenantIds);
- }
-
- LOGV2(6436100, "Aborting index build for shard split.", "id"_attr = _migrationId);
-
- // Before applying the split config, abort any in-progress index builds. No new index builds
- // can start while we are doing this because the mtab prevents it.
- auto opCtx = _cancelableOpCtxFactory->makeOperationContext(&cc());
- auto* indexBuildsCoordinator = IndexBuildsCoordinator::get(opCtx.get());
- for (const auto& tenantId : *tenantIds) {
- indexBuildsCoordinator->abortTenantIndexBuilds(
- opCtx.get(), MigrationProtocolEnum::kMultitenantMigrations, tenantId, "shard split");
- }
-}
} // namespace mongo
diff --git a/src/mongo/db/serverless/shard_split_donor_service.h b/src/mongo/db/serverless/shard_split_donor_service.h
index 9c6c3645de2..bf1548527dc 100644
--- a/src/mongo/db/serverless/shard_split_donor_service.h
+++ b/src/mongo/db/serverless/shard_split_donor_service.h
@@ -56,7 +56,7 @@ public:
}
NamespaceString getStateDocumentsNS() const override {
- return NamespaceString::kTenantSplitDonorsNamespace;
+ return NamespaceString::kShardSplitDonorsNamespace;
}
ThreadPool::Limits getThreadPoolLimits() const override;
@@ -156,9 +156,12 @@ public:
private:
// Tasks
- ExecutorFuture<void> _enterBlockingOrAbortedState(const ScopedTaskExecutorPtr& executor,
- const CancellationToken& primaryToken,
- const CancellationToken& abortToken);
+ ExecutorFuture<void> _enterAbortIndexBuildsOrAbortedState(const ScopedTaskExecutorPtr& executor,
+ const CancellationToken& primaryToken,
+ const CancellationToken& abortToken);
+
+ ExecutorFuture<void> _abortIndexBuildsAndEnterBlockingState(
+ const ScopedTaskExecutorPtr& executor, const CancellationToken& abortToken);
ExecutorFuture<void> _waitForRecipientToReachBlockTimestamp(
const ScopedTaskExecutorPtr& executor, const CancellationToken& abortToken);
@@ -166,8 +169,11 @@ private:
ExecutorFuture<void> _applySplitConfigToDonor(const ScopedTaskExecutorPtr& executor,
const CancellationToken& abortToken);
- ExecutorFuture<void> _waitForRecipientToAcceptSplitAndTriggerElection(
- const ScopedTaskExecutorPtr& executor, const CancellationToken& abortToken);
+ ExecutorFuture<void> _waitForRecipientToAcceptSplit(const ScopedTaskExecutorPtr& executor,
+ const CancellationToken& primaryToken);
+
+ ExecutorFuture<void> _triggerElectionAndEnterCommitedState(
+ const ScopedTaskExecutorPtr& executor, const CancellationToken& primaryToken);
ExecutorFuture<void> _waitForForgetCmdThenMarkGarbageCollectable(
const ScopedTaskExecutorPtr& executor, const CancellationToken& primaryToken);
@@ -192,7 +198,7 @@ private:
void _initiateTimeout(const ScopedTaskExecutorPtr& executor,
const CancellationToken& abortToken);
-
+ ConnectionString _setupAcceptanceMonitoring(WithLock lock, const CancellationToken& abortToken);
bool _hasInstalledSplitConfig(WithLock lock);
/*
@@ -202,10 +208,8 @@ private:
ExecutorFuture<void> _cleanRecipientStateDoc(const ScopedTaskExecutorPtr& executor,
const CancellationToken& token);
- void _abortIndexBuilds(const CancellationToken& abortToken);
-
private:
- const NamespaceString _stateDocumentsNS = NamespaceString::kTenantSplitDonorsNamespace;
+ const NamespaceString _stateDocumentsNS = NamespaceString::kShardSplitDonorsNamespace;
mutable Mutex _mutex = MONGO_MAKE_LATCH("ShardSplitDonorService::_mutex");
const UUID _migrationId;
diff --git a/src/mongo/db/serverless/shard_split_donor_service_test.cpp b/src/mongo/db/serverless/shard_split_donor_service_test.cpp
index f061e686c13..5824029d097 100644
--- a/src/mongo/db/serverless/shard_split_donor_service_test.cpp
+++ b/src/mongo/db/serverless/shard_split_donor_service_test.cpp
@@ -90,11 +90,11 @@ StatusWith<ShardSplitDonorDocument> getStateDocument(OperationContext* opCtx,
const UUID& shardSplitId) {
// Use kLastApplied so that we can read the state document as a secondary.
ReadSourceScope readSourceScope(opCtx, RecoveryUnit::ReadSource::kLastApplied);
- AutoGetCollectionForRead collection(opCtx, NamespaceString::kTenantSplitDonorsNamespace);
+ AutoGetCollectionForRead collection(opCtx, NamespaceString::kShardSplitDonorsNamespace);
if (!collection) {
return Status(ErrorCodes::NamespaceNotFound,
str::stream() << "Collection not found looking for state document: "
- << NamespaceString::kTenantSplitDonorsNamespace.ns());
+ << NamespaceString::kShardSplitDonorsNamespace.ns());
}
BSONObj result;
@@ -192,6 +192,9 @@ std::ostringstream& operator<<(std::ostringstream& builder,
case mongo::ShardSplitDonorStateEnum::kUninitialized:
builder << "kUninitialized";
break;
+ case mongo::ShardSplitDonorStateEnum::kAbortingIndexBuilds:
+ builder << "kAbortingIndexBuilds";
+ break;
case mongo::ShardSplitDonorStateEnum::kAborted:
builder << "kAborted";
break;
@@ -348,8 +351,7 @@ public:
// The database needs to be open before using shard split donor service.
{
auto opCtx = cc().makeOperationContext();
- AutoGetDb autoDb(
- opCtx.get(), NamespaceString::kTenantSplitDonorsNamespace.db(), MODE_X);
+ AutoGetDb autoDb(opCtx.get(), NamespaceString::kShardSplitDonorsNamespace.db(), MODE_X);
auto db = autoDb.ensureDbExists(opCtx.get());
ASSERT_TRUE(db);
}
@@ -484,18 +486,14 @@ TEST_F(ShardSplitDonorServiceTest, BasicShardSplitDonorServiceInstanceCreation)
ASSERT_EQ(_uuid, serviceInstance->getId());
waitForMonitorAndProcessHello();
-
waitForReplSetStepUp(Status(ErrorCodes::OK, ""));
auto result = serviceInstance->decisionFuture().get();
-
ASSERT_TRUE(hasActiveSplitForTenants(opCtx.get(), _tenantIds));
-
ASSERT(!result.abortReason);
ASSERT_EQ(result.state, mongo::ShardSplitDonorStateEnum::kCommitted);
serviceInstance->tryForget();
-
auto completionFuture = serviceInstance->completionFuture();
completionFuture.wait();
@@ -692,7 +690,7 @@ TEST_F(ShardSplitDonorServiceTest, ReconfigToRemoveSplitConfig) {
}
// Abort scenario : abortSplit called before startSplit.
-TEST_F(ShardSplitDonorServiceTest, CreateInstanceInAbortState) {
+TEST_F(ShardSplitDonorServiceTest, CreateInstanceInAbortedState) {
auto opCtx = makeOperationContext();
auto serviceContext = getServiceContext();
@@ -1067,4 +1065,49 @@ TEST_F(ShardSplitRecipientCleanupTest, ShardSplitRecipientCleanup) {
ErrorCodes::NoMatchingDocument);
}
+class ShardSplitAbortedStepUpTest : public ShardSplitPersistenceTest {
+public:
+ repl::ReplSetConfig initialDonorConfig() override {
+ BSONArrayBuilder members;
+ members.append(BSON("_id" << 1 << "host"
+ << "node1"));
+
+ return repl::ReplSetConfig::parse(BSON("_id"
+ << "donorSetName"
+ << "version" << 1 << "protocolVersion" << 1
+ << "members" << members.arr()));
+ }
+
+ ShardSplitDonorDocument initialStateDocument() override {
+
+ auto stateDocument = defaultStateDocument();
+
+ stateDocument.setState(mongo::ShardSplitDonorStateEnum::kAborted);
+ stateDocument.setBlockTimestamp(Timestamp(1, 1));
+ stateDocument.setCommitOrAbortOpTime(repl::OpTime(Timestamp(1, 1), 1));
+
+ Status status(ErrorCodes::InternalError, abortReason);
+ BSONObjBuilder bob;
+ status.serializeErrorToBSON(&bob);
+ stateDocument.setAbortReason(bob.obj());
+
+ return stateDocument;
+ }
+
+ std::string abortReason{"Testing simulated error"};
+};
+
+TEST_F(ShardSplitAbortedStepUpTest, ShardSplitAbortedStepUp) {
+ auto opCtx = makeOperationContext();
+ auto splitService = repl::PrimaryOnlyServiceRegistry::get(opCtx->getServiceContext())
+ ->lookupServiceByName(ShardSplitDonorService::kServiceName);
+ auto optionalDonor = ShardSplitDonorService::DonorStateMachine::lookup(
+ opCtx.get(), splitService, BSON("_id" << _uuid));
+
+ ASSERT(optionalDonor);
+ auto result = optionalDonor->get()->decisionFuture().get();
+
+ ASSERT_EQ(result.state, mongo::ShardSplitDonorStateEnum::kAborted);
+}
+
} // namespace mongo
diff --git a/src/mongo/db/serverless/shard_split_state_machine.idl b/src/mongo/db/serverless/shard_split_state_machine.idl
index 8aa65017c1b..ee3462f5a05 100644
--- a/src/mongo/db/serverless/shard_split_state_machine.idl
+++ b/src/mongo/db/serverless/shard_split_state_machine.idl
@@ -40,6 +40,7 @@ enums:
type: string
values:
kUninitialized: "uninitialized"
+ kAbortingIndexBuilds: "aborting index builds"
kBlocking: "blocking"
kCommitted: "committed"
kAborted: "aborted"
diff --git a/src/mongo/db/serverless/shard_split_utils.cpp b/src/mongo/db/serverless/shard_split_utils.cpp
index b9bb407220d..041c133b02b 100644
--- a/src/mongo/db/serverless/shard_split_utils.cpp
+++ b/src/mongo/db/serverless/shard_split_utils.cpp
@@ -149,7 +149,7 @@ repl::ReplSetConfig makeSplitConfig(const repl::ReplSetConfig& config,
}
Status insertStateDoc(OperationContext* opCtx, const ShardSplitDonorDocument& stateDoc) {
- const auto nss = NamespaceString::kTenantSplitDonorsNamespace;
+ const auto nss = NamespaceString::kShardSplitDonorsNamespace;
AutoGetCollection collection(opCtx, nss, MODE_IX);
uassert(ErrorCodes::PrimarySteppedDown,
@@ -176,7 +176,7 @@ Status insertStateDoc(OperationContext* opCtx, const ShardSplitDonorDocument& st
}
Status updateStateDoc(OperationContext* opCtx, const ShardSplitDonorDocument& stateDoc) {
- const auto nss = NamespaceString::kTenantSplitDonorsNamespace;
+ const auto nss = NamespaceString::kShardSplitDonorsNamespace;
AutoGetCollection collection(opCtx, nss, MODE_IX);
if (!collection) {
@@ -198,7 +198,7 @@ Status updateStateDoc(OperationContext* opCtx, const ShardSplitDonorDocument& st
}
StatusWith<bool> deleteStateDoc(OperationContext* opCtx, const UUID& shardSplitId) {
- const auto nss = NamespaceString::kTenantSplitDonorsNamespace;
+ const auto nss = NamespaceString::kShardSplitDonorsNamespace;
AutoGetCollection collection(opCtx, nss, MODE_IX);
if (!collection) {
diff --git a/src/mongo/db/serverless/shard_split_utils.h b/src/mongo/db/serverless/shard_split_utils.h
index b58f24b5a1a..2d9ab8402e7 100644
--- a/src/mongo/db/serverless/shard_split_utils.h
+++ b/src/mongo/db/serverless/shard_split_utils.h
@@ -64,7 +64,7 @@ repl::ReplSetConfig makeSplitConfig(const repl::ReplSetConfig& config,
/**
* Inserts the shard split state document 'stateDoc' into
- * 'config.tenantSplitDonors' collection. Also, creates the collection if not present
+ * 'config.shardSplitDonors' collection. Also, creates the collection if not present
* before inserting the document.
*
* NOTE: A state doc might get inserted based on a decision made out of a stale read within a
diff --git a/src/mongo/db/service_context_d_test_fixture.cpp b/src/mongo/db/service_context_d_test_fixture.cpp
index a6d84a4cd40..9eda18d4812 100644
--- a/src/mongo/db/service_context_d_test_fixture.cpp
+++ b/src/mongo/db/service_context_d_test_fixture.cpp
@@ -124,6 +124,8 @@ ServiceContextMongoDTest::ServiceContextMongoDTest(Options options)
storageGlobalParams.dbpath = _tempDir.path();
+ storageGlobalParams.ephemeral = options._ephemeral;
+
// Since unit tests start in their own directories, by default skip lock file and metadata file
// for faster startup.
auto opCtx = serviceContext->makeOperationContext(getClient());
diff --git a/src/mongo/db/service_context_d_test_fixture.h b/src/mongo/db/service_context_d_test_fixture.h
index 8b21eaf0b2e..3973a344ffd 100644
--- a/src/mongo/db/service_context_d_test_fixture.h
+++ b/src/mongo/db/service_context_d_test_fixture.h
@@ -82,8 +82,16 @@ protected:
return std::move(*this);
}
+ Options ephemeral(bool ephemeral) {
+ _ephemeral = ephemeral;
+ return std::move(*this);
+ }
+
private:
std::string _engine = "wiredTiger";
+ // We use ephemeral instances by default to advise Storage Engines (in particular
+ // WiredTiger) not to perform Disk I/O.
+ bool _ephemeral = true;
RepairAction _repair = RepairAction::kNoRepair;
StorageEngineInitFlags _initFlags = kDefaultStorageEngineInitFlags;
bool _useReplSettings = false;
diff --git a/src/mongo/db/service_entry_point_common.cpp b/src/mongo/db/service_entry_point_common.cpp
index 7c0c30e58e7..bc167fba693 100644
--- a/src/mongo/db/service_entry_point_common.cpp
+++ b/src/mongo/db/service_entry_point_common.cpp
@@ -41,7 +41,7 @@
#include "mongo/db/auth/authorization_session.h"
#include "mongo/db/auth/impersonation_session.h"
#include "mongo/db/auth/ldap_cumulative_operation_stats.h"
-#include "mongo/db/auth/security_token.h"
+#include "mongo/db/auth/security_token_authentication_guard.h"
#include "mongo/db/client.h"
#include "mongo/db/command_can_run_here.h"
#include "mongo/db/commands.h"
@@ -654,6 +654,7 @@ private:
CommandHelpers::uassertShouldAttemptParse(opCtx, command, request);
_startOperationTime = getClientOperationTime(opCtx);
+ rpc::readRequestMetadata(opCtx, request, command->requiresAuth());
_invocation = command->parse(opCtx, request);
CommandInvocation::set(opCtx, _invocation);
@@ -1244,7 +1245,7 @@ Future<void> RunCommandImpl::_runImpl() {
Future<void> RunCommandImpl::_runCommand() {
auto shouldCheckoutSession = _ecd->getSessionOptions().getTxnNumber() &&
- !shouldCommandSkipSessionCheckout(_ecd->getInvocation()->definition()->getName());
+ _ecd->getInvocation()->definition()->shouldCheckoutSession();
if (shouldCheckoutSession) {
return future_util::makeState<CheckoutSessionAndInvokeCommand>(_ecd).thenWithState(
[](auto* path) { return path->run(); });
@@ -1276,8 +1277,7 @@ void RunCommandAndWaitForWriteConcern::_waitForWriteConcern(BSONObjBuilder& bb)
}
CurOp::get(opCtx)->debug().writeConcern.emplace(opCtx->getWriteConcern());
- _execContext->behaviors->waitForWriteConcern(
- opCtx, invocation, repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp(), bb);
+ _execContext->behaviors->waitForWriteConcern(opCtx, invocation, _ecd->getLastOpBeforeRun(), bb);
}
Future<void> RunCommandAndWaitForWriteConcern::_runImpl() {
@@ -1309,7 +1309,7 @@ void RunCommandAndWaitForWriteConcern::_setup() {
// server defaults. So, warn if the operation has not specified writeConcern and is on
// a shard/config server.
if (!opCtx->getClient()->isInDirectClient() &&
- (!opCtx->inMultiDocumentTransaction() || isTransactionCommand(command->getName()))) {
+ (!opCtx->inMultiDocumentTransaction() || command->isTransactionCommand())) {
if (_isInternalClient()) {
// WriteConcern should always be explicitly specified by operations received
// from internal clients (ie. from a mongos or mongod), even if it is empty
@@ -1406,6 +1406,14 @@ void ExecCommandDatabase::_initiateCommand() {
Client* client = opCtx->getClient();
+ if (auto scope = request.validatedTenancyScope; scope && scope->hasAuthenticatedUser()) {
+ uassert(ErrorCodes::Unauthorized,
+ str::stream() << "Command " << command->getName()
+ << " is not supported in multitenancy mode",
+ command->allowedWithSecurityToken());
+ _tokenAuthorizationSessionGuard.emplace(opCtx, request.validatedTenancyScope.get());
+ }
+
if (isHello()) {
// Preload generic ClientMetadata ahead of our first hello request. After the first
// request, metaElement should always be empty.
@@ -1429,13 +1437,6 @@ void ExecCommandDatabase::_initiateCommand() {
}
});
- rpc::readRequestMetadata(opCtx, request, command->requiresAuth());
- uassert(ErrorCodes::Unauthorized,
- str::stream() << "Command " << command->getName()
- << " is not supported in multitenancy mode",
- command->allowedWithSecurityToken() || auth::getSecurityToken(opCtx) == boost::none);
- _tokenAuthorizationSessionGuard.emplace(opCtx);
-
rpc::TrackingMetadata::get(opCtx).initWithOperName(command->getName());
auto const replCoord = repl::ReplicationCoordinator::get(opCtx);
@@ -1449,7 +1450,6 @@ void ExecCommandDatabase::_initiateCommand() {
// Start authz contract tracking before we evaluate failpoints
auto authzSession = AuthorizationSession::get(client);
-
authzSession->startContractTracking();
CommandHelpers::evaluateFailCommandFailPoint(opCtx, _invocation.get());
@@ -1683,7 +1683,7 @@ void ExecCommandDatabase::_initiateCommand() {
boost::optional<ChunkVersion> shardVersion;
if (auto shardVersionElem = request.body[ChunkVersion::kShardVersionField]) {
- shardVersion = ChunkVersion::fromBSONPositionalOrNewerFormat(shardVersionElem);
+ shardVersion = ChunkVersion::parse(shardVersionElem);
}
boost::optional<DatabaseVersion> databaseVersion;
@@ -1950,10 +1950,11 @@ void curOpCommandSetup(OperationContext* opCtx, const OpMsgRequest& request) {
Future<void> parseCommand(std::shared_ptr<HandleRequest::ExecutionContext> execContext) try {
const auto& msg = execContext->getMessage();
- auto opMsgReq = rpc::opMsgRequestFromAnyProtocol(msg);
+ auto client = execContext->getOpCtx()->getClient();
+ auto opMsgReq = rpc::opMsgRequestFromAnyProtocol(msg, client);
+
if (msg.operation() == dbQuery) {
- checkAllowedOpQueryCommand(*(execContext->getOpCtx()->getClient()),
- opMsgReq.getCommandName());
+ checkAllowedOpQueryCommand(*client, opMsgReq.getCommandName());
}
execContext->setRequest(opMsgReq);
return Status::OK();
diff --git a/src/mongo/db/session_catalog_mongod.cpp b/src/mongo/db/session_catalog_mongod.cpp
index fe77c218e91..ba5d503ebd8 100644
--- a/src/mongo/db/session_catalog_mongod.cpp
+++ b/src/mongo/db/session_catalog_mongod.cpp
@@ -38,6 +38,7 @@
#include "mongo/db/create_indexes_gen.h"
#include "mongo/db/dbdirectclient.h"
#include "mongo/db/index_builds_coordinator.h"
+#include "mongo/db/internal_transactions_feature_flag_gen.h"
#include "mongo/db/namespace_string.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/ops/write_ops.h"
@@ -374,25 +375,59 @@ int removeExpiredTransactionSessionsFromDisk(
}
void createTransactionTable(OperationContext* opCtx) {
- auto serviceCtx = opCtx->getServiceContext();
CollectionOptions options;
- auto createCollectionStatus =
- repl::StorageInterface::get(serviceCtx)
- ->createCollection(opCtx, NamespaceString::kSessionTransactionsTableNamespace, options);
+ auto storageInterface = repl::StorageInterface::get(opCtx);
+ auto createCollectionStatus = storageInterface->createCollection(
+ opCtx, NamespaceString::kSessionTransactionsTableNamespace, options);
+
if (createCollectionStatus == ErrorCodes::NamespaceExists) {
- return;
- }
+ bool collectionIsEmpty = false;
+ {
+ AutoGetCollection autoColl(
+ opCtx, NamespaceString::kSessionTransactionsTableNamespace, LockMode::MODE_IS);
+ invariant(autoColl);
+
+ if (autoColl->getIndexCatalog()->findIndexByName(
+ opCtx, MongoDSessionCatalog::kConfigTxnsPartialIndexName)) {
+ // Index already exists, so there's nothing to do.
+ return;
+ }
- uassertStatusOKWithContext(
- createCollectionStatus,
- str::stream() << "Failed to create the "
- << NamespaceString::kSessionTransactionsTableNamespace.ns() << " collection");
+ collectionIsEmpty = autoColl->isEmpty(opCtx);
+ }
+
+ if (!collectionIsEmpty) {
+ // Unless explicitly enabled, don't create the index to avoid delaying step up.
+ if (feature_flags::gFeatureFlagAlwaysCreateConfigTransactionsPartialIndexOnStepUp
+ .isEnabledAndIgnoreFCV()) {
+ AutoGetCollection autoColl(
+ opCtx, NamespaceString::kSessionTransactionsTableNamespace, LockMode::MODE_X);
+ IndexBuildsCoordinator::get(opCtx)->createIndex(
+ opCtx,
+ autoColl->uuid(),
+ MongoDSessionCatalog::getConfigTxnPartialIndexSpec(),
+ IndexBuildsManager::IndexConstraints::kEnforce,
+ false /* fromMigration */);
+ }
+
+ return;
+ }
+
+ // The index does not exist and the collection is empty, so fall through to create it on the
+ // empty collection. This can happen after a failover because the collection and index
+ // creation are recorded as separate oplog entries.
+ } else {
+ uassertStatusOKWithContext(createCollectionStatus,
+ str::stream()
+ << "Failed to create the "
+ << NamespaceString::kSessionTransactionsTableNamespace.ns()
+ << " collection");
+ }
auto indexSpec = MongoDSessionCatalog::getConfigTxnPartialIndexSpec();
- const auto createIndexStatus =
- repl::StorageInterface::get(opCtx)->createIndexesOnEmptyCollection(
- opCtx, NamespaceString::kSessionTransactionsTableNamespace, {indexSpec});
+ const auto createIndexStatus = storageInterface->createIndexesOnEmptyCollection(
+ opCtx, NamespaceString::kSessionTransactionsTableNamespace, {indexSpec});
uassertStatusOKWithContext(
createIndexStatus,
str::stream() << "Failed to create partial index for the "
diff --git a/src/mongo/db/sessions_collection.cpp b/src/mongo/db/sessions_collection.cpp
index 45c59c3631d..b72c85cbadc 100644
--- a/src/mongo/db/sessions_collection.cpp
+++ b/src/mongo/db/sessions_collection.cpp
@@ -236,7 +236,7 @@ LogicalSessionIdSet SessionsCollection::_doFindRemoved(
auto wrappedSend = [&](BSONObj batch) {
BSONObjBuilder batchWithReadConcernLocal(batch);
batchWithReadConcernLocal.append(repl::ReadConcernArgs::kReadConcernFieldName,
- repl::ReadConcernArgs::kImplicitDefault);
+ repl::ReadConcernArgs::kLocal);
auto swBatchResult = send(batchWithReadConcernLocal.obj());
auto result =
diff --git a/src/mongo/db/sessions_collection_rs.cpp b/src/mongo/db/sessions_collection_rs.cpp
index 8d53352c455..1bce83b547f 100644
--- a/src/mongo/db/sessions_collection_rs.cpp
+++ b/src/mongo/db/sessions_collection_rs.cpp
@@ -38,7 +38,6 @@
#include "mongo/bson/bsonobj.h"
#include "mongo/client/authenticate.h"
#include "mongo/client/connection_string.h"
-#include "mongo/client/query.h"
#include "mongo/client/read_preference.h"
#include "mongo/client/remote_command_targeter_factory_impl.h"
#include "mongo/db/concurrency/d_concurrency.h"
diff --git a/src/mongo/db/sessions_collection_standalone.cpp b/src/mongo/db/sessions_collection_standalone.cpp
index 477f20b28a9..93e7aab9821 100644
--- a/src/mongo/db/sessions_collection_standalone.cpp
+++ b/src/mongo/db/sessions_collection_standalone.cpp
@@ -31,7 +31,6 @@
#include "mongo/db/sessions_collection_standalone.h"
-#include "mongo/client/query.h"
#include "mongo/db/dbdirectclient.h"
#include "mongo/db/operation_context.h"
#include "mongo/rpc/get_status_from_command_result.h"
diff --git a/src/mongo/db/stats/counters.h b/src/mongo/db/stats/counters.h
index 3d054806f46..cbf429127f6 100644
--- a/src/mongo/db/stats/counters.h
+++ b/src/mongo/db/stats/counters.h
@@ -113,7 +113,7 @@ public:
_checkWrap(&OpCounters::_acceptableErrorInCommand, 1);
}
- // thse are used by snmp, and other things, do not remove
+    // these are used by metrics things, do not remove
const AtomicWord<long long>* getInsert() const {
return &*_insert;
}
diff --git a/src/mongo/db/storage/kv/durable_catalog_test.cpp b/src/mongo/db/storage/kv/durable_catalog_test.cpp
index ed3162f03fd..9df5105231f 100644
--- a/src/mongo/db/storage/kv/durable_catalog_test.cpp
+++ b/src/mongo/db/storage/kv/durable_catalog_test.cpp
@@ -62,6 +62,8 @@ static const long kExpectedVersion = 1;
class DurableCatalogTest : public CatalogTestFixture {
public:
+ explicit DurableCatalogTest(Options options = {}) : CatalogTestFixture(std::move(options)) {}
+
void setUp() override {
CatalogTestFixture::setUp();
@@ -143,13 +145,17 @@ public:
WriteUnitOfWork wuow(operationContext());
const bool isSecondaryBackgroundIndexBuild = false;
boost::optional<UUID> buildUUID(twoPhase, UUID::gen());
- ASSERT_OK(collWriter.getWritableCollection()->prepareForIndexBuild(
- operationContext(), desc.get(), buildUUID, isSecondaryBackgroundIndexBuild));
- entry = collWriter.getWritableCollection()->getIndexCatalog()->createIndexEntry(
- operationContext(),
- collWriter.getWritableCollection(),
- std::move(desc),
- CreateIndexEntryFlags::kNone);
+ ASSERT_OK(collWriter.getWritableCollection(operationContext())
+ ->prepareForIndexBuild(operationContext(),
+ desc.get(),
+ buildUUID,
+ isSecondaryBackgroundIndexBuild));
+ entry = collWriter.getWritableCollection(operationContext())
+ ->getIndexCatalog()
+ ->createIndexEntry(operationContext(),
+ collWriter.getWritableCollection(operationContext()),
+ std::move(desc),
+ CreateIndexEntryFlags::kNone);
wuow.commit();
}
@@ -191,6 +197,9 @@ private:
};
class ImportCollectionTest : public DurableCatalogTest {
+public:
+ explicit ImportCollectionTest() : DurableCatalogTest(Options{}.ephemeral(false)) {}
+
protected:
void setUp() override {
DurableCatalogTest::setUp();
@@ -541,8 +550,9 @@ TEST_F(DurableCatalogTest, SinglePhaseIndexBuild) {
Lock::CollectionLock collLk(operationContext(), collection->ns(), MODE_X);
WriteUnitOfWork wuow(operationContext());
- getCollectionWriter().getWritableCollection()->indexBuildSuccess(operationContext(),
- indexEntry);
+ getCollectionWriter()
+ .getWritableCollection(operationContext())
+ ->indexBuildSuccess(operationContext(), indexEntry);
wuow.commit();
}
@@ -564,8 +574,9 @@ TEST_F(DurableCatalogTest, TwoPhaseIndexBuild) {
Lock::CollectionLock collLk(operationContext(), collection->ns(), MODE_X);
WriteUnitOfWork wuow(operationContext());
- getCollectionWriter().getWritableCollection()->indexBuildSuccess(operationContext(),
- indexEntry);
+ getCollectionWriter()
+ .getWritableCollection(operationContext())
+ ->indexBuildSuccess(operationContext(), indexEntry);
wuow.commit();
}
diff --git a/src/mongo/db/storage/kv/storage_engine_test.cpp b/src/mongo/db/storage/kv/storage_engine_test.cpp
index 2b600a7479d..ae0569a10ec 100644
--- a/src/mongo/db/storage/kv/storage_engine_test.cpp
+++ b/src/mongo/db/storage/kv/storage_engine_test.cpp
@@ -685,7 +685,7 @@ TEST_F(TimestampKVEngineTest, TimestampAdvancesOnNotification) {
_storageEngine->getTimestampMonitor()->clearListeners();
}
-TEST_F(StorageEngineTest, UseAlternateStorageLocation) {
+TEST_F(StorageEngineTestNotEphemeral, UseAlternateStorageLocation) {
auto opCtx = cc().makeOperationContext();
const NamespaceString coll1Ns("db.coll1");
diff --git a/src/mongo/db/storage/record_store_test_oplog.cpp b/src/mongo/db/storage/record_store_test_oplog.cpp
index 6c61f93ee76..cc014de1681 100644
--- a/src/mongo/db/storage/record_store_test_oplog.cpp
+++ b/src/mongo/db/storage/record_store_test_oplog.cpp
@@ -530,7 +530,7 @@ TEST(RecordStoreTestHarness, OplogVisibilityStandalone) {
rs->insertRecord(opCtx.get(), obj.objdata(), obj.objsize(), Timestamp());
ASSERT_OK(res.getStatus());
id1 = res.getValue();
- StatusWith<RecordId> expectedId = record_id_helpers::keyForOptime(ts);
+ StatusWith<RecordId> expectedId = record_id_helpers::keyForOptime(ts, KeyFormat::Long);
ASSERT_OK(expectedId.getStatus());
// RecordId should be extracted from 'ts' field when inserting into oplog namespace
ASSERT(expectedId.getValue().compare(id1) == 0);
diff --git a/src/mongo/db/storage/storage_engine_test_fixture.h b/src/mongo/db/storage/storage_engine_test_fixture.h
index eaedd287615..4e8d0e27f54 100644
--- a/src/mongo/db/storage/storage_engine_test_fixture.h
+++ b/src/mongo/db/storage/storage_engine_test_fixture.h
@@ -202,7 +202,8 @@ public:
class StorageEngineRepairTest : public StorageEngineTest {
public:
- StorageEngineRepairTest() : StorageEngineTest(Options{}.repair(RepairAction::kRepair)) {}
+ StorageEngineRepairTest()
+ : StorageEngineTest(Options{}.repair(RepairAction::kRepair).ephemeral(false)) {}
void tearDown() {
auto repairObserver = StorageRepairObserver::get(getGlobalServiceContext());
@@ -220,6 +221,11 @@ public:
}
};
+class StorageEngineTestNotEphemeral : public StorageEngineTest {
+public:
+ StorageEngineTestNotEphemeral() : StorageEngineTest(Options{}.ephemeral(false)){};
+};
+
} // namespace mongo
#undef MONGO_LOGV2_DEFAULT_COMPONENT
diff --git a/src/mongo/db/storage/storage_options.h b/src/mongo/db/storage/storage_options.h
index 0455f09d245..108ae66629c 100644
--- a/src/mongo/db/storage/storage_options.h
+++ b/src/mongo/db/storage/storage_options.h
@@ -82,6 +82,9 @@ struct StorageGlobalParams {
bool dur; // --dur durability (now --journal)
+ // Whether the Storage Engine selected should be ephemeral in nature or not.
+ bool ephemeral = false;
+
// --journalCommitInterval
static constexpr int kMaxJournalCommitIntervalMs = 500;
AtomicWord<int> journalCommitIntervalMs;
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_init.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_init.cpp
index e5ceaea5c82..6277a27f62e 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_init.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_init.cpp
@@ -118,7 +118,6 @@ public:
"RAM. See http://dochub.mongodb.org/core/faq-memory-diagnostics-wt");
}
}
- const bool ephemeral = false;
auto kv =
std::make_unique<WiredTigerKVEngine>(getCanonicalName().toString(),
params.dbpath,
@@ -127,7 +126,7 @@ public:
cacheMB,
wiredTigerGlobalOptions.getMaxHistoryFileSizeMB(),
params.dur,
- ephemeral,
+ params.ephemeral,
params.repair);
kv->setRecordStoreExtraOptions(wiredTigerGlobalOptions.collectionConfig);
kv->setSortedDataInterfaceExtraOptions(wiredTigerGlobalOptions.indexConfig);
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
index 6fa19a03260..7cfbe29f304 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_kv_engine.cpp
@@ -468,6 +468,12 @@ WiredTigerKVEngine::WiredTigerKVEngine(const std::string& canonicalName,
ss << WiredTigerUtil::generateRestoreConfig() << ",";
}
+ // If we've requested an ephemeral instance we store everything into memory instead of backing
+ // it onto disk. Logging is not supported in this instance, thus we also have to disable it.
+ if (_ephemeral) {
+ ss << "in_memory=true,log=(enabled=false),";
+ }
+
string config = ss.str();
LOGV2(22315, "Opening WiredTiger", "config"_attr = config);
auto startTime = Date_t::now();
@@ -1994,6 +2000,11 @@ bool WiredTigerKVEngine::supportsDirectoryPerDB() const {
}
void WiredTigerKVEngine::_checkpoint(WT_SESSION* session) {
+ // Ephemeral WiredTiger instances cannot do a checkpoint to disk as there is no disk backing
+ // the data.
+ if (_ephemeral) {
+ return;
+ }
// TODO: SERVER-64507: Investigate whether we can smartly rely on one checkpointer if two or
// more threads checkpoint at the same time.
stdx::lock_guard lk(_checkpointMutex);
diff --git a/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp b/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp
index fdf49f19ba2..a84b6ca6061 100644
--- a/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp
+++ b/src/mongo/db/storage/wiredtiger/wiredtiger_oplog_manager.cpp
@@ -239,8 +239,6 @@ void WiredTigerOplogManager::_updateOplogVisibilityLoop(WiredTigerSessionCache*
invariant(_triggerOplogVisibilityUpdate);
_triggerOplogVisibilityUpdate = false;
- lk.unlock();
-
// Fetch the all_durable timestamp from the storage engine, which is guaranteed not to have
// any holes behind it in-memory.
const uint64_t newTimestamp = sessionCache->getKVEngine()->getAllDurableTimestamp().asULL();
@@ -256,7 +254,6 @@ void WiredTigerOplogManager::_updateOplogVisibilityLoop(WiredTigerSessionCache*
continue;
}
- lk.lock();
// Publish the new timestamp value. Avoid going backward.
auto currentVisibleTimestamp = getOplogReadTimestamp();
if (newTimestamp > currentVisibleTimestamp) {
diff --git a/src/mongo/db/tenant_id.h b/src/mongo/db/tenant_id.h
index 7af08a45d95..dc9b46705c3 100644
--- a/src/mongo/db/tenant_id.h
+++ b/src/mongo/db/tenant_id.h
@@ -55,12 +55,12 @@ public:
*/
static const TenantId kSystemTenantId;
- explicit TenantId(const OID& oid) : _oid(oid), _idStr(oid.toString()) {}
+ explicit TenantId(const OID& oid) : _oid(oid) {}
TenantId() = delete;
- const std::string& toString() const {
- return _idStr;
+ std::string toString() const {
+ return _oid.toString();
}
/**
@@ -105,7 +105,6 @@ public:
private:
OID _oid;
- std::string _idStr;
};
inline bool operator==(const TenantId& lhs, const TenantId& rhs) {
diff --git a/src/mongo/db/timeseries/SConscript b/src/mongo/db/timeseries/SConscript
index 0be6ef8c7a3..42b4803dc5e 100644
--- a/src/mongo/db/timeseries/SConscript
+++ b/src/mongo/db/timeseries/SConscript
@@ -27,12 +27,15 @@ env.Library(
'flat_bson.cpp',
],
LIBDEPS_PRIVATE=[
+ '$BUILD_DIR/mongo/bson/util/bson_column',
'$BUILD_DIR/mongo/db/catalog/database_holder',
'$BUILD_DIR/mongo/db/commands/server_status',
'$BUILD_DIR/mongo/db/namespace_string',
'$BUILD_DIR/mongo/db/server_options_core',
+ '$BUILD_DIR/mongo/db/storage/storage_options',
'$BUILD_DIR/mongo/db/views/views',
'$BUILD_DIR/mongo/util/fail_point',
+ 'bucket_compression',
'timeseries_options',
],
)
diff --git a/src/mongo/db/timeseries/bucket_catalog.cpp b/src/mongo/db/timeseries/bucket_catalog.cpp
index 71ffb71dbec..41148c11dd0 100644
--- a/src/mongo/db/timeseries/bucket_catalog.cpp
+++ b/src/mongo/db/timeseries/bucket_catalog.cpp
@@ -34,11 +34,17 @@
#include <algorithm>
#include <boost/iterator/transform_iterator.hpp>
+#include "mongo/bson/util/bsoncolumn.h"
#include "mongo/db/catalog/database_holder.h"
#include "mongo/db/commands/server_status.h"
#include "mongo/db/concurrency/exception_util.h"
#include "mongo/db/operation_context.h"
+#include "mongo/db/storage/storage_parameters_gen.h"
+#include "mongo/db/timeseries/bucket_catalog_helpers.h"
+#include "mongo/db/timeseries/bucket_compression.h"
+#include "mongo/db/timeseries/timeseries_constants.h"
#include "mongo/db/timeseries/timeseries_options.h"
+#include "mongo/logv2/redaction.h"
#include "mongo/platform/compiler.h"
#include "mongo/stdx/thread.h"
#include "mongo/util/fail_point.h"
@@ -255,6 +261,24 @@ void BucketCatalog::ExecutionStatsController::incNumBucketsClosedDueToMemoryThre
_globalStats->numBucketsClosedDueToMemoryThreshold.fetchAndAddRelaxed(increment);
}
+void BucketCatalog::ExecutionStatsController::incNumBucketsArchivedDueToTimeForward(
+ long long increment) {
+ _collectionStats->numBucketsArchivedDueToTimeForward.fetchAndAddRelaxed(increment);
+ _globalStats->numBucketsArchivedDueToTimeForward.fetchAndAddRelaxed(increment);
+}
+
+void BucketCatalog::ExecutionStatsController::incNumBucketsArchivedDueToTimeBackward(
+ long long increment) {
+ _collectionStats->numBucketsArchivedDueToTimeBackward.fetchAndAddRelaxed(increment);
+ _globalStats->numBucketsArchivedDueToTimeBackward.fetchAndAddRelaxed(increment);
+}
+
+void BucketCatalog::ExecutionStatsController::incNumBucketsArchivedDueToMemoryThreshold(
+ long long increment) {
+ _collectionStats->numBucketsArchivedDueToMemoryThreshold.fetchAndAddRelaxed(increment);
+ _globalStats->numBucketsArchivedDueToMemoryThreshold.fetchAndAddRelaxed(increment);
+}
+
void BucketCatalog::ExecutionStatsController::incNumCommits(long long increment) {
_collectionStats->numCommits.fetchAndAddRelaxed(increment);
_globalStats->numCommits.fetchAndAddRelaxed(increment);
@@ -270,11 +294,23 @@ void BucketCatalog::ExecutionStatsController::incNumMeasurementsCommitted(long l
_globalStats->numMeasurementsCommitted.fetchAndAddRelaxed(increment);
}
+void BucketCatalog::ExecutionStatsController::incNumBucketsReopened(long long increment) {
+ _collectionStats->numBucketsReopened.fetchAndAddRelaxed(increment);
+ _globalStats->numBucketsReopened.fetchAndAddRelaxed(increment);
+}
+
+void BucketCatalog::ExecutionStatsController::incNumBucketsKeptOpenDueToLargeMeasurements(
+ long long increment) {
+ _collectionStats->numBucketsKeptOpenDueToLargeMeasurements.fetchAndAddRelaxed(increment);
+ _globalStats->numBucketsKeptOpenDueToLargeMeasurements.fetchAndAddRelaxed(increment);
+}
+
class BucketCatalog::Bucket {
public:
friend class BucketCatalog;
- Bucket(const OID& id, StripeNumber stripe) : _id(id), _stripe(stripe) {}
+ Bucket(const OID& id, StripeNumber stripe, BucketKey::Hash hash)
+ : _id(id), _stripe(stripe), _keyHash(hash) {}
/**
* Returns the ID for the underlying bucket.
@@ -290,6 +326,13 @@ public:
return _stripe;
}
+ /**
+ * Returns the pre-computed hash of the corresponding BucketKey
+ */
+ BucketKey::Hash keyHash() const {
+ return _keyHash;
+ }
+
// Returns the time associated with the bucket (id)
Date_t getTime() const {
return _minTime;
@@ -338,7 +381,6 @@ private:
void _calculateBucketFieldsAndSizeChange(const BSONObj& doc,
boost::optional<StringData> metaField,
NewFieldNames* newFieldNamesToBeInserted,
- uint32_t* newFieldNamesSize,
uint32_t* sizeToBeAdded) const {
// BSON size for an object with an empty object field where field name is empty string.
// We can use this as an offset to know the size when we have real field names.
@@ -347,7 +389,6 @@ private:
dassert(emptyObjSize == BSON("" << BSONObj()).objsize());
newFieldNamesToBeInserted->clear();
- *newFieldNamesSize = 0;
*sizeToBeAdded = 0;
auto numMeasurementsFieldLength = numDigits(_numMeasurements);
for (const auto& elem : doc) {
@@ -357,12 +398,24 @@ private:
continue;
}
- // If the field name is new, add the size of an empty object with that field name.
auto hashedKey = StringSet::hasher().hashed_key(fieldName);
if (!_fieldNames.contains(hashedKey)) {
+ // Record the new field name only if it hasn't been committed yet. There could be
+ // concurrent batches writing to this bucket with the same new field name, but
+ // they're not guaranteed to commit successfully.
newFieldNamesToBeInserted->push_back(hashedKey);
- *newFieldNamesSize += elem.fieldNameSize();
- *sizeToBeAdded += emptyObjSize + fieldName.size();
+
+ // Only update the bucket size once to account for the new field name if it isn't
+ // already pending a commit from another batch.
+ if (!_uncommittedFieldNames.contains(hashedKey)) {
+ // Add the size of an empty object with that field name.
+ *sizeToBeAdded += emptyObjSize + fieldName.size();
+
+ // The control.min and control.max summaries don't have any information for this
+ // new field name yet. Add two measurements worth of data to account for this.
+ // As this is the first measurement for this field, min == max.
+ *sizeToBeAdded += elem.size() * 2;
+ }
}
// Add the element size, taking into account that the name will be changed to its
@@ -400,20 +453,21 @@ private:
// The stripe which owns this bucket.
const StripeNumber _stripe;
+ // The pre-computed hash of the associated BucketKey
+ const BucketKey::Hash _keyHash;
+
// The namespace that this bucket is used for.
NamespaceString _ns;
// The metadata of the data that this bucket contains.
BucketMetadata _metadata;
- // Extra metadata combinations that are supported without normalizing the metadata object.
- static constexpr std::size_t kNumFieldOrderCombinationsWithoutNormalizing = 1;
- boost::container::static_vector<BSONObj, kNumFieldOrderCombinationsWithoutNormalizing>
- _nonNormalizedKeyMetadatas;
-
- // Top-level field names of the measurements that have been inserted into the bucket.
+ // Top-level hashed field names of the measurements that have been inserted into the bucket.
StringSet _fieldNames;
+ // Top-level hashed new field names that have not yet been committed into the bucket.
+ StringSet _uncommittedFieldNames;
+
// Time field for the measurements that have been inserted into the bucket.
std::string _timeField;
@@ -427,9 +481,6 @@ private:
// measurements.
timeseries::Schema _schema;
- // The latest time that has been inserted into the bucket.
- Date_t _latestTime;
-
// The total size in bytes of the bucket's BSON serialization, including measurements to be
// inserted.
uint64_t _size = 0;
@@ -441,9 +492,14 @@ private:
// The number of committed measurements in the bucket.
uint32_t _numCommittedMeasurements = 0;
- // Whether the bucket is full. This can be due to number of measurements, size, or time
+ // Whether the bucket has been marked for a rollover action. It can be marked for closure due to
+ // number of measurements, size, or schema changes, or it can be marked for archival due to time
// range.
- bool _full = false;
+ RolloverAction _rolloverAction = RolloverAction::kNone;
+
+ // Whether this bucket was kept open after exceeding the bucket max size to improve bucketing
+ // performance for large measurements.
+ bool _keptOpenDueToLargeMeasurements = false;
// The batch that has been prepared and is currently in the process of being committed, if
// any.
@@ -533,9 +589,10 @@ void BucketCatalog::WriteBatch::_addMeasurement(const BSONObj& doc) {
_measurements.push_back(doc);
}
-void BucketCatalog::WriteBatch::_recordNewFields(NewFieldNames&& fields) {
+void BucketCatalog::WriteBatch::_recordNewFields(Bucket* bucket, NewFieldNames&& fields) {
for (auto&& field : fields) {
_newFieldNamesToBeInserted[field] = field.hash();
+ bucket->_uncommittedFieldNames.emplace(field);
}
}
@@ -547,6 +604,7 @@ void BucketCatalog::WriteBatch::_prepareCommit(Bucket* bucket) {
// by someone else.
for (auto it = _newFieldNamesToBeInserted.begin(); it != _newFieldNamesToBeInserted.end();) {
StringMapHashedKey fieldName(it->first, it->second);
+ bucket->_uncommittedFieldNames.erase(fieldName);
if (bucket->_fieldNames.contains(fieldName)) {
_newFieldNamesToBeInserted.erase(it++);
continue;
@@ -597,6 +655,104 @@ BucketCatalog& BucketCatalog::get(OperationContext* opCtx) {
return get(opCtx->getServiceContext());
}
+Status BucketCatalog::reopenBucket(OperationContext* opCtx,
+ const CollectionPtr& coll,
+ const BSONObj& bucketDoc) {
+ const NamespaceString ns = coll->ns().getTimeseriesViewNamespace();
+ const boost::optional<TimeseriesOptions> options = coll->getTimeseriesOptions();
+ invariant(options,
+ str::stream() << "Attempting to reopen a bucket for a non-timeseries collection: "
+ << ns);
+
+ BSONElement bucketIdElem = bucketDoc.getField(timeseries::kBucketIdFieldName);
+ if (bucketIdElem.eoo() || bucketIdElem.type() != BSONType::jstOID) {
+ return {ErrorCodes::BadValue,
+ str::stream() << timeseries::kBucketIdFieldName
+ << " is missing or not an ObjectId"};
+ }
+
+ // Validate the bucket document against the schema.
+ auto result = coll->checkValidation(opCtx, bucketDoc);
+ if (result.first != Collection::SchemaValidationResult::kPass) {
+ return result.second;
+ }
+
+ BSONElement metadata;
+ auto metaFieldName = options->getMetaField();
+ if (metaFieldName) {
+ metadata = bucketDoc.getField(*metaFieldName);
+ }
+
+ // Buckets are spread across independently-lockable stripes to improve parallelism. We map a
+ // bucket to a stripe by hashing the BucketKey.
+ auto key = BucketKey{ns, BucketMetadata{metadata, coll->getDefaultCollator()}};
+ auto stripeNumber = _getStripeNumber(key);
+
+ auto bucketId = bucketIdElem.OID();
+ std::unique_ptr<Bucket> bucket = std::make_unique<Bucket>(bucketId, stripeNumber, key.hash);
+
+ // Initialize the remaining member variables from the bucket document.
+ bucket->_ns = ns;
+ bucket->_metadata = key.metadata;
+ bucket->_timeField = options->getTimeField().toString();
+ bucket->_size = bucketDoc.objsize();
+ bucket->_minTime = bucketDoc.getObjectField(timeseries::kBucketControlFieldName)
+ .getObjectField(timeseries::kBucketControlMinFieldName)
+ .getField(options->getTimeField())
+ .Date();
+
+ // Populate the top-level data field names.
+ const BSONObj& dataObj = bucketDoc.getObjectField(timeseries::kBucketDataFieldName);
+ for (const BSONElement& dataElem : dataObj) {
+ auto hashedKey = StringSet::hasher().hashed_key(dataElem.fieldName());
+ bucket->_fieldNames.emplace(hashedKey);
+ }
+
+ auto swMinMax = timeseries::generateMinMaxFromBucketDoc(bucketDoc, coll->getDefaultCollator());
+ if (!swMinMax.isOK()) {
+ return swMinMax.getStatus();
+ }
+ bucket->_minmax = std::move(swMinMax.getValue());
+
+ auto swSchema = timeseries::generateSchemaFromBucketDoc(bucketDoc, coll->getDefaultCollator());
+ if (!swSchema.isOK()) {
+ return swSchema.getStatus();
+ }
+ bucket->_schema = std::move(swSchema.getValue());
+
+ uint32_t numMeasurements = 0;
+ const bool isCompressed = timeseries::isCompressedBucket(bucketDoc);
+ const BSONElement timeColumnElem = dataObj.getField(options->getTimeField());
+
+ if (isCompressed && timeColumnElem.type() == BSONType::BinData) {
+ BSONColumn storage{timeColumnElem};
+ numMeasurements = storage.size();
+ } else {
+ numMeasurements = timeColumnElem.Obj().nFields();
+ }
+
+ bucket->_numMeasurements = numMeasurements;
+ bucket->_numCommittedMeasurements = numMeasurements;
+
+ ExecutionStatsController stats = _getExecutionStats(ns);
+ stats.incNumBucketsReopened();
+
+ // Register the reopened bucket with the catalog.
+ auto& stripe = _stripes[stripeNumber];
+ stdx::lock_guard stripeLock{stripe.mutex};
+
+ ClosedBuckets closedBuckets;
+ _expireIdleBuckets(&stripe, stripeLock, stats, &closedBuckets);
+
+ auto [it, inserted] = stripe.allBuckets.try_emplace(bucketId, std::move(bucket));
+ tassert(6668200, "Expected bucket to be inserted", inserted);
+ Bucket* unownedBucket = it->second.get();
+ stripe.openBuckets[key] = unownedBucket;
+ _initializeBucketState(bucketId);
+
+ return Status::OK();
+}
+
BSONObj BucketCatalog::getMetadata(const BucketHandle& handle) const {
auto const& stripe = _stripes[handle.stripe];
stdx::lock_guard stripeLock{stripe.mutex};
@@ -648,59 +804,91 @@ StatusWith<BucketCatalog::InsertResult> BucketCatalog::insert(
invariant(bucket);
NewFieldNames newFieldNamesToBeInserted;
- uint32_t newFieldNamesSize = 0;
uint32_t sizeToBeAdded = 0;
- bucket->_calculateBucketFieldsAndSizeChange(doc,
- options.getMetaField(),
- &newFieldNamesToBeInserted,
- &newFieldNamesSize,
- &sizeToBeAdded);
+ bucket->_calculateBucketFieldsAndSizeChange(
+ doc, options.getMetaField(), &newFieldNamesToBeInserted, &sizeToBeAdded);
+
+ auto determineRolloverAction = [&](Bucket* bucket) -> RolloverAction {
+ const bool canArchive = feature_flags::gTimeseriesScalabilityImprovements.isEnabled(
+ serverGlobalParams.featureCompatibility);
- auto shouldCloseBucket = [&](Bucket* bucket) -> bool {
if (bucket->schemaIncompatible(doc, metaFieldName, comparator)) {
stats.incNumBucketsClosedDueToSchemaChange();
- return true;
+ return RolloverAction::kClose;
}
if (bucket->_numMeasurements == static_cast<std::uint64_t>(gTimeseriesBucketMaxCount)) {
stats.incNumBucketsClosedDueToCount();
- return true;
- }
- if (bucket->_size + sizeToBeAdded > static_cast<std::uint64_t>(gTimeseriesBucketMaxSize)) {
- stats.incNumBucketsClosedDueToSize();
- return true;
+ return RolloverAction::kClose;
}
auto bucketTime = bucket->getTime();
if (time - bucketTime >= Seconds(*options.getBucketMaxSpanSeconds())) {
- stats.incNumBucketsClosedDueToTimeForward();
- return true;
+ if (canArchive) {
+ stats.incNumBucketsArchivedDueToTimeForward();
+ return RolloverAction::kArchive;
+ } else {
+ stats.incNumBucketsClosedDueToTimeForward();
+ return RolloverAction::kClose;
+ }
}
if (time < bucketTime) {
- stats.incNumBucketsClosedDueToTimeBackward();
- return true;
+ if (canArchive) {
+ stats.incNumBucketsArchivedDueToTimeBackward();
+ return RolloverAction::kArchive;
+ } else {
+ stats.incNumBucketsClosedDueToTimeBackward();
+ return RolloverAction::kClose;
+ }
}
- return false;
+ if (bucket->_size + sizeToBeAdded > static_cast<std::uint64_t>(gTimeseriesBucketMaxSize)) {
+ bool keepBucketOpenForLargeMeasurements =
+ bucket->_numMeasurements < static_cast<std::uint64_t>(gTimeseriesBucketMinCount) &&
+ feature_flags::gTimeseriesScalabilityImprovements.isEnabled(
+ serverGlobalParams.featureCompatibility);
+ if (keepBucketOpenForLargeMeasurements) {
+ // Instead of packing the bucket to the BSON size limit, 16MB, we'll limit the max
+ // bucket size to 12MB. This is to leave some space in the bucket if we need to add
+ // new internal fields to existing, full buckets.
+ static constexpr size_t largeMeasurementsMaxBucketSize =
+ BSONObjMaxUserSize - (4 * 1024 * 1024);
+
+ if (bucket->_size + sizeToBeAdded > largeMeasurementsMaxBucketSize) {
+ stats.incNumBucketsClosedDueToSize();
+ return RolloverAction::kClose;
+ }
+
+ // There's enough space to add this measurement and we're still below the large
+ // measurement threshold.
+ if (!bucket->_keptOpenDueToLargeMeasurements) {
+ // Only increment this metric once per bucket.
+ bucket->_keptOpenDueToLargeMeasurements = true;
+ stats.incNumBucketsKeptOpenDueToLargeMeasurements();
+ }
+ return RolloverAction::kNone;
+ } else {
+ stats.incNumBucketsClosedDueToSize();
+ return RolloverAction::kClose;
+ }
+ }
+ return RolloverAction::kNone;
};
- if (!bucket->_ns.isEmpty() && shouldCloseBucket(bucket)) {
- info.openedDuetoMetadata = false;
- bucket = _rollover(&stripe, stripeLock, bucket, info);
+ if (!bucket->_ns.isEmpty()) {
+ auto action = determineRolloverAction(bucket);
+ if (action != RolloverAction::kNone) {
+ info.openedDuetoMetadata = false;
+ bucket = _rollover(&stripe, stripeLock, bucket, info, action);
- bucket->_calculateBucketFieldsAndSizeChange(doc,
- options.getMetaField(),
- &newFieldNamesToBeInserted,
- &newFieldNamesSize,
- &sizeToBeAdded);
+ bucket->_calculateBucketFieldsAndSizeChange(
+ doc, options.getMetaField(), &newFieldNamesToBeInserted, &sizeToBeAdded);
+ }
}
auto batch = bucket->_activeBatch(getOpId(opCtx, combine), stats);
batch->_addMeasurement(doc);
- batch->_recordNewFields(std::move(newFieldNamesToBeInserted));
+ batch->_recordNewFields(bucket, std::move(newFieldNamesToBeInserted));
bucket->_numMeasurements++;
bucket->_size += sizeToBeAdded;
- if (time > bucket->_latestTime) {
- bucket->_latestTime = time;
- }
if (bucket->_ns.isEmpty()) {
// The namespace and metadata only need to be set if this bucket was newly created.
bucket->_ns = ns;
@@ -799,29 +987,21 @@ boost::optional<BucketCatalog::ClosedBucket> BucketCatalog::finish(
getTimeseriesBucketClearedError(bucket->id(), bucket->_ns));
}
} else if (bucket->allCommitted()) {
- if (bucket->_full) {
- // Everything in the bucket has been committed, and nothing more will be added since the
- // bucket is full. Thus, we can remove it.
- _memoryUsage.fetchAndSubtract(bucket->_memoryUsage);
-
- auto it = stripe.allBuckets.find(batch->bucket().id);
- if (it != stripe.allBuckets.end()) {
- bucket = it->second.get();
-
- closedBucket = ClosedBucket{batch->bucket().id,
- bucket->getTimeField().toString(),
- bucket->numMeasurements()};
-
- // Only remove from allBuckets and idleBuckets. If it was marked full, we know
- // that happened in Stripe::rollover, and that there is already a new open
- // bucket for this metadata.
- _markBucketNotIdle(&stripe, stripeLock, bucket);
- _eraseBucketState(batch->bucket().id);
-
- stripe.allBuckets.erase(batch->bucket().id);
+ switch (bucket->_rolloverAction) {
+ case RolloverAction::kClose: {
+ closedBucket = ClosedBucket{
+ bucket->id(), bucket->getTimeField().toString(), bucket->numMeasurements()};
+ _removeBucket(&stripe, stripeLock, bucket, false);
+ break;
+ }
+ case RolloverAction::kArchive: {
+ _archiveBucket(&stripe, stripeLock, bucket);
+ break;
+ }
+ case RolloverAction::kNone: {
+ _markBucketIdle(&stripe, stripeLock, bucket);
+ break;
}
- } else {
- _markBucketIdle(&stripe, stripeLock, bucket);
}
}
return closedBucket;
@@ -897,6 +1077,7 @@ void BucketCatalog::_appendExecutionStatsToBuilder(const ExecutionStats* stats,
stats->numBucketsClosedDueToTimeBackward.load());
builder->appendNumber("numBucketsClosedDueToMemoryThreshold",
stats->numBucketsClosedDueToMemoryThreshold.load());
+
auto commits = stats->numCommits.load();
builder->appendNumber("numCommits", commits);
builder->appendNumber("numWaits", stats->numWaits.load());
@@ -905,8 +1086,20 @@ void BucketCatalog::_appendExecutionStatsToBuilder(const ExecutionStats* stats,
if (commits) {
builder->appendNumber("avgNumMeasurementsPerCommit", measurementsCommitted / commits);
}
-}
+ if (feature_flags::gTimeseriesScalabilityImprovements.isEnabled(
+ serverGlobalParams.featureCompatibility)) {
+ builder->appendNumber("numBucketsArchivedDueToTimeForward",
+ stats->numBucketsArchivedDueToTimeForward.load());
+ builder->appendNumber("numBucketsArchivedDueToTimeBackward",
+ stats->numBucketsArchivedDueToTimeBackward.load());
+ builder->appendNumber("numBucketsArchivedDueToMemoryThreshold",
+ stats->numBucketsArchivedDueToMemoryThreshold.load());
+ builder->appendNumber("numBucketsReopened", stats->numBucketsReopened.load());
+ builder->appendNumber("numBucketsKeptOpenDueToLargeMeasurements",
+ stats->numBucketsKeptOpenDueToLargeMeasurements.load());
+ }
+}
void BucketCatalog::appendExecutionStats(const NamespaceString& ns, BSONObjBuilder* builder) const {
const std::shared_ptr<ExecutionStats> stats = _getExecutionStats(ns);
@@ -955,6 +1148,10 @@ std::size_t BucketCatalog::BucketHasher::operator()(const BucketKey& key) const
return key.hash;
}
+std::size_t BucketCatalog::PreHashed::operator()(const BucketKey::Hash& key) const {
+ return key;
+}
+
BucketCatalog::StripeNumber BucketCatalog::_getStripeNumber(const BucketKey& key) {
return key.hash % kNumberOfStripes;
}
@@ -1050,23 +1247,51 @@ void BucketCatalog::_waitToCommitBatch(Stripe* stripe, const std::shared_ptr<Wri
}
}
-bool BucketCatalog::_removeBucket(Stripe* stripe, WithLock stripeLock, Bucket* bucket) {
- auto it = stripe->allBuckets.find(bucket->id());
- if (it == stripe->allBuckets.end()) {
- return false;
- }
-
+void BucketCatalog::_removeBucket(Stripe* stripe,
+ WithLock stripeLock,
+ Bucket* bucket,
+ bool archiving) {
invariant(bucket->_batches.empty());
invariant(!bucket->_preparedBatch);
+ auto allIt = stripe->allBuckets.find(bucket->id());
+ invariant(allIt != stripe->allBuckets.end());
+
_memoryUsage.fetchAndSubtract(bucket->_memoryUsage);
_markBucketNotIdle(stripe, stripeLock, bucket);
- stripe->openBuckets.erase({bucket->_ns, bucket->_metadata});
- _eraseBucketState(bucket->id());
- stripe->allBuckets.erase(it);
+ // If the bucket was rolled over, then there may be a different open bucket for this metadata.
+ auto openIt = stripe->openBuckets.find({bucket->_ns, bucket->_metadata});
+ if (openIt != stripe->openBuckets.end() && openIt->second == bucket) {
+ stripe->openBuckets.erase(openIt);
+ }
+
+ // If we are cleaning up while archiving a bucket, then we want to preserve its state. Otherwise
+ // we can remove the state from the catalog altogether.
+ if (!archiving) {
+ _eraseBucketState(bucket->id());
+ }
+
+ stripe->allBuckets.erase(allIt);
+}
+
+void BucketCatalog::_archiveBucket(Stripe* stripe, WithLock stripeLock, Bucket* bucket) {
+ bool archived = false;
+ auto& archivedSet = stripe->archivedBuckets[bucket->keyHash()];
+ auto it = archivedSet.find(bucket->getTime());
+ if (it == archivedSet.end()) {
+ archivedSet.emplace(bucket->getTime(),
+ ArchivedBucket{bucket->id(),
+ bucket->getTimeField().toString(),
+ bucket->numMeasurements()});
+
+ long long memory = _marginalMemoryUsageForArchivedBucket(archivedSet[bucket->getTime()],
+ archivedSet.size() == 1);
+ _memoryUsage.fetchAndAdd(memory);
- return true;
+ archived = true;
+ }
+ _removeBucket(stripe, stripeLock, bucket, archived);
}
void BucketCatalog::_abort(Stripe* stripe,
@@ -1112,7 +1337,7 @@ void BucketCatalog::_abort(Stripe* stripe,
}
if (doRemove) {
- [[maybe_unused]] bool removed = _removeBucket(stripe, stripeLock, bucket);
+ _removeBucket(stripe, stripeLock, bucket, false);
}
}
@@ -1135,19 +1360,54 @@ void BucketCatalog::_expireIdleBuckets(Stripe* stripe,
ExecutionStatsController& stats,
std::vector<BucketCatalog::ClosedBucket>* closedBuckets) {
// As long as we still need space and have entries and remaining attempts, close idle buckets.
- int32_t numClosed = 0;
+ int32_t numExpired = 0;
+
+ const bool canArchive = feature_flags::gTimeseriesScalabilityImprovements.isEnabled(
+ serverGlobalParams.featureCompatibility);
+
while (!stripe->idleBuckets.empty() &&
_memoryUsage.load() > getTimeseriesIdleBucketExpiryMemoryUsageThresholdBytes() &&
- numClosed <= gTimeseriesIdleBucketExpiryMaxCountPerAttempt) {
+ numExpired <= gTimeseriesIdleBucketExpiryMaxCountPerAttempt) {
Bucket* bucket = stripe->idleBuckets.back();
- ClosedBucket closed{
- bucket->id(), bucket->getTimeField().toString(), bucket->numMeasurements()};
- if (_removeBucket(stripe, stripeLock, bucket)) {
+ if (canArchive) {
+ _archiveBucket(stripe, stripeLock, bucket);
+ stats.incNumBucketsArchivedDueToMemoryThreshold();
+ } else {
+ ClosedBucket closed{
+ bucket->id(), bucket->getTimeField().toString(), bucket->numMeasurements()};
+ _removeBucket(stripe, stripeLock, bucket, false);
stats.incNumBucketsClosedDueToMemoryThreshold();
closedBuckets->push_back(closed);
- ++numClosed;
}
+
+ ++numExpired;
+ }
+
+ while (canArchive && !stripe->archivedBuckets.empty() &&
+ _memoryUsage.load() > getTimeseriesIdleBucketExpiryMemoryUsageThresholdBytes() &&
+ numExpired <= gTimeseriesIdleBucketExpiryMaxCountPerAttempt) {
+
+ auto& [hash, archivedSet] = *stripe->archivedBuckets.begin();
+ invariant(!archivedSet.empty());
+
+ auto& [timestamp, bucket] = *archivedSet.begin();
+ ClosedBucket closed{bucket.bucketId, bucket.timeField, bucket.numMeasurements, true};
+
+ long long memory = _marginalMemoryUsageForArchivedBucket(bucket, archivedSet.size() == 1);
+ _eraseBucketState(bucket.bucketId);
+ if (archivedSet.size() == 1) {
+ // If this is the only entry, erase the whole map so we don't leave it empty.
+ stripe->archivedBuckets.erase(stripe->archivedBuckets.begin());
+ } else {
+ // Otherwise just erase this bucket from the map.
+ archivedSet.erase(archivedSet.begin());
+ }
+ _memoryUsage.fetchAndSubtract(memory);
+
+ stats.incNumBucketsClosedDueToMemoryThreshold();
+ closedBuckets->push_back(closed);
+ ++numExpired;
}
}
@@ -1158,8 +1418,8 @@ BucketCatalog::Bucket* BucketCatalog::_allocateBucket(Stripe* stripe,
auto [bucketId, roundedTime] = generateBucketId(info.time, info.options);
- auto [it, inserted] =
- stripe->allBuckets.try_emplace(bucketId, std::make_unique<Bucket>(bucketId, info.stripe));
+ auto [it, inserted] = stripe->allBuckets.try_emplace(
+ bucketId, std::make_unique<Bucket>(bucketId, info.stripe, info.key.hash));
tassert(6130900, "Expected bucket to be inserted", inserted);
Bucket* bucket = it->second.get();
stripe->openBuckets[info.key] = bucket;
@@ -1183,20 +1443,25 @@ BucketCatalog::Bucket* BucketCatalog::_allocateBucket(Stripe* stripe,
BucketCatalog::Bucket* BucketCatalog::_rollover(Stripe* stripe,
WithLock stripeLock,
Bucket* bucket,
- const CreationInfo& info) {
-
+ const CreationInfo& info,
+ RolloverAction action) {
+ invariant(action != RolloverAction::kNone);
if (bucket->allCommitted()) {
- // The bucket does not contain any measurements that are yet to be committed, so we can
- // remove it now.
- info.closedBuckets->push_back(ClosedBucket{
- bucket->id(), bucket->getTimeField().toString(), bucket->numMeasurements()});
+ // The bucket does not contain any measurements that are yet to be committed, so we can take
+ // action now.
+ if (action == RolloverAction::kClose) {
+ info.closedBuckets->push_back(ClosedBucket{
+ bucket->id(), bucket->getTimeField().toString(), bucket->numMeasurements()});
- bool removed = _removeBucket(stripe, stripeLock, bucket);
- invariant(removed);
+ _removeBucket(stripe, stripeLock, bucket, false);
+ } else {
+ invariant(action == RolloverAction::kArchive);
+ _archiveBucket(stripe, stripeLock, bucket);
+ }
} else {
- // We must keep the bucket around until it is committed, just mark it full so it we know to
- // clean it up when the last batch finishes.
- bucket->_full = true;
+ // We must keep the bucket around until all measurements are committed, just mark the
+ // action we chose now so we know what to do when the last batch finishes.
+ bucket->_rolloverAction = action;
}
return _allocateBucket(stripe, stripeLock, info);
@@ -1283,6 +1548,12 @@ boost::optional<BucketCatalog::BucketState> BucketCatalog::_setBucketState(const
return state;
}
+long long BucketCatalog::_marginalMemoryUsageForArchivedBucket(const ArchivedBucket& bucket,
+ bool onlyEntryForMatchingMetaHash) {
+ return sizeof(std::size_t) + sizeof(Date_t) + sizeof(ArchivedBucket) + bucket.timeField.size() +
+ (onlyEntryForMatchingMetaHash ? sizeof(decltype(Stripe::archivedBuckets)::value_type) : 0);
+}
+
class BucketCatalog::ServerStatus : public ServerStatusSection {
struct BucketCounts {
BucketCounts& operator+=(const BucketCounts& other) {
diff --git a/src/mongo/db/timeseries/bucket_catalog.h b/src/mongo/db/timeseries/bucket_catalog.h
index 2df33182d31..c2a82039ee8 100644
--- a/src/mongo/db/timeseries/bucket_catalog.h
+++ b/src/mongo/db/timeseries/bucket_catalog.h
@@ -67,9 +67,14 @@ class BucketCatalog {
AtomicWord<long long> numBucketsClosedDueToTimeForward;
AtomicWord<long long> numBucketsClosedDueToTimeBackward;
AtomicWord<long long> numBucketsClosedDueToMemoryThreshold;
+ AtomicWord<long long> numBucketsArchivedDueToTimeForward;
+ AtomicWord<long long> numBucketsArchivedDueToTimeBackward;
+ AtomicWord<long long> numBucketsArchivedDueToMemoryThreshold;
AtomicWord<long long> numCommits;
AtomicWord<long long> numWaits;
AtomicWord<long long> numMeasurementsCommitted;
+ AtomicWord<long long> numBucketsReopened;
+ AtomicWord<long long> numBucketsKeptOpenDueToLargeMeasurements;
};
class ExecutionStatsController {
@@ -87,9 +92,14 @@ class BucketCatalog {
void incNumBucketsClosedDueToTimeForward(long long increment = 1);
void incNumBucketsClosedDueToTimeBackward(long long increment = 1);
void incNumBucketsClosedDueToMemoryThreshold(long long increment = 1);
+ void incNumBucketsArchivedDueToTimeForward(long long increment = 1);
+ void incNumBucketsArchivedDueToTimeBackward(long long increment = 1);
+ void incNumBucketsArchivedDueToMemoryThreshold(long long increment = 1);
void incNumCommits(long long increment = 1);
void incNumWaits(long long increment = 1);
void incNumMeasurementsCommitted(long long increment = 1);
+ void incNumBucketsReopened(long long increment = 1);
+ void incNumBucketsKeptOpenDueToLargeMeasurements(long long increment = 1);
private:
std::shared_ptr<ExecutionStats> _collectionStats;
@@ -117,6 +127,7 @@ public:
OID bucketId;
std::string timeField;
uint32_t numMeasurements;
+ bool eligibleForReopening = false;
};
using ClosedBuckets = std::vector<ClosedBucket>;
@@ -179,7 +190,7 @@ public:
/**
* Records a set of new-to-the-bucket fields. Active batches only.
*/
- void _recordNewFields(NewFieldNames&& fields);
+ void _recordNewFields(Bucket* bucket, NewFieldNames&& fields);
/**
* Prepares the batch for commit. Sets min/max appropriately, records the number of
@@ -230,6 +241,13 @@ public:
BucketCatalog operator=(const BucketCatalog&) = delete;
/**
+ * Reopens a closed bucket into the catalog given the bucket document.
+ */
+ Status reopenBucket(OperationContext* opCtx,
+ const CollectionPtr& coll,
+ const BSONObj& bucketDoc);
+
+ /**
* Returns the metadata for the given bucket in the following format:
* {<metadata field name>: <value>}
* All measurements in the given bucket share same metadata value.
@@ -354,12 +372,14 @@ private:
* Key to lookup open Bucket for namespace and metadata, with pre-computed hash.
*/
struct BucketKey {
+ using Hash = std::size_t;
+
BucketKey() = delete;
BucketKey(const NamespaceString& nss, const BucketMetadata& meta);
NamespaceString ns;
BucketMetadata metadata;
- std::size_t hash;
+ Hash hash;
bool operator==(const BucketKey& other) const {
return ns == other.ns && metadata == other.metadata;
@@ -379,6 +399,23 @@ private:
};
/**
+ * Hasher to support using a pre-computed hash as a key without having to compute another hash.
+ */
+ struct PreHashed {
+ std::size_t operator()(const BucketKey::Hash& key) const;
+ };
+
+ /**
+ * Information of a Bucket that got archived while performing an operation on this
+ * BucketCatalog.
+ */
+ struct ArchivedBucket {
+ OID bucketId;
+ std::string timeField;
+ uint32_t numMeasurements;
+ };
+
+ /**
* Struct to hold a portion of the buckets managed by the catalog.
*
* Each of the bucket lists, as well as the buckets themselves, are protected by 'mutex'.
@@ -397,6 +434,12 @@ private:
// Buckets that do not have any outstanding writes.
using IdleList = std::list<Bucket*>;
IdleList idleBuckets;
+
+ // Buckets that are not currently in the catalog, but which are eligible to receive more
+ // measurements. The top-level map is keyed by the hash of the BucketKey, while the stored
+ // map is keyed by the bucket's minimum timestamp.
+ stdx::unordered_map<BucketKey::Hash, std::map<Date_t, ArchivedBucket>, PreHashed>
+ archivedBuckets;
};
StripeNumber _getStripeNumber(const BucketKey& key);
@@ -444,7 +487,13 @@ private:
/**
* Removes the given bucket from the bucket catalog's internal data structures.
*/
- bool _removeBucket(Stripe* stripe, WithLock stripeLock, Bucket* bucket);
+ void _removeBucket(Stripe* stripe, WithLock stripeLock, Bucket* bucket, bool archiving);
+
+ /**
+ * Archives the given bucket, minimizing the memory footprint but retaining the necessary
+ * information required to efficiently identify it as a candidate for future insertions.
+ */
+ void _archiveBucket(Stripe* stripe, WithLock stripeLock, Bucket* bucket);
/**
* Aborts 'batch', and if the corresponding bucket still exists, proceeds to abort any other
@@ -492,6 +541,11 @@ private:
Bucket* _allocateBucket(Stripe* stripe, WithLock stripeLock, const CreationInfo& info);
/**
+ * Mode enum to determine the rollover type decision for a given bucket.
+ */
+ enum class RolloverAction { kNone, kArchive, kClose };
+
+ /**
* Close the existing, full bucket and open a new one for the same metadata.
*
* Writes information about the closed bucket to the 'info' parameter.
@@ -499,7 +553,8 @@ private:
Bucket* _rollover(Stripe* stripe,
WithLock stripeLock,
Bucket* bucket,
- const CreationInfo& info);
+ const CreationInfo& info,
+ RolloverAction action);
ExecutionStatsController _getExecutionStats(const NamespaceString& ns);
std::shared_ptr<ExecutionStats> _getExecutionStats(const NamespaceString& ns) const;
@@ -531,6 +586,16 @@ private:
*/
boost::optional<BucketState> _setBucketState(const OID& id, BucketState target);
+ /**
+ * Calculates the marginal memory usage for an archived bucket. The
+ * 'onlyEntryForMatchingMetaHash' parameter indicates that the bucket will be (if inserting)
+ * or was (if removing) the only bucket associated with its meta hash value. If true, then
+ * the returned value will attempt to account for the overhead of the map data structure for
+ * the meta hash value.
+ */
+ static long long _marginalMemoryUsageForArchivedBucket(const ArchivedBucket& bucket,
+ bool onlyEntryForMatchingMetaHash);
+
static constexpr std::size_t kNumberOfStripes = 32;
std::array<Stripe, kNumberOfStripes> _stripes;
diff --git a/src/mongo/db/timeseries/bucket_catalog_helpers.cpp b/src/mongo/db/timeseries/bucket_catalog_helpers.cpp
index cc5bc65f3c1..6a5d03e77bb 100644
--- a/src/mongo/db/timeseries/bucket_catalog_helpers.cpp
+++ b/src/mongo/db/timeseries/bucket_catalog_helpers.cpp
@@ -57,7 +57,7 @@ StatusWith<std::pair<const BSONObj, const BSONObj>> extractMinAndMax(const BSONO
} // namespace
StatusWith<MinMax> generateMinMaxFromBucketDoc(const BSONObj& bucketDoc,
- const CollatorInterface* collator) {
+ const StringData::ComparatorInterface* comparator) {
auto swDocs = extractMinAndMax(bucketDoc);
if (!swDocs.isOK()) {
return swDocs.getStatus();
@@ -66,14 +66,14 @@ StatusWith<MinMax> generateMinMaxFromBucketDoc(const BSONObj& bucketDoc,
const auto& [minObj, maxObj] = swDocs.getValue();
try {
- return MinMax::parseFromBSON(minObj, maxObj, collator);
+ return MinMax::parseFromBSON(minObj, maxObj, comparator);
} catch (...) {
return exceptionToStatus();
}
}
StatusWith<Schema> generateSchemaFromBucketDoc(const BSONObj& bucketDoc,
- const CollatorInterface* collator) {
+ const StringData::ComparatorInterface* comparator) {
auto swDocs = extractMinAndMax(bucketDoc);
if (!swDocs.isOK()) {
return swDocs.getStatus();
@@ -82,7 +82,7 @@ StatusWith<Schema> generateSchemaFromBucketDoc(const BSONObj& bucketDoc,
const auto& [minObj, maxObj] = swDocs.getValue();
try {
- return Schema::parseFromBSON(minObj, maxObj, collator);
+ return Schema::parseFromBSON(minObj, maxObj, comparator);
} catch (...) {
return exceptionToStatus();
}
diff --git a/src/mongo/db/timeseries/bucket_catalog_helpers.h b/src/mongo/db/timeseries/bucket_catalog_helpers.h
index 015cae8ef66..3c84124e5b2 100644
--- a/src/mongo/db/timeseries/bucket_catalog_helpers.h
+++ b/src/mongo/db/timeseries/bucket_catalog_helpers.h
@@ -30,8 +30,8 @@
#pragma once
#include "mongo/base/status_with.h"
+#include "mongo/base/string_data_comparator_interface.h"
#include "mongo/bson/bsonobj.h"
-#include "mongo/db/query/collation/collator_interface.h"
#include "mongo/db/timeseries/flat_bson.h"
namespace mongo::timeseries {
@@ -43,7 +43,7 @@ namespace mongo::timeseries {
* Returns a bad status if the bucket document is malformed.
*/
StatusWith<MinMax> generateMinMaxFromBucketDoc(const BSONObj& bucketDoc,
- const CollatorInterface* collator);
+ const StringData::ComparatorInterface* comparator);
/**
* Generates and returns a Schema object from an existing bucket document. Avoids unpacking the
@@ -52,6 +52,6 @@ StatusWith<MinMax> generateMinMaxFromBucketDoc(const BSONObj& bucketDoc,
* Returns a bad status if the bucket document is malformed or contains mixed schema measurements.
*/
StatusWith<Schema> generateSchemaFromBucketDoc(const BSONObj& bucketDoc,
- const CollatorInterface* collator);
+ const StringData::ComparatorInterface* comparator);
} // namespace mongo::timeseries
diff --git a/src/mongo/db/timeseries/bucket_catalog_test.cpp b/src/mongo/db/timeseries/bucket_catalog_test.cpp
index ccc3b06a2fb..c6e91d25b53 100644
--- a/src/mongo/db/timeseries/bucket_catalog_test.cpp
+++ b/src/mongo/db/timeseries/bucket_catalog_test.cpp
@@ -32,6 +32,8 @@
#include "mongo/db/catalog/create_collection.h"
#include "mongo/db/catalog_raii.h"
#include "mongo/db/timeseries/bucket_catalog.h"
+#include "mongo/db/timeseries/bucket_compression.h"
+#include "mongo/idl/server_parameter_test_util.h"
#include "mongo/stdx/future.h"
#include "mongo/unittest/bson_test_util.h"
#include "mongo/unittest/death_test.h"
@@ -41,6 +43,15 @@
namespace mongo {
namespace {
+constexpr StringData kNumSchemaChanges = "numBucketsClosedDueToSchemaChange"_sd;
+constexpr StringData kNumBucketsReopened = "numBucketsReopened"_sd;
+constexpr StringData kNumArchivedDueToTimeForward = "numBucketsArchivedDueToTimeForward"_sd;
+constexpr StringData kNumArchivedDueToTimeBackward = "numBucketsArchivedDueToTimeBackward"_sd;
+constexpr StringData kNumArchivedDueToMemoryThreshold = "numBucketsArchivedDueToMemoryThreshold"_sd;
+constexpr StringData kNumClosedDueToTimeForward = "numBucketsClosedDueToTimeForward"_sd;
+constexpr StringData kNumClosedDueToTimeBackward = "numBucketsClosedDueToTimeBackward"_sd;
+constexpr StringData kNumClosedDueToMemoryThreshold = "numBucketsClosedDueToMemoryThreshold"_sd;
+
class BucketCatalogTest : public CatalogTestFixture {
protected:
class Task {
@@ -72,8 +83,7 @@ protected:
void _insertOneAndCommit(const NamespaceString& ns,
uint16_t numPreviouslyCommittedMeasurements);
- long long _getNumWaits(const NamespaceString& ns);
- long long _getNumSchemaChanges(const NamespaceString& ns);
+ long long _getExecutionStat(const NamespaceString& ns, StringData stat);
// Check that each group of objects has compatible schema with itself, but that inserting the
// first object in new group closes the existing bucket and opens a new one
@@ -177,16 +187,10 @@ void BucketCatalogTest::_insertOneAndCommit(const NamespaceString& ns,
_commit(batch, numPreviouslyCommittedMeasurements);
}
-long long BucketCatalogTest::_getNumWaits(const NamespaceString& ns) {
- BSONObjBuilder builder;
- _bucketCatalog->appendExecutionStats(ns, &builder);
- return builder.obj().getIntField("numWaits");
-}
-
-long long BucketCatalogTest::_getNumSchemaChanges(const NamespaceString& ns) {
+long long BucketCatalogTest::_getExecutionStat(const NamespaceString& ns, StringData stat) {
BSONObjBuilder builder;
_bucketCatalog->appendExecutionStats(ns, &builder);
- return builder.obj().getIntField("numBucketsClosedDueToSchemaChange");
+ return builder.obj().getIntField(stat);
}
void BucketCatalogTest::_testMeasurementSchema(
@@ -203,7 +207,7 @@ void BucketCatalogTest::_testMeasurementSchema(
timestampedDoc.append(_timeField, Date_t::now());
timestampedDoc.appendElements(doc);
- auto pre = _getNumSchemaChanges(_ns1);
+ auto pre = _getExecutionStat(_ns1, kNumSchemaChanges);
auto result = _bucketCatalog
->insert(_opCtx,
_ns1,
@@ -212,7 +216,7 @@ void BucketCatalogTest::_testMeasurementSchema(
timestampedDoc.obj(),
BucketCatalog::CombineWithInsertsFromOtherClients::kAllow)
.getValue();
- auto post = _getNumSchemaChanges(_ns1);
+ auto post = _getExecutionStat(_ns1, kNumSchemaChanges);
if (firstMember) {
if (firstGroup) {
@@ -978,5 +982,453 @@ TEST_F(BucketCatalogTest, SchemaChanges) {
_testMeasurementSchema({{docs[18], docs[19]}, {docs[20], docs[21]}});
}
+TEST_F(BucketCatalogTest, ReopenMalformedBucket) {
+ BSONObj bucketDoc = ::mongo::fromjson(
+ R"({"_id":{"$oid":"629e1e680958e279dc29a517"},
+ "control":{"version":1,"min":{"time":{"$date":"2022-06-06T15:34:00.000Z"},"a":1,"b":1},
+ "max":{"time":{"$date":"2022-06-06T15:34:30.000Z"},"a":3,"b":3}},
+ "data":{"time":{"0":{"$date":"2022-06-06T15:34:30.000Z"},
+ "1":{"$date":"2022-06-06T15:34:30.000Z"},
+ "2":{"$date":"2022-06-06T15:34:30.000Z"}},
+ "a":{"0":1,"1":2,"2":3},
+ "b":{"0":1,"1":2,"2":3}}})");
+
+ AutoGetCollection autoColl(_opCtx, _ns1.makeTimeseriesBucketsNamespace(), MODE_IX);
+
+ {
+ // Missing _id field.
+ BSONObj missingIdObj = bucketDoc.removeField("_id");
+ ASSERT_NOT_OK(_bucketCatalog->reopenBucket(_opCtx, autoColl.getCollection(), missingIdObj));
+
+ // Bad _id type.
+ BSONObj badIdObj = bucketDoc.addFields(BSON("_id" << 123));
+ ASSERT_NOT_OK(_bucketCatalog->reopenBucket(_opCtx, autoColl.getCollection(), badIdObj));
+ }
+
+ {
+ // Missing control field.
+ BSONObj missingControlObj = bucketDoc.removeField("control");
+ ASSERT_NOT_OK(
+ _bucketCatalog->reopenBucket(_opCtx, autoColl.getCollection(), missingControlObj));
+
+ // Bad control type.
+ BSONObj badControlObj = bucketDoc.addFields(BSON("control" << BSONArray()));
+ ASSERT_NOT_OK(
+ _bucketCatalog->reopenBucket(_opCtx, autoColl.getCollection(), badControlObj));
+
+ // Bad control.version type.
+ BSONObj badVersionObj = bucketDoc.addFields(BSON(
+ "control" << BSON("version" << BSONArray() << "min"
+ << BSON("time" << BSON("$date"
+ << "2022-06-06T15:34:00.000Z"))
+ << "max"
+ << BSON("time" << BSON("$date"
+ << "2022-06-06T15:34:30.000Z")))));
+ ASSERT_NOT_OK(
+ _bucketCatalog->reopenBucket(_opCtx, autoColl.getCollection(), badVersionObj));
+
+ // Bad control.min type.
+ BSONObj badMinObj = bucketDoc.addFields(BSON(
+ "control" << BSON("version" << 1 << "min" << 123 << "max"
+ << BSON("time" << BSON("$date"
+ << "2022-06-06T15:34:30.000Z")))));
+ ASSERT_NOT_OK(_bucketCatalog->reopenBucket(_opCtx, autoColl.getCollection(), badMinObj));
+
+ // Bad control.max type.
+ BSONObj badMaxObj = bucketDoc.addFields(
+ BSON("control" << BSON("version" << 1 << "min"
+ << BSON("time" << BSON("$date"
+ << "2022-06-06T15:34:00.000Z"))
+ << "max" << 123)));
+ ASSERT_NOT_OK(_bucketCatalog->reopenBucket(_opCtx, autoColl.getCollection(), badMaxObj));
+
+ // Missing control.min.time.
+ BSONObj missingMinTimeObj = bucketDoc.addFields(BSON(
+ "control" << BSON("version" << 1 << "min" << BSON("abc" << 1) << "max"
+ << BSON("time" << BSON("$date"
+ << "2022-06-06T15:34:30.000Z")))));
+ ASSERT_NOT_OK(
+ _bucketCatalog->reopenBucket(_opCtx, autoColl.getCollection(), missingMinTimeObj));
+
+ // Missing control.max.time.
+ BSONObj missingMaxTimeObj = bucketDoc.addFields(
+ BSON("control" << BSON("version" << 1 << "min"
+ << BSON("time" << BSON("$date"
+ << "2022-06-06T15:34:00.000Z"))
+ << "max" << BSON("abc" << 1))));
+ ASSERT_NOT_OK(
+ _bucketCatalog->reopenBucket(_opCtx, autoColl.getCollection(), missingMaxTimeObj));
+ }
+
+
+ {
+ // Missing data field.
+ BSONObj missingDataObj = bucketDoc.removeField("data");
+ ASSERT_NOT_OK(
+ _bucketCatalog->reopenBucket(_opCtx, autoColl.getCollection(), missingDataObj));
+
+ // Bad data type.
+ BSONObj badDataObj = bucketDoc.addFields(BSON("data" << 123));
+ ASSERT_NOT_OK(_bucketCatalog->reopenBucket(_opCtx, autoColl.getCollection(), badDataObj));
+ }
+}
+
+TEST_F(BucketCatalogTest, ReopenUncompressedBucketAndInsertCompatibleMeasurement) {
+ // Bucket document to reopen.
+ BSONObj bucketDoc = ::mongo::fromjson(
+ R"({"_id":{"$oid":"629e1e680958e279dc29a517"},
+ "control":{"version":1,"min":{"time":{"$date":"2022-06-06T15:34:00.000Z"},"a":1,"b":1},
+ "max":{"time":{"$date":"2022-06-06T15:34:30.000Z"},"a":3,"b":3}},
+ "data":{"time":{"0":{"$date":"2022-06-06T15:34:30.000Z"},
+ "1":{"$date":"2022-06-06T15:34:30.000Z"},
+ "2":{"$date":"2022-06-06T15:34:30.000Z"}},
+ "a":{"0":1,"1":2,"2":3},
+ "b":{"0":1,"1":2,"2":3}}})");
+
+ RAIIServerParameterControllerForTest controller{"featureFlagTimeseriesScalabilityImprovements",
+ true};
+ AutoGetCollection autoColl(_opCtx, _ns1.makeTimeseriesBucketsNamespace(), MODE_IX);
+ Status status = _bucketCatalog->reopenBucket(_opCtx, autoColl.getCollection(), bucketDoc);
+ ASSERT_OK(status);
+ ASSERT_EQ(1, _getExecutionStat(_ns1, kNumBucketsReopened));
+
+ // Insert a measurement that is compatible with the reopened bucket.
+ auto result =
+ _bucketCatalog->insert(_opCtx,
+ _ns1,
+ _getCollator(_ns1),
+ _getTimeseriesOptions(_ns1),
+ ::mongo::fromjson(R"({"time":{"$date":"2022-06-06T15:34:40.000Z"},
+ "a":-100,"b":100})"),
+ BucketCatalog::CombineWithInsertsFromOtherClients::kAllow);
+
+ // No buckets are closed.
+ ASSERT(result.getValue().closedBuckets.empty());
+ ASSERT_EQ(0, _getExecutionStat(_ns1, kNumSchemaChanges));
+
+ auto batch = result.getValue().batch;
+ ASSERT(batch->claimCommitRights());
+ ASSERT_OK(_bucketCatalog->prepareCommit(batch));
+ ASSERT_EQ(batch->measurements().size(), 1);
+
+ // The reopened bucket already contains three committed measurements.
+ ASSERT_EQ(batch->numPreviouslyCommittedMeasurements(), 3);
+
+ // Verify that the min and max is updated correctly when inserting new measurements.
+ ASSERT_BSONOBJ_BINARY_EQ(batch->min(), BSON("u" << BSON("a" << -100)));
+ ASSERT_BSONOBJ_BINARY_EQ(
+ batch->max(),
+ BSON("u" << BSON("time" << Date_t::fromMillisSinceEpoch(1654529680000) << "b" << 100)));
+
+ _bucketCatalog->finish(batch, {});
+}
+
+TEST_F(BucketCatalogTest, ReopenUncompressedBucketAndInsertIncompatibleMeasurement) {
+ // Bucket document to reopen.
+ BSONObj bucketDoc = ::mongo::fromjson(
+ R"({"_id":{"$oid":"629e1e680958e279dc29a517"},
+ "control":{"version":1,"min":{"time":{"$date":"2022-06-06T15:34:00.000Z"},"a":1,"b":1},
+ "max":{"time":{"$date":"2022-06-06T15:34:30.000Z"},"a":3,"b":3}},
+ "data":{"time":{"0":{"$date":"2022-06-06T15:34:30.000Z"},
+ "1":{"$date":"2022-06-06T15:34:30.000Z"},
+ "2":{"$date":"2022-06-06T15:34:30.000Z"}},
+ "a":{"0":1,"1":2,"2":3},
+ "b":{"0":1,"1":2,"2":3}}})");
+
+ RAIIServerParameterControllerForTest controller{"featureFlagTimeseriesScalabilityImprovements",
+ true};
+ AutoGetCollection autoColl(_opCtx, _ns1.makeTimeseriesBucketsNamespace(), MODE_IX);
+ Status status = _bucketCatalog->reopenBucket(_opCtx, autoColl.getCollection(), bucketDoc);
+ ASSERT_OK(status);
+ ASSERT_EQ(1, _getExecutionStat(_ns1, kNumBucketsReopened));
+
+ // Insert a measurement that is incompatible with the reopened bucket.
+ auto result =
+ _bucketCatalog->insert(_opCtx,
+ _ns1,
+ _getCollator(_ns1),
+ _getTimeseriesOptions(_ns1),
+ ::mongo::fromjson(R"({"time":{"$date":"2022-06-06T15:34:40.000Z"},
+ "a":{},"b":{}})"),
+ BucketCatalog::CombineWithInsertsFromOtherClients::kAllow);
+
+ // The reopened bucket gets closed as the schema is incompatible.
+ ASSERT_EQ(1, result.getValue().closedBuckets.size());
+ ASSERT_EQ(1, _getExecutionStat(_ns1, kNumSchemaChanges));
+
+ auto batch = result.getValue().batch;
+ ASSERT(batch->claimCommitRights());
+ ASSERT_OK(_bucketCatalog->prepareCommit(batch));
+ ASSERT_EQ(batch->measurements().size(), 1);
+
+ // Since the reopened bucket was incompatible, we opened a new one.
+ ASSERT_EQ(batch->numPreviouslyCommittedMeasurements(), 0);
+
+ _bucketCatalog->finish(batch, {});
+}
+
+TEST_F(BucketCatalogTest, ReopenCompressedBucketAndInsertCompatibleMeasurement) {
+ // Bucket document to reopen.
+ BSONObj bucketDoc = ::mongo::fromjson(
+ R"({"_id":{"$oid":"629e1e680958e279dc29a517"},
+ "control":{"version":1,"min":{"time":{"$date":"2022-06-06T15:34:00.000Z"},"a":1,"b":1},
+ "max":{"time":{"$date":"2022-06-06T15:34:30.000Z"},"a":3,"b":3}},
+ "data":{"time":{"0":{"$date":"2022-06-06T15:34:30.000Z"},
+ "1":{"$date":"2022-06-06T15:34:30.000Z"},
+ "2":{"$date":"2022-06-06T15:34:30.000Z"}},
+ "a":{"0":1,"1":2,"2":3},
+ "b":{"0":1,"1":2,"2":3}}})");
+
+ timeseries::CompressionResult compressionResult =
+ timeseries::compressBucket(bucketDoc,
+ _timeField,
+ _ns1,
+ /*eligibleForReopening=*/false,
+ /*validateDecompression=*/true);
+ const BSONObj& compressedBucketDoc = compressionResult.compressedBucket.get();
+
+ RAIIServerParameterControllerForTest controller{"featureFlagTimeseriesScalabilityImprovements",
+ true};
+ AutoGetCollection autoColl(_opCtx, _ns1.makeTimeseriesBucketsNamespace(), MODE_IX);
+ Status status =
+ _bucketCatalog->reopenBucket(_opCtx, autoColl.getCollection(), compressedBucketDoc);
+ ASSERT_OK(status);
+ ASSERT_EQ(1, _getExecutionStat(_ns1, kNumBucketsReopened));
+
+ // Insert a measurement that is compatible with the reopened bucket.
+ auto result =
+ _bucketCatalog->insert(_opCtx,
+ _ns1,
+ _getCollator(_ns1),
+ _getTimeseriesOptions(_ns1),
+ ::mongo::fromjson(R"({"time":{"$date":"2022-06-06T15:34:40.000Z"},
+ "a":-100,"b":100})"),
+ BucketCatalog::CombineWithInsertsFromOtherClients::kAllow);
+
+ // No buckets are closed.
+ ASSERT(result.getValue().closedBuckets.empty());
+ ASSERT_EQ(0, _getExecutionStat(_ns1, kNumSchemaChanges));
+
+ auto batch = result.getValue().batch;
+ ASSERT(batch->claimCommitRights());
+ ASSERT_OK(_bucketCatalog->prepareCommit(batch));
+ ASSERT_EQ(batch->measurements().size(), 1);
+
+ // The reopened bucket already contains three committed measurements.
+ ASSERT_EQ(batch->numPreviouslyCommittedMeasurements(), 3);
+
+ // Verify that the min and max is updated correctly when inserting new measurements.
+ ASSERT_BSONOBJ_BINARY_EQ(batch->min(), BSON("u" << BSON("a" << -100)));
+ ASSERT_BSONOBJ_BINARY_EQ(
+ batch->max(),
+ BSON("u" << BSON("time" << Date_t::fromMillisSinceEpoch(1654529680000) << "b" << 100)));
+
+ _bucketCatalog->finish(batch, {});
+}
+
+TEST_F(BucketCatalogTest, ReopenCompressedBucketAndInsertIncompatibleMeasurement) {
+ // Bucket document to reopen.
+ BSONObj bucketDoc = ::mongo::fromjson(
+ R"({"_id":{"$oid":"629e1e680958e279dc29a517"},
+ "control":{"version":1,"min":{"time":{"$date":"2022-06-06T15:34:00.000Z"},"a":1,"b":1},
+ "max":{"time":{"$date":"2022-06-06T15:34:30.000Z"},"a":3,"b":3}},
+ "data":{"time":{"0":{"$date":"2022-06-06T15:34:30.000Z"},
+ "1":{"$date":"2022-06-06T15:34:30.000Z"},
+ "2":{"$date":"2022-06-06T15:34:30.000Z"}},
+ "a":{"0":1,"1":2,"2":3},
+ "b":{"0":1,"1":2,"2":3}}})");
+
+ timeseries::CompressionResult compressionResult =
+ timeseries::compressBucket(bucketDoc,
+ _timeField,
+ _ns1,
+ /*eligibleForReopening=*/false,
+ /*validateDecompression=*/true);
+ const BSONObj& compressedBucketDoc = compressionResult.compressedBucket.get();
+
+ RAIIServerParameterControllerForTest controller{"featureFlagTimeseriesScalabilityImprovements",
+ true};
+ AutoGetCollection autoColl(_opCtx, _ns1.makeTimeseriesBucketsNamespace(), MODE_IX);
+ Status status =
+ _bucketCatalog->reopenBucket(_opCtx, autoColl.getCollection(), compressedBucketDoc);
+ ASSERT_OK(status);
+ ASSERT_EQ(1, _getExecutionStat(_ns1, kNumBucketsReopened));
+
+ // Insert a measurement that is incompatible with the reopened bucket.
+ auto result =
+ _bucketCatalog->insert(_opCtx,
+ _ns1,
+ _getCollator(_ns1),
+ _getTimeseriesOptions(_ns1),
+ ::mongo::fromjson(R"({"time":{"$date":"2022-06-06T15:34:40.000Z"},
+ "a":{},"b":{}})"),
+ BucketCatalog::CombineWithInsertsFromOtherClients::kAllow);
+
+ // The reopened bucket gets closed as the schema is incompatible.
+ ASSERT_EQ(1, result.getValue().closedBuckets.size());
+ ASSERT_EQ(1, _getExecutionStat(_ns1, kNumSchemaChanges));
+
+ auto batch = result.getValue().batch;
+ ASSERT(batch->claimCommitRights());
+ ASSERT_OK(_bucketCatalog->prepareCommit(batch));
+ ASSERT_EQ(batch->measurements().size(), 1);
+
+ // Since the reopened bucket was incompatible, we opened a new one.
+ ASSERT_EQ(batch->numPreviouslyCommittedMeasurements(), 0);
+
+ _bucketCatalog->finish(batch, {});
+}
+
+TEST_F(BucketCatalogTest, ArchiveIfTimeForward) {
+ RAIIServerParameterControllerForTest featureFlag{"featureFlagTimeseriesScalabilityImprovements",
+ true};
+ auto baseTimestamp = Date_t::now();
+
+ // Insert an initial document to make sure we have an open bucket.
+ auto result1 =
+ _bucketCatalog->insert(_opCtx,
+ _ns1,
+ _getCollator(_ns1),
+ _getTimeseriesOptions(_ns1),
+ BSON(_timeField << baseTimestamp),
+ BucketCatalog::CombineWithInsertsFromOtherClients::kAllow);
+ ASSERT_OK(result1.getStatus());
+ auto batch1 = result1.getValue().batch;
+ ASSERT(batch1->claimCommitRights());
+ ASSERT_OK(_bucketCatalog->prepareCommit(batch1));
+ _bucketCatalog->finish(batch1, {});
+
+ // Make sure we start out with nothing closed or archived.
+ ASSERT_EQ(0, _getExecutionStat(_ns1, kNumArchivedDueToTimeForward));
+ ASSERT_EQ(0, _getExecutionStat(_ns1, kNumClosedDueToTimeForward));
+
+ // Now insert another that's too far forward to fit in the same bucket
+ auto result2 =
+ _bucketCatalog->insert(_opCtx,
+ _ns1,
+ _getCollator(_ns1),
+ _getTimeseriesOptions(_ns1),
+ BSON(_timeField << (baseTimestamp + Seconds{7200})),
+ BucketCatalog::CombineWithInsertsFromOtherClients::kAllow);
+ ASSERT_OK(result2.getStatus());
+ auto batch2 = result2.getValue().batch;
+ ASSERT(batch2->claimCommitRights());
+ ASSERT_OK(_bucketCatalog->prepareCommit(batch2));
+ _bucketCatalog->finish(batch2, {});
+
+ // Make sure it was archived, not closed.
+ ASSERT_EQ(1, _getExecutionStat(_ns1, kNumArchivedDueToTimeForward));
+ ASSERT_EQ(0, _getExecutionStat(_ns1, kNumClosedDueToTimeForward));
+}
+
+TEST_F(BucketCatalogTest, ArchiveIfTimeBackward) {
+ RAIIServerParameterControllerForTest featureFlag{"featureFlagTimeseriesScalabilityImprovements",
+ true};
+ auto baseTimestamp = Date_t::now();
+
+ // Insert an initial document to make sure we have an open bucket.
+ auto result1 =
+ _bucketCatalog->insert(_opCtx,
+ _ns1,
+ _getCollator(_ns1),
+ _getTimeseriesOptions(_ns1),
+ BSON(_timeField << baseTimestamp),
+ BucketCatalog::CombineWithInsertsFromOtherClients::kAllow);
+ ASSERT_OK(result1.getStatus());
+ auto batch1 = result1.getValue().batch;
+ ASSERT(batch1->claimCommitRights());
+ ASSERT_OK(_bucketCatalog->prepareCommit(batch1));
+ _bucketCatalog->finish(batch1, {});
+
+ // Make sure we start out with nothing closed or archived.
+ ASSERT_EQ(0, _getExecutionStat(_ns1, kNumArchivedDueToTimeBackward));
+ ASSERT_EQ(0, _getExecutionStat(_ns1, kNumClosedDueToTimeBackward));
+
+ // Now insert another that's too far backward to fit in the same bucket
+ auto result2 =
+ _bucketCatalog->insert(_opCtx,
+ _ns1,
+ _getCollator(_ns1),
+ _getTimeseriesOptions(_ns1),
+ BSON(_timeField << (baseTimestamp - Seconds{7200})),
+ BucketCatalog::CombineWithInsertsFromOtherClients::kAllow);
+ ASSERT_OK(result2.getStatus());
+ auto batch2 = result2.getValue().batch;
+ ASSERT(batch2->claimCommitRights());
+ ASSERT_OK(_bucketCatalog->prepareCommit(batch2));
+ _bucketCatalog->finish(batch2, {});
+
+ // Make sure it was archived, not closed.
+ ASSERT_EQ(1, _getExecutionStat(_ns1, kNumArchivedDueToTimeBackward));
+ ASSERT_EQ(0, _getExecutionStat(_ns1, kNumClosedDueToTimeBackward));
+}
+
+TEST_F(BucketCatalogTest, ArchivingUnderMemoryPressure) {
+ RAIIServerParameterControllerForTest featureFlag{"featureFlagTimeseriesScalabilityImprovements",
+ true};
+ RAIIServerParameterControllerForTest memoryLimit{
+ "timeseriesIdleBucketExpiryMemoryUsageThreshold", 10000};
+
+ // Insert a measurement with a unique meta value, guaranteeing we will open a new bucket but not
+ // close an old one except under memory pressure.
+ long long meta = 0;
+ auto insertDocument = [&meta, this]() -> BucketCatalog::ClosedBuckets {
+ auto result =
+ _bucketCatalog->insert(_opCtx,
+ _ns1,
+ _getCollator(_ns1),
+ _getTimeseriesOptions(_ns1),
+ BSON(_timeField << Date_t::now() << _metaField << meta++),
+ BucketCatalog::CombineWithInsertsFromOtherClients::kAllow);
+ ASSERT_OK(result.getStatus());
+ auto batch = result.getValue().batch;
+ ASSERT(batch->claimCommitRights());
+ ASSERT_OK(_bucketCatalog->prepareCommit(batch));
+ _bucketCatalog->finish(batch, {});
+
+ return result.getValue().closedBuckets;
+ };
+
+ // Ensure we start out with no buckets archived or closed due to memory pressure.
+ ASSERT_EQ(0, _getExecutionStat(_ns1, kNumArchivedDueToMemoryThreshold));
+ ASSERT_EQ(0, _getExecutionStat(_ns1, kNumClosedDueToMemoryThreshold));
+
+ // With a memory limit of 10000 bytes, we should be guaranteed to hit the memory limit with no
+ // more than 1000 buckets since an open bucket takes up at least 10 bytes (in reality,
+ // significantly more, but this is definitely a safe assumption).
+ for (int i = 0; i < 1000; ++i) {
+ [[maybe_unused]] auto closedBuckets = insertDocument();
+
+ if (0 < _getExecutionStat(_ns1, kNumArchivedDueToMemoryThreshold)) {
+ break;
+ }
+ }
+
+ // When we first hit the limit, we should try to archive some buckets prior to closing anything.
+ // However, depending on how the buckets are distributed over the stripes, it's possible that
+ // the current stripe will not have enough open buckets to archive to drop below the limit, and
+ // may immediately close a bucket it has just archived. We should be able to guarantee that we
+ // have archived a bucket prior to closing it though.
+ ASSERT_LT(0, _getExecutionStat(_ns1, kNumArchivedDueToMemoryThreshold));
+ auto numClosedInFirstRound = _getExecutionStat(_ns1, kNumClosedDueToMemoryThreshold);
+ ASSERT_LTE(numClosedInFirstRound, _getExecutionStat(_ns1, kNumArchivedDueToMemoryThreshold));
+
+ // If we continue to open more new buckets with distinct meta values, eventually we'll run out
+ // of open buckets to archive and have to start closing archived buckets to relieve memory
+ // pressure. Again, an archived bucket should take up more than 10 bytes in the catalog, so we
+ // should be fine with a maximum of 1000 iterations.
+ for (int i = 0; i < 1000; ++i) {
+ auto closedBuckets = insertDocument();
+
+ if (numClosedInFirstRound < _getExecutionStat(_ns1, kNumClosedDueToMemoryThreshold)) {
+ ASSERT_FALSE(closedBuckets.empty());
+ break;
+ }
+ }
+
+ // We should have closed some (additional) buckets by now.
+ ASSERT_LT(numClosedInFirstRound, _getExecutionStat(_ns1, kNumClosedDueToMemoryThreshold));
+}
+
} // namespace
} // namespace mongo
diff --git a/src/mongo/db/timeseries/bucket_compression.cpp b/src/mongo/db/timeseries/bucket_compression.cpp
index 94fa5264647..1ccd26da0bd 100644
--- a/src/mongo/db/timeseries/bucket_compression.cpp
+++ b/src/mongo/db/timeseries/bucket_compression.cpp
@@ -53,6 +53,7 @@ MONGO_FAIL_POINT_DEFINE(simulateBsonColumnCompressionDataLoss);
CompressionResult compressBucket(const BSONObj& bucketDoc,
StringData timeFieldName,
const NamespaceString& nss,
+ bool eligibleForReopening,
bool validateDecompression) try {
CompressionResult result;
@@ -179,22 +180,36 @@ CompressionResult compressBucket(const BSONObj& bucketDoc,
{
BSONObjBuilder control(builder.subobjStart(kBucketControlFieldName));
- // Set right version, leave other control fields unchanged
+ const bool shouldSetBucketClosed = !eligibleForReopening &&
+ feature_flags::gTimeseriesScalabilityImprovements.isEnabled(
+ serverGlobalParams.featureCompatibility);
+
+ // Set the version to indicate that the bucket was compressed and the closed flag if the
+ // bucket shouldn't be reopened. Leave other control fields unchanged.
+ bool closedSet = false;
bool versionSet = false;
for (const auto& controlField : controlElement.Obj()) {
if (controlField.fieldNameStringData() == kBucketControlVersionFieldName) {
control.append(kBucketControlVersionFieldName, kTimeseriesControlCompressedVersion);
versionSet = true;
+ } else if (controlField.fieldNameStringData() == kBucketControlClosedFieldName &&
+ shouldSetBucketClosed) {
+ control.append(kBucketControlClosedFieldName, true);
+ closedSet = true;
} else {
control.append(controlField);
}
}
- // Set version if it was missing from uncompressed bucket
+ // Set version and closed flag if they were missing from the uncompressed bucket
if (!versionSet) {
control.append(kBucketControlVersionFieldName, kTimeseriesControlCompressedVersion);
}
+ if (!closedSet && shouldSetBucketClosed) {
+ control.append(kBucketControlClosedFieldName, true);
+ }
+
// Set count
control.append(kBucketControlCountFieldName, static_cast<int32_t>(measurements.size()));
}
diff --git a/src/mongo/db/timeseries/bucket_compression.h b/src/mongo/db/timeseries/bucket_compression.h
index e70bec965d7..30c788a9417 100644
--- a/src/mongo/db/timeseries/bucket_compression.h
+++ b/src/mongo/db/timeseries/bucket_compression.h
@@ -58,6 +58,7 @@ struct CompressionResult {
CompressionResult compressBucket(const BSONObj& bucketDoc,
StringData timeFieldName,
const NamespaceString& nss,
+ bool eligibleForReopening,
bool validateDecompression);
/**
diff --git a/src/mongo/db/timeseries/timeseries.idl b/src/mongo/db/timeseries/timeseries.idl
index d3ede8fd3c3..3f858f17fe6 100644
--- a/src/mongo/db/timeseries/timeseries.idl
+++ b/src/mongo/db/timeseries/timeseries.idl
@@ -62,6 +62,18 @@ server_parameters:
cpp_varname: "gTimeseriesIdleBucketExpiryMaxCountPerAttempt"
default: 3
validator: { gte: 2 }
+ "timeseriesBucketMinCount":
+ description: "Time-series buckets that need to be closed due to size
+ (timeseriesBucketMaxSize) but haven't crossed this threshold are considered to
+ contain large measurements, and will be kept open to improve bucketing
+ performance. These buckets will be closed when they reach the threshold or if
+ the bucket is close to the max BSON size limit. Setting this to 1 disables
+ this behaviour."
+ set_at: [ startup ]
+ cpp_vartype: "std::int32_t"
+ cpp_varname: "gTimeseriesBucketMinCount"
+ default: 10
+ validator: { gte: 1 }
enums:
BucketGranularity:
diff --git a/src/mongo/db/timeseries/timeseries_commands_conversion_helper.cpp b/src/mongo/db/timeseries/timeseries_commands_conversion_helper.cpp
index be234d0d95f..6481aa7bbf9 100644
--- a/src/mongo/db/timeseries/timeseries_commands_conversion_helper.cpp
+++ b/src/mongo/db/timeseries/timeseries_commands_conversion_helper.cpp
@@ -50,6 +50,28 @@ namespace {
NamespaceString makeTimeseriesBucketsNamespace(const NamespaceString& nss) {
return nss.isTimeseriesBucketsCollection() ? nss : nss.makeTimeseriesBucketsNamespace();
}
+
+/**
+ * Converts the key field on time to 'control.min.$timeField' field. Depends on error checking from
+ * 'createBucketsSpecFromTimeseriesSpec()' which should be called before this function.
+ */
+BSONObj convertToTTLTimeField(const BSONObj& origKeyField, StringData timeField) {
+ BSONObjBuilder keyBuilder;
+ uassert(ErrorCodes::CannotCreateIndex,
+ str::stream() << "TTL indexes are single-field indexes, compound indexes do "
+ "not support TTL. Index spec: "
+ << origKeyField,
+ origKeyField.nFields() == 1);
+
+ const auto& firstElem = origKeyField.firstElement();
+ uassert(ErrorCodes::InvalidOptions,
+ "TTL indexes on non-time fields are not supported on time-series collections",
+ firstElem.fieldName() == timeField);
+
+ keyBuilder.appendAs(firstElem,
+ str::stream() << timeseries::kControlMinFieldNamePrefix << timeField);
+ return keyBuilder.obj();
+}
} // namespace
@@ -83,12 +105,17 @@ CreateIndexesCommand makeTimeseriesCreateIndexesCommand(OperationContext* opCtx,
std::vector<mongo::BSONObj> indexes;
for (const auto& origIndex : origIndexes) {
BSONObjBuilder builder;
- bool isBucketsIndexSpecCompatibleForDowngrade = true;
+ BSONObj keyField;
+ BSONObj originalKeyField;
+ bool isTTLIndex = false;
+ bool hasPartialFilterOnMetaField = false;
+ bool includeOriginalSpec = false;
+
for (const auto& elem : origIndex) {
if (elem.fieldNameStringData() == IndexDescriptor::kPartialFilterExprFieldName) {
- if (feature_flags::gTimeseriesMetricIndexes.isEnabledAndIgnoreFCV() &&
- serverGlobalParams.featureCompatibility.isFCVUpgradingToOrAlreadyLatest()) {
- isBucketsIndexSpecCompatibleForDowngrade = false;
+ if (feature_flags::gTimeseriesMetricIndexes.isEnabled(
+ serverGlobalParams.featureCompatibility)) {
+ includeOriginalSpec = true;
} else {
uasserted(ErrorCodes::InvalidOptions,
"Partial indexes are not supported on time-series collections");
@@ -135,7 +162,7 @@ CreateIndexesCommand makeTimeseriesCreateIndexesCommand(OperationContext* opCtx,
// planner, this will be true.
bool assumeNoMixedSchemaData = true;
- BSONObj bucketPred =
+ auto [hasMetricPred, bucketPred] =
BucketSpec::pushdownPredicate(expCtx,
options,
collationMatchesDefault,
@@ -144,6 +171,9 @@ CreateIndexesCommand makeTimeseriesCreateIndexesCommand(OperationContext* opCtx,
includeMetaField,
assumeNoMixedSchemaData,
BucketSpec::IneligiblePredicatePolicy::kError);
+
+ hasPartialFilterOnMetaField = !hasMetricPred;
+
builder.append(IndexDescriptor::kPartialFilterExprFieldName, bucketPred);
continue;
}
@@ -171,11 +201,11 @@ CreateIndexesCommand makeTimeseriesCreateIndexesCommand(OperationContext* opCtx,
}
if (elem.fieldNameStringData() == IndexDescriptor::kExpireAfterSecondsFieldName) {
- uasserted(ErrorCodes::InvalidOptions,
- "TTL indexes are not supported on time-series collections");
+ isTTLIndex = true;
+ builder.append(elem);
+ continue;
}
-
if (elem.fieldNameStringData() == IndexDescriptor::kUniqueFieldName) {
uassert(ErrorCodes::InvalidOptions,
"Unique indexes are not supported on time-series collections",
@@ -183,27 +213,28 @@ CreateIndexesCommand makeTimeseriesCreateIndexesCommand(OperationContext* opCtx,
}
if (elem.fieldNameStringData() == NewIndexSpec::kKeyFieldName) {
- auto pluginName = IndexNames::findPluginName(elem.Obj());
+ originalKeyField = elem.Obj();
+
+ auto pluginName = IndexNames::findPluginName(originalKeyField);
uassert(ErrorCodes::InvalidOptions,
"Text indexes are not supported on time-series collections",
pluginName != IndexNames::TEXT);
auto bucketsIndexSpecWithStatus =
- timeseries::createBucketsIndexSpecFromTimeseriesIndexSpec(options, elem.Obj());
+ timeseries::createBucketsIndexSpecFromTimeseriesIndexSpec(options,
+ originalKeyField);
uassert(ErrorCodes::CannotCreateIndex,
str::stream() << bucketsIndexSpecWithStatus.getStatus().toString()
<< " Command request: " << redact(origCmd.toBSON({})),
bucketsIndexSpecWithStatus.isOK());
- if (!timeseries::isBucketsIndexSpecCompatibleForDowngrade(
+ if (timeseries::shouldIncludeOriginalSpec(
options,
BSON(NewIndexSpec::kKeyFieldName
<< bucketsIndexSpecWithStatus.getValue()))) {
- isBucketsIndexSpecCompatibleForDowngrade = false;
+ includeOriginalSpec = true;
}
-
- builder.append(NewIndexSpec::kKeyFieldName,
- std::move(bucketsIndexSpecWithStatus.getValue()));
+ keyField = std::move(bucketsIndexSpecWithStatus.getValue());
continue;
}
@@ -212,12 +243,24 @@ CreateIndexesCommand makeTimeseriesCreateIndexesCommand(OperationContext* opCtx,
builder.append(elem);
}
- if (feature_flags::gTimeseriesMetricIndexes.isEnabledAndIgnoreFCV() &&
- !isBucketsIndexSpecCompatibleForDowngrade) {
+ if (isTTLIndex) {
+ uassert(ErrorCodes::InvalidOptions,
+ "TTL indexes are not supported on time-series collections",
+ feature_flags::gTimeseriesScalabilityImprovements.isEnabled(
+ serverGlobalParams.featureCompatibility));
+ uassert(ErrorCodes::InvalidOptions,
+ "TTL indexes on time-series collections require a partialFilterExpression on "
+ "the metaField",
+ hasPartialFilterOnMetaField);
+ keyField = convertToTTLTimeField(originalKeyField, options.getTimeField());
+ }
+ builder.append(NewIndexSpec::kKeyFieldName, std::move(keyField));
+
+ if (feature_flags::gTimeseriesMetricIndexes.isEnabled(
+ serverGlobalParams.featureCompatibility) &&
+ includeOriginalSpec) {
// Store the original user index definition on the transformed index definition for the
- // time-series buckets collection if this is a newly supported index type on time-series
- // collections. This is to avoid any additional downgrade steps for index types already
- // supported in 5.0.
+ // time-series buckets collection.
builder.appendObject(IndexDescriptor::kOriginalSpecFieldName, origIndex.objdata());
}
diff --git a/src/mongo/db/timeseries/timeseries_constants.h b/src/mongo/db/timeseries/timeseries_constants.h
index 28dfd25ce78..be7d9a368f2 100644
--- a/src/mongo/db/timeseries/timeseries_constants.h
+++ b/src/mongo/db/timeseries/timeseries_constants.h
@@ -40,6 +40,7 @@ namespace timeseries {
static constexpr StringData kBucketIdFieldName = "_id"_sd;
static constexpr StringData kBucketDataFieldName = "data"_sd;
static constexpr StringData kBucketMetaFieldName = "meta"_sd;
+static constexpr StringData kBucketControlClosedFieldName = "closed"_sd;
static constexpr StringData kBucketControlFieldName = "control"_sd;
static constexpr StringData kBucketControlVersionFieldName = "version"_sd;
static constexpr StringData kBucketControlCountFieldName = "count"_sd;
diff --git a/src/mongo/db/timeseries/timeseries_dotted_path_support_test.cpp b/src/mongo/db/timeseries/timeseries_dotted_path_support_test.cpp
index 638a2c8d6c4..afabfc4e0a6 100644
--- a/src/mongo/db/timeseries/timeseries_dotted_path_support_test.cpp
+++ b/src/mongo/db/timeseries/timeseries_dotted_path_support_test.cpp
@@ -47,7 +47,8 @@ protected:
test(obj);
NamespaceString nss{"test"};
- auto compressionResult = timeseries::compressBucket(obj, "time", nss, true);
+ auto compressionResult =
+ timeseries::compressBucket(obj, "time", nss, /*eligibleForReopening=*/false, true);
ASSERT_TRUE(compressionResult.compressedBucket.has_value());
ASSERT_FALSE(compressionResult.decompressionFailed);
diff --git a/src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.cpp b/src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.cpp
index 83b6e3f6e9d..4dcf9a73eda 100644
--- a/src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.cpp
+++ b/src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.cpp
@@ -149,7 +149,8 @@ StatusWith<BSONObj> createBucketsSpecFromTimeseriesSpec(const TimeseriesOptions&
// Indexes on measurement fields are only supported when the 'gTimeseriesMetricIndexes'
// feature flag is enabled.
- if (!feature_flags::gTimeseriesMetricIndexes.isEnabledAndIgnoreFCV()) {
+ if (!feature_flags::gTimeseriesMetricIndexes.isEnabled(
+ serverGlobalParams.featureCompatibility)) {
auto reason = str::stream();
reason << "Invalid index spec for time-series collection: "
<< redact(timeseriesIndexSpecBSON) << ". ";
@@ -366,7 +367,7 @@ StatusWith<BSONObj> createBucketsShardKeySpecFromTimeseriesShardKeySpec(
boost::optional<BSONObj> createTimeseriesIndexFromBucketsIndex(
const TimeseriesOptions& timeseriesOptions, const BSONObj& bucketsIndex) {
bool timeseriesMetricIndexesFeatureFlagEnabled =
- feature_flags::gTimeseriesMetricIndexes.isEnabledAndIgnoreFCV();
+ feature_flags::gTimeseriesMetricIndexes.isEnabled(serverGlobalParams.featureCompatibility);
if (bucketsIndex.hasField(kOriginalSpecFieldName) &&
timeseriesMetricIndexesFeatureFlagEnabled) {
@@ -406,21 +407,16 @@ std::list<BSONObj> createTimeseriesIndexesFromBucketsIndexes(
return indexSpecs;
}
-bool isBucketsIndexSpecCompatibleForDowngrade(const TimeseriesOptions& timeseriesOptions,
- const BSONObj& bucketsIndex) {
+bool shouldIncludeOriginalSpec(const TimeseriesOptions& timeseriesOptions,
+ const BSONObj& bucketsIndex) {
if (!bucketsIndex.hasField(kKeyFieldName)) {
return false;
}
- if (bucketsIndex.hasField(kPartialFilterExpressionFieldName)) {
- // Partial indexes are not supported in FCV < 5.2.
- return false;
- }
-
return createTimeseriesIndexSpecFromBucketsIndexSpec(
timeseriesOptions,
bucketsIndex.getField(kKeyFieldName).Obj(),
- /*timeseriesMetricIndexesFeatureFlagEnabled=*/false) != boost::none;
+ /*timeseriesMetricIndexesFeatureFlagEnabled=*/false) == boost::none;
}
bool doesBucketsIndexIncludeMeasurement(OperationContext* opCtx,
diff --git a/src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.h b/src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.h
index ad1bb795fd2..144893c0d77 100644
--- a/src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.h
+++ b/src/mongo/db/timeseries/timeseries_index_schema_conversion_functions.h
@@ -71,10 +71,11 @@ std::list<BSONObj> createTimeseriesIndexesFromBucketsIndexes(
const TimeseriesOptions& timeseriesOptions, const std::list<BSONObj>& bucketsIndexes);
/**
- * Returns true if the 'bucketsIndex' is compatible for FCV downgrade.
+ * Returns true if the original index specification should be included when creating an index on the
+ * time-series buckets collection.
*/
-bool isBucketsIndexSpecCompatibleForDowngrade(const TimeseriesOptions& timeseriesOptions,
- const BSONObj& bucketsIndex);
+bool shouldIncludeOriginalSpec(const TimeseriesOptions& timeseriesOptions,
+ const BSONObj& bucketsIndex);
/**
* Returns true if 'bucketsIndex' uses a measurement field, excluding the time field. Checks both
diff --git a/src/mongo/db/transaction_api.cpp b/src/mongo/db/transaction_api.cpp
index 8f1f91e3080..0a950dbbe54 100644
--- a/src/mongo/db/transaction_api.cpp
+++ b/src/mongo/db/transaction_api.cpp
@@ -82,6 +82,7 @@ SyncTransactionWithRetries::SyncTransactionWithRetries(
_txn(std::make_shared<details::TransactionWithRetries>(
opCtx,
executor,
+ _source.token(),
txnClient ? std::move(txnClient)
: std::make_unique<details::SEPTransactionClient>(
opCtx,
@@ -101,6 +102,8 @@ StatusWith<CommitResult> SyncTransactionWithRetries::runNoThrow(OperationContext
}
auto txnResult = _txn->run(std::move(callback)).getNoThrow(opCtx);
+ // Cancel the source to guarantee the transaction will terminate if our opCtx was interrupted.
+ _source.cancel();
// Post transaction processing, which must also happen inline.
OperationTimeTracker::get(opCtx)->updateOperationTime(_txn->getOperationTime());
@@ -188,8 +191,7 @@ SemiFuture<CommitResult> TransactionWithRetries::run(Callback callback) noexcept
return txnStatus.isOK() || txnStatus != ErrorCodes::TransactionAPIMustRetryTransaction;
})
.withBackoffBetweenIterations(kExponentialBackoff)
- // Cancellation happens by interrupting the caller's opCtx.
- .on(_executor, CancellationToken::uncancelable())
+ .on(_executor, _token)
// Safe to inline because the continuation only holds state.
.unsafeToInlineFuture()
.tapAll([anchor = shared_from_this()](auto&&) {})
@@ -257,8 +259,7 @@ ExecutorFuture<CommitResult> TransactionWithRetries::_runCommitWithRetries() {
return swResult.isOK() || swResult != ErrorCodes::TransactionAPIMustRetryCommit;
})
.withBackoffBetweenIterations(kExponentialBackoff)
- // Cancellation happens by interrupting the caller's opCtx.
- .on(_executor, CancellationToken::uncancelable());
+ .on(_executor, _token);
}
ExecutorFuture<void> TransactionWithRetries::_bestEffortAbort() {
@@ -297,12 +298,16 @@ SemiFuture<BSONObj> SEPTransactionClient::runCommand(StringData dbName, BSONObj
invariant(!haveClient());
auto client = _serviceContext->makeClient("SEP-internal-txn-client");
AlternativeClientRegion clientRegion(client);
- auto opCtxHolder = cc().makeOperationContext();
+ // Note that _token is only cancelled once the caller of the transaction no longer cares about
+ // its result, so CancelableOperationContexts only being interrupted by ErrorCodes::Interrupted
+ // shouldn't impact any upstream retry logic.
+ CancelableOperationContextFactory opCtxFactory(_token, _executor);
+ auto cancellableOpCtx = opCtxFactory.makeOperationContext(&cc());
primeInternalClient(&cc());
auto opMsgRequest = OpMsgRequest::fromDBAndBody(dbName, cmdBuilder.obj());
auto requestMessage = opMsgRequest.serialize();
- return _behaviors->handleRequest(opCtxHolder.get(), requestMessage)
+ return _behaviors->handleRequest(cancellableOpCtx.get(), requestMessage)
.then([this](DbResponse dbResponse) {
auto reply = rpc::makeReply(&dbResponse.response)->getCommandReply().getOwned();
_hooks->runReplyHook(reply);
@@ -383,7 +388,7 @@ SemiFuture<std::vector<BSONObj>> SEPTransactionClient::exhaustiveFind(
// an error upon fetching more documents.
return result != ErrorCodes::InternalTransactionsExhaustiveFindHasMore;
})
- .on(_executor, CancellationToken::uncancelable())
+ .on(_executor, _token)
.then([response = std::move(response)] { return std::move(*response); });
})
.semi();
@@ -494,6 +499,29 @@ int getMaxRetries() {
: kTxnRetryLimit;
}
+bool isLocalTransactionFatalResult(const StatusWith<CommitResult>& swResult) {
+ // If the local node is shutting down all retries would fail and if the node has failed over,
+ // retries could eventually succeed on the new primary, but we want to prevent that since
+ // whatever command that ran the internal transaction will fail with this error and may be
+ // retried itself.
+ auto isLocalFatalStatus = [](Status status) -> bool {
+ return status.isA<ErrorCategory::NotPrimaryError>() ||
+ status.isA<ErrorCategory::ShutdownError>();
+ };
+
+ if (!swResult.isOK()) {
+ return isLocalFatalStatus(swResult.getStatus());
+ }
+ return isLocalFatalStatus(swResult.getValue().getEffectiveStatus());
+}
+
+// True if the transaction is running entirely against the local node, e.g. a single replica set
+// transaction on a mongod. False for remote transactions from a mongod or all transactions from a
+// mongos.
+bool isRunningLocalTransaction(const TransactionClient& txnClient) {
+ return !isMongos() && !txnClient.runsClusterOperations();
+}
+
Transaction::ErrorHandlingStep Transaction::handleError(const StatusWith<CommitResult>& swResult,
int attemptCounter) const noexcept {
stdx::lock_guard<Latch> lg(_mutex);
@@ -513,6 +541,11 @@ Transaction::ErrorHandlingStep Transaction::handleError(const StatusWith<CommitR
return ErrorHandlingStep::kDoNotRetry;
}
+ // If we're running locally, some errors mean we should not retry, like a failover or shutdown.
+ if (isRunningLocalTransaction(*_txnClient) && isLocalTransactionFatalResult(swResult)) {
+ return ErrorHandlingStep::kDoNotRetry;
+ }
+
// If the op has a deadline, retry until it is reached regardless of the number of attempts.
if (attemptCounter > getMaxRetries() && !_opDeadline) {
return _isInCommit() ? ErrorHandlingStep::kDoNotRetry
diff --git a/src/mongo/db/transaction_api.h b/src/mongo/db/transaction_api.h
index d8d7c34e604..830ec9a7b2f 100644
--- a/src/mongo/db/transaction_api.h
+++ b/src/mongo/db/transaction_api.h
@@ -91,7 +91,8 @@ public:
* transaction metadata to requests and parsing it from responses. Must be called before any
* commands have been sent and cannot be called more than once.
*/
- virtual void injectHooks(std::unique_ptr<details::TxnMetadataHooks> hooks) = 0;
+ virtual void initialize(std::unique_ptr<details::TxnMetadataHooks> hooks,
+ const CancellationToken& token) = 0;
/**
* Runs the given command as part of the transaction that owns this transaction client.
@@ -195,6 +196,7 @@ public:
}
private:
+ CancellationSource _source;
std::unique_ptr<ResourceYielder> _resourceYielder;
std::shared_ptr<details::TransactionWithRetries> _txn;
};
@@ -260,14 +262,17 @@ public:
std::unique_ptr<SEPTransactionClientBehaviors> behaviors)
: _serviceContext(opCtx->getServiceContext()),
_executor(executor),
+ _token(CancellationToken::uncancelable()),
_behaviors(std::move(behaviors)) {}
SEPTransactionClient(const SEPTransactionClient&) = delete;
SEPTransactionClient operator=(const SEPTransactionClient&) = delete;
- virtual void injectHooks(std::unique_ptr<details::TxnMetadataHooks> hooks) override {
+ virtual void initialize(std::unique_ptr<details::TxnMetadataHooks> hooks,
+ const CancellationToken& token) override {
invariant(!_hooks);
_hooks = std::move(hooks);
+ _token = token;
}
virtual SemiFuture<BSONObj> runCommand(StringData dbName, BSONObj cmd) const override;
@@ -289,6 +294,7 @@ public:
private:
ServiceContext* const _serviceContext;
std::shared_ptr<executor::TaskExecutor> _executor;
+ CancellationToken _token;
std::unique_ptr<SEPTransactionClientBehaviors> _behaviors;
std::unique_ptr<details::TxnMetadataHooks> _hooks;
};
@@ -323,12 +329,13 @@ public:
*/
Transaction(OperationContext* opCtx,
std::shared_ptr<executor::TaskExecutor> executor,
+ const CancellationToken& token,
std::unique_ptr<TransactionClient> txnClient)
: _executor(executor),
_txnClient(std::move(txnClient)),
_service(opCtx->getServiceContext()) {
_primeTransaction(opCtx);
- _txnClient->injectHooks(_makeTxnMetadataHooks());
+ _txnClient->initialize(_makeTxnMetadataHooks(), token);
}
/**
@@ -483,9 +490,11 @@ public:
TransactionWithRetries(OperationContext* opCtx,
std::shared_ptr<executor::TaskExecutor> executor,
+ const CancellationToken& token,
std::unique_ptr<TransactionClient> txnClient)
- : _internalTxn(std::make_shared<Transaction>(opCtx, executor, std::move(txnClient))),
- _executor(executor) {}
+ : _internalTxn(std::make_shared<Transaction>(opCtx, executor, token, std::move(txnClient))),
+ _executor(executor),
+ _token(token) {}
/**
* Returns a bundle with the commit command status and write concern error, if any. Any error
@@ -518,6 +527,7 @@ private:
std::shared_ptr<Transaction> _internalTxn;
std::shared_ptr<executor::TaskExecutor> _executor;
+ CancellationToken _token;
};
} // namespace details
diff --git a/src/mongo/db/transaction_api_test.cpp b/src/mongo/db/transaction_api_test.cpp
index 0418bd22ca6..bcfb8ba5815 100644
--- a/src/mongo/db/transaction_api_test.cpp
+++ b/src/mongo/db/transaction_api_test.cpp
@@ -142,7 +142,8 @@ class MockTransactionClient : public SEPTransactionClient {
public:
using SEPTransactionClient::SEPTransactionClient;
- virtual void injectHooks(std::unique_ptr<TxnMetadataHooks> hooks) override {
+ virtual void initialize(std::unique_ptr<TxnMetadataHooks> hooks,
+ const CancellationToken& token) override {
_hooks = std::move(hooks);
}
@@ -205,6 +206,7 @@ private:
mutable StatusWith<BSONObj> _lastResponse{BSONObj()};
mutable std::queue<StatusWith<BSONObj>> _responses;
mutable std::vector<BSONObj> _sentRequests;
+ bool _runningLocalTransaction{false};
};
} // namespace txn_api::details
@@ -329,9 +331,15 @@ protected:
_mockClient = mockClient.get();
_txnWithRetries = std::make_unique<txn_api::SyncTransactionWithRetries>(
opCtx(), _executor, nullptr /* resourceYielder */, std::move(mockClient));
+
+ // The bulk of the API tests are for the non-local transaction cases, so set isMongos=true
+ // by default.
+ setMongos(true);
}
void tearDown() override {
+ setMongos(false);
+
_executor->shutdown();
_executor->join();
_executor.reset();
@@ -406,7 +414,8 @@ private:
class MockClusterOperationTransactionClient : public txn_api::TransactionClient {
public:
- virtual void injectHooks(std::unique_ptr<txn_api::details::TxnMetadataHooks> hooks) {}
+ virtual void initialize(std::unique_ptr<txn_api::details::TxnMetadataHooks> hooks,
+ const CancellationToken& token) {}
virtual SemiFuture<BSONObj> runCommand(StringData dbName, BSONObj cmd) const {
MONGO_UNREACHABLE;
@@ -1944,6 +1953,9 @@ TEST_F(TxnAPITest, CanBeUsedWithinShardedOperationsIfClientSupportsIt) {
}
TEST_F(TxnAPITest, DoNotAllowCrossShardTransactionsOnShardWhenInClientTransaction) {
+ setMongos(false);
+ ON_BLOCK_EXIT([&] { setMongos(true); });
+
opCtx()->setLogicalSessionId(makeLogicalSessionIdForTest());
opCtx()->setTxnNumber(5);
opCtx()->setInMultiDocumentTransaction();
@@ -1954,6 +1966,9 @@ TEST_F(TxnAPITest, DoNotAllowCrossShardTransactionsOnShardWhenInClientTransactio
}
TEST_F(TxnAPITest, DoNotAllowCrossShardTransactionsOnShardWhenInRetryableWrite) {
+ setMongos(false);
+ ON_BLOCK_EXIT([&] { setMongos(true); });
+
opCtx()->setLogicalSessionId(makeLogicalSessionIdForTest());
opCtx()->setTxnNumber(5);
ASSERT_THROWS_CODE(
@@ -1963,21 +1978,170 @@ TEST_F(TxnAPITest, DoNotAllowCrossShardTransactionsOnShardWhenInRetryableWrite)
}
TEST_F(TxnAPITest, AllowCrossShardTransactionsOnMongosWhenInRetryableWrite) {
+ setMongos(true);
+ ON_BLOCK_EXIT([&] { setMongos(false); });
+
opCtx()->setLogicalSessionId(makeLogicalSessionIdForTest());
opCtx()->setTxnNumber(5);
- setMongos(true);
resetTxnWithRetriesWithClient(std::make_unique<MockClusterOperationTransactionClient>());
- setMongos(false);
}
TEST_F(TxnAPITest, AllowCrossShardTransactionsOnMongosWhenInClientTransaction) {
+ setMongos(true);
+ ON_BLOCK_EXIT([&] { setMongos(false); });
+
opCtx()->setLogicalSessionId(makeLogicalSessionIdForTest());
opCtx()->setTxnNumber(5);
opCtx()->setInMultiDocumentTransaction();
- setMongos(true);
resetTxnWithRetriesWithClient(std::make_unique<MockClusterOperationTransactionClient>());
+}
+
+TEST_F(TxnAPITest, FailoverAndShutdownErrorsAreFatalForLocalTransactionBodyError) {
setMongos(false);
+ ON_BLOCK_EXIT([&] { setMongos(true); });
+ auto runTest = [&](bool expectSuccess, Status status) {
+ resetTxnWithRetries();
+
+ int attempt = -1;
+ auto swResult = txnWithRetries().runNoThrow(
+ opCtx(), [&](const txn_api::TransactionClient& txnClient, ExecutorPtr txnExec) {
+ attempt += 1;
+
+ mockClient()->setNextCommandResponse(kOKInsertResponse);
+ auto insertRes = txnClient
+ .runCommand("user"_sd,
+ BSON("insert"
+ << "foo"
+ << "documents" << BSON_ARRAY(BSON("x" << 1))))
+ .get();
+ ASSERT_OK(getStatusFromWriteCommandReply(insertRes));
+
+ // Only throw once to verify the API gives up right away.
+ if (attempt == 0) {
+ uassertStatusOK(status);
+ }
+ // The commit response.
+ mockClient()->setNextCommandResponse(kOKCommandResponse);
+ return SemiFuture<void>::makeReady();
+ });
+ if (!expectSuccess) {
+ ASSERT_EQ(swResult.getStatus(), status);
+
+ // The API should have returned without trying to abort.
+ auto lastRequest = mockClient()->getLastSentRequest();
+ ASSERT_EQ(lastRequest.firstElementFieldNameStringData(), "insert"_sd);
+ } else {
+ ASSERT(swResult.getStatus().isOK());
+ ASSERT(swResult.getValue().getEffectiveStatus().isOK());
+ auto lastRequest = mockClient()->getLastSentRequest();
+ ASSERT_EQ(lastRequest.firstElementFieldNameStringData(), "commitTransaction"_sd);
+ }
+ };
+
+ runTest(false, Status(ErrorCodes::InterruptedDueToReplStateChange, "mock repl change error"));
+ runTest(false, Status(ErrorCodes::InterruptedAtShutdown, "mock shutdown error"));
+
+ // Verify the fatal for local logic doesn't apply to all transient or retriable errors.
+ runTest(true, Status(ErrorCodes::HostUnreachable, "mock transient error"));
}
+TEST_F(TxnAPITest, FailoverAndShutdownErrorsAreFatalForLocalTransactionCommandError) {
+ setMongos(false);
+ ON_BLOCK_EXIT([&] { setMongos(true); });
+ auto runTest = [&](bool expectSuccess, Status status) {
+ resetTxnWithRetries();
+
+ int attempt = -1;
+ auto swResult = txnWithRetries().runNoThrow(
+ opCtx(), [&](const txn_api::TransactionClient& txnClient, ExecutorPtr txnExec) {
+ attempt += 1;
+
+ mockClient()->setNextCommandResponse(kOKInsertResponse);
+ auto insertRes = txnClient
+ .runCommand("user"_sd,
+ BSON("insert"
+ << "foo"
+ << "documents" << BSON_ARRAY(BSON("x" << 1))))
+ .get();
+ ASSERT_OK(getStatusFromWriteCommandReply(insertRes));
+
+ // The commit response.
+ mockClient()->setNextCommandResponse(BSON("ok" << 0 << "code" << status.code()));
+ mockClient()->setNextCommandResponse(kOKCommandResponse);
+ return SemiFuture<void>::makeReady();
+ });
+ if (!expectSuccess) {
+ ASSERT(swResult.getStatus().isOK());
+ ASSERT_EQ(swResult.getValue().cmdStatus, status);
+ ASSERT(swResult.getValue().wcError.toStatus().isOK());
+
+ // The API should have returned without trying to abort.
+ auto lastRequest = mockClient()->getLastSentRequest();
+ ASSERT_EQ(lastRequest.firstElementFieldNameStringData(), "commitTransaction"_sd);
+ } else {
+ ASSERT(swResult.getStatus().isOK());
+ ASSERT(swResult.getValue().getEffectiveStatus().isOK());
+ auto lastRequest = mockClient()->getLastSentRequest();
+ ASSERT_EQ(lastRequest.firstElementFieldNameStringData(), "commitTransaction"_sd);
+ }
+ };
+
+ runTest(false, Status(ErrorCodes::InterruptedDueToReplStateChange, "mock repl change error"));
+ runTest(false, Status(ErrorCodes::InterruptedAtShutdown, "mock shutdown error"));
+
+ // Verify the fatal for local logic doesn't apply to all transient or retriable errors.
+ runTest(true, Status(ErrorCodes::HostUnreachable, "mock retriable error"));
+}
+
+TEST_F(TxnAPITest, FailoverAndShutdownErrorsAreFatalForLocalTransactionWCError) {
+ setMongos(false);
+ ON_BLOCK_EXIT([&] { setMongos(true); });
+ auto runTest = [&](bool expectSuccess, Status status) {
+ resetTxnWithRetries();
+
+ int attempt = -1;
+ auto swResult = txnWithRetries().runNoThrow(
+ opCtx(), [&](const txn_api::TransactionClient& txnClient, ExecutorPtr txnExec) {
+ attempt += 1;
+
+ mockClient()->setNextCommandResponse(kOKInsertResponse);
+ auto insertRes = txnClient
+ .runCommand("user"_sd,
+ BSON("insert"
+ << "foo"
+ << "documents" << BSON_ARRAY(BSON("x" << 1))))
+ .get();
+ ASSERT_OK(getStatusFromWriteCommandReply(insertRes));
+
+ // The commit response.
+ auto wcError = BSON("code" << status.code() << "errmsg"
+ << "mock");
+ auto resWithWCError = BSON("ok" << 1 << "writeConcernError" << wcError);
+ mockClient()->setNextCommandResponse(resWithWCError);
+ mockClient()->setNextCommandResponse(kOKCommandResponse);
+ return SemiFuture<void>::makeReady();
+ });
+ if (!expectSuccess) {
+ ASSERT(swResult.getStatus().isOK());
+ ASSERT(swResult.getValue().cmdStatus.isOK());
+ ASSERT_EQ(swResult.getValue().wcError.toStatus(), status);
+
+ // The API should have returned without trying to abort.
+ auto lastRequest = mockClient()->getLastSentRequest();
+ ASSERT_EQ(lastRequest.firstElementFieldNameStringData(), "commitTransaction"_sd);
+ } else {
+ ASSERT(swResult.getStatus().isOK());
+ ASSERT(swResult.getValue().getEffectiveStatus().isOK());
+ auto lastRequest = mockClient()->getLastSentRequest();
+ ASSERT_EQ(lastRequest.firstElementFieldNameStringData(), "commitTransaction"_sd);
+ }
+ };
+
+ runTest(false, Status(ErrorCodes::InterruptedDueToReplStateChange, "mock repl change error"));
+ runTest(false, Status(ErrorCodes::InterruptedAtShutdown, "mock shutdown error"));
+
+ // Verify the fatal for local logic doesn't apply to all transient or retriable errors.
+ runTest(true, Status(ErrorCodes::HostUnreachable, "mock retriable error"));
+}
} // namespace
} // namespace mongo
diff --git a/src/mongo/db/transaction_participant.cpp b/src/mongo/db/transaction_participant.cpp
index 04412b5df11..a4d31c366d4 100644
--- a/src/mongo/db/transaction_participant.cpp
+++ b/src/mongo/db/transaction_participant.cpp
@@ -207,8 +207,15 @@ struct ActiveTransactionHistory {
ActiveTransactionHistory fetchActiveTransactionHistory(OperationContext* opCtx,
const LogicalSessionId& lsid,
bool fetchOplogEntries) {
- // Storage engine operations require at least Global IS.
- Lock::GlobalLock lk(opCtx, MODE_IS);
+ // FlowControl is only impacted when a MODE_IX global lock is acquired. If we are in a
+ // multi-document transaction, we must acquire a MODE_IX global lock. Prevent obtaining a flow
+ // control ticket while in a mutli-document transaction.
+ FlowControl::Bypass flowControlBypass(opCtx);
+
+ // Storage engine operations require at a least global MODE_IS lock. In multi-document
+ // transactions, storage opeartions require at least a global MODE_IX lock. Prevent lock
+ // upgrading in the case of a multi-document transaction.
+ Lock::GlobalLock lk(opCtx, opCtx->inMultiDocumentTransaction() ? MODE_IX : MODE_IS);
ActiveTransactionHistory result;
@@ -612,6 +619,17 @@ TransactionParticipant::getOldestActiveTimestamp(Timestamp stableTimestamp) {
}
}
+boost::optional<TxnNumber> TransactionParticipant::Observer::getClientTxnNumber(
+ const TxnNumberAndRetryCounter& txnNumberAndRetryCounter) const {
+ if (_isInternalSessionForNonRetryableWrite()) {
+ return boost::none;
+ } else if (_isInternalSessionForRetryableWrite()) {
+ invariant(_sessionId().getTxnNumber());
+ return _sessionId().getTxnNumber();
+ }
+ return {txnNumberAndRetryCounter.getTxnNumber()};
+}
+
Session* TransactionParticipant::Observer::_session() const {
return getTransactionParticipant.owner(_tp);
}
@@ -652,29 +670,73 @@ boost::optional<TxnNumber> TransactionParticipant::Observer::_activeRetryableWri
void TransactionParticipant::Participant::_uassertNoConflictingInternalTransactionForRetryableWrite(
OperationContext* opCtx, const TxnNumberAndRetryCounter& txnNumberAndRetryCounter) {
+ auto clientTxnNumber = getClientTxnNumber(txnNumberAndRetryCounter);
+ if (!clientTxnNumber) {
+ // This must be a non-retryable child session transaction so there can't be a conflict.
+ return;
+ }
+
auto& retryableWriteTxnParticipantCatalog =
getRetryableWriteTransactionParticipantCatalog(opCtx);
- invariant(retryableWriteTxnParticipantCatalog.isValid());
+ retryableWriteTxnParticipantCatalog.checkForConflictingInternalTransactions(
+ opCtx, *clientTxnNumber, txnNumberAndRetryCounter);
+}
- for (const auto& it : retryableWriteTxnParticipantCatalog.getParticipants()) {
- const auto& txnParticipant = it.second;
+bool TransactionParticipant::Participant::_verifyCanBeginMultiDocumentTransaction(
+ OperationContext* opCtx, const TxnNumberAndRetryCounter& txnNumberAndRetryCounter) {
+ if (txnNumberAndRetryCounter.getTxnNumber() ==
+ o().activeTxnNumberAndRetryCounter.getTxnNumber()) {
+ if (txnNumberAndRetryCounter.getTxnRetryCounter() <
+ o().activeTxnNumberAndRetryCounter.getTxnRetryCounter()) {
+ uasserted(
+ TxnRetryCounterTooOldInfo(*o().activeTxnNumberAndRetryCounter.getTxnRetryCounter()),
+ str::stream() << "Cannot start a transaction at given transaction number "
+ << txnNumberAndRetryCounter.getTxnNumber() << " on session "
+ << _sessionId() << " using txnRetryCounter "
+ << txnNumberAndRetryCounter.getTxnRetryCounter()
+ << " because it has already been restarted using a "
+ << "higher txnRetryCounter "
+ << o().activeTxnNumberAndRetryCounter.getTxnRetryCounter());
+ } else if (txnNumberAndRetryCounter.getTxnRetryCounter() ==
+ o().activeTxnNumberAndRetryCounter.getTxnRetryCounter() ||
+ o().activeTxnNumberAndRetryCounter.getTxnRetryCounter() ==
+ kUninitializedTxnRetryCounter) {
+ // Servers in a sharded cluster can start a new transaction at the active transaction
+ // number to allow internal retries by routers on re-targeting errors, like
+ // StaleShard/DatabaseVersion or SnapshotTooOld.
+ uassert(ErrorCodes::ConflictingOperationInProgress,
+ "Only servers in a sharded cluster can start a new transaction at the active "
+ "transaction number",
+ serverGlobalParams.clusterRole != ClusterRole::None);
- if (txnParticipant._sessionId() == opCtx->getLogicalSessionId() ||
- !txnParticipant._isInternalSessionForRetryableWrite()) {
- continue;
- }
+ if (_isInternalSessionForRetryableWrite() &&
+ o().txnState.isInSet(TransactionState::kCommitted)) {
+ // This is a retry of a committed internal transaction for retryable writes so
+ // skip resetting the state and updating the metrics.
+ return true;
+ }
- uassert(ErrorCodes::RetryableTransactionInProgress,
- str::stream() << "Cannot run retryable write with session id " << _sessionId()
- << " and transaction number "
- << txnNumberAndRetryCounter.getTxnNumber()
- << " because it is being executed in a retryable internal transaction"
- << " with session id " << txnParticipant._sessionId()
- << " and transaction number "
- << txnParticipant.getActiveTxnNumberAndRetryCounter().getTxnNumber()
- << " in state " << txnParticipant.o().txnState,
- !txnParticipant.transactionIsOpen());
+ _uassertCanReuseActiveTxnNumberForTransaction(opCtx);
+ } else {
+ const auto restartableStates = TransactionState::kNone | TransactionState::kInProgress |
+ TransactionState::kAbortedWithoutPrepare | TransactionState::kAbortedWithPrepare;
+ uassert(ErrorCodes::IllegalOperation,
+ str::stream() << "Cannot restart transaction "
+ << txnNumberAndRetryCounter.getTxnNumber()
+ << " using txnRetryCounter "
+ << txnNumberAndRetryCounter.getTxnRetryCounter()
+ << " because it is already in state " << o().txnState
+ << " with txnRetryCounter "
+ << o().activeTxnNumberAndRetryCounter.getTxnRetryCounter(),
+ o().txnState.isInSet(restartableStates));
+ }
+ } else {
+ invariant(txnNumberAndRetryCounter.getTxnNumber() >
+ o().activeTxnNumberAndRetryCounter.getTxnNumber());
}
+
+ _uassertNoConflictingInternalTransactionForRetryableWrite(opCtx, txnNumberAndRetryCounter);
+ return false;
}
void TransactionParticipant::Participant::_uassertCanReuseActiveTxnNumberForTransaction(
@@ -808,57 +870,6 @@ void TransactionParticipant::Participant::_continueMultiDocumentTransaction(
void TransactionParticipant::Participant::_beginMultiDocumentTransaction(
OperationContext* opCtx, const TxnNumberAndRetryCounter& txnNumberAndRetryCounter) {
- if (txnNumberAndRetryCounter.getTxnNumber() ==
- o().activeTxnNumberAndRetryCounter.getTxnNumber()) {
- if (txnNumberAndRetryCounter.getTxnRetryCounter() <
- o().activeTxnNumberAndRetryCounter.getTxnRetryCounter()) {
- uasserted(
- TxnRetryCounterTooOldInfo(*o().activeTxnNumberAndRetryCounter.getTxnRetryCounter()),
- str::stream() << "Cannot start a transaction at given transaction number "
- << txnNumberAndRetryCounter.getTxnNumber() << " on session "
- << _sessionId() << " using txnRetryCounter "
- << txnNumberAndRetryCounter.getTxnRetryCounter()
- << " because it has already been restarted using a "
- << "higher txnRetryCounter "
- << o().activeTxnNumberAndRetryCounter.getTxnRetryCounter());
- } else if (txnNumberAndRetryCounter.getTxnRetryCounter() ==
- o().activeTxnNumberAndRetryCounter.getTxnRetryCounter() ||
- o().activeTxnNumberAndRetryCounter.getTxnRetryCounter() ==
- kUninitializedTxnRetryCounter) {
- // Servers in a sharded cluster can start a new transaction at the active transaction
- // number to allow internal retries by routers on re-targeting errors, like
- // StaleShard/DatabaseVersion or SnapshotTooOld.
- uassert(ErrorCodes::ConflictingOperationInProgress,
- "Only servers in a sharded cluster can start a new transaction at the active "
- "transaction number",
- serverGlobalParams.clusterRole != ClusterRole::None);
-
- if (_isInternalSessionForRetryableWrite() &&
- o().txnState.isInSet(TransactionState::kCommitted)) {
- // This is a retry of a committed internal transaction for retryable writes so
- // skip resetting the state and updating the metrics.
- return;
- }
-
- _uassertCanReuseActiveTxnNumberForTransaction(opCtx);
- } else {
- const auto restartableStates = TransactionState::kNone | TransactionState::kInProgress |
- TransactionState::kAbortedWithoutPrepare | TransactionState::kAbortedWithPrepare;
- uassert(ErrorCodes::IllegalOperation,
- str::stream() << "Cannot restart transaction "
- << txnNumberAndRetryCounter.getTxnNumber()
- << " using txnRetryCounter "
- << txnNumberAndRetryCounter.getTxnRetryCounter()
- << " because it is already in state " << o().txnState
- << " with txnRetryCounter "
- << o().activeTxnNumberAndRetryCounter.getTxnRetryCounter(),
- o().txnState.isInSet(restartableStates));
- }
- } else {
- invariant(txnNumberAndRetryCounter.getTxnNumber() >
- o().activeTxnNumberAndRetryCounter.getTxnNumber());
- }
-
// Aborts any in-progress txns.
_setNewTxnNumberAndRetryCounter(opCtx, txnNumberAndRetryCounter);
p().autoCommit = false;
@@ -1008,6 +1019,13 @@ void TransactionParticipant::Participant::beginOrContinue(
// an argument on the request. The 'startTransaction' argument currently can only be specified
// as true, which is verified earlier, when parsing the request.
invariant(*startTransaction);
+
+ auto isRetry = _verifyCanBeginMultiDocumentTransaction(opCtx, txnNumberAndRetryCounter);
+ if (isRetry) {
+ // This is a retry for the active transaction, so we don't throw, and we also don't need to
+ // start the transaction since that already happened.
+ return;
+ }
_beginMultiDocumentTransaction(opCtx, txnNumberAndRetryCounter);
}
@@ -2722,6 +2740,12 @@ void TransactionParticipant::Participant::_setNewTxnNumberAndRetryCounter(
if (o().txnState.isInProgress()) {
_abortTransactionOnSession(opCtx);
}
+ // If txnNumber ordering applies, abort any child transactions with a lesser txnNumber.
+ auto clientTxnNumber = getClientTxnNumber(txnNumberAndRetryCounter);
+ if (clientTxnNumber.has_value()) {
+ getRetryableWriteTransactionParticipantCatalog(opCtx).abortSupersededTransactions(
+ opCtx, *clientTxnNumber);
+ }
stdx::unique_lock<Client> lk(*opCtx->getClient());
o(lk).activeTxnNumberAndRetryCounter = txnNumberAndRetryCounter;
@@ -2753,8 +2777,8 @@ void RetryableWriteTransactionParticipantCatalog::addParticipant(
invariant(*txnNumber >= _activeTxnNumber);
if (txnNumber > _activeTxnNumber) {
+ reset();
_activeTxnNumber = *txnNumber;
- _participants.clear();
}
if (auto it = _participants.find(participant._sessionId()); it != _participants.end()) {
invariant(it->second._tp == participant._tp);
@@ -2766,6 +2790,7 @@ void RetryableWriteTransactionParticipantCatalog::addParticipant(
void RetryableWriteTransactionParticipantCatalog::reset() {
_activeTxnNumber = kUninitializedTxnNumber;
_participants.clear();
+ _hasSeenIncomingConflictingRetryableTransaction = false;
}
void RetryableWriteTransactionParticipantCatalog::markAsValid() {
@@ -2786,6 +2811,94 @@ bool RetryableWriteTransactionParticipantCatalog::isValid() const {
});
}
+void RetryableWriteTransactionParticipantCatalog::checkForConflictingInternalTransactions(
+ OperationContext* opCtx,
+ TxnNumber incomingClientTxnNumber,
+ const TxnNumberAndRetryCounter& incomingTxnNumberAndRetryCounter) {
+ invariant(isValid());
+
+ for (auto&& it : _participants) {
+ auto& sessionId = it.first;
+ auto& txnParticipant = it.second;
+
+ if (sessionId == opCtx->getLogicalSessionId() ||
+ !txnParticipant._isInternalSessionForRetryableWrite()) {
+ continue;
+ }
+
+ if (!txnParticipant.transactionIsOpen()) {
+ // The transaction isn't open, so it can't conflict with an incoming transaction.
+ continue;
+ }
+
+ auto clientTxnNumber =
+ txnParticipant.getClientTxnNumber(txnParticipant.getActiveTxnNumberAndRetryCounter());
+ invariant(clientTxnNumber.has_value());
+ if (*clientTxnNumber < incomingClientTxnNumber) {
+ // To match the behavior of client transactions when a logically earlier prepared
+ // transaction is in progress, throw an error to block the new transaction until the
+            // earlier one exits prepare.
+ uassert(ErrorCodes::RetryableTransactionInProgress,
+ "Operation conflicts with an earlier retryable transaction in prepare",
+ !txnParticipant.transactionIsPrepared());
+
+ // Otherwise skip this transaction because it will be aborted when this one begins.
+ continue;
+ }
+
+ if (!_hasSeenIncomingConflictingRetryableTransaction &&
+ txnParticipant.transactionIsInProgress()) {
+ // Only abort when the transaction is in progress since other states may not be safe,
+ // e.g. prepare.
+ _hasSeenIncomingConflictingRetryableTransaction = true;
+ txnParticipant._abortTransactionOnSession(opCtx);
+ } else {
+ uassert(
+ ErrorCodes::RetryableTransactionInProgress,
+ str::stream() << "Cannot run operation with session id "
+ << opCtx->getLogicalSessionId() << " and transaction number "
+ << incomingTxnNumberAndRetryCounter.getTxnNumber()
+ << " because it conflicts with an active operation with session id "
+ << sessionId << " and transaction number "
+ << txnParticipant.getActiveTxnNumberAndRetryCounter().getTxnNumber()
+ << " in state " << txnParticipant.o().txnState,
+ !txnParticipant.transactionIsOpen());
+ }
+ }
+}
+
+void RetryableWriteTransactionParticipantCatalog::abortSupersededTransactions(
+ OperationContext* opCtx, TxnNumber incomingClientTxnNumber) {
+ if (!isValid()) {
+ // This was called while refreshing from storage or applying ops on a secondary, so skip it.
+ return;
+ }
+
+ for (auto&& it : _participants) {
+ auto& sessionId = it.first;
+ auto& txnParticipant = it.second;
+
+ if (sessionId == opCtx->getLogicalSessionId() ||
+ !txnParticipant._isInternalSessionForRetryableWrite()) {
+ continue;
+ }
+
+ // We should never try to abort a prepared transaction. We should have earlier thrown either
+ // RetryableTransactionInProgress or PreparedTransactionInProgress.
+ invariant(!txnParticipant.transactionIsPrepared(),
+ str::stream() << "Transaction on session " << sessionId
+ << " unexpectedly in prepare");
+
+ auto clientTxnNumber =
+ txnParticipant.getClientTxnNumber(txnParticipant.getActiveTxnNumberAndRetryCounter());
+ invariant(clientTxnNumber.has_value());
+ if (*clientTxnNumber < incomingClientTxnNumber &&
+ txnParticipant.transactionIsInProgress()) {
+ txnParticipant._abortTransactionOnSession(opCtx);
+ }
+ }
+}
+
void TransactionParticipant::Participant::refreshFromStorageIfNeeded(OperationContext* opCtx) {
return _refreshFromStorageIfNeeded(opCtx, true);
}
diff --git a/src/mongo/db/transaction_participant.h b/src/mongo/db/transaction_participant.h
index fa5b98757af..e584960b1c8 100644
--- a/src/mongo/db/transaction_participant.h
+++ b/src/mongo/db/transaction_participant.h
@@ -359,6 +359,17 @@ public:
*/
void reportUnstashedState(OperationContext* opCtx, BSONObjBuilder* builder) const;
+ /**
+ * Returns the transaction number associated with the client operation that spawned this
+ * transaction. ie the top-level txnNumber for a retryable write or client transaction or
+ * the txnNumber in the session id for a retryable transaction. The passed in
+ * txnNumberAndRetryCounter should be the active txnNumberAndRetryCounter of this
+ * participant. This must be provided so this method can be used before the participant's
+ * active txnNumberAndRetryCounter has been changed.
+ */
+ boost::optional<TxnNumber> getClientTxnNumber(
+ const TxnNumberAndRetryCounter& txnNumberAndRetryCounter) const;
+
protected:
explicit Observer(TransactionParticipant* tp) : _tp(tp) {}
@@ -919,6 +930,12 @@ public:
// byzantine messages, this check should never fail.
void _uassertCanReuseActiveTxnNumberForTransaction(OperationContext* opCtx);
+ // Verifies we can begin a multi document transaction with the given txnNumber and
+ // txnRetryCounter. Throws if we cannot. Returns true if this is a retry of the active
+ // transaction and false otherwise.
+ bool _verifyCanBeginMultiDocumentTransaction(
+ OperationContext* opCtx, const TxnNumberAndRetryCounter& txnNumberAndRetryCounter);
+
// Attempt to begin or retry a retryable write at the given transaction number.
void _beginOrContinueRetryableWrite(
OperationContext* opCtx, const TxnNumberAndRetryCounter& txnNumberAndRetryCounter);
@@ -1250,10 +1267,35 @@ public:
*/
bool isValid() const;
+ /**
+ * If a transaction in the catalog conflicts with the incoming transaction and this is the first
+ * time that has happened, the conflicting transaction is aborted, on the assumption that the
+ * new transaction is likely from a fresher client and the client of the conflicting transaction
+     * has given up (e.g. crashed). To prevent livelocks if both clients are alive and retrying,
+ * RetryableTransactionInProgress is thrown on subsequent calls, forcing the incoming
+     * transaction to wait for the conflicting transaction to complete.
+ */
+ void checkForConflictingInternalTransactions(
+ OperationContext* opCtx,
+ TxnNumber incomingClientTxnNumber,
+ const TxnNumberAndRetryCounter& incomingTxnNumberAndRetryCounter);
+
+ /**
+ * Aborts any child transactions that are logically superseded by the incoming transaction, ie
+ * retryable transactions where the txnNumber in their session id < the top-level txnNumber for
+ * a retryable write / client transaction or the session id txnNumber for a retryable
+ * transaction.
+ */
+ void abortSupersededTransactions(OperationContext* opCtx, TxnNumber incomingClientTxnNumber);
+
private:
TxnNumber _activeTxnNumber{kUninitializedTxnNumber};
LogicalSessionIdMap<TransactionParticipant::Participant> _participants;
bool _isValid{false};
+
+ // Set true after an incoming retryable transaction has conflicted with an open transaction in
+ // this catalog.
+ bool _hasSeenIncomingConflictingRetryableTransaction{false};
};
} // namespace mongo
diff --git a/src/mongo/db/transaction_participant_test.cpp b/src/mongo/db/transaction_participant_test.cpp
index 736020b0bf1..ee477f77463 100644
--- a/src/mongo/db/transaction_participant_test.cpp
+++ b/src/mongo/db/transaction_participant_test.cpp
@@ -4874,18 +4874,44 @@ TEST_F(ShardTxnParticipantTest,
ASSERT_TRUE(txnParticipant.transactionIsInProgress());
}
-TEST_F(ShardTxnParticipantTest,
- CannotRetryInProgressTransactionForRetryableWrite_ConflictingTransactionForRetryableWrite) {
+TEST_F(ShardTxnParticipantTest, CannotRetryInProgressRetryableTxn_ConflictingRetryableTxn) {
const auto parentLsid = makeLogicalSessionIdForTest();
const auto parentTxnNumber = *opCtx()->getTxnNumber();
opCtx()->setLogicalSessionId(
makeLogicalSessionIdWithTxnNumberAndUUIDForTest(parentLsid, parentTxnNumber));
- auto sessionCheckout = checkOutSession();
- auto txnParticipant = TransactionParticipant::get(opCtx());
- ASSERT_TRUE(txnParticipant.transactionIsInProgress());
- OperationContextSession::checkIn(opCtx(), OperationContextSession::CheckInReason::kDone);
+ {
+ auto sessionCheckout = checkOutSession();
+ auto txnParticipant = TransactionParticipant::get(opCtx());
+ ASSERT_TRUE(txnParticipant.transactionIsInProgress());
+ }
+
+ // The first conflicting transaction should abort the active one.
+ const auto firstConflictingLsid =
+ makeLogicalSessionIdWithTxnNumberAndUUIDForTest(parentLsid, parentTxnNumber);
+ runFunctionFromDifferentOpCtx([firstConflictingLsid](OperationContext* newOpCtx) {
+ newOpCtx->setLogicalSessionId(firstConflictingLsid);
+ newOpCtx->setTxnNumber(0);
+ newOpCtx->setInMultiDocumentTransaction();
+
+ MongoDOperationContextSession ocs(newOpCtx);
+ auto txnParticipant = TransactionParticipant::get(newOpCtx);
+ txnParticipant.beginOrContinue(
+ newOpCtx, {0}, false /* autocommit */, true /* startTransaction */);
+ ASSERT_TRUE(txnParticipant.transactionIsInProgress());
+ txnParticipant.unstashTransactionResources(newOpCtx, "insert");
+ txnParticipant.stashTransactionResources(newOpCtx);
+ });
+ // Continuing the interrupted transaction should throw without aborting the new active
+ // transaction.
+ {
+ ASSERT_THROWS_CODE(checkOutSession(boost::none /* startNewTxn */),
+ AssertionException,
+ ErrorCodes::RetryableTransactionInProgress);
+ }
+
+ // A second conflicting transaction should throw and not abort the active one.
runFunctionFromDifferentOpCtx([parentLsid, parentTxnNumber](OperationContext* newOpCtx) {
newOpCtx->setLogicalSessionId(
makeLogicalSessionIdWithTxnNumberAndUUIDForTest(parentLsid, parentTxnNumber));
@@ -4900,20 +4926,71 @@ TEST_F(ShardTxnParticipantTest,
ErrorCodes::RetryableTransactionInProgress);
});
- ASSERT_TRUE(txnParticipant.transactionIsInProgress());
+ // Verify the first conflicting txn is still open.
+ runFunctionFromDifferentOpCtx([firstConflictingLsid](OperationContext* newOpCtx) {
+ newOpCtx->setLogicalSessionId(firstConflictingLsid);
+ newOpCtx->setTxnNumber(0);
+ newOpCtx->setInMultiDocumentTransaction();
+
+ MongoDOperationContextSession ocs(newOpCtx);
+ auto txnParticipant = TransactionParticipant::get(newOpCtx);
+ txnParticipant.beginOrContinue(
+ newOpCtx, {0}, false /* autocommit */, boost::none /* startTransaction */);
+ txnParticipant.unstashTransactionResources(newOpCtx, "insert");
+ ASSERT(txnParticipant.transactionIsInProgress());
+ });
}
-TEST_F(ShardTxnParticipantTest,
- CannotRetryInProgressTransactionForRetryableWrite_ConflictingRetryableWrite) {
+TEST_F(ShardTxnParticipantTest, CannotRetryInProgressRetryableTxn_ConflictingRetryableWrite) {
const auto parentLsid = makeLogicalSessionIdForTest();
const auto parentTxnNumber = *opCtx()->getTxnNumber();
opCtx()->setLogicalSessionId(
makeLogicalSessionIdWithTxnNumberAndUUIDForTest(parentLsid, parentTxnNumber));
- auto sessionCheckout = checkOutSession();
- auto txnParticipant = TransactionParticipant::get(opCtx());
- ASSERT_TRUE(txnParticipant.transactionIsInProgress());
- OperationContextSession::checkIn(opCtx(), OperationContextSession::CheckInReason::kDone);
+ {
+ auto sessionCheckout = checkOutSession();
+ auto txnParticipant = TransactionParticipant::get(opCtx());
+ ASSERT_TRUE(txnParticipant.transactionIsInProgress());
+ }
+
+ //
+ // The first conflicting retryable write should abort a conflicting retryable transaction.
+ //
+ runFunctionFromDifferentOpCtx([parentLsid, parentTxnNumber](OperationContext* newOpCtx) {
+ newOpCtx->setLogicalSessionId(parentLsid);
+ newOpCtx->setTxnNumber(parentTxnNumber);
+
+ // Shouldn't throw.
+ MongoDOperationContextSession ocs(newOpCtx);
+ auto txnParticipant = TransactionParticipant::get(newOpCtx);
+ txnParticipant.beginOrContinue(newOpCtx,
+ {parentTxnNumber},
+ boost::none /* autocommit */,
+ boost::none /* startTransaction */);
+ });
+
+ // Continuing the interrupted transaction should throw because it was aborted. Note this does
+ // not throw RetryableTransactionInProgress because the retryable write that aborted the
+ // transaction completed.
+ {
+ auto sessionCheckout = checkOutSession(boost::none /* startNewTxn */);
+ auto txnParticipant = TransactionParticipant::get(opCtx());
+ ASSERT_THROWS_CODE(txnParticipant.unstashTransactionResources(opCtx(), "insert"),
+ AssertionException,
+ ErrorCodes::NoSuchTransaction);
+ }
+
+ //
+ // The second conflicting retryable write should throw and not abort a conflicting retryable
+ // transaction.
+ //
+ {
+ auto sessionCheckout = checkOutSession();
+ auto txnParticipant = TransactionParticipant::get(opCtx());
+ ASSERT_TRUE(txnParticipant.transactionIsInProgress());
+ txnParticipant.unstashTransactionResources(opCtx(), "insert");
+ txnParticipant.stashTransactionResources(opCtx());
+ }
runFunctionFromDifferentOpCtx([parentLsid, parentTxnNumber](OperationContext* newOpCtx) {
newOpCtx->setLogicalSessionId(parentLsid);
@@ -4929,7 +5006,290 @@ TEST_F(ShardTxnParticipantTest,
ErrorCodes::RetryableTransactionInProgress);
});
- ASSERT_TRUE(txnParticipant.transactionIsInProgress());
+ {
+ auto sessionCheckout = checkOutSession(boost::none /* startNewTxn */);
+ auto txnParticipant = TransactionParticipant::get(opCtx());
+ txnParticipant.beginOrContinue(
+ opCtx(), {parentTxnNumber}, false /* autocommit */, boost::none /* startTransaction */);
+ txnParticipant.unstashTransactionResources(opCtx(), "insert");
+ ASSERT_TRUE(txnParticipant.transactionIsInProgress());
+ }
+}
+
+TEST_F(ShardTxnParticipantTest, RetryableTransactionInProgressCounterResetsUponNewTxnNumber) {
+ const auto parentLsid = makeLogicalSessionIdForTest();
+ auto parentTxnNumber = *opCtx()->getTxnNumber();
+
+ opCtx()->setLogicalSessionId(
+ makeLogicalSessionIdWithTxnNumberAndUUIDForTest(parentLsid, parentTxnNumber));
+ {
+ auto sessionCheckout = checkOutSession();
+ auto txnParticipant = TransactionParticipant::get(opCtx());
+ ASSERT_TRUE(txnParticipant.transactionIsInProgress());
+ }
+
+ // The first conflicting transaction should abort the active one.
+ const auto firstConflictingLsid =
+ makeLogicalSessionIdWithTxnNumberAndUUIDForTest(parentLsid, parentTxnNumber);
+ runFunctionFromDifferentOpCtx([firstConflictingLsid](OperationContext* newOpCtx) {
+ newOpCtx->setLogicalSessionId(firstConflictingLsid);
+ newOpCtx->setTxnNumber(0);
+ newOpCtx->setInMultiDocumentTransaction();
+
+ MongoDOperationContextSession ocs(newOpCtx);
+ auto txnParticipant = TransactionParticipant::get(newOpCtx);
+ txnParticipant.beginOrContinue(
+ newOpCtx, {0}, false /* autocommit */, true /* startTransaction */);
+ ASSERT(txnParticipant.transactionIsInProgress());
+ txnParticipant.unstashTransactionResources(newOpCtx, "insert");
+ txnParticipant.stashTransactionResources(newOpCtx);
+ });
+
+ // A second conflicting transaction should throw and not abort the active one.
+ runFunctionFromDifferentOpCtx([parentLsid, parentTxnNumber](OperationContext* newOpCtx) {
+ newOpCtx->setLogicalSessionId(
+ makeLogicalSessionIdWithTxnNumberAndUUIDForTest(parentLsid, parentTxnNumber));
+ newOpCtx->setTxnNumber(0);
+ newOpCtx->setInMultiDocumentTransaction();
+
+ MongoDOperationContextSession ocs(newOpCtx);
+ auto txnParticipant = TransactionParticipant::get(newOpCtx);
+ ASSERT_THROWS_CODE(txnParticipant.beginOrContinue(
+ newOpCtx, {0}, false /* autocommit */, true /* startTransaction */),
+ AssertionException,
+ ErrorCodes::RetryableTransactionInProgress);
+ });
+
+ // Advance the txnNumber and verify the first new conflicting transaction does not throw
+ // RetryableTransactionInProgress.
+
+ parentTxnNumber += 1;
+ const auto higherChildLsid =
+ makeLogicalSessionIdWithTxnNumberAndUUIDForTest(parentLsid, parentTxnNumber);
+ runFunctionFromDifferentOpCtx([higherChildLsid](OperationContext* newOpCtx) {
+ newOpCtx->setLogicalSessionId(higherChildLsid);
+ newOpCtx->setTxnNumber(0);
+ newOpCtx->setInMultiDocumentTransaction();
+
+ MongoDOperationContextSession ocs(newOpCtx);
+ auto txnParticipant = TransactionParticipant::get(newOpCtx);
+ txnParticipant.beginOrContinue(
+ newOpCtx, {0}, false /* autocommit */, true /* startTransaction */);
+ ASSERT(txnParticipant.transactionIsInProgress());
+ });
+
+ const auto higherFirstConflictingLsid =
+ makeLogicalSessionIdWithTxnNumberAndUUIDForTest(parentLsid, parentTxnNumber);
+ runFunctionFromDifferentOpCtx([higherFirstConflictingLsid](OperationContext* newOpCtx) {
+ newOpCtx->setLogicalSessionId(higherFirstConflictingLsid);
+ newOpCtx->setTxnNumber(0);
+ newOpCtx->setInMultiDocumentTransaction();
+
+ MongoDOperationContextSession ocs(newOpCtx);
+ auto txnParticipant = TransactionParticipant::get(newOpCtx);
+ txnParticipant.beginOrContinue(
+ newOpCtx, {0}, false /* autocommit */, true /* startTransaction */);
+ ASSERT(txnParticipant.transactionIsInProgress());
+ });
+
+ // A second conflicting transaction should still throw and not abort the active one.
+ const auto higherSecondConflictingLsid =
+ makeLogicalSessionIdWithTxnNumberAndUUIDForTest(parentLsid, parentTxnNumber);
+ runFunctionFromDifferentOpCtx([higherSecondConflictingLsid](OperationContext* newOpCtx) {
+ newOpCtx->setLogicalSessionId(higherSecondConflictingLsid);
+ newOpCtx->setTxnNumber(0);
+ newOpCtx->setInMultiDocumentTransaction();
+
+ MongoDOperationContextSession ocs(newOpCtx);
+ auto txnParticipant = TransactionParticipant::get(newOpCtx);
+ ASSERT_THROWS_CODE(txnParticipant.beginOrContinue(
+ newOpCtx, {0}, false /* autocommit */, true /* startTransaction */),
+ AssertionException,
+ ErrorCodes::RetryableTransactionInProgress);
+ });
+}
+
+TEST_F(ShardTxnParticipantTest, HigherTxnNumberAbortsLowerChildTransactions_RetryableTxn) {
+ const auto parentLsid = makeLogicalSessionIdForTest();
+ auto parentTxnNumber = *opCtx()->getTxnNumber();
+
+ opCtx()->setLogicalSessionId(
+ makeLogicalSessionIdWithTxnNumberAndUUIDForTest(parentLsid, parentTxnNumber));
+ {
+ auto sessionCheckout = checkOutSession();
+ auto txnParticipant = TransactionParticipant::get(opCtx());
+ ASSERT_TRUE(txnParticipant.transactionIsInProgress());
+ }
+
+ // Advance the txnNumber and verify the first new conflicting transaction does not throw
+ // RetryableTransactionInProgress.
+
+ parentTxnNumber += 1;
+
+ const auto higherChildLsid =
+ makeLogicalSessionIdWithTxnNumberAndUUIDForTest(parentLsid, parentTxnNumber);
+ runFunctionFromDifferentOpCtx([higherChildLsid](OperationContext* newOpCtx) {
+ newOpCtx->setLogicalSessionId(higherChildLsid);
+ newOpCtx->setTxnNumber(0);
+ newOpCtx->setInMultiDocumentTransaction();
+
+ MongoDOperationContextSession ocs(newOpCtx);
+ auto txnParticipant = TransactionParticipant::get(newOpCtx);
+ txnParticipant.beginOrContinue(
+ newOpCtx, {0}, false /* autocommit */, true /* startTransaction */);
+ ASSERT(txnParticipant.transactionIsInProgress());
+ });
+}
+
+TEST_F(ShardTxnParticipantTest, HigherTxnNumberAbortsLowerChildTransactions_RetryableWrite) {
+ const auto parentLsid = makeLogicalSessionIdForTest();
+ auto parentTxnNumber = *opCtx()->getTxnNumber();
+
+ opCtx()->setLogicalSessionId(
+ makeLogicalSessionIdWithTxnNumberAndUUIDForTest(parentLsid, parentTxnNumber));
+ {
+ auto sessionCheckout = checkOutSession();
+ auto txnParticipant = TransactionParticipant::get(opCtx());
+ ASSERT_TRUE(txnParticipant.transactionIsInProgress());
+ }
+
+ // Advance the txnNumber and verify the first new conflicting transaction does not throw
+ // RetryableTransactionInProgress.
+
+ parentTxnNumber += 1;
+
+ runFunctionFromDifferentOpCtx([parentLsid, parentTxnNumber](OperationContext* newOpCtx) {
+ newOpCtx->setLogicalSessionId(parentLsid);
+ newOpCtx->setTxnNumber(parentTxnNumber);
+
+ MongoDOperationContextSession ocs(newOpCtx);
+ auto txnParticipant = TransactionParticipant::get(newOpCtx);
+ txnParticipant.beginOrContinue(newOpCtx,
+ {parentTxnNumber},
+ boost::none /* autocommit */,
+ boost::none /* startTransaction */);
+ });
+}
+
+TEST_F(ShardTxnParticipantTest, HigherTxnNumberAbortsLowerChildTransactions_Transaction) {
+ const auto parentLsid = makeLogicalSessionIdForTest();
+ auto parentTxnNumber = *opCtx()->getTxnNumber();
+
+ opCtx()->setLogicalSessionId(
+ makeLogicalSessionIdWithTxnNumberAndUUIDForTest(parentLsid, parentTxnNumber));
+ {
+ auto sessionCheckout = checkOutSession();
+ auto txnParticipant = TransactionParticipant::get(opCtx());
+ ASSERT_TRUE(txnParticipant.transactionIsInProgress());
+ }
+
+ // Advance the txnNumber and verify the first new conflicting transaction does not throw
+ // RetryableTransactionInProgress.
+
+ parentTxnNumber += 1;
+
+ runFunctionFromDifferentOpCtx([parentLsid, parentTxnNumber](OperationContext* newOpCtx) {
+ newOpCtx->setLogicalSessionId(parentLsid);
+ newOpCtx->setTxnNumber(parentTxnNumber);
+ newOpCtx->setInMultiDocumentTransaction();
+
+ MongoDOperationContextSession ocs(newOpCtx);
+ auto txnParticipant = TransactionParticipant::get(newOpCtx);
+ txnParticipant.beginOrContinue(newOpCtx,
+ *newOpCtx->getTxnNumber(),
+ false /* autocommit */,
+ true /* startTransaction */);
+ ASSERT(txnParticipant.transactionIsInProgress());
+ });
+}
+
+TEST_F(ShardTxnParticipantTest, HigherTxnNumberDoesNotAbortPreparedLowerChildTransaction) {
+ const auto parentLsid = makeLogicalSessionIdForTest();
+ const auto parentTxnNumber = *opCtx()->getTxnNumber();
+
+ // Start a prepared child transaction.
+ opCtx()->setLogicalSessionId(
+ makeLogicalSessionIdWithTxnNumberAndUUIDForTest(parentLsid, parentTxnNumber));
+ {
+ auto sessionCheckout = checkOutSession();
+ auto txnParticipant = TransactionParticipant::get(opCtx());
+ txnParticipant.unstashTransactionResources(opCtx(), "prepareTransaction");
+ txnParticipant.prepareTransaction(opCtx(), {});
+ ASSERT(txnParticipant.transactionIsPrepared());
+ txnParticipant.stashTransactionResources(opCtx());
+ }
+
+ // Advance the txnNumber and verify the first new conflicting transaction and retryable write
+ // throws RetryableTransactionInProgress.
+
+ const auto higherParentTxnNumber = parentTxnNumber + 1;
+
+ const auto higherChildLsid =
+ makeLogicalSessionIdWithTxnNumberAndUUIDForTest(parentLsid, higherParentTxnNumber);
+ runFunctionFromDifferentOpCtx([higherChildLsid](OperationContext* newOpCtx) {
+ newOpCtx->setLogicalSessionId(higherChildLsid);
+ newOpCtx->setTxnNumber(0);
+ newOpCtx->setInMultiDocumentTransaction();
+
+ MongoDOperationContextSession ocs(newOpCtx);
+ auto txnParticipant = TransactionParticipant::get(newOpCtx);
+ ASSERT_THROWS_CODE(txnParticipant.beginOrContinue(
+ newOpCtx, {0}, false /* autocommit */, true /* startTransaction */),
+ AssertionException,
+ ErrorCodes::RetryableTransactionInProgress);
+ });
+
+ runFunctionFromDifferentOpCtx([parentLsid, higherParentTxnNumber](OperationContext* newOpCtx) {
+ newOpCtx->setLogicalSessionId(parentLsid);
+ newOpCtx->setTxnNumber(higherParentTxnNumber);
+
+ MongoDOperationContextSession ocs(newOpCtx);
+ auto txnParticipant = TransactionParticipant::get(newOpCtx);
+ ASSERT_THROWS_CODE(txnParticipant.beginOrContinue(newOpCtx,
+ {higherParentTxnNumber},
+ boost::none /* autocommit */,
+ boost::none /* startTransaction */),
+ AssertionException,
+ ErrorCodes::RetryableTransactionInProgress);
+ });
+
+ // After the transaction leaves prepare a conflicting internal transaction can still abort an
+ // active transaction.
+
+ {
+ auto sessionCheckout = checkOutSession(boost::none /* startNewTxn */);
+ auto txnParticipant = TransactionParticipant::get(opCtx());
+ txnParticipant.beginOrContinue(
+ opCtx(), {parentTxnNumber}, false /* autocommit */, boost::none /* startTransaction */);
+ txnParticipant.unstashTransactionResources(opCtx(), "abortTransaction");
+ txnParticipant.abortTransaction(opCtx());
+ }
+
+ runFunctionFromDifferentOpCtx([higherChildLsid](OperationContext* newOpCtx) {
+ newOpCtx->setLogicalSessionId(higherChildLsid);
+ newOpCtx->setTxnNumber(0);
+ newOpCtx->setInMultiDocumentTransaction();
+
+ MongoDOperationContextSession ocs(newOpCtx);
+ auto txnParticipant = TransactionParticipant::get(newOpCtx);
+ txnParticipant.beginOrContinue(
+ newOpCtx, {0}, false /* autocommit */, true /* startTransaction */);
+ ASSERT(txnParticipant.transactionIsInProgress());
+ });
+
+ const auto higherConflictingChildLsid =
+ makeLogicalSessionIdWithTxnNumberAndUUIDForTest(parentLsid, higherParentTxnNumber);
+ runFunctionFromDifferentOpCtx([higherConflictingChildLsid](OperationContext* newOpCtx) {
+ newOpCtx->setLogicalSessionId(higherConflictingChildLsid);
+ newOpCtx->setTxnNumber(0);
+ newOpCtx->setInMultiDocumentTransaction();
+
+ MongoDOperationContextSession ocs(newOpCtx);
+ auto txnParticipant = TransactionParticipant::get(newOpCtx);
+ txnParticipant.beginOrContinue(
+ newOpCtx, {0}, false /* autocommit */, true /* startTransaction */);
+ ASSERT(txnParticipant.transactionIsInProgress());
+ });
}
TEST_F(ShardTxnParticipantTest,
diff --git a/src/mongo/db/transaction_validation.cpp b/src/mongo/db/transaction_validation.cpp
index 6571711cc76..2f4eedd12b5 100644
--- a/src/mongo/db/transaction_validation.cpp
+++ b/src/mongo/db/transaction_validation.cpp
@@ -43,53 +43,21 @@ namespace mongo {
using namespace fmt::literals;
-namespace {
-
-// TODO SERVER-65101: Replace this with a property on each command.
-const StringMap<int> retryableWriteCommands = {{"clusterDelete", 1},
- {"clusterInsert", 1},
- {"clusterUpdate", 1},
- {"delete", 1},
- {"findandmodify", 1},
- {"findAndModify", 1},
- {"insert", 1},
- {"testInternalTransactions", 1},
- {"update", 1},
- {"_recvChunkStart", 1},
- {"_configsvrRemoveChunks", 1},
- {"_configsvrRemoveTags", 1},
- {"_shardsvrCreateCollectionParticipant", 1},
- {"_shardsvrDropCollectionParticipant", 1},
- {"_shardsvrRenameCollectionParticipant", 1},
- {"_shardsvrRenameCollectionParticipantUnblock", 1},
- {"_configsvrRenameCollectionMetadata", 1},
- {"_shardsvrParticipantBlock", 1},
- {"_configsvrCollMod", 1},
- {"_shardsvrCollModParticipant", 1},
- {"_shardsvrSetClusterParameter", 1},
- {"_shardsvrSetUserWriteBlockMode", 1}};
-
-// TODO SERVER-65101: Replace this with a property on each command.
-// Commands that can be sent with session info but should not check out a session.
-const StringMap<int> skipSessionCheckoutList = {
- {"coordinateCommitTransaction", 1}, {"_recvChunkStart", 1}, {"replSetStepDown", 1}};
-
-// TODO SERVER-65101: Replace this with a property on each command.
-const StringMap<int> transactionCommands = {{"abortTransaction", 1},
- {"clusterAbortTransaction", 1},
- {"clusterCommitTransaction", 1},
- {"commitTransaction", 1},
- {"coordinateCommitTransaction", 1},
- {"prepareTransaction", 1}};
-
-} // namespace
-
bool isRetryableWriteCommand(StringData cmdName) {
- return retryableWriteCommands.find(cmdName) != retryableWriteCommands.cend();
+ auto command = CommandHelpers::findCommand(cmdName);
+ uassert(ErrorCodes::CommandNotFound,
+ str::stream() << "Encountered unknown command during retryability check: " << cmdName,
+ command);
+ return command->supportsRetryableWrite();
}
bool isTransactionCommand(StringData cmdName) {
- return transactionCommands.find(cmdName) != transactionCommands.cend();
+ auto command = CommandHelpers::findCommand(cmdName);
+ uassert(ErrorCodes::CommandNotFound,
+ str::stream() << "Encountered unknown command during isTransactionCommand check: "
+ << cmdName,
+ command);
+ return command->isTransactionCommand();
}
void validateWriteConcernForTransaction(const WriteConcernOptions& wcResult, StringData cmdName) {
@@ -104,10 +72,6 @@ bool isReadConcernLevelAllowedInTransaction(repl::ReadConcernLevel readConcernLe
readConcernLevel == repl::ReadConcernLevel::kLocalReadConcern;
}
-bool shouldCommandSkipSessionCheckout(StringData cmdName) {
- return skipSessionCheckoutList.find(cmdName) != skipSessionCheckoutList.cend();
-}
-
void validateSessionOptions(const OperationSessionInfoFromClient& sessionOptions,
StringData cmdName,
const NamespaceString& nss,
diff --git a/src/mongo/db/transaction_validation.h b/src/mongo/db/transaction_validation.h
index b3372dfe5b4..e3c4d21f201 100644
--- a/src/mongo/db/transaction_validation.h
+++ b/src/mongo/db/transaction_validation.h
@@ -57,12 +57,6 @@ void validateWriteConcernForTransaction(const WriteConcernOptions& wcResult, Str
bool isReadConcernLevelAllowedInTransaction(repl::ReadConcernLevel readConcernLevel);
/**
- * Returns true if the given command is one of the commands that does not check out a session
- * regardless of its session options, e.g. two-phase commit commands.
- */
-bool shouldCommandSkipSessionCheckout(StringData cmdName);
-
-/**
* Throws if the given session options are invalid for the given command and target namespace.
*/
void validateSessionOptions(const OperationSessionInfoFromClient& sessionOptions,
diff --git a/src/mongo/db/views/durable_view_catalog.cpp b/src/mongo/db/views/durable_view_catalog.cpp
index 361505220b8..900988f8dbd 100644
--- a/src/mongo/db/views/durable_view_catalog.cpp
+++ b/src/mongo/db/views/durable_view_catalog.cpp
@@ -54,13 +54,16 @@
#define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kStorage
-
namespace mongo {
namespace {
void validateViewDefinitionBSON(OperationContext* opCtx,
const BSONObj& viewDefinition,
StringData dbName) {
+ // Internal callers should always pass in a valid 'dbName' against which to compare the
+ // 'viewDefinition'.
+ invariant(NamespaceString::validDBName(dbName));
+
bool valid = true;
for (const BSONElement& e : viewDefinition) {
@@ -122,7 +125,7 @@ Status DurableViewCatalog::onExternalInsert(OperationContext* opCtx,
const BSONObj& doc,
const NamespaceString& name) {
try {
- validateViewDefinitionBSON(opCtx, doc, name.toString());
+ validateViewDefinitionBSON(opCtx, doc, name.db());
} catch (const DBException& e) {
return e.toStatus();
}
@@ -132,9 +135,6 @@ Status DurableViewCatalog::onExternalInsert(OperationContext* opCtx,
NamespaceString viewOn(name.db(), doc.getStringField("viewOn"));
BSONArray pipeline(doc.getObjectField("pipeline"));
BSONObj collation(doc.getObjectField("collation"));
- // Set updateDurableViewCatalog to false because the view has already been inserted into the
- // durable view catalog.
- const bool updateDurableViewCatalog = false;
return catalog->createView(opCtx,
viewName,
@@ -142,7 +142,7 @@ Status DurableViewCatalog::onExternalInsert(OperationContext* opCtx,
pipeline,
collation,
view_catalog_helpers::validatePipeline,
- updateDurableViewCatalog);
+ CollectionCatalog::ViewUpsertMode::kAlreadyDurableView);
}
void DurableViewCatalog::onSystemViewsCollectionDrop(OperationContext* opCtx,