diff options
author | Kaloian Manassiev <kaloian.manassiev@mongodb.com> | 2018-02-03 09:42:54 -0500 |
---|---|---|
committer | Kaloian Manassiev <kaloian.manassiev@mongodb.com> | 2018-02-06 07:33:21 -0500 |
commit | 0851ee0434ba5352561a204f368a062d660c8882 (patch) | |
tree | 1b16836f9db4bc134df768a4f4a1e23a0c8f0b30 /src/mongo/db | |
parent | d7c127cb2c98eab7ca9ea0ef8405126e675ed5d7 (diff) | |
download | mongo-0851ee0434ba5352561a204f368a062d660c8882.tar.gz |
SERVER-29908 Move sharding_catalog_manager and tests under db/s/config
Also get rid of sharding_catalog_test_fixture since it doesn't provide
much value.
Diffstat (limited to 'src/mongo/db')
42 files changed, 9334 insertions, 25 deletions
diff --git a/src/mongo/db/commands/SConscript b/src/mongo/db/commands/SConscript index 378e475dc44..7d889b66c5c 100644 --- a/src/mongo/db/commands/SConscript +++ b/src/mongo/db/commands/SConscript @@ -226,11 +226,11 @@ env.Library( '$BUILD_DIR/mongo/db/repl/oplog', '$BUILD_DIR/mongo/db/repl/repl_coordinator_interface', '$BUILD_DIR/mongo/db/rw_concern_d', + '$BUILD_DIR/mongo/db/s/sharding_catalog_manager', '$BUILD_DIR/mongo/db/server_options_core', '$BUILD_DIR/mongo/db/stats/serveronly_stats', '$BUILD_DIR/mongo/db/storage/mmap_v1/storage_mmapv1', '$BUILD_DIR/mongo/db/views/views_mongod', - '$BUILD_DIR/mongo/s/catalog/sharding_catalog_manager', '$BUILD_DIR/mongo/s/client/parallel', 'core', 'current_op_common', diff --git a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp index b4f58018eaa..ba82f10cc54 100644 --- a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp +++ b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp @@ -40,10 +40,10 @@ #include "mongo/db/repl/repl_client_info.h" #include "mongo/db/repl/replication_coordinator.h" #include "mongo/db/repl/replication_coordinator_global.h" +#include "mongo/db/s/config/sharding_catalog_manager.h" #include "mongo/db/server_options.h" #include "mongo/rpc/get_status_from_command_result.h" #include "mongo/s/catalog/sharding_catalog_client_impl.h" -#include "mongo/s/catalog/sharding_catalog_manager.h" #include "mongo/s/client/shard_registry.h" #include "mongo/util/exit.h" #include "mongo/util/fail_point_service.h" diff --git a/src/mongo/db/db.cpp b/src/mongo/db/db.cpp index 19ffd88d700..19aef3e230c 100644 --- a/src/mongo/db/db.cpp +++ b/src/mongo/db/db.cpp @@ -106,6 +106,7 @@ #include "mongo/db/repl/storage_interface_impl.h" #include "mongo/db/repl/topology_coordinator.h" #include "mongo/db/s/balancer/balancer.h" +#include "mongo/db/s/config/sharding_catalog_manager.h" #include 
"mongo/db/s/sharded_connection_info.h" #include "mongo/db/s/sharding_initialization_mongod.h" #include "mongo/db/s/sharding_state.h" @@ -132,7 +133,6 @@ #include "mongo/executor/thread_pool_task_executor.h" #include "mongo/platform/process_id.h" #include "mongo/rpc/metadata/egress_metadata_hook_list.h" -#include "mongo/s/catalog/sharding_catalog_manager.h" #include "mongo/s/client/shard_registry.h" #include "mongo/s/grid.h" #include "mongo/s/sharding_initialization.h" diff --git a/src/mongo/db/repl/SConscript b/src/mongo/db/repl/SConscript index 8e512adcdcf..ddcc2ab2c5b 100644 --- a/src/mongo/db/repl/SConscript +++ b/src/mongo/db/repl/SConscript @@ -1565,10 +1565,10 @@ env.Library( '$BUILD_DIR/mongo/db/repair_database', '$BUILD_DIR/mongo/db/repl/oplog_buffer_proxy', '$BUILD_DIR/mongo/db/s/balancer', + '$BUILD_DIR/mongo/db/s/sharding_catalog_manager', '$BUILD_DIR/mongo/db/service_context', '$BUILD_DIR/mongo/db/stats/counters', '$BUILD_DIR/mongo/rpc/client_metadata', - '$BUILD_DIR/mongo/s/catalog/sharding_catalog_manager', 'bgsync', 'drop_pending_collection_reaper', 'oplog_buffer_collection', diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp index 3a6819534b5..f2a1e8863eb 100644 --- a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp @@ -70,6 +70,7 @@ #include "mongo/db/repl/rs_sync.h" #include "mongo/db/repl/storage_interface.h" #include "mongo/db/s/balancer/balancer.h" +#include "mongo/db/s/config/sharding_catalog_manager.h" #include "mongo/db/s/sharding_state.h" #include "mongo/db/s/sharding_state_recovery.h" #include "mongo/db/server_options.h" @@ -82,7 +83,6 @@ #include "mongo/executor/network_interface_factory.h" #include "mongo/executor/thread_pool_task_executor.h" #include "mongo/rpc/metadata/egress_metadata_hook_list.h" -#include "mongo/s/catalog/sharding_catalog_manager.h" 
#include "mongo/s/catalog/type_shard.h" #include "mongo/s/catalog_cache_loader.h" #include "mongo/s/client/shard_registry.h" diff --git a/src/mongo/db/s/SConscript b/src/mongo/db/s/SConscript index fd53ece397b..b41ea1be6a7 100644 --- a/src/mongo/db/s/SConscript +++ b/src/mongo/db/s/SConscript @@ -88,7 +88,6 @@ env.Library( '$BUILD_DIR/mongo/db/commands/server_status', '$BUILD_DIR/mongo/db/common', '$BUILD_DIR/mongo/db/repl/repl_coordinator_global', - '$BUILD_DIR/mongo/s/catalog/sharding_catalog_manager', '$BUILD_DIR/mongo/s/client/shard_local', '$BUILD_DIR/mongo/s/coreshard', '$BUILD_DIR/mongo/s/is_mongos', @@ -147,6 +146,28 @@ env.CppUnitTest( ) env.Library( + target='sharding_catalog_manager', + source=[ + 'config/sharding_catalog_manager_chunk_operations.cpp', + 'config/sharding_catalog_manager_collection_operations.cpp', + 'config/sharding_catalog_manager.cpp', + 'config/sharding_catalog_manager_database_operations.cpp', + 'config/sharding_catalog_manager_shard_operations.cpp', + 'config/sharding_catalog_manager_zone_operations.cpp', + ], + LIBDEPS=[ + '$BUILD_DIR/mongo/db/catalog/catalog_raii', + '$BUILD_DIR/mongo/db/commands/dcommands_fcv', + '$BUILD_DIR/mongo/db/repl/read_concern_args', + '$BUILD_DIR/mongo/db/s/balancer', + '$BUILD_DIR/mongo/executor/network_interface', + '$BUILD_DIR/mongo/s/catalog/sharding_catalog_client', + '$BUILD_DIR/mongo/s/client/sharding_client', + '$BUILD_DIR/mongo/s/coreshard', + ], +) + +env.Library( target='commands_db_s', source=[ 'check_sharding_index_command.cpp', @@ -189,11 +210,11 @@ env.Library( '$BUILD_DIR/mongo/db/index_d', '$BUILD_DIR/mongo/db/repl/repl_coordinator_global', '$BUILD_DIR/mongo/db/rw_concern_d', - '$BUILD_DIR/mongo/s/catalog/sharding_catalog_manager', '$BUILD_DIR/mongo/s/commands/shared_cluster_commands', 'balancer', 'collection_metadata', 'sharding', + 'sharding_catalog_manager', ], ) @@ -232,7 +253,7 @@ env.CppUnitTest( '$BUILD_DIR/mongo/db/query/query_request', 
'$BUILD_DIR/mongo/s/catalog/dist_lock_manager_mock', '$BUILD_DIR/mongo/s/catalog/sharding_catalog_client_impl', - '$BUILD_DIR/mongo/s/catalog/sharding_catalog_mock', + '$BUILD_DIR/mongo/s/catalog/sharding_catalog_client_mock', '$BUILD_DIR/mongo/s/shard_server_test_fixture', ], ) @@ -296,8 +317,31 @@ env.CppUnitTest( ], LIBDEPS=[ '$BUILD_DIR/mongo/db/ops/write_ops_exec', - '$BUILD_DIR/mongo/s/catalog/sharding_catalog_mock', + '$BUILD_DIR/mongo/s/catalog/sharding_catalog_client_mock', '$BUILD_DIR/mongo/s/shard_server_test_fixture', 'sharding', ] ) + +env.CppUnitTest( + target='sharding_catalog_manager_test', + source=[ + 'config/sharding_catalog_manager_add_shard_test.cpp', + 'config/sharding_catalog_manager_add_shard_to_zone_test.cpp', + 'config/sharding_catalog_manager_assign_key_range_to_zone_test.cpp', + 'config/sharding_catalog_manager_commit_chunk_migration_test.cpp', + 'config/sharding_catalog_manager_config_initialization_test.cpp', + 'config/sharding_catalog_manager_create_database_test.cpp', + 'config/sharding_catalog_manager_drop_coll_test.cpp', + 'config/sharding_catalog_manager_enable_sharding_test.cpp', + 'config/sharding_catalog_manager_merge_chunks_test.cpp', + 'config/sharding_catalog_manager_remove_shard_from_zone_test.cpp', + 'config/sharding_catalog_manager_remove_shard_test.cpp', + 'config/sharding_catalog_manager_shard_collection_test.cpp', + 'config/sharding_catalog_manager_split_chunk_test.cpp', + ], + LIBDEPS=[ + '$BUILD_DIR/mongo/s/config_server_test_fixture', + '$BUILD_DIR/mongo/util/version_impl', + ] +) diff --git a/src/mongo/db/s/collection_sharding_state.cpp b/src/mongo/db/s/collection_sharding_state.cpp index 78e9c8b4d55..07afbe69236 100644 --- a/src/mongo/db/s/collection_sharding_state.cpp +++ b/src/mongo/db/s/collection_sharding_state.cpp @@ -38,6 +38,7 @@ #include "mongo/db/client.h" #include "mongo/db/operation_context.h" #include "mongo/db/repl/replication_coordinator.h" +#include "mongo/db/s/config/sharding_catalog_manager.h" 
#include "mongo/db/s/migration_chunk_cloner_source.h" #include "mongo/db/s/migration_source_manager.h" #include "mongo/db/s/operation_sharding_state.h" @@ -49,7 +50,6 @@ #include "mongo/db/server_parameters.h" #include "mongo/db/service_context.h" #include "mongo/s/balancer_configuration.h" -#include "mongo/s/catalog/sharding_catalog_manager.h" #include "mongo/s/catalog/type_config_version.h" #include "mongo/s/catalog/type_shard.h" #include "mongo/s/catalog/type_shard_collection.h" diff --git a/src/mongo/db/s/config/configsvr_add_shard_command.cpp b/src/mongo/db/s/config/configsvr_add_shard_command.cpp index 834aa73ca74..2e587bef4d3 100644 --- a/src/mongo/db/s/config/configsvr_add_shard_command.cpp +++ b/src/mongo/db/s/config/configsvr_add_shard_command.cpp @@ -39,7 +39,7 @@ #include "mongo/db/namespace_string.h" #include "mongo/db/repl/repl_set_config.h" #include "mongo/db/repl/replication_coordinator.h" -#include "mongo/s/catalog/sharding_catalog_manager.h" +#include "mongo/db/s/config/sharding_catalog_manager.h" #include "mongo/s/catalog/type_shard.h" #include "mongo/s/grid.h" #include "mongo/s/request_types/add_shard_request_type.h" diff --git a/src/mongo/db/s/config/configsvr_add_shard_to_zone_command.cpp b/src/mongo/db/s/config/configsvr_add_shard_to_zone_command.cpp index 00c1af987cf..2f280dffc00 100644 --- a/src/mongo/db/s/config/configsvr_add_shard_to_zone_command.cpp +++ b/src/mongo/db/s/config/configsvr_add_shard_to_zone_command.cpp @@ -35,7 +35,7 @@ #include "mongo/db/auth/privilege.h" #include "mongo/db/commands.h" #include "mongo/db/namespace_string.h" -#include "mongo/s/catalog/sharding_catalog_manager.h" +#include "mongo/db/s/config/sharding_catalog_manager.h" #include "mongo/s/grid.h" #include "mongo/s/request_types/add_shard_to_zone_request_type.h" #include "mongo/util/log.h" diff --git a/src/mongo/db/s/config/configsvr_commit_chunk_migration_command.cpp b/src/mongo/db/s/config/configsvr_commit_chunk_migration_command.cpp index 
4e9b80d82ea..0dd43e61c74 100644 --- a/src/mongo/db/s/config/configsvr_commit_chunk_migration_command.cpp +++ b/src/mongo/db/s/config/configsvr_commit_chunk_migration_command.cpp @@ -37,9 +37,9 @@ #include "mongo/db/operation_context.h" #include "mongo/db/repl/read_concern_args.h" #include "mongo/db/s/chunk_move_write_concern_options.h" +#include "mongo/db/s/config/sharding_catalog_manager.h" #include "mongo/db/s/sharding_state.h" #include "mongo/rpc/get_status_from_command_result.h" -#include "mongo/s/catalog/sharding_catalog_manager.h" #include "mongo/s/catalog/type_chunk.h" #include "mongo/s/catalog/type_locks.h" #include "mongo/s/chunk_version.h" diff --git a/src/mongo/db/s/config/configsvr_create_database_command.cpp b/src/mongo/db/s/config/configsvr_create_database_command.cpp index 797f76f5720..47740c174be 100644 --- a/src/mongo/db/s/config/configsvr_create_database_command.cpp +++ b/src/mongo/db/s/config/configsvr_create_database_command.cpp @@ -39,7 +39,7 @@ #include "mongo/db/client.h" #include "mongo/db/commands.h" #include "mongo/db/operation_context.h" -#include "mongo/s/catalog/sharding_catalog_manager.h" +#include "mongo/db/s/config/sharding_catalog_manager.h" #include "mongo/s/catalog/type_database.h" #include "mongo/s/catalog_cache.h" #include "mongo/s/grid.h" diff --git a/src/mongo/db/s/config/configsvr_drop_collection_command.cpp b/src/mongo/db/s/config/configsvr_drop_collection_command.cpp index bffad346dc6..e923f753c7a 100644 --- a/src/mongo/db/s/config/configsvr_drop_collection_command.cpp +++ b/src/mongo/db/s/config/configsvr_drop_collection_command.cpp @@ -34,8 +34,8 @@ #include "mongo/db/operation_context.h" #include "mongo/db/repl/read_concern_args.h" #include "mongo/db/repl/repl_client_info.h" +#include "mongo/db/s/config/sharding_catalog_manager.h" #include "mongo/s/catalog/dist_lock_manager.h" -#include "mongo/s/catalog/sharding_catalog_manager.h" #include "mongo/s/catalog/type_database.h" #include "mongo/s/catalog_cache.h" #include 
"mongo/s/client/shard_registry.h" diff --git a/src/mongo/db/s/config/configsvr_drop_database_command.cpp b/src/mongo/db/s/config/configsvr_drop_database_command.cpp index 5a39eee3174..b85a2eab87e 100644 --- a/src/mongo/db/s/config/configsvr_drop_database_command.cpp +++ b/src/mongo/db/s/config/configsvr_drop_database_command.cpp @@ -33,8 +33,8 @@ #include "mongo/db/commands.h" #include "mongo/db/operation_context.h" #include "mongo/db/repl/repl_client_info.h" +#include "mongo/db/s/config/sharding_catalog_manager.h" #include "mongo/s/catalog/dist_lock_manager.h" -#include "mongo/s/catalog/sharding_catalog_manager.h" #include "mongo/s/catalog/type_database.h" #include "mongo/s/catalog_cache.h" #include "mongo/s/grid.h" diff --git a/src/mongo/db/s/config/configsvr_enable_sharding_command.cpp b/src/mongo/db/s/config/configsvr_enable_sharding_command.cpp index 9bdbd5c5669..50cbe6d15ea 100644 --- a/src/mongo/db/s/config/configsvr_enable_sharding_command.cpp +++ b/src/mongo/db/s/config/configsvr_enable_sharding_command.cpp @@ -39,7 +39,7 @@ #include "mongo/db/client.h" #include "mongo/db/commands.h" #include "mongo/db/operation_context.h" -#include "mongo/s/catalog/sharding_catalog_manager.h" +#include "mongo/db/s/config/sharding_catalog_manager.h" #include "mongo/s/catalog/type_database.h" #include "mongo/s/catalog_cache.h" #include "mongo/s/grid.h" diff --git a/src/mongo/db/s/config/configsvr_merge_chunk_command.cpp b/src/mongo/db/s/config/configsvr_merge_chunk_command.cpp index 37c32c6781a..ed49bba1b9d 100644 --- a/src/mongo/db/s/config/configsvr_merge_chunk_command.cpp +++ b/src/mongo/db/s/config/configsvr_merge_chunk_command.cpp @@ -36,7 +36,7 @@ #include "mongo/db/commands.h" #include "mongo/db/namespace_string.h" #include "mongo/db/operation_context.h" -#include "mongo/s/catalog/sharding_catalog_manager.h" +#include "mongo/db/s/config/sharding_catalog_manager.h" #include "mongo/s/grid.h" #include "mongo/s/request_types/merge_chunk_request_type.h" #include 
"mongo/util/log.h" diff --git a/src/mongo/db/s/config/configsvr_move_primary_command.cpp b/src/mongo/db/s/config/configsvr_move_primary_command.cpp index ad02277cc69..4bdeb30ad05 100644 --- a/src/mongo/db/s/config/configsvr_move_primary_command.cpp +++ b/src/mongo/db/s/config/configsvr_move_primary_command.cpp @@ -40,7 +40,7 @@ #include "mongo/db/commands.h" #include "mongo/db/operation_context.h" #include "mongo/db/repl/repl_client_info.h" -#include "mongo/s/catalog/sharding_catalog_manager.h" +#include "mongo/db/s/config/sharding_catalog_manager.h" #include "mongo/s/catalog/type_database.h" #include "mongo/s/catalog_cache.h" #include "mongo/s/client/shard_registry.h" diff --git a/src/mongo/db/s/config/configsvr_remove_shard_command.cpp b/src/mongo/db/s/config/configsvr_remove_shard_command.cpp index d55e81b4ba9..3ef3d0c6995 100644 --- a/src/mongo/db/s/config/configsvr_remove_shard_command.cpp +++ b/src/mongo/db/s/config/configsvr_remove_shard_command.cpp @@ -39,7 +39,7 @@ #include "mongo/db/client.h" #include "mongo/db/commands.h" #include "mongo/db/operation_context.h" -#include "mongo/s/catalog/sharding_catalog_manager.h" +#include "mongo/db/s/config/sharding_catalog_manager.h" #include "mongo/s/catalog/type_database.h" #include "mongo/s/catalog_cache.h" #include "mongo/s/client/shard_registry.h" diff --git a/src/mongo/db/s/config/configsvr_remove_shard_from_zone_command.cpp b/src/mongo/db/s/config/configsvr_remove_shard_from_zone_command.cpp index 9aa7f9285e2..02c5473173f 100644 --- a/src/mongo/db/s/config/configsvr_remove_shard_from_zone_command.cpp +++ b/src/mongo/db/s/config/configsvr_remove_shard_from_zone_command.cpp @@ -35,7 +35,7 @@ #include "mongo/db/auth/privilege.h" #include "mongo/db/commands.h" #include "mongo/db/namespace_string.h" -#include "mongo/s/catalog/sharding_catalog_manager.h" +#include "mongo/db/s/config/sharding_catalog_manager.h" #include "mongo/s/grid.h" #include "mongo/s/request_types/remove_shard_from_zone_request_type.h" #include 
"mongo/util/log.h" diff --git a/src/mongo/db/s/config/configsvr_shard_collection_command.cpp b/src/mongo/db/s/config/configsvr_shard_collection_command.cpp index d221a719bb8..5a2053719de 100644 --- a/src/mongo/db/s/config/configsvr_shard_collection_command.cpp +++ b/src/mongo/db/s/config/configsvr_shard_collection_command.cpp @@ -46,9 +46,9 @@ #include "mongo/db/repl/repl_client_info.h" #include "mongo/db/repl/repl_set_config.h" #include "mongo/db/repl/replication_coordinator.h" +#include "mongo/db/s/config/sharding_catalog_manager.h" #include "mongo/db/sessions_collection.h" #include "mongo/s/balancer_configuration.h" -#include "mongo/s/catalog/sharding_catalog_manager.h" #include "mongo/s/catalog/type_database.h" #include "mongo/s/catalog/type_shard.h" #include "mongo/s/catalog_cache.h" diff --git a/src/mongo/db/s/config/configsvr_split_chunk_command.cpp b/src/mongo/db/s/config/configsvr_split_chunk_command.cpp index 4d66c7ffa8b..474cf1b5e92 100644 --- a/src/mongo/db/s/config/configsvr_split_chunk_command.cpp +++ b/src/mongo/db/s/config/configsvr_split_chunk_command.cpp @@ -36,7 +36,7 @@ #include "mongo/db/commands.h" #include "mongo/db/namespace_string.h" #include "mongo/db/operation_context.h" -#include "mongo/s/catalog/sharding_catalog_manager.h" +#include "mongo/db/s/config/sharding_catalog_manager.h" #include "mongo/s/grid.h" #include "mongo/s/request_types/split_chunk_request_type.h" #include "mongo/util/log.h" diff --git a/src/mongo/db/s/config/configsvr_update_zone_key_range_command.cpp b/src/mongo/db/s/config/configsvr_update_zone_key_range_command.cpp index 1742699b038..afd3ebdf2d2 100644 --- a/src/mongo/db/s/config/configsvr_update_zone_key_range_command.cpp +++ b/src/mongo/db/s/config/configsvr_update_zone_key_range_command.cpp @@ -35,7 +35,7 @@ #include "mongo/db/auth/privilege.h" #include "mongo/db/commands.h" #include "mongo/db/namespace_string.h" -#include "mongo/s/catalog/sharding_catalog_manager.h" +#include 
"mongo/db/s/config/sharding_catalog_manager.h" #include "mongo/s/grid.h" #include "mongo/s/request_types/update_zone_key_range_request_type.h" #include "mongo/util/log.h" diff --git a/src/mongo/db/s/config/sharding_catalog_manager.cpp b/src/mongo/db/s/config/sharding_catalog_manager.cpp new file mode 100644 index 00000000000..136fce6a29c --- /dev/null +++ b/src/mongo/db/s/config/sharding_catalog_manager.cpp @@ -0,0 +1,328 @@ +/** + * Copyright (C) 2015 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */ + +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kSharding + +#include "mongo/platform/basic.h" + +#include "mongo/db/s/config/sharding_catalog_manager.h" + +#include "mongo/db/commands/feature_compatibility_version.h" +#include "mongo/db/operation_context.h" +#include "mongo/db/s/balancer/type_migration.h" +#include "mongo/s/catalog/config_server_version.h" +#include "mongo/s/catalog/sharding_catalog_client.h" +#include "mongo/s/catalog/type_chunk.h" +#include "mongo/s/catalog/type_collection.h" +#include "mongo/s/catalog/type_config_version.h" +#include "mongo/s/catalog/type_lockpings.h" +#include "mongo/s/catalog/type_locks.h" +#include "mongo/s/catalog/type_shard.h" +#include "mongo/s/catalog/type_tags.h" +#include "mongo/s/client/shard_registry.h" +#include "mongo/s/grid.h" +#include "mongo/util/log.h" + +namespace mongo { +namespace { + +const WriteConcernOptions kNoWaitWriteConcern(1, WriteConcernOptions::SyncMode::UNSET, Seconds(0)); + +// This value is initialized only if the node is running as a config server +const auto getShardingCatalogManager = + ServiceContext::declareDecoration<boost::optional<ShardingCatalogManager>>(); + +} // namespace + +void ShardingCatalogManager::create(ServiceContext* serviceContext, + std::unique_ptr<executor::TaskExecutor> addShardExecutor) { + auto& shardingCatalogManager = getShardingCatalogManager(serviceContext); + invariant(!shardingCatalogManager); + + shardingCatalogManager.emplace(serviceContext, std::move(addShardExecutor)); +} + +void ShardingCatalogManager::clearForTests(ServiceContext* serviceContext) { + auto& shardingCatalogManager = getShardingCatalogManager(serviceContext); + invariant(shardingCatalogManager); + + shardingCatalogManager.reset(); +} + +ShardingCatalogManager* ShardingCatalogManager::get(ServiceContext* serviceContext) { + auto& shardingCatalogManager = getShardingCatalogManager(serviceContext); + invariant(shardingCatalogManager); + + return 
shardingCatalogManager.get_ptr(); +} + +ShardingCatalogManager* ShardingCatalogManager::get(OperationContext* operationContext) { + return get(operationContext->getServiceContext()); +} + +ShardingCatalogManager::ShardingCatalogManager( + ServiceContext* serviceContext, std::unique_ptr<executor::TaskExecutor> addShardExecutor) + : _serviceContext(serviceContext), + _executorForAddShard(std::move(addShardExecutor)), + _kZoneOpLock("zoneOpLock"), + _kChunkOpLock("chunkOpLock"), + _kShardMembershipLock("shardMembershipLock") { + startup(); +} + +ShardingCatalogManager::~ShardingCatalogManager() { + shutDown(); +} + +void ShardingCatalogManager::startup() { + stdx::lock_guard<stdx::mutex> lk(_mutex); + if (_started) { + return; + } + _started = true; + _executorForAddShard->startup(); + + Grid::get(_serviceContext) + ->setCustomConnectionPoolStatsFn( + [this](executor::ConnectionPoolStats* stats) { appendConnectionStats(stats); }); +} + +void ShardingCatalogManager::shutDown() { + { + stdx::lock_guard<stdx::mutex> lk(_mutex); + _inShutdown = true; + } + + Grid::get(_serviceContext)->setCustomConnectionPoolStatsFn(nullptr); + + _executorForAddShard->shutdown(); + _executorForAddShard->join(); +} + +Status ShardingCatalogManager::initializeConfigDatabaseIfNeeded(OperationContext* opCtx) { + { + stdx::lock_guard<stdx::mutex> lk(_mutex); + if (_configInitialized) { + return {ErrorCodes::AlreadyInitialized, + "Config database was previously loaded into memory"}; + } + } + + Status status = _initConfigIndexes(opCtx); + if (!status.isOK()) { + return status; + } + + // Make sure to write config.version last since we detect rollbacks of config.version and + // will re-run initializeConfigDatabaseIfNeeded if that happens, but we don't detect rollback + // of the index builds. 
+ status = _initConfigVersion(opCtx); + if (!status.isOK()) { + return status; + } + + stdx::lock_guard<stdx::mutex> lk(_mutex); + _configInitialized = true; + + return Status::OK(); +} + +void ShardingCatalogManager::discardCachedConfigDatabaseInitializationState() { + stdx::lock_guard<stdx::mutex> lk(_mutex); + _configInitialized = false; +} + +Status ShardingCatalogManager::_initConfigVersion(OperationContext* opCtx) { + const auto catalogClient = Grid::get(opCtx)->catalogClient(); + + auto versionStatus = + catalogClient->getConfigVersion(opCtx, repl::ReadConcernLevel::kLocalReadConcern); + if (!versionStatus.isOK()) { + return versionStatus.getStatus(); + } + + const auto& versionInfo = versionStatus.getValue(); + if (versionInfo.getMinCompatibleVersion() > CURRENT_CONFIG_VERSION) { + return {ErrorCodes::IncompatibleShardingConfigVersion, + str::stream() << "current version v" << CURRENT_CONFIG_VERSION + << " is older than the cluster min compatible v" + << versionInfo.getMinCompatibleVersion()}; + } + + if (versionInfo.getCurrentVersion() == UpgradeHistory_EmptyVersion) { + VersionType newVersion; + newVersion.setClusterId(OID::gen()); + newVersion.setMinCompatibleVersion(MIN_COMPATIBLE_CONFIG_VERSION); + newVersion.setCurrentVersion(CURRENT_CONFIG_VERSION); + + BSONObj versionObj(newVersion.toBSON()); + auto insertStatus = catalogClient->insertConfigDocument( + opCtx, VersionType::ConfigNS, versionObj, kNoWaitWriteConcern); + + return insertStatus; + } + + if (versionInfo.getCurrentVersion() == UpgradeHistory_UnreportedVersion) { + return {ErrorCodes::IncompatibleShardingConfigVersion, + "Assuming config data is old since the version document cannot be found in the " + "config server and it contains databases besides 'local' and 'admin'. " + "Please upgrade if this is the case. 
Otherwise, make sure that the config " + "server is clean."}; + } + + if (versionInfo.getCurrentVersion() < CURRENT_CONFIG_VERSION) { + return {ErrorCodes::IncompatibleShardingConfigVersion, + str::stream() << "need to upgrade current cluster version to v" + << CURRENT_CONFIG_VERSION + << "; currently at v" + << versionInfo.getCurrentVersion()}; + } + + return Status::OK(); +} + +Status ShardingCatalogManager::_initConfigIndexes(OperationContext* opCtx) { + const bool unique = true; + auto configShard = Grid::get(opCtx)->shardRegistry()->getConfigShard(); + + Status result = configShard->createIndexOnConfig( + opCtx, ChunkType::ConfigNS, BSON(ChunkType::ns() << 1 << ChunkType::min() << 1), unique); + if (!result.isOK()) { + return result.withContext("couldn't create ns_1_min_1 index on config db"); + } + + result = configShard->createIndexOnConfig( + opCtx, + ChunkType::ConfigNS, + BSON(ChunkType::ns() << 1 << ChunkType::shard() << 1 << ChunkType::min() << 1), + unique); + if (!result.isOK()) { + return result.withContext("couldn't create ns_1_shard_1_min_1 index on config db"); + } + + result = + configShard->createIndexOnConfig(opCtx, + ChunkType::ConfigNS, + BSON(ChunkType::ns() << 1 << ChunkType::lastmod() << 1), + unique); + if (!result.isOK()) { + return result.withContext("couldn't create ns_1_lastmod_1 index on config db"); + } + + result = configShard->createIndexOnConfig( + opCtx, + MigrationType::ConfigNS, + BSON(MigrationType::ns() << 1 << MigrationType::min() << 1), + unique); + if (!result.isOK()) { + return result.withContext("couldn't create ns_1_min_1 index on config.migrations"); + } + + result = configShard->createIndexOnConfig( + opCtx, ShardType::ConfigNS, BSON(ShardType::host() << 1), unique); + if (!result.isOK()) { + return result.withContext("couldn't create host_1 index on config db"); + } + + result = configShard->createIndexOnConfig( + opCtx, LocksType::ConfigNS, BSON(LocksType::lockID() << 1), !unique); + if (!result.isOK()) { + return 
result.withContext("couldn't create lock id index on config db"); + } + + result = + configShard->createIndexOnConfig(opCtx, + LocksType::ConfigNS, + BSON(LocksType::state() << 1 << LocksType::process() << 1), + !unique); + if (!result.isOK()) { + return result.withContext("couldn't create state and process id index on config db"); + } + + result = configShard->createIndexOnConfig( + opCtx, LockpingsType::ConfigNS, BSON(LockpingsType::ping() << 1), !unique); + if (!result.isOK()) { + return result.withContext("couldn't create lockping ping time index on config db"); + } + + result = configShard->createIndexOnConfig( + opCtx, TagsType::ConfigNS, BSON(TagsType::ns() << 1 << TagsType::min() << 1), unique); + if (!result.isOK()) { + return result.withContext("couldn't create ns_1_min_1 index on config db"); + } + + result = configShard->createIndexOnConfig( + opCtx, TagsType::ConfigNS, BSON(TagsType::ns() << 1 << TagsType::tag() << 1), !unique); + if (!result.isOK()) { + return result.withContext("couldn't create ns_1_tag_1 index on config db"); + } + + return Status::OK(); +} + +Status ShardingCatalogManager::setFeatureCompatibilityVersionOnShards(OperationContext* opCtx, + const BSONObj& cmdObj) { + + // No shards should be added until we have forwarded featureCompatibilityVersion to all shards. + Lock::SharedLock lk(opCtx->lockState(), _kShardMembershipLock); + + // We do a direct read of the shards collection with local readConcern so no shards are missed, + // but don't go through the ShardRegistry to prevent it from caching data that may be rolled + // back. 
+ const auto opTimeWithShards = uassertStatusOK(Grid::get(opCtx)->catalogClient()->getAllShards( + opCtx, repl::ReadConcernLevel::kLocalReadConcern)); + + for (const auto& shardType : opTimeWithShards.value) { + const auto shardStatus = + Grid::get(opCtx)->shardRegistry()->getShard(opCtx, shardType.getName()); + if (!shardStatus.isOK()) { + continue; + } + const auto shard = shardStatus.getValue(); + + auto response = shard->runCommandWithFixedRetryAttempts( + opCtx, + ReadPreferenceSetting{ReadPreference::PrimaryOnly}, + "admin", + cmdObj, + Shard::RetryPolicy::kIdempotent); + if (!response.isOK()) { + return response.getStatus(); + } + if (!response.getValue().commandStatus.isOK()) { + return response.getValue().commandStatus; + } + if (!response.getValue().writeConcernStatus.isOK()) { + return response.getValue().writeConcernStatus; + } + } + + return Status::OK(); +} + +} // namespace mongo diff --git a/src/mongo/db/s/config/sharding_catalog_manager.h b/src/mongo/db/s/config/sharding_catalog_manager.h new file mode 100644 index 00000000000..aa7bd212ae0 --- /dev/null +++ b/src/mongo/db/s/config/sharding_catalog_manager.h @@ -0,0 +1,508 @@ +/** + * Copyright (C) 2015 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#pragma once + +#include "mongo/base/disallow_copying.h" +#include "mongo/base/status_with.h" +#include "mongo/db/concurrency/d_concurrency.h" +#include "mongo/db/repl/optime_with.h" +#include "mongo/executor/task_executor.h" +#include "mongo/s/catalog/type_chunk.h" +#include "mongo/s/catalog/type_database.h" +#include "mongo/s/catalog/type_shard.h" +#include "mongo/s/client/shard.h" +#include "mongo/s/client/shard_registry.h" +#include "mongo/s/shard_key_pattern.h" +#include "mongo/stdx/mutex.h" + +namespace mongo { + +class OperationContext; +class RemoteCommandTargeter; +class ServiceContext; +class UUID; + +/** + * Used to indicate to the caller of the removeShard method whether draining of chunks for + * a particular shard has started, is ongoing, or has been completed. + */ +enum ShardDrainingStatus { + STARTED, + ONGOING, + COMPLETED, +}; + +/** + * Implements modifications to the sharding catalog metadata. + * + * TODO: Currently the code responsible for writing the sharding catalog metadata is split between + * this class and ShardingCatalogClient. 
Eventually all methods that write catalog data should be + * moved out of ShardingCatalogClient and into this class. + */ +class ShardingCatalogManager { + MONGO_DISALLOW_COPYING(ShardingCatalogManager); + +public: + ShardingCatalogManager(ServiceContext* serviceContext, + std::unique_ptr<executor::TaskExecutor> addShardExecutor); + ~ShardingCatalogManager(); + + /** + * Instantiates an instance of the sharding catalog manager and installs it on the specified + * service context. This method is not thread-safe and must be called only once when the service + * is starting. + */ + static void create(ServiceContext* serviceContext, + std::unique_ptr<executor::TaskExecutor> addShardExecutor); + + /** + * Retrieves the per-service instance of the ShardingCatalogManager. This instance is only + * available if the node is running as a config server. + */ + static ShardingCatalogManager* get(ServiceContext* serviceContext); + static ShardingCatalogManager* get(OperationContext* operationContext); + + /** + * Safe to call multiple times as long as the calls are externally synchronized to be + * non-overlapping. + */ + void startup(); + + /** + * Performs necessary cleanup when shutting down cleanly. + */ + void shutDown(); + + /** + * Checks if this is the first start of a newly instantiated config server and if so pre-creates + * the catalog collections and their indexes. Also generates and persists the cluster's + * identity. + */ + Status initializeConfigDatabaseIfNeeded(OperationContext* opCtx); + + /** + * Invoked on cluster identity metadata rollback after replication step down. Throws out any + * cached identity information and causes it to be reloaded/re-created on the next attempt. + */ + void discardCachedConfigDatabaseInitializationState(); + + // + // Zone Operations + // + + /** + * Adds the given shardName to the zone. Returns ErrorCodes::ShardNotFound if a shard by that + * name does not exist. 
+ */ + Status addShardToZone(OperationContext* opCtx, + const std::string& shardName, + const std::string& zoneName); + + /** + * Removes the given shardName from the zone. Returns ErrorCodes::ShardNotFound if a shard by + * that name does not exist. + */ + Status removeShardFromZone(OperationContext* opCtx, + const std::string& shardName, + const std::string& zoneName); + + /** + * Assigns a range of a sharded collection to a particular shard zone. If range is a prefix of + * the shard key, the range will be converted into a new range with full shard key filled with + * MinKey values. + */ + Status assignKeyRangeToZone(OperationContext* opCtx, + const NamespaceString& nss, + const ChunkRange& range, + const std::string& zoneName); + + /** + * Removes a range from a zone. + * + * NOTE: unlike assignKeyRangeToZone, the given range will never be converted to include the + * full shard key. + */ + Status removeKeyRangeFromZone(OperationContext* opCtx, + const NamespaceString& nss, + const ChunkRange& range); + + // + // Chunk Operations + // + + /** + * Updates metadata in the config.chunks collection to show the given chunk as split into + * smaller chunks at the specified split points. + */ + Status commitChunkSplit(OperationContext* opCtx, + const NamespaceString& nss, + const OID& requestEpoch, + const ChunkRange& range, + const std::vector<BSONObj>& splitPoints, + const std::string& shardName); + + /** + * Updates metadata in the config.chunks collection so the chunks with given boundaries are seen + * merged into a single larger chunk. + */ + Status commitChunkMerge(OperationContext* opCtx, + const NamespaceString& nss, + const OID& requestEpoch, + const std::vector<BSONObj>& chunkBoundaries, + const std::string& shardName); + + /** + * Updates metadata in config.chunks collection to show the given chunk in its new shard. 
+ */ + StatusWith<BSONObj> commitChunkMigration(OperationContext* opCtx, + const NamespaceString& nss, + const ChunkType& migratedChunk, + const boost::optional<ChunkType>& controlChunk, + const OID& collectionEpoch, + const ShardId& fromShard, + const ShardId& toShard); + + // + // Database Operations + // + + /** + * Checks if a database with the same name already exists, and if not, selects a primary shard + * for the database and creates a new entry for it in config.databases. + * + * Returns the database entry. + * + * Throws DatabaseDifferCase if the database already exists with a different case. + */ + DatabaseType createDatabase(OperationContext* opCtx, const std::string& dbName); + + /** + * Creates the database if it does not exist, then marks its entry in config.databases as + * sharding-enabled. + * + * Throws DatabaseDifferCase if the database already exists with a different case. + */ + void enableSharding(OperationContext* opCtx, const std::string& dbName); + + /** + * Retrieves all databases for a shard. + * + * Returns a !OK status if an error occurs. + */ + StatusWith<std::vector<std::string>> getDatabasesForShard(OperationContext* opCtx, + const ShardId& shardId); + + // + // Collection Operations + // + + /** + * Drops the specified collection from the collection metadata store. + * + * Returns Status::OK if successful or any error code indicating the failure. These are + * some of the known failures: + * - NamespaceNotFound - collection does not exist + */ + Status dropCollection(OperationContext* opCtx, const NamespaceString& nss); + + + /** + * Shards a collection. Assumes that the database is enabled for sharding. + * + * @param ns: namespace of collection to shard + * @param uuid: the collection's UUID. Optional because new in 3.6. + * @param fieldsAndOrder: shardKey pattern + * @param defaultCollation: the default collation for the collection, to be written to + * config.collections. 
If empty, the collection default collation is simple binary + * comparison. Note the the shard key collation will always be simple binary comparison, + * even if the collection default collation is non-simple. + * @param unique: if true, ensure underlying index enforces a unique constraint. + * @param initPoints: create chunks based on a set of specified split points. + * @param initShardIds: If non-empty, specifies the set of shards to assign chunks between. + * Otherwise all chunks will be assigned to the primary shard for the database. + */ + void shardCollection(OperationContext* opCtx, + const NamespaceString& nss, + const boost::optional<UUID> uuid, + const ShardKeyPattern& fieldsAndOrder, + const BSONObj& defaultCollation, + bool unique, + const std::vector<BSONObj>& initPoints, + const bool distributeInitialChunks, + const ShardId& dbPrimaryShardId); + + + /** + * Iterates through each entry in config.collections that does not have a UUID, generates a UUID + * for the collection, and updates the entry with the generated UUID. + * + * Remove after 3.4 -> 3.6 upgrade. + */ + void generateUUIDsForExistingShardedCollections(OperationContext* opCtx); + + + /** + * Returns the set of collections for the specified database, which have been marked as sharded. + * Goes directly to the config server's metadata, without checking the local cache so it should + * not be used in frequently called code paths. + * + * Throws exception on errors. + * + * TODO SERVER-32366: Make this an anonymous helper function in + * sharding_catalog_manager_database_operations.cpp since it will no longer need to be + * called outside of the ShardingCatalogManager. + */ + std::vector<NamespaceString> getAllShardedCollectionsForDb(OperationContext* opCtx, + StringData dbName); + + // + // Shard Operations + // + + /** + * + * Adds a new shard. It expects a standalone mongod process or replica set to be running on the + * provided address. 
+ * + * 'shardProposedName' is an optional string with the proposed name of the shard. If it is + * nullptr, a name will be automatically generated; if not nullptr, it cannot + * contain the empty string. + * 'shardConnectionString' is the complete connection string of the shard being added. + * 'maxSize' is the optional space quota in bytes. Zero means there's no limitation to space + * usage. + * + * On success returns the name of the newly added shard. + */ + StatusWith<std::string> addShard(OperationContext* opCtx, + const std::string* shardProposedName, + const ConnectionString& shardConnectionString, + const long long maxSize); + + /** + * Tries to remove a shard. To completely remove a shard from a sharded cluster, + * the data residing in that shard must be moved to the remaining shards in the + * cluster by "draining" chunks from that shard. + * + * Because of the asynchronous nature of the draining mechanism, this method returns + * the current draining status. See ShardDrainingStatus enum definition for more details. + */ + StatusWith<ShardDrainingStatus> removeShard(OperationContext* opCtx, const ShardId& shardId); + + // + // Cluster Upgrade Operations + // + + /** + * Returns a BSON representation of an update request that can be used to insert a shardIdentity + * doc into the shard for the given shardType (or update the shard's existing shardIdentity + * doc's configsvrConnString if the _id, shardName, and clusterId do not conflict). + */ + BSONObj createShardIdentityUpsertForAddShard(OperationContext* opCtx, + const std::string& shardName); + + /** + * Runs the setFeatureCompatibilityVersion command on all shards. + */ + Status setFeatureCompatibilityVersionOnShards(OperationContext* opCtx, const BSONObj& cmdObj); + + // + // For Diagnostics + // + + /** + * Append information about the connection pools owned by the CatalogManager. 
+ */ + void appendConnectionStats(executor::ConnectionPoolStats* stats); + + /** + * Only used for unit-tests, clears a previously-created catalog manager from the specified + * service context, so that 'create' can be called again. + */ + static void clearForTests(ServiceContext* serviceContext); + +private: + /** + * Performs the necessary checks for version compatibility and creates a new config.version + * document if the current cluster config is empty. + */ + Status _initConfigVersion(OperationContext* opCtx); + + /** + * Builds all the expected indexes on the config server. + */ + Status _initConfigIndexes(OperationContext* opCtx); + + /** + * Used during addShard to determine if there is already an existing shard that matches the + * shard that is currently being added. An OK return with boost::none indicates that there + * is no conflicting shard, and we can proceed trying to add the new shard. An OK return + * with a ShardType indicates that there is an existing shard that matches the shard being added + * but since the options match, this addShard request can do nothing and return success. A + * non-OK return either indicates a problem reading the existing shards from disk or more likely + * indicates that an existing shard conflicts with the shard being added and they have different + * options, so the addShard attempt must be aborted. + */ + StatusWith<boost::optional<ShardType>> _checkIfShardExists( + OperationContext* opCtx, + const ConnectionString& propsedShardConnectionString, + const std::string* shardProposedName, + long long maxSize); + + /** + * Validates that the specified endpoint can serve as a shard server. In particular, this + * this function checks that the shard can be contacted and that it is not already member of + * another sharded cluster. + * + * @param targeter For sending requests to the shard-to-be. + * @param shardProposedName Optional proposed name for the shard. 
Can be omitted in which case + * a unique name for the shard will be generated from the shard's connection string. If it + * is not omitted, the value cannot be the empty string. + * + * On success returns a partially initialized ShardType object corresponding to the requested + * shard. It will have the hostName field set and optionally the name, if the name could be + * generated from either the proposed name or the connection string set name. The returned + * shard's name should be checked and if empty, one should be generated using some uniform + * algorithm. + */ + StatusWith<ShardType> _validateHostAsShard(OperationContext* opCtx, + std::shared_ptr<RemoteCommandTargeter> targeter, + const std::string* shardProposedName, + const ConnectionString& connectionString); + + /** + * Drops the sessions collection on the specified host. + */ + Status _dropSessionsCollection(OperationContext* opCtx, + std::shared_ptr<RemoteCommandTargeter> targeter); + + /** + * Runs the listDatabases command on the specified host and returns the names of all databases + * it returns excluding those named local, config and admin, since they serve administrative + * purposes. + */ + StatusWith<std::vector<std::string>> _getDBNamesListFromShard( + OperationContext* opCtx, std::shared_ptr<RemoteCommandTargeter> targeter); + + /** + * Runs a command against a "shard" that is not yet in the cluster and thus not present in the + * ShardRegistry. + */ + StatusWith<Shard::CommandResponse> _runCommandForAddShard(OperationContext* opCtx, + RemoteCommandTargeter* targeter, + const std::string& dbName, + const BSONObj& cmdObj); + + /** + * Selects an optimal shard on which to place a newly created database from the set of + * available shards. Will return ShardNotFound if shard could not be found. 
+ */ + static StatusWith<ShardId> _selectShardForNewDatabase(OperationContext* opCtx, + ShardRegistry* shardRegistry); + + /** + * Helper method for running a count command against the config server with appropriate error + * handling. + */ + StatusWith<long long> _runCountCommandOnConfig(OperationContext* opCtx, + const NamespaceString& nss, + BSONObj query); + + /** + * Appends a read committed read concern to the request object. + */ + void _appendReadConcern(BSONObjBuilder* builder); + + /** + * Creates the first chunks of a new sharded collection. + */ + ChunkVersion _createFirstChunks(OperationContext* opCtx, + const NamespaceString& nss, + const ShardKeyPattern& shardKeyPattern, + const ShardId& primaryShardId, + const std::vector<BSONObj>& initPoints, + const bool distributeInitialChunks); + + // The owning service context + ServiceContext* const _serviceContext; + + // Executor specifically used for sending commands to servers that are in the process of being + // added as shards. Does not have any connection hook set on it, thus it can be used to talk to + // servers that are not yet in the ShardRegistry. + const std::unique_ptr<executor::TaskExecutor> _executorForAddShard; + + // + // All member variables are labeled with one of the following codes indicating the + // synchronization rules for accessing them. + // + // (M) Must hold _mutex for access. + // (R) Read only, can only be written during initialization. + // (S) Self-synchronizing; access in any way from any context. + // + + stdx::mutex _mutex; + + // True if shutDown() has been called. False, otherwise. + bool _inShutdown{false}; // (M) + + // True if startup() has been called. + bool _started{false}; // (M) + + // True if initializeConfigDatabaseIfNeeded() has been called and returned successfully. + bool _configInitialized{false}; // (M) + + /** + * Lock for shard zoning operations. 
This should be acquired when doing any operations that + * can affect the config.tags collection or the tags field of the config.shards collection. + * No other locks should be held when locking this. If an operation needs to take database + * locks (for example to write to a local collection) those locks should be taken after + * taking this. + */ + Lock::ResourceMutex _kZoneOpLock; + + /** + * Lock for chunk split/merge/move operations. This should be acquired when doing split/merge/ + * move operations that can affect the config.chunks collection. + * No other locks should be held when locking this. If an operation needs to take database + * locks (for example to write to a local collection) those locks should be taken after + * taking this. + */ + Lock::ResourceMutex _kChunkOpLock; + + /** + * Lock that guards changes to the set of shards in the cluster (ie addShard and removeShard + * requests). + * TODO: Currently only taken during addShard requests, this should also be taken in X mode + * during removeShard, once removeShard is moved to run on the config server primary instead of + * on mongos. At that point we should also change any operations that expect the shard not to + * be removed while they are running (such as removeShardFromZone) to take this in shared mode. + */ + Lock::ResourceMutex _kShardMembershipLock; +}; + +} // namespace mongo diff --git a/src/mongo/db/s/config/sharding_catalog_manager_add_shard_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_add_shard_test.cpp new file mode 100644 index 00000000000..b0507b34451 --- /dev/null +++ b/src/mongo/db/s/config/sharding_catalog_manager_add_shard_test.cpp @@ -0,0 +1,1311 @@ +/** + * Copyright (C) 2015 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */ + +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kSharding + +#include "mongo/platform/basic.h" + +#include <vector> + +#include "mongo/client/connection_string.h" +#include "mongo/client/remote_command_targeter_factory_mock.h" +#include "mongo/client/remote_command_targeter_mock.h" +#include "mongo/db/commands.h" +#include "mongo/db/ops/write_ops.h" +#include "mongo/db/repl/replication_coordinator_mock.h" +#include "mongo/db/s/config/sharding_catalog_manager.h" +#include "mongo/db/s/type_shard_identity.h" +#include "mongo/s/catalog/config_server_version.h" +#include "mongo/s/catalog/type_changelog.h" +#include "mongo/s/catalog/type_config_version.h" +#include "mongo/s/catalog/type_database.h" +#include "mongo/s/catalog/type_shard.h" +#include "mongo/s/client/shard_registry.h" +#include "mongo/s/cluster_identity_loader.h" +#include "mongo/s/config_server_test_fixture.h" +#include "mongo/s/write_ops/batched_command_response.h" +#include "mongo/util/fail_point_service.h" +#include "mongo/util/log.h" +#include "mongo/util/scopeguard.h" + +namespace mongo { +namespace { + +using executor::RemoteCommandRequest; +using executor::RemoteCommandResponse; +using std::vector; +using unittest::assertGet; + +// TODO (SERVER-27029): This value was chosen to be greater than the time it takes for the hang +// analyzer to kick in. Remove once the cause for the test failure has been figured out. +const Hours kLongFutureTimeout(8); + +class AddShardTest : public ConfigServerTestFixture { +protected: + /** + * Performs the test setup steps from the parent class and then configures the config shard and + * the client name. + */ + void setUp() override { + ConfigServerTestFixture::setUp(); + + // Make sure clusterID is written to the config.version collection. 
+ ASSERT_OK(ShardingCatalogManager::get(operationContext()) + ->initializeConfigDatabaseIfNeeded(operationContext())); + + auto clusterIdLoader = ClusterIdentityLoader::get(operationContext()); + ASSERT_OK(clusterIdLoader->loadClusterId(operationContext(), + repl::ReadConcernLevel::kLocalReadConcern)); + _clusterId = clusterIdLoader->getClusterId(); + } + + /** + * addShard validates the host as a shard. It calls "isMaster" on the host to determine what + * kind of host it is -- mongos, regular mongod, config mongod -- and whether the replica set + * details are correct. "isMasterResponse" defines the response of the "isMaster" request and + * should be a command response BSONObj, or a failed Status. + * + * ShardingTestFixture::expectGetShards() should be called before this function, otherwise + * addShard will never reach the isMaster command -- a find query is called first. + */ + void expectIsMaster(const HostAndPort& target, StatusWith<BSONObj> isMasterResponse) { + onCommandForAddShard([&, target, isMasterResponse](const RemoteCommandRequest& request) { + ASSERT_EQ(request.target, target); + ASSERT_EQ(request.dbname, "admin"); + ASSERT_BSONOBJ_EQ(request.cmdObj, BSON("isMaster" << 1)); + ASSERT_BSONOBJ_EQ(rpc::makeEmptyMetadata(), request.metadata); + + return isMasterResponse; + }); + } + + void expectListDatabases(const HostAndPort& target, const std::vector<BSONObj>& dbs) { + onCommandForAddShard([&](const RemoteCommandRequest& request) { + ASSERT_EQ(request.target, target); + ASSERT_EQ(request.dbname, "admin"); + ASSERT_BSONOBJ_EQ(request.cmdObj, BSON("listDatabases" << 1 << "nameOnly" << true)); + ASSERT_BSONOBJ_EQ(rpc::makeEmptyMetadata(), request.metadata); + + BSONArrayBuilder arr; + for (const auto& db : dbs) { + arr.append(db); + } + + return BSON("ok" << 1 << "databases" << arr.obj()); + }); + } + + void expectCollectionDrop(const HostAndPort& target, const NamespaceString& nss) { + onCommandForAddShard([&](const RemoteCommandRequest& request) { + 
ASSERT_EQ(request.target, target); + ASSERT_EQ(request.dbname, nss.db()); + ASSERT_BSONOBJ_EQ(request.cmdObj, + BSON("drop" << nss.coll() << "writeConcern" << BSON("w" + << "majority"))); + ASSERT_BSONOBJ_EQ(rpc::makeEmptyMetadata(), request.metadata); + + return BSON("ok" << 1); + }); + } + + void expectSetFeatureCompatibilityVersion(const HostAndPort& target, + StatusWith<BSONObj> response) { + onCommandForAddShard([&, target, response](const RemoteCommandRequest& request) { + ASSERT_EQ(request.target, target); + ASSERT_EQ(request.dbname, "admin"); + ASSERT_BSONOBJ_EQ(request.cmdObj, + BSON("setFeatureCompatibilityVersion" + << "4.0")); + + return response; + }); + } + + /** + * Waits for a request for the shardIdentity document to be upserted into a shard from the + * config server on addShard. + */ + void expectShardIdentityUpsertReturnSuccess(const HostAndPort& expectedHost, + const std::string& expectedShardName) { + // Create the expected upsert shardIdentity command for this shardType. + auto upsertCmdObj = + ShardingCatalogManager::get(operationContext()) + ->createShardIdentityUpsertForAddShard(operationContext(), expectedShardName); + + const auto opMsgRequest = + OpMsgRequest::fromDBAndBody(NamespaceString::kAdminDb, upsertCmdObj); + expectUpdatesReturnSuccess(expectedHost, + NamespaceString(NamespaceString::kServerConfigurationNamespace), + UpdateOp::parse(opMsgRequest)); + } + + void expectShardIdentityUpsertReturnFailure(const HostAndPort& expectedHost, + const std::string& expectedShardName, + const Status& statusToReturn) { + // Create the expected upsert shardIdentity command for this shardType. 
+ auto upsertCmdObj = + ShardingCatalogManager::get(operationContext()) + ->createShardIdentityUpsertForAddShard(operationContext(), expectedShardName); + + const auto opMsgRequest = + OpMsgRequest::fromDBAndBody(NamespaceString::kAdminDb, upsertCmdObj); + expectUpdatesReturnFailure(expectedHost, + NamespaceString(NamespaceString::kServerConfigurationNamespace), + UpdateOp::parse(opMsgRequest), + statusToReturn); + } + + /** + * Waits for a set of batched updates and ensures that the host, namespace, and updates exactly + * match what's expected. Responds with a success status. + */ + void expectUpdatesReturnSuccess(const HostAndPort& expectedHost, + const NamespaceString& expectedNss, + const write_ops::Update& expectedUpdateOp) { + onCommandForAddShard([&](const RemoteCommandRequest& request) { + ASSERT_EQUALS(expectedHost, request.target); + + // Check that the db name in the request matches the expected db name. + ASSERT_EQUALS(expectedNss.db(), request.dbname); + + const auto opMsgRequest = OpMsgRequest::fromDBAndBody(request.dbname, request.cmdObj); + const auto updateOp = UpdateOp::parse(opMsgRequest); + ASSERT_EQUALS(expectedNss, expectedUpdateOp.getNamespace()); + + const auto& expectedUpdates = expectedUpdateOp.getUpdates(); + const auto& actualUpdates = updateOp.getUpdates(); + + ASSERT_EQUALS(expectedUpdates.size(), actualUpdates.size()); + + auto itExpected = expectedUpdates.begin(); + auto itActual = actualUpdates.begin(); + + for (; itActual != actualUpdates.end(); itActual++, itExpected++) { + ASSERT_EQ(itExpected->getUpsert(), itActual->getUpsert()); + ASSERT_EQ(itExpected->getMulti(), itActual->getMulti()); + ASSERT_BSONOBJ_EQ(itExpected->getQ(), itActual->getQ()); + ASSERT_BSONOBJ_EQ(itExpected->getU(), itActual->getU()); + } + + BatchedCommandResponse response; + response.setStatus(Status::OK()); + response.setNModified(1); + + return response.toBSON(); + }); + } + + /** + * Waits for a set of batched updates and ensures that the host, 
namespace, and updates exactly + * match what's expected. Responds with a failure status. + */ + void expectUpdatesReturnFailure(const HostAndPort& expectedHost, + const NamespaceString& expectedNss, + const write_ops::Update& expectedUpdateOp, + const Status& statusToReturn) { + onCommandForAddShard([&](const RemoteCommandRequest& request) { + ASSERT_EQUALS(expectedHost, request.target); + + // Check that the db name in the request matches the expected db name. + ASSERT_EQUALS(expectedNss.db(), request.dbname); + + const auto opMsgRequest = OpMsgRequest::fromDBAndBody(request.dbname, request.cmdObj); + const auto updateOp = UpdateOp::parse(opMsgRequest); + ASSERT_EQUALS(expectedNss, expectedUpdateOp.getNamespace()); + + const auto& expectedUpdates = expectedUpdateOp.getUpdates(); + const auto& actualUpdates = updateOp.getUpdates(); + + ASSERT_EQUALS(expectedUpdates.size(), actualUpdates.size()); + + auto itExpected = expectedUpdates.begin(); + auto itActual = actualUpdates.begin(); + + for (; itActual != actualUpdates.end(); itActual++, itExpected++) { + ASSERT_EQ(itExpected->getUpsert(), itActual->getUpsert()); + ASSERT_EQ(itExpected->getMulti(), itActual->getMulti()); + ASSERT_BSONOBJ_EQ(itExpected->getQ(), itActual->getQ()); + ASSERT_BSONOBJ_EQ(itExpected->getU(), itActual->getU()); + } + + return statusToReturn; + }); + } + + + /** + * Asserts that a document exists in the config server's config.shards collection corresponding + * to 'expectedShard'. 
+ */ + void assertShardExists(const ShardType& expectedShard) { + auto foundShard = assertGet(getShardDoc(operationContext(), expectedShard.getName())); + + ASSERT_EQUALS(expectedShard.getName(), foundShard.getName()); + ASSERT_EQUALS(expectedShard.getHost(), foundShard.getHost()); + ASSERT_EQUALS(expectedShard.getMaxSizeMB(), foundShard.getMaxSizeMB()); + ASSERT_EQUALS(expectedShard.getDraining(), foundShard.getDraining()); + ASSERT_EQUALS((int)expectedShard.getState(), (int)foundShard.getState()); + ASSERT_TRUE(foundShard.getTags().empty()); + } + + /** + * Asserts that a document exists in the config server's config.databases collection + * corresponding to 'expectedDB'. + */ + void assertDatabaseExists(const DatabaseType& expectedDB) { + auto foundDB = + assertGet(catalogClient()->getDatabase(operationContext(), + expectedDB.getName(), + repl::ReadConcernLevel::kMajorityReadConcern)) + .value; + + ASSERT_EQUALS(expectedDB.getName(), foundDB.getName()); + ASSERT_EQUALS(expectedDB.getPrimary(), foundDB.getPrimary()); + ASSERT_EQUALS(expectedDB.getSharded(), foundDB.getSharded()); + } + + /** + * Asserts that a document exists in the config server's config.changelog collection + * describing the addShard request for 'addedShard'. 
+ */ + void assertChangeWasLogged(const ShardType& addedShard) { + auto response = assertGet( + getConfigShard()->exhaustiveFindOnConfig(operationContext(), + ReadPreferenceSetting{ + ReadPreference::PrimaryOnly}, + repl::ReadConcernLevel::kLocalReadConcern, + NamespaceString("config.changelog"), + BSON("what" + << "addShard" + << "details.name" + << addedShard.getName()), + BSONObj(), + 1)); + ASSERT_EQ(1U, response.docs.size()); + auto logEntryBSON = response.docs.front(); + auto logEntry = assertGet(ChangeLogType::fromBSON(logEntryBSON)); + + ASSERT_EQUALS(addedShard.getName(), logEntry.getDetails()["name"].String()); + ASSERT_EQUALS(addedShard.getHost(), logEntry.getDetails()["host"].String()); + } + + void forwardAddShardNetwork(Date_t when) { + networkForAddShard()->enterNetwork(); + networkForAddShard()->runUntil(when); + networkForAddShard()->exitNetwork(); + } + + OID _clusterId; +}; + +TEST_F(AddShardTest, CreateShardIdentityUpsertForAddShard) { + std::string shardName = "shardName"; + + BSONObj expectedBSON = BSON("update" + << "system.version" + << "bypassDocumentValidation" + << false + << "ordered" + << true + << "updates" + << BSON_ARRAY( + BSON("q" << BSON("_id" + << "shardIdentity" + << "shardName" + << shardName + << "clusterId" + << _clusterId) + << "u" + << BSON("$set" << BSON("configsvrConnectionString" + << replicationCoordinator() + ->getConfig() + .getConnectionString() + .toString())) + << "multi" + << false + << "upsert" + << true)) + << "writeConcern" + << BSON("w" + << "majority" + << "wtimeout" + << 15000)); + ASSERT_BSONOBJ_EQ(expectedBSON, + ShardingCatalogManager::get(operationContext()) + ->createShardIdentityUpsertForAddShard(operationContext(), shardName)); +} + +TEST_F(AddShardTest, StandaloneBasicSuccess) { + std::unique_ptr<RemoteCommandTargeterMock> targeter( + stdx::make_unique<RemoteCommandTargeterMock>()); + HostAndPort shardTarget("StandaloneHost:12345"); + 
targeter->setConnectionStringReturnValue(ConnectionString(shardTarget)); + targeter->setFindHostReturnValue(shardTarget); + + targeterFactory()->addTargeterToReturn(ConnectionString(shardTarget), std::move(targeter)); + + + std::string expectedShardName = "StandaloneShard"; + + // The shard doc inserted into the config.shards collection on the config server. + ShardType expectedShard; + expectedShard.setName(expectedShardName); + expectedShard.setHost("StandaloneHost:12345"); + expectedShard.setMaxSizeMB(100); + expectedShard.setState(ShardType::ShardState::kShardAware); + + DatabaseType discoveredDB1("TestDB1", ShardId("StandaloneShard"), false); + DatabaseType discoveredDB2("TestDB2", ShardId("StandaloneShard"), false); + + auto future = launchAsync([this, expectedShardName] { + Client::initThreadIfNotAlready(); + auto shardName = + assertGet(ShardingCatalogManager::get(operationContext()) + ->addShard(operationContext(), + &expectedShardName, + assertGet(ConnectionString::parse("StandaloneHost:12345")), + 100)); + ASSERT_EQUALS(expectedShardName, shardName); + }); + + BSONObj commandResponse = BSON("ok" << 1 << "ismaster" << true << "maxWireVersion" + << WireVersion::LATEST_WIRE_VERSION); + expectIsMaster(shardTarget, commandResponse); + + // Get databases list from new shard + expectListDatabases( + shardTarget, + std::vector<BSONObj>{BSON("name" + << "local" + << "sizeOnDisk" + << 1000), + BSON("name" << discoveredDB1.getName() << "sizeOnDisk" << 2000), + BSON("name" << discoveredDB2.getName() << "sizeOnDisk" << 5000)}); + + expectCollectionDrop(shardTarget, NamespaceString("config", "system.sessions")); + + // The shardIdentity doc inserted into the admin.system.version collection on the shard. + expectShardIdentityUpsertReturnSuccess(shardTarget, expectedShardName); + + // The shard receives the setFeatureCompatibilityVersion command. 
+ expectSetFeatureCompatibilityVersion(shardTarget, BSON("ok" << 1)); + + // Wait for the addShard to complete before checking the config database + future.timed_get(kLongFutureTimeout); + + // Ensure that the shard document was properly added to config.shards. + assertShardExists(expectedShard); + + // Ensure that the databases detected from the shard were properly added to config.database. + assertDatabaseExists(discoveredDB1); + assertDatabaseExists(discoveredDB2); + + assertChangeWasLogged(expectedShard); +} + +TEST_F(AddShardTest, StandaloneGenerateName) { + std::unique_ptr<RemoteCommandTargeterMock> targeter( + stdx::make_unique<RemoteCommandTargeterMock>()); + HostAndPort shardTarget("StandaloneHost:12345"); + targeter->setConnectionStringReturnValue(ConnectionString(shardTarget)); + targeter->setFindHostReturnValue(shardTarget); + + targeterFactory()->addTargeterToReturn(ConnectionString(shardTarget), std::move(targeter)); + + ShardType existingShard; + existingShard.setName("shard0005"); + existingShard.setHost("existingHost:12345"); + existingShard.setMaxSizeMB(100); + existingShard.setState(ShardType::ShardState::kShardAware); + + // Add a pre-existing shard so when generating a name for the new shard it will have to go + // higher than the existing one. + ASSERT_OK(catalogClient()->insertConfigDocument(operationContext(), + ShardType::ConfigNS, + existingShard.toBSON(), + ShardingCatalogClient::kMajorityWriteConcern)); + assertShardExists(existingShard); + + std::string expectedShardName = "shard0006"; + + // The shard doc inserted into the config.shards collection on the config server. 
+ ShardType expectedShard; + expectedShard.setName(expectedShardName); + expectedShard.setHost(shardTarget.toString()); + expectedShard.setMaxSizeMB(100); + expectedShard.setState(ShardType::ShardState::kShardAware); + + DatabaseType discoveredDB1("TestDB1", ShardId(expectedShardName), false); + DatabaseType discoveredDB2("TestDB2", ShardId(expectedShardName), false); + + auto future = launchAsync([this, &expectedShardName, &shardTarget] { + Client::initThreadIfNotAlready(); + auto shardName = assertGet( + ShardingCatalogManager::get(operationContext()) + ->addShard(operationContext(), nullptr, ConnectionString(shardTarget), 100)); + ASSERT_EQUALS(expectedShardName, shardName); + }); + + BSONObj commandResponse = BSON("ok" << 1 << "ismaster" << true << "maxWireVersion" + << WireVersion::LATEST_WIRE_VERSION); + expectIsMaster(shardTarget, commandResponse); + + // Get databases list from new shard + expectListDatabases( + shardTarget, + std::vector<BSONObj>{BSON("name" + << "local" + << "sizeOnDisk" + << 1000), + BSON("name" << discoveredDB1.getName() << "sizeOnDisk" << 2000), + BSON("name" << discoveredDB2.getName() << "sizeOnDisk" << 5000)}); + + expectCollectionDrop(shardTarget, NamespaceString("config", "system.sessions")); + + // The shardIdentity doc inserted into the admin.system.version collection on the shard. + expectShardIdentityUpsertReturnSuccess(shardTarget, expectedShardName); + + // The shard receives the setFeatureCompatibilityVersion command. + expectSetFeatureCompatibilityVersion(shardTarget, BSON("ok" << 1)); + + // Wait for the addShard to complete before checking the config database + future.timed_get(kLongFutureTimeout); + + // Ensure that the shard document was properly added to config.shards. + assertShardExists(expectedShard); + + // Ensure that the databases detected from the shard were properly added to config.database. 
+ assertDatabaseExists(discoveredDB1); + assertDatabaseExists(discoveredDB2); + + assertChangeWasLogged(expectedShard); +} + +TEST_F(AddShardTest, AddSCCCConnectionStringAsShard) { + std::unique_ptr<RemoteCommandTargeterMock> targeter( + stdx::make_unique<RemoteCommandTargeterMock>()); + auto invalidConn = + ConnectionString("host1:12345,host2:12345,host3:12345", ConnectionString::INVALID); + targeter->setConnectionStringReturnValue(invalidConn); + + auto future = launchAsync([this, invalidConn] { + const std::string shardName("StandaloneShard"); + auto status = ShardingCatalogManager::get(operationContext()) + ->addShard(operationContext(), &shardName, invalidConn, 100); + ASSERT_EQUALS(ErrorCodes::BadValue, status); + ASSERT_STRING_CONTAINS(status.getStatus().reason(), "Invalid connection string"); + }); + + future.timed_get(kLongFutureTimeout); +} + +TEST_F(AddShardTest, EmptyShardName) { + std::unique_ptr<RemoteCommandTargeterMock> targeter( + stdx::make_unique<RemoteCommandTargeterMock>()); + std::string expectedShardName = ""; + + auto future = launchAsync([this, expectedShardName] { + auto status = ShardingCatalogManager::get(operationContext()) + ->addShard(operationContext(), + &expectedShardName, + assertGet(ConnectionString::parse("StandaloneHost:12345")), + 100); + ASSERT_EQUALS(ErrorCodes::BadValue, status); + ASSERT_EQUALS("shard name cannot be empty", status.getStatus().reason()); + }); + + future.timed_get(kLongFutureTimeout); +} + +// Host is unreachable, cannot verify host. 
+TEST_F(AddShardTest, UnreachableHost) { + std::unique_ptr<RemoteCommandTargeterMock> targeter( + stdx::make_unique<RemoteCommandTargeterMock>()); + HostAndPort shardTarget("StandaloneHost:12345"); + targeter->setConnectionStringReturnValue(ConnectionString(shardTarget)); + targeter->setFindHostReturnValue(shardTarget); + + targeterFactory()->addTargeterToReturn(ConnectionString(shardTarget), std::move(targeter)); + std::string expectedShardName = "StandaloneShard"; + + auto future = launchAsync([this, &expectedShardName, &shardTarget] { + Client::initThreadIfNotAlready(); + auto status = + ShardingCatalogManager::get(operationContext()) + ->addShard( + operationContext(), &expectedShardName, ConnectionString(shardTarget), 100); + ASSERT_EQUALS(ErrorCodes::OperationFailed, status); + ASSERT_STRING_CONTAINS(status.getStatus().reason(), "host unreachable"); + }); + + Status hostUnreachableStatus = Status(ErrorCodes::HostUnreachable, "host unreachable"); + expectIsMaster(shardTarget, hostUnreachableStatus); + + future.timed_get(kLongFutureTimeout); +} + +// Cannot add mongos as a shard. 
+TEST_F(AddShardTest, AddMongosAsShard) { + std::unique_ptr<RemoteCommandTargeterMock> targeter( + stdx::make_unique<RemoteCommandTargeterMock>()); + HostAndPort shardTarget("StandaloneHost:12345"); + targeter->setConnectionStringReturnValue(ConnectionString(shardTarget)); + targeter->setFindHostReturnValue(shardTarget); + + targeterFactory()->addTargeterToReturn(ConnectionString(shardTarget), std::move(targeter)); + std::string expectedShardName = "StandaloneShard"; + + auto future = launchAsync([this, &expectedShardName, &shardTarget] { + Client::initThreadIfNotAlready(); + auto status = + ShardingCatalogManager::get(operationContext()) + ->addShard( + operationContext(), &expectedShardName, ConnectionString(shardTarget), 100); + ASSERT_EQUALS(ErrorCodes::IllegalOperation, status); + }); + + expectIsMaster(shardTarget, + BSON("msg" + << "isdbgrid")); + + future.timed_get(kLongFutureTimeout); +} + +// A replica set name was found for the host but no name was provided with the host. +TEST_F(AddShardTest, AddReplicaSetShardAsStandalone) { + std::unique_ptr<RemoteCommandTargeterMock> targeter( + stdx::make_unique<RemoteCommandTargeterMock>()); + HostAndPort shardTarget = HostAndPort("host1:12345"); + targeter->setConnectionStringReturnValue(ConnectionString(shardTarget)); + targeter->setFindHostReturnValue(shardTarget); + + targeterFactory()->addTargeterToReturn(ConnectionString(shardTarget), std::move(targeter)); + std::string expectedShardName = "Standalone"; + + auto future = launchAsync([this, expectedShardName, shardTarget] { + Client::initThreadIfNotAlready(); + auto status = + ShardingCatalogManager::get(operationContext()) + ->addShard( + operationContext(), &expectedShardName, ConnectionString(shardTarget), 100); + ASSERT_EQUALS(ErrorCodes::OperationFailed, status); + ASSERT_STRING_CONTAINS(status.getStatus().reason(), "use replica set url format"); + }); + + BSONObj commandResponse = BSON("ok" << 1 << "ismaster" << true << "setName" + << "myOtherSet" + << 
"maxWireVersion" + << WireVersion::LATEST_WIRE_VERSION); + expectIsMaster(shardTarget, commandResponse); + + future.timed_get(kLongFutureTimeout); +} + +// A replica set name was provided with the host but no name was found for the host. +TEST_F(AddShardTest, AddStandaloneHostShardAsReplicaSet) { + std::unique_ptr<RemoteCommandTargeterMock> targeter( + stdx::make_unique<RemoteCommandTargeterMock>()); + ConnectionString connString = + assertGet(ConnectionString::parse("mySet/host1:12345,host2:12345")); + HostAndPort shardTarget = connString.getServers().front(); + targeter->setConnectionStringReturnValue(connString); + targeter->setFindHostReturnValue(shardTarget); + + targeterFactory()->addTargeterToReturn(connString, std::move(targeter)); + std::string expectedShardName = "StandaloneShard"; + + auto future = launchAsync([this, expectedShardName, connString] { + Client::initThreadIfNotAlready(); + auto status = ShardingCatalogManager::get(operationContext()) + ->addShard(operationContext(), &expectedShardName, connString, 100); + ASSERT_EQUALS(ErrorCodes::OperationFailed, status); + ASSERT_STRING_CONTAINS(status.getStatus().reason(), "host did not return a set name"); + }); + + BSONObj commandResponse = BSON("ok" << 1 << "ismaster" << true << "maxWireVersion" + << WireVersion::LATEST_WIRE_VERSION); + expectIsMaster(shardTarget, commandResponse); + + future.timed_get(kLongFutureTimeout); +} + +// Provided replica set name does not match found replica set name. 
+TEST_F(AddShardTest, ReplicaSetMistmatchedReplicaSetName) { + std::unique_ptr<RemoteCommandTargeterMock> targeter( + stdx::make_unique<RemoteCommandTargeterMock>()); + ConnectionString connString = + assertGet(ConnectionString::parse("mySet/host1:12345,host2:12345")); + targeter->setConnectionStringReturnValue(connString); + HostAndPort shardTarget = connString.getServers().front(); + targeter->setFindHostReturnValue(shardTarget); + + targeterFactory()->addTargeterToReturn(connString, std::move(targeter)); + std::string expectedShardName = "StandaloneShard"; + + auto future = launchAsync([this, expectedShardName, connString] { + Client::initThreadIfNotAlready(); + auto status = ShardingCatalogManager::get(operationContext()) + ->addShard(operationContext(), &expectedShardName, connString, 100); + ASSERT_EQUALS(ErrorCodes::OperationFailed, status); + ASSERT_STRING_CONTAINS(status.getStatus().reason(), "does not match the actual set name"); + }); + + BSONObj commandResponse = BSON("ok" << 1 << "ismaster" << true << "setName" + << "myOtherSet" + << "maxWireVersion" + << WireVersion::LATEST_WIRE_VERSION); + expectIsMaster(shardTarget, commandResponse); + + future.timed_get(kLongFutureTimeout); +} + +// Cannot add config server as a shard. 
+TEST_F(AddShardTest, ShardIsCSRSConfigServer) { + std::unique_ptr<RemoteCommandTargeterMock> targeter( + stdx::make_unique<RemoteCommandTargeterMock>()); + ConnectionString connString = + assertGet(ConnectionString::parse("config/host1:12345,host2:12345")); + targeter->setConnectionStringReturnValue(connString); + HostAndPort shardTarget = connString.getServers().front(); + targeter->setFindHostReturnValue(shardTarget); + + targeterFactory()->addTargeterToReturn(connString, std::move(targeter)); + std::string expectedShardName = "StandaloneShard"; + + auto future = launchAsync([this, expectedShardName, connString] { + Client::initThreadIfNotAlready(); + auto status = ShardingCatalogManager::get(operationContext()) + ->addShard(operationContext(), &expectedShardName, connString, 100); + ASSERT_EQUALS(ErrorCodes::OperationFailed, status); + ASSERT_STRING_CONTAINS(status.getStatus().reason(), + "as a shard since it is a config server"); + }); + + BSONObj commandResponse = BSON("ok" << 1 << "ismaster" << true << "setName" + << "config" + << "configsvr" + << true + << "maxWireVersion" + << WireVersion::LATEST_WIRE_VERSION); + expectIsMaster(shardTarget, commandResponse); + + future.timed_get(kLongFutureTimeout); +} + +// One of the hosts is not part of the found replica set. 
+TEST_F(AddShardTest, ReplicaSetMissingHostsProvidedInSeedList) { + std::unique_ptr<RemoteCommandTargeterMock> targeter( + stdx::make_unique<RemoteCommandTargeterMock>()); + ConnectionString connString = + assertGet(ConnectionString::parse("mySet/host1:12345,host2:12345")); + targeter->setConnectionStringReturnValue(connString); + HostAndPort shardTarget = connString.getServers().front(); + targeter->setFindHostReturnValue(shardTarget); + + targeterFactory()->addTargeterToReturn(connString, std::move(targeter)); + std::string expectedShardName = "StandaloneShard"; + + auto future = launchAsync([this, expectedShardName, connString] { + Client::initThreadIfNotAlready(); + auto status = ShardingCatalogManager::get(operationContext()) + ->addShard(operationContext(), &expectedShardName, connString, 100); + ASSERT_EQUALS(ErrorCodes::OperationFailed, status); + ASSERT_STRING_CONTAINS(status.getStatus().reason(), + "host2:12345 does not belong to replica set"); + }); + + BSONArrayBuilder hosts; + hosts.append("host1:12345"); + BSONObj commandResponse = BSON("ok" << 1 << "ismaster" << true << "setName" + << "mySet" + << "hosts" + << hosts.arr() + << "maxWireVersion" + << WireVersion::LATEST_WIRE_VERSION); + expectIsMaster(shardTarget, commandResponse); + + future.timed_get(kLongFutureTimeout); +} + +// Cannot add a shard with the shard name "config". 
+TEST_F(AddShardTest, AddShardWithNameConfigFails) { + std::unique_ptr<RemoteCommandTargeterMock> targeter( + stdx::make_unique<RemoteCommandTargeterMock>()); + ConnectionString connString = + assertGet(ConnectionString::parse("mySet/host1:12345,host2:12345")); + targeter->setConnectionStringReturnValue(connString); + HostAndPort shardTarget = connString.getServers().front(); + targeter->setFindHostReturnValue(shardTarget); + + targeterFactory()->addTargeterToReturn(connString, std::move(targeter)); + std::string expectedShardName = "config"; + + auto future = launchAsync([this, expectedShardName, connString] { + Client::initThreadIfNotAlready(); + auto status = ShardingCatalogManager::get(operationContext()) + ->addShard(operationContext(), &expectedShardName, connString, 100); + ASSERT_EQUALS(ErrorCodes::BadValue, status); + ASSERT_EQUALS(status.getStatus().reason(), + "use of shard replica set with name 'config' is not allowed"); + }); + + BSONArrayBuilder hosts; + hosts.append("host1:12345"); + hosts.append("host2:12345"); + BSONObj commandResponse = BSON("ok" << 1 << "ismaster" << true << "setName" + << "mySet" + << "hosts" + << hosts.arr() + << "maxWireVersion" + << WireVersion::LATEST_WIRE_VERSION); + expectIsMaster(shardTarget, commandResponse); + + future.timed_get(kLongFutureTimeout); +} + +TEST_F(AddShardTest, ShardContainsExistingDatabase) { + std::unique_ptr<RemoteCommandTargeterMock> targeter( + stdx::make_unique<RemoteCommandTargeterMock>()); + ConnectionString connString = + assertGet(ConnectionString::parse("mySet/host1:12345,host2:12345")); + targeter->setConnectionStringReturnValue(connString); + HostAndPort shardTarget = connString.getServers().front(); + targeter->setFindHostReturnValue(shardTarget); + + targeterFactory()->addTargeterToReturn(connString, std::move(targeter)); + std::string expectedShardName = "mySet"; + + DatabaseType existingDB("existing", ShardId("existingShard"), false); + + // Add a pre-existing database. 
+ ASSERT_OK(catalogClient()->insertConfigDocument(operationContext(), + DatabaseType::ConfigNS, + existingDB.toBSON(), + ShardingCatalogClient::kMajorityWriteConcern)); + assertDatabaseExists(existingDB); + + + auto future = launchAsync([this, expectedShardName, connString] { + Client::initThreadIfNotAlready(); + auto status = ShardingCatalogManager::get(operationContext()) + ->addShard(operationContext(), &expectedShardName, connString, 100); + ASSERT_EQUALS(ErrorCodes::OperationFailed, status); + ASSERT_STRING_CONTAINS( + status.getStatus().reason(), + "because a local database 'existing' exists in another existingShard"); + }); + + BSONArrayBuilder hosts; + hosts.append("host1:12345"); + hosts.append("host2:12345"); + BSONObj commandResponse = BSON("ok" << 1 << "ismaster" << true << "setName" + << "mySet" + << "hosts" + << hosts.arr() + << "maxWireVersion" + << WireVersion::LATEST_WIRE_VERSION); + expectIsMaster(shardTarget, commandResponse); + + expectListDatabases(shardTarget, {BSON("name" << existingDB.getName())}); + + future.timed_get(kLongFutureTimeout); +} + +TEST_F(AddShardTest, SuccessfullyAddReplicaSet) { + std::unique_ptr<RemoteCommandTargeterMock> targeter( + stdx::make_unique<RemoteCommandTargeterMock>()); + ConnectionString connString = + assertGet(ConnectionString::parse("mySet/host1:12345,host2:12345")); + targeter->setConnectionStringReturnValue(connString); + HostAndPort shardTarget = connString.getServers().front(); + targeter->setFindHostReturnValue(shardTarget); + targeterFactory()->addTargeterToReturn(connString, std::move(targeter)); + + std::string expectedShardName = "mySet"; + + // The shard doc inserted into the config.shards collection on the config server. 
+ ShardType expectedShard; + expectedShard.setName(expectedShardName); + expectedShard.setHost(connString.toString()); + expectedShard.setMaxSizeMB(100); + expectedShard.setState(ShardType::ShardState::kShardAware); + + DatabaseType discoveredDB("shardDB", ShardId(expectedShardName), false); + + auto future = launchAsync([this, &expectedShardName, &connString] { + Client::initThreadIfNotAlready(); + auto shardName = assertGet(ShardingCatalogManager::get(operationContext()) + ->addShard(operationContext(), nullptr, connString, 100)); + ASSERT_EQUALS(expectedShardName, shardName); + }); + + BSONArrayBuilder hosts; + hosts.append("host1:12345"); + hosts.append("host2:12345"); + BSONObj commandResponse = BSON("ok" << 1 << "ismaster" << true << "setName" + << "mySet" + << "hosts" + << hosts.arr() + << "maxWireVersion" + << WireVersion::LATEST_WIRE_VERSION); + expectIsMaster(shardTarget, commandResponse); + + // Get databases list from new shard + expectListDatabases(shardTarget, std::vector<BSONObj>{BSON("name" << discoveredDB.getName())}); + + expectCollectionDrop(shardTarget, NamespaceString("config", "system.sessions")); + + // The shardIdentity doc inserted into the admin.system.version collection on the shard. + expectShardIdentityUpsertReturnSuccess(shardTarget, expectedShardName); + + // The shard receives the setFeatureCompatibilityVersion command. + expectSetFeatureCompatibilityVersion(shardTarget, BSON("ok" << 1)); + + // Wait for the addShard to complete before checking the config database + future.timed_get(kLongFutureTimeout); + + // Ensure that the shard document was properly added to config.shards. + assertShardExists(expectedShard); + + // Ensure that the databases detected from the shard were properly added to config.database. 
+ assertDatabaseExists(discoveredDB); + + assertChangeWasLogged(expectedShard); +} + +TEST_F(AddShardTest, ReplicaSetExtraHostsDiscovered) { + std::unique_ptr<RemoteCommandTargeterMock> targeter( + stdx::make_unique<RemoteCommandTargeterMock>()); + ConnectionString seedString = + assertGet(ConnectionString::parse("mySet/host1:12345,host2:12345")); + ConnectionString fullConnString = + assertGet(ConnectionString::parse("mySet/host1:12345,host2:12345,host3:12345")); + targeter->setConnectionStringReturnValue(fullConnString); + HostAndPort shardTarget = seedString.getServers().front(); + targeter->setFindHostReturnValue(shardTarget); + targeterFactory()->addTargeterToReturn(seedString, std::move(targeter)); + + std::string expectedShardName = "mySet"; + + // The shard doc inserted into the config.shards collection on the config server. + ShardType expectedShard; + expectedShard.setName(expectedShardName); + expectedShard.setHost(fullConnString.toString()); + expectedShard.setMaxSizeMB(100); + expectedShard.setState(ShardType::ShardState::kShardAware); + + DatabaseType discoveredDB("shardDB", ShardId(expectedShardName), false); + + auto future = launchAsync([this, &expectedShardName, &seedString] { + Client::initThreadIfNotAlready(); + auto shardName = assertGet(ShardingCatalogManager::get(operationContext()) + ->addShard(operationContext(), nullptr, seedString, 100)); + ASSERT_EQUALS(expectedShardName, shardName); + }); + + BSONArrayBuilder hosts; + hosts.append("host1:12345"); + hosts.append("host2:12345"); + BSONObj commandResponse = BSON("ok" << 1 << "ismaster" << true << "setName" + << "mySet" + << "hosts" + << hosts.arr() + << "maxWireVersion" + << WireVersion::LATEST_WIRE_VERSION); + expectIsMaster(shardTarget, commandResponse); + + // Get databases list from new shard + expectListDatabases(shardTarget, std::vector<BSONObj>{BSON("name" << discoveredDB.getName())}); + + expectCollectionDrop(shardTarget, NamespaceString("config", "system.sessions")); + + // The 
shardIdentity doc inserted into the admin.system.version collection on the shard. + expectShardIdentityUpsertReturnSuccess(shardTarget, expectedShardName); + + // The shard receives the setFeatureCompatibilityVersion command. + expectSetFeatureCompatibilityVersion(shardTarget, BSON("ok" << 1)); + + // Wait for the addShard to complete before checking the config database + future.timed_get(kLongFutureTimeout); + + // Ensure that the shard document was properly added to config.shards. + assertShardExists(expectedShard); + + // Ensure that the databases detected from the shard were properly added to config.database. + assertDatabaseExists(discoveredDB); + + // The changelog entry uses whatever connection string is passed to addShard, even if addShard + // discovered additional hosts. + expectedShard.setHost(seedString.toString()); + assertChangeWasLogged(expectedShard); +} + +TEST_F(AddShardTest, AddShardSucceedsEvenIfAddingDBsFromNewShardFails) { + std::unique_ptr<RemoteCommandTargeterMock> targeter( + stdx::make_unique<RemoteCommandTargeterMock>()); + HostAndPort shardTarget("StandaloneHost:12345"); + targeter->setConnectionStringReturnValue(ConnectionString(shardTarget)); + targeter->setFindHostReturnValue(shardTarget); + + targeterFactory()->addTargeterToReturn(ConnectionString(shardTarget), std::move(targeter)); + + + std::string expectedShardName = "StandaloneShard"; + + // The shard doc inserted into the config.shards collection on the config server. + ShardType expectedShard; + expectedShard.setName(expectedShardName); + expectedShard.setHost("StandaloneHost:12345"); + expectedShard.setMaxSizeMB(100); + expectedShard.setState(ShardType::ShardState::kShardAware); + + DatabaseType discoveredDB1("TestDB1", ShardId("StandaloneShard"), false); + DatabaseType discoveredDB2("TestDB2", ShardId("StandaloneShard"), false); + + // Enable fail point to cause all updates to fail. 
Since we add the databases detected from + // the shard being added with upserts, but we add the shard document itself via insert, this + // will allow the shard to be added but prevent the databases from brought into the cluster. + auto failPoint = getGlobalFailPointRegistry()->getFailPoint("failAllUpdates"); + ASSERT(failPoint); + failPoint->setMode(FailPoint::alwaysOn); + ON_BLOCK_EXIT([&] { failPoint->setMode(FailPoint::off); }); + + auto future = launchAsync([this, &expectedShardName, &shardTarget] { + Client::initThreadIfNotAlready(); + auto shardName = assertGet( + ShardingCatalogManager::get(operationContext()) + ->addShard( + operationContext(), &expectedShardName, ConnectionString(shardTarget), 100)); + ASSERT_EQUALS(expectedShardName, shardName); + }); + + BSONObj commandResponse = BSON("ok" << 1 << "ismaster" << true << "maxWireVersion" + << WireVersion::LATEST_WIRE_VERSION); + expectIsMaster(shardTarget, commandResponse); + + // Get databases list from new shard + expectListDatabases( + shardTarget, + std::vector<BSONObj>{BSON("name" + << "local" + << "sizeOnDisk" + << 1000), + BSON("name" << discoveredDB1.getName() << "sizeOnDisk" << 2000), + BSON("name" << discoveredDB2.getName() << "sizeOnDisk" << 5000)}); + + expectCollectionDrop(shardTarget, NamespaceString("config", "system.sessions")); + + // The shardIdentity doc inserted into the admin.system.version collection on the shard. + expectShardIdentityUpsertReturnSuccess(shardTarget, expectedShardName); + + // The shard receives the setFeatureCompatibilityVersion command. + expectSetFeatureCompatibilityVersion(shardTarget, BSON("ok" << 1)); + + // Wait for the addShard to complete before checking the config database + future.timed_get(kLongFutureTimeout); + + // Ensure that the shard document was properly added to config.shards. + assertShardExists(expectedShard); + + // Ensure that the databases detected from the shard were *not* added. 
+ ASSERT_EQUALS(ErrorCodes::NamespaceNotFound, + catalogClient() + ->getDatabase(operationContext(), + discoveredDB1.getName(), + repl::ReadConcernLevel::kMajorityReadConcern) + .getStatus()); + ASSERT_EQUALS(ErrorCodes::NamespaceNotFound, + catalogClient() + ->getDatabase(operationContext(), + discoveredDB2.getName(), + repl::ReadConcernLevel::kMajorityReadConcern) + .getStatus()); + + assertChangeWasLogged(expectedShard); +} + +// Tests both that trying to add a shard with the same host as an existing shard but with different +// options fails, and that adding a shard with the same host as an existing shard with the *same* +// options succeeds. +TEST_F(AddShardTest, AddExistingShardStandalone) { + HostAndPort shardTarget("StandaloneHost:12345"); + std::unique_ptr<RemoteCommandTargeterMock> standaloneTargeter( + stdx::make_unique<RemoteCommandTargeterMock>()); + standaloneTargeter->setConnectionStringReturnValue(ConnectionString(shardTarget)); + standaloneTargeter->setFindHostReturnValue(shardTarget); + targeterFactory()->addTargeterToReturn(ConnectionString(shardTarget), + std::move(standaloneTargeter)); + + std::unique_ptr<RemoteCommandTargeterMock> replsetTargeter( + stdx::make_unique<RemoteCommandTargeterMock>()); + replsetTargeter->setConnectionStringReturnValue( + ConnectionString::forReplicaSet("mySet", {shardTarget})); + replsetTargeter->setFindHostReturnValue(shardTarget); + targeterFactory()->addTargeterToReturn(ConnectionString::forReplicaSet("mySet", {shardTarget}), + std::move(replsetTargeter)); + + std::string existingShardName = "myShard"; + ShardType existingShard; + existingShard.setName(existingShardName); + existingShard.setHost(shardTarget.toString()); + existingShard.setMaxSizeMB(100); + existingShard.setState(ShardType::ShardState::kShardAware); + + // Make sure the shard already exists. 
+ ASSERT_OK(catalogClient()->insertConfigDocument(operationContext(), + ShardType::ConfigNS, + existingShard.toBSON(), + ShardingCatalogClient::kMajorityWriteConcern)); + assertShardExists(existingShard); + + // Adding the same standalone host with a different shard name should fail. + std::string differentName = "anotherShardName"; + auto future1 = launchAsync([&] { + Client::initThreadIfNotAlready(); + ASSERT_EQUALS(ErrorCodes::IllegalOperation, + ShardingCatalogManager::get(operationContext()) + ->addShard(operationContext(), + &differentName, + ConnectionString(shardTarget), + existingShard.getMaxSizeMB())); + }); + future1.timed_get(kLongFutureTimeout); + + // Ensure that the shard document was unchanged. + assertShardExists(existingShard); + + // Adding the same standalone host with a different maxSize should fail. + auto future2 = launchAsync([&] { + Client::initThreadIfNotAlready(); + ASSERT_EQUALS(ErrorCodes::IllegalOperation, + ShardingCatalogManager::get(operationContext()) + ->addShard(operationContext(), + nullptr, + ConnectionString(shardTarget), + existingShard.getMaxSizeMB() + 100)); + }); + future2.timed_get(kLongFutureTimeout); + + // Adding the same standalone host but as part of a replica set should fail. + // Ensures that even if the user changed the standalone shard to a single-node replica set, you + // can't change the sharded cluster's notion of the shard from standalone to replica set just + // by calling addShard. + auto future3 = launchAsync([&] { + Client::initThreadIfNotAlready(); + ASSERT_EQUALS(ErrorCodes::IllegalOperation, + ShardingCatalogManager::get(operationContext()) + ->addShard(operationContext(), + nullptr, + ConnectionString::forReplicaSet("mySet", {shardTarget}), + existingShard.getMaxSizeMB())); + }); + future3.timed_get(kLongFutureTimeout); + + // Ensure that the shard document was unchanged. + assertShardExists(existingShard); + + // Adding the same standalone host with the same options should succeed. 
+ auto future4 = launchAsync([&] { + Client::initThreadIfNotAlready(); + auto shardName = assertGet(ShardingCatalogManager::get(operationContext()) + ->addShard(operationContext(), + &existingShardName, + ConnectionString(shardTarget), + existingShard.getMaxSizeMB())); + ASSERT_EQUALS(existingShardName, shardName); + }); + future4.timed_get(kLongFutureTimeout); + + // Ensure that the shard document was unchanged. + assertShardExists(existingShard); + + // Adding the same standalone host with the same options (without explicitly specifying the + // shard name) should succeed. + auto future5 = launchAsync([&] { + Client::initThreadIfNotAlready(); + auto shardName = assertGet(ShardingCatalogManager::get(operationContext()) + ->addShard(operationContext(), + nullptr, + ConnectionString(shardTarget), + existingShard.getMaxSizeMB())); + ASSERT_EQUALS(existingShardName, shardName); + }); + future5.timed_get(kLongFutureTimeout); + + // Ensure that the shard document was unchanged. + assertShardExists(existingShard); +} + +// Tests both that trying to add a shard with the same replica set as an existing shard but with +// different options fails, and that adding a shard with the same replica set as an existing shard +// with the *same* options succeeds. 
+TEST_F(AddShardTest, AddExistingShardReplicaSet) { + std::unique_ptr<RemoteCommandTargeterMock> replsetTargeter( + stdx::make_unique<RemoteCommandTargeterMock>()); + ConnectionString connString = assertGet(ConnectionString::parse("mySet/host1:12345")); + replsetTargeter->setConnectionStringReturnValue(connString); + HostAndPort shardTarget = connString.getServers().front(); + replsetTargeter->setFindHostReturnValue(shardTarget); + targeterFactory()->addTargeterToReturn(connString, std::move(replsetTargeter)); + + std::string existingShardName = "myShard"; + ShardType existingShard; + existingShard.setName(existingShardName); + existingShard.setHost(connString.toString()); + existingShard.setMaxSizeMB(100); + existingShard.setState(ShardType::ShardState::kShardAware); + + // Make sure the shard already exists. + ASSERT_OK(catalogClient()->insertConfigDocument(operationContext(), + ShardType::ConfigNS, + existingShard.toBSON(), + ShardingCatalogClient::kMajorityWriteConcern)); + assertShardExists(existingShard); + + // Adding the same connection string with a different shard name should fail. + std::string differentName = "anotherShardName"; + auto future1 = launchAsync([&] { + Client::initThreadIfNotAlready(); + ASSERT_EQUALS( + ErrorCodes::IllegalOperation, + ShardingCatalogManager::get(operationContext()) + ->addShard( + operationContext(), &differentName, connString, existingShard.getMaxSizeMB())); + }); + future1.timed_get(kLongFutureTimeout); + + // Ensure that the shard document was unchanged. + assertShardExists(existingShard); + + // Adding the same connection string with a different maxSize should fail. 
+ auto future2 = launchAsync([&] { + Client::initThreadIfNotAlready(); + ASSERT_EQUALS( + ErrorCodes::IllegalOperation, + ShardingCatalogManager::get(operationContext()) + ->addShard( + operationContext(), nullptr, connString, existingShard.getMaxSizeMB() + 100)); + }); + future2.timed_get(kLongFutureTimeout); + + // Ensure that the shard document was unchanged. + assertShardExists(existingShard); + + // Adding a connecting string with a host of an existing shard but using a different connection + // string type should fail. + // Ensures that even if the user changed the replica set shard to a standalone, you can't change + // the sharded cluster's notion of the shard from replica set to standalone just by calling + // addShard. + auto future3 = launchAsync([&] { + Client::initThreadIfNotAlready(); + ASSERT_EQUALS(ErrorCodes::IllegalOperation, + ShardingCatalogManager::get(operationContext()) + ->addShard(operationContext(), + nullptr, + ConnectionString(shardTarget), + existingShard.getMaxSizeMB())); + }); + future3.timed_get(kLongFutureTimeout); + + // Ensure that the shard document was unchanged. + assertShardExists(existingShard); + + // Adding a connecting string with the same hosts but a different replica set name should fail. + // Ensures that even if you manually change the shard's replica set name somehow, you can't + // change the replica set name the sharded cluster knows for it just by calling addShard again. + std::string differentSetName = "differentSet"; + auto future4 = launchAsync([&] { + Client::initThreadIfNotAlready(); + ASSERT_EQUALS(ErrorCodes::IllegalOperation, + ShardingCatalogManager::get(operationContext()) + ->addShard(operationContext(), + nullptr, + ConnectionString::forReplicaSet(differentSetName, + connString.getServers()), + existingShard.getMaxSizeMB())); + }); + future4.timed_get(kLongFutureTimeout); + + // Ensure that the shard document was unchanged. 
+ assertShardExists(existingShard); + + // Adding the same host with the same options should succeed. + auto future5 = launchAsync([&] { + Client::initThreadIfNotAlready(); + auto shardName = assertGet(ShardingCatalogManager::get(operationContext()) + ->addShard(operationContext(), + &existingShardName, + connString, + existingShard.getMaxSizeMB())); + ASSERT_EQUALS(existingShardName, shardName); + }); + future5.timed_get(kLongFutureTimeout); + + // Adding the same host with the same options (without explicitly specifying the shard name) + // should succeed. + auto future6 = launchAsync([&] { + Client::initThreadIfNotAlready(); + auto shardName = assertGet( + ShardingCatalogManager::get(operationContext()) + ->addShard(operationContext(), nullptr, connString, existingShard.getMaxSizeMB())); + ASSERT_EQUALS(existingShardName, shardName); + }); + future6.timed_get(kLongFutureTimeout); + + // Ensure that the shard document was unchanged. + assertShardExists(existingShard); + + // Adding the same replica set but different host membership (but otherwise the same options) + // should succeed + auto otherHost = connString.getServers().back(); + ConnectionString otherHostConnString = assertGet(ConnectionString::parse("mySet/host2:12345")); + { + // Add a targeter for the different seed string this addShard request will use. 
+ std::unique_ptr<RemoteCommandTargeterMock> otherHostTargeter( + stdx::make_unique<RemoteCommandTargeterMock>()); + otherHostTargeter->setConnectionStringReturnValue(otherHostConnString); + otherHostTargeter->setFindHostReturnValue(otherHost); + targeterFactory()->addTargeterToReturn(otherHostConnString, std::move(otherHostTargeter)); + } + auto future7 = launchAsync([&] { + Client::initThreadIfNotAlready(); + auto shardName = assertGet(ShardingCatalogManager::get(operationContext()) + ->addShard(operationContext(), + nullptr, + otherHostConnString, + existingShard.getMaxSizeMB())); + ASSERT_EQUALS(existingShardName, shardName); + }); + future7.timed_get(kLongFutureTimeout); + + // Ensure that the shard document was unchanged. + assertShardExists(existingShard); +} + +} // namespace +} // namespace mongo diff --git a/src/mongo/db/s/config/sharding_catalog_manager_add_shard_to_zone_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_add_shard_to_zone_test.cpp new file mode 100644 index 00000000000..c586f412ed1 --- /dev/null +++ b/src/mongo/db/s/config/sharding_catalog_manager_add_shard_to_zone_test.cpp @@ -0,0 +1,119 @@ +/** + * Copyright (C) 2016 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/client/read_preference.h" +#include "mongo/db/namespace_string.h" +#include "mongo/db/s/config/sharding_catalog_manager.h" +#include "mongo/s/catalog/type_shard.h" +#include "mongo/s/client/shard.h" +#include "mongo/s/config_server_test_fixture.h" + +namespace mongo { +namespace { + + +ReadPreferenceSetting kReadPref(ReadPreference::PrimaryOnly); + +using AddShardToZoneTest = ConfigServerTestFixture; + +TEST_F(AddShardToZoneTest, AddSingleZoneToExistingShardShouldSucceed) { + ShardType shard; + shard.setName("a"); + shard.setHost("a:1234"); + + setupShards({shard}).transitional_ignore(); + + ASSERT_OK(ShardingCatalogManager::get(operationContext()) + ->addShardToZone(operationContext(), shard.getName(), "z")); + auto shardDocStatus = getShardDoc(operationContext(), shard.getName()); + ASSERT_OK(shardDocStatus.getStatus()); + + auto shardDoc = shardDocStatus.getValue(); + auto tags = shardDoc.getTags(); + ASSERT_EQ(1u, tags.size()); + ASSERT_EQ("z", tags.front()); +} + +TEST_F(AddShardToZoneTest, AddZoneToShardWithSameTagShouldSucceed) { + ShardType shard; + shard.setName("a"); + shard.setHost("a:1234"); + 
shard.setTags({"x", "y"}); + + setupShards({shard}).transitional_ignore(); + + ASSERT_OK(ShardingCatalogManager::get(operationContext()) + ->addShardToZone(operationContext(), shard.getName(), "x")); + + auto shardDocStatus = getShardDoc(operationContext(), shard.getName()); + ASSERT_OK(shardDocStatus.getStatus()); + + auto shardDoc = shardDocStatus.getValue(); + auto tags = shardDoc.getTags(); + ASSERT_EQ(2u, tags.size()); + ASSERT_EQ("x", tags.front()); + ASSERT_EQ("y", tags.back()); +} + +TEST_F(AddShardToZoneTest, AddZoneToShardWithNewTagShouldAppend) { + ShardType shard; + shard.setName("a"); + shard.setHost("a:1234"); + shard.setTags({"x"}); + + setupShards({shard}).transitional_ignore(); + + ASSERT_OK(ShardingCatalogManager::get(operationContext()) + ->addShardToZone(operationContext(), shard.getName(), "y")); + + auto shardDocStatus = getShardDoc(operationContext(), shard.getName()); + ASSERT_OK(shardDocStatus.getStatus()); + + auto shardDoc = shardDocStatus.getValue(); + auto tags = shardDoc.getTags(); + ASSERT_EQ(2u, tags.size()); + ASSERT_EQ("x", tags.front()); + ASSERT_EQ("y", tags.back()); +} + +TEST_F(AddShardToZoneTest, AddSingleZoneToNonExistingShardShouldFail) { + ShardType shard; + shard.setName("a"); + shard.setHost("a:1234"); + + setupShards({shard}).transitional_ignore(); + + auto status = ShardingCatalogManager::get(operationContext()) + ->addShardToZone(operationContext(), "b", "z"); + ASSERT_EQ(ErrorCodes::ShardNotFound, status); +} + +} // unnamed namespace +} // namespace mongo diff --git a/src/mongo/db/s/config/sharding_catalog_manager_assign_key_range_to_zone_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_assign_key_range_to_zone_test.cpp new file mode 100644 index 00000000000..89b64867b99 --- /dev/null +++ b/src/mongo/db/s/config/sharding_catalog_manager_assign_key_range_to_zone_test.cpp @@ -0,0 +1,725 @@ +/** + * Copyright (C) 2016 MongoDB Inc. 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. 
+ */ + +#include "mongo/platform/basic.h" + +#include "mongo/bson/json.h" +#include "mongo/client/read_preference.h" +#include "mongo/db/namespace_string.h" +#include "mongo/db/s/config/sharding_catalog_manager.h" +#include "mongo/s/catalog/type_chunk.h" +#include "mongo/s/catalog/type_collection.h" +#include "mongo/s/catalog/type_shard.h" +#include "mongo/s/catalog/type_tags.h" +#include "mongo/s/client/shard.h" +#include "mongo/s/config_server_test_fixture.h" + +namespace mongo { +namespace { + +using std::string; + +ReadPreferenceSetting kReadPref(ReadPreference::PrimaryOnly); + +/** + * Basic fixture with a one shard with zone, and a sharded collection. + */ +class AssignKeyRangeToZoneTestFixture : public ConfigServerTestFixture { +public: + void setUp() override { + ConfigServerTestFixture::setUp(); + + ShardType shard; + shard.setName("a"); + shard.setHost("a:1234"); + shard.setTags({zoneName()}); + + setupShards({shard}).transitional_ignore(); + + CollectionType shardedCollection; + shardedCollection.setNs(shardedNS()); + shardedCollection.setEpoch(OID::gen()); + shardedCollection.setKeyPattern(BSON("x" << 1)); + + ASSERT_OK(insertToConfigCollection( + operationContext(), CollectionType::ConfigNS, shardedCollection.toBSON())); + } + + /** + * Asserts that the config.tags collection is empty. + */ + void assertNoZoneDoc() { + auto findStatus = + findOneOnConfigCollection(operationContext(), TagsType::ConfigNS, BSONObj()); + ASSERT_EQ(ErrorCodes::NoMatchingDocument, findStatus); + } + + /** + * Asserts that this is the only tag that exists in config.tags. 
+ */ + void assertOnlyZone(const NamespaceString& ns, + const ChunkRange& range, + const string& zoneName) { + auto findStatus = + getConfigShard()->exhaustiveFindOnConfig(operationContext(), + kReadPref, + repl::ReadConcernLevel::kMajorityReadConcern, + TagsType::ConfigNS, + BSONObj(), + BSONObj(), + 1); + ASSERT_OK(findStatus.getStatus()); + + auto findResult = findStatus.getValue(); + ASSERT_EQ(1U, findResult.docs.size()); + + auto tagDocStatus = TagsType::fromBSON(findResult.docs.front()); + ASSERT_OK(tagDocStatus.getStatus()); + + auto tagDoc = tagDocStatus.getValue(); + ASSERT_EQ(ns, tagDoc.getNS()); + ASSERT_BSONOBJ_EQ(range.getMin(), tagDoc.getMinKey()); + ASSERT_BSONOBJ_EQ(range.getMax(), tagDoc.getMaxKey()); + ASSERT_EQ(zoneName, tagDoc.getTag()); + } + + NamespaceString shardedNS() const { + return NamespaceString("test.foo"); + } + + string zoneName() const { + return "z"; + } +}; + +TEST_F(AssignKeyRangeToZoneTestFixture, BasicAssignKeyRange) { + const ChunkRange newRange(BSON("x" << 0), BSON("x" << 10)); + ASSERT_OK(ShardingCatalogManager::get(operationContext()) + ->assignKeyRangeToZone(operationContext(), shardedNS(), newRange, zoneName())); + + assertOnlyZone(shardedNS(), newRange, zoneName()); +} + +TEST_F(AssignKeyRangeToZoneTestFixture, AssignKeyRangeOnUnshardedCollShouldFail) { + auto status = ShardingCatalogManager::get(operationContext()) + ->assignKeyRangeToZone(operationContext(), + NamespaceString("unsharded.coll"), + ChunkRange(BSON("x" << 0), BSON("x" << 10)), + zoneName()); + ASSERT_EQ(ErrorCodes::NamespaceNotSharded, status); + + assertNoZoneDoc(); +} + +TEST_F(AssignKeyRangeToZoneTestFixture, AssignKeyRangeOnDroppedShardedCollShouldFail) { + CollectionType unshardedCollection; + NamespaceString ns("unsharded.coll"); + unshardedCollection.setNs(ns); + unshardedCollection.setEpoch(OID::gen()); + unshardedCollection.setKeyPattern(BSON("x" << 1)); + unshardedCollection.setDropped(true); + + ASSERT_OK(insertToConfigCollection( + 
operationContext(), CollectionType::ConfigNS, unshardedCollection.toBSON())); + + auto status = + ShardingCatalogManager::get(operationContext()) + ->assignKeyRangeToZone( + operationContext(), ns, ChunkRange(BSON("x" << 0), BSON("x" << 10)), zoneName()); + ASSERT_EQ(ErrorCodes::NamespaceNotSharded, status); + + assertNoZoneDoc(); +} + +TEST_F(AssignKeyRangeToZoneTestFixture, AssignKeyRangeNonExistingZoneShouldFail) { + auto status = ShardingCatalogManager::get(operationContext()) + ->assignKeyRangeToZone(operationContext(), + shardedNS(), + ChunkRange(BSON("x" << 0), BSON("x" << 10)), + zoneName() + "y"); + ASSERT_EQ(ErrorCodes::ZoneNotFound, status); + + assertNoZoneDoc(); +} + +TEST_F(AssignKeyRangeToZoneTestFixture, MinWithInvalidShardKeyShouldFail) { + auto status = ShardingCatalogManager::get(operationContext()) + ->assignKeyRangeToZone(operationContext(), + shardedNS(), + ChunkRange(BSON("a" << 0), BSON("x" << 10)), + zoneName()); + ASSERT_EQ(ErrorCodes::ShardKeyNotFound, status); + + assertNoZoneDoc(); +} + +TEST_F(AssignKeyRangeToZoneTestFixture, MaxWithInvalidShardKeyShouldFail) { + auto status = ShardingCatalogManager::get(operationContext()) + ->assignKeyRangeToZone(operationContext(), + shardedNS(), + ChunkRange(BSON("x" << 0), BSON("y" << 10)), + zoneName()); + ASSERT_EQ(ErrorCodes::ShardKeyNotFound, status); + + assertNoZoneDoc(); +} + +TEST_F(AssignKeyRangeToZoneTestFixture, MinThatIsAShardKeyPrefixShouldConvertToFullShardKey) { + NamespaceString ns("compound.shard"); + CollectionType shardedCollection; + shardedCollection.setNs(ns); + shardedCollection.setEpoch(OID::gen()); + shardedCollection.setKeyPattern(BSON("x" << 1 << "y" << 1)); + + ASSERT_OK(insertToConfigCollection( + operationContext(), CollectionType::ConfigNS, shardedCollection.toBSON())); + + const ChunkRange newRange(BSON("x" << 0), BSON("x" << 10 << "y" << 10)); + ASSERT_OK(ShardingCatalogManager::get(operationContext()) + ->assignKeyRangeToZone(operationContext(), ns, newRange, 
zoneName())); + + const ChunkRange fullRange(fromjson("{ x: 0, y: { $minKey: 1 }}"), + BSON("x" << 10 << "y" << 10)); + assertOnlyZone(ns, fullRange, zoneName()); +} + +TEST_F(AssignKeyRangeToZoneTestFixture, MaxThatIsAShardKeyPrefixShouldConvertToFullShardKey) { + NamespaceString ns("compound.shard"); + CollectionType shardedCollection; + shardedCollection.setNs(ns); + shardedCollection.setEpoch(OID::gen()); + shardedCollection.setKeyPattern(BSON("x" << 1 << "y" << 1)); + + ASSERT_OK(insertToConfigCollection( + operationContext(), CollectionType::ConfigNS, shardedCollection.toBSON())); + + const ChunkRange newRange(BSON("x" << 0 << "y" << 0), BSON("x" << 10)); + ASSERT_OK(ShardingCatalogManager::get(operationContext()) + ->assignKeyRangeToZone(operationContext(), ns, newRange, zoneName())); + + const ChunkRange fullRange(BSON("x" << 0 << "y" << 0), fromjson("{ x: 10, y: { $minKey: 1 }}")); + assertOnlyZone(ns, fullRange, zoneName()); +} + +TEST_F(AssignKeyRangeToZoneTestFixture, MinThatIsNotAShardKeyPrefixShouldFail) { + auto status = + ShardingCatalogManager::get(operationContext()) + ->assignKeyRangeToZone(operationContext(), + shardedNS(), + ChunkRange(BSON("x" << 0 << "y" << 0), BSON("x" << 10)), + zoneName()); + ASSERT_EQ(ErrorCodes::ShardKeyNotFound, status); + + assertNoZoneDoc(); +} + +TEST_F(AssignKeyRangeToZoneTestFixture, MaxThatIsNotAShardKeyPrefixShouldFail) { + auto status = + ShardingCatalogManager::get(operationContext()) + ->assignKeyRangeToZone(operationContext(), + shardedNS(), + ChunkRange(BSON("x" << 0), BSON("x" << 10 << "y" << 10)), + zoneName()); + ASSERT_EQ(ErrorCodes::ShardKeyNotFound, status); + + assertNoZoneDoc(); +} + +TEST_F(AssignKeyRangeToZoneTestFixture, MinMaxThatIsNotAShardKeyPrefixShouldFail) { + auto status = ShardingCatalogManager::get(operationContext()) + ->assignKeyRangeToZone( + operationContext(), + shardedNS(), + ChunkRange(BSON("x" << 0 << "y" << 0), BSON("x" << 10 << "y" << 10)), + zoneName()); + 
ASSERT_EQ(ErrorCodes::ShardKeyNotFound, status); + + assertNoZoneDoc(); +} + +TEST_F(AssignKeyRangeToZoneTestFixture, MinMaxThatIsAShardKeyPrefixShouldSucceed) { + NamespaceString ns("compound.shard"); + CollectionType shardedCollection; + shardedCollection.setNs(ns); + shardedCollection.setEpoch(OID::gen()); + shardedCollection.setKeyPattern(BSON("x" << 1 << "y" << 1)); + + ASSERT_OK(insertToConfigCollection( + operationContext(), CollectionType::ConfigNS, shardedCollection.toBSON())); + + const ChunkRange newRange(BSON("x" << 0 << "y" << 0), BSON("x" << 10 << "y" << 10)); + ASSERT_OK(ShardingCatalogManager::get(operationContext()) + ->assignKeyRangeToZone(operationContext(), ns, newRange, zoneName())); + + assertOnlyZone(ns, newRange, zoneName()); +} + +/** + * Basic fixture with a one shard with zone, a sharded collection and a zoned key range. + */ +class AssignKeyRangeWithOneRangeFixture : public AssignKeyRangeToZoneTestFixture { +public: + void setUp() override { + AssignKeyRangeToZoneTestFixture::setUp(); + + ASSERT_OK(ShardingCatalogManager::get(operationContext()) + ->assignKeyRangeToZone( + operationContext(), shardedNS(), getExistingRange(), zoneName())); + } + + ChunkRange getExistingRange() { + return ChunkRange(BSON("x" << 4), BSON("x" << 8)); + } +}; + + +/** + * new ZZ + * existing ZZZZ + * 0123456789 + */ +TEST_F(AssignKeyRangeWithOneRangeFixture, NewMaxAlignsWithExistingMinShouldSucceed) { + ASSERT_OK(ShardingCatalogManager::get(operationContext()) + ->assignKeyRangeToZone(operationContext(), + shardedNS(), + ChunkRange(BSON("x" << 2), BSON("x" << 4)), + zoneName())); + + { + auto findStatus = findOneOnConfigCollection( + operationContext(), TagsType::ConfigNS, BSON("min" << BSON("x" << 2))); + ASSERT_OK(findStatus); + + auto tagDocStatus = TagsType::fromBSON(findStatus.getValue()); + ASSERT_OK(tagDocStatus.getStatus()); + + auto tagDoc = tagDocStatus.getValue(); + ASSERT_EQ(shardedNS(), tagDoc.getNS()); + ASSERT_BSONOBJ_EQ(BSON("x" << 2), 
tagDoc.getMinKey()); + ASSERT_BSONOBJ_EQ(BSON("x" << 4), tagDoc.getMaxKey()); + ASSERT_EQ(zoneName(), tagDoc.getTag()); + } + + { + const auto existingRange = getExistingRange(); + auto findStatus = findOneOnConfigCollection( + operationContext(), TagsType::ConfigNS, BSON("min" << existingRange.getMin())); + ASSERT_OK(findStatus); + + auto tagDocStatus = TagsType::fromBSON(findStatus.getValue()); + ASSERT_OK(tagDocStatus.getStatus()); + + auto tagDoc = tagDocStatus.getValue(); + ASSERT_EQ(shardedNS(), tagDoc.getNS()); + ASSERT_BSONOBJ_EQ(existingRange.getMin(), tagDoc.getMinKey()); + ASSERT_BSONOBJ_EQ(existingRange.getMax(), tagDoc.getMaxKey()); + ASSERT_EQ(zoneName(), tagDoc.getTag()); + } +} + +/** + * new ZZ + * existing ZZZZ + * 0123456789 + */ +TEST_F(AssignKeyRangeWithOneRangeFixture, NewMaxOverlappingExistingShouldFail) { + auto status = ShardingCatalogManager::get(operationContext()) + ->assignKeyRangeToZone(operationContext(), + shardedNS(), + ChunkRange(BSON("x" << 3), BSON("x" << 5)), + zoneName()); + ASSERT_EQ(ErrorCodes::RangeOverlapConflict, status); + + assertOnlyZone(shardedNS(), getExistingRange(), zoneName()); +} + +/** + * new ZZ + * existing ZZZZ + * 0123456789 + */ +TEST_F(AssignKeyRangeWithOneRangeFixture, NewRangeOverlappingInsideExistingShouldFail) { + auto status = ShardingCatalogManager::get(operationContext()) + ->assignKeyRangeToZone(operationContext(), + shardedNS(), + ChunkRange(BSON("x" << 5), BSON("x" << 7)), + zoneName()); + ASSERT_EQ(ErrorCodes::RangeOverlapConflict, status); + + assertOnlyZone(shardedNS(), getExistingRange(), zoneName()); +} + +/** + * new ZZ + * existing ZZZZ + * 0123456789 + */ +TEST_F(AssignKeyRangeWithOneRangeFixture, NewRangeOverlappingWithDifferentNSShouldSucceed) { + CollectionType shardedCollection; + shardedCollection.setNs(NamespaceString("other.coll")); + shardedCollection.setEpoch(OID::gen()); + shardedCollection.setKeyPattern(BSON("x" << 1)); + + ASSERT_OK(insertToConfigCollection( + 
operationContext(), CollectionType::ConfigNS, shardedCollection.toBSON())); + + ASSERT_OK(ShardingCatalogManager::get(operationContext()) + ->assignKeyRangeToZone(operationContext(), + shardedCollection.getNs(), + ChunkRange(BSON("x" << 5), BSON("x" << 7)), + zoneName())); + + { + const auto existingRange = getExistingRange(); + auto findStatus = findOneOnConfigCollection( + operationContext(), TagsType::ConfigNS, BSON("min" << existingRange.getMin())); + ASSERT_OK(findStatus); + + auto tagDocStatus = TagsType::fromBSON(findStatus.getValue()); + ASSERT_OK(tagDocStatus.getStatus()); + + auto tagDoc = tagDocStatus.getValue(); + ASSERT_EQ(shardedNS(), tagDoc.getNS()); + ASSERT_BSONOBJ_EQ(existingRange.getMin(), tagDoc.getMinKey()); + ASSERT_BSONOBJ_EQ(existingRange.getMax(), tagDoc.getMaxKey()); + ASSERT_EQ(zoneName(), tagDoc.getTag()); + } + { + auto findStatus = findOneOnConfigCollection( + operationContext(), TagsType::ConfigNS, BSON("min" << BSON("x" << 5))); + ASSERT_OK(findStatus); + + auto tagDocStatus = TagsType::fromBSON(findStatus.getValue()); + ASSERT_OK(tagDocStatus.getStatus()); + + auto tagDoc = tagDocStatus.getValue(); + ASSERT_EQ(shardedCollection.getNs(), tagDoc.getNS()); + ASSERT_BSONOBJ_EQ(BSON("x" << 5), tagDoc.getMinKey()); + ASSERT_BSONOBJ_EQ(BSON("x" << 7), tagDoc.getMaxKey()); + ASSERT_EQ(zoneName(), tagDoc.getTag()); + } +} + +/** + * new ZZZZ + * existing ZZZZ + * 0123456789 + */ +TEST_F(AssignKeyRangeWithOneRangeFixture, NewRangeEquivalentToExistingOneShouldBeNoOp) { + ASSERT_OK(ShardingCatalogManager::get(operationContext()) + ->assignKeyRangeToZone( + operationContext(), shardedNS(), getExistingRange(), zoneName())); + + assertOnlyZone(shardedNS(), getExistingRange(), zoneName()); +} + +/** + * new YYYY + * existing ZZZZ + * 0123456789 + */ +TEST_F(AssignKeyRangeWithOneRangeFixture, + NewRangeEquivalentToExistingOneWithDifferentZoneShouldFail) { + ShardType shard; + shard.setName("b"); + shard.setHost("b:1234"); + shard.setTags({"y"}); + + 
ASSERT_OK(insertToConfigCollection(operationContext(), ShardType::ConfigNS, shard.toBSON())); + + auto status = + ShardingCatalogManager::get(operationContext()) + ->assignKeyRangeToZone(operationContext(), shardedNS(), getExistingRange(), "y"); + ASSERT_EQ(ErrorCodes::RangeOverlapConflict, status); + + assertOnlyZone(shardedNS(), getExistingRange(), zoneName()); +} + +/** + * new ZZ + * existing ZZZZ + * 0123456789 + */ +TEST_F(AssignKeyRangeWithOneRangeFixture, NewMinOverlappingExistingShouldFail) { + auto status = ShardingCatalogManager::get(operationContext()) + ->assignKeyRangeToZone(operationContext(), + shardedNS(), + ChunkRange(BSON("x" << 7), BSON("x" << 9)), + zoneName()); + ASSERT_EQ(ErrorCodes::RangeOverlapConflict, status); + + assertOnlyZone(shardedNS(), getExistingRange(), zoneName()); +} + +/** + * new ZZ + * existing ZZZZ + * 0123456789 + */ +TEST_F(AssignKeyRangeWithOneRangeFixture, NewMinAlignsWithExistingMaxShouldSucceed) { + ASSERT_OK(ShardingCatalogManager::get(operationContext()) + ->assignKeyRangeToZone(operationContext(), + shardedNS(), + ChunkRange(BSON("x" << 8), BSON("x" << 10)), + zoneName())); + + { + const auto existingRange = getExistingRange(); + auto findStatus = findOneOnConfigCollection( + operationContext(), TagsType::ConfigNS, BSON("min" << existingRange.getMin())); + ASSERT_OK(findStatus); + + auto tagDocStatus = TagsType::fromBSON(findStatus.getValue()); + ASSERT_OK(tagDocStatus.getStatus()); + + auto tagDoc = tagDocStatus.getValue(); + ASSERT_EQ(shardedNS(), tagDoc.getNS()); + ASSERT_BSONOBJ_EQ(existingRange.getMin(), tagDoc.getMinKey()); + ASSERT_BSONOBJ_EQ(existingRange.getMax(), tagDoc.getMaxKey()); + ASSERT_EQ(zoneName(), tagDoc.getTag()); + } + + { + auto findStatus = findOneOnConfigCollection( + operationContext(), TagsType::ConfigNS, BSON("min" << BSON("x" << 8))); + ASSERT_OK(findStatus); + + auto tagDocStatus = TagsType::fromBSON(findStatus.getValue()); + ASSERT_OK(tagDocStatus.getStatus()); + + auto tagDoc = 
tagDocStatus.getValue(); + ASSERT_EQ(shardedNS(), tagDoc.getNS()); + ASSERT_BSONOBJ_EQ(BSON("x" << 8), tagDoc.getMinKey()); + ASSERT_BSONOBJ_EQ(BSON("x" << 10), tagDoc.getMaxKey()); + ASSERT_EQ(zoneName(), tagDoc.getTag()); + } +} + +/** + * new ZZZZZZ + * existing ZZZZ + * 0123456789 + */ +TEST_F(AssignKeyRangeWithOneRangeFixture, NewRangeIsSuperSetOfExistingShouldFail) { + auto status = ShardingCatalogManager::get(operationContext()) + ->assignKeyRangeToZone(operationContext(), + shardedNS(), + ChunkRange(BSON("x" << 3), BSON("x" << 9)), + zoneName()); + + ASSERT_EQ(ErrorCodes::RangeOverlapConflict, status); + + assertOnlyZone(shardedNS(), getExistingRange(), zoneName()); +} + +/** + * new ZZ + * existing ZZZZ + * existing ZZZ + * 0123456789 + */ +TEST_F(AssignKeyRangeWithOneRangeFixture, AssignWithExistingOveralpShouldFail) { + TagsType tagDoc; + tagDoc.setNS(shardedNS()); + tagDoc.setMinKey(BSON("x" << 0)); + tagDoc.setMaxKey(BSON("x" << 2)); + tagDoc.setTag("z"); + + ASSERT_OK(insertToConfigCollection(operationContext(), TagsType::ConfigNS, tagDoc.toBSON())); + + auto status = ShardingCatalogManager::get(operationContext()) + ->assignKeyRangeToZone(operationContext(), + shardedNS(), + ChunkRange(BSON("x" << 0), BSON("x" << 1)), + zoneName()); + + ASSERT_EQ(ErrorCodes::RangeOverlapConflict, status); +} + +TEST_F(AssignKeyRangeWithOneRangeFixture, BasicRemoveKeyRange) { + ASSERT_OK(ShardingCatalogManager::get(operationContext()) + ->removeKeyRangeFromZone(operationContext(), shardedNS(), getExistingRange())); + + assertNoZoneDoc(); +} + +TEST_F(AssignKeyRangeWithOneRangeFixture, RemoveKeyRangeOnUnshardedCollShouldFail) { + auto status = ShardingCatalogManager::get(operationContext()) + ->removeKeyRangeFromZone(operationContext(), + NamespaceString("unsharded.coll"), + ChunkRange(BSON("x" << 0), BSON("x" << 10))); + ASSERT_EQ(ErrorCodes::NamespaceNotSharded, status); + + assertOnlyZone(shardedNS(), getExistingRange(), zoneName()); +} + 
+TEST_F(AssignKeyRangeWithOneRangeFixture, RemoveKeyRangeOnDroppedShardedCollShouldFail) { + CollectionType unshardedCollection; + NamespaceString ns("unsharded.coll"); + unshardedCollection.setNs(ns); + unshardedCollection.setEpoch(OID::gen()); + unshardedCollection.setKeyPattern(BSON("x" << 1)); + unshardedCollection.setDropped(true); + + ASSERT_OK(insertToConfigCollection( + operationContext(), CollectionType::ConfigNS, unshardedCollection.toBSON())); + + auto status = ShardingCatalogManager::get(operationContext()) + ->removeKeyRangeFromZone( + operationContext(), ns, ChunkRange(BSON("x" << 0), BSON("x" << 10))); + ASSERT_EQ(ErrorCodes::NamespaceNotSharded, status); + + assertOnlyZone(shardedNS(), getExistingRange(), zoneName()); +} + +TEST_F(AssignKeyRangeWithOneRangeFixture, RemoveWithInvalidMinShardKeyShouldFail) { + auto status = ShardingCatalogManager::get(operationContext()) + ->removeKeyRangeFromZone(operationContext(), + shardedNS(), + ChunkRange(BSON("a" << 0), BSON("x" << 10))); + ASSERT_EQ(ErrorCodes::ShardKeyNotFound, status); + + assertOnlyZone(shardedNS(), getExistingRange(), zoneName()); +} + +TEST_F(AssignKeyRangeWithOneRangeFixture, RemoveWithInvalidMaxShardKeyShouldFail) { + auto status = ShardingCatalogManager::get(operationContext()) + ->removeKeyRangeFromZone(operationContext(), + shardedNS(), + ChunkRange(BSON("x" << 0), BSON("y" << 10))); + ASSERT_EQ(ErrorCodes::ShardKeyNotFound, status); + + assertOnlyZone(shardedNS(), getExistingRange(), zoneName()); +} + +TEST_F(AssignKeyRangeWithOneRangeFixture, RemoveThatIsOnlyMinPrefixOfExistingShouldNotRemoveRange) { + NamespaceString ns("compound.shard"); + CollectionType shardedCollection; + shardedCollection.setNs(ns); + shardedCollection.setEpoch(OID::gen()); + shardedCollection.setKeyPattern(BSON("x" << 1 << "y" << 1)); + + ASSERT_OK(insertToConfigCollection( + operationContext(), CollectionType::ConfigNS, shardedCollection.toBSON())); + + const ChunkRange existingRange(fromjson("{ x: 0, y: { 
$minKey: 1 }}"), + BSON("x" << 10 << "y" << 10)); + ASSERT_OK(ShardingCatalogManager::get(operationContext()) + ->assignKeyRangeToZone(operationContext(), ns, existingRange, zoneName())); + + ASSERT_OK( + ShardingCatalogManager::get(operationContext()) + ->removeKeyRangeFromZone( + operationContext(), ns, ChunkRange(BSON("x" << 0), BSON("x" << 10 << "y" << 10)))); + + { + auto findStatus = findOneOnConfigCollection( + operationContext(), TagsType::ConfigNS, BSON("min" << existingRange.getMin())); + ASSERT_OK(findStatus); + + auto tagDocStatus = TagsType::fromBSON(findStatus.getValue()); + ASSERT_OK(tagDocStatus.getStatus()); + + auto tagDoc = tagDocStatus.getValue(); + ASSERT_EQ(ns, tagDoc.getNS()); + ASSERT_BSONOBJ_EQ(existingRange.getMin(), tagDoc.getMinKey()); + ASSERT_BSONOBJ_EQ(existingRange.getMax(), tagDoc.getMaxKey()); + ASSERT_EQ(zoneName(), tagDoc.getTag()); + } + + { + const auto existingRange = getExistingRange(); + auto findStatus = findOneOnConfigCollection( + operationContext(), TagsType::ConfigNS, BSON("min" << existingRange.getMin())); + ASSERT_OK(findStatus); + + auto tagDocStatus = TagsType::fromBSON(findStatus.getValue()); + ASSERT_OK(tagDocStatus.getStatus()); + + auto tagDoc = tagDocStatus.getValue(); + ASSERT_EQ(shardedNS(), tagDoc.getNS()); + ASSERT_BSONOBJ_EQ(existingRange.getMin(), tagDoc.getMinKey()); + ASSERT_BSONOBJ_EQ(existingRange.getMax(), tagDoc.getMaxKey()); + ASSERT_EQ(zoneName(), tagDoc.getTag()); + } +} + +TEST_F(AssignKeyRangeWithOneRangeFixture, RemoveThatIsOnlyMaxPrefixOfExistingShouldNotRemoveRange) { + NamespaceString ns("compound.shard"); + CollectionType shardedCollection; + shardedCollection.setNs(ns); + shardedCollection.setEpoch(OID::gen()); + shardedCollection.setKeyPattern(BSON("x" << 1 << "y" << 1)); + + ASSERT_OK(insertToConfigCollection( + operationContext(), CollectionType::ConfigNS, shardedCollection.toBSON())); + + const ChunkRange existingRange(BSON("x" << 0 << "y" << 0), + fromjson("{ x: 10, y: { $minKey: 1 
}}")); + ASSERT_OK(ShardingCatalogManager::get(operationContext()) + ->assignKeyRangeToZone(operationContext(), ns, existingRange, zoneName())); + + ASSERT_OK( + ShardingCatalogManager::get(operationContext()) + ->removeKeyRangeFromZone( + operationContext(), ns, ChunkRange(BSON("x" << 0 << "y" << 0), BSON("x" << 10)))); + + { + auto findStatus = findOneOnConfigCollection( + operationContext(), TagsType::ConfigNS, BSON("min" << existingRange.getMin())); + ASSERT_OK(findStatus); + + auto tagDocStatus = TagsType::fromBSON(findStatus.getValue()); + ASSERT_OK(tagDocStatus.getStatus()); + + auto tagDoc = tagDocStatus.getValue(); + ASSERT_EQ(ns, tagDoc.getNS()); + ASSERT_BSONOBJ_EQ(existingRange.getMin(), tagDoc.getMinKey()); + ASSERT_BSONOBJ_EQ(existingRange.getMax(), tagDoc.getMaxKey()); + ASSERT_EQ(zoneName(), tagDoc.getTag()); + } + + { + const auto existingRange = getExistingRange(); + auto findStatus = findOneOnConfigCollection( + operationContext(), TagsType::ConfigNS, BSON("min" << existingRange.getMin())); + ASSERT_OK(findStatus); + + auto tagDocStatus = TagsType::fromBSON(findStatus.getValue()); + ASSERT_OK(tagDocStatus.getStatus()); + + auto tagDoc = tagDocStatus.getValue(); + ASSERT_EQ(shardedNS(), tagDoc.getNS()); + ASSERT_BSONOBJ_EQ(existingRange.getMin(), tagDoc.getMinKey()); + ASSERT_BSONOBJ_EQ(existingRange.getMax(), tagDoc.getMaxKey()); + ASSERT_EQ(zoneName(), tagDoc.getTag()); + } +} + +} // unnamed namespace +} // namespace mongo diff --git a/src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp b/src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp new file mode 100644 index 00000000000..dca5b5403e8 --- /dev/null +++ b/src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp @@ -0,0 +1,669 @@ +/** + * Copyright (C) 2017 MongoDB Inc. 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */ + +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kSharding + +#include "mongo/platform/basic.h" + +#include "mongo/db/s/config/sharding_catalog_manager.h" + +#include "mongo/base/status_with.h" +#include "mongo/bson/bsonobjbuilder.h" +#include "mongo/bson/util/bson_extract.h" +#include "mongo/client/connection_string.h" +#include "mongo/client/read_preference.h" +#include "mongo/db/catalog/catalog_raii.h" +#include "mongo/db/dbdirectclient.h" +#include "mongo/db/namespace_string.h" +#include "mongo/db/operation_context.h" +#include "mongo/rpc/get_status_from_command_result.h" +#include "mongo/s/catalog/sharding_catalog_client.h" +#include "mongo/s/catalog/type_chunk.h" +#include "mongo/s/client/shard.h" +#include "mongo/s/client/shard_registry.h" +#include "mongo/s/grid.h" +#include "mongo/s/shard_key_pattern.h" +#include "mongo/util/fail_point_service.h" +#include "mongo/util/log.h" +#include "mongo/util/mongoutils/str.h" + +namespace mongo { +namespace { + +MONGO_FP_DECLARE(migrationCommitVersionError); + +/** + * Append min, max and version information from chunk to the buffer for logChange purposes. 
+ */ +void appendShortVersion(BufBuilder* b, const ChunkType& chunk) { + BSONObjBuilder bb(*b); + bb.append(ChunkType::min(), chunk.getMin()); + bb.append(ChunkType::max(), chunk.getMax()); + if (chunk.isVersionSet()) + chunk.getVersion().addToBSON(bb, ChunkType::lastmod()); + bb.done(); +} + +BSONArray buildMergeChunksTransactionUpdates(const std::vector<ChunkType>& chunksToMerge, + const ChunkVersion& mergeVersion) { + BSONArrayBuilder updates; + + // Build an update operation to expand the first chunk into the newly merged chunk + { + BSONObjBuilder op; + op.append("op", "u"); + op.appendBool("b", false); // no upsert + op.append("ns", ChunkType::ConfigNS.ns()); + + // expand first chunk into newly merged chunk + ChunkType mergedChunk(chunksToMerge.front()); + mergedChunk.setMax(chunksToMerge.back().getMax()); + + // fill in additional details for sending through transaction + mergedChunk.setVersion(mergeVersion); + + // add the new chunk information as the update object + op.append("o", mergedChunk.toConfigBSON()); + + // query object + op.append("o2", BSON(ChunkType::name(mergedChunk.getName()))); + + updates.append(op.obj()); + } + + // Build update operations to delete the rest of the chunks to be merged. 
Remember not + // to delete the first chunk we're expanding + for (size_t i = 1; i < chunksToMerge.size(); ++i) { + BSONObjBuilder op; + op.append("op", "d"); + op.append("ns", ChunkType::ConfigNS.ns()); + + op.append("o", BSON(ChunkType::name(chunksToMerge[i].getName()))); + + updates.append(op.obj()); + } + + return updates.arr(); +} + +BSONArray buildMergeChunksTransactionPrecond(const std::vector<ChunkType>& chunksToMerge, + const ChunkVersion& collVersion) { + BSONArrayBuilder preCond; + + for (auto chunk : chunksToMerge) { + BSONObjBuilder b; + b.append("ns", ChunkType::ConfigNS.ns()); + b.append("q", + BSON("query" << BSON(ChunkType::ns(chunk.getNS().ns()) + << ChunkType::min(chunk.getMin()) + << ChunkType::max(chunk.getMax())) + << "orderby" + << BSON(ChunkType::lastmod() << -1))); + b.append("res", + BSON(ChunkType::epoch(collVersion.epoch()) + << ChunkType::shard(chunk.getShard().toString()))); + preCond.append(b.obj()); + } + return preCond.arr(); +} + +Status checkChunkIsOnShard(OperationContext* opCtx, + const NamespaceString& nss, + const BSONObj& min, + const BSONObj& max, + const ShardId& shard) { + BSONObj chunkQuery = + BSON(ChunkType::ns() << nss.ns() << ChunkType::min() << min << ChunkType::max() << max + << ChunkType::shard() + << shard); + + // Must use local read concern because we're going to perform subsequent writes. + auto findResponseWith = + Grid::get(opCtx)->shardRegistry()->getConfigShard()->exhaustiveFindOnConfig( + opCtx, + ReadPreferenceSetting{ReadPreference::PrimaryOnly}, + repl::ReadConcernLevel::kLocalReadConcern, + ChunkType::ConfigNS, + chunkQuery, + BSONObj(), + 1); + if (!findResponseWith.isOK()) { + return findResponseWith.getStatus(); + } + + if (findResponseWith.getValue().docs.empty()) { + return {ErrorCodes::Error(40165), + str::stream() + << "Could not find the chunk (" + << chunkQuery.toString() + << ") on the shard. 
Cannot execute the migration commit with invalid chunks."}; + } + + return Status::OK(); +} + +BSONObj makeCommitChunkTransactionCommand(const NamespaceString& nss, + const ChunkType& migratedChunk, + const boost::optional<ChunkType>& controlChunk, + StringData fromShard, + StringData toShard) { + + // Update migratedChunk's version and shard. + BSONArrayBuilder updates; + { + BSONObjBuilder op; + op.append("op", "u"); + op.appendBool("b", false); // No upserting + op.append("ns", ChunkType::ConfigNS.ns()); + + BSONObjBuilder n(op.subobjStart("o")); + n.append(ChunkType::name(), ChunkType::genID(nss, migratedChunk.getMin())); + migratedChunk.getVersion().addToBSON(n, ChunkType::lastmod()); + n.append(ChunkType::ns(), nss.ns()); + n.append(ChunkType::min(), migratedChunk.getMin()); + n.append(ChunkType::max(), migratedChunk.getMax()); + n.append(ChunkType::shard(), toShard); + n.done(); + + BSONObjBuilder q(op.subobjStart("o2")); + q.append(ChunkType::name(), ChunkType::genID(nss, migratedChunk.getMin())); + q.done(); + + updates.append(op.obj()); + } + + // If we have a controlChunk, update its chunk version. + if (controlChunk) { + BSONObjBuilder op; + op.append("op", "u"); + op.appendBool("b", false); + op.append("ns", ChunkType::ConfigNS.ns()); + + BSONObjBuilder n(op.subobjStart("o")); + n.append(ChunkType::name(), ChunkType::genID(nss, controlChunk->getMin())); + controlChunk->getVersion().addToBSON(n, ChunkType::lastmod()); + n.append(ChunkType::ns(), nss.ns()); + n.append(ChunkType::min(), controlChunk->getMin()); + n.append(ChunkType::max(), controlChunk->getMax()); + n.append(ChunkType::shard(), fromShard); + n.done(); + + BSONObjBuilder q(op.subobjStart("o2")); + q.append(ChunkType::name(), ChunkType::genID(nss, controlChunk->getMin())); + q.done(); + + updates.append(op.obj()); + } + + // Do not give doTxn a write concern. If doTxn tries to wait for replication, it will fail + // because of the GlobalWrite lock CommitChunkMigration already holds. 
Replication will not be + // able to take the lock it requires. + return BSON("doTxn" << updates.arr()); +} + +} // namespace + +Status ShardingCatalogManager::commitChunkSplit(OperationContext* opCtx, + const NamespaceString& nss, + const OID& requestEpoch, + const ChunkRange& range, + const std::vector<BSONObj>& splitPoints, + const std::string& shardName) { + // Take _kChunkOpLock in exclusive mode to prevent concurrent chunk splits, merges, and + // migrations + // TODO(SERVER-25359): Replace with a collection-specific lock map to allow splits/merges/ + // move chunks on different collections to proceed in parallel + Lock::ExclusiveLock lk(opCtx->lockState(), _kChunkOpLock); + + std::string errmsg; + + // Get the max chunk version for this namespace. + auto findStatus = Grid::get(opCtx)->shardRegistry()->getConfigShard()->exhaustiveFindOnConfig( + opCtx, + ReadPreferenceSetting{ReadPreference::PrimaryOnly}, + repl::ReadConcernLevel::kLocalReadConcern, + ChunkType::ConfigNS, + BSON("ns" << nss.ns()), + BSON(ChunkType::lastmod << -1), + 1); + + if (!findStatus.isOK()) { + return findStatus.getStatus(); + } + + const auto& chunksVector = findStatus.getValue().docs; + if (chunksVector.empty()) { + errmsg = str::stream() << "splitChunk cannot split chunk " << range.toString() + << ". Collection '" << nss.ns() + << "' no longer either exists, is sharded, or has chunks"; + return {ErrorCodes::IllegalOperation, errmsg}; + } + + ChunkVersion collVersion = ChunkVersion::fromBSON(chunksVector.front(), ChunkType::lastmod()); + + // Return an error if collection epoch does not match epoch of request. + if (collVersion.epoch() != requestEpoch) { + errmsg = str::stream() << "splitChunk cannot split chunk " << range.toString() + << ". Collection '" << nss.ns() << "' was dropped and re-created." 
+ << " Current epoch: " << collVersion.epoch() + << ", cmd epoch: " << requestEpoch; + return {ErrorCodes::StaleEpoch, errmsg}; + } + + std::vector<ChunkType> newChunks; + + ChunkVersion currentMaxVersion = collVersion; + + auto startKey = range.getMin(); + auto newChunkBounds(splitPoints); + newChunkBounds.push_back(range.getMax()); + + BSONArrayBuilder updates; + + for (const auto& endKey : newChunkBounds) { + // Verify the split points are all within the chunk + if (endKey.woCompare(range.getMax()) != 0 && !range.containsKey(endKey)) { + return {ErrorCodes::InvalidOptions, + str::stream() << "Split key " << endKey << " not contained within chunk " + << range.toString()}; + } + + // Verify the split points came in increasing order + if (endKey.woCompare(startKey) < 0) { + return { + ErrorCodes::InvalidOptions, + str::stream() << "Split keys must be specified in strictly increasing order. Key " + << endKey + << " was specified after " + << startKey + << "."}; + } + + // Verify that splitPoints are not repeated + if (endKey.woCompare(startKey) == 0) { + return {ErrorCodes::InvalidOptions, + str::stream() << "Split on lower bound of chunk " + << ChunkRange(startKey, endKey).toString() + << "is not allowed"}; + } + + // verify that splits don't create too-big shard keys + Status shardKeyStatus = ShardKeyPattern::checkShardKeySize(endKey); + if (!shardKeyStatus.isOK()) { + return shardKeyStatus; + } + + // splits only update the 'minor' portion of version + currentMaxVersion.incMinor(); + + // build an update operation against the chunks collection of the config database + // with upsert true + BSONObjBuilder op; + op.append("op", "u"); + op.appendBool("b", true); + op.append("ns", ChunkType::ConfigNS.ns()); + + // add the modified (new) chunk information as the update object + BSONObjBuilder n(op.subobjStart("o")); + n.append(ChunkType::name(), ChunkType::genID(nss, startKey)); + currentMaxVersion.addToBSON(n, ChunkType::lastmod()); + n.append(ChunkType::ns(), 
nss.ns()); + n.append(ChunkType::min(), startKey); + n.append(ChunkType::max(), endKey); + n.append(ChunkType::shard(), shardName); + n.done(); + + // add the chunk's _id as the query part of the update statement + BSONObjBuilder q(op.subobjStart("o2")); + q.append(ChunkType::name(), ChunkType::genID(nss, startKey)); + q.done(); + + updates.append(op.obj()); + + // remember this chunk info for logging later + ChunkType chunk; + chunk.setMin(startKey); + chunk.setMax(endKey); + chunk.setVersion(currentMaxVersion); + + newChunks.push_back(std::move(chunk)); + + startKey = endKey; + } + + BSONArrayBuilder preCond; + { + BSONObjBuilder b; + b.append("ns", ChunkType::ConfigNS.ns()); + b.append("q", + BSON("query" << BSON(ChunkType::ns(nss.ns()) << ChunkType::min() << range.getMin() + << ChunkType::max() + << range.getMax()) + << "orderby" + << BSON(ChunkType::lastmod() << -1))); + { + BSONObjBuilder bb(b.subobjStart("res")); + bb.append(ChunkType::epoch(), requestEpoch); + bb.append(ChunkType::shard(), shardName); + } + preCond.append(b.obj()); + } + + // apply the batch of updates to local metadata. 
+ Status doTxnStatus = Grid::get(opCtx)->catalogClient()->applyChunkOpsDeprecated( + opCtx, + updates.arr(), + preCond.arr(), + nss, + currentMaxVersion, + WriteConcernOptions(), + repl::ReadConcernLevel::kLocalReadConcern); + if (!doTxnStatus.isOK()) { + return doTxnStatus; + } + + // log changes + BSONObjBuilder logDetail; + { + BSONObjBuilder b(logDetail.subobjStart("before")); + b.append(ChunkType::min(), range.getMin()); + b.append(ChunkType::max(), range.getMax()); + collVersion.addToBSON(b, ChunkType::lastmod()); + } + + if (newChunks.size() == 2) { + appendShortVersion(&logDetail.subobjStart("left"), newChunks[0]); + appendShortVersion(&logDetail.subobjStart("right"), newChunks[1]); + + Grid::get(opCtx) + ->catalogClient() + ->logChange(opCtx, "split", nss.ns(), logDetail.obj(), WriteConcernOptions()) + .transitional_ignore(); + } else { + BSONObj beforeDetailObj = logDetail.obj(); + BSONObj firstDetailObj = beforeDetailObj.getOwned(); + const int newChunksSize = newChunks.size(); + + for (int i = 0; i < newChunksSize; i++) { + BSONObjBuilder chunkDetail; + chunkDetail.appendElements(beforeDetailObj); + chunkDetail.append("number", i + 1); + chunkDetail.append("of", newChunksSize); + appendShortVersion(&chunkDetail.subobjStart("chunk"), newChunks[i]); + + Grid::get(opCtx) + ->catalogClient() + ->logChange( + opCtx, "multi-split", nss.ns(), chunkDetail.obj(), WriteConcernOptions()) + .transitional_ignore(); + } + } + + return doTxnStatus; +} + +Status ShardingCatalogManager::commitChunkMerge(OperationContext* opCtx, + const NamespaceString& nss, + const OID& requestEpoch, + const std::vector<BSONObj>& chunkBoundaries, + const std::string& shardName) { + // This method must never be called with empty chunks to merge + invariant(!chunkBoundaries.empty()); + + // Take _kChunkOpLock in exclusive mode to prevent concurrent chunk splits, merges, and + // migrations + // TODO(SERVER-25359): Replace with a collection-specific lock map to allow splits/merges/ + // 
move chunks on different collections to proceed in parallel + Lock::ExclusiveLock lk(opCtx->lockState(), _kChunkOpLock); + + // Get the chunk with the highest version for this namespace + auto findStatus = Grid::get(opCtx)->shardRegistry()->getConfigShard()->exhaustiveFindOnConfig( + opCtx, + ReadPreferenceSetting{ReadPreference::PrimaryOnly}, + repl::ReadConcernLevel::kLocalReadConcern, + ChunkType::ConfigNS, + BSON("ns" << nss.ns()), + BSON(ChunkType::lastmod << -1), + 1); + + if (!findStatus.isOK()) { + return findStatus.getStatus(); + } + + const auto& chunksVector = findStatus.getValue().docs; + if (chunksVector.empty()) + return {ErrorCodes::IllegalOperation, + "collection does not exist, isn't sharded, or has no chunks"}; + + ChunkVersion collVersion = ChunkVersion::fromBSON(chunksVector.front(), ChunkType::lastmod()); + + // Return an error if epoch of chunk does not match epoch of request + if (collVersion.epoch() != requestEpoch) { + return {ErrorCodes::StaleEpoch, + "epoch of chunk does not match epoch of request. This most likely means " + "that the collection was dropped and re-created."}; + } + + // Build chunks to be merged + std::vector<ChunkType> chunksToMerge; + + ChunkType itChunk; + itChunk.setMax(chunkBoundaries.front()); + itChunk.setNS(nss); + itChunk.setShard(shardName); + + // Do not use the first chunk boundary as a max bound while building chunks + for (size_t i = 1; i < chunkBoundaries.size(); ++i) { + itChunk.setMin(itChunk.getMax()); + + // Ensure the chunk boundaries are strictly increasing + if (chunkBoundaries[i].woCompare(itChunk.getMin()) <= 0) { + return { + ErrorCodes::InvalidOptions, + str::stream() + << "Chunk boundaries must be specified in strictly increasing order. 
Boundary " + << chunkBoundaries[i] + << " was specified after " + << itChunk.getMin() + << "."}; + } + + itChunk.setMax(chunkBoundaries[i]); + chunksToMerge.push_back(itChunk); + } + + ChunkVersion mergeVersion = collVersion; + mergeVersion.incMinor(); + + auto updates = buildMergeChunksTransactionUpdates(chunksToMerge, mergeVersion); + auto preCond = buildMergeChunksTransactionPrecond(chunksToMerge, collVersion); + + // apply the batch of updates to local metadata + Status doTxnStatus = Grid::get(opCtx)->catalogClient()->applyChunkOpsDeprecated( + opCtx, + updates, + preCond, + nss, + mergeVersion, + WriteConcernOptions(), + repl::ReadConcernLevel::kLocalReadConcern); + if (!doTxnStatus.isOK()) { + return doTxnStatus; + } + + // log changes + BSONObjBuilder logDetail; + { + BSONArrayBuilder b(logDetail.subarrayStart("merged")); + for (auto chunkToMerge : chunksToMerge) { + b.append(chunkToMerge.toConfigBSON()); + } + } + collVersion.addToBSON(logDetail, "prevShardVersion"); + mergeVersion.addToBSON(logDetail, "mergedVersion"); + + Grid::get(opCtx) + ->catalogClient() + ->logChange(opCtx, "merge", nss.ns(), logDetail.obj(), WriteConcernOptions()) + .transitional_ignore(); + + return doTxnStatus; +} + +StatusWith<BSONObj> ShardingCatalogManager::commitChunkMigration( + OperationContext* opCtx, + const NamespaceString& nss, + const ChunkType& migratedChunk, + const boost::optional<ChunkType>& controlChunk, + const OID& collectionEpoch, + const ShardId& fromShard, + const ShardId& toShard) { + + auto const configShard = Grid::get(opCtx)->shardRegistry()->getConfigShard(); + + // Take _kChunkOpLock in exclusive mode to prevent concurrent chunk splits, merges, and + // migrations. + // + // ConfigSvrCommitChunkMigration commands must be run serially because the new ChunkVersions + // for migrated chunks are generated within the command and must be committed to the database + // before another chunk commit generates new ChunkVersions in the same manner. 
+ // + // TODO(SERVER-25359): Replace with a collection-specific lock map to allow splits/merges/ + // move chunks on different collections to proceed in parallel. + // (Note: This is not needed while we have a global lock, taken here only for consistency.) + Lock::ExclusiveLock lk(opCtx->lockState(), _kChunkOpLock); + + // Must use local read concern because we will perform subsequent writes. + auto findResponse = + configShard->exhaustiveFindOnConfig(opCtx, + ReadPreferenceSetting{ReadPreference::PrimaryOnly}, + repl::ReadConcernLevel::kLocalReadConcern, + ChunkType::ConfigNS, + BSON("ns" << nss.ns()), + BSON(ChunkType::lastmod << -1), + 1); + if (!findResponse.isOK()) { + return findResponse.getStatus(); + } + + if (MONGO_FAIL_POINT(migrationCommitVersionError)) { + uassert(ErrorCodes::StaleEpoch, + "failpoint 'migrationCommitVersionError' generated error", + false); + } + + const auto chunksVector = std::move(findResponse.getValue().docs); + if (chunksVector.empty()) { + return {ErrorCodes::IncompatibleShardingMetadata, + str::stream() << "Tried to find max chunk version for collection '" << nss.ns() + << ", but found no chunks"}; + } + + const auto swChunk = ChunkType::fromConfigBSON(chunksVector.front()); + if (!swChunk.isOK()) { + return swChunk.getStatus(); + } + + const auto currentCollectionVersion = swChunk.getValue().getVersion(); + + // It is possible for a migration to end up running partly without the protection of the + // distributed lock if the config primary stepped down since the start of the migration and + // failed to recover the migration. Check that the collection has not been dropped and recreated + // since the migration began, unbeknown to the shard when the command was sent. + if (currentCollectionVersion.epoch() != collectionEpoch) { + return {ErrorCodes::StaleEpoch, + str::stream() << "The collection '" << nss.ns() + << "' has been dropped and recreated since the migration began." 
+ " The config server's collection version epoch is now '" + << currentCollectionVersion.epoch().toString() + << "', but the shard's is " + << collectionEpoch.toString() + << "'. Aborting migration commit for chunk (" + << migratedChunk.getRange().toString() + << ")."}; + } + + // Check that migratedChunk and controlChunk are where they should be, on fromShard. + + auto migratedOnShard = + checkChunkIsOnShard(opCtx, nss, migratedChunk.getMin(), migratedChunk.getMax(), fromShard); + if (!migratedOnShard.isOK()) { + return migratedOnShard; + } + + if (controlChunk) { + auto controlOnShard = checkChunkIsOnShard( + opCtx, nss, controlChunk->getMin(), controlChunk->getMax(), fromShard); + if (!controlOnShard.isOK()) { + return controlOnShard; + } + } + + // Generate the new versions of migratedChunk and controlChunk. Migrating chunk's minor version + // will be 0. + ChunkType newMigratedChunk = migratedChunk; + newMigratedChunk.setVersion(ChunkVersion( + currentCollectionVersion.majorVersion() + 1, 0, currentCollectionVersion.epoch())); + + // Control chunk's minor version will be 1 (if control chunk is present). 
+ boost::optional<ChunkType> newControlChunk = boost::none; + if (controlChunk) { + newControlChunk = controlChunk.get(); + newControlChunk->setVersion(ChunkVersion( + currentCollectionVersion.majorVersion() + 1, 1, currentCollectionVersion.epoch())); + } + + auto command = makeCommitChunkTransactionCommand( + nss, newMigratedChunk, newControlChunk, fromShard.toString(), toShard.toString()); + + StatusWith<Shard::CommandResponse> doTxnCommandResponse = + configShard->runCommandWithFixedRetryAttempts( + opCtx, + ReadPreferenceSetting{ReadPreference::PrimaryOnly}, + nss.db().toString(), + command, + Shard::RetryPolicy::kIdempotent); + + if (!doTxnCommandResponse.isOK()) { + return doTxnCommandResponse.getStatus(); + } + + if (!doTxnCommandResponse.getValue().commandStatus.isOK()) { + return doTxnCommandResponse.getValue().commandStatus; + } + + BSONObjBuilder result; + newMigratedChunk.getVersion().appendWithFieldForCommands(&result, "migratedChunkVersion"); + if (controlChunk) { + newControlChunk->getVersion().appendWithFieldForCommands(&result, "controlChunkVersion"); + } + + return result.obj(); +} + +} // namespace mongo diff --git a/src/mongo/db/s/config/sharding_catalog_manager_collection_operations.cpp b/src/mongo/db/s/config/sharding_catalog_manager_collection_operations.cpp new file mode 100644 index 00000000000..874b87a8dc5 --- /dev/null +++ b/src/mongo/db/s/config/sharding_catalog_manager_collection_operations.cpp @@ -0,0 +1,605 @@ +/** + * Copyright (C) 2017 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. 
+ * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kSharding + +#include "mongo/platform/basic.h" + +#include "mongo/db/s/config/sharding_catalog_manager.h" + +#include <iomanip> +#include <set> + +#include "mongo/base/status_with.h" +#include "mongo/bson/util/bson_extract.h" +#include "mongo/client/connection_string.h" +#include "mongo/client/read_preference.h" +#include "mongo/client/remote_command_targeter.h" +#include "mongo/client/replica_set_monitor.h" +#include "mongo/db/client.h" +#include "mongo/db/namespace_string.h" +#include "mongo/db/operation_context.h" +#include "mongo/db/query/collation/collator_factory_interface.h" +#include "mongo/db/repl/repl_client_info.h" +#include "mongo/executor/network_interface.h" +#include "mongo/executor/task_executor.h" +#include "mongo/rpc/get_status_from_command_result.h" +#include "mongo/s/balancer_configuration.h" +#include "mongo/s/catalog/sharding_catalog_client.h" +#include "mongo/s/catalog/sharding_catalog_client_impl.h" +#include 
"mongo/s/catalog/type_collection.h" +#include "mongo/s/catalog/type_database.h" +#include "mongo/s/client/shard.h" +#include "mongo/s/client/shard_registry.h" +#include "mongo/s/grid.h" +#include "mongo/s/request_types/set_shard_version_request.h" +#include "mongo/s/shard_key_pattern.h" +#include "mongo/s/shard_util.h" +#include "mongo/util/log.h" +#include "mongo/util/mongoutils/str.h" +#include "mongo/util/scopeguard.h" + +namespace mongo { + +using CollectionUUID = UUID; +using std::string; +using std::vector; +using std::set; + +namespace { + +const Seconds kDefaultFindHostMaxWaitTime(20); + +const ReadPreferenceSetting kConfigReadSelector(ReadPreference::Nearest, TagSet{}); +const WriteConcernOptions kNoWaitWriteConcern(1, WriteConcernOptions::SyncMode::UNSET, Seconds(0)); + +void checkForExistingChunks(OperationContext* opCtx, const NamespaceString& nss) { + BSONObjBuilder countBuilder; + countBuilder.append("count", ChunkType::ConfigNS.coll()); + countBuilder.append("query", BSON(ChunkType::ns(nss.ns()))); + + // OK to use limit=1, since if any chunks exist, we will fail. + countBuilder.append("limit", 1); + + // Use readConcern local to guarantee we see any chunks that have been written and may + // become committed; readConcern majority will not see the chunks if they have not made it + // to the majority snapshot. 
+ repl::ReadConcernArgs readConcern(repl::ReadConcernLevel::kLocalReadConcern); + readConcern.appendInfo(&countBuilder); + + auto cmdResponse = uassertStatusOK( + Grid::get(opCtx)->shardRegistry()->getConfigShard()->runCommandWithFixedRetryAttempts( + opCtx, + kConfigReadSelector, + ChunkType::ConfigNS.db().toString(), + countBuilder.done(), + Shard::kDefaultConfigCommandTimeout, + Shard::RetryPolicy::kIdempotent)); + uassertStatusOK(cmdResponse.commandStatus); + + long long numChunks; + uassertStatusOK(bsonExtractIntegerField(cmdResponse.response, "n", &numChunks)); + uassert(ErrorCodes::ManualInterventionRequired, + str::stream() << "A previous attempt to shard collection " << nss.ns() + << " failed after writing some initial chunks to config.chunks. Please " + "manually delete the partially written chunks for collection " + << nss.ns() + << " from config.chunks", + numChunks == 0); +} + +} // namespace + +/** + * Creates and writes to the config server the first chunks for a newly sharded collection. Returns + * the version generated for the collection. 
+ */ +ChunkVersion ShardingCatalogManager::_createFirstChunks(OperationContext* opCtx, + const NamespaceString& nss, + const ShardKeyPattern& shardKeyPattern, + const ShardId& primaryShardId, + const std::vector<BSONObj>& initPoints, + const bool distributeInitialChunks) { + + const KeyPattern keyPattern = shardKeyPattern.getKeyPattern(); + + vector<BSONObj> splitPoints; + vector<ShardId> shardIds; + + std::string primaryShardName = primaryShardId.toString(); + auto drainingCount = uassertStatusOK(_runCountCommandOnConfig( + opCtx, + NamespaceString(ShardType::ConfigNS), + BSON(ShardType::name() << primaryShardName << ShardType::draining(true)))); + + const bool primaryDraining = (drainingCount > 0); + auto getPrimaryOrFirstNonDrainingShard = + [&opCtx, primaryShardId, &shardIds, primaryDraining]() { + if (primaryDraining) { + vector<ShardId> allShardIds; + Grid::get(opCtx)->shardRegistry()->getAllShardIds(&allShardIds); + + auto dbShardId = allShardIds[0]; + if (allShardIds[0] == primaryShardId && allShardIds.size() > 1) { + dbShardId = allShardIds[1]; + } + + return dbShardId; + } else { + return primaryShardId; + } + }; + + if (initPoints.empty()) { + // If no split points were specified use the shard's data distribution to determine them + auto primaryShard = + uassertStatusOK(Grid::get(opCtx)->shardRegistry()->getShard(opCtx, primaryShardId)); + + auto result = uassertStatusOK(primaryShard->runCommandWithFixedRetryAttempts( + opCtx, + ReadPreferenceSetting{ReadPreference::PrimaryPreferred}, + nss.db().toString(), + BSON("count" << nss.coll()), + Shard::RetryPolicy::kIdempotent)); + + long long numObjects = 0; + uassertStatusOK(result.commandStatus); + uassertStatusOK(bsonExtractIntegerField(result.response, "n", &numObjects)); + + // Refresh the balancer settings to ensure the chunk size setting, which is sent as part of + // the splitVector command and affects the number of chunks returned, has been loaded. 
+ uassertStatusOK(Grid::get(opCtx)->getBalancerConfiguration()->refreshAndCheck(opCtx)); + + if (numObjects > 0) { + splitPoints = uassertStatusOK(shardutil::selectChunkSplitPoints( + opCtx, + primaryShardId, + nss, + shardKeyPattern, + ChunkRange(keyPattern.globalMin(), keyPattern.globalMax()), + Grid::get(opCtx)->getBalancerConfiguration()->getMaxChunkSizeBytes(), + 0)); + } + + // If docs already exist for the collection, must use primary shard, + // otherwise defer to passed-in distribution option. + if (numObjects == 0 && distributeInitialChunks) { + Grid::get(opCtx)->shardRegistry()->getAllShardIds(&shardIds); + if (primaryDraining && shardIds.size() > 1) { + shardIds.erase(std::remove(shardIds.begin(), shardIds.end(), primaryShardId), + shardIds.end()); + } + } else { + shardIds.push_back(getPrimaryOrFirstNonDrainingShard()); + } + } else { + // Make sure points are unique and ordered + auto orderedPts = SimpleBSONObjComparator::kInstance.makeBSONObjSet(); + + for (const auto& initPoint : initPoints) { + orderedPts.insert(initPoint); + } + + for (const auto& initPoint : orderedPts) { + splitPoints.push_back(initPoint); + } + + if (distributeInitialChunks) { + Grid::get(opCtx)->shardRegistry()->getAllShardIds(&shardIds); + if (primaryDraining) { + shardIds.erase(std::remove(shardIds.begin(), shardIds.end(), primaryShardId), + shardIds.end()); + } + } else { + shardIds.push_back(getPrimaryOrFirstNonDrainingShard()); + } + } + + // This is the first chunk; start the versioning from scratch + const OID epoch = OID::gen(); + ChunkVersion version(1, 0, epoch); + + log() << "going to create " << splitPoints.size() + 1 << " chunk(s) for: " << nss + << " using new epoch " << version.epoch(); + + for (unsigned i = 0; i <= splitPoints.size(); i++) { + const BSONObj min = (i == 0) ? keyPattern.globalMin() : splitPoints[i - 1]; + const BSONObj max = (i < splitPoints.size()) ? 
splitPoints[i] : keyPattern.globalMax(); + + // The correct version must be returned as part of this call so only increment for versions, + // which get written + if (i > 0) { + version.incMinor(); + } + + ChunkType chunk; + chunk.setNS(nss); + chunk.setMin(min); + chunk.setMax(max); + chunk.setShard(shardIds[i % shardIds.size()]); + chunk.setVersion(version); + + uassertStatusOK(Grid::get(opCtx)->catalogClient()->insertConfigDocument( + opCtx, + ChunkType::ConfigNS, + chunk.toConfigBSON(), + ShardingCatalogClient::kMajorityWriteConcern)); + } + + return version; +} + +Status ShardingCatalogManager::dropCollection(OperationContext* opCtx, const NamespaceString& nss) { + const auto catalogClient = Grid::get(opCtx)->catalogClient(); + catalogClient + ->logChange(opCtx, + "dropCollection.start", + nss.ns(), + BSONObj(), + ShardingCatalogClientImpl::kMajorityWriteConcern) + .ignore(); + + auto shardsStatus = + catalogClient->getAllShards(opCtx, repl::ReadConcernLevel::kLocalReadConcern); + if (!shardsStatus.isOK()) { + return shardsStatus.getStatus(); + } + vector<ShardType> allShards = std::move(shardsStatus.getValue().value); + + LOG(1) << "dropCollection " << nss.ns() << " started"; + + const auto dropCommandBSON = [opCtx, &nss] { + BSONObjBuilder builder; + builder.append("drop", nss.coll()); + + if (!opCtx->getWriteConcern().usedDefault) { + builder.append(WriteConcernOptions::kWriteConcernField, + opCtx->getWriteConcern().toBSON()); + } + + return builder.obj(); + }(); + + std::map<std::string, BSONObj> errors; + auto* const shardRegistry = Grid::get(opCtx)->shardRegistry(); + + for (const auto& shardEntry : allShards) { + auto swShard = shardRegistry->getShard(opCtx, shardEntry.getName()); + if (!swShard.isOK()) { + return swShard.getStatus(); + } + + const auto& shard = swShard.getValue(); + + auto swDropResult = shard->runCommandWithFixedRetryAttempts( + opCtx, + ReadPreferenceSetting{ReadPreference::PrimaryOnly}, + nss.db().toString(), + dropCommandBSON, + 
Shard::RetryPolicy::kIdempotent); + + if (!swDropResult.isOK()) { + return swDropResult.getStatus().withContext( + str::stream() << "Error dropping collection on shard " << shardEntry.getName()); + } + + auto& dropResult = swDropResult.getValue(); + + auto dropStatus = std::move(dropResult.commandStatus); + auto wcStatus = std::move(dropResult.writeConcernStatus); + if (!dropStatus.isOK() || !wcStatus.isOK()) { + if (dropStatus.code() == ErrorCodes::NamespaceNotFound && wcStatus.isOK()) { + // Generally getting NamespaceNotFound is okay to ignore as it simply means that + // the collection has already been dropped or doesn't exist on this shard. + // If, however, we get NamespaceNotFound but also have a write concern error then we + // can't confirm whether the fact that the namespace doesn't exist is actually + // committed. Thus we must still fail on NamespaceNotFound if there is also a write + // concern error. This can happen if we call drop, it succeeds but with a write + // concern error, then we retry the drop. 
+ continue; + } + + errors.emplace(shardEntry.getHost(), std::move(dropResult.response)); + } + } + + if (!errors.empty()) { + StringBuilder sb; + sb << "Dropping collection failed on the following hosts: "; + + for (auto it = errors.cbegin(); it != errors.cend(); ++it) { + if (it != errors.cbegin()) { + sb << ", "; + } + + sb << it->first << ": " << it->second; + } + + return {ErrorCodes::OperationFailed, sb.str()}; + } + + LOG(1) << "dropCollection " << nss.ns() << " shard data deleted"; + + // Remove chunk data + Status result = + catalogClient->removeConfigDocuments(opCtx, + ChunkType::ConfigNS, + BSON(ChunkType::ns(nss.ns())), + ShardingCatalogClient::kMajorityWriteConcern); + if (!result.isOK()) { + return result; + } + + LOG(1) << "dropCollection " << nss.ns() << " chunk data deleted"; + + // Mark the collection as dropped + CollectionType coll; + coll.setNs(nss); + coll.setDropped(true); + coll.setEpoch(ChunkVersion::DROPPED().epoch()); + coll.setUpdatedAt(Grid::get(opCtx)->getNetwork()->now()); + + const bool upsert = false; + result = ShardingCatalogClientImpl::updateShardingCatalogEntryForCollection( + opCtx, nss, coll, upsert); + if (!result.isOK()) { + return result; + } + + LOG(1) << "dropCollection " << nss.ns() << " collection marked as dropped"; + + for (const auto& shardEntry : allShards) { + auto swShard = shardRegistry->getShard(opCtx, shardEntry.getName()); + if (!swShard.isOK()) { + return swShard.getStatus(); + } + + const auto& shard = swShard.getValue(); + + SetShardVersionRequest ssv = SetShardVersionRequest::makeForVersioningNoPersist( + shardRegistry->getConfigServerConnectionString(), + shardEntry.getName(), + fassertStatusOK(28781, ConnectionString::parse(shardEntry.getHost())), + nss, + ChunkVersion::DROPPED(), + true); + + auto ssvResult = shard->runCommandWithFixedRetryAttempts( + opCtx, + ReadPreferenceSetting{ReadPreference::PrimaryOnly}, + "admin", + ssv.toBSON(), + Shard::RetryPolicy::kIdempotent); + + if (!ssvResult.isOK()) { + 
return ssvResult.getStatus(); + } + + auto ssvStatus = std::move(ssvResult.getValue().commandStatus); + if (!ssvStatus.isOK()) { + return ssvStatus; + } + + auto unsetShardingStatus = shard->runCommandWithFixedRetryAttempts( + opCtx, + ReadPreferenceSetting{ReadPreference::PrimaryOnly}, + "admin", + BSON("unsetSharding" << 1), + Shard::RetryPolicy::kIdempotent); + + if (!unsetShardingStatus.isOK()) { + return unsetShardingStatus.getStatus(); + } + + auto unsetShardingResult = std::move(unsetShardingStatus.getValue().commandStatus); + if (!unsetShardingResult.isOK()) { + return unsetShardingResult; + } + } + + LOG(1) << "dropCollection " << nss.ns() << " completed"; + + catalogClient + ->logChange(opCtx, + "dropCollection", + nss.ns(), + BSONObj(), + ShardingCatalogClientImpl::kMajorityWriteConcern) + .ignore(); + + return Status::OK(); +} + +void ShardingCatalogManager::shardCollection(OperationContext* opCtx, + const NamespaceString& nss, + const boost::optional<UUID> uuid, + const ShardKeyPattern& fieldsAndOrder, + const BSONObj& defaultCollation, + bool unique, + const vector<BSONObj>& initPoints, + const bool distributeInitialChunks, + const ShardId& dbPrimaryShardId) { + const auto catalogClient = Grid::get(opCtx)->catalogClient(); + const auto shardRegistry = Grid::get(opCtx)->shardRegistry(); + + const auto primaryShard = uassertStatusOK(shardRegistry->getShard(opCtx, dbPrimaryShardId)); + + // Fail if there are partially written chunks from a previous failed shardCollection. 
+ checkForExistingChunks(opCtx, nss); + + // Record start in changelog + { + BSONObjBuilder collectionDetail; + collectionDetail.append("shardKey", fieldsAndOrder.toBSON()); + collectionDetail.append("collection", nss.ns()); + if (uuid) { + uuid->appendToBuilder(&collectionDetail, "uuid"); + } + collectionDetail.append("primary", primaryShard->toString()); + collectionDetail.append("numChunks", static_cast<int>(initPoints.size() + 1)); + catalogClient + ->logChange(opCtx, + "shardCollection.start", + nss.ns(), + collectionDetail.obj(), + ShardingCatalogClient::kMajorityWriteConcern) + .transitional_ignore(); + } + + // const NamespaceString nss(ns); + + // Construct the collection default collator. + std::unique_ptr<CollatorInterface> defaultCollator; + if (!defaultCollation.isEmpty()) { + defaultCollator = uassertStatusOK(CollatorFactoryInterface::get(opCtx->getServiceContext()) + ->makeFromBSON(defaultCollation)); + } + + const auto& collVersion = _createFirstChunks( + opCtx, nss, fieldsAndOrder, dbPrimaryShardId, initPoints, distributeInitialChunks); + + { + CollectionType coll; + coll.setNs(nss); + if (uuid) { + coll.setUUID(*uuid); + } + coll.setEpoch(collVersion.epoch()); + + // TODO(schwerin): The following isn't really a date, but is stored as one in-memory and in + // config.collections, as a historical oddity. + coll.setUpdatedAt(Date_t::fromMillisSinceEpoch(collVersion.toLong())); + coll.setKeyPattern(fieldsAndOrder.toBSON()); + coll.setDefaultCollation(defaultCollator ? 
defaultCollator->getSpec().toBSON() : BSONObj()); + coll.setUnique(unique); + + uassertStatusOK(ShardingCatalogClientImpl::updateShardingCatalogEntryForCollection( + opCtx, nss, coll, true /*upsert*/)); + } + + auto shard = uassertStatusOK(shardRegistry->getShard(opCtx, dbPrimaryShardId)); + invariant(!shard->isConfig()); + + // Tell the primary mongod to refresh its data + // TODO: Think the real fix here is for mongos to just + // assume that all collections are sharded, when we get there + SetShardVersionRequest ssv = SetShardVersionRequest::makeForVersioningNoPersist( + shardRegistry->getConfigServerConnectionString(), + dbPrimaryShardId, + primaryShard->getConnString(), + nss, + collVersion, + true); + + auto ssvResponse = + shard->runCommandWithFixedRetryAttempts(opCtx, + ReadPreferenceSetting{ReadPreference::PrimaryOnly}, + "admin", + ssv.toBSON(), + Shard::RetryPolicy::kIdempotent); + auto status = ssvResponse.isOK() ? std::move(ssvResponse.getValue().commandStatus) + : std::move(ssvResponse.getStatus()); + if (!status.isOK()) { + warning() << "could not update initial version of " << nss.ns() << " on shard primary " + << dbPrimaryShardId << causedBy(redact(status)); + } + + catalogClient + ->logChange(opCtx, + "shardCollection.end", + nss.ns(), + BSON("version" << collVersion.toString()), + ShardingCatalogClient::kMajorityWriteConcern) + .transitional_ignore(); +} + +void ShardingCatalogManager::generateUUIDsForExistingShardedCollections(OperationContext* opCtx) { + // Retrieve all collections in config.collections that do not have a UUID. Some collections + // may already have a UUID if an earlier upgrade attempt failed after making some progress. 
+ auto shardedColls = + uassertStatusOK( + Grid::get(opCtx)->shardRegistry()->getConfigShard()->exhaustiveFindOnConfig( + opCtx, + ReadPreferenceSetting{ReadPreference::PrimaryOnly}, + repl::ReadConcernLevel::kLocalReadConcern, + CollectionType::ConfigNS, + BSON(CollectionType::uuid.name() << BSON("$exists" << false) << "dropped" << false), + BSONObj(), // sort + boost::none // limit + )) + .docs; + + if (shardedColls.empty()) { + LOG(0) << "all sharded collections already have UUIDs"; + + // We did a local read of the collections collection above and found that all sharded + // collections already have UUIDs. However, the data may not be majority committed (a + // previous setFCV attempt may have failed with a write concern error). Since the current + // Client doesn't know the opTime of the last write to the collections collection, make it + // wait for the last opTime in the system when we wait for writeConcern. + repl::ReplClientInfo::forClient(opCtx->getClient()).setLastOpToSystemLastOpTime(opCtx); + return; + } + + // Generate and persist a new UUID for each collection that did not have a UUID. 
+ LOG(0) << "generating UUIDs for " << shardedColls.size() + << " sharded collections that do not yet have a UUID"; + for (auto& coll : shardedColls) { + auto collType = uassertStatusOK(CollectionType::fromBSON(coll)); + invariant(!collType.getUUID()); + + auto uuid = CollectionUUID::gen(); + collType.setUUID(uuid); + + uassertStatusOK(ShardingCatalogClientImpl::updateShardingCatalogEntryForCollection( + opCtx, collType.getNs(), collType, false /* upsert */)); + LOG(2) << "updated entry in config.collections for sharded collection " << collType.getNs() + << " with generated UUID " << uuid; + } +} + +std::vector<NamespaceString> ShardingCatalogManager::getAllShardedCollectionsForDb( + OperationContext* opCtx, StringData dbName) { + const auto dbNameStr = dbName.toString(); + + const std::vector<CollectionType> collectionsOnConfig = + uassertStatusOK(Grid::get(opCtx)->catalogClient()->getCollections( + opCtx, &dbNameStr, nullptr, repl::ReadConcernLevel::kLocalReadConcern)); + + std::vector<NamespaceString> collectionsToReturn; + for (const auto& coll : collectionsOnConfig) { + if (coll.getDropped()) + continue; + + collectionsToReturn.push_back(coll.getNs()); + } + + return collectionsToReturn; +} + +} // namespace mongo diff --git a/src/mongo/db/s/config/sharding_catalog_manager_commit_chunk_migration_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_commit_chunk_migration_test.cpp new file mode 100644 index 00000000000..76340441d2c --- /dev/null +++ b/src/mongo/db/s/config/sharding_catalog_manager_commit_chunk_migration_test.cpp @@ -0,0 +1,373 @@ +/** + * Copyright (C) 2016 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. 
+ */ + +#include "mongo/platform/basic.h" + +#include "mongo/bson/bsonobj.h" +#include "mongo/bson/bsonobjbuilder.h" +#include "mongo/client/read_preference.h" +#include "mongo/db/namespace_string.h" +#include "mongo/db/s/config/sharding_catalog_manager.h" +#include "mongo/s/catalog/type_chunk.h" +#include "mongo/s/catalog/type_shard.h" +#include "mongo/s/client/shard_registry.h" +#include "mongo/s/config_server_test_fixture.h" + +namespace mongo { +namespace { + +using CommitChunkMigrate = ConfigServerTestFixture; + +const NamespaceString kNamespace("TestDB.TestColl"); + +TEST_F(CommitChunkMigrate, CheckCorrectOpsCommandWithCtl) { + + ShardType shard0; + shard0.setName("shard0"); + shard0.setHost("shard0:12"); + + ShardType shard1; + shard1.setName("shard1"); + shard1.setHost("shard1:12"); + + setupShards({shard0, shard1}).transitional_ignore(); + + int origMajorVersion = 12; + auto const origVersion = ChunkVersion(origMajorVersion, 7, OID::gen()); + + ChunkType chunk0; + chunk0.setNS(kNamespace); + chunk0.setVersion(origVersion); + chunk0.setShard(shard0.getName()); + + // apportion + auto chunkMin = BSON("a" << 1); + chunk0.setMin(chunkMin); + auto chunkMax = BSON("a" << 10); + chunk0.setMax(chunkMax); + + ChunkType chunk1; + chunk1.setNS(kNamespace); + chunk1.setVersion(origVersion); + chunk1.setShard(shard0.getName()); + + chunk1.setMin(chunkMax); + auto chunkMaxax = BSON("a" << 20); + chunk1.setMax(chunkMaxax); + + setupChunks({chunk0, chunk1}).transitional_ignore(); + + // use crefs to verify it will take consts: + ChunkType const& chunk0cref = chunk0; + ChunkType const& chunk1cref = chunk1; + + StatusWith<BSONObj> resultBSON = ShardingCatalogManager::get(operationContext()) + ->commitChunkMigration(operationContext(), + chunk0.getNS(), + chunk0cref, + chunk1cref, + origVersion.epoch(), + ShardId(shard0.getName()), + ShardId(shard1.getName())); + + ASSERT_OK(resultBSON.getStatus()); + + // Verify the versions returned match expected values. 
+ BSONObj versions = resultBSON.getValue(); + auto mver = ChunkVersion::parseFromBSONWithFieldForCommands(versions, "migratedChunkVersion"); + ASSERT_OK(mver.getStatus()); + ASSERT_EQ(ChunkVersion(origMajorVersion + 1, 0, origVersion.epoch()), mver.getValue()); + + auto cver = ChunkVersion::parseFromBSONWithFieldForCommands(versions, "controlChunkVersion"); + ASSERT_OK(cver.getStatus()); + ASSERT_EQ(ChunkVersion(origMajorVersion + 1, 1, origVersion.epoch()), cver.getValue()); + + // Verify the chunks ended up in the right shards, and versions match the values returned. + auto chunkDoc0 = uassertStatusOK(getChunkDoc(operationContext(), chunkMin)); + ASSERT_EQ("shard1", chunkDoc0.getShard().toString()); + ASSERT_EQ(mver.getValue(), chunkDoc0.getVersion()); + + auto chunkDoc1 = uassertStatusOK(getChunkDoc(operationContext(), chunkMax)); + ASSERT_EQ("shard0", chunkDoc1.getShard().toString()); + ASSERT_EQ(cver.getValue(), chunkDoc1.getVersion()); +} + +TEST_F(CommitChunkMigrate, CheckCorrectOpsCommandNoCtl) { + + ShardType shard0; + shard0.setName("shard0"); + shard0.setHost("shard0:12"); + + ShardType shard1; + shard1.setName("shard1"); + shard1.setHost("shard1:12"); + + setupShards({shard0, shard1}).transitional_ignore(); + + int origMajorVersion = 15; + auto const origVersion = ChunkVersion(origMajorVersion, 4, OID::gen()); + + ChunkType chunk0; + chunk0.setNS(kNamespace); + chunk0.setVersion(origVersion); + chunk0.setShard(shard0.getName()); + + // apportion + auto chunkMin = BSON("a" << 1); + chunk0.setMin(chunkMin); + auto chunkMax = BSON("a" << 10); + chunk0.setMax(chunkMax); + + setupChunks({chunk0}).transitional_ignore(); + + StatusWith<BSONObj> resultBSON = ShardingCatalogManager::get(operationContext()) + ->commitChunkMigration(operationContext(), + chunk0.getNS(), + chunk0, + boost::none, + origVersion.epoch(), + ShardId(shard0.getName()), + ShardId(shard1.getName())); + + ASSERT_OK(resultBSON.getStatus()); + + // Verify the version returned matches expected 
value. + BSONObj versions = resultBSON.getValue(); + auto mver = ChunkVersion::parseFromBSONWithFieldForCommands(versions, "migratedChunkVersion"); + ASSERT_OK(mver.getStatus()); + ASSERT_EQ(ChunkVersion(origMajorVersion + 1, 0, origVersion.epoch()), mver.getValue()); + + auto cver = ChunkVersion::parseFromBSONWithFieldForCommands(versions, "controlChunkVersion"); + ASSERT_NOT_OK(cver.getStatus()); + + // Verify the chunk ended up in the right shard, and version matches the value returned. + auto chunkDoc0 = uassertStatusOK(getChunkDoc(operationContext(), chunkMin)); + ASSERT_EQ("shard1", chunkDoc0.getShard().toString()); + ASSERT_EQ(mver.getValue(), chunkDoc0.getVersion()); +} + +TEST_F(CommitChunkMigrate, RejectWrongCollectionEpoch0) { + + ShardType shard0; + shard0.setName("shard0"); + shard0.setHost("shard0:12"); + + ShardType shard1; + shard1.setName("shard1"); + shard1.setHost("shard1:12"); + + setupShards({shard0, shard1}).transitional_ignore(); + + int origMajorVersion = 12; + auto const origVersion = ChunkVersion(origMajorVersion, 7, OID::gen()); + + ChunkType chunk0; + chunk0.setNS(kNamespace); + chunk0.setVersion(origVersion); + chunk0.setShard(shard0.getName()); + + // apportion + auto chunkMin = BSON("a" << 1); + chunk0.setMin(chunkMin); + auto chunkMax = BSON("a" << 10); + chunk0.setMax(chunkMax); + + ChunkType chunk1; + chunk1.setNS(kNamespace); + chunk1.setVersion(origVersion); + chunk1.setShard(shard0.getName()); + + chunk1.setMin(chunkMax); + auto chunkMaxax = BSON("a" << 20); + chunk1.setMax(chunkMaxax); + + setupChunks({chunk0, chunk1}).transitional_ignore(); + + StatusWith<BSONObj> resultBSON = ShardingCatalogManager::get(operationContext()) + ->commitChunkMigration(operationContext(), + chunk0.getNS(), + chunk0, + chunk1, + OID::gen(), + ShardId(shard0.getName()), + ShardId(shard1.getName())); + + ASSERT_EQ(ErrorCodes::StaleEpoch, resultBSON.getStatus()); +} + +TEST_F(CommitChunkMigrate, RejectWrongCollectionEpoch1) { + + ShardType shard0; + 
shard0.setName("shard0"); + shard0.setHost("shard0:12"); + + ShardType shard1; + shard1.setName("shard1"); + shard1.setHost("shard1:12"); + + setupShards({shard0, shard1}).transitional_ignore(); + + int origMajorVersion = 12; + auto const origVersion = ChunkVersion(origMajorVersion, 7, OID::gen()); + auto const otherVersion = ChunkVersion(origMajorVersion, 7, OID::gen()); + + ChunkType chunk0; + chunk0.setNS(kNamespace); + chunk0.setVersion(origVersion); + chunk0.setShard(shard0.getName()); + + // apportion + auto chunkMin = BSON("a" << 1); + chunk0.setMin(chunkMin); + auto chunkMax = BSON("a" << 10); + chunk0.setMax(chunkMax); + + ChunkType chunk1; + chunk1.setNS(kNamespace); + chunk1.setVersion(otherVersion); + chunk1.setShard(shard0.getName()); + + chunk1.setMin(chunkMax); + auto chunkMaxax = BSON("a" << 20); + chunk1.setMax(chunkMaxax); + + // get version from the control chunk this time + setupChunks({chunk1, chunk0}).transitional_ignore(); + + StatusWith<BSONObj> resultBSON = ShardingCatalogManager::get(operationContext()) + ->commitChunkMigration(operationContext(), + chunk0.getNS(), + chunk0, + chunk1, + origVersion.epoch(), + ShardId(shard0.getName()), + ShardId(shard1.getName())); + + ASSERT_EQ(ErrorCodes::StaleEpoch, resultBSON.getStatus()); +} + +TEST_F(CommitChunkMigrate, RejectChunkMissing0) { + + ShardType shard0; + shard0.setName("shard0"); + shard0.setHost("shard0:12"); + + ShardType shard1; + shard1.setName("shard1"); + shard1.setHost("shard1:12"); + + setupShards({shard0, shard1}).transitional_ignore(); + + int origMajorVersion = 12; + auto const origVersion = ChunkVersion(origMajorVersion, 7, OID::gen()); + + ChunkType chunk0; + chunk0.setNS(kNamespace); + chunk0.setVersion(origVersion); + chunk0.setShard(shard0.getName()); + + // apportion + auto chunkMin = BSON("a" << 1); + chunk0.setMin(chunkMin); + auto chunkMax = BSON("a" << 10); + chunk0.setMax(chunkMax); + + ChunkType chunk1; + chunk1.setNS(kNamespace); + chunk1.setVersion(origVersion); + 
chunk1.setShard(shard0.getName()); + + chunk1.setMin(chunkMax); + auto chunkMaxax = BSON("a" << 20); + chunk1.setMax(chunkMaxax); + + setupChunks({chunk1}).transitional_ignore(); + + StatusWith<BSONObj> resultBSON = ShardingCatalogManager::get(operationContext()) + ->commitChunkMigration(operationContext(), + chunk0.getNS(), + chunk0, + chunk1, + origVersion.epoch(), + ShardId(shard0.getName()), + ShardId(shard1.getName())); + + ASSERT_EQ(40165, resultBSON.getStatus().code()); +} + +TEST_F(CommitChunkMigrate, RejectChunkMissing1) { + + ShardType shard0; + shard0.setName("shard0"); + shard0.setHost("shard0:12"); + + ShardType shard1; + shard1.setName("shard1"); + shard1.setHost("shard1:12"); + + setupShards({shard0, shard1}).transitional_ignore(); + + int origMajorVersion = 12; + auto const origVersion = ChunkVersion(origMajorVersion, 7, OID::gen()); + + ChunkType chunk0; + chunk0.setNS(kNamespace); + chunk0.setVersion(origVersion); + chunk0.setShard(shard0.getName()); + + // apportion + auto chunkMin = BSON("a" << 1); + chunk0.setMin(chunkMin); + auto chunkMax = BSON("a" << 10); + chunk0.setMax(chunkMax); + + ChunkType chunk1; + chunk1.setNS(kNamespace); + chunk1.setVersion(origVersion); + chunk1.setShard(shard0.getName()); + + chunk1.setMin(chunkMax); + auto chunkMaxax = BSON("a" << 20); + chunk1.setMax(chunkMaxax); + + setupChunks({chunk0}).transitional_ignore(); + + StatusWith<BSONObj> resultBSON = ShardingCatalogManager::get(operationContext()) + ->commitChunkMigration(operationContext(), + chunk0.getNS(), + chunk0, + chunk1, + origVersion.epoch(), + ShardId(shard0.getName()), + ShardId(shard1.getName())); + + ASSERT_EQ(40165, resultBSON.getStatus().code()); +} + +} // namespace +} // namespace mongo diff --git a/src/mongo/db/s/config/sharding_catalog_manager_config_initialization_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_config_initialization_test.cpp new file mode 100644 index 00000000000..151d1a888a7 --- /dev/null +++ 
b/src/mongo/db/s/config/sharding_catalog_manager_config_initialization_test.cpp @@ -0,0 +1,386 @@ +/** + * Copyright (C) 2015 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */ + +#include "mongo/platform/basic.h" + +#include <string> +#include <vector> + +#include "mongo/bson/json.h" +#include "mongo/db/catalog/catalog_raii.h" +#include "mongo/db/concurrency/write_conflict_exception.h" +#include "mongo/db/curop.h" +#include "mongo/db/namespace_string.h" +#include "mongo/db/operation_context.h" +#include "mongo/db/repl/replication_coordinator_mock.h" +#include "mongo/db/s/config/sharding_catalog_manager.h" +#include "mongo/s/catalog/config_server_version.h" +#include "mongo/s/catalog/sharding_catalog_client.h" +#include "mongo/s/catalog/type_chunk.h" +#include "mongo/s/catalog/type_config_version.h" +#include "mongo/s/catalog/type_lockpings.h" +#include "mongo/s/catalog/type_locks.h" +#include "mongo/s/catalog/type_shard.h" +#include "mongo/s/catalog/type_tags.h" +#include "mongo/s/client/shard.h" +#include "mongo/s/config_server_test_fixture.h" +#include "mongo/util/scopeguard.h" + +namespace mongo { +namespace { + +using std::string; +using std::vector; +using unittest::assertGet; + +/** + * Takes two arrays of BSON objects and asserts that they contain the same documents + */ +void assertBSONObjsSame(const std::vector<BSONObj>& expectedBSON, + const std::vector<BSONObj>& foundBSON) { + ASSERT_EQUALS(expectedBSON.size(), foundBSON.size()); + + for (const auto& expectedObj : expectedBSON) { + bool wasFound = false; + for (const auto& foundObj : foundBSON) { + if (expectedObj.woCompare(foundObj) == 0) { + wasFound = true; + break; + } + } + ASSERT_TRUE(wasFound); + } +} + +using ConfigInitializationTest = ConfigServerTestFixture; + +TEST_F(ConfigInitializationTest, UpgradeNotNeeded) { + VersionType version; + version.setClusterId(OID::gen()); + version.setCurrentVersion(CURRENT_CONFIG_VERSION); + version.setMinCompatibleVersion(MIN_COMPATIBLE_CONFIG_VERSION); + ASSERT_OK( + insertToConfigCollection(operationContext(), VersionType::ConfigNS, version.toBSON())); + + ASSERT_OK(ShardingCatalogManager::get(operationContext()) + 
->initializeConfigDatabaseIfNeeded(operationContext())); + + auto versionDoc = + assertGet(findOneOnConfigCollection(operationContext(), VersionType::ConfigNS, BSONObj())); + + VersionType foundVersion = assertGet(VersionType::fromBSON(versionDoc)); + + ASSERT_EQUALS(version.getClusterId(), foundVersion.getClusterId()); + ASSERT_EQUALS(version.getCurrentVersion(), foundVersion.getCurrentVersion()); + ASSERT_EQUALS(version.getMinCompatibleVersion(), foundVersion.getMinCompatibleVersion()); +} + +TEST_F(ConfigInitializationTest, InitIncompatibleVersion) { + VersionType version; + version.setClusterId(OID::gen()); + version.setCurrentVersion(MIN_COMPATIBLE_CONFIG_VERSION - 1); + version.setMinCompatibleVersion(MIN_COMPATIBLE_CONFIG_VERSION - 2); + ASSERT_OK( + insertToConfigCollection(operationContext(), VersionType::ConfigNS, version.toBSON())); + + ASSERT_EQ(ErrorCodes::IncompatibleShardingConfigVersion, + ShardingCatalogManager::get(operationContext()) + ->initializeConfigDatabaseIfNeeded(operationContext())); + + auto versionDoc = + assertGet(findOneOnConfigCollection(operationContext(), VersionType::ConfigNS, BSONObj())); + + VersionType foundVersion = assertGet(VersionType::fromBSON(versionDoc)); + + ASSERT_EQUALS(version.getClusterId(), foundVersion.getClusterId()); + ASSERT_EQUALS(version.getCurrentVersion(), foundVersion.getCurrentVersion()); + ASSERT_EQUALS(version.getMinCompatibleVersion(), foundVersion.getMinCompatibleVersion()); +} + +TEST_F(ConfigInitializationTest, InitClusterMultipleVersionDocs) { + VersionType version; + version.setClusterId(OID::gen()); + version.setCurrentVersion(MIN_COMPATIBLE_CONFIG_VERSION - 2); + version.setMinCompatibleVersion(MIN_COMPATIBLE_CONFIG_VERSION - 3); + ASSERT_OK( + insertToConfigCollection(operationContext(), VersionType::ConfigNS, version.toBSON())); + + ASSERT_OK(insertToConfigCollection(operationContext(), + VersionType::ConfigNS, + BSON("_id" + << "a second document"))); + + 
ASSERT_EQ(ErrorCodes::TooManyMatchingDocuments, + ShardingCatalogManager::get(operationContext()) + ->initializeConfigDatabaseIfNeeded(operationContext())); +} + +TEST_F(ConfigInitializationTest, InitInvalidConfigVersionDoc) { + BSONObj versionDoc(fromjson(R"({ + _id: 1, + minCompatibleVersion: "should be numeric", + currentVersion: 7, + clusterId: ObjectId("55919cc6dbe86ce7ac056427") + })")); + ASSERT_OK(insertToConfigCollection(operationContext(), VersionType::ConfigNS, versionDoc)); + + ASSERT_EQ(ErrorCodes::TypeMismatch, + ShardingCatalogManager::get(operationContext()) + ->initializeConfigDatabaseIfNeeded(operationContext())); +} + + +TEST_F(ConfigInitializationTest, InitNoVersionDocEmptyConfig) { + // Make sure there is no existing document + ASSERT_EQUALS(ErrorCodes::NoMatchingDocument, + findOneOnConfigCollection(operationContext(), VersionType::ConfigNS, BSONObj())); + + ASSERT_OK(ShardingCatalogManager::get(operationContext()) + ->initializeConfigDatabaseIfNeeded(operationContext())); + + auto versionDoc = + assertGet(findOneOnConfigCollection(operationContext(), VersionType::ConfigNS, BSONObj())); + + VersionType foundVersion = assertGet(VersionType::fromBSON(versionDoc)); + + ASSERT_TRUE(foundVersion.getClusterId().isSet()); + ASSERT_EQUALS(CURRENT_CONFIG_VERSION, foundVersion.getCurrentVersion()); + ASSERT_EQUALS(MIN_COMPATIBLE_CONFIG_VERSION, foundVersion.getMinCompatibleVersion()); +} + +TEST_F(ConfigInitializationTest, InitVersionTooHigh) { + VersionType version; + version.setClusterId(OID::gen()); + version.setCurrentVersion(10000); + version.setMinCompatibleVersion(10000); + ASSERT_OK( + insertToConfigCollection(operationContext(), VersionType::ConfigNS, version.toBSON())); + + ASSERT_EQ(ErrorCodes::IncompatibleShardingConfigVersion, + ShardingCatalogManager::get(operationContext()) + ->initializeConfigDatabaseIfNeeded(operationContext())); +} + +TEST_F(ConfigInitializationTest, OnlyRunsOnce) { + 
ASSERT_OK(ShardingCatalogManager::get(operationContext()) + ->initializeConfigDatabaseIfNeeded(operationContext())); + + auto versionDoc = + assertGet(findOneOnConfigCollection(operationContext(), VersionType::ConfigNS, BSONObj())); + + VersionType foundVersion = assertGet(VersionType::fromBSON(versionDoc)); + + ASSERT_TRUE(foundVersion.getClusterId().isSet()); + ASSERT_EQUALS(CURRENT_CONFIG_VERSION, foundVersion.getCurrentVersion()); + ASSERT_EQUALS(MIN_COMPATIBLE_CONFIG_VERSION, foundVersion.getMinCompatibleVersion()); + + ASSERT_EQUALS(ErrorCodes::AlreadyInitialized, + ShardingCatalogManager::get(operationContext()) + ->initializeConfigDatabaseIfNeeded(operationContext())); +} + +TEST_F(ConfigInitializationTest, ReRunsIfDocRolledBackThenReElected) { + ASSERT_OK(ShardingCatalogManager::get(operationContext()) + ->initializeConfigDatabaseIfNeeded(operationContext())); + + auto versionDoc = + assertGet(findOneOnConfigCollection(operationContext(), VersionType::ConfigNS, BSONObj())); + + VersionType foundVersion = assertGet(VersionType::fromBSON(versionDoc)); + + ASSERT_TRUE(foundVersion.getClusterId().isSet()); + ASSERT_EQUALS(CURRENT_CONFIG_VERSION, foundVersion.getCurrentVersion()); + ASSERT_EQUALS(MIN_COMPATIBLE_CONFIG_VERSION, foundVersion.getMinCompatibleVersion()); + + // Now remove the version document and re-run initializeConfigDatabaseIfNeeded(). + { + // Mirror what happens if the config.version document is rolled back. 
+ ON_BLOCK_EXIT([&] { + replicationCoordinator()->setFollowerMode(repl::MemberState::RS_PRIMARY).ignore(); + }); + ASSERT_OK(replicationCoordinator()->setFollowerMode(repl::MemberState::RS_ROLLBACK)); + auto opCtx = operationContext(); + repl::UnreplicatedWritesBlock uwb(opCtx); + auto nss = VersionType::ConfigNS; + writeConflictRetry(opCtx, "removeConfigDocuments", nss.ns(), [&] { + AutoGetCollection autoColl(opCtx, nss, MODE_IX); + auto coll = autoColl.getCollection(); + ASSERT_TRUE(coll); + auto cursor = coll->getCursor(opCtx); + std::vector<RecordId> recordIds; + while (auto recordId = cursor->next()) { + recordIds.push_back(recordId->id); + } + mongo::WriteUnitOfWork wuow(opCtx); + for (auto recordId : recordIds) { + coll->deleteDocument(opCtx, kUninitializedStmtId, recordId, nullptr); + } + wuow.commit(); + ASSERT_EQUALS(0UL, coll->numRecords(opCtx)); + }); + } + + // Verify the document was actually removed. + ASSERT_EQUALS(ErrorCodes::NoMatchingDocument, + findOneOnConfigCollection(operationContext(), VersionType::ConfigNS, BSONObj())); + + // Re-create the config.version document. 
+ ASSERT_OK(ShardingCatalogManager::get(operationContext()) + ->initializeConfigDatabaseIfNeeded(operationContext())); + + auto newVersionDoc = + assertGet(findOneOnConfigCollection(operationContext(), VersionType::ConfigNS, BSONObj())); + + VersionType newFoundVersion = assertGet(VersionType::fromBSON(newVersionDoc)); + + ASSERT_TRUE(newFoundVersion.getClusterId().isSet()); + ASSERT_NOT_EQUALS(newFoundVersion.getClusterId(), foundVersion.getClusterId()); + ASSERT_EQUALS(CURRENT_CONFIG_VERSION, newFoundVersion.getCurrentVersion()); + ASSERT_EQUALS(MIN_COMPATIBLE_CONFIG_VERSION, newFoundVersion.getMinCompatibleVersion()); +} + +TEST_F(ConfigInitializationTest, BuildsNecessaryIndexes) { + ASSERT_OK(ShardingCatalogManager::get(operationContext()) + ->initializeConfigDatabaseIfNeeded(operationContext())); + + auto expectedChunksIndexes = std::vector<BSONObj>{ + BSON("v" << 2 << "key" << BSON("_id" << 1) << "name" + << "_id_" + << "ns" + << "config.chunks"), + BSON("v" << 2 << "unique" << true << "key" << BSON("ns" << 1 << "min" << 1) << "name" + << "ns_1_min_1" + << "ns" + << "config.chunks"), + BSON("v" << 2 << "unique" << true << "key" << BSON("ns" << 1 << "shard" << 1 << "min" << 1) + << "name" + << "ns_1_shard_1_min_1" + << "ns" + << "config.chunks"), + BSON("v" << 2 << "unique" << true << "key" << BSON("ns" << 1 << "lastmod" << 1) << "name" + << "ns_1_lastmod_1" + << "ns" + << "config.chunks")}; + auto expectedLockpingsIndexes = + std::vector<BSONObj>{BSON("v" << 2 << "key" << BSON("_id" << 1) << "name" + << "_id_" + << "ns" + << "config.lockpings"), + BSON("v" << 2 << "key" << BSON("ping" << 1) << "name" + << "ping_1" + << "ns" + << "config.lockpings")}; + auto expectedLocksIndexes = std::vector<BSONObj>{ + BSON("v" << 2 << "key" << BSON("_id" << 1) << "name" + << "_id_" + << "ns" + << "config.locks"), + BSON("v" << 2 << "key" << BSON("ts" << 1) << "name" + << "ts_1" + << "ns" + << "config.locks"), + BSON("v" << 2 << "key" << BSON("state" << 1 << "process" << 1) 
<< "name" + << "state_1_process_1" + << "ns" + << "config.locks")}; + auto expectedShardsIndexes = std::vector<BSONObj>{ + BSON("v" << 2 << "key" << BSON("_id" << 1) << "name" + << "_id_" + << "ns" + << "config.shards"), + BSON("v" << 2 << "unique" << true << "key" << BSON("host" << 1) << "name" + << "host_1" + << "ns" + << "config.shards")}; + auto expectedTagsIndexes = std::vector<BSONObj>{ + BSON("v" << 2 << "key" << BSON("_id" << 1) << "name" + << "_id_" + << "ns" + << "config.tags"), + BSON("v" << 2 << "unique" << true << "key" << BSON("ns" << 1 << "min" << 1) << "name" + << "ns_1_min_1" + << "ns" + << "config.tags"), + BSON("v" << 2 << "key" << BSON("ns" << 1 << "tag" << 1) << "name" + << "ns_1_tag_1" + << "ns" + << "config.tags")}; + + auto foundChunksIndexes = assertGet(getIndexes(operationContext(), ChunkType::ConfigNS)); + assertBSONObjsSame(expectedChunksIndexes, foundChunksIndexes); + + auto foundLockpingsIndexes = assertGet(getIndexes(operationContext(), LockpingsType::ConfigNS)); + assertBSONObjsSame(expectedLockpingsIndexes, foundLockpingsIndexes); + + auto foundLocksIndexes = assertGet(getIndexes(operationContext(), LocksType::ConfigNS)); + assertBSONObjsSame(expectedLocksIndexes, foundLocksIndexes); + + auto foundShardsIndexes = assertGet(getIndexes(operationContext(), ShardType::ConfigNS)); + assertBSONObjsSame(expectedShardsIndexes, foundShardsIndexes); + + auto foundTagsIndexes = assertGet(getIndexes(operationContext(), TagsType::ConfigNS)); + assertBSONObjsSame(expectedTagsIndexes, foundTagsIndexes); +} + +TEST_F(ConfigInitializationTest, CompatibleIndexAlreadyExists) { + getConfigShard() + ->createIndexOnConfig(operationContext(), ShardType::ConfigNS, BSON("host" << 1), true) + .transitional_ignore(); + + ASSERT_OK(ShardingCatalogManager::get(operationContext()) + ->initializeConfigDatabaseIfNeeded(operationContext())); + + auto expectedShardsIndexes = std::vector<BSONObj>{ + BSON("v" << 2 << "key" << BSON("_id" << 1) << "name" + << "_id_" + 
<< "ns" + << "config.shards"), + BSON("v" << 2 << "unique" << true << "key" << BSON("host" << 1) << "name" + << "host_1" + << "ns" + << "config.shards")}; + + + auto foundShardsIndexes = assertGet(getIndexes(operationContext(), ShardType::ConfigNS)); + assertBSONObjsSame(expectedShardsIndexes, foundShardsIndexes); +} + +TEST_F(ConfigInitializationTest, IncompatibleIndexAlreadyExists) { + // Make the index non-unique even though its supposed to be unique, make sure initialization + // fails + getConfigShard() + ->createIndexOnConfig(operationContext(), ShardType::ConfigNS, BSON("host" << 1), false) + .transitional_ignore(); + + ASSERT_EQUALS(ErrorCodes::IndexOptionsConflict, + ShardingCatalogManager::get(operationContext()) + ->initializeConfigDatabaseIfNeeded(operationContext())); +} + +} // unnamed namespace +} // namespace mongo diff --git a/src/mongo/db/s/config/sharding_catalog_manager_create_database_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_create_database_test.cpp new file mode 100644 index 00000000000..9e4bdb14ffe --- /dev/null +++ b/src/mongo/db/s/config/sharding_catalog_manager_create_database_test.cpp @@ -0,0 +1,195 @@ +/** + * Copyright (C) 2017 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kSharding + +#include "mongo/platform/basic.h" + +#include <pcrecpp.h> + +#include "mongo/bson/json.h" +#include "mongo/client/remote_command_targeter_mock.h" +#include "mongo/db/commands.h" +#include "mongo/db/query/query_request.h" +#include "mongo/db/repl/read_concern_args.h" +#include "mongo/db/s/config/sharding_catalog_manager.h" +#include "mongo/executor/task_executor.h" +#include "mongo/rpc/get_status_from_command_result.h" +#include "mongo/rpc/metadata/repl_set_metadata.h" +#include "mongo/rpc/metadata/tracking_metadata.h" +#include "mongo/s/catalog/dist_lock_catalog_impl.h" +#include "mongo/s/catalog/type_database.h" +#include "mongo/s/catalog/type_locks.h" +#include "mongo/s/catalog/type_shard.h" +#include "mongo/s/catalog/type_tags.h" +#include "mongo/s/chunk_version.h" +#include "mongo/s/client/shard_registry.h" +#include "mongo/s/config_server_test_fixture.h" +#include "mongo/stdx/future.h" +#include "mongo/util/log.h" +#include "mongo/util/scopeguard.h" +#include "mongo/util/time_support.h" + +namespace mongo { +namespace { + +using executor::RemoteCommandRequest; +using std::vector; + 
+using CreateDatabaseTest = ConfigServerTestFixture; + +TEST_F(CreateDatabaseTest, createDatabaseSuccess) { + const std::string dbname = "db1"; + + ShardType s0; + s0.setName("shard0000"); + s0.setHost("ShardHost0:27017"); + ASSERT_OK(setupShards(vector<ShardType>{s0})); + + ShardType s1; + s1.setName("shard0001"); + s1.setHost("ShardHost1:27017"); + ASSERT_OK(setupShards(vector<ShardType>{s1})); + + ShardType s2; + s2.setName("shard0002"); + s2.setHost("ShardHost2:27017"); + ASSERT_OK(setupShards(vector<ShardType>{s2})); + + // Prime the shard registry with information about the existing shards + shardRegistry()->reload(operationContext()); + + // Set up all the target mocks return values. + RemoteCommandTargeterMock::get( + uassertStatusOK(shardRegistry()->getShard(operationContext(), s0.getName()))->getTargeter()) + ->setFindHostReturnValue(HostAndPort(s0.getHost())); + RemoteCommandTargeterMock::get( + uassertStatusOK(shardRegistry()->getShard(operationContext(), s1.getName()))->getTargeter()) + ->setFindHostReturnValue(HostAndPort(s1.getHost())); + RemoteCommandTargeterMock::get( + uassertStatusOK(shardRegistry()->getShard(operationContext(), s2.getName()))->getTargeter()) + ->setFindHostReturnValue(HostAndPort(s2.getHost())); + + // Now actually start the createDatabase work. 
+ + auto future = launchAsync([this, dbname] { + ON_BLOCK_EXIT([&] { Client::destroy(); }); + Client::initThreadIfNotAlready("Test"); + auto opCtx = cc().makeOperationContext(); + ShardingCatalogManager::get(opCtx.get())->createDatabase(opCtx.get(), dbname); + }); + + // Return size information about first shard + onCommand([&](const RemoteCommandRequest& request) { + ASSERT_EQUALS(s0.getHost(), request.target.toString()); + ASSERT_EQUALS("admin", request.dbname); + std::string cmdName = request.cmdObj.firstElement().fieldName(); + ASSERT_EQUALS("listDatabases", cmdName); + ASSERT_FALSE(request.cmdObj.hasField(repl::ReadConcernArgs::kReadConcernFieldName)); + + ASSERT_BSONOBJ_EQ( + ReadPreferenceSetting(ReadPreference::PrimaryPreferred).toContainingBSON(), + rpc::TrackingMetadata::removeTrackingData(request.metadata)); + + return BSON("ok" << 1 << "totalSize" << 10); + }); + + // Return size information about second shard + onCommand([&](const RemoteCommandRequest& request) { + ASSERT_EQUALS(s1.getHost(), request.target.toString()); + ASSERT_EQUALS("admin", request.dbname); + std::string cmdName = request.cmdObj.firstElement().fieldName(); + ASSERT_EQUALS("listDatabases", cmdName); + ASSERT_FALSE(request.cmdObj.hasField(repl::ReadConcernArgs::kReadConcernFieldName)); + + ASSERT_BSONOBJ_EQ( + ReadPreferenceSetting(ReadPreference::PrimaryPreferred).toContainingBSON(), + rpc::TrackingMetadata::removeTrackingData(request.metadata)); + + return BSON("ok" << 1 << "totalSize" << 1); + }); + + // Return size information about third shard + onCommand([&](const RemoteCommandRequest& request) { + ASSERT_EQUALS(s2.getHost(), request.target.toString()); + ASSERT_EQUALS("admin", request.dbname); + std::string cmdName = request.cmdObj.firstElement().fieldName(); + ASSERT_EQUALS("listDatabases", cmdName); + + ASSERT_BSONOBJ_EQ( + ReadPreferenceSetting(ReadPreference::PrimaryPreferred).toContainingBSON(), + rpc::TrackingMetadata::removeTrackingData(request.metadata)); + + return 
BSON("ok" << 1 << "totalSize" << 100); + }); + + future.timed_get(kFutureTimeout); +} + +TEST_F(CreateDatabaseTest, createDatabaseDBExists) { + const std::string dbname = "db3"; + + ShardType shard; + shard.setName("shard0"); + shard.setHost("shard0:12"); + + ASSERT_OK(setupShards(vector<ShardType>{shard})); + + setupDatabase(dbname, shard.getName(), false); + + ShardingCatalogManager::get(operationContext())->createDatabase(operationContext(), dbname); +} + +TEST_F(CreateDatabaseTest, createDatabaseDBExistsDifferentCase) { + const std::string dbname = "db4"; + const std::string dbnameDiffCase = "Db4"; + + ShardType shard; + shard.setName("shard0"); + shard.setHost("shard0:12"); + + ASSERT_OK(setupShards(vector<ShardType>{shard})); + + setupDatabase(dbnameDiffCase, shard.getName(), false); + + ASSERT_THROWS_CODE( + ShardingCatalogManager::get(operationContext())->createDatabase(operationContext(), dbname), + AssertionException, + ErrorCodes::DatabaseDifferCase); +} + +TEST_F(CreateDatabaseTest, createDatabaseNoShards) { + const std::string dbname = "db5"; + ASSERT_THROWS_CODE( + ShardingCatalogManager::get(operationContext())->createDatabase(operationContext(), dbname), + AssertionException, + ErrorCodes::ShardNotFound); +} + +} // namespace +} // namespace mongo diff --git a/src/mongo/db/s/config/sharding_catalog_manager_database_operations.cpp b/src/mongo/db/s/config/sharding_catalog_manager_database_operations.cpp new file mode 100644 index 00000000000..d84ea03282f --- /dev/null +++ b/src/mongo/db/s/config/sharding_catalog_manager_database_operations.cpp @@ -0,0 +1,166 @@ +/** + * Copyright (C) 2017 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */ + +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kSharding + +#include "mongo/db/s/config/sharding_catalog_manager.h" + +#include <pcrecpp.h> + +#include "mongo/bson/util/bson_extract.h" +#include "mongo/db/namespace_string.h" +#include "mongo/db/repl/repl_client_info.h" +#include "mongo/s/catalog/sharding_catalog_client_impl.h" +#include "mongo/s/catalog/type_database.h" +#include "mongo/s/client/shard.h" +#include "mongo/s/grid.h" +#include "mongo/util/log.h" + +namespace mongo { + +using std::string; +using std::vector; + +namespace { + +const ReadPreferenceSetting kConfigReadSelector(ReadPreference::Nearest, TagSet{}); + +} // namespace + +DatabaseType ShardingCatalogManager::createDatabase(OperationContext* opCtx, + const std::string& dbName) { + invariant(nsIsDbOnly(dbName)); + + // The admin and config databases should never be explicitly created. They "just exist", + // i.e. getDatabase will always return an entry for them. + if (dbName == "admin" || dbName == "config") { + uasserted(ErrorCodes::InvalidOptions, + str::stream() << "cannot manually create database '" << dbName << "'"); + } + + // Check if a database already exists with the same name (case sensitive), and if so, return the + // existing entry. 
+ + BSONObjBuilder queryBuilder; + queryBuilder.appendRegex( + DatabaseType::name(), (string) "^" + pcrecpp::RE::QuoteMeta(dbName) + "$", "i"); + + auto docs = uassertStatusOK(Grid::get(opCtx)->catalogClient()->_exhaustiveFindOnConfig( + opCtx, + ReadPreferenceSetting{ReadPreference::PrimaryOnly}, + repl::ReadConcernLevel::kLocalReadConcern, + DatabaseType::ConfigNS, + queryBuilder.obj(), + BSONObj(), + 1)) + .value; + + if (!docs.empty()) { + BSONObj dbObj = docs.front(); + std::string actualDbName = dbObj[DatabaseType::name()].String(); + + uassert(ErrorCodes::DatabaseDifferCase, + str::stream() << "can't have 2 databases that just differ on case " + << " have: " + << actualDbName + << " want to add: " + << dbName, + actualDbName == dbName); + + // We did a local read of the database entry above and found that the database already + // exists. However, the data may not be majority committed (a previous createDatabase + // attempt may have failed with a writeConcern error). + // Since the current Client doesn't know the opTime of the last write to the database entry, + // make it wait for the last opTime in the system when we wait for writeConcern. + repl::ReplClientInfo::forClient(opCtx->getClient()).setLastOpToSystemLastOpTime(opCtx); + return uassertStatusOK(DatabaseType::fromBSON(dbObj)); + } + + // The database does not exist. Pick a primary shard to place it on. + auto const primaryShardId = + uassertStatusOK(_selectShardForNewDatabase(opCtx, Grid::get(opCtx)->shardRegistry())); + log() << "Placing [" << dbName << "] on: " << primaryShardId; + + // Insert an entry for the new database into the sharding catalog. 
+ DatabaseType db(dbName, primaryShardId, false); + uassertStatusOK(Grid::get(opCtx)->catalogClient()->insertConfigDocument( + opCtx, DatabaseType::ConfigNS, db.toBSON(), ShardingCatalogClient::kMajorityWriteConcern)); + + return db; +} + +void ShardingCatalogManager::enableSharding(OperationContext* opCtx, const std::string& dbName) { + invariant(nsIsDbOnly(dbName)); + + uassert(ErrorCodes::IllegalOperation, + str::stream() << "Enabling sharding on the admin database is not allowed", + dbName != NamespaceString::kAdminDb); + + // Sharding is enabled automatically on the config db. + if (dbName == NamespaceString::kConfigDb) { + return; + } + + // Creates the database if it doesn't exist and returns the new database entry, else returns the + // existing database entry. + auto dbType = createDatabase(opCtx, dbName); + dbType.setSharded(true); + + log() << "Enabling sharding for database [" << dbName << "] in config db"; + uassertStatusOK(Grid::get(opCtx)->catalogClient()->updateDatabase(opCtx, dbName, dbType)); +} + +StatusWith<std::vector<std::string>> ShardingCatalogManager::getDatabasesForShard( + OperationContext* opCtx, const ShardId& shardId) { + auto findStatus = Grid::get(opCtx)->catalogClient()->_exhaustiveFindOnConfig( + opCtx, + kConfigReadSelector, + repl::ReadConcernLevel::kLocalReadConcern, + DatabaseType::ConfigNS, + BSON(DatabaseType::primary(shardId.toString())), + BSONObj(), + boost::none); // no limit + + if (!findStatus.isOK()) + return findStatus.getStatus(); + + std::vector<std::string> dbs; + for (const BSONObj& obj : findStatus.getValue().value) { + std::string dbName; + Status status = bsonExtractStringField(obj, DatabaseType::name(), &dbName); + if (!status.isOK()) { + return status; + } + + dbs.push_back(dbName); + } + + return dbs; +} + +} // namespace mongo diff --git a/src/mongo/db/s/config/sharding_catalog_manager_drop_coll_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_drop_coll_test.cpp new file mode 100644 index 
00000000000..1e767b6c0f1 --- /dev/null +++ b/src/mongo/db/s/config/sharding_catalog_manager_drop_coll_test.cpp @@ -0,0 +1,477 @@ +/** + * Copyright (C) 2015 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */ + +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kSharding + +#include "mongo/platform/basic.h" + +#include "mongo/client/remote_command_targeter_mock.h" +#include "mongo/db/namespace_string.h" +#include "mongo/db/s/config/sharding_catalog_manager.h" +#include "mongo/rpc/metadata/tracking_metadata.h" +#include "mongo/s/catalog/type_shard.h" +#include "mongo/s/chunk_version.h" +#include "mongo/s/client/shard_registry.h" +#include "mongo/s/config_server_test_fixture.h" +#include "mongo/util/scopeguard.h" + +namespace mongo { +namespace { + +using executor::RemoteCommandRequest; +using executor::RemoteCommandResponse; +using std::string; +using std::vector; +using unittest::assertGet; + +class DropColl2ShardTest : public ConfigServerTestFixture { +public: + void setUp() override { + ConfigServerTestFixture::setUp(); + + _shard1.setName("shard0001"); + _shard1.setHost("s:1"); + + _shard2.setName("shard0002"); + _shard2.setHost("s:2"); + + ASSERT_OK(setupShards({_shard1, _shard2})); + + auto shard1Targeter = RemoteCommandTargeterMock::get( + uassertStatusOK(shardRegistry()->getShard(operationContext(), _shard1.getName())) + ->getTargeter()); + shard1Targeter->setFindHostReturnValue(HostAndPort(_shard1.getHost())); + + auto shard2Targeter = RemoteCommandTargeterMock::get( + uassertStatusOK(shardRegistry()->getShard(operationContext(), _shard2.getName())) + ->getTargeter()); + shard2Targeter->setFindHostReturnValue(HostAndPort(_shard2.getHost())); + } + + void expectDrop(const ShardType& shard) { + onCommand([this, shard](const RemoteCommandRequest& request) { + ASSERT_EQ(HostAndPort(shard.getHost()), request.target); + ASSERT_EQ(_dropNS.db(), request.dbname); + ASSERT_BSONOBJ_EQ(BSON("drop" << _dropNS.coll() << "writeConcern" + << BSON("w" << 0 << "wtimeout" << 0)), + request.cmdObj); + + ASSERT_BSONOBJ_EQ(rpc::makeEmptyMetadata(), + rpc::TrackingMetadata::removeTrackingData(request.metadata)); + + return BSON("ns" << _dropNS.ns() << "ok" << 1); 
+ }); + } + + void expectSetShardVersionZero(const ShardType& shard) { + expectSetShardVersion( + HostAndPort(shard.getHost()), shard, dropNS(), ChunkVersion::DROPPED()); + } + + void expectUnsetSharding(const ShardType& shard) { + onCommand([shard](const RemoteCommandRequest& request) { + ASSERT_EQ(HostAndPort(shard.getHost()), request.target); + ASSERT_EQ("admin", request.dbname); + ASSERT_BSONOBJ_EQ(BSON("unsetSharding" << 1), request.cmdObj); + + ASSERT_BSONOBJ_EQ(rpc::makeEmptyMetadata(), + rpc::TrackingMetadata::removeTrackingData(request.metadata)); + + return BSON("n" << 1 << "ok" << 1); + }); + } + + void shutdownExecutor() { + ConfigServerTestFixture::executor()->shutdown(); + } + + Status doDrop() { + ON_BLOCK_EXIT([&] { Client::destroy(); }); + Client::initThreadIfNotAlready("Test"); + auto opCtx = cc().makeOperationContext(); + return ShardingCatalogManager::get(opCtx.get())->dropCollection(opCtx.get(), dropNS()); + } + + const NamespaceString& dropNS() const { + return _dropNS; + } + + const ShardType& shard1() const { + return _shard1; + } + + const ShardType& shard2() const { + return _shard2; + } + +private: + const NamespaceString _dropNS{"test.user"}; + ShardType _shard1; + ShardType _shard2; +}; + +TEST_F(DropColl2ShardTest, Basic) { + auto future = launchAsync([this] { + auto status = doDrop(); + ASSERT_OK(status); + }); + + expectDrop(shard1()); + expectDrop(shard2()); + + expectSetShardVersionZero(shard1()); + expectUnsetSharding(shard1()); + + expectSetShardVersionZero(shard2()); + expectUnsetSharding(shard2()); + + future.timed_get(kFutureTimeout); +} + +TEST_F(DropColl2ShardTest, NSNotFound) { + auto future = launchAsync([this] { + auto status = doDrop(); + ASSERT_OK(status); + }); + + onCommand([this](const RemoteCommandRequest& request) { + ASSERT_EQ(HostAndPort(shard1().getHost()), request.target); + ASSERT_EQ(dropNS().db(), request.dbname); + ASSERT_BSONOBJ_EQ( + BSON("drop" << dropNS().coll() << "writeConcern" << BSON("w" << 0 << 
"wtimeout" << 0)), + request.cmdObj); + + ASSERT_BSONOBJ_EQ(rpc::makeEmptyMetadata(), + rpc::TrackingMetadata::removeTrackingData(request.metadata)); + + return BSON("ok" << 0 << "code" << ErrorCodes::NamespaceNotFound); + }); + + onCommand([this](const RemoteCommandRequest& request) { + ASSERT_EQ(HostAndPort(shard2().getHost()), request.target); + ASSERT_EQ(dropNS().db(), request.dbname); + ASSERT_BSONOBJ_EQ( + BSON("drop" << dropNS().coll() << "writeConcern" << BSON("w" << 0 << "wtimeout" << 0)), + request.cmdObj); + + ASSERT_BSONOBJ_EQ(rpc::makeEmptyMetadata(), + rpc::TrackingMetadata::removeTrackingData(request.metadata)); + + return BSON("ok" << 0 << "code" << ErrorCodes::NamespaceNotFound); + }); + + expectSetShardVersionZero(shard1()); + expectUnsetSharding(shard1()); + + expectSetShardVersionZero(shard2()); + expectUnsetSharding(shard2()); + + future.timed_get(kFutureTimeout); +} + +TEST_F(DropColl2ShardTest, FirstShardTargeterError) { + auto shard1Targeter = RemoteCommandTargeterMock::get( + uassertStatusOK(shardRegistry()->getShard(operationContext(), shard1().getName())) + ->getTargeter()); + shard1Targeter->setFindHostReturnValue({ErrorCodes::HostUnreachable, "bad test network"}); + + auto future = launchAsync([this] { + auto status = doDrop(); + ASSERT_EQ(ErrorCodes::HostUnreachable, status.code()); + ASSERT_FALSE(status.reason().empty()); + }); + + future.timed_get(kFutureTimeout); +} + +TEST_F(DropColl2ShardTest, FirstShardDropError) { + auto future = launchAsync([this] { + auto status = doDrop(); + ASSERT_EQ(ErrorCodes::CallbackCanceled, status.code()); + ASSERT_FALSE(status.reason().empty()); + }); + + onCommand([this](const RemoteCommandRequest& request) { + shutdownExecutor(); // shutdown executor so drop command will fail. 
+ return BSON("ok" << 1); + }); + + future.timed_get(kFutureTimeout); +} + +TEST_F(DropColl2ShardTest, FirstShardDropCmdError) { + auto future = launchAsync([this] { + auto status = doDrop(); + ASSERT_EQ(ErrorCodes::OperationFailed, status.code()); + ASSERT_FALSE(status.reason().empty()); + }); + + // drop command will be sent to all shards even if we get a not ok response from one shard. + onCommand([](const RemoteCommandRequest& request) { + return BSON("ok" << 0 << "code" << ErrorCodes::Unauthorized); + }); + + expectDrop(shard2()); + + future.timed_get(kFutureTimeout); +} + +TEST_F(DropColl2ShardTest, SecondShardTargeterError) { + auto shard2Targeter = RemoteCommandTargeterMock::get( + uassertStatusOK(shardRegistry()->getShard(operationContext(), shard2().getName())) + ->getTargeter()); + shard2Targeter->setFindHostReturnValue({ErrorCodes::HostUnreachable, "bad test network"}); + + auto future = launchAsync([this] { + auto status = doDrop(); + ASSERT_EQ(ErrorCodes::HostUnreachable, status.code()); + ASSERT_FALSE(status.reason().empty()); + }); + + expectDrop(shard1()); + + future.timed_get(kFutureTimeout); +} + +TEST_F(DropColl2ShardTest, SecondShardDropError) { + auto future = launchAsync([this] { + auto status = doDrop(); + ASSERT_EQ(ErrorCodes::CallbackCanceled, status.code()); + ASSERT_FALSE(status.reason().empty()); + }); + + expectDrop(shard1()); + + onCommand([this](const RemoteCommandRequest& request) { + shutdownExecutor(); // shutdown executor so drop command will fail. 
+ return BSON("ok" << 1); + }); + + future.timed_get(kFutureTimeout); +} + +TEST_F(DropColl2ShardTest, SecondShardDropCmdError) { + auto future = launchAsync([this] { + auto status = doDrop(); + ASSERT_EQ(ErrorCodes::OperationFailed, status.code()); + ASSERT_FALSE(status.reason().empty()); + }); + + expectDrop(shard1()); + + onCommand([](const RemoteCommandRequest& request) { + return BSON("ok" << 0 << "code" << ErrorCodes::Unauthorized); + }); + + future.timed_get(kFutureTimeout); +} + +TEST_F(DropColl2ShardTest, CleanupChunkError) { + auto future = launchAsync([this] { + auto status = doDrop(); + ASSERT_EQ(ErrorCodes::Unauthorized, status.code()); + ASSERT_FALSE(status.reason().empty()); + }); + + expectDrop(shard1()); + expectDrop(shard2()); + + onCommand([](const RemoteCommandRequest& request) { + return BSON("ok" << 0 << "code" << ErrorCodes::Unauthorized << "errmsg" + << "bad delete"); + }); + + future.timed_get(kFutureTimeout); +} + +TEST_F(DropColl2ShardTest, SSVCmdErrorOnShard1) { + auto future = launchAsync([this] { + auto status = doDrop(); + ASSERT_EQ(ErrorCodes::Unauthorized, status.code()); + ASSERT_FALSE(status.reason().empty()); + }); + + expectDrop(shard1()); + expectDrop(shard2()); + + onCommand([](const RemoteCommandRequest& request) { + return BSON("ok" << 0 << "code" << ErrorCodes::Unauthorized << "errmsg" + << "bad"); + }); + + future.timed_get(kFutureTimeout); +} + +TEST_F(DropColl2ShardTest, SSVErrorOnShard1) { + auto future = launchAsync([this] { + auto status = doDrop(); + ASSERT_EQ(ErrorCodes::CallbackCanceled, status.code()); + ASSERT_FALSE(status.reason().empty()); + }); + + expectDrop(shard1()); + expectDrop(shard2()); + + onCommand([this](const RemoteCommandRequest& request) { + shutdownExecutor(); // shutdown executor so ssv command will fail. 
+ return BSON("ok" << 1); + }); + + future.timed_get(kFutureTimeout); +} + +TEST_F(DropColl2ShardTest, UnsetCmdErrorOnShard1) { + auto future = launchAsync([this] { + auto status = doDrop(); + ASSERT_EQ(ErrorCodes::Unauthorized, status.code()); + ASSERT_FALSE(status.reason().empty()); + }); + + expectDrop(shard1()); + expectDrop(shard2()); + + expectSetShardVersionZero(shard1()); + + onCommand([](const RemoteCommandRequest& request) { + return BSON("ok" << 0 << "code" << ErrorCodes::Unauthorized << "errmsg" + << "bad"); + }); + + future.timed_get(kFutureTimeout); +} + +TEST_F(DropColl2ShardTest, UnsetErrorOnShard1) { + auto future = launchAsync([this] { + auto status = doDrop(); + ASSERT_EQ(ErrorCodes::CallbackCanceled, status.code()); + ASSERT_FALSE(status.reason().empty()); + }); + + expectDrop(shard1()); + expectDrop(shard2()); + + expectSetShardVersionZero(shard1()); + + onCommand([this](const RemoteCommandRequest& request) { + shutdownExecutor(); // shutdown executor so unsetSharding command will fail. 
+ return BSON("ok" << 1); + }); + + future.timed_get(kFutureTimeout); +} + +TEST_F(DropColl2ShardTest, SSVCmdErrorOnShard2) { + auto future = launchAsync([this] { + auto status = doDrop(); + ASSERT_EQ(ErrorCodes::Unauthorized, status.code()); + ASSERT_FALSE(status.reason().empty()); + }); + + expectDrop(shard1()); + expectDrop(shard2()); + + expectSetShardVersionZero(shard1()); + expectUnsetSharding(shard1()); + + onCommand([](const RemoteCommandRequest& request) { + return BSON("ok" << 0 << "code" << ErrorCodes::Unauthorized << "errmsg" + << "bad"); + }); + + future.timed_get(kFutureTimeout); +} + +TEST_F(DropColl2ShardTest, SSVErrorOnShard2) { + auto future = launchAsync([this] { + auto status = doDrop(); + ASSERT_EQ(ErrorCodes::CallbackCanceled, status.code()); + ASSERT_FALSE(status.reason().empty()); + }); + + expectDrop(shard1()); + expectDrop(shard2()); + + expectSetShardVersionZero(shard1()); + expectUnsetSharding(shard1()); + + onCommand([this](const RemoteCommandRequest& request) { + shutdownExecutor(); // shutdown executor so ssv command will fail. 
+ return BSON("ok" << 1); + }); + + future.timed_get(kFutureTimeout); +} + +TEST_F(DropColl2ShardTest, UnsetCmdErrorOnShard2) { + auto future = launchAsync([this] { + auto status = doDrop(); + ASSERT_EQ(ErrorCodes::Unauthorized, status.code()); + ASSERT_FALSE(status.reason().empty()); + }); + + expectDrop(shard1()); + expectDrop(shard2()); + + expectSetShardVersionZero(shard1()); + expectUnsetSharding(shard1()); + + expectSetShardVersionZero(shard2()); + + onCommand([](const RemoteCommandRequest& request) { + return BSON("ok" << 0 << "code" << ErrorCodes::Unauthorized << "errmsg" + << "bad"); + }); + + future.timed_get(kFutureTimeout); +} + +TEST_F(DropColl2ShardTest, UnsetErrorOnShard2) { + auto future = launchAsync([this] { + auto status = doDrop(); + ASSERT_EQ(ErrorCodes::CallbackCanceled, status.code()); + ASSERT_FALSE(status.reason().empty()); + }); + + expectDrop(shard1()); + expectDrop(shard2()); + + expectSetShardVersionZero(shard1()); + expectUnsetSharding(shard1()); + + expectSetShardVersionZero(shard2()); + + onCommand([this](const RemoteCommandRequest& request) { + shutdownExecutor(); // shutdown executor so unset command will fail. + return BSON("ok" << 1); + }); + + future.timed_get(kFutureTimeout); +} + +} // unnamed namespace +} // namespace mongo diff --git a/src/mongo/db/s/config/sharding_catalog_manager_enable_sharding_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_enable_sharding_test.cpp new file mode 100644 index 00000000000..b97b8efbca0 --- /dev/null +++ b/src/mongo/db/s/config/sharding_catalog_manager_enable_sharding_test.cpp @@ -0,0 +1,168 @@ +/** + * Copyright (C) 2017 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */ + +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kSharding + +#include "mongo/platform/basic.h" + +#include <pcrecpp.h> + +#include "mongo/bson/json.h" +#include "mongo/client/remote_command_targeter_mock.h" +#include "mongo/db/commands.h" +#include "mongo/db/query/query_request.h" +#include "mongo/db/repl/read_concern_args.h" +#include "mongo/db/s/config/sharding_catalog_manager.h" +#include "mongo/executor/task_executor.h" +#include "mongo/rpc/get_status_from_command_result.h" +#include "mongo/rpc/metadata/repl_set_metadata.h" +#include "mongo/rpc/metadata/tracking_metadata.h" +#include "mongo/s/catalog/dist_lock_catalog_impl.h" +#include "mongo/s/catalog/type_database.h" +#include "mongo/s/catalog/type_locks.h" +#include "mongo/s/catalog/type_shard.h" +#include "mongo/s/catalog/type_tags.h" +#include "mongo/s/chunk_version.h" +#include "mongo/s/client/shard_registry.h" +#include "mongo/s/config_server_test_fixture.h" +#include "mongo/s/write_ops/batched_command_response.h" +#include "mongo/stdx/future.h" +#include "mongo/util/log.h" +#include "mongo/util/scopeguard.h" +#include "mongo/util/time_support.h" + +namespace mongo { +namespace { + +using executor::RemoteCommandRequest; +using std::vector; + +class EnableShardingTest : public ConfigServerTestFixture {}; + +TEST_F(EnableShardingTest, noDBExists) { + ShardType shard; + shard.setName("shard0"); + shard.setHost("shard0:12"); + + ASSERT_OK(setupShards(vector<ShardType>{shard})); + + auto shardTargeter = RemoteCommandTargeterMock::get( + uassertStatusOK(shardRegistry()->getShard(operationContext(), ShardId("shard0"))) + ->getTargeter()); + shardTargeter->setFindHostReturnValue(HostAndPort("shard0:12")); + + auto future = launchAsync([&] { + ON_BLOCK_EXIT([&] { Client::destroy(); }); + Client::initThreadIfNotAlready("Test"); + auto opCtx = cc().makeOperationContext(); + ShardingCatalogManager::get(opCtx.get())->enableSharding(opCtx.get(), "db1"); + }); + + // list databases for 
checking shard size. + onCommand([](const RemoteCommandRequest& request) { + ASSERT_EQ(HostAndPort("shard0:12"), request.target); + ASSERT_EQ("admin", request.dbname); + ASSERT_BSONOBJ_EQ(BSON("listDatabases" << 1 << "maxTimeMS" << 600000), request.cmdObj); + + ASSERT_BSONOBJ_EQ( + ReadPreferenceSetting(ReadPreference::PrimaryPreferred).toContainingBSON(), + rpc::TrackingMetadata::removeTrackingData(request.metadata)); + + return fromjson(R"({ + databases: [], + totalSize: 1, + ok: 1 + })"); + }); + + future.timed_get(kFutureTimeout); +} + +TEST_F(EnableShardingTest, dbExistsWithDifferentCase) { + ShardType shard; + shard.setName("shard0"); + shard.setHost("shard0:12"); + ASSERT_OK(setupShards(vector<ShardType>{shard})); + setupDatabase("Db3", shard.getName(), false); + ASSERT_THROWS_CODE( + ShardingCatalogManager::get(operationContext())->enableSharding(operationContext(), "db3"), + AssertionException, + ErrorCodes::DatabaseDifferCase); +} + +TEST_F(EnableShardingTest, dbExists) { + ShardType shard; + shard.setName("shard0"); + shard.setHost("shard0:12"); + ASSERT_OK(setupShards(vector<ShardType>{shard})); + setupDatabase("db4", shard.getName(), false); + ShardingCatalogManager::get(operationContext())->enableSharding(operationContext(), "db4"); +} + +TEST_F(EnableShardingTest, succeedsWhenTheDatabaseIsAlreadySharded) { + ShardType shard; + shard.setName("shard0"); + shard.setHost("shard0:12"); + ASSERT_OK(setupShards(vector<ShardType>{shard})); + setupDatabase("db5", shard.getName(), true); + ShardingCatalogManager::get(operationContext())->enableSharding(operationContext(), "db5"); +} + +TEST_F(EnableShardingTest, dbExistsInvalidFormat) { + ShardType shard; + shard.setName("shard0"); + shard.setHost("shard0:12"); + + ASSERT_OK(setupShards(vector<ShardType>{shard})); + + // Set up database with bad type for primary field. 
+ ASSERT_OK(catalogClient()->insertConfigDocument(operationContext(), + DatabaseType::ConfigNS, + BSON("_id" + << "db6" + << "primary" + << 12 + << "partitioned" + << false), + ShardingCatalogClient::kMajorityWriteConcern)); + + ASSERT_THROWS_CODE( + ShardingCatalogManager::get(operationContext())->enableSharding(operationContext(), "db6"), + AssertionException, + ErrorCodes::TypeMismatch); +} + +TEST_F(EnableShardingTest, noDBExistsNoShards) { + ASSERT_THROWS_CODE( + ShardingCatalogManager::get(operationContext())->enableSharding(operationContext(), "db7"), + AssertionException, + ErrorCodes::ShardNotFound); +} + +} // namespace +} // namespace mongo diff --git a/src/mongo/db/s/config/sharding_catalog_manager_merge_chunks_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_merge_chunks_test.cpp new file mode 100644 index 00000000000..3d115ec0a80 --- /dev/null +++ b/src/mongo/db/s/config/sharding_catalog_manager_merge_chunks_test.cpp @@ -0,0 +1,474 @@ +/** + * Copyright (C) 2016 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. 
You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/client/read_preference.h" +#include "mongo/db/namespace_string.h" +#include "mongo/db/s/config/sharding_catalog_manager.h" +#include "mongo/s/catalog/sharding_catalog_client.h" +#include "mongo/s/catalog/type_chunk.h" +#include "mongo/s/config_server_test_fixture.h" + +namespace mongo { +namespace { + +using MergeChunkTest = ConfigServerTestFixture; + +const NamespaceString kNamespace("TestDB.TestColl"); + +TEST_F(MergeChunkTest, MergeExistingChunksCorrectlyShouldSucceed) { + ChunkType chunk; + chunk.setNS(kNamespace); + + auto origVersion = ChunkVersion(1, 0, OID::gen()); + chunk.setVersion(origVersion); + chunk.setShard(ShardId("shard0000")); + + // Construct chunk to be merged + auto chunk2(chunk); + + auto chunkMin = BSON("a" << 1); + auto chunkBound = BSON("a" << 5); + auto chunkMax = BSON("a" << 10); + // first chunk boundaries + chunk.setMin(chunkMin); + chunk.setMax(chunkBound); + // second chunk boundaries + chunk2.setMin(chunkBound); + chunk2.setMax(chunkMax); + + std::vector<BSONObj> chunkBoundaries{chunkMin, chunkBound, chunkMax}; + + setupChunks({chunk, chunk2}).transitional_ignore(); + + ASSERT_OK(ShardingCatalogManager::get(operationContext()) + ->commitChunkMerge(operationContext(), + NamespaceString("TestDB.TestColl"), + origVersion.epoch(), + chunkBoundaries, + "shard0000")); + + auto findResponse = uassertStatusOK( + getConfigShard()->exhaustiveFindOnConfig(operationContext(), + 
ReadPreferenceSetting{ReadPreference::PrimaryOnly}, + repl::ReadConcernLevel::kLocalReadConcern, + ChunkType::ConfigNS, + BSON(ChunkType::ns() << "TestDB.TestColl"), + BSON(ChunkType::lastmod << -1), + boost::none)); + + const auto& chunksVector = findResponse.docs; + + // There should be exactly one chunk left in the collection + ASSERT_EQ(1u, chunksVector.size()); + + // MergedChunk should have range [chunkMin, chunkMax] + auto mergedChunk = uassertStatusOK(ChunkType::fromConfigBSON(chunksVector.front())); + ASSERT_BSONOBJ_EQ(chunkMin, mergedChunk.getMin()); + ASSERT_BSONOBJ_EQ(chunkMax, mergedChunk.getMax()); + + { + // Check for increment on mergedChunk's minor version + ASSERT_EQ(origVersion.majorVersion(), mergedChunk.getVersion().majorVersion()); + ASSERT_EQ(origVersion.minorVersion() + 1, mergedChunk.getVersion().minorVersion()); + } +} + +TEST_F(MergeChunkTest, MergeSeveralChunksCorrectlyShouldSucceed) { + ChunkType chunk; + chunk.setNS(kNamespace); + + auto origVersion = ChunkVersion(1, 0, OID::gen()); + chunk.setVersion(origVersion); + chunk.setShard(ShardId("shard0000")); + + // Construct chunks to be merged + auto chunk2(chunk); + auto chunk3(chunk); + + auto chunkMin = BSON("a" << 1); + auto chunkBound = BSON("a" << 5); + auto chunkBound2 = BSON("a" << 7); + auto chunkMax = BSON("a" << 10); + // first chunk boundaries + chunk.setMin(chunkMin); + chunk.setMax(chunkBound); + // second chunk boundaries + chunk2.setMin(chunkBound); + chunk2.setMax(chunkBound2); + // third chunk boundaries + chunk3.setMin(chunkBound2); + chunk3.setMax(chunkMax); + + // Record chunk boundaries for passing into commitChunkMerge + std::vector<BSONObj> chunkBoundaries{chunkMin, chunkBound, chunkBound2, chunkMax}; + + setupChunks({chunk, chunk2, chunk3}).transitional_ignore(); + + ASSERT_OK(ShardingCatalogManager::get(operationContext()) + ->commitChunkMerge(operationContext(), + NamespaceString("TestDB.TestColl"), + origVersion.epoch(), + chunkBoundaries, + "shard0000")); + + 
auto findResponse = uassertStatusOK( + getConfigShard()->exhaustiveFindOnConfig(operationContext(), + ReadPreferenceSetting{ReadPreference::PrimaryOnly}, + repl::ReadConcernLevel::kLocalReadConcern, + ChunkType::ConfigNS, + BSON(ChunkType::ns() << "TestDB.TestColl"), + BSON(ChunkType::lastmod << -1), + boost::none)); + + const auto& chunksVector = findResponse.docs; + + // There should be exactly one chunk left in the collection + ASSERT_EQ(1u, chunksVector.size()); + + // MergedChunk should have range [chunkMin, chunkMax] + auto mergedChunk = uassertStatusOK(ChunkType::fromConfigBSON(chunksVector.front())); + ASSERT_BSONOBJ_EQ(chunkMin, mergedChunk.getMin()); + ASSERT_BSONOBJ_EQ(chunkMax, mergedChunk.getMax()); + + { + // Check for increment on mergedChunk's minor version + ASSERT_EQ(origVersion.majorVersion(), mergedChunk.getVersion().majorVersion()); + ASSERT_EQ(origVersion.minorVersion() + 1, mergedChunk.getVersion().minorVersion()); + } +} + +TEST_F(MergeChunkTest, NewMergeShouldClaimHighestVersion) { + ChunkType chunk, otherChunk; + chunk.setNS(kNamespace); + otherChunk.setNS(kNamespace); + auto collEpoch = OID::gen(); + + auto origVersion = ChunkVersion(1, 2, collEpoch); + chunk.setVersion(origVersion); + chunk.setShard(ShardId("shard0000")); + + // Construct chunk to be merged + auto chunk2(chunk); + + auto chunkMin = BSON("a" << 1); + auto chunkBound = BSON("a" << 5); + auto chunkMax = BSON("a" << 10); + // first chunk boundaries + chunk.setMin(chunkMin); + chunk.setMax(chunkBound); + // second chunk boundaries + chunk2.setMin(chunkBound); + chunk2.setMax(chunkMax); + + // Record chunk boundaries for passing into commitChunkMerge + std::vector<BSONObj> chunkBoundaries{chunkMin, chunkBound, chunkMax}; + + // Set up other chunk with competing version + auto competingVersion = ChunkVersion(2, 1, collEpoch); + otherChunk.setVersion(competingVersion); + otherChunk.setShard(ShardId("shard0000")); + otherChunk.setMin(BSON("a" << 10)); + otherChunk.setMax(BSON("a" 
<< 20)); + + setupChunks({chunk, chunk2, otherChunk}).transitional_ignore(); + + ASSERT_OK(ShardingCatalogManager::get(operationContext()) + ->commitChunkMerge(operationContext(), + NamespaceString("TestDB.TestColl"), + collEpoch, + chunkBoundaries, + "shard0000")); + + auto findResponse = uassertStatusOK( + getConfigShard()->exhaustiveFindOnConfig(operationContext(), + ReadPreferenceSetting{ReadPreference::PrimaryOnly}, + repl::ReadConcernLevel::kLocalReadConcern, + ChunkType::ConfigNS, + BSON(ChunkType::ns() << "TestDB.TestColl"), + BSON(ChunkType::lastmod << -1), + boost::none)); + + const auto& chunksVector = findResponse.docs; + + // There should be exactly two chunks left in the collection: one merged, one competing + ASSERT_EQ(2u, chunksVector.size()); + + // MergedChunk should have range [chunkMin, chunkMax] + auto mergedChunk = uassertStatusOK(ChunkType::fromConfigBSON(chunksVector.front())); + ASSERT_BSONOBJ_EQ(chunkMin, mergedChunk.getMin()); + ASSERT_BSONOBJ_EQ(chunkMax, mergedChunk.getMax()); + + { + // Check for minor increment on collection version + ASSERT_EQ(competingVersion.majorVersion(), mergedChunk.getVersion().majorVersion()); + ASSERT_EQ(competingVersion.minorVersion() + 1, mergedChunk.getVersion().minorVersion()); + } +} + +TEST_F(MergeChunkTest, MergeLeavesOtherChunksAlone) { + ChunkType chunk; + chunk.setNS(kNamespace); + + auto origVersion = ChunkVersion(1, 2, OID::gen()); + chunk.setVersion(origVersion); + chunk.setShard(ShardId("shard0000")); + + // Construct chunk to be merged + auto chunk2(chunk); + + auto chunkMin = BSON("a" << 1); + auto chunkBound = BSON("a" << 5); + auto chunkMax = BSON("a" << 10); + // first chunk boundaries + chunk.setMin(chunkMin); + chunk.setMax(chunkBound); + // second chunk boundaries + chunk2.setMin(chunkBound); + chunk2.setMax(chunkMax); + + // Record chunk boundaries for passing into commitChunkMerge + std::vector<BSONObj> chunkBoundaries{chunkMin, chunkBound, chunkMax}; + + // Set up unmerged chunk + 
auto otherChunk(chunk); + otherChunk.setMin(BSON("a" << 10)); + otherChunk.setMax(BSON("a" << 20)); + + setupChunks({chunk, chunk2, otherChunk}).transitional_ignore(); + + ASSERT_OK(ShardingCatalogManager::get(operationContext()) + ->commitChunkMerge(operationContext(), + NamespaceString("TestDB.TestColl"), + origVersion.epoch(), + chunkBoundaries, + "shard0000")); + + auto findResponse = uassertStatusOK( + getConfigShard()->exhaustiveFindOnConfig(operationContext(), + ReadPreferenceSetting{ReadPreference::PrimaryOnly}, + repl::ReadConcernLevel::kLocalReadConcern, + ChunkType::ConfigNS, + BSON(ChunkType::ns() << "TestDB.TestColl"), + BSON(ChunkType::lastmod << -1), + boost::none)); + + const auto& chunksVector = findResponse.docs; + + // There should be exactly two chunks left in the collection: one merged, one untouched + ASSERT_EQ(2u, chunksVector.size()); + + // MergedChunk should have range [chunkMin, chunkMax] + auto mergedChunk = uassertStatusOK(ChunkType::fromConfigBSON(chunksVector.front())); + ASSERT_BSONOBJ_EQ(chunkMin, mergedChunk.getMin()); + ASSERT_BSONOBJ_EQ(chunkMax, mergedChunk.getMax()); + + { + // Check for increment on mergedChunk's minor version + ASSERT_EQ(origVersion.majorVersion(), mergedChunk.getVersion().majorVersion()); + ASSERT_EQ(origVersion.minorVersion() + 1, mergedChunk.getVersion().minorVersion()); + } + + // OtherChunk should have been left alone + auto foundOtherChunk = uassertStatusOK(ChunkType::fromConfigBSON(chunksVector.back())); + ASSERT_BSONOBJ_EQ(otherChunk.getMin(), foundOtherChunk.getMin()); + ASSERT_BSONOBJ_EQ(otherChunk.getMax(), foundOtherChunk.getMax()); +} + +TEST_F(MergeChunkTest, NonExistingNamespace) { + ChunkType chunk; + chunk.setNS(kNamespace); + + auto origVersion = ChunkVersion(1, 0, OID::gen()); + chunk.setVersion(origVersion); + chunk.setShard(ShardId("shard0000")); + + // Construct chunk to be merged + auto chunk2(chunk); + + auto chunkMin = BSON("a" << 1); + auto chunkBound = BSON("a" << 5); + auto 
chunkMax = BSON("a" << 10); + // first chunk boundaries + chunk.setMin(chunkMin); + chunk.setMax(chunkBound); + chunk2.setMin(chunkBound); + chunk2.setMax(chunkMax); + + // Record chunk boundaries for passing into commitChunkMerge + std::vector<BSONObj> chunkBoundaries{chunkMin, chunkBound, chunkMax}; + + setupChunks({chunk, chunk2}).transitional_ignore(); + + auto mergeStatus = ShardingCatalogManager::get(operationContext()) + ->commitChunkMerge(operationContext(), + NamespaceString("TestDB.NonExistingColl"), + origVersion.epoch(), + chunkBoundaries, + "shard0000"); + ASSERT_EQ(ErrorCodes::IllegalOperation, mergeStatus); +} + +TEST_F(MergeChunkTest, NonMatchingEpochsOfChunkAndRequestErrors) { + ChunkType chunk; + chunk.setNS(kNamespace); + + auto origVersion = ChunkVersion(1, 0, OID::gen()); + chunk.setVersion(origVersion); + chunk.setShard(ShardId("shard0000")); + + // Construct chunk to be merged + auto chunk2(chunk); + + auto chunkMin = BSON("a" << 1); + auto chunkBound = BSON("a" << 5); + auto chunkMax = BSON("a" << 10); + // first chunk boundaries + chunk.setMin(chunkMin); + chunk.setMax(chunkBound); + chunk2.setMin(chunkBound); + chunk2.setMax(chunkMax); + + // Record chunk baoundaries for passing into commitChunkMerge + std::vector<BSONObj> chunkBoundaries{chunkMin, chunkBound, chunkMax}; + + setupChunks({chunk, chunk2}).transitional_ignore(); + + auto mergeStatus = ShardingCatalogManager::get(operationContext()) + ->commitChunkMerge(operationContext(), + NamespaceString("TestDB.TestColl"), + OID::gen(), + chunkBoundaries, + "shard0000"); + ASSERT_EQ(ErrorCodes::StaleEpoch, mergeStatus); +} + +TEST_F(MergeChunkTest, MergeAlreadyHappenedFailsPrecondition) { + ChunkType chunk; + chunk.setNS(kNamespace); + + auto origVersion = ChunkVersion(1, 0, OID::gen()); + chunk.setVersion(origVersion); + chunk.setShard(ShardId("shard0000")); + + // Construct chunk to be merged + auto chunk2(chunk); + + auto chunkMin = BSON("a" << 1); + auto chunkBound = BSON("a" << 5); + 
auto chunkMax = BSON("a" << 10); + // first chunk boundaries + chunk.setMin(chunkMin); + chunk.setMax(chunkBound); + // second chunk boundaries + chunk2.setMin(chunkBound); + chunk2.setMax(chunkMax); + + std::vector<BSONObj> chunkBoundaries{chunkMin, chunkBound, chunkMax}; + + ChunkType mergedChunk(chunk); + auto mergedVersion = chunk.getVersion(); + mergedVersion.incMinor(); + mergedChunk.setVersion(mergedVersion); + mergedChunk.setMax(chunkMax); + + setupChunks({mergedChunk}).transitional_ignore(); + + ASSERT_EQ(ErrorCodes::BadValue, + ShardingCatalogManager::get(operationContext()) + ->commitChunkMerge(operationContext(), + NamespaceString("TestDB.TestColl"), + origVersion.epoch(), + chunkBoundaries, + "shard0000")); + + // Verify that no change to config.chunks happened. + auto findResponse = uassertStatusOK( + getConfigShard()->exhaustiveFindOnConfig(operationContext(), + ReadPreferenceSetting{ReadPreference::PrimaryOnly}, + repl::ReadConcernLevel::kLocalReadConcern, + ChunkType::ConfigNS, + BSON(ChunkType::ns() << "TestDB.TestColl"), + BSON(ChunkType::lastmod << -1), + boost::none)); + + const auto& chunksVector = findResponse.docs; + + // There should be exactly one chunk left in the collection + ASSERT_EQ(1u, chunksVector.size()); + + // MergedChunk should have range [chunkMin, chunkMax] + ChunkType foundChunk = uassertStatusOK(ChunkType::fromConfigBSON(chunksVector.front())); + ASSERT_BSONOBJ_EQ(mergedChunk.toConfigBSON(), foundChunk.toConfigBSON()); +} + +TEST_F(MergeChunkTest, ChunkBoundariesOutOfOrderFails) { + const OID epoch = OID::gen(); + const std::vector<BSONObj> chunkBoundaries{ + BSON("a" << 100), BSON("a" << 200), BSON("a" << 30), BSON("a" << 400)}; + + { + std::vector<ChunkType> originalChunks; + ChunkVersion version = ChunkVersion(1, 0, epoch); + + ChunkType chunk; + chunk.setNS(kNamespace); + chunk.setShard(ShardId("shard0000")); + + chunk.setVersion(version); + chunk.setMin(BSON("a" << 100)); + chunk.setMax(BSON("a" << 200)); + 
originalChunks.push_back(chunk); + + version.incMinor(); + chunk.setMin(BSON("a" << 200)); + chunk.setMax(BSON("a" << 300)); + chunk.setVersion(version); + originalChunks.push_back(chunk); + + version.incMinor(); + chunk.setMin(BSON("a" << 300)); + chunk.setMax(BSON("a" << 400)); + chunk.setVersion(version); + originalChunks.push_back(chunk); + + setupChunks(originalChunks).transitional_ignore(); + } + + ASSERT_EQ(ErrorCodes::InvalidOptions, + ShardingCatalogManager::get(operationContext()) + ->commitChunkMerge(operationContext(), + NamespaceString("TestDB.TestColl"), + epoch, + chunkBoundaries, + "shard0000")); +} + +} // namespace +} // namespace mongo diff --git a/src/mongo/db/s/config/sharding_catalog_manager_remove_shard_from_zone_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_remove_shard_from_zone_test.cpp new file mode 100644 index 00000000000..f3998cab85c --- /dev/null +++ b/src/mongo/db/s/config/sharding_catalog_manager_remove_shard_from_zone_test.cpp @@ -0,0 +1,260 @@ +/** + * Copyright (C) 2016 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. 
You + * must comply with the GNU Affero General Public License in all respects + * for all of the code used other than as permitted herein. If you modify + * file(s) with this exception, you may extend this exception to your + * version of the file(s), but you are not obligated to do so. If you do not + * wish to do so, delete this exception statement from your version. If you + * delete this exception statement from all source files in the program, + * then also delete it in the license file. + */ + +#include "mongo/platform/basic.h" + +#include "mongo/client/read_preference.h" +#include "mongo/db/namespace_string.h" +#include "mongo/db/s/config/sharding_catalog_manager.h" +#include "mongo/s/catalog/type_shard.h" +#include "mongo/s/catalog/type_tags.h" +#include "mongo/s/client/shard.h" +#include "mongo/s/config_server_test_fixture.h" + +namespace mongo { +namespace { + + +ReadPreferenceSetting kReadPref(ReadPreference::PrimaryOnly); + +using RemoveShardFromZoneTest = ConfigServerTestFixture; + +TEST_F(RemoveShardFromZoneTest, RemoveZoneThatNoLongerExistsShouldNotError) { + ShardType shard; + shard.setName("a"); + shard.setHost("a:1234"); + + setupShards({shard}).transitional_ignore(); + + ASSERT_OK(ShardingCatalogManager::get(operationContext()) + ->removeShardFromZone(operationContext(), shard.getName(), "z")); + auto shardDocStatus = getShardDoc(operationContext(), shard.getName()); + ASSERT_OK(shardDocStatus.getStatus()); + + auto shardDoc = shardDocStatus.getValue(); + auto tags = shardDoc.getTags(); + ASSERT_TRUE(tags.empty()); +} + +TEST_F(RemoveShardFromZoneTest, RemovingZoneThatIsOnlyReferencedByAnotherShardShouldSucceed) { + ShardType shardA; + shardA.setName("a"); + shardA.setHost("a:1234"); + shardA.setTags({"z"}); + + ShardType shardB; + shardB.setName("b"); + shardB.setHost("b:1234"); + + setupShards({shardA, shardB}).transitional_ignore(); + + ASSERT_OK(ShardingCatalogManager::get(operationContext()) + ->removeShardFromZone(operationContext(), 
shardB.getName(), "z")); + + // Shard A should still be in zone 'z'. + auto shardADocStatus = getShardDoc(operationContext(), shardA.getName()); + ASSERT_OK(shardADocStatus.getStatus()); + + auto shardADoc = shardADocStatus.getValue(); + auto shardATags = shardADoc.getTags(); + ASSERT_EQ(1u, shardATags.size()); + ASSERT_EQ("z", shardATags.front()); + + // Shard B should not be in zone 'z'. + auto shardBDocStatus = getShardDoc(operationContext(), shardB.getName()); + ASSERT_OK(shardBDocStatus.getStatus()); + + auto shardBDoc = shardBDocStatus.getValue(); + auto shardBTags = shardBDoc.getTags(); + ASSERT_TRUE(shardBTags.empty()); +} + +TEST_F(RemoveShardFromZoneTest, RemoveLastZoneFromShardShouldSucceedWhenNoChunksReferToIt) { + ShardType shardA; + shardA.setName("a"); + shardA.setHost("a:1234"); + shardA.setTags({"z"}); + + ShardType shardB; + shardB.setName("b"); + shardB.setHost("b:1234"); + + setupShards({shardA, shardB}).transitional_ignore(); + + // Insert a chunk range document referring to a different zone + TagsType tagDoc; + tagDoc.setNS(NamespaceString("test.foo")); + tagDoc.setMinKey(BSON("x" << 0)); + tagDoc.setMaxKey(BSON("x" << 10)); + tagDoc.setTag("y"); + insertToConfigCollection(operationContext(), TagsType::ConfigNS, tagDoc.toBSON()) + .transitional_ignore(); + + ASSERT_OK(ShardingCatalogManager::get(operationContext()) + ->removeShardFromZone(operationContext(), shardA.getName(), "z")); + + // Shard A should not be in zone 'z'. + auto shardADocStatus = getShardDoc(operationContext(), shardA.getName()); + ASSERT_OK(shardADocStatus.getStatus()); + + auto shardADoc = shardADocStatus.getValue(); + auto shardATags = shardADoc.getTags(); + ASSERT_TRUE(shardATags.empty()); + + // Shard B should not be in zone 'z'. 
+ auto shardBDocStatus = getShardDoc(operationContext(), shardB.getName()); + ASSERT_OK(shardBDocStatus.getStatus()); + + auto shardBDoc = shardBDocStatus.getValue(); + auto shardBTags = shardBDoc.getTags(); + ASSERT_TRUE(shardBTags.empty()); +} + +TEST_F(RemoveShardFromZoneTest, RemoveLastZoneFromShardShouldFailWhenAChunkRefersToIt) { + ShardType shardA; + shardA.setName("a"); + shardA.setHost("a:1234"); + shardA.setTags({"y", "z"}); + + ShardType shardB; + shardB.setName("b"); + shardB.setHost("b:1234"); + + setupShards({shardA, shardB}).transitional_ignore(); + + TagsType tagDoc; + tagDoc.setNS(NamespaceString("test.foo")); + tagDoc.setMinKey(BSON("x" << 0)); + tagDoc.setMaxKey(BSON("x" << 10)); + tagDoc.setTag("z"); + insertToConfigCollection(operationContext(), TagsType::ConfigNS, tagDoc.toBSON()) + .transitional_ignore(); + + auto status = ShardingCatalogManager::get(operationContext()) + ->removeShardFromZone(operationContext(), shardA.getName(), "z"); + ASSERT_EQ(ErrorCodes::ZoneStillInUse, status); + + // Shard A should still be in zone 'z'. + auto shardADocStatus = getShardDoc(operationContext(), shardA.getName()); + ASSERT_OK(shardADocStatus.getStatus()); + + auto shardADoc = shardADocStatus.getValue(); + auto shardATags = shardADoc.getTags(); + ASSERT_EQ(2u, shardATags.size()); + ASSERT_EQ("y", shardATags.front()); + ASSERT_EQ("z", shardATags.back()); + + // Shard B should not be in zone 'z'. 
+ auto shardBDocStatus = getShardDoc(operationContext(), shardB.getName()); + ASSERT_OK(shardBDocStatus.getStatus()); + + auto shardBDoc = shardBDocStatus.getValue(); + auto shardBTags = shardBDoc.getTags(); + ASSERT_TRUE(shardBTags.empty()); +} + +TEST_F(RemoveShardFromZoneTest, RemoveZoneShouldFailIfShardDoesntExist) { + ShardType shardA; + shardA.setName("a"); + shardA.setHost("a:1234"); + shardA.setTags({"z"}); + + setupShards({shardA}).transitional_ignore(); + + auto status = ShardingCatalogManager::get(operationContext()) + ->removeShardFromZone(operationContext(), "b", "z"); + ASSERT_EQ(ErrorCodes::ShardNotFound, status); + + // Shard A should still be in zone 'z'. + auto shardADocStatus = getShardDoc(operationContext(), shardA.getName()); + ASSERT_OK(shardADocStatus.getStatus()); + + auto shardADoc = shardADocStatus.getValue(); + auto shardATags = shardADoc.getTags(); + ASSERT_EQ(1u, shardATags.size()); + ASSERT_EQ("z", shardATags.front()); +} + +TEST_F(RemoveShardFromZoneTest, RemoveZoneFromShardShouldOnlyRemoveZoneOnSpecifiedShard) { + ShardType shardA; + shardA.setName("a"); + shardA.setHost("a:1234"); + shardA.setTags({"z"}); + + ShardType shardB; + shardB.setName("b"); + shardB.setHost("b:1234"); + shardB.setTags({"y", "z"}); + + setupShards({shardA, shardB}).transitional_ignore(); + + ASSERT_OK(ShardingCatalogManager::get(operationContext()) + ->removeShardFromZone(operationContext(), shardB.getName(), "z")); + + // Shard A should still be in zone 'z'. + auto shardADocStatus = getShardDoc(operationContext(), shardA.getName()); + ASSERT_OK(shardADocStatus.getStatus()); + + auto shardADoc = shardADocStatus.getValue(); + auto shardATags = shardADoc.getTags(); + ASSERT_EQ(1u, shardATags.size()); + ASSERT_EQ("z", shardATags.front()); + + // Shard B should not be in zone 'z'. 
+ auto shardBDocStatus = getShardDoc(operationContext(), shardB.getName()); + ASSERT_OK(shardBDocStatus.getStatus()); + + auto shardBDoc = shardBDocStatus.getValue(); + auto shardBTags = shardBDoc.getTags(); + ASSERT_EQ(1u, shardBTags.size()); + ASSERT_EQ("y", shardBTags.front()); +} + +/* +// TODO: This test fails while an OpObserver is present, since the insert of the invalid shard +// doc fails. +TEST_F(RemoveShardFromZoneTest, RemoveZoneFromShardShouldErrorIfShardDocIsMalformed) { + // Note: invalid because tags is in string instead of array. + BSONObj invalidShardDoc(BSON("_id" + << "a" + << "host" + << "a:1" + << "tags" + << "z")); + + insertToConfigCollection( + operationContext(), ShardType::ConfigNS, invalidShardDoc); + + + auto status = +ShardingCatalogManager::get(operationContext())->removeShardFromZone(operationContext(), "a", "z"); + ASSERT_EQ(ErrorCodes::TypeMismatch, status); +} +*/ +} // unnamed namespace +} // namespace mongo diff --git a/src/mongo/db/s/config/sharding_catalog_manager_remove_shard_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_remove_shard_test.cpp new file mode 100644 index 00000000000..6def5ee6603 --- /dev/null +++ b/src/mongo/db/s/config/sharding_catalog_manager_remove_shard_test.cpp @@ -0,0 +1,325 @@ +/** + * Copyright (C) 2015 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kSharding + +#include "mongo/platform/basic.h" + +#include <string> +#include <vector> + +#include "mongo/client/remote_command_targeter_mock.h" +#include "mongo/db/commands.h" +#include "mongo/db/ops/write_ops.h" +#include "mongo/db/s/config/sharding_catalog_manager.h" +#include "mongo/executor/network_interface_mock.h" +#include "mongo/executor/task_executor.h" +#include "mongo/rpc/metadata/repl_set_metadata.h" +#include "mongo/rpc/metadata/tracking_metadata.h" +#include "mongo/s/catalog/type_changelog.h" +#include "mongo/s/catalog/type_chunk.h" +#include "mongo/s/catalog/type_database.h" +#include "mongo/s/catalog/type_shard.h" +#include "mongo/s/client/shard_registry.h" +#include "mongo/s/cluster_identity_loader.h" +#include "mongo/s/config_server_test_fixture.h" +#include "mongo/s/grid.h" +#include "mongo/s/write_ops/batched_command_response.h" +#include "mongo/stdx/chrono.h" +#include "mongo/stdx/future.h" +#include "mongo/util/log.h" + +namespace mongo { +namespace { + +using executor::NetworkInterfaceMock; +using executor::RemoteCommandRequest; +using executor::RemoteCommandResponse; +using 
executor::TaskExecutor; +using std::string; +using std::vector; +using unittest::assertGet; + +const Seconds kFutureTimeout{5}; + +BSONObj getReplSecondaryOkMetadata() { + BSONObjBuilder o; + ReadPreferenceSetting(ReadPreference::Nearest).toContainingBSON(&o); + o.append(rpc::kReplSetMetadataFieldName, 1); + return o.obj(); +} + +class RemoveShardTest : public ConfigServerTestFixture { +protected: + /** + * Performs the test setup steps from the parent class and then configures the config shard and + * the client name. + */ + void setUp() override { + ConfigServerTestFixture::setUp(); + + // Make sure clusterID is written to the config.version collection. + ASSERT_OK(ShardingCatalogManager::get(operationContext()) + ->initializeConfigDatabaseIfNeeded(operationContext())); + + auto clusterIdLoader = ClusterIdentityLoader::get(operationContext()); + ASSERT_OK(clusterIdLoader->loadClusterId(operationContext(), + repl::ReadConcernLevel::kLocalReadConcern)); + _clusterId = clusterIdLoader->getClusterId(); + } + + /** + * Checks whether a particular shard's "draining" field is set to true. 
+ */ + bool isDraining(const std::string& shardName) { + auto response = assertGet(shardRegistry()->getConfigShard()->exhaustiveFindOnConfig( + operationContext(), + ReadPreferenceSetting{ReadPreference::PrimaryOnly}, + repl::ReadConcernLevel::kMajorityReadConcern, + ShardType::ConfigNS, + BSON(ShardType::name() << shardName), + BSONObj(), + 1)); + BSONObj shardBSON = response.docs.front(); + if (shardBSON.hasField("draining")) { + return shardBSON["draining"].Bool(); + } + return false; + } + + const HostAndPort configHost{"TestHost1"}; + OID _clusterId; +}; + +TEST_F(RemoveShardTest, RemoveShardAnotherShardDraining) { + + ShardType shard1; + shard1.setName("shard1"); + shard1.setHost("host1:12345"); + shard1.setMaxSizeMB(100); + shard1.setState(ShardType::ShardState::kShardAware); + + ShardType shard2; + shard2.setName("shard2"); + shard2.setHost("host2:12345"); + shard2.setMaxSizeMB(100); + shard2.setState(ShardType::ShardState::kShardAware); + + ASSERT_OK(setupShards(std::vector<ShardType>{shard1, shard2})); + + auto result = assertGet(ShardingCatalogManager::get(operationContext()) + ->removeShard(operationContext(), shard1.getName())); + ASSERT_EQUALS(ShardDrainingStatus::STARTED, result); + ASSERT_TRUE(isDraining(shard1.getName())); + + ASSERT_EQUALS(ErrorCodes::ConflictingOperationInProgress, + ShardingCatalogManager::get(operationContext()) + ->removeShard(operationContext(), shard2.getName())); + ASSERT_FALSE(isDraining(shard2.getName())); +} + +TEST_F(RemoveShardTest, RemoveShardCantRemoveLastShard) { + string shardName = "shardToRemove"; + + ShardType shard1; + shard1.setName("shard1"); + shard1.setHost("host1:12345"); + shard1.setMaxSizeMB(100); + shard1.setState(ShardType::ShardState::kShardAware); + + ASSERT_OK(setupShards(std::vector<ShardType>{shard1})); + + ASSERT_EQUALS(ErrorCodes::IllegalOperation, + ShardingCatalogManager::get(operationContext()) + ->removeShard(operationContext(), shard1.getName())); + 
ASSERT_FALSE(isDraining(shard1.getName())); +} + +TEST_F(RemoveShardTest, RemoveShardStartDraining) { + ShardType shard1; + shard1.setName("shard1"); + shard1.setHost("host1:12345"); + shard1.setMaxSizeMB(100); + shard1.setState(ShardType::ShardState::kShardAware); + + ShardType shard2; + shard2.setName("shard2"); + shard2.setHost("host2:12345"); + shard2.setMaxSizeMB(100); + shard2.setState(ShardType::ShardState::kShardAware); + + ASSERT_OK(setupShards(std::vector<ShardType>{shard1, shard2})); + + auto result = assertGet(ShardingCatalogManager::get(operationContext()) + ->removeShard(operationContext(), shard1.getName())); + ASSERT_EQUALS(ShardDrainingStatus::STARTED, result); + ASSERT_TRUE(isDraining(shard1.getName())); +} + +TEST_F(RemoveShardTest, RemoveShardStillDrainingChunksRemaining) { + + ShardType shard1; + shard1.setName("shard1"); + shard1.setHost("host1:12345"); + shard1.setMaxSizeMB(100); + shard1.setState(ShardType::ShardState::kShardAware); + + ShardType shard2; + shard2.setName("shard2"); + shard2.setHost("host2:12345"); + shard2.setMaxSizeMB(100); + shard2.setState(ShardType::ShardState::kShardAware); + + auto epoch = OID::gen(); + ChunkType chunk1(NamespaceString("testDB.testColl"), + ChunkRange(BSON("_id" << 0), BSON("_id" << 20)), + ChunkVersion(1, 1, epoch), + shard1.getName()); + ChunkType chunk2(NamespaceString("testDB.testColl"), + ChunkRange(BSON("_id" << 21), BSON("_id" << 50)), + ChunkVersion(1, 2, epoch), + shard1.getName()); + ChunkType chunk3(NamespaceString("testDB.testColl"), + ChunkRange(BSON("_id" << 51), BSON("_id" << 1000)), + ChunkVersion(1, 3, epoch), + shard1.getName()); + + ASSERT_OK(setupShards(std::vector<ShardType>{shard1, shard2})); + setupDatabase("testDB", shard1.getName(), true); + ASSERT_OK(setupChunks(std::vector<ChunkType>{chunk1, chunk2, chunk3})); + + auto startedResult = assertGet(ShardingCatalogManager::get(operationContext()) + ->removeShard(operationContext(), shard1.getName())); + 
ASSERT_EQUALS(ShardDrainingStatus::STARTED, startedResult); + ASSERT_TRUE(isDraining(shard1.getName())); + + auto ongoingResult = assertGet(ShardingCatalogManager::get(operationContext()) + ->removeShard(operationContext(), shard1.getName())); + ASSERT_EQUALS(ShardDrainingStatus::ONGOING, ongoingResult); + ASSERT_TRUE(isDraining(shard1.getName())); +} + +TEST_F(RemoveShardTest, RemoveShardStillDrainingDatabasesRemaining) { + + ShardType shard1; + shard1.setName("shard1"); + shard1.setHost("host1:12345"); + shard1.setMaxSizeMB(100); + shard1.setState(ShardType::ShardState::kShardAware); + + ShardType shard2; + shard2.setName("shard2"); + shard2.setHost("host2:12345"); + shard2.setMaxSizeMB(100); + shard2.setState(ShardType::ShardState::kShardAware); + + ASSERT_OK(setupShards(std::vector<ShardType>{shard1, shard2})); + setupDatabase("testDB", shard1.getName(), false); + + auto startedResult = assertGet(ShardingCatalogManager::get(operationContext()) + ->removeShard(operationContext(), shard1.getName())); + ASSERT_EQUALS(ShardDrainingStatus::STARTED, startedResult); + ASSERT_TRUE(isDraining(shard1.getName())); + + auto ongoingResult = assertGet(ShardingCatalogManager::get(operationContext()) + ->removeShard(operationContext(), shard1.getName())); + ASSERT_EQUALS(ShardDrainingStatus::ONGOING, ongoingResult); + ASSERT_TRUE(isDraining(shard1.getName())); +} + +TEST_F(RemoveShardTest, RemoveShardCompletion) { + + ShardType shard1; + shard1.setName("shard1"); + shard1.setHost("host1:12345"); + shard1.setMaxSizeMB(100); + shard1.setState(ShardType::ShardState::kShardAware); + + ShardType shard2; + shard2.setName("shard2"); + shard2.setHost("host2:12345"); + shard2.setMaxSizeMB(100); + shard2.setState(ShardType::ShardState::kShardAware); + + auto epoch = OID::gen(); + ChunkType chunk1(NamespaceString("testDB.testColl"), + ChunkRange(BSON("_id" << 0), BSON("_id" << 20)), + ChunkVersion(1, 1, epoch), + shard1.getName()); + ChunkType chunk2(NamespaceString("testDB.testColl"), + 
ChunkRange(BSON("_id" << 21), BSON("_id" << 50)), + ChunkVersion(1, 2, epoch), + shard1.getName()); + ChunkType chunk3(NamespaceString("testDB.testColl"), + ChunkRange(BSON("_id" << 51), BSON("_id" << 1000)), + ChunkVersion(1, 3, epoch), + shard1.getName()); + + std::vector<ChunkType> chunks{chunk1, chunk2, chunk3}; + + ASSERT_OK(setupShards(std::vector<ShardType>{shard1, shard2})); + setupDatabase("testDB", shard2.getName(), false); + ASSERT_OK(setupChunks(std::vector<ChunkType>{chunk1, chunk2, chunk3})); + + auto startedResult = assertGet(ShardingCatalogManager::get(operationContext()) + ->removeShard(operationContext(), shard1.getName())); + ASSERT_EQUALS(ShardDrainingStatus::STARTED, startedResult); + ASSERT_TRUE(isDraining(shard1.getName())); + + auto ongoingResult = assertGet(ShardingCatalogManager::get(operationContext()) + ->removeShard(operationContext(), shard1.getName())); + ASSERT_EQUALS(ShardDrainingStatus::ONGOING, ongoingResult); + ASSERT_TRUE(isDraining(shard1.getName())); + + // Mock the operation during which the chunks are moved to the other shard. + const NamespaceString chunkNS(ChunkType::ConfigNS); + for (ChunkType chunk : chunks) { + ChunkType updatedChunk = chunk; + updatedChunk.setShard(shard2.getName()); + ASSERT_OK(updateToConfigCollection( + operationContext(), chunkNS, chunk.toConfigBSON(), updatedChunk.toConfigBSON(), false)); + } + + auto completedResult = assertGet(ShardingCatalogManager::get(operationContext()) + ->removeShard(operationContext(), shard1.getName())); + ASSERT_EQUALS(ShardDrainingStatus::COMPLETED, completedResult); + + // Now make sure that the shard no longer exists on config. 
+ auto response = assertGet(shardRegistry()->getConfigShard()->exhaustiveFindOnConfig( + operationContext(), + ReadPreferenceSetting{ReadPreference::PrimaryOnly}, + repl::ReadConcernLevel::kMajorityReadConcern, + ShardType::ConfigNS, + BSON(ShardType::name() << shard1.getName()), + BSONObj(), + 1)); + ASSERT_TRUE(response.docs.empty()); +} + +} // namespace +} // namespace mongo diff --git a/src/mongo/db/s/config/sharding_catalog_manager_shard_collection_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_shard_collection_test.cpp new file mode 100644 index 00000000000..35b564c9139 --- /dev/null +++ b/src/mongo/db/s/config/sharding_catalog_manager_shard_collection_test.cpp @@ -0,0 +1,445 @@ +/** + * Copyright (C) 2015 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. 
If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. + */ + +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kSharding + +#include "mongo/platform/basic.h" + +#include <set> +#include <string> +#include <vector> + +#include "mongo/client/read_preference.h" +#include "mongo/client/remote_command_targeter_factory_mock.h" +#include "mongo/client/remote_command_targeter_mock.h" +#include "mongo/db/client.h" +#include "mongo/db/commands.h" +#include "mongo/db/s/config/sharding_catalog_manager.h" +#include "mongo/executor/network_interface_mock.h" +#include "mongo/executor/task_executor.h" +#include "mongo/rpc/metadata/tracking_metadata.h" +#include "mongo/s/catalog/type_changelog.h" +#include "mongo/s/catalog/type_chunk.h" +#include "mongo/s/catalog/type_collection.h" +#include "mongo/s/catalog/type_database.h" +#include "mongo/s/catalog/type_locks.h" +#include "mongo/s/catalog/type_shard.h" +#include "mongo/s/client/shard_registry.h" +#include "mongo/s/config_server_test_fixture.h" +#include "mongo/s/grid.h" +#include "mongo/s/shard_key_pattern.h" +#include "mongo/stdx/future.h" +#include "mongo/transport/mock_session.h" +#include "mongo/util/log.h" +#include "mongo/util/scopeguard.h" +#include "mongo/util/time_support.h" + +namespace mongo { +namespace { + +using executor::NetworkInterfaceMock; +using executor::RemoteCommandRequest; +using executor::RemoteCommandResponse; +using executor::TaskExecutor; +using std::set; +using std::string; +using std::vector; +using unittest::assertGet; + +const ShardId testPrimaryShard = ShardId("shard0"); + +const NamespaceString kNamespace("db1.foo"); + +class ShardCollectionTest : public ConfigServerTestFixture { +public: + void expectCount(const HostAndPort& receivingHost, + const NamespaceString& expectedNss, + const BSONObj& expectedQuery, + const 
StatusWith<long long>& response) { + onCommand([&](const RemoteCommandRequest& request) { + ASSERT_EQUALS(receivingHost, request.target); + string cmdName = request.cmdObj.firstElement().fieldName(); + + ASSERT_EQUALS("count", cmdName); + + const NamespaceString nss(request.dbname, request.cmdObj.firstElement().String()); + ASSERT_EQUALS(expectedNss, nss); + + if (expectedQuery.isEmpty()) { + auto queryElem = request.cmdObj["query"]; + ASSERT_TRUE(queryElem.eoo() || queryElem.Obj().isEmpty()); + } else { + ASSERT_BSONOBJ_EQ(expectedQuery, request.cmdObj["query"].Obj()); + } + + if (response.isOK()) { + return BSON("ok" << 1 << "n" << response.getValue()); + } + + BSONObjBuilder responseBuilder; + CommandHelpers::appendCommandStatus(responseBuilder, response.getStatus()); + return responseBuilder.obj(); + }); + } + +private: + const HostAndPort configHost{"configHost1"}; + const ConnectionString configCS{ConnectionString::forReplicaSet("configReplSet", {configHost})}; + const HostAndPort clientHost{"clientHost1"}; +}; + +TEST_F(ShardCollectionTest, anotherMongosSharding) { + ShardType shard; + shard.setName("shard0"); + shard.setHost("shardHost"); + ASSERT_OK(setupShards(vector<ShardType>{shard})); + + setupDatabase(kNamespace.db().toString(), shard.getName(), true); + + // Set up chunks in the collection, indicating that another mongos must have already started + // sharding the collection. 
+ ChunkType chunk; + chunk.setNS(kNamespace); + chunk.setVersion(ChunkVersion(2, 0, OID::gen())); + chunk.setShard(shard.getName()); + chunk.setMin(BSON("_id" << 1)); + chunk.setMax(BSON("_id" << 5)); + ASSERT_OK(setupChunks({chunk})); + + ShardKeyPattern shardKeyPattern(BSON("_id" << 1)); + BSONObj defaultCollation; + + ASSERT_THROWS_CODE(ShardingCatalogManager::get(operationContext()) + ->shardCollection(operationContext(), + kNamespace, + boost::none, // UUID + shardKeyPattern, + defaultCollation, + false, + vector<BSONObj>{}, + false, + testPrimaryShard), + AssertionException, + ErrorCodes::ManualInterventionRequired); +} + +TEST_F(ShardCollectionTest, noInitialChunksOrData) { + // Initial setup + const HostAndPort shardHost{"shardHost"}; + ShardType shard; + shard.setName("shard0"); + shard.setHost(shardHost.toString()); + + std::unique_ptr<RemoteCommandTargeterMock> targeter( + stdx::make_unique<RemoteCommandTargeterMock>()); + targeter->setConnectionStringReturnValue(ConnectionString(shardHost)); + targeter->setFindHostReturnValue(shardHost); + targeterFactory()->addTargeterToReturn(ConnectionString(shardHost), std::move(targeter)); + + ASSERT_OK(setupShards(vector<ShardType>{shard})); + + setupDatabase(kNamespace.db().toString(), shard.getName(), true); + + ShardKeyPattern shardKeyPattern(BSON("_id" << 1)); + BSONObj defaultCollation; + + // Now start actually sharding the collection. + auto future = launchAsync([&] { + ON_BLOCK_EXIT([&] { Client::destroy(); }); + Client::initThreadIfNotAlready("Test"); + auto opCtx = cc().makeOperationContext(); + ShardingCatalogManager::get(operationContext()) + ->shardCollection(opCtx.get(), + kNamespace, + boost::none, // UUID + shardKeyPattern, + defaultCollation, + false, + vector<BSONObj>{}, + false, + testPrimaryShard); + }); + + // Report that no documents exist for the given collection on the primary shard + expectCount(shardHost, kNamespace, BSONObj(), 0); + + // Expect the set shard version for that namespace. 
+ // We do not check for a specific ChunkVersion, because we cannot easily know the OID that was + // generated by shardCollection for the first chunk. + // TODO SERVER-29451: add hooks to the mock storage engine to expect reads and writes. + expectSetShardVersion(shardHost, shard, kNamespace, boost::none /* expected ChunkVersion */); + + future.timed_get(kFutureTimeout); +} + +TEST_F(ShardCollectionTest, withInitialChunks) { + // Initial setup + const HostAndPort shard0Host{"shardHost0"}; + const HostAndPort shard1Host{"shardHost1"}; + const HostAndPort shard2Host{"shardHost2"}; + + ShardType shard0; + shard0.setName("shard0"); + shard0.setHost(shard0Host.toString()); + + ShardType shard1; + shard1.setName("shard1"); + shard1.setHost(shard1Host.toString()); + + ShardType shard2; + shard2.setName("shard2"); + shard2.setHost(shard2Host.toString()); + + std::unique_ptr<RemoteCommandTargeterMock> targeter0( + stdx::make_unique<RemoteCommandTargeterMock>()); + std::unique_ptr<RemoteCommandTargeterMock> targeter1( + stdx::make_unique<RemoteCommandTargeterMock>()); + std::unique_ptr<RemoteCommandTargeterMock> targeter2( + stdx::make_unique<RemoteCommandTargeterMock>()); + targeter0->setConnectionStringReturnValue(ConnectionString(shard0Host)); + targeter0->setFindHostReturnValue(shard0Host); + targeterFactory()->addTargeterToReturn(ConnectionString(shard0Host), std::move(targeter0)); + targeter1->setConnectionStringReturnValue(ConnectionString(shard1Host)); + targeter1->setFindHostReturnValue(shard1Host); + targeterFactory()->addTargeterToReturn(ConnectionString(shard1Host), std::move(targeter1)); + targeter2->setConnectionStringReturnValue(ConnectionString(shard2Host)); + targeter2->setFindHostReturnValue(shard2Host); + targeterFactory()->addTargeterToReturn(ConnectionString(shard2Host), std::move(targeter2)); + + ASSERT_OK(setupShards(vector<ShardType>{shard0, shard1, shard2})); + + setupDatabase(kNamespace.db().toString(), shard0.getName(), true); + + ShardKeyPattern 
keyPattern(BSON("_id" << 1)); + + BSONObj splitPoint0 = BSON("_id" << 1); + BSONObj splitPoint1 = BSON("_id" << 100); + BSONObj splitPoint2 = BSON("_id" << 200); + BSONObj splitPoint3 = BSON("_id" << 300); + + ChunkVersion expectedVersion(1, 0, OID::gen()); + + ChunkType expectedChunk0; + expectedChunk0.setNS(kNamespace); + expectedChunk0.setShard(shard0.getName()); + expectedChunk0.setMin(keyPattern.getKeyPattern().globalMin()); + expectedChunk0.setMax(splitPoint0); + expectedChunk0.setVersion(expectedVersion); + expectedVersion.incMinor(); + + ChunkType expectedChunk1; + expectedChunk1.setNS(kNamespace); + expectedChunk1.setShard(shard1.getName()); + expectedChunk1.setMin(splitPoint0); + expectedChunk1.setMax(splitPoint1); + expectedChunk1.setVersion(expectedVersion); + expectedVersion.incMinor(); + + ChunkType expectedChunk2; + expectedChunk2.setNS(kNamespace); + expectedChunk2.setShard(shard2.getName()); + expectedChunk2.setMin(splitPoint1); + expectedChunk2.setMax(splitPoint2); + expectedChunk2.setVersion(expectedVersion); + expectedVersion.incMinor(); + + ChunkType expectedChunk3; + expectedChunk3.setNS(kNamespace); + expectedChunk3.setShard(shard0.getName()); + expectedChunk3.setMin(splitPoint2); + expectedChunk3.setMax(splitPoint3); + expectedChunk3.setVersion(expectedVersion); + expectedVersion.incMinor(); + + ChunkType expectedChunk4; + expectedChunk4.setNS(kNamespace); + expectedChunk4.setShard(shard1.getName()); + expectedChunk4.setMin(splitPoint3); + expectedChunk4.setMax(keyPattern.getKeyPattern().globalMax()); + expectedChunk4.setVersion(expectedVersion); + + vector<ChunkType> expectedChunks{ + expectedChunk0, expectedChunk1, expectedChunk2, expectedChunk3, expectedChunk4}; + + BSONObj defaultCollation; + + // Now start actually sharding the collection. + auto future = launchAsync([&] { + // TODO: can we mock the ShardRegistry to return these? 
+ set<ShardId> shards{shard0.getName(), shard1.getName(), shard2.getName()}; + + ON_BLOCK_EXIT([&] { Client::destroy(); }); + Client::initThreadIfNotAlready("Test"); + auto opCtx = cc().makeOperationContext(); + ShardingCatalogManager::get(operationContext()) + ->shardCollection(opCtx.get(), + kNamespace, + boost::none, // UUID + keyPattern, + defaultCollation, + true, + vector<BSONObj>{splitPoint0, splitPoint1, splitPoint2, splitPoint3}, + true, + testPrimaryShard); + }); + + // Expect the set shard version for that namespace + // We do not check for a specific ChunkVersion, because we cannot easily know the OID that was + // generated by shardCollection for the first chunk. + // TODO SERVER-29451: add hooks to the mock storage engine to expect reads and writes. + expectSetShardVersion(shard0Host, shard0, kNamespace, boost::none /* expected ChunkVersion */); + + future.timed_get(kFutureTimeout); +} + +TEST_F(ShardCollectionTest, withInitialData) { + // Initial setup + const HostAndPort shardHost{"shardHost"}; + ShardType shard; + shard.setName("shard0"); + shard.setHost(shardHost.toString()); + + std::unique_ptr<RemoteCommandTargeterMock> targeter( + stdx::make_unique<RemoteCommandTargeterMock>()); + targeter->setConnectionStringReturnValue(ConnectionString(shardHost)); + targeter->setFindHostReturnValue(shardHost); + targeterFactory()->addTargeterToReturn(ConnectionString(shardHost), std::move(targeter)); + + ASSERT_OK(setupShards(vector<ShardType>{shard})); + + setupDatabase(kNamespace.db().toString(), shard.getName(), true); + + ShardKeyPattern keyPattern(BSON("_id" << 1)); + + BSONObj splitPoint0 = BSON("_id" << 1); + BSONObj splitPoint1 = BSON("_id" << 100); + BSONObj splitPoint2 = BSON("_id" << 200); + BSONObj splitPoint3 = BSON("_id" << 300); + + ChunkVersion expectedVersion(1, 0, OID::gen()); + + ChunkType expectedChunk0; + expectedChunk0.setNS(kNamespace); + expectedChunk0.setShard(shard.getName()); + 
expectedChunk0.setMin(keyPattern.getKeyPattern().globalMin()); + expectedChunk0.setMax(splitPoint0); + expectedChunk0.setVersion(expectedVersion); + expectedVersion.incMinor(); + + ChunkType expectedChunk1; + expectedChunk1.setNS(kNamespace); + expectedChunk1.setShard(shard.getName()); + expectedChunk1.setMin(splitPoint0); + expectedChunk1.setMax(splitPoint1); + expectedChunk1.setVersion(expectedVersion); + expectedVersion.incMinor(); + + ChunkType expectedChunk2; + expectedChunk2.setNS(kNamespace); + expectedChunk2.setShard(shard.getName()); + expectedChunk2.setMin(splitPoint1); + expectedChunk2.setMax(splitPoint2); + expectedChunk2.setVersion(expectedVersion); + expectedVersion.incMinor(); + + ChunkType expectedChunk3; + expectedChunk3.setNS(kNamespace); + expectedChunk3.setShard(shard.getName()); + expectedChunk3.setMin(splitPoint2); + expectedChunk3.setMax(splitPoint3); + expectedChunk3.setVersion(expectedVersion); + expectedVersion.incMinor(); + + ChunkType expectedChunk4; + expectedChunk4.setNS(kNamespace); + expectedChunk4.setShard(shard.getName()); + expectedChunk4.setMin(splitPoint3); + expectedChunk4.setMax(keyPattern.getKeyPattern().globalMax()); + expectedChunk4.setVersion(expectedVersion); + + vector<ChunkType> expectedChunks{ + expectedChunk0, expectedChunk1, expectedChunk2, expectedChunk3, expectedChunk4}; + + BSONObj defaultCollation; + + // Now start actually sharding the collection. + auto future = launchAsync([&] { + ON_BLOCK_EXIT([&] { Client::destroy(); }); + Client::initThreadIfNotAlready("Test"); + auto opCtx = cc().makeOperationContext(); + ShardingCatalogManager::get(operationContext()) + ->shardCollection(opCtx.get(), + kNamespace, + boost::none, // UUID + keyPattern, + defaultCollation, + false, + vector<BSONObj>{}, + false, + testPrimaryShard); + }); + + // Report that documents exist for the given collection on the primary shard, so that calling + // splitVector is required for calculating the initial split points. 
+ expectCount(shardHost, kNamespace, BSONObj(), 1000); + + // Respond to the splitVector command sent to the shard to figure out initial split points + onCommand([&](const RemoteCommandRequest& request) { + ASSERT_EQUALS(shardHost, request.target); + string cmdName = request.cmdObj.firstElement().fieldName(); + ASSERT_EQUALS("splitVector", cmdName); + ASSERT_EQUALS(kNamespace.ns(), + request.cmdObj["splitVector"].String()); // splitVector uses full ns + + ASSERT_BSONOBJ_EQ(keyPattern.toBSON(), request.cmdObj["keyPattern"].Obj()); + ASSERT_BSONOBJ_EQ(keyPattern.getKeyPattern().globalMin(), request.cmdObj["min"].Obj()); + ASSERT_BSONOBJ_EQ(keyPattern.getKeyPattern().globalMax(), request.cmdObj["max"].Obj()); + ASSERT_EQUALS(64 * 1024 * 1024ULL, + static_cast<uint64_t>(request.cmdObj["maxChunkSizeBytes"].numberLong())); + ASSERT_EQUALS(0, request.cmdObj["maxSplitPoints"].numberLong()); + ASSERT_EQUALS(0, request.cmdObj["maxChunkObjects"].numberLong()); + + ASSERT_BSONOBJ_EQ( + ReadPreferenceSetting(ReadPreference::PrimaryPreferred).toContainingBSON(), + rpc::TrackingMetadata::removeTrackingData(request.metadata)); + + return BSON("ok" << 1 << "splitKeys" + << BSON_ARRAY(splitPoint0 << splitPoint1 << splitPoint2 << splitPoint3)); + }); + + // Expect the set shard version for that namespace + // We do not check for a specific ChunkVersion, because we cannot easily know the OID that was + // generated by shardCollection for the first chunk. + // TODO SERVER-29451: add hooks to the mock storage engine to expect reads and writes. 
+ expectSetShardVersion(shardHost, shard, kNamespace, boost::none); + + future.timed_get(kFutureTimeout); +} + +} // namespace +} // namespace mongo diff --git a/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp b/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp new file mode 100644 index 00000000000..e8f93fc34f3 --- /dev/null +++ b/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp @@ -0,0 +1,956 @@ +/** + * Copyright (C) 2017 MongoDB Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */ + +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kSharding + +#include "mongo/platform/basic.h" + +#include "mongo/db/s/config/sharding_catalog_manager.h" + +#include <iomanip> +#include <pcrecpp.h> +#include <set> + +#include "mongo/base/status_with.h" +#include "mongo/bson/util/bson_extract.h" +#include "mongo/client/connection_string.h" +#include "mongo/client/read_preference.h" +#include "mongo/client/remote_command_targeter.h" +#include "mongo/client/replica_set_monitor.h" +#include "mongo/db/audit.h" +#include "mongo/db/catalog/catalog_raii.h" +#include "mongo/db/client.h" +#include "mongo/db/commands/feature_compatibility_version.h" +#include "mongo/db/namespace_string.h" +#include "mongo/db/operation_context.h" +#include "mongo/db/repl/repl_client_info.h" +#include "mongo/db/repl/repl_set_config.h" +#include "mongo/db/repl/replication_coordinator.h" +#include "mongo/db/s/type_shard_identity.h" +#include "mongo/db/sessions_collection.h" +#include "mongo/db/wire_version.h" +#include "mongo/executor/task_executor.h" +#include "mongo/rpc/get_status_from_command_result.h" +#include "mongo/s/catalog/config_server_version.h" +#include "mongo/s/catalog/sharding_catalog_client.h" +#include "mongo/s/catalog/type_database.h" +#include "mongo/s/catalog/type_shard.h" +#include "mongo/s/client/shard.h" +#include "mongo/s/client/shard_connection.h" +#include "mongo/s/client/shard_registry.h" +#include "mongo/s/cluster_identity_loader.h" +#include "mongo/s/grid.h" +#include "mongo/s/shard_util.h" +#include "mongo/s/write_ops/batched_command_request.h" +#include "mongo/s/write_ops/batched_command_response.h" +#include "mongo/util/fail_point_service.h" +#include "mongo/util/log.h" +#include "mongo/util/mongoutils/str.h" +#include "mongo/util/scopeguard.h" + +namespace mongo { +namespace { + +using std::vector; + +using CallbackHandle = executor::TaskExecutor::CallbackHandle; +using CallbackArgs = executor::TaskExecutor::CallbackArgs; +using 
RemoteCommandCallbackArgs = executor::TaskExecutor::RemoteCommandCallbackArgs;
using RemoteCommandCallbackFn = executor::TaskExecutor::RemoteCommandCallbackFn;

const Seconds kDefaultFindHostMaxWaitTime(20);

// All config reads below go through the config shard with 'nearest' preference.
const ReadPreferenceSetting kConfigReadSelector(ReadPreference::Nearest, TagSet{});
const WriteConcernOptions kNoWaitWriteConcern(1, WriteConcernOptions::SyncMode::UNSET, Seconds(0));

/**
 * Generates a unique name to be given to a newly added shard.
 *
 * Finds the highest existing auto-generated name matching /^shard/ (majority read on
 * config.shards, sorted descending) and returns "shardNNNN" with the next 4-digit counter.
 */
StatusWith<std::string> generateNewShardName(OperationContext* opCtx) {
    BSONObjBuilder shardNameRegex;
    shardNameRegex.appendRegex(ShardType::name(), "^shard");

    auto findStatus = Grid::get(opCtx)->shardRegistry()->getConfigShard()->exhaustiveFindOnConfig(
        opCtx,
        kConfigReadSelector,
        repl::ReadConcernLevel::kMajorityReadConcern,
        ShardType::ConfigNS,
        shardNameRegex.obj(),
        BSON(ShardType::name() << -1),
        1);
    if (!findStatus.isOK()) {
        return findStatus.getStatus();
    }

    const auto& docs = findStatus.getValue().docs;

    int count = 0;
    if (!docs.empty()) {
        const auto shardStatus = ShardType::fromBSON(docs.front());
        if (!shardStatus.isOK()) {
            return shardStatus.getStatus();
        }

        // Parse the numeric suffix after the 5-character "shard" prefix and bump it.
        std::istringstream is(shardStatus.getValue().getName().substr(5));
        is >> count;
        count++;
    }

    // TODO: fix so that we can have more than 10000 automatically generated shard names
    if (count < 9999) {
        std::stringstream ss;
        ss << "shard" << std::setfill('0') << std::setw(4) << count;
        return ss.str();
    }

    return Status(ErrorCodes::OperationFailed, "unable to generate new shard name");
}

}  // namespace

/**
 * Runs 'cmdObj' against dbName on the host targeted by 'targeter' using the dedicated
 * add-shard task executor, blocking until the remote command completes. Wraps
 * non-propagatable command/write-concern errors as OperationFailed with context.
 */
StatusWith<Shard::CommandResponse> ShardingCatalogManager::_runCommandForAddShard(
    OperationContext* opCtx,
    RemoteCommandTargeter* targeter,
    const std::string& dbName,
    const BSONObj& cmdObj) {
    auto swHost = targeter->findHost(opCtx, ReadPreferenceSetting{ReadPreference::PrimaryOnly});
    if (!swHost.isOK()) {
        return swHost.getStatus();
    }
    auto host = std::move(swHost.getValue());

    executor::RemoteCommandRequest request(
        host, dbName, cmdObj, rpc::makeEmptyMetadata(), nullptr, Seconds(30));

    executor::RemoteCommandResponse response =
        Status(ErrorCodes::InternalError, "Internal error running command");

    auto swCallbackHandle = _executorForAddShard->scheduleRemoteCommand(
        request, [&response](const executor::TaskExecutor::RemoteCommandCallbackArgs& args) {
            response = args.response;
        });
    if (!swCallbackHandle.isOK()) {
        return swCallbackHandle.getStatus();
    }

    // Block until the command is carried out
    _executorForAddShard->wait(swCallbackHandle.getValue());

    if (response.status == ErrorCodes::ExceededTimeLimit) {
        LOG(0) << "Operation timed out with status " << redact(response.status);
    }

    if (!response.isOK()) {
        if (!Shard::shouldErrorBePropagated(response.status.code())) {
            return {ErrorCodes::OperationFailed,
                    str::stream() << "failed to run command " << cmdObj
                                  << " when attempting to add shard "
                                  << targeter->connectionString().toString()
                                  << causedBy(response.status)};
        }
        return response.status;
    }

    BSONObj result = response.data.getOwned();

    Status commandStatus = getStatusFromCommandResult(result);
    if (!Shard::shouldErrorBePropagated(commandStatus.code())) {
        commandStatus = {ErrorCodes::OperationFailed,
                         str::stream() << "failed to run command " << cmdObj
                                       << " when attempting to add shard "
                                       << targeter->connectionString().toString()
                                       << causedBy(commandStatus)};
    }

    Status writeConcernStatus = getWriteConcernStatusFromCommandResult(result);
    if (!Shard::shouldErrorBePropagated(writeConcernStatus.code())) {
        writeConcernStatus = {ErrorCodes::OperationFailed,
                              str::stream() << "failed to satisfy writeConcern for command "
                                            << cmdObj
                                            << " when attempting to add shard "
                                            << targeter->connectionString().toString()
                                            << causedBy(writeConcernStatus)};
    }

    return Shard::CommandResponse(std::move(host),
                                  std::move(result),
                                  response.metadata.getOwned(),
                                  std::move(commandStatus),
                                  std::move(writeConcernStatus));
}

/**
 * Checks whether the shard described by the proposed connection string / name / maxSize is
 * already part of the cluster. Returns the existing ShardType if an equivalent shard exists
 * (addShard may no-op), boost::none if no conflict, or IllegalOperation if a conflicting
 * shard (same set name, overlapping host, or same name with different options) exists.
 */
StatusWith<boost::optional<ShardType>> ShardingCatalogManager::_checkIfShardExists(
    OperationContext* opCtx,
    const ConnectionString& proposedShardConnectionString,
    const std::string* proposedShardName,
    long long proposedShardMaxSize) {
    // Check whether any host in the connection is already part of the cluster.
    const auto existingShards = Grid::get(opCtx)->catalogClient()->getAllShards(
        opCtx, repl::ReadConcernLevel::kLocalReadConcern);
    if (!existingShards.isOK()) {
        return existingShards.getStatus().withContext(
            "Failed to load existing shards during addShard");
    }

    // Now check if this shard already exists - if it already exists *with the same options* then
    // the addShard request can return success early without doing anything more.
    for (const auto& existingShard : existingShards.getValue().value) {
        auto swExistingShardConnStr = ConnectionString::parse(existingShard.getHost());
        if (!swExistingShardConnStr.isOK()) {
            return swExistingShardConnStr.getStatus();
        }
        auto existingShardConnStr = std::move(swExistingShardConnStr.getValue());

        // Function for determining if the options for the shard that is being added match the
        // options of an existing shard that conflicts with it.
        auto shardsAreEquivalent = [&]() {
            if (proposedShardName && *proposedShardName != existingShard.getName()) {
                return false;
            }
            if (proposedShardConnectionString.type() != existingShardConnStr.type()) {
                return false;
            }
            if (proposedShardConnectionString.type() == ConnectionString::SET &&
                proposedShardConnectionString.getSetName() != existingShardConnStr.getSetName()) {
                return false;
            }
            if (proposedShardMaxSize != existingShard.getMaxSizeMB()) {
                return false;
            }
            return true;
        };

        if (existingShardConnStr.type() == ConnectionString::SET &&
            proposedShardConnectionString.type() == ConnectionString::SET &&
            existingShardConnStr.getSetName() == proposedShardConnectionString.getSetName()) {
            // An existing shard has the same replica set name as the shard being added.
            // If the options aren't the same, then this is an error,
            // but if the options match then the addShard operation should be immediately
            // considered a success and terminated.
            if (shardsAreEquivalent()) {
                return {existingShard};
            } else {
                return {ErrorCodes::IllegalOperation,
                        str::stream() << "A shard already exists containing the replica set '"
                                      << existingShardConnStr.getSetName()
                                      << "'"};
            }
        }

        for (const auto& existingHost : existingShardConnStr.getServers()) {
            // Look if any of the hosts in the existing shard are present within the shard trying
            // to be added.
            for (const auto& addingHost : proposedShardConnectionString.getServers()) {
                if (existingHost == addingHost) {
                    // At least one of the hosts in the shard being added already exists in an
                    // existing shard. If the options aren't the same, then this is an error,
                    // but if the options match then the addShard operation should be immediately
                    // considered a success and terminated.
                    if (shardsAreEquivalent()) {
                        return {existingShard};
                    } else {
                        return {ErrorCodes::IllegalOperation,
                                str::stream() << "'" << addingHost.toString() << "' "
                                              << "is already a member of the existing shard '"
                                              << existingShard.getHost()
                                              << "' ("
                                              << existingShard.getName()
                                              << ")."};
                    }
                }
            }
        }

        if (proposedShardName && *proposedShardName == existingShard.getName()) {
            // If we get here then we're trying to add a shard with the same name as an existing
            // shard, but there was no overlap in the hosts between the existing shard and the
            // proposed connection string for the new shard.
            return {ErrorCodes::IllegalOperation,
                    str::stream() << "A shard named " << *proposedShardName << " already exists"};
        }
    }

    return {boost::none};
}

/**
 * Validates (via isMaster against the target) that the host can serve as a shard: not a
 * mongos, not a config server, binary wire version compatible with the cluster FCV, replica
 * set name/hosts consistent with the connection string, and has a primary. On success
 * returns the ShardType to insert (name, host, shard-aware state).
 */
StatusWith<ShardType> ShardingCatalogManager::_validateHostAsShard(
    OperationContext* opCtx,
    std::shared_ptr<RemoteCommandTargeter> targeter,
    const std::string* shardProposedName,
    const ConnectionString& connectionString) {
    auto swCommandResponse =
        _runCommandForAddShard(opCtx, targeter.get(), "admin", BSON("isMaster" << 1));
    if (swCommandResponse.getStatus() == ErrorCodes::IncompatibleServerVersion) {
        return swCommandResponse.getStatus().withReason(
            str::stream() << "Cannot add " << connectionString.toString()
                          << " as a shard because its binary version is not compatible with "
                             "the cluster's featureCompatibilityVersion.");
    } else if (!swCommandResponse.isOK()) {
        return swCommandResponse.getStatus();
    }

    // Check for a command response error
    auto resIsMasterStatus = std::move(swCommandResponse.getValue().commandStatus);
    if (!resIsMasterStatus.isOK()) {
        return resIsMasterStatus.withContext(str::stream()
                                             << "Error running isMaster against "
                                             << targeter->connectionString().toString());
    }

    auto resIsMaster = std::move(swCommandResponse.getValue().response);

    // Fail if the node being added is a mongos.
    const std::string msg = resIsMaster.getStringField("msg");
    if (msg == "isdbgrid") {
        return {ErrorCodes::IllegalOperation, "cannot add a mongos as a shard"};
    }

    // Extract the maxWireVersion so we can verify that the node being added has a binary version
    // greater than or equal to the cluster's featureCompatibilityVersion. We expect an incompatible
    // binary node to be unable to communicate, returning an IncompatibleServerVersion error,
    // because of our internal wire version protocol. So we can safely invariant here that the node
    // is compatible.
    long long maxWireVersion;
    Status status = bsonExtractIntegerField(resIsMaster, "maxWireVersion", &maxWireVersion);
    if (!status.isOK()) {
        return status.withContext(str::stream() << "isMaster returned invalid 'maxWireVersion' "
                                                << "field when attempting to add "
                                                << connectionString.toString()
                                                << " as a shard");
    }
    if (serverGlobalParams.featureCompatibility.getVersion() >
        ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo36) {
        // If FCV 4.0, or upgrading to / downgrading from, wire version must be LATEST.
        invariant(maxWireVersion == WireVersion::LATEST_WIRE_VERSION);
    } else if (serverGlobalParams.featureCompatibility.getVersion() >
                   ServerGlobalParams::FeatureCompatibility::Version::kFullyDowngradedTo34 &&
               serverGlobalParams.featureCompatibility.getVersion() <=
                   ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo36) {
        // If FCV 3.6, or upgrading to / downgrading from, wire version must be v3.6
        // LATEST_WIRE_VERSION or greater.
        invariant(maxWireVersion >= WireVersion::LATEST_WIRE_VERSION - 1);
    } else {
        // If FCV 3.4, wire version cannot be less than v3.4 LATEST_WIRE_VERSION.
        invariant(serverGlobalParams.featureCompatibility.getVersion() ==
                  ServerGlobalParams::FeatureCompatibility::Version::kFullyDowngradedTo34);
        invariant(maxWireVersion >= WireVersion::LATEST_WIRE_VERSION - 2);
    }

    // Check whether there is a master. If there isn't, the replica set may not have been
    // initiated. If the connection is a standalone, it will return true for isMaster.
    bool isMaster;
    status = bsonExtractBooleanField(resIsMaster, "ismaster", &isMaster);
    if (!status.isOK()) {
        return status.withContext(str::stream() << "isMaster returned invalid 'ismaster' "
                                                << "field when attempting to add "
                                                << connectionString.toString()
                                                << " as a shard");
    }
    if (!isMaster) {
        return {ErrorCodes::NotMaster,
                str::stream()
                    << connectionString.toString()
                    << " does not have a master. If this is a replica set, ensure that it has a"
                    << " healthy primary and that the set has been properly initiated."};
    }

    const std::string providedSetName = connectionString.getSetName();
    const std::string foundSetName = resIsMaster["setName"].str();

    // Make sure the specified replica set name (if any) matches the actual shard's replica set
    if (providedSetName.empty() && !foundSetName.empty()) {
        return {ErrorCodes::OperationFailed,
                str::stream() << "host is part of set " << foundSetName << "; "
                              << "use replica set url format "
                              << "<setname>/<server1>,<server2>, ..."};
    }

    if (!providedSetName.empty() && foundSetName.empty()) {
        return {ErrorCodes::OperationFailed,
                str::stream() << "host did not return a set name; "
                              << "is the replica set still initializing? "
                              << resIsMaster};
    }

    // Make sure the set name specified in the connection string matches the one where its hosts
    // belong into
    if (!providedSetName.empty() && (providedSetName != foundSetName)) {
        return {ErrorCodes::OperationFailed,
                str::stream() << "the provided connection string (" << connectionString.toString()
                              << ") does not match the actual set name "
                              << foundSetName};
    }

    // Is it a config server?
    if (resIsMaster.hasField("configsvr")) {
        return {ErrorCodes::OperationFailed,
                str::stream() << "Cannot add " << connectionString.toString()
                              << " as a shard since it is a config server"};
    }

    // If the shard is part of a replica set, make sure all the hosts mentioned in the connection
    // string are part of the set. It is fine if not all members of the set are mentioned in the
    // connection string, though.
    if (!providedSetName.empty()) {
        std::set<std::string> hostSet;

        BSONObjIterator iter(resIsMaster["hosts"].Obj());
        while (iter.more()) {
            hostSet.insert(iter.next().String());  // host:port
        }

        if (resIsMaster["passives"].isABSONObj()) {
            BSONObjIterator piter(resIsMaster["passives"].Obj());
            while (piter.more()) {
                hostSet.insert(piter.next().String());  // host:port
            }
        }

        if (resIsMaster["arbiters"].isABSONObj()) {
            BSONObjIterator piter(resIsMaster["arbiters"].Obj());
            while (piter.more()) {
                hostSet.insert(piter.next().String());  // host:port
            }
        }

        for (const auto& hostEntry : connectionString.getServers()) {
            const auto& host = hostEntry.toString();  // host:port
            if (hostSet.find(host) == hostSet.end()) {
                return {ErrorCodes::OperationFailed,
                        str::stream() << "in seed list " << connectionString.toString() << ", host "
                                      << host
                                      << " does not belong to replica set "
                                      << foundSetName
                                      << "; found "
                                      << resIsMaster.toString()};
            }
        }
    }

    std::string actualShardName;

    if (shardProposedName) {
        actualShardName = *shardProposedName;
    } else if (!foundSetName.empty()) {
        // Default it to the name of the replica set
        actualShardName = foundSetName;
    }

    // Disallow adding shard replica set with name 'config'
    if (actualShardName == NamespaceString::kConfigDb) {
        return {ErrorCodes::BadValue, "use of shard replica set with name 'config' is not allowed"};
    }

    // Retrieve the most up to date connection string that we know from the replica set monitor (if
    // this is a replica set shard, otherwise it will be the same value as connectionString).
    ConnectionString actualShardConnStr = targeter->connectionString();

    ShardType shard;
    shard.setName(actualShardName);
    shard.setHost(actualShardConnStr.toString());
    shard.setState(ShardType::ShardState::kShardAware);

    return shard;
}

/**
 * Drops config.system.sessions on the shard being added (majority write concern).
 * NamespaceNotFound is treated as success since the collection may simply not exist.
 */
Status ShardingCatalogManager::_dropSessionsCollection(
    OperationContext* opCtx, std::shared_ptr<RemoteCommandTargeter> targeter) {

    BSONObjBuilder builder;
    builder.append("drop", SessionsCollection::kSessionsCollection.toString());
    {
        BSONObjBuilder wcBuilder(builder.subobjStart("writeConcern"));
        wcBuilder.append("w", "majority");
    }

    auto swCommandResponse = _runCommandForAddShard(
        opCtx, targeter.get(), SessionsCollection::kSessionsDb.toString(), builder.done());
    if (!swCommandResponse.isOK()) {
        return swCommandResponse.getStatus();
    }

    auto cmdStatus = std::move(swCommandResponse.getValue().commandStatus);
    if (!cmdStatus.isOK() && cmdStatus.code() != ErrorCodes::NamespaceNotFound) {
        return cmdStatus;
    }

    return Status::OK();
}

/**
 * Runs listDatabases (nameOnly) on the shard being added and returns the names of its
 * user databases, i.e. everything except admin, local and config.
 */
StatusWith<std::vector<std::string>> ShardingCatalogManager::_getDBNamesListFromShard(
    OperationContext* opCtx, std::shared_ptr<RemoteCommandTargeter> targeter) {

    auto swCommandResponse = _runCommandForAddShard(
        opCtx, targeter.get(), "admin", BSON("listDatabases" << 1 << "nameOnly" << true));
    if (!swCommandResponse.isOK()) {
        return swCommandResponse.getStatus();
    }

    auto cmdStatus = std::move(swCommandResponse.getValue().commandStatus);
    if (!cmdStatus.isOK()) {
        return cmdStatus;
    }

    auto cmdResult = std::move(swCommandResponse.getValue().response);

    std::vector<std::string> dbNames;

    for (const auto& dbEntry : cmdResult["databases"].Obj()) {
        const auto& dbName = dbEntry["name"].String();

        // Skip the internal databases; only user databases conflict with existing ones.
        if (!(dbName == NamespaceString::kAdminDb || dbName == NamespaceString::kLocalDb ||
              dbName == NamespaceString::kConfigDb)) {
            dbNames.push_back(dbName);
        }
    }

    return dbNames;
}

/**
 * Adds the shard described by 'shardConnectionString' to the cluster under the proposed (or
 * generated) name, after validating the host, checking for conflicts with existing shards
 * and databases, dropping its sessions collection, installing the shardIdentity document and
 * setting its FCV. Holds the shard-membership lock for the duration; returns the shard name.
 */
StatusWith<std::string> ShardingCatalogManager::addShard(
    OperationContext* opCtx,
    const std::string* shardProposedName,
    const ConnectionString& shardConnectionString,
    const long long maxSize) {
    if (shardConnectionString.type() == ConnectionString::INVALID) {
        return {ErrorCodes::BadValue, "Invalid connection string"};
    }

    if (shardProposedName && shardProposedName->empty()) {
        return {ErrorCodes::BadValue, "shard name cannot be empty"};
    }

    // Only one addShard operation can be in progress at a time.
    Lock::ExclusiveLock lk(opCtx->lockState(), _kShardMembershipLock);

    // Check if this shard has already been added (can happen in the case of a retry after a network
    // error, for example) and thus this addShard request should be considered a no-op.
    auto existingShard =
        _checkIfShardExists(opCtx, shardConnectionString, shardProposedName, maxSize);
    if (!existingShard.isOK()) {
        return existingShard.getStatus();
    }
    if (existingShard.getValue()) {
        // These hosts already belong to an existing shard, so report success and terminate the
        // addShard request. Make sure to set the last optime for the client to the system last
        // optime so that we'll still wait for replication so that this state is visible in the
        // committed snapshot.
        repl::ReplClientInfo::forClient(opCtx->getClient()).setLastOpToSystemLastOpTime(opCtx);
        return existingShard.getValue()->getName();
    }

    // Force a reload of the ShardRegistry to ensure that, in case this addShard is to re-add a
    // replica set that has recently been removed, we have detached the ReplicaSetMonitor for the
    // set with that setName from the ReplicaSetMonitorManager and will create a new
    // ReplicaSetMonitor when targeting the set below.
    // Note: This is necessary because as of 3.4, removeShard is performed by mongos (unlike
    // addShard), so the ShardRegistry is not synchronously reloaded on the config server when a
    // shard is removed.
    if (!Grid::get(opCtx)->shardRegistry()->reload(opCtx)) {
        // If the first reload joined an existing one, call reload again to ensure the reload is
        // fresh.
        Grid::get(opCtx)->shardRegistry()->reload(opCtx);
    }

    // TODO: Don't create a detached Shard object, create a detached RemoteCommandTargeter instead.
    const std::shared_ptr<Shard> shard{
        Grid::get(opCtx)->shardRegistry()->createConnection(shardConnectionString)};
    invariant(shard);
    auto targeter = shard->getTargeter();

    auto stopMonitoringGuard = MakeGuard([&] {
        if (shardConnectionString.type() == ConnectionString::SET) {
            // This is a workaround for the case were we could have some bad shard being
            // requested to be added and we put that bad connection string on the global replica set
            // monitor registry. It needs to be cleaned up so that when a correct replica set is
            // added, it will be recreated.
            ReplicaSetMonitor::remove(shardConnectionString.getSetName());
        }
    });

    // Validate the specified connection string may serve as shard at all
    auto shardStatus =
        _validateHostAsShard(opCtx, targeter, shardProposedName, shardConnectionString);
    if (!shardStatus.isOK()) {
        return shardStatus.getStatus();
    }
    ShardType& shardType = shardStatus.getValue();

    // Check that none of the existing shard candidate's dbs exist already
    auto dbNamesStatus = _getDBNamesListFromShard(opCtx, targeter);
    if (!dbNamesStatus.isOK()) {
        return dbNamesStatus.getStatus();
    }

    for (const auto& dbName : dbNamesStatus.getValue()) {
        auto dbt = Grid::get(opCtx)->catalogClient()->getDatabase(
            opCtx, dbName, repl::ReadConcernLevel::kLocalReadConcern);
        if (dbt.isOK()) {
            const auto& dbDoc = dbt.getValue().value;
            return Status(ErrorCodes::OperationFailed,
                          str::stream() << "can't add shard "
                                        << "'"
                                        << shardConnectionString.toString()
                                        << "'"
                                        << " because a local database '"
                                        << dbName
                                        << "' exists in another "
                                        << dbDoc.getPrimary());
        } else if (dbt != ErrorCodes::NamespaceNotFound) {
            return dbt.getStatus();
        }
    }

    // Check that the shard candidate does not have a local config.system.sessions collection
    auto res = _dropSessionsCollection(opCtx, targeter);

    if (!res.isOK()) {
        return res.withContext(
            "can't add shard with a local copy of config.system.sessions, please drop this "
            "collection from the shard manually and try again.");
    }

    // If a name for a shard wasn't provided, generate one
    if (shardType.getName().empty()) {
        auto result = generateNewShardName(opCtx);
        if (!result.isOK()) {
            return result.getStatus();
        }
        shardType.setName(result.getValue());
    }

    if (maxSize > 0) {
        shardType.setMaxSizeMB(maxSize);
    }

    // Insert a shardIdentity document onto the shard. This also triggers sharding initialization on
    // the shard.
    LOG(2) << "going to insert shardIdentity document into shard: " << shardType;
    auto commandRequest = createShardIdentityUpsertForAddShard(opCtx, shardType.getName());
    auto swCommandResponse = _runCommandForAddShard(opCtx, targeter.get(), "admin", commandRequest);
    if (!swCommandResponse.isOK()) {
        return swCommandResponse.getStatus();
    }
    auto commandResponse = std::move(swCommandResponse.getValue());
    BatchedCommandResponse batchResponse;
    auto batchResponseStatus =
        Shard::CommandResponse::processBatchWriteResponse(commandResponse, &batchResponse);
    if (!batchResponseStatus.isOK()) {
        return batchResponseStatus;
    }

    // The featureCompatibilityVersion should be the same throughout the cluster. We don't
    // explicitly send writeConcern majority to the added shard, because a 3.4 mongod will reject
    // it (setFCV did not support writeConcern until 3.6), and a 3.6 mongod will still default to
    // majority writeConcern.
    //
    // TODO SERVER-32045: propagate the user's writeConcern
    auto versionResponse = _runCommandForAddShard(
        opCtx,
        targeter.get(),
        "admin",
        BSON(FeatureCompatibilityVersion::kCommandName << FeatureCompatibilityVersion::toString(
                 serverGlobalParams.featureCompatibility.getVersion())));
    if (!versionResponse.isOK()) {
        return versionResponse.getStatus();
    }

    if (!versionResponse.getValue().commandStatus.isOK()) {
        return versionResponse.getValue().commandStatus;
    }

    log() << "going to insert new entry for shard into config.shards: " << shardType.toString();

    Status result = Grid::get(opCtx)->catalogClient()->insertConfigDocument(
        opCtx,
        ShardType::ConfigNS,
        shardType.toBSON(),
        ShardingCatalogClient::kMajorityWriteConcern);
    if (!result.isOK()) {
        log() << "error adding shard: " << shardType.toBSON() << " err: " << result.reason();
        return result;
    }

    // Add all databases which were discovered on the new shard
    for (const auto& dbName : dbNamesStatus.getValue()) {
        DatabaseType dbt(dbName, shardType.getName(), false);
        Status status = Grid::get(opCtx)->catalogClient()->updateDatabase(opCtx, dbName, dbt);
        if (!status.isOK()) {
            // Best-effort: the shard was added; a failed database registration is only logged.
            log() << "adding shard " << shardConnectionString.toString()
                  << " even though could not add database " << dbName;
        }
    }

    // Record in changelog
    BSONObjBuilder shardDetails;
    shardDetails.append("name", shardType.getName());
    shardDetails.append("host", shardConnectionString.toString());

    Grid::get(opCtx)
        ->catalogClient()
        ->logChange(
            opCtx, "addShard", "", shardDetails.obj(), ShardingCatalogClient::kMajorityWriteConcern)
        .transitional_ignore();

    // Ensure the added shard is visible to this process.
    auto shardRegistry = Grid::get(opCtx)->shardRegistry();
    if (!shardRegistry->getShard(opCtx, shardType.getName()).isOK()) {
        return {ErrorCodes::OperationFailed,
                "Could not find shard metadata for shard after adding it. This most likely "
                "indicates that the shard was removed immediately after it was added."};
    }
    stopMonitoringGuard.Dismiss();

    return shardType.getName();
}

/**
 * Initiates or continues draining of the given shard. First call marks the shard as draining
 * (STARTED); subsequent calls report ONGOING while chunks/databases remain, and COMPLETED
 * once the shard document has been removed from config.shards.
 */
StatusWith<ShardDrainingStatus> ShardingCatalogManager::removeShard(OperationContext* opCtx,
                                                                    const ShardId& shardId) {
    // Check preconditions for removing the shard
    std::string name = shardId.toString();
    auto countStatus = _runCountCommandOnConfig(
        opCtx,
        ShardType::ConfigNS,
        BSON(ShardType::name() << NE << name << ShardType::draining(true)));
    if (!countStatus.isOK()) {
        return countStatus.getStatus();
    }
    if (countStatus.getValue() > 0) {
        return Status(ErrorCodes::ConflictingOperationInProgress,
                      "Can't have more than one draining shard at a time");
    }

    countStatus =
        _runCountCommandOnConfig(opCtx, ShardType::ConfigNS, BSON(ShardType::name() << NE << name));
    if (!countStatus.isOK()) {
        return countStatus.getStatus();
    }
    if (countStatus.getValue() == 0) {
        return Status(ErrorCodes::IllegalOperation, "Can't remove last shard");
    }

    // Figure out if shard is already draining
    countStatus = _runCountCommandOnConfig(
        opCtx, ShardType::ConfigNS, BSON(ShardType::name() << name << ShardType::draining(true)));
    if (!countStatus.isOK()) {
        return countStatus.getStatus();
    }

    auto* const shardRegistry = Grid::get(opCtx)->shardRegistry();

    if (countStatus.getValue() == 0) {
        log() << "going to start draining shard: " << name;

        auto updateStatus = Grid::get(opCtx)->catalogClient()->updateConfigDocument(
            opCtx,
            ShardType::ConfigNS,
            BSON(ShardType::name() << name),
            BSON("$set" << BSON(ShardType::draining(true))),
            false,
            ShardingCatalogClient::kLocalWriteConcern);
        if (!updateStatus.isOK()) {
            log() << "error starting removeShard: " << name
                  << causedBy(redact(updateStatus.getStatus()));
            return updateStatus.getStatus();
        }

        shardRegistry->reload(opCtx);

        // Record start in changelog
        Grid::get(opCtx)
            ->catalogClient()
            ->logChange(opCtx,
                        "removeShard.start",
                        "",
                        BSON("shard" << name),
                        ShardingCatalogClient::kLocalWriteConcern)
            .transitional_ignore();

        return ShardDrainingStatus::STARTED;
    }

    // Draining has already started, now figure out how many chunks and databases are still on the
    // shard.
    countStatus =
        _runCountCommandOnConfig(opCtx, ChunkType::ConfigNS, BSON(ChunkType::shard(name)));
    if (!countStatus.isOK()) {
        return countStatus.getStatus();
    }
    const long long chunkCount = countStatus.getValue();

    countStatus =
        _runCountCommandOnConfig(opCtx, DatabaseType::ConfigNS, BSON(DatabaseType::primary(name)));
    if (!countStatus.isOK()) {
        return countStatus.getStatus();
    }
    const long long databaseCount = countStatus.getValue();

    if (chunkCount > 0 || databaseCount > 0) {
        // Still more draining to do
        LOG(0) << "chunkCount: " << chunkCount;
        LOG(0) << "databaseCount: " << databaseCount;
        return ShardDrainingStatus::ONGOING;
    }

    // Draining is done, now finish removing the shard.
    log() << "going to remove shard: " << name;
    audit::logRemoveShard(opCtx->getClient(), name);

    Status status = Grid::get(opCtx)->catalogClient()->removeConfigDocuments(
        opCtx,
        ShardType::ConfigNS,
        BSON(ShardType::name() << name),
        ShardingCatalogClient::kLocalWriteConcern);
    if (!status.isOK()) {
        log() << "Error concluding removeShard operation on: " << name
              << "; err: " << status.reason();
        return status;
    }

    shardConnectionPool.removeHost(name);
    ReplicaSetMonitor::remove(name);

    shardRegistry->reload(opCtx);

    // Record finish in changelog
    Grid::get(opCtx)
        ->catalogClient()
        ->logChange(opCtx,
                    "removeShard",
                    "",
                    BSON("shard" << name),
                    ShardingCatalogClient::kLocalWriteConcern)
        .transitional_ignore();

    return ShardDrainingStatus::COMPLETED;
}

// Reports connection-pool statistics for the dedicated add-shard executor.
void ShardingCatalogManager::appendConnectionStats(executor::ConnectionPoolStats* stats) {
    _executorForAddShard->appendConnectionStats(stats);
}

/**
 * Builds the update command (with majority write concern) that upserts the shardIdentity
 * document on a newly added shard, carrying the shard name, cluster id and the config
 * server connection string.
 */
BSONObj ShardingCatalogManager::createShardIdentityUpsertForAddShard(OperationContext* opCtx,
                                                                     const std::string& shardName) {
    BatchedCommandRequest request([&] {
        write_ops::Update updateOp(NamespaceString::kServerConfigurationNamespace);
        updateOp.setUpdates(
            {[&] {
                write_ops::UpdateOpEntry entry;
                entry.setQ(BSON("_id"
                                << "shardIdentity"
                                << ShardIdentityType::shardName(shardName)
                                << ShardIdentityType::clusterId(
                                       ClusterIdentityLoader::get(opCtx)->getClusterId())));
                entry.setU(BSON("$set" << BSON(ShardIdentityType::configsvrConnString(
                                           repl::ReplicationCoordinator::get(opCtx)
                                               ->getConfig()
                                               .getConnectionString()
                                               .toString()))));
                entry.setUpsert(true);
                return entry;
            }()});
        return updateOp;
    }());
    request.setWriteConcern(ShardingCatalogClient::kMajorityWriteConcern.toBSON());

    return request.toBSON();
}

// static
// Picks the least-loaded shard (by total shard size) as the primary for a new database,
// reloading the registry once if no shards are initially known.
StatusWith<ShardId> ShardingCatalogManager::_selectShardForNewDatabase(
    OperationContext* opCtx, ShardRegistry* shardRegistry) {
    vector<ShardId> allShardIds;

    shardRegistry->getAllShardIds(&allShardIds);
    if (allShardIds.empty()) {
        shardRegistry->reload(opCtx);
        shardRegistry->getAllShardIds(&allShardIds);

        if (allShardIds.empty()) {
            return Status(ErrorCodes::ShardNotFound, "No shards found");
        }
    }

    ShardId candidateShardId = allShardIds[0];

    auto candidateSizeStatus = shardutil::retrieveTotalShardSize(opCtx, candidateShardId);
    if (!candidateSizeStatus.isOK()) {
        return candidateSizeStatus.getStatus();
    }

    for (size_t i = 1; i < allShardIds.size(); i++) {
        const ShardId shardId = allShardIds[i];

        const auto sizeStatus = shardutil::retrieveTotalShardSize(opCtx, shardId);
        if (!sizeStatus.isOK()) {
            return sizeStatus.getStatus();
        }

        if (sizeStatus.getValue() < candidateSizeStatus.getValue()) {
            candidateSizeStatus = sizeStatus;
            candidateShardId = shardId;
        }
    }

    return candidateShardId;
}

/**
 * Runs a count command with the given query against 'nss' on the config server (idempotent
 * retry policy) and returns the resulting 'n'.
 */
StatusWith<long long> ShardingCatalogManager::_runCountCommandOnConfig(OperationContext* opCtx,
                                                                       const NamespaceString& nss,
                                                                       BSONObj query) {
    BSONObjBuilder countBuilder;
    countBuilder.append("count", nss.coll());
    countBuilder.append("query", query);

    auto configShard = Grid::get(opCtx)->shardRegistry()->getConfigShard();
    auto resultStatus =
        configShard->runCommandWithFixedRetryAttempts(opCtx,
                                                      kConfigReadSelector,
                                                      nss.db().toString(),
                                                      countBuilder.done(),
                                                      Shard::kDefaultConfigCommandTimeout,
                                                      Shard::RetryPolicy::kIdempotent);
    if (!resultStatus.isOK()) {
        return resultStatus.getStatus();
    }
    if (!resultStatus.getValue().commandStatus.isOK()) {
        return resultStatus.getValue().commandStatus;
    }

    auto responseObj = std::move(resultStatus.getValue().response);

    long long result;
    auto status = bsonExtractIntegerField(responseObj, "n", &result);
    if (!status.isOK()) {
        return status;
    }

    return result;
}

}  // namespace mongo
/**
 * Copyright (C) 2016 MongoDB Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License, version 3,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 * As a special exception, the copyright holders give permission to link the
 * code of portions of this program with the OpenSSL library under certain
 * conditions as described in each individual source file and distribute
 * linked combinations including the program with the OpenSSL library. You
 * must comply with the GNU Affero General Public License in all respects
 * for all of the code used other than as permitted herein. If you modify
 * file(s) with this exception, you may extend this exception to your
 * version of the file(s), but you are not obligated to do so. If you do not
 * wish to do so, delete this exception statement from your version. If you
 * delete this exception statement from all source files in the program,
 * then also delete it in the license file.
 */

#include "mongo/platform/basic.h"

#include "mongo/client/read_preference.h"
#include "mongo/db/namespace_string.h"
#include "mongo/db/s/config/sharding_catalog_manager.h"
#include "mongo/s/catalog/type_chunk.h"
#include "mongo/s/config_server_test_fixture.h"

namespace mongo {
namespace {

// Tests for ShardingCatalogManager::commitChunkSplit, run against the config server fixture.
using SplitChunkTest = ConfigServerTestFixture;

const NamespaceString kNamespace("TestDB", "TestColl");

// A single split point on one chunk: both resulting chunks get successive minor-version bumps.
TEST_F(SplitChunkTest, SplitExistingChunkCorrectlyShouldSucceed) {
    ChunkType chunk;
    chunk.setNS(kNamespace);

    auto origVersion = ChunkVersion(1, 0, OID::gen());
    chunk.setVersion(origVersion);
    chunk.setShard(ShardId("shard0000"));

    auto chunkMin = BSON("a" << 1);
    auto chunkMax = BSON("a" << 10);
    chunk.setMin(chunkMin);
    chunk.setMax(chunkMax);

    auto chunkSplitPoint = BSON("a" << 5);
    std::vector<BSONObj> splitPoints{chunkSplitPoint};

    setupChunks({chunk}).transitional_ignore();

    ASSERT_OK(ShardingCatalogManager::get(operationContext())
                  ->commitChunkSplit(operationContext(),
                                     NamespaceString("TestDB.TestColl"),
                                     origVersion.epoch(),
                                     ChunkRange(chunkMin, chunkMax),
                                     splitPoints,
                                     "shard0000"));

    // First chunkDoc should have range [chunkMin, chunkSplitPoint]
    auto chunkDocStatus = getChunkDoc(operationContext(), chunkMin);
    ASSERT_OK(chunkDocStatus.getStatus());

    auto chunkDoc = chunkDocStatus.getValue();
    ASSERT_BSONOBJ_EQ(chunkSplitPoint, chunkDoc.getMax());

    // Check for increment on first chunkDoc's minor version
    ASSERT_EQ(origVersion.majorVersion(), chunkDoc.getVersion().majorVersion());
    ASSERT_EQ(origVersion.minorVersion() + 1, chunkDoc.getVersion().minorVersion());

    // Second chunkDoc should have range [chunkSplitPoint, chunkMax]
    auto otherChunkDocStatus = getChunkDoc(operationContext(), chunkSplitPoint);
    ASSERT_OK(otherChunkDocStatus.getStatus());

    auto otherChunkDoc = otherChunkDocStatus.getValue();
    ASSERT_BSONOBJ_EQ(chunkMax, otherChunkDoc.getMax());

    // Check for increment on second chunkDoc's minor version
    ASSERT_EQ(origVersion.majorVersion(), otherChunkDoc.getVersion().majorVersion());
    ASSERT_EQ(origVersion.minorVersion() + 2, otherChunkDoc.getVersion().minorVersion());
}

// Two split points on one chunk: three resulting chunks, minor versions incremented in order.
TEST_F(SplitChunkTest, MultipleSplitsOnExistingChunkShouldSucceed) {
    ChunkType chunk;
    chunk.setNS(kNamespace);

    auto origVersion = ChunkVersion(1, 0, OID::gen());
    chunk.setVersion(origVersion);
    chunk.setShard(ShardId("shard0000"));

    auto chunkMin = BSON("a" << 1);
    auto chunkMax = BSON("a" << 10);
    chunk.setMin(chunkMin);
    chunk.setMax(chunkMax);

    auto chunkSplitPoint = BSON("a" << 5);
    auto chunkSplitPoint2 = BSON("a" << 7);
    std::vector<BSONObj> splitPoints{chunkSplitPoint, chunkSplitPoint2};

    setupChunks({chunk}).transitional_ignore();

    ASSERT_OK(ShardingCatalogManager::get(operationContext())
                  ->commitChunkSplit(operationContext(),
                                     NamespaceString("TestDB.TestColl"),
                                     origVersion.epoch(),
                                     ChunkRange(chunkMin, chunkMax),
                                     splitPoints,
                                     "shard0000"));

    // First chunkDoc should have range [chunkMin, chunkSplitPoint]
    auto chunkDocStatus = getChunkDoc(operationContext(), chunkMin);
    ASSERT_OK(chunkDocStatus.getStatus());

    auto chunkDoc = chunkDocStatus.getValue();
    ASSERT_BSONOBJ_EQ(chunkSplitPoint, chunkDoc.getMax());

    // Check for increment on first chunkDoc's minor version
    ASSERT_EQ(origVersion.majorVersion(), chunkDoc.getVersion().majorVersion());
    ASSERT_EQ(origVersion.minorVersion() + 1, chunkDoc.getVersion().minorVersion());

    // Second chunkDoc should have range [chunkSplitPoint, chunkSplitPoint2]
    auto midChunkDocStatus = getChunkDoc(operationContext(), chunkSplitPoint);
    ASSERT_OK(midChunkDocStatus.getStatus());

    auto midChunkDoc = midChunkDocStatus.getValue();
    ASSERT_BSONOBJ_EQ(chunkSplitPoint2, midChunkDoc.getMax());

    // Check for increment on second chunkDoc's minor version
    ASSERT_EQ(origVersion.majorVersion(), midChunkDoc.getVersion().majorVersion());
    ASSERT_EQ(origVersion.minorVersion() + 2, midChunkDoc.getVersion().minorVersion());

    // Third chunkDoc should have range [chunkSplitPoint2, chunkMax]
    auto lastChunkDocStatus = getChunkDoc(operationContext(), chunkSplitPoint2);
    ASSERT_OK(lastChunkDocStatus.getStatus());

    auto lastChunkDoc = lastChunkDocStatus.getValue();
    ASSERT_BSONOBJ_EQ(chunkMax, lastChunkDoc.getMax());

    // Check for increment on third chunkDoc's minor version
    ASSERT_EQ(origVersion.majorVersion(), lastChunkDoc.getVersion().majorVersion());
    ASSERT_EQ(origVersion.minorVersion() + 3, lastChunkDoc.getVersion().minorVersion());
}

// When another chunk in the collection carries a higher version, the split result must be
// versioned above that competing chunk, not above the chunk being split.
TEST_F(SplitChunkTest, NewSplitShouldClaimHighestVersion) {
    ChunkType chunk, chunk2;
    chunk.setNS(kNamespace);
    chunk2.setNS(kNamespace);
    auto collEpoch = OID::gen();

    // set up first chunk
    auto origVersion = ChunkVersion(1, 2, collEpoch);
    chunk.setVersion(origVersion);
    chunk.setShard(ShardId("shard0000"));

    auto chunkMin = BSON("a" << 1);
    auto chunkMax = BSON("a" << 10);
    chunk.setMin(chunkMin);
    chunk.setMax(chunkMax);

    std::vector<BSONObj> splitPoints;
    auto chunkSplitPoint = BSON("a" << 5);
    splitPoints.push_back(chunkSplitPoint);

    // set up second chunk (chunk2)
    auto competingVersion = ChunkVersion(2, 1, collEpoch);
    chunk2.setVersion(competingVersion);
    chunk2.setShard(ShardId("shard0000"));
    chunk2.setMin(BSON("a" << 10));
    chunk2.setMax(BSON("a" << 20));

    setupChunks({chunk, chunk2}).transitional_ignore();

    ASSERT_OK(ShardingCatalogManager::get(operationContext())
                  ->commitChunkSplit(operationContext(),
                                     NamespaceString("TestDB.TestColl"),
                                     collEpoch,
                                     ChunkRange(chunkMin, chunkMax),
                                     splitPoints,
                                     "shard0000"));

    // First chunkDoc should have range [chunkMin, chunkSplitPoint]
    auto chunkDocStatus = getChunkDoc(operationContext(), chunkMin);
    ASSERT_OK(chunkDocStatus.getStatus());

    auto chunkDoc = chunkDocStatus.getValue();
    ASSERT_BSONOBJ_EQ(chunkSplitPoint, chunkDoc.getMax());

    // Check for increment based on the competing chunk version
    ASSERT_EQ(competingVersion.majorVersion(), chunkDoc.getVersion().majorVersion());
    ASSERT_EQ(competingVersion.minorVersion() + 1, chunkDoc.getVersion().minorVersion());

    // Second chunkDoc should have range [chunkSplitPoint, chunkMax]
    auto otherChunkDocStatus = getChunkDoc(operationContext(), chunkSplitPoint);
    ASSERT_OK(otherChunkDocStatus.getStatus());

    auto otherChunkDoc = otherChunkDocStatus.getValue();
    ASSERT_BSONOBJ_EQ(chunkMax, otherChunkDoc.getMax());

    // Check for increment based on the competing chunk version
    ASSERT_EQ(competingVersion.majorVersion(), otherChunkDoc.getVersion().majorVersion());
    ASSERT_EQ(competingVersion.minorVersion() + 2, otherChunkDoc.getVersion().minorVersion());
}

// A split request whose range does not match the existing chunk's bounds must fail BadValue.
TEST_F(SplitChunkTest, PreConditionFailErrors) {
    ChunkType chunk;
    chunk.setNS(kNamespace);

    auto origVersion = ChunkVersion(1, 0, OID::gen());
    chunk.setVersion(origVersion);
    chunk.setShard(ShardId("shard0000"));

    auto chunkMin = BSON("a" << 1);
    auto chunkMax = BSON("a" << 10);
    chunk.setMin(chunkMin);
    chunk.setMax(chunkMax);

    std::vector<BSONObj> splitPoints;
    auto chunkSplitPoint = BSON("a" << 5);
    splitPoints.push_back(chunkSplitPoint);

    setupChunks({chunk}).transitional_ignore();

    auto splitStatus = ShardingCatalogManager::get(operationContext())
                           ->commitChunkSplit(operationContext(),
                                              NamespaceString("TestDB.TestColl"),
                                              origVersion.epoch(),
                                              ChunkRange(chunkMin, BSON("a" << 7)),
                                              splitPoints,
                                              "shard0000");
    ASSERT_EQ(ErrorCodes::BadValue, splitStatus);
}

TEST_F(SplitChunkTest, NonExisingNamespaceErrors) {
    ChunkType chunk;
    chunk.setNS(kNamespace);

    auto origVersion = ChunkVersion(1, 0, OID::gen());
    chunk.setVersion(origVersion);
    chunk.setShard(ShardId("shard0000"));

    auto chunkMin = BSON("a" << 1);
    auto chunkMax = BSON("a" << 10);
    chunk.setMin(chunkMin);
    chunk.setMax(chunkMax);

    std::vector<BSONObj> splitPoints{BSON("a" <<
5)}; + + setupChunks({chunk}).transitional_ignore(); + + auto splitStatus = ShardingCatalogManager::get(operationContext()) + ->commitChunkSplit(operationContext(), + NamespaceString("TestDB.NonExistingColl"), + origVersion.epoch(), + ChunkRange(chunkMin, chunkMax), + splitPoints, + "shard0000"); + ASSERT_EQ(ErrorCodes::IllegalOperation, splitStatus); +} + +TEST_F(SplitChunkTest, NonMatchingEpochsOfChunkAndRequestErrors) { + ChunkType chunk; + chunk.setNS(kNamespace); + + auto origVersion = ChunkVersion(1, 0, OID::gen()); + chunk.setVersion(origVersion); + chunk.setShard(ShardId("shard0000")); + + auto chunkMin = BSON("a" << 1); + auto chunkMax = BSON("a" << 10); + chunk.setMin(chunkMin); + chunk.setMax(chunkMax); + + std::vector<BSONObj> splitPoints{BSON("a" << 5)}; + + setupChunks({chunk}).transitional_ignore(); + + auto splitStatus = ShardingCatalogManager::get(operationContext()) + ->commitChunkSplit(operationContext(), + NamespaceString("TestDB.TestColl"), + OID::gen(), + ChunkRange(chunkMin, chunkMax), + splitPoints, + "shard0000"); + ASSERT_EQ(ErrorCodes::StaleEpoch, splitStatus); +} + +TEST_F(SplitChunkTest, SplitPointsOutOfOrderShouldFail) { + ChunkType chunk; + chunk.setNS(kNamespace); + + auto origVersion = ChunkVersion(1, 0, OID::gen()); + chunk.setVersion(origVersion); + chunk.setShard(ShardId("shard0000")); + + auto chunkMin = BSON("a" << 1); + auto chunkMax = BSON("a" << 10); + chunk.setMin(chunkMin); + chunk.setMax(chunkMax); + + std::vector<BSONObj> splitPoints{BSON("a" << 5), BSON("a" << 4)}; + + setupChunks({chunk}).transitional_ignore(); + + auto splitStatus = ShardingCatalogManager::get(operationContext()) + ->commitChunkSplit(operationContext(), + NamespaceString("TestDB.TestColl"), + origVersion.epoch(), + ChunkRange(chunkMin, chunkMax), + splitPoints, + "shard0000"); + ASSERT_EQ(ErrorCodes::InvalidOptions, splitStatus); +} + +TEST_F(SplitChunkTest, SplitPointsOutOfRangeAtMinShouldFail) { + ChunkType chunk; + chunk.setNS(kNamespace); + + auto 
origVersion = ChunkVersion(1, 0, OID::gen()); + chunk.setVersion(origVersion); + chunk.setShard(ShardId("shard0000")); + + auto chunkMin = BSON("a" << 1); + auto chunkMax = BSON("a" << 10); + chunk.setMin(chunkMin); + chunk.setMax(chunkMax); + + std::vector<BSONObj> splitPoints{BSON("a" << 0), BSON("a" << 5)}; + + setupChunks({chunk}).transitional_ignore(); + + auto splitStatus = ShardingCatalogManager::get(operationContext()) + ->commitChunkSplit(operationContext(), + NamespaceString("TestDB.TestColl"), + origVersion.epoch(), + ChunkRange(chunkMin, chunkMax), + splitPoints, + "shard0000"); + ASSERT_EQ(ErrorCodes::InvalidOptions, splitStatus); +} + +TEST_F(SplitChunkTest, SplitPointsOutOfRangeAtMaxShouldFail) { + ChunkType chunk; + chunk.setNS(kNamespace); + + auto origVersion = ChunkVersion(1, 0, OID::gen()); + chunk.setVersion(origVersion); + chunk.setShard(ShardId("shard0000")); + + auto chunkMin = BSON("a" << 1); + auto chunkMax = BSON("a" << 10); + chunk.setMin(chunkMin); + chunk.setMax(chunkMax); + + std::vector<BSONObj> splitPoints{BSON("a" << 5), BSON("a" << 15)}; + + setupChunks({chunk}).transitional_ignore(); + + auto splitStatus = ShardingCatalogManager::get(operationContext()) + ->commitChunkSplit(operationContext(), + NamespaceString("TestDB.TestColl"), + origVersion.epoch(), + ChunkRange(chunkMin, chunkMax), + splitPoints, + "shard0000"); + ASSERT_EQ(ErrorCodes::InvalidOptions, splitStatus); +} + +} // namespace +} // namespace mongo diff --git a/src/mongo/db/s/config/sharding_catalog_manager_zone_operations.cpp b/src/mongo/db/s/config/sharding_catalog_manager_zone_operations.cpp new file mode 100644 index 00000000000..37d783a9a02 --- /dev/null +++ b/src/mongo/db/s/config/sharding_catalog_manager_zone_operations.cpp @@ -0,0 +1,397 @@ +/** + * Copyright (C) 2017 MongoDB Inc. 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + * + * As a special exception, the copyright holders give permission to link the + * code of portions of this program with the OpenSSL library under certain + * conditions as described in each individual source file and distribute + * linked combinations including the program with the OpenSSL library. You + * must comply with the GNU Affero General Public License in all respects for + * all of the code used other than as permitted herein. If you modify file(s) + * with this exception, you may extend this exception to your version of the + * file(s), but you are not obligated to do so. If you do not wish to do so, + * delete this exception statement from your version. If you delete this + * exception statement from all source files in the program, then also delete + * it in the license file. 
+ */ + +#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kSharding + +#include "mongo/platform/basic.h" + +#include "mongo/db/s/config/sharding_catalog_manager.h" + +#include "mongo/base/status_with.h" +#include "mongo/client/read_preference.h" +#include "mongo/db/operation_context.h" +#include "mongo/db/s/balancer/balancer_policy.h" +#include "mongo/db/write_concern_options.h" +#include "mongo/s/catalog/sharding_catalog_client.h" +#include "mongo/s/catalog/type_collection.h" +#include "mongo/s/catalog/type_shard.h" +#include "mongo/s/catalog/type_tags.h" +#include "mongo/s/client/shard.h" +#include "mongo/s/client/shard_registry.h" +#include "mongo/s/grid.h" +#include "mongo/s/shard_key_pattern.h" +#include "mongo/util/log.h" + +namespace mongo { +namespace { + +const ReadPreferenceSetting kConfigPrimarySelector(ReadPreference::PrimaryOnly); +const WriteConcernOptions kNoWaitWriteConcern(1, WriteConcernOptions::SyncMode::UNSET, Seconds(0)); + +/** + * Checks if the given key range for the given namespace conflicts with an existing key range. + * Note: range should have the full shard key. + * Returns ErrorCodes::RangeOverlapConflict is an overlap is detected. 
+ */ +Status checkForOveralappedZonedKeyRange(OperationContext* opCtx, + Shard* configServer, + const NamespaceString& nss, + const ChunkRange& range, + const std::string& zoneName, + const KeyPattern& shardKeyPattern) { + DistributionStatus chunkDist(nss, ShardToChunksMap{}); + + auto tagStatus = configServer->exhaustiveFindOnConfig(opCtx, + kConfigPrimarySelector, + repl::ReadConcernLevel::kLocalReadConcern, + TagsType::ConfigNS, + BSON(TagsType::ns(nss.ns())), + BSONObj(), + 0); + if (!tagStatus.isOK()) { + return tagStatus.getStatus(); + } + + const auto& tagDocList = tagStatus.getValue().docs; + for (const auto& tagDoc : tagDocList) { + auto tagParseStatus = TagsType::fromBSON(tagDoc); + if (!tagParseStatus.isOK()) { + return tagParseStatus.getStatus(); + } + + // Always extend ranges to full shard key to be compatible with tags created before + // the zone commands were implemented. + const auto& parsedTagDoc = tagParseStatus.getValue(); + auto overlapStatus = chunkDist.addRangeToZone( + ZoneRange(shardKeyPattern.extendRangeBound(parsedTagDoc.getMinKey(), false), + shardKeyPattern.extendRangeBound(parsedTagDoc.getMaxKey(), false), + parsedTagDoc.getTag())); + if (!overlapStatus.isOK()) { + return overlapStatus; + } + } + + auto overlapStatus = + chunkDist.addRangeToZone(ZoneRange(range.getMin(), range.getMax(), zoneName)); + if (!overlapStatus.isOK()) { + return overlapStatus; + } + + return Status::OK(); +} + +/** + * Returns a new range based on the given range with the full shard key. + * Returns: + * - ErrorCodes::NamespaceNotSharded if nss is not sharded. + * - ErrorCodes::ShardKeyNotFound if range is not compatible (for example, not a prefix of shard + * key) with the shard key of nss. 
+ */ +StatusWith<ChunkRange> includeFullShardKey(OperationContext* opCtx, + Shard* configServer, + const NamespaceString& nss, + const ChunkRange& range, + KeyPattern* shardKeyPatternOut) { + auto findCollStatus = + configServer->exhaustiveFindOnConfig(opCtx, + kConfigPrimarySelector, + repl::ReadConcernLevel::kLocalReadConcern, + CollectionType::ConfigNS, + BSON(CollectionType::fullNs(nss.ns())), + BSONObj(), + 1); + + if (!findCollStatus.isOK()) { + return findCollStatus.getStatus(); + } + + const auto& findCollResult = findCollStatus.getValue().docs; + + if (findCollResult.size() < 1) { + return {ErrorCodes::NamespaceNotSharded, str::stream() << nss.ns() << " is not sharded"}; + } + + auto parseStatus = CollectionType::fromBSON(findCollResult.front()); + if (!parseStatus.isOK()) { + return parseStatus.getStatus(); + } + + auto collDoc = parseStatus.getValue(); + if (collDoc.getDropped()) { + return {ErrorCodes::NamespaceNotSharded, str::stream() << nss.ns() << " is not sharded"}; + } + + const auto& shardKeyPattern = collDoc.getKeyPattern(); + const auto& shardKeyBSON = shardKeyPattern.toBSON(); + *shardKeyPatternOut = shardKeyPattern; + + if (!range.getMin().isFieldNamePrefixOf(shardKeyBSON)) { + return {ErrorCodes::ShardKeyNotFound, + str::stream() << "min: " << range.getMin() << " is not a prefix of the shard key " + << shardKeyBSON + << " of ns: " + << nss.ns()}; + } + + if (!range.getMax().isFieldNamePrefixOf(shardKeyBSON)) { + return {ErrorCodes::ShardKeyNotFound, + str::stream() << "max: " << range.getMax() << " is not a prefix of the shard key " + << shardKeyBSON + << " of ns: " + << nss.ns()}; + } + + return ChunkRange(shardKeyPattern.extendRangeBound(range.getMin(), false), + shardKeyPattern.extendRangeBound(range.getMax(), false)); +} + +} // namespace + +Status ShardingCatalogManager::addShardToZone(OperationContext* opCtx, + const std::string& shardName, + const std::string& zoneName) { + Lock::ExclusiveLock lk(opCtx->lockState(), _kZoneOpLock); + + 
auto updateStatus = Grid::get(opCtx)->catalogClient()->updateConfigDocument( + opCtx, + ShardType::ConfigNS, + BSON(ShardType::name(shardName)), + BSON("$addToSet" << BSON(ShardType::tags() << zoneName)), + false, + kNoWaitWriteConcern); + + if (!updateStatus.isOK()) { + return updateStatus.getStatus(); + } + + if (!updateStatus.getValue()) { + return {ErrorCodes::ShardNotFound, + str::stream() << "shard " << shardName << " does not exist"}; + } + + return Status::OK(); +} + +Status ShardingCatalogManager::removeShardFromZone(OperationContext* opCtx, + const std::string& shardName, + const std::string& zoneName) { + Lock::ExclusiveLock lk(opCtx->lockState(), _kZoneOpLock); + + auto configShard = Grid::get(opCtx)->shardRegistry()->getConfigShard(); + const NamespaceString shardNS(ShardType::ConfigNS); + + // + // Check whether the shard even exist in the first place. + // + + auto findShardExistsStatus = + configShard->exhaustiveFindOnConfig(opCtx, + kConfigPrimarySelector, + repl::ReadConcernLevel::kLocalReadConcern, + shardNS, + BSON(ShardType::name() << shardName), + BSONObj(), + 1); + + if (!findShardExistsStatus.isOK()) { + return findShardExistsStatus.getStatus(); + } + + if (findShardExistsStatus.getValue().docs.size() == 0) { + return {ErrorCodes::ShardNotFound, + str::stream() << "shard " << shardName << " does not exist"}; + } + + // + // Check how many shards belongs to this zone. + // + + auto findShardStatus = + configShard->exhaustiveFindOnConfig(opCtx, + kConfigPrimarySelector, + repl::ReadConcernLevel::kLocalReadConcern, + shardNS, + BSON(ShardType::tags() << zoneName), + BSONObj(), + 2); + + if (!findShardStatus.isOK()) { + return findShardStatus.getStatus(); + } + + const auto shardDocs = findShardStatus.getValue().docs; + + if (shardDocs.size() == 0) { + // The zone doesn't exists, this could be a retry. 
+ return Status::OK(); + } + + if (shardDocs.size() == 1) { + auto shardDocStatus = ShardType::fromBSON(shardDocs.front()); + if (!shardDocStatus.isOK()) { + return shardDocStatus.getStatus(); + } + + auto shardDoc = shardDocStatus.getValue(); + if (shardDoc.getName() != shardName) { + // The last shard that belongs to this zone is a different shard. + // This could be a retry, so return OK. + return Status::OK(); + } + + auto findChunkRangeStatus = + configShard->exhaustiveFindOnConfig(opCtx, + kConfigPrimarySelector, + repl::ReadConcernLevel::kLocalReadConcern, + TagsType::ConfigNS, + BSON(TagsType::tag() << zoneName), + BSONObj(), + 1); + + if (!findChunkRangeStatus.isOK()) { + return findChunkRangeStatus.getStatus(); + } + + if (findChunkRangeStatus.getValue().docs.size() > 0) { + return {ErrorCodes::ZoneStillInUse, + "cannot remove a shard from zone if a chunk range is associated with it"}; + } + } + + // + // Perform update. + // + + auto updateStatus = Grid::get(opCtx)->catalogClient()->updateConfigDocument( + opCtx, + ShardType::ConfigNS, + BSON(ShardType::name(shardName)), + BSON("$pull" << BSON(ShardType::tags() << zoneName)), + false, + kNoWaitWriteConcern); + + if (!updateStatus.isOK()) { + return updateStatus.getStatus(); + } + + // The update did not match a document, another thread could have removed it. 
+ if (!updateStatus.getValue()) { + return {ErrorCodes::ShardNotFound, + str::stream() << "shard " << shardName << " no longer exist"}; + } + + return Status::OK(); +} + + +Status ShardingCatalogManager::assignKeyRangeToZone(OperationContext* opCtx, + const NamespaceString& nss, + const ChunkRange& givenRange, + const std::string& zoneName) { + Lock::ExclusiveLock lk(opCtx->lockState(), _kZoneOpLock); + + auto configServer = Grid::get(opCtx)->shardRegistry()->getConfigShard(); + + KeyPattern shardKeyPattern{BSONObj()}; + auto fullShardKeyStatus = + includeFullShardKey(opCtx, configServer.get(), nss, givenRange, &shardKeyPattern); + if (!fullShardKeyStatus.isOK()) { + return fullShardKeyStatus.getStatus(); + } + + const auto& fullShardKeyRange = fullShardKeyStatus.getValue(); + + auto zoneExistStatus = + configServer->exhaustiveFindOnConfig(opCtx, + kConfigPrimarySelector, + repl::ReadConcernLevel::kLocalReadConcern, + ShardType::ConfigNS, + BSON(ShardType::tags() << zoneName), + BSONObj(), + 1); + + if (!zoneExistStatus.isOK()) { + return zoneExistStatus.getStatus(); + } + + auto zoneExist = zoneExistStatus.getValue().docs.size() > 0; + if (!zoneExist) { + return {ErrorCodes::ZoneNotFound, + str::stream() << "zone " << zoneName << " does not exist"}; + } + + auto overlapStatus = checkForOveralappedZonedKeyRange( + opCtx, configServer.get(), nss, fullShardKeyRange, zoneName, shardKeyPattern); + if (!overlapStatus.isOK()) { + return overlapStatus; + } + + BSONObj updateQuery( + BSON("_id" << BSON(TagsType::ns(nss.ns()) << TagsType::min(fullShardKeyRange.getMin())))); + + BSONObjBuilder updateBuilder; + updateBuilder.append("_id", + BSON(TagsType::ns(nss.ns()) << TagsType::min(fullShardKeyRange.getMin()))); + updateBuilder.append(TagsType::ns(), nss.ns()); + updateBuilder.append(TagsType::min(), fullShardKeyRange.getMin()); + updateBuilder.append(TagsType::max(), fullShardKeyRange.getMax()); + updateBuilder.append(TagsType::tag(), zoneName); + + auto updateStatus = 
Grid::get(opCtx)->catalogClient()->updateConfigDocument( + opCtx, TagsType::ConfigNS, updateQuery, updateBuilder.obj(), true, kNoWaitWriteConcern); + + if (!updateStatus.isOK()) { + return updateStatus.getStatus(); + } + + return Status::OK(); +} + +Status ShardingCatalogManager::removeKeyRangeFromZone(OperationContext* opCtx, + const NamespaceString& nss, + const ChunkRange& range) { + Lock::ExclusiveLock lk(opCtx->lockState(), _kZoneOpLock); + + auto configServer = Grid::get(opCtx)->shardRegistry()->getConfigShard(); + + KeyPattern shardKeyPattern{BSONObj()}; + auto fullShardKeyStatus = + includeFullShardKey(opCtx, configServer.get(), nss, range, &shardKeyPattern); + if (!fullShardKeyStatus.isOK()) { + return fullShardKeyStatus.getStatus(); + } + + BSONObjBuilder removeBuilder; + removeBuilder.append("_id", BSON(TagsType::ns(nss.ns()) << TagsType::min(range.getMin()))); + removeBuilder.append(TagsType::max(), range.getMax()); + + return Grid::get(opCtx)->catalogClient()->removeConfigDocuments( + opCtx, TagsType::ConfigNS, removeBuilder.obj(), kNoWaitWriteConcern); +} + +} // namespace mongo diff --git a/src/mongo/db/s/sharding_initialization_mongod.cpp b/src/mongo/db/s/sharding_initialization_mongod.cpp index bccdebf8f21..6f93f3498d6 100644 --- a/src/mongo/db/s/sharding_initialization_mongod.cpp +++ b/src/mongo/db/s/sharding_initialization_mongod.cpp @@ -45,7 +45,6 @@ #include "mongo/db/server_options.h" #include "mongo/executor/task_executor.h" #include "mongo/rpc/metadata/egress_metadata_hook_list.h" -#include "mongo/s/catalog/sharding_catalog_manager.h" #include "mongo/s/catalog_cache.h" #include "mongo/s/catalog_cache_loader.h" #include "mongo/s/client/shard_factory.h" |