author     Kaloian Manassiev <kaloian.manassiev@mongodb.com>  2018-02-03 09:42:54 -0500
committer  Kaloian Manassiev <kaloian.manassiev@mongodb.com>  2018-02-06 07:33:21 -0500
commit     0851ee0434ba5352561a204f368a062d660c8882 (patch)
tree       1b16836f9db4bc134df768a4f4a1e23a0c8f0b30 /src/mongo/db
parent     d7c127cb2c98eab7ca9ea0ef8405126e675ed5d7 (diff)
SERVER-29908 Move sharding_catalog_manager and tests under db/s/config
Also get rid of sharding_catalog_test_fixture since it doesn't provide much value.
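
For callers, the move is mechanical: only the include path changes, and the manager is still reached through its static getters. A minimal sketch of the updated usage, assuming a config server context (the function, shard, and zone names below are hypothetical, for illustration only):

    // Old include (removed throughout this commit):
    //   #include "mongo/s/catalog/sharding_catalog_manager.h"
    #include "mongo/db/s/config/sharding_catalog_manager.h"

    #include "mongo/db/operation_context.h"
    #include "mongo/util/assert_util.h"

    namespace mongo {

    // Hypothetical config-server code path exercising the moved class.
    void exampleAddShardToZone(OperationContext* opCtx) {
        auto* const catalogManager = ShardingCatalogManager::get(opCtx);
        uassertStatusOK(catalogManager->addShardToZone(opCtx, "shard0000", "zoneA"));
    }

    }  // namespace mongo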
Diffstat (limited to 'src/mongo/db')
-rw-r--r-- src/mongo/db/commands/SConscript | 2
-rw-r--r-- src/mongo/db/commands/set_feature_compatibility_version_command.cpp | 2
-rw-r--r-- src/mongo/db/db.cpp | 2
-rw-r--r-- src/mongo/db/repl/SConscript | 2
-rw-r--r-- src/mongo/db/repl/replication_coordinator_external_state_impl.cpp | 2
-rw-r--r-- src/mongo/db/s/SConscript | 52
-rw-r--r-- src/mongo/db/s/collection_sharding_state.cpp | 2
-rw-r--r-- src/mongo/db/s/config/configsvr_add_shard_command.cpp | 2
-rw-r--r-- src/mongo/db/s/config/configsvr_add_shard_to_zone_command.cpp | 2
-rw-r--r-- src/mongo/db/s/config/configsvr_commit_chunk_migration_command.cpp | 2
-rw-r--r-- src/mongo/db/s/config/configsvr_create_database_command.cpp | 2
-rw-r--r-- src/mongo/db/s/config/configsvr_drop_collection_command.cpp | 2
-rw-r--r-- src/mongo/db/s/config/configsvr_drop_database_command.cpp | 2
-rw-r--r-- src/mongo/db/s/config/configsvr_enable_sharding_command.cpp | 2
-rw-r--r-- src/mongo/db/s/config/configsvr_merge_chunk_command.cpp | 2
-rw-r--r-- src/mongo/db/s/config/configsvr_move_primary_command.cpp | 2
-rw-r--r-- src/mongo/db/s/config/configsvr_remove_shard_command.cpp | 2
-rw-r--r-- src/mongo/db/s/config/configsvr_remove_shard_from_zone_command.cpp | 2
-rw-r--r-- src/mongo/db/s/config/configsvr_shard_collection_command.cpp | 2
-rw-r--r-- src/mongo/db/s/config/configsvr_split_chunk_command.cpp | 2
-rw-r--r-- src/mongo/db/s/config/configsvr_update_zone_key_range_command.cpp | 2
-rw-r--r-- src/mongo/db/s/config/sharding_catalog_manager.cpp | 328
-rw-r--r-- src/mongo/db/s/config/sharding_catalog_manager.h | 508
-rw-r--r-- src/mongo/db/s/config/sharding_catalog_manager_add_shard_test.cpp | 1311
-rw-r--r-- src/mongo/db/s/config/sharding_catalog_manager_add_shard_to_zone_test.cpp | 119
-rw-r--r-- src/mongo/db/s/config/sharding_catalog_manager_assign_key_range_to_zone_test.cpp | 725
-rw-r--r-- src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp | 669
-rw-r--r-- src/mongo/db/s/config/sharding_catalog_manager_collection_operations.cpp | 605
-rw-r--r-- src/mongo/db/s/config/sharding_catalog_manager_commit_chunk_migration_test.cpp | 373
-rw-r--r-- src/mongo/db/s/config/sharding_catalog_manager_config_initialization_test.cpp | 386
-rw-r--r-- src/mongo/db/s/config/sharding_catalog_manager_create_database_test.cpp | 195
-rw-r--r-- src/mongo/db/s/config/sharding_catalog_manager_database_operations.cpp | 166
-rw-r--r-- src/mongo/db/s/config/sharding_catalog_manager_drop_coll_test.cpp | 477
-rw-r--r-- src/mongo/db/s/config/sharding_catalog_manager_enable_sharding_test.cpp | 168
-rw-r--r-- src/mongo/db/s/config/sharding_catalog_manager_merge_chunks_test.cpp | 474
-rw-r--r-- src/mongo/db/s/config/sharding_catalog_manager_remove_shard_from_zone_test.cpp | 260
-rw-r--r-- src/mongo/db/s/config/sharding_catalog_manager_remove_shard_test.cpp | 325
-rw-r--r-- src/mongo/db/s/config/sharding_catalog_manager_shard_collection_test.cpp | 445
-rw-r--r-- src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp | 956
-rw-r--r-- src/mongo/db/s/config/sharding_catalog_manager_split_chunk_test.cpp | 379
-rw-r--r-- src/mongo/db/s/config/sharding_catalog_manager_zone_operations.cpp | 397
-rw-r--r-- src/mongo/db/s/sharding_initialization_mongod.cpp | 1
42 files changed, 9334 insertions, 25 deletions
diff --git a/src/mongo/db/commands/SConscript b/src/mongo/db/commands/SConscript
index 378e475dc44..7d889b66c5c 100644
--- a/src/mongo/db/commands/SConscript
+++ b/src/mongo/db/commands/SConscript
@@ -226,11 +226,11 @@ env.Library(
'$BUILD_DIR/mongo/db/repl/oplog',
'$BUILD_DIR/mongo/db/repl/repl_coordinator_interface',
'$BUILD_DIR/mongo/db/rw_concern_d',
+ '$BUILD_DIR/mongo/db/s/sharding_catalog_manager',
'$BUILD_DIR/mongo/db/server_options_core',
'$BUILD_DIR/mongo/db/stats/serveronly_stats',
'$BUILD_DIR/mongo/db/storage/mmap_v1/storage_mmapv1',
'$BUILD_DIR/mongo/db/views/views_mongod',
- '$BUILD_DIR/mongo/s/catalog/sharding_catalog_manager',
'$BUILD_DIR/mongo/s/client/parallel',
'core',
'current_op_common',
diff --git a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
index b4f58018eaa..ba82f10cc54 100644
--- a/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
+++ b/src/mongo/db/commands/set_feature_compatibility_version_command.cpp
@@ -40,10 +40,10 @@
#include "mongo/db/repl/repl_client_info.h"
#include "mongo/db/repl/replication_coordinator.h"
#include "mongo/db/repl/replication_coordinator_global.h"
+#include "mongo/db/s/config/sharding_catalog_manager.h"
#include "mongo/db/server_options.h"
#include "mongo/rpc/get_status_from_command_result.h"
#include "mongo/s/catalog/sharding_catalog_client_impl.h"
-#include "mongo/s/catalog/sharding_catalog_manager.h"
#include "mongo/s/client/shard_registry.h"
#include "mongo/util/exit.h"
#include "mongo/util/fail_point_service.h"
diff --git a/src/mongo/db/db.cpp b/src/mongo/db/db.cpp
index 19ffd88d700..19aef3e230c 100644
--- a/src/mongo/db/db.cpp
+++ b/src/mongo/db/db.cpp
@@ -106,6 +106,7 @@
#include "mongo/db/repl/storage_interface_impl.h"
#include "mongo/db/repl/topology_coordinator.h"
#include "mongo/db/s/balancer/balancer.h"
+#include "mongo/db/s/config/sharding_catalog_manager.h"
#include "mongo/db/s/sharded_connection_info.h"
#include "mongo/db/s/sharding_initialization_mongod.h"
#include "mongo/db/s/sharding_state.h"
@@ -132,7 +133,6 @@
#include "mongo/executor/thread_pool_task_executor.h"
#include "mongo/platform/process_id.h"
#include "mongo/rpc/metadata/egress_metadata_hook_list.h"
-#include "mongo/s/catalog/sharding_catalog_manager.h"
#include "mongo/s/client/shard_registry.h"
#include "mongo/s/grid.h"
#include "mongo/s/sharding_initialization.h"
diff --git a/src/mongo/db/repl/SConscript b/src/mongo/db/repl/SConscript
index 8e512adcdcf..ddcc2ab2c5b 100644
--- a/src/mongo/db/repl/SConscript
+++ b/src/mongo/db/repl/SConscript
@@ -1565,10 +1565,10 @@ env.Library(
'$BUILD_DIR/mongo/db/repair_database',
'$BUILD_DIR/mongo/db/repl/oplog_buffer_proxy',
'$BUILD_DIR/mongo/db/s/balancer',
+ '$BUILD_DIR/mongo/db/s/sharding_catalog_manager',
'$BUILD_DIR/mongo/db/service_context',
'$BUILD_DIR/mongo/db/stats/counters',
'$BUILD_DIR/mongo/rpc/client_metadata',
- '$BUILD_DIR/mongo/s/catalog/sharding_catalog_manager',
'bgsync',
'drop_pending_collection_reaper',
'oplog_buffer_collection',
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
index 3a6819534b5..f2a1e8863eb 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
@@ -70,6 +70,7 @@
#include "mongo/db/repl/rs_sync.h"
#include "mongo/db/repl/storage_interface.h"
#include "mongo/db/s/balancer/balancer.h"
+#include "mongo/db/s/config/sharding_catalog_manager.h"
#include "mongo/db/s/sharding_state.h"
#include "mongo/db/s/sharding_state_recovery.h"
#include "mongo/db/server_options.h"
@@ -82,7 +83,6 @@
#include "mongo/executor/network_interface_factory.h"
#include "mongo/executor/thread_pool_task_executor.h"
#include "mongo/rpc/metadata/egress_metadata_hook_list.h"
-#include "mongo/s/catalog/sharding_catalog_manager.h"
#include "mongo/s/catalog/type_shard.h"
#include "mongo/s/catalog_cache_loader.h"
#include "mongo/s/client/shard_registry.h"
diff --git a/src/mongo/db/s/SConscript b/src/mongo/db/s/SConscript
index fd53ece397b..b41ea1be6a7 100644
--- a/src/mongo/db/s/SConscript
+++ b/src/mongo/db/s/SConscript
@@ -88,7 +88,6 @@ env.Library(
'$BUILD_DIR/mongo/db/commands/server_status',
'$BUILD_DIR/mongo/db/common',
'$BUILD_DIR/mongo/db/repl/repl_coordinator_global',
- '$BUILD_DIR/mongo/s/catalog/sharding_catalog_manager',
'$BUILD_DIR/mongo/s/client/shard_local',
'$BUILD_DIR/mongo/s/coreshard',
'$BUILD_DIR/mongo/s/is_mongos',
@@ -147,6 +146,28 @@ env.CppUnitTest(
)
env.Library(
+ target='sharding_catalog_manager',
+ source=[
+ 'config/sharding_catalog_manager_chunk_operations.cpp',
+ 'config/sharding_catalog_manager_collection_operations.cpp',
+ 'config/sharding_catalog_manager.cpp',
+ 'config/sharding_catalog_manager_database_operations.cpp',
+ 'config/sharding_catalog_manager_shard_operations.cpp',
+ 'config/sharding_catalog_manager_zone_operations.cpp',
+ ],
+ LIBDEPS=[
+ '$BUILD_DIR/mongo/db/catalog/catalog_raii',
+ '$BUILD_DIR/mongo/db/commands/dcommands_fcv',
+ '$BUILD_DIR/mongo/db/repl/read_concern_args',
+ '$BUILD_DIR/mongo/db/s/balancer',
+ '$BUILD_DIR/mongo/executor/network_interface',
+ '$BUILD_DIR/mongo/s/catalog/sharding_catalog_client',
+ '$BUILD_DIR/mongo/s/client/sharding_client',
+ '$BUILD_DIR/mongo/s/coreshard',
+ ],
+)
+
+env.Library(
target='commands_db_s',
source=[
'check_sharding_index_command.cpp',
@@ -189,11 +210,11 @@ env.Library(
'$BUILD_DIR/mongo/db/index_d',
'$BUILD_DIR/mongo/db/repl/repl_coordinator_global',
'$BUILD_DIR/mongo/db/rw_concern_d',
- '$BUILD_DIR/mongo/s/catalog/sharding_catalog_manager',
'$BUILD_DIR/mongo/s/commands/shared_cluster_commands',
'balancer',
'collection_metadata',
'sharding',
+ 'sharding_catalog_manager',
],
)
@@ -232,7 +253,7 @@ env.CppUnitTest(
'$BUILD_DIR/mongo/db/query/query_request',
'$BUILD_DIR/mongo/s/catalog/dist_lock_manager_mock',
'$BUILD_DIR/mongo/s/catalog/sharding_catalog_client_impl',
- '$BUILD_DIR/mongo/s/catalog/sharding_catalog_mock',
+ '$BUILD_DIR/mongo/s/catalog/sharding_catalog_client_mock',
'$BUILD_DIR/mongo/s/shard_server_test_fixture',
],
)
@@ -296,8 +317,31 @@ env.CppUnitTest(
],
LIBDEPS=[
'$BUILD_DIR/mongo/db/ops/write_ops_exec',
- '$BUILD_DIR/mongo/s/catalog/sharding_catalog_mock',
+ '$BUILD_DIR/mongo/s/catalog/sharding_catalog_client_mock',
'$BUILD_DIR/mongo/s/shard_server_test_fixture',
'sharding',
]
)
+
+env.CppUnitTest(
+ target='sharding_catalog_manager_test',
+ source=[
+ 'config/sharding_catalog_manager_add_shard_test.cpp',
+ 'config/sharding_catalog_manager_add_shard_to_zone_test.cpp',
+ 'config/sharding_catalog_manager_assign_key_range_to_zone_test.cpp',
+ 'config/sharding_catalog_manager_commit_chunk_migration_test.cpp',
+ 'config/sharding_catalog_manager_config_initialization_test.cpp',
+ 'config/sharding_catalog_manager_create_database_test.cpp',
+ 'config/sharding_catalog_manager_drop_coll_test.cpp',
+ 'config/sharding_catalog_manager_enable_sharding_test.cpp',
+ 'config/sharding_catalog_manager_merge_chunks_test.cpp',
+ 'config/sharding_catalog_manager_remove_shard_from_zone_test.cpp',
+ 'config/sharding_catalog_manager_remove_shard_test.cpp',
+ 'config/sharding_catalog_manager_shard_collection_test.cpp',
+ 'config/sharding_catalog_manager_split_chunk_test.cpp',
+ ],
+ LIBDEPS=[
+ '$BUILD_DIR/mongo/s/config_server_test_fixture',
+ '$BUILD_DIR/mongo/util/version_impl',
+ ]
+)
diff --git a/src/mongo/db/s/collection_sharding_state.cpp b/src/mongo/db/s/collection_sharding_state.cpp
index 78e9c8b4d55..07afbe69236 100644
--- a/src/mongo/db/s/collection_sharding_state.cpp
+++ b/src/mongo/db/s/collection_sharding_state.cpp
@@ -38,6 +38,7 @@
#include "mongo/db/client.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/repl/replication_coordinator.h"
+#include "mongo/db/s/config/sharding_catalog_manager.h"
#include "mongo/db/s/migration_chunk_cloner_source.h"
#include "mongo/db/s/migration_source_manager.h"
#include "mongo/db/s/operation_sharding_state.h"
@@ -49,7 +50,6 @@
#include "mongo/db/server_parameters.h"
#include "mongo/db/service_context.h"
#include "mongo/s/balancer_configuration.h"
-#include "mongo/s/catalog/sharding_catalog_manager.h"
#include "mongo/s/catalog/type_config_version.h"
#include "mongo/s/catalog/type_shard.h"
#include "mongo/s/catalog/type_shard_collection.h"
diff --git a/src/mongo/db/s/config/configsvr_add_shard_command.cpp b/src/mongo/db/s/config/configsvr_add_shard_command.cpp
index 834aa73ca74..2e587bef4d3 100644
--- a/src/mongo/db/s/config/configsvr_add_shard_command.cpp
+++ b/src/mongo/db/s/config/configsvr_add_shard_command.cpp
@@ -39,7 +39,7 @@
#include "mongo/db/namespace_string.h"
#include "mongo/db/repl/repl_set_config.h"
#include "mongo/db/repl/replication_coordinator.h"
-#include "mongo/s/catalog/sharding_catalog_manager.h"
+#include "mongo/db/s/config/sharding_catalog_manager.h"
#include "mongo/s/catalog/type_shard.h"
#include "mongo/s/grid.h"
#include "mongo/s/request_types/add_shard_request_type.h"
diff --git a/src/mongo/db/s/config/configsvr_add_shard_to_zone_command.cpp b/src/mongo/db/s/config/configsvr_add_shard_to_zone_command.cpp
index 00c1af987cf..2f280dffc00 100644
--- a/src/mongo/db/s/config/configsvr_add_shard_to_zone_command.cpp
+++ b/src/mongo/db/s/config/configsvr_add_shard_to_zone_command.cpp
@@ -35,7 +35,7 @@
#include "mongo/db/auth/privilege.h"
#include "mongo/db/commands.h"
#include "mongo/db/namespace_string.h"
-#include "mongo/s/catalog/sharding_catalog_manager.h"
+#include "mongo/db/s/config/sharding_catalog_manager.h"
#include "mongo/s/grid.h"
#include "mongo/s/request_types/add_shard_to_zone_request_type.h"
#include "mongo/util/log.h"
diff --git a/src/mongo/db/s/config/configsvr_commit_chunk_migration_command.cpp b/src/mongo/db/s/config/configsvr_commit_chunk_migration_command.cpp
index 4e9b80d82ea..0dd43e61c74 100644
--- a/src/mongo/db/s/config/configsvr_commit_chunk_migration_command.cpp
+++ b/src/mongo/db/s/config/configsvr_commit_chunk_migration_command.cpp
@@ -37,9 +37,9 @@
#include "mongo/db/operation_context.h"
#include "mongo/db/repl/read_concern_args.h"
#include "mongo/db/s/chunk_move_write_concern_options.h"
+#include "mongo/db/s/config/sharding_catalog_manager.h"
#include "mongo/db/s/sharding_state.h"
#include "mongo/rpc/get_status_from_command_result.h"
-#include "mongo/s/catalog/sharding_catalog_manager.h"
#include "mongo/s/catalog/type_chunk.h"
#include "mongo/s/catalog/type_locks.h"
#include "mongo/s/chunk_version.h"
diff --git a/src/mongo/db/s/config/configsvr_create_database_command.cpp b/src/mongo/db/s/config/configsvr_create_database_command.cpp
index 797f76f5720..47740c174be 100644
--- a/src/mongo/db/s/config/configsvr_create_database_command.cpp
+++ b/src/mongo/db/s/config/configsvr_create_database_command.cpp
@@ -39,7 +39,7 @@
#include "mongo/db/client.h"
#include "mongo/db/commands.h"
#include "mongo/db/operation_context.h"
-#include "mongo/s/catalog/sharding_catalog_manager.h"
+#include "mongo/db/s/config/sharding_catalog_manager.h"
#include "mongo/s/catalog/type_database.h"
#include "mongo/s/catalog_cache.h"
#include "mongo/s/grid.h"
diff --git a/src/mongo/db/s/config/configsvr_drop_collection_command.cpp b/src/mongo/db/s/config/configsvr_drop_collection_command.cpp
index bffad346dc6..e923f753c7a 100644
--- a/src/mongo/db/s/config/configsvr_drop_collection_command.cpp
+++ b/src/mongo/db/s/config/configsvr_drop_collection_command.cpp
@@ -34,8 +34,8 @@
#include "mongo/db/operation_context.h"
#include "mongo/db/repl/read_concern_args.h"
#include "mongo/db/repl/repl_client_info.h"
+#include "mongo/db/s/config/sharding_catalog_manager.h"
#include "mongo/s/catalog/dist_lock_manager.h"
-#include "mongo/s/catalog/sharding_catalog_manager.h"
#include "mongo/s/catalog/type_database.h"
#include "mongo/s/catalog_cache.h"
#include "mongo/s/client/shard_registry.h"
diff --git a/src/mongo/db/s/config/configsvr_drop_database_command.cpp b/src/mongo/db/s/config/configsvr_drop_database_command.cpp
index 5a39eee3174..b85a2eab87e 100644
--- a/src/mongo/db/s/config/configsvr_drop_database_command.cpp
+++ b/src/mongo/db/s/config/configsvr_drop_database_command.cpp
@@ -33,8 +33,8 @@
#include "mongo/db/commands.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/repl/repl_client_info.h"
+#include "mongo/db/s/config/sharding_catalog_manager.h"
#include "mongo/s/catalog/dist_lock_manager.h"
-#include "mongo/s/catalog/sharding_catalog_manager.h"
#include "mongo/s/catalog/type_database.h"
#include "mongo/s/catalog_cache.h"
#include "mongo/s/grid.h"
diff --git a/src/mongo/db/s/config/configsvr_enable_sharding_command.cpp b/src/mongo/db/s/config/configsvr_enable_sharding_command.cpp
index 9bdbd5c5669..50cbe6d15ea 100644
--- a/src/mongo/db/s/config/configsvr_enable_sharding_command.cpp
+++ b/src/mongo/db/s/config/configsvr_enable_sharding_command.cpp
@@ -39,7 +39,7 @@
#include "mongo/db/client.h"
#include "mongo/db/commands.h"
#include "mongo/db/operation_context.h"
-#include "mongo/s/catalog/sharding_catalog_manager.h"
+#include "mongo/db/s/config/sharding_catalog_manager.h"
#include "mongo/s/catalog/type_database.h"
#include "mongo/s/catalog_cache.h"
#include "mongo/s/grid.h"
diff --git a/src/mongo/db/s/config/configsvr_merge_chunk_command.cpp b/src/mongo/db/s/config/configsvr_merge_chunk_command.cpp
index 37c32c6781a..ed49bba1b9d 100644
--- a/src/mongo/db/s/config/configsvr_merge_chunk_command.cpp
+++ b/src/mongo/db/s/config/configsvr_merge_chunk_command.cpp
@@ -36,7 +36,7 @@
#include "mongo/db/commands.h"
#include "mongo/db/namespace_string.h"
#include "mongo/db/operation_context.h"
-#include "mongo/s/catalog/sharding_catalog_manager.h"
+#include "mongo/db/s/config/sharding_catalog_manager.h"
#include "mongo/s/grid.h"
#include "mongo/s/request_types/merge_chunk_request_type.h"
#include "mongo/util/log.h"
diff --git a/src/mongo/db/s/config/configsvr_move_primary_command.cpp b/src/mongo/db/s/config/configsvr_move_primary_command.cpp
index ad02277cc69..4bdeb30ad05 100644
--- a/src/mongo/db/s/config/configsvr_move_primary_command.cpp
+++ b/src/mongo/db/s/config/configsvr_move_primary_command.cpp
@@ -40,7 +40,7 @@
#include "mongo/db/commands.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/repl/repl_client_info.h"
-#include "mongo/s/catalog/sharding_catalog_manager.h"
+#include "mongo/db/s/config/sharding_catalog_manager.h"
#include "mongo/s/catalog/type_database.h"
#include "mongo/s/catalog_cache.h"
#include "mongo/s/client/shard_registry.h"
diff --git a/src/mongo/db/s/config/configsvr_remove_shard_command.cpp b/src/mongo/db/s/config/configsvr_remove_shard_command.cpp
index d55e81b4ba9..3ef3d0c6995 100644
--- a/src/mongo/db/s/config/configsvr_remove_shard_command.cpp
+++ b/src/mongo/db/s/config/configsvr_remove_shard_command.cpp
@@ -39,7 +39,7 @@
#include "mongo/db/client.h"
#include "mongo/db/commands.h"
#include "mongo/db/operation_context.h"
-#include "mongo/s/catalog/sharding_catalog_manager.h"
+#include "mongo/db/s/config/sharding_catalog_manager.h"
#include "mongo/s/catalog/type_database.h"
#include "mongo/s/catalog_cache.h"
#include "mongo/s/client/shard_registry.h"
diff --git a/src/mongo/db/s/config/configsvr_remove_shard_from_zone_command.cpp b/src/mongo/db/s/config/configsvr_remove_shard_from_zone_command.cpp
index 9aa7f9285e2..02c5473173f 100644
--- a/src/mongo/db/s/config/configsvr_remove_shard_from_zone_command.cpp
+++ b/src/mongo/db/s/config/configsvr_remove_shard_from_zone_command.cpp
@@ -35,7 +35,7 @@
#include "mongo/db/auth/privilege.h"
#include "mongo/db/commands.h"
#include "mongo/db/namespace_string.h"
-#include "mongo/s/catalog/sharding_catalog_manager.h"
+#include "mongo/db/s/config/sharding_catalog_manager.h"
#include "mongo/s/grid.h"
#include "mongo/s/request_types/remove_shard_from_zone_request_type.h"
#include "mongo/util/log.h"
diff --git a/src/mongo/db/s/config/configsvr_shard_collection_command.cpp b/src/mongo/db/s/config/configsvr_shard_collection_command.cpp
index d221a719bb8..5a2053719de 100644
--- a/src/mongo/db/s/config/configsvr_shard_collection_command.cpp
+++ b/src/mongo/db/s/config/configsvr_shard_collection_command.cpp
@@ -46,9 +46,9 @@
#include "mongo/db/repl/repl_client_info.h"
#include "mongo/db/repl/repl_set_config.h"
#include "mongo/db/repl/replication_coordinator.h"
+#include "mongo/db/s/config/sharding_catalog_manager.h"
#include "mongo/db/sessions_collection.h"
#include "mongo/s/balancer_configuration.h"
-#include "mongo/s/catalog/sharding_catalog_manager.h"
#include "mongo/s/catalog/type_database.h"
#include "mongo/s/catalog/type_shard.h"
#include "mongo/s/catalog_cache.h"
diff --git a/src/mongo/db/s/config/configsvr_split_chunk_command.cpp b/src/mongo/db/s/config/configsvr_split_chunk_command.cpp
index 4d66c7ffa8b..474cf1b5e92 100644
--- a/src/mongo/db/s/config/configsvr_split_chunk_command.cpp
+++ b/src/mongo/db/s/config/configsvr_split_chunk_command.cpp
@@ -36,7 +36,7 @@
#include "mongo/db/commands.h"
#include "mongo/db/namespace_string.h"
#include "mongo/db/operation_context.h"
-#include "mongo/s/catalog/sharding_catalog_manager.h"
+#include "mongo/db/s/config/sharding_catalog_manager.h"
#include "mongo/s/grid.h"
#include "mongo/s/request_types/split_chunk_request_type.h"
#include "mongo/util/log.h"
diff --git a/src/mongo/db/s/config/configsvr_update_zone_key_range_command.cpp b/src/mongo/db/s/config/configsvr_update_zone_key_range_command.cpp
index 1742699b038..afd3ebdf2d2 100644
--- a/src/mongo/db/s/config/configsvr_update_zone_key_range_command.cpp
+++ b/src/mongo/db/s/config/configsvr_update_zone_key_range_command.cpp
@@ -35,7 +35,7 @@
#include "mongo/db/auth/privilege.h"
#include "mongo/db/commands.h"
#include "mongo/db/namespace_string.h"
-#include "mongo/s/catalog/sharding_catalog_manager.h"
+#include "mongo/db/s/config/sharding_catalog_manager.h"
#include "mongo/s/grid.h"
#include "mongo/s/request_types/update_zone_key_range_request_type.h"
#include "mongo/util/log.h"
diff --git a/src/mongo/db/s/config/sharding_catalog_manager.cpp b/src/mongo/db/s/config/sharding_catalog_manager.cpp
new file mode 100644
index 00000000000..136fce6a29c
--- /dev/null
+++ b/src/mongo/db/s/config/sharding_catalog_manager.cpp
@@ -0,0 +1,328 @@
+/**
+ * Copyright (C) 2015 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kSharding
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/s/config/sharding_catalog_manager.h"
+
+#include "mongo/db/commands/feature_compatibility_version.h"
+#include "mongo/db/operation_context.h"
+#include "mongo/db/s/balancer/type_migration.h"
+#include "mongo/s/catalog/config_server_version.h"
+#include "mongo/s/catalog/sharding_catalog_client.h"
+#include "mongo/s/catalog/type_chunk.h"
+#include "mongo/s/catalog/type_collection.h"
+#include "mongo/s/catalog/type_config_version.h"
+#include "mongo/s/catalog/type_lockpings.h"
+#include "mongo/s/catalog/type_locks.h"
+#include "mongo/s/catalog/type_shard.h"
+#include "mongo/s/catalog/type_tags.h"
+#include "mongo/s/client/shard_registry.h"
+#include "mongo/s/grid.h"
+#include "mongo/util/log.h"
+
+namespace mongo {
+namespace {
+
+const WriteConcernOptions kNoWaitWriteConcern(1, WriteConcernOptions::SyncMode::UNSET, Seconds(0));
+
+// This value is initialized only if the node is running as a config server
+const auto getShardingCatalogManager =
+ ServiceContext::declareDecoration<boost::optional<ShardingCatalogManager>>();
+
+} // namespace
+
+void ShardingCatalogManager::create(ServiceContext* serviceContext,
+ std::unique_ptr<executor::TaskExecutor> addShardExecutor) {
+ auto& shardingCatalogManager = getShardingCatalogManager(serviceContext);
+ invariant(!shardingCatalogManager);
+
+ shardingCatalogManager.emplace(serviceContext, std::move(addShardExecutor));
+}
+
+void ShardingCatalogManager::clearForTests(ServiceContext* serviceContext) {
+ auto& shardingCatalogManager = getShardingCatalogManager(serviceContext);
+ invariant(shardingCatalogManager);
+
+ shardingCatalogManager.reset();
+}
+
+ShardingCatalogManager* ShardingCatalogManager::get(ServiceContext* serviceContext) {
+ auto& shardingCatalogManager = getShardingCatalogManager(serviceContext);
+ invariant(shardingCatalogManager);
+
+ return shardingCatalogManager.get_ptr();
+}
+
+ShardingCatalogManager* ShardingCatalogManager::get(OperationContext* operationContext) {
+ return get(operationContext->getServiceContext());
+}
+
+ShardingCatalogManager::ShardingCatalogManager(
+ ServiceContext* serviceContext, std::unique_ptr<executor::TaskExecutor> addShardExecutor)
+ : _serviceContext(serviceContext),
+ _executorForAddShard(std::move(addShardExecutor)),
+ _kZoneOpLock("zoneOpLock"),
+ _kChunkOpLock("chunkOpLock"),
+ _kShardMembershipLock("shardMembershipLock") {
+ startup();
+}
+
+ShardingCatalogManager::~ShardingCatalogManager() {
+ shutDown();
+}
+
+void ShardingCatalogManager::startup() {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ if (_started) {
+ return;
+ }
+ _started = true;
+ _executorForAddShard->startup();
+
+ Grid::get(_serviceContext)
+ ->setCustomConnectionPoolStatsFn(
+ [this](executor::ConnectionPoolStats* stats) { appendConnectionStats(stats); });
+}
+
+void ShardingCatalogManager::shutDown() {
+ {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ _inShutdown = true;
+ }
+
+ Grid::get(_serviceContext)->setCustomConnectionPoolStatsFn(nullptr);
+
+ _executorForAddShard->shutdown();
+ _executorForAddShard->join();
+}
+
+Status ShardingCatalogManager::initializeConfigDatabaseIfNeeded(OperationContext* opCtx) {
+ {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ if (_configInitialized) {
+ return {ErrorCodes::AlreadyInitialized,
+ "Config database was previously loaded into memory"};
+ }
+ }
+
+ Status status = _initConfigIndexes(opCtx);
+ if (!status.isOK()) {
+ return status;
+ }
+
+ // Make sure to write config.version last since we detect rollbacks of config.version and
+ // will re-run initializeConfigDatabaseIfNeeded if that happens, but we don't detect rollback
+ // of the index builds.
+ status = _initConfigVersion(opCtx);
+ if (!status.isOK()) {
+ return status;
+ }
+
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ _configInitialized = true;
+
+ return Status::OK();
+}
+
+void ShardingCatalogManager::discardCachedConfigDatabaseInitializationState() {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ _configInitialized = false;
+}
+
+Status ShardingCatalogManager::_initConfigVersion(OperationContext* opCtx) {
+ const auto catalogClient = Grid::get(opCtx)->catalogClient();
+
+ auto versionStatus =
+ catalogClient->getConfigVersion(opCtx, repl::ReadConcernLevel::kLocalReadConcern);
+ if (!versionStatus.isOK()) {
+ return versionStatus.getStatus();
+ }
+
+ const auto& versionInfo = versionStatus.getValue();
+ if (versionInfo.getMinCompatibleVersion() > CURRENT_CONFIG_VERSION) {
+ return {ErrorCodes::IncompatibleShardingConfigVersion,
+ str::stream() << "current version v" << CURRENT_CONFIG_VERSION
+ << " is older than the cluster min compatible v"
+ << versionInfo.getMinCompatibleVersion()};
+ }
+
+ if (versionInfo.getCurrentVersion() == UpgradeHistory_EmptyVersion) {
+ VersionType newVersion;
+ newVersion.setClusterId(OID::gen());
+ newVersion.setMinCompatibleVersion(MIN_COMPATIBLE_CONFIG_VERSION);
+ newVersion.setCurrentVersion(CURRENT_CONFIG_VERSION);
+
+ BSONObj versionObj(newVersion.toBSON());
+ auto insertStatus = catalogClient->insertConfigDocument(
+ opCtx, VersionType::ConfigNS, versionObj, kNoWaitWriteConcern);
+
+ return insertStatus;
+ }
+
+ if (versionInfo.getCurrentVersion() == UpgradeHistory_UnreportedVersion) {
+ return {ErrorCodes::IncompatibleShardingConfigVersion,
+ "Assuming config data is old since the version document cannot be found in the "
+ "config server and it contains databases besides 'local' and 'admin'. "
+ "Please upgrade if this is the case. Otherwise, make sure that the config "
+ "server is clean."};
+ }
+
+ if (versionInfo.getCurrentVersion() < CURRENT_CONFIG_VERSION) {
+ return {ErrorCodes::IncompatibleShardingConfigVersion,
+ str::stream() << "need to upgrade current cluster version to v"
+ << CURRENT_CONFIG_VERSION
+ << "; currently at v"
+ << versionInfo.getCurrentVersion()};
+ }
+
+ return Status::OK();
+}
+
+Status ShardingCatalogManager::_initConfigIndexes(OperationContext* opCtx) {
+ const bool unique = true;
+ auto configShard = Grid::get(opCtx)->shardRegistry()->getConfigShard();
+
+ Status result = configShard->createIndexOnConfig(
+ opCtx, ChunkType::ConfigNS, BSON(ChunkType::ns() << 1 << ChunkType::min() << 1), unique);
+ if (!result.isOK()) {
+ return result.withContext("couldn't create ns_1_min_1 index on config db");
+ }
+
+ result = configShard->createIndexOnConfig(
+ opCtx,
+ ChunkType::ConfigNS,
+ BSON(ChunkType::ns() << 1 << ChunkType::shard() << 1 << ChunkType::min() << 1),
+ unique);
+ if (!result.isOK()) {
+ return result.withContext("couldn't create ns_1_shard_1_min_1 index on config db");
+ }
+
+ result =
+ configShard->createIndexOnConfig(opCtx,
+ ChunkType::ConfigNS,
+ BSON(ChunkType::ns() << 1 << ChunkType::lastmod() << 1),
+ unique);
+ if (!result.isOK()) {
+ return result.withContext("couldn't create ns_1_lastmod_1 index on config db");
+ }
+
+ result = configShard->createIndexOnConfig(
+ opCtx,
+ MigrationType::ConfigNS,
+ BSON(MigrationType::ns() << 1 << MigrationType::min() << 1),
+ unique);
+ if (!result.isOK()) {
+ return result.withContext("couldn't create ns_1_min_1 index on config.migrations");
+ }
+
+ result = configShard->createIndexOnConfig(
+ opCtx, ShardType::ConfigNS, BSON(ShardType::host() << 1), unique);
+ if (!result.isOK()) {
+ return result.withContext("couldn't create host_1 index on config db");
+ }
+
+ result = configShard->createIndexOnConfig(
+ opCtx, LocksType::ConfigNS, BSON(LocksType::lockID() << 1), !unique);
+ if (!result.isOK()) {
+ return result.withContext("couldn't create lock id index on config db");
+ }
+
+ result =
+ configShard->createIndexOnConfig(opCtx,
+ LocksType::ConfigNS,
+ BSON(LocksType::state() << 1 << LocksType::process() << 1),
+ !unique);
+ if (!result.isOK()) {
+ return result.withContext("couldn't create state and process id index on config db");
+ }
+
+ result = configShard->createIndexOnConfig(
+ opCtx, LockpingsType::ConfigNS, BSON(LockpingsType::ping() << 1), !unique);
+ if (!result.isOK()) {
+ return result.withContext("couldn't create lockping ping time index on config db");
+ }
+
+ result = configShard->createIndexOnConfig(
+ opCtx, TagsType::ConfigNS, BSON(TagsType::ns() << 1 << TagsType::min() << 1), unique);
+ if (!result.isOK()) {
+ return result.withContext("couldn't create ns_1_min_1 index on config db");
+ }
+
+ result = configShard->createIndexOnConfig(
+ opCtx, TagsType::ConfigNS, BSON(TagsType::ns() << 1 << TagsType::tag() << 1), !unique);
+ if (!result.isOK()) {
+ return result.withContext("couldn't create ns_1_tag_1 index on config db");
+ }
+
+ return Status::OK();
+}
+
+Status ShardingCatalogManager::setFeatureCompatibilityVersionOnShards(OperationContext* opCtx,
+ const BSONObj& cmdObj) {
+
+ // No shards should be added until we have forwarded featureCompatibilityVersion to all shards.
+ Lock::SharedLock lk(opCtx->lockState(), _kShardMembershipLock);
+
+ // We do a direct read of the shards collection with local readConcern so no shards are missed,
+ // but don't go through the ShardRegistry to prevent it from caching data that may be rolled
+ // back.
+ const auto opTimeWithShards = uassertStatusOK(Grid::get(opCtx)->catalogClient()->getAllShards(
+ opCtx, repl::ReadConcernLevel::kLocalReadConcern));
+
+ for (const auto& shardType : opTimeWithShards.value) {
+ const auto shardStatus =
+ Grid::get(opCtx)->shardRegistry()->getShard(opCtx, shardType.getName());
+ if (!shardStatus.isOK()) {
+ continue;
+ }
+ const auto shard = shardStatus.getValue();
+
+ auto response = shard->runCommandWithFixedRetryAttempts(
+ opCtx,
+ ReadPreferenceSetting{ReadPreference::PrimaryOnly},
+ "admin",
+ cmdObj,
+ Shard::RetryPolicy::kIdempotent);
+ if (!response.isOK()) {
+ return response.getStatus();
+ }
+ if (!response.getValue().commandStatus.isOK()) {
+ return response.getValue().commandStatus;
+ }
+ if (!response.getValue().writeConcernStatus.isOK()) {
+ return response.getValue().writeConcernStatus;
+ }
+ }
+
+ return Status::OK();
+}
+
+} // namespace mongo
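
The file above pins the manager's lifetime to a ServiceContext decoration: create() emplaces the boost::optional exactly once at startup, get() invariants that it is present, and clearForTests() resets it so unit tests can call create() again. A short sketch of that lifecycle, assuming the add-shard task executor is built elsewhere in the startup code (the function name is hypothetical):

    // Hypothetical wiring at config server startup.
    void exampleInstallCatalogManager(ServiceContext* serviceContext,
                                      std::unique_ptr<executor::TaskExecutor> addShardExecutor) {
        // Installs the decoration; calling this twice trips an invariant.
        ShardingCatalogManager::create(serviceContext, std::move(addShardExecutor));

        // From here on, any code holding a ServiceContext (or an OperationContext)
        // reaches the same instance.
        auto* const catalogManager = ShardingCatalogManager::get(serviceContext);
        invariant(catalogManager);

        // Unit tests tear the decoration down between cases:
        //   ShardingCatalogManager::clearForTests(serviceContext);
    }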
diff --git a/src/mongo/db/s/config/sharding_catalog_manager.h b/src/mongo/db/s/config/sharding_catalog_manager.h
new file mode 100644
index 00000000000..aa7bd212ae0
--- /dev/null
+++ b/src/mongo/db/s/config/sharding_catalog_manager.h
@@ -0,0 +1,508 @@
+/**
+ * Copyright (C) 2015 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#pragma once
+
+#include "mongo/base/disallow_copying.h"
+#include "mongo/base/status_with.h"
+#include "mongo/db/concurrency/d_concurrency.h"
+#include "mongo/db/repl/optime_with.h"
+#include "mongo/executor/task_executor.h"
+#include "mongo/s/catalog/type_chunk.h"
+#include "mongo/s/catalog/type_database.h"
+#include "mongo/s/catalog/type_shard.h"
+#include "mongo/s/client/shard.h"
+#include "mongo/s/client/shard_registry.h"
+#include "mongo/s/shard_key_pattern.h"
+#include "mongo/stdx/mutex.h"
+
+namespace mongo {
+
+class OperationContext;
+class RemoteCommandTargeter;
+class ServiceContext;
+class UUID;
+
+/**
+ * Used to indicate to the caller of the removeShard method whether draining of chunks for
+ * a particular shard has started, is ongoing, or has been completed.
+ */
+enum ShardDrainingStatus {
+ STARTED,
+ ONGOING,
+ COMPLETED,
+};
+
+/**
+ * Implements modifications to the sharding catalog metadata.
+ *
+ * TODO: Currently the code responsible for writing the sharding catalog metadata is split between
+ * this class and ShardingCatalogClient. Eventually all methods that write catalog data should be
+ * moved out of ShardingCatalogClient and into this class.
+ */
+class ShardingCatalogManager {
+ MONGO_DISALLOW_COPYING(ShardingCatalogManager);
+
+public:
+ ShardingCatalogManager(ServiceContext* serviceContext,
+ std::unique_ptr<executor::TaskExecutor> addShardExecutor);
+ ~ShardingCatalogManager();
+
+ /**
+ * Instantiates an instance of the sharding catalog manager and installs it on the specified
+ * service context. This method is not thread-safe and must be called only once when the service
+ * is starting.
+ */
+ static void create(ServiceContext* serviceContext,
+ std::unique_ptr<executor::TaskExecutor> addShardExecutor);
+
+ /**
+ * Retrieves the per-service instance of the ShardingCatalogManager. This instance is only
+ * available if the node is running as a config server.
+ */
+ static ShardingCatalogManager* get(ServiceContext* serviceContext);
+ static ShardingCatalogManager* get(OperationContext* operationContext);
+
+ /**
+ * Safe to call multiple times as long as the calls are externally synchronized to be
+ * non-overlapping.
+ */
+ void startup();
+
+ /**
+ * Performs necessary cleanup when shutting down cleanly.
+ */
+ void shutDown();
+
+ /**
+ * Checks if this is the first start of a newly instantiated config server and if so pre-creates
+ * the catalog collections and their indexes. Also generates and persists the cluster's
+ * identity.
+ */
+ Status initializeConfigDatabaseIfNeeded(OperationContext* opCtx);
+
+ /**
+ * Invoked on cluster identity metadata rollback after replication step down. Throws out any
+ * cached identity information and causes it to be reloaded/re-created on the next attempt.
+ */
+ void discardCachedConfigDatabaseInitializationState();
+
+ //
+ // Zone Operations
+ //
+
+ /**
+ * Adds the given shardName to the zone. Returns ErrorCodes::ShardNotFound if a shard by that
+ * name does not exist.
+ */
+ Status addShardToZone(OperationContext* opCtx,
+ const std::string& shardName,
+ const std::string& zoneName);
+
+ /**
+ * Removes the given shardName from the zone. Returns ErrorCodes::ShardNotFound if a shard by
+ * that name does not exist.
+ */
+ Status removeShardFromZone(OperationContext* opCtx,
+ const std::string& shardName,
+ const std::string& zoneName);
+
+ /**
+ * Assigns a range of a sharded collection to a particular shard zone. If range is a prefix of
+ * the shard key, the range will be converted into a new range with full shard key filled with
+ * MinKey values.
+ */
+ Status assignKeyRangeToZone(OperationContext* opCtx,
+ const NamespaceString& nss,
+ const ChunkRange& range,
+ const std::string& zoneName);
+
+ /**
+ * Removes a range from a zone.
+ *
+ * NOTE: unlike assignKeyRangeToZone, the given range will never be converted to include the
+ * full shard key.
+ */
+ Status removeKeyRangeFromZone(OperationContext* opCtx,
+ const NamespaceString& nss,
+ const ChunkRange& range);
+
+ //
+ // Chunk Operations
+ //
+
+ /**
+ * Updates metadata in the config.chunks collection to show the given chunk as split into
+ * smaller chunks at the specified split points.
+ */
+ Status commitChunkSplit(OperationContext* opCtx,
+ const NamespaceString& nss,
+ const OID& requestEpoch,
+ const ChunkRange& range,
+ const std::vector<BSONObj>& splitPoints,
+ const std::string& shardName);
+
+ /**
+ * Updates metadata in the config.chunks collection so the chunks with given boundaries are seen
+ * merged into a single larger chunk.
+ */
+ Status commitChunkMerge(OperationContext* opCtx,
+ const NamespaceString& nss,
+ const OID& requestEpoch,
+ const std::vector<BSONObj>& chunkBoundaries,
+ const std::string& shardName);
+
+ /**
+ * Updates metadata in config.chunks collection to show the given chunk in its new shard.
+ */
+ StatusWith<BSONObj> commitChunkMigration(OperationContext* opCtx,
+ const NamespaceString& nss,
+ const ChunkType& migratedChunk,
+ const boost::optional<ChunkType>& controlChunk,
+ const OID& collectionEpoch,
+ const ShardId& fromShard,
+ const ShardId& toShard);
+
+ //
+ // Database Operations
+ //
+
+ /**
+ * Checks if a database with the same name already exists, and if not, selects a primary shard
+ * for the database and creates a new entry for it in config.databases.
+ *
+ * Returns the database entry.
+ *
+ * Throws DatabaseDifferCase if the database already exists with a different case.
+ */
+ DatabaseType createDatabase(OperationContext* opCtx, const std::string& dbName);
+
+ /**
+ * Creates the database if it does not exist, then marks its entry in config.databases as
+ * sharding-enabled.
+ *
+ * Throws DatabaseDifferCase if the database already exists with a different case.
+ */
+ void enableSharding(OperationContext* opCtx, const std::string& dbName);
+
+ /**
+ * Retrieves all databases for a shard.
+ *
+ * Returns a !OK status if an error occurs.
+ */
+ StatusWith<std::vector<std::string>> getDatabasesForShard(OperationContext* opCtx,
+ const ShardId& shardId);
+
+ //
+ // Collection Operations
+ //
+
+ /**
+ * Drops the specified collection from the collection metadata store.
+ *
+ * Returns Status::OK if successful or any error code indicating the failure. These are
+ * some of the known failures:
+ * - NamespaceNotFound - collection does not exist
+ */
+ Status dropCollection(OperationContext* opCtx, const NamespaceString& nss);
+
+
+ /**
+ * Shards a collection. Assumes that the database is enabled for sharding.
+ *
+ * @param nss: namespace of the collection to shard
+ * @param uuid: the collection's UUID. Optional because new in 3.6.
+ * @param fieldsAndOrder: shardKey pattern
+ * @param defaultCollation: the default collation for the collection, to be written to
+ *        config.collections. If empty, the collection default collation is simple binary
+ *        comparison. Note that the shard key collation will always be simple binary
+ *        comparison, even if the collection default collation is non-simple.
+ * @param unique: if true, ensure the underlying index enforces a unique constraint.
+ * @param initPoints: create chunks based on a set of specified split points.
+ * @param distributeInitialChunks: if true, the initial chunks will be distributed among the
+ *        available shards. Otherwise all chunks will be assigned to the primary shard for the
+ *        database.
+ * @param dbPrimaryShardId: the primary shard for the database in which the collection lives.
+ */
+ void shardCollection(OperationContext* opCtx,
+ const NamespaceString& nss,
+ const boost::optional<UUID> uuid,
+ const ShardKeyPattern& fieldsAndOrder,
+ const BSONObj& defaultCollation,
+ bool unique,
+ const std::vector<BSONObj>& initPoints,
+ const bool distributeInitialChunks,
+ const ShardId& dbPrimaryShardId);
+
+
+ /**
+ * Iterates through each entry in config.collections that does not have a UUID, generates a UUID
+ * for the collection, and updates the entry with the generated UUID.
+ *
+ * Remove after 3.4 -> 3.6 upgrade.
+ */
+ void generateUUIDsForExistingShardedCollections(OperationContext* opCtx);
+
+
+ /**
+ * Returns the set of collections for the specified database, which have been marked as sharded.
+ * Goes directly to the config server's metadata, without checking the local cache so it should
+ * not be used in frequently called code paths.
+ *
+ * Throws exception on errors.
+ *
+ * TODO SERVER-32366: Make this an anonymous helper function in
+ * sharding_catalog_manager_database_operations.cpp since it will no longer need to be
+ * called outside of the ShardingCatalogManager.
+ */
+ std::vector<NamespaceString> getAllShardedCollectionsForDb(OperationContext* opCtx,
+ StringData dbName);
+
+ //
+ // Shard Operations
+ //
+
+ /**
+ * Adds a new shard. It expects a standalone mongod process or replica set to be running on the
+ * provided address.
+ *
+ * 'shardProposedName' is an optional string with the proposed name of the shard. If it is
+ * nullptr, a name will be automatically generated; if not nullptr, it cannot
+ * be the empty string.
+ * 'shardConnectionString' is the complete connection string of the shard being added.
+ * 'maxSize' is the optional space quota in bytes. Zero means there's no limitation to space
+ * usage.
+ *
+ * On success returns the name of the newly added shard.
+ */
+ StatusWith<std::string> addShard(OperationContext* opCtx,
+ const std::string* shardProposedName,
+ const ConnectionString& shardConnectionString,
+ const long long maxSize);
+
+ /**
+ * Tries to remove a shard. To completely remove a shard from a sharded cluster,
+ * the data residing in that shard must be moved to the remaining shards in the
+ * cluster by "draining" chunks from that shard.
+ *
+ * Because of the asynchronous nature of the draining mechanism, this method returns
+ * the current draining status. See ShardDrainingStatus enum definition for more details.
+ */
+ StatusWith<ShardDrainingStatus> removeShard(OperationContext* opCtx, const ShardId& shardId);
+
+ //
+ // Cluster Upgrade Operations
+ //
+
+ /**
+ * Returns a BSON representation of an update request that can be used to insert a shardIdentity
+ * doc into the shard for the given shardType (or update the shard's existing shardIdentity
+ * doc's configsvrConnString if the _id, shardName, and clusterId do not conflict).
+ */
+ BSONObj createShardIdentityUpsertForAddShard(OperationContext* opCtx,
+ const std::string& shardName);
+
+ /**
+ * Runs the setFeatureCompatibilityVersion command on all shards.
+ */
+ Status setFeatureCompatibilityVersionOnShards(OperationContext* opCtx, const BSONObj& cmdObj);
+
+ //
+ // For Diagnostics
+ //
+
+ /**
+ * Append information about the connection pools owned by the CatalogManager.
+ */
+ void appendConnectionStats(executor::ConnectionPoolStats* stats);
+
+ /**
+ * Only used for unit-tests, clears a previously-created catalog manager from the specified
+ * service context, so that 'create' can be called again.
+ */
+ static void clearForTests(ServiceContext* serviceContext);
+
+private:
+ /**
+ * Performs the necessary checks for version compatibility and creates a new config.version
+ * document if the current cluster config is empty.
+ */
+ Status _initConfigVersion(OperationContext* opCtx);
+
+ /**
+ * Builds all the expected indexes on the config server.
+ */
+ Status _initConfigIndexes(OperationContext* opCtx);
+
+ /**
+ * Used during addShard to determine if there is already an existing shard that matches the
+ * shard that is currently being added. An OK return with boost::none indicates that there
+ * is no conflicting shard, and we can proceed trying to add the new shard. An OK return
+ * with a ShardType indicates that there is an existing shard that matches the shard being added
+ * but since the options match, this addShard request can do nothing and return success. A
+ * non-OK return either indicates a problem reading the existing shards from disk or more likely
+ * indicates that an existing shard conflicts with the shard being added and they have different
+ * options, so the addShard attempt must be aborted.
+ */
+ StatusWith<boost::optional<ShardType>> _checkIfShardExists(
+ OperationContext* opCtx,
+ const ConnectionString& proposedShardConnectionString,
+ const std::string* shardProposedName,
+ long long maxSize);
+
+ /**
+ * Validates that the specified endpoint can serve as a shard server. In particular, this
+ * function checks that the shard can be contacted and that it is not already a member of
+ * another sharded cluster.
+ *
+ * @param targeter For sending requests to the shard-to-be.
+ * @param shardProposedName Optional proposed name for the shard. Can be omitted in which case
+ * a unique name for the shard will be generated from the shard's connection string. If it
+ * is not omitted, the value cannot be the empty string.
+ *
+ * On success returns a partially initialized ShardType object corresponding to the requested
+ * shard. It will have the hostName field set and optionally the name, if the name could be
+ * generated from either the proposed name or the connection string set name. The returned
+ * shard's name should be checked and if empty, one should be generated using some uniform
+ * algorithm.
+ */
+ StatusWith<ShardType> _validateHostAsShard(OperationContext* opCtx,
+ std::shared_ptr<RemoteCommandTargeter> targeter,
+ const std::string* shardProposedName,
+ const ConnectionString& connectionString);
+
+ /**
+ * Drops the sessions collection on the specified host.
+ */
+ Status _dropSessionsCollection(OperationContext* opCtx,
+ std::shared_ptr<RemoteCommandTargeter> targeter);
+
+ /**
+ * Runs the listDatabases command on the specified host and returns the names of all databases
+ * it returns excluding those named local, config and admin, since they serve administrative
+ * purposes.
+ */
+ StatusWith<std::vector<std::string>> _getDBNamesListFromShard(
+ OperationContext* opCtx, std::shared_ptr<RemoteCommandTargeter> targeter);
+
+ /**
+ * Runs a command against a "shard" that is not yet in the cluster and thus not present in the
+ * ShardRegistry.
+ */
+ StatusWith<Shard::CommandResponse> _runCommandForAddShard(OperationContext* opCtx,
+ RemoteCommandTargeter* targeter,
+ const std::string& dbName,
+ const BSONObj& cmdObj);
+
+ /**
+ * Selects an optimal shard on which to place a newly created database from the set of
+ * available shards. Will return ShardNotFound if shard could not be found.
+ */
+ static StatusWith<ShardId> _selectShardForNewDatabase(OperationContext* opCtx,
+ ShardRegistry* shardRegistry);
+
+ /**
+ * Helper method for running a count command against the config server with appropriate error
+ * handling.
+ */
+ StatusWith<long long> _runCountCommandOnConfig(OperationContext* opCtx,
+ const NamespaceString& nss,
+ BSONObj query);
+
+ /**
+ * Appends a read committed read concern to the request object.
+ */
+ void _appendReadConcern(BSONObjBuilder* builder);
+
+ /**
+ * Creates the first chunks of a new sharded collection.
+ */
+ ChunkVersion _createFirstChunks(OperationContext* opCtx,
+ const NamespaceString& nss,
+ const ShardKeyPattern& shardKeyPattern,
+ const ShardId& primaryShardId,
+ const std::vector<BSONObj>& initPoints,
+ const bool distributeInitialChunks);
+
+ // The owning service context
+ ServiceContext* const _serviceContext;
+
+ // Executor specifically used for sending commands to servers that are in the process of being
+ // added as shards. Does not have any connection hook set on it, thus it can be used to talk to
+ // servers that are not yet in the ShardRegistry.
+ const std::unique_ptr<executor::TaskExecutor> _executorForAddShard;
+
+ //
+ // All member variables are labeled with one of the following codes indicating the
+ // synchronization rules for accessing them.
+ //
+ // (M) Must hold _mutex for access.
+ // (R) Read only, can only be written during initialization.
+ // (S) Self-synchronizing; access in any way from any context.
+ //
+
+ stdx::mutex _mutex;
+
+ // True if shutDown() has been called. False, otherwise.
+ bool _inShutdown{false}; // (M)
+
+ // True if startup() has been called.
+ bool _started{false}; // (M)
+
+ // True if initializeConfigDatabaseIfNeeded() has been called and returned successfully.
+ bool _configInitialized{false}; // (M)
+
+ /**
+ * Lock for shard zoning operations. This should be acquired when doing any operations that
+ * can affect the config.tags collection or the tags field of the config.shards collection.
+ * No other locks should be held when locking this. If an operation needs to take database
+ * locks (for example to write to a local collection) those locks should be taken after
+ * taking this.
+ */
+ Lock::ResourceMutex _kZoneOpLock;
+
+ /**
+ * Lock for chunk split/merge/move operations. This should be acquired when doing split/merge/
+ * move operations that can affect the config.chunks collection.
+ * No other locks should be held when locking this. If an operation needs to take database
+ * locks (for example to write to a local collection) those locks should be taken after
+ * taking this.
+ */
+ Lock::ResourceMutex _kChunkOpLock;
+
+ /**
+ * Lock that guards changes to the set of shards in the cluster (ie addShard and removeShard
+ * requests).
+ * TODO: Currently only taken during addShard requests, this should also be taken in X mode
+ * during removeShard, once removeShard is moved to run on the config server primary instead of
+ * on mongos. At that point we should also change any operations that expect the shard not to
+ * be removed while they are running (such as removeShardFromZone) to take this in shared mode.
+ */
+ Lock::ResourceMutex _kShardMembershipLock;
+};
+
+} // namespace mongo
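
The three Lock::ResourceMutex members at the bottom of the header all carry the same rule: take the catalog-level mutex first, with no other locks held, and take any database or collection locks only afterwards. A minimal sketch of that ordering, modeled on the setFeatureCompatibilityVersionOnShards implementation above (the member function itself is hypothetical and not part of this commit):

    // Hypothetical member function illustrating the documented lock ordering
    // for _kZoneOpLock.
    Status ShardingCatalogManager::exampleZoneMetadataWrite(OperationContext* opCtx) {
        // Serialize against other zone operations first; no other locks may be
        // held at this point.
        Lock::ExclusiveLock zoneLk(opCtx->lockState(), _kZoneOpLock);

        // Database/collection locks for local writes (e.g. to config.tags) may
        // be acquired only after the ResourceMutex is held.
        // ... perform the config.tags / config.shards update here ...
        return Status::OK();
    }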
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_add_shard_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_add_shard_test.cpp
new file mode 100644
index 00000000000..b0507b34451
--- /dev/null
+++ b/src/mongo/db/s/config/sharding_catalog_manager_add_shard_test.cpp
@@ -0,0 +1,1311 @@
+/**
+ * Copyright (C) 2015 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kSharding
+
+#include "mongo/platform/basic.h"
+
+#include <vector>
+
+#include "mongo/client/connection_string.h"
+#include "mongo/client/remote_command_targeter_factory_mock.h"
+#include "mongo/client/remote_command_targeter_mock.h"
+#include "mongo/db/commands.h"
+#include "mongo/db/ops/write_ops.h"
+#include "mongo/db/repl/replication_coordinator_mock.h"
+#include "mongo/db/s/config/sharding_catalog_manager.h"
+#include "mongo/db/s/type_shard_identity.h"
+#include "mongo/s/catalog/config_server_version.h"
+#include "mongo/s/catalog/type_changelog.h"
+#include "mongo/s/catalog/type_config_version.h"
+#include "mongo/s/catalog/type_database.h"
+#include "mongo/s/catalog/type_shard.h"
+#include "mongo/s/client/shard_registry.h"
+#include "mongo/s/cluster_identity_loader.h"
+#include "mongo/s/config_server_test_fixture.h"
+#include "mongo/s/write_ops/batched_command_response.h"
+#include "mongo/util/fail_point_service.h"
+#include "mongo/util/log.h"
+#include "mongo/util/scopeguard.h"
+
+namespace mongo {
+namespace {
+
+using executor::RemoteCommandRequest;
+using executor::RemoteCommandResponse;
+using std::vector;
+using unittest::assertGet;
+
+// TODO (SERVER-27029): This value was chosen to be greater than the time it takes for the hang
+// analyzer to kick in. Remove once the cause for the test failure has been figured out.
+const Hours kLongFutureTimeout(8);
+
+class AddShardTest : public ConfigServerTestFixture {
+protected:
+ /**
+ * Performs the test setup steps from the parent class and then configures the config shard and
+ * the client name.
+ */
+ void setUp() override {
+ ConfigServerTestFixture::setUp();
+
+ // Make sure clusterID is written to the config.version collection.
+ ASSERT_OK(ShardingCatalogManager::get(operationContext())
+ ->initializeConfigDatabaseIfNeeded(operationContext()));
+
+ auto clusterIdLoader = ClusterIdentityLoader::get(operationContext());
+ ASSERT_OK(clusterIdLoader->loadClusterId(operationContext(),
+ repl::ReadConcernLevel::kLocalReadConcern));
+ _clusterId = clusterIdLoader->getClusterId();
+ }
+
+ /**
+ * addShard validates the host as a shard. It calls "isMaster" on the host to determine what
+ * kind of host it is -- mongos, regular mongod, config mongod -- and whether the replica set
+ * details are correct. "isMasterResponse" defines the response of the "isMaster" request and
+ * should be a command response BSONObj, or a failed Status.
+ *
+ * ShardingTestFixture::expectGetShards() should be called before this function, otherwise
+ * addShard will never reach the isMaster command -- a find query is called first.
+ */
+ void expectIsMaster(const HostAndPort& target, StatusWith<BSONObj> isMasterResponse) {
+ onCommandForAddShard([&, target, isMasterResponse](const RemoteCommandRequest& request) {
+ ASSERT_EQ(request.target, target);
+ ASSERT_EQ(request.dbname, "admin");
+ ASSERT_BSONOBJ_EQ(request.cmdObj, BSON("isMaster" << 1));
+ ASSERT_BSONOBJ_EQ(rpc::makeEmptyMetadata(), request.metadata);
+
+ return isMasterResponse;
+ });
+ }
+
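+    /**
+     * Waits for a "listDatabases" (nameOnly) request against 'target' and responds with the
+     * given list of database descriptions.
+     */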
+ void expectListDatabases(const HostAndPort& target, const std::vector<BSONObj>& dbs) {
+ onCommandForAddShard([&](const RemoteCommandRequest& request) {
+ ASSERT_EQ(request.target, target);
+ ASSERT_EQ(request.dbname, "admin");
+ ASSERT_BSONOBJ_EQ(request.cmdObj, BSON("listDatabases" << 1 << "nameOnly" << true));
+ ASSERT_BSONOBJ_EQ(rpc::makeEmptyMetadata(), request.metadata);
+
+ BSONArrayBuilder arr;
+ for (const auto& db : dbs) {
+ arr.append(db);
+ }
+
+ return BSON("ok" << 1 << "databases" << arr.obj());
+ });
+ }
+
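+    /**
+     * Waits for a "drop" of 'nss' (issued with majority write concern) against 'target' and
+     * responds with success.
+     */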
+ void expectCollectionDrop(const HostAndPort& target, const NamespaceString& nss) {
+ onCommandForAddShard([&](const RemoteCommandRequest& request) {
+ ASSERT_EQ(request.target, target);
+ ASSERT_EQ(request.dbname, nss.db());
+ ASSERT_BSONOBJ_EQ(request.cmdObj,
+ BSON("drop" << nss.coll() << "writeConcern" << BSON("w"
+ << "majority")));
+ ASSERT_BSONOBJ_EQ(rpc::makeEmptyMetadata(), request.metadata);
+
+ return BSON("ok" << 1);
+ });
+ }
+
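+    /**
+     * Waits for a "setFeatureCompatibilityVersion" request against 'target' and responds with
+     * 'response'.
+     */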
+ void expectSetFeatureCompatibilityVersion(const HostAndPort& target,
+ StatusWith<BSONObj> response) {
+ onCommandForAddShard([&, target, response](const RemoteCommandRequest& request) {
+ ASSERT_EQ(request.target, target);
+ ASSERT_EQ(request.dbname, "admin");
+ ASSERT_BSONOBJ_EQ(request.cmdObj,
+ BSON("setFeatureCompatibilityVersion"
+ << "4.0"));
+
+ return response;
+ });
+ }
+
+ /**
+ * Waits for a request for the shardIdentity document to be upserted into a shard from the
+ * config server on addShard.
+ */
+ void expectShardIdentityUpsertReturnSuccess(const HostAndPort& expectedHost,
+ const std::string& expectedShardName) {
+ // Create the expected upsert shardIdentity command for this shardType.
+ auto upsertCmdObj =
+ ShardingCatalogManager::get(operationContext())
+ ->createShardIdentityUpsertForAddShard(operationContext(), expectedShardName);
+
+ const auto opMsgRequest =
+ OpMsgRequest::fromDBAndBody(NamespaceString::kAdminDb, upsertCmdObj);
+ expectUpdatesReturnSuccess(expectedHost,
+ NamespaceString(NamespaceString::kServerConfigurationNamespace),
+ UpdateOp::parse(opMsgRequest));
+ }
+
+ void expectShardIdentityUpsertReturnFailure(const HostAndPort& expectedHost,
+ const std::string& expectedShardName,
+ const Status& statusToReturn) {
+ // Create the expected upsert shardIdentity command for this shardType.
+ auto upsertCmdObj =
+ ShardingCatalogManager::get(operationContext())
+ ->createShardIdentityUpsertForAddShard(operationContext(), expectedShardName);
+
+ const auto opMsgRequest =
+ OpMsgRequest::fromDBAndBody(NamespaceString::kAdminDb, upsertCmdObj);
+ expectUpdatesReturnFailure(expectedHost,
+ NamespaceString(NamespaceString::kServerConfigurationNamespace),
+ UpdateOp::parse(opMsgRequest),
+ statusToReturn);
+ }
+
+ /**
+ * Waits for a set of batched updates and ensures that the host, namespace, and updates exactly
+ * match what's expected. Responds with a success status.
+ */
+ void expectUpdatesReturnSuccess(const HostAndPort& expectedHost,
+ const NamespaceString& expectedNss,
+ const write_ops::Update& expectedUpdateOp) {
+ onCommandForAddShard([&](const RemoteCommandRequest& request) {
+ ASSERT_EQUALS(expectedHost, request.target);
+
+ // Check that the db name in the request matches the expected db name.
+ ASSERT_EQUALS(expectedNss.db(), request.dbname);
+
+ const auto opMsgRequest = OpMsgRequest::fromDBAndBody(request.dbname, request.cmdObj);
+ const auto updateOp = UpdateOp::parse(opMsgRequest);
+            ASSERT_EQUALS(expectedNss, updateOp.getNamespace());
+
+ const auto& expectedUpdates = expectedUpdateOp.getUpdates();
+ const auto& actualUpdates = updateOp.getUpdates();
+
+ ASSERT_EQUALS(expectedUpdates.size(), actualUpdates.size());
+
+ auto itExpected = expectedUpdates.begin();
+ auto itActual = actualUpdates.begin();
+
+ for (; itActual != actualUpdates.end(); itActual++, itExpected++) {
+ ASSERT_EQ(itExpected->getUpsert(), itActual->getUpsert());
+ ASSERT_EQ(itExpected->getMulti(), itActual->getMulti());
+ ASSERT_BSONOBJ_EQ(itExpected->getQ(), itActual->getQ());
+ ASSERT_BSONOBJ_EQ(itExpected->getU(), itActual->getU());
+ }
+
+ BatchedCommandResponse response;
+ response.setStatus(Status::OK());
+ response.setNModified(1);
+
+ return response.toBSON();
+ });
+ }
+
+ /**
+ * Waits for a set of batched updates and ensures that the host, namespace, and updates exactly
+ * match what's expected. Responds with a failure status.
+ */
+ void expectUpdatesReturnFailure(const HostAndPort& expectedHost,
+ const NamespaceString& expectedNss,
+ const write_ops::Update& expectedUpdateOp,
+ const Status& statusToReturn) {
+ onCommandForAddShard([&](const RemoteCommandRequest& request) {
+ ASSERT_EQUALS(expectedHost, request.target);
+
+ // Check that the db name in the request matches the expected db name.
+ ASSERT_EQUALS(expectedNss.db(), request.dbname);
+
+ const auto opMsgRequest = OpMsgRequest::fromDBAndBody(request.dbname, request.cmdObj);
+ const auto updateOp = UpdateOp::parse(opMsgRequest);
+            ASSERT_EQUALS(expectedNss, updateOp.getNamespace());
+
+ const auto& expectedUpdates = expectedUpdateOp.getUpdates();
+ const auto& actualUpdates = updateOp.getUpdates();
+
+ ASSERT_EQUALS(expectedUpdates.size(), actualUpdates.size());
+
+ auto itExpected = expectedUpdates.begin();
+ auto itActual = actualUpdates.begin();
+
+ for (; itActual != actualUpdates.end(); itActual++, itExpected++) {
+ ASSERT_EQ(itExpected->getUpsert(), itActual->getUpsert());
+ ASSERT_EQ(itExpected->getMulti(), itActual->getMulti());
+ ASSERT_BSONOBJ_EQ(itExpected->getQ(), itActual->getQ());
+ ASSERT_BSONOBJ_EQ(itExpected->getU(), itActual->getU());
+ }
+
+ return statusToReturn;
+ });
+ }
+
+ /**
+ * Asserts that a document exists in the config server's config.shards collection corresponding
+ * to 'expectedShard'.
+ */
+ void assertShardExists(const ShardType& expectedShard) {
+ auto foundShard = assertGet(getShardDoc(operationContext(), expectedShard.getName()));
+
+ ASSERT_EQUALS(expectedShard.getName(), foundShard.getName());
+ ASSERT_EQUALS(expectedShard.getHost(), foundShard.getHost());
+ ASSERT_EQUALS(expectedShard.getMaxSizeMB(), foundShard.getMaxSizeMB());
+ ASSERT_EQUALS(expectedShard.getDraining(), foundShard.getDraining());
+ ASSERT_EQUALS((int)expectedShard.getState(), (int)foundShard.getState());
+ ASSERT_TRUE(foundShard.getTags().empty());
+ }
+
+ /**
+ * Asserts that a document exists in the config server's config.databases collection
+ * corresponding to 'expectedDB'.
+ */
+ void assertDatabaseExists(const DatabaseType& expectedDB) {
+ auto foundDB =
+ assertGet(catalogClient()->getDatabase(operationContext(),
+ expectedDB.getName(),
+ repl::ReadConcernLevel::kMajorityReadConcern))
+ .value;
+
+ ASSERT_EQUALS(expectedDB.getName(), foundDB.getName());
+ ASSERT_EQUALS(expectedDB.getPrimary(), foundDB.getPrimary());
+ ASSERT_EQUALS(expectedDB.getSharded(), foundDB.getSharded());
+ }
+
+ /**
+ * Asserts that a document exists in the config server's config.changelog collection
+ * describing the addShard request for 'addedShard'.
+ */
+ void assertChangeWasLogged(const ShardType& addedShard) {
+ auto response = assertGet(
+ getConfigShard()->exhaustiveFindOnConfig(operationContext(),
+ ReadPreferenceSetting{
+ ReadPreference::PrimaryOnly},
+ repl::ReadConcernLevel::kLocalReadConcern,
+ NamespaceString("config.changelog"),
+ BSON("what"
+ << "addShard"
+ << "details.name"
+ << addedShard.getName()),
+ BSONObj(),
+ 1));
+ ASSERT_EQ(1U, response.docs.size());
+ auto logEntryBSON = response.docs.front();
+ auto logEntry = assertGet(ChangeLogType::fromBSON(logEntryBSON));
+
+ ASSERT_EQUALS(addedShard.getName(), logEntry.getDetails()["name"].String());
+ ASSERT_EQUALS(addedShard.getHost(), logEntry.getDetails()["host"].String());
+ }
+
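+    /**
+     * Advances the virtual clock of the mocked network used for addShard requests to 'when',
+     * letting any responses or timeouts scheduled before that time fire.
+     */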
+ void forwardAddShardNetwork(Date_t when) {
+ networkForAddShard()->enterNetwork();
+ networkForAddShard()->runUntil(when);
+ networkForAddShard()->exitNetwork();
+ }
+
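+    // The cluster ID loaded from config.version during setUp().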
+ OID _clusterId;
+};
+
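+// Every test below follows the same shape: the addShard call is launched on a separate thread
+// (it blocks while issuing requests over the mocked network), the expect* helpers above service
+// those requests in the order addShard sends them, and the future is then waited on before the
+// resulting config metadata is verified. A minimal sketch of the flow, using this fixture's
+// helpers:
+//
+//     auto future = launchAsync([&] { /* call ShardingCatalogManager::addShard */ });
+//     expectIsMaster(shardTarget, isMasterResponse);  // service the first mocked request
+//     ...                                             // service any remaining mocked requests
+//     future.timed_get(kLongFutureTimeout);           // then assert on the config collections
+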
+TEST_F(AddShardTest, CreateShardIdentityUpsertForAddShard) {
+ std::string shardName = "shardName";
+
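+    // The expected upsert targets the shardIdentity document in admin.system.version: it matches
+    // on the document _id, shard name, and cluster ID, and sets the config server connection
+    // string (creating the document via upsert if it does not exist).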
+ BSONObj expectedBSON = BSON("update"
+ << "system.version"
+ << "bypassDocumentValidation"
+ << false
+ << "ordered"
+ << true
+ << "updates"
+ << BSON_ARRAY(
+ BSON("q" << BSON("_id"
+ << "shardIdentity"
+ << "shardName"
+ << shardName
+ << "clusterId"
+ << _clusterId)
+ << "u"
+ << BSON("$set" << BSON("configsvrConnectionString"
+ << replicationCoordinator()
+ ->getConfig()
+ .getConnectionString()
+ .toString()))
+ << "multi"
+ << false
+ << "upsert"
+ << true))
+ << "writeConcern"
+ << BSON("w"
+ << "majority"
+ << "wtimeout"
+ << 15000));
+ ASSERT_BSONOBJ_EQ(expectedBSON,
+ ShardingCatalogManager::get(operationContext())
+ ->createShardIdentityUpsertForAddShard(operationContext(), shardName));
+}
+
+TEST_F(AddShardTest, StandaloneBasicSuccess) {
+ std::unique_ptr<RemoteCommandTargeterMock> targeter(
+ stdx::make_unique<RemoteCommandTargeterMock>());
+ HostAndPort shardTarget("StandaloneHost:12345");
+ targeter->setConnectionStringReturnValue(ConnectionString(shardTarget));
+ targeter->setFindHostReturnValue(shardTarget);
+
+ targeterFactory()->addTargeterToReturn(ConnectionString(shardTarget), std::move(targeter));
+
+ std::string expectedShardName = "StandaloneShard";
+
+ // The shard doc inserted into the config.shards collection on the config server.
+ ShardType expectedShard;
+ expectedShard.setName(expectedShardName);
+ expectedShard.setHost("StandaloneHost:12345");
+ expectedShard.setMaxSizeMB(100);
+ expectedShard.setState(ShardType::ShardState::kShardAware);
+
+ DatabaseType discoveredDB1("TestDB1", ShardId("StandaloneShard"), false);
+ DatabaseType discoveredDB2("TestDB2", ShardId("StandaloneShard"), false);
+
+ auto future = launchAsync([this, expectedShardName] {
+ Client::initThreadIfNotAlready();
+ auto shardName =
+ assertGet(ShardingCatalogManager::get(operationContext())
+ ->addShard(operationContext(),
+ &expectedShardName,
+ assertGet(ConnectionString::parse("StandaloneHost:12345")),
+ 100));
+ ASSERT_EQUALS(expectedShardName, shardName);
+ });
+
+ BSONObj commandResponse = BSON("ok" << 1 << "ismaster" << true << "maxWireVersion"
+ << WireVersion::LATEST_WIRE_VERSION);
+ expectIsMaster(shardTarget, commandResponse);
+
+ // Get databases list from new shard
+ expectListDatabases(
+ shardTarget,
+ std::vector<BSONObj>{BSON("name"
+ << "local"
+ << "sizeOnDisk"
+ << 1000),
+ BSON("name" << discoveredDB1.getName() << "sizeOnDisk" << 2000),
+ BSON("name" << discoveredDB2.getName() << "sizeOnDisk" << 5000)});
+
+ expectCollectionDrop(shardTarget, NamespaceString("config", "system.sessions"));
+
+ // The shardIdentity doc inserted into the admin.system.version collection on the shard.
+ expectShardIdentityUpsertReturnSuccess(shardTarget, expectedShardName);
+
+ // The shard receives the setFeatureCompatibilityVersion command.
+ expectSetFeatureCompatibilityVersion(shardTarget, BSON("ok" << 1));
+
+ // Wait for the addShard to complete before checking the config database
+ future.timed_get(kLongFutureTimeout);
+
+ // Ensure that the shard document was properly added to config.shards.
+ assertShardExists(expectedShard);
+
+    // Ensure that the databases detected from the shard were properly added to config.databases.
+ assertDatabaseExists(discoveredDB1);
+ assertDatabaseExists(discoveredDB2);
+
+ assertChangeWasLogged(expectedShard);
+}
+
+TEST_F(AddShardTest, StandaloneGenerateName) {
+ std::unique_ptr<RemoteCommandTargeterMock> targeter(
+ stdx::make_unique<RemoteCommandTargeterMock>());
+ HostAndPort shardTarget("StandaloneHost:12345");
+ targeter->setConnectionStringReturnValue(ConnectionString(shardTarget));
+ targeter->setFindHostReturnValue(shardTarget);
+
+ targeterFactory()->addTargeterToReturn(ConnectionString(shardTarget), std::move(targeter));
+
+ ShardType existingShard;
+ existingShard.setName("shard0005");
+ existingShard.setHost("existingHost:12345");
+ existingShard.setMaxSizeMB(100);
+ existingShard.setState(ShardType::ShardState::kShardAware);
+
+ // Add a pre-existing shard so when generating a name for the new shard it will have to go
+ // higher than the existing one.
+ ASSERT_OK(catalogClient()->insertConfigDocument(operationContext(),
+ ShardType::ConfigNS,
+ existingShard.toBSON(),
+ ShardingCatalogClient::kMajorityWriteConcern));
+ assertShardExists(existingShard);
+
+ std::string expectedShardName = "shard0006";
+
+ // The shard doc inserted into the config.shards collection on the config server.
+ ShardType expectedShard;
+ expectedShard.setName(expectedShardName);
+ expectedShard.setHost(shardTarget.toString());
+ expectedShard.setMaxSizeMB(100);
+ expectedShard.setState(ShardType::ShardState::kShardAware);
+
+ DatabaseType discoveredDB1("TestDB1", ShardId(expectedShardName), false);
+ DatabaseType discoveredDB2("TestDB2", ShardId(expectedShardName), false);
+
+ auto future = launchAsync([this, &expectedShardName, &shardTarget] {
+ Client::initThreadIfNotAlready();
+ auto shardName = assertGet(
+ ShardingCatalogManager::get(operationContext())
+ ->addShard(operationContext(), nullptr, ConnectionString(shardTarget), 100));
+ ASSERT_EQUALS(expectedShardName, shardName);
+ });
+
+ BSONObj commandResponse = BSON("ok" << 1 << "ismaster" << true << "maxWireVersion"
+ << WireVersion::LATEST_WIRE_VERSION);
+ expectIsMaster(shardTarget, commandResponse);
+
+ // Get databases list from new shard
+ expectListDatabases(
+ shardTarget,
+ std::vector<BSONObj>{BSON("name"
+ << "local"
+ << "sizeOnDisk"
+ << 1000),
+ BSON("name" << discoveredDB1.getName() << "sizeOnDisk" << 2000),
+ BSON("name" << discoveredDB2.getName() << "sizeOnDisk" << 5000)});
+
+ expectCollectionDrop(shardTarget, NamespaceString("config", "system.sessions"));
+
+ // The shardIdentity doc inserted into the admin.system.version collection on the shard.
+ expectShardIdentityUpsertReturnSuccess(shardTarget, expectedShardName);
+
+ // The shard receives the setFeatureCompatibilityVersion command.
+ expectSetFeatureCompatibilityVersion(shardTarget, BSON("ok" << 1));
+
+ // Wait for the addShard to complete before checking the config database
+ future.timed_get(kLongFutureTimeout);
+
+ // Ensure that the shard document was properly added to config.shards.
+ assertShardExists(expectedShard);
+
+    // Ensure that the databases detected from the shard were properly added to config.databases.
+ assertDatabaseExists(discoveredDB1);
+ assertDatabaseExists(discoveredDB2);
+
+ assertChangeWasLogged(expectedShard);
+}
+
+TEST_F(AddShardTest, AddSCCCConnectionStringAsShard) {
+ std::unique_ptr<RemoteCommandTargeterMock> targeter(
+ stdx::make_unique<RemoteCommandTargeterMock>());
+ auto invalidConn =
+ ConnectionString("host1:12345,host2:12345,host3:12345", ConnectionString::INVALID);
+ targeter->setConnectionStringReturnValue(invalidConn);
+
+ auto future = launchAsync([this, invalidConn] {
+ const std::string shardName("StandaloneShard");
+ auto status = ShardingCatalogManager::get(operationContext())
+ ->addShard(operationContext(), &shardName, invalidConn, 100);
+ ASSERT_EQUALS(ErrorCodes::BadValue, status);
+ ASSERT_STRING_CONTAINS(status.getStatus().reason(), "Invalid connection string");
+ });
+
+ future.timed_get(kLongFutureTimeout);
+}
+
+TEST_F(AddShardTest, EmptyShardName) {
+ std::unique_ptr<RemoteCommandTargeterMock> targeter(
+ stdx::make_unique<RemoteCommandTargeterMock>());
+ std::string expectedShardName = "";
+
+ auto future = launchAsync([this, expectedShardName] {
+ auto status = ShardingCatalogManager::get(operationContext())
+ ->addShard(operationContext(),
+ &expectedShardName,
+ assertGet(ConnectionString::parse("StandaloneHost:12345")),
+ 100);
+ ASSERT_EQUALS(ErrorCodes::BadValue, status);
+ ASSERT_EQUALS("shard name cannot be empty", status.getStatus().reason());
+ });
+
+ future.timed_get(kLongFutureTimeout);
+}
+
+// Host is unreachable; cannot verify the host.
+TEST_F(AddShardTest, UnreachableHost) {
+ std::unique_ptr<RemoteCommandTargeterMock> targeter(
+ stdx::make_unique<RemoteCommandTargeterMock>());
+ HostAndPort shardTarget("StandaloneHost:12345");
+ targeter->setConnectionStringReturnValue(ConnectionString(shardTarget));
+ targeter->setFindHostReturnValue(shardTarget);
+
+ targeterFactory()->addTargeterToReturn(ConnectionString(shardTarget), std::move(targeter));
+ std::string expectedShardName = "StandaloneShard";
+
+ auto future = launchAsync([this, &expectedShardName, &shardTarget] {
+ Client::initThreadIfNotAlready();
+ auto status =
+ ShardingCatalogManager::get(operationContext())
+ ->addShard(
+ operationContext(), &expectedShardName, ConnectionString(shardTarget), 100);
+ ASSERT_EQUALS(ErrorCodes::OperationFailed, status);
+ ASSERT_STRING_CONTAINS(status.getStatus().reason(), "host unreachable");
+ });
+
+ Status hostUnreachableStatus = Status(ErrorCodes::HostUnreachable, "host unreachable");
+ expectIsMaster(shardTarget, hostUnreachableStatus);
+
+ future.timed_get(kLongFutureTimeout);
+}
+
+// Cannot add mongos as a shard.
+TEST_F(AddShardTest, AddMongosAsShard) {
+ std::unique_ptr<RemoteCommandTargeterMock> targeter(
+ stdx::make_unique<RemoteCommandTargeterMock>());
+ HostAndPort shardTarget("StandaloneHost:12345");
+ targeter->setConnectionStringReturnValue(ConnectionString(shardTarget));
+ targeter->setFindHostReturnValue(shardTarget);
+
+ targeterFactory()->addTargeterToReturn(ConnectionString(shardTarget), std::move(targeter));
+ std::string expectedShardName = "StandaloneShard";
+
+ auto future = launchAsync([this, &expectedShardName, &shardTarget] {
+ Client::initThreadIfNotAlready();
+ auto status =
+ ShardingCatalogManager::get(operationContext())
+ ->addShard(
+ operationContext(), &expectedShardName, ConnectionString(shardTarget), 100);
+ ASSERT_EQUALS(ErrorCodes::IllegalOperation, status);
+ });
+
+ expectIsMaster(shardTarget,
+ BSON("msg"
+ << "isdbgrid"));
+
+ future.timed_get(kLongFutureTimeout);
+}
+
+// A replica set name was found for the host but no name was provided with the host.
+TEST_F(AddShardTest, AddReplicaSetShardAsStandalone) {
+ std::unique_ptr<RemoteCommandTargeterMock> targeter(
+ stdx::make_unique<RemoteCommandTargeterMock>());
+ HostAndPort shardTarget = HostAndPort("host1:12345");
+ targeter->setConnectionStringReturnValue(ConnectionString(shardTarget));
+ targeter->setFindHostReturnValue(shardTarget);
+
+ targeterFactory()->addTargeterToReturn(ConnectionString(shardTarget), std::move(targeter));
+ std::string expectedShardName = "Standalone";
+
+ auto future = launchAsync([this, expectedShardName, shardTarget] {
+ Client::initThreadIfNotAlready();
+ auto status =
+ ShardingCatalogManager::get(operationContext())
+ ->addShard(
+ operationContext(), &expectedShardName, ConnectionString(shardTarget), 100);
+ ASSERT_EQUALS(ErrorCodes::OperationFailed, status);
+ ASSERT_STRING_CONTAINS(status.getStatus().reason(), "use replica set url format");
+ });
+
+ BSONObj commandResponse = BSON("ok" << 1 << "ismaster" << true << "setName"
+ << "myOtherSet"
+ << "maxWireVersion"
+ << WireVersion::LATEST_WIRE_VERSION);
+ expectIsMaster(shardTarget, commandResponse);
+
+ future.timed_get(kLongFutureTimeout);
+}
+
+// A replica set name was provided with the host but no name was found for the host.
+TEST_F(AddShardTest, AddStandaloneHostShardAsReplicaSet) {
+ std::unique_ptr<RemoteCommandTargeterMock> targeter(
+ stdx::make_unique<RemoteCommandTargeterMock>());
+ ConnectionString connString =
+ assertGet(ConnectionString::parse("mySet/host1:12345,host2:12345"));
+ HostAndPort shardTarget = connString.getServers().front();
+ targeter->setConnectionStringReturnValue(connString);
+ targeter->setFindHostReturnValue(shardTarget);
+
+ targeterFactory()->addTargeterToReturn(connString, std::move(targeter));
+ std::string expectedShardName = "StandaloneShard";
+
+ auto future = launchAsync([this, expectedShardName, connString] {
+ Client::initThreadIfNotAlready();
+ auto status = ShardingCatalogManager::get(operationContext())
+ ->addShard(operationContext(), &expectedShardName, connString, 100);
+ ASSERT_EQUALS(ErrorCodes::OperationFailed, status);
+ ASSERT_STRING_CONTAINS(status.getStatus().reason(), "host did not return a set name");
+ });
+
+ BSONObj commandResponse = BSON("ok" << 1 << "ismaster" << true << "maxWireVersion"
+ << WireVersion::LATEST_WIRE_VERSION);
+ expectIsMaster(shardTarget, commandResponse);
+
+ future.timed_get(kLongFutureTimeout);
+}
+
+// Provided replica set name does not match found replica set name.
+TEST_F(AddShardTest, ReplicaSetMismatchedReplicaSetName) {
+ std::unique_ptr<RemoteCommandTargeterMock> targeter(
+ stdx::make_unique<RemoteCommandTargeterMock>());
+ ConnectionString connString =
+ assertGet(ConnectionString::parse("mySet/host1:12345,host2:12345"));
+ targeter->setConnectionStringReturnValue(connString);
+ HostAndPort shardTarget = connString.getServers().front();
+ targeter->setFindHostReturnValue(shardTarget);
+
+ targeterFactory()->addTargeterToReturn(connString, std::move(targeter));
+ std::string expectedShardName = "StandaloneShard";
+
+ auto future = launchAsync([this, expectedShardName, connString] {
+ Client::initThreadIfNotAlready();
+ auto status = ShardingCatalogManager::get(operationContext())
+ ->addShard(operationContext(), &expectedShardName, connString, 100);
+ ASSERT_EQUALS(ErrorCodes::OperationFailed, status);
+ ASSERT_STRING_CONTAINS(status.getStatus().reason(), "does not match the actual set name");
+ });
+
+ BSONObj commandResponse = BSON("ok" << 1 << "ismaster" << true << "setName"
+ << "myOtherSet"
+ << "maxWireVersion"
+ << WireVersion::LATEST_WIRE_VERSION);
+ expectIsMaster(shardTarget, commandResponse);
+
+ future.timed_get(kLongFutureTimeout);
+}
+
+// Cannot add config server as a shard.
+TEST_F(AddShardTest, ShardIsCSRSConfigServer) {
+ std::unique_ptr<RemoteCommandTargeterMock> targeter(
+ stdx::make_unique<RemoteCommandTargeterMock>());
+ ConnectionString connString =
+ assertGet(ConnectionString::parse("config/host1:12345,host2:12345"));
+ targeter->setConnectionStringReturnValue(connString);
+ HostAndPort shardTarget = connString.getServers().front();
+ targeter->setFindHostReturnValue(shardTarget);
+
+ targeterFactory()->addTargeterToReturn(connString, std::move(targeter));
+ std::string expectedShardName = "StandaloneShard";
+
+ auto future = launchAsync([this, expectedShardName, connString] {
+ Client::initThreadIfNotAlready();
+ auto status = ShardingCatalogManager::get(operationContext())
+ ->addShard(operationContext(), &expectedShardName, connString, 100);
+ ASSERT_EQUALS(ErrorCodes::OperationFailed, status);
+ ASSERT_STRING_CONTAINS(status.getStatus().reason(),
+ "as a shard since it is a config server");
+ });
+
+ BSONObj commandResponse = BSON("ok" << 1 << "ismaster" << true << "setName"
+ << "config"
+ << "configsvr"
+ << true
+ << "maxWireVersion"
+ << WireVersion::LATEST_WIRE_VERSION);
+ expectIsMaster(shardTarget, commandResponse);
+
+ future.timed_get(kLongFutureTimeout);
+}
+
+// One of the hosts is not part of the found replica set.
+TEST_F(AddShardTest, ReplicaSetMissingHostsProvidedInSeedList) {
+ std::unique_ptr<RemoteCommandTargeterMock> targeter(
+ stdx::make_unique<RemoteCommandTargeterMock>());
+ ConnectionString connString =
+ assertGet(ConnectionString::parse("mySet/host1:12345,host2:12345"));
+ targeter->setConnectionStringReturnValue(connString);
+ HostAndPort shardTarget = connString.getServers().front();
+ targeter->setFindHostReturnValue(shardTarget);
+
+ targeterFactory()->addTargeterToReturn(connString, std::move(targeter));
+ std::string expectedShardName = "StandaloneShard";
+
+ auto future = launchAsync([this, expectedShardName, connString] {
+ Client::initThreadIfNotAlready();
+ auto status = ShardingCatalogManager::get(operationContext())
+ ->addShard(operationContext(), &expectedShardName, connString, 100);
+ ASSERT_EQUALS(ErrorCodes::OperationFailed, status);
+ ASSERT_STRING_CONTAINS(status.getStatus().reason(),
+ "host2:12345 does not belong to replica set");
+ });
+
+ BSONArrayBuilder hosts;
+ hosts.append("host1:12345");
+ BSONObj commandResponse = BSON("ok" << 1 << "ismaster" << true << "setName"
+ << "mySet"
+ << "hosts"
+ << hosts.arr()
+ << "maxWireVersion"
+ << WireVersion::LATEST_WIRE_VERSION);
+ expectIsMaster(shardTarget, commandResponse);
+
+ future.timed_get(kLongFutureTimeout);
+}
+
+// Cannot add a shard with the shard name "config".
+TEST_F(AddShardTest, AddShardWithNameConfigFails) {
+ std::unique_ptr<RemoteCommandTargeterMock> targeter(
+ stdx::make_unique<RemoteCommandTargeterMock>());
+ ConnectionString connString =
+ assertGet(ConnectionString::parse("mySet/host1:12345,host2:12345"));
+ targeter->setConnectionStringReturnValue(connString);
+ HostAndPort shardTarget = connString.getServers().front();
+ targeter->setFindHostReturnValue(shardTarget);
+
+ targeterFactory()->addTargeterToReturn(connString, std::move(targeter));
+ std::string expectedShardName = "config";
+
+ auto future = launchAsync([this, expectedShardName, connString] {
+ Client::initThreadIfNotAlready();
+ auto status = ShardingCatalogManager::get(operationContext())
+ ->addShard(operationContext(), &expectedShardName, connString, 100);
+ ASSERT_EQUALS(ErrorCodes::BadValue, status);
+ ASSERT_EQUALS(status.getStatus().reason(),
+ "use of shard replica set with name 'config' is not allowed");
+ });
+
+ BSONArrayBuilder hosts;
+ hosts.append("host1:12345");
+ hosts.append("host2:12345");
+ BSONObj commandResponse = BSON("ok" << 1 << "ismaster" << true << "setName"
+ << "mySet"
+ << "hosts"
+ << hosts.arr()
+ << "maxWireVersion"
+ << WireVersion::LATEST_WIRE_VERSION);
+ expectIsMaster(shardTarget, commandResponse);
+
+ future.timed_get(kLongFutureTimeout);
+}
+
+TEST_F(AddShardTest, ShardContainsExistingDatabase) {
+ std::unique_ptr<RemoteCommandTargeterMock> targeter(
+ stdx::make_unique<RemoteCommandTargeterMock>());
+ ConnectionString connString =
+ assertGet(ConnectionString::parse("mySet/host1:12345,host2:12345"));
+ targeter->setConnectionStringReturnValue(connString);
+ HostAndPort shardTarget = connString.getServers().front();
+ targeter->setFindHostReturnValue(shardTarget);
+
+ targeterFactory()->addTargeterToReturn(connString, std::move(targeter));
+ std::string expectedShardName = "mySet";
+
+ DatabaseType existingDB("existing", ShardId("existingShard"), false);
+
+ // Add a pre-existing database.
+ ASSERT_OK(catalogClient()->insertConfigDocument(operationContext(),
+ DatabaseType::ConfigNS,
+ existingDB.toBSON(),
+ ShardingCatalogClient::kMajorityWriteConcern));
+ assertDatabaseExists(existingDB);
+
+ auto future = launchAsync([this, expectedShardName, connString] {
+ Client::initThreadIfNotAlready();
+ auto status = ShardingCatalogManager::get(operationContext())
+ ->addShard(operationContext(), &expectedShardName, connString, 100);
+ ASSERT_EQUALS(ErrorCodes::OperationFailed, status);
+ ASSERT_STRING_CONTAINS(
+ status.getStatus().reason(),
+ "because a local database 'existing' exists in another existingShard");
+ });
+
+ BSONArrayBuilder hosts;
+ hosts.append("host1:12345");
+ hosts.append("host2:12345");
+ BSONObj commandResponse = BSON("ok" << 1 << "ismaster" << true << "setName"
+ << "mySet"
+ << "hosts"
+ << hosts.arr()
+ << "maxWireVersion"
+ << WireVersion::LATEST_WIRE_VERSION);
+ expectIsMaster(shardTarget, commandResponse);
+
+ expectListDatabases(shardTarget, {BSON("name" << existingDB.getName())});
+
+ future.timed_get(kLongFutureTimeout);
+}
+
+TEST_F(AddShardTest, SuccessfullyAddReplicaSet) {
+ std::unique_ptr<RemoteCommandTargeterMock> targeter(
+ stdx::make_unique<RemoteCommandTargeterMock>());
+ ConnectionString connString =
+ assertGet(ConnectionString::parse("mySet/host1:12345,host2:12345"));
+ targeter->setConnectionStringReturnValue(connString);
+ HostAndPort shardTarget = connString.getServers().front();
+ targeter->setFindHostReturnValue(shardTarget);
+ targeterFactory()->addTargeterToReturn(connString, std::move(targeter));
+
+ std::string expectedShardName = "mySet";
+
+ // The shard doc inserted into the config.shards collection on the config server.
+ ShardType expectedShard;
+ expectedShard.setName(expectedShardName);
+ expectedShard.setHost(connString.toString());
+ expectedShard.setMaxSizeMB(100);
+ expectedShard.setState(ShardType::ShardState::kShardAware);
+
+ DatabaseType discoveredDB("shardDB", ShardId(expectedShardName), false);
+
+ auto future = launchAsync([this, &expectedShardName, &connString] {
+ Client::initThreadIfNotAlready();
+ auto shardName = assertGet(ShardingCatalogManager::get(operationContext())
+ ->addShard(operationContext(), nullptr, connString, 100));
+ ASSERT_EQUALS(expectedShardName, shardName);
+ });
+
+ BSONArrayBuilder hosts;
+ hosts.append("host1:12345");
+ hosts.append("host2:12345");
+ BSONObj commandResponse = BSON("ok" << 1 << "ismaster" << true << "setName"
+ << "mySet"
+ << "hosts"
+ << hosts.arr()
+ << "maxWireVersion"
+ << WireVersion::LATEST_WIRE_VERSION);
+ expectIsMaster(shardTarget, commandResponse);
+
+ // Get databases list from new shard
+ expectListDatabases(shardTarget, std::vector<BSONObj>{BSON("name" << discoveredDB.getName())});
+
+ expectCollectionDrop(shardTarget, NamespaceString("config", "system.sessions"));
+
+ // The shardIdentity doc inserted into the admin.system.version collection on the shard.
+ expectShardIdentityUpsertReturnSuccess(shardTarget, expectedShardName);
+
+ // The shard receives the setFeatureCompatibilityVersion command.
+ expectSetFeatureCompatibilityVersion(shardTarget, BSON("ok" << 1));
+
+ // Wait for the addShard to complete before checking the config database
+ future.timed_get(kLongFutureTimeout);
+
+ // Ensure that the shard document was properly added to config.shards.
+ assertShardExists(expectedShard);
+
+    // Ensure that the databases detected from the shard were properly added to config.databases.
+ assertDatabaseExists(discoveredDB);
+
+ assertChangeWasLogged(expectedShard);
+}
+
+TEST_F(AddShardTest, ReplicaSetExtraHostsDiscovered) {
+ std::unique_ptr<RemoteCommandTargeterMock> targeter(
+ stdx::make_unique<RemoteCommandTargeterMock>());
+ ConnectionString seedString =
+ assertGet(ConnectionString::parse("mySet/host1:12345,host2:12345"));
+ ConnectionString fullConnString =
+ assertGet(ConnectionString::parse("mySet/host1:12345,host2:12345,host3:12345"));
+ targeter->setConnectionStringReturnValue(fullConnString);
+ HostAndPort shardTarget = seedString.getServers().front();
+ targeter->setFindHostReturnValue(shardTarget);
+ targeterFactory()->addTargeterToReturn(seedString, std::move(targeter));
+
+ std::string expectedShardName = "mySet";
+
+ // The shard doc inserted into the config.shards collection on the config server.
+ ShardType expectedShard;
+ expectedShard.setName(expectedShardName);
+ expectedShard.setHost(fullConnString.toString());
+ expectedShard.setMaxSizeMB(100);
+ expectedShard.setState(ShardType::ShardState::kShardAware);
+
+ DatabaseType discoveredDB("shardDB", ShardId(expectedShardName), false);
+
+ auto future = launchAsync([this, &expectedShardName, &seedString] {
+ Client::initThreadIfNotAlready();
+ auto shardName = assertGet(ShardingCatalogManager::get(operationContext())
+ ->addShard(operationContext(), nullptr, seedString, 100));
+ ASSERT_EQUALS(expectedShardName, shardName);
+ });
+
+ BSONArrayBuilder hosts;
+ hosts.append("host1:12345");
+ hosts.append("host2:12345");
+ BSONObj commandResponse = BSON("ok" << 1 << "ismaster" << true << "setName"
+ << "mySet"
+ << "hosts"
+ << hosts.arr()
+ << "maxWireVersion"
+ << WireVersion::LATEST_WIRE_VERSION);
+ expectIsMaster(shardTarget, commandResponse);
+
+ // Get databases list from new shard
+ expectListDatabases(shardTarget, std::vector<BSONObj>{BSON("name" << discoveredDB.getName())});
+
+ expectCollectionDrop(shardTarget, NamespaceString("config", "system.sessions"));
+
+ // The shardIdentity doc inserted into the admin.system.version collection on the shard.
+ expectShardIdentityUpsertReturnSuccess(shardTarget, expectedShardName);
+
+ // The shard receives the setFeatureCompatibilityVersion command.
+ expectSetFeatureCompatibilityVersion(shardTarget, BSON("ok" << 1));
+
+ // Wait for the addShard to complete before checking the config database
+ future.timed_get(kLongFutureTimeout);
+
+ // Ensure that the shard document was properly added to config.shards.
+ assertShardExists(expectedShard);
+
+    // Ensure that the databases detected from the shard were properly added to config.databases.
+ assertDatabaseExists(discoveredDB);
+
+ // The changelog entry uses whatever connection string is passed to addShard, even if addShard
+ // discovered additional hosts.
+ expectedShard.setHost(seedString.toString());
+ assertChangeWasLogged(expectedShard);
+}
+
+TEST_F(AddShardTest, AddShardSucceedsEvenIfAddingDBsFromNewShardFails) {
+ std::unique_ptr<RemoteCommandTargeterMock> targeter(
+ stdx::make_unique<RemoteCommandTargeterMock>());
+ HostAndPort shardTarget("StandaloneHost:12345");
+ targeter->setConnectionStringReturnValue(ConnectionString(shardTarget));
+ targeter->setFindHostReturnValue(shardTarget);
+
+ targeterFactory()->addTargeterToReturn(ConnectionString(shardTarget), std::move(targeter));
+
+ std::string expectedShardName = "StandaloneShard";
+
+ // The shard doc inserted into the config.shards collection on the config server.
+ ShardType expectedShard;
+ expectedShard.setName(expectedShardName);
+ expectedShard.setHost("StandaloneHost:12345");
+ expectedShard.setMaxSizeMB(100);
+ expectedShard.setState(ShardType::ShardState::kShardAware);
+
+ DatabaseType discoveredDB1("TestDB1", ShardId("StandaloneShard"), false);
+ DatabaseType discoveredDB2("TestDB2", ShardId("StandaloneShard"), false);
+
+    // Enable the fail point that causes all updates to fail. Since the databases detected on the
+    // new shard are added with upserts, while the shard document itself is added via insert, this
+    // allows the shard to be added but prevents the databases from being brought into the cluster.
+ auto failPoint = getGlobalFailPointRegistry()->getFailPoint("failAllUpdates");
+ ASSERT(failPoint);
+ failPoint->setMode(FailPoint::alwaysOn);
+ ON_BLOCK_EXIT([&] { failPoint->setMode(FailPoint::off); });
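+    // (ON_BLOCK_EXIT is a scope guard, so the fail point is turned back off even if an assertion
+    // below fails.)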
+
+ auto future = launchAsync([this, &expectedShardName, &shardTarget] {
+ Client::initThreadIfNotAlready();
+ auto shardName = assertGet(
+ ShardingCatalogManager::get(operationContext())
+ ->addShard(
+ operationContext(), &expectedShardName, ConnectionString(shardTarget), 100));
+ ASSERT_EQUALS(expectedShardName, shardName);
+ });
+
+ BSONObj commandResponse = BSON("ok" << 1 << "ismaster" << true << "maxWireVersion"
+ << WireVersion::LATEST_WIRE_VERSION);
+ expectIsMaster(shardTarget, commandResponse);
+
+ // Get databases list from new shard
+ expectListDatabases(
+ shardTarget,
+ std::vector<BSONObj>{BSON("name"
+ << "local"
+ << "sizeOnDisk"
+ << 1000),
+ BSON("name" << discoveredDB1.getName() << "sizeOnDisk" << 2000),
+ BSON("name" << discoveredDB2.getName() << "sizeOnDisk" << 5000)});
+
+ expectCollectionDrop(shardTarget, NamespaceString("config", "system.sessions"));
+
+ // The shardIdentity doc inserted into the admin.system.version collection on the shard.
+ expectShardIdentityUpsertReturnSuccess(shardTarget, expectedShardName);
+
+ // The shard receives the setFeatureCompatibilityVersion command.
+ expectSetFeatureCompatibilityVersion(shardTarget, BSON("ok" << 1));
+
+ // Wait for the addShard to complete before checking the config database
+ future.timed_get(kLongFutureTimeout);
+
+ // Ensure that the shard document was properly added to config.shards.
+ assertShardExists(expectedShard);
+
+ // Ensure that the databases detected from the shard were *not* added.
+ ASSERT_EQUALS(ErrorCodes::NamespaceNotFound,
+ catalogClient()
+ ->getDatabase(operationContext(),
+ discoveredDB1.getName(),
+ repl::ReadConcernLevel::kMajorityReadConcern)
+ .getStatus());
+ ASSERT_EQUALS(ErrorCodes::NamespaceNotFound,
+ catalogClient()
+ ->getDatabase(operationContext(),
+ discoveredDB2.getName(),
+ repl::ReadConcernLevel::kMajorityReadConcern)
+ .getStatus());
+
+ assertChangeWasLogged(expectedShard);
+}
+
+// Tests both that trying to add a shard with the same host as an existing shard but with different
+// options fails, and that adding a shard with the same host as an existing shard with the *same*
+// options succeeds.
+TEST_F(AddShardTest, AddExistingShardStandalone) {
+ HostAndPort shardTarget("StandaloneHost:12345");
+ std::unique_ptr<RemoteCommandTargeterMock> standaloneTargeter(
+ stdx::make_unique<RemoteCommandTargeterMock>());
+ standaloneTargeter->setConnectionStringReturnValue(ConnectionString(shardTarget));
+ standaloneTargeter->setFindHostReturnValue(shardTarget);
+ targeterFactory()->addTargeterToReturn(ConnectionString(shardTarget),
+ std::move(standaloneTargeter));
+
+ std::unique_ptr<RemoteCommandTargeterMock> replsetTargeter(
+ stdx::make_unique<RemoteCommandTargeterMock>());
+ replsetTargeter->setConnectionStringReturnValue(
+ ConnectionString::forReplicaSet("mySet", {shardTarget}));
+ replsetTargeter->setFindHostReturnValue(shardTarget);
+ targeterFactory()->addTargeterToReturn(ConnectionString::forReplicaSet("mySet", {shardTarget}),
+ std::move(replsetTargeter));
+
+ std::string existingShardName = "myShard";
+ ShardType existingShard;
+ existingShard.setName(existingShardName);
+ existingShard.setHost(shardTarget.toString());
+ existingShard.setMaxSizeMB(100);
+ existingShard.setState(ShardType::ShardState::kShardAware);
+
+ // Make sure the shard already exists.
+ ASSERT_OK(catalogClient()->insertConfigDocument(operationContext(),
+ ShardType::ConfigNS,
+ existingShard.toBSON(),
+ ShardingCatalogClient::kMajorityWriteConcern));
+ assertShardExists(existingShard);
+
+ // Adding the same standalone host with a different shard name should fail.
+ std::string differentName = "anotherShardName";
+ auto future1 = launchAsync([&] {
+ Client::initThreadIfNotAlready();
+ ASSERT_EQUALS(ErrorCodes::IllegalOperation,
+ ShardingCatalogManager::get(operationContext())
+ ->addShard(operationContext(),
+ &differentName,
+ ConnectionString(shardTarget),
+ existingShard.getMaxSizeMB()));
+ });
+ future1.timed_get(kLongFutureTimeout);
+
+ // Ensure that the shard document was unchanged.
+ assertShardExists(existingShard);
+
+ // Adding the same standalone host with a different maxSize should fail.
+ auto future2 = launchAsync([&] {
+ Client::initThreadIfNotAlready();
+ ASSERT_EQUALS(ErrorCodes::IllegalOperation,
+ ShardingCatalogManager::get(operationContext())
+ ->addShard(operationContext(),
+ nullptr,
+ ConnectionString(shardTarget),
+ existingShard.getMaxSizeMB() + 100));
+ });
+ future2.timed_get(kLongFutureTimeout);
+
+ // Adding the same standalone host but as part of a replica set should fail.
+ // Ensures that even if the user changed the standalone shard to a single-node replica set, you
+ // can't change the sharded cluster's notion of the shard from standalone to replica set just
+ // by calling addShard.
+ auto future3 = launchAsync([&] {
+ Client::initThreadIfNotAlready();
+ ASSERT_EQUALS(ErrorCodes::IllegalOperation,
+ ShardingCatalogManager::get(operationContext())
+ ->addShard(operationContext(),
+ nullptr,
+ ConnectionString::forReplicaSet("mySet", {shardTarget}),
+ existingShard.getMaxSizeMB()));
+ });
+ future3.timed_get(kLongFutureTimeout);
+
+ // Ensure that the shard document was unchanged.
+ assertShardExists(existingShard);
+
+ // Adding the same standalone host with the same options should succeed.
+ auto future4 = launchAsync([&] {
+ Client::initThreadIfNotAlready();
+ auto shardName = assertGet(ShardingCatalogManager::get(operationContext())
+ ->addShard(operationContext(),
+ &existingShardName,
+ ConnectionString(shardTarget),
+ existingShard.getMaxSizeMB()));
+ ASSERT_EQUALS(existingShardName, shardName);
+ });
+ future4.timed_get(kLongFutureTimeout);
+
+ // Ensure that the shard document was unchanged.
+ assertShardExists(existingShard);
+
+ // Adding the same standalone host with the same options (without explicitly specifying the
+ // shard name) should succeed.
+ auto future5 = launchAsync([&] {
+ Client::initThreadIfNotAlready();
+ auto shardName = assertGet(ShardingCatalogManager::get(operationContext())
+ ->addShard(operationContext(),
+ nullptr,
+ ConnectionString(shardTarget),
+ existingShard.getMaxSizeMB()));
+ ASSERT_EQUALS(existingShardName, shardName);
+ });
+ future5.timed_get(kLongFutureTimeout);
+
+ // Ensure that the shard document was unchanged.
+ assertShardExists(existingShard);
+}
+
+// Tests both that trying to add a shard with the same replica set as an existing shard but with
+// different options fails, and that adding a shard with the same replica set as an existing shard
+// with the *same* options succeeds.
+TEST_F(AddShardTest, AddExistingShardReplicaSet) {
+ std::unique_ptr<RemoteCommandTargeterMock> replsetTargeter(
+ stdx::make_unique<RemoteCommandTargeterMock>());
+ ConnectionString connString = assertGet(ConnectionString::parse("mySet/host1:12345"));
+ replsetTargeter->setConnectionStringReturnValue(connString);
+ HostAndPort shardTarget = connString.getServers().front();
+ replsetTargeter->setFindHostReturnValue(shardTarget);
+ targeterFactory()->addTargeterToReturn(connString, std::move(replsetTargeter));
+
+ std::string existingShardName = "myShard";
+ ShardType existingShard;
+ existingShard.setName(existingShardName);
+ existingShard.setHost(connString.toString());
+ existingShard.setMaxSizeMB(100);
+ existingShard.setState(ShardType::ShardState::kShardAware);
+
+ // Make sure the shard already exists.
+ ASSERT_OK(catalogClient()->insertConfigDocument(operationContext(),
+ ShardType::ConfigNS,
+ existingShard.toBSON(),
+ ShardingCatalogClient::kMajorityWriteConcern));
+ assertShardExists(existingShard);
+
+ // Adding the same connection string with a different shard name should fail.
+ std::string differentName = "anotherShardName";
+ auto future1 = launchAsync([&] {
+ Client::initThreadIfNotAlready();
+ ASSERT_EQUALS(
+ ErrorCodes::IllegalOperation,
+ ShardingCatalogManager::get(operationContext())
+ ->addShard(
+ operationContext(), &differentName, connString, existingShard.getMaxSizeMB()));
+ });
+ future1.timed_get(kLongFutureTimeout);
+
+ // Ensure that the shard document was unchanged.
+ assertShardExists(existingShard);
+
+ // Adding the same connection string with a different maxSize should fail.
+ auto future2 = launchAsync([&] {
+ Client::initThreadIfNotAlready();
+ ASSERT_EQUALS(
+ ErrorCodes::IllegalOperation,
+ ShardingCatalogManager::get(operationContext())
+ ->addShard(
+ operationContext(), nullptr, connString, existingShard.getMaxSizeMB() + 100));
+ });
+ future2.timed_get(kLongFutureTimeout);
+
+ // Ensure that the shard document was unchanged.
+ assertShardExists(existingShard);
+
+    // Adding a connection string with a host of an existing shard but using a different connection
+ // string type should fail.
+ // Ensures that even if the user changed the replica set shard to a standalone, you can't change
+ // the sharded cluster's notion of the shard from replica set to standalone just by calling
+ // addShard.
+ auto future3 = launchAsync([&] {
+ Client::initThreadIfNotAlready();
+ ASSERT_EQUALS(ErrorCodes::IllegalOperation,
+ ShardingCatalogManager::get(operationContext())
+ ->addShard(operationContext(),
+ nullptr,
+ ConnectionString(shardTarget),
+ existingShard.getMaxSizeMB()));
+ });
+ future3.timed_get(kLongFutureTimeout);
+
+ // Ensure that the shard document was unchanged.
+ assertShardExists(existingShard);
+
+    // Adding a connection string with the same hosts but a different replica set name should fail.
+ // Ensures that even if you manually change the shard's replica set name somehow, you can't
+ // change the replica set name the sharded cluster knows for it just by calling addShard again.
+ std::string differentSetName = "differentSet";
+ auto future4 = launchAsync([&] {
+ Client::initThreadIfNotAlready();
+ ASSERT_EQUALS(ErrorCodes::IllegalOperation,
+ ShardingCatalogManager::get(operationContext())
+ ->addShard(operationContext(),
+ nullptr,
+ ConnectionString::forReplicaSet(differentSetName,
+ connString.getServers()),
+ existingShard.getMaxSizeMB()));
+ });
+ future4.timed_get(kLongFutureTimeout);
+
+ // Ensure that the shard document was unchanged.
+ assertShardExists(existingShard);
+
+ // Adding the same host with the same options should succeed.
+ auto future5 = launchAsync([&] {
+ Client::initThreadIfNotAlready();
+ auto shardName = assertGet(ShardingCatalogManager::get(operationContext())
+ ->addShard(operationContext(),
+ &existingShardName,
+ connString,
+ existingShard.getMaxSizeMB()));
+ ASSERT_EQUALS(existingShardName, shardName);
+ });
+ future5.timed_get(kLongFutureTimeout);
+
+ // Adding the same host with the same options (without explicitly specifying the shard name)
+ // should succeed.
+ auto future6 = launchAsync([&] {
+ Client::initThreadIfNotAlready();
+ auto shardName = assertGet(
+ ShardingCatalogManager::get(operationContext())
+ ->addShard(operationContext(), nullptr, connString, existingShard.getMaxSizeMB()));
+ ASSERT_EQUALS(existingShardName, shardName);
+ });
+ future6.timed_get(kLongFutureTimeout);
+
+ // Ensure that the shard document was unchanged.
+ assertShardExists(existingShard);
+
+    // Adding the same replica set with different host membership (but otherwise the same options)
+    // should succeed.
+ auto otherHost = connString.getServers().back();
+ ConnectionString otherHostConnString = assertGet(ConnectionString::parse("mySet/host2:12345"));
+ {
+ // Add a targeter for the different seed string this addShard request will use.
+ std::unique_ptr<RemoteCommandTargeterMock> otherHostTargeter(
+ stdx::make_unique<RemoteCommandTargeterMock>());
+ otherHostTargeter->setConnectionStringReturnValue(otherHostConnString);
+ otherHostTargeter->setFindHostReturnValue(otherHost);
+ targeterFactory()->addTargeterToReturn(otherHostConnString, std::move(otherHostTargeter));
+ }
+ auto future7 = launchAsync([&] {
+ Client::initThreadIfNotAlready();
+ auto shardName = assertGet(ShardingCatalogManager::get(operationContext())
+ ->addShard(operationContext(),
+ nullptr,
+ otherHostConnString,
+ existingShard.getMaxSizeMB()));
+ ASSERT_EQUALS(existingShardName, shardName);
+ });
+ future7.timed_get(kLongFutureTimeout);
+
+ // Ensure that the shard document was unchanged.
+ assertShardExists(existingShard);
+}
+
+} // namespace
+} // namespace mongo
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_add_shard_to_zone_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_add_shard_to_zone_test.cpp
new file mode 100644
index 00000000000..c586f412ed1
--- /dev/null
+++ b/src/mongo/db/s/config/sharding_catalog_manager_add_shard_to_zone_test.cpp
@@ -0,0 +1,119 @@
+/**
+ * Copyright (C) 2016 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects
+ * for all of the code used other than as permitted herein. If you modify
+ * file(s) with this exception, you may extend this exception to your
+ * version of the file(s), but you are not obligated to do so. If you do not
+ * wish to do so, delete this exception statement from your version. If you
+ * delete this exception statement from all source files in the program,
+ * then also delete it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/client/read_preference.h"
+#include "mongo/db/namespace_string.h"
+#include "mongo/db/s/config/sharding_catalog_manager.h"
+#include "mongo/s/catalog/type_shard.h"
+#include "mongo/s/client/shard.h"
+#include "mongo/s/config_server_test_fixture.h"
+
+namespace mongo {
+namespace {
+
+ReadPreferenceSetting kReadPref(ReadPreference::PrimaryOnly);
+
+using AddShardToZoneTest = ConfigServerTestFixture;
+
+TEST_F(AddShardToZoneTest, AddSingleZoneToExistingShardShouldSucceed) {
+ ShardType shard;
+ shard.setName("a");
+ shard.setHost("a:1234");
+
+ setupShards({shard}).transitional_ignore();
+
+ ASSERT_OK(ShardingCatalogManager::get(operationContext())
+ ->addShardToZone(operationContext(), shard.getName(), "z"));
+ auto shardDocStatus = getShardDoc(operationContext(), shard.getName());
+ ASSERT_OK(shardDocStatus.getStatus());
+
+ auto shardDoc = shardDocStatus.getValue();
+ auto tags = shardDoc.getTags();
+ ASSERT_EQ(1u, tags.size());
+ ASSERT_EQ("z", tags.front());
+}
+
+TEST_F(AddShardToZoneTest, AddZoneToShardWithSameTagShouldSucceed) {
+ ShardType shard;
+ shard.setName("a");
+ shard.setHost("a:1234");
+ shard.setTags({"x", "y"});
+
+ setupShards({shard}).transitional_ignore();
+
+ ASSERT_OK(ShardingCatalogManager::get(operationContext())
+ ->addShardToZone(operationContext(), shard.getName(), "x"));
+
+ auto shardDocStatus = getShardDoc(operationContext(), shard.getName());
+ ASSERT_OK(shardDocStatus.getStatus());
+
+ auto shardDoc = shardDocStatus.getValue();
+ auto tags = shardDoc.getTags();
+ ASSERT_EQ(2u, tags.size());
+ ASSERT_EQ("x", tags.front());
+ ASSERT_EQ("y", tags.back());
+}
+
+TEST_F(AddShardToZoneTest, AddZoneToShardWithNewTagShouldAppend) {
+ ShardType shard;
+ shard.setName("a");
+ shard.setHost("a:1234");
+ shard.setTags({"x"});
+
+ setupShards({shard}).transitional_ignore();
+
+ ASSERT_OK(ShardingCatalogManager::get(operationContext())
+ ->addShardToZone(operationContext(), shard.getName(), "y"));
+
+ auto shardDocStatus = getShardDoc(operationContext(), shard.getName());
+ ASSERT_OK(shardDocStatus.getStatus());
+
+ auto shardDoc = shardDocStatus.getValue();
+ auto tags = shardDoc.getTags();
+ ASSERT_EQ(2u, tags.size());
+ ASSERT_EQ("x", tags.front());
+ ASSERT_EQ("y", tags.back());
+}
+
+TEST_F(AddShardToZoneTest, AddSingleZoneToNonExistingShardShouldFail) {
+ ShardType shard;
+ shard.setName("a");
+ shard.setHost("a:1234");
+
+ setupShards({shard}).transitional_ignore();
+
+ auto status = ShardingCatalogManager::get(operationContext())
+ ->addShardToZone(operationContext(), "b", "z");
+ ASSERT_EQ(ErrorCodes::ShardNotFound, status);
+}
+
+} // namespace
+} // namespace mongo
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_assign_key_range_to_zone_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_assign_key_range_to_zone_test.cpp
new file mode 100644
index 00000000000..89b64867b99
--- /dev/null
+++ b/src/mongo/db/s/config/sharding_catalog_manager_assign_key_range_to_zone_test.cpp
@@ -0,0 +1,725 @@
+/**
+ * Copyright (C) 2016 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects
+ * for all of the code used other than as permitted herein. If you modify
+ * file(s) with this exception, you may extend this exception to your
+ * version of the file(s), but you are not obligated to do so. If you do not
+ * wish to do so, delete this exception statement from your version. If you
+ * delete this exception statement from all source files in the program,
+ * then also delete it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/bson/json.h"
+#include "mongo/client/read_preference.h"
+#include "mongo/db/namespace_string.h"
+#include "mongo/db/s/config/sharding_catalog_manager.h"
+#include "mongo/s/catalog/type_chunk.h"
+#include "mongo/s/catalog/type_collection.h"
+#include "mongo/s/catalog/type_shard.h"
+#include "mongo/s/catalog/type_tags.h"
+#include "mongo/s/client/shard.h"
+#include "mongo/s/config_server_test_fixture.h"
+
+namespace mongo {
+namespace {
+
+using std::string;
+
+ReadPreferenceSetting kReadPref(ReadPreference::PrimaryOnly);
+
+/**
+ * Basic fixture with one shard that has a zone, and a sharded collection.
+ */
+class AssignKeyRangeToZoneTestFixture : public ConfigServerTestFixture {
+public:
+ void setUp() override {
+ ConfigServerTestFixture::setUp();
+
+ ShardType shard;
+ shard.setName("a");
+ shard.setHost("a:1234");
+ shard.setTags({zoneName()});
+
+ setupShards({shard}).transitional_ignore();
+
+ CollectionType shardedCollection;
+ shardedCollection.setNs(shardedNS());
+ shardedCollection.setEpoch(OID::gen());
+ shardedCollection.setKeyPattern(BSON("x" << 1));
+
+ ASSERT_OK(insertToConfigCollection(
+ operationContext(), CollectionType::ConfigNS, shardedCollection.toBSON()));
+ }
+
+ /**
+ * Asserts that the config.tags collection is empty.
+ */
+ void assertNoZoneDoc() {
+ auto findStatus =
+ findOneOnConfigCollection(operationContext(), TagsType::ConfigNS, BSONObj());
+ ASSERT_EQ(ErrorCodes::NoMatchingDocument, findStatus);
+ }
+
+ /**
+ * Asserts that this is the only tag that exists in config.tags.
+ */
+ void assertOnlyZone(const NamespaceString& ns,
+ const ChunkRange& range,
+ const string& zoneName) {
+ auto findStatus =
+ getConfigShard()->exhaustiveFindOnConfig(operationContext(),
+ kReadPref,
+ repl::ReadConcernLevel::kMajorityReadConcern,
+ TagsType::ConfigNS,
+ BSONObj(),
+ BSONObj(),
+ 1);
+ ASSERT_OK(findStatus.getStatus());
+
+ auto findResult = findStatus.getValue();
+ ASSERT_EQ(1U, findResult.docs.size());
+
+ auto tagDocStatus = TagsType::fromBSON(findResult.docs.front());
+ ASSERT_OK(tagDocStatus.getStatus());
+
+ auto tagDoc = tagDocStatus.getValue();
+ ASSERT_EQ(ns, tagDoc.getNS());
+ ASSERT_BSONOBJ_EQ(range.getMin(), tagDoc.getMinKey());
+ ASSERT_BSONOBJ_EQ(range.getMax(), tagDoc.getMaxKey());
+ ASSERT_EQ(zoneName, tagDoc.getTag());
+ }
+
+ NamespaceString shardedNS() const {
+ return NamespaceString("test.foo");
+ }
+
+ string zoneName() const {
+ return "z";
+ }
+};
+
+TEST_F(AssignKeyRangeToZoneTestFixture, BasicAssignKeyRange) {
+ const ChunkRange newRange(BSON("x" << 0), BSON("x" << 10));
+ ASSERT_OK(ShardingCatalogManager::get(operationContext())
+ ->assignKeyRangeToZone(operationContext(), shardedNS(), newRange, zoneName()));
+
+ assertOnlyZone(shardedNS(), newRange, zoneName());
+}
+
+TEST_F(AssignKeyRangeToZoneTestFixture, AssignKeyRangeOnUnshardedCollShouldFail) {
+ auto status = ShardingCatalogManager::get(operationContext())
+ ->assignKeyRangeToZone(operationContext(),
+ NamespaceString("unsharded.coll"),
+ ChunkRange(BSON("x" << 0), BSON("x" << 10)),
+ zoneName());
+ ASSERT_EQ(ErrorCodes::NamespaceNotSharded, status);
+
+ assertNoZoneDoc();
+}
+
+TEST_F(AssignKeyRangeToZoneTestFixture, AssignKeyRangeOnDroppedShardedCollShouldFail) {
+ CollectionType unshardedCollection;
+ NamespaceString ns("unsharded.coll");
+ unshardedCollection.setNs(ns);
+ unshardedCollection.setEpoch(OID::gen());
+ unshardedCollection.setKeyPattern(BSON("x" << 1));
+ unshardedCollection.setDropped(true);
+
+ ASSERT_OK(insertToConfigCollection(
+ operationContext(), CollectionType::ConfigNS, unshardedCollection.toBSON()));
+
+ auto status =
+ ShardingCatalogManager::get(operationContext())
+ ->assignKeyRangeToZone(
+ operationContext(), ns, ChunkRange(BSON("x" << 0), BSON("x" << 10)), zoneName());
+ ASSERT_EQ(ErrorCodes::NamespaceNotSharded, status);
+
+ assertNoZoneDoc();
+}
+
+TEST_F(AssignKeyRangeToZoneTestFixture, AssignKeyRangeNonExistingZoneShouldFail) {
+ auto status = ShardingCatalogManager::get(operationContext())
+ ->assignKeyRangeToZone(operationContext(),
+ shardedNS(),
+ ChunkRange(BSON("x" << 0), BSON("x" << 10)),
+ zoneName() + "y");
+ ASSERT_EQ(ErrorCodes::ZoneNotFound, status);
+
+ assertNoZoneDoc();
+}
+
+TEST_F(AssignKeyRangeToZoneTestFixture, MinWithInvalidShardKeyShouldFail) {
+ auto status = ShardingCatalogManager::get(operationContext())
+ ->assignKeyRangeToZone(operationContext(),
+ shardedNS(),
+ ChunkRange(BSON("a" << 0), BSON("x" << 10)),
+ zoneName());
+ ASSERT_EQ(ErrorCodes::ShardKeyNotFound, status);
+
+ assertNoZoneDoc();
+}
+
+TEST_F(AssignKeyRangeToZoneTestFixture, MaxWithInvalidShardKeyShouldFail) {
+ auto status = ShardingCatalogManager::get(operationContext())
+ ->assignKeyRangeToZone(operationContext(),
+ shardedNS(),
+ ChunkRange(BSON("x" << 0), BSON("y" << 10)),
+ zoneName());
+ ASSERT_EQ(ErrorCodes::ShardKeyNotFound, status);
+
+ assertNoZoneDoc();
+}
+
+TEST_F(AssignKeyRangeToZoneTestFixture, MinThatIsAShardKeyPrefixShouldConvertToFullShardKey) {
+ NamespaceString ns("compound.shard");
+ CollectionType shardedCollection;
+ shardedCollection.setNs(ns);
+ shardedCollection.setEpoch(OID::gen());
+ shardedCollection.setKeyPattern(BSON("x" << 1 << "y" << 1));
+
+ ASSERT_OK(insertToConfigCollection(
+ operationContext(), CollectionType::ConfigNS, shardedCollection.toBSON()));
+
+ const ChunkRange newRange(BSON("x" << 0), BSON("x" << 10 << "y" << 10));
+ ASSERT_OK(ShardingCatalogManager::get(operationContext())
+ ->assignKeyRangeToZone(operationContext(), ns, newRange, zoneName()));
+
+ const ChunkRange fullRange(fromjson("{ x: 0, y: { $minKey: 1 }}"),
+ BSON("x" << 10 << "y" << 10));
+ assertOnlyZone(ns, fullRange, zoneName());
+}
+
+TEST_F(AssignKeyRangeToZoneTestFixture, MaxThatIsAShardKeyPrefixShouldConvertToFullShardKey) {
+ NamespaceString ns("compound.shard");
+ CollectionType shardedCollection;
+ shardedCollection.setNs(ns);
+ shardedCollection.setEpoch(OID::gen());
+ shardedCollection.setKeyPattern(BSON("x" << 1 << "y" << 1));
+
+ ASSERT_OK(insertToConfigCollection(
+ operationContext(), CollectionType::ConfigNS, shardedCollection.toBSON()));
+
+ const ChunkRange newRange(BSON("x" << 0 << "y" << 0), BSON("x" << 10));
+ ASSERT_OK(ShardingCatalogManager::get(operationContext())
+ ->assignKeyRangeToZone(operationContext(), ns, newRange, zoneName()));
+
+ const ChunkRange fullRange(BSON("x" << 0 << "y" << 0), fromjson("{ x: 10, y: { $minKey: 1 }}"));
+ assertOnlyZone(ns, fullRange, zoneName());
+}
+
+TEST_F(AssignKeyRangeToZoneTestFixture, MinThatIsNotAShardKeyPrefixShouldFail) {
+ auto status =
+ ShardingCatalogManager::get(operationContext())
+ ->assignKeyRangeToZone(operationContext(),
+ shardedNS(),
+ ChunkRange(BSON("x" << 0 << "y" << 0), BSON("x" << 10)),
+ zoneName());
+ ASSERT_EQ(ErrorCodes::ShardKeyNotFound, status);
+
+ assertNoZoneDoc();
+}
+
+TEST_F(AssignKeyRangeToZoneTestFixture, MaxThatIsNotAShardKeyPrefixShouldFail) {
+ auto status =
+ ShardingCatalogManager::get(operationContext())
+ ->assignKeyRangeToZone(operationContext(),
+ shardedNS(),
+ ChunkRange(BSON("x" << 0), BSON("x" << 10 << "y" << 10)),
+ zoneName());
+ ASSERT_EQ(ErrorCodes::ShardKeyNotFound, status);
+
+ assertNoZoneDoc();
+}
+
+TEST_F(AssignKeyRangeToZoneTestFixture, MinMaxThatIsNotAShardKeyPrefixShouldFail) {
+ auto status = ShardingCatalogManager::get(operationContext())
+ ->assignKeyRangeToZone(
+ operationContext(),
+ shardedNS(),
+ ChunkRange(BSON("x" << 0 << "y" << 0), BSON("x" << 10 << "y" << 10)),
+ zoneName());
+ ASSERT_EQ(ErrorCodes::ShardKeyNotFound, status);
+
+ assertNoZoneDoc();
+}
+
+TEST_F(AssignKeyRangeToZoneTestFixture, MinMaxThatIsAShardKeyPrefixShouldSucceed) {
+ NamespaceString ns("compound.shard");
+ CollectionType shardedCollection;
+ shardedCollection.setNs(ns);
+ shardedCollection.setEpoch(OID::gen());
+ shardedCollection.setKeyPattern(BSON("x" << 1 << "y" << 1));
+
+ ASSERT_OK(insertToConfigCollection(
+ operationContext(), CollectionType::ConfigNS, shardedCollection.toBSON()));
+
+ const ChunkRange newRange(BSON("x" << 0 << "y" << 0), BSON("x" << 10 << "y" << 10));
+ ASSERT_OK(ShardingCatalogManager::get(operationContext())
+ ->assignKeyRangeToZone(operationContext(), ns, newRange, zoneName()));
+
+ assertOnlyZone(ns, newRange, zoneName());
+}
+
+/**
+ * Basic fixture with one shard that has a zone, a sharded collection, and an existing
+ * zoned key range.
+ */
+class AssignKeyRangeWithOneRangeFixture : public AssignKeyRangeToZoneTestFixture {
+public:
+ void setUp() override {
+ AssignKeyRangeToZoneTestFixture::setUp();
+
+ ASSERT_OK(ShardingCatalogManager::get(operationContext())
+ ->assignKeyRangeToZone(
+ operationContext(), shardedNS(), getExistingRange(), zoneName()));
+ }
+
+ ChunkRange getExistingRange() {
+ return ChunkRange(BSON("x" << 4), BSON("x" << 8));
+ }
+};
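+
+// The diagrams above the tests below plot zone ranges against the shard key axis: the
+// bottom row is the key value (0 through 9), "existing" marks the fixture's
+// pre-assigned range [4, 8), and "new" marks the range the test attempts to assign.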
+
+
+/**
+ * new ZZ
+ * existing ZZZZ
+ * 0123456789
+ */
+TEST_F(AssignKeyRangeWithOneRangeFixture, NewMaxAlignsWithExistingMinShouldSucceed) {
+ ASSERT_OK(ShardingCatalogManager::get(operationContext())
+ ->assignKeyRangeToZone(operationContext(),
+ shardedNS(),
+ ChunkRange(BSON("x" << 2), BSON("x" << 4)),
+ zoneName()));
+
+ {
+ auto findStatus = findOneOnConfigCollection(
+ operationContext(), TagsType::ConfigNS, BSON("min" << BSON("x" << 2)));
+ ASSERT_OK(findStatus);
+
+ auto tagDocStatus = TagsType::fromBSON(findStatus.getValue());
+ ASSERT_OK(tagDocStatus.getStatus());
+
+ auto tagDoc = tagDocStatus.getValue();
+ ASSERT_EQ(shardedNS(), tagDoc.getNS());
+ ASSERT_BSONOBJ_EQ(BSON("x" << 2), tagDoc.getMinKey());
+ ASSERT_BSONOBJ_EQ(BSON("x" << 4), tagDoc.getMaxKey());
+ ASSERT_EQ(zoneName(), tagDoc.getTag());
+ }
+
+ {
+ const auto existingRange = getExistingRange();
+ auto findStatus = findOneOnConfigCollection(
+ operationContext(), TagsType::ConfigNS, BSON("min" << existingRange.getMin()));
+ ASSERT_OK(findStatus);
+
+ auto tagDocStatus = TagsType::fromBSON(findStatus.getValue());
+ ASSERT_OK(tagDocStatus.getStatus());
+
+ auto tagDoc = tagDocStatus.getValue();
+ ASSERT_EQ(shardedNS(), tagDoc.getNS());
+ ASSERT_BSONOBJ_EQ(existingRange.getMin(), tagDoc.getMinKey());
+ ASSERT_BSONOBJ_EQ(existingRange.getMax(), tagDoc.getMaxKey());
+ ASSERT_EQ(zoneName(), tagDoc.getTag());
+ }
+}
+
+/**
+ * new ZZ
+ * existing ZZZZ
+ * 0123456789
+ */
+TEST_F(AssignKeyRangeWithOneRangeFixture, NewMaxOverlappingExistingShouldFail) {
+ auto status = ShardingCatalogManager::get(operationContext())
+ ->assignKeyRangeToZone(operationContext(),
+ shardedNS(),
+ ChunkRange(BSON("x" << 3), BSON("x" << 5)),
+ zoneName());
+ ASSERT_EQ(ErrorCodes::RangeOverlapConflict, status);
+
+ assertOnlyZone(shardedNS(), getExistingRange(), zoneName());
+}
+
+/**
+ * new ZZ
+ * existing ZZZZ
+ * 0123456789
+ */
+TEST_F(AssignKeyRangeWithOneRangeFixture, NewRangeOverlappingInsideExistingShouldFail) {
+ auto status = ShardingCatalogManager::get(operationContext())
+ ->assignKeyRangeToZone(operationContext(),
+ shardedNS(),
+ ChunkRange(BSON("x" << 5), BSON("x" << 7)),
+ zoneName());
+ ASSERT_EQ(ErrorCodes::RangeOverlapConflict, status);
+
+ assertOnlyZone(shardedNS(), getExistingRange(), zoneName());
+}
+
+/**
+ * new ZZ
+ * existing ZZZZ
+ * 0123456789
+ */
+TEST_F(AssignKeyRangeWithOneRangeFixture, NewRangeOverlappingWithDifferentNSShouldSucceed) {
+ CollectionType shardedCollection;
+ shardedCollection.setNs(NamespaceString("other.coll"));
+ shardedCollection.setEpoch(OID::gen());
+ shardedCollection.setKeyPattern(BSON("x" << 1));
+
+ ASSERT_OK(insertToConfigCollection(
+ operationContext(), CollectionType::ConfigNS, shardedCollection.toBSON()));
+
+ ASSERT_OK(ShardingCatalogManager::get(operationContext())
+ ->assignKeyRangeToZone(operationContext(),
+ shardedCollection.getNs(),
+ ChunkRange(BSON("x" << 5), BSON("x" << 7)),
+ zoneName()));
+
+ {
+ const auto existingRange = getExistingRange();
+ auto findStatus = findOneOnConfigCollection(
+ operationContext(), TagsType::ConfigNS, BSON("min" << existingRange.getMin()));
+ ASSERT_OK(findStatus);
+
+ auto tagDocStatus = TagsType::fromBSON(findStatus.getValue());
+ ASSERT_OK(tagDocStatus.getStatus());
+
+ auto tagDoc = tagDocStatus.getValue();
+ ASSERT_EQ(shardedNS(), tagDoc.getNS());
+ ASSERT_BSONOBJ_EQ(existingRange.getMin(), tagDoc.getMinKey());
+ ASSERT_BSONOBJ_EQ(existingRange.getMax(), tagDoc.getMaxKey());
+ ASSERT_EQ(zoneName(), tagDoc.getTag());
+ }
+ {
+ auto findStatus = findOneOnConfigCollection(
+ operationContext(), TagsType::ConfigNS, BSON("min" << BSON("x" << 5)));
+ ASSERT_OK(findStatus);
+
+ auto tagDocStatus = TagsType::fromBSON(findStatus.getValue());
+ ASSERT_OK(tagDocStatus.getStatus());
+
+ auto tagDoc = tagDocStatus.getValue();
+ ASSERT_EQ(shardedCollection.getNs(), tagDoc.getNS());
+ ASSERT_BSONOBJ_EQ(BSON("x" << 5), tagDoc.getMinKey());
+ ASSERT_BSONOBJ_EQ(BSON("x" << 7), tagDoc.getMaxKey());
+ ASSERT_EQ(zoneName(), tagDoc.getTag());
+ }
+}
+
+/**
+ * new ZZZZ
+ * existing ZZZZ
+ * 0123456789
+ */
+TEST_F(AssignKeyRangeWithOneRangeFixture, NewRangeEquivalentToExistingOneShouldBeNoOp) {
+ ASSERT_OK(ShardingCatalogManager::get(operationContext())
+ ->assignKeyRangeToZone(
+ operationContext(), shardedNS(), getExistingRange(), zoneName()));
+
+ assertOnlyZone(shardedNS(), getExistingRange(), zoneName());
+}
+
+/**
+ * new YYYY
+ * existing ZZZZ
+ * 0123456789
+ */
+TEST_F(AssignKeyRangeWithOneRangeFixture,
+ NewRangeEquivalentToExistingOneWithDifferentZoneShouldFail) {
+ ShardType shard;
+ shard.setName("b");
+ shard.setHost("b:1234");
+ shard.setTags({"y"});
+
+ ASSERT_OK(insertToConfigCollection(operationContext(), ShardType::ConfigNS, shard.toBSON()));
+
+ auto status =
+ ShardingCatalogManager::get(operationContext())
+ ->assignKeyRangeToZone(operationContext(), shardedNS(), getExistingRange(), "y");
+ ASSERT_EQ(ErrorCodes::RangeOverlapConflict, status);
+
+ assertOnlyZone(shardedNS(), getExistingRange(), zoneName());
+}
+
+/**
+ * new ZZ
+ * existing ZZZZ
+ * 0123456789
+ */
+TEST_F(AssignKeyRangeWithOneRangeFixture, NewMinOverlappingExistingShouldFail) {
+ auto status = ShardingCatalogManager::get(operationContext())
+ ->assignKeyRangeToZone(operationContext(),
+ shardedNS(),
+ ChunkRange(BSON("x" << 7), BSON("x" << 9)),
+ zoneName());
+ ASSERT_EQ(ErrorCodes::RangeOverlapConflict, status);
+
+ assertOnlyZone(shardedNS(), getExistingRange(), zoneName());
+}
+
+/**
+ * new ZZ
+ * existing ZZZZ
+ * 0123456789
+ */
+TEST_F(AssignKeyRangeWithOneRangeFixture, NewMinAlignsWithExistingMaxShouldSucceed) {
+ ASSERT_OK(ShardingCatalogManager::get(operationContext())
+ ->assignKeyRangeToZone(operationContext(),
+ shardedNS(),
+ ChunkRange(BSON("x" << 8), BSON("x" << 10)),
+ zoneName()));
+
+ {
+ const auto existingRange = getExistingRange();
+ auto findStatus = findOneOnConfigCollection(
+ operationContext(), TagsType::ConfigNS, BSON("min" << existingRange.getMin()));
+ ASSERT_OK(findStatus);
+
+ auto tagDocStatus = TagsType::fromBSON(findStatus.getValue());
+ ASSERT_OK(tagDocStatus.getStatus());
+
+ auto tagDoc = tagDocStatus.getValue();
+ ASSERT_EQ(shardedNS(), tagDoc.getNS());
+ ASSERT_BSONOBJ_EQ(existingRange.getMin(), tagDoc.getMinKey());
+ ASSERT_BSONOBJ_EQ(existingRange.getMax(), tagDoc.getMaxKey());
+ ASSERT_EQ(zoneName(), tagDoc.getTag());
+ }
+
+ {
+ auto findStatus = findOneOnConfigCollection(
+ operationContext(), TagsType::ConfigNS, BSON("min" << BSON("x" << 8)));
+ ASSERT_OK(findStatus);
+
+ auto tagDocStatus = TagsType::fromBSON(findStatus.getValue());
+ ASSERT_OK(tagDocStatus.getStatus());
+
+ auto tagDoc = tagDocStatus.getValue();
+ ASSERT_EQ(shardedNS(), tagDoc.getNS());
+ ASSERT_BSONOBJ_EQ(BSON("x" << 8), tagDoc.getMinKey());
+ ASSERT_BSONOBJ_EQ(BSON("x" << 10), tagDoc.getMaxKey());
+ ASSERT_EQ(zoneName(), tagDoc.getTag());
+ }
+}
+
+/**
+ * new ZZZZZZ
+ * existing ZZZZ
+ * 0123456789
+ */
+TEST_F(AssignKeyRangeWithOneRangeFixture, NewRangeIsSuperSetOfExistingShouldFail) {
+ auto status = ShardingCatalogManager::get(operationContext())
+ ->assignKeyRangeToZone(operationContext(),
+ shardedNS(),
+ ChunkRange(BSON("x" << 3), BSON("x" << 9)),
+ zoneName());
+
+ ASSERT_EQ(ErrorCodes::RangeOverlapConflict, status);
+
+ assertOnlyZone(shardedNS(), getExistingRange(), zoneName());
+}
+
+/**
+ * new ZZ
+ * existing ZZZZ
+ * existing ZZZ
+ * 0123456789
+ */
+TEST_F(AssignKeyRangeWithOneRangeFixture, AssignWithExistingOverlapShouldFail) {
+ TagsType tagDoc;
+ tagDoc.setNS(shardedNS());
+ tagDoc.setMinKey(BSON("x" << 0));
+ tagDoc.setMaxKey(BSON("x" << 2));
+ tagDoc.setTag("z");
+
+ ASSERT_OK(insertToConfigCollection(operationContext(), TagsType::ConfigNS, tagDoc.toBSON()));
+
+ auto status = ShardingCatalogManager::get(operationContext())
+ ->assignKeyRangeToZone(operationContext(),
+ shardedNS(),
+ ChunkRange(BSON("x" << 0), BSON("x" << 1)),
+ zoneName());
+
+ ASSERT_EQ(ErrorCodes::RangeOverlapConflict, status);
+}
+
+TEST_F(AssignKeyRangeWithOneRangeFixture, BasicRemoveKeyRange) {
+ ASSERT_OK(ShardingCatalogManager::get(operationContext())
+ ->removeKeyRangeFromZone(operationContext(), shardedNS(), getExistingRange()));
+
+ assertNoZoneDoc();
+}
+
+TEST_F(AssignKeyRangeWithOneRangeFixture, RemoveKeyRangeOnUnshardedCollShouldFail) {
+ auto status = ShardingCatalogManager::get(operationContext())
+ ->removeKeyRangeFromZone(operationContext(),
+ NamespaceString("unsharded.coll"),
+ ChunkRange(BSON("x" << 0), BSON("x" << 10)));
+ ASSERT_EQ(ErrorCodes::NamespaceNotSharded, status);
+
+ assertOnlyZone(shardedNS(), getExistingRange(), zoneName());
+}
+
+TEST_F(AssignKeyRangeWithOneRangeFixture, RemoveKeyRangeOnDroppedShardedCollShouldFail) {
+ CollectionType unshardedCollection;
+ NamespaceString ns("unsharded.coll");
+ unshardedCollection.setNs(ns);
+ unshardedCollection.setEpoch(OID::gen());
+ unshardedCollection.setKeyPattern(BSON("x" << 1));
+ unshardedCollection.setDropped(true);
+
+ ASSERT_OK(insertToConfigCollection(
+ operationContext(), CollectionType::ConfigNS, unshardedCollection.toBSON()));
+
+ auto status = ShardingCatalogManager::get(operationContext())
+ ->removeKeyRangeFromZone(
+ operationContext(), ns, ChunkRange(BSON("x" << 0), BSON("x" << 10)));
+ ASSERT_EQ(ErrorCodes::NamespaceNotSharded, status);
+
+ assertOnlyZone(shardedNS(), getExistingRange(), zoneName());
+}
+
+TEST_F(AssignKeyRangeWithOneRangeFixture, RemoveWithInvalidMinShardKeyShouldFail) {
+ auto status = ShardingCatalogManager::get(operationContext())
+ ->removeKeyRangeFromZone(operationContext(),
+ shardedNS(),
+ ChunkRange(BSON("a" << 0), BSON("x" << 10)));
+ ASSERT_EQ(ErrorCodes::ShardKeyNotFound, status);
+
+ assertOnlyZone(shardedNS(), getExistingRange(), zoneName());
+}
+
+TEST_F(AssignKeyRangeWithOneRangeFixture, RemoveWithInvalidMaxShardKeyShouldFail) {
+ auto status = ShardingCatalogManager::get(operationContext())
+ ->removeKeyRangeFromZone(operationContext(),
+ shardedNS(),
+ ChunkRange(BSON("x" << 0), BSON("y" << 10)));
+ ASSERT_EQ(ErrorCodes::ShardKeyNotFound, status);
+
+ assertOnlyZone(shardedNS(), getExistingRange(), zoneName());
+}
+
+TEST_F(AssignKeyRangeWithOneRangeFixture, RemoveThatIsOnlyMinPrefixOfExistingShouldNotRemoveRange) {
+ NamespaceString ns("compound.shard");
+ CollectionType shardedCollection;
+ shardedCollection.setNs(ns);
+ shardedCollection.setEpoch(OID::gen());
+ shardedCollection.setKeyPattern(BSON("x" << 1 << "y" << 1));
+
+ ASSERT_OK(insertToConfigCollection(
+ operationContext(), CollectionType::ConfigNS, shardedCollection.toBSON()));
+
+ const ChunkRange existingRange(fromjson("{ x: 0, y: { $minKey: 1 }}"),
+ BSON("x" << 10 << "y" << 10));
+ ASSERT_OK(ShardingCatalogManager::get(operationContext())
+ ->assignKeyRangeToZone(operationContext(), ns, existingRange, zoneName()));
+
+ ASSERT_OK(
+ ShardingCatalogManager::get(operationContext())
+ ->removeKeyRangeFromZone(
+ operationContext(), ns, ChunkRange(BSON("x" << 0), BSON("x" << 10 << "y" << 10))));
+
+ {
+ auto findStatus = findOneOnConfigCollection(
+ operationContext(), TagsType::ConfigNS, BSON("min" << existingRange.getMin()));
+ ASSERT_OK(findStatus);
+
+ auto tagDocStatus = TagsType::fromBSON(findStatus.getValue());
+ ASSERT_OK(tagDocStatus.getStatus());
+
+ auto tagDoc = tagDocStatus.getValue();
+ ASSERT_EQ(ns, tagDoc.getNS());
+ ASSERT_BSONOBJ_EQ(existingRange.getMin(), tagDoc.getMinKey());
+ ASSERT_BSONOBJ_EQ(existingRange.getMax(), tagDoc.getMaxKey());
+ ASSERT_EQ(zoneName(), tagDoc.getTag());
+ }
+
+ {
+ const auto existingRange = getExistingRange();
+ auto findStatus = findOneOnConfigCollection(
+ operationContext(), TagsType::ConfigNS, BSON("min" << existingRange.getMin()));
+ ASSERT_OK(findStatus);
+
+ auto tagDocStatus = TagsType::fromBSON(findStatus.getValue());
+ ASSERT_OK(tagDocStatus.getStatus());
+
+ auto tagDoc = tagDocStatus.getValue();
+ ASSERT_EQ(shardedNS(), tagDoc.getNS());
+ ASSERT_BSONOBJ_EQ(existingRange.getMin(), tagDoc.getMinKey());
+ ASSERT_BSONOBJ_EQ(existingRange.getMax(), tagDoc.getMaxKey());
+ ASSERT_EQ(zoneName(), tagDoc.getTag());
+ }
+}
+
+TEST_F(AssignKeyRangeWithOneRangeFixture, RemoveThatIsOnlyMaxPrefixOfExistingShouldNotRemoveRange) {
+ NamespaceString ns("compound.shard");
+ CollectionType shardedCollection;
+ shardedCollection.setNs(ns);
+ shardedCollection.setEpoch(OID::gen());
+ shardedCollection.setKeyPattern(BSON("x" << 1 << "y" << 1));
+
+ ASSERT_OK(insertToConfigCollection(
+ operationContext(), CollectionType::ConfigNS, shardedCollection.toBSON()));
+
+ const ChunkRange existingRange(BSON("x" << 0 << "y" << 0),
+ fromjson("{ x: 10, y: { $minKey: 1 }}"));
+ ASSERT_OK(ShardingCatalogManager::get(operationContext())
+ ->assignKeyRangeToZone(operationContext(), ns, existingRange, zoneName()));
+
+ ASSERT_OK(
+ ShardingCatalogManager::get(operationContext())
+ ->removeKeyRangeFromZone(
+ operationContext(), ns, ChunkRange(BSON("x" << 0 << "y" << 0), BSON("x" << 10))));
+
+ {
+ auto findStatus = findOneOnConfigCollection(
+ operationContext(), TagsType::ConfigNS, BSON("min" << existingRange.getMin()));
+ ASSERT_OK(findStatus);
+
+ auto tagDocStatus = TagsType::fromBSON(findStatus.getValue());
+ ASSERT_OK(tagDocStatus.getStatus());
+
+ auto tagDoc = tagDocStatus.getValue();
+ ASSERT_EQ(ns, tagDoc.getNS());
+ ASSERT_BSONOBJ_EQ(existingRange.getMin(), tagDoc.getMinKey());
+ ASSERT_BSONOBJ_EQ(existingRange.getMax(), tagDoc.getMaxKey());
+ ASSERT_EQ(zoneName(), tagDoc.getTag());
+ }
+
+ {
+ const auto existingRange = getExistingRange();
+ auto findStatus = findOneOnConfigCollection(
+ operationContext(), TagsType::ConfigNS, BSON("min" << existingRange.getMin()));
+ ASSERT_OK(findStatus);
+
+ auto tagDocStatus = TagsType::fromBSON(findStatus.getValue());
+ ASSERT_OK(tagDocStatus.getStatus());
+
+ auto tagDoc = tagDocStatus.getValue();
+ ASSERT_EQ(shardedNS(), tagDoc.getNS());
+ ASSERT_BSONOBJ_EQ(existingRange.getMin(), tagDoc.getMinKey());
+ ASSERT_BSONOBJ_EQ(existingRange.getMax(), tagDoc.getMaxKey());
+ ASSERT_EQ(zoneName(), tagDoc.getTag());
+ }
+}
+
+} // unnamed namespace
+} // namespace mongo
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp b/src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp
new file mode 100644
index 00000000000..dca5b5403e8
--- /dev/null
+++ b/src/mongo/db/s/config/sharding_catalog_manager_chunk_operations.cpp
@@ -0,0 +1,669 @@
+/**
+ * Copyright (C) 2017 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kSharding
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/s/config/sharding_catalog_manager.h"
+
+#include "mongo/base/status_with.h"
+#include "mongo/bson/bsonobjbuilder.h"
+#include "mongo/bson/util/bson_extract.h"
+#include "mongo/client/connection_string.h"
+#include "mongo/client/read_preference.h"
+#include "mongo/db/catalog/catalog_raii.h"
+#include "mongo/db/dbdirectclient.h"
+#include "mongo/db/namespace_string.h"
+#include "mongo/db/operation_context.h"
+#include "mongo/rpc/get_status_from_command_result.h"
+#include "mongo/s/catalog/sharding_catalog_client.h"
+#include "mongo/s/catalog/type_chunk.h"
+#include "mongo/s/client/shard.h"
+#include "mongo/s/client/shard_registry.h"
+#include "mongo/s/grid.h"
+#include "mongo/s/shard_key_pattern.h"
+#include "mongo/util/fail_point_service.h"
+#include "mongo/util/log.h"
+#include "mongo/util/mongoutils/str.h"
+
+namespace mongo {
+namespace {
+
+MONGO_FP_DECLARE(migrationCommitVersionError);
+
+/**
+ * Append min, max and version information from chunk to the buffer for logChange purposes.
+ */
+void appendShortVersion(BufBuilder* b, const ChunkType& chunk) {
+ BSONObjBuilder bb(*b);
+ bb.append(ChunkType::min(), chunk.getMin());
+ bb.append(ChunkType::max(), chunk.getMax());
+ if (chunk.isVersionSet())
+ chunk.getVersion().addToBSON(bb, ChunkType::lastmod());
+ bb.done();
+}
+
+BSONArray buildMergeChunksTransactionUpdates(const std::vector<ChunkType>& chunksToMerge,
+ const ChunkVersion& mergeVersion) {
+ BSONArrayBuilder updates;
+
+ // Build an update operation to expand the first chunk into the newly merged chunk
+ {
+ BSONObjBuilder op;
+ op.append("op", "u");
+ op.appendBool("b", false); // no upsert
+ op.append("ns", ChunkType::ConfigNS.ns());
+
+ // expand first chunk into newly merged chunk
+ ChunkType mergedChunk(chunksToMerge.front());
+ mergedChunk.setMax(chunksToMerge.back().getMax());
+
+ // fill in additional details for sending through transaction
+ mergedChunk.setVersion(mergeVersion);
+
+ // add the new chunk information as the update object
+ op.append("o", mergedChunk.toConfigBSON());
+
+ // query object
+ op.append("o2", BSON(ChunkType::name(mergedChunk.getName())));
+
+ updates.append(op.obj());
+ }
+
+ // Build update operations to delete the rest of the chunks to be merged. Remember not
+ // to delete the first chunk we're expanding
+ for (size_t i = 1; i < chunksToMerge.size(); ++i) {
+ BSONObjBuilder op;
+ op.append("op", "d");
+ op.append("ns", ChunkType::ConfigNS.ns());
+
+ op.append("o", BSON(ChunkType::name(chunksToMerge[i].getName())));
+
+ updates.append(op.obj());
+ }
+
+ return updates.arr();
+}
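+
+// Illustrative only: merging three chunks [0, 4), [4, 8) and [8, 10) produces one "u"
+// entry that rewrites the first chunk's document to cover [0, 10) at the merge version,
+// followed by "d" entries deleting the other two, roughly:
+//
+//   [ { op: "u", b: false, ns: "config.chunks",
+//       o: <first chunk doc with max: { x: 10 } and lastmod: mergeVersion>,
+//       o2: { _id: <first chunk's _id> } },
+//     { op: "d", ns: "config.chunks", o: { _id: <second chunk's _id> } },
+//     { op: "d", ns: "config.chunks", o: { _id: <third chunk's _id> } } ]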
+
+BSONArray buildMergeChunksTransactionPrecond(const std::vector<ChunkType>& chunksToMerge,
+ const ChunkVersion& collVersion) {
+ BSONArrayBuilder preCond;
+
+ for (const auto& chunk : chunksToMerge) {
+ BSONObjBuilder b;
+ b.append("ns", ChunkType::ConfigNS.ns());
+ b.append("q",
+ BSON("query" << BSON(ChunkType::ns(chunk.getNS().ns())
+ << ChunkType::min(chunk.getMin())
+ << ChunkType::max(chunk.getMax()))
+ << "orderby"
+ << BSON(ChunkType::lastmod() << -1)));
+ b.append("res",
+ BSON(ChunkType::epoch(collVersion.epoch())
+ << ChunkType::shard(chunk.getShard().toString())));
+ preCond.append(b.obj());
+ }
+ return preCond.arr();
+}
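+
+// Illustrative only: each precondition entry asserts that a chunk with the given bounds
+// still has the expected epoch and owning shard before the updates are applied, roughly:
+//
+//   { ns: "config.chunks",
+//     q: { query: { ns: <coll>, min: <min>, max: <max> }, orderby: { lastmod: -1 } },
+//     res: { lastmodEpoch: <collection epoch>, shard: <shard id> } }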
+
+Status checkChunkIsOnShard(OperationContext* opCtx,
+ const NamespaceString& nss,
+ const BSONObj& min,
+ const BSONObj& max,
+ const ShardId& shard) {
+ BSONObj chunkQuery =
+ BSON(ChunkType::ns() << nss.ns() << ChunkType::min() << min << ChunkType::max() << max
+ << ChunkType::shard()
+ << shard);
+
+ // Must use local read concern because we're going to perform subsequent writes.
+ auto findResponseWith =
+ Grid::get(opCtx)->shardRegistry()->getConfigShard()->exhaustiveFindOnConfig(
+ opCtx,
+ ReadPreferenceSetting{ReadPreference::PrimaryOnly},
+ repl::ReadConcernLevel::kLocalReadConcern,
+ ChunkType::ConfigNS,
+ chunkQuery,
+ BSONObj(),
+ 1);
+ if (!findResponseWith.isOK()) {
+ return findResponseWith.getStatus();
+ }
+
+ if (findResponseWith.getValue().docs.empty()) {
+ return {ErrorCodes::Error(40165),
+ str::stream()
+ << "Could not find the chunk ("
+ << chunkQuery.toString()
+ << ") on the shard. Cannot execute the migration commit with invalid chunks."};
+ }
+
+ return Status::OK();
+}
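+
+// A sketch of the lookup above: the chunk must match on ns, both bounds and the owning
+// shard, e.g. { ns: "db.coll", min: { x: 0 }, max: { x: 10 }, shard: "shard0" }; an
+// empty result means the chunk was moved or re-split since the migration started.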
+
+BSONObj makeCommitChunkTransactionCommand(const NamespaceString& nss,
+ const ChunkType& migratedChunk,
+ const boost::optional<ChunkType>& controlChunk,
+ StringData fromShard,
+ StringData toShard) {
+
+ // Update migratedChunk's version and shard.
+ BSONArrayBuilder updates;
+ {
+ BSONObjBuilder op;
+ op.append("op", "u");
+ op.appendBool("b", false); // No upserting
+ op.append("ns", ChunkType::ConfigNS.ns());
+
+ BSONObjBuilder n(op.subobjStart("o"));
+ n.append(ChunkType::name(), ChunkType::genID(nss, migratedChunk.getMin()));
+ migratedChunk.getVersion().addToBSON(n, ChunkType::lastmod());
+ n.append(ChunkType::ns(), nss.ns());
+ n.append(ChunkType::min(), migratedChunk.getMin());
+ n.append(ChunkType::max(), migratedChunk.getMax());
+ n.append(ChunkType::shard(), toShard);
+ n.done();
+
+ BSONObjBuilder q(op.subobjStart("o2"));
+ q.append(ChunkType::name(), ChunkType::genID(nss, migratedChunk.getMin()));
+ q.done();
+
+ updates.append(op.obj());
+ }
+
+ // If we have a controlChunk, update its chunk version.
+ if (controlChunk) {
+ BSONObjBuilder op;
+ op.append("op", "u");
+ op.appendBool("b", false);
+ op.append("ns", ChunkType::ConfigNS.ns());
+
+ BSONObjBuilder n(op.subobjStart("o"));
+ n.append(ChunkType::name(), ChunkType::genID(nss, controlChunk->getMin()));
+ controlChunk->getVersion().addToBSON(n, ChunkType::lastmod());
+ n.append(ChunkType::ns(), nss.ns());
+ n.append(ChunkType::min(), controlChunk->getMin());
+ n.append(ChunkType::max(), controlChunk->getMax());
+ n.append(ChunkType::shard(), fromShard);
+ n.done();
+
+ BSONObjBuilder q(op.subobjStart("o2"));
+ q.append(ChunkType::name(), ChunkType::genID(nss, controlChunk->getMin()));
+ q.done();
+
+ updates.append(op.obj());
+ }
+
+ // Do not give doTxn a write concern. If doTxn tries to wait for replication, it will fail
+ // because of the GlobalWrite lock CommitChunkMigration already holds. Replication will not be
+ // able to take the lock it requires.
+ return BSON("doTxn" << updates.arr());
+}
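+
+// Illustrative only: for a migrated chunk [{ x: 0 }, { x: 10 }) moving to "shard1" with
+// no control chunk, the command built above looks roughly like
+//
+//   { doTxn: [ { op: "u", b: false, ns: "config.chunks",
+//                o: { _id: "db.coll-x_0", lastmod: <new version>, ns: "db.coll",
+//                     min: { x: 0 }, max: { x: 10 }, shard: "shard1" },
+//                o2: { _id: "db.coll-x_0" } } ] }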
+
+} // namespace
+
+Status ShardingCatalogManager::commitChunkSplit(OperationContext* opCtx,
+ const NamespaceString& nss,
+ const OID& requestEpoch,
+ const ChunkRange& range,
+ const std::vector<BSONObj>& splitPoints,
+ const std::string& shardName) {
+ // Take _kChunkOpLock in exclusive mode to prevent concurrent chunk splits, merges, and
+ // migrations
+ // TODO(SERVER-25359): Replace with a collection-specific lock map to allow splits/merges/
+ // move chunks on different collections to proceed in parallel
+ Lock::ExclusiveLock lk(opCtx->lockState(), _kChunkOpLock);
+
+ std::string errmsg;
+
+ // Get the max chunk version for this namespace.
+ auto findStatus = Grid::get(opCtx)->shardRegistry()->getConfigShard()->exhaustiveFindOnConfig(
+ opCtx,
+ ReadPreferenceSetting{ReadPreference::PrimaryOnly},
+ repl::ReadConcernLevel::kLocalReadConcern,
+ ChunkType::ConfigNS,
+ BSON("ns" << nss.ns()),
+ BSON(ChunkType::lastmod << -1),
+ 1);
+
+ if (!findStatus.isOK()) {
+ return findStatus.getStatus();
+ }
+
+ const auto& chunksVector = findStatus.getValue().docs;
+ if (chunksVector.empty()) {
+ errmsg = str::stream() << "splitChunk cannot split chunk " << range.toString()
+ << ". Collection '" << nss.ns()
+ << "' no longer either exists, is sharded, or has chunks";
+ return {ErrorCodes::IllegalOperation, errmsg};
+ }
+
+ ChunkVersion collVersion = ChunkVersion::fromBSON(chunksVector.front(), ChunkType::lastmod());
+
+ // Return an error if collection epoch does not match epoch of request.
+ if (collVersion.epoch() != requestEpoch) {
+ errmsg = str::stream() << "splitChunk cannot split chunk " << range.toString()
+ << ". Collection '" << nss.ns() << "' was dropped and re-created."
+ << " Current epoch: " << collVersion.epoch()
+ << ", cmd epoch: " << requestEpoch;
+ return {ErrorCodes::StaleEpoch, errmsg};
+ }
+
+ std::vector<ChunkType> newChunks;
+
+ ChunkVersion currentMaxVersion = collVersion;
+
+ auto startKey = range.getMin();
+ auto newChunkBounds(splitPoints);
+ newChunkBounds.push_back(range.getMax());
+
+ BSONArrayBuilder updates;
+
+ for (const auto& endKey : newChunkBounds) {
+ // Verify the split points are all within the chunk
+ if (endKey.woCompare(range.getMax()) != 0 && !range.containsKey(endKey)) {
+ return {ErrorCodes::InvalidOptions,
+ str::stream() << "Split key " << endKey << " not contained within chunk "
+ << range.toString()};
+ }
+
+ // Verify the split points came in increasing order
+ if (endKey.woCompare(startKey) < 0) {
+ return {
+ ErrorCodes::InvalidOptions,
+ str::stream() << "Split keys must be specified in strictly increasing order. Key "
+ << endKey
+ << " was specified after "
+ << startKey
+ << "."};
+ }
+
+ // Verify that splitPoints are not repeated
+ if (endKey.woCompare(startKey) == 0) {
+ return {ErrorCodes::InvalidOptions,
+ str::stream() << "Split on lower bound of chunk "
+ << ChunkRange(startKey, endKey).toString()
+ << "is not allowed"};
+ }
+
+ // verify that splits don't create too-big shard keys
+ Status shardKeyStatus = ShardKeyPattern::checkShardKeySize(endKey);
+ if (!shardKeyStatus.isOK()) {
+ return shardKeyStatus;
+ }
+
+ // splits only update the 'minor' portion of version
+ currentMaxVersion.incMinor();
+
+ // build an update operation against the chunks collection of the config database
+ // with upsert true
+ BSONObjBuilder op;
+ op.append("op", "u");
+ op.appendBool("b", true);
+ op.append("ns", ChunkType::ConfigNS.ns());
+
+ // add the modified (new) chunk information as the update object
+ BSONObjBuilder n(op.subobjStart("o"));
+ n.append(ChunkType::name(), ChunkType::genID(nss, startKey));
+ currentMaxVersion.addToBSON(n, ChunkType::lastmod());
+ n.append(ChunkType::ns(), nss.ns());
+ n.append(ChunkType::min(), startKey);
+ n.append(ChunkType::max(), endKey);
+ n.append(ChunkType::shard(), shardName);
+ n.done();
+
+ // add the chunk's _id as the query part of the update statement
+ BSONObjBuilder q(op.subobjStart("o2"));
+ q.append(ChunkType::name(), ChunkType::genID(nss, startKey));
+ q.done();
+
+ updates.append(op.obj());
+
+ // remember this chunk info for logging later
+ ChunkType chunk;
+ chunk.setMin(startKey);
+ chunk.setMax(endKey);
+ chunk.setVersion(currentMaxVersion);
+
+ newChunks.push_back(std::move(chunk));
+
+ startKey = endKey;
+ }
+
+ BSONArrayBuilder preCond;
+ {
+ BSONObjBuilder b;
+ b.append("ns", ChunkType::ConfigNS.ns());
+ b.append("q",
+ BSON("query" << BSON(ChunkType::ns(nss.ns()) << ChunkType::min() << range.getMin()
+ << ChunkType::max()
+ << range.getMax())
+ << "orderby"
+ << BSON(ChunkType::lastmod() << -1)));
+ {
+ BSONObjBuilder bb(b.subobjStart("res"));
+ bb.append(ChunkType::epoch(), requestEpoch);
+ bb.append(ChunkType::shard(), shardName);
+ }
+ preCond.append(b.obj());
+ }
+
+ // apply the batch of updates to local metadata.
+ Status doTxnStatus = Grid::get(opCtx)->catalogClient()->applyChunkOpsDeprecated(
+ opCtx,
+ updates.arr(),
+ preCond.arr(),
+ nss,
+ currentMaxVersion,
+ WriteConcernOptions(),
+ repl::ReadConcernLevel::kLocalReadConcern);
+ if (!doTxnStatus.isOK()) {
+ return doTxnStatus;
+ }
+
+ // log changes
+ BSONObjBuilder logDetail;
+ {
+ BSONObjBuilder b(logDetail.subobjStart("before"));
+ b.append(ChunkType::min(), range.getMin());
+ b.append(ChunkType::max(), range.getMax());
+ collVersion.addToBSON(b, ChunkType::lastmod());
+ }
+
+ if (newChunks.size() == 2) {
+ appendShortVersion(&logDetail.subobjStart("left"), newChunks[0]);
+ appendShortVersion(&logDetail.subobjStart("right"), newChunks[1]);
+
+ Grid::get(opCtx)
+ ->catalogClient()
+ ->logChange(opCtx, "split", nss.ns(), logDetail.obj(), WriteConcernOptions())
+ .transitional_ignore();
+ } else {
+ BSONObj beforeDetailObj = logDetail.obj();
+ const int newChunksSize = newChunks.size();
+
+ for (int i = 0; i < newChunksSize; i++) {
+ BSONObjBuilder chunkDetail;
+ chunkDetail.appendElements(beforeDetailObj);
+ chunkDetail.append("number", i + 1);
+ chunkDetail.append("of", newChunksSize);
+ appendShortVersion(&chunkDetail.subobjStart("chunk"), newChunks[i]);
+
+ Grid::get(opCtx)
+ ->catalogClient()
+ ->logChange(
+ opCtx, "multi-split", nss.ns(), chunkDetail.obj(), WriteConcernOptions())
+ .transitional_ignore();
+ }
+ }
+
+ return doTxnStatus;
+}
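+
+// A worked example of the version arithmetic above (assuming a collection version of
+// 1|5 with epoch E): splitting one chunk at two split points yields three chunks with
+// versions 1|6, 1|7 and 1|8, all under epoch E; the major version is untouched because
+// splits only bump the minor component.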
+
+Status ShardingCatalogManager::commitChunkMerge(OperationContext* opCtx,
+ const NamespaceString& nss,
+ const OID& requestEpoch,
+ const std::vector<BSONObj>& chunkBoundaries,
+ const std::string& shardName) {
+ // This method must never be called with empty chunks to merge
+ invariant(!chunkBoundaries.empty());
+
+ // Take _kChunkOpLock in exclusive mode to prevent concurrent chunk splits, merges, and
+ // migrations
+ // TODO(SERVER-25359): Replace with a collection-specific lock map to allow splits/merges/
+ // move chunks on different collections to proceed in parallel
+ Lock::ExclusiveLock lk(opCtx->lockState(), _kChunkOpLock);
+
+ // Get the chunk with the highest version for this namespace
+ auto findStatus = Grid::get(opCtx)->shardRegistry()->getConfigShard()->exhaustiveFindOnConfig(
+ opCtx,
+ ReadPreferenceSetting{ReadPreference::PrimaryOnly},
+ repl::ReadConcernLevel::kLocalReadConcern,
+ ChunkType::ConfigNS,
+ BSON("ns" << nss.ns()),
+ BSON(ChunkType::lastmod << -1),
+ 1);
+
+ if (!findStatus.isOK()) {
+ return findStatus.getStatus();
+ }
+
+ const auto& chunksVector = findStatus.getValue().docs;
+ if (chunksVector.empty())
+ return {ErrorCodes::IllegalOperation,
+ "collection does not exist, isn't sharded, or has no chunks"};
+
+ ChunkVersion collVersion = ChunkVersion::fromBSON(chunksVector.front(), ChunkType::lastmod());
+
+ // Return an error if epoch of chunk does not match epoch of request
+ if (collVersion.epoch() != requestEpoch) {
+ return {ErrorCodes::StaleEpoch,
+ "epoch of chunk does not match epoch of request. This most likely means "
+ "that the collection was dropped and re-created."};
+ }
+
+ // Build chunks to be merged
+ std::vector<ChunkType> chunksToMerge;
+
+ ChunkType itChunk;
+ itChunk.setMax(chunkBoundaries.front());
+ itChunk.setNS(nss);
+ itChunk.setShard(shardName);
+
+ // Do not use the first chunk boundary as a max bound while building chunks
+ for (size_t i = 1; i < chunkBoundaries.size(); ++i) {
+ itChunk.setMin(itChunk.getMax());
+
+ // Ensure the chunk boundaries are strictly increasing
+ if (chunkBoundaries[i].woCompare(itChunk.getMin()) <= 0) {
+ return {
+ ErrorCodes::InvalidOptions,
+ str::stream()
+ << "Chunk boundaries must be specified in strictly increasing order. Boundary "
+ << chunkBoundaries[i]
+ << " was specified after "
+ << itChunk.getMin()
+ << "."};
+ }
+
+ itChunk.setMax(chunkBoundaries[i]);
+ chunksToMerge.push_back(itChunk);
+ }
+
+ ChunkVersion mergeVersion = collVersion;
+ mergeVersion.incMinor();
+
+ auto updates = buildMergeChunksTransactionUpdates(chunksToMerge, mergeVersion);
+ auto preCond = buildMergeChunksTransactionPrecond(chunksToMerge, collVersion);
+
+ // apply the batch of updates to local metadata
+ Status doTxnStatus = Grid::get(opCtx)->catalogClient()->applyChunkOpsDeprecated(
+ opCtx,
+ updates,
+ preCond,
+ nss,
+ mergeVersion,
+ WriteConcernOptions(),
+ repl::ReadConcernLevel::kLocalReadConcern);
+ if (!doTxnStatus.isOK()) {
+ return doTxnStatus;
+ }
+
+ // log changes
+ BSONObjBuilder logDetail;
+ {
+ BSONArrayBuilder b(logDetail.subarrayStart("merged"));
+ for (const auto& chunkToMerge : chunksToMerge) {
+ b.append(chunkToMerge.toConfigBSON());
+ }
+ }
+ collVersion.addToBSON(logDetail, "prevShardVersion");
+ mergeVersion.addToBSON(logDetail, "mergedVersion");
+
+ Grid::get(opCtx)
+ ->catalogClient()
+ ->logChange(opCtx, "merge", nss.ns(), logDetail.obj(), WriteConcernOptions())
+ .transitional_ignore();
+
+ return doTxnStatus;
+}
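+
+// A worked example: given chunkBoundaries [{ x: 0 }, { x: 5 }, { x: 10 }], the loop
+// above builds two chunks to merge, [0, 5) and [5, 10), and the resulting single chunk
+// [0, 10) is committed with mergeVersion, i.e. the collection version with its minor
+// component bumped by one.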
+
+StatusWith<BSONObj> ShardingCatalogManager::commitChunkMigration(
+ OperationContext* opCtx,
+ const NamespaceString& nss,
+ const ChunkType& migratedChunk,
+ const boost::optional<ChunkType>& controlChunk,
+ const OID& collectionEpoch,
+ const ShardId& fromShard,
+ const ShardId& toShard) {
+
+ auto const configShard = Grid::get(opCtx)->shardRegistry()->getConfigShard();
+
+ // Take _kChunkOpLock in exclusive mode to prevent concurrent chunk splits, merges, and
+ // migrations.
+ //
+ // ConfigSvrCommitChunkMigration commands must be run serially because the new ChunkVersions
+ // for migrated chunks are generated within the command and must be committed to the database
+ // before another chunk commit generates new ChunkVersions in the same manner.
+ //
+ // TODO(SERVER-25359): Replace with a collection-specific lock map to allow splits/merges/
+ // move chunks on different collections to proceed in parallel.
+ // (Note: serializing on _kChunkOpLock is not strictly needed while
+ // CommitChunkMigration holds the GlobalWrite lock; it is taken here only for
+ // consistency with the other chunk operations.)
+ Lock::ExclusiveLock lk(opCtx->lockState(), _kChunkOpLock);
+
+ // Must use local read concern because we will perform subsequent writes.
+ auto findResponse =
+ configShard->exhaustiveFindOnConfig(opCtx,
+ ReadPreferenceSetting{ReadPreference::PrimaryOnly},
+ repl::ReadConcernLevel::kLocalReadConcern,
+ ChunkType::ConfigNS,
+ BSON("ns" << nss.ns()),
+ BSON(ChunkType::lastmod << -1),
+ 1);
+ if (!findResponse.isOK()) {
+ return findResponse.getStatus();
+ }
+
+ if (MONGO_FAIL_POINT(migrationCommitVersionError)) {
+ uassert(ErrorCodes::StaleEpoch,
+ "failpoint 'migrationCommitVersionError' generated error",
+ false);
+ }
+
+ const auto chunksVector = std::move(findResponse.getValue().docs);
+ if (chunksVector.empty()) {
+ return {ErrorCodes::IncompatibleShardingMetadata,
+ str::stream() << "Tried to find max chunk version for collection '" << nss.ns()
+ << ", but found no chunks"};
+ }
+
+ const auto swChunk = ChunkType::fromConfigBSON(chunksVector.front());
+ if (!swChunk.isOK()) {
+ return swChunk.getStatus();
+ }
+
+ const auto currentCollectionVersion = swChunk.getValue().getVersion();
+
+ // It is possible for a migration to end up running partly without the protection of the
+ // distributed lock if the config primary stepped down since the start of the migration and
+ // failed to recover the migration. Check that the collection has not been dropped and recreated
+ // since the migration began, unbeknown to the shard when the command was sent.
+ if (currentCollectionVersion.epoch() != collectionEpoch) {
+ return {ErrorCodes::StaleEpoch,
+ str::stream() << "The collection '" << nss.ns()
+ << "' has been dropped and recreated since the migration began."
+ " The config server's collection version epoch is now '"
+ << currentCollectionVersion.epoch().toString()
+ << "', but the shard's is "
+ << collectionEpoch.toString()
+ << "'. Aborting migration commit for chunk ("
+ << migratedChunk.getRange().toString()
+ << ")."};
+ }
+
+ // Check that migratedChunk and controlChunk are where they should be, on fromShard.
+
+ auto migratedOnShard =
+ checkChunkIsOnShard(opCtx, nss, migratedChunk.getMin(), migratedChunk.getMax(), fromShard);
+ if (!migratedOnShard.isOK()) {
+ return migratedOnShard;
+ }
+
+ if (controlChunk) {
+ auto controlOnShard = checkChunkIsOnShard(
+ opCtx, nss, controlChunk->getMin(), controlChunk->getMax(), fromShard);
+ if (!controlOnShard.isOK()) {
+ return controlOnShard;
+ }
+ }
+
+ // Generate the new versions of migratedChunk and controlChunk. Migrating chunk's minor version
+ // will be 0.
+ ChunkType newMigratedChunk = migratedChunk;
+ newMigratedChunk.setVersion(ChunkVersion(
+ currentCollectionVersion.majorVersion() + 1, 0, currentCollectionVersion.epoch()));
+
+ // Control chunk's minor version will be 1 (if control chunk is present).
+ boost::optional<ChunkType> newControlChunk = boost::none;
+ if (controlChunk) {
+ newControlChunk = controlChunk.get();
+ newControlChunk->setVersion(ChunkVersion(
+ currentCollectionVersion.majorVersion() + 1, 1, currentCollectionVersion.epoch()));
+ }
+
+ auto command = makeCommitChunkTransactionCommand(
+ nss, newMigratedChunk, newControlChunk, fromShard.toString(), toShard.toString());
+
+ StatusWith<Shard::CommandResponse> doTxnCommandResponse =
+ configShard->runCommandWithFixedRetryAttempts(
+ opCtx,
+ ReadPreferenceSetting{ReadPreference::PrimaryOnly},
+ nss.db().toString(),
+ command,
+ Shard::RetryPolicy::kIdempotent);
+
+ if (!doTxnCommandResponse.isOK()) {
+ return doTxnCommandResponse.getStatus();
+ }
+
+ if (!doTxnCommandResponse.getValue().commandStatus.isOK()) {
+ return doTxnCommandResponse.getValue().commandStatus;
+ }
+
+ BSONObjBuilder result;
+ newMigratedChunk.getVersion().appendWithFieldForCommands(&result, "migratedChunkVersion");
+ if (controlChunk) {
+ newControlChunk->getVersion().appendWithFieldForCommands(&result, "controlChunkVersion");
+ }
+
+ return result.obj();
+}
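+
+// On success, the BSONObj returned above carries the newly committed versions, roughly
+// { migratedChunkVersion: <major+1|0>, controlChunkVersion: <major+1|1> }, with the
+// control field present only when a control chunk was passed in.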
+
+} // namespace mongo
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_collection_operations.cpp b/src/mongo/db/s/config/sharding_catalog_manager_collection_operations.cpp
new file mode 100644
index 00000000000..874b87a8dc5
--- /dev/null
+++ b/src/mongo/db/s/config/sharding_catalog_manager_collection_operations.cpp
@@ -0,0 +1,605 @@
+/**
+ * Copyright (C) 2017 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kSharding
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/s/config/sharding_catalog_manager.h"
+
+#include <iomanip>
+#include <set>
+
+#include "mongo/base/status_with.h"
+#include "mongo/bson/util/bson_extract.h"
+#include "mongo/client/connection_string.h"
+#include "mongo/client/read_preference.h"
+#include "mongo/client/remote_command_targeter.h"
+#include "mongo/client/replica_set_monitor.h"
+#include "mongo/db/client.h"
+#include "mongo/db/namespace_string.h"
+#include "mongo/db/operation_context.h"
+#include "mongo/db/query/collation/collator_factory_interface.h"
+#include "mongo/db/repl/repl_client_info.h"
+#include "mongo/executor/network_interface.h"
+#include "mongo/executor/task_executor.h"
+#include "mongo/rpc/get_status_from_command_result.h"
+#include "mongo/s/balancer_configuration.h"
+#include "mongo/s/catalog/sharding_catalog_client.h"
+#include "mongo/s/catalog/sharding_catalog_client_impl.h"
+#include "mongo/s/catalog/type_collection.h"
+#include "mongo/s/catalog/type_database.h"
+#include "mongo/s/client/shard.h"
+#include "mongo/s/client/shard_registry.h"
+#include "mongo/s/grid.h"
+#include "mongo/s/request_types/set_shard_version_request.h"
+#include "mongo/s/shard_key_pattern.h"
+#include "mongo/s/shard_util.h"
+#include "mongo/util/log.h"
+#include "mongo/util/mongoutils/str.h"
+#include "mongo/util/scopeguard.h"
+
+namespace mongo {
+
+using CollectionUUID = UUID;
+using std::string;
+using std::vector;
+using std::set;
+
+namespace {
+
+const Seconds kDefaultFindHostMaxWaitTime(20);
+
+const ReadPreferenceSetting kConfigReadSelector(ReadPreference::Nearest, TagSet{});
+const WriteConcernOptions kNoWaitWriteConcern(1, WriteConcernOptions::SyncMode::UNSET, Seconds(0));
+
+void checkForExistingChunks(OperationContext* opCtx, const NamespaceString& nss) {
+ BSONObjBuilder countBuilder;
+ countBuilder.append("count", ChunkType::ConfigNS.coll());
+ countBuilder.append("query", BSON(ChunkType::ns(nss.ns())));
+
+ // OK to use limit=1, since if any chunks exist, we will fail.
+ countBuilder.append("limit", 1);
+
+ // Use readConcern local to guarantee we see any chunks that have been written and may
+ // become committed; readConcern majority will not see the chunks if they have not made it
+ // to the majority snapshot.
+ repl::ReadConcernArgs readConcern(repl::ReadConcernLevel::kLocalReadConcern);
+ readConcern.appendInfo(&countBuilder);
+
+ auto cmdResponse = uassertStatusOK(
+ Grid::get(opCtx)->shardRegistry()->getConfigShard()->runCommandWithFixedRetryAttempts(
+ opCtx,
+ kConfigReadSelector,
+ ChunkType::ConfigNS.db().toString(),
+ countBuilder.done(),
+ Shard::kDefaultConfigCommandTimeout,
+ Shard::RetryPolicy::kIdempotent));
+ uassertStatusOK(cmdResponse.commandStatus);
+
+ long long numChunks;
+ uassertStatusOK(bsonExtractIntegerField(cmdResponse.response, "n", &numChunks));
+ uassert(ErrorCodes::ManualInterventionRequired,
+ str::stream() << "A previous attempt to shard collection " << nss.ns()
+ << " failed after writing some initial chunks to config.chunks. Please "
+ "manually delete the partially written chunks for collection "
+ << nss.ns()
+ << " from config.chunks",
+ numChunks == 0);
+}
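+
+// A sketch of the command issued above (illustrative):
+//
+//   { count: "chunks", query: { ns: "db.coll" }, limit: 1,
+//     readConcern: { level: "local" } }
+//
+// so any surviving chunk document from a previous failed shardCollection attempt trips
+// the ManualInterventionRequired error.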
+
+} // namespace
+
+/**
+ * Creates and writes to the config server the first chunks for a newly sharded collection. Returns
+ * the version generated for the collection.
+ */
+ChunkVersion ShardingCatalogManager::_createFirstChunks(OperationContext* opCtx,
+ const NamespaceString& nss,
+ const ShardKeyPattern& shardKeyPattern,
+ const ShardId& primaryShardId,
+ const std::vector<BSONObj>& initPoints,
+ const bool distributeInitialChunks) {
+
+ const KeyPattern keyPattern = shardKeyPattern.getKeyPattern();
+
+ vector<BSONObj> splitPoints;
+ vector<ShardId> shardIds;
+
+ std::string primaryShardName = primaryShardId.toString();
+ auto drainingCount = uassertStatusOK(_runCountCommandOnConfig(
+ opCtx,
+ NamespaceString(ShardType::ConfigNS),
+ BSON(ShardType::name() << primaryShardName << ShardType::draining(true))));
+
+ const bool primaryDraining = (drainingCount > 0);
+ // If the primary shard is draining, prefer some other shard (the first shard in the
+ // registry that is not the primary, if any) so the initial chunk does not land on a
+ // shard that is being removed.
+ auto getPrimaryOrFirstNonDrainingShard =
+ [&opCtx, primaryShardId, &shardIds, primaryDraining]() {
+ if (primaryDraining) {
+ vector<ShardId> allShardIds;
+ Grid::get(opCtx)->shardRegistry()->getAllShardIds(&allShardIds);
+
+ auto dbShardId = allShardIds[0];
+ if (allShardIds[0] == primaryShardId && allShardIds.size() > 1) {
+ dbShardId = allShardIds[1];
+ }
+
+ return dbShardId;
+ } else {
+ return primaryShardId;
+ }
+ };
+
+ if (initPoints.empty()) {
+ // If no split points were specified use the shard's data distribution to determine them
+ auto primaryShard =
+ uassertStatusOK(Grid::get(opCtx)->shardRegistry()->getShard(opCtx, primaryShardId));
+
+ auto result = uassertStatusOK(primaryShard->runCommandWithFixedRetryAttempts(
+ opCtx,
+ ReadPreferenceSetting{ReadPreference::PrimaryPreferred},
+ nss.db().toString(),
+ BSON("count" << nss.coll()),
+ Shard::RetryPolicy::kIdempotent));
+
+ long long numObjects = 0;
+ uassertStatusOK(result.commandStatus);
+ uassertStatusOK(bsonExtractIntegerField(result.response, "n", &numObjects));
+
+ // Refresh the balancer settings to ensure the chunk size setting, which is sent as part of
+ // the splitVector command and affects the number of chunks returned, has been loaded.
+ uassertStatusOK(Grid::get(opCtx)->getBalancerConfiguration()->refreshAndCheck(opCtx));
+
+ if (numObjects > 0) {
+ splitPoints = uassertStatusOK(shardutil::selectChunkSplitPoints(
+ opCtx,
+ primaryShardId,
+ nss,
+ shardKeyPattern,
+ ChunkRange(keyPattern.globalMin(), keyPattern.globalMax()),
+ Grid::get(opCtx)->getBalancerConfiguration()->getMaxChunkSizeBytes(),
+ 0));
+ }
+
+ // If docs already exist for the collection, must use primary shard,
+ // otherwise defer to passed-in distribution option.
+ if (numObjects == 0 && distributeInitialChunks) {
+ Grid::get(opCtx)->shardRegistry()->getAllShardIds(&shardIds);
+ if (primaryDraining && shardIds.size() > 1) {
+ shardIds.erase(std::remove(shardIds.begin(), shardIds.end(), primaryShardId),
+ shardIds.end());
+ }
+ } else {
+ shardIds.push_back(getPrimaryOrFirstNonDrainingShard());
+ }
+ } else {
+ // Make sure points are unique and ordered
+ auto orderedPts = SimpleBSONObjComparator::kInstance.makeBSONObjSet();
+
+ for (const auto& initPoint : initPoints) {
+ orderedPts.insert(initPoint);
+ }
+
+ for (const auto& initPoint : orderedPts) {
+ splitPoints.push_back(initPoint);
+ }
+
+ if (distributeInitialChunks) {
+ Grid::get(opCtx)->shardRegistry()->getAllShardIds(&shardIds);
+ if (primaryDraining) {
+ shardIds.erase(std::remove(shardIds.begin(), shardIds.end(), primaryShardId),
+ shardIds.end());
+ }
+ } else {
+ shardIds.push_back(getPrimaryOrFirstNonDrainingShard());
+ }
+ }
+
+ // This is the first chunk; start the versioning from scratch
+ const OID epoch = OID::gen();
+ ChunkVersion version(1, 0, epoch);
+
+ log() << "going to create " << splitPoints.size() + 1 << " chunk(s) for: " << nss
+ << " using new epoch " << version.epoch();
+
+ for (unsigned i = 0; i <= splitPoints.size(); i++) {
+ const BSONObj min = (i == 0) ? keyPattern.globalMin() : splitPoints[i - 1];
+ const BSONObj max = (i < splitPoints.size()) ? splitPoints[i] : keyPattern.globalMax();
+
+ // The final value of 'version' is returned to the caller, so only bump the minor
+ // version for the chunks that are actually written after the first one.
+ if (i > 0) {
+ version.incMinor();
+ }
+
+ ChunkType chunk;
+ chunk.setNS(nss);
+ chunk.setMin(min);
+ chunk.setMax(max);
+ chunk.setShard(shardIds[i % shardIds.size()]);
+ chunk.setVersion(version);
+
+ uassertStatusOK(Grid::get(opCtx)->catalogClient()->insertConfigDocument(
+ opCtx,
+ ChunkType::ConfigNS,
+ chunk.toConfigBSON(),
+ ShardingCatalogClient::kMajorityWriteConcern));
+ }
+
+ return version;
+}
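+
+// A worked example (illustrative): two split points { x: 0 } and { x: 10 } produce
+// three chunks, (globalMin, 0), [0, 10) and [10, globalMax), carrying versions 1|0,
+// 1|1 and 1|2 under the freshly generated epoch; the last of these (1|2) is the
+// collection version returned to the caller.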
+
+Status ShardingCatalogManager::dropCollection(OperationContext* opCtx, const NamespaceString& nss) {
+ const auto catalogClient = Grid::get(opCtx)->catalogClient();
+ catalogClient
+ ->logChange(opCtx,
+ "dropCollection.start",
+ nss.ns(),
+ BSONObj(),
+ ShardingCatalogClientImpl::kMajorityWriteConcern)
+ .ignore();
+
+ auto shardsStatus =
+ catalogClient->getAllShards(opCtx, repl::ReadConcernLevel::kLocalReadConcern);
+ if (!shardsStatus.isOK()) {
+ return shardsStatus.getStatus();
+ }
+ vector<ShardType> allShards = std::move(shardsStatus.getValue().value);
+
+ LOG(1) << "dropCollection " << nss.ns() << " started";
+
+ const auto dropCommandBSON = [opCtx, &nss] {
+ BSONObjBuilder builder;
+ builder.append("drop", nss.coll());
+
+ if (!opCtx->getWriteConcern().usedDefault) {
+ builder.append(WriteConcernOptions::kWriteConcernField,
+ opCtx->getWriteConcern().toBSON());
+ }
+
+ return builder.obj();
+ }();
+
+ std::map<std::string, BSONObj> errors;
+ auto* const shardRegistry = Grid::get(opCtx)->shardRegistry();
+
+ for (const auto& shardEntry : allShards) {
+ auto swShard = shardRegistry->getShard(opCtx, shardEntry.getName());
+ if (!swShard.isOK()) {
+ return swShard.getStatus();
+ }
+
+ const auto& shard = swShard.getValue();
+
+ auto swDropResult = shard->runCommandWithFixedRetryAttempts(
+ opCtx,
+ ReadPreferenceSetting{ReadPreference::PrimaryOnly},
+ nss.db().toString(),
+ dropCommandBSON,
+ Shard::RetryPolicy::kIdempotent);
+
+ if (!swDropResult.isOK()) {
+ return swDropResult.getStatus().withContext(
+ str::stream() << "Error dropping collection on shard " << shardEntry.getName());
+ }
+
+ auto& dropResult = swDropResult.getValue();
+
+ auto dropStatus = std::move(dropResult.commandStatus);
+ auto wcStatus = std::move(dropResult.writeConcernStatus);
+ if (!dropStatus.isOK() || !wcStatus.isOK()) {
+ if (dropStatus.code() == ErrorCodes::NamespaceNotFound && wcStatus.isOK()) {
+ // Generally getting NamespaceNotFound is okay to ignore as it simply means that
+ // the collection has already been dropped or doesn't exist on this shard.
+ // If, however, we get NamespaceNotFound but also have a write concern error then we
+ // can't confirm whether the fact that the namespace doesn't exist is actually
+ // committed. Thus we must still fail on NamespaceNotFound if there is also a write
+ // concern error. This can happen if we call drop, it succeeds but with a write
+ // concern error, then we retry the drop.
+ continue;
+ }
+
+ errors.emplace(shardEntry.getHost(), std::move(dropResult.response));
+ }
+ }
+
+ if (!errors.empty()) {
+ StringBuilder sb;
+ sb << "Dropping collection failed on the following hosts: ";
+
+ for (auto it = errors.cbegin(); it != errors.cend(); ++it) {
+ if (it != errors.cbegin()) {
+ sb << ", ";
+ }
+
+ sb << it->first << ": " << it->second;
+ }
+
+ return {ErrorCodes::OperationFailed, sb.str()};
+ }
+
+ LOG(1) << "dropCollection " << nss.ns() << " shard data deleted";
+
+ // Remove chunk data
+ Status result =
+ catalogClient->removeConfigDocuments(opCtx,
+ ChunkType::ConfigNS,
+ BSON(ChunkType::ns(nss.ns())),
+ ShardingCatalogClient::kMajorityWriteConcern);
+ if (!result.isOK()) {
+ return result;
+ }
+
+ LOG(1) << "dropCollection " << nss.ns() << " chunk data deleted";
+
+ // Mark the collection as dropped
+ CollectionType coll;
+ coll.setNs(nss);
+ coll.setDropped(true);
+ coll.setEpoch(ChunkVersion::DROPPED().epoch());
+ coll.setUpdatedAt(Grid::get(opCtx)->getNetwork()->now());
+
+ const bool upsert = false;
+ result = ShardingCatalogClientImpl::updateShardingCatalogEntryForCollection(
+ opCtx, nss, coll, upsert);
+ if (!result.isOK()) {
+ return result;
+ }
+
+ LOG(1) << "dropCollection " << nss.ns() << " collection marked as dropped";
+
+ for (const auto& shardEntry : allShards) {
+ auto swShard = shardRegistry->getShard(opCtx, shardEntry.getName());
+ if (!swShard.isOK()) {
+ return swShard.getStatus();
+ }
+
+ const auto& shard = swShard.getValue();
+
+ SetShardVersionRequest ssv = SetShardVersionRequest::makeForVersioningNoPersist(
+ shardRegistry->getConfigServerConnectionString(),
+ shardEntry.getName(),
+ fassertStatusOK(28781, ConnectionString::parse(shardEntry.getHost())),
+ nss,
+ ChunkVersion::DROPPED(),
+ true);
+
+ auto ssvResult = shard->runCommandWithFixedRetryAttempts(
+ opCtx,
+ ReadPreferenceSetting{ReadPreference::PrimaryOnly},
+ "admin",
+ ssv.toBSON(),
+ Shard::RetryPolicy::kIdempotent);
+
+ if (!ssvResult.isOK()) {
+ return ssvResult.getStatus();
+ }
+
+ auto ssvStatus = std::move(ssvResult.getValue().commandStatus);
+ if (!ssvStatus.isOK()) {
+ return ssvStatus;
+ }
+
+ auto unsetShardingStatus = shard->runCommandWithFixedRetryAttempts(
+ opCtx,
+ ReadPreferenceSetting{ReadPreference::PrimaryOnly},
+ "admin",
+ BSON("unsetSharding" << 1),
+ Shard::RetryPolicy::kIdempotent);
+
+ if (!unsetShardingStatus.isOK()) {
+ return unsetShardingStatus.getStatus();
+ }
+
+ auto unsetShardingResult = std::move(unsetShardingStatus.getValue().commandStatus);
+ if (!unsetShardingResult.isOK()) {
+ return unsetShardingResult;
+ }
+ }
+
+ LOG(1) << "dropCollection " << nss.ns() << " completed";
+
+ catalogClient
+ ->logChange(opCtx,
+ "dropCollection",
+ nss.ns(),
+ BSONObj(),
+ ShardingCatalogClientImpl::kMajorityWriteConcern)
+ .ignore();
+
+ return Status::OK();
+}
+
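+// Shards the given collection: logs the start of the operation in the changelog, creates the
+// initial chunk(s), writes the collection's entry to config.collections and tells the primary
+// shard to refresh its cached version.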
+void ShardingCatalogManager::shardCollection(OperationContext* opCtx,
+ const NamespaceString& nss,
+ const boost::optional<UUID> uuid,
+ const ShardKeyPattern& fieldsAndOrder,
+ const BSONObj& defaultCollation,
+ bool unique,
+ const vector<BSONObj>& initPoints,
+ const bool distributeInitialChunks,
+ const ShardId& dbPrimaryShardId) {
+ const auto catalogClient = Grid::get(opCtx)->catalogClient();
+ const auto shardRegistry = Grid::get(opCtx)->shardRegistry();
+
+ const auto primaryShard = uassertStatusOK(shardRegistry->getShard(opCtx, dbPrimaryShardId));
+
+ // Fail if there are partially written chunks from a previous failed shardCollection.
+ checkForExistingChunks(opCtx, nss);
+
+ // Record start in changelog
+ {
+ BSONObjBuilder collectionDetail;
+ collectionDetail.append("shardKey", fieldsAndOrder.toBSON());
+ collectionDetail.append("collection", nss.ns());
+ if (uuid) {
+ uuid->appendToBuilder(&collectionDetail, "uuid");
+ }
+ collectionDetail.append("primary", primaryShard->toString());
+ collectionDetail.append("numChunks", static_cast<int>(initPoints.size() + 1));
+ catalogClient
+ ->logChange(opCtx,
+ "shardCollection.start",
+ nss.ns(),
+ collectionDetail.obj(),
+ ShardingCatalogClient::kMajorityWriteConcern)
+ .transitional_ignore();
+ }
+
+ // Construct the collection default collator.
+ std::unique_ptr<CollatorInterface> defaultCollator;
+ if (!defaultCollation.isEmpty()) {
+ defaultCollator = uassertStatusOK(CollatorFactoryInterface::get(opCtx->getServiceContext())
+ ->makeFromBSON(defaultCollation));
+ }
+
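+    // Create the initial chunk(s) on the shards and obtain the resulting collection version.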
+ const auto& collVersion = _createFirstChunks(
+ opCtx, nss, fieldsAndOrder, dbPrimaryShardId, initPoints, distributeInitialChunks);
+
+ {
+ CollectionType coll;
+ coll.setNs(nss);
+ if (uuid) {
+ coll.setUUID(*uuid);
+ }
+ coll.setEpoch(collVersion.epoch());
+
+ // TODO(schwerin): The following isn't really a date, but is stored as one in-memory and in
+ // config.collections, as a historical oddity.
+ coll.setUpdatedAt(Date_t::fromMillisSinceEpoch(collVersion.toLong()));
+ coll.setKeyPattern(fieldsAndOrder.toBSON());
+ coll.setDefaultCollation(defaultCollator ? defaultCollator->getSpec().toBSON() : BSONObj());
+ coll.setUnique(unique);
+
+ uassertStatusOK(ShardingCatalogClientImpl::updateShardingCatalogEntryForCollection(
+ opCtx, nss, coll, true /*upsert*/));
+ }
+
+ auto shard = uassertStatusOK(shardRegistry->getShard(opCtx, dbPrimaryShardId));
+ invariant(!shard->isConfig());
+
+    // Tell the primary mongod to refresh its data.
+    // TODO: The real fix here is probably for mongos to just assume that all collections are
+    // sharded, once we get there.
+ SetShardVersionRequest ssv = SetShardVersionRequest::makeForVersioningNoPersist(
+ shardRegistry->getConfigServerConnectionString(),
+ dbPrimaryShardId,
+ primaryShard->getConnString(),
+ nss,
+ collVersion,
+ true);
+
+ auto ssvResponse =
+ shard->runCommandWithFixedRetryAttempts(opCtx,
+ ReadPreferenceSetting{ReadPreference::PrimaryOnly},
+ "admin",
+ ssv.toBSON(),
+ Shard::RetryPolicy::kIdempotent);
+ auto status = ssvResponse.isOK() ? std::move(ssvResponse.getValue().commandStatus)
+ : std::move(ssvResponse.getStatus());
+ if (!status.isOK()) {
+ warning() << "could not update initial version of " << nss.ns() << " on shard primary "
+ << dbPrimaryShardId << causedBy(redact(status));
+ }
+
+ catalogClient
+ ->logChange(opCtx,
+ "shardCollection.end",
+ nss.ns(),
+ BSON("version" << collVersion.toString()),
+ ShardingCatalogClient::kMajorityWriteConcern)
+ .transitional_ignore();
+}
+
+void ShardingCatalogManager::generateUUIDsForExistingShardedCollections(OperationContext* opCtx) {
+ // Retrieve all collections in config.collections that do not have a UUID. Some collections
+ // may already have a UUID if an earlier upgrade attempt failed after making some progress.
+ auto shardedColls =
+ uassertStatusOK(
+ Grid::get(opCtx)->shardRegistry()->getConfigShard()->exhaustiveFindOnConfig(
+ opCtx,
+ ReadPreferenceSetting{ReadPreference::PrimaryOnly},
+ repl::ReadConcernLevel::kLocalReadConcern,
+ CollectionType::ConfigNS,
+ BSON(CollectionType::uuid.name() << BSON("$exists" << false) << "dropped" << false),
+ BSONObj(), // sort
+ boost::none // limit
+ ))
+ .docs;
+
+ if (shardedColls.empty()) {
+ LOG(0) << "all sharded collections already have UUIDs";
+
+ // We did a local read of the collections collection above and found that all sharded
+ // collections already have UUIDs. However, the data may not be majority committed (a
+ // previous setFCV attempt may have failed with a write concern error). Since the current
+ // Client doesn't know the opTime of the last write to the collections collection, make it
+ // wait for the last opTime in the system when we wait for writeConcern.
+ repl::ReplClientInfo::forClient(opCtx->getClient()).setLastOpToSystemLastOpTime(opCtx);
+ return;
+ }
+
+ // Generate and persist a new UUID for each collection that did not have a UUID.
+ LOG(0) << "generating UUIDs for " << shardedColls.size()
+ << " sharded collections that do not yet have a UUID";
+ for (auto& coll : shardedColls) {
+ auto collType = uassertStatusOK(CollectionType::fromBSON(coll));
+ invariant(!collType.getUUID());
+
+ auto uuid = CollectionUUID::gen();
+ collType.setUUID(uuid);
+
+ uassertStatusOK(ShardingCatalogClientImpl::updateShardingCatalogEntryForCollection(
+ opCtx, collType.getNs(), collType, false /* upsert */));
+ LOG(2) << "updated entry in config.collections for sharded collection " << collType.getNs()
+ << " with generated UUID " << uuid;
+ }
+}
+
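+// Returns the namespaces of all non-dropped sharded collections in the given database, as
+// recorded on the config server.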
+std::vector<NamespaceString> ShardingCatalogManager::getAllShardedCollectionsForDb(
+ OperationContext* opCtx, StringData dbName) {
+ const auto dbNameStr = dbName.toString();
+
+ const std::vector<CollectionType> collectionsOnConfig =
+ uassertStatusOK(Grid::get(opCtx)->catalogClient()->getCollections(
+ opCtx, &dbNameStr, nullptr, repl::ReadConcernLevel::kLocalReadConcern));
+
+ std::vector<NamespaceString> collectionsToReturn;
+ for (const auto& coll : collectionsOnConfig) {
+ if (coll.getDropped())
+ continue;
+
+ collectionsToReturn.push_back(coll.getNs());
+ }
+
+ return collectionsToReturn;
+}
+
+} // namespace mongo
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_commit_chunk_migration_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_commit_chunk_migration_test.cpp
new file mode 100644
index 00000000000..76340441d2c
--- /dev/null
+++ b/src/mongo/db/s/config/sharding_catalog_manager_commit_chunk_migration_test.cpp
@@ -0,0 +1,373 @@
+/**
+ * Copyright (C) 2016 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects
+ * for all of the code used other than as permitted herein. If you modify
+ * file(s) with this exception, you may extend this exception to your
+ * version of the file(s), but you are not obligated to do so. If you do not
+ * wish to do so, delete this exception statement from your version. If you
+ * delete this exception statement from all source files in the program,
+ * then also delete it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/bson/bsonobj.h"
+#include "mongo/bson/bsonobjbuilder.h"
+#include "mongo/client/read_preference.h"
+#include "mongo/db/namespace_string.h"
+#include "mongo/db/s/config/sharding_catalog_manager.h"
+#include "mongo/s/catalog/type_chunk.h"
+#include "mongo/s/catalog/type_shard.h"
+#include "mongo/s/client/shard_registry.h"
+#include "mongo/s/config_server_test_fixture.h"
+
+namespace mongo {
+namespace {
+
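+// Tests for ShardingCatalogManager::commitChunkMigration: a successful commit bumps the major
+// version of the migrated chunk, and of the control chunk when one is provided.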
+using CommitChunkMigrate = ConfigServerTestFixture;
+
+const NamespaceString kNamespace("TestDB.TestColl");
+
+TEST_F(CommitChunkMigrate, CheckCorrectOpsCommandWithCtl) {
+
+ ShardType shard0;
+ shard0.setName("shard0");
+ shard0.setHost("shard0:12");
+
+ ShardType shard1;
+ shard1.setName("shard1");
+ shard1.setHost("shard1:12");
+
+ setupShards({shard0, shard1}).transitional_ignore();
+
+ int origMajorVersion = 12;
+ auto const origVersion = ChunkVersion(origMajorVersion, 7, OID::gen());
+
+ ChunkType chunk0;
+ chunk0.setNS(kNamespace);
+ chunk0.setVersion(origVersion);
+ chunk0.setShard(shard0.getName());
+
+    // Set the chunk boundaries.
+ auto chunkMin = BSON("a" << 1);
+ chunk0.setMin(chunkMin);
+ auto chunkMax = BSON("a" << 10);
+ chunk0.setMax(chunkMax);
+
+ ChunkType chunk1;
+ chunk1.setNS(kNamespace);
+ chunk1.setVersion(origVersion);
+ chunk1.setShard(shard0.getName());
+
+ chunk1.setMin(chunkMax);
+ auto chunkMaxax = BSON("a" << 20);
+ chunk1.setMax(chunkMaxax);
+
+ setupChunks({chunk0, chunk1}).transitional_ignore();
+
+    // Use const references to verify that the call accepts const arguments:
+ ChunkType const& chunk0cref = chunk0;
+ ChunkType const& chunk1cref = chunk1;
+
+ StatusWith<BSONObj> resultBSON = ShardingCatalogManager::get(operationContext())
+ ->commitChunkMigration(operationContext(),
+ chunk0.getNS(),
+ chunk0cref,
+ chunk1cref,
+ origVersion.epoch(),
+ ShardId(shard0.getName()),
+ ShardId(shard1.getName()));
+
+ ASSERT_OK(resultBSON.getStatus());
+
+ // Verify the versions returned match expected values.
+ BSONObj versions = resultBSON.getValue();
+ auto mver = ChunkVersion::parseFromBSONWithFieldForCommands(versions, "migratedChunkVersion");
+ ASSERT_OK(mver.getStatus());
+ ASSERT_EQ(ChunkVersion(origMajorVersion + 1, 0, origVersion.epoch()), mver.getValue());
+
+ auto cver = ChunkVersion::parseFromBSONWithFieldForCommands(versions, "controlChunkVersion");
+ ASSERT_OK(cver.getStatus());
+ ASSERT_EQ(ChunkVersion(origMajorVersion + 1, 1, origVersion.epoch()), cver.getValue());
+
+ // Verify the chunks ended up in the right shards, and versions match the values returned.
+ auto chunkDoc0 = uassertStatusOK(getChunkDoc(operationContext(), chunkMin));
+ ASSERT_EQ("shard1", chunkDoc0.getShard().toString());
+ ASSERT_EQ(mver.getValue(), chunkDoc0.getVersion());
+
+ auto chunkDoc1 = uassertStatusOK(getChunkDoc(operationContext(), chunkMax));
+ ASSERT_EQ("shard0", chunkDoc1.getShard().toString());
+ ASSERT_EQ(cver.getValue(), chunkDoc1.getVersion());
+}
+
+TEST_F(CommitChunkMigrate, CheckCorrectOpsCommandNoCtl) {
+
+ ShardType shard0;
+ shard0.setName("shard0");
+ shard0.setHost("shard0:12");
+
+ ShardType shard1;
+ shard1.setName("shard1");
+ shard1.setHost("shard1:12");
+
+ setupShards({shard0, shard1}).transitional_ignore();
+
+ int origMajorVersion = 15;
+ auto const origVersion = ChunkVersion(origMajorVersion, 4, OID::gen());
+
+ ChunkType chunk0;
+ chunk0.setNS(kNamespace);
+ chunk0.setVersion(origVersion);
+ chunk0.setShard(shard0.getName());
+
+    // Set the chunk boundaries.
+ auto chunkMin = BSON("a" << 1);
+ chunk0.setMin(chunkMin);
+ auto chunkMax = BSON("a" << 10);
+ chunk0.setMax(chunkMax);
+
+ setupChunks({chunk0}).transitional_ignore();
+
+ StatusWith<BSONObj> resultBSON = ShardingCatalogManager::get(operationContext())
+ ->commitChunkMigration(operationContext(),
+ chunk0.getNS(),
+ chunk0,
+ boost::none,
+ origVersion.epoch(),
+ ShardId(shard0.getName()),
+ ShardId(shard1.getName()));
+
+ ASSERT_OK(resultBSON.getStatus());
+
+ // Verify the version returned matches expected value.
+ BSONObj versions = resultBSON.getValue();
+ auto mver = ChunkVersion::parseFromBSONWithFieldForCommands(versions, "migratedChunkVersion");
+ ASSERT_OK(mver.getStatus());
+ ASSERT_EQ(ChunkVersion(origMajorVersion + 1, 0, origVersion.epoch()), mver.getValue());
+
+ auto cver = ChunkVersion::parseFromBSONWithFieldForCommands(versions, "controlChunkVersion");
+ ASSERT_NOT_OK(cver.getStatus());
+
+ // Verify the chunk ended up in the right shard, and version matches the value returned.
+ auto chunkDoc0 = uassertStatusOK(getChunkDoc(operationContext(), chunkMin));
+ ASSERT_EQ("shard1", chunkDoc0.getShard().toString());
+ ASSERT_EQ(mver.getValue(), chunkDoc0.getVersion());
+}
+
+TEST_F(CommitChunkMigrate, RejectWrongCollectionEpoch0) {
+
+ ShardType shard0;
+ shard0.setName("shard0");
+ shard0.setHost("shard0:12");
+
+ ShardType shard1;
+ shard1.setName("shard1");
+ shard1.setHost("shard1:12");
+
+ setupShards({shard0, shard1}).transitional_ignore();
+
+ int origMajorVersion = 12;
+ auto const origVersion = ChunkVersion(origMajorVersion, 7, OID::gen());
+
+ ChunkType chunk0;
+ chunk0.setNS(kNamespace);
+ chunk0.setVersion(origVersion);
+ chunk0.setShard(shard0.getName());
+
+    // Set the chunk boundaries.
+ auto chunkMin = BSON("a" << 1);
+ chunk0.setMin(chunkMin);
+ auto chunkMax = BSON("a" << 10);
+ chunk0.setMax(chunkMax);
+
+ ChunkType chunk1;
+ chunk1.setNS(kNamespace);
+ chunk1.setVersion(origVersion);
+ chunk1.setShard(shard0.getName());
+
+ chunk1.setMin(chunkMax);
+ auto chunkMaxax = BSON("a" << 20);
+ chunk1.setMax(chunkMaxax);
+
+ setupChunks({chunk0, chunk1}).transitional_ignore();
+
+ StatusWith<BSONObj> resultBSON = ShardingCatalogManager::get(operationContext())
+ ->commitChunkMigration(operationContext(),
+ chunk0.getNS(),
+ chunk0,
+ chunk1,
+ OID::gen(),
+ ShardId(shard0.getName()),
+ ShardId(shard1.getName()));
+
+ ASSERT_EQ(ErrorCodes::StaleEpoch, resultBSON.getStatus());
+}
+
+TEST_F(CommitChunkMigrate, RejectWrongCollectionEpoch1) {
+
+ ShardType shard0;
+ shard0.setName("shard0");
+ shard0.setHost("shard0:12");
+
+ ShardType shard1;
+ shard1.setName("shard1");
+ shard1.setHost("shard1:12");
+
+ setupShards({shard0, shard1}).transitional_ignore();
+
+ int origMajorVersion = 12;
+ auto const origVersion = ChunkVersion(origMajorVersion, 7, OID::gen());
+ auto const otherVersion = ChunkVersion(origMajorVersion, 7, OID::gen());
+
+ ChunkType chunk0;
+ chunk0.setNS(kNamespace);
+ chunk0.setVersion(origVersion);
+ chunk0.setShard(shard0.getName());
+
+    // Set the chunk boundaries.
+ auto chunkMin = BSON("a" << 1);
+ chunk0.setMin(chunkMin);
+ auto chunkMax = BSON("a" << 10);
+ chunk0.setMax(chunkMax);
+
+ ChunkType chunk1;
+ chunk1.setNS(kNamespace);
+ chunk1.setVersion(otherVersion);
+ chunk1.setShard(shard0.getName());
+
+ chunk1.setMin(chunkMax);
+ auto chunkMaxax = BSON("a" << 20);
+ chunk1.setMax(chunkMaxax);
+
+    // Get the version from the control chunk this time.
+ setupChunks({chunk1, chunk0}).transitional_ignore();
+
+ StatusWith<BSONObj> resultBSON = ShardingCatalogManager::get(operationContext())
+ ->commitChunkMigration(operationContext(),
+ chunk0.getNS(),
+ chunk0,
+ chunk1,
+ origVersion.epoch(),
+ ShardId(shard0.getName()),
+ ShardId(shard1.getName()));
+
+ ASSERT_EQ(ErrorCodes::StaleEpoch, resultBSON.getStatus());
+}
+
+TEST_F(CommitChunkMigrate, RejectChunkMissing0) {
+
+ ShardType shard0;
+ shard0.setName("shard0");
+ shard0.setHost("shard0:12");
+
+ ShardType shard1;
+ shard1.setName("shard1");
+ shard1.setHost("shard1:12");
+
+ setupShards({shard0, shard1}).transitional_ignore();
+
+ int origMajorVersion = 12;
+ auto const origVersion = ChunkVersion(origMajorVersion, 7, OID::gen());
+
+ ChunkType chunk0;
+ chunk0.setNS(kNamespace);
+ chunk0.setVersion(origVersion);
+ chunk0.setShard(shard0.getName());
+
+    // Set the chunk boundaries.
+ auto chunkMin = BSON("a" << 1);
+ chunk0.setMin(chunkMin);
+ auto chunkMax = BSON("a" << 10);
+ chunk0.setMax(chunkMax);
+
+ ChunkType chunk1;
+ chunk1.setNS(kNamespace);
+ chunk1.setVersion(origVersion);
+ chunk1.setShard(shard0.getName());
+
+ chunk1.setMin(chunkMax);
+ auto chunkMaxax = BSON("a" << 20);
+ chunk1.setMax(chunkMaxax);
+
+ setupChunks({chunk1}).transitional_ignore();
+
+ StatusWith<BSONObj> resultBSON = ShardingCatalogManager::get(operationContext())
+ ->commitChunkMigration(operationContext(),
+ chunk0.getNS(),
+ chunk0,
+ chunk1,
+ origVersion.epoch(),
+ ShardId(shard0.getName()),
+ ShardId(shard1.getName()));
+
+ ASSERT_EQ(40165, resultBSON.getStatus().code());
+}
+
+TEST_F(CommitChunkMigrate, RejectChunkMissing1) {
+
+ ShardType shard0;
+ shard0.setName("shard0");
+ shard0.setHost("shard0:12");
+
+ ShardType shard1;
+ shard1.setName("shard1");
+ shard1.setHost("shard1:12");
+
+ setupShards({shard0, shard1}).transitional_ignore();
+
+ int origMajorVersion = 12;
+ auto const origVersion = ChunkVersion(origMajorVersion, 7, OID::gen());
+
+ ChunkType chunk0;
+ chunk0.setNS(kNamespace);
+ chunk0.setVersion(origVersion);
+ chunk0.setShard(shard0.getName());
+
+    // Set the chunk boundaries.
+ auto chunkMin = BSON("a" << 1);
+ chunk0.setMin(chunkMin);
+ auto chunkMax = BSON("a" << 10);
+ chunk0.setMax(chunkMax);
+
+ ChunkType chunk1;
+ chunk1.setNS(kNamespace);
+ chunk1.setVersion(origVersion);
+ chunk1.setShard(shard0.getName());
+
+ chunk1.setMin(chunkMax);
+ auto chunkMaxax = BSON("a" << 20);
+ chunk1.setMax(chunkMaxax);
+
+ setupChunks({chunk0}).transitional_ignore();
+
+ StatusWith<BSONObj> resultBSON = ShardingCatalogManager::get(operationContext())
+ ->commitChunkMigration(operationContext(),
+ chunk0.getNS(),
+ chunk0,
+ chunk1,
+ origVersion.epoch(),
+ ShardId(shard0.getName()),
+ ShardId(shard1.getName()));
+
+ ASSERT_EQ(40165, resultBSON.getStatus().code());
+}
+
+} // namespace
+} // namespace mongo
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_config_initialization_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_config_initialization_test.cpp
new file mode 100644
index 00000000000..151d1a888a7
--- /dev/null
+++ b/src/mongo/db/s/config/sharding_catalog_manager_config_initialization_test.cpp
@@ -0,0 +1,386 @@
+/**
+ * Copyright (C) 2015 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include <string>
+#include <vector>
+
+#include "mongo/bson/json.h"
+#include "mongo/db/catalog/catalog_raii.h"
+#include "mongo/db/concurrency/write_conflict_exception.h"
+#include "mongo/db/curop.h"
+#include "mongo/db/namespace_string.h"
+#include "mongo/db/operation_context.h"
+#include "mongo/db/repl/replication_coordinator_mock.h"
+#include "mongo/db/s/config/sharding_catalog_manager.h"
+#include "mongo/s/catalog/config_server_version.h"
+#include "mongo/s/catalog/sharding_catalog_client.h"
+#include "mongo/s/catalog/type_chunk.h"
+#include "mongo/s/catalog/type_config_version.h"
+#include "mongo/s/catalog/type_lockpings.h"
+#include "mongo/s/catalog/type_locks.h"
+#include "mongo/s/catalog/type_shard.h"
+#include "mongo/s/catalog/type_tags.h"
+#include "mongo/s/client/shard.h"
+#include "mongo/s/config_server_test_fixture.h"
+#include "mongo/util/scopeguard.h"
+
+namespace mongo {
+namespace {
+
+using std::string;
+using std::vector;
+using unittest::assertGet;
+
+/**
+ * Takes two arrays of BSON objects and asserts that they contain the same documents
+ */
+void assertBSONObjsSame(const std::vector<BSONObj>& expectedBSON,
+ const std::vector<BSONObj>& foundBSON) {
+ ASSERT_EQUALS(expectedBSON.size(), foundBSON.size());
+
+ for (const auto& expectedObj : expectedBSON) {
+ bool wasFound = false;
+ for (const auto& foundObj : foundBSON) {
+ if (expectedObj.woCompare(foundObj) == 0) {
+ wasFound = true;
+ break;
+ }
+ }
+ ASSERT_TRUE(wasFound);
+ }
+}
+
+using ConfigInitializationTest = ConfigServerTestFixture;
+
+TEST_F(ConfigInitializationTest, UpgradeNotNeeded) {
+ VersionType version;
+ version.setClusterId(OID::gen());
+ version.setCurrentVersion(CURRENT_CONFIG_VERSION);
+ version.setMinCompatibleVersion(MIN_COMPATIBLE_CONFIG_VERSION);
+ ASSERT_OK(
+ insertToConfigCollection(operationContext(), VersionType::ConfigNS, version.toBSON()));
+
+ ASSERT_OK(ShardingCatalogManager::get(operationContext())
+ ->initializeConfigDatabaseIfNeeded(operationContext()));
+
+ auto versionDoc =
+ assertGet(findOneOnConfigCollection(operationContext(), VersionType::ConfigNS, BSONObj()));
+
+ VersionType foundVersion = assertGet(VersionType::fromBSON(versionDoc));
+
+ ASSERT_EQUALS(version.getClusterId(), foundVersion.getClusterId());
+ ASSERT_EQUALS(version.getCurrentVersion(), foundVersion.getCurrentVersion());
+ ASSERT_EQUALS(version.getMinCompatibleVersion(), foundVersion.getMinCompatibleVersion());
+}
+
+TEST_F(ConfigInitializationTest, InitIncompatibleVersion) {
+ VersionType version;
+ version.setClusterId(OID::gen());
+ version.setCurrentVersion(MIN_COMPATIBLE_CONFIG_VERSION - 1);
+ version.setMinCompatibleVersion(MIN_COMPATIBLE_CONFIG_VERSION - 2);
+ ASSERT_OK(
+ insertToConfigCollection(operationContext(), VersionType::ConfigNS, version.toBSON()));
+
+ ASSERT_EQ(ErrorCodes::IncompatibleShardingConfigVersion,
+ ShardingCatalogManager::get(operationContext())
+ ->initializeConfigDatabaseIfNeeded(operationContext()));
+
+ auto versionDoc =
+ assertGet(findOneOnConfigCollection(operationContext(), VersionType::ConfigNS, BSONObj()));
+
+ VersionType foundVersion = assertGet(VersionType::fromBSON(versionDoc));
+
+ ASSERT_EQUALS(version.getClusterId(), foundVersion.getClusterId());
+ ASSERT_EQUALS(version.getCurrentVersion(), foundVersion.getCurrentVersion());
+ ASSERT_EQUALS(version.getMinCompatibleVersion(), foundVersion.getMinCompatibleVersion());
+}
+
+TEST_F(ConfigInitializationTest, InitClusterMultipleVersionDocs) {
+ VersionType version;
+ version.setClusterId(OID::gen());
+ version.setCurrentVersion(MIN_COMPATIBLE_CONFIG_VERSION - 2);
+ version.setMinCompatibleVersion(MIN_COMPATIBLE_CONFIG_VERSION - 3);
+ ASSERT_OK(
+ insertToConfigCollection(operationContext(), VersionType::ConfigNS, version.toBSON()));
+
+ ASSERT_OK(insertToConfigCollection(operationContext(),
+ VersionType::ConfigNS,
+ BSON("_id"
+ << "a second document")));
+
+ ASSERT_EQ(ErrorCodes::TooManyMatchingDocuments,
+ ShardingCatalogManager::get(operationContext())
+ ->initializeConfigDatabaseIfNeeded(operationContext()));
+}
+
+TEST_F(ConfigInitializationTest, InitInvalidConfigVersionDoc) {
+ BSONObj versionDoc(fromjson(R"({
+ _id: 1,
+ minCompatibleVersion: "should be numeric",
+ currentVersion: 7,
+ clusterId: ObjectId("55919cc6dbe86ce7ac056427")
+ })"));
+ ASSERT_OK(insertToConfigCollection(operationContext(), VersionType::ConfigNS, versionDoc));
+
+ ASSERT_EQ(ErrorCodes::TypeMismatch,
+ ShardingCatalogManager::get(operationContext())
+ ->initializeConfigDatabaseIfNeeded(operationContext()));
+}
+
+TEST_F(ConfigInitializationTest, InitNoVersionDocEmptyConfig) {
+ // Make sure there is no existing document
+ ASSERT_EQUALS(ErrorCodes::NoMatchingDocument,
+ findOneOnConfigCollection(operationContext(), VersionType::ConfigNS, BSONObj()));
+
+ ASSERT_OK(ShardingCatalogManager::get(operationContext())
+ ->initializeConfigDatabaseIfNeeded(operationContext()));
+
+ auto versionDoc =
+ assertGet(findOneOnConfigCollection(operationContext(), VersionType::ConfigNS, BSONObj()));
+
+ VersionType foundVersion = assertGet(VersionType::fromBSON(versionDoc));
+
+ ASSERT_TRUE(foundVersion.getClusterId().isSet());
+ ASSERT_EQUALS(CURRENT_CONFIG_VERSION, foundVersion.getCurrentVersion());
+ ASSERT_EQUALS(MIN_COMPATIBLE_CONFIG_VERSION, foundVersion.getMinCompatibleVersion());
+}
+
+TEST_F(ConfigInitializationTest, InitVersionTooHigh) {
+ VersionType version;
+ version.setClusterId(OID::gen());
+ version.setCurrentVersion(10000);
+ version.setMinCompatibleVersion(10000);
+ ASSERT_OK(
+ insertToConfigCollection(operationContext(), VersionType::ConfigNS, version.toBSON()));
+
+ ASSERT_EQ(ErrorCodes::IncompatibleShardingConfigVersion,
+ ShardingCatalogManager::get(operationContext())
+ ->initializeConfigDatabaseIfNeeded(operationContext()));
+}
+
+TEST_F(ConfigInitializationTest, OnlyRunsOnce) {
+ ASSERT_OK(ShardingCatalogManager::get(operationContext())
+ ->initializeConfigDatabaseIfNeeded(operationContext()));
+
+ auto versionDoc =
+ assertGet(findOneOnConfigCollection(operationContext(), VersionType::ConfigNS, BSONObj()));
+
+ VersionType foundVersion = assertGet(VersionType::fromBSON(versionDoc));
+
+ ASSERT_TRUE(foundVersion.getClusterId().isSet());
+ ASSERT_EQUALS(CURRENT_CONFIG_VERSION, foundVersion.getCurrentVersion());
+ ASSERT_EQUALS(MIN_COMPATIBLE_CONFIG_VERSION, foundVersion.getMinCompatibleVersion());
+
+ ASSERT_EQUALS(ErrorCodes::AlreadyInitialized,
+ ShardingCatalogManager::get(operationContext())
+ ->initializeConfigDatabaseIfNeeded(operationContext()));
+}
+
+TEST_F(ConfigInitializationTest, ReRunsIfDocRolledBackThenReElected) {
+ ASSERT_OK(ShardingCatalogManager::get(operationContext())
+ ->initializeConfigDatabaseIfNeeded(operationContext()));
+
+ auto versionDoc =
+ assertGet(findOneOnConfigCollection(operationContext(), VersionType::ConfigNS, BSONObj()));
+
+ VersionType foundVersion = assertGet(VersionType::fromBSON(versionDoc));
+
+ ASSERT_TRUE(foundVersion.getClusterId().isSet());
+ ASSERT_EQUALS(CURRENT_CONFIG_VERSION, foundVersion.getCurrentVersion());
+ ASSERT_EQUALS(MIN_COMPATIBLE_CONFIG_VERSION, foundVersion.getMinCompatibleVersion());
+
+ // Now remove the version document and re-run initializeConfigDatabaseIfNeeded().
+ {
+ // Mirror what happens if the config.version document is rolled back.
+ ON_BLOCK_EXIT([&] {
+ replicationCoordinator()->setFollowerMode(repl::MemberState::RS_PRIMARY).ignore();
+ });
+ ASSERT_OK(replicationCoordinator()->setFollowerMode(repl::MemberState::RS_ROLLBACK));
+ auto opCtx = operationContext();
+ repl::UnreplicatedWritesBlock uwb(opCtx);
+ auto nss = VersionType::ConfigNS;
+ writeConflictRetry(opCtx, "removeConfigDocuments", nss.ns(), [&] {
+ AutoGetCollection autoColl(opCtx, nss, MODE_IX);
+ auto coll = autoColl.getCollection();
+ ASSERT_TRUE(coll);
+ auto cursor = coll->getCursor(opCtx);
+ std::vector<RecordId> recordIds;
+ while (auto recordId = cursor->next()) {
+ recordIds.push_back(recordId->id);
+ }
+ mongo::WriteUnitOfWork wuow(opCtx);
+ for (auto recordId : recordIds) {
+ coll->deleteDocument(opCtx, kUninitializedStmtId, recordId, nullptr);
+ }
+ wuow.commit();
+ ASSERT_EQUALS(0UL, coll->numRecords(opCtx));
+ });
+ }
+
+ // Verify the document was actually removed.
+ ASSERT_EQUALS(ErrorCodes::NoMatchingDocument,
+ findOneOnConfigCollection(operationContext(), VersionType::ConfigNS, BSONObj()));
+
+ // Re-create the config.version document.
+ ASSERT_OK(ShardingCatalogManager::get(operationContext())
+ ->initializeConfigDatabaseIfNeeded(operationContext()));
+
+ auto newVersionDoc =
+ assertGet(findOneOnConfigCollection(operationContext(), VersionType::ConfigNS, BSONObj()));
+
+ VersionType newFoundVersion = assertGet(VersionType::fromBSON(newVersionDoc));
+
+ ASSERT_TRUE(newFoundVersion.getClusterId().isSet());
+ ASSERT_NOT_EQUALS(newFoundVersion.getClusterId(), foundVersion.getClusterId());
+ ASSERT_EQUALS(CURRENT_CONFIG_VERSION, newFoundVersion.getCurrentVersion());
+ ASSERT_EQUALS(MIN_COMPATIBLE_CONFIG_VERSION, newFoundVersion.getMinCompatibleVersion());
+}
+
+TEST_F(ConfigInitializationTest, BuildsNecessaryIndexes) {
+ ASSERT_OK(ShardingCatalogManager::get(operationContext())
+ ->initializeConfigDatabaseIfNeeded(operationContext()));
+
+ auto expectedChunksIndexes = std::vector<BSONObj>{
+ BSON("v" << 2 << "key" << BSON("_id" << 1) << "name"
+ << "_id_"
+ << "ns"
+ << "config.chunks"),
+ BSON("v" << 2 << "unique" << true << "key" << BSON("ns" << 1 << "min" << 1) << "name"
+ << "ns_1_min_1"
+ << "ns"
+ << "config.chunks"),
+ BSON("v" << 2 << "unique" << true << "key" << BSON("ns" << 1 << "shard" << 1 << "min" << 1)
+ << "name"
+ << "ns_1_shard_1_min_1"
+ << "ns"
+ << "config.chunks"),
+ BSON("v" << 2 << "unique" << true << "key" << BSON("ns" << 1 << "lastmod" << 1) << "name"
+ << "ns_1_lastmod_1"
+ << "ns"
+ << "config.chunks")};
+ auto expectedLockpingsIndexes =
+ std::vector<BSONObj>{BSON("v" << 2 << "key" << BSON("_id" << 1) << "name"
+ << "_id_"
+ << "ns"
+ << "config.lockpings"),
+ BSON("v" << 2 << "key" << BSON("ping" << 1) << "name"
+ << "ping_1"
+ << "ns"
+ << "config.lockpings")};
+ auto expectedLocksIndexes = std::vector<BSONObj>{
+ BSON("v" << 2 << "key" << BSON("_id" << 1) << "name"
+ << "_id_"
+ << "ns"
+ << "config.locks"),
+ BSON("v" << 2 << "key" << BSON("ts" << 1) << "name"
+ << "ts_1"
+ << "ns"
+ << "config.locks"),
+ BSON("v" << 2 << "key" << BSON("state" << 1 << "process" << 1) << "name"
+ << "state_1_process_1"
+ << "ns"
+ << "config.locks")};
+ auto expectedShardsIndexes = std::vector<BSONObj>{
+ BSON("v" << 2 << "key" << BSON("_id" << 1) << "name"
+ << "_id_"
+ << "ns"
+ << "config.shards"),
+ BSON("v" << 2 << "unique" << true << "key" << BSON("host" << 1) << "name"
+ << "host_1"
+ << "ns"
+ << "config.shards")};
+ auto expectedTagsIndexes = std::vector<BSONObj>{
+ BSON("v" << 2 << "key" << BSON("_id" << 1) << "name"
+ << "_id_"
+ << "ns"
+ << "config.tags"),
+ BSON("v" << 2 << "unique" << true << "key" << BSON("ns" << 1 << "min" << 1) << "name"
+ << "ns_1_min_1"
+ << "ns"
+ << "config.tags"),
+ BSON("v" << 2 << "key" << BSON("ns" << 1 << "tag" << 1) << "name"
+ << "ns_1_tag_1"
+ << "ns"
+ << "config.tags")};
+
+ auto foundChunksIndexes = assertGet(getIndexes(operationContext(), ChunkType::ConfigNS));
+ assertBSONObjsSame(expectedChunksIndexes, foundChunksIndexes);
+
+ auto foundLockpingsIndexes = assertGet(getIndexes(operationContext(), LockpingsType::ConfigNS));
+ assertBSONObjsSame(expectedLockpingsIndexes, foundLockpingsIndexes);
+
+ auto foundLocksIndexes = assertGet(getIndexes(operationContext(), LocksType::ConfigNS));
+ assertBSONObjsSame(expectedLocksIndexes, foundLocksIndexes);
+
+ auto foundShardsIndexes = assertGet(getIndexes(operationContext(), ShardType::ConfigNS));
+ assertBSONObjsSame(expectedShardsIndexes, foundShardsIndexes);
+
+ auto foundTagsIndexes = assertGet(getIndexes(operationContext(), TagsType::ConfigNS));
+ assertBSONObjsSame(expectedTagsIndexes, foundTagsIndexes);
+}
+
+TEST_F(ConfigInitializationTest, CompatibleIndexAlreadyExists) {
+ getConfigShard()
+ ->createIndexOnConfig(operationContext(), ShardType::ConfigNS, BSON("host" << 1), true)
+ .transitional_ignore();
+
+ ASSERT_OK(ShardingCatalogManager::get(operationContext())
+ ->initializeConfigDatabaseIfNeeded(operationContext()));
+
+ auto expectedShardsIndexes = std::vector<BSONObj>{
+ BSON("v" << 2 << "key" << BSON("_id" << 1) << "name"
+ << "_id_"
+ << "ns"
+ << "config.shards"),
+ BSON("v" << 2 << "unique" << true << "key" << BSON("host" << 1) << "name"
+ << "host_1"
+ << "ns"
+ << "config.shards")};
+
+ auto foundShardsIndexes = assertGet(getIndexes(operationContext(), ShardType::ConfigNS));
+ assertBSONObjsSame(expectedShardsIndexes, foundShardsIndexes);
+}
+
+TEST_F(ConfigInitializationTest, IncompatibleIndexAlreadyExists) {
+    // Make the index non-unique even though it's supposed to be unique, and make sure that
+    // initialization fails.
+ getConfigShard()
+ ->createIndexOnConfig(operationContext(), ShardType::ConfigNS, BSON("host" << 1), false)
+ .transitional_ignore();
+
+ ASSERT_EQUALS(ErrorCodes::IndexOptionsConflict,
+ ShardingCatalogManager::get(operationContext())
+ ->initializeConfigDatabaseIfNeeded(operationContext()));
+}
+
+} // namespace
+} // namespace mongo
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_create_database_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_create_database_test.cpp
new file mode 100644
index 00000000000..9e4bdb14ffe
--- /dev/null
+++ b/src/mongo/db/s/config/sharding_catalog_manager_create_database_test.cpp
@@ -0,0 +1,195 @@
+/**
+ * Copyright (C) 2017 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kSharding
+
+#include "mongo/platform/basic.h"
+
+#include <pcrecpp.h>
+
+#include "mongo/bson/json.h"
+#include "mongo/client/remote_command_targeter_mock.h"
+#include "mongo/db/commands.h"
+#include "mongo/db/query/query_request.h"
+#include "mongo/db/repl/read_concern_args.h"
+#include "mongo/db/s/config/sharding_catalog_manager.h"
+#include "mongo/executor/task_executor.h"
+#include "mongo/rpc/get_status_from_command_result.h"
+#include "mongo/rpc/metadata/repl_set_metadata.h"
+#include "mongo/rpc/metadata/tracking_metadata.h"
+#include "mongo/s/catalog/dist_lock_catalog_impl.h"
+#include "mongo/s/catalog/type_database.h"
+#include "mongo/s/catalog/type_locks.h"
+#include "mongo/s/catalog/type_shard.h"
+#include "mongo/s/catalog/type_tags.h"
+#include "mongo/s/chunk_version.h"
+#include "mongo/s/client/shard_registry.h"
+#include "mongo/s/config_server_test_fixture.h"
+#include "mongo/stdx/future.h"
+#include "mongo/util/log.h"
+#include "mongo/util/scopeguard.h"
+#include "mongo/util/time_support.h"
+
+namespace mongo {
+namespace {
+
+using executor::RemoteCommandRequest;
+using std::vector;
+
+using CreateDatabaseTest = ConfigServerTestFixture;
+
+TEST_F(CreateDatabaseTest, createDatabaseSuccess) {
+ const std::string dbname = "db1";
+
+ ShardType s0;
+ s0.setName("shard0000");
+ s0.setHost("ShardHost0:27017");
+ ASSERT_OK(setupShards(vector<ShardType>{s0}));
+
+ ShardType s1;
+ s1.setName("shard0001");
+ s1.setHost("ShardHost1:27017");
+ ASSERT_OK(setupShards(vector<ShardType>{s1}));
+
+ ShardType s2;
+ s2.setName("shard0002");
+ s2.setHost("ShardHost2:27017");
+ ASSERT_OK(setupShards(vector<ShardType>{s2}));
+
+ // Prime the shard registry with information about the existing shards
+ shardRegistry()->reload(operationContext());
+
+    // Set up the return values for all the remote command targeter mocks.
+ RemoteCommandTargeterMock::get(
+ uassertStatusOK(shardRegistry()->getShard(operationContext(), s0.getName()))->getTargeter())
+ ->setFindHostReturnValue(HostAndPort(s0.getHost()));
+ RemoteCommandTargeterMock::get(
+ uassertStatusOK(shardRegistry()->getShard(operationContext(), s1.getName()))->getTargeter())
+ ->setFindHostReturnValue(HostAndPort(s1.getHost()));
+ RemoteCommandTargeterMock::get(
+ uassertStatusOK(shardRegistry()->getShard(operationContext(), s2.getName()))->getTargeter())
+ ->setFindHostReturnValue(HostAndPort(s2.getHost()));
+
+ // Now actually start the createDatabase work.
+
+ auto future = launchAsync([this, dbname] {
+ ON_BLOCK_EXIT([&] { Client::destroy(); });
+ Client::initThreadIfNotAlready("Test");
+ auto opCtx = cc().makeOperationContext();
+ ShardingCatalogManager::get(opCtx.get())->createDatabase(opCtx.get(), dbname);
+ });
+
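+    // createDatabase sizes up each shard with listDatabases and places the new database on the
+    // shard that reports the least data.
+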
+ // Return size information about first shard
+ onCommand([&](const RemoteCommandRequest& request) {
+ ASSERT_EQUALS(s0.getHost(), request.target.toString());
+ ASSERT_EQUALS("admin", request.dbname);
+ std::string cmdName = request.cmdObj.firstElement().fieldName();
+ ASSERT_EQUALS("listDatabases", cmdName);
+ ASSERT_FALSE(request.cmdObj.hasField(repl::ReadConcernArgs::kReadConcernFieldName));
+
+ ASSERT_BSONOBJ_EQ(
+ ReadPreferenceSetting(ReadPreference::PrimaryPreferred).toContainingBSON(),
+ rpc::TrackingMetadata::removeTrackingData(request.metadata));
+
+ return BSON("ok" << 1 << "totalSize" << 10);
+ });
+
+ // Return size information about second shard
+ onCommand([&](const RemoteCommandRequest& request) {
+ ASSERT_EQUALS(s1.getHost(), request.target.toString());
+ ASSERT_EQUALS("admin", request.dbname);
+ std::string cmdName = request.cmdObj.firstElement().fieldName();
+ ASSERT_EQUALS("listDatabases", cmdName);
+ ASSERT_FALSE(request.cmdObj.hasField(repl::ReadConcernArgs::kReadConcernFieldName));
+
+ ASSERT_BSONOBJ_EQ(
+ ReadPreferenceSetting(ReadPreference::PrimaryPreferred).toContainingBSON(),
+ rpc::TrackingMetadata::removeTrackingData(request.metadata));
+
+ return BSON("ok" << 1 << "totalSize" << 1);
+ });
+
+ // Return size information about third shard
+ onCommand([&](const RemoteCommandRequest& request) {
+ ASSERT_EQUALS(s2.getHost(), request.target.toString());
+ ASSERT_EQUALS("admin", request.dbname);
+ std::string cmdName = request.cmdObj.firstElement().fieldName();
+ ASSERT_EQUALS("listDatabases", cmdName);
+
+ ASSERT_BSONOBJ_EQ(
+ ReadPreferenceSetting(ReadPreference::PrimaryPreferred).toContainingBSON(),
+ rpc::TrackingMetadata::removeTrackingData(request.metadata));
+
+ return BSON("ok" << 1 << "totalSize" << 100);
+ });
+
+ future.timed_get(kFutureTimeout);
+}
+
+TEST_F(CreateDatabaseTest, createDatabaseDBExists) {
+ const std::string dbname = "db3";
+
+ ShardType shard;
+ shard.setName("shard0");
+ shard.setHost("shard0:12");
+
+ ASSERT_OK(setupShards(vector<ShardType>{shard}));
+
+ setupDatabase(dbname, shard.getName(), false);
+
+ ShardingCatalogManager::get(operationContext())->createDatabase(operationContext(), dbname);
+}
+
+TEST_F(CreateDatabaseTest, createDatabaseDBExistsDifferentCase) {
+ const std::string dbname = "db4";
+ const std::string dbnameDiffCase = "Db4";
+
+ ShardType shard;
+ shard.setName("shard0");
+ shard.setHost("shard0:12");
+
+ ASSERT_OK(setupShards(vector<ShardType>{shard}));
+
+ setupDatabase(dbnameDiffCase, shard.getName(), false);
+
+ ASSERT_THROWS_CODE(
+ ShardingCatalogManager::get(operationContext())->createDatabase(operationContext(), dbname),
+ AssertionException,
+ ErrorCodes::DatabaseDifferCase);
+}
+
+TEST_F(CreateDatabaseTest, createDatabaseNoShards) {
+ const std::string dbname = "db5";
+ ASSERT_THROWS_CODE(
+ ShardingCatalogManager::get(operationContext())->createDatabase(operationContext(), dbname),
+ AssertionException,
+ ErrorCodes::ShardNotFound);
+}
+
+} // namespace
+} // namespace mongo
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_database_operations.cpp b/src/mongo/db/s/config/sharding_catalog_manager_database_operations.cpp
new file mode 100644
index 00000000000..d84ea03282f
--- /dev/null
+++ b/src/mongo/db/s/config/sharding_catalog_manager_database_operations.cpp
@@ -0,0 +1,166 @@
+/**
+ * Copyright (C) 2017 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kSharding
+
+#include "mongo/db/s/config/sharding_catalog_manager.h"
+
+#include <pcrecpp.h>
+
+#include "mongo/bson/util/bson_extract.h"
+#include "mongo/db/namespace_string.h"
+#include "mongo/db/repl/repl_client_info.h"
+#include "mongo/s/catalog/sharding_catalog_client_impl.h"
+#include "mongo/s/catalog/type_database.h"
+#include "mongo/s/client/shard.h"
+#include "mongo/s/grid.h"
+#include "mongo/util/log.h"
+
+namespace mongo {
+
+using std::string;
+using std::vector;
+
+namespace {
+
+const ReadPreferenceSetting kConfigReadSelector(ReadPreference::Nearest, TagSet{});
+
+} // namespace
+
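+// Creates the config entry for the given database if needed, choosing a primary shard for it;
+// returns the new or existing entry.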
+DatabaseType ShardingCatalogManager::createDatabase(OperationContext* opCtx,
+ const std::string& dbName) {
+ invariant(nsIsDbOnly(dbName));
+
+ // The admin and config databases should never be explicitly created. They "just exist",
+ // i.e. getDatabase will always return an entry for them.
+ if (dbName == "admin" || dbName == "config") {
+ uasserted(ErrorCodes::InvalidOptions,
+ str::stream() << "cannot manually create database '" << dbName << "'");
+ }
+
+    // Check whether a database already exists with the same name (case insensitive). If an exact
+    // match exists, return the existing entry; a match that differs only by case is an error.
+
+ BSONObjBuilder queryBuilder;
+ queryBuilder.appendRegex(
+ DatabaseType::name(), (string) "^" + pcrecpp::RE::QuoteMeta(dbName) + "$", "i");
+
+ auto docs = uassertStatusOK(Grid::get(opCtx)->catalogClient()->_exhaustiveFindOnConfig(
+ opCtx,
+ ReadPreferenceSetting{ReadPreference::PrimaryOnly},
+ repl::ReadConcernLevel::kLocalReadConcern,
+ DatabaseType::ConfigNS,
+ queryBuilder.obj(),
+ BSONObj(),
+ 1))
+ .value;
+
+ if (!docs.empty()) {
+ BSONObj dbObj = docs.front();
+ std::string actualDbName = dbObj[DatabaseType::name()].String();
+
+ uassert(ErrorCodes::DatabaseDifferCase,
+                str::stream() << "can't have 2 databases that just differ on case; have: "
+                              << actualDbName
+                              << ", want to add: "
+ << dbName,
+ actualDbName == dbName);
+
+ // We did a local read of the database entry above and found that the database already
+ // exists. However, the data may not be majority committed (a previous createDatabase
+ // attempt may have failed with a writeConcern error).
+ // Since the current Client doesn't know the opTime of the last write to the database entry,
+ // make it wait for the last opTime in the system when we wait for writeConcern.
+ repl::ReplClientInfo::forClient(opCtx->getClient()).setLastOpToSystemLastOpTime(opCtx);
+ return uassertStatusOK(DatabaseType::fromBSON(dbObj));
+ }
+
+ // The database does not exist. Pick a primary shard to place it on.
+ auto const primaryShardId =
+ uassertStatusOK(_selectShardForNewDatabase(opCtx, Grid::get(opCtx)->shardRegistry()));
+ log() << "Placing [" << dbName << "] on: " << primaryShardId;
+
+ // Insert an entry for the new database into the sharding catalog.
+ DatabaseType db(dbName, primaryShardId, false);
+ uassertStatusOK(Grid::get(opCtx)->catalogClient()->insertConfigDocument(
+ opCtx, DatabaseType::ConfigNS, db.toBSON(), ShardingCatalogClient::kMajorityWriteConcern));
+
+ return db;
+}
+
+void ShardingCatalogManager::enableSharding(OperationContext* opCtx, const std::string& dbName) {
+ invariant(nsIsDbOnly(dbName));
+
+ uassert(ErrorCodes::IllegalOperation,
+ str::stream() << "Enabling sharding on the admin database is not allowed",
+ dbName != NamespaceString::kAdminDb);
+
+ // Sharding is enabled automatically on the config db.
+ if (dbName == NamespaceString::kConfigDb) {
+ return;
+ }
+
+    // Create the database if it doesn't exist; in either case this returns the database entry.
+ auto dbType = createDatabase(opCtx, dbName);
+ dbType.setSharded(true);
+
+ log() << "Enabling sharding for database [" << dbName << "] in config db";
+ uassertStatusOK(Grid::get(opCtx)->catalogClient()->updateDatabase(opCtx, dbName, dbType));
+}
+
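+// Returns the names of all databases whose primary shard is 'shardId'.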
+StatusWith<std::vector<std::string>> ShardingCatalogManager::getDatabasesForShard(
+ OperationContext* opCtx, const ShardId& shardId) {
+ auto findStatus = Grid::get(opCtx)->catalogClient()->_exhaustiveFindOnConfig(
+ opCtx,
+ kConfigReadSelector,
+ repl::ReadConcernLevel::kLocalReadConcern,
+ DatabaseType::ConfigNS,
+ BSON(DatabaseType::primary(shardId.toString())),
+ BSONObj(),
+ boost::none); // no limit
+
+ if (!findStatus.isOK())
+ return findStatus.getStatus();
+
+ std::vector<std::string> dbs;
+ for (const BSONObj& obj : findStatus.getValue().value) {
+ std::string dbName;
+ Status status = bsonExtractStringField(obj, DatabaseType::name(), &dbName);
+ if (!status.isOK()) {
+ return status;
+ }
+
+ dbs.push_back(dbName);
+ }
+
+ return dbs;
+}
+
+} // namespace mongo
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_drop_coll_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_drop_coll_test.cpp
new file mode 100644
index 00000000000..1e767b6c0f1
--- /dev/null
+++ b/src/mongo/db/s/config/sharding_catalog_manager_drop_coll_test.cpp
@@ -0,0 +1,477 @@
+/**
+ * Copyright (C) 2015 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kSharding
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/client/remote_command_targeter_mock.h"
+#include "mongo/db/namespace_string.h"
+#include "mongo/db/s/config/sharding_catalog_manager.h"
+#include "mongo/rpc/metadata/tracking_metadata.h"
+#include "mongo/s/catalog/type_shard.h"
+#include "mongo/s/chunk_version.h"
+#include "mongo/s/client/shard_registry.h"
+#include "mongo/s/config_server_test_fixture.h"
+#include "mongo/util/scopeguard.h"
+
+namespace mongo {
+namespace {
+
+using executor::RemoteCommandRequest;
+using executor::RemoteCommandResponse;
+using std::string;
+using std::vector;
+using unittest::assertGet;
+
+class DropColl2ShardTest : public ConfigServerTestFixture {
+public:
+ void setUp() override {
+ ConfigServerTestFixture::setUp();
+
+ _shard1.setName("shard0001");
+ _shard1.setHost("s:1");
+
+ _shard2.setName("shard0002");
+ _shard2.setHost("s:2");
+
+ ASSERT_OK(setupShards({_shard1, _shard2}));
+
+ auto shard1Targeter = RemoteCommandTargeterMock::get(
+ uassertStatusOK(shardRegistry()->getShard(operationContext(), _shard1.getName()))
+ ->getTargeter());
+ shard1Targeter->setFindHostReturnValue(HostAndPort(_shard1.getHost()));
+
+ auto shard2Targeter = RemoteCommandTargeterMock::get(
+ uassertStatusOK(shardRegistry()->getShard(operationContext(), _shard2.getName()))
+ ->getTargeter());
+ shard2Targeter->setFindHostReturnValue(HostAndPort(_shard2.getHost()));
+ }
+
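+    // Expects the next mocked request to be the 'drop' command for the test namespace on the
+    // given shard and responds to it with success.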
+ void expectDrop(const ShardType& shard) {
+ onCommand([this, shard](const RemoteCommandRequest& request) {
+ ASSERT_EQ(HostAndPort(shard.getHost()), request.target);
+ ASSERT_EQ(_dropNS.db(), request.dbname);
+ ASSERT_BSONOBJ_EQ(BSON("drop" << _dropNS.coll() << "writeConcern"
+ << BSON("w" << 0 << "wtimeout" << 0)),
+ request.cmdObj);
+
+ ASSERT_BSONOBJ_EQ(rpc::makeEmptyMetadata(),
+ rpc::TrackingMetadata::removeTrackingData(request.metadata));
+
+ return BSON("ns" << _dropNS.ns() << "ok" << 1);
+ });
+ }
+
+ void expectSetShardVersionZero(const ShardType& shard) {
+ expectSetShardVersion(
+ HostAndPort(shard.getHost()), shard, dropNS(), ChunkVersion::DROPPED());
+ }
+
+ void expectUnsetSharding(const ShardType& shard) {
+ onCommand([shard](const RemoteCommandRequest& request) {
+ ASSERT_EQ(HostAndPort(shard.getHost()), request.target);
+ ASSERT_EQ("admin", request.dbname);
+ ASSERT_BSONOBJ_EQ(BSON("unsetSharding" << 1), request.cmdObj);
+
+ ASSERT_BSONOBJ_EQ(rpc::makeEmptyMetadata(),
+ rpc::TrackingMetadata::removeTrackingData(request.metadata));
+
+ return BSON("n" << 1 << "ok" << 1);
+ });
+ }
+
+ void shutdownExecutor() {
+ ConfigServerTestFixture::executor()->shutdown();
+ }
+
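+    // Runs dropCollection on a dedicated client; tests invoke this through launchAsync so the
+    // main thread stays free to answer the mocked shard requests.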
+ Status doDrop() {
+ ON_BLOCK_EXIT([&] { Client::destroy(); });
+ Client::initThreadIfNotAlready("Test");
+ auto opCtx = cc().makeOperationContext();
+ return ShardingCatalogManager::get(opCtx.get())->dropCollection(opCtx.get(), dropNS());
+ }
+
+ const NamespaceString& dropNS() const {
+ return _dropNS;
+ }
+
+ const ShardType& shard1() const {
+ return _shard1;
+ }
+
+ const ShardType& shard2() const {
+ return _shard2;
+ }
+
+private:
+ const NamespaceString _dropNS{"test.user"};
+ ShardType _shard1;
+ ShardType _shard2;
+};
+
+TEST_F(DropColl2ShardTest, Basic) {
+ auto future = launchAsync([this] {
+ auto status = doDrop();
+ ASSERT_OK(status);
+ });
+
+ expectDrop(shard1());
+ expectDrop(shard2());
+
+ expectSetShardVersionZero(shard1());
+ expectUnsetSharding(shard1());
+
+ expectSetShardVersionZero(shard2());
+ expectUnsetSharding(shard2());
+
+ future.timed_get(kFutureTimeout);
+}
+
+TEST_F(DropColl2ShardTest, NSNotFound) {
+ auto future = launchAsync([this] {
+ auto status = doDrop();
+ ASSERT_OK(status);
+ });
+
+ onCommand([this](const RemoteCommandRequest& request) {
+ ASSERT_EQ(HostAndPort(shard1().getHost()), request.target);
+ ASSERT_EQ(dropNS().db(), request.dbname);
+ ASSERT_BSONOBJ_EQ(
+ BSON("drop" << dropNS().coll() << "writeConcern" << BSON("w" << 0 << "wtimeout" << 0)),
+ request.cmdObj);
+
+ ASSERT_BSONOBJ_EQ(rpc::makeEmptyMetadata(),
+ rpc::TrackingMetadata::removeTrackingData(request.metadata));
+
+ return BSON("ok" << 0 << "code" << ErrorCodes::NamespaceNotFound);
+ });
+
+ onCommand([this](const RemoteCommandRequest& request) {
+ ASSERT_EQ(HostAndPort(shard2().getHost()), request.target);
+ ASSERT_EQ(dropNS().db(), request.dbname);
+ ASSERT_BSONOBJ_EQ(
+ BSON("drop" << dropNS().coll() << "writeConcern" << BSON("w" << 0 << "wtimeout" << 0)),
+ request.cmdObj);
+
+ ASSERT_BSONOBJ_EQ(rpc::makeEmptyMetadata(),
+ rpc::TrackingMetadata::removeTrackingData(request.metadata));
+
+ return BSON("ok" << 0 << "code" << ErrorCodes::NamespaceNotFound);
+ });
+
+ expectSetShardVersionZero(shard1());
+ expectUnsetSharding(shard1());
+
+ expectSetShardVersionZero(shard2());
+ expectUnsetSharding(shard2());
+
+ future.timed_get(kFutureTimeout);
+}
+
+TEST_F(DropColl2ShardTest, FirstShardTargeterError) {
+ auto shard1Targeter = RemoteCommandTargeterMock::get(
+ uassertStatusOK(shardRegistry()->getShard(operationContext(), shard1().getName()))
+ ->getTargeter());
+ shard1Targeter->setFindHostReturnValue({ErrorCodes::HostUnreachable, "bad test network"});
+
+ auto future = launchAsync([this] {
+ auto status = doDrop();
+ ASSERT_EQ(ErrorCodes::HostUnreachable, status.code());
+ ASSERT_FALSE(status.reason().empty());
+ });
+
+ future.timed_get(kFutureTimeout);
+}
+
+TEST_F(DropColl2ShardTest, FirstShardDropError) {
+ auto future = launchAsync([this] {
+ auto status = doDrop();
+ ASSERT_EQ(ErrorCodes::CallbackCanceled, status.code());
+ ASSERT_FALSE(status.reason().empty());
+ });
+
+ onCommand([this](const RemoteCommandRequest& request) {
+ shutdownExecutor(); // shutdown executor so drop command will fail.
+ return BSON("ok" << 1);
+ });
+
+ future.timed_get(kFutureTimeout);
+}
+
+TEST_F(DropColl2ShardTest, FirstShardDropCmdError) {
+ auto future = launchAsync([this] {
+ auto status = doDrop();
+ ASSERT_EQ(ErrorCodes::OperationFailed, status.code());
+ ASSERT_FALSE(status.reason().empty());
+ });
+
+    // The drop command is sent to all shards even if one shard returns a non-OK response.
+ onCommand([](const RemoteCommandRequest& request) {
+ return BSON("ok" << 0 << "code" << ErrorCodes::Unauthorized);
+ });
+
+ expectDrop(shard2());
+
+ future.timed_get(kFutureTimeout);
+}
+
+TEST_F(DropColl2ShardTest, SecondShardTargeterError) {
+ auto shard2Targeter = RemoteCommandTargeterMock::get(
+ uassertStatusOK(shardRegistry()->getShard(operationContext(), shard2().getName()))
+ ->getTargeter());
+ shard2Targeter->setFindHostReturnValue({ErrorCodes::HostUnreachable, "bad test network"});
+
+ auto future = launchAsync([this] {
+ auto status = doDrop();
+ ASSERT_EQ(ErrorCodes::HostUnreachable, status.code());
+ ASSERT_FALSE(status.reason().empty());
+ });
+
+ expectDrop(shard1());
+
+ future.timed_get(kFutureTimeout);
+}
+
+TEST_F(DropColl2ShardTest, SecondShardDropError) {
+ auto future = launchAsync([this] {
+ auto status = doDrop();
+ ASSERT_EQ(ErrorCodes::CallbackCanceled, status.code());
+ ASSERT_FALSE(status.reason().empty());
+ });
+
+ expectDrop(shard1());
+
+ onCommand([this](const RemoteCommandRequest& request) {
+ shutdownExecutor(); // shutdown executor so drop command will fail.
+ return BSON("ok" << 1);
+ });
+
+ future.timed_get(kFutureTimeout);
+}
+
+TEST_F(DropColl2ShardTest, SecondShardDropCmdError) {
+ auto future = launchAsync([this] {
+ auto status = doDrop();
+ ASSERT_EQ(ErrorCodes::OperationFailed, status.code());
+ ASSERT_FALSE(status.reason().empty());
+ });
+
+ expectDrop(shard1());
+
+ onCommand([](const RemoteCommandRequest& request) {
+ return BSON("ok" << 0 << "code" << ErrorCodes::Unauthorized);
+ });
+
+ future.timed_get(kFutureTimeout);
+}
+
+TEST_F(DropColl2ShardTest, CleanupChunkError) {
+ auto future = launchAsync([this] {
+ auto status = doDrop();
+ ASSERT_EQ(ErrorCodes::Unauthorized, status.code());
+ ASSERT_FALSE(status.reason().empty());
+ });
+
+ expectDrop(shard1());
+ expectDrop(shard2());
+
+ onCommand([](const RemoteCommandRequest& request) {
+ return BSON("ok" << 0 << "code" << ErrorCodes::Unauthorized << "errmsg"
+ << "bad delete");
+ });
+
+ future.timed_get(kFutureTimeout);
+}
+
+TEST_F(DropColl2ShardTest, SSVCmdErrorOnShard1) {
+ auto future = launchAsync([this] {
+ auto status = doDrop();
+ ASSERT_EQ(ErrorCodes::Unauthorized, status.code());
+ ASSERT_FALSE(status.reason().empty());
+ });
+
+ expectDrop(shard1());
+ expectDrop(shard2());
+
+ onCommand([](const RemoteCommandRequest& request) {
+ return BSON("ok" << 0 << "code" << ErrorCodes::Unauthorized << "errmsg"
+ << "bad");
+ });
+
+ future.timed_get(kFutureTimeout);
+}
+
+TEST_F(DropColl2ShardTest, SSVErrorOnShard1) {
+ auto future = launchAsync([this] {
+ auto status = doDrop();
+ ASSERT_EQ(ErrorCodes::CallbackCanceled, status.code());
+ ASSERT_FALSE(status.reason().empty());
+ });
+
+ expectDrop(shard1());
+ expectDrop(shard2());
+
+ onCommand([this](const RemoteCommandRequest& request) {
+        shutdownExecutor(); // Shut down the executor so the setShardVersion command fails.
+ return BSON("ok" << 1);
+ });
+
+ future.timed_get(kFutureTimeout);
+}
+
+TEST_F(DropColl2ShardTest, UnsetCmdErrorOnShard1) {
+ auto future = launchAsync([this] {
+ auto status = doDrop();
+ ASSERT_EQ(ErrorCodes::Unauthorized, status.code());
+ ASSERT_FALSE(status.reason().empty());
+ });
+
+ expectDrop(shard1());
+ expectDrop(shard2());
+
+ expectSetShardVersionZero(shard1());
+
+ onCommand([](const RemoteCommandRequest& request) {
+ return BSON("ok" << 0 << "code" << ErrorCodes::Unauthorized << "errmsg"
+ << "bad");
+ });
+
+ future.timed_get(kFutureTimeout);
+}
+
+TEST_F(DropColl2ShardTest, UnsetErrorOnShard1) {
+ auto future = launchAsync([this] {
+ auto status = doDrop();
+ ASSERT_EQ(ErrorCodes::CallbackCanceled, status.code());
+ ASSERT_FALSE(status.reason().empty());
+ });
+
+ expectDrop(shard1());
+ expectDrop(shard2());
+
+ expectSetShardVersionZero(shard1());
+
+ onCommand([this](const RemoteCommandRequest& request) {
+        shutdownExecutor(); // Shut down the executor so the unsetSharding command fails.
+ return BSON("ok" << 1);
+ });
+
+ future.timed_get(kFutureTimeout);
+}
+
+TEST_F(DropColl2ShardTest, SSVCmdErrorOnShard2) {
+ auto future = launchAsync([this] {
+ auto status = doDrop();
+ ASSERT_EQ(ErrorCodes::Unauthorized, status.code());
+ ASSERT_FALSE(status.reason().empty());
+ });
+
+ expectDrop(shard1());
+ expectDrop(shard2());
+
+ expectSetShardVersionZero(shard1());
+ expectUnsetSharding(shard1());
+
+ onCommand([](const RemoteCommandRequest& request) {
+ return BSON("ok" << 0 << "code" << ErrorCodes::Unauthorized << "errmsg"
+ << "bad");
+ });
+
+ future.timed_get(kFutureTimeout);
+}
+
+TEST_F(DropColl2ShardTest, SSVErrorOnShard2) {
+ auto future = launchAsync([this] {
+ auto status = doDrop();
+ ASSERT_EQ(ErrorCodes::CallbackCanceled, status.code());
+ ASSERT_FALSE(status.reason().empty());
+ });
+
+ expectDrop(shard1());
+ expectDrop(shard2());
+
+ expectSetShardVersionZero(shard1());
+ expectUnsetSharding(shard1());
+
+ onCommand([this](const RemoteCommandRequest& request) {
+        shutdownExecutor(); // Shut down the executor so the setShardVersion command fails.
+ return BSON("ok" << 1);
+ });
+
+ future.timed_get(kFutureTimeout);
+}
+
+TEST_F(DropColl2ShardTest, UnsetCmdErrorOnShard2) {
+ auto future = launchAsync([this] {
+ auto status = doDrop();
+ ASSERT_EQ(ErrorCodes::Unauthorized, status.code());
+ ASSERT_FALSE(status.reason().empty());
+ });
+
+ expectDrop(shard1());
+ expectDrop(shard2());
+
+ expectSetShardVersionZero(shard1());
+ expectUnsetSharding(shard1());
+
+ expectSetShardVersionZero(shard2());
+
+ onCommand([](const RemoteCommandRequest& request) {
+ return BSON("ok" << 0 << "code" << ErrorCodes::Unauthorized << "errmsg"
+ << "bad");
+ });
+
+ future.timed_get(kFutureTimeout);
+}
+
+TEST_F(DropColl2ShardTest, UnsetErrorOnShard2) {
+ auto future = launchAsync([this] {
+ auto status = doDrop();
+ ASSERT_EQ(ErrorCodes::CallbackCanceled, status.code());
+ ASSERT_FALSE(status.reason().empty());
+ });
+
+ expectDrop(shard1());
+ expectDrop(shard2());
+
+ expectSetShardVersionZero(shard1());
+ expectUnsetSharding(shard1());
+
+ expectSetShardVersionZero(shard2());
+
+ onCommand([this](const RemoteCommandRequest& request) {
+        shutdownExecutor(); // Shut down the executor so the unsetSharding command fails.
+ return BSON("ok" << 1);
+ });
+
+ future.timed_get(kFutureTimeout);
+}
+
+} // unnamed namespace
+} // namespace mongo
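
The drop-collection tests above all share one choreography: the operation under test runs on a separate thread via launchAsync(), while the test thread scripts one canned reply per expected outbound command through onCommand() (or the expectDrop/expectSetShardVersionZero helpers built on top of it). As a rough, self-contained sketch of that scripted-response idea (hypothetical MockNetwork/Request/Response types, run synchronously here instead of through the fixture's network thread):

#include <functional>
#include <queue>
#include <stdexcept>
#include <string>

// Hypothetical stand-ins for the fixture's remote command request/response pair.
struct Request {
    std::string target;   // host the command would be sent to
    std::string cmdName;  // e.g. "drop"
};

struct Response {
    bool ok;
    int code;  // error code when !ok
};

class MockNetwork {
public:
    using Responder = std::function<Response(const Request&)>;

    // The test thread enqueues one responder per expected outbound command.
    void onCommand(Responder responder) {
        _responders.push(std::move(responder));
    }

    // Code under test calls this instead of issuing a real RPC.
    Response send(const Request& request) {
        if (_responders.empty())
            throw std::runtime_error("unexpected outbound command: " + request.cmdName);
        Responder responder = std::move(_responders.front());
        _responders.pop();
        return responder(request);  // responder asserts on the request, returns a canned reply
    }

private:
    std::queue<Responder> _responders;
};

int main() {
    MockNetwork net;
    // Script a non-OK reply, like the responders above that return NamespaceNotFound.
    net.onCommand([](const Request&) {
        return Response{false, 26};  // 26 == ErrorCodes::NamespaceNotFound
    });
    Response reply = net.send(Request{"shard1:12345", "drop"});
    // Per the tests above, NamespaceNotFound from a shard is tolerated by the drop path.
    return (reply.ok || reply.code == 26) ? 0 : 1;
}
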
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_enable_sharding_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_enable_sharding_test.cpp
new file mode 100644
index 00000000000..b97b8efbca0
--- /dev/null
+++ b/src/mongo/db/s/config/sharding_catalog_manager_enable_sharding_test.cpp
@@ -0,0 +1,168 @@
+/**
+ * Copyright (C) 2017 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kSharding
+
+#include "mongo/platform/basic.h"
+
+#include <pcrecpp.h>
+
+#include "mongo/bson/json.h"
+#include "mongo/client/remote_command_targeter_mock.h"
+#include "mongo/db/commands.h"
+#include "mongo/db/query/query_request.h"
+#include "mongo/db/repl/read_concern_args.h"
+#include "mongo/db/s/config/sharding_catalog_manager.h"
+#include "mongo/executor/task_executor.h"
+#include "mongo/rpc/get_status_from_command_result.h"
+#include "mongo/rpc/metadata/repl_set_metadata.h"
+#include "mongo/rpc/metadata/tracking_metadata.h"
+#include "mongo/s/catalog/dist_lock_catalog_impl.h"
+#include "mongo/s/catalog/type_database.h"
+#include "mongo/s/catalog/type_locks.h"
+#include "mongo/s/catalog/type_shard.h"
+#include "mongo/s/catalog/type_tags.h"
+#include "mongo/s/chunk_version.h"
+#include "mongo/s/client/shard_registry.h"
+#include "mongo/s/config_server_test_fixture.h"
+#include "mongo/s/write_ops/batched_command_response.h"
+#include "mongo/stdx/future.h"
+#include "mongo/util/log.h"
+#include "mongo/util/scopeguard.h"
+#include "mongo/util/time_support.h"
+
+namespace mongo {
+namespace {
+
+using executor::RemoteCommandRequest;
+using std::vector;
+
+class EnableShardingTest : public ConfigServerTestFixture {};
+
+TEST_F(EnableShardingTest, noDBExists) {
+ ShardType shard;
+ shard.setName("shard0");
+ shard.setHost("shard0:12");
+
+ ASSERT_OK(setupShards(vector<ShardType>{shard}));
+
+ auto shardTargeter = RemoteCommandTargeterMock::get(
+ uassertStatusOK(shardRegistry()->getShard(operationContext(), ShardId("shard0")))
+ ->getTargeter());
+ shardTargeter->setFindHostReturnValue(HostAndPort("shard0:12"));
+
+ auto future = launchAsync([&] {
+ ON_BLOCK_EXIT([&] { Client::destroy(); });
+ Client::initThreadIfNotAlready("Test");
+ auto opCtx = cc().makeOperationContext();
+ ShardingCatalogManager::get(opCtx.get())->enableSharding(opCtx.get(), "db1");
+ });
+
+    // Expect the listDatabases command used to check the shard's data size.
+ onCommand([](const RemoteCommandRequest& request) {
+ ASSERT_EQ(HostAndPort("shard0:12"), request.target);
+ ASSERT_EQ("admin", request.dbname);
+ ASSERT_BSONOBJ_EQ(BSON("listDatabases" << 1 << "maxTimeMS" << 600000), request.cmdObj);
+
+ ASSERT_BSONOBJ_EQ(
+ ReadPreferenceSetting(ReadPreference::PrimaryPreferred).toContainingBSON(),
+ rpc::TrackingMetadata::removeTrackingData(request.metadata));
+
+ return fromjson(R"({
+ databases: [],
+ totalSize: 1,
+ ok: 1
+ })");
+ });
+
+ future.timed_get(kFutureTimeout);
+}
+
+TEST_F(EnableShardingTest, dbExistsWithDifferentCase) {
+ ShardType shard;
+ shard.setName("shard0");
+ shard.setHost("shard0:12");
+ ASSERT_OK(setupShards(vector<ShardType>{shard}));
+ setupDatabase("Db3", shard.getName(), false);
+ ASSERT_THROWS_CODE(
+ ShardingCatalogManager::get(operationContext())->enableSharding(operationContext(), "db3"),
+ AssertionException,
+ ErrorCodes::DatabaseDifferCase);
+}
+
+TEST_F(EnableShardingTest, dbExists) {
+ ShardType shard;
+ shard.setName("shard0");
+ shard.setHost("shard0:12");
+ ASSERT_OK(setupShards(vector<ShardType>{shard}));
+ setupDatabase("db4", shard.getName(), false);
+ ShardingCatalogManager::get(operationContext())->enableSharding(operationContext(), "db4");
+}
+
+TEST_F(EnableShardingTest, succeedsWhenTheDatabaseIsAlreadySharded) {
+ ShardType shard;
+ shard.setName("shard0");
+ shard.setHost("shard0:12");
+ ASSERT_OK(setupShards(vector<ShardType>{shard}));
+ setupDatabase("db5", shard.getName(), true);
+ ShardingCatalogManager::get(operationContext())->enableSharding(operationContext(), "db5");
+}
+
+TEST_F(EnableShardingTest, dbExistsInvalidFormat) {
+ ShardType shard;
+ shard.setName("shard0");
+ shard.setHost("shard0:12");
+
+ ASSERT_OK(setupShards(vector<ShardType>{shard}));
+
+    // Set up a database document whose 'primary' field has the wrong type.
+ ASSERT_OK(catalogClient()->insertConfigDocument(operationContext(),
+ DatabaseType::ConfigNS,
+ BSON("_id"
+ << "db6"
+ << "primary"
+ << 12
+ << "partitioned"
+ << false),
+ ShardingCatalogClient::kMajorityWriteConcern));
+
+ ASSERT_THROWS_CODE(
+ ShardingCatalogManager::get(operationContext())->enableSharding(operationContext(), "db6"),
+ AssertionException,
+ ErrorCodes::TypeMismatch);
+}
+
+TEST_F(EnableShardingTest, noDBExistsNoShards) {
+ ASSERT_THROWS_CODE(
+ ShardingCatalogManager::get(operationContext())->enableSharding(operationContext(), "db7"),
+ AssertionException,
+ ErrorCodes::ShardNotFound);
+}
+
+} // namespace
+} // namespace mongo
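
The noDBExists test above documents how enableSharding creates a database entry when none exists: it first sizes up candidate shards with listDatabases so it can place the new database on the shard holding the least data, and it fails with ShardNotFound when no shards are registered at all (see noDBExistsNoShards). A compact sketch of that selection rule, using hypothetical CandidateShard/pickPrimaryShard names rather than the catalog manager's actual helpers:

#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

struct CandidateShard {
    std::string name;
    std::int64_t totalSizeBytes;  // as reported in listDatabases' "totalSize"
};

std::string pickPrimaryShard(const std::vector<CandidateShard>& shards) {
    if (shards.empty())
        throw std::runtime_error("ShardNotFound: no shards registered");
    const CandidateShard* best = &shards.front();
    for (const CandidateShard& s : shards) {
        if (s.totalSizeBytes < best->totalSizeBytes)
            best = &s;  // prefer the shard currently holding the least data
    }
    return best->name;
}

int main() {
    std::vector<CandidateShard> shards{{"shard0", 1}, {"shard1", 512}};
    std::cout << pickPrimaryShard(shards) << "\n";  // prints "shard0"
    return 0;
}
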
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_merge_chunks_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_merge_chunks_test.cpp
new file mode 100644
index 00000000000..3d115ec0a80
--- /dev/null
+++ b/src/mongo/db/s/config/sharding_catalog_manager_merge_chunks_test.cpp
@@ -0,0 +1,474 @@
+/**
+ * Copyright (C) 2016 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects
+ * for all of the code used other than as permitted herein. If you modify
+ * file(s) with this exception, you may extend this exception to your
+ * version of the file(s), but you are not obligated to do so. If you do not
+ * wish to do so, delete this exception statement from your version. If you
+ * delete this exception statement from all source files in the program,
+ * then also delete it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/client/read_preference.h"
+#include "mongo/db/namespace_string.h"
+#include "mongo/db/s/config/sharding_catalog_manager.h"
+#include "mongo/s/catalog/sharding_catalog_client.h"
+#include "mongo/s/catalog/type_chunk.h"
+#include "mongo/s/config_server_test_fixture.h"
+
+namespace mongo {
+namespace {
+
+using MergeChunkTest = ConfigServerTestFixture;
+
+const NamespaceString kNamespace("TestDB.TestColl");
+
+TEST_F(MergeChunkTest, MergeExistingChunksCorrectlyShouldSucceed) {
+ ChunkType chunk;
+ chunk.setNS(kNamespace);
+
+ auto origVersion = ChunkVersion(1, 0, OID::gen());
+ chunk.setVersion(origVersion);
+ chunk.setShard(ShardId("shard0000"));
+
+ // Construct chunk to be merged
+ auto chunk2(chunk);
+
+ auto chunkMin = BSON("a" << 1);
+ auto chunkBound = BSON("a" << 5);
+ auto chunkMax = BSON("a" << 10);
+ // first chunk boundaries
+ chunk.setMin(chunkMin);
+ chunk.setMax(chunkBound);
+ // second chunk boundaries
+ chunk2.setMin(chunkBound);
+ chunk2.setMax(chunkMax);
+
+ std::vector<BSONObj> chunkBoundaries{chunkMin, chunkBound, chunkMax};
+
+ setupChunks({chunk, chunk2}).transitional_ignore();
+
+ ASSERT_OK(ShardingCatalogManager::get(operationContext())
+ ->commitChunkMerge(operationContext(),
+ NamespaceString("TestDB.TestColl"),
+ origVersion.epoch(),
+ chunkBoundaries,
+ "shard0000"));
+
+ auto findResponse = uassertStatusOK(
+ getConfigShard()->exhaustiveFindOnConfig(operationContext(),
+ ReadPreferenceSetting{ReadPreference::PrimaryOnly},
+ repl::ReadConcernLevel::kLocalReadConcern,
+ ChunkType::ConfigNS,
+ BSON(ChunkType::ns() << "TestDB.TestColl"),
+ BSON(ChunkType::lastmod << -1),
+ boost::none));
+
+ const auto& chunksVector = findResponse.docs;
+
+ // There should be exactly one chunk left in the collection
+ ASSERT_EQ(1u, chunksVector.size());
+
+ // MergedChunk should have range [chunkMin, chunkMax]
+ auto mergedChunk = uassertStatusOK(ChunkType::fromConfigBSON(chunksVector.front()));
+ ASSERT_BSONOBJ_EQ(chunkMin, mergedChunk.getMin());
+ ASSERT_BSONOBJ_EQ(chunkMax, mergedChunk.getMax());
+
+ {
+ // Check for increment on mergedChunk's minor version
+ ASSERT_EQ(origVersion.majorVersion(), mergedChunk.getVersion().majorVersion());
+ ASSERT_EQ(origVersion.minorVersion() + 1, mergedChunk.getVersion().minorVersion());
+ }
+}
+
+TEST_F(MergeChunkTest, MergeSeveralChunksCorrectlyShouldSucceed) {
+ ChunkType chunk;
+ chunk.setNS(kNamespace);
+
+ auto origVersion = ChunkVersion(1, 0, OID::gen());
+ chunk.setVersion(origVersion);
+ chunk.setShard(ShardId("shard0000"));
+
+ // Construct chunks to be merged
+ auto chunk2(chunk);
+ auto chunk3(chunk);
+
+ auto chunkMin = BSON("a" << 1);
+ auto chunkBound = BSON("a" << 5);
+ auto chunkBound2 = BSON("a" << 7);
+ auto chunkMax = BSON("a" << 10);
+ // first chunk boundaries
+ chunk.setMin(chunkMin);
+ chunk.setMax(chunkBound);
+ // second chunk boundaries
+ chunk2.setMin(chunkBound);
+ chunk2.setMax(chunkBound2);
+ // third chunk boundaries
+ chunk3.setMin(chunkBound2);
+ chunk3.setMax(chunkMax);
+
+ // Record chunk boundaries for passing into commitChunkMerge
+ std::vector<BSONObj> chunkBoundaries{chunkMin, chunkBound, chunkBound2, chunkMax};
+
+ setupChunks({chunk, chunk2, chunk3}).transitional_ignore();
+
+ ASSERT_OK(ShardingCatalogManager::get(operationContext())
+ ->commitChunkMerge(operationContext(),
+ NamespaceString("TestDB.TestColl"),
+ origVersion.epoch(),
+ chunkBoundaries,
+ "shard0000"));
+
+ auto findResponse = uassertStatusOK(
+ getConfigShard()->exhaustiveFindOnConfig(operationContext(),
+ ReadPreferenceSetting{ReadPreference::PrimaryOnly},
+ repl::ReadConcernLevel::kLocalReadConcern,
+ ChunkType::ConfigNS,
+ BSON(ChunkType::ns() << "TestDB.TestColl"),
+ BSON(ChunkType::lastmod << -1),
+ boost::none));
+
+ const auto& chunksVector = findResponse.docs;
+
+ // There should be exactly one chunk left in the collection
+ ASSERT_EQ(1u, chunksVector.size());
+
+ // MergedChunk should have range [chunkMin, chunkMax]
+ auto mergedChunk = uassertStatusOK(ChunkType::fromConfigBSON(chunksVector.front()));
+ ASSERT_BSONOBJ_EQ(chunkMin, mergedChunk.getMin());
+ ASSERT_BSONOBJ_EQ(chunkMax, mergedChunk.getMax());
+
+ {
+ // Check for increment on mergedChunk's minor version
+ ASSERT_EQ(origVersion.majorVersion(), mergedChunk.getVersion().majorVersion());
+ ASSERT_EQ(origVersion.minorVersion() + 1, mergedChunk.getVersion().minorVersion());
+ }
+}
+
+TEST_F(MergeChunkTest, NewMergeShouldClaimHighestVersion) {
+ ChunkType chunk, otherChunk;
+ chunk.setNS(kNamespace);
+ otherChunk.setNS(kNamespace);
+ auto collEpoch = OID::gen();
+
+ auto origVersion = ChunkVersion(1, 2, collEpoch);
+ chunk.setVersion(origVersion);
+ chunk.setShard(ShardId("shard0000"));
+
+ // Construct chunk to be merged
+ auto chunk2(chunk);
+
+ auto chunkMin = BSON("a" << 1);
+ auto chunkBound = BSON("a" << 5);
+ auto chunkMax = BSON("a" << 10);
+ // first chunk boundaries
+ chunk.setMin(chunkMin);
+ chunk.setMax(chunkBound);
+ // second chunk boundaries
+ chunk2.setMin(chunkBound);
+ chunk2.setMax(chunkMax);
+
+ // Record chunk boundaries for passing into commitChunkMerge
+ std::vector<BSONObj> chunkBoundaries{chunkMin, chunkBound, chunkMax};
+
+ // Set up other chunk with competing version
+ auto competingVersion = ChunkVersion(2, 1, collEpoch);
+ otherChunk.setVersion(competingVersion);
+ otherChunk.setShard(ShardId("shard0000"));
+ otherChunk.setMin(BSON("a" << 10));
+ otherChunk.setMax(BSON("a" << 20));
+
+ setupChunks({chunk, chunk2, otherChunk}).transitional_ignore();
+
+ ASSERT_OK(ShardingCatalogManager::get(operationContext())
+ ->commitChunkMerge(operationContext(),
+ NamespaceString("TestDB.TestColl"),
+ collEpoch,
+ chunkBoundaries,
+ "shard0000"));
+
+ auto findResponse = uassertStatusOK(
+ getConfigShard()->exhaustiveFindOnConfig(operationContext(),
+ ReadPreferenceSetting{ReadPreference::PrimaryOnly},
+ repl::ReadConcernLevel::kLocalReadConcern,
+ ChunkType::ConfigNS,
+ BSON(ChunkType::ns() << "TestDB.TestColl"),
+ BSON(ChunkType::lastmod << -1),
+ boost::none));
+
+ const auto& chunksVector = findResponse.docs;
+
+ // There should be exactly two chunks left in the collection: one merged, one competing
+ ASSERT_EQ(2u, chunksVector.size());
+
+ // MergedChunk should have range [chunkMin, chunkMax]
+ auto mergedChunk = uassertStatusOK(ChunkType::fromConfigBSON(chunksVector.front()));
+ ASSERT_BSONOBJ_EQ(chunkMin, mergedChunk.getMin());
+ ASSERT_BSONOBJ_EQ(chunkMax, mergedChunk.getMax());
+
+ {
+ // Check for minor increment on collection version
+ ASSERT_EQ(competingVersion.majorVersion(), mergedChunk.getVersion().majorVersion());
+ ASSERT_EQ(competingVersion.minorVersion() + 1, mergedChunk.getVersion().minorVersion());
+ }
+}
+
+TEST_F(MergeChunkTest, MergeLeavesOtherChunksAlone) {
+ ChunkType chunk;
+ chunk.setNS(kNamespace);
+
+ auto origVersion = ChunkVersion(1, 2, OID::gen());
+ chunk.setVersion(origVersion);
+ chunk.setShard(ShardId("shard0000"));
+
+ // Construct chunk to be merged
+ auto chunk2(chunk);
+
+ auto chunkMin = BSON("a" << 1);
+ auto chunkBound = BSON("a" << 5);
+ auto chunkMax = BSON("a" << 10);
+ // first chunk boundaries
+ chunk.setMin(chunkMin);
+ chunk.setMax(chunkBound);
+ // second chunk boundaries
+ chunk2.setMin(chunkBound);
+ chunk2.setMax(chunkMax);
+
+ // Record chunk boundaries for passing into commitChunkMerge
+ std::vector<BSONObj> chunkBoundaries{chunkMin, chunkBound, chunkMax};
+
+ // Set up unmerged chunk
+ auto otherChunk(chunk);
+ otherChunk.setMin(BSON("a" << 10));
+ otherChunk.setMax(BSON("a" << 20));
+
+ setupChunks({chunk, chunk2, otherChunk}).transitional_ignore();
+
+ ASSERT_OK(ShardingCatalogManager::get(operationContext())
+ ->commitChunkMerge(operationContext(),
+ NamespaceString("TestDB.TestColl"),
+ origVersion.epoch(),
+ chunkBoundaries,
+ "shard0000"));
+
+ auto findResponse = uassertStatusOK(
+ getConfigShard()->exhaustiveFindOnConfig(operationContext(),
+ ReadPreferenceSetting{ReadPreference::PrimaryOnly},
+ repl::ReadConcernLevel::kLocalReadConcern,
+ ChunkType::ConfigNS,
+ BSON(ChunkType::ns() << "TestDB.TestColl"),
+ BSON(ChunkType::lastmod << -1),
+ boost::none));
+
+ const auto& chunksVector = findResponse.docs;
+
+ // There should be exactly two chunks left in the collection: one merged, one untouched
+ ASSERT_EQ(2u, chunksVector.size());
+
+ // MergedChunk should have range [chunkMin, chunkMax]
+ auto mergedChunk = uassertStatusOK(ChunkType::fromConfigBSON(chunksVector.front()));
+ ASSERT_BSONOBJ_EQ(chunkMin, mergedChunk.getMin());
+ ASSERT_BSONOBJ_EQ(chunkMax, mergedChunk.getMax());
+
+ {
+ // Check for increment on mergedChunk's minor version
+ ASSERT_EQ(origVersion.majorVersion(), mergedChunk.getVersion().majorVersion());
+ ASSERT_EQ(origVersion.minorVersion() + 1, mergedChunk.getVersion().minorVersion());
+ }
+
+ // OtherChunk should have been left alone
+ auto foundOtherChunk = uassertStatusOK(ChunkType::fromConfigBSON(chunksVector.back()));
+ ASSERT_BSONOBJ_EQ(otherChunk.getMin(), foundOtherChunk.getMin());
+ ASSERT_BSONOBJ_EQ(otherChunk.getMax(), foundOtherChunk.getMax());
+}
+
+TEST_F(MergeChunkTest, NonExistingNamespace) {
+ ChunkType chunk;
+ chunk.setNS(kNamespace);
+
+ auto origVersion = ChunkVersion(1, 0, OID::gen());
+ chunk.setVersion(origVersion);
+ chunk.setShard(ShardId("shard0000"));
+
+ // Construct chunk to be merged
+ auto chunk2(chunk);
+
+ auto chunkMin = BSON("a" << 1);
+ auto chunkBound = BSON("a" << 5);
+ auto chunkMax = BSON("a" << 10);
+ // first chunk boundaries
+ chunk.setMin(chunkMin);
+ chunk.setMax(chunkBound);
+ chunk2.setMin(chunkBound);
+ chunk2.setMax(chunkMax);
+
+ // Record chunk boundaries for passing into commitChunkMerge
+ std::vector<BSONObj> chunkBoundaries{chunkMin, chunkBound, chunkMax};
+
+ setupChunks({chunk, chunk2}).transitional_ignore();
+
+ auto mergeStatus = ShardingCatalogManager::get(operationContext())
+ ->commitChunkMerge(operationContext(),
+ NamespaceString("TestDB.NonExistingColl"),
+ origVersion.epoch(),
+ chunkBoundaries,
+ "shard0000");
+ ASSERT_EQ(ErrorCodes::IllegalOperation, mergeStatus);
+}
+
+TEST_F(MergeChunkTest, NonMatchingEpochsOfChunkAndRequestErrors) {
+ ChunkType chunk;
+ chunk.setNS(kNamespace);
+
+ auto origVersion = ChunkVersion(1, 0, OID::gen());
+ chunk.setVersion(origVersion);
+ chunk.setShard(ShardId("shard0000"));
+
+ // Construct chunk to be merged
+ auto chunk2(chunk);
+
+ auto chunkMin = BSON("a" << 1);
+ auto chunkBound = BSON("a" << 5);
+ auto chunkMax = BSON("a" << 10);
+ // first chunk boundaries
+ chunk.setMin(chunkMin);
+ chunk.setMax(chunkBound);
+ chunk2.setMin(chunkBound);
+ chunk2.setMax(chunkMax);
+
+    // Record chunk boundaries for passing into commitChunkMerge
+ std::vector<BSONObj> chunkBoundaries{chunkMin, chunkBound, chunkMax};
+
+ setupChunks({chunk, chunk2}).transitional_ignore();
+
+ auto mergeStatus = ShardingCatalogManager::get(operationContext())
+ ->commitChunkMerge(operationContext(),
+ NamespaceString("TestDB.TestColl"),
+ OID::gen(),
+ chunkBoundaries,
+ "shard0000");
+ ASSERT_EQ(ErrorCodes::StaleEpoch, mergeStatus);
+}
+
+TEST_F(MergeChunkTest, MergeAlreadyHappenedFailsPrecondition) {
+ ChunkType chunk;
+ chunk.setNS(kNamespace);
+
+ auto origVersion = ChunkVersion(1, 0, OID::gen());
+ chunk.setVersion(origVersion);
+ chunk.setShard(ShardId("shard0000"));
+
+ // Construct chunk to be merged
+ auto chunk2(chunk);
+
+ auto chunkMin = BSON("a" << 1);
+ auto chunkBound = BSON("a" << 5);
+ auto chunkMax = BSON("a" << 10);
+ // first chunk boundaries
+ chunk.setMin(chunkMin);
+ chunk.setMax(chunkBound);
+ // second chunk boundaries
+ chunk2.setMin(chunkBound);
+ chunk2.setMax(chunkMax);
+
+ std::vector<BSONObj> chunkBoundaries{chunkMin, chunkBound, chunkMax};
+
+ ChunkType mergedChunk(chunk);
+ auto mergedVersion = chunk.getVersion();
+ mergedVersion.incMinor();
+ mergedChunk.setVersion(mergedVersion);
+ mergedChunk.setMax(chunkMax);
+
+ setupChunks({mergedChunk}).transitional_ignore();
+
+ ASSERT_EQ(ErrorCodes::BadValue,
+ ShardingCatalogManager::get(operationContext())
+ ->commitChunkMerge(operationContext(),
+ NamespaceString("TestDB.TestColl"),
+ origVersion.epoch(),
+ chunkBoundaries,
+ "shard0000"));
+
+ // Verify that no change to config.chunks happened.
+ auto findResponse = uassertStatusOK(
+ getConfigShard()->exhaustiveFindOnConfig(operationContext(),
+ ReadPreferenceSetting{ReadPreference::PrimaryOnly},
+ repl::ReadConcernLevel::kLocalReadConcern,
+ ChunkType::ConfigNS,
+ BSON(ChunkType::ns() << "TestDB.TestColl"),
+ BSON(ChunkType::lastmod << -1),
+ boost::none));
+
+ const auto& chunksVector = findResponse.docs;
+
+ // There should be exactly one chunk left in the collection
+ ASSERT_EQ(1u, chunksVector.size());
+
+ // MergedChunk should have range [chunkMin, chunkMax]
+ ChunkType foundChunk = uassertStatusOK(ChunkType::fromConfigBSON(chunksVector.front()));
+ ASSERT_BSONOBJ_EQ(mergedChunk.toConfigBSON(), foundChunk.toConfigBSON());
+}
+
+TEST_F(MergeChunkTest, ChunkBoundariesOutOfOrderFails) {
+ const OID epoch = OID::gen();
+ const std::vector<BSONObj> chunkBoundaries{
+ BSON("a" << 100), BSON("a" << 200), BSON("a" << 30), BSON("a" << 400)};
+
+ {
+ std::vector<ChunkType> originalChunks;
+ ChunkVersion version = ChunkVersion(1, 0, epoch);
+
+ ChunkType chunk;
+ chunk.setNS(kNamespace);
+ chunk.setShard(ShardId("shard0000"));
+
+ chunk.setVersion(version);
+ chunk.setMin(BSON("a" << 100));
+ chunk.setMax(BSON("a" << 200));
+ originalChunks.push_back(chunk);
+
+ version.incMinor();
+ chunk.setMin(BSON("a" << 200));
+ chunk.setMax(BSON("a" << 300));
+ chunk.setVersion(version);
+ originalChunks.push_back(chunk);
+
+ version.incMinor();
+ chunk.setMin(BSON("a" << 300));
+ chunk.setMax(BSON("a" << 400));
+ chunk.setVersion(version);
+ originalChunks.push_back(chunk);
+
+ setupChunks(originalChunks).transitional_ignore();
+ }
+
+ ASSERT_EQ(ErrorCodes::InvalidOptions,
+ ShardingCatalogManager::get(operationContext())
+ ->commitChunkMerge(operationContext(),
+ NamespaceString("TestDB.TestColl"),
+ epoch,
+ chunkBoundaries,
+ "shard0000"));
+}
+
+} // namespace
+} // namespace mongo
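
Two invariants run through the merge tests above: commitChunkMerge rejects a boundary list that is not strictly increasing (ChunkBoundariesOutOfOrderFails expects InvalidOptions), and the surviving chunk claims the collection's highest chunk version with the minor component bumped by one (NewMergeShouldClaimHighestVersion). A small self-contained model of those two checks, with an int standing in for a BSON shard-key bound and hypothetical helper names:

#include <cassert>
#include <cstddef>
#include <vector>

using Key = int;  // stand-in for a BSON shard-key bound

struct ChunkVersion {
    int major;
    int minor;
};

// The requested boundaries must contain at least [min, max] and be strictly
// increasing; out-of-order bounds are rejected, cf. InvalidOptions above.
bool boundariesInOrder(const std::vector<Key>& bounds) {
    if (bounds.size() < 2)
        return false;
    for (std::size_t i = 1; i < bounds.size(); ++i) {
        if (!(bounds[i - 1] < bounds[i]))
            return false;
    }
    return true;
}

// The merged chunk takes the highest version in the collection, minor + 1,
// so it supersedes any competing chunk's version.
ChunkVersion mergedChunkVersion(const ChunkVersion& collectionMax) {
    return ChunkVersion{collectionMax.major, collectionMax.minor + 1};
}

int main() {
    assert(boundariesInOrder({1, 5, 10}));
    assert(!boundariesInOrder({100, 200, 30, 400}));  // same shape as the failing test
    ChunkVersion v = mergedChunkVersion(ChunkVersion{2, 1});
    assert(v.major == 2 && v.minor == 2);
    return 0;
}
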
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_remove_shard_from_zone_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_remove_shard_from_zone_test.cpp
new file mode 100644
index 00000000000..f3998cab85c
--- /dev/null
+++ b/src/mongo/db/s/config/sharding_catalog_manager_remove_shard_from_zone_test.cpp
@@ -0,0 +1,260 @@
+/**
+ * Copyright (C) 2016 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects
+ * for all of the code used other than as permitted herein. If you modify
+ * file(s) with this exception, you may extend this exception to your
+ * version of the file(s), but you are not obligated to do so. If you do not
+ * wish to do so, delete this exception statement from your version. If you
+ * delete this exception statement from all source files in the program,
+ * then also delete it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/client/read_preference.h"
+#include "mongo/db/namespace_string.h"
+#include "mongo/db/s/config/sharding_catalog_manager.h"
+#include "mongo/s/catalog/type_shard.h"
+#include "mongo/s/catalog/type_tags.h"
+#include "mongo/s/client/shard.h"
+#include "mongo/s/config_server_test_fixture.h"
+
+namespace mongo {
+namespace {
+
+ReadPreferenceSetting kReadPref(ReadPreference::PrimaryOnly);
+
+using RemoveShardFromZoneTest = ConfigServerTestFixture;
+
+TEST_F(RemoveShardFromZoneTest, RemoveZoneThatNoLongerExistsShouldNotError) {
+ ShardType shard;
+ shard.setName("a");
+ shard.setHost("a:1234");
+
+ setupShards({shard}).transitional_ignore();
+
+ ASSERT_OK(ShardingCatalogManager::get(operationContext())
+ ->removeShardFromZone(operationContext(), shard.getName(), "z"));
+ auto shardDocStatus = getShardDoc(operationContext(), shard.getName());
+ ASSERT_OK(shardDocStatus.getStatus());
+
+ auto shardDoc = shardDocStatus.getValue();
+ auto tags = shardDoc.getTags();
+ ASSERT_TRUE(tags.empty());
+}
+
+TEST_F(RemoveShardFromZoneTest, RemovingZoneThatIsOnlyReferencedByAnotherShardShouldSucceed) {
+ ShardType shardA;
+ shardA.setName("a");
+ shardA.setHost("a:1234");
+ shardA.setTags({"z"});
+
+ ShardType shardB;
+ shardB.setName("b");
+ shardB.setHost("b:1234");
+
+ setupShards({shardA, shardB}).transitional_ignore();
+
+ ASSERT_OK(ShardingCatalogManager::get(operationContext())
+ ->removeShardFromZone(operationContext(), shardB.getName(), "z"));
+
+ // Shard A should still be in zone 'z'.
+ auto shardADocStatus = getShardDoc(operationContext(), shardA.getName());
+ ASSERT_OK(shardADocStatus.getStatus());
+
+ auto shardADoc = shardADocStatus.getValue();
+ auto shardATags = shardADoc.getTags();
+ ASSERT_EQ(1u, shardATags.size());
+ ASSERT_EQ("z", shardATags.front());
+
+ // Shard B should not be in zone 'z'.
+ auto shardBDocStatus = getShardDoc(operationContext(), shardB.getName());
+ ASSERT_OK(shardBDocStatus.getStatus());
+
+ auto shardBDoc = shardBDocStatus.getValue();
+ auto shardBTags = shardBDoc.getTags();
+ ASSERT_TRUE(shardBTags.empty());
+}
+
+TEST_F(RemoveShardFromZoneTest, RemoveLastZoneFromShardShouldSucceedWhenNoChunksReferToIt) {
+ ShardType shardA;
+ shardA.setName("a");
+ shardA.setHost("a:1234");
+ shardA.setTags({"z"});
+
+ ShardType shardB;
+ shardB.setName("b");
+ shardB.setHost("b:1234");
+
+ setupShards({shardA, shardB}).transitional_ignore();
+
+ // Insert a chunk range document referring to a different zone
+ TagsType tagDoc;
+ tagDoc.setNS(NamespaceString("test.foo"));
+ tagDoc.setMinKey(BSON("x" << 0));
+ tagDoc.setMaxKey(BSON("x" << 10));
+ tagDoc.setTag("y");
+ insertToConfigCollection(operationContext(), TagsType::ConfigNS, tagDoc.toBSON())
+ .transitional_ignore();
+
+ ASSERT_OK(ShardingCatalogManager::get(operationContext())
+ ->removeShardFromZone(operationContext(), shardA.getName(), "z"));
+
+ // Shard A should not be in zone 'z'.
+ auto shardADocStatus = getShardDoc(operationContext(), shardA.getName());
+ ASSERT_OK(shardADocStatus.getStatus());
+
+ auto shardADoc = shardADocStatus.getValue();
+ auto shardATags = shardADoc.getTags();
+ ASSERT_TRUE(shardATags.empty());
+
+ // Shard B should not be in zone 'z'.
+ auto shardBDocStatus = getShardDoc(operationContext(), shardB.getName());
+ ASSERT_OK(shardBDocStatus.getStatus());
+
+ auto shardBDoc = shardBDocStatus.getValue();
+ auto shardBTags = shardBDoc.getTags();
+ ASSERT_TRUE(shardBTags.empty());
+}
+
+TEST_F(RemoveShardFromZoneTest, RemoveLastZoneFromShardShouldFailWhenAChunkRefersToIt) {
+ ShardType shardA;
+ shardA.setName("a");
+ shardA.setHost("a:1234");
+ shardA.setTags({"y", "z"});
+
+ ShardType shardB;
+ shardB.setName("b");
+ shardB.setHost("b:1234");
+
+ setupShards({shardA, shardB}).transitional_ignore();
+
+ TagsType tagDoc;
+ tagDoc.setNS(NamespaceString("test.foo"));
+ tagDoc.setMinKey(BSON("x" << 0));
+ tagDoc.setMaxKey(BSON("x" << 10));
+ tagDoc.setTag("z");
+ insertToConfigCollection(operationContext(), TagsType::ConfigNS, tagDoc.toBSON())
+ .transitional_ignore();
+
+ auto status = ShardingCatalogManager::get(operationContext())
+ ->removeShardFromZone(operationContext(), shardA.getName(), "z");
+ ASSERT_EQ(ErrorCodes::ZoneStillInUse, status);
+
+ // Shard A should still be in zone 'z'.
+ auto shardADocStatus = getShardDoc(operationContext(), shardA.getName());
+ ASSERT_OK(shardADocStatus.getStatus());
+
+ auto shardADoc = shardADocStatus.getValue();
+ auto shardATags = shardADoc.getTags();
+ ASSERT_EQ(2u, shardATags.size());
+ ASSERT_EQ("y", shardATags.front());
+ ASSERT_EQ("z", shardATags.back());
+
+ // Shard B should not be in zone 'z'.
+ auto shardBDocStatus = getShardDoc(operationContext(), shardB.getName());
+ ASSERT_OK(shardBDocStatus.getStatus());
+
+ auto shardBDoc = shardBDocStatus.getValue();
+ auto shardBTags = shardBDoc.getTags();
+ ASSERT_TRUE(shardBTags.empty());
+}
+
+TEST_F(RemoveShardFromZoneTest, RemoveZoneShouldFailIfShardDoesntExist) {
+ ShardType shardA;
+ shardA.setName("a");
+ shardA.setHost("a:1234");
+ shardA.setTags({"z"});
+
+ setupShards({shardA}).transitional_ignore();
+
+ auto status = ShardingCatalogManager::get(operationContext())
+ ->removeShardFromZone(operationContext(), "b", "z");
+ ASSERT_EQ(ErrorCodes::ShardNotFound, status);
+
+ // Shard A should still be in zone 'z'.
+ auto shardADocStatus = getShardDoc(operationContext(), shardA.getName());
+ ASSERT_OK(shardADocStatus.getStatus());
+
+ auto shardADoc = shardADocStatus.getValue();
+ auto shardATags = shardADoc.getTags();
+ ASSERT_EQ(1u, shardATags.size());
+ ASSERT_EQ("z", shardATags.front());
+}
+
+TEST_F(RemoveShardFromZoneTest, RemoveZoneFromShardShouldOnlyRemoveZoneOnSpecifiedShard) {
+ ShardType shardA;
+ shardA.setName("a");
+ shardA.setHost("a:1234");
+ shardA.setTags({"z"});
+
+ ShardType shardB;
+ shardB.setName("b");
+ shardB.setHost("b:1234");
+ shardB.setTags({"y", "z"});
+
+ setupShards({shardA, shardB}).transitional_ignore();
+
+ ASSERT_OK(ShardingCatalogManager::get(operationContext())
+ ->removeShardFromZone(operationContext(), shardB.getName(), "z"));
+
+ // Shard A should still be in zone 'z'.
+ auto shardADocStatus = getShardDoc(operationContext(), shardA.getName());
+ ASSERT_OK(shardADocStatus.getStatus());
+
+ auto shardADoc = shardADocStatus.getValue();
+ auto shardATags = shardADoc.getTags();
+ ASSERT_EQ(1u, shardATags.size());
+ ASSERT_EQ("z", shardATags.front());
+
+ // Shard B should not be in zone 'z'.
+ auto shardBDocStatus = getShardDoc(operationContext(), shardB.getName());
+ ASSERT_OK(shardBDocStatus.getStatus());
+
+ auto shardBDoc = shardBDocStatus.getValue();
+ auto shardBTags = shardBDoc.getTags();
+ ASSERT_EQ(1u, shardBTags.size());
+ ASSERT_EQ("y", shardBTags.front());
+}
+
+/*
+// TODO: This test fails while an OpObserver is present, since the insert of the invalid shard
+// doc fails.
+TEST_F(RemoveShardFromZoneTest, RemoveZoneFromShardShouldErrorIfShardDocIsMalformed) {
+ // Note: invalid because tags is in string instead of array.
+ BSONObj invalidShardDoc(BSON("_id"
+ << "a"
+ << "host"
+ << "a:1"
+ << "tags"
+ << "z"));
+
+ insertToConfigCollection(
+ operationContext(), ShardType::ConfigNS, invalidShardDoc);
+
+    auto status = ShardingCatalogManager::get(operationContext())
+                      ->removeShardFromZone(operationContext(), "a", "z");
+ ASSERT_EQ(ErrorCodes::TypeMismatch, status);
+}
+*/
+} // unnamed namespace
+} // namespace mongo
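
Taken together, the zone tests above pin down a simple rule: removing a zone tag from a shard is an unconditional update of that shard's document, except when the shard is the only one carrying the tag and some range document still references the zone, in which case the call fails with ZoneStillInUse. A minimal in-memory model of that rule (hypothetical containers, not the config-server documents):

#include <algorithm>
#include <cassert>
#include <map>
#include <set>
#include <string>

enum class RemoveResult { Ok, ShardNotFound, ZoneStillInUse };

RemoveResult removeShardFromZone(std::map<std::string, std::set<std::string>>& shardTags,
                                 const std::set<std::string>& zonesReferencedByRanges,
                                 const std::string& shardName,
                                 const std::string& zone) {
    auto it = shardTags.find(shardName);
    if (it == shardTags.end())
        return RemoveResult::ShardNotFound;

    // Does any *other* shard still carry this zone tag?
    bool otherShardHasZone =
        std::any_of(shardTags.begin(), shardTags.end(), [&](const auto& entry) {
            return entry.first != shardName && entry.second.count(zone) > 0;
        });

    // Only refuse when this shard is the zone's last member and a range refers to it.
    if (!otherShardHasZone && zonesReferencedByRanges.count(zone) > 0)
        return RemoveResult::ZoneStillInUse;

    it->second.erase(zone);  // removing a tag the shard never had is a harmless no-op
    return RemoveResult::Ok;
}

int main() {
    std::map<std::string, std::set<std::string>> tags{{"a", {"y", "z"}}, {"b", {}}};
    assert(removeShardFromZone(tags, {"z"}, "a", "z") == RemoveResult::ZoneStillInUse);
    assert(removeShardFromZone(tags, {"y"}, "a", "z") == RemoveResult::Ok);
    assert(removeShardFromZone(tags, {}, "missing", "z") == RemoveResult::ShardNotFound);
    return 0;
}
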
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_remove_shard_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_remove_shard_test.cpp
new file mode 100644
index 00000000000..6def5ee6603
--- /dev/null
+++ b/src/mongo/db/s/config/sharding_catalog_manager_remove_shard_test.cpp
@@ -0,0 +1,325 @@
+/**
+ * Copyright (C) 2015 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kSharding
+
+#include "mongo/platform/basic.h"
+
+#include <string>
+#include <vector>
+
+#include "mongo/client/remote_command_targeter_mock.h"
+#include "mongo/db/commands.h"
+#include "mongo/db/ops/write_ops.h"
+#include "mongo/db/s/config/sharding_catalog_manager.h"
+#include "mongo/executor/network_interface_mock.h"
+#include "mongo/executor/task_executor.h"
+#include "mongo/rpc/metadata/repl_set_metadata.h"
+#include "mongo/rpc/metadata/tracking_metadata.h"
+#include "mongo/s/catalog/type_changelog.h"
+#include "mongo/s/catalog/type_chunk.h"
+#include "mongo/s/catalog/type_database.h"
+#include "mongo/s/catalog/type_shard.h"
+#include "mongo/s/client/shard_registry.h"
+#include "mongo/s/cluster_identity_loader.h"
+#include "mongo/s/config_server_test_fixture.h"
+#include "mongo/s/grid.h"
+#include "mongo/s/write_ops/batched_command_response.h"
+#include "mongo/stdx/chrono.h"
+#include "mongo/stdx/future.h"
+#include "mongo/util/log.h"
+
+namespace mongo {
+namespace {
+
+using executor::NetworkInterfaceMock;
+using executor::RemoteCommandRequest;
+using executor::RemoteCommandResponse;
+using executor::TaskExecutor;
+using std::string;
+using std::vector;
+using unittest::assertGet;
+
+const Seconds kFutureTimeout{5};
+
+BSONObj getReplSecondaryOkMetadata() {
+ BSONObjBuilder o;
+ ReadPreferenceSetting(ReadPreference::Nearest).toContainingBSON(&o);
+ o.append(rpc::kReplSetMetadataFieldName, 1);
+ return o.obj();
+}
+
+class RemoveShardTest : public ConfigServerTestFixture {
+protected:
+ /**
+ * Performs the test setup steps from the parent class and then configures the config shard and
+ * the client name.
+ */
+ void setUp() override {
+ ConfigServerTestFixture::setUp();
+
+ // Make sure clusterID is written to the config.version collection.
+ ASSERT_OK(ShardingCatalogManager::get(operationContext())
+ ->initializeConfigDatabaseIfNeeded(operationContext()));
+
+ auto clusterIdLoader = ClusterIdentityLoader::get(operationContext());
+ ASSERT_OK(clusterIdLoader->loadClusterId(operationContext(),
+ repl::ReadConcernLevel::kLocalReadConcern));
+ _clusterId = clusterIdLoader->getClusterId();
+ }
+
+ /**
+ * Checks whether a particular shard's "draining" field is set to true.
+ */
+ bool isDraining(const std::string& shardName) {
+ auto response = assertGet(shardRegistry()->getConfigShard()->exhaustiveFindOnConfig(
+ operationContext(),
+ ReadPreferenceSetting{ReadPreference::PrimaryOnly},
+ repl::ReadConcernLevel::kMajorityReadConcern,
+ ShardType::ConfigNS,
+ BSON(ShardType::name() << shardName),
+ BSONObj(),
+ 1));
+ BSONObj shardBSON = response.docs.front();
+ if (shardBSON.hasField("draining")) {
+ return shardBSON["draining"].Bool();
+ }
+ return false;
+ }
+
+ const HostAndPort configHost{"TestHost1"};
+ OID _clusterId;
+};
+
+TEST_F(RemoveShardTest, RemoveShardAnotherShardDraining) {
+ ShardType shard1;
+ shard1.setName("shard1");
+ shard1.setHost("host1:12345");
+ shard1.setMaxSizeMB(100);
+ shard1.setState(ShardType::ShardState::kShardAware);
+
+ ShardType shard2;
+ shard2.setName("shard2");
+ shard2.setHost("host2:12345");
+ shard2.setMaxSizeMB(100);
+ shard2.setState(ShardType::ShardState::kShardAware);
+
+ ASSERT_OK(setupShards(std::vector<ShardType>{shard1, shard2}));
+
+ auto result = assertGet(ShardingCatalogManager::get(operationContext())
+ ->removeShard(operationContext(), shard1.getName()));
+ ASSERT_EQUALS(ShardDrainingStatus::STARTED, result);
+ ASSERT_TRUE(isDraining(shard1.getName()));
+
+ ASSERT_EQUALS(ErrorCodes::ConflictingOperationInProgress,
+ ShardingCatalogManager::get(operationContext())
+ ->removeShard(operationContext(), shard2.getName()));
+ ASSERT_FALSE(isDraining(shard2.getName()));
+}
+
+TEST_F(RemoveShardTest, RemoveShardCantRemoveLastShard) {
+ string shardName = "shardToRemove";
+
+ ShardType shard1;
+ shard1.setName("shard1");
+ shard1.setHost("host1:12345");
+ shard1.setMaxSizeMB(100);
+ shard1.setState(ShardType::ShardState::kShardAware);
+
+ ASSERT_OK(setupShards(std::vector<ShardType>{shard1}));
+
+ ASSERT_EQUALS(ErrorCodes::IllegalOperation,
+ ShardingCatalogManager::get(operationContext())
+ ->removeShard(operationContext(), shard1.getName()));
+ ASSERT_FALSE(isDraining(shard1.getName()));
+}
+
+TEST_F(RemoveShardTest, RemoveShardStartDraining) {
+ ShardType shard1;
+ shard1.setName("shard1");
+ shard1.setHost("host1:12345");
+ shard1.setMaxSizeMB(100);
+ shard1.setState(ShardType::ShardState::kShardAware);
+
+ ShardType shard2;
+ shard2.setName("shard2");
+ shard2.setHost("host2:12345");
+ shard2.setMaxSizeMB(100);
+ shard2.setState(ShardType::ShardState::kShardAware);
+
+ ASSERT_OK(setupShards(std::vector<ShardType>{shard1, shard2}));
+
+ auto result = assertGet(ShardingCatalogManager::get(operationContext())
+ ->removeShard(operationContext(), shard1.getName()));
+ ASSERT_EQUALS(ShardDrainingStatus::STARTED, result);
+ ASSERT_TRUE(isDraining(shard1.getName()));
+}
+
+TEST_F(RemoveShardTest, RemoveShardStillDrainingChunksRemaining) {
+ ShardType shard1;
+ shard1.setName("shard1");
+ shard1.setHost("host1:12345");
+ shard1.setMaxSizeMB(100);
+ shard1.setState(ShardType::ShardState::kShardAware);
+
+ ShardType shard2;
+ shard2.setName("shard2");
+ shard2.setHost("host2:12345");
+ shard2.setMaxSizeMB(100);
+ shard2.setState(ShardType::ShardState::kShardAware);
+
+ auto epoch = OID::gen();
+ ChunkType chunk1(NamespaceString("testDB.testColl"),
+ ChunkRange(BSON("_id" << 0), BSON("_id" << 20)),
+ ChunkVersion(1, 1, epoch),
+ shard1.getName());
+ ChunkType chunk2(NamespaceString("testDB.testColl"),
+ ChunkRange(BSON("_id" << 21), BSON("_id" << 50)),
+ ChunkVersion(1, 2, epoch),
+ shard1.getName());
+ ChunkType chunk3(NamespaceString("testDB.testColl"),
+ ChunkRange(BSON("_id" << 51), BSON("_id" << 1000)),
+ ChunkVersion(1, 3, epoch),
+ shard1.getName());
+
+ ASSERT_OK(setupShards(std::vector<ShardType>{shard1, shard2}));
+ setupDatabase("testDB", shard1.getName(), true);
+ ASSERT_OK(setupChunks(std::vector<ChunkType>{chunk1, chunk2, chunk3}));
+
+ auto startedResult = assertGet(ShardingCatalogManager::get(operationContext())
+ ->removeShard(operationContext(), shard1.getName()));
+ ASSERT_EQUALS(ShardDrainingStatus::STARTED, startedResult);
+ ASSERT_TRUE(isDraining(shard1.getName()));
+
+ auto ongoingResult = assertGet(ShardingCatalogManager::get(operationContext())
+ ->removeShard(operationContext(), shard1.getName()));
+ ASSERT_EQUALS(ShardDrainingStatus::ONGOING, ongoingResult);
+ ASSERT_TRUE(isDraining(shard1.getName()));
+}
+
+TEST_F(RemoveShardTest, RemoveShardStillDrainingDatabasesRemaining) {
+ ShardType shard1;
+ shard1.setName("shard1");
+ shard1.setHost("host1:12345");
+ shard1.setMaxSizeMB(100);
+ shard1.setState(ShardType::ShardState::kShardAware);
+
+ ShardType shard2;
+ shard2.setName("shard2");
+ shard2.setHost("host2:12345");
+ shard2.setMaxSizeMB(100);
+ shard2.setState(ShardType::ShardState::kShardAware);
+
+ ASSERT_OK(setupShards(std::vector<ShardType>{shard1, shard2}));
+ setupDatabase("testDB", shard1.getName(), false);
+
+ auto startedResult = assertGet(ShardingCatalogManager::get(operationContext())
+ ->removeShard(operationContext(), shard1.getName()));
+ ASSERT_EQUALS(ShardDrainingStatus::STARTED, startedResult);
+ ASSERT_TRUE(isDraining(shard1.getName()));
+
+ auto ongoingResult = assertGet(ShardingCatalogManager::get(operationContext())
+ ->removeShard(operationContext(), shard1.getName()));
+ ASSERT_EQUALS(ShardDrainingStatus::ONGOING, ongoingResult);
+ ASSERT_TRUE(isDraining(shard1.getName()));
+}
+
+TEST_F(RemoveShardTest, RemoveShardCompletion) {
+ ShardType shard1;
+ shard1.setName("shard1");
+ shard1.setHost("host1:12345");
+ shard1.setMaxSizeMB(100);
+ shard1.setState(ShardType::ShardState::kShardAware);
+
+ ShardType shard2;
+ shard2.setName("shard2");
+ shard2.setHost("host2:12345");
+ shard2.setMaxSizeMB(100);
+ shard2.setState(ShardType::ShardState::kShardAware);
+
+ auto epoch = OID::gen();
+ ChunkType chunk1(NamespaceString("testDB.testColl"),
+ ChunkRange(BSON("_id" << 0), BSON("_id" << 20)),
+ ChunkVersion(1, 1, epoch),
+ shard1.getName());
+ ChunkType chunk2(NamespaceString("testDB.testColl"),
+ ChunkRange(BSON("_id" << 21), BSON("_id" << 50)),
+ ChunkVersion(1, 2, epoch),
+ shard1.getName());
+ ChunkType chunk3(NamespaceString("testDB.testColl"),
+ ChunkRange(BSON("_id" << 51), BSON("_id" << 1000)),
+ ChunkVersion(1, 3, epoch),
+ shard1.getName());
+
+ std::vector<ChunkType> chunks{chunk1, chunk2, chunk3};
+
+ ASSERT_OK(setupShards(std::vector<ShardType>{shard1, shard2}));
+ setupDatabase("testDB", shard2.getName(), false);
+ ASSERT_OK(setupChunks(std::vector<ChunkType>{chunk1, chunk2, chunk3}));
+
+ auto startedResult = assertGet(ShardingCatalogManager::get(operationContext())
+ ->removeShard(operationContext(), shard1.getName()));
+ ASSERT_EQUALS(ShardDrainingStatus::STARTED, startedResult);
+ ASSERT_TRUE(isDraining(shard1.getName()));
+
+ auto ongoingResult = assertGet(ShardingCatalogManager::get(operationContext())
+ ->removeShard(operationContext(), shard1.getName()));
+ ASSERT_EQUALS(ShardDrainingStatus::ONGOING, ongoingResult);
+ ASSERT_TRUE(isDraining(shard1.getName()));
+
+ // Mock the operation during which the chunks are moved to the other shard.
+ const NamespaceString chunkNS(ChunkType::ConfigNS);
+ for (ChunkType chunk : chunks) {
+ ChunkType updatedChunk = chunk;
+ updatedChunk.setShard(shard2.getName());
+ ASSERT_OK(updateToConfigCollection(
+ operationContext(), chunkNS, chunk.toConfigBSON(), updatedChunk.toConfigBSON(), false));
+ }
+
+ auto completedResult = assertGet(ShardingCatalogManager::get(operationContext())
+ ->removeShard(operationContext(), shard1.getName()));
+ ASSERT_EQUALS(ShardDrainingStatus::COMPLETED, completedResult);
+
+ // Now make sure that the shard no longer exists on config.
+ auto response = assertGet(shardRegistry()->getConfigShard()->exhaustiveFindOnConfig(
+ operationContext(),
+ ReadPreferenceSetting{ReadPreference::PrimaryOnly},
+ repl::ReadConcernLevel::kMajorityReadConcern,
+ ShardType::ConfigNS,
+ BSON(ShardType::name() << shard1.getName()),
+ BSONObj(),
+ 1));
+ ASSERT_TRUE(response.docs.empty());
+}
+
+} // namespace
+} // namespace mongo
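
The removeShard tests above trace a three-phase protocol: the first call marks the shard document as draining and reports STARTED, repeat calls report ONGOING while any chunks or databases still name the shard as their home, and once both counts reach zero the shard document is deleted and COMPLETED is returned. A toy state machine capturing that progression (simplified fields, not the actual catalog-manager bookkeeping):

#include <cassert>

enum class ShardDrainingStatus { STARTED, ONGOING, COMPLETED };

struct ShardState {
    bool draining;
    int chunksRemaining;
    int databasesRemaining;
};

ShardDrainingStatus removeShard(ShardState& shard) {
    if (!shard.draining) {
        shard.draining = true;  // the first call only flips the "draining" flag
        return ShardDrainingStatus::STARTED;
    }
    if (shard.chunksRemaining > 0 || shard.databasesRemaining > 0)
        return ShardDrainingStatus::ONGOING;
    return ShardDrainingStatus::COMPLETED;  // caller then removes the shard document
}

int main() {
    ShardState shard{false, 3, 1};
    assert(removeShard(shard) == ShardDrainingStatus::STARTED);
    assert(removeShard(shard) == ShardDrainingStatus::ONGOING);
    shard.chunksRemaining = 0;     // chunks migrated away, cf. RemoveShardCompletion
    shard.databasesRemaining = 0;  // databases moved to another primary shard
    assert(removeShard(shard) == ShardDrainingStatus::COMPLETED);
    return 0;
}
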
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_shard_collection_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_shard_collection_test.cpp
new file mode 100644
index 00000000000..35b564c9139
--- /dev/null
+++ b/src/mongo/db/s/config/sharding_catalog_manager_shard_collection_test.cpp
@@ -0,0 +1,445 @@
+/**
+ * Copyright (C) 2015 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kSharding
+
+#include "mongo/platform/basic.h"
+
+#include <set>
+#include <string>
+#include <vector>
+
+#include "mongo/client/read_preference.h"
+#include "mongo/client/remote_command_targeter_factory_mock.h"
+#include "mongo/client/remote_command_targeter_mock.h"
+#include "mongo/db/client.h"
+#include "mongo/db/commands.h"
+#include "mongo/db/s/config/sharding_catalog_manager.h"
+#include "mongo/executor/network_interface_mock.h"
+#include "mongo/executor/task_executor.h"
+#include "mongo/rpc/metadata/tracking_metadata.h"
+#include "mongo/s/catalog/type_changelog.h"
+#include "mongo/s/catalog/type_chunk.h"
+#include "mongo/s/catalog/type_collection.h"
+#include "mongo/s/catalog/type_database.h"
+#include "mongo/s/catalog/type_locks.h"
+#include "mongo/s/catalog/type_shard.h"
+#include "mongo/s/client/shard_registry.h"
+#include "mongo/s/config_server_test_fixture.h"
+#include "mongo/s/grid.h"
+#include "mongo/s/shard_key_pattern.h"
+#include "mongo/stdx/future.h"
+#include "mongo/transport/mock_session.h"
+#include "mongo/util/log.h"
+#include "mongo/util/scopeguard.h"
+#include "mongo/util/time_support.h"
+
+namespace mongo {
+namespace {
+
+using executor::NetworkInterfaceMock;
+using executor::RemoteCommandRequest;
+using executor::RemoteCommandResponse;
+using executor::TaskExecutor;
+using std::set;
+using std::string;
+using std::vector;
+using unittest::assertGet;
+
+const ShardId testPrimaryShard = ShardId("shard0");
+
+const NamespaceString kNamespace("db1.foo");
+
+class ShardCollectionTest : public ConfigServerTestFixture {
+public:
+ void expectCount(const HostAndPort& receivingHost,
+ const NamespaceString& expectedNss,
+ const BSONObj& expectedQuery,
+ const StatusWith<long long>& response) {
+ onCommand([&](const RemoteCommandRequest& request) {
+ ASSERT_EQUALS(receivingHost, request.target);
+ string cmdName = request.cmdObj.firstElement().fieldName();
+
+ ASSERT_EQUALS("count", cmdName);
+
+ const NamespaceString nss(request.dbname, request.cmdObj.firstElement().String());
+ ASSERT_EQUALS(expectedNss, nss);
+
+ if (expectedQuery.isEmpty()) {
+ auto queryElem = request.cmdObj["query"];
+ ASSERT_TRUE(queryElem.eoo() || queryElem.Obj().isEmpty());
+ } else {
+ ASSERT_BSONOBJ_EQ(expectedQuery, request.cmdObj["query"].Obj());
+ }
+
+ if (response.isOK()) {
+ return BSON("ok" << 1 << "n" << response.getValue());
+ }
+
+ BSONObjBuilder responseBuilder;
+ CommandHelpers::appendCommandStatus(responseBuilder, response.getStatus());
+ return responseBuilder.obj();
+ });
+ }
+
+private:
+ const HostAndPort configHost{"configHost1"};
+ const ConnectionString configCS{ConnectionString::forReplicaSet("configReplSet", {configHost})};
+ const HostAndPort clientHost{"clientHost1"};
+};
+
+TEST_F(ShardCollectionTest, anotherMongosSharding) {
+ ShardType shard;
+ shard.setName("shard0");
+ shard.setHost("shardHost");
+ ASSERT_OK(setupShards(vector<ShardType>{shard}));
+
+ setupDatabase(kNamespace.db().toString(), shard.getName(), true);
+
+ // Set up chunks in the collection, indicating that another mongos must have already started
+ // sharding the collection.
+ ChunkType chunk;
+ chunk.setNS(kNamespace);
+ chunk.setVersion(ChunkVersion(2, 0, OID::gen()));
+ chunk.setShard(shard.getName());
+ chunk.setMin(BSON("_id" << 1));
+ chunk.setMax(BSON("_id" << 5));
+ ASSERT_OK(setupChunks({chunk}));
+
+ ShardKeyPattern shardKeyPattern(BSON("_id" << 1));
+ BSONObj defaultCollation;
+
+ ASSERT_THROWS_CODE(ShardingCatalogManager::get(operationContext())
+ ->shardCollection(operationContext(),
+ kNamespace,
+ boost::none, // UUID
+ shardKeyPattern,
+ defaultCollation,
+ false,
+ vector<BSONObj>{},
+ false,
+ testPrimaryShard),
+ AssertionException,
+ ErrorCodes::ManualInterventionRequired);
+}
+
+TEST_F(ShardCollectionTest, noInitialChunksOrData) {
+ // Initial setup
+ const HostAndPort shardHost{"shardHost"};
+ ShardType shard;
+ shard.setName("shard0");
+ shard.setHost(shardHost.toString());
+
+ std::unique_ptr<RemoteCommandTargeterMock> targeter(
+ stdx::make_unique<RemoteCommandTargeterMock>());
+ targeter->setConnectionStringReturnValue(ConnectionString(shardHost));
+ targeter->setFindHostReturnValue(shardHost);
+ targeterFactory()->addTargeterToReturn(ConnectionString(shardHost), std::move(targeter));
+
+ ASSERT_OK(setupShards(vector<ShardType>{shard}));
+
+ setupDatabase(kNamespace.db().toString(), shard.getName(), true);
+
+ ShardKeyPattern shardKeyPattern(BSON("_id" << 1));
+ BSONObj defaultCollation;
+
+ // Now start actually sharding the collection.
+ auto future = launchAsync([&] {
+ ON_BLOCK_EXIT([&] { Client::destroy(); });
+ Client::initThreadIfNotAlready("Test");
+ auto opCtx = cc().makeOperationContext();
+ ShardingCatalogManager::get(operationContext())
+ ->shardCollection(opCtx.get(),
+ kNamespace,
+ boost::none, // UUID
+ shardKeyPattern,
+ defaultCollation,
+ false,
+ vector<BSONObj>{},
+ false,
+ testPrimaryShard);
+ });
+
+ // Report that no documents exist for the given collection on the primary shard
+ expectCount(shardHost, kNamespace, BSONObj(), 0);
+
+ // Expect the set shard version for that namespace.
+ // We do not check for a specific ChunkVersion, because we cannot easily know the OID that was
+ // generated by shardCollection for the first chunk.
+ // TODO SERVER-29451: add hooks to the mock storage engine to expect reads and writes.
+ expectSetShardVersion(shardHost, shard, kNamespace, boost::none /* expected ChunkVersion */);
+
+ future.timed_get(kFutureTimeout);
+}
+
+TEST_F(ShardCollectionTest, withInitialChunks) {
+ // Initial setup
+ const HostAndPort shard0Host{"shardHost0"};
+ const HostAndPort shard1Host{"shardHost1"};
+ const HostAndPort shard2Host{"shardHost2"};
+
+ ShardType shard0;
+ shard0.setName("shard0");
+ shard0.setHost(shard0Host.toString());
+
+ ShardType shard1;
+ shard1.setName("shard1");
+ shard1.setHost(shard1Host.toString());
+
+ ShardType shard2;
+ shard2.setName("shard2");
+ shard2.setHost(shard2Host.toString());
+
+ std::unique_ptr<RemoteCommandTargeterMock> targeter0(
+ stdx::make_unique<RemoteCommandTargeterMock>());
+ std::unique_ptr<RemoteCommandTargeterMock> targeter1(
+ stdx::make_unique<RemoteCommandTargeterMock>());
+ std::unique_ptr<RemoteCommandTargeterMock> targeter2(
+ stdx::make_unique<RemoteCommandTargeterMock>());
+ targeter0->setConnectionStringReturnValue(ConnectionString(shard0Host));
+ targeter0->setFindHostReturnValue(shard0Host);
+ targeterFactory()->addTargeterToReturn(ConnectionString(shard0Host), std::move(targeter0));
+ targeter1->setConnectionStringReturnValue(ConnectionString(shard1Host));
+ targeter1->setFindHostReturnValue(shard1Host);
+ targeterFactory()->addTargeterToReturn(ConnectionString(shard1Host), std::move(targeter1));
+ targeter2->setConnectionStringReturnValue(ConnectionString(shard2Host));
+ targeter2->setFindHostReturnValue(shard2Host);
+ targeterFactory()->addTargeterToReturn(ConnectionString(shard2Host), std::move(targeter2));
+
+ ASSERT_OK(setupShards(vector<ShardType>{shard0, shard1, shard2}));
+
+ setupDatabase(kNamespace.db().toString(), shard0.getName(), true);
+
+ ShardKeyPattern keyPattern(BSON("_id" << 1));
+
+ BSONObj splitPoint0 = BSON("_id" << 1);
+ BSONObj splitPoint1 = BSON("_id" << 100);
+ BSONObj splitPoint2 = BSON("_id" << 200);
+ BSONObj splitPoint3 = BSON("_id" << 300);
+
+ ChunkVersion expectedVersion(1, 0, OID::gen());
+
+ ChunkType expectedChunk0;
+ expectedChunk0.setNS(kNamespace);
+ expectedChunk0.setShard(shard0.getName());
+ expectedChunk0.setMin(keyPattern.getKeyPattern().globalMin());
+ expectedChunk0.setMax(splitPoint0);
+ expectedChunk0.setVersion(expectedVersion);
+ expectedVersion.incMinor();
+
+ ChunkType expectedChunk1;
+ expectedChunk1.setNS(kNamespace);
+ expectedChunk1.setShard(shard1.getName());
+ expectedChunk1.setMin(splitPoint0);
+ expectedChunk1.setMax(splitPoint1);
+ expectedChunk1.setVersion(expectedVersion);
+ expectedVersion.incMinor();
+
+ ChunkType expectedChunk2;
+ expectedChunk2.setNS(kNamespace);
+ expectedChunk2.setShard(shard2.getName());
+ expectedChunk2.setMin(splitPoint1);
+ expectedChunk2.setMax(splitPoint2);
+ expectedChunk2.setVersion(expectedVersion);
+ expectedVersion.incMinor();
+
+ ChunkType expectedChunk3;
+ expectedChunk3.setNS(kNamespace);
+ expectedChunk3.setShard(shard0.getName());
+ expectedChunk3.setMin(splitPoint2);
+ expectedChunk3.setMax(splitPoint3);
+ expectedChunk3.setVersion(expectedVersion);
+ expectedVersion.incMinor();
+
+ ChunkType expectedChunk4;
+ expectedChunk4.setNS(kNamespace);
+ expectedChunk4.setShard(shard1.getName());
+ expectedChunk4.setMin(splitPoint3);
+ expectedChunk4.setMax(keyPattern.getKeyPattern().globalMax());
+ expectedChunk4.setVersion(expectedVersion);
+
+ vector<ChunkType> expectedChunks{
+ expectedChunk0, expectedChunk1, expectedChunk2, expectedChunk3, expectedChunk4};
+
+ BSONObj defaultCollation;
+
+ // Now start actually sharding the collection.
+ auto future = launchAsync([&] {
+ // TODO: can we mock the ShardRegistry to return these?
+ set<ShardId> shards{shard0.getName(), shard1.getName(), shard2.getName()};
+
+ ON_BLOCK_EXIT([&] { Client::destroy(); });
+ Client::initThreadIfNotAlready("Test");
+ auto opCtx = cc().makeOperationContext();
+ ShardingCatalogManager::get(operationContext())
+ ->shardCollection(opCtx.get(),
+ kNamespace,
+ boost::none, // UUID
+ keyPattern,
+ defaultCollation,
+ true,
+ vector<BSONObj>{splitPoint0, splitPoint1, splitPoint2, splitPoint3},
+ true,
+ testPrimaryShard);
+ });
+
+ // Expect the set shard version for that namespace
+ // We do not check for a specific ChunkVersion, because we cannot easily know the OID that was
+ // generated by shardCollection for the first chunk.
+ // TODO SERVER-29451: add hooks to the mock storage engine to expect reads and writes.
+ expectSetShardVersion(shard0Host, shard0, kNamespace, boost::none /* expected ChunkVersion */);
+
+ future.timed_get(kFutureTimeout);
+}
+
+TEST_F(ShardCollectionTest, withInitialData) {
+ // Initial setup
+ const HostAndPort shardHost{"shardHost"};
+ ShardType shard;
+ shard.setName("shard0");
+ shard.setHost(shardHost.toString());
+
+ std::unique_ptr<RemoteCommandTargeterMock> targeter(
+ stdx::make_unique<RemoteCommandTargeterMock>());
+ targeter->setConnectionStringReturnValue(ConnectionString(shardHost));
+ targeter->setFindHostReturnValue(shardHost);
+ targeterFactory()->addTargeterToReturn(ConnectionString(shardHost), std::move(targeter));
+
+ ASSERT_OK(setupShards(vector<ShardType>{shard}));
+
+ setupDatabase(kNamespace.db().toString(), shard.getName(), true);
+
+ ShardKeyPattern keyPattern(BSON("_id" << 1));
+
+ BSONObj splitPoint0 = BSON("_id" << 1);
+ BSONObj splitPoint1 = BSON("_id" << 100);
+ BSONObj splitPoint2 = BSON("_id" << 200);
+ BSONObj splitPoint3 = BSON("_id" << 300);
+
+ ChunkVersion expectedVersion(1, 0, OID::gen());
+
+ ChunkType expectedChunk0;
+ expectedChunk0.setNS(kNamespace);
+ expectedChunk0.setShard(shard.getName());
+ expectedChunk0.setMin(keyPattern.getKeyPattern().globalMin());
+ expectedChunk0.setMax(splitPoint0);
+ expectedChunk0.setVersion(expectedVersion);
+ expectedVersion.incMinor();
+
+ ChunkType expectedChunk1;
+ expectedChunk1.setNS(kNamespace);
+ expectedChunk1.setShard(shard.getName());
+ expectedChunk1.setMin(splitPoint0);
+ expectedChunk1.setMax(splitPoint1);
+ expectedChunk1.setVersion(expectedVersion);
+ expectedVersion.incMinor();
+
+ ChunkType expectedChunk2;
+ expectedChunk2.setNS(kNamespace);
+ expectedChunk2.setShard(shard.getName());
+ expectedChunk2.setMin(splitPoint1);
+ expectedChunk2.setMax(splitPoint2);
+ expectedChunk2.setVersion(expectedVersion);
+ expectedVersion.incMinor();
+
+ ChunkType expectedChunk3;
+ expectedChunk3.setNS(kNamespace);
+ expectedChunk3.setShard(shard.getName());
+ expectedChunk3.setMin(splitPoint2);
+ expectedChunk3.setMax(splitPoint3);
+ expectedChunk3.setVersion(expectedVersion);
+ expectedVersion.incMinor();
+
+ ChunkType expectedChunk4;
+ expectedChunk4.setNS(kNamespace);
+ expectedChunk4.setShard(shard.getName());
+ expectedChunk4.setMin(splitPoint3);
+ expectedChunk4.setMax(keyPattern.getKeyPattern().globalMax());
+ expectedChunk4.setVersion(expectedVersion);
+
+ vector<ChunkType> expectedChunks{
+ expectedChunk0, expectedChunk1, expectedChunk2, expectedChunk3, expectedChunk4};
+
+ BSONObj defaultCollation;
+
+ // Now start actually sharding the collection.
+ auto future = launchAsync([&] {
+ ON_BLOCK_EXIT([&] { Client::destroy(); });
+ Client::initThreadIfNotAlready("Test");
+ auto opCtx = cc().makeOperationContext();
+ ShardingCatalogManager::get(operationContext())
+ ->shardCollection(opCtx.get(),
+ kNamespace,
+ boost::none, // UUID
+ keyPattern,
+ defaultCollation,
+ false,
+ vector<BSONObj>{},
+ false,
+ testPrimaryShard);
+ });
+
+ // Report that documents exist for the given collection on the primary shard, so that calling
+ // splitVector is required for calculating the initial split points.
+ expectCount(shardHost, kNamespace, BSONObj(), 1000);
+
+ // Respond to the splitVector command sent to the shard to figure out initial split points
+ onCommand([&](const RemoteCommandRequest& request) {
+ ASSERT_EQUALS(shardHost, request.target);
+ string cmdName = request.cmdObj.firstElement().fieldName();
+ ASSERT_EQUALS("splitVector", cmdName);
+ ASSERT_EQUALS(kNamespace.ns(),
+ request.cmdObj["splitVector"].String()); // splitVector uses full ns
+
+ ASSERT_BSONOBJ_EQ(keyPattern.toBSON(), request.cmdObj["keyPattern"].Obj());
+ ASSERT_BSONOBJ_EQ(keyPattern.getKeyPattern().globalMin(), request.cmdObj["min"].Obj());
+ ASSERT_BSONOBJ_EQ(keyPattern.getKeyPattern().globalMax(), request.cmdObj["max"].Obj());
+ ASSERT_EQUALS(64 * 1024 * 1024ULL,
+ static_cast<uint64_t>(request.cmdObj["maxChunkSizeBytes"].numberLong()));
+ ASSERT_EQUALS(0, request.cmdObj["maxSplitPoints"].numberLong());
+ ASSERT_EQUALS(0, request.cmdObj["maxChunkObjects"].numberLong());
+
+ ASSERT_BSONOBJ_EQ(
+ ReadPreferenceSetting(ReadPreference::PrimaryPreferred).toContainingBSON(),
+ rpc::TrackingMetadata::removeTrackingData(request.metadata));
+
+ return BSON("ok" << 1 << "splitKeys"
+ << BSON_ARRAY(splitPoint0 << splitPoint1 << splitPoint2 << splitPoint3));
+ });
+
+ // Expect the set shard version for that namespace
+ // We do not check for a specific ChunkVersion, because we cannot easily know the OID that was
+ // generated by shardCollection for the first chunk.
+ // TODO SERVER-29451: add hooks to the mock storage engine to expect reads and writes.
+ expectSetShardVersion(shardHost, shard, kNamespace, boost::none);
+
+ future.timed_get(kFutureTimeout);
+}
+
+} // namespace
+} // namespace mongo
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp b/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp
new file mode 100644
index 00000000000..e8f93fc34f3
--- /dev/null
+++ b/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp
@@ -0,0 +1,956 @@
+/**
+ * Copyright (C) 2017 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kSharding
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/s/config/sharding_catalog_manager.h"
+
+#include <iomanip>
+#include <pcrecpp.h>
+#include <set>
+
+#include "mongo/base/status_with.h"
+#include "mongo/bson/util/bson_extract.h"
+#include "mongo/client/connection_string.h"
+#include "mongo/client/read_preference.h"
+#include "mongo/client/remote_command_targeter.h"
+#include "mongo/client/replica_set_monitor.h"
+#include "mongo/db/audit.h"
+#include "mongo/db/catalog/catalog_raii.h"
+#include "mongo/db/client.h"
+#include "mongo/db/commands/feature_compatibility_version.h"
+#include "mongo/db/namespace_string.h"
+#include "mongo/db/operation_context.h"
+#include "mongo/db/repl/repl_client_info.h"
+#include "mongo/db/repl/repl_set_config.h"
+#include "mongo/db/repl/replication_coordinator.h"
+#include "mongo/db/s/type_shard_identity.h"
+#include "mongo/db/sessions_collection.h"
+#include "mongo/db/wire_version.h"
+#include "mongo/executor/task_executor.h"
+#include "mongo/rpc/get_status_from_command_result.h"
+#include "mongo/s/catalog/config_server_version.h"
+#include "mongo/s/catalog/sharding_catalog_client.h"
+#include "mongo/s/catalog/type_database.h"
+#include "mongo/s/catalog/type_shard.h"
+#include "mongo/s/client/shard.h"
+#include "mongo/s/client/shard_connection.h"
+#include "mongo/s/client/shard_registry.h"
+#include "mongo/s/cluster_identity_loader.h"
+#include "mongo/s/grid.h"
+#include "mongo/s/shard_util.h"
+#include "mongo/s/write_ops/batched_command_request.h"
+#include "mongo/s/write_ops/batched_command_response.h"
+#include "mongo/util/fail_point_service.h"
+#include "mongo/util/log.h"
+#include "mongo/util/mongoutils/str.h"
+#include "mongo/util/scopeguard.h"
+
+namespace mongo {
+namespace {
+
+using std::vector;
+
+using CallbackHandle = executor::TaskExecutor::CallbackHandle;
+using CallbackArgs = executor::TaskExecutor::CallbackArgs;
+using RemoteCommandCallbackArgs = executor::TaskExecutor::RemoteCommandCallbackArgs;
+using RemoteCommandCallbackFn = executor::TaskExecutor::RemoteCommandCallbackFn;
+
+const Seconds kDefaultFindHostMaxWaitTime(20);
+
+const ReadPreferenceSetting kConfigReadSelector(ReadPreference::Nearest, TagSet{});
+const WriteConcernOptions kNoWaitWriteConcern(1, WriteConcernOptions::SyncMode::UNSET, Seconds(0));
+
+/**
+ * Generates a unique name to be given to a newly added shard.
+ */
+StatusWith<std::string> generateNewShardName(OperationContext* opCtx) {
+ BSONObjBuilder shardNameRegex;
+ shardNameRegex.appendRegex(ShardType::name(), "^shard");
+
+ auto findStatus = Grid::get(opCtx)->shardRegistry()->getConfigShard()->exhaustiveFindOnConfig(
+ opCtx,
+ kConfigReadSelector,
+ repl::ReadConcernLevel::kMajorityReadConcern,
+ ShardType::ConfigNS,
+ shardNameRegex.obj(),
+ BSON(ShardType::name() << -1),
+ 1);
+ if (!findStatus.isOK()) {
+ return findStatus.getStatus();
+ }
+
+ const auto& docs = findStatus.getValue().docs;
+
+ int count = 0;
+ if (!docs.empty()) {
+ const auto shardStatus = ShardType::fromBSON(docs.front());
+ if (!shardStatus.isOK()) {
+ return shardStatus.getStatus();
+ }
+
+ std::istringstream is(shardStatus.getValue().getName().substr(5));
+ is >> count;
+ count++;
+ }
+
+ // TODO: fix so that we can have more than 10000 automatically generated shard names
+ if (count < 9999) {
+ std::stringstream ss;
+ ss << "shard" << std::setfill('0') << std::setw(4) << count;
+ return ss.str();
+ }
+
+ return Status(ErrorCodes::OperationFailed, "unable to generate new shard name");
+}
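+
+// Illustrative example (editor's sketch, not part of the original change): if
+// the highest existing auto-generated name is "shard0007", the descending query
+// above returns that document, `count` parses to 7 and is incremented to 8, and
+// the helper returns "shard0008". Explicitly proposed shard names never go
+// through this helper.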
+
+} // namespace
+
+StatusWith<Shard::CommandResponse> ShardingCatalogManager::_runCommandForAddShard(
+ OperationContext* opCtx,
+ RemoteCommandTargeter* targeter,
+ const std::string& dbName,
+ const BSONObj& cmdObj) {
+ auto swHost = targeter->findHost(opCtx, ReadPreferenceSetting{ReadPreference::PrimaryOnly});
+ if (!swHost.isOK()) {
+ return swHost.getStatus();
+ }
+ auto host = std::move(swHost.getValue());
+
+ executor::RemoteCommandRequest request(
+ host, dbName, cmdObj, rpc::makeEmptyMetadata(), nullptr, Seconds(30));
+
+ executor::RemoteCommandResponse response =
+ Status(ErrorCodes::InternalError, "Internal error running command");
+
+ auto swCallbackHandle = _executorForAddShard->scheduleRemoteCommand(
+ request, [&response](const executor::TaskExecutor::RemoteCommandCallbackArgs& args) {
+ response = args.response;
+ });
+ if (!swCallbackHandle.isOK()) {
+ return swCallbackHandle.getStatus();
+ }
+
+ // Block until the command is carried out
+ _executorForAddShard->wait(swCallbackHandle.getValue());
+
+ if (response.status == ErrorCodes::ExceededTimeLimit) {
+ LOG(0) << "Operation timed out with status " << redact(response.status);
+ }
+
+ if (!response.isOK()) {
+ if (!Shard::shouldErrorBePropagated(response.status.code())) {
+ return {ErrorCodes::OperationFailed,
+ str::stream() << "failed to run command " << cmdObj
+ << " when attempting to add shard "
+ << targeter->connectionString().toString()
+ << causedBy(response.status)};
+ }
+ return response.status;
+ }
+
+ BSONObj result = response.data.getOwned();
+
+ Status commandStatus = getStatusFromCommandResult(result);
+ if (!Shard::shouldErrorBePropagated(commandStatus.code())) {
+ commandStatus = {ErrorCodes::OperationFailed,
+ str::stream() << "failed to run command " << cmdObj
+ << " when attempting to add shard "
+ << targeter->connectionString().toString()
+ << causedBy(commandStatus)};
+ }
+
+ Status writeConcernStatus = getWriteConcernStatusFromCommandResult(result);
+ if (!Shard::shouldErrorBePropagated(writeConcernStatus.code())) {
+ writeConcernStatus = {ErrorCodes::OperationFailed,
+ str::stream() << "failed to satisfy writeConcern for command "
+ << cmdObj
+ << " when attempting to add shard "
+ << targeter->connectionString().toString()
+ << causedBy(writeConcernStatus)};
+ }
+
+ return Shard::CommandResponse(std::move(host),
+ std::move(result),
+ response.metadata.getOwned(),
+ std::move(commandStatus),
+ std::move(writeConcernStatus));
+}
+
+StatusWith<boost::optional<ShardType>> ShardingCatalogManager::_checkIfShardExists(
+ OperationContext* opCtx,
+ const ConnectionString& proposedShardConnectionString,
+ const std::string* proposedShardName,
+ long long proposedShardMaxSize) {
+ // Check whether any host in the connection is already part of the cluster.
+ const auto existingShards = Grid::get(opCtx)->catalogClient()->getAllShards(
+ opCtx, repl::ReadConcernLevel::kLocalReadConcern);
+ if (!existingShards.isOK()) {
+ return existingShards.getStatus().withContext(
+ "Failed to load existing shards during addShard");
+ }
+
+ // Now check if this shard already exists - if it already exists *with the same options* then
+ // the addShard request can return success early without doing anything more.
+ for (const auto& existingShard : existingShards.getValue().value) {
+ auto swExistingShardConnStr = ConnectionString::parse(existingShard.getHost());
+ if (!swExistingShardConnStr.isOK()) {
+ return swExistingShardConnStr.getStatus();
+ }
+ auto existingShardConnStr = std::move(swExistingShardConnStr.getValue());
+
+ // Function for determining if the options for the shard that is being added match the
+ // options of an existing shard that conflicts with it.
+ auto shardsAreEquivalent = [&]() {
+ if (proposedShardName && *proposedShardName != existingShard.getName()) {
+ return false;
+ }
+ if (proposedShardConnectionString.type() != existingShardConnStr.type()) {
+ return false;
+ }
+ if (proposedShardConnectionString.type() == ConnectionString::SET &&
+ proposedShardConnectionString.getSetName() != existingShardConnStr.getSetName()) {
+ return false;
+ }
+ if (proposedShardMaxSize != existingShard.getMaxSizeMB()) {
+ return false;
+ }
+ return true;
+ };
+
+ if (existingShardConnStr.type() == ConnectionString::SET &&
+ proposedShardConnectionString.type() == ConnectionString::SET &&
+ existingShardConnStr.getSetName() == proposedShardConnectionString.getSetName()) {
+ // An existing shard has the same replica set name as the shard being added.
+ // If the options aren't the same, then this is an error,
+ // but if the options match then the addShard operation should be immediately
+ // considered a success and terminated.
+ if (shardsAreEquivalent()) {
+ return {existingShard};
+ } else {
+ return {ErrorCodes::IllegalOperation,
+ str::stream() << "A shard already exists containing the replica set '"
+ << existingShardConnStr.getSetName()
+ << "'"};
+ }
+ }
+
+ for (const auto& existingHost : existingShardConnStr.getServers()) {
+ // Look if any of the hosts in the existing shard are present within the shard trying
+ // to be added.
+ for (const auto& addingHost : proposedShardConnectionString.getServers()) {
+ if (existingHost == addingHost) {
+ // At least one of the hosts in the shard being added already exists in an
+ // existing shard. If the options aren't the same, then this is an error,
+ // but if the options match then the addShard operation should be immediately
+ // considered a success and terminated.
+ if (shardsAreEquivalent()) {
+ return {existingShard};
+ } else {
+ return {ErrorCodes::IllegalOperation,
+ str::stream() << "'" << addingHost.toString() << "' "
+ << "is already a member of the existing shard '"
+ << existingShard.getHost()
+ << "' ("
+ << existingShard.getName()
+ << ")."};
+ }
+ }
+ }
+ }
+
+ if (proposedShardName && *proposedShardName == existingShard.getName()) {
+ // If we get here then we're trying to add a shard with the same name as an existing
+ // shard, but there was no overlap in the hosts between the existing shard and the
+ // proposed connection string for the new shard.
+ return {ErrorCodes::IllegalOperation,
+ str::stream() << "A shard named " << *proposedShardName << " already exists"};
+ }
+ }
+
+ return {boost::none};
+}
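+
+// Illustrative scenarios (editor's sketch; names and hosts are hypothetical).
+// Assuming an existing shard "shard0" backed by replica set "rs0/a:27017,b:27017":
+//  - re-adding "rs0/a:27017,b:27017" with a matching name and maxSize returns
+//    the existing ShardType, so addShard becomes a no-op success;
+//  - adding a different set that reuses host a:27017, or reusing the set name
+//    "rs0" with different options, fails with IllegalOperation;
+//  - proposing the already-taken name "shard0" for entirely new hosts also
+//    fails with IllegalOperation.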
+
+StatusWith<ShardType> ShardingCatalogManager::_validateHostAsShard(
+ OperationContext* opCtx,
+ std::shared_ptr<RemoteCommandTargeter> targeter,
+ const std::string* shardProposedName,
+ const ConnectionString& connectionString) {
+ auto swCommandResponse =
+ _runCommandForAddShard(opCtx, targeter.get(), "admin", BSON("isMaster" << 1));
+ if (swCommandResponse.getStatus() == ErrorCodes::IncompatibleServerVersion) {
+ return swCommandResponse.getStatus().withReason(
+ str::stream() << "Cannot add " << connectionString.toString()
+ << " as a shard because its binary version is not compatible with "
+ "the cluster's featureCompatibilityVersion.");
+ } else if (!swCommandResponse.isOK()) {
+ return swCommandResponse.getStatus();
+ }
+
+ // Check for a command response error
+ auto resIsMasterStatus = std::move(swCommandResponse.getValue().commandStatus);
+ if (!resIsMasterStatus.isOK()) {
+ return resIsMasterStatus.withContext(str::stream()
+ << "Error running isMaster against "
+ << targeter->connectionString().toString());
+ }
+
+ auto resIsMaster = std::move(swCommandResponse.getValue().response);
+
+ // Fail if the node being added is a mongos.
+ const std::string msg = resIsMaster.getStringField("msg");
+ if (msg == "isdbgrid") {
+ return {ErrorCodes::IllegalOperation, "cannot add a mongos as a shard"};
+ }
+
+ // Extract the maxWireVersion so we can verify that the node being added has a binary version
+ // greater than or equal to the cluster's featureCompatibilityVersion. We expect an incompatible
+ // binary node to be unable to communicate, returning an IncompatibleServerVersion error,
+ // because of our internal wire version protocol. So we can safely invariant here that the node
+ // is compatible.
+ long long maxWireVersion;
+ Status status = bsonExtractIntegerField(resIsMaster, "maxWireVersion", &maxWireVersion);
+ if (!status.isOK()) {
+ return status.withContext(str::stream() << "isMaster returned invalid 'maxWireVersion' "
+ << "field when attempting to add "
+ << connectionString.toString()
+ << " as a shard");
+ }
+ if (serverGlobalParams.featureCompatibility.getVersion() >
+ ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo36) {
+ // If FCV 4.0, or upgrading to / downgrading from, wire version must be LATEST.
+ invariant(maxWireVersion == WireVersion::LATEST_WIRE_VERSION);
+ } else if (serverGlobalParams.featureCompatibility.getVersion() >
+ ServerGlobalParams::FeatureCompatibility::Version::kFullyDowngradedTo34 &&
+ serverGlobalParams.featureCompatibility.getVersion() <=
+ ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo36) {
+ // If FCV 3.6, or upgrading to / downgrading from, wire version must be v3.6
+ // LATEST_WIRE_VERSION or greater.
+ invariant(maxWireVersion >= WireVersion::LATEST_WIRE_VERSION - 1);
+ } else {
+ // If FCV 3.4, wire version cannot be less than v3.4 LATEST_WIRE_VERSION.
+ invariant(serverGlobalParams.featureCompatibility.getVersion() ==
+ ServerGlobalParams::FeatureCompatibility::Version::kFullyDowngradedTo34);
+ invariant(maxWireVersion >= WireVersion::LATEST_WIRE_VERSION - 2);
+ }
+
+ // Check whether there is a master. If there isn't, the replica set may not have been
+ // initiated. If the connection is a standalone, it will return true for isMaster.
+ bool isMaster;
+ status = bsonExtractBooleanField(resIsMaster, "ismaster", &isMaster);
+ if (!status.isOK()) {
+ return status.withContext(str::stream() << "isMaster returned invalid 'ismaster' "
+ << "field when attempting to add "
+ << connectionString.toString()
+ << " as a shard");
+ }
+ if (!isMaster) {
+ return {ErrorCodes::NotMaster,
+ str::stream()
+ << connectionString.toString()
+ << " does not have a master. If this is a replica set, ensure that it has a"
+ << " healthy primary and that the set has been properly initiated."};
+ }
+
+ const std::string providedSetName = connectionString.getSetName();
+ const std::string foundSetName = resIsMaster["setName"].str();
+
+ // Make sure the specified replica set name (if any) matches the actual shard's replica set
+ if (providedSetName.empty() && !foundSetName.empty()) {
+ return {ErrorCodes::OperationFailed,
+ str::stream() << "host is part of set " << foundSetName << "; "
+ << "use replica set url format "
+ << "<setname>/<server1>,<server2>, ..."};
+ }
+
+ if (!providedSetName.empty() && foundSetName.empty()) {
+ return {ErrorCodes::OperationFailed,
+ str::stream() << "host did not return a set name; "
+ << "is the replica set still initializing? "
+ << resIsMaster};
+ }
+
+    // Make sure the set name specified in the connection string matches the one that its hosts
+    // actually belong to
+ if (!providedSetName.empty() && (providedSetName != foundSetName)) {
+ return {ErrorCodes::OperationFailed,
+ str::stream() << "the provided connection string (" << connectionString.toString()
+ << ") does not match the actual set name "
+ << foundSetName};
+ }
+
+ // Is it a config server?
+ if (resIsMaster.hasField("configsvr")) {
+ return {ErrorCodes::OperationFailed,
+ str::stream() << "Cannot add " << connectionString.toString()
+ << " as a shard since it is a config server"};
+ }
+
+ // If the shard is part of a replica set, make sure all the hosts mentioned in the connection
+ // string are part of the set. It is fine if not all members of the set are mentioned in the
+ // connection string, though.
+ if (!providedSetName.empty()) {
+ std::set<std::string> hostSet;
+
+ BSONObjIterator iter(resIsMaster["hosts"].Obj());
+ while (iter.more()) {
+ hostSet.insert(iter.next().String()); // host:port
+ }
+
+ if (resIsMaster["passives"].isABSONObj()) {
+ BSONObjIterator piter(resIsMaster["passives"].Obj());
+ while (piter.more()) {
+ hostSet.insert(piter.next().String()); // host:port
+ }
+ }
+
+ if (resIsMaster["arbiters"].isABSONObj()) {
+ BSONObjIterator piter(resIsMaster["arbiters"].Obj());
+ while (piter.more()) {
+ hostSet.insert(piter.next().String()); // host:port
+ }
+ }
+
+ for (const auto& hostEntry : connectionString.getServers()) {
+ const auto& host = hostEntry.toString(); // host:port
+ if (hostSet.find(host) == hostSet.end()) {
+ return {ErrorCodes::OperationFailed,
+ str::stream() << "in seed list " << connectionString.toString() << ", host "
+ << host
+ << " does not belong to replica set "
+ << foundSetName
+ << "; found "
+ << resIsMaster.toString()};
+ }
+ }
+ }
+
+ std::string actualShardName;
+
+ if (shardProposedName) {
+ actualShardName = *shardProposedName;
+ } else if (!foundSetName.empty()) {
+ // Default it to the name of the replica set
+ actualShardName = foundSetName;
+ }
+
+ // Disallow adding shard replica set with name 'config'
+ if (actualShardName == NamespaceString::kConfigDb) {
+ return {ErrorCodes::BadValue, "use of shard replica set with name 'config' is not allowed"};
+ }
+
+ // Retrieve the most up to date connection string that we know from the replica set monitor (if
+ // this is a replica set shard, otherwise it will be the same value as connectionString).
+ ConnectionString actualShardConnStr = targeter->connectionString();
+
+ ShardType shard;
+ shard.setName(actualShardName);
+ shard.setHost(actualShardConnStr.toString());
+ shard.setState(ShardType::ShardState::kShardAware);
+
+ return shard;
+}
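+
+// For reference (editor's illustration; the values are assumptions, not
+// captured from a real server), a minimal isMaster response that passes the
+// checks above for a connection string "rs0/a:27017,b:27017" would be:
+//   { ismaster: true, setName: "rs0", hosts: [ "a:27017", "b:27017" ],
+//     maxWireVersion: <value matching the cluster FCV>, ok: 1 }
+// A mongos (msg: "isdbgrid") or a node carrying a "configsvr" field is
+// rejected before the replica set membership checks run.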
+
+Status ShardingCatalogManager::_dropSessionsCollection(
+ OperationContext* opCtx, std::shared_ptr<RemoteCommandTargeter> targeter) {
+
+ BSONObjBuilder builder;
+ builder.append("drop", SessionsCollection::kSessionsCollection.toString());
+ {
+ BSONObjBuilder wcBuilder(builder.subobjStart("writeConcern"));
+ wcBuilder.append("w", "majority");
+ }
+
+ auto swCommandResponse = _runCommandForAddShard(
+ opCtx, targeter.get(), SessionsCollection::kSessionsDb.toString(), builder.done());
+ if (!swCommandResponse.isOK()) {
+ return swCommandResponse.getStatus();
+ }
+
+ auto cmdStatus = std::move(swCommandResponse.getValue().commandStatus);
+ if (!cmdStatus.isOK() && cmdStatus.code() != ErrorCodes::NamespaceNotFound) {
+ return cmdStatus;
+ }
+
+ return Status::OK();
+}
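+
+// Schematically (editor's note), the command assembled above is
+//   { drop: <SessionsCollection::kSessionsCollection>,
+//     writeConcern: { w: "majority" } }
+// run against SessionsCollection::kSessionsDb. NamespaceNotFound is tolerated
+// because a shard that never had the collection needs no cleanup.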
+
+StatusWith<std::vector<std::string>> ShardingCatalogManager::_getDBNamesListFromShard(
+ OperationContext* opCtx, std::shared_ptr<RemoteCommandTargeter> targeter) {
+
+ auto swCommandResponse = _runCommandForAddShard(
+ opCtx, targeter.get(), "admin", BSON("listDatabases" << 1 << "nameOnly" << true));
+ if (!swCommandResponse.isOK()) {
+ return swCommandResponse.getStatus();
+ }
+
+ auto cmdStatus = std::move(swCommandResponse.getValue().commandStatus);
+ if (!cmdStatus.isOK()) {
+ return cmdStatus;
+ }
+
+ auto cmdResult = std::move(swCommandResponse.getValue().response);
+
+ std::vector<std::string> dbNames;
+
+ for (const auto& dbEntry : cmdResult["databases"].Obj()) {
+ const auto& dbName = dbEntry["name"].String();
+
+ if (!(dbName == NamespaceString::kAdminDb || dbName == NamespaceString::kLocalDb ||
+ dbName == NamespaceString::kConfigDb)) {
+ dbNames.push_back(dbName);
+ }
+ }
+
+ return dbNames;
+}
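+
+// Example (editor's illustration with a hypothetical response): for
+//   { databases: [ { name: "admin" }, { name: "local" },
+//                  { name: "config" }, { name: "foo" } ], ok: 1 }
+// this helper returns only { "foo" }, since admin, local and config are
+// filtered out above.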
+
+StatusWith<std::string> ShardingCatalogManager::addShard(
+ OperationContext* opCtx,
+ const std::string* shardProposedName,
+ const ConnectionString& shardConnectionString,
+ const long long maxSize) {
+ if (shardConnectionString.type() == ConnectionString::INVALID) {
+ return {ErrorCodes::BadValue, "Invalid connection string"};
+ }
+
+ if (shardProposedName && shardProposedName->empty()) {
+ return {ErrorCodes::BadValue, "shard name cannot be empty"};
+ }
+
+ // Only one addShard operation can be in progress at a time.
+ Lock::ExclusiveLock lk(opCtx->lockState(), _kShardMembershipLock);
+
+ // Check if this shard has already been added (can happen in the case of a retry after a network
+ // error, for example) and thus this addShard request should be considered a no-op.
+ auto existingShard =
+ _checkIfShardExists(opCtx, shardConnectionString, shardProposedName, maxSize);
+ if (!existingShard.isOK()) {
+ return existingShard.getStatus();
+ }
+ if (existingShard.getValue()) {
+ // These hosts already belong to an existing shard, so report success and terminate the
+ // addShard request. Make sure to set the last optime for the client to the system last
+ // optime so that we'll still wait for replication so that this state is visible in the
+ // committed snapshot.
+ repl::ReplClientInfo::forClient(opCtx->getClient()).setLastOpToSystemLastOpTime(opCtx);
+ return existingShard.getValue()->getName();
+ }
+
+ // Force a reload of the ShardRegistry to ensure that, in case this addShard is to re-add a
+ // replica set that has recently been removed, we have detached the ReplicaSetMonitor for the
+ // set with that setName from the ReplicaSetMonitorManager and will create a new
+ // ReplicaSetMonitor when targeting the set below.
+ // Note: This is necessary because as of 3.4, removeShard is performed by mongos (unlike
+ // addShard), so the ShardRegistry is not synchronously reloaded on the config server when a
+ // shard is removed.
+ if (!Grid::get(opCtx)->shardRegistry()->reload(opCtx)) {
+ // If the first reload joined an existing one, call reload again to ensure the reload is
+ // fresh.
+ Grid::get(opCtx)->shardRegistry()->reload(opCtx);
+ }
+
+ // TODO: Don't create a detached Shard object, create a detached RemoteCommandTargeter instead.
+ const std::shared_ptr<Shard> shard{
+ Grid::get(opCtx)->shardRegistry()->createConnection(shardConnectionString)};
+ invariant(shard);
+ auto targeter = shard->getTargeter();
+
+ auto stopMonitoringGuard = MakeGuard([&] {
+ if (shardConnectionString.type() == ConnectionString::SET) {
+            // This is a workaround for the case where we could have some bad shard being
+ // requested to be added and we put that bad connection string on the global replica set
+ // monitor registry. It needs to be cleaned up so that when a correct replica set is
+ // added, it will be recreated.
+ ReplicaSetMonitor::remove(shardConnectionString.getSetName());
+ }
+ });
+
+ // Validate the specified connection string may serve as shard at all
+ auto shardStatus =
+ _validateHostAsShard(opCtx, targeter, shardProposedName, shardConnectionString);
+ if (!shardStatus.isOK()) {
+ return shardStatus.getStatus();
+ }
+ ShardType& shardType = shardStatus.getValue();
+
+ // Check that none of the existing shard candidate's dbs exist already
+ auto dbNamesStatus = _getDBNamesListFromShard(opCtx, targeter);
+ if (!dbNamesStatus.isOK()) {
+ return dbNamesStatus.getStatus();
+ }
+
+ for (const auto& dbName : dbNamesStatus.getValue()) {
+ auto dbt = Grid::get(opCtx)->catalogClient()->getDatabase(
+ opCtx, dbName, repl::ReadConcernLevel::kLocalReadConcern);
+ if (dbt.isOK()) {
+ const auto& dbDoc = dbt.getValue().value;
+ return Status(ErrorCodes::OperationFailed,
+ str::stream() << "can't add shard "
+ << "'"
+ << shardConnectionString.toString()
+ << "'"
+ << " because a local database '"
+ << dbName
+                                        << "' already exists on shard "
+ << dbDoc.getPrimary());
+ } else if (dbt != ErrorCodes::NamespaceNotFound) {
+ return dbt.getStatus();
+ }
+ }
+
+ // Check that the shard candidate does not have a local config.system.sessions collection
+ auto res = _dropSessionsCollection(opCtx, targeter);
+
+ if (!res.isOK()) {
+ return res.withContext(
+ "can't add shard with a local copy of config.system.sessions, please drop this "
+ "collection from the shard manually and try again.");
+ }
+
+ // If a name for a shard wasn't provided, generate one
+ if (shardType.getName().empty()) {
+ auto result = generateNewShardName(opCtx);
+ if (!result.isOK()) {
+ return result.getStatus();
+ }
+ shardType.setName(result.getValue());
+ }
+
+ if (maxSize > 0) {
+ shardType.setMaxSizeMB(maxSize);
+ }
+
+ // Insert a shardIdentity document onto the shard. This also triggers sharding initialization on
+ // the shard.
+ LOG(2) << "going to insert shardIdentity document into shard: " << shardType;
+ auto commandRequest = createShardIdentityUpsertForAddShard(opCtx, shardType.getName());
+ auto swCommandResponse = _runCommandForAddShard(opCtx, targeter.get(), "admin", commandRequest);
+ if (!swCommandResponse.isOK()) {
+ return swCommandResponse.getStatus();
+ }
+ auto commandResponse = std::move(swCommandResponse.getValue());
+ BatchedCommandResponse batchResponse;
+ auto batchResponseStatus =
+ Shard::CommandResponse::processBatchWriteResponse(commandResponse, &batchResponse);
+ if (!batchResponseStatus.isOK()) {
+ return batchResponseStatus;
+ }
+
+ // The featureCompatibilityVersion should be the same throughout the cluster. We don't
+ // explicitly send writeConcern majority to the added shard, because a 3.4 mongod will reject
+ // it (setFCV did not support writeConcern until 3.6), and a 3.6 mongod will still default to
+ // majority writeConcern.
+ //
+ // TODO SERVER-32045: propagate the user's writeConcern
+ auto versionResponse = _runCommandForAddShard(
+ opCtx,
+ targeter.get(),
+ "admin",
+ BSON(FeatureCompatibilityVersion::kCommandName << FeatureCompatibilityVersion::toString(
+ serverGlobalParams.featureCompatibility.getVersion())));
+ if (!versionResponse.isOK()) {
+ return versionResponse.getStatus();
+ }
+
+ if (!versionResponse.getValue().commandStatus.isOK()) {
+ return versionResponse.getValue().commandStatus;
+ }
+
+ log() << "going to insert new entry for shard into config.shards: " << shardType.toString();
+
+ Status result = Grid::get(opCtx)->catalogClient()->insertConfigDocument(
+ opCtx,
+ ShardType::ConfigNS,
+ shardType.toBSON(),
+ ShardingCatalogClient::kMajorityWriteConcern);
+ if (!result.isOK()) {
+ log() << "error adding shard: " << shardType.toBSON() << " err: " << result.reason();
+ return result;
+ }
+
+ // Add all databases which were discovered on the new shard
+ for (const auto& dbName : dbNamesStatus.getValue()) {
+ DatabaseType dbt(dbName, shardType.getName(), false);
+ Status status = Grid::get(opCtx)->catalogClient()->updateDatabase(opCtx, dbName, dbt);
+ if (!status.isOK()) {
+ log() << "adding shard " << shardConnectionString.toString()
+ << " even though could not add database " << dbName;
+ }
+ }
+
+ // Record in changelog
+ BSONObjBuilder shardDetails;
+ shardDetails.append("name", shardType.getName());
+ shardDetails.append("host", shardConnectionString.toString());
+
+ Grid::get(opCtx)
+ ->catalogClient()
+ ->logChange(
+ opCtx, "addShard", "", shardDetails.obj(), ShardingCatalogClient::kMajorityWriteConcern)
+ .transitional_ignore();
+
+ // Ensure the added shard is visible to this process.
+ auto shardRegistry = Grid::get(opCtx)->shardRegistry();
+ if (!shardRegistry->getShard(opCtx, shardType.getName()).isOK()) {
+ return {ErrorCodes::OperationFailed,
+ "Could not find shard metadata for shard after adding it. This most likely "
+ "indicates that the shard was removed immediately after it was added."};
+ }
+ stopMonitoringGuard.Dismiss();
+
+ return shardType.getName();
+}
+
+StatusWith<ShardDrainingStatus> ShardingCatalogManager::removeShard(OperationContext* opCtx,
+ const ShardId& shardId) {
+ // Check preconditions for removing the shard
+ std::string name = shardId.toString();
+ auto countStatus = _runCountCommandOnConfig(
+ opCtx,
+ ShardType::ConfigNS,
+ BSON(ShardType::name() << NE << name << ShardType::draining(true)));
+ if (!countStatus.isOK()) {
+ return countStatus.getStatus();
+ }
+ if (countStatus.getValue() > 0) {
+ return Status(ErrorCodes::ConflictingOperationInProgress,
+ "Can't have more than one draining shard at a time");
+ }
+
+ countStatus =
+ _runCountCommandOnConfig(opCtx, ShardType::ConfigNS, BSON(ShardType::name() << NE << name));
+ if (!countStatus.isOK()) {
+ return countStatus.getStatus();
+ }
+ if (countStatus.getValue() == 0) {
+ return Status(ErrorCodes::IllegalOperation, "Can't remove last shard");
+ }
+
+ // Figure out if shard is already draining
+ countStatus = _runCountCommandOnConfig(
+ opCtx, ShardType::ConfigNS, BSON(ShardType::name() << name << ShardType::draining(true)));
+ if (!countStatus.isOK()) {
+ return countStatus.getStatus();
+ }
+
+ auto* const shardRegistry = Grid::get(opCtx)->shardRegistry();
+
+ if (countStatus.getValue() == 0) {
+ log() << "going to start draining shard: " << name;
+
+ auto updateStatus = Grid::get(opCtx)->catalogClient()->updateConfigDocument(
+ opCtx,
+ ShardType::ConfigNS,
+ BSON(ShardType::name() << name),
+ BSON("$set" << BSON(ShardType::draining(true))),
+ false,
+ ShardingCatalogClient::kLocalWriteConcern);
+ if (!updateStatus.isOK()) {
+ log() << "error starting removeShard: " << name
+ << causedBy(redact(updateStatus.getStatus()));
+ return updateStatus.getStatus();
+ }
+
+ shardRegistry->reload(opCtx);
+
+ // Record start in changelog
+ Grid::get(opCtx)
+ ->catalogClient()
+ ->logChange(opCtx,
+ "removeShard.start",
+ "",
+ BSON("shard" << name),
+ ShardingCatalogClient::kLocalWriteConcern)
+ .transitional_ignore();
+
+ return ShardDrainingStatus::STARTED;
+ }
+
+ // Draining has already started, now figure out how many chunks and databases are still on the
+ // shard.
+ countStatus =
+ _runCountCommandOnConfig(opCtx, ChunkType::ConfigNS, BSON(ChunkType::shard(name)));
+ if (!countStatus.isOK()) {
+ return countStatus.getStatus();
+ }
+ const long long chunkCount = countStatus.getValue();
+
+ countStatus =
+ _runCountCommandOnConfig(opCtx, DatabaseType::ConfigNS, BSON(DatabaseType::primary(name)));
+ if (!countStatus.isOK()) {
+ return countStatus.getStatus();
+ }
+ const long long databaseCount = countStatus.getValue();
+
+ if (chunkCount > 0 || databaseCount > 0) {
+ // Still more draining to do
+ LOG(0) << "chunkCount: " << chunkCount;
+ LOG(0) << "databaseCount: " << databaseCount;
+ return ShardDrainingStatus::ONGOING;
+ }
+
+ // Draining is done, now finish removing the shard.
+ log() << "going to remove shard: " << name;
+ audit::logRemoveShard(opCtx->getClient(), name);
+
+ Status status = Grid::get(opCtx)->catalogClient()->removeConfigDocuments(
+ opCtx,
+ ShardType::ConfigNS,
+ BSON(ShardType::name() << name),
+ ShardingCatalogClient::kLocalWriteConcern);
+ if (!status.isOK()) {
+ log() << "Error concluding removeShard operation on: " << name
+ << "; err: " << status.reason();
+ return status;
+ }
+
+ shardConnectionPool.removeHost(name);
+ ReplicaSetMonitor::remove(name);
+
+ shardRegistry->reload(opCtx);
+
+ // Record finish in changelog
+ Grid::get(opCtx)
+ ->catalogClient()
+ ->logChange(opCtx,
+ "removeShard",
+ "",
+ BSON("shard" << name),
+ ShardingCatalogClient::kLocalWriteConcern)
+ .transitional_ignore();
+
+ return ShardDrainingStatus::COMPLETED;
+}
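+
+// Editor's sketch of the draining protocol implemented above:
+//   first removeShard call  -> sets draining: true, returns STARTED
+//   subsequent calls        -> return ONGOING while chunks or databases remain
+//   final call              -> deletes the config.shards entry, returns COMPLETED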
+
+void ShardingCatalogManager::appendConnectionStats(executor::ConnectionPoolStats* stats) {
+ _executorForAddShard->appendConnectionStats(stats);
+}
+
+BSONObj ShardingCatalogManager::createShardIdentityUpsertForAddShard(OperationContext* opCtx,
+ const std::string& shardName) {
+ BatchedCommandRequest request([&] {
+ write_ops::Update updateOp(NamespaceString::kServerConfigurationNamespace);
+ updateOp.setUpdates(
+ {[&] {
+ write_ops::UpdateOpEntry entry;
+ entry.setQ(BSON("_id"
+ << "shardIdentity"
+ << ShardIdentityType::shardName(shardName)
+ << ShardIdentityType::clusterId(
+ ClusterIdentityLoader::get(opCtx)->getClusterId())));
+ entry.setU(BSON("$set" << BSON(ShardIdentityType::configsvrConnString(
+ repl::ReplicationCoordinator::get(opCtx)
+ ->getConfig()
+ .getConnectionString()
+ .toString()))));
+ entry.setUpsert(true);
+ return entry;
+ }()});
+ return updateOp;
+ }());
+ request.setWriteConcern(ShardingCatalogClient::kMajorityWriteConcern.toBSON());
+
+ return request.toBSON();
+}
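+
+// The request built above has, schematically (editor's illustration; <...> are
+// placeholders, and field names follow the ShardIdentityType accessors used in
+// the code):
+//   q: { _id: "shardIdentity", shardName: <shardName>, clusterId: <cluster OID> }
+//   u: { $set: { <configsvrConnString field>: <config server connection string> } }
+//   upsert: true, with majority writeConcern.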
+
+// static
+StatusWith<ShardId> ShardingCatalogManager::_selectShardForNewDatabase(
+ OperationContext* opCtx, ShardRegistry* shardRegistry) {
+ vector<ShardId> allShardIds;
+
+ shardRegistry->getAllShardIds(&allShardIds);
+ if (allShardIds.empty()) {
+ shardRegistry->reload(opCtx);
+ shardRegistry->getAllShardIds(&allShardIds);
+
+ if (allShardIds.empty()) {
+ return Status(ErrorCodes::ShardNotFound, "No shards found");
+ }
+ }
+
+ ShardId candidateShardId = allShardIds[0];
+
+ auto candidateSizeStatus = shardutil::retrieveTotalShardSize(opCtx, candidateShardId);
+ if (!candidateSizeStatus.isOK()) {
+ return candidateSizeStatus.getStatus();
+ }
+
+ for (size_t i = 1; i < allShardIds.size(); i++) {
+ const ShardId shardId = allShardIds[i];
+
+ const auto sizeStatus = shardutil::retrieveTotalShardSize(opCtx, shardId);
+ if (!sizeStatus.isOK()) {
+ return sizeStatus.getStatus();
+ }
+
+ if (sizeStatus.getValue() < candidateSizeStatus.getValue()) {
+ candidateSizeStatus = sizeStatus;
+ candidateShardId = shardId;
+ }
+ }
+
+ return candidateShardId;
+}
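+
+// Illustrative example (editor's sketch): with reported total sizes of
+//   shard0: 12GB, shard1: 4GB, shard2: 9GB
+// the linear scan above selects shard1, i.e. the least-loaded shard becomes
+// the primary for the new database.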
+
+StatusWith<long long> ShardingCatalogManager::_runCountCommandOnConfig(OperationContext* opCtx,
+ const NamespaceString& nss,
+ BSONObj query) {
+ BSONObjBuilder countBuilder;
+ countBuilder.append("count", nss.coll());
+ countBuilder.append("query", query);
+
+ auto configShard = Grid::get(opCtx)->shardRegistry()->getConfigShard();
+ auto resultStatus =
+ configShard->runCommandWithFixedRetryAttempts(opCtx,
+ kConfigReadSelector,
+ nss.db().toString(),
+ countBuilder.done(),
+ Shard::kDefaultConfigCommandTimeout,
+ Shard::RetryPolicy::kIdempotent);
+ if (!resultStatus.isOK()) {
+ return resultStatus.getStatus();
+ }
+ if (!resultStatus.getValue().commandStatus.isOK()) {
+ return resultStatus.getValue().commandStatus;
+ }
+
+ auto responseObj = std::move(resultStatus.getValue().response);
+
+ long long result;
+ auto status = bsonExtractIntegerField(responseObj, "n", &result);
+ if (!status.isOK()) {
+ return status;
+ }
+
+ return result;
+}
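+
+// For example (editor's note, schematic), removeShard uses this helper to ask
+// the config server { count: <collection>, query: <filter> } style questions,
+// such as counting config.shards documents with draining: true, and reads the
+// result back from the "n" field of the response.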
+
+} // namespace mongo
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_split_chunk_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_split_chunk_test.cpp
new file mode 100644
index 00000000000..31d3a794d41
--- /dev/null
+++ b/src/mongo/db/s/config/sharding_catalog_manager_split_chunk_test.cpp
@@ -0,0 +1,379 @@
+/**
+ * Copyright (C) 2016 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects
+ * for all of the code used other than as permitted herein. If you modify
+ * file(s) with this exception, you may extend this exception to your
+ * version of the file(s), but you are not obligated to do so. If you do not
+ * wish to do so, delete this exception statement from your version. If you
+ * delete this exception statement from all source files in the program,
+ * then also delete it in the license file.
+ */
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/client/read_preference.h"
+#include "mongo/db/namespace_string.h"
+#include "mongo/db/s/config/sharding_catalog_manager.h"
+#include "mongo/s/catalog/type_chunk.h"
+#include "mongo/s/config_server_test_fixture.h"
+
+namespace mongo {
+namespace {
+
+using SplitChunkTest = ConfigServerTestFixture;
+
+const NamespaceString kNamespace("TestDB", "TestColl");
+
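+// Editor's note on the version assertions below: commitChunkSplit assigns each
+// resulting chunk the collection's next minor version, so splitting a chunk at
+// version (1, 0) once yields chunks at (1, 1) and (1, 2), a second split point
+// would add (1, 3), and a competing higher version in the collection bumps the
+// starting point instead.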
+TEST_F(SplitChunkTest, SplitExistingChunkCorrectlyShouldSucceed) {
+ ChunkType chunk;
+ chunk.setNS(kNamespace);
+
+ auto origVersion = ChunkVersion(1, 0, OID::gen());
+ chunk.setVersion(origVersion);
+ chunk.setShard(ShardId("shard0000"));
+
+ auto chunkMin = BSON("a" << 1);
+ auto chunkMax = BSON("a" << 10);
+ chunk.setMin(chunkMin);
+ chunk.setMax(chunkMax);
+
+ auto chunkSplitPoint = BSON("a" << 5);
+ std::vector<BSONObj> splitPoints{chunkSplitPoint};
+
+ setupChunks({chunk}).transitional_ignore();
+
+ ASSERT_OK(ShardingCatalogManager::get(operationContext())
+ ->commitChunkSplit(operationContext(),
+ NamespaceString("TestDB.TestColl"),
+ origVersion.epoch(),
+ ChunkRange(chunkMin, chunkMax),
+ splitPoints,
+ "shard0000"));
+
+ // First chunkDoc should have range [chunkMin, chunkSplitPoint]
+ auto chunkDocStatus = getChunkDoc(operationContext(), chunkMin);
+ ASSERT_OK(chunkDocStatus.getStatus());
+
+ auto chunkDoc = chunkDocStatus.getValue();
+ ASSERT_BSONOBJ_EQ(chunkSplitPoint, chunkDoc.getMax());
+
+ // Check for increment on first chunkDoc's minor version
+ ASSERT_EQ(origVersion.majorVersion(), chunkDoc.getVersion().majorVersion());
+ ASSERT_EQ(origVersion.minorVersion() + 1, chunkDoc.getVersion().minorVersion());
+
+ // Second chunkDoc should have range [chunkSplitPoint, chunkMax]
+ auto otherChunkDocStatus = getChunkDoc(operationContext(), chunkSplitPoint);
+ ASSERT_OK(otherChunkDocStatus.getStatus());
+
+ auto otherChunkDoc = otherChunkDocStatus.getValue();
+ ASSERT_BSONOBJ_EQ(chunkMax, otherChunkDoc.getMax());
+
+ // Check for increment on second chunkDoc's minor version
+ ASSERT_EQ(origVersion.majorVersion(), otherChunkDoc.getVersion().majorVersion());
+ ASSERT_EQ(origVersion.minorVersion() + 2, otherChunkDoc.getVersion().minorVersion());
+}
+
+TEST_F(SplitChunkTest, MultipleSplitsOnExistingChunkShouldSucceed) {
+ ChunkType chunk;
+ chunk.setNS(kNamespace);
+
+ auto origVersion = ChunkVersion(1, 0, OID::gen());
+ chunk.setVersion(origVersion);
+ chunk.setShard(ShardId("shard0000"));
+
+ auto chunkMin = BSON("a" << 1);
+ auto chunkMax = BSON("a" << 10);
+ chunk.setMin(chunkMin);
+ chunk.setMax(chunkMax);
+
+ auto chunkSplitPoint = BSON("a" << 5);
+ auto chunkSplitPoint2 = BSON("a" << 7);
+ std::vector<BSONObj> splitPoints{chunkSplitPoint, chunkSplitPoint2};
+
+ setupChunks({chunk}).transitional_ignore();
+
+ ASSERT_OK(ShardingCatalogManager::get(operationContext())
+ ->commitChunkSplit(operationContext(),
+ NamespaceString("TestDB.TestColl"),
+ origVersion.epoch(),
+ ChunkRange(chunkMin, chunkMax),
+ splitPoints,
+ "shard0000"));
+
+ // First chunkDoc should have range [chunkMin, chunkSplitPoint]
+ auto chunkDocStatus = getChunkDoc(operationContext(), chunkMin);
+ ASSERT_OK(chunkDocStatus.getStatus());
+
+ auto chunkDoc = chunkDocStatus.getValue();
+ ASSERT_BSONOBJ_EQ(chunkSplitPoint, chunkDoc.getMax());
+
+ // Check for increment on first chunkDoc's minor version
+ ASSERT_EQ(origVersion.majorVersion(), chunkDoc.getVersion().majorVersion());
+ ASSERT_EQ(origVersion.minorVersion() + 1, chunkDoc.getVersion().minorVersion());
+
+ // Second chunkDoc should have range [chunkSplitPoint, chunkSplitPoint2]
+ auto midChunkDocStatus = getChunkDoc(operationContext(), chunkSplitPoint);
+ ASSERT_OK(midChunkDocStatus.getStatus());
+
+ auto midChunkDoc = midChunkDocStatus.getValue();
+ ASSERT_BSONOBJ_EQ(chunkSplitPoint2, midChunkDoc.getMax());
+
+ // Check for increment on second chunkDoc's minor version
+ ASSERT_EQ(origVersion.majorVersion(), midChunkDoc.getVersion().majorVersion());
+ ASSERT_EQ(origVersion.minorVersion() + 2, midChunkDoc.getVersion().minorVersion());
+
+ // Third chunkDoc should have range [chunkSplitPoint2, chunkMax]
+ auto lastChunkDocStatus = getChunkDoc(operationContext(), chunkSplitPoint2);
+ ASSERT_OK(lastChunkDocStatus.getStatus());
+
+ auto lastChunkDoc = lastChunkDocStatus.getValue();
+ ASSERT_BSONOBJ_EQ(chunkMax, lastChunkDoc.getMax());
+
+ // Check for increment on third chunkDoc's minor version
+ ASSERT_EQ(origVersion.majorVersion(), lastChunkDoc.getVersion().majorVersion());
+ ASSERT_EQ(origVersion.minorVersion() + 3, lastChunkDoc.getVersion().minorVersion());
+}
+
+TEST_F(SplitChunkTest, NewSplitShouldClaimHighestVersion) {
+ ChunkType chunk, chunk2;
+ chunk.setNS(kNamespace);
+ chunk2.setNS(kNamespace);
+ auto collEpoch = OID::gen();
+
+ // set up first chunk
+ auto origVersion = ChunkVersion(1, 2, collEpoch);
+ chunk.setVersion(origVersion);
+ chunk.setShard(ShardId("shard0000"));
+
+ auto chunkMin = BSON("a" << 1);
+ auto chunkMax = BSON("a" << 10);
+ chunk.setMin(chunkMin);
+ chunk.setMax(chunkMax);
+
+ std::vector<BSONObj> splitPoints;
+ auto chunkSplitPoint = BSON("a" << 5);
+ splitPoints.push_back(chunkSplitPoint);
+
+ // set up second chunk (chunk2)
+ auto competingVersion = ChunkVersion(2, 1, collEpoch);
+ chunk2.setVersion(competingVersion);
+ chunk2.setShard(ShardId("shard0000"));
+ chunk2.setMin(BSON("a" << 10));
+ chunk2.setMax(BSON("a" << 20));
+
+ setupChunks({chunk, chunk2}).transitional_ignore();
+
+ ASSERT_OK(ShardingCatalogManager::get(operationContext())
+ ->commitChunkSplit(operationContext(),
+ NamespaceString("TestDB.TestColl"),
+ collEpoch,
+ ChunkRange(chunkMin, chunkMax),
+ splitPoints,
+ "shard0000"));
+
+ // First chunkDoc should have range [chunkMin, chunkSplitPoint]
+ auto chunkDocStatus = getChunkDoc(operationContext(), chunkMin);
+ ASSERT_OK(chunkDocStatus.getStatus());
+
+ auto chunkDoc = chunkDocStatus.getValue();
+ ASSERT_BSONOBJ_EQ(chunkSplitPoint, chunkDoc.getMax());
+
+ // Check for increment based on the competing chunk version
+ ASSERT_EQ(competingVersion.majorVersion(), chunkDoc.getVersion().majorVersion());
+ ASSERT_EQ(competingVersion.minorVersion() + 1, chunkDoc.getVersion().minorVersion());
+
+ // Second chunkDoc should have range [chunkSplitPoint, chunkMax]
+ auto otherChunkDocStatus = getChunkDoc(operationContext(), chunkSplitPoint);
+ ASSERT_OK(otherChunkDocStatus.getStatus());
+
+ auto otherChunkDoc = otherChunkDocStatus.getValue();
+ ASSERT_BSONOBJ_EQ(chunkMax, otherChunkDoc.getMax());
+
+ // Check for increment based on the competing chunk version
+ ASSERT_EQ(competingVersion.majorVersion(), otherChunkDoc.getVersion().majorVersion());
+ ASSERT_EQ(competingVersion.minorVersion() + 2, otherChunkDoc.getVersion().minorVersion());
+}
+
+TEST_F(SplitChunkTest, PreConditionFailErrors) {
+ ChunkType chunk;
+ chunk.setNS(kNamespace);
+
+ auto origVersion = ChunkVersion(1, 0, OID::gen());
+ chunk.setVersion(origVersion);
+ chunk.setShard(ShardId("shard0000"));
+
+ auto chunkMin = BSON("a" << 1);
+ auto chunkMax = BSON("a" << 10);
+ chunk.setMin(chunkMin);
+ chunk.setMax(chunkMax);
+
+ std::vector<BSONObj> splitPoints;
+ auto chunkSplitPoint = BSON("a" << 5);
+ splitPoints.push_back(chunkSplitPoint);
+
+ setupChunks({chunk}).transitional_ignore();
+
+ auto splitStatus = ShardingCatalogManager::get(operationContext())
+ ->commitChunkSplit(operationContext(),
+ NamespaceString("TestDB.TestColl"),
+ origVersion.epoch(),
+ ChunkRange(chunkMin, BSON("a" << 7)),
+ splitPoints,
+ "shard0000");
+ ASSERT_EQ(ErrorCodes::BadValue, splitStatus);
+}
+
+TEST_F(SplitChunkTest, NonExistingNamespaceErrors) {
+ ChunkType chunk;
+ chunk.setNS(kNamespace);
+
+ auto origVersion = ChunkVersion(1, 0, OID::gen());
+ chunk.setVersion(origVersion);
+ chunk.setShard(ShardId("shard0000"));
+
+ auto chunkMin = BSON("a" << 1);
+ auto chunkMax = BSON("a" << 10);
+ chunk.setMin(chunkMin);
+ chunk.setMax(chunkMax);
+
+ std::vector<BSONObj> splitPoints{BSON("a" << 5)};
+
+ setupChunks({chunk}).transitional_ignore();
+
+ auto splitStatus = ShardingCatalogManager::get(operationContext())
+ ->commitChunkSplit(operationContext(),
+ NamespaceString("TestDB.NonExistingColl"),
+ origVersion.epoch(),
+ ChunkRange(chunkMin, chunkMax),
+ splitPoints,
+ "shard0000");
+ ASSERT_EQ(ErrorCodes::IllegalOperation, splitStatus);
+}
+
+TEST_F(SplitChunkTest, NonMatchingEpochsOfChunkAndRequestErrors) {
+ ChunkType chunk;
+ chunk.setNS(kNamespace);
+
+ auto origVersion = ChunkVersion(1, 0, OID::gen());
+ chunk.setVersion(origVersion);
+ chunk.setShard(ShardId("shard0000"));
+
+ auto chunkMin = BSON("a" << 1);
+ auto chunkMax = BSON("a" << 10);
+ chunk.setMin(chunkMin);
+ chunk.setMax(chunkMax);
+
+ std::vector<BSONObj> splitPoints{BSON("a" << 5)};
+
+ setupChunks({chunk}).transitional_ignore();
+
+ auto splitStatus = ShardingCatalogManager::get(operationContext())
+ ->commitChunkSplit(operationContext(),
+ NamespaceString("TestDB.TestColl"),
+ OID::gen(),
+ ChunkRange(chunkMin, chunkMax),
+ splitPoints,
+ "shard0000");
+ ASSERT_EQ(ErrorCodes::StaleEpoch, splitStatus);
+}
+
+TEST_F(SplitChunkTest, SplitPointsOutOfOrderShouldFail) {
+ ChunkType chunk;
+ chunk.setNS(kNamespace);
+
+ auto origVersion = ChunkVersion(1, 0, OID::gen());
+ chunk.setVersion(origVersion);
+ chunk.setShard(ShardId("shard0000"));
+
+ auto chunkMin = BSON("a" << 1);
+ auto chunkMax = BSON("a" << 10);
+ chunk.setMin(chunkMin);
+ chunk.setMax(chunkMax);
+
+ std::vector<BSONObj> splitPoints{BSON("a" << 5), BSON("a" << 4)};
+
+ setupChunks({chunk}).transitional_ignore();
+
+ auto splitStatus = ShardingCatalogManager::get(operationContext())
+ ->commitChunkSplit(operationContext(),
+ NamespaceString("TestDB.TestColl"),
+ origVersion.epoch(),
+ ChunkRange(chunkMin, chunkMax),
+ splitPoints,
+ "shard0000");
+ ASSERT_EQ(ErrorCodes::InvalidOptions, splitStatus);
+}
+
+TEST_F(SplitChunkTest, SplitPointsOutOfRangeAtMinShouldFail) {
+ ChunkType chunk;
+ chunk.setNS(kNamespace);
+
+ auto origVersion = ChunkVersion(1, 0, OID::gen());
+ chunk.setVersion(origVersion);
+ chunk.setShard(ShardId("shard0000"));
+
+ auto chunkMin = BSON("a" << 1);
+ auto chunkMax = BSON("a" << 10);
+ chunk.setMin(chunkMin);
+ chunk.setMax(chunkMax);
+
+ std::vector<BSONObj> splitPoints{BSON("a" << 0), BSON("a" << 5)};
+
+ setupChunks({chunk}).transitional_ignore();
+
+ auto splitStatus = ShardingCatalogManager::get(operationContext())
+ ->commitChunkSplit(operationContext(),
+ NamespaceString("TestDB.TestColl"),
+ origVersion.epoch(),
+ ChunkRange(chunkMin, chunkMax),
+ splitPoints,
+ "shard0000");
+ ASSERT_EQ(ErrorCodes::InvalidOptions, splitStatus);
+}
+
+TEST_F(SplitChunkTest, SplitPointsOutOfRangeAtMaxShouldFail) {
+ ChunkType chunk;
+ chunk.setNS(kNamespace);
+
+ auto origVersion = ChunkVersion(1, 0, OID::gen());
+ chunk.setVersion(origVersion);
+ chunk.setShard(ShardId("shard0000"));
+
+ auto chunkMin = BSON("a" << 1);
+ auto chunkMax = BSON("a" << 10);
+ chunk.setMin(chunkMin);
+ chunk.setMax(chunkMax);
+
+ std::vector<BSONObj> splitPoints{BSON("a" << 5), BSON("a" << 15)};
+
+ setupChunks({chunk}).transitional_ignore();
+
+ auto splitStatus = ShardingCatalogManager::get(operationContext())
+ ->commitChunkSplit(operationContext(),
+ NamespaceString("TestDB.TestColl"),
+ origVersion.epoch(),
+ ChunkRange(chunkMin, chunkMax),
+ splitPoints,
+ "shard0000");
+ ASSERT_EQ(ErrorCodes::InvalidOptions, splitStatus);
+}
+
+} // namespace
+} // namespace mongo
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_zone_operations.cpp b/src/mongo/db/s/config/sharding_catalog_manager_zone_operations.cpp
new file mode 100644
index 00000000000..37d783a9a02
--- /dev/null
+++ b/src/mongo/db/s/config/sharding_catalog_manager_zone_operations.cpp
@@ -0,0 +1,397 @@
+/**
+ * Copyright (C) 2017 MongoDB Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * As a special exception, the copyright holders give permission to link the
+ * code of portions of this program with the OpenSSL library under certain
+ * conditions as described in each individual source file and distribute
+ * linked combinations including the program with the OpenSSL library. You
+ * must comply with the GNU Affero General Public License in all respects for
+ * all of the code used other than as permitted herein. If you modify file(s)
+ * with this exception, you may extend this exception to your version of the
+ * file(s), but you are not obligated to do so. If you do not wish to do so,
+ * delete this exception statement from your version. If you delete this
+ * exception statement from all source files in the program, then also delete
+ * it in the license file.
+ */
+
+#define MONGO_LOG_DEFAULT_COMPONENT ::mongo::logger::LogComponent::kSharding
+
+#include "mongo/platform/basic.h"
+
+#include "mongo/db/s/config/sharding_catalog_manager.h"
+
+#include "mongo/base/status_with.h"
+#include "mongo/client/read_preference.h"
+#include "mongo/db/operation_context.h"
+#include "mongo/db/s/balancer/balancer_policy.h"
+#include "mongo/db/write_concern_options.h"
+#include "mongo/s/catalog/sharding_catalog_client.h"
+#include "mongo/s/catalog/type_collection.h"
+#include "mongo/s/catalog/type_shard.h"
+#include "mongo/s/catalog/type_tags.h"
+#include "mongo/s/client/shard.h"
+#include "mongo/s/client/shard_registry.h"
+#include "mongo/s/grid.h"
+#include "mongo/s/shard_key_pattern.h"
+#include "mongo/util/log.h"
+
+namespace mongo {
+namespace {
+
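+// Zone metadata reads always target the config server primary with local read concern. Writes
+// use w:1 without waiting for replication here; stronger durability, where needed, is left to
+// the calling command's write concern handling.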
+const ReadPreferenceSetting kConfigPrimarySelector(ReadPreference::PrimaryOnly);
+const WriteConcernOptions kNoWaitWriteConcern(1, WriteConcernOptions::SyncMode::UNSET, Seconds(0));
+
+/**
+ * Checks if the given key range for the given namespace conflicts with an existing key range.
+ * Note: range should have the full shard key.
+ * Returns ErrorCodes::RangeOverlapConflict if an overlap is detected.
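+ * For example, if [{a: 1}, {a: 10}) is already assigned to a zone, then any range that
+ * intersects it, such as [{a: 5}, {a: 20}), is rejected.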
+ */
+Status checkForOverlappedZonedKeyRange(OperationContext* opCtx,
+ Shard* configServer,
+ const NamespaceString& nss,
+ const ChunkRange& range,
+ const std::string& zoneName,
+ const KeyPattern& shardKeyPattern) {
+ DistributionStatus chunkDist(nss, ShardToChunksMap{});
+
+ auto tagStatus = configServer->exhaustiveFindOnConfig(opCtx,
+ kConfigPrimarySelector,
+ repl::ReadConcernLevel::kLocalReadConcern,
+ TagsType::ConfigNS,
+ BSON(TagsType::ns(nss.ns())),
+ BSONObj(),
+ 0);
+ if (!tagStatus.isOK()) {
+ return tagStatus.getStatus();
+ }
+
+ const auto& tagDocList = tagStatus.getValue().docs;
+ for (const auto& tagDoc : tagDocList) {
+ auto tagParseStatus = TagsType::fromBSON(tagDoc);
+ if (!tagParseStatus.isOK()) {
+ return tagParseStatus.getStatus();
+ }
+
+ // Always extend ranges to full shard key to be compatible with tags created before
+ // the zone commands were implemented.
+ const auto& parsedTagDoc = tagParseStatus.getValue();
+ auto overlapStatus = chunkDist.addRangeToZone(
+ ZoneRange(shardKeyPattern.extendRangeBound(parsedTagDoc.getMinKey(), false),
+ shardKeyPattern.extendRangeBound(parsedTagDoc.getMaxKey(), false),
+ parsedTagDoc.getTag()));
+ if (!overlapStatus.isOK()) {
+ return overlapStatus;
+ }
+ }
+
+ auto overlapStatus =
+ chunkDist.addRangeToZone(ZoneRange(range.getMin(), range.getMax(), zoneName));
+ if (!overlapStatus.isOK()) {
+ return overlapStatus;
+ }
+
+ return Status::OK();
+}
+
+/**
+ * Returns a new range based on the given range with the full shard key.
+ * Returns:
+ * - ErrorCodes::NamespaceNotSharded if nss is not sharded.
+ * - ErrorCodes::ShardKeyNotFound if range is not compatible with the shard key of nss (for
+ * example, if it is not a prefix of the shard key).
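+ * For example, given a shard key {a: 1, b: 1}, the range [{a: 1}, {a: 10}) is extended to
+ * [{a: 1, b: MinKey}, {a: 10, b: MinKey}).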
+ */
+StatusWith<ChunkRange> includeFullShardKey(OperationContext* opCtx,
+ Shard* configServer,
+ const NamespaceString& nss,
+ const ChunkRange& range,
+ KeyPattern* shardKeyPatternOut) {
+ auto findCollStatus =
+ configServer->exhaustiveFindOnConfig(opCtx,
+ kConfigPrimarySelector,
+ repl::ReadConcernLevel::kLocalReadConcern,
+ CollectionType::ConfigNS,
+ BSON(CollectionType::fullNs(nss.ns())),
+ BSONObj(),
+ 1);
+
+ if (!findCollStatus.isOK()) {
+ return findCollStatus.getStatus();
+ }
+
+ const auto& findCollResult = findCollStatus.getValue().docs;
+
+ if (findCollResult.size() < 1) {
+ return {ErrorCodes::NamespaceNotSharded, str::stream() << nss.ns() << " is not sharded"};
+ }
+
+ auto parseStatus = CollectionType::fromBSON(findCollResult.front());
+ if (!parseStatus.isOK()) {
+ return parseStatus.getStatus();
+ }
+
+ auto collDoc = parseStatus.getValue();
+ if (collDoc.getDropped()) {
+ return {ErrorCodes::NamespaceNotSharded, str::stream() << nss.ns() << " is not sharded"};
+ }
+
+ const auto& shardKeyPattern = collDoc.getKeyPattern();
+ const auto& shardKeyBSON = shardKeyPattern.toBSON();
+ *shardKeyPatternOut = shardKeyPattern;
+
+ if (!range.getMin().isFieldNamePrefixOf(shardKeyBSON)) {
+ return {ErrorCodes::ShardKeyNotFound,
+ str::stream() << "min: " << range.getMin() << " is not a prefix of the shard key "
+ << shardKeyBSON
+ << " of ns: "
+ << nss.ns()};
+ }
+
+ if (!range.getMax().isFieldNamePrefixOf(shardKeyBSON)) {
+ return {ErrorCodes::ShardKeyNotFound,
+ str::stream() << "max: " << range.getMax() << " is not a prefix of the shard key "
+ << shardKeyBSON
+ << " of ns: "
+ << nss.ns()};
+ }
+
+ return ChunkRange(shardKeyPattern.extendRangeBound(range.getMin(), false),
+ shardKeyPattern.extendRangeBound(range.getMax(), false));
+}
+
+} // namespace
+
+Status ShardingCatalogManager::addShardToZone(OperationContext* opCtx,
+ const std::string& shardName,
+ const std::string& zoneName) {
+ Lock::ExclusiveLock lk(opCtx->lockState(), _kZoneOpLock);
+
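+    // $addToSet is a no-op if the zone is already present in the shard's tags, which makes
+    // this operation safe to retry.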
+ auto updateStatus = Grid::get(opCtx)->catalogClient()->updateConfigDocument(
+ opCtx,
+ ShardType::ConfigNS,
+ BSON(ShardType::name(shardName)),
+ BSON("$addToSet" << BSON(ShardType::tags() << zoneName)),
+ false,
+ kNoWaitWriteConcern);
+
+ if (!updateStatus.isOK()) {
+ return updateStatus.getStatus();
+ }
+
+ if (!updateStatus.getValue()) {
+ return {ErrorCodes::ShardNotFound,
+ str::stream() << "shard " << shardName << " does not exist"};
+ }
+
+ return Status::OK();
+}
+
+Status ShardingCatalogManager::removeShardFromZone(OperationContext* opCtx,
+ const std::string& shardName,
+ const std::string& zoneName) {
+ Lock::ExclusiveLock lk(opCtx->lockState(), _kZoneOpLock);
+
+ auto configShard = Grid::get(opCtx)->shardRegistry()->getConfigShard();
+ const NamespaceString shardNS(ShardType::ConfigNS);
+
+ //
+    // Check whether the shard even exists in the first place.
+ //
+
+ auto findShardExistsStatus =
+ configShard->exhaustiveFindOnConfig(opCtx,
+ kConfigPrimarySelector,
+ repl::ReadConcernLevel::kLocalReadConcern,
+ shardNS,
+ BSON(ShardType::name() << shardName),
+ BSONObj(),
+ 1);
+
+ if (!findShardExistsStatus.isOK()) {
+ return findShardExistsStatus.getStatus();
+ }
+
+ if (findShardExistsStatus.getValue().docs.size() == 0) {
+ return {ErrorCodes::ShardNotFound,
+ str::stream() << "shard " << shardName << " does not exist"};
+ }
+
+ //
+    // Check how many shards belong to this zone.
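+    // Fetching at most two documents is sufficient to tell whether the shard being removed
+    // is the only member of the zone.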
+ //
+
+ auto findShardStatus =
+ configShard->exhaustiveFindOnConfig(opCtx,
+ kConfigPrimarySelector,
+ repl::ReadConcernLevel::kLocalReadConcern,
+ shardNS,
+ BSON(ShardType::tags() << zoneName),
+ BSONObj(),
+ 2);
+
+ if (!findShardStatus.isOK()) {
+ return findShardStatus.getStatus();
+ }
+
+ const auto shardDocs = findShardStatus.getValue().docs;
+
+ if (shardDocs.size() == 0) {
+        // The zone doesn't exist; this could be a retry.
+ return Status::OK();
+ }
+
+ if (shardDocs.size() == 1) {
+ auto shardDocStatus = ShardType::fromBSON(shardDocs.front());
+ if (!shardDocStatus.isOK()) {
+ return shardDocStatus.getStatus();
+ }
+
+ auto shardDoc = shardDocStatus.getValue();
+ if (shardDoc.getName() != shardName) {
+ // The last shard that belongs to this zone is a different shard.
+ // This could be a retry, so return OK.
+ return Status::OK();
+ }
+
+ auto findChunkRangeStatus =
+ configShard->exhaustiveFindOnConfig(opCtx,
+ kConfigPrimarySelector,
+ repl::ReadConcernLevel::kLocalReadConcern,
+ TagsType::ConfigNS,
+ BSON(TagsType::tag() << zoneName),
+ BSONObj(),
+ 1);
+
+ if (!findChunkRangeStatus.isOK()) {
+ return findChunkRangeStatus.getStatus();
+ }
+
+ if (findChunkRangeStatus.getValue().docs.size() > 0) {
+ return {ErrorCodes::ZoneStillInUse,
+ "cannot remove a shard from zone if a chunk range is associated with it"};
+ }
+ }
+
+ //
+ // Perform update.
+ //
+
+ auto updateStatus = Grid::get(opCtx)->catalogClient()->updateConfigDocument(
+ opCtx,
+ ShardType::ConfigNS,
+ BSON(ShardType::name(shardName)),
+ BSON("$pull" << BSON(ShardType::tags() << zoneName)),
+ false,
+ kNoWaitWriteConcern);
+
+ if (!updateStatus.isOK()) {
+ return updateStatus.getStatus();
+ }
+
+    // The update did not match a document; another thread could have removed it.
+    if (!updateStatus.getValue()) {
+        return {ErrorCodes::ShardNotFound,
+                str::stream() << "shard " << shardName << " no longer exists"};
+ }
+
+ return Status::OK();
+}
+
+Status ShardingCatalogManager::assignKeyRangeToZone(OperationContext* opCtx,
+ const NamespaceString& nss,
+ const ChunkRange& givenRange,
+ const std::string& zoneName) {
+ Lock::ExclusiveLock lk(opCtx->lockState(), _kZoneOpLock);
+
+ auto configServer = Grid::get(opCtx)->shardRegistry()->getConfigShard();
+
+ KeyPattern shardKeyPattern{BSONObj()};
+ auto fullShardKeyStatus =
+ includeFullShardKey(opCtx, configServer.get(), nss, givenRange, &shardKeyPattern);
+ if (!fullShardKeyStatus.isOK()) {
+ return fullShardKeyStatus.getStatus();
+ }
+
+ const auto& fullShardKeyRange = fullShardKeyStatus.getValue();
+
+ auto zoneExistStatus =
+ configServer->exhaustiveFindOnConfig(opCtx,
+ kConfigPrimarySelector,
+ repl::ReadConcernLevel::kLocalReadConcern,
+ ShardType::ConfigNS,
+ BSON(ShardType::tags() << zoneName),
+ BSONObj(),
+ 1);
+
+ if (!zoneExistStatus.isOK()) {
+ return zoneExistStatus.getStatus();
+ }
+
+ auto zoneExist = zoneExistStatus.getValue().docs.size() > 0;
+ if (!zoneExist) {
+ return {ErrorCodes::ZoneNotFound,
+ str::stream() << "zone " << zoneName << " does not exist"};
+ }
+
+    auto overlapStatus = checkForOverlappedZonedKeyRange(
+ opCtx, configServer.get(), nss, fullShardKeyRange, zoneName, shardKeyPattern);
+ if (!overlapStatus.isOK()) {
+ return overlapStatus;
+ }
+
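+    // Documents in config.tags are keyed by the (ns, min) pair; e.g. a resulting document
+    // looks like {_id: {ns: "db.coll", min: {a: 1}}, ns: "db.coll", min: {a: 1},
+    // max: {a: 10}, tag: "zoneA"} (values here are illustrative).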
+ BSONObj updateQuery(
+ BSON("_id" << BSON(TagsType::ns(nss.ns()) << TagsType::min(fullShardKeyRange.getMin()))));
+
+ BSONObjBuilder updateBuilder;
+ updateBuilder.append("_id",
+ BSON(TagsType::ns(nss.ns()) << TagsType::min(fullShardKeyRange.getMin())));
+ updateBuilder.append(TagsType::ns(), nss.ns());
+ updateBuilder.append(TagsType::min(), fullShardKeyRange.getMin());
+ updateBuilder.append(TagsType::max(), fullShardKeyRange.getMax());
+ updateBuilder.append(TagsType::tag(), zoneName);
+
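+    // Upsert so the tag document is created if it does not exist, making repeated assignment
+    // of the same range to the same zone idempotent.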
+ auto updateStatus = Grid::get(opCtx)->catalogClient()->updateConfigDocument(
+ opCtx, TagsType::ConfigNS, updateQuery, updateBuilder.obj(), true, kNoWaitWriteConcern);
+
+ if (!updateStatus.isOK()) {
+ return updateStatus.getStatus();
+ }
+
+ return Status::OK();
+}
+
+Status ShardingCatalogManager::removeKeyRangeFromZone(OperationContext* opCtx,
+ const NamespaceString& nss,
+ const ChunkRange& range) {
+ Lock::ExclusiveLock lk(opCtx->lockState(), _kZoneOpLock);
+
+ auto configServer = Grid::get(opCtx)->shardRegistry()->getConfigShard();
+
+ KeyPattern shardKeyPattern{BSONObj()};
+ auto fullShardKeyStatus =
+ includeFullShardKey(opCtx, configServer.get(), nss, range, &shardKeyPattern);
+ if (!fullShardKeyStatus.isOK()) {
+ return fullShardKeyStatus.getStatus();
+ }
+
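+    // Match on both the (ns, min) _id and the max bound so that only the exact range is
+    // removed, never a different range that happens to share the same min.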
+ BSONObjBuilder removeBuilder;
+ removeBuilder.append("_id", BSON(TagsType::ns(nss.ns()) << TagsType::min(range.getMin())));
+ removeBuilder.append(TagsType::max(), range.getMax());
+
+ return Grid::get(opCtx)->catalogClient()->removeConfigDocuments(
+ opCtx, TagsType::ConfigNS, removeBuilder.obj(), kNoWaitWriteConcern);
+}
+
+} // namespace mongo
diff --git a/src/mongo/db/s/sharding_initialization_mongod.cpp b/src/mongo/db/s/sharding_initialization_mongod.cpp
index bccdebf8f21..6f93f3498d6 100644
--- a/src/mongo/db/s/sharding_initialization_mongod.cpp
+++ b/src/mongo/db/s/sharding_initialization_mongod.cpp
@@ -45,7 +45,6 @@
#include "mongo/db/server_options.h"
#include "mongo/executor/task_executor.h"
#include "mongo/rpc/metadata/egress_metadata_hook_list.h"
-#include "mongo/s/catalog/sharding_catalog_manager.h"
#include "mongo/s/catalog_cache.h"
#include "mongo/s/catalog_cache_loader.h"
#include "mongo/s/client/shard_factory.h"