author    Jack Mulrow <jack.mulrow@mongodb.com>            2023-01-30 22:41:48 +0000
committer Evergreen Agent <no-reply@evergreen.mongodb.com>  2023-02-10 21:44:18 +0000
commit    af8143514cbc698fe459d89d5f51446eb79051c1
tree      40eacc82105022a234793acc770cae94deaa7bcb
parent    eb74202c608550490a603fa31ad8a46f2a92932f
SERVER-72489 Config server always has shardIdentity and uses ShardServerCatalogCacheLoader
 jstests/noPassthrough/catalog_shard.js                               | 63
 jstests/noPassthrough/cluster_commands_require_cluster_node.js       | 12
 jstests/noPassthrough/dedicated_to_catalog_shard.js                  | 18
 jstests/sharding/clone_catalog_data.js                               | 24
 jstests/sharding/printShardingStatus.js                              |  7
 jstests/sharding/refine_collection_shard_key_basic.js                |  2
 src/mongo/db/pipeline/sharded_agg_helpers.cpp                        |  7
 src/mongo/db/repl/replication_coordinator_external_state_impl.cpp    | 24
 src/mongo/db/s/add_shard_cmd.cpp                                     |  6
 src/mongo/db/s/add_shard_util.cpp                                    |  5
 src/mongo/db/s/add_shard_util.h                                      |  4
 src/mongo/db/s/config/sharding_catalog_manager.h                     |  6
 src/mongo/db/s/config/sharding_catalog_manager_add_shard_test.cpp    | 14
 src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp  | 45
 src/mongo/db/s/shard_server_catalog_cache_loader.cpp                 | 32
 src/mongo/db/s/sharding_ddl_coordinator.cpp                          |  2
 src/mongo/db/s/sharding_ddl_coordinator_service.cpp                  |  2
 src/mongo/db/s/sharding_initialization_mongod.cpp                    | 19
 src/mongo/db/shard_id.cpp                                            |  3
 src/mongo/db/shard_id.h                                              |  1
 src/mongo/s/catalog/sharding_catalog_client_impl.cpp                 |  6
 src/mongo/s/client/shard_registry.cpp                                |  5
 src/mongo/s/config_server_catalog_cache_loader.cpp                   | 18
 src/mongo/shell/shardingtest.js                                      | 12
 24 files changed, 257 insertions(+), 80 deletions(-)
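
Orientation before the per-file diffs: the change makes the config server install its own shardIdentity document and run the ShardServerCatalogCacheLoader, so the shard-only routing/database cache flush commands now work against it. The lines below are a minimal, illustrative jstest-style sketch of that flow, not part of the patch; they assume a running ShardingTest `st` plus the `dbName`/`ns` values defined in the tests that follow.

// Sketch only (assumes `st`, `dbName`, `ns` as set up in jstests/noPassthrough/catalog_shard.js).
const kCatalogShardId = "catalogShard";

// The config server replica set itself is added as a shard under the sentinel name.
assert.commandWorked(st.s.adminCommand({addShard: st.configRS.getURL(), name: kCatalogShardId}));

// Because the config server now uses the ShardServerCatalogCacheLoader, the shard-only
// cache flush commands succeed when run against its primary.
const configPrimary = st.configRS.getPrimary();
assert.commandWorked(configPrimary.adminCommand({_flushRoutingTableCacheUpdates: ns}));
assert.commandWorked(configPrimary.adminCommand({_flushDatabaseCacheUpdates: dbName}));
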
diff --git a/jstests/noPassthrough/catalog_shard.js b/jstests/noPassthrough/catalog_shard.js
index 3dcc609b6bc..b784070287c 100644
--- a/jstests/noPassthrough/catalog_shard.js
+++ b/jstests/noPassthrough/catalog_shard.js
@@ -11,6 +11,7 @@
load("jstests/libs/fail_point_util.js");
+const kCatalogShardId = "catalogShard";
const dbName = "foo";
const collName = "bar";
const ns = dbName + "." + collName;
@@ -22,6 +23,13 @@ function basicCRUD(conn) {
assert.eq(conn.getCollection(ns).find({x: 1}).toArray().length, 0);
}
+function flushRoutingAndDBCacheUpdates(conn) {
+ assert.commandWorked(conn.adminCommand({_flushRoutingTableCacheUpdates: ns}));
+ assert.commandWorked(conn.adminCommand({_flushDatabaseCacheUpdates: dbName}));
+ assert.commandWorked(conn.adminCommand({_flushRoutingTableCacheUpdates: "does.not.exist"}));
+ assert.commandWorked(conn.adminCommand({_flushDatabaseCacheUpdates: "notRealDB"}));
+}
+
const st = new ShardingTest({
shards: 1,
config: 3,
@@ -45,6 +53,9 @@ const configShardName = st.shard0.shardName;
assert.commandWorked(st.s.adminCommand({shardCollection: ns, key: {skey: 1}}));
basicCRUD(st.s);
+
+ // Flushing routing / db cache updates works.
+ flushRoutingAndDBCacheUpdates(st.configRS.getPrimary());
}
// Add a shard to move chunks to and from it in later tests.
@@ -130,6 +141,58 @@ const newShardName =
moveChunkThread.join();
}
+{
+ //
+ // Remove the catalog shard.
+ //
+
+ let removeRes = assert.commandWorked(st.s0.adminCommand({removeShard: kCatalogShardId}));
+ assert.eq("started", removeRes.state);
+
+ // The removal won't complete until all chunks and dbs are moved off the catalog shard.
+ removeRes = assert.commandWorked(st.s0.adminCommand({removeShard: kCatalogShardId}));
+ assert.eq("ongoing", removeRes.state);
+
+ assert.commandWorked(st.s.adminCommand({moveChunk: ns, find: {skey: -1}, to: newShardName}));
+ assert.commandWorked(
+ st.s.adminCommand({moveChunk: "config.system.sessions", find: {_id: 0}, to: newShardName}));
+
+ // Still blocked until the db has been moved away.
+ removeRes = assert.commandWorked(st.s0.adminCommand({removeShard: kCatalogShardId}));
+ assert.eq("ongoing", removeRes.state);
+
+ assert.commandWorked(st.s.adminCommand({movePrimary: dbName, to: newShardName}));
+
+ removeRes = assert.commandWorked(st.s0.adminCommand({removeShard: kCatalogShardId}));
+ assert.eq("completed", removeRes.state);
+
+ // Basic CRUD and sharded DDL work.
+ basicCRUD(st.s);
+ assert.commandWorked(st.s.adminCommand({split: ns, middle: {skey: 220}}));
+ basicCRUD(st.s);
+
+ // Flushing routing / db cache updates works.
+ flushRoutingAndDBCacheUpdates(st.configRS.getPrimary());
+}
+
+{
+ //
+ // Add back the catalog shard.
+ //
+
+ // movePrimary won't delete from the source, so drop the moved db directly to avoid a conflict
+ // in addShard.
+ assert.commandWorked(st.configRS.getPrimary().getDB(dbName).dropDatabase());
+ assert.commandWorked(
+ st.s.adminCommand({addShard: st.configRS.getURL(), name: kCatalogShardId}));
+
+ // Basic CRUD and sharded DDL work.
+ basicCRUD(st.s);
+ assert.commandWorked(st.s.adminCommand({moveChunk: ns, find: {skey: 0}, to: configShardName}));
+ assert.commandWorked(st.s.adminCommand({split: ns, middle: {skey: 5}}));
+ basicCRUD(st.s);
+}
+
st.stop();
newShardRS.stopSet();
}());
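
For reference, a condensed view of the drain-and-remove sequence the test above walks through (sketch only; it assumes the same `st`, `ns`, `dbName`, `newShardName`, and `kCatalogShardId` bindings as the test).

// removeShard drains incrementally: "started" -> "ongoing" -> "completed".
let res = assert.commandWorked(st.s.adminCommand({removeShard: kCatalogShardId}));
assert.eq("started", res.state);

// Removal stays "ongoing" until every chunk and database has left the catalog shard.
assert.commandWorked(st.s.adminCommand({moveChunk: ns, find: {skey: -1}, to: newShardName}));
assert.commandWorked(
    st.s.adminCommand({moveChunk: "config.system.sessions", find: {_id: 0}, to: newShardName}));
assert.commandWorked(st.s.adminCommand({movePrimary: dbName, to: newShardName}));

res = assert.commandWorked(st.s.adminCommand({removeShard: kCatalogShardId}));
assert.eq("completed", res.state);
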
diff --git a/jstests/noPassthrough/cluster_commands_require_cluster_node.js b/jstests/noPassthrough/cluster_commands_require_cluster_node.js
index 8a8b6616bfc..6366db4538e 100644
--- a/jstests/noPassthrough/cluster_commands_require_cluster_node.js
+++ b/jstests/noPassthrough/cluster_commands_require_cluster_node.js
@@ -101,9 +101,15 @@ function runTestCaseExpectSuccess(conn, testCase) {
const isCatalogShardEnabled = CatalogShardUtil.isEnabledIgnoringFCV(st);
for (let testCase of clusterCommandsCases) {
- const expectedErrCode = isCatalogShardEnabled ? ErrorCodes.ShardingStateNotInitialized
- : ErrorCodes.NoShardingEnabled;
- runTestCaseExpectFail(st.configRS.getPrimary(), testCase, expectedErrCode);
+ if (isCatalogShardEnabled) {
+ if (testCase.expectedErr) {
+ runTestCaseExpectFail(st.rs0.getPrimary(), testCase, testCase.expectedErr);
+ } else {
+ runTestCaseExpectSuccess(st.rs0.getPrimary(), testCase);
+ }
+ } else {
+ runTestCaseExpectFail(st.configRS.getPrimary(), testCase, ErrorCodes.NoShardingEnabled);
+ }
}
//
diff --git a/jstests/noPassthrough/dedicated_to_catalog_shard.js b/jstests/noPassthrough/dedicated_to_catalog_shard.js
index 166c2df9d81..d04aa50e854 100644
--- a/jstests/noPassthrough/dedicated_to_catalog_shard.js
+++ b/jstests/noPassthrough/dedicated_to_catalog_shard.js
@@ -13,6 +13,13 @@ const dbName = "foo";
const collName = "bar";
const ns = dbName + "." + collName;
+function flushRoutingAndDBCacheUpdates(conn) {
+ assert.commandWorked(conn.adminCommand({_flushRoutingTableCacheUpdates: ns}));
+ assert.commandWorked(conn.adminCommand({_flushDatabaseCacheUpdates: dbName}));
+ assert.commandWorked(conn.adminCommand({_flushRoutingTableCacheUpdates: "does.not.exist"}));
+ assert.commandWorked(conn.adminCommand({_flushDatabaseCacheUpdates: "notRealDB"}));
+}
+
const st = new ShardingTest({shards: 0, config: 3});
const configCS = st.configRS.getURL();
@@ -21,9 +28,8 @@ const configCS = st.configRS.getURL();
// Dedicated config server mode tests (pre addShard).
//
{
- // Can't create user namespaces.
- assert.commandFailedWithCode(st.configRS.getPrimary().getCollection(ns).insert({_id: 1, x: 1}),
- ErrorCodes.InvalidNamespace);
+ // Can create user namespaces via direct writes.
+ assert.commandWorked(st.configRS.getPrimary().getCollection(ns).insert({_id: 1, x: 1}));
// Failover works.
st.configRS.stepUp(st.configRS.getSecondary());
@@ -34,6 +40,9 @@ const configCS = st.configRS.getURL();
st.configRS.restart(node, undefined, undefined, false /* wait */);
});
st.configRS.getPrimary(); // Waits for a stable primary.
+
+ // Flushing routing / db cache updates works.
+ flushRoutingAndDBCacheUpdates(st.configRS.getPrimary());
}
//
@@ -48,6 +57,9 @@ const configCS = st.configRS.getURL();
// More than once works.
assert.commandWorked(st.s.adminCommand({addShard: configCS}));
assert.commandWorked(st.s.adminCommand({addShard: configCS}));
+
+ // Flushing routing / db cache updates works.
+ flushRoutingAndDBCacheUpdates(st.configRS.getPrimary());
}
// Refresh the logical session cache now that we have a shard to create the sessions collection to
diff --git a/jstests/sharding/clone_catalog_data.js b/jstests/sharding/clone_catalog_data.js
index d87e2dd1e11..5053b0df92c 100644
--- a/jstests/sharding/clone_catalog_data.js
+++ b/jstests/sharding/clone_catalog_data.js
@@ -160,14 +160,22 @@
const isCatalogShardEnabled = CatalogShardUtil.isEnabledIgnoringFCV(st);
- // Check that the command fails when attempting to run on the config server.
- assert.commandFailedWithCode(st.configRS.getPrimary().adminCommand({
- _shardsvrCloneCatalogData: 'test',
- from: fromShard.host,
- writeConcern: {w: "majority"}
- }),
- isCatalogShardEnabled ? ErrorCodes.ShardingStateNotInitialized
- : ErrorCodes.NoShardingEnabled);
+ // Check that the command fails when attempting to run on a config server that doesn't support
+ // catalog shard mode.
+ if (isCatalogShardEnabled) {
+ assert.commandWorked(st.configRS.getPrimary().adminCommand({
+ _shardsvrCloneCatalogData: 'test',
+ from: fromShard.host,
+ writeConcern: {w: "majority"}
+ }));
+ } else {
+ assert.commandFailedWithCode(st.configRS.getPrimary().adminCommand({
+ _shardsvrCloneCatalogData: 'test',
+ from: fromShard.host,
+ writeConcern: {w: "majority"}
+ }),
+ ErrorCodes.NoShardingEnabled);
+ }
// Check that the command fails when failing to specify a source.
assert.commandFailedWithCode(
diff --git a/jstests/sharding/printShardingStatus.js b/jstests/sharding/printShardingStatus.js
index ee308fea5bb..951556fbfdd 100644
--- a/jstests/sharding/printShardingStatus.js
+++ b/jstests/sharding/printShardingStatus.js
@@ -99,7 +99,12 @@ var config = mongos.getDB("config");
var configCopy = standalone.getDB("configCopy");
config.getCollectionInfos().forEach(function(c) {
// It's illegal to copy the system collections.
- if (["system.indexBuilds", "system.preimages", "system.change_collection"].includes(c.name)) {
+ if ([
+ "system.indexBuilds",
+ "system.preimages",
+ "system.change_collection",
+ "cache.chunks.config.system.sessions"
+ ].includes(c.name)) {
return;
}
diff --git a/jstests/sharding/refine_collection_shard_key_basic.js b/jstests/sharding/refine_collection_shard_key_basic.js
index cd1ecd1e9f2..35419d3745e 100644
--- a/jstests/sharding/refine_collection_shard_key_basic.js
+++ b/jstests/sharding/refine_collection_shard_key_basic.js
@@ -310,7 +310,7 @@ assert.commandFailedWithCode(
const isCatalogShardEnabled = CatalogShardUtil.isEnabledIgnoringFCV(st);
assert.commandFailedWithCode(
mongos.adminCommand({refineCollectionShardKey: "config.collections", key: {_id: 1, aKey: 1}}),
- isCatalogShardEnabled ? ErrorCodes.ShardingStateNotInitialized : ErrorCodes.NoShardingEnabled);
+ isCatalogShardEnabled ? ErrorCodes.NamespaceNotSharded : ErrorCodes.NoShardingEnabled);
enableShardingAndShardColl({_id: 1});
diff --git a/src/mongo/db/pipeline/sharded_agg_helpers.cpp b/src/mongo/db/pipeline/sharded_agg_helpers.cpp
index dfa0e5452b3..f6c7bb7e6fb 100644
--- a/src/mongo/db/pipeline/sharded_agg_helpers.cpp
+++ b/src/mongo/db/pipeline/sharded_agg_helpers.cpp
@@ -1518,8 +1518,13 @@ StatusWith<CollectionRoutingInfo> getExecutionNsRoutingInfo(OperationContext* op
// a collection before its enclosing database is created. However, if there are no shards
// present, then $changeStream should immediately return an empty cursor just as other
// aggregations do when the database does not exist.
+ //
+ // Note despite config.collections always being unsharded, to support $shardedDataDistribution
+ // we always take the shard targeting path. The collection must only exist on the config server,
+ // so even if there are no shards, the query can still succeed and we shouldn't return
+ // ShardNotFound.
const auto shardIds = Grid::get(opCtx)->shardRegistry()->getAllShardIds(opCtx);
- if (shardIds.empty()) {
+ if (shardIds.empty() && execNss != NamespaceString::kConfigsvrCollectionsNamespace) {
return {ErrorCodes::ShardNotFound, "No shards are present in the cluster"};
}
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
index e1949b28ac3..3cc920d06cc 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
@@ -43,6 +43,7 @@
#include "mongo/db/catalog/database_holder.h"
#include "mongo/db/catalog/drop_collection.h"
#include "mongo/db/catalog/local_oplog_info.h"
+#include "mongo/db/catalog_shard_feature_flag_gen.h"
#include "mongo/db/change_stream_change_collection_manager.h"
#include "mongo/db/change_stream_pre_images_collection_manager.h"
#include "mongo/db/change_stream_serverless_helpers.h"
@@ -836,11 +837,9 @@ void ReplicationCoordinatorExternalStateImpl::_shardingOnStepDownHook() {
if (ShardingState::get(_service)->enabled()) {
ChunkSplitter::get(_service).onStepDown();
PeriodicBalancerConfigRefresher::get(_service).onStepDown();
+ CatalogCacheLoader::get(_service).onStepDown();
if (serverGlobalParams.clusterRole != ClusterRole::ConfigServer) {
- // Config shards don't use a loader that requires this.
- CatalogCacheLoader::get(_service).onStepDown();
-
// Called earlier for config servers.
TransactionCoordinatorService::get(_service)->onStepDown();
}
@@ -945,6 +944,10 @@ void ReplicationCoordinatorExternalStateImpl::_shardingOnTransitionToPrimaryHook
PeriodicShardedIndexConsistencyChecker::get(_service).onStepUp(_service);
TransactionCoordinatorService::get(_service)->onStepUp(opCtx);
+
+ if (gFeatureFlagCatalogShard.isEnabledAndIgnoreFCV()) {
+ CatalogCacheLoader::get(_service).onStepUp();
+ }
}
if (serverGlobalParams.clusterRole == ClusterRole::ShardServer) {
if (ShardingState::get(opCtx)->enabled()) {
@@ -962,11 +965,9 @@ void ReplicationCoordinatorExternalStateImpl::_shardingOnTransitionToPrimaryHook
ChunkSplitter::get(_service).onStepUp();
PeriodicBalancerConfigRefresher::get(_service).onStepUp(_service);
+ CatalogCacheLoader::get(_service).onStepUp();
if (serverGlobalParams.clusterRole != ClusterRole::ConfigServer) {
- // Config shards don't use a loader that requires this.
- CatalogCacheLoader::get(_service).onStepUp();
-
// Called earlier for config servers.
TransactionCoordinatorService::get(_service)->onStepUp(opCtx);
}
@@ -1059,6 +1060,17 @@ void ReplicationCoordinatorExternalStateImpl::_shardingOnTransitionToPrimaryHook
validator->enableKeyGenerator(opCtx, true);
}
}
+
+ if (gFeatureFlagCatalogShard.isEnabled(serverGlobalParams.featureCompatibility) &&
+ serverGlobalParams.clusterRole == ClusterRole::ConfigServer &&
+ !ShardingState::get(opCtx)->enabled()) {
+ // Note this must be called after the config server has created the cluster ID and also
+ // after the onStepUp logic for the shard role because this triggers sharding state
+ // initialization which will transition some components into the "primary" state, like the
+ // TransactionCoordinatorService, and they would fail if the onStepUp logic attempted the
+ // same transition.
+ ShardingCatalogManager::get(opCtx)->installConfigShardIdentityDocument(opCtx);
+ }
}
void ReplicationCoordinatorExternalStateImpl::signalApplierToChooseNewSyncSource() {
diff --git a/src/mongo/db/s/add_shard_cmd.cpp b/src/mongo/db/s/add_shard_cmd.cpp
index 82b4077f619..d2a48cb784a 100644
--- a/src/mongo/db/s/add_shard_cmd.cpp
+++ b/src/mongo/db/s/add_shard_cmd.cpp
@@ -72,15 +72,15 @@ public:
.containsCustomizedGetLastErrorDefaults());
auto addShardCmd = request();
- auto shardIdUpsertCmd =
- add_shard_util::createShardIdentityUpsertForAddShard(addShardCmd);
+ auto shardIdUpsertCmd = add_shard_util::createShardIdentityUpsertForAddShard(
+ addShardCmd, ShardingCatalogClient::kMajorityWriteConcern);
DBDirectClient localClient(opCtx);
BSONObj res;
localClient.runCommand(
DatabaseName(boost::none, NamespaceString::kAdminDb), shardIdUpsertCmd, res);
- uassertStatusOK(getStatusFromCommandResult(res));
+ uassertStatusOK(getStatusFromWriteCommandReply(res));
const auto balancerConfig = Grid::get(opCtx)->getBalancerConfiguration();
invariant(balancerConfig);
diff --git a/src/mongo/db/s/add_shard_util.cpp b/src/mongo/db/s/add_shard_util.cpp
index 36151320805..a2196828731 100644
--- a/src/mongo/db/s/add_shard_util.cpp
+++ b/src/mongo/db/s/add_shard_util.cpp
@@ -59,7 +59,8 @@ AddShard createAddShardCmd(OperationContext* opCtx, const ShardId& shardName) {
return addShardCmd;
}
-BSONObj createShardIdentityUpsertForAddShard(const AddShard& addShardCmd) {
+BSONObj createShardIdentityUpsertForAddShard(const AddShard& addShardCmd,
+ const WriteConcernOptions& wc) {
BatchedCommandRequest request([&] {
write_ops::UpdateCommandRequest updateOp(NamespaceString::kServerConfigurationNamespace);
updateOp.setUpdates({[&] {
@@ -73,7 +74,7 @@ BSONObj createShardIdentityUpsertForAddShard(const AddShard& addShardCmd) {
return updateOp;
}());
- request.setWriteConcern(ShardingCatalogClient::kMajorityWriteConcern.toBSON());
+ request.setWriteConcern(wc.toBSON());
return request.toBSON();
}
diff --git a/src/mongo/db/s/add_shard_util.h b/src/mongo/db/s/add_shard_util.h
index 020831833ba..7ae28d52dba 100644
--- a/src/mongo/db/s/add_shard_util.h
+++ b/src/mongo/db/s/add_shard_util.h
@@ -32,6 +32,7 @@
#include <string>
#include "mongo/base/string_data.h"
+#include "mongo/db/write_concern_options.h"
namespace mongo {
class AddShard;
@@ -58,7 +59,8 @@ AddShard createAddShardCmd(OperationContext* opCtx, const ShardId& shardName);
* doc into the shard with the given shardName (or update the shard's existing shardIdentity
* doc's configsvrConnString if the _id, shardName, and clusterId do not conflict).
*/
-BSONObj createShardIdentityUpsertForAddShard(const AddShard& addShardCmd);
+BSONObj createShardIdentityUpsertForAddShard(const AddShard& addShardCmd,
+ const WriteConcernOptions& wc);
} // namespace add_shard_util
} // namespace mongo
diff --git a/src/mongo/db/s/config/sharding_catalog_manager.h b/src/mongo/db/s/config/sharding_catalog_manager.h
index 3688394efc7..9133b00ed26 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager.h
+++ b/src/mongo/db/s/config/sharding_catalog_manager.h
@@ -483,6 +483,12 @@ public:
const ConnectionString& shardConnectionString);
/**
+ * Inserts the config server shard identity document using a sentinel shard id. Requires the
+ * config server's ShardingState has not already been enabled. Throws on errors.
+ */
+ void installConfigShardIdentityDocument(OperationContext* opCtx);
+
+ /**
* Tries to remove a shard. To completely remove a shard from a sharded cluster,
* the data residing in that shard must be moved to the remaining shards in the
* cluster by "draining" chunks from that shard.
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_add_shard_test.cpp b/src/mongo/db/s/config/sharding_catalog_manager_add_shard_test.cpp
index bf758421ab9..490a463b29c 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_add_shard_test.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_add_shard_test.cpp
@@ -50,6 +50,7 @@
#include "mongo/idl/cluster_server_parameter_common.h"
#include "mongo/idl/cluster_server_parameter_gen.h"
#include "mongo/logv2/log.h"
+#include "mongo/s/catalog/sharding_catalog_client.h"
#include "mongo/s/catalog/type_changelog.h"
#include "mongo/s/catalog/type_config_version.h"
#include "mongo/s/catalog/type_database_gen.h"
@@ -370,7 +371,8 @@ protected:
using namespace add_shard_util;
// Create the expected upsert shardIdentity command for this shardType.
auto upsertCmdObj = createShardIdentityUpsertForAddShard(
- createAddShardCmd(operationContext(), expectedShardName));
+ createAddShardCmd(operationContext(), expectedShardName),
+ ShardingCatalogClient::kMajorityWriteConcern);
const auto opMsgRequest =
OpMsgRequest::fromDBAndBody(NamespaceString::kAdminDb, upsertCmdObj);
@@ -385,7 +387,8 @@ protected:
using namespace add_shard_util;
// Create the expected upsert shardIdentity command for this shardType.
auto upsertCmdObj = createShardIdentityUpsertForAddShard(
- createAddShardCmd(operationContext(), expectedShardName));
+ createAddShardCmd(operationContext(), expectedShardName),
+ ShardingCatalogClient::kMajorityWriteConcern);
const auto opMsgRequest =
OpMsgRequest::fromDBAndBody(NamespaceString::kAdminDb, upsertCmdObj);
@@ -414,8 +417,8 @@ protected:
auto addShardCmd =
AddShard::parse(IDLParserContext(AddShard::kCommandName), addShardOpMsgRequest);
- const auto& updateOpField =
- add_shard_util::createShardIdentityUpsertForAddShard(addShardCmd);
+ const auto& updateOpField = add_shard_util::createShardIdentityUpsertForAddShard(
+ addShardCmd, ShardingCatalogClient::kMajorityWriteConcern);
const auto updateOpMsgRequest =
OpMsgRequest::fromDBAndBody(request.dbname, updateOpField);
@@ -566,7 +569,8 @@ TEST_F(AddShardTest, CreateShardIdentityUpsertForAddShard) {
<< "majority"
<< "wtimeout" << 60000));
auto addShardCmd = add_shard_util::createAddShardCmd(operationContext(), shardName);
- auto actualBSON = add_shard_util::createShardIdentityUpsertForAddShard(addShardCmd);
+ auto actualBSON = add_shard_util::createShardIdentityUpsertForAddShard(
+ addShardCmd, ShardingCatalogClient::kMajorityWriteConcern);
ASSERT_BSONOBJ_EQ(expectedBSON, actualBSON);
}
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp b/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp
index 216eab707cd..99cd2dfe0f7 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp
@@ -63,6 +63,7 @@
#include "mongo/db/s/add_shard_cmd_gen.h"
#include "mongo/db/s/add_shard_util.h"
#include "mongo/db/s/sharding_logging.h"
+#include "mongo/db/s/sharding_state.h"
#include "mongo/db/s/type_shard_identity.h"
#include "mongo/db/s/user_writes_critical_section_document_gen.h"
#include "mongo/db/s/user_writes_recoverable_critical_section_service.h"
@@ -604,6 +605,26 @@ StatusWith<std::vector<std::string>> ShardingCatalogManager::_getDBNamesListFrom
return dbNames;
}
+void ShardingCatalogManager::installConfigShardIdentityDocument(OperationContext* opCtx) {
+ invariant(!ShardingState::get(opCtx)->enabled());
+
+ // Insert a shard identity document. Note we insert with local write concern, so the shard
+ // identity may roll back, which will trigger an fassert to clear the in-memory sharding state.
+ {
+ auto addShardCmd = add_shard_util::createAddShardCmd(opCtx, ShardId::kCatalogShardId);
+
+ auto shardIdUpsertCmd = add_shard_util::createShardIdentityUpsertForAddShard(
+ addShardCmd, ShardingCatalogClient::kLocalWriteConcern);
+ DBDirectClient localClient(opCtx);
+ BSONObj res;
+
+ localClient.runCommand(
+ DatabaseName(boost::none, NamespaceString::kAdminDb), shardIdUpsertCmd, res);
+
+ uassertStatusOK(getStatusFromWriteCommandReply(res));
+ }
+}
+
StatusWith<std::string> ShardingCatalogManager::addShard(
OperationContext* opCtx,
const std::string* shardProposedName,
@@ -708,13 +729,18 @@ StatusWith<std::string> ShardingCatalogManager::addShard(
return Shard::CommandResponse::processBatchWriteResponse(commandResponse, &batchResponse);
};
- AddShard addShardCmd = add_shard_util::createAddShardCmd(opCtx, shardType.getName());
-
- // Use the _addShard command to add the shard, which in turn inserts a shardIdentity document
- // into the shard and triggers sharding state initialization.
- auto addShardStatus = runCmdOnNewShard(addShardCmd.toBSON({}));
- if (!addShardStatus.isOK()) {
- return addShardStatus;
+ // The catalog shard will already have a ShardIdentity document on the config server, so skip
+ // adding it here.
+ if (!gFeatureFlagCatalogShard.isEnabled(serverGlobalParams.featureCompatibility) ||
+ shardType.getName() != ShardId::kCatalogShardId) {
+ AddShard addShardCmd = add_shard_util::createAddShardCmd(opCtx, shardType.getName());
+
+ // Use the _addShard command to add the shard, which in turn inserts a shardIdentity
+ // document into the shard and triggers sharding state initialization.
+ auto addShardStatus = runCmdOnNewShard(addShardCmd.toBSON({}));
+ if (!addShardStatus.isOK()) {
+ return addShardStatus;
+ }
}
// Set the user-writes blocking state on the new shard.
@@ -974,7 +1000,10 @@ RemoveShardProgress ShardingCatalogManager::removeShard(OperationContext* opCtx,
// set monitor is removed, otherwise the shard would be referencing a dropped RSM.
Grid::get(opCtx)->shardRegistry()->reload(opCtx);
- ReplicaSetMonitor::remove(name);
+ if (shardId != ShardId::kCatalogShardId) {
+ // Don't remove the catalog shard's RSM because it is used to target the config server.
+ ReplicaSetMonitor::remove(name);
+ }
// Record finish in changelog
ShardingLogging::get(opCtx)->logChange(
diff --git a/src/mongo/db/s/shard_server_catalog_cache_loader.cpp b/src/mongo/db/s/shard_server_catalog_cache_loader.cpp
index 6264ee66cde..fee6e9cbb46 100644
--- a/src/mongo/db/s/shard_server_catalog_cache_loader.cpp
+++ b/src/mongo/db/s/shard_server_catalog_cache_loader.cpp
@@ -298,17 +298,24 @@ StatusWith<CollectionAndChangedChunks> getIncompletePersistedMetadataSinceVersio
}
}
+ShardId getSelfShardId(OperationContext* opCtx) {
+ if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
+ return ShardId::kConfigServerId;
+ }
+
+ auto const shardingState = ShardingState::get(opCtx);
+ invariant(shardingState->enabled());
+ return shardingState->shardId();
+}
+
/**
* Sends _flushRoutingTableCacheUpdates to the primary to force it to refresh its routing table for
* collection 'nss' and then waits for the refresh to replicate to this node.
*/
void forcePrimaryCollectionRefreshAndWaitForReplication(OperationContext* opCtx,
const NamespaceString& nss) {
- auto const shardingState = ShardingState::get(opCtx);
- invariant(shardingState->enabled());
-
- auto selfShard = uassertStatusOK(
- Grid::get(opCtx)->shardRegistry()->getShard(opCtx, shardingState->shardId()));
+ auto selfShard =
+ uassertStatusOK(Grid::get(opCtx)->shardRegistry()->getShard(opCtx, getSelfShardId(opCtx)));
auto cmdResponse = uassertStatusOK(selfShard->runCommandWithFixedRetryAttempts(
opCtx,
@@ -329,11 +336,8 @@ void forcePrimaryCollectionRefreshAndWaitForReplication(OperationContext* opCtx,
* database 'dbName' and then waits for the refresh to replicate to this node.
*/
void forcePrimaryDatabaseRefreshAndWaitForReplication(OperationContext* opCtx, StringData dbName) {
- auto const shardingState = ShardingState::get(opCtx);
- invariant(shardingState->enabled());
-
- auto selfShard = uassertStatusOK(
- Grid::get(opCtx)->shardRegistry()->getShard(opCtx, shardingState->shardId()));
+ auto selfShard =
+ uassertStatusOK(Grid::get(opCtx)->shardRegistry()->getShard(opCtx, getSelfShardId(opCtx)));
auto cmdResponse = uassertStatusOK(selfShard->runCommandWithFixedRetryAttempts(
opCtx,
@@ -424,6 +428,14 @@ void ShardServerCatalogCacheLoader::shutDown() {
SemiFuture<CollectionAndChangedChunks> ShardServerCatalogCacheLoader::getChunksSince(
const NamespaceString& nss, ChunkVersion version) {
+    // There's no need to refresh if a collection is always unsharded. Further, attempting to refresh
+ // config.collections or config.chunks would trigger recursive refreshes, and, if this is
+ // running on a config server secondary, the refresh would not succeed if the primary is
+ // unavailable, unnecessarily reducing availability.
+ if (nss.isNamespaceAlwaysUnsharded()) {
+ return Status(ErrorCodes::NamespaceNotFound,
+ str::stream() << "Collection " << nss.ns() << " not found");
+ }
bool isPrimary;
long long term;
diff --git a/src/mongo/db/s/sharding_ddl_coordinator.cpp b/src/mongo/db/s/sharding_ddl_coordinator.cpp
index 4d53f6e7984..62b1e4eef44 100644
--- a/src/mongo/db/s/sharding_ddl_coordinator.cpp
+++ b/src/mongo/db/s/sharding_ddl_coordinator.cpp
@@ -266,7 +266,7 @@ SemiFuture<void> ShardingDDLCoordinator::run(std::shared_ptr<executor::ScopedTas
return _acquireLockAsync(executor, token, originalNss().db());
})
.then([this, executor, token, anchor = shared_from_this()] {
- if (!originalNss().isConfigDB() && !_recoveredFromDisk) {
+ if (!originalNss().isConfigDB() && !originalNss().isAdminDB() && !_recoveredFromDisk) {
auto opCtxHolder = cc().makeOperationContext();
auto* opCtx = opCtxHolder.get();
invariant(metadata().getDatabaseVersion());
diff --git a/src/mongo/db/s/sharding_ddl_coordinator_service.cpp b/src/mongo/db/s/sharding_ddl_coordinator_service.cpp
index fa8c1b8e66e..77799dbd322 100644
--- a/src/mongo/db/s/sharding_ddl_coordinator_service.cpp
+++ b/src/mongo/db/s/sharding_ddl_coordinator_service.cpp
@@ -263,7 +263,7 @@ ShardingDDLCoordinatorService::getOrCreateInstance(OperationContext* opCtx, BSON
auto coorMetadata = extractShardingDDLCoordinatorMetadata(coorDoc);
const auto& nss = coorMetadata.getId().getNss();
- if (!nss.isConfigDB()) {
+ if (!nss.isConfigDB() && !nss.isAdminDB()) {
// Check that the operation context has a database version for this namespace
const auto clientDbVersion = OperationShardingState::get(opCtx).getDbVersion(nss.db());
uassert(ErrorCodes::IllegalOperation,
diff --git a/src/mongo/db/s/sharding_initialization_mongod.cpp b/src/mongo/db/s/sharding_initialization_mongod.cpp
index 2e7f0dac299..2c2121bb0db 100644
--- a/src/mongo/db/s/sharding_initialization_mongod.cpp
+++ b/src/mongo/db/s/sharding_initialization_mongod.cpp
@@ -41,6 +41,7 @@
#include "mongo/db/catalog_shard_feature_flag_gen.h"
#include "mongo/db/client_metadata_propagation_egress_hook.h"
#include "mongo/db/concurrency/d_concurrency.h"
+#include "mongo/db/concurrency/replication_state_transition_lock_guard.h"
#include "mongo/db/dbhelpers.h"
#include "mongo/db/keys_collection_client_direct.h"
#include "mongo/db/keys_collection_client_sharded.h"
@@ -557,7 +558,23 @@ void initializeGlobalShardingStateForConfigServerIfNeeded(OperationContext* opCt
return {ConnectionString::forLocal()};
}();
- CatalogCacheLoader::set(service, std::make_unique<ConfigServerCatalogCacheLoader>());
+ if (gFeatureFlagCatalogShard.isEnabledAndIgnoreFCV()) {
+ CatalogCacheLoader::set(service,
+ std::make_unique<ShardServerCatalogCacheLoader>(
+ std::make_unique<ConfigServerCatalogCacheLoader>()));
+
+ // This is only called in startup when there shouldn't be replication state changes, but to
+ // be safe we take the RSTL anyway.
+ repl::ReplicationStateTransitionLockGuard rstl(opCtx, MODE_IX);
+ const auto replCoord = repl::ReplicationCoordinator::get(opCtx);
+ bool isReplSet =
+ replCoord->getReplicationMode() == repl::ReplicationCoordinator::modeReplSet;
+ bool isStandaloneOrPrimary =
+ !isReplSet || (replCoord->getMemberState() == repl::MemberState::RS_PRIMARY);
+ CatalogCacheLoader::get(opCtx).initializeReplicaSetRole(isStandaloneOrPrimary);
+ } else {
+ CatalogCacheLoader::set(service, std::make_unique<ConfigServerCatalogCacheLoader>());
+ }
initializeGlobalShardingStateForMongoD(opCtx, configCS);
diff --git a/src/mongo/db/shard_id.cpp b/src/mongo/db/shard_id.cpp
index 39d66aa56a7..445abe37baa 100644
--- a/src/mongo/db/shard_id.cpp
+++ b/src/mongo/db/shard_id.cpp
@@ -35,6 +35,9 @@ namespace mongo {
const ShardId ShardId::kConfigServerId("config");
+// TODO SERVER-XXXXX: Use "config" shard id instead.
+const ShardId ShardId::kCatalogShardId("catalogShard");
+
bool ShardId::isValid() const {
return !_shardId.empty();
}
diff --git a/src/mongo/db/shard_id.h b/src/mongo/db/shard_id.h
index 798f8e427a1..b99469d1459 100644
--- a/src/mongo/db/shard_id.h
+++ b/src/mongo/db/shard_id.h
@@ -43,6 +43,7 @@ namespace mongo {
class ShardId {
public:
static const ShardId kConfigServerId;
+ static const ShardId kCatalogShardId;
ShardId(std::string shardId) : _shardId(std::move(shardId)) {}
diff --git a/src/mongo/s/catalog/sharding_catalog_client_impl.cpp b/src/mongo/s/catalog/sharding_catalog_client_impl.cpp
index b2a052fee22..c2c1a0c6817 100644
--- a/src/mongo/s/catalog/sharding_catalog_client_impl.cpp
+++ b/src/mongo/s/catalog/sharding_catalog_client_impl.cpp
@@ -732,12 +732,6 @@ std::pair<CollectionType, std::vector<ChunkType>> ShardingCatalogClientImpl::get
const NamespaceString& nss,
const ChunkVersion& sinceVersion,
const repl::ReadConcernArgs& readConcern) {
- // The config.collections collection is always unsharded. Attempting to run the aggregation
- // pipeline on it will trigger recursive refreshes, so return NamespaceNotFound right away.
- uassert(ErrorCodes::NamespaceNotFound,
- str::stream() << "Collection " << nss.ns() << " not found",
- nss != NamespaceString::kConfigsvrCollectionsNamespace);
-
auto aggRequest = makeCollectionAndChunksAggregation(opCtx, nss, sinceVersion);
std::vector<BSONObj> aggResult =
diff --git a/src/mongo/s/client/shard_registry.cpp b/src/mongo/s/client/shard_registry.cpp
index c9b58cf8997..77e02f641c6 100644
--- a/src/mongo/s/client/shard_registry.cpp
+++ b/src/mongo/s/client/shard_registry.cpp
@@ -188,7 +188,10 @@ ShardRegistry::Cache::LookupResult ShardRegistry::_lookup(OperationContext* opCt
invariant(shard);
auto name = shard->getConnString().getSetName();
- ReplicaSetMonitor::remove(name);
+ if (shardId != ShardId::kCatalogShardId) {
+ // Don't remove the catalog shard's RSM because it is used to target the config server.
+ ReplicaSetMonitor::remove(name);
+ }
_removeReplicaSet(name);
for (auto& callback : _shardRemovalHooks) {
// Run callbacks asynchronously.
diff --git a/src/mongo/s/config_server_catalog_cache_loader.cpp b/src/mongo/s/config_server_catalog_cache_loader.cpp
index 8b3736e79c5..899b7299132 100644
--- a/src/mongo/s/config_server_catalog_cache_loader.cpp
+++ b/src/mongo/s/config_server_catalog_cache_loader.cpp
@@ -111,29 +111,17 @@ void ConfigServerCatalogCacheLoader::shutDown() {
}
void ConfigServerCatalogCacheLoader::notifyOfCollectionVersionUpdate(const NamespaceString& nss) {
- // TODO SERVER-72489: Restore MONGO_UNREACHABLE if we decide to use a
- // ShardServerCatalogCacheLoader on the catalog shard.
- if (!gFeatureFlagCatalogShard.isEnabledAndIgnoreFCV()) {
- MONGO_UNREACHABLE;
- }
+ MONGO_UNREACHABLE;
}
void ConfigServerCatalogCacheLoader::waitForCollectionFlush(OperationContext* opCtx,
const NamespaceString& nss) {
- // TODO SERVER-72489: Restore MONGO_UNREACHABLE if we decide to use a
- // ShardServerCatalogCacheLoader on the catalog shard.
- if (!gFeatureFlagCatalogShard.isEnabledAndIgnoreFCV()) {
- MONGO_UNREACHABLE;
- }
+ MONGO_UNREACHABLE;
}
void ConfigServerCatalogCacheLoader::waitForDatabaseFlush(OperationContext* opCtx,
StringData dbName) {
- // TODO SERVER-72489: Restore MONGO_UNREACHABLE if we decide to use a
- // ShardServerCatalogCacheLoader on the catalog shard.
- if (!gFeatureFlagCatalogShard.isEnabledAndIgnoreFCV()) {
- MONGO_UNREACHABLE;
- }
+ MONGO_UNREACHABLE;
}
SemiFuture<CollectionAndChangedChunks> ConfigServerCatalogCacheLoader::getChunksSince(
diff --git a/src/mongo/shell/shardingtest.js b/src/mongo/shell/shardingtest.js
index 19547df7589..de6dcf93c3a 100644
--- a/src/mongo/shell/shardingtest.js
+++ b/src/mongo/shell/shardingtest.js
@@ -1811,13 +1811,19 @@ var ShardingTest = function(params) {
var testName = this._testName;
var admin = this.admin;
- this._connections.forEach(function(z) {
+ this._connections.forEach(function(z, idx) {
var n = z.name || z.host || z;
+ var name;
+ if (isCatalogShardMode && idx == 0) {
+ name = "catalogShard";
+ }
+
print("ShardingTest " + testName + " going to add shard : " + n);
- var result = assert.commandWorked(admin.runCommand({addshard: n}),
- "Failed to add shard " + n);
+ var result = assert.commandWorked(
+ admin.runCommand(name ? {addshard: n, name: name} : {addshard: n}),
+ "Failed to add shard " + n);
z.shardName = result.shardAdded;
});
}