author     Esha Maharishi <esha.maharishi@mongodb.com>  2018-06-11 15:58:28 -0400
committer  Esha Maharishi <esha.maharishi@mongodb.com>  2018-06-11 16:15:23 -0400
commit     b7c49f9ba1e9684028a08ee9de0b03be51470706 (patch)
tree       6852b4395215c96f64e51e6256204860480459fd
parent     1d378efb7e7aec14404b4d50df524fc2f1a3e881 (diff)
download   mongo-b7c49f9ba1e9684028a08ee9de0b03be51470706.tar.gz
SERVER-34072 Temporarily blacklist database_versioning.js workload from non-stepdown concurrency suites as well

(cherry picked from commit f4f7f2cc15ad46f20de8566e913fb5d25cfd4778)

SERVER-34072 config.databases writes with new version field should take the global IX lock rather than the fcvLock

(cherry picked from commit 0254efea54ab78d065e7df44a2d21b4c5b62212e)
-rw-r--r--  buildscripts/resmokeconfig/suites/concurrency_sharded_causal_consistency.yml                3
-rw-r--r--  buildscripts/resmokeconfig/suites/concurrency_sharded_causal_consistency_and_balancer.yml   3
-rw-r--r--  buildscripts/resmokeconfig/suites/concurrency_sharded_replication.yml                       3
-rw-r--r--  buildscripts/resmokeconfig/suites/concurrency_sharded_replication_with_balancer.yml         3
-rw-r--r--  buildscripts/resmokeconfig/suites/concurrency_sharded_with_stepdowns.yml                    5
-rw-r--r--  buildscripts/resmokeconfig/suites/concurrency_sharded_with_stepdowns_and_balancer.yml       6
-rw-r--r--  jstests/concurrency/fsm_workloads/database_versioning.js                                  154
-rw-r--r--  src/mongo/db/s/SConscript                                                                    1
-rw-r--r--  src/mongo/db/s/config/configsvr_add_shard_command.cpp                                        5
-rw-r--r--  src/mongo/db/s/config/configsvr_move_primary_command.cpp                                    20
-rw-r--r--  src/mongo/db/s/config/sharding_catalog_manager_database_operations.cpp                      44
-rw-r--r--  src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp                        118
-rw-r--r--  src/mongo/db/s/shard_filtering_metadata_refresh.cpp                                          62
-rw-r--r--  src/mongo/s/catalog/sharding_catalog_client.h                                                 7
-rw-r--r--  src/mongo/s/catalog/sharding_catalog_client_impl.cpp                                         14
-rw-r--r--  src/mongo/s/catalog/sharding_catalog_client_impl.h                                            4
-rw-r--r--  src/mongo/s/catalog/sharding_catalog_client_mock.cpp                                         11
-rw-r--r--  src/mongo/s/catalog/sharding_catalog_client_mock.h                                            6
-rw-r--r--  src/mongo/s/catalog/sharding_catalog_test.cpp                                                18
19 files changed, 335 insertions, 152 deletions
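
For illustration of the schema difference these patches protect, a config.databases entry for the same database looks roughly like this in each FCV (field shapes inferred from databaseVersion::makeNew(); names are placeholders, viewed from a mongo shell connected to a mongos):

    // FCV 3.6: no "version" field on the entry.
    //   { _id: "testDb", primary: "shard0000", partitioned: false }
    // FCV 4.0: a "version" subdocument generated when the entry is written.
    //   { _id: "testDb", primary: "shard0000", partitioned: false,
    //     version: { uuid: UUID("..."), lastMod: NumberInt(1) } }
    db.getSiblingDB("config").databases.find().forEach(doc => printjson(doc));
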
diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_causal_consistency.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_causal_consistency.yml
index d8a5284ae5b..e3eba518069 100644
--- a/buildscripts/resmokeconfig/suites/concurrency_sharded_causal_consistency.yml
+++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_causal_consistency.yml
@@ -112,6 +112,9 @@ selector:
- jstests/concurrency/fsm_workloads/yield_and_hashed.js
- jstests/concurrency/fsm_workloads/yield_and_sorted.js
+ # TODO Unblacklist (SERVER-35538).
+ - jstests/concurrency/fsm_workloads/database_versioning.js
+
executor:
archive:
hooks:
diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_causal_consistency_and_balancer.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_causal_consistency_and_balancer.yml
index d87180a16f2..5da18cfa424 100644
--- a/buildscripts/resmokeconfig/suites/concurrency_sharded_causal_consistency_and_balancer.yml
+++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_causal_consistency_and_balancer.yml
@@ -119,6 +119,9 @@ selector:
- jstests/concurrency/fsm_workloads/yield_and_hashed.js
- jstests/concurrency/fsm_workloads/yield_and_sorted.js
+ # TODO Unblacklist (SERVER-35538).
+ - jstests/concurrency/fsm_workloads/database_versioning.js
+
executor:
archive:
hooks:
diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_replication.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_replication.yml
index b5e67ad6a15..48843083a1f 100644
--- a/buildscripts/resmokeconfig/suites/concurrency_sharded_replication.yml
+++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_replication.yml
@@ -109,6 +109,9 @@ selector:
- jstests/concurrency/fsm_workloads/yield_and_hashed.js
- jstests/concurrency/fsm_workloads/yield_and_sorted.js
+ # TODO Unblacklist (SERVER-35538).
+ - jstests/concurrency/fsm_workloads/database_versioning.js
+
executor:
archive:
hooks:
diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_replication_with_balancer.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_replication_with_balancer.yml
index 2f166cd0027..ebcf7db5533 100644
--- a/buildscripts/resmokeconfig/suites/concurrency_sharded_replication_with_balancer.yml
+++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_replication_with_balancer.yml
@@ -116,6 +116,9 @@ selector:
- jstests/concurrency/fsm_workloads/yield_and_hashed.js
- jstests/concurrency/fsm_workloads/yield_and_sorted.js
+ # TODO Unblacklist (SERVER-35538).
+ - jstests/concurrency/fsm_workloads/database_versioning.js
+
executor:
archive:
hooks:
diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_with_stepdowns.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_with_stepdowns.yml
index e118de74a15..d3812362bc1 100644
--- a/buildscripts/resmokeconfig/suites/concurrency_sharded_with_stepdowns.yml
+++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_with_stepdowns.yml
@@ -173,6 +173,11 @@ selector:
# much value in running it.
- jstests/concurrency/fsm_workloads/drop_collection.js
+ # TODO (SERVER-35534) Unblacklist this workload from the concurrency stepdown suites. It fails
+ # with PooledConnectionsDropped when setFCV is run concurrently with movePrimary, which seems like
+ # it's due to a race condition in the NetworkInterfaceTL.
+ - jstests/concurrency/fsm_workloads/database_versioning.js
+
executor:
archive:
hooks:
diff --git a/buildscripts/resmokeconfig/suites/concurrency_sharded_with_stepdowns_and_balancer.yml b/buildscripts/resmokeconfig/suites/concurrency_sharded_with_stepdowns_and_balancer.yml
index d198a78899f..9b0ac84484d 100644
--- a/buildscripts/resmokeconfig/suites/concurrency_sharded_with_stepdowns_and_balancer.yml
+++ b/buildscripts/resmokeconfig/suites/concurrency_sharded_with_stepdowns_and_balancer.yml
@@ -178,6 +178,12 @@ selector:
# much value in running it.
- jstests/concurrency/fsm_workloads/drop_collection.js
+
+ # TODO (SERVER-35534) Unblacklist this workload from the concurrency stepdown suites. It fails
+ # with PooledConnectionsDropped when setFCV is run concurrently with movePrimary, which seems like
+ # it's due to a race condition in the NetworkInterfaceTL.
+ - jstests/concurrency/fsm_workloads/database_versioning.js
+
executor:
archive:
hooks:
diff --git a/jstests/concurrency/fsm_workloads/database_versioning.js b/jstests/concurrency/fsm_workloads/database_versioning.js
new file mode 100644
index 00000000000..f7b02fc914a
--- /dev/null
+++ b/jstests/concurrency/fsm_workloads/database_versioning.js
@@ -0,0 +1,154 @@
+'use strict';
+
+/**
+ * Stress tests writes to config.databases while continuously running setFCV to ensure that the
+ * config.databases schema always matches the FCV.
+ */
+var $config = (function() {
+
+ var states = (function() {
+
+ function init(db, collName) {
+ // Dynamically load the shard names for the movePrimary thread to avoid hard-coding
+ // them.
+ this.shards = db.getSiblingDB("config").shards.find().toArray().map(shard => shard._id);
+ }
+
+ function setFCV(db, data) {
+ if (data.fcv === undefined) {
+ data.fcv = "4.0";
+ }
+
+ // First check that the current entries match the current FCV's schema.
+ const databases = db.getSiblingDB("config").databases.find().toArray();
+ for (let i in databases) {
+ const database = databases[i];
+ if (data.fcv === "3.6") {
+ assertAlways.eq(undefined,
+ database.version,
+ "database had a version in FCV 3.6: " + tojson(database));
+ } else {
+ assertAlways.neq(
+ undefined,
+ database.version,
+ "database did not have a version in FCV 4.0: " + tojson(database));
+ }
+ }
+
+ // Then change the FCV.
+ data.fcv = (data.fcv === "4.0") ? "3.6" : "4.0";
+ assertAlways.commandWorked(db.adminCommand({setFeatureCompatibilityVersion: data.fcv}));
+ }
+
+ function createWithEnableSharding(db, data) {
+ data.dbNameCount = (data.dbNameCount === undefined) ? 0 : data.dbNameCount + 1;
+ assertAlways.commandWorked(
+ db.adminCommand({enableSharding: "createWithEnableShardingDb" + data.dbNameCount}));
+ }
+
+ function createWithInsert(db, data) {
+ data.dbNameCount = (data.dbNameCount === undefined) ? 0 : data.dbNameCount + 1;
+ assertAlways.commandWorked(
+ db.getSiblingDB("createWithInsertDb" + data.dbNameCount).foo.insert({x: 1}));
+ }
+
+ function movePrimary(db, data) {
+ // Assume an arbitrary shard is the current primary shard; if it's not, the first
+ // iteration will be a no-op.
+ if (data.primaryShard === undefined) {
+ data.primaryShard = data.shards[0];
+ }
+
+ const toShard =
+ (data.primaryShard === data.shards[0]) ? data.shards[1] : data.shards[0];
+ const res = db.adminCommand({movePrimary: "movePrimaryDb", to: toShard});
+
+ // movePrimary will correctly error if the FCV changes while it is running.
+ if (res.code === ErrorCodes.ConflictingOperationInProgress) {
+ return;
+ }
+
+ assertAlways.commandWorked(res);
+ data.primaryShard = toShard;
+ }
+
+ function state(db, collName) {
+ switch (this.tid) {
+ case 0:
+ setFCV(db, this);
+ break;
+ case 1:
+ createWithEnableSharding(db, this);
+ break;
+ case 2:
+ createWithInsert(db, this);
+ break;
+ case 3:
+ movePrimary(db, this);
+ break;
+ }
+ }
+
+ return {init: init, state: state};
+
+ })();
+
+ var transitions = {init: {state: 1}, state: {state: 1}};
+
+ function setup(db, collName, cluster) {
+ // Ensure the cluster starts in FCV 4.0.
+ assertAlways.commandWorked(db.adminCommand({setFeatureCompatibilityVersion: "4.0"}));
+
+ // Create a database for the thread doing movePrimary. Use 'enableSharding' rather than
+ // an insert to create the database (if an unsharded collection exists in a database and an
+ // initial movePrimary attempt fails after copying the unsharded collection to the new
+ // primary, retrying the movePrimary will fail).
+ assertAlways.commandWorked(db.adminCommand({enableSharding: "movePrimaryDb"}));
+ }
+
+ function teardown(db, collName, cluster) {
+ // Ensure all databases have data in them so that they show up in listDatabases (which calls
+ // listDatabases against all shards rather than reading config.databases). This guarantees
+ // they are dropped by the cleanup hook.
+ const databases = db.getSiblingDB("config").databases.find().toArray();
+ for (let i in databases) {
+ const database = databases[i];
+ assertAlways.commandWorked(db.getSiblingDB(databases[i]._id).foo.insert({x: 1}));
+ }
+
+ // If this workload is run with --repeat, mongos will already have all the database entries
+ // cached. Because of SERVER-xxx (mongos does not mark its database entry as invalid on
+ // CannotImplicitlyCreateCollection), this mongos will never realize the databases have been
+ // dropped, and so will never send the implicit createDatabase for writes in the next run
+ // (and instead will exhaust retries of CannotImplicitlyCreateCollection).
+ // As a temporary workaround, flush mongos's cache at the end of each workload.
+ assertAlways.commandWorked(db.adminCommand({flushRouterConfig: 1}));
+ }
+
+ function skip(cluster) {
+ if (!cluster.isSharded()) {
+ return {skip: true, msg: 'only runs in a sharded cluster'};
+ }
+ return {skip: false};
+ }
+
+ // This test performs sharding catalog operations (which take distributed locks) concurrently
+ // from many threads. Since a distributed lock is acquired by repeatedly attempting to grab the
+ // lock every half second for 20 seconds (a max of 40 attempts), it's possible that some thread
+ // will be starved by the other threads and fail to grab the lock after 40 attempts. To reduce
+ // the likelihood of this, we choose threadCount and iterations so that threadCount * iterations
+ // does not exceed 40.
+ // Note that this test's structure requires at least 4 threads (one per sharding catalog op).
+ // The iterations can be increased after PM-697 ("Remove all usages of distributed lock").
+ return {
+ threadCount: 4,
+ iterations: 10,
+ data: {},
+ states: states,
+ transitions: transitions,
+ setup: setup,
+ teardown: teardown,
+ skip: skip,
+ };
+
+})();
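
Outside the FSM framework, the invariant asserted in the workload's setFCV state can be spot-checked from the mongo shell. A minimal sketch, assuming a connection to a mongos of a disposable test cluster:

    // Track the FCV yourself (as the workload does via data.fcv) rather than querying it.
    const fcv = "4.0";  // whichever value was last passed to setFeatureCompatibilityVersion
    assert.commandWorked(db.adminCommand({setFeatureCompatibilityVersion: fcv}));

    // Every config.databases entry must match that FCV's schema:
    // no "version" field in FCV 3.6, a "version" field in FCV 4.0.
    db.getSiblingDB("config").databases.find().forEach(database => {
        if (fcv === "3.6") {
            assert.eq(undefined, database.version, "unexpected version: " + tojson(database));
        } else {
            assert.neq(undefined, database.version, "missing version: " + tojson(database));
        }
    });
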
diff --git a/src/mongo/db/s/SConscript b/src/mongo/db/s/SConscript
index 66ce03e8045..2216e68abae 100644
--- a/src/mongo/db/s/SConscript
+++ b/src/mongo/db/s/SConscript
@@ -197,6 +197,7 @@ env.Library(
'$BUILD_DIR/mongo/db/catalog/collection_options',
'$BUILD_DIR/mongo/db/catalog_raii',
'$BUILD_DIR/mongo/db/repl/read_concern_args',
+ '$BUILD_DIR/mongo/db/rw_concern_d',
'$BUILD_DIR/mongo/executor/network_interface',
'$BUILD_DIR/mongo/s/catalog/sharding_catalog_client',
'$BUILD_DIR/mongo/s/client/sharding_client',
diff --git a/src/mongo/db/s/config/configsvr_add_shard_command.cpp b/src/mongo/db/s/config/configsvr_add_shard_command.cpp
index bf5ed552446..53a7a53b6e6 100644
--- a/src/mongo/db/s/config/configsvr_add_shard_command.cpp
+++ b/src/mongo/db/s/config/configsvr_add_shard_command.cpp
@@ -97,11 +97,6 @@ public:
"_configsvrAddShard can only be run on config servers");
}
- // Do not allow adding shards while a featureCompatibilityVersion upgrade or downgrade is in
- // progress (see SERVER-31231 for details).
- invariant(!opCtx->lockState()->isLocked());
- Lock::SharedLock lk(opCtx->lockState(), FeatureCompatibilityVersion::fcvLock);
-
// Set the operation context read concern level to local for reads into the config database.
repl::ReadConcernArgs::get(opCtx) =
repl::ReadConcernArgs(repl::ReadConcernLevel::kLocalReadConcern);
diff --git a/src/mongo/db/s/config/configsvr_move_primary_command.cpp b/src/mongo/db/s/config/configsvr_move_primary_command.cpp
index 82b49d344af..5dbab2f5023 100644
--- a/src/mongo/db/s/config/configsvr_move_primary_command.cpp
+++ b/src/mongo/db/s/config/configsvr_move_primary_command.cpp
@@ -259,12 +259,13 @@ public:
}
{
- // Check if the FCV has been changed under us.
- invariant(!opCtx->lockState()->isLocked());
- Lock::SharedLock lk(opCtx->lockState(), FeatureCompatibilityVersion::fcvLock);
+ // Hold the Global IX lock across checking the FCV and writing the database entry.
+ // Because of the Global S lock barrier in setFCV, this ensures a concurrent setFCV
+ // will block before performing schema upgrade until we have written the entry.
+ Lock::GlobalLock lk(opCtx, MODE_IX);
// If we are upgrading to (or are fully on) FCV 4.0, then fail. If we do not fail, we
- // will potentially write an unversioned database in a schema that requires versions.
+ // will potentially change the primary shard of a database without changing its version.
if (serverGlobalParams.featureCompatibility.getVersion() ==
ServerGlobalParams::FeatureCompatibility::Version::kUpgradingTo40 ||
serverGlobalParams.featureCompatibility.getVersion() ==
@@ -273,9 +274,14 @@ public:
"committing movePrimary failed due to version mismatch");
}
- // Update the new primary in the config server metadata.
- dbType.setPrimary(toShard->getId());
- uassertStatusOK(catalogClient->updateDatabase(opCtx, dbname, dbType));
+ // Update the database entry with the new primary shard.
+ uassertStatusOK(catalogClient->updateConfigDocument(
+ opCtx,
+ DatabaseType::ConfigNS,
+ BSON(DatabaseType::name(dbname)),
+ BSON("$set" << BSON(DatabaseType::primary(toShard->getId().toString()))),
+ false,
+ ShardingCatalogClient::kLocalWriteConcern));
}
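
The hunk above replaces the full-document updateDatabase call with a targeted $set on the database's primary field. Purely to illustrate the document shapes involved (the real write is issued internally by the config server's commitMovePrimary path with local write concern; the names below are placeholders), a shell-level equivalent would be:

    // Illustration only; config metadata is not meant to be edited by hand.
    db.getSiblingDB("config").databases.update(
        {_id: "movePrimaryDb"},          // BSON(DatabaseType::name(dbname))
        {$set: {primary: "shard0001"}},  // only the primary shard changes
        {upsert: false});
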
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_database_operations.cpp b/src/mongo/db/s/config/sharding_catalog_manager_database_operations.cpp
index ad61fe0d787..a7371789025 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_database_operations.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_database_operations.cpp
@@ -37,6 +37,7 @@
#include "mongo/db/namespace_string.h"
#include "mongo/db/repl/repl_client_info.h"
#include "mongo/db/server_options.h"
+#include "mongo/db/write_concern.h"
#include "mongo/s/catalog/sharding_catalog_client_impl.h"
#include "mongo/s/catalog/type_database.h"
#include "mongo/s/catalog_cache.h"
@@ -111,11 +112,11 @@ DatabaseType ShardingCatalogManager::createDatabase(OperationContext* opCtx,
const auto primaryShardId =
uassertStatusOK(_selectShardForNewDatabase(opCtx, Grid::get(opCtx)->shardRegistry()));
- // Take the fcvLock to prevent the fcv from changing from the point we decide whether to include
- // a databaseVersion until after we finish writing the database entry. Otherwise, we may end up
- // in fcv>3.6, but without a databaseVersion.
- invariant(!opCtx->lockState()->isLocked());
- Lock::SharedLock lk(opCtx->lockState(), FeatureCompatibilityVersion::fcvLock);
+ // Hold the Global IX lock across checking the FCV and writing the new database entry.
+ // Because of the Global S lock barrier in setFCV, this ensures either the setFCV schema
+ // upgrade/downgrade will block until we have written the entry, or we will write the entry in
+ // the target FCV's schema.
+ Lock::GlobalLock lk(opCtx, MODE_IX);
// If in FCV>3.6, generate a databaseVersion, including a UUID, for the new database.
boost::optional<DatabaseVersion> dbVersion = boost::none;
@@ -132,7 +133,7 @@ DatabaseType ShardingCatalogManager::createDatabase(OperationContext* opCtx,
log() << "Registering new database " << db << " in sharding catalog";
uassertStatusOK(Grid::get(opCtx)->catalogClient()->insertConfigDocument(
- opCtx, DatabaseType::ConfigNS, db.toBSON(), ShardingCatalogClient::kMajorityWriteConcern));
+ opCtx, DatabaseType::ConfigNS, db.toBSON(), ShardingCatalogClient::kLocalWriteConcern));
return db;
}
@@ -154,8 +155,26 @@ void ShardingCatalogManager::enableSharding(OperationContext* opCtx, const std::
auto dbType = createDatabase(opCtx, dbName);
dbType.setSharded(true);
+ // We must wait for the database entry to be majority committed, because it's possible that
+ // reading from the majority snapshot has been set on the RecoveryUnit due to an earlier read,
+ // such as overtaking a distlock or loading the ShardRegistry.
+ WriteConcernResult unusedResult;
+ uassertStatusOK(
+ waitForWriteConcern(opCtx,
+ repl::ReplClientInfo::forClient(opCtx->getClient()).getLastOp(),
+ WriteConcernOptions(WriteConcernOptions::kMajority,
+ WriteConcernOptions::SyncMode::UNSET,
+ Milliseconds{30000}),
+ &unusedResult));
+
log() << "Enabling sharding for database [" << dbName << "] in config db";
- uassertStatusOK(Grid::get(opCtx)->catalogClient()->updateDatabase(opCtx, dbName, dbType));
+ uassertStatusOK(Grid::get(opCtx)->catalogClient()->updateConfigDocument(
+ opCtx,
+ DatabaseType::ConfigNS,
+ BSON(DatabaseType::name(dbName)),
+ BSON("$set" << BSON(DatabaseType::sharded(true))),
+ false,
+ ShardingCatalogClient::kLocalWriteConcern));
}
StatusWith<std::vector<std::string>> ShardingCatalogManager::getDatabasesForShard(
@@ -192,9 +211,10 @@ Status ShardingCatalogManager::commitMovePrimary(OperationContext* opCtx,
auto const configShard = Grid::get(opCtx)->shardRegistry()->getConfigShard();
- // Check if the FCV has been changed under us.
- invariant(!opCtx->lockState()->isLocked());
- Lock::SharedLock lk(opCtx->lockState(), FeatureCompatibilityVersion::fcvLock);
+ // Hold the Global IX lock across checking the FCV and writing the database entry. Because of
+ // the Global S lock barrier in setFCV, this ensures a concurrent setFCV will block before
+ // performing schema downgrade until we have written the entry.
+ Lock::GlobalLock lk(opCtx, MODE_IX);
auto currentFCV = serverGlobalParams.featureCompatibility.getVersion();
@@ -250,8 +270,8 @@ Status ShardingCatalogManager::commitMovePrimary(OperationContext* opCtx,
DatabaseType::ConfigNS,
updateQueryBuilder.obj(),
newDbType.toBSON(),
- true,
- ShardingCatalogClient::kMajorityWriteConcern);
+ false,
+ ShardingCatalogClient::kLocalWriteConcern);
if (!updateStatus.isOK()) {
log() << "error committing movePrimary: " << dbname
diff --git a/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp b/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp
index e6c68ba2f04..153e9f78b71 100644
--- a/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp
+++ b/src/mongo/db/s/config/sharding_catalog_manager_shard_operations.cpp
@@ -45,6 +45,7 @@
#include "mongo/db/audit.h"
#include "mongo/db/catalog_raii.h"
#include "mongo/db/client.h"
+#include "mongo/db/commands/feature_compatibility_version.h"
#include "mongo/db/commands/feature_compatibility_version_command_parser.h"
#include "mongo/db/commands/feature_compatibility_version_parser.h"
#include "mongo/db/namespace_string.h"
@@ -659,65 +660,84 @@ StatusWith<std::string> ShardingCatalogManager::addShard(
return batchResponseStatus;
}
+ {
+ // Hold the fcvLock across checking the FCV, sending setFCV to the new shard, and
+ // writing the entry for the new shard to config.shards. This ensures the FCV doesn't change
+ // after we send setFCV to the new shard, but before we write its entry to config.shards.
+ // (Note, we don't use a Global IX lock here, because we don't want to hold the global lock
+ // while blocking on the network).
+ invariant(!opCtx->lockState()->isLocked());
+ Lock::SharedLock lk(opCtx->lockState(), FeatureCompatibilityVersion::fcvLock);
+
+ BSONObj setFCVCmd;
+ switch (serverGlobalParams.featureCompatibility.getVersion()) {
+ case ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo40:
+ case ServerGlobalParams::FeatureCompatibility::Version::kUpgradingTo40:
+ setFCVCmd = BSON(FeatureCompatibilityVersionCommandParser::kCommandName
+ << FeatureCompatibilityVersionParser::kVersion40
+ << WriteConcernOptions::kWriteConcernField
+ << opCtx->getWriteConcern().toBSON());
+ break;
+ default:
+ setFCVCmd = BSON(FeatureCompatibilityVersionCommandParser::kCommandName
+ << FeatureCompatibilityVersionParser::kVersion36
+ << WriteConcernOptions::kWriteConcernField
+ << opCtx->getWriteConcern().toBSON());
+ break;
+ }
+ auto versionResponse =
+ _runCommandForAddShard(opCtx, targeter.get(), NamespaceString::kAdminDb, setFCVCmd);
+ if (!versionResponse.isOK()) {
+ return versionResponse.getStatus();
+ }
- // Since addShard runs under the fcvLock, it is guaranteed the fcv state won't change, but it's
- // possible an earlier setFCV failed partway, so we handle all possible fcv states. Note, if
- // the state is upgrading (downgrading), a user cannot switch to downgrading (upgrading) without
- // first finishing the upgrade (downgrade).
- BSONObj setFCVCmd;
- switch (serverGlobalParams.featureCompatibility.getVersion()) {
- case ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo40:
- case ServerGlobalParams::FeatureCompatibility::Version::kUpgradingTo40:
- setFCVCmd = BSON(FeatureCompatibilityVersionCommandParser::kCommandName
- << FeatureCompatibilityVersionParser::kVersion40
- << WriteConcernOptions::kWriteConcernField
- << opCtx->getWriteConcern().toBSON());
- break;
- default:
- setFCVCmd = BSON(FeatureCompatibilityVersionCommandParser::kCommandName
- << FeatureCompatibilityVersionParser::kVersion36
- << WriteConcernOptions::kWriteConcernField
- << opCtx->getWriteConcern().toBSON());
- break;
- }
- auto versionResponse =
- _runCommandForAddShard(opCtx, targeter.get(), NamespaceString::kAdminDb, setFCVCmd);
- if (!versionResponse.isOK()) {
- return versionResponse.getStatus();
- }
-
- if (!versionResponse.getValue().commandStatus.isOK()) {
- return versionResponse.getValue().commandStatus;
- }
+ if (!versionResponse.getValue().commandStatus.isOK()) {
+ return versionResponse.getValue().commandStatus;
+ }
- log() << "going to insert new entry for shard into config.shards: " << shardType.toString();
+ log() << "going to insert new entry for shard into config.shards: " << shardType.toString();
- Status result = Grid::get(opCtx)->catalogClient()->insertConfigDocument(
- opCtx,
- ShardType::ConfigNS,
- shardType.toBSON(),
- ShardingCatalogClient::kMajorityWriteConcern);
- if (!result.isOK()) {
- log() << "error adding shard: " << shardType.toBSON() << " err: " << result.reason();
- return result;
+ Status result = Grid::get(opCtx)->catalogClient()->insertConfigDocument(
+ opCtx,
+ ShardType::ConfigNS,
+ shardType.toBSON(),
+ ShardingCatalogClient::kLocalWriteConcern);
+ if (!result.isOK()) {
+ log() << "error adding shard: " << shardType.toBSON() << " err: " << result.reason();
+ return result;
+ }
}
// Add all databases which were discovered on the new shard
for (const auto& dbName : dbNamesStatus.getValue()) {
DatabaseType dbt(dbName, shardType.getName(), false);
- // If we're in FCV 4.0, we should add a version to each database.
- if (serverGlobalParams.featureCompatibility.getVersion() ==
- ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo40 ||
- serverGlobalParams.featureCompatibility.getVersion() ==
- ServerGlobalParams::FeatureCompatibility::Version::kUpgradingTo40) {
- dbt.setVersion(databaseVersion::makeNew());
- }
+ {
+ // Hold the Global IX lock across checking the FCV and writing the database entry.
+ // Because of the Global S lock barrier in setFCV, this ensures either the setFCV schema
+ // upgrade/downgrade will block until we have written the entry, or we will write the
+ // entry in the target FCV's schema.
+ Lock::GlobalLock lk(opCtx, MODE_IX);
+
+ // If we're in FCV 4.0, we should add a version to each database.
+ if (serverGlobalParams.featureCompatibility.getVersion() ==
+ ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo40 ||
+ serverGlobalParams.featureCompatibility.getVersion() ==
+ ServerGlobalParams::FeatureCompatibility::Version::kUpgradingTo40) {
+ dbt.setVersion(databaseVersion::makeNew());
+ }
- Status status = Grid::get(opCtx)->catalogClient()->updateDatabase(opCtx, dbName, dbt);
- if (!status.isOK()) {
- log() << "adding shard " << shardConnectionString.toString()
- << " even though could not add database " << dbName;
+ const auto status = Grid::get(opCtx)->catalogClient()->updateConfigDocument(
+ opCtx,
+ DatabaseType::ConfigNS,
+ BSON(DatabaseType::name(dbName)),
+ dbt.toBSON(),
+ true,
+ ShardingCatalogClient::kLocalWriteConcern);
+ if (!status.isOK()) {
+ log() << "adding shard " << shardConnectionString.toString()
+ << " even though could not add database " << dbName;
+ }
}
}
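
With the block above, addShard sends setFCV to the new shard and inserts its config.shards entry while holding the fcvLock, so a newly added shard should come back already on the cluster's FCV. A minimal shell-level check (host and replica set names are placeholders) might be:

    // From a mongos: add the shard, then ask the shard directly for its FCV.
    assert.commandWorked(db.adminCommand({addShard: "newShardRS/newhost:27018"}));
    const shardConn = new Mongo("newhost:27018");
    const res = assert.commandWorked(shardConn.getDB("admin").runCommand(
        {getParameter: 1, featureCompatibilityVersion: 1}));
    printjson(res.featureCompatibilityVersion);  // expected to match the cluster's FCV
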
diff --git a/src/mongo/db/s/shard_filtering_metadata_refresh.cpp b/src/mongo/db/s/shard_filtering_metadata_refresh.cpp
index 04288ed16bd..7a677c60d8a 100644
--- a/src/mongo/db/s/shard_filtering_metadata_refresh.cpp
+++ b/src/mongo/db/s/shard_filtering_metadata_refresh.cpp
@@ -204,39 +204,13 @@ void forceDatabaseRefresh(OperationContext* opCtx, const StringData dbName) {
uassertStatusOK(Grid::get(opCtx)->catalogCache()->getDatabaseWithRefresh(opCtx, dbName))
.databaseVersion();
- // Only set the in-memory version in FCV 4.0, and hold the lock across checking the FCV and
- // setting the version.
- Lock::SharedLock lk(opCtx->lockState(), FeatureCompatibilityVersion::fcvLock);
- if (serverGlobalParams.featureCompatibility.getVersion() ==
- ServerGlobalParams::FeatureCompatibility::Version::kFullyUpgradedTo40) {
- // First, check under a shared lock if another thread already updated the cached version.
- // This is a best-effort optimization to make as few threads as possible to convoy on the
- // exclusive lock below.
- {
- // Take the DBLock directly rather than using AutoGetDb, to prevent a recursive call
- // into checkDbVersion().
- Lock::DBLock dbLock(opCtx, dbName, MODE_IS);
- const auto db = DatabaseHolder::getDatabaseHolder().get(opCtx, dbName);
- if (!db) {
- log() << "Database " << dbName
- << " has been dropped; not caching the refreshed databaseVersion";
- return;
- }
-
- const auto cachedDbVersion = DatabaseShardingState::get(db).getDbVersion(opCtx);
- if (cachedDbVersion && refreshedDbVersion &&
- cachedDbVersion->getUuid() == refreshedDbVersion->getUuid() &&
- cachedDbVersion->getLastMod() >= refreshedDbVersion->getLastMod()) {
- LOG(2) << "Skipping setting cached databaseVersion for " << dbName
- << " to refreshed version " << refreshedDbVersion->toBSON()
- << " because current cached databaseVersion is already "
- << cachedDbVersion->toBSON();
- return;
- }
- }
-
- // The cached version is older than the refreshed version; update the cached version.
- Lock::DBLock dbLock(opCtx, dbName, MODE_X);
+ // First, check under a shared lock if another thread already updated the cached version.
+ // This is a best-effort optimization to make as few threads as possible to convoy on the
+ // exclusive lock below.
+ {
+ // Take the DBLock directly rather than using AutoGetDb, to prevent a recursive call
+ // into checkDbVersion().
+ Lock::DBLock dbLock(opCtx, dbName, MODE_IS);
const auto db = DatabaseHolder::getDatabaseHolder().get(opCtx, dbName);
if (!db) {
log() << "Database " << dbName
@@ -244,8 +218,28 @@ void forceDatabaseRefresh(OperationContext* opCtx, const StringData dbName) {
return;
}
- DatabaseShardingState::get(db).setDbVersion(opCtx, std::move(refreshedDbVersion));
+ const auto cachedDbVersion = DatabaseShardingState::get(db).getDbVersion(opCtx);
+ if (cachedDbVersion && refreshedDbVersion &&
+ cachedDbVersion->getUuid() == refreshedDbVersion->getUuid() &&
+ cachedDbVersion->getLastMod() >= refreshedDbVersion->getLastMod()) {
+ LOG(2) << "Skipping setting cached databaseVersion for " << dbName
+ << " to refreshed version " << refreshedDbVersion->toBSON()
+ << " because current cached databaseVersion is already "
+ << cachedDbVersion->toBSON();
+ return;
+ }
}
+
+ // The cached version is older than the refreshed version; update the cached version.
+ Lock::DBLock dbLock(opCtx, dbName, MODE_X);
+ const auto db = DatabaseHolder::getDatabaseHolder().get(opCtx, dbName);
+ if (!db) {
+ log() << "Database " << dbName
+ << " has been dropped; not caching the refreshed databaseVersion";
+ return;
+ }
+
+ DatabaseShardingState::get(db).setDbVersion(opCtx, std::move(refreshedDbVersion));
}
} // namespace mongo
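
With the FCV gate removed above, forceDatabaseRefresh unconditionally caches the refreshed databaseVersion on the shard. Assuming this branch also ships the shard-only _flushDatabaseCacheUpdates command that drives this refresh path (an assumption; it is not part of this diff), the path can be exercised directly:

    // Run against a shard primary, not a mongos; host name is a placeholder.
    const shardPrimary = new Mongo("shardhost:27018");
    assert.commandWorked(shardPrimary.getDB("admin").runCommand(
        {_flushDatabaseCacheUpdates: "movePrimaryDb"}));
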
diff --git a/src/mongo/s/catalog/sharding_catalog_client.h b/src/mongo/s/catalog/sharding_catalog_client.h
index 427a4c53775..ef0337dbf1a 100644
--- a/src/mongo/s/catalog/sharding_catalog_client.h
+++ b/src/mongo/s/catalog/sharding_catalog_client.h
@@ -109,13 +109,6 @@ public:
virtual void shutDown(OperationContext* opCtx) = 0;
/**
- * Updates or creates the metadata for a given database.
- */
- virtual Status updateDatabase(OperationContext* opCtx,
- const std::string& dbName,
- const DatabaseType& db) = 0;
-
- /**
* Retrieves the metadata for a given database, if it exists.
*
* @param dbName name of the database (case sensitive)
diff --git a/src/mongo/s/catalog/sharding_catalog_client_impl.cpp b/src/mongo/s/catalog/sharding_catalog_client_impl.cpp
index 0afb6f59f2c..7ebc7561849 100644
--- a/src/mongo/s/catalog/sharding_catalog_client_impl.cpp
+++ b/src/mongo/s/catalog/sharding_catalog_client_impl.cpp
@@ -150,20 +150,6 @@ Status ShardingCatalogClientImpl::updateShardingCatalogEntryForCollection(
return status.getStatus().withContext(str::stream() << "Collection metadata write failed");
}
-Status ShardingCatalogClientImpl::updateDatabase(OperationContext* opCtx,
- const std::string& dbName,
- const DatabaseType& db) {
- fassert(28616, db.validate());
-
- auto status = updateConfigDocument(opCtx,
- DatabaseType::ConfigNS,
- BSON(DatabaseType::name(dbName)),
- db.toBSON(),
- true,
- ShardingCatalogClient::kMajorityWriteConcern);
- return status.getStatus().withContext(str::stream() << "Database metadata write failed");
-}
-
Status ShardingCatalogClientImpl::logAction(OperationContext* opCtx,
const std::string& what,
const std::string& ns,
diff --git a/src/mongo/s/catalog/sharding_catalog_client_impl.h b/src/mongo/s/catalog/sharding_catalog_client_impl.h
index f20ea1e4ac1..6ef87d060ff 100644
--- a/src/mongo/s/catalog/sharding_catalog_client_impl.h
+++ b/src/mongo/s/catalog/sharding_catalog_client_impl.h
@@ -71,10 +71,6 @@ public:
void shutDown(OperationContext* opCtx) override;
- Status updateDatabase(OperationContext* opCtx,
- const std::string& dbName,
- const DatabaseType& db) override;
-
Status logAction(OperationContext* opCtx,
const std::string& what,
const std::string& ns,
diff --git a/src/mongo/s/catalog/sharding_catalog_client_mock.cpp b/src/mongo/s/catalog/sharding_catalog_client_mock.cpp
index 04fdd6b649e..b1a1633f4b0 100644
--- a/src/mongo/s/catalog/sharding_catalog_client_mock.cpp
+++ b/src/mongo/s/catalog/sharding_catalog_client_mock.cpp
@@ -63,17 +63,6 @@ void ShardingCatalogClientMock::shutDown(OperationContext* opCtx) {
}
}
-Status ShardingCatalogClientMock::enableSharding(OperationContext* opCtx,
- const std::string& dbName) {
- return {ErrorCodes::InternalError, "Method not implemented"};
-}
-
-Status ShardingCatalogClientMock::updateDatabase(OperationContext* opCtx,
- const string& dbName,
- const DatabaseType& db) {
- return {ErrorCodes::InternalError, "Method not implemented"};
-}
-
StatusWith<repl::OpTimeWith<DatabaseType>> ShardingCatalogClientMock::getDatabase(
OperationContext* opCtx, const string& dbName, repl::ReadConcernLevel readConcernLevel) {
return {ErrorCodes::InternalError, "Method not implemented"};
diff --git a/src/mongo/s/catalog/sharding_catalog_client_mock.h b/src/mongo/s/catalog/sharding_catalog_client_mock.h
index 643ef0e3d78..d98c644792d 100644
--- a/src/mongo/s/catalog/sharding_catalog_client_mock.h
+++ b/src/mongo/s/catalog/sharding_catalog_client_mock.h
@@ -44,12 +44,6 @@ public:
void shutDown(OperationContext* opCtx) override;
- Status enableSharding(OperationContext* opCtx, const std::string& dbName);
-
- Status updateDatabase(OperationContext* opCtx,
- const std::string& dbName,
- const DatabaseType& db) override;
-
StatusWith<repl::OpTimeWith<DatabaseType>> getDatabase(
OperationContext* opCtx,
const std::string& dbName,
diff --git a/src/mongo/s/catalog/sharding_catalog_test.cpp b/src/mongo/s/catalog/sharding_catalog_test.cpp
index c8a15fb5c36..fa954472e32 100644
--- a/src/mongo/s/catalog/sharding_catalog_test.cpp
+++ b/src/mongo/s/catalog/sharding_catalog_test.cpp
@@ -1080,7 +1080,13 @@ TEST_F(ShardingCatalogClientTest, UpdateDatabase) {
DatabaseType dbt("test", ShardId("shard0000"), true);
auto future = launchAsync([this, dbt] {
- auto status = catalogClient()->updateDatabase(operationContext(), dbt.getName(), dbt);
+ auto status =
+ catalogClient()->updateConfigDocument(operationContext(),
+ DatabaseType::ConfigNS,
+ BSON(DatabaseType::name(dbt.getName())),
+ dbt.toBSON(),
+ true,
+ ShardingCatalogClient::kMajorityWriteConcern);
ASSERT_OK(status);
});
@@ -1114,14 +1120,20 @@ TEST_F(ShardingCatalogClientTest, UpdateDatabase) {
future.timed_get(kFutureTimeout);
}
-TEST_F(ShardingCatalogClientTest, UpdateDatabaseExceededTimeLimit) {
+TEST_F(ShardingCatalogClientTest, UpdateConfigDocumentExceededTimeLimit) {
HostAndPort host1("TestHost1");
configTargeter()->setFindHostReturnValue(host1);
DatabaseType dbt("test", ShardId("shard0001"), false);
auto future = launchAsync([this, dbt] {
- auto status = catalogClient()->updateDatabase(operationContext(), dbt.getName(), dbt);
+ auto status =
+ catalogClient()->updateConfigDocument(operationContext(),
+ DatabaseType::ConfigNS,
+ BSON(DatabaseType::name(dbt.getName())),
+ dbt.toBSON(),
+ true,
+ ShardingCatalogClient::kMajorityWriteConcern);
ASSERT_EQ(ErrorCodes::ExceededTimeLimit, status);
});