author     Spencer T Brody <spencer@mongodb.com>   2016-08-24 15:56:55 -0400
committer  Spencer T Brody <spencer@mongodb.com>   2016-08-26 16:55:38 -0400
commit     6bf9fd2e5a5f043b950cb77361be3c1ed7a7d0af (patch)
tree       dd6d2cdcf3d3ef2eee3d156b3417b116d2f5ef3b
parent     a4a9a9ad29415239091db171e01f45677464f668 (diff)
SERVER-25677 Clear cached clusterId if config.version document is rolled back.
-rw-r--r--  buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml |  2
-rw-r--r--  jstests/sharding/config_version_rollback.js | 93
-rw-r--r--  jstests/sharding/shard_aware_on_config_election.js |  4
-rw-r--r--  jstests/sharding/shard_identity_rollback.js |  2
-rw-r--r--  src/mongo/db/op_observer.cpp |  5
-rw-r--r--  src/mongo/db/repl/replication_coordinator_external_state_impl.cpp |  8
-rw-r--r--  src/mongo/db/repl/replication_coordinator_external_state_impl.h |  4
-rw-r--r--  src/mongo/db/s/collection_sharding_state.cpp | 71
-rw-r--r--  src/mongo/db/s/collection_sharding_state.h |  2
-rw-r--r--  src/mongo/s/catalog/replset/sharding_catalog_config_initialization_test.cpp | 54
-rw-r--r--  src/mongo/s/catalog/replset/sharding_catalog_manager_impl.cpp | 12
-rw-r--r--  src/mongo/s/catalog/replset/sharding_catalog_manager_impl.h |  2
-rw-r--r--  src/mongo/s/catalog/sharding_catalog_manager.h |  8
-rw-r--r--  src/mongo/s/catalog/sharding_catalog_manager_mock.cpp |  2
-rw-r--r--  src/mongo/s/catalog/sharding_catalog_manager_mock.h |  2
-rw-r--r--  src/mongo/s/cluster_identity_loader.cpp | 12
-rw-r--r--  src/mongo/s/cluster_identity_loader.h |  6
-rw-r--r--  src/mongo/s/config_server_test_fixture.cpp |  7
-rw-r--r--  src/mongo/s/config_server_test_fixture.h |  7
19 files changed, 270 insertions(+), 33 deletions(-)
diff --git a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
index 34091bebed6..ed10d68506c 100644
--- a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
+++ b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
@@ -14,6 +14,8 @@ selector:
- jstests/sharding/add_shard_to_zone.js
- jstests/sharding/remove_shard_from_zone.js
- jstests/sharding/update_zone_key_range.js
+ # Doesn't use ShardingTest so won't actually be run in a mixed version configuration
+ - jstests/sharding/config_version_rollback.js
# TODO Assumes mongod and mongos handle read on view. Enable when 3.4 becomes 'last-stable'.
- jstests/sharding/movePrimary1.js
# v3.4 replace noAutoSplit flag with document in config.settings
diff --git a/jstests/sharding/config_version_rollback.js b/jstests/sharding/config_version_rollback.js
new file mode 100644
index 00000000000..cba395d86f7
--- /dev/null
+++ b/jstests/sharding/config_version_rollback.js
@@ -0,0 +1,93 @@
+/**
+ * Tests that if the config.version document on a config server is rolled back, that config server
+ * will detect the new config.version document when it gets recreated.
+ * @tags: [requires_persistence]
+ */
+
+(function() {
+ "use strict";
+
+ var configRS = new ReplSetTest({nodes: 3});
+ var nodes = configRS.startSet({configsvr: '', storageEngine: 'wiredTiger'});
+
+ // Prevent any replication from happening, so that the initial writes that the config
+ // server performs on first transition to primary can be rolled back.
+ nodes.forEach(function(node) {
+ assert.commandWorked(node.getDB('admin').runCommand(
+ {configureFailPoint: 'stopOplogFetcher', mode: 'alwaysOn'}));
+ });
+
+ configRS.initiate();
+
+ var origPriConn = configRS.getPrimary();
+ var secondaries = configRS.getSecondaries();
+
+ jsTest.log("Confirming that the primary has the config.version doc but the secondaries do not");
+ var origConfigVersionDoc = origPriConn.getCollection('config.version').findOne();
+ assert.neq(null, origConfigVersionDoc);
+ secondaries.forEach(function(secondary) {
+ secondary.setSlaveOk();
+ assert.eq(null, secondary.getCollection('config.version').findOne());
+ });
+
+ // Ensure manually deleting the config.version document is not allowed.
+ assert.writeErrorWithCode(origPriConn.getCollection('config.version').remove({}), 40302);
+ assert.commandFailedWithCode(origPriConn.getDB('config').runCommand({drop: 'version'}), 40303);
+
+ jsTest.log("Stepping down original primary");
+ try {
+ origPriConn.adminCommand({replSetStepDown: 60, force: true});
+ } catch (x) {
+ // replSetStepDown closes all connections, thus a network exception is expected here.
+ }
+
+ jsTest.log("Waiting for new primary to be elected and write a new config.version document");
+ var newPriConn = configRS.getPrimary();
+ assert.neq(newPriConn, origPriConn);
+
+ var newConfigVersionDoc = newPriConn.getCollection('config.version').findOne();
+ assert.neq(null, newConfigVersionDoc);
+ assert.neq(origConfigVersionDoc.clusterId, newConfigVersionDoc.clusterId);
+
+ jsTest.log("Re-enabling replication on all nodes");
+ nodes.forEach(function(node) {
+ assert.commandWorked(
+ node.getDB('admin').runCommand({configureFailPoint: 'stopOplogFetcher', mode: 'off'}));
+ });
+
+ jsTest.log(
+ "Waiting for original primary to rollback and replicate new config.version document");
+ origPriConn.setSlaveOk();
+ assert.soonNoExcept(function() {
+ var foundClusterId = origPriConn.getCollection('config.version').findOne().clusterId;
+ return friendlyEqual(newConfigVersionDoc.clusterId, foundClusterId);
+ });
+
+ jsTest.log("Forcing original primary to step back up and become primary again.");
+ // Ensure former primary is eligible to become primary once more.
+ assert.commandWorked(origPriConn.adminCommand({replSetFreeze: 0}));
+ assert.commandWorked(origPriConn.adminCommand({replSetStepUp: 1}));
+
+ assert.soon(function() {
+ return origPriConn == configRS.getPrimary();
+ });
+
+ // Now we just need to start up a mongos and add a shard to confirm that the shard gets added
+ // with the proper clusterId value.
+ jsTest.log("Starting mongos");
+ var mongos = MongoRunner.runMongos({configdb: configRS.getURL()});
+
+ jsTest.log("Starting shard mongod");
+ var shard = MongoRunner.runMongod({shardsvr: ""});
+
+ jsTest.log("Adding shard to cluster");
+ assert.commandWorked(mongos.adminCommand({addShard: shard.host}));
+
+ jsTest.log("Verifying that shard was provided the proper clusterId");
+ var shardIdentityDoc = shard.getDB('admin').system.version.findOne({_id: 'shardIdentity'});
+ printjson(shardIdentityDoc);
+ assert.eq(newConfigVersionDoc.clusterId,
+ shardIdentityDoc.clusterId,
+ "oldPriClusterId: " + origConfigVersionDoc.clusterId);
+ configRS.stopSet();
+})();
diff --git a/jstests/sharding/shard_aware_on_config_election.js b/jstests/sharding/shard_aware_on_config_election.js
index b93740f0799..a885a37455a 100644
--- a/jstests/sharding/shard_aware_on_config_election.js
+++ b/jstests/sharding/shard_aware_on_config_election.js
@@ -90,6 +90,10 @@
assert.writeOK(st.s.getDB("config").getCollection("shards").update(
{"_id": rst.name}, {$unset: {"state": ""}}, {writeConcern: {w: "majority"}}));
+ // Make sure shardIdentity delete replicated to all nodes before restarting them with
+ // --shardsvr since if they try to replicate that delete while running with --shardsvr
+ // they will crash.
+ rst.awaitReplication();
jsTest.log("Restart " + rst.name +
" with --shardsvr to allow initializing its sharding awareness");
for (var nodeId = 0; nodeId < rst.nodes.length; nodeId++) {
diff --git a/jstests/sharding/shard_identity_rollback.js b/jstests/sharding/shard_identity_rollback.js
index 57a46f20ff8..9096c6eb110 100644
--- a/jstests/sharding/shard_identity_rollback.js
+++ b/jstests/sharding/shard_identity_rollback.js
@@ -71,7 +71,7 @@
// Restart the original primary so it triggers a rollback of the shardIdentity insert.
jsTest.log("Restarting original primary");
- priConn = replTest.restart(priConn, {shardsvr: ''});
+ priConn = replTest.restart(priConn);
// Wait until we cannot create a connection to the former primary, which indicates that it must
// have shut itself down during the rollback.
diff --git a/src/mongo/db/op_observer.cpp b/src/mongo/db/op_observer.cpp
index 9b0e1e1b138..8defc1c71fe 100644
--- a/src/mongo/db/op_observer.cpp
+++ b/src/mongo/db/op_observer.cpp
@@ -225,7 +225,12 @@ void OpObserver::onDropCollection(OperationContext* txn, const NamespaceString&
if (collectionName.coll() == DurableViewCatalog::viewsCollectionName()) {
DurableViewCatalog::onExternalChange(txn, collectionName);
}
+
getGlobalAuthorizationManager()->logOp(txn, "c", dbName.c_str(), cmdObj, nullptr);
+
+ auto css = CollectionShardingState::get(txn, collectionName);
+ css->onDropCollection(txn, collectionName);
+
logOpForDbHash(txn, dbName.c_str());
}
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
index 1f35c5a3afd..e8deee8fab8 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
@@ -364,8 +364,8 @@ OpTime ReplicationCoordinatorExternalStateImpl::onTransitionToPrimary(OperationC
}
const auto opTimeToReturn = fassertStatusOK(28665, loadLastOpTime(txn));
- shardingOnTransitionToPrimaryHook(txn);
- dropAllTempCollections(txn);
+ _shardingOnTransitionToPrimaryHook(txn);
+ _dropAllTempCollections(txn);
return opTimeToReturn;
}
@@ -621,7 +621,7 @@ void ReplicationCoordinatorExternalStateImpl::shardingOnStepDownHook() {
ShardingState::get(getGlobalServiceContext())->clearCollectionMetadata();
}
-void ReplicationCoordinatorExternalStateImpl::shardingOnTransitionToPrimaryHook(
+void ReplicationCoordinatorExternalStateImpl::_shardingOnTransitionToPrimaryHook(
OperationContext* txn) {
auto status = ShardingStateRecovery::recover(txn);
@@ -722,7 +722,7 @@ void ReplicationCoordinatorExternalStateImpl::signalApplierToCancelFetcher() {
_bgSync->cancelFetcher();
}
-void ReplicationCoordinatorExternalStateImpl::dropAllTempCollections(OperationContext* txn) {
+void ReplicationCoordinatorExternalStateImpl::_dropAllTempCollections(OperationContext* txn) {
std::vector<std::string> dbNames;
StorageEngine* storageEngine = getGlobalServiceContext()->getGlobalStorageEngine();
storageEngine->listDatabases(&dbNames);
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.h b/src/mongo/db/repl/replication_coordinator_external_state_impl.h
index 0dec8b25560..6f6d7174d78 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_impl.h
+++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.h
@@ -128,7 +128,7 @@ private:
*
* Throws on errors.
*/
- void shardingOnTransitionToPrimaryHook(OperationContext* txn);
+ void _shardingOnTransitionToPrimaryHook(OperationContext* txn);
/**
* Drops all temporary collections on all databases except "local".
@@ -136,7 +136,7 @@ private:
* The implementation may assume that the caller has acquired the global exclusive lock
* for "txn".
*/
- void dropAllTempCollections(OperationContext* txn);
+ void _dropAllTempCollections(OperationContext* txn);
// Guards starting threads and setting _startedThreads
stdx::mutex _threadMutex;
diff --git a/src/mongo/db/s/collection_sharding_state.cpp b/src/mongo/db/s/collection_sharding_state.cpp
index a6d1fa3c47a..4b4f3c3f711 100644
--- a/src/mongo/db/s/collection_sharding_state.cpp
+++ b/src/mongo/db/s/collection_sharding_state.cpp
@@ -47,8 +47,10 @@
#include "mongo/db/server_options.h"
#include "mongo/db/service_context.h"
#include "mongo/s/catalog/sharding_catalog_manager.h"
+#include "mongo/s/catalog/type_config_version.h"
#include "mongo/s/catalog/type_shard.h"
#include "mongo/s/chunk_version.h"
+#include "mongo/s/cluster_identity_loader.h"
#include "mongo/s/grid.h"
#include "mongo/s/stale_exception.h"
#include "mongo/util/log.h"
@@ -281,30 +283,37 @@ void CollectionShardingState::onDeleteOp(OperationContext* txn,
if (auto idElem = deleteState.idDoc["_id"]) {
auto idStr = idElem.str();
if (idStr == ShardIdentityType::IdName) {
- if (txn->writesAreReplicated()) {
+ if (!repl::ReplicationCoordinator::get(txn)->getMemberState().rollback()) {
uasserted(40070,
"cannot delete shardIdentity document while in --shardsvr mode");
} else {
- if (repl::ReplicationCoordinator::get(txn)->getMemberState().rollback()) {
- warning() << "Shard identity document rolled back. Will shut down after "
- "finishing rollback.";
- ShardIdentityRollbackNotifier::get(txn)->recordThatRollbackHappened();
- }
+ warning() << "Shard identity document rolled back. Will shut down after "
+ "finishing rollback.";
+ ShardIdentityRollbackNotifier::get(txn)->recordThatRollbackHappened();
}
}
}
}
- // For backwards compatibility, cancel a pending asynchronous addShard task created on the
- // primary config as a result of a 3.2 mongos doing addShard for the shard with id
- // deletedDocId.
- if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer &&
- _nss == ShardType::ConfigNS) {
- BSONElement idElement = deleteState.idDoc["_id"];
- invariant(!idElement.eoo());
- auto shardIdStr = idElement.valuestrsafe();
- txn->recoveryUnit()->registerChange(
- new RemoveShardLogOpHandler(txn, ShardId(std::move(shardIdStr))));
+ if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
+ if (_nss == ShardType::ConfigNS) {
+ // For backwards compatibility, cancel a pending asynchronous addShard task created on
+ // the primary config as a result of a 3.2 mongos doing addShard for the shard with id
+ // deletedDocId.
+ BSONElement idElement = deleteState.idDoc["_id"];
+ invariant(!idElement.eoo());
+ auto shardIdStr = idElement.valuestrsafe();
+ txn->recoveryUnit()->registerChange(
+ new RemoveShardLogOpHandler(txn, ShardId(std::move(shardIdStr))));
+ } else if (_nss == VersionType::ConfigNS) {
+ if (!repl::ReplicationCoordinator::get(txn)->getMemberState().rollback()) {
+ uasserted(40302, "cannot delete config.version document while in --configsvr mode");
+ } else {
+ // Throw out any cached information related to the cluster ID.
+ Grid::get(txn)->catalogManager()->discardCachedConfigDatabaseInitializationState();
+ ClusterIdentityLoader::get(txn)->discardCachedClusterId();
+ }
+ }
}
checkShardVersionOrThrow(txn);
@@ -314,6 +323,36 @@ void CollectionShardingState::onDeleteOp(OperationContext* txn,
}
}
+void CollectionShardingState::onDropCollection(OperationContext* txn,
+ const NamespaceString& collectionName) {
+ dassert(txn->lockState()->isCollectionLockedForMode(_nss.ns(), MODE_IX));
+
+ if (serverGlobalParams.clusterRole == ClusterRole::ShardServer &&
+ _nss == NamespaceString::kConfigCollectionNamespace) {
+ // Dropping system collections is not allowed for end users.
+ invariant(!txn->writesAreReplicated());
+ invariant(repl::ReplicationCoordinator::get(txn)->getMemberState().rollback());
+
+ // Can't confirm whether there was a ShardIdentity document or not yet, so assume there was
+ // one and shut down the process to clear the in-memory sharding state.
+ warning() << "admin.system.version collection rolled back. Will shut down after "
+ "finishing rollback";
+ ShardIdentityRollbackNotifier::get(txn)->recordThatRollbackHappened();
+ }
+
+ if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
+ if (_nss == VersionType::ConfigNS) {
+ if (!repl::ReplicationCoordinator::get(txn)->getMemberState().rollback()) {
+ uasserted(40303, "cannot drop config.version document while in --configsvr mode");
+ } else {
+ // Throw out any cached information related to the cluster ID.
+ Grid::get(txn)->catalogManager()->discardCachedConfigDatabaseInitializationState();
+ ClusterIdentityLoader::get(txn)->discardCachedClusterId();
+ }
+ }
+ }
+}
+
bool CollectionShardingState::_checkShardVersionOk(OperationContext* txn,
string* errmsg,
ChunkVersion* expectedShardVersion,
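The hunk above is the heart of the fix: on a config server, a delete or drop of config.version is rejected with an error unless the node is in ROLLBACK state, and during rollback the cached cluster ID and the config-database initialization flag are thrown away. Below is a minimal, self-contained sketch of that branch logic; the names (MemberState, onConfigVersionRemoved, the two boolean flags) are simplified stand-ins for illustration, not the real MongoDB types.

#include <iostream>
#include <stdexcept>

enum class MemberState { kPrimary, kSecondary, kRollback };

// Simplified analogue of the branch added to CollectionShardingState::onDeleteOp /
// onDropCollection for the config.version namespace. The two flags stand in for
// ShardingCatalogManager's initialization state and ClusterIdentityLoader's cache.
void onConfigVersionRemoved(MemberState state, bool& configInitialized, bool& clusterIdCached) {
    if (state != MemberState::kRollback) {
        // Outside of rollback, destroying config.version is a user error
        // (codes 40302/40303 in the real change).
        throw std::runtime_error("cannot delete config.version document while in --configsvr mode");
    }
    // During rollback, forget everything derived from the old document so that the next
    // transition to primary re-runs initializeConfigDatabaseIfNeeded and reloads the clusterId.
    configInitialized = false;
    clusterIdCached = false;
}

int main() {
    bool configInitialized = true;
    bool clusterIdCached = true;

    onConfigVersionRemoved(MemberState::kRollback, configInitialized, clusterIdCached);
    std::cout << "initialized=" << configInitialized
              << " cached=" << clusterIdCached << "\n";  // prints: initialized=0 cached=0

    try {
        onConfigVersionRemoved(MemberState::kPrimary, configInitialized, clusterIdCached);
    } catch (const std::runtime_error& e) {
        std::cout << "rejected: " << e.what() << "\n";
    }
    return 0;
}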
diff --git a/src/mongo/db/s/collection_sharding_state.h b/src/mongo/db/s/collection_sharding_state.h
index 87279422bcc..e98a61572b8 100644
--- a/src/mongo/db/s/collection_sharding_state.h
+++ b/src/mongo/db/s/collection_sharding_state.h
@@ -159,6 +159,8 @@ public:
void onDeleteOp(OperationContext* txn, const DeleteState& deleteState);
+ void onDropCollection(OperationContext* txn, const NamespaceString& collectionName);
+
private:
friend class CollectionRangeDeleter;
diff --git a/src/mongo/s/catalog/replset/sharding_catalog_config_initialization_test.cpp b/src/mongo/s/catalog/replset/sharding_catalog_config_initialization_test.cpp
index 8d2b3595252..20d5bf39e5f 100644
--- a/src/mongo/s/catalog/replset/sharding_catalog_config_initialization_test.cpp
+++ b/src/mongo/s/catalog/replset/sharding_catalog_config_initialization_test.cpp
@@ -33,6 +33,8 @@
#include "mongo/bson/json.h"
#include "mongo/db/namespace_string.h"
+#include "mongo/db/operation_context.h"
+#include "mongo/db/repl/replication_coordinator_mock.h"
#include "mongo/s/catalog/config_server_version.h"
#include "mongo/s/catalog/sharding_catalog_client.h"
#include "mongo/s/catalog/sharding_catalog_manager.h"
@@ -44,6 +46,7 @@
#include "mongo/s/catalog/type_tags.h"
#include "mongo/s/client/shard.h"
#include "mongo/s/config_server_test_fixture.h"
+#include "mongo/util/scopeguard.h"
namespace mongo {
namespace {
@@ -188,20 +191,55 @@ TEST_F(ConfigInitializationTest, OnlyRunsOnce) {
ASSERT_EQUALS(CURRENT_CONFIG_VERSION, foundVersion.getCurrentVersion());
ASSERT_EQUALS(MIN_COMPATIBLE_CONFIG_VERSION, foundVersion.getMinCompatibleVersion());
- // Now remove the version document and re-run initializeConfigDatabaseIfNeeded().
- ASSERT_OK(catalogClient()->removeConfigDocuments(operationContext(),
- VersionType::ConfigNS,
- BSONObj(),
- ShardingCatalogClient::kMajorityWriteConcern));
-
ASSERT_EQUALS(ErrorCodes::AlreadyInitialized,
catalogManager()->initializeConfigDatabaseIfNeeded(operationContext()));
+}
+
+TEST_F(ConfigInitializationTest, ReRunsIfDocRolledBackThenReElected) {
+ ASSERT_OK(catalogManager()->initializeConfigDatabaseIfNeeded(operationContext()));
+
+ auto versionDoc = assertGet(findOneOnConfigCollection(
+ operationContext(), NamespaceString(VersionType::ConfigNS), BSONObj()));
+
+ VersionType foundVersion = assertGet(VersionType::fromBSON(versionDoc));
- // Even though there was no version document, initializeConfigDatabaseIfNeeded() returned
- // without making one because it has already run once successfully so didn't bother to check.
+ ASSERT_TRUE(foundVersion.getClusterId().isSet());
+ ASSERT_EQUALS(CURRENT_CONFIG_VERSION, foundVersion.getCurrentVersion());
+ ASSERT_EQUALS(MIN_COMPATIBLE_CONFIG_VERSION, foundVersion.getMinCompatibleVersion());
+
+ // Now remove the version document and re-run initializeConfigDatabaseIfNeeded().
+ {
+ // Mirror what happens if the config.version document is rolled back.
+ ON_BLOCK_EXIT([&] {
+ operationContext()->setReplicatedWrites(true);
+ getReplicationCoordinator()->setFollowerMode(repl::MemberState::RS_PRIMARY);
+ });
+ operationContext()->setReplicatedWrites(false);
+ getReplicationCoordinator()->setFollowerMode(repl::MemberState::RS_ROLLBACK);
+ ASSERT_OK(
+ catalogClient()->removeConfigDocuments(operationContext(),
+ VersionType::ConfigNS,
+ BSONObj(),
+ ShardingCatalogClient::kMajorityWriteConcern));
+ }
+
+ // Verify the document was actually removed.
ASSERT_EQUALS(ErrorCodes::NoMatchingDocument,
findOneOnConfigCollection(
operationContext(), NamespaceString(VersionType::ConfigNS), BSONObj()));
+
+ // Re-create the config.version document.
+ ASSERT_OK(catalogManager()->initializeConfigDatabaseIfNeeded(operationContext()));
+
+ auto newVersionDoc = assertGet(findOneOnConfigCollection(
+ operationContext(), NamespaceString(VersionType::ConfigNS), BSONObj()));
+
+ VersionType newFoundVersion = assertGet(VersionType::fromBSON(newVersionDoc));
+
+ ASSERT_TRUE(newFoundVersion.getClusterId().isSet());
+ ASSERT_NOT_EQUALS(newFoundVersion.getClusterId(), foundVersion.getClusterId());
+ ASSERT_EQUALS(CURRENT_CONFIG_VERSION, newFoundVersion.getCurrentVersion());
+ ASSERT_EQUALS(MIN_COMPATIBLE_CONFIG_VERSION, newFoundVersion.getMinCompatibleVersion());
}
TEST_F(ConfigInitializationTest, BuildsNecessaryIndexes) {
diff --git a/src/mongo/s/catalog/replset/sharding_catalog_manager_impl.cpp b/src/mongo/s/catalog/replset/sharding_catalog_manager_impl.cpp
index aac7bed4935..2b982106dc5 100644
--- a/src/mongo/s/catalog/replset/sharding_catalog_manager_impl.cpp
+++ b/src/mongo/s/catalog/replset/sharding_catalog_manager_impl.cpp
@@ -1408,12 +1408,15 @@ Status ShardingCatalogManagerImpl::initializeConfigDatabaseIfNeeded(OperationCon
}
}
- Status status = _initConfigVersion(txn);
+ Status status = _initConfigIndexes(txn);
if (!status.isOK()) {
return status;
}
- status = _initConfigIndexes(txn);
+ // Make sure to write config.version last since we detect rollbacks of config.version and
+ // will re-run initializeConfigDatabaseIfNeeded if that happens, but we don't detect rollback
+ // of the index builds.
+ status = _initConfigVersion(txn);
if (!status.isOK()) {
return status;
}
@@ -1424,6 +1427,11 @@ Status ShardingCatalogManagerImpl::initializeConfigDatabaseIfNeeded(OperationCon
return Status::OK();
}
+void ShardingCatalogManagerImpl::discardCachedConfigDatabaseInitializationState() {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+ _configInitialized = false;
+}
+
Status ShardingCatalogManagerImpl::_initConfigVersion(OperationContext* txn) {
auto versionStatus =
_catalogClient->getConfigVersion(txn, repl::ReadConcernLevel::kLocalReadConcern);
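With this change, initializeConfigDatabaseIfNeeded builds the config indexes first and writes config.version last, and discardCachedConfigDatabaseInitializationState resets the run-once guard if that document is rolled back. A rough sketch of that "run once unless discarded" pattern follows; ConfigInitGuard and its lambdas are hypothetical stand-ins, with the real catalog work elided.

#include <functional>
#include <iostream>
#include <mutex>

// Hypothetical stand-in for ShardingCatalogManagerImpl's run-once initialization guard.
class ConfigInitGuard {
public:
    // Analogue of initializeConfigDatabaseIfNeeded(): a no-op after the first success.
    // The step whose rollback we can detect (writing config.version) runs last, so a
    // rollback of it implies every earlier step must be redone as well.
    void initializeIfNeeded(const std::function<void()>& buildIndexes,
                            const std::function<void()>& writeVersionDoc) {
        std::lock_guard<std::mutex> lk(_mutex);
        if (_initialized)
            return;
        buildIndexes();     // _initConfigIndexes in the real code
        writeVersionDoc();  // _initConfigVersion in the real code, now run last
        _initialized = true;
    }

    // Analogue of discardCachedConfigDatabaseInitializationState(), called when the
    // config.version document is rolled back.
    void discard() {
        std::lock_guard<std::mutex> lk(_mutex);
        _initialized = false;
    }

private:
    std::mutex _mutex;
    bool _initialized = false;
};

int main() {
    ConfigInitGuard guard;
    auto indexes = [] { std::cout << "building config indexes\n"; };
    auto version = [] { std::cout << "writing config.version\n"; };

    guard.initializeIfNeeded(indexes, version);  // does the work
    guard.initializeIfNeeded(indexes, version);  // no-op
    guard.discard();                             // config.version was rolled back
    guard.initializeIfNeeded(indexes, version);  // does the work again
    return 0;
}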
diff --git a/src/mongo/s/catalog/replset/sharding_catalog_manager_impl.h b/src/mongo/s/catalog/replset/sharding_catalog_manager_impl.h
index 516ab7aca52..aa4c437151b 100644
--- a/src/mongo/s/catalog/replset/sharding_catalog_manager_impl.h
+++ b/src/mongo/s/catalog/replset/sharding_catalog_manager_impl.h
@@ -104,6 +104,8 @@ public:
Status initializeConfigDatabaseIfNeeded(OperationContext* txn) override;
+ void discardCachedConfigDatabaseInitializationState() override;
+
Status initializeShardingAwarenessOnUnawareShards(OperationContext* txn) override;
Status upsertShardIdentityOnShard(OperationContext* txn, ShardType shardType) override;
diff --git a/src/mongo/s/catalog/sharding_catalog_manager.h b/src/mongo/s/catalog/sharding_catalog_manager.h
index 3a0c9c34a74..63f242d3598 100644
--- a/src/mongo/s/catalog/sharding_catalog_manager.h
+++ b/src/mongo/s/catalog/sharding_catalog_manager.h
@@ -170,6 +170,14 @@ public:
virtual Status initializeConfigDatabaseIfNeeded(OperationContext* txn) = 0;
/**
+ * Called if the config.version document is rolled back. Indicates to the
+ * ShardingCatalogManager that on the next transition to primary
+ * initializeConfigDatabaseIfNeeded will need to re-run the work to initialize the config
+ * database.
+ */
+ virtual void discardCachedConfigDatabaseInitializationState() = 0;
+
+ /**
* For upgrade from 3.2 to 3.4, for each shard in config.shards that is not marked as sharding
* aware, schedules a task to upsert a shardIdentity doc into the shard and mark the shard as
* sharding aware.
diff --git a/src/mongo/s/catalog/sharding_catalog_manager_mock.cpp b/src/mongo/s/catalog/sharding_catalog_manager_mock.cpp
index c9549bc32a4..cc53dfbada1 100644
--- a/src/mongo/s/catalog/sharding_catalog_manager_mock.cpp
+++ b/src/mongo/s/catalog/sharding_catalog_manager_mock.cpp
@@ -104,6 +104,8 @@ Status ShardingCatalogManagerMock::initializeConfigDatabaseIfNeeded(OperationCon
return {ErrorCodes::InternalError, "Method not implemented"};
}
+void ShardingCatalogManagerMock::discardCachedConfigDatabaseInitializationState() {}
+
Status ShardingCatalogManagerMock::initializeShardingAwarenessOnUnawareShards(
OperationContext* txn) {
return {ErrorCodes::InternalError, "Method not implemented"};
diff --git a/src/mongo/s/catalog/sharding_catalog_manager_mock.h b/src/mongo/s/catalog/sharding_catalog_manager_mock.h
index 88d304385c7..23ab27831da 100644
--- a/src/mongo/s/catalog/sharding_catalog_manager_mock.h
+++ b/src/mongo/s/catalog/sharding_catalog_manager_mock.h
@@ -86,6 +86,8 @@ public:
Status initializeConfigDatabaseIfNeeded(OperationContext* txn) override;
+ void discardCachedConfigDatabaseInitializationState() override;
+
Status initializeShardingAwarenessOnUnawareShards(OperationContext* txn) override;
Status upsertShardIdentityOnShard(OperationContext* txn, ShardType shardType) override;
diff --git a/src/mongo/s/cluster_identity_loader.cpp b/src/mongo/s/cluster_identity_loader.cpp
index dc2ae335f94..741a280ab4c 100644
--- a/src/mongo/s/cluster_identity_loader.cpp
+++ b/src/mongo/s/cluster_identity_loader.cpp
@@ -105,4 +105,16 @@ StatusWith<OID> ClusterIdentityLoader::_fetchClusterIdFromConfig(
return loadResult.getValue().getClusterId();
}
+void ClusterIdentityLoader::discardCachedClusterId() {
+ stdx::lock_guard<stdx::mutex> lk(_mutex);
+
+ if (_initializationState == InitializationState::kUninitialized) {
+ return;
+ }
+ invariant(_initializationState == InitializationState::kInitialized);
+ _lastLoadResult = {
+ Status{ErrorCodes::InternalError, "cluster ID never re-loaded after rollback"}};
+ _initializationState = InitializationState::kUninitialized;
+}
+
} // namespace mongo
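discardCachedClusterId moves the loader back to kUninitialized and poisons the last load result, so the next loadClusterId call must re-read config.version. A compressed sketch of that cache lifecycle under the same assumptions (simplified types, no OperationContext; ClusterIdCache and its fetch callback are hypothetical):

#include <functional>
#include <iostream>
#include <mutex>
#include <string>

// Simplified stand-in for ClusterIdentityLoader: lazily loads and caches the cluster ID,
// and can be told to forget it after a rollback of config.version.
class ClusterIdCache {
public:
    // Analogue of loadClusterId(): fetch once, then serve from the cache.
    std::string load(const std::function<std::string()>& fetchFromConfig) {
        std::lock_guard<std::mutex> lk(_mutex);
        if (_state == State::kInitialized)
            return _clusterId;
        _clusterId = fetchFromConfig();
        _state = State::kInitialized;
        return _clusterId;
    }

    // Analogue of discardCachedClusterId(): after this, load() hits the config server again.
    void discard() {
        std::lock_guard<std::mutex> lk(_mutex);
        _state = State::kUninitialized;
        _clusterId.clear();
    }

private:
    enum class State { kUninitialized, kInitialized };
    std::mutex _mutex;
    State _state = State::kUninitialized;
    std::string _clusterId;
};

int main() {
    ClusterIdCache cache;
    int fetches = 0;
    auto fetch = [&fetches] { ++fetches; return std::string("57bd...example"); };

    cache.load(fetch);
    cache.load(fetch);   // served from cache
    cache.discard();     // config.version rolled back
    cache.load(fetch);   // re-fetched from the config servers
    std::cout << "fetches=" << fetches << "\n";  // prints: fetches=2
    return 0;
}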
diff --git a/src/mongo/s/cluster_identity_loader.h b/src/mongo/s/cluster_identity_loader.h
index 25645111ef1..d34f5368850 100644
--- a/src/mongo/s/cluster_identity_loader.h
+++ b/src/mongo/s/cluster_identity_loader.h
@@ -73,6 +73,12 @@ public:
*/
Status loadClusterId(OperationContext* txn, const repl::ReadConcernLevel& readConcernLevel);
+ /**
+ * Called if the config.version document is rolled back. Notifies the ClusterIdentityLoader
+ * that the cached cluster ID is invalid and needs to be reloaded.
+ */
+ void discardCachedClusterId();
+
private:
enum class InitializationState {
kUninitialized, // We have never successfully loaded the cluster ID
diff --git a/src/mongo/s/config_server_test_fixture.cpp b/src/mongo/s/config_server_test_fixture.cpp
index ed0f4810307..70b391e7077 100644
--- a/src/mongo/s/config_server_test_fixture.cpp
+++ b/src/mongo/s/config_server_test_fixture.cpp
@@ -109,6 +109,7 @@ void ConfigServerTestFixture::setUp() {
repl::ReplSettings replSettings;
replSettings.setReplSetString("mySet/node1:12345,node2:54321,node3:12543");
auto replCoord = stdx::make_unique<repl::ReplicationCoordinatorMock>(replSettings);
+ _replCoord = replCoord.get();
repl::ReplicaSetConfig config;
config.initialize(BSON("_id"
@@ -307,6 +308,12 @@ OperationContext* ConfigServerTestFixture::operationContext() const {
return _opCtx.get();
}
+repl::ReplicationCoordinatorMock* ConfigServerTestFixture::getReplicationCoordinator() const {
+ invariant(_replCoord);
+
+ return _replCoord;
+}
+
void ConfigServerTestFixture::onCommand(NetworkTestEnv::OnCommandFunction func) {
_networkTestEnv->onCommand(func);
}
diff --git a/src/mongo/s/config_server_test_fixture.h b/src/mongo/s/config_server_test_fixture.h
index fc929ca5a43..943b6d122b6 100644
--- a/src/mongo/s/config_server_test_fixture.h
+++ b/src/mongo/s/config_server_test_fixture.h
@@ -63,6 +63,10 @@ class NetworkInterfaceMock;
class TaskExecutor;
} // namespace executor
+namespace repl {
+class ReplicationCoordinatorMock;
+}
+
/**
* Sets up the mocked out objects for testing the catalog manager and catalog client with the
* remote interface backed by the NetworkTestEnv and config server as the local storage engine.
@@ -109,6 +113,8 @@ public:
OperationContext* operationContext() const;
+ repl::ReplicationCoordinatorMock* getReplicationCoordinator() const;
+
/**
* Insert a document to this config server to the specified namespace.
*/
@@ -191,6 +197,7 @@ private:
ReplSetDistLockManager* _distLockManager = nullptr;
ShardingCatalogClientImpl* _catalogClient = nullptr;
ShardingCatalogManagerImpl* _catalogManager = nullptr;
+ repl::ReplicationCoordinatorMock* _replCoord = nullptr;
};
} // namespace mongo