author:    Kaloian Manassiev <kaloian.manassiev@mongodb.com>  2018-09-24 14:31:22 -0400
committer: Kaloian Manassiev <kaloian.manassiev@mongodb.com>  2018-09-26 02:47:40 -0400
commit:    a0ebd4bb3a30fdf574fd08ab473e7d6ce1b59619
tree:      ba437e69a942aa896bf856c02bb7c51eac80e407
parent:    dbc88bbcf51c3adc2f2a0ad3e4c22b98a2dc9388
SERVER-30714 Handle 'not master' errors in ReplicationCoordinatorExternalStateImpl::_shardingOnTransitionToPrimaryHook
Diffstat (limited to 'src/mongo/db')
-rw-r--r--  src/mongo/db/db.cpp                                                |  4
-rw-r--r--  src/mongo/db/repl/replication_coordinator_external_state_impl.cpp | 43
-rw-r--r--  src/mongo/db/s/sharding_state_recovery.cpp                         | 10
3 files changed, 27 insertions(+), 30 deletions(-)
diff --git a/src/mongo/db/db.cpp b/src/mongo/db/db.cpp
index f92ee6409e9..0b9c8105759 100644
--- a/src/mongo/db/db.cpp
+++ b/src/mongo/db/db.cpp
@@ -549,7 +549,9 @@ ExitCode _initAndListen(int listenPort) {
if (serverGlobalParams.clusterRole == ClusterRole::ShardServer) {
// Note: For replica sets, ShardingStateRecovery happens on transition to primary.
if (!repl::ReplicationCoordinator::get(startupOpCtx.get())->isReplEnabled()) {
- uassertStatusOK(ShardingStateRecovery::recover(startupOpCtx.get()));
+ if (ShardingState::get(startupOpCtx.get())->enabled()) {
+ uassertStatusOK(ShardingStateRecovery::recover(startupOpCtx.get()));
+ }
}
} else if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
initializeGlobalShardingStateForMongoD(startupOpCtx.get(),
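The db.cpp hunk above gates startup recovery on a standalone shard server: ShardingStateRecovery::recover is only attempted once sharding has actually been initialized on the node. The following is a minimal, self-contained sketch of that guard pattern, not the real MongoDB code; Status, ShardingState, recoverShardingState and initAndListenSketch are simplified stand-ins for illustration only.

// Sketch of the startup guard: only attempt recovery when sharding is enabled.
// All types below are hypothetical stand-ins, not MongoDB's classes.
#include <stdexcept>
#include <string>

struct Status {
    bool ok;
    std::string reason;
};

struct ShardingState {
    bool enabled() const { return _enabled; }
    bool _enabled = false;
};

Status recoverShardingState() {
    // Placeholder for ShardingStateRecovery::recover(); assumed to require
    // that sharding is enabled before it is called.
    return {true, ""};
}

void initAndListenSketch(const ShardingState& shardingState, bool replEnabled) {
    if (!replEnabled) {
        // For replica sets, recovery happens on transition to primary instead.
        if (shardingState.enabled()) {
            Status s = recoverShardingState();
            if (!s.ok) {
                throw std::runtime_error(s.reason);  // analogous to uassertStatusOK
            }
        }
    }
}

int main() {
    ShardingState state;                                 // sharding never initialized here
    initAndListenSketch(state, /*replEnabled=*/false);   // recovery is skipped
}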
diff --git a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
index 3ff7d24f4b9..4e7d3dd21f7 100644
--- a/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_external_state_impl.cpp
@@ -686,7 +686,6 @@ void ReplicationCoordinatorExternalStateImpl::shardingOnStepDownHook() {
if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
Balancer::get(_service)->interruptBalancer();
} else if (ShardingState::get(_service)->enabled()) {
- invariant(serverGlobalParams.clusterRole == ClusterRole::ShardServer);
ChunkSplitter::get(_service).onStepDown();
CatalogCacheLoader::get(_service).onStepDown();
PeriodicBalancerConfigRefresher::get(_service).onStepDown();
@@ -706,24 +705,16 @@ void ReplicationCoordinatorExternalStateImpl::shardingOnStepDownHook() {
void ReplicationCoordinatorExternalStateImpl::_shardingOnTransitionToPrimaryHook(
OperationContext* opCtx) {
- auto status = ShardingStateRecovery::recover(opCtx);
-
- if (ErrorCodes::isShutdownError(status.code())) {
- // Note: callers of this method don't expect exceptions, so throw only unexpected fatal
- // errors.
- return;
- }
-
- fassert(40107, status);
-
if (serverGlobalParams.clusterRole == ClusterRole::ConfigServer) {
- status = ShardingCatalogManager::get(opCtx)->initializeConfigDatabaseIfNeeded(opCtx);
+ Status status = ShardingCatalogManager::get(opCtx)->initializeConfigDatabaseIfNeeded(opCtx);
if (!status.isOK() && status != ErrorCodes::AlreadyInitialized) {
- if (ErrorCodes::isShutdownError(status.code())) {
- // Don't fassert if we're mid-shutdown, let the shutdown happen gracefully.
+ // If the node is shutting down or it lost quorum just as it was becoming primary, don't
+ // run the sharding onStepUp machinery. The onStepDown counterpart to these methods is
+ // already idempotent, so the machinery will remain in the stepped down state.
+ if (ErrorCodes::isShutdownError(status.code()) ||
+ ErrorCodes::isNotMasterError(status.code())) {
return;
}
-
fassertFailedWithStatus(
40184,
status.withContext("Failed to initialize config database on config server's "
@@ -735,17 +726,16 @@ void ReplicationCoordinatorExternalStateImpl::_shardingOnTransitionToPrimaryHook
// readConcern in drain mode because the global lock prevents replication. This is
// safe, since if the clusterId write is rolled back, any writes that depend on it will
// also be rolled back.
- // Since we *just* wrote the cluster ID to the config.version document (via
- // ShardingCatalogManager::initializeConfigDatabaseIfNeeded), this should always
- // succeed.
+ //
+ // Since we *just* wrote the cluster ID to the config.version document (via the call to
+ // ShardingCatalogManager::initializeConfigDatabaseIfNeeded above), this read can only
+ // meaningfully fail if the node is shutting down.
status = ClusterIdentityLoader::get(opCtx)->loadClusterId(
opCtx, repl::ReadConcernLevel::kLocalReadConcern);
if (ErrorCodes::isShutdownError(status.code())) {
- // Don't fassert if we're mid-shutdown, let the shutdown happen gracefully.
return;
}
-
fassert(40217, status);
}
@@ -760,11 +750,20 @@ void ReplicationCoordinatorExternalStateImpl::_shardingOnTransitionToPrimaryHook
validator->enableKeyGenerator(opCtx, true);
}
} else if (ShardingState::get(opCtx)->enabled()) {
- invariant(serverGlobalParams.clusterRole == ClusterRole::ShardServer);
+ Status status = ShardingStateRecovery::recover(opCtx);
+
+ // If the node is shutting down or it lost quorum just as it was becoming primary, don't run
+ // the sharding onStepUp machinery. The onStepDown counterpart to these methods is already
+ // idempotent, so the machinery will remain in the stepped down state.
+ if (ErrorCodes::isShutdownError(status.code()) ||
+ ErrorCodes::isNotMasterError(status.code())) {
+ return;
+ }
+ fassert(40107, status);
const auto configsvrConnStr =
Grid::get(opCtx)->shardRegistry()->getConfigShard()->getConnString();
- auto status = ShardingInitializationMongoD::get(opCtx)->updateShardIdentityConfigString(
+ status = ShardingInitializationMongoD::get(opCtx)->updateShardIdentityConfigString(
opCtx, configsvrConnStr);
if (!status.isOK()) {
warning() << "error encountered while trying to update config connection string to "
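Both branches above apply the same classification during the transition to primary: shutdown and 'not master' errors are treated as benign (the onStepDown counterparts are idempotent, so the node simply stays in the stepped-down state), while any other failure is fatal. Below is a condensed, self-contained sketch of that classification; ErrorCode, Status, fassertSketch and classifyStepUpStatus are simplified stand-ins rather than the real server types.

// Sketch of the step-up error classification, with stand-in types.
#include <cstdlib>
#include <iostream>

enum class ErrorCode {
    OK, InterruptedAtShutdown, ShutdownInProgress, NotMaster, NotMasterNoSlaveOk, Other
};

struct Status {
    ErrorCode code;
    bool isOK() const { return code == ErrorCode::OK; }
};

bool isShutdownError(ErrorCode c) {
    return c == ErrorCode::InterruptedAtShutdown || c == ErrorCode::ShutdownInProgress;
}

bool isNotMasterError(ErrorCode c) {
    return c == ErrorCode::NotMaster || c == ErrorCode::NotMasterNoSlaveOk;
}

void fassertSketch(int msgid, const Status& s) {
    if (!s.isOK()) {
        std::cerr << "Fatal assertion " << msgid << '\n';
        std::abort();  // the real fassert terminates the process
    }
}

// Returns true if step-up work should continue, false if the hook should bail
// out quietly and leave the node in the stepped-down state.
bool classifyStepUpStatus(const Status& status) {
    if (isShutdownError(status.code) || isNotMasterError(status.code)) {
        // Benign: the node is shutting down or lost quorum while becoming
        // primary; the step-down machinery is idempotent, so do nothing.
        return false;
    }
    fassertSketch(40107, status);  // any other failure is fatal
    return true;
}

int main() {
    std::cout << classifyStepUpStatus({ErrorCode::NotMaster}) << '\n';  // 0: bail out quietly
    std::cout << classifyStepUpStatus({ErrorCode::OK}) << '\n';         // 1: continue step-up
}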
diff --git a/src/mongo/db/s/sharding_state_recovery.cpp b/src/mongo/db/s/sharding_state_recovery.cpp
index 0eecfbfe135..8914dc56254 100644
--- a/src/mongo/db/s/sharding_state_recovery.cpp
+++ b/src/mongo/db/s/sharding_state_recovery.cpp
@@ -218,9 +218,9 @@ void ShardingStateRecovery::endMetadataOp(OperationContext* opCtx) {
}
Status ShardingStateRecovery::recover(OperationContext* opCtx) {
- if (serverGlobalParams.clusterRole != ClusterRole::ShardServer) {
- return Status::OK();
- }
+ Grid* const grid = Grid::get(opCtx);
+ ShardingState* const shardingState = ShardingState::get(opCtx);
+ invariant(shardingState->enabled());
BSONObj recoveryDocBSON;
@@ -242,10 +242,6 @@ Status ShardingStateRecovery::recover(OperationContext* opCtx) {
log() << "Sharding state recovery process found document " << redact(recoveryDoc.toBSON());
- Grid* const grid = Grid::get(opCtx);
- ShardingState* const shardingState = ShardingState::get(opCtx);
- invariant(shardingState->enabled());
-
if (!recoveryDoc.getMinOpTimeUpdaters()) {
// Treat the minOpTime as up-to-date
grid->advanceConfigOpTime(recoveryDoc.getMinOpTime());
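Only part of recover()'s tail is visible above: when the persisted recovery document records no minOpTime updaters in flight, the stored config minOpTime is treated as up to date and installed directly. The sketch below illustrates just that decision under stated assumptions; OpTime, RecoveryDocument, Grid and applyRecoveryDocSketch are hypothetical stand-ins, and the handling of the in-flight case is deliberately omitted because it is not shown in this diff.

// Hypothetical sketch of the recovery-document decision shown above.
#include <cstdint>
#include <iostream>

struct OpTime {
    std::int64_t ts = 0;
    int term = -1;
};

struct RecoveryDocument {
    OpTime minOpTime;
    int minOpTimeUpdaters = 0;  // metadata ops in flight when the doc was persisted
};

struct Grid {
    OpTime configOpTime;
    void advanceConfigOpTime(OpTime t) { configOpTime = t; }  // simplified stand-in
};

void applyRecoveryDocSketch(Grid& grid, const RecoveryDocument& doc) {
    if (doc.minOpTimeUpdaters == 0) {
        // No metadata operations were in flight when the document was written,
        // so the persisted minOpTime can be treated as up to date.
        grid.advanceConfigOpTime(doc.minOpTime);
    } else {
        // In-flight updaters mean the persisted optime may be stale; the real
        // recover() refreshes it before use (details omitted in this sketch).
        std::cout << "minOpTime possibly stale; refresh required\n";
    }
}

int main() {
    Grid grid;
    applyRecoveryDocSketch(grid, {{1234, 1}, 0});  // trusted: installed directly
    applyRecoveryDocSketch(grid, {{1300, 1}, 2});  // updaters in flight: refresh needed
}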