diff options
author | Vesselina Ratcheva <vesselina.ratcheva@10gen.com> | 2022-03-28 14:10:08 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-03-28 16:20:39 +0000 |
commit | 4f57c205480557f133535c65f743b88414d32280 (patch) | |
tree | dd1b71b81de3434b2c7207813549dd76e1607253 /src/mongo/db/repl | |
parent | e6bae62861fbc97245fadd2efff98e2fc15ab250 (diff) | |
download | mongo-4f57c205480557f133535c65f743b88414d32280.tar.gz |
SERVER-61117 Prevent uncaught errors in ReplicationCoordinatorImpl::startLoadLocalConfig from causing server hangs
Diffstat (limited to 'src/mongo/db/repl')
-rw-r--r-- | src/mongo/db/repl/replication_coordinator_impl.cpp | 56 |
1 files changed, 37 insertions, 19 deletions
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp index 89fcbd5e291..36ac01c19e6 100644 --- a/src/mongo/db/repl/replication_coordinator_impl.cpp +++ b/src/mongo/db/repl/replication_coordinator_impl.cpp @@ -150,6 +150,8 @@ MONGO_FAIL_POINT_DEFINE(skipBeforeFetchingConfig); MONGO_FAIL_POINT_DEFINE(stepdownHangAfterGrabbingRSTL); // Simulates returning a specified error in the hello response. MONGO_FAIL_POINT_DEFINE(setCustomErrorInHelloResponseMongoD); +// Throws right before the call into recoverTenantMigrationAccessBlockers. +MONGO_FAIL_POINT_DEFINE(throwBeforeRecoveringTenantMigrationAccessBlockers); // Number of times we tried to go live as a secondary. Counter64 attemptsToBecomeSecondary; @@ -521,6 +523,13 @@ bool ReplicationCoordinatorImpl::_startLoadLocalConfig( _replicationProcess->getReplicationRecovery()->recoverFromOplog(opCtx, stableTimestamp); LOGV2(4280505, "Creating any necessary TenantMigrationAccessBlockers for unfinished migrations"); + + if (MONGO_unlikely(throwBeforeRecoveringTenantMigrationAccessBlockers.shouldFail())) { + uasserted(6111700, + "Failpoint 'throwBeforeRecoveringTenantMigrationAccessBlockers' triggered. " + "Throwing exception."); + } + tenant_migration_access_blocker::recoverTenantMigrationAccessBlockers(opCtx); LOGV2(4280506, "Reconstructing prepared transactions"); reconstructPreparedTransactions(opCtx, OplogApplication::Mode::kRecovering); @@ -914,29 +923,38 @@ void ReplicationCoordinatorImpl::startup(OperationContext* opCtx, _storage->initializeStorageControlsForReplication(opCtx->getServiceContext()); - { - stdx::lock_guard<Latch> lk(_mutex); - fassert(18822, !_inShutdown); - _setConfigState_inlock(kConfigStartingUp); - _topCoord->setStorageEngineSupportsReadCommitted( - _externalState->isReadCommittedSupportedByStorageEngine(opCtx)); - } + // We are expected to be able to transition out of the kConfigStartingUp state by the end + // of this function. Any uncaught exceptions here leave us in an invalid state and we will + // not be able to shut down by normal means, as clean shutdown assumes we can leave that state. + try { + { + stdx::lock_guard<Latch> lk(_mutex); + fassert(18822, !_inShutdown); + _setConfigState_inlock(kConfigStartingUp); + _topCoord->setStorageEngineSupportsReadCommitted( + _externalState->isReadCommittedSupportedByStorageEngine(opCtx)); + } - // Initialize the cached pointer to the oplog collection. - acquireOplogCollectionForLogging(opCtx); + // Initialize the cached pointer to the oplog collection. + acquireOplogCollectionForLogging(opCtx); - _replExecutor->startup(); + _replExecutor->startup(); - LOGV2(6005300, "Starting up replica set aware services"); - ReplicaSetAwareServiceRegistry::get(_service).onStartup(opCtx); + LOGV2(6005300, "Starting up replica set aware services"); + ReplicaSetAwareServiceRegistry::get(_service).onStartup(opCtx); - bool doneLoadingConfig = _startLoadLocalConfig(opCtx, lastShutdownState); - if (doneLoadingConfig) { - // If we're not done loading the config, then the config state will be set by - // _finishLoadLocalConfig. - stdx::lock_guard<Latch> lk(_mutex); - invariant(!_rsConfig.isInitialized()); - _setConfigState_inlock(kConfigUninitialized); + bool doneLoadingConfig = _startLoadLocalConfig(opCtx, lastShutdownState); + if (doneLoadingConfig) { + // If we're not done loading the config, then the config state will be set by + // _finishLoadLocalConfig. + stdx::lock_guard<Latch> lk(_mutex); + invariant(!_rsConfig.isInitialized()); + _setConfigState_inlock(kConfigUninitialized); + } + } catch (DBException& e) { + auto status = e.toStatus(); + LOGV2_FATAL_NOTRACE( + 6111701, "Failed to load local replica set config on startup", "status"_attr = status); } } |