summary | refs | log | tree | commit | diff
path: root/src/mongo/db/repl
diff options
context:
space:
mode:
author: Vesselina Ratcheva <vesselina.ratcheva@10gen.com> 2022-03-28 14:10:08 +0000
committer: Evergreen Agent <no-reply@evergreen.mongodb.com> 2022-03-28 16:20:39 +0000
commit: 4f57c205480557f133535c65f743b88414d32280 (patch)
tree: dd1b71b81de3434b2c7207813549dd76e1607253 /src/mongo/db/repl
parent: e6bae62861fbc97245fadd2efff98e2fc15ab250 (diff)
download: mongo-4f57c205480557f133535c65f743b88414d32280.tar.gz
SERVER-61117 Prevent uncaught errors in ReplicationCoordinatorImpl::startLoadLocalConfig from causing server hangs
Diffstat (limited to 'src/mongo/db/repl')
-rw-r--r-- src/mongo/db/repl/replication_coordinator_impl.cpp | 56
1 file changed, 37 insertions, 19 deletions
diff --git a/src/mongo/db/repl/replication_coordinator_impl.cpp b/src/mongo/db/repl/replication_coordinator_impl.cpp
index 89fcbd5e291..36ac01c19e6 100644
--- a/src/mongo/db/repl/replication_coordinator_impl.cpp
+++ b/src/mongo/db/repl/replication_coordinator_impl.cpp
@@ -150,6 +150,8 @@ MONGO_FAIL_POINT_DEFINE(skipBeforeFetchingConfig);
MONGO_FAIL_POINT_DEFINE(stepdownHangAfterGrabbingRSTL);
// Simulates returning a specified error in the hello response.
MONGO_FAIL_POINT_DEFINE(setCustomErrorInHelloResponseMongoD);
+// Throws right before the call into recoverTenantMigrationAccessBlockers.
+MONGO_FAIL_POINT_DEFINE(throwBeforeRecoveringTenantMigrationAccessBlockers);
// Number of times we tried to go live as a secondary.
Counter64 attemptsToBecomeSecondary;
@@ -521,6 +523,13 @@ bool ReplicationCoordinatorImpl::_startLoadLocalConfig(
_replicationProcess->getReplicationRecovery()->recoverFromOplog(opCtx, stableTimestamp);
LOGV2(4280505,
"Creating any necessary TenantMigrationAccessBlockers for unfinished migrations");
+
+ if (MONGO_unlikely(throwBeforeRecoveringTenantMigrationAccessBlockers.shouldFail())) {
+ uasserted(6111700,
+ "Failpoint 'throwBeforeRecoveringTenantMigrationAccessBlockers' triggered. "
+ "Throwing exception.");
+ }
+
tenant_migration_access_blocker::recoverTenantMigrationAccessBlockers(opCtx);
LOGV2(4280506, "Reconstructing prepared transactions");
reconstructPreparedTransactions(opCtx, OplogApplication::Mode::kRecovering);
@@ -914,29 +923,38 @@ void ReplicationCoordinatorImpl::startup(OperationContext* opCtx,
_storage->initializeStorageControlsForReplication(opCtx->getServiceContext());
- {
- stdx::lock_guard<Latch> lk(_mutex);
- fassert(18822, !_inShutdown);
- _setConfigState_inlock(kConfigStartingUp);
- _topCoord->setStorageEngineSupportsReadCommitted(
- _externalState->isReadCommittedSupportedByStorageEngine(opCtx));
- }
+ // We are expected to be able to transition out of the kConfigStartingUp state by the end
+ // of this function. Any uncaught exceptions here leave us in an invalid state and we will
+ // not be able to shut down by normal means, as clean shutdown assumes we can leave that state.
+ try {
+ {
+ stdx::lock_guard<Latch> lk(_mutex);
+ fassert(18822, !_inShutdown);
+ _setConfigState_inlock(kConfigStartingUp);
+ _topCoord->setStorageEngineSupportsReadCommitted(
+ _externalState->isReadCommittedSupportedByStorageEngine(opCtx));
+ }
- // Initialize the cached pointer to the oplog collection.
- acquireOplogCollectionForLogging(opCtx);
+ // Initialize the cached pointer to the oplog collection.
+ acquireOplogCollectionForLogging(opCtx);
- _replExecutor->startup();
+ _replExecutor->startup();
- LOGV2(6005300, "Starting up replica set aware services");
- ReplicaSetAwareServiceRegistry::get(_service).onStartup(opCtx);
+ LOGV2(6005300, "Starting up replica set aware services");
+ ReplicaSetAwareServiceRegistry::get(_service).onStartup(opCtx);
- bool doneLoadingConfig = _startLoadLocalConfig(opCtx, lastShutdownState);
- if (doneLoadingConfig) {
- // If we're not done loading the config, then the config state will be set by
- // _finishLoadLocalConfig.
- stdx::lock_guard<Latch> lk(_mutex);
- invariant(!_rsConfig.isInitialized());
- _setConfigState_inlock(kConfigUninitialized);
+ bool doneLoadingConfig = _startLoadLocalConfig(opCtx, lastShutdownState);
+ if (doneLoadingConfig) {
+ // If we're not done loading the config, then the config state will be set by
+ // _finishLoadLocalConfig.
+ stdx::lock_guard<Latch> lk(_mutex);
+ invariant(!_rsConfig.isInitialized());
+ _setConfigState_inlock(kConfigUninitialized);
+ }
+ } catch (DBException& e) {
+ auto status = e.toStatus();
+ LOGV2_FATAL_NOTRACE(
+ 6111701, "Failed to load local replica set config on startup", "status"_attr = status);
}
}