From 2aede7ad2fce2616e4140f2ae398e4e570c84703 Mon Sep 17 00:00:00 2001 From: Esha Maharishi Date: Mon, 27 Aug 2018 14:06:28 -0400 Subject: SERVER-36755 CollectionLock acquisition in shard_filtering_metadata_refresh.cpp can cause server to terminate on stepdown (1/2) --- .../db/s/shard_filtering_metadata_refresh.cpp | 30 +++++++++++++--------- 1 file changed, 18 insertions(+), 12 deletions(-) (limited to 'src/mongo/db/s/shard_filtering_metadata_refresh.cpp') diff --git a/src/mongo/db/s/shard_filtering_metadata_refresh.cpp b/src/mongo/db/s/shard_filtering_metadata_refresh.cpp index 8bb5b5a2614..0f284f1f6dd 100644 --- a/src/mongo/db/s/shard_filtering_metadata_refresh.cpp +++ b/src/mongo/db/s/shard_filtering_metadata_refresh.cpp @@ -47,10 +47,12 @@ namespace mongo { -Status onShardVersionMismatch(OperationContext* opCtx, - const NamespaceString& nss, - ChunkVersion shardVersionReceived, - bool forceRefreshFromThisThread) noexcept { +namespace { + +void onShardVersionMismatch(OperationContext* opCtx, + const NamespaceString& nss, + ChunkVersion shardVersionReceived, + bool forceRefreshFromThisThread) { invariant(!opCtx->lockState()->isLocked()); invariant(!opCtx->getClient()->isInDirectClient()); @@ -65,12 +67,8 @@ Status onShardVersionMismatch(OperationContext* opCtx, // Ensure any ongoing migrations have completed before trying to do the refresh. This wait is // just an optimization so that MongoS does not exhaust its maximum number of StaleConfig retry // attempts while the migration is being committed. - try { - auto& oss = OperationShardingState::get(opCtx); - oss.waitForMigrationCriticalSectionSignal(opCtx); - } catch (const DBException& ex) { - return ex.toStatus(); - } + auto& oss = OperationShardingState::get(opCtx); + oss.waitForMigrationCriticalSectionSignal(opCtx); const auto currentShardVersion = [&] { AutoGetCollection autoColl(opCtx, nss, MODE_IS); @@ -86,11 +84,19 @@ Status onShardVersionMismatch(OperationContext* opCtx, currentShardVersion.majorVersion() >= shardVersionReceived.majorVersion()) { // Don't need to remotely reload if we're in the same epoch and the requested version is // smaller than the one we know about. This means that the remote side is behind. - return Status::OK(); } + forceShardFilteringMetadataRefresh(opCtx, nss, forceRefreshFromThisThread); +} + +} // namespace + +Status onShardVersionMismatchNoExcept(OperationContext* opCtx, + const NamespaceString& nss, + ChunkVersion shardVersionReceived, + bool forceRefreshFromThisThread) noexcept { try { - forceShardFilteringMetadataRefresh(opCtx, nss, forceRefreshFromThisThread); + onShardVersionMismatch(opCtx, nss, shardVersionReceived, forceRefreshFromThisThread); return Status::OK(); } catch (const DBException& ex) { log() << "Failed to refresh metadata for collection" << nss << causedBy(redact(ex)); -- cgit v1.2.1