diff options
author | Matthew Saltz <matthew.saltz@mongodb.com> | 2020-05-13 15:34:41 -0400 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-05-14 00:10:54 +0000 |
commit | abf419c46ca4df9b6f3948e450dfa7d8f8ac0a87 (patch) | |
tree | 3d0581a358da2cea41aaeb8448a9d75d6885b8cc | |
parent | c97f0868afcd8d8ce72a14f09283c827b33d44e9 (diff) | |
download | mongo-abf419c46ca4df9b6f3948e450dfa7d8f8ac0a87.tar.gz |
SERVER-48190 Acquire CSRLock in shared mode before submitting ranges for deletion
(cherry picked from commit 7404de00911b537a78074370b3643d774aaa9dd3)
-rw-r--r-- | src/mongo/db/s/cleanup_orphaned_cmd.cpp | 2 | ||||
-rw-r--r-- | src/mongo/db/s/migration_destination_manager.cpp | 4 | ||||
-rw-r--r-- | src/mongo/db/s/migration_util.cpp | 88 |
3 files changed, 56 insertions, 38 deletions
diff --git a/src/mongo/db/s/cleanup_orphaned_cmd.cpp b/src/mongo/db/s/cleanup_orphaned_cmd.cpp index 0b375a5f5d2..2ce25caec4b 100644 --- a/src/mongo/db/s/cleanup_orphaned_cmd.cpp +++ b/src/mongo/db/s/cleanup_orphaned_cmd.cpp @@ -170,6 +170,8 @@ CleanupResult cleanupOrphanedData(OperationContext* opCtx, { AutoGetCollection autoColl(opCtx, ns, MODE_IX); auto* const css = CollectionShardingRuntime::get(opCtx, ns); + // Keep the collection metadata from changing for the rest of this scope. + auto csrLock = CollectionShardingRuntime::CSRLock::lockShared(opCtx, css); const auto collDesc = css->getCollectionDescription(); if (!collDesc.isSharded()) { LOGV2(21911, diff --git a/src/mongo/db/s/migration_destination_manager.cpp b/src/mongo/db/s/migration_destination_manager.cpp index e9803768990..7ec6e5eaecf 100644 --- a/src/mongo/db/s/migration_destination_manager.cpp +++ b/src/mongo/db/s/migration_destination_manager.cpp @@ -1374,6 +1374,8 @@ SharedSemiFuture<void> MigrationDestinationManager::_notePending(OperationContex AutoGetCollection autoColl(opCtx, _nss, MODE_X); auto* const css = CollectionShardingRuntime::get(opCtx, _nss); + // Keep the collection metadata from changing for the rest of this scope. + auto csrLock = CollectionShardingRuntime::CSRLock::lockShared(opCtx, css); const auto optMetadata = css->getCurrentMetadataIfKnown(); // This can currently happen because drops and shard key refine operations aren't guaranteed to @@ -1405,6 +1407,8 @@ void MigrationDestinationManager::_forgetPending(OperationContext* opCtx, ChunkR UninterruptibleLockGuard noInterrupt(opCtx->lockState()); AutoGetCollection autoColl(opCtx, _nss, MODE_IX); auto* const css = CollectionShardingRuntime::get(opCtx, _nss); + // Keep the collection metadata from changing for the rest of this scope. + auto csrLock = CollectionShardingRuntime::CSRLock::lockShared(opCtx, css); const auto optMetadata = css->getCurrentMetadataIfKnown(); // This can currently happen because drops aren't synchronized with in-migrations. The idea for diff --git a/src/mongo/db/s/migration_util.cpp b/src/mongo/db/s/migration_util.cpp index 8cd12d652e3..bd341b4d83c 100644 --- a/src/mongo/db/s/migration_util.cpp +++ b/src/mongo/db/s/migration_util.cpp @@ -239,6 +239,14 @@ ExecutorFuture<void> submitRangeDeletionTask(OperationContext* opCtx, const auto serviceContext = opCtx->getServiceContext(); return ExecutorFuture<void>(getMigrationUtilExecutor()) .then([=] { + auto deletionTaskUuidMatchesFilteringMetadataUuid = + [](CollectionShardingRuntime* csr, const RangeDeletionTask& deletionTask) { + return csr->getCurrentMetadataIfKnown() && + csr->getCollectionDescription().isSharded() && + csr->getCollectionDescription().uuidMatches( + deletionTask.getCollectionUuid()); + }; + ThreadClient tc(kRangeDeletionThreadName, serviceContext); { stdx::lock_guard<Client> lk(*tc.get()); @@ -247,45 +255,49 @@ ExecutorFuture<void> submitRangeDeletionTask(OperationContext* opCtx, auto uniqueOpCtx = tc->makeOperationContext(); auto opCtx = uniqueOpCtx.get(); - boost::optional<AutoGetCollection> autoColl; - autoColl.emplace(opCtx, deletionTask.getNss(), MODE_IS); - - auto css = CollectionShardingRuntime::get(opCtx, deletionTask.getNss()); - if (!css->getCurrentMetadataIfKnown() || !css->getCollectionDescription().isSharded() || - !css->getCollectionDescription().uuidMatches(deletionTask.getCollectionUuid())) { - // If the collection's filtering metadata is not known, is unsharded, or its UUID - // does not match the UUID of the deletion task, force a filtering metadata refresh - // once, because this node may have just stepped up and therefore may have a stale - // cache. - LOGV2(22024, - "Filtering metadata for this range deletion task; forcing refresh", - "deletionTask"_attr = redact(deletionTask.toBSON()), - "error"_attr = (css->getCurrentMetadataIfKnown() - ? (css->getCollectionDescription().isSharded() - ? "Collection has UUID that does not match UUID " - "of the deletion task" - : "Collection is unsharded") - : "Collection's sharding state is not known"), - "namespace"_attr = deletionTask.getNss(), - "migrationId"_attr = deletionTask.getId()); - - // TODO (SERVER-46075): Add an asynchronous version of - // forceShardFilteringMetadataRefresh to avoid blocking on the network in the - // thread pool. - autoColl.reset(); - refreshFilteringMetadataUntilSuccess(opCtx, deletionTask.getNss()); + // Make sure the collection metadata is up-to-date. + { + boost::optional<AutoGetCollection> autoColl; + autoColl.emplace(opCtx, deletionTask.getNss(), MODE_IS); + auto csr = CollectionShardingRuntime::get(opCtx, deletionTask.getNss()); + if (!deletionTaskUuidMatchesFilteringMetadataUuid(csr, deletionTask)) { + // If the collection's filtering metadata is not known, is unsharded, or its + // UUID does not match the UUID of the deletion task, force a filtering metadata + // refresh once, because this node may have just stepped up and therefore may + // have a stale cache. + LOGV2(22024, + "Filtering metadata for this range deletion task may be outdated; " + "forcing refresh", + "deletionTask"_attr = redact(deletionTask.toBSON()), + "error"_attr = + (csr->getCurrentMetadataIfKnown() + ? (csr->getCollectionDescription().isSharded() + ? "Collection has UUID that does not match UUID " + "of the deletion task" + : "Collection is unsharded") + : "Collection's sharding state is not known"), + "namespace"_attr = deletionTask.getNss(), + "migrationId"_attr = deletionTask.getId()); + + autoColl.reset(); + refreshFilteringMetadataUntilSuccess(opCtx, deletionTask.getNss()); + } } - autoColl.emplace(opCtx, deletionTask.getNss(), MODE_IS); - uassert( - ErrorCodes::RangeDeletionAbandonedBecauseCollectionWithUUIDDoesNotExist, - str::stream() << "Even after forced refresh, filtering metadata for namespace in " - "deletion task " - << (css->getCollectionDescription().isSharded() - ? " has UUID that does not match UUID of the deletion task" - : " is unsharded"), - css->getCollectionDescription().isSharded() && - css->getCollectionDescription().uuidMatches(deletionTask.getCollectionUuid())); + AutoGetCollection autoColl(opCtx, deletionTask.getNss(), MODE_IS); + auto csr = CollectionShardingRuntime::get(opCtx, deletionTask.getNss()); + // Keep the collection metadata from changing for the rest of this scope. + auto csrLock = CollectionShardingRuntime::CSRLock::lockShared(opCtx, csr); + uassert(ErrorCodes::RangeDeletionAbandonedBecauseCollectionWithUUIDDoesNotExist, + str::stream() + << "Even after forced refresh, filtering metadata for namespace in " + "deletion task " + << (csr->getCurrentMetadataIfKnown() + ? (csr->getCollectionDescription().isSharded() + ? " has UUID that does not match UUID of the deletion task" + : " is unsharded") + : " is not known"), + deletionTaskUuidMatchesFilteringMetadataUuid(csr, deletionTask)); LOGV2(22026, "Submitting range deletion task", @@ -296,7 +308,7 @@ ExecutorFuture<void> submitRangeDeletionTask(OperationContext* opCtx, ? CollectionShardingRuntime::kNow : CollectionShardingRuntime::kDelayed; - return css->cleanUpRange(deletionTask.getRange(), deletionTask.getId(), whenToClean); + return csr->cleanUpRange(deletionTask.getRange(), deletionTask.getId(), whenToClean); }) .onError([=](const Status status) { ThreadClient tc(kRangeDeletionThreadName, serviceContext); |