diff options
Diffstat (limited to 'src/mongo/db/s/range_deleter_service.cpp')
-rw-r--r-- | src/mongo/db/s/range_deleter_service.cpp | 173 |
1 files changed, 167 insertions, 6 deletions
diff --git a/src/mongo/db/s/range_deleter_service.cpp b/src/mongo/db/s/range_deleter_service.cpp index a24e14082f9..ee5ff61f608 100644 --- a/src/mongo/db/s/range_deleter_service.cpp +++ b/src/mongo/db/s/range_deleter_service.cpp @@ -28,10 +28,16 @@ */ #include "mongo/db/s/range_deleter_service.h" +#include "mongo/db/catalog_raii.h" #include "mongo/db/dbdirectclient.h" #include "mongo/db/op_observer/op_observer_registry.h" +#include "mongo/db/repl/repl_client_info.h" +#include "mongo/db/repl/wait_for_majority_service.h" #include "mongo/db/s/balancer_stats_registry.h" +#include "mongo/db/s/collection_sharding_runtime.h" #include "mongo/db/s/range_deleter_service_op_observer.h" +#include "mongo/db/s/range_deletion_util.h" +#include "mongo/db/s/shard_filtering_metadata_refresh.h" #include "mongo/logv2/log.h" #include "mongo/s/sharding_feature_flags_gen.h" #include "mongo/util/future_util.h" @@ -41,8 +47,32 @@ namespace mongo { namespace { const auto rangeDeleterServiceDecorator = ServiceContext::declareDecoration<RangeDeleterService>(); + +const BSONObj getShardKeyPattern(OperationContext* opCtx, + const DatabaseName& dbName, + const UUID& collectionUuid) { + while (true) { + opCtx->checkForInterrupt(); + boost::optional<NamespaceString> optNss; + { + AutoGetCollection collection( + opCtx, NamespaceStringOrUUID{dbName.toString(), collectionUuid}, MODE_IS); + + auto optMetadata = CollectionShardingRuntime::get(opCtx, collection.getNss()) + ->getCurrentMetadataIfKnown(); + if (optMetadata && optMetadata->isSharded()) { + return optMetadata->getShardKeyPattern().toBSON(); + } + optNss = collection.getNss(); + } + + onShardVersionMismatchNoExcept(opCtx, *optNss, boost::none).ignore(); + continue; + } } +} // namespace + const ReplicaSetAwareServiceRegistry::Registerer<RangeDeleterService> rangeDeleterServiceRegistryRegisterer("RangeDeleterService"); @@ -59,6 +89,13 @@ void RangeDeleterService::onStepUpComplete(OperationContext* opCtx, long long te return; } + if (disableResumableRangeDeleter.load()) { + LOGV2_INFO( + 6872508, + "Not resuming range deletions on step-up because `disableResumableRangeDeleter=true`"); + return; + } + auto lock = _acquireMutexUnconditionally(); dassert(_state.load() == kDown, "Service expected to be down before stepping up"); @@ -236,6 +273,8 @@ SharedSemiFuture<void> RangeDeleterService::registerTask( bool fromResubmitOnStepUp) { if (disableResumableRangeDeleter.load()) { + LOGV2_INFO(6872509, + "Not scheduling range deletion because `disableResumableRangeDeleter=true`"); return SemiFuture<void>::makeReady( Status(ErrorCodes::ResumableRangeDeleterDisabled, "Not submitting any range deletion task because the " @@ -278,12 +317,134 @@ SharedSemiFuture<void> RangeDeleterService::registerTask( _executor->now() + delayForActiveQueriesOnSecondariesToComplete) .share(); }) - .then([this, collUuid = rdt.getCollectionUuid(), range = rdt.getRange()]() { - // Step 3: perform the actual range deletion - // TODO - - // Deregister the task - deregisterTask(collUuid, range); + .then([this, + dbName = rdt.getNss().dbName(), + collectionUuid = rdt.getCollectionUuid(), + range = rdt.getRange()]() { + return withTemporaryOperationContext( + [&](OperationContext* opCtx) { + // A task is considered completed when all the following conditions are met: + // - All orphans have been deleted + // - The deletions have been majority committed + // - The range deletion task document has been deleted + bool taskCompleted = false; + + while (!taskCompleted) { + try { + // Perform the actual range deletion + bool orphansRemovalCompleted = false; + while (!orphansRemovalCompleted) { + try { + LOGV2_DEBUG( + 6872501, + 2, + "Beginning deletion of documents in orphan range", + "dbName"_attr = dbName, + "collectionUUID"_attr = collectionUuid.toString(), + "range"_attr = redact(range.toString())); + + auto shardKeyPattern = + getShardKeyPattern(opCtx, dbName, collectionUuid); + + uassertStatusOK(deleteRangeInBatches( + opCtx, dbName, collectionUuid, shardKeyPattern, range)); + orphansRemovalCompleted = true; + } catch (ExceptionFor<ErrorCodes::NamespaceNotFound>&) { + // No orphaned documents to remove from a dropped collection + orphansRemovalCompleted = true; + } catch ( + ExceptionFor< + ErrorCodes:: + RangeDeletionAbandonedBecauseTaskDocumentDoesNotExist>&) { + // No orphaned documents to remove from a dropped collection + orphansRemovalCompleted = true; + } catch ( + ExceptionFor< + ErrorCodes:: + RangeDeletionAbandonedBecauseCollectionWithUUIDDoesNotExist>&) { + // The task can be considered completed because the range + // deletion document doesn't exist + orphansRemovalCompleted = true; + } catch (const DBException& e) { + LOGV2_ERROR(6872502, + "Failed to delete documents in orphan range", + "dbName"_attr = dbName, + "collectionUUID"_attr = + collectionUuid.toString(), + "range"_attr = redact(range.toString()), + "error"_attr = e); + throw; + } + } + + { + repl::ReplClientInfo::forClient(opCtx->getClient()) + .setLastOpToSystemLastOpTime(opCtx); + auto clientOpTime = + repl::ReplClientInfo::forClient(opCtx->getClient()) + .getLastOp(); + + LOGV2_DEBUG( + 6872503, + 2, + "Waiting for majority replication of local deletions", + "dbName"_attr = dbName, + "collectionUUID"_attr = collectionUuid, + "range"_attr = redact(range.toString()), + "clientOpTime"_attr = clientOpTime); + + // Synchronously wait for majority before removing the range + // deletion task document: oplog gets applied in parallel for + // different collections, so it's important not to apply + // out of order the deletions of orphans and the removal of the + // entry persisted in `config.rangeDeletions` + WaitForMajorityService::get(opCtx->getServiceContext()) + .waitUntilMajority(clientOpTime, + CancellationToken::uncancelable()) + .get(opCtx); + } + + // Remove persistent range deletion task + try { + removePersistentRangeDeletionTask(opCtx, collectionUuid, range); + + LOGV2_DEBUG( + 6872504, + 2, + "Completed removal of persistent range deletion task", + "dbName"_attr = dbName, + "collectionUUID"_attr = collectionUuid.toString(), + "range"_attr = redact(range.toString())); + + } catch (const DBException& e) { + LOGV2_ERROR(6872505, + "Failed to remove persistent range deletion task", + "dbName"_attr = dbName, + "collectionUUID"_attr = collectionUuid.toString(), + "range"_attr = redact(range.toString()), + "error"_attr = e); + throw; + } + } catch (const DBException& e) { + // Fail in case of shutdown/stepdown errors as the range + // deletion will be resumed on the next step up + if (ErrorCodes::isShutdownError(e.code()) || + ErrorCodes::isNotPrimaryError(e.code())) { + return e.toStatus(); + } + + // Iterate again in case of any other error + continue; + } + + taskCompleted = true; + } + + return Status::OK(); + }, + dbName, + collectionUuid, + true); }) // IMPORTANT: no continuation should be added to this chain after this point // in order to make sure range deletions order is preserved. |