From 1937d9457f2f04ca865e983fdce5db6d014f1f73 Mon Sep 17 00:00:00 2001 From: Kaloian Manassiev Date: Fri, 10 Jun 2022 12:06:49 +0000 Subject: SERVER-66866 Make the range deleter not sleep under collection lock (cherry picked from commit cec61a460e69c47e1bf0e455214f776c62053f53) --- src/mongo/db/s/range_deletion_util.cpp | 127 +++++++++++++-------------- src/mongo/db/s/range_deletion_util.h | 20 +---- src/mongo/db/s/sharding_runtime_d_params.idl | 8 +- 3 files changed, 68 insertions(+), 87 deletions(-) diff --git a/src/mongo/db/s/range_deletion_util.cpp b/src/mongo/db/s/range_deletion_util.cpp index 82b18854782..700412d18b3 100644 --- a/src/mongo/db/s/range_deletion_util.cpp +++ b/src/mongo/db/s/range_deletion_util.cpp @@ -29,14 +29,11 @@ #define MONGO_LOGV2_DEFAULT_COMPONENT ::mongo::logv2::LogComponent::kShardingRangeDeleter -#include "mongo/platform/basic.h" - #include "mongo/db/s/range_deletion_util.h" #include -#include - #include +#include #include "mongo/db/catalog_raii.h" #include "mongo/db/client.h" @@ -56,6 +53,7 @@ #include "mongo/db/repl/wait_for_majority_service.h" #include "mongo/db/s/migration_util.h" #include "mongo/db/s/shard_key_index_util.h" +#include "mongo/db/s/sharding_runtime_d_params_gen.h" #include "mongo/db/s/sharding_statistics.h" #include "mongo/db/service_context.h" #include "mongo/db/storage/remove_saver.h" @@ -67,11 +65,7 @@ #include "mongo/util/future_util.h" namespace mongo { - namespace { -const WriteConcernOptions kMajorityWriteConcern(WriteConcernOptions::kMajority, - WriteConcernOptions::SyncMode::UNSET, - WriteConcernOptions::kWriteConcernTimeoutSharding); MONGO_FAIL_POINT_DEFINE(hangBeforeDoingDeletion); MONGO_FAIL_POINT_DEFINE(hangAfterDoingDeletion); @@ -230,7 +224,6 @@ StatusWith deleteNextBatch(OperationContext* opCtx, return numDeleted; } - template auto withTemporaryOperationContext(Callable&& callable, const NamespaceString& nss) { ThreadClient tc(migrationutil::kRangeDeletionThreadName, getGlobalServiceContext()); @@ -284,9 +277,18 @@ void ensureRangeDeletionTaskStillExists(OperationContext* opCtx, const UUID& mig // holding any locks. } +void markRangeDeletionTaskAsProcessing(OperationContext* opCtx, const UUID& migrationId) { + PersistentTaskStore store(NamespaceString::kRangeDeletionNamespace); + auto query = BSON(RangeDeletionTask::kIdFieldName << migrationId); + static const auto update = + BSON("$set" << BSON(RangeDeletionTask::kProcessingFieldName << true)); + + store.update(opCtx, query, update, WriteConcerns::kLocalWriteConcern); +} + /** - * Delete the range in a sequence of batches until there are no more documents to - * delete or deletion returns an error. + * Delete the range in a sequence of batches until there are no more documents to delete or deletion + * returns an error. */ ExecutorFuture deleteRangeInBatches(const std::shared_ptr& executor, const NamespaceString& nss, @@ -309,45 +311,48 @@ ExecutorFuture deleteRangeInBatches(const std::shared_ptr 0) { // (SERVER-62368) The range-deleter executor is mono-threaded, so @@ -356,21 +361,22 @@ ExecutorFuture deleteRangeInBatches(const std::shared_ptrsleepFor(delayBetweenBatches); } - return numDeleted; + return numDeleted == 0; }, nss); }) - .until([=](StatusWith swNumDeleted) { - // Continue iterating until there are no more documents to delete, retrying on - // any error that doesn't indicate that this node is stepping down. - return (swNumDeleted.isOK() && swNumDeleted.getValue() < numDocsToRemovePerBatch) || - swNumDeleted.getStatus() == + .until([=](StatusWith swAllDocumentsInRangeDeleted) { + // Continue iterating until there are no more documents to delete, retrying on any error + // that doesn't indicate that this node is stepping down. + return (swAllDocumentsInRangeDeleted.isOK() && + swAllDocumentsInRangeDeleted.getValue()) || + swAllDocumentsInRangeDeleted == ErrorCodes::RangeDeletionAbandonedBecauseCollectionWithUUIDDoesNotExist || - swNumDeleted.getStatus() == + swAllDocumentsInRangeDeleted == ErrorCodes::RangeDeletionAbandonedBecauseTaskDocumentDoesNotExist || - swNumDeleted.getStatus().code() == ErrorCodes::KeyPatternShorterThanBound || - ErrorCodes::isShutdownError(swNumDeleted.getStatus()) || - ErrorCodes::isNotPrimaryError(swNumDeleted.getStatus()); + swAllDocumentsInRangeDeleted == ErrorCodes::KeyPatternShorterThanBound || + ErrorCodes::isShutdownError(swAllDocumentsInRangeDeleted.getStatus()) || + ErrorCodes::isNotPrimaryError(swAllDocumentsInRangeDeleted.getStatus()); }) .on(executor, CancellationToken::uncancelable()) .ignoreValue(); @@ -502,15 +508,6 @@ void deleteRangeDeletionTasksForRename(OperationContext* opCtx, BSON(RangeDeletionTask::kNssFieldName << toNss.ns())); } -void markAsProcessingRangeDeletionTask(OperationContext* opCtx, const UUID& migrationId) { - PersistentTaskStore store(NamespaceString::kRangeDeletionNamespace); - auto query = BSON(RangeDeletionTask::kIdFieldName << migrationId); - static const auto update = - BSON("$set" << BSON(RangeDeletionTask::kProcessingFieldName << true)); - - store.update(opCtx, query, update, WriteConcerns::kLocalWriteConcern); -} - SharedSemiFuture removeDocumentsInRange( const std::shared_ptr& executor, SemiFuture waitForActiveQueriesToComplete, diff --git a/src/mongo/db/s/range_deletion_util.h b/src/mongo/db/s/range_deletion_util.h index 9d30bcc577c..1c2fdffef1d 100644 --- a/src/mongo/db/s/range_deletion_util.h +++ b/src/mongo/db/s/range_deletion_util.h @@ -28,9 +28,8 @@ */ #pragma once -#include - #include +#include #include "mongo/db/concurrency/d_concurrency.h" #include "mongo/db/namespace_string.h" @@ -40,18 +39,6 @@ namespace mongo { -class BSONObj; - -// The maximum number of documents to delete in a single batch during range deletion. -// secondaryThrottle and rangeDeleterBatchDelayMS apply between each batch. -// Must be positive or 0 (the default), which means to use the value of -// internalQueryExecYieldIterations (or 1 if that's negative or zero). -extern AtomicWord rangeDeleterBatchSize; - -// After completing a batch of document deletions, the time in millis to wait before commencing the -// next batch of deletions. -extern AtomicWord rangeDeleterBatchDelayMS; - /** * Acquires the config db lock in IX mode and the collection lock for config.rangeDeletions in X * mode. @@ -114,11 +101,6 @@ void deleteRangeDeletionTasksForRename(OperationContext* opCtx, const NamespaceString& fromNss, const NamespaceString& toNss); -/** - * Sets the processing field on the range deletion document for the given migration id. - */ -void markAsProcessingRangeDeletionTask(OperationContext* opCtx, const UUID& migrationId); - /** * Computes and sets the numOrphanDocs field for each document in `config.rangeDeletions` (skips * documents referring to older incarnations of a collection) diff --git a/src/mongo/db/s/sharding_runtime_d_params.idl b/src/mongo/db/s/sharding_runtime_d_params.idl index 23d298f0d6b..aeb0c8b676a 100644 --- a/src/mongo/db/s/sharding_runtime_d_params.idl +++ b/src/mongo/db/s/sharding_runtime_d_params.idl @@ -32,8 +32,10 @@ server_parameters: rangeDeleterBatchSize: description: >- The maximum number of documents in each batch to delete during the cleanup stage of chunk - migration (or the cleanupOrphaned command). A value of 0 indicates that the system chooses - the default value (INT_MAX). + migration (or the cleanupOrphaned command). Between each batch, secondaryThrottle and + rangeDeleterBatchDelayMS will apply. + + A value of 0 indicates that the system chooses the default value (INT_MAX). set_at: [startup, runtime] cpp_vartype: AtomicWord cpp_varname: rangeDeleterBatchSize @@ -110,7 +112,7 @@ server_parameters: default: 10 orphanCleanupDelaySecs: - description: 'How long to wait before starting cleanup of an emigrated chunk range.' + description: How long to wait before starting cleanup of an emigrated chunk range. set_at: [startup, runtime] cpp_vartype: AtomicWord cpp_varname: orphanCleanupDelaySecs -- cgit v1.2.1