diff options
author | Kaloian Manassiev <kaloian.manassiev@mongodb.com> | 2022-05-31 17:07:58 +0000 |
---|---|---|
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-06-04 21:32:08 +0000 |
commit | bd0d8c22a35afbbc145a850900df0ddf56da597f (patch) | |
tree | 9b1b9a52ebefd6a458a182ec4eabf381446201e6 | |
parent | fb08d4e18b6fc61924514a7785352c735c1cceeb (diff) | |
download | mongo-bd0d8c22a35afbbc145a850900df0ddf56da597f.tar.gz |
SERVER-66866 Make the range deleter not sleep under collection lock
(cherry picked from commit cec61a460e69c47e1bf0e455214f776c62053f53)
-rw-r--r-- | src/mongo/db/s/range_deletion_util.cpp | 89 | ||||
-rw-r--r-- | src/mongo/db/s/range_deletion_util.h | 15 | ||||
-rw-r--r-- | src/mongo/db/s/sharding_runtime_d_params.idl | 8 |
3 files changed, 49 insertions, 63 deletions
diff --git a/src/mongo/db/s/range_deletion_util.cpp b/src/mongo/db/s/range_deletion_util.cpp index cf2106d0575..22d03b32ce4 100644 --- a/src/mongo/db/s/range_deletion_util.cpp +++ b/src/mongo/db/s/range_deletion_util.cpp @@ -34,9 +34,8 @@ #include "mongo/db/s/range_deletion_util.h" #include <algorithm> -#include <utility> - #include <boost/optional.hpp> +#include <utility> #include "mongo/db/catalog/index_catalog.h" #include "mongo/db/catalog_raii.h" @@ -56,6 +55,7 @@ #include "mongo/db/repl/repl_client_info.h" #include "mongo/db/repl/wait_for_majority_service.h" #include "mongo/db/s/migration_util.h" +#include "mongo/db/s/sharding_runtime_d_params_gen.h" #include "mongo/db/s/sharding_statistics.h" #include "mongo/db/service_context.h" #include "mongo/db/storage/remove_saver.h" @@ -66,11 +66,7 @@ #include "mongo/util/future_util.h" namespace mongo { - namespace { -const WriteConcernOptions kMajorityWriteConcern(WriteConcernOptions::kMajority, - WriteConcernOptions::SyncMode::UNSET, - WriteConcernOptions::kWriteConcernTimeoutSharding); MONGO_FAIL_POINT_DEFINE(hangBeforeDoingDeletion); MONGO_FAIL_POINT_DEFINE(suspendRangeDeletion); @@ -239,7 +235,6 @@ StatusWith<int> deleteNextBatch(OperationContext* opCtx, return numDeleted; } - template <typename Callable> auto withTemporaryOperationContext(Callable&& callable, const NamespaceString& nss) { ThreadClient tc(migrationutil::kRangeDeletionThreadName, getGlobalServiceContext()); @@ -299,8 +294,8 @@ void ensureRangeDeletionTaskStillExists(OperationContext* opCtx, const UUID& mig } /** - * Delete the range in a sequence of batches until there are no more documents to - * delete or deletion returns an error. + * Delete the range in a sequence of batches until there are no more documents to delete or deletion + * returns an error. */ ExecutorFuture<void> deleteRangeInBatches(const std::shared_ptr<executor::TaskExecutor>& executor, const NamespaceString& nss, @@ -325,32 +320,34 @@ ExecutorFuture<void> deleteRangeInBatches(const std::shared_ptr<executor::TaskEx ensureRangeDeletionTaskStillExists(opCtx, *migrationId); } - AutoGetCollection collection(opCtx, nss, MODE_IX); - - // Ensure the collection exists and has not been dropped or dropped and - // recreated. - uassert( - ErrorCodes::RangeDeletionAbandonedBecauseCollectionWithUUIDDoesNotExist, - "Collection has been dropped since enqueuing this range " - "deletion task. No need to delete documents.", - !collectionUuidHasChanged( - nss, collection.getCollection(), collectionUuid)); - - auto numDeleted = uassertStatusOK(deleteNextBatch(opCtx, - collection.getCollection(), - keyPattern, - range, - numDocsToRemovePerBatch)); - LOGV2_DEBUG( - 23769, - 1, - "Deleted {numDeleted} documents in pass in namespace {namespace} with " - "UUID {collectionUUID} for range {range}", - "Deleted documents in pass", - "numDeleted"_attr = numDeleted, - "namespace"_attr = nss.ns(), - "collectionUUID"_attr = collectionUuid, - "range"_attr = range.toString()); + int numDeleted; + + { + AutoGetCollection collection(opCtx, nss, MODE_IX); + + // Ensure the collection exists and has not been dropped or dropped and + // recreated + uassert(ErrorCodes:: + RangeDeletionAbandonedBecauseCollectionWithUUIDDoesNotExist, + "Collection has been dropped since enqueuing this range " + "deletion task. No need to delete documents.", + !collectionUuidHasChanged( + nss, collection.getCollection(), collectionUuid)); + + numDeleted = uassertStatusOK(deleteNextBatch(opCtx, + collection.getCollection(), + keyPattern, + range, + numDocsToRemovePerBatch)); + } + + LOGV2_DEBUG(23769, + 1, + "Deleted documents in pass", + "numDeleted"_attr = numDeleted, + "namespace"_attr = nss.ns(), + "collectionUUID"_attr = collectionUuid, + "range"_attr = range.toString()); if (numDeleted > 0) { // (SERVER-62368) The range-deleter executor is mono-threaded, so @@ -359,21 +356,22 @@ ExecutorFuture<void> deleteRangeInBatches(const std::shared_ptr<executor::TaskEx opCtx->sleepFor(delayBetweenBatches); } - return numDeleted; + return numDeleted == 0; }, nss); }) - .until([=](StatusWith<int> swNumDeleted) { - // Continue iterating until there are no more documents to delete, retrying on - // any error that doesn't indicate that this node is stepping down. - return (swNumDeleted.isOK() && swNumDeleted.getValue() < numDocsToRemovePerBatch) || - swNumDeleted.getStatus() == + .until([=](StatusWith<bool> swAllDocumentsInRangeDeleted) { + // Continue iterating until there are no more documents to delete, retrying on any + // error that doesn't indicate that this node is stepping down. + return (swAllDocumentsInRangeDeleted.isOK() && + swAllDocumentsInRangeDeleted.getValue()) || + swAllDocumentsInRangeDeleted == ErrorCodes::RangeDeletionAbandonedBecauseCollectionWithUUIDDoesNotExist || - swNumDeleted.getStatus() == + swAllDocumentsInRangeDeleted == ErrorCodes::RangeDeletionAbandonedBecauseTaskDocumentDoesNotExist || - swNumDeleted.getStatus().code() == ErrorCodes::KeyPatternShorterThanBound || - ErrorCodes::isShutdownError(swNumDeleted.getStatus()) || - ErrorCodes::isNotPrimaryError(swNumDeleted.getStatus()); + swAllDocumentsInRangeDeleted == ErrorCodes::KeyPatternShorterThanBound || + ErrorCodes::isShutdownError(swAllDocumentsInRangeDeleted.getStatus()) || + ErrorCodes::isNotPrimaryError(swAllDocumentsInRangeDeleted.getStatus()); }) .on(executor, CancellationToken::uncancelable()) .ignoreValue(); @@ -505,7 +503,6 @@ void deleteRangeDeletionTasksForRename(OperationContext* opCtx, QUERY(RangeDeletionTask::kNssFieldName << toNss.ns())); } - SharedSemiFuture<void> removeDocumentsInRange( const std::shared_ptr<executor::TaskExecutor>& executor, SemiFuture<void> waitForActiveQueriesToComplete, diff --git a/src/mongo/db/s/range_deletion_util.h b/src/mongo/db/s/range_deletion_util.h index b2ab01ec168..510acbc8e74 100644 --- a/src/mongo/db/s/range_deletion_util.h +++ b/src/mongo/db/s/range_deletion_util.h @@ -28,9 +28,8 @@ */ #pragma once -#include <list> - #include <boost/optional.hpp> +#include <list> #include "mongo/db/namespace_string.h" #include "mongo/db/s/range_deletion_task_gen.h" @@ -39,18 +38,6 @@ namespace mongo { -class BSONObj; - -// The maximum number of documents to delete in a single batch during range deletion. -// secondaryThrottle and rangeDeleterBatchDelayMS apply between each batch. -// Must be positive or 0 (the default), which means to use the value of -// internalQueryExecYieldIterations (or 1 if that's negative or zero). -extern AtomicWord<int> rangeDeleterBatchSize; - -// After completing a batch of document deletions, the time in millis to wait before commencing the -// next batch of deletions. -extern AtomicWord<int> rangeDeleterBatchDelayMS; - /** * Deletes a range of orphaned documents for the given namespace and collection UUID. Returns a * future which will be resolved when the range has finished being deleted. The resulting future diff --git a/src/mongo/db/s/sharding_runtime_d_params.idl b/src/mongo/db/s/sharding_runtime_d_params.idl index 70f9466365c..793a5b8cab2 100644 --- a/src/mongo/db/s/sharding_runtime_d_params.idl +++ b/src/mongo/db/s/sharding_runtime_d_params.idl @@ -32,8 +32,10 @@ server_parameters: rangeDeleterBatchSize: description: >- The maximum number of documents in each batch to delete during the cleanup stage of chunk - migration (or the cleanupOrphaned command). A value of 0 indicates that the system chooses - the default value (INT_MAX). + migration (or the cleanupOrphaned command). Between each batch, secondaryThrottle and + rangeDeleterBatchDelayMS will apply. + + A value of 0 indicates that the system chooses the default value (INT_MAX). set_at: [startup, runtime] cpp_vartype: AtomicWord<int> cpp_varname: rangeDeleterBatchSize @@ -110,7 +112,7 @@ server_parameters: default: 10 orphanCleanupDelaySecs: - description: 'How long to wait before starting cleanup of an emigrated chunk range.' + description: How long to wait before starting cleanup of an emigrated chunk range. set_at: [startup, runtime] cpp_vartype: AtomicWord<int> cpp_varname: orphanCleanupDelaySecs |