author | Esha Maharishi <esha.maharishi@mongodb.com> | 2020-05-12 17:26:45 -0400
---|---|---
committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2020-05-18 04:14:45 +0000
commit | d0e6f87351312a299a17f6d63e3f2f4db834ae1f (patch) |
tree | cf758ddf2c85d414d9eab4aa1b318791934aae30 /src/mongo/db/s |
parent | 7e2111ef33fc40959a254bd3109466176ae60718 (diff) |
download | mongo-d0e6f87351312a299a17f6d63e3f2f4db834ae1f.tar.gz |
SERVER-47992 Make disableResumableRangeDeleter just prevent ranges from being submitted for deletion
Diffstat (limited to 'src/mongo/db/s')
-rw-r--r-- | src/mongo/db/s/cleanup_orphaned_cmd.cpp | 276
-rw-r--r-- | src/mongo/db/s/migration_chunk_cloner_source.h | 3
-rw-r--r-- | src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp | 6
-rw-r--r-- | src/mongo/db/s/migration_chunk_cloner_source_legacy.h | 3
-rw-r--r-- | src/mongo/db/s/migration_chunk_cloner_source_legacy_test.cpp | 8
-rw-r--r-- | src/mongo/db/s/migration_destination_manager.cpp | 168
-rw-r--r-- | src/mongo/db/s/migration_destination_manager.h | 6
-rw-r--r-- | src/mongo/db/s/migration_source_manager.cpp | 126
-rw-r--r-- | src/mongo/db/s/migration_source_manager.h | 4
-rw-r--r-- | src/mongo/db/s/migration_util.cpp | 8
-rw-r--r-- | src/mongo/db/s/sharding_runtime_d_params.idl | 5
-rw-r--r-- | src/mongo/db/s/start_chunk_clone_request.cpp | 21
-rw-r--r-- | src/mongo/db/s/start_chunk_clone_request.h | 9
-rw-r--r-- | src/mongo/db/s/start_chunk_clone_request_test.cpp | 3
14 files changed, 192 insertions, 454 deletions
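
The behavioral core of this commit, as reflected in the diff below: with `disableResumableRangeDeleter=true`, ranges are simply never submitted for deletion, and operations that must wait on existing range deletion tasks (cleanupOrphaned, incoming migrations) now fail with `ResumableRangeDeleterDisabled` instead of falling back to the old non-durable deleter. The following is a minimal, self-contained sketch of that guard pattern only; the names `pendingDeletionTaskCount` and `waitForOverlappingRangeDeletions` are illustrative stand-ins, not MongoDB's actual API, and the plain `bool` stands in for the real `AtomicWord<bool>` server parameter.

```cpp
// Standalone model of the new guard logic (illustrative only; not MongoDB's real API).
// The real code performs this check with uassert(ErrorCodes::ResumableRangeDeleterDisabled, ...)
// inside cleanupOrphanedData() and the migration destination's wait loop.
#include <iostream>
#include <stdexcept>

// Hypothetical stand-ins for the startup server parameter and for the number of
// config.rangeDeletions tasks overlapping the range of interest.
bool disableResumableRangeDeleter = false;
int pendingDeletionTaskCount = 0;

// Models the wait loop: keep waiting while overlapping range deletion tasks exist,
// but fail immediately if the resumable range deleter is disabled, since those
// tasks will never be processed.
void waitForOverlappingRangeDeletions() {
    while (pendingDeletionTaskCount > 0) {
        if (disableResumableRangeDeleter) {
            throw std::runtime_error(
                "ResumableRangeDeleterDisabled: range deletion tasks exist but the "
                "resumable range deleter is disabled");
        }
        // In the real code: CollectionShardingRuntime::waitForClean(...) followed by
        // opCtx->sleepFor(Milliseconds(1000)); here we just simulate one task draining.
        --pendingDeletionTaskCount;
    }
}

int main() {
    // Deleter enabled: the wait loop drains the tasks and returns.
    pendingDeletionTaskCount = 2;
    waitForOverlappingRangeDeletions();
    std::cout << "cleanup finished\n";

    // Deleter disabled with tasks pending: the operation fails fast.
    pendingDeletionTaskCount = 1;
    disableResumableRangeDeleter = true;
    try {
        waitForOverlappingRangeDeletions();
    } catch (const std::exception& e) {
        std::cout << "failed as expected: " << e.what() << "\n";
    }
    return 0;
}
```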
diff --git a/src/mongo/db/s/cleanup_orphaned_cmd.cpp b/src/mongo/db/s/cleanup_orphaned_cmd.cpp index 3725ab951d6..97b78e600ba 100644 --- a/src/mongo/db/s/cleanup_orphaned_cmd.cpp +++ b/src/mongo/db/s/cleanup_orphaned_cmd.cpp @@ -59,14 +59,8 @@ namespace { enum class CleanupResult { kDone, kContinue, kError }; /** - * If the resumable range deleter is disabled: - * Cleans up one range of orphaned data starting from a range that overlaps or starts at - * 'startingFromKey'. If empty, startingFromKey is the minimum key of the sharded range. - * - * If the resumable range deleter is enabled: * Waits for all possibly orphaned ranges on 'nss' to be cleaned up. * - * @return CleanupResult::kContinue and 'stoppedAtKey' if orphaned range was found and cleaned * @return CleanupResult::kDone if no orphaned ranges remain * @return CleanupResult::kError and 'errMsg' if an error occurred * @@ -75,205 +69,86 @@ enum class CleanupResult { kDone, kContinue, kError }; CleanupResult cleanupOrphanedData(OperationContext* opCtx, const NamespaceString& ns, const BSONObj& startingFromKeyConst, - BSONObj* stoppedAtKey, std::string* errMsg) { - // Note that 'disableResumableRangeDeleter' is a startup-only parameter, so it cannot change - // while this process is running. - if (!disableResumableRangeDeleter.load()) { - boost::optional<ChunkRange> range; - boost::optional<UUID> collectionUuid; - { - AutoGetCollection autoColl(opCtx, ns, MODE_IX); - if (!autoColl.getCollection()) { - LOGV2(4416000, - "cleanupOrphaned skipping waiting for orphaned data cleanup because " - "{namespace} does not exist", - "cleanupOrphaned skipping waiting for orphaned data cleanup because " - "collection does not exist", - "namespace"_attr = ns.ns()); - return CleanupResult::kDone; - } - collectionUuid.emplace(autoColl.getCollection()->uuid()); - - auto* const css = CollectionShardingRuntime::get(opCtx, ns); - const auto collDesc = css->getCollectionDescription(); - if (!collDesc.isSharded()) { - LOGV2(4416001, - "cleanupOrphaned skipping waiting for orphaned data cleanup because " - "{namespace} is not sharded", - "cleanupOrphaned skipping waiting for orphaned data cleanup because " - "collection is not sharded", - "namespace"_attr = ns.ns()); - return CleanupResult::kDone; - } - range.emplace(collDesc.getMinKey(), collDesc.getMaxKey()); - - // Though the 'startingFromKey' parameter is not used as the min key of the range to - // wait for, we still validate that 'startingFromKey' in the same way as the original - // cleanupOrphaned logic did if 'startingFromKey' is present. 
- BSONObj keyPattern = collDesc.getKeyPattern(); - if (!startingFromKeyConst.isEmpty() && !collDesc.isValidKey(startingFromKeyConst)) { - LOGV2_ERROR_OPTIONS( - 4416002, - {logv2::UserAssertAfterLog(ErrorCodes::OrphanedRangeCleanUpFailed)}, - "Could not cleanup orphaned data because start key does not match shard key " - "pattern", - "startKey"_attr = redact(startingFromKeyConst), - "shardKeyPattern"_attr = keyPattern); - } + boost::optional<ChunkRange> range; + boost::optional<UUID> collectionUuid; + { + AutoGetCollection autoColl(opCtx, ns, MODE_IX); + if (!autoColl.getCollection()) { + LOGV2(4416000, + "cleanupOrphaned skipping waiting for orphaned data cleanup because " + "{namespace} does not exist", + "cleanupOrphaned skipping waiting for orphaned data cleanup because " + "collection does not exist", + "namespace"_attr = ns.ns()); + return CleanupResult::kDone; } - - // We actually want to wait until there are no range deletion tasks for this namespace/UUID, - // but we don't have a good way to wait for that event, so instead we wait for there to be - // no tasks being processed in memory for this namespace/UUID. - // However, it's possible this node has recently stepped up, and the stepup recovery task to - // resubmit range deletion tasks for processing has not yet completed. In that case, - // waitForClean will return though there are still tasks in config.rangeDeletions, so we - // sleep for a short time and then try waitForClean again. - while (auto numRemainingDeletionTasks = - migrationutil::checkForConflictingDeletions(opCtx, *range, *collectionUuid)) { - LOGV2(4416003, - "cleanupOrphaned going to wait for range deletion tasks to complete", - "namespace"_attr = ns.ns(), - "collectionUUID"_attr = *collectionUuid, - "numRemainingDeletionTasks"_attr = numRemainingDeletionTasks); - - auto status = - CollectionShardingRuntime::waitForClean(opCtx, ns, *collectionUuid, *range); - - if (!status.isOK()) { - *errMsg = status.reason(); - return CleanupResult::kError; - } - - opCtx->sleepFor(Milliseconds(1000)); + collectionUuid.emplace(autoColl.getCollection()->uuid()); + + auto* const css = CollectionShardingRuntime::get(opCtx, ns); + const auto collDesc = css->getCollectionDescription(); + if (!collDesc.isSharded()) { + LOGV2(4416001, + "cleanupOrphaned skipping waiting for orphaned data cleanup because " + "{namespace} is not sharded", + "cleanupOrphaned skipping waiting for orphaned data cleanup because " + "collection is not sharded", + "namespace"_attr = ns.ns()); + return CleanupResult::kDone; } - - return CleanupResult::kDone; - } else { - - BSONObj startingFromKey = startingFromKeyConst; - boost::optional<ChunkRange> targetRange; - SharedSemiFuture<void> cleanupCompleteFuture; - - { - AutoGetCollection autoColl(opCtx, ns, MODE_IX); - auto* const css = CollectionShardingRuntime::get(opCtx, ns); - // Keep the collection metadata from changing for the rest of this scope. 
- auto csrLock = CollectionShardingRuntime::CSRLock::lockShared(opCtx, css); - const auto collDesc = css->getCollectionDescription(); - if (!collDesc.isSharded()) { - LOGV2(21911, - "cleanupOrphaned skipping orphaned data cleanup because collection is not " - "sharded", - "namespace"_attr = ns.ns()); - return CleanupResult::kDone; - } - - BSONObj keyPattern = collDesc.getKeyPattern(); - if (!startingFromKey.isEmpty()) { - if (!collDesc.isValidKey(startingFromKey)) { - LOGV2_ERROR_OPTIONS( - 21912, - {logv2::UserAssertAfterLog(ErrorCodes::OrphanedRangeCleanUpFailed)}, - "Could not cleanup orphaned data, start key {startKey} does not match " - "shard key pattern {shardKeyPattern}", - "Could not cleanup orphaned data because start key does not match shard " - "key pattern", - "startKey"_attr = redact(startingFromKey), - "shardKeyPattern"_attr = keyPattern); - } - } else { - startingFromKey = collDesc.getMinKey(); - } - - targetRange = css->getNextOrphanRange(startingFromKey); - if (!targetRange) { - LOGV2_DEBUG(21913, - 1, - "cleanupOrphaned returning because no orphan ranges remain", - "namespace"_attr = ns.toString(), - "startingFromKey"_attr = redact(startingFromKey)); - - return CleanupResult::kDone; - } - - *stoppedAtKey = targetRange->getMax(); - - cleanupCompleteFuture = - css->cleanUpRange(*targetRange, boost::none, CollectionShardingRuntime::kNow); + range.emplace(collDesc.getMinKey(), collDesc.getMaxKey()); + + // Though the 'startingFromKey' parameter is not used as the min key of the range to + // wait for, we still validate that 'startingFromKey' in the same way as the original + // cleanupOrphaned logic did if 'startingFromKey' is present. + BSONObj keyPattern = collDesc.getKeyPattern(); + if (!startingFromKeyConst.isEmpty() && !collDesc.isValidKey(startingFromKeyConst)) { + LOGV2_ERROR_OPTIONS( + 4416002, + {logv2::UserAssertAfterLog(ErrorCodes::OrphanedRangeCleanUpFailed)}, + "Could not cleanup orphaned data because start key does not match shard key " + "pattern", + "startKey"_attr = redact(startingFromKeyConst), + "shardKeyPattern"_attr = keyPattern); } + } - // Sleep waiting for our own deletion. We don't actually care about any others, so there is - // no need to call css::waitForClean() here. - - LOGV2_DEBUG(21914, - 1, - "cleanupOrphaned requested for {namespace} starting from {startingFromKey}, " - "removing next orphan range {targetRange}; waiting...", - "cleanupOrphaned requested", - "namespace"_attr = ns.toString(), - "startingFromKey"_attr = redact(startingFromKey), - "targetRange"_attr = redact(targetRange->toString())); - - Status result = cleanupCompleteFuture.getNoThrow(opCtx); - - LOGV2_DEBUG(21915, - 1, - "Finished waiting for last {namespace} orphan range cleanup", - "Finished waiting for last orphan range cleanup in collection", - "namespace"_attr = ns.toString()); - - if (!result.isOK()) { - LOGV2_ERROR_OPTIONS(21916, - {logv2::UserAssertAfterLog(result.code())}, - "Error waiting for last {namespace} orphan range cleanup: {error}", - "Error waiting for last orphan range cleanup in collection", - "namespace"_attr = ns.ns(), - "error"_attr = redact(result.reason())); + // We actually want to wait until there are no range deletion tasks for this namespace/UUID, + // but we don't have a good way to wait for that event, so instead we wait for there to be + // no tasks being processed in memory for this namespace/UUID. 
+ // However, it's possible this node has recently stepped up, and the stepup recovery task to + // resubmit range deletion tasks for processing has not yet completed. In that case, + // waitForClean will return though there are still tasks in config.rangeDeletions, so we + // sleep for a short time and then try waitForClean again. + while (auto numRemainingDeletionTasks = + migrationutil::checkForConflictingDeletions(opCtx, *range, *collectionUuid)) { + uassert(ErrorCodes::ResumableRangeDeleterDisabled, + "Failing cleanupOrphaned because the disableResumableRangeDeleter server parameter " + "is set to true and this shard contains range deletion tasks for the collection.", + !disableResumableRangeDeleter.load()); + + LOGV2(4416003, + "cleanupOrphaned going to wait for range deletion tasks to complete", + "namespace"_attr = ns.ns(), + "collectionUUID"_attr = *collectionUuid, + "numRemainingDeletionTasks"_attr = numRemainingDeletionTasks); + + auto status = CollectionShardingRuntime::waitForClean(opCtx, ns, *collectionUuid, *range); + + if (!status.isOK()) { + *errMsg = status.reason(); + return CleanupResult::kError; } - return CleanupResult::kContinue; + opCtx->sleepFor(Milliseconds(1000)); } + + return CleanupResult::kDone; } /** - * If 'disableResumableRangeDeleter=true': - * - * Called on a particular namespace, and if the collection is sharded will clean up a single - * orphaned data range which overlaps or starts after a passed-in 'startingFromKey'. Returns true - * and a 'stoppedAtKey' (which will start a search for the next orphaned range if the command is - * called again) or no key if there are no more orphaned ranges in the collection. - * - * If the collection is not sharded, returns true but no 'stoppedAtKey'. - * On failure, returns false and an error message. - * - * Calling this command repeatedly until no 'stoppedAtKey' is returned ensures that the - * full collection range is searched for orphaned documents, but since sharding state may - * change between calls there is no guarantee that all orphaned documents were found unless - * the balancer is off. - * - * Safe to call with the balancer on. - * - * Format: - * - * { - * cleanupOrphaned: <ns>, - * // optional parameters: - * startingAtKey: { <shardKeyValue> }, // defaults to lowest value - * secondaryThrottle: <bool>, // defaults to true - * // defaults to { w: "majority", wtimeout: 60000 }. Applies to individual writes. - * writeConcern: { <writeConcern options> } - * } - * - * If 'disableResumableRangeDeleter=false': - * * Called on a particular namespace, and if the collection is sharded will wait for the number of - * range deletion tasks on the collection on this shard to reach zero. Returns true on completion, - * but never returns 'stoppedAtKey', since it always returns once there are no more orphaned ranges. - * - * If the collection is not sharded, returns true and no 'stoppedAtKey'. - * On failure, returns false and an error message. + * range deletion tasks on the collection on this shard to reach zero. * * Since the sharding state may change after this call returns, there is no guarantee that orphans * won't re-appear as a result of migrations that commit after this call returns. 
@@ -310,9 +185,6 @@ public: static BSONField<std::string> nsField; static BSONField<BSONObj> startingFromKeyField; - // Output - static BSONField<BSONObj> stoppedAtKeyField; - bool errmsgRun(OperationContext* opCtx, std::string const& db, const BSONObj& cmdObj, @@ -343,19 +215,12 @@ public: forceShardFilteringMetadataRefresh(opCtx, nss, true /* forceRefreshFromThisThread */); - BSONObj stoppedAtKey; - CleanupResult cleanupResult = - cleanupOrphanedData(opCtx, nss, startingFromKey, &stoppedAtKey, &errmsg); + CleanupResult cleanupResult = cleanupOrphanedData(opCtx, nss, startingFromKey, &errmsg); if (cleanupResult == CleanupResult::kError) { return false; } - - if (cleanupResult == CleanupResult::kContinue) { - result.append(stoppedAtKeyField(), stoppedAtKey); - } else { - dassert(cleanupResult == CleanupResult::kDone); - } + dassert(cleanupResult == CleanupResult::kDone); return true; } @@ -364,7 +229,6 @@ public: BSONField<std::string> CleanupOrphanedCommand::nsField("cleanupOrphaned"); BSONField<BSONObj> CleanupOrphanedCommand::startingFromKeyField("startingFromKey"); -BSONField<BSONObj> CleanupOrphanedCommand::stoppedAtKeyField("stoppedAtKey"); } // namespace } // namespace mongo diff --git a/src/mongo/db/s/migration_chunk_cloner_source.h b/src/mongo/db/s/migration_chunk_cloner_source.h index c06a9d0a5a1..00e17e8fe3a 100644 --- a/src/mongo/db/s/migration_chunk_cloner_source.h +++ b/src/mongo/db/s/migration_chunk_cloner_source.h @@ -77,8 +77,7 @@ public: virtual Status startClone(OperationContext* opCtx, const UUID& migrationId, const LogicalSessionId& lsid, - TxnNumber txnNumber, - bool resumableRangeDeleterDisabled) = 0; + TxnNumber txnNumber) = 0; /** * Blocking method, which uses some custom selected logic for deciding whether it is appropriate diff --git a/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp b/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp index f64e96fcd48..40a1703318b 100644 --- a/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp +++ b/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp @@ -242,8 +242,7 @@ MigrationChunkClonerSourceLegacy::~MigrationChunkClonerSourceLegacy() { Status MigrationChunkClonerSourceLegacy::startClone(OperationContext* opCtx, const UUID& migrationId, const LogicalSessionId& lsid, - TxnNumber txnNumber, - bool resumableRangeDeleterDisabled) { + TxnNumber txnNumber) { invariant(_state == kNew); invariant(!opCtx->lockState()->isLocked()); @@ -296,8 +295,7 @@ Status MigrationChunkClonerSourceLegacy::startClone(OperationContext* opCtx, _args.getMinKey(), _args.getMaxKey(), _shardKeyPattern.toBSON(), - _args.getSecondaryThrottle(), - resumableRangeDeleterDisabled); + _args.getSecondaryThrottle()); // Commands sent to shards that accept writeConcern, must always have writeConcern. 
So if the // StartChunkCloneRequest didn't add writeConcern (from secondaryThrottle), then we add the diff --git a/src/mongo/db/s/migration_chunk_cloner_source_legacy.h b/src/mongo/db/s/migration_chunk_cloner_source_legacy.h index 957615b6e29..8e34e2033f3 100644 --- a/src/mongo/db/s/migration_chunk_cloner_source_legacy.h +++ b/src/mongo/db/s/migration_chunk_cloner_source_legacy.h @@ -93,8 +93,7 @@ public: Status startClone(OperationContext* opCtx, const UUID& migrationId, const LogicalSessionId& lsid, - TxnNumber txnNumber, - bool resumableRangeDeleterDisabled) override; + TxnNumber txnNumber) override; Status awaitUntilCriticalSectionIsAppropriate(OperationContext* opCtx, Milliseconds maxTimeToWait) override; diff --git a/src/mongo/db/s/migration_chunk_cloner_source_legacy_test.cpp b/src/mongo/db/s/migration_chunk_cloner_source_legacy_test.cpp index 44bd0c01b5b..730a2d4af3b 100644 --- a/src/mongo/db/s/migration_chunk_cloner_source_legacy_test.cpp +++ b/src/mongo/db/s/migration_chunk_cloner_source_legacy_test.cpp @@ -252,7 +252,7 @@ TEST_F(MigrationChunkClonerSourceLegacyTest, CorrectDocumentsFetched) { onCommand([&](const RemoteCommandRequest& request) { return BSON("ok" << true); }); }); - ASSERT_OK(cloner.startClone(operationContext(), UUID::gen(), _lsid, _txnNumber, false)); + ASSERT_OK(cloner.startClone(operationContext(), UUID::gen(), _lsid, _txnNumber)); futureStartClone.default_timed_get(); } @@ -350,7 +350,7 @@ TEST_F(MigrationChunkClonerSourceLegacyTest, CollectionNotFound) { kDonorConnStr, kRecipientConnStr.getServers()[0]); - ASSERT_NOT_OK(cloner.startClone(operationContext(), UUID::gen(), _lsid, _txnNumber, false)); + ASSERT_NOT_OK(cloner.startClone(operationContext(), UUID::gen(), _lsid, _txnNumber)); cloner.cancelClone(operationContext()); } @@ -363,7 +363,7 @@ TEST_F(MigrationChunkClonerSourceLegacyTest, ShardKeyIndexNotFound) { kDonorConnStr, kRecipientConnStr.getServers()[0]); - ASSERT_NOT_OK(cloner.startClone(operationContext(), UUID::gen(), _lsid, _txnNumber, false)); + ASSERT_NOT_OK(cloner.startClone(operationContext(), UUID::gen(), _lsid, _txnNumber)); cloner.cancelClone(operationContext()); } @@ -390,7 +390,7 @@ TEST_F(MigrationChunkClonerSourceLegacyTest, FailedToEngageRecipientShard) { }); auto startCloneStatus = - cloner.startClone(operationContext(), UUID::gen(), _lsid, _txnNumber, false); + cloner.startClone(operationContext(), UUID::gen(), _lsid, _txnNumber); ASSERT_EQ(ErrorCodes::NetworkTimeout, startCloneStatus.code()); futureStartClone.default_timed_get(); } diff --git a/src/mongo/db/s/migration_destination_manager.cpp b/src/mongo/db/s/migration_destination_manager.cpp index af0888d8992..fc5b7258138 100644 --- a/src/mongo/db/s/migration_destination_manager.cpp +++ b/src/mongo/db/s/migration_destination_manager.cpp @@ -345,24 +345,13 @@ Status MigrationDestinationManager::start(OperationContext* opCtx, invariant(!_sessionId); invariant(!_scopedReceiveChunk); - _enableResumableRangeDeleter = !disableResumableRangeDeleter.load(); - _state = READY; _stateChangedCV.notify_all(); _errmsg = ""; - if (_enableResumableRangeDeleter) { - uassert(ErrorCodes::IllegalOperation, - str::stream() << "Did not receive migrationId in _recvChunkStart, but this node " - "has'disableResumableRangeDeleter=false'. 
Does the donor shard " - << cloneRequest.getFromShardId() - << " have 'disableResumableRangeDeleter=true'?", - cloneRequest.hasMigrationId()); - - _migrationId = cloneRequest.getMigrationId(); - _lsid = cloneRequest.getLsid(); - _txnNumber = cloneRequest.getTxnNumber(); - } + _migrationId = cloneRequest.getMigrationId(); + _lsid = cloneRequest.getLsid(); + _txnNumber = cloneRequest.getTxnNumber(); _nss = nss; _fromShard = cloneRequest.getFromShardId(); @@ -784,31 +773,21 @@ void MigrationDestinationManager::_migrateThread() { // txnNumber on this session while this node is still executing the recipient side //(which is important because otherwise, this node may create orphans after the // range deletion task on this node has been processed). - if (_enableResumableRangeDeleter) { - opCtx->setLogicalSessionId(_lsid); - opCtx->setTxnNumber(_txnNumber); - - MongoDOperationContextSession sessionTxnState(opCtx); - - auto txnParticipant = TransactionParticipant::get(opCtx); - txnParticipant.beginOrContinue(opCtx, - *opCtx->getTxnNumber(), - boost::none /* autocommit */, - boost::none /* startTransaction */); - _migrateDriver(opCtx); - } else { - _migrateDriver(opCtx); - } + opCtx->setLogicalSessionId(_lsid); + opCtx->setTxnNumber(_txnNumber); + + MongoDOperationContextSession sessionTxnState(opCtx); + + auto txnParticipant = TransactionParticipant::get(opCtx); + txnParticipant.beginOrContinue(opCtx, + *opCtx->getTxnNumber(), + boost::none /* autocommit */, + boost::none /* startTransaction */); + _migrateDriver(opCtx); } catch (...) { _setStateFail(str::stream() << "migrate failed: " << redact(exceptionToStatus())); } - if (!_enableResumableRangeDeleter) { - if (getState() != DONE) { - _forgetPending(opCtx, ChunkRange(_min, _max)); - } - } - stdx::lock_guard<Latch> lk(_mutex); _sessionId.reset(); _collUuid.reset(); @@ -833,7 +812,7 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx) { "fromShard"_attr = _fromShard, "epoch"_attr = _epoch, "sessionId"_attr = *_sessionId, - "migrationId"_attr = _enableResumableRangeDeleter ? _migrationId->toBSON() : BSONObj()); + "migrationId"_attr = _migrationId.toBSON()); MoveTimingHelper timing( outerOpCtx, "to", _nss.ns(), _min, _max, 6 /* steps */, &_errmsg, _toShard, _fromShard); @@ -843,8 +822,7 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx) { if (initialState == ABORT) { LOGV2_ERROR(22013, "Migration abort requested before the migration started", - "migrationId"_attr = - _enableResumableRangeDeleter ? _migrationId->toBSON() : BSONObj()); + "migrationId"_attr = _migrationId.toBSON()); return; } @@ -861,67 +839,44 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx) { // 2. Ensure any data which might have been left orphaned in the range being moved has been // deleted. 
- if (_enableResumableRangeDeleter) { - while (migrationutil::checkForConflictingDeletions( - outerOpCtx, range, donorCollectionOptionsAndIndexes.uuid)) { - LOGV2(22001, - "Migration paused because the requested range {range} for {namespace} " - "overlaps with a range already scheduled for deletion", - "Migration paused because the requested range overlaps with a range already " - "scheduled for deletion", - "namespace"_attr = _nss.ns(), - "range"_attr = redact(range.toString()), - "migrationId"_attr = *_migrationId); - - auto status = CollectionShardingRuntime::waitForClean( - outerOpCtx, _nss, donorCollectionOptionsAndIndexes.uuid, range); - - if (!status.isOK()) { - _setStateFail(redact(status.reason())); - return; - } - - outerOpCtx->sleepFor(Milliseconds(1000)); - } + while (migrationutil::checkForConflictingDeletions( + outerOpCtx, range, donorCollectionOptionsAndIndexes.uuid)) { + uassert(ErrorCodes::ResumableRangeDeleterDisabled, + "Failing migration because the disableResumableRangeDeleter server " + "parameter is set to true on the recipient shard, which contains range " + "deletion tasks overlapping the incoming range.", + !disableResumableRangeDeleter.load()); + + LOGV2(22001, + "Migration paused because the requested range {range} for {namespace} " + "overlaps with a range already scheduled for deletion", + "Migration paused because the requested range overlaps with a range already " + "scheduled for deletion", + "namespace"_attr = _nss.ns(), + "range"_attr = redact(range.toString()), + "migrationId"_attr = _migrationId.toBSON()); - RangeDeletionTask recipientDeletionTask(*_migrationId, - _nss, - donorCollectionOptionsAndIndexes.uuid, - _fromShard, - range, - CleanWhenEnum::kNow); - recipientDeletionTask.setPending(true); + auto status = CollectionShardingRuntime::waitForClean( + outerOpCtx, _nss, donorCollectionOptionsAndIndexes.uuid, range); - migrationutil::persistRangeDeletionTaskLocally(outerOpCtx, recipientDeletionTask); - } else { - // Synchronously delete any data which might have been left orphaned in the range - // being moved, and wait for completion - - // Needed for _forgetPending to make sure the collection has the same UUID at the end of - // an aborted migration as at the beginning. Must be set before calling _notePending. - _collUuid = donorCollectionOptionsAndIndexes.uuid; - auto cleanupCompleteFuture = _notePending(outerOpCtx, range); - auto cleanupStatus = cleanupCompleteFuture.getNoThrow(outerOpCtx); - // Wait for the range deletion to report back. Swallow - // RangeDeletionAbandonedBecauseCollectionWithUUIDDoesNotExist error since the - // collection could either never exist or get dropped directly from the shard after the - // range deletion task got scheduled. - if (!cleanupStatus.isOK() && - cleanupStatus != - ErrorCodes::RangeDeletionAbandonedBecauseCollectionWithUUIDDoesNotExist) { - _setStateFail(redact(cleanupStatus.reason())); + if (!status.isOK()) { + _setStateFail(redact(status.reason())); return; } - // Wait for any other, overlapping queued deletions to drain - cleanupStatus = CollectionShardingRuntime::waitForClean( - outerOpCtx, _nss, donorCollectionOptionsAndIndexes.uuid, range); - if (!cleanupStatus.isOK()) { - _setStateFail(redact(cleanupStatus.reason())); - return; - } + outerOpCtx->sleepFor(Milliseconds(1000)); } + // Insert a pending range deletion task for the incoming range. 
+ RangeDeletionTask recipientDeletionTask(_migrationId, + _nss, + donorCollectionOptionsAndIndexes.uuid, + _fromShard, + range, + CleanWhenEnum::kNow); + recipientDeletionTask.setPending(true); + migrationutil::persistRangeDeletionTaskLocally(outerOpCtx, recipientDeletionTask); + timing.done(1); migrateThreadHangAtStep1.pauseWhileSet(); } @@ -1015,9 +970,7 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx) { if (replStatus.status.code() == ErrorCodes::WriteConcernFailed) { LOGV2_WARNING(22011, "secondaryThrottle on, but doc insert timed out; continuing", - "migrationId"_attr = _enableResumableRangeDeleter - ? _migrationId->toBSON() - : BSONObj()); + "migrationId"_attr = _migrationId.toBSON()); } else { uassertStatusOK(replStatus.status); } @@ -1093,8 +1046,7 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx) { if (getState() == ABORT) { LOGV2(22002, "Migration aborted while waiting for replication at catch up stage", - "migrationId"_attr = - _enableResumableRangeDeleter ? _migrationId->toBSON() : BSONObj()); + "migrationId"_attr = _migrationId.toBSON()); return; } @@ -1104,8 +1056,7 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx) { if (i > 100) { LOGV2(22003, "secondaries having hard time keeping up with migrate", - "migrationId"_attr = - _enableResumableRangeDeleter ? _migrationId->toBSON() : BSONObj()); + "migrationId"_attr = _migrationId.toBSON()); } sleepmillis(20); @@ -1127,8 +1078,7 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx) { LOGV2(22004, "Waiting for replication to catch up before entering critical section", - "migrationId"_attr = - _enableResumableRangeDeleter ? _migrationId->toBSON() : BSONObj()); + "migrationId"_attr = _migrationId.toBSON()); auto awaitReplicationResult = repl::ReplicationCoordinator::get(opCtx)->awaitReplication( opCtx, lastOpApplied, _writeConcern); @@ -1137,8 +1087,7 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx) { LOGV2(22005, "Chunk data replicated successfully.", - "migrationId"_attr = - _enableResumableRangeDeleter ? _migrationId->toBSON() : BSONObj()); + "migrationId"_attr = _migrationId.toBSON()); } { @@ -1177,8 +1126,7 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx) { if (getState() == ABORT) { LOGV2(22006, "Migration aborted while transferring mods", - "migrationId"_attr = - _enableResumableRangeDeleter ? _migrationId->toBSON() : BSONObj()); + "migrationId"_attr = _migrationId.toBSON()); return; } @@ -1300,9 +1248,6 @@ bool MigrationDestinationManager::_applyMigrateOp(OperationContext* opCtx, autoColl.getDb(), updatedDoc, &localDoc)) { - const auto migrationId = - _enableResumableRangeDeleter ? _migrationId->toBSON() : BSONObj(); - // Exception will abort migration cleanly LOGV2_ERROR_OPTIONS( 16977, @@ -1313,7 +1258,7 @@ bool MigrationDestinationManager::_applyMigrateOp(OperationContext* opCtx, "reloaded remote document", "localDoc"_attr = redact(localDoc), "remoteDoc"_attr = redact(updatedDoc), - "migrationId"_attr = migrationId); + "migrationId"_attr = _migrationId.toBSON()); } // We are in write lock here, so sure we aren't killing @@ -1344,8 +1289,7 @@ bool MigrationDestinationManager::_flushPendingWrites(OperationContext* opCtx, "chunkMin"_attr = redact(_min), "chunkMax"_attr = redact(_max), "lastOpApplied"_attr = op, - "migrationId"_attr = - _enableResumableRangeDeleter ? 
_migrationId->toBSON() : BSONObj()); + "migrationId"_attr = _migrationId.toBSON()); } return false; } @@ -1356,7 +1300,7 @@ bool MigrationDestinationManager::_flushPendingWrites(OperationContext* opCtx, "namespace"_attr = _nss.ns(), "chunkMin"_attr = redact(_min), "chunkMax"_attr = redact(_max), - "migrationId"_attr = _enableResumableRangeDeleter ? _migrationId->toBSON() : BSONObj()); + "migrationId"_attr = _migrationId.toBSON()); return true; } diff --git a/src/mongo/db/s/migration_destination_manager.h b/src/mongo/db/s/migration_destination_manager.h index d2310ddffe2..62e6b6fc99b 100644 --- a/src/mongo/db/s/migration_destination_manager.h +++ b/src/mongo/db/s/migration_destination_manager.h @@ -201,11 +201,7 @@ private: stdx::thread _migrateThreadHandle; - // Whether to use the resumable range deleter. This decision is based on whether the FCV 4.2 or - // FCV 4.4 protocol are in use and the disableResumableRangeDeleter option is off. - bool _enableResumableRangeDeleter{true}; - - boost::optional<UUID> _migrationId; + UUID _migrationId; LogicalSessionId _lsid; TxnNumber _txnNumber; NamespaceString _nss; diff --git a/src/mongo/db/s/migration_source_manager.cpp b/src/mongo/db/s/migration_source_manager.cpp index 7424b8078a5..13c0546313c 100644 --- a/src/mongo/db/s/migration_source_manager.cpp +++ b/src/mongo/db/s/migration_source_manager.cpp @@ -51,7 +51,6 @@ #include "mongo/db/s/shard_filtering_metadata_refresh.h" #include "mongo/db/s/shard_metadata_util.h" #include "mongo/db/s/sharding_logging.h" -#include "mongo/db/s/sharding_runtime_d_params_gen.h" #include "mongo/db/s/sharding_state.h" #include "mongo/db/s/sharding_state_recovery.h" #include "mongo/db/s/sharding_statistics.h" @@ -134,8 +133,6 @@ MigrationSourceManager::MigrationSourceManager(OperationContext* opCtx, _stats(ShardingStatistics::get(_opCtx)) { invariant(!_opCtx->lockState()->isLocked()); - _enableResumableRangeDeleter = !disableResumableRangeDeleter.load(); - // Disallow moving a chunk to ourselves uassert(ErrorCodes::InvalidOptions, "Destination shard cannot be the same as source", @@ -146,8 +143,7 @@ MigrationSourceManager::MigrationSourceManager(OperationContext* opCtx, "{collectionEpoch}", "Starting chunk migration donation", "requestParameters"_attr = redact(_args.toString()), - "collectionEpoch"_attr = _args.getVersionEpoch(), - "resumableRangeDeleterEnabled"_attr = _enableResumableRangeDeleter); + "collectionEpoch"_attr = _args.getVersionEpoch()); // Force refresh of the metadata to ensure we have the latest forceShardFilteringMetadataRefresh(_opCtx, getNss()); @@ -287,15 +283,12 @@ Status MigrationSourceManager::startClone() { _opCtx, readConcernArgs, PrepareConflictBehavior::kEnforce); } - if (_enableResumableRangeDeleter) { - _coordinator->startMigration(_opCtx); - } + _coordinator->startMigration(_opCtx); Status startCloneStatus = _cloneDriver->startClone(_opCtx, _coordinator->getMigrationId(), _coordinator->getLsid(), - _coordinator->getTxnNumber(), - !_enableResumableRangeDeleter); + _coordinator->getTxnNumber()); if (!startCloneStatus.isOK()) { return startCloneStatus; } @@ -468,10 +461,7 @@ Status MigrationSourceManager::commitChunkMetadataOnConfig() { "have re-received the chunk"}; } - if (_enableResumableRangeDeleter) { - _coordinator->setMigrationDecision( - migrationutil::MigrationCoordinator::Decision::kAborted); - } + _coordinator->setMigrationDecision(migrationutil::MigrationCoordinator::Decision::kAborted); // The chunk modification was not applied, so report the original error return 
migrationCommitStatus.withContext("Chunk move was not successful"); @@ -484,10 +474,7 @@ Status MigrationSourceManager::commitChunkMetadataOnConfig() { "updatedCollectionVersion"_attr = refreshedMetadata.getCollVersion(), "migrationId"_attr = _coordinator->getMigrationId()); - if (_enableResumableRangeDeleter) { - _coordinator->setMigrationDecision( - migrationutil::MigrationCoordinator::Decision::kCommitted); - } + _coordinator->setMigrationDecision(migrationutil::MigrationCoordinator::Decision::kCommitted); hangBeforeLeavingCriticalSection.pauseWhileSet(); @@ -521,61 +508,20 @@ Status MigrationSourceManager::commitChunkMetadataOnConfig() { << "Moved chunks successfully but failed to clean up " << getNss().ns() << " range " << redact(range.toString()) << " due to: "; - if (_enableResumableRangeDeleter) { - if (_args.getWaitForDelete()) { - LOGV2(22019, - "Waiting for migration cleanup after chunk commit for the namespace {namespace} " - "and range {range}", - "Waiting for migration cleanup after chunk commit", - "namespace"_attr = getNss().ns(), - "range"_attr = redact(range.toString()), - "migrationId"_attr = _coordinator->getMigrationId()); - - invariant(_cleanupCompleteFuture); - auto deleteStatus = _cleanupCompleteFuture->getNoThrow(_opCtx); - if (!deleteStatus.isOK()) { - return {ErrorCodes::OrphanedRangeCleanUpFailed, - orphanedRangeCleanUpErrMsg + redact(deleteStatus)}; - } - } - } else { - auto cleanupCompleteFuture = [&] { - auto const whenToClean = _args.getWaitForDelete() ? CollectionShardingRuntime::kNow - : CollectionShardingRuntime::kDelayed; - UninterruptibleLockGuard noInterrupt(_opCtx->lockState()); - AutoGetCollection autoColl(_opCtx, getNss(), MODE_IS); - return CollectionShardingRuntime::get(_opCtx, getNss()) - ->cleanUpRange(range, boost::none, whenToClean); - }(); - - if (_args.getWaitForDelete()) { - LOGV2(22020, - "Waiting for migration cleanup after chunk commit for the namespace {namespace} " - "and range {range}", - "Waiting for migration cleanup after chunk commit", - "namespace"_attr = getNss().ns(), - "range"_attr = redact(range.toString())); - - auto deleteStatus = cleanupCompleteFuture.getNoThrow(_opCtx); - - if (!deleteStatus.isOK()) { - return {ErrorCodes::OrphanedRangeCleanUpFailed, - orphanedRangeCleanUpErrMsg + redact(deleteStatus)}; - } - - return Status::OK(); - } - - if (cleanupCompleteFuture.isReady() && !cleanupCompleteFuture.getNoThrow(_opCtx).isOK()) { + if (_args.getWaitForDelete()) { + LOGV2(22019, + "Waiting for migration cleanup after chunk commit for the namespace {namespace} " + "and range {range}", + "Waiting for migration cleanup after chunk commit", + "namespace"_attr = getNss().ns(), + "range"_attr = redact(range.toString()), + "migrationId"_attr = _coordinator->getMigrationId()); + + invariant(_cleanupCompleteFuture); + auto deleteStatus = _cleanupCompleteFuture->getNoThrow(_opCtx); + if (!deleteStatus.isOK()) { return {ErrorCodes::OrphanedRangeCleanUpFailed, - orphanedRangeCleanUpErrMsg + redact(cleanupCompleteFuture.getNoThrow(_opCtx))}; - } else { - LOGV2(22021, - "Leaving migration cleanup after chunk commit to complete in background; " - "namespace: {namespace}, range: {range}", - "Leaving migration cleanup after chunk commit to complete in background", - "namespace"_attr = getNss().ns(), - "range"_attr = redact(range.toString())); + orphanedRangeCleanUpErrMsg + redact(deleteStatus)}; } } @@ -727,26 +673,24 @@ void MigrationSourceManager::_cleanup() { ShardingStateRecovery::endMetadataOp(_opCtx); } - if 
(_enableResumableRangeDeleter) { - if (_state >= kCloning) { - invariant(_coordinator); - if (_state < kCommittingOnConfig) { - _coordinator->setMigrationDecision( - migrationutil::MigrationCoordinator::Decision::kAborted); - } - // This can be called on an exception path after the OperationContext has been - // interrupted, so use a new OperationContext. Note, it's valid to call - // getServiceContext on an interrupted OperationContext. - auto newClient = _opCtx->getServiceContext()->makeClient("MigrationCoordinator"); - { - stdx::lock_guard<Client> lk(*newClient.get()); - newClient->setSystemOperationKillable(lk); - } - AlternativeClientRegion acr(newClient); - auto newOpCtxPtr = cc().makeOperationContext(); - auto newOpCtx = newOpCtxPtr.get(); - _cleanupCompleteFuture = _coordinator->completeMigration(newOpCtx); + if (_state >= kCloning) { + invariant(_coordinator); + if (_state < kCommittingOnConfig) { + _coordinator->setMigrationDecision( + migrationutil::MigrationCoordinator::Decision::kAborted); + } + // This can be called on an exception path after the OperationContext has been + // interrupted, so use a new OperationContext. Note, it's valid to call + // getServiceContext on an interrupted OperationContext. + auto newClient = _opCtx->getServiceContext()->makeClient("MigrationCoordinator"); + { + stdx::lock_guard<Client> lk(*newClient.get()); + newClient->setSystemOperationKillable(lk); } + AlternativeClientRegion acr(newClient); + auto newOpCtxPtr = cc().makeOperationContext(); + auto newOpCtx = newOpCtxPtr.get(); + _cleanupCompleteFuture = _coordinator->completeMigration(newOpCtx); } _state = kDone; diff --git a/src/mongo/db/s/migration_source_manager.h b/src/mongo/db/s/migration_source_manager.h index 923b1d8876e..5f43c9f9780 100644 --- a/src/mongo/db/s/migration_source_manager.h +++ b/src/mongo/db/s/migration_source_manager.h @@ -252,10 +252,6 @@ private: // collection doesn't have UUID. boost::optional<UUID> _collectionUuid; - // Whether to use the resumable range deleter. This decision is based on whether the FCV 4.2 or - // FCV 4.4 protocol are in use and the disableResumableRangeDeleter option is off. - bool _enableResumableRangeDeleter; - // Contains logic for ensuring the donor's and recipient's config.rangeDeletions entries are // correctly updated based on whether the migration committed or aborted. std::unique_ptr<migrationutil::MigrationCoordinator> _coordinator; diff --git a/src/mongo/db/s/migration_util.cpp b/src/mongo/db/s/migration_util.cpp index fa8c0f89365..57bbe3f635a 100644 --- a/src/mongo/db/s/migration_util.cpp +++ b/src/mongo/db/s/migration_util.cpp @@ -53,6 +53,7 @@ #include "mongo/db/s/collection_sharding_runtime.h" #include "mongo/db/s/migration_coordinator.h" #include "mongo/db/s/shard_filtering_metadata_refresh.h" +#include "mongo/db/s/sharding_runtime_d_params_gen.h" #include "mongo/db/s/sharding_statistics.h" #include "mongo/db/s/wait_for_majority_service.h" #include "mongo/db/write_concern.h" @@ -294,6 +295,13 @@ ExecutorFuture<void> submitRangeDeletionTask(OperationContext* opCtx, auto uniqueOpCtx = tc->makeOperationContext(); auto opCtx = uniqueOpCtx.get(); + uassert( + ErrorCodes::ResumableRangeDeleterDisabled, + str::stream() + << "Not submitting range deletion task " << redact(deletionTask.toBSON()) + << " because the disableResumableRangeDeleter server parameter is set to true", + !disableResumableRangeDeleter.load()); + // Make sure the collection metadata is up-to-date. 
{ boost::optional<AutoGetCollection> autoColl; diff --git a/src/mongo/db/s/sharding_runtime_d_params.idl b/src/mongo/db/s/sharding_runtime_d_params.idl index 4533c957adf..0d92e71aae6 100644 --- a/src/mongo/db/s/sharding_runtime_d_params.idl +++ b/src/mongo/db/s/sharding_runtime_d_params.idl @@ -92,7 +92,10 @@ server_parameters: default: 900 disableResumableRangeDeleter: - description: 'Disable the resumable range deleter and revert to prior behavior.' + description: >- + Disable the resumable range deleter. Ranges will not be submitted for deletion, and if an + incoming migration sees orphans in the incoming range, the migration will fail. The + resumable range deleter replaced the old non-durable range deleter as of FCV 4.4. set_at: [startup] cpp_vartype: AtomicWord<bool> cpp_varname : disableResumableRangeDeleter diff --git a/src/mongo/db/s/start_chunk_clone_request.cpp b/src/mongo/db/s/start_chunk_clone_request.cpp index e81f5754b90..86567dce161 100644 --- a/src/mongo/db/s/start_chunk_clone_request.cpp +++ b/src/mongo/db/s/start_chunk_clone_request.cpp @@ -152,12 +152,10 @@ StatusWith<StartChunkCloneRequest> StartChunkCloneRequest::createFromCommand(Nam } } - if (obj.getField("uuid")) { - request._migrationId = UUID::parse(obj); - request._lsid = LogicalSessionId::parse(IDLParserErrorContext("StartChunkCloneRequest"), - obj[kLsid].Obj()); - request._txnNumber = obj.getField(kTxnNumber).Long(); - } + request._migrationId = UUID::parse(obj); + request._lsid = + LogicalSessionId::parse(IDLParserErrorContext("StartChunkCloneRequest"), obj[kLsid].Obj()); + request._txnNumber = obj.getField(kTxnNumber).Long(); return request; } @@ -175,19 +173,16 @@ void StartChunkCloneRequest::appendAsCommand( const BSONObj& chunkMinKey, const BSONObj& chunkMaxKey, const BSONObj& shardKeyPattern, - const MigrationSecondaryThrottleOptions& secondaryThrottle, - bool resumableRangeDeleterDisabled) { + const MigrationSecondaryThrottleOptions& secondaryThrottle) { invariant(builder->asTempObj().isEmpty()); invariant(nss.isValid()); invariant(fromShardConnectionString.isValid()); builder->append(kRecvChunkStart, nss.ns()); - if (!resumableRangeDeleterDisabled) { - migrationId.appendToBuilder(builder, kMigrationId); - builder->append(kLsid, lsid.toBSON()); - builder->append(kTxnNumber, txnNumber); - } + migrationId.appendToBuilder(builder, kMigrationId); + builder->append(kLsid, lsid.toBSON()); + builder->append(kTxnNumber, txnNumber); sessionId.append(builder); builder->append(kFromShardConnectionString, fromShardConnectionString.toString()); diff --git a/src/mongo/db/s/start_chunk_clone_request.h b/src/mongo/db/s/start_chunk_clone_request.h index f1ff3222b03..171639c0c8a 100644 --- a/src/mongo/db/s/start_chunk_clone_request.h +++ b/src/mongo/db/s/start_chunk_clone_request.h @@ -72,8 +72,7 @@ public: const BSONObj& chunkMinKey, const BSONObj& chunkMaxKey, const BSONObj& shardKeyPattern, - const MigrationSecondaryThrottleOptions& secondaryThrottle, - bool resumableRangeDeleterDisabled); + const MigrationSecondaryThrottleOptions& secondaryThrottle); const NamespaceString& getNss() const { return _nss; @@ -128,10 +127,6 @@ public: return _secondaryThrottle; } - bool resumableRangeDeleterDisabled() const { - return _resumableRangeDeleterDisabled; - } - private: StartChunkCloneRequest(NamespaceString nss, MigrationSessionId sessionId, @@ -163,8 +158,6 @@ private: // The parsed secondary throttle options MigrationSecondaryThrottleOptions _secondaryThrottle; - - bool _resumableRangeDeleterDisabled{false}; }; } // 
namespace mongo diff --git a/src/mongo/db/s/start_chunk_clone_request_test.cpp b/src/mongo/db/s/start_chunk_clone_request_test.cpp index 9272c53eb25..f498be0e366 100644 --- a/src/mongo/db/s/start_chunk_clone_request_test.cpp +++ b/src/mongo/db/s/start_chunk_clone_request_test.cpp @@ -70,8 +70,7 @@ TEST(StartChunkCloneRequest, CreateAsCommandComplete) { BSON("Key" << -100), BSON("Key" << 100), BSON("Key" << 1), - MigrationSecondaryThrottleOptions::create(MigrationSecondaryThrottleOptions::kOff), - false); + MigrationSecondaryThrottleOptions::create(MigrationSecondaryThrottleOptions::kOff)); BSONObj cmdObj = builder.obj(); |