| author | Allison Easton <allison.easton@mongodb.com> | 2022-04-12 07:34:03 +0000 |
|---|---|---|
| committer | Evergreen Agent <no-reply@evergreen.mongodb.com> | 2022-04-12 18:52:14 +0000 |
| commit | cb0e7061575b9879326598cb4bd75fd3ee3588cb (patch) | |
| tree | f13b9482732cc7600099e0be049cd888dcd57fb9 | |
| parent | 9f06d30d0701e6b224598e19b5d63a257f4ed554 (diff) | |
| download | mongo-cb0e7061575b9879326598cb4bd75fd3ee3588cb.tar.gz | |
SERVER-64979 Range deleter must start with same range deletion task on step up
| -rw-r--r-- | jstests/sharding/range_deletion_ordering_with_stepdown.js | 106 |
| -rw-r--r-- | src/mongo/db/s/migration_util.cpp | 32 |
| -rw-r--r-- | src/mongo/db/s/range_deletion_task.idl | 3 |
| -rw-r--r-- | src/mongo/db/s/range_deletion_util.cpp | 10 |
| -rw-r--r-- | src/mongo/db/s/range_deletion_util.h | 5 |

5 files changed, 149 insertions, 7 deletions
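In short, the patch tags the range deletion task currently being drained with a persisted `processing` flag, so that on step up the new primary resubmits that same task before any other. Before the diffs, a minimal mongo shell sketch of how the flag can be observed on a shard primary; the connection string is a placeholder:

```javascript
// Inspect the persisted range deletion tasks on a shard primary.
// "localhost:20000" is a placeholder for a real shard primary address.
const conn = new Mongo("localhost:20000");
const rangeDeletions = conn.getDB("config").getCollection("rangeDeletions");

// With this patch, the task a batch was last deleted from carries
// `processing: true`; on step up it is resubmitted ahead of the rest.
const inFlight = rangeDeletions.findOne({processing: true});
if (inFlight !== null) {
    print("Range deletion in progress: " + tojson(inFlight.range));
}
```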
diff --git a/jstests/sharding/range_deletion_ordering_with_stepdown.js b/jstests/sharding/range_deletion_ordering_with_stepdown.js
new file mode 100644
index 00000000000..0f684899b73
--- /dev/null
+++ b/jstests/sharding/range_deletion_ordering_with_stepdown.js
@@ -0,0 +1,106 @@
+/**
+ * Test that an ongoing range deletion is the first range deletion executed upon step up.
+ *
+ * @tags: [
+ *   requires_fcv_60,
+ * ]
+ */
+
+(function() {
+'use strict';
+
+load("jstests/libs/fail_point_util.js");
+
+const rangeDeleterBatchSize = 128;
+
+const st = new ShardingTest(
+    {shards: 2, rs: {nodes: 2, setParameter: {rangeDeleterBatchSize: rangeDeleterBatchSize}}});
+
+// Setup database
+const dbName = 'db';
+const db = st.getDB(dbName);
+assert.commandWorked(
+    st.s.adminCommand({enableSharding: dbName, primaryShard: st.shard0.shardName}));
+
+// Setup collection for test with orphans
+const coll = db['test'];
+const nss = coll.getFullName();
+assert.commandWorked(st.s.adminCommand({shardCollection: nss, key: {_id: 1}}));
+
+// Create two chunks
+assert.commandWorked(st.s.adminCommand({split: nss, middle: {_id: 0}}));
+
+// Insert 1000 docs into both chunks.
+const numDocs = 1000;
+let bulk = coll.initializeUnorderedBulkOp();
+for (let i = 1; i <= numDocs; i++) {
+    bulk.insert({_id: i});
+    bulk.insert({_id: -i});
+}
+assert.commandWorked(bulk.execute());
+
+// Pause before first range deletion task
+let beforeDeletionStarts = configureFailPoint(st.shard0, "suspendRangeDeletion");
+assert.commandWorked(db.adminCommand({moveChunk: nss, find: {_id: 1}, to: st.shard1.shardName}));
+assert.commandWorked(db.adminCommand({moveChunk: nss, find: {_id: -1}, to: st.shard1.shardName}));
+
+// Allow first batch from one of the ranges to be deleted
+let beforeDeletionFailpoint = configureFailPoint(st.shard0, "hangBeforeDoingDeletion");
+beforeDeletionStarts.off();
+beforeDeletionFailpoint.wait();
+let afterDeletionFailpoint = configureFailPoint(st.shard0, "hangAfterDoingDeletion");
+beforeDeletionFailpoint.off();
+afterDeletionFailpoint.wait();
+
+// Figure out which range had a batch deleted from it
+let rangeDeletionDocs = st.shard0.getDB("config").getCollection("rangeDeletions").find().toArray();
+assert.eq(rangeDeletionDocs.length, 2);
+let processingDoc, otherDoc;
+if (rangeDeletionDocs[0].numOrphanDocs.valueOf() === numDocs) {
+    assert.eq(rangeDeletionDocs[1].numOrphanDocs, numDocs - rangeDeleterBatchSize);
+    processingDoc = rangeDeletionDocs[1];
+    otherDoc = rangeDeletionDocs[0];
+} else {
+    assert.eq(rangeDeletionDocs[0].numOrphanDocs, numDocs - rangeDeleterBatchSize);
+    assert.eq(rangeDeletionDocs[1].numOrphanDocs, numDocs);
+    processingDoc = rangeDeletionDocs[0];
+    otherDoc = rangeDeletionDocs[1];
+}
+
+// Reorder the tasks on disk to make it more likely they would be submitted out of order
+assert.commandWorked(st.shard0.getDB("config").getCollection("rangeDeletions").deleteMany({}));
+assert.commandWorked(
+    st.shard0.getDB("config").getCollection("rangeDeletions").insert(rangeDeletionDocs[1]));
+assert.commandWorked(
+    st.shard0.getDB("config").getCollection("rangeDeletions").insert(rangeDeletionDocs[0]));
+
+// Step down
+assert.commandWorked(st.rs0.getPrimary().adminCommand({replSetStepDown: 300, force: true}));
+st.rs0.awaitNodesAgreeOnPrimary();
+
+// Allow another batch deletion
+beforeDeletionFailpoint = configureFailPoint(st.shard0, "hangBeforeDoingDeletion");
+afterDeletionFailpoint.off();
+beforeDeletionFailpoint.wait();
+afterDeletionFailpoint = configureFailPoint(st.shard0, "hangAfterDoingDeletion");
+beforeDeletionFailpoint.off();
+afterDeletionFailpoint.wait();
+
+// Make sure the batch deleted was from the same range deletion
+rangeDeletionDocs = st.shard0.getDB("config").getCollection("rangeDeletions").find().toArray();
+assert.eq(rangeDeletionDocs.length, 2);
+rangeDeletionDocs.forEach((doc) => {
+    if (bsonWoCompare(processingDoc.range, doc.range) === 0) {
+        jsTest.log("Same id: " + tojson(doc));
+        assert.eq(doc.numOrphanDocs, numDocs - 2 * rangeDeleterBatchSize);
+    } else {
+        jsTest.log("Diff id: " + tojson(doc));
+        assert.eq(doc.numOrphanDocs, numDocs);
+    }
+});
+
+// Allow everything to finish
+afterDeletionFailpoint.off();
+
+st.stop();
+})();

diff --git a/src/mongo/db/s/migration_util.cpp b/src/mongo/db/s/migration_util.cpp
index 2ab5852c6ec..bf55f7aecf1 100644
--- a/src/mongo/db/s/migration_util.cpp
+++ b/src/mongo/db/s/migration_util.cpp
@@ -510,16 +510,34 @@ void resubmitRangeDeletionsOnStepUp(ServiceContext* serviceContext) {
             }
 
             auto opCtx = tc->makeOperationContext();
+            opCtx->setAlwaysInterruptAtStepDownOrUp();
+
+            DBDirectClient client(opCtx.get());
+            FindCommandRequest findCommand(NamespaceString::kRangeDeletionNamespace);
+            findCommand.setFilter(BSON(RangeDeletionTask::kProcessingFieldName << true));
+            auto cursor = client.find(std::move(findCommand));
+            if (cursor->more()) {
+                return migrationutil::submitRangeDeletionTask(
+                    opCtx.get(),
+                    RangeDeletionTask::parse(IDLParserErrorContext("rangeDeletionRecovery"),
+                                             cursor->next()));
+            } else {
+                return ExecutorFuture<void>(getMigrationUtilExecutor(serviceContext));
+            }
+        })
+        .then([serviceContext] {
+            ThreadClient tc("ResubmitRangeDeletions", serviceContext);
+            {
+                stdx::lock_guard<Client> lk(*tc.get());
+                tc->setSystemOperationKillableByStepdown(lk);
+            }
+
+            auto opCtx = tc->makeOperationContext();
+            opCtx->setAlwaysInterruptAtStepDownOrUp();
 
             submitPendingDeletions(opCtx.get());
         })
-        .getAsync([](const Status& status) {
-            if (!status.isOK()) {
-                LOGV2(45739,
-                      "Error while submitting pending range deletions",
-                      "error"_attr = redact(status));
-            }
-        });
+        .getAsync([](auto) {});
 }
 
 void dropRangeDeletionsCollection(OperationContext* opCtx) {

diff --git a/src/mongo/db/s/range_deletion_task.idl b/src/mongo/db/s/range_deletion_task.idl
index af7bf53322e..2e47651abdb 100644
--- a/src/mongo/db/s/range_deletion_task.idl
+++ b/src/mongo/db/s/range_deletion_task.idl
@@ -71,6 +71,9 @@ structs:
                 type: bool
                 description: "Flag that is present if the range is not yet ready for deletion"
                 optional: true
+            processing:
+                type: optionalBool
+                description: "Flag that is present if the range is currently being deleted"
             whenToClean:
                 type: CleanWhen
                 description: "Enumeration that defines when to cleanup the range."
diff --git a/src/mongo/db/s/range_deletion_util.cpp b/src/mongo/db/s/range_deletion_util.cpp
index 99ef78ed220..311973d4bee 100644
--- a/src/mongo/db/s/range_deletion_util.cpp
+++ b/src/mongo/db/s/range_deletion_util.cpp
@@ -320,6 +320,8 @@ ExecutorFuture<void> deleteRangeInBatches(const std::shared_ptr<executor::TaskEx
                     !collectionUuidHasChanged(
                         nss, collection.getCollection(), collectionUuid));
 
+                markAsProcessingRangeDeletionTask(opCtx, migrationId);
+
                 auto numDeleted = uassertStatusOK(deleteNextBatch(opCtx,
                                                                   collection.getCollection(),
                                                                   keyPattern,
@@ -496,6 +498,14 @@ void deleteRangeDeletionTasksForRename(OperationContext* opCtx,
                             BSON(RangeDeletionTask::kNssFieldName << toNss.ns()));
 }
 
+void markAsProcessingRangeDeletionTask(OperationContext* opCtx, const UUID& migrationId) {
+    PersistentTaskStore<RangeDeletionTask> store(NamespaceString::kRangeDeletionNamespace);
+    auto query = BSON(RangeDeletionTask::kIdFieldName << migrationId);
+    static const auto update =
+        BSON("$set" << BSON(RangeDeletionTask::kProcessingFieldName << true));
+
+    store.update(opCtx, query, update, WriteConcerns::kLocalWriteConcern);
+}
 
 SharedSemiFuture<void> removeDocumentsInRange(
     const std::shared_ptr<executor::TaskExecutor>& executor,

diff --git a/src/mongo/db/s/range_deletion_util.h b/src/mongo/db/s/range_deletion_util.h
index 5d345f8826e..2d537eee8cc 100644
--- a/src/mongo/db/s/range_deletion_util.h
+++ b/src/mongo/db/s/range_deletion_util.h
@@ -113,6 +113,11 @@ void deleteRangeDeletionTasksForRename(OperationContext* opCtx,
                                        const NamespaceString& toNss);
 
 /**
+ * Sets the processing field on the range deletion document for the given migration id.
+ */
+void markAsProcessingRangeDeletionTask(OperationContext* opCtx, const UUID& migrationId);
+
+/**
  * Computes and sets the numOrphanDocs field for each document in `config.rangeDeletions` (skips
  * documents referring to older incarnations of a collection)
  */
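For completeness, the invariant the new jstest checks after the failover can be condensed into one helper. Field names and constants follow the test above; the helper itself is illustrative:

```javascript
// Condensed restatement of the test's final check: across a failover, only
// the task that was already processing loses documents; the other is intact.
function assertSameTaskResumed(shardConn, processingRange, numDocs, batchSize) {
    const docs =
        shardConn.getDB("config").getCollection("rangeDeletions").find().toArray();
    assert.eq(docs.length, 2);
    docs.forEach((doc) => {
        if (bsonWoCompare(doc.range, processingRange) === 0) {
            // Two batches have now been deleted from the resumed task.
            assert.eq(doc.numOrphanDocs, numDocs - 2 * batchSize);
        } else {
            // The other task must not have been touched.
            assert.eq(doc.numOrphanDocs, numDocs);
        }
    });
}
```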