author     Esha Maharishi <esha.maharishi@mongodb.com>    2020-05-12 17:26:45 -0400
committer  Evergreen Agent <no-reply@evergreen.mongodb.com>    2020-05-18 04:14:45 +0000
commit     d0e6f87351312a299a17f6d63e3f2f4db834ae1f (patch)
tree       cf758ddf2c85d414d9eab4aa1b318791934aae30
parent     7e2111ef33fc40959a254bd3109466176ae60718 (diff)
download   mongo-d0e6f87351312a299a17f6d63e3f2f4db834ae1f.tar.gz
SERVER-47992 Make disableResumableRangeDeleter just prevent ranges from being submitted for deletion
-rw-r--r--  buildscripts/resmokeconfig/suites/sharding_auth.yml  4
-rw-r--r--  buildscripts/resmokeconfig/suites/sharding_auth_audit.yml  4
-rw-r--r--  buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml  6
-rw-r--r--  buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml  1
-rw-r--r--  buildscripts/resmokeconfig/suites/sharding_misc.yml  9
-rw-r--r--  jstests/auth/upgrade_noauth_to_keyfile_with_sharding.js  4
-rw-r--r--  jstests/libs/cleanup_orphaned_util.js  131
-rw-r--r--  jstests/sharding/cleanup_orphaned.js  16
-rw-r--r--  jstests/sharding/cleanup_orphaned_basic.js  46
-rw-r--r--  jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js  160
-rw-r--r--  jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js  131
-rw-r--r--  jstests/sharding/cleanup_orphaned_cmd_hashed.js  78
-rw-r--r--  jstests/sharding/cleanup_orphaned_cmd_prereload.js  96
-rw-r--r--  jstests/sharding/cleanup_orphaned_compound.js  18
-rw-r--r--  jstests/sharding/disable_resumable_range_deleter.js  85
-rw-r--r--  jstests/sharding/disable_resumable_range_deleter_flag.js  133
-rw-r--r--  jstests/sharding/migration_ignore_interrupts_3.js  98
-rw-r--r--  jstests/sharding/migration_ignore_interrupts_4.js  101
-rw-r--r--  jstests/sharding/move_jumbo_chunk.js  5
-rw-r--r--  jstests/sharding/range_deleter_interacts_correctly_with_refine_shard_key.js  182
-rw-r--r--  jstests/ssl/libs/ssl_helpers.js  13
-rw-r--r--  jstests/ssl/mixed_mode_sharded.js  3
-rw-r--r--  jstests/ssl/mixed_mode_sharded_transition.js  12
-rw-r--r--  jstests/sslSpecial/mixed_mode_sharded_nossl.js  3
-rw-r--r--  jstests/sslSpecial/mixed_mode_sharded_transition_nossl.js  3
-rw-r--r--  src/mongo/base/error_codes.yml  2
-rw-r--r--  src/mongo/db/s/cleanup_orphaned_cmd.cpp  276
-rw-r--r--  src/mongo/db/s/migration_chunk_cloner_source.h  3
-rw-r--r--  src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp  6
-rw-r--r--  src/mongo/db/s/migration_chunk_cloner_source_legacy.h  3
-rw-r--r--  src/mongo/db/s/migration_chunk_cloner_source_legacy_test.cpp  8
-rw-r--r--  src/mongo/db/s/migration_destination_manager.cpp  168
-rw-r--r--  src/mongo/db/s/migration_destination_manager.h  6
-rw-r--r--  src/mongo/db/s/migration_source_manager.cpp  126
-rw-r--r--  src/mongo/db/s/migration_source_manager.h  4
-rw-r--r--  src/mongo/db/s/migration_util.cpp  8
-rw-r--r--  src/mongo/db/s/sharding_runtime_d_params.idl  5
-rw-r--r--  src/mongo/db/s/start_chunk_clone_request.cpp  21
-rw-r--r--  src/mongo/db/s/start_chunk_clone_request.h  9
-rw-r--r--  src/mongo/db/s/start_chunk_clone_request_test.cpp  3
40 files changed, 306 insertions, 1684 deletions
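For context, the deleted tests below enabled the server parameter through ShardingTest's shardOptions. A minimal sketch of that startup pattern under the post-SERVER-47992 semantics described in the commit title (cluster shape and the lack of any workload are illustrative, not part of this commit):

// Illustrative sketch: start a two-shard cluster with the resumable range deleter disabled,
// using the same setParameter the removed tests passed via shardOptions.
var st = new ShardingTest({
    shards: 2,
    shardOptions: {setParameter: {"disableResumableRangeDeleter": true}}
});
// After this commit, the parameter only prevents range deletion tasks from being submitted or
// processed; it no longer switches migrations back to the pre-4.4 non-resumable protocol.
st.stop();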
diff --git a/buildscripts/resmokeconfig/suites/sharding_auth.yml b/buildscripts/resmokeconfig/suites/sharding_auth.yml
index 0e3978b52f4..65154e40aa0 100644
--- a/buildscripts/resmokeconfig/suites/sharding_auth.yml
+++ b/buildscripts/resmokeconfig/suites/sharding_auth.yml
@@ -22,12 +22,8 @@ selector:
# Skip these additional tests when running with auth enabled.
- jstests/sharding/parallel.js
# Skip the testcases that do not have auth bypass when running ops in parallel.
- - jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js # SERVER-21713
- - jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js # SERVER-21713
- jstests/sharding/migration_ignore_interrupts_1.js # SERVER-21713
- jstests/sharding/migration_ignore_interrupts_2.js # SERVER-21713
- - jstests/sharding/migration_ignore_interrupts_3.js # SERVER-21713
- - jstests/sharding/migration_ignore_interrupts_4.js # SERVER-21713
- jstests/sharding/migration_move_chunk_after_receive.js # SERVER-21713
- jstests/sharding/migration_server_status.js # SERVER-21713
- jstests/sharding/migration_sets_fromMigrate_flag.js # SERVER-21713
diff --git a/buildscripts/resmokeconfig/suites/sharding_auth_audit.yml b/buildscripts/resmokeconfig/suites/sharding_auth_audit.yml
index 426eb432a1a..a26ead09f61 100644
--- a/buildscripts/resmokeconfig/suites/sharding_auth_audit.yml
+++ b/buildscripts/resmokeconfig/suites/sharding_auth_audit.yml
@@ -22,14 +22,10 @@ selector:
# Skip these additional tests when running with auth enabled.
- jstests/sharding/parallel.js
# Skip the testcases that do not have auth bypass when running ops in parallel.
- - jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js # SERVER-21713
- - jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js # SERVER-21713
- jstests/sharding/migration_with_source_ops.js # SERVER-21713
- jstests/sharding/migration_sets_fromMigrate_flag.js # SERVER-21713
- jstests/sharding/migration_ignore_interrupts_1.js # SERVER-21713
- jstests/sharding/migration_ignore_interrupts_2.js # SERVER-21713
- - jstests/sharding/migration_ignore_interrupts_3.js # SERVER-21713
- - jstests/sharding/migration_ignore_interrupts_4.js # SERVER-21713
- jstests/sharding/movechunk_interrupt_at_primary_stepdown.js # SERVER-21713
- jstests/sharding/movechunk_parallel.js # SERVER-21713
- jstests/sharding/migration_server_status.js # SERVER-21713
diff --git a/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml b/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml
index 8e623717c26..7ff4348a990 100644
--- a/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml
+++ b/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml
@@ -12,7 +12,6 @@ selector:
- jstests/sharding/query/*[aA]uth*.js
- jstests/sharding/change_streams/*[aA]uth*.js
- - jstests/sharding/cleanup_orphaned_auth.js
- jstests/sharding/localhostAuthBypass.js
- jstests/sharding/kill_sessions.js
- jstests/sharding/mongos_rs_auth_shard_failure_tolerance.js
@@ -144,8 +143,6 @@ selector:
- jstests/sharding/config_rs_change.js
- jstests/sharding/empty_cluster_init.js
# Temporarily blacklisted until more robust
- - jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js
- - jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js
# Expects same secondaries for entire test
- jstests/sharding/commands_that_write_accept_wc_configRS.js
- jstests/sharding/commands_that_write_accept_wc_shards.js
@@ -157,9 +154,6 @@ selector:
- jstests/sharding/migration_ignore_interrupts_1.js
- jstests/sharding/migration_sets_fromMigrate_flag.js
- jstests/sharding/migration_waits_for_majority_commit.js
- # Stepping down the config can cause moveChunks stopped on shards via killOp to be restarted.
- - jstests/sharding/migration_ignore_interrupts_3.js
- - jstests/sharding/migration_ignore_interrupts_4.js
# listCollections is not retryable
- jstests/sharding/sessions_collection_auto_healing.js
# shardCollection is not retryable
diff --git a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
index 406554b3ffa..935dcc02aba 100644
--- a/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
+++ b/buildscripts/resmokeconfig/suites/sharding_last_stable_mongos_and_mixed_shards.yml
@@ -31,7 +31,6 @@ selector:
- jstests/sharding/killop.js
- jstests/sharding/verify_sessions_expiration_sharded.js
# Enable when 4.6 becomes last stable
- - jstests/sharding/disable_resumable_range_deleter_flag.js
- jstests/sharding/mongos_dataSize.js
# Enable when SERVER-44733 is backported
- jstests/sharding/change_streams_update_lookup_shard_metadata_missing.js
diff --git a/buildscripts/resmokeconfig/suites/sharding_misc.yml b/buildscripts/resmokeconfig/suites/sharding_misc.yml
index cb661f1e1fc..2cfaa7a899c 100644
--- a/buildscripts/resmokeconfig/suites/sharding_misc.yml
+++ b/buildscripts/resmokeconfig/suites/sharding_misc.yml
@@ -135,9 +135,7 @@ selector:
- jstests/sharding/coll_epoch_test1.js
- jstests/sharding/hash_shard_num_chunks.js
- jstests/sharding/basic_sharding_params.js
- - jstests/sharding/migration_ignore_interrupts_3.js
- jstests/sharding/change_streams.js
- - jstests/sharding/migration_ignore_interrupts_4.js
- jstests/sharding/move_chunk_basic.js
- jstests/sharding/query/collation_targeting_inherited.js
- jstests/sharding/secondary_shard_version_protocol_with_causal_consistency.js
@@ -175,15 +173,11 @@ selector:
- jstests/sharding/causal_consistency_shell_support.js
- jstests/sharding/change_streams_establishment_finds_new_shards.js
- jstests/sharding/retryable_writes.js
- - jstests/sharding/cleanup_orphaned_cmd_prereload.js
- jstests/sharding/basic_merge.js
- jstests/sharding/migration_critical_section_concurrency.js
- jstests/sharding/sort1.js
- - jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js
- jstests/sharding/upsert_sharded.js
- - jstests/sharding/cleanup_orphaned_cmd_hashed.js
- jstests/sharding/addshard5.js
- - jstests/sharding/cleanup_orphaned_compound.js
- jstests/sharding/query/agg_sort.js
- jstests/sharding/remove1.js
- jstests/sharding/shard_targeting.js
@@ -200,8 +194,6 @@ selector:
- jstests/sharding/count2.js
- jstests/sharding/no_empty_reset.js
- jstests/sharding/kill_pinned_cursor.js
- - jstests/sharding/cleanup_orphaned.js
- - jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js
- jstests/sharding/sharded_limit_batchsize.js
- jstests/sharding/migration_sets_fromMigrate_flag.js
- jstests/sharding/change_stream_metadata_notifications.js
@@ -309,7 +301,6 @@ selector:
- jstests/sharding/hash_single_shard.js
- jstests/sharding/version2.js
- jstests/sharding/advance_logical_time_with_valid_signature.js
- - jstests/sharding/cleanup_orphaned_auth.js
- jstests/sharding/query/mrShardedOutputAuth.js
- jstests/sharding/split_against_shard_with_invalid_split_points.js
- jstests/sharding/version1.js
diff --git a/jstests/auth/upgrade_noauth_to_keyfile_with_sharding.js b/jstests/auth/upgrade_noauth_to_keyfile_with_sharding.js
index 9047c2cddef..5a723a9659a 100644
--- a/jstests/auth/upgrade_noauth_to_keyfile_with_sharding.js
+++ b/jstests/auth/upgrade_noauth_to_keyfile_with_sharding.js
@@ -1,6 +1,8 @@
// Tests access control upgrade on a sharded cluster
// The purpose is to verify the connectivity between mongos, config server, and the shards
-// @tags: [requires_sharding]
+//
+// TODO (SERVER-48261): Fix test to allow it to work with the resumable range deleter enabled.
+// @tags: [requires_sharding, __TEMPORARILY_DISABLED__]
load('jstests/ssl/libs/ssl_helpers.js');
diff --git a/jstests/libs/cleanup_orphaned_util.js b/jstests/libs/cleanup_orphaned_util.js
deleted file mode 100644
index a88c9e8dbf7..00000000000
--- a/jstests/libs/cleanup_orphaned_util.js
+++ /dev/null
@@ -1,131 +0,0 @@
-//
-// Utilities for testing cleanupOrphaned command.
-//
-
-//
-// Run cleanupOrphaned on a shard, and assert cleanupOrphaned runs the
-// expected number of times before stopping.
-//
-function cleanupOrphaned(shardConnection, ns, expectedIterations) {
- var admin = shardConnection.getDB('admin'), result = admin.runCommand({cleanupOrphaned: ns}),
- iterations = 1;
-
- if (!result.ok) {
- printjson(result);
- }
- assert(result.ok);
- while (result.stoppedAtKey) {
- result = admin.runCommand({cleanupOrphaned: ns, startingFromKey: result.stoppedAtKey});
-
- assert(result.ok);
- ++iterations;
- }
-
- assert.eq(iterations,
- expectedIterations,
- 'Expected to run ' +
- 'cleanupOrphaned' + expectedIterations + ' times, but it only ran ' + iterations +
- ' times before stoppedAtKey was null.');
-}
-
-// Shards data from key range, then inserts orphan documents, runs cleanupOrphans
-// and makes sure that orphans are removed.
-// Pass an options object like:
-// {
-// shardKey: { a: 1, b: 1 },
-// keyGen: function() { return [{ a: 'foo', b: 1 }, { a: 'bar', b: 2 }]; }
-// }
-function testCleanupOrphaned(options) {
- var st = new ShardingTest({
- shards: 2,
- mongos: 2,
- shardOptions: {setParameter: {"disableResumableRangeDeleter": true}}
- });
-
- var mongos = st.s0, admin = mongos.getDB('admin'),
- shards = mongos.getCollection('config.shards').find().toArray(),
- coll = mongos.getCollection('foo.bar'),
- shard0Coll = st.shard0.getCollection(coll.getFullName()), keys = options.keyGen(),
- beginning = keys[0], oneQuarter = keys[Math.round(keys.length / 4)],
- middle = keys[Math.round(keys.length / 2)],
- threeQuarters = keys[Math.round(3 * keys.length / 4)];
-
- assert.commandWorked(admin.runCommand({enableSharding: coll.getDB().getName()}));
-
- printjson(admin.runCommand({movePrimary: coll.getDB() + "", to: shards[0]._id}));
-
- assert.commandWorked(
- admin.runCommand({shardCollection: coll.getFullName(), key: options.shardKey}));
-
- st.printShardingStatus();
-
- jsTest.log('Inserting some regular docs...');
-
- assert.commandWorked(admin.runCommand({split: coll.getFullName(), middle: middle}));
-
- assert.commandWorked(admin.runCommand(
- {moveChunk: coll.getFullName(), find: middle, to: shards[1]._id, _waitForDelete: true}));
-
- for (var i = 0; i < keys.length; i++)
- coll.insert(keys[i]);
- assert.eq(null, coll.getDB().getLastError());
-
- // Half of the data is on each shard:
- // shard 0: [beginning, middle)
- // shard 1: [middle, end)
- //
- assert.eq(keys.length / 2, shard0Coll.count());
- assert.eq(keys.length, coll.find().itcount());
-
- jsTest.log('Inserting some orphaned docs...');
-
- shard0Coll.insert(threeQuarters);
-
- // I'll represent the orphan doc like {threeQuarters}, in this diagram:
- //
- // shard 0: [beginning, middle) {threeQuarters}
- // shard 1: [middle, end)
- assert.eq(null, shard0Coll.getDB().getLastError());
- assert.eq(1 + keys.length / 2, shard0Coll.count());
-
- jsTest.log('Cleaning up orphaned data...');
-
- cleanupOrphaned(st.shard0, coll.getFullName(), 2);
- assert.eq(keys.length / 2, shard0Coll.count());
- assert.eq(keys.length, coll.find().itcount());
-
- jsTest.log('Moving half the data out again (making a hole)...');
-
- assert.commandWorked(admin.runCommand({split: coll.getFullName(), middle: oneQuarter}));
-
- assert.commandWorked(admin.runCommand(
- {moveChunk: coll.getFullName(), find: beginning, to: shards[1]._id, _waitForDelete: true}));
-
- // 1/4 of the data is on the first shard.
- // shard 0: [threeQuarters, middle)
- // shard 1: [beginning, threeQuarters) [middle, end)
- assert.eq(Math.round(keys.length / 4), shard0Coll.count());
- assert.eq(keys.length, coll.find().itcount());
-
- jsTest.log('Inserting some more orphaned docs...');
-
- shard0Coll.insert(beginning);
- shard0Coll.insert(middle);
- assert.eq(null, shard0Coll.getDB().getLastError());
-
- // shard 0: {beginning} [threeQuarters, middle) {middle}
- // shard 1: [beginning, threeQuarters) [middle, end)
- assert.eq(2 + Math.round(keys.length / 4), shard0Coll.count());
- assert.eq(100, coll.find().itcount());
-
- jsTest.log('Cleaning up more orphaned data...');
-
- // Now cleanupOrphaned must iterate over 3 regions, not 2.
- cleanupOrphaned(st.shard0, coll.getFullName(), 3);
- assert.eq(Math.round(keys.length / 4), shard0Coll.count());
- assert.eq(keys.length, coll.find().itcount());
-
- jsTest.log('DONE!');
-
- st.stop();
-}
diff --git a/jstests/sharding/cleanup_orphaned.js b/jstests/sharding/cleanup_orphaned.js
deleted file mode 100644
index 7e9b70f0b47..00000000000
--- a/jstests/sharding/cleanup_orphaned.js
+++ /dev/null
@@ -1,16 +0,0 @@
-//
-// Shards data from the key range, then inserts orphan documents, runs cleanupOrphans
-// and makes sure that orphans are removed. Uses an _id as a shard key.
-
-load('./jstests/libs/cleanup_orphaned_util.js');
-
-testCleanupOrphaned({
- shardKey: {_id: 1},
- keyGen: function() {
- var ids = [];
- for (var i = -50; i < 50; i++) {
- ids.push({_id: i});
- }
- return ids;
- }
-});
diff --git a/jstests/sharding/cleanup_orphaned_basic.js b/jstests/sharding/cleanup_orphaned_basic.js
index 8aa9109c633..f121c6fcb07 100644
--- a/jstests/sharding/cleanup_orphaned_basic.js
+++ b/jstests/sharding/cleanup_orphaned_basic.js
@@ -1,4 +1,3 @@
-//
// Basic tests of cleanupOrphaned. Validates that non allowed uses of the cleanupOrphaned
// command fail.
//
@@ -24,9 +23,7 @@ MongoRunner.stopMongod(mongod);
* Bad invocations of cleanupOrphaned command.
****************************************************************************/
-var st = new ShardingTest({
- other: {rs: true, rsOptions: {nodes: 2, setParameter: {"disableResumableRangeDeleter": true}}}
-});
+var st = new ShardingTest({other: {rs: true, rsOptions: {nodes: 2}}});
var mongos = st.s0;
var mongosAdmin = mongos.getDB('admin');
@@ -64,36 +61,6 @@ assert.commandWorked(mongosAdmin.runCommand({shardCollection: ns, key: {_id: 1}}
assert.commandWorked(shardAdmin.runCommand({cleanupOrphaned: ns}));
/*****************************************************************************
- * Empty shard.
- ****************************************************************************/
-
-// Ping shard[1] so it will be aware that it is sharded. Otherwise cleanupOrphaned
-// may fail.
-assert.commandWorked(mongosAdmin.runCommand({
- moveChunk: coll.getFullName(),
- find: {_id: 1},
- to: st.shard1.shardName,
- _waitForDelete: true
-}));
-
-assert.commandWorked(mongosAdmin.runCommand({
- moveChunk: coll.getFullName(),
- find: {_id: 1},
- to: st.shard0.shardName,
- _waitForDelete: true
-}));
-
-// Collection's home is shard0, there are no chunks assigned to shard1.
-st.shard1.getCollection(ns).insert({});
-assert.eq(null, st.shard1.getDB(dbName).getLastError());
-assert.eq(1, st.shard1.getCollection(ns).count());
-response = st.shard1.getDB('admin').runCommand({cleanupOrphaned: ns});
-assert.commandWorked(response);
-assert.eq({_id: {$maxKey: 1}}, response.stoppedAtKey);
-assert.eq(
- 0, st.shard1.getCollection(ns).count(), "cleanupOrphaned didn't delete orphan on empty shard.");
-
-/*****************************************************************************
* Bad startingFromKeys.
****************************************************************************/
@@ -125,16 +92,9 @@ function testBadStartingFromKeys(shardAdmin) {
{cleanupOrphaned: coll2.getFullName(), startingFromKey: {a: 'someValue', c: 1}}));
}
-// Test when disableResumableRangeDeleter=true.
+// Note the 'startingFromKey' parameter is validated when the FCV is 4.4+, but is not otherwise
+// used (in FCV 4.4+, cleanupOrphaned waits for there to be no orphans in the entire key space).
testBadStartingFromKeys(shardAdmin);
-// Restart the shard with disableResumableRangeDeleter=false and test bad startingFromKey's. Note
-// that the 'startingFromKey' parameter is validated when disableResumableRangeDeleter=false and the
-// FCV is 4.4, but is not otherwise used (cleanupOrphaned waits for there to be no orphans in the
-// entire key space).
-st.rs0.stopSet(null /* signal */, true /* forRestart */);
-st.rs0.startSet({restart: true, setParameter: {disableResumableRangeDeleter: false}});
-testBadStartingFromKeys(st.rs0.getPrimary().getDB("admin"));
-
st.stop();
})();
diff --git a/jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js b/jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js
deleted file mode 100644
index 605ebd4efe3..00000000000
--- a/jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js
+++ /dev/null
@@ -1,160 +0,0 @@
-//
-// Tests cleanupOrphaned concurrent with moveChunk.
-// Inserts orphan documents to the donor and recipient shards during the moveChunk and
-// verifies that cleanupOrphaned removes orphans.
-
-load('./jstests/libs/chunk_manipulation_util.js');
-load('./jstests/libs/cleanup_orphaned_util.js');
-
-(function() {
-"use strict";
-
-var staticMongod = MongoRunner.runMongod({}); // For startParallelOps.
-var st = new ShardingTest({
- shards: 2,
- other: {separateConfig: true},
- shardOptions: {setParameter: {"disableResumableRangeDeleter": true}}
-});
-
-var mongos = st.s0, admin = mongos.getDB('admin'), dbName = 'foo', ns = dbName + '.bar',
- coll = mongos.getCollection(ns), donor = st.shard0, recipient = st.shard1,
- donorColl = donor.getCollection(ns), recipientColl = st.shard1.getCollection(ns);
-
-// Three chunks of 10 documents each, with ids -20, -18, -16, ..., 38.
-// Donor: [minKey, 0) [0, 20)
-// Recipient: [20, maxKey)
-assert.commandWorked(admin.runCommand({enableSharding: dbName}));
-printjson(admin.runCommand({movePrimary: dbName, to: st.shard0.shardName}));
-assert.commandWorked(admin.runCommand({shardCollection: ns, key: {_id: 1}}));
-assert.commandWorked(admin.runCommand({split: ns, middle: {_id: 0}}));
-assert.commandWorked(admin.runCommand({split: ns, middle: {_id: 20}}));
-assert.commandWorked(admin.runCommand(
- {moveChunk: ns, find: {_id: 20}, to: st.shard1.shardName, _waitForDelete: true}));
-
-jsTest.log('Inserting 20 docs into shard 0....');
-for (var i = -20; i < 20; i += 2) {
- coll.insert({_id: i});
-}
-assert.eq(null, coll.getDB().getLastError());
-assert.eq(20, donorColl.count());
-
-jsTest.log('Inserting 10 docs into shard 1....');
-for (i = 20; i < 40; i += 2) {
- coll.insert({_id: i});
-}
-assert.eq(null, coll.getDB().getLastError());
-assert.eq(10, recipientColl.count());
-
-//
-// Start a moveChunk in the background. Move chunk [0, 20), which has 10 docs,
-// from shard 0 to shard 1. Pause it at some points in the donor's and
-// recipient's work flows, and test cleanupOrphaned on shard 0 and shard 1.
-//
-
-jsTest.log('setting failpoint startedMoveChunk (donor) and cloned (recipient)');
-pauseMoveChunkAtStep(donor, moveChunkStepNames.startedMoveChunk);
-pauseMigrateAtStep(recipient, migrateStepNames.cloned);
-var joinMoveChunk = moveChunkParallel(
- staticMongod, st.s0.host, {_id: 0}, null, coll.getFullName(), st.shard1.shardName);
-
-waitForMoveChunkStep(donor, moveChunkStepNames.startedMoveChunk);
-waitForMigrateStep(recipient, migrateStepNames.cloned);
-// Recipient has run _recvChunkStart and begun its migration thread; docs have
-// been cloned and chunk [0, 20) is noted as 'pending' on recipient.
-
-// Donor: [minKey, 0) [0, 20)
-// Recipient (pending): [0, 20)
-// Recipient: [20, maxKey)
-
-// Create orphans. I'll show an orphaned doc on donor with _id 26 like {26}:
-//
-// Donor: [minKey, 0) [0, 20) {26}
-// Recipient (pending): [0, 20)
-// Recipient: {-1} [20, maxKey)
-donorColl.insert([{_id: 26}]);
-assert.eq(null, donorColl.getDB().getLastError());
-assert.eq(21, donorColl.count());
-recipientColl.insert([{_id: -1}]);
-assert.eq(null, recipientColl.getDB().getLastError());
-assert.eq(21, recipientColl.count());
-
-cleanupOrphaned(donor, ns, 2);
-assert.eq(20, donorColl.count());
-cleanupOrphaned(recipient, ns, 2);
-assert.eq(20, recipientColl.count());
-
-jsTest.log('Inserting document on donor side');
-// Inserted a new document (not an orphan) with id 19, which belongs in the
-// [0, 20) chunk.
-donorColl.insert({_id: 19});
-assert.eq(null, coll.getDB().getLastError());
-assert.eq(21, donorColl.count());
-
-// Recipient transfers this modification.
-jsTest.log('Let migrate proceed to transferredMods');
-proceedToMigrateStep(recipient, migrateStepNames.catchup);
-jsTest.log('Done letting migrate proceed to transferredMods');
-
-assert.eq(21, recipientColl.count(), "Recipient didn't transfer inserted document.");
-
-cleanupOrphaned(donor, ns, 2);
-assert.eq(21, donorColl.count());
-cleanupOrphaned(recipient, ns, 2);
-assert.eq(21, recipientColl.count());
-
-// Create orphans.
-donorColl.insert([{_id: 26}]);
-assert.eq(null, donorColl.getDB().getLastError());
-assert.eq(22, donorColl.count());
-recipientColl.insert([{_id: -1}]);
-assert.eq(null, recipientColl.getDB().getLastError());
-assert.eq(22, recipientColl.count());
-
-cleanupOrphaned(donor, ns, 2);
-assert.eq(21, donorColl.count());
-cleanupOrphaned(recipient, ns, 2);
-assert.eq(21, recipientColl.count());
-
-// Recipient has been waiting for donor to call _recvChunkCommit.
-pauseMoveChunkAtStep(donor, moveChunkStepNames.committed);
-unpauseMoveChunkAtStep(donor, moveChunkStepNames.startedMoveChunk);
-proceedToMigrateStep(recipient, migrateStepNames.steady);
-proceedToMigrateStep(recipient, migrateStepNames.done);
-
-// Create orphans.
-donorColl.insert([{_id: 26}]);
-assert.eq(null, donorColl.getDB().getLastError());
-assert.eq(22, donorColl.count());
-recipientColl.insert([{_id: -1}]);
-assert.eq(null, recipientColl.getDB().getLastError());
-assert.eq(22, recipientColl.count());
-
-cleanupOrphaned(donor, ns, 2);
-assert.eq(21, donorColl.count());
-cleanupOrphaned(recipient, ns, 2);
-assert.eq(21, recipientColl.count());
-
-// Let recipient side of the migration finish so that the donor can proceed with the commit.
-unpauseMigrateAtStep(recipient, migrateStepNames.done);
-waitForMoveChunkStep(donor, moveChunkStepNames.committed);
-
-// Donor is paused after the migration chunk commit, but before it finishes the cleanup that
-// includes running the range deleter. Thus it technically has orphaned data -- commit is
-// complete, but moved data is still present. cleanupOrphaned can remove the data the donor
-// would otherwise clean up itself in its post-move delete phase.
-cleanupOrphaned(donor, ns, 2);
-assert.eq(10, donorColl.count());
-
-// Let the donor migration finish.
-unpauseMoveChunkAtStep(donor, moveChunkStepNames.committed);
-joinMoveChunk();
-
-// Donor has finished post-move delete, which had nothing to remove with the range deleter
-// because of the preemptive cleanupOrphaned call.
-assert.eq(10, donorColl.count());
-assert.eq(21, recipientColl.count());
-assert.eq(31, coll.count());
-
-st.stop();
-MongoRunner.stopMongod(staticMongod);
-})();
diff --git a/jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js b/jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js
deleted file mode 100644
index 687f4566ab8..00000000000
--- a/jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js
+++ /dev/null
@@ -1,131 +0,0 @@
-//
-//
-// Tests cleanupOrphaned concurrent with moveChunk with a hashed shard key.
-// Inserts orphan documents to the donor and recipient shards during the moveChunk and
-// verifies that cleanupOrphaned removes orphans.
-//
-
-load('./jstests/libs/chunk_manipulation_util.js');
-load('./jstests/libs/cleanup_orphaned_util.js');
-
-(function() {
-"use strict";
-
-var staticMongod = MongoRunner.runMongod({}); // For startParallelOps.
-var st = new ShardingTest({
- shards: 2,
- other: {separateConfig: true},
- shardOptions: {setParameter: {"disableResumableRangeDeleter": true}}
-});
-
-var mongos = st.s0, admin = mongos.getDB('admin'), dbName = 'foo', ns = dbName + '.bar',
- coll = mongos.getCollection(ns);
-
-assert.commandWorked(admin.runCommand({enableSharding: dbName}));
-printjson(admin.runCommand({movePrimary: dbName, to: st.shard0.shardName}));
-assert.commandWorked(admin.runCommand({shardCollection: ns, key: {key: 'hashed'}}));
-
-// Makes four chunks by default, two on each shard.
-var chunks = st.config.chunks.find({ns: ns}).sort({min: 1}).toArray();
-assert.eq(4, chunks.length);
-
-var chunkWithDoc = chunks[1];
-print('Trying to make doc that hashes to this chunk: ' + tojson(chunkWithDoc));
-
-var found = false;
-for (var i = 0; i < 10000; i++) {
- var doc = {key: ObjectId()}, hash = mongos.adminCommand({_hashBSONElement: doc.key}).out;
-
- print('doc.key ' + doc.key + ' hashes to ' + hash);
-
- if (mongos.getCollection('config.chunks')
- .findOne({_id: chunkWithDoc._id, 'min.key': {$lte: hash}, 'max.key': {$gt: hash}})) {
- found = true;
- break;
- }
-}
-
-assert(found, "Couldn't make doc that belongs to chunk 1.");
-print('Doc: ' + tojson(doc));
-coll.insert(doc);
-assert.eq(null, coll.getDB().getLastError());
-
-//
-// Start a moveChunk in the background from shard 0 to shard 1. Pause it at
-// some points in the donor's and recipient's work flows, and test
-// cleanupOrphaned.
-//
-
-var donor, recip;
-if (chunkWithDoc.shard == st.shard0.shardName) {
- donor = st.shard0;
- recip = st.shard1;
-} else {
- recip = st.shard0;
- donor = st.shard1;
-}
-
-jsTest.log('setting failpoint startedMoveChunk (donor) and cloned (recipient)');
-pauseMoveChunkAtStep(donor, moveChunkStepNames.startedMoveChunk);
-pauseMigrateAtStep(recip, migrateStepNames.cloned);
-
-var joinMoveChunk = moveChunkParallel(staticMongod,
- st.s0.host,
- null,
- [chunkWithDoc.min, chunkWithDoc.max], // bounds
- coll.getFullName(),
- recip.shardName);
-
-waitForMoveChunkStep(donor, moveChunkStepNames.startedMoveChunk);
-waitForMigrateStep(recip, migrateStepNames.cloned);
-proceedToMigrateStep(recip, migrateStepNames.catchup);
-// recipient has run _recvChunkStart and begun its migration thread;
-// 'doc' has been cloned and chunkWithDoc is noted as 'pending' on recipient.
-
-var donorColl = donor.getCollection(ns), recipColl = recip.getCollection(ns);
-
-assert.eq(1, donorColl.count());
-assert.eq(1, recipColl.count());
-
-// cleanupOrphaned should go through two iterations, since the default chunk
-// setup leaves two unowned ranges on each shard.
-cleanupOrphaned(donor, ns, 2);
-cleanupOrphaned(recip, ns, 2);
-assert.eq(1, donorColl.count());
-assert.eq(1, recipColl.count());
-
-// recip has been waiting for donor to call _recvChunkCommit.
-pauseMoveChunkAtStep(donor, moveChunkStepNames.committed);
-unpauseMoveChunkAtStep(donor, moveChunkStepNames.startedMoveChunk);
-proceedToMigrateStep(recip, migrateStepNames.steady);
-proceedToMigrateStep(recip, migrateStepNames.done);
-
-cleanupOrphaned(donor, ns, 2);
-assert.eq(1, donorColl.count());
-cleanupOrphaned(recip, ns, 2);
-assert.eq(1, recipColl.count());
-
-// Let recip side of the migration finish so that the donor proceeds with the commit.
-unpauseMigrateAtStep(recip, migrateStepNames.done);
-waitForMoveChunkStep(donor, moveChunkStepNames.committed);
-
-// Donor is paused after the migration chunk commit, but before it finishes the cleanup that
-// includes running the range deleter. Thus it technically has orphaned data -- commit is
-// complete, but moved data is still present. cleanupOrphaned can remove the data the donor
-// would otherwise clean up itself in its post-move delete phase.
-cleanupOrphaned(donor, ns, 2);
-assert.eq(0, donorColl.count());
-
-// Let migration thread complete.
-unpauseMoveChunkAtStep(donor, moveChunkStepNames.committed);
-joinMoveChunk();
-
-// donor has finished post-move delete, which had nothing to remove with the range deleter
-// because of the preemptive cleanupOrphaned call.
-assert.eq(0, donorColl.count());
-assert.eq(1, recipColl.count());
-assert.eq(1, coll.count());
-
-MongoRunner.stopMongod(staticMongod);
-st.stop();
-})();
diff --git a/jstests/sharding/cleanup_orphaned_cmd_hashed.js b/jstests/sharding/cleanup_orphaned_cmd_hashed.js
deleted file mode 100644
index 11ab4c1d8bc..00000000000
--- a/jstests/sharding/cleanup_orphaned_cmd_hashed.js
+++ /dev/null
@@ -1,78 +0,0 @@
-//
-// Tests cleanup of orphaned data in hashed sharded coll via the orphaned data cleanup command
-//
-
-(function() {
-"use strict";
-
-var st = new ShardingTest(
- {shards: 2, mongos: 1, shardOptions: {setParameter: {"disableResumableRangeDeleter": true}}});
-
-var mongos = st.s0;
-var admin = mongos.getDB("admin");
-var coll = mongos.getCollection("foo.bar");
-
-assert.commandWorked(admin.runCommand({enableSharding: coll.getDB() + ""}));
-printjson(admin.runCommand({movePrimary: coll.getDB() + "", to: st.shard0.shardName}));
-assert.commandWorked(admin.runCommand({shardCollection: coll + "", key: {_id: "hashed"}}));
-
-// Create two orphaned data holes, one bounded by min or max on each shard
-
-assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: NumberLong(-100)}}));
-assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: NumberLong(-50)}}));
-assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: NumberLong(50)}}));
-assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: NumberLong(100)}}));
-assert.commandWorked(admin.runCommand({
- moveChunk: coll + "",
- bounds: [{_id: NumberLong(-100)}, {_id: NumberLong(-50)}],
- to: st.shard1.shardName,
- _waitForDelete: true
-}));
-assert.commandWorked(admin.runCommand({
- moveChunk: coll + "",
- bounds: [{_id: NumberLong(50)}, {_id: NumberLong(100)}],
- to: st.shard0.shardName,
- _waitForDelete: true
-}));
-st.printShardingStatus();
-
-jsTest.log("Inserting some docs on each shard, so 1/2 will be orphaned...");
-
-for (var s = 0; s < 2; s++) {
- var shardColl = (s == 0 ? st.shard0 : st.shard1).getCollection(coll + "");
- var bulk = shardColl.initializeUnorderedBulkOp();
- for (var i = 0; i < 100; i++)
- bulk.insert({_id: i});
- assert.commandWorked(bulk.execute());
-}
-
-assert.eq(200,
- st.shard0.getCollection(coll + "").find().itcount() +
- st.shard1.getCollection(coll + "").find().itcount());
-assert.eq(100, coll.find().itcount());
-
-jsTest.log("Cleaning up orphaned data in hashed coll...");
-
-for (var s = 0; s < 2; s++) {
- var shardAdmin = (s == 0 ? st.shard0 : st.shard1).getDB("admin");
-
- var result = shardAdmin.runCommand({cleanupOrphaned: coll + ""});
- while (result.ok && result.stoppedAtKey) {
- printjson(result);
- result = shardAdmin.runCommand(
- {cleanupOrphaned: coll + "", startingFromKey: result.stoppedAtKey});
- }
-
- printjson(result);
- assert(result.ok);
-}
-
-assert.eq(100,
- st.shard0.getCollection(coll + "").find().itcount() +
- st.shard1.getCollection(coll + "").find().itcount());
-assert.eq(100, coll.find().itcount());
-
-jsTest.log("DONE!");
-
-st.stop();
-})();
diff --git a/jstests/sharding/cleanup_orphaned_cmd_prereload.js b/jstests/sharding/cleanup_orphaned_cmd_prereload.js
deleted file mode 100644
index 548e79bd275..00000000000
--- a/jstests/sharding/cleanup_orphaned_cmd_prereload.js
+++ /dev/null
@@ -1,96 +0,0 @@
-//
-// Tests failed cleanup of orphaned data when we have pending chunks
-//
-
-var st = new ShardingTest(
- {shards: 2, shardOptions: {setParameter: {"disableResumableRangeDeleter": true}}});
-
-var mongos = st.s0;
-var admin = mongos.getDB("admin");
-var coll = mongos.getCollection("foo.bar");
-
-assert.commandWorked(admin.runCommand({enableSharding: coll.getDB() + ""}));
-printjson(admin.runCommand({movePrimary: coll.getDB() + "", to: st.shard0.shardName}));
-assert.commandWorked(admin.runCommand({shardCollection: coll + "", key: {_id: 1}}));
-
-// Turn off best-effort recipient metadata refresh post-migration commit on both shards because it
-// would clean up the pending chunks on migration recipients.
-assert.commandWorked(st.shard0.getDB('admin').runCommand(
- {configureFailPoint: 'doNotRefreshRecipientAfterCommit', mode: 'alwaysOn'}));
-assert.commandWorked(st.shard1.getDB('admin').runCommand(
- {configureFailPoint: 'doNotRefreshRecipientAfterCommit', mode: 'alwaysOn'}));
-
-jsTest.log("Moving some chunks to shard1...");
-
-assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 0}}));
-assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 1}}));
-
-assert.commandWorked(admin.runCommand(
- {moveChunk: coll + "", find: {_id: 0}, to: st.shard1.shardName, _waitForDelete: true}));
-assert.commandWorked(admin.runCommand(
- {moveChunk: coll + "", find: {_id: 1}, to: st.shard1.shardName, _waitForDelete: true}));
-
-var metadata =
- st.shard1.getDB("admin").runCommand({getShardVersion: coll + "", fullMetadata: true}).metadata;
-
-printjson(metadata);
-
-assert.eq(metadata.pending[0][0]._id, 1);
-assert.eq(metadata.pending[0][1]._id, MaxKey);
-
-jsTest.log("Ensuring we won't remove orphaned data in pending chunk...");
-
-assert(!st.shard1.getDB("admin")
- .runCommand({cleanupOrphaned: coll + "", startingFromKey: {_id: 1}})
- .stoppedAtKey);
-
-jsTest.log("Moving some chunks back to shard0 after empty...");
-
-assert.commandWorked(admin.runCommand(
- {moveChunk: coll + "", find: {_id: -1}, to: st.shard1.shardName, _waitForDelete: true}));
-
-var metadata =
- st.shard0.getDB("admin").runCommand({getShardVersion: coll + "", fullMetadata: true}).metadata;
-
-printjson(metadata);
-
-assert.eq(metadata.shardVersion.t, 0);
-assert.neq(metadata.collVersion.t, 0);
-assert.eq(metadata.pending.length, 0);
-
-assert.commandWorked(admin.runCommand(
- {moveChunk: coll + "", find: {_id: 1}, to: st.shard0.shardName, _waitForDelete: true}));
-
-var metadata =
- st.shard0.getDB("admin").runCommand({getShardVersion: coll + "", fullMetadata: true}).metadata;
-
-printjson(metadata);
-assert.eq(metadata.shardVersion.t, 0);
-assert.neq(metadata.collVersion.t, 0);
-assert.eq(metadata.pending[0][0]._id, 1);
-assert.eq(metadata.pending[0][1]._id, MaxKey);
-
-jsTest.log("Ensuring again we won't remove orphaned data in pending chunk...");
-
-assert(!st.shard0.getDB("admin")
- .runCommand({cleanupOrphaned: coll + "", startingFromKey: {_id: 1}})
- .stoppedAtKey);
-
-jsTest.log("Checking that pending chunk is promoted on reload...");
-
-assert.eq(null, coll.findOne({_id: 1}));
-
-var metadata =
- st.shard0.getDB("admin").runCommand({getShardVersion: coll + "", fullMetadata: true}).metadata;
-
-printjson(metadata);
-assert.neq(metadata.shardVersion.t, 0);
-assert.neq(metadata.collVersion.t, 0);
-assert.eq(metadata.chunks[0][0]._id, 1);
-assert.eq(metadata.chunks[0][1]._id, MaxKey);
-
-st.printShardingStatus();
-
-jsTest.log("DONE!");
-
-st.stop();
diff --git a/jstests/sharding/cleanup_orphaned_compound.js b/jstests/sharding/cleanup_orphaned_compound.js
deleted file mode 100644
index ebf7163c77d..00000000000
--- a/jstests/sharding/cleanup_orphaned_compound.js
+++ /dev/null
@@ -1,18 +0,0 @@
-//
-// Shards data from the key range, then inserts orphan documents, runs cleanupOrphans
-// and makes sure that orphans are removed. Uses a compound shard key.
-//
-
-load('./jstests/libs/cleanup_orphaned_util.js');
-
-testCleanupOrphaned({
- shardKey: {a: 1, b: 1},
- keyGen: function() {
- var ids = [];
- for (var i = -50; i < 50; i++) {
- ids.push({a: i, b: Math.random()});
- }
-
- return ids;
- }
-});
diff --git a/jstests/sharding/disable_resumable_range_deleter.js b/jstests/sharding/disable_resumable_range_deleter.js
new file mode 100644
index 00000000000..631c065daea
--- /dev/null
+++ b/jstests/sharding/disable_resumable_range_deleter.js
@@ -0,0 +1,85 @@
+/**
+ * Verifies the effect of setting disableResumableRangeDeleter to true on a shard.
+ *
+ * requires_persistence - This test restarts shards and expects them to remember their data.
+ * requires_fcv_46 - This test depends on the behavior of disableResumableRangeDeleter, which changed from 4.4.
+ * @tags: [requires_persistence, requires_fcv_46]
+ */
+(function() {
+
+"use strict";
+
+load("jstests/libs/fail_point_util.js");
+
+// This test intentionally disables the resumable range deleter.
+TestData.skipCheckOrphans = true;
+
+const dbName = "test";
+const collName = "foo";
+const ns = dbName + "." + collName;
+
+const st = new ShardingTest({shards: 2});
+
+jsTest.log("Setup");
+assert.commandWorked(st.s.adminCommand({enableSharding: dbName}));
+assert.commandWorked(st.s.adminCommand({movePrimary: dbName, to: st.shard0.shardName}));
+assert.commandWorked(st.s.adminCommand({shardCollection: ns, key: {_id: 1}}));
+assert.commandWorked(st.s.adminCommand({split: ns, middle: {_id: 0}}));
+
+jsTest.log("Suspend range deletion and cause a range deletion task to be created on shard0.");
+let suspendRangeDeletionFailpoint = configureFailPoint(st.rs0.getPrimary(), "suspendRangeDeletion");
+assert.commandWorked(st.s.adminCommand({moveChunk: ns, find: {_id: 0}, to: st.shard1.shardName}));
+
+jsTest.log("Restart shard0 with disableResumableRangeDeleter=true.");
+// Note, the suspendRangeDeletion failpoint will not survive the restart.
+st.rs0.restart(0, {
+ remember: true,
+ appendOptions: true,
+ startClean: false,
+ setParameter: "disableResumableRangeDeleter=true"
+});
+
+jsTest.log("Shard0 should fail to submit the range deletion task on stepup.");
+checkLog.contains(st.rs0.getPrimary(), "Failed to submit range deletion task");
+
+jsTest.log("Shard0 should fail to receive a range that overlaps the range deletion task.");
+// The error from moveChunk gets wrapped as an OperationFailed error, so we have to check the error
+// message to find the original cause.
+const moveChunkRes = st.s.adminCommand({moveChunk: ns, find: {_id: 0}, to: st.shard0.shardName});
+assert.commandFailedWithCode(moveChunkRes, ErrorCodes.OperationFailed);
+assert(moveChunkRes.errmsg.indexOf("ResumableRangeDeleterDisabled") > -1);
+
+jsTest.log("Shard0 should fail to do cleanupOrphaned on the namespace.");
+assert.commandFailedWithCode(st.rs0.getPrimary().adminCommand({cleanupOrphaned: ns}),
+ ErrorCodes.ResumableRangeDeleterDisabled);
+
+jsTest.log("Shard0 should be able to do cleanupOrphaned on an unrelated namespace.");
+assert.commandWorked(st.rs0.getPrimary().adminCommand({cleanupOrphaned: "test.unrelated"}));
+
+jsTest.log("Restart shard1 with disableResumableRangeDeleter=true.");
+st.rs1.restart(0, {
+ remember: true,
+ appendOptions: true,
+ startClean: false,
+ setParameter: "disableResumableRangeDeleter=true"
+});
+
+jsTest.log("Shard0 should be able to donate a chunk and shard1 should be able to receive it.");
+// disableResumableRangeDeleter should not prevent a shard from donating a chunk, and should not
+// prevent a shard from receiving a chunk for which it doesn't have overlapping range deletion
+// tasks.
+assert.commandWorked(st.s.adminCommand({moveChunk: ns, find: {_id: -1}, to: st.shard1.shardName}));
+
+jsTest.log("Restart shard0 with disableResumableRangeDeleter=false.");
+st.rs0.restart(0, {
+ remember: true,
+ appendOptions: true,
+ startClean: false,
+ setParameter: "disableResumableRangeDeleter=false"
+});
+
+jsTest.log("Shard0 should now be able to re-receive the chunk it failed to receive earlier.");
+assert.commandWorked(st.s.adminCommand({moveChunk: ns, find: {_id: 0}, to: st.shard0.shardName}));
+
+st.stop();
+})();
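As a follow-on to the new test above, a hedged sketch of how orphans left behind by a disabled range deleter could be observed, reusing the direct-to-shard counting pattern from the deleted cleanup_orphaned tests (st and ns as defined in the test above; assumes documents have been inserted, which the test itself does not do):

// Illustrative sketch: a query routed through mongos applies shard filtering, while direct
// connections to the shards also return any orphaned documents that were never deleted.
const throughMongos = st.s.getCollection(ns).find().itcount();
const onShard0 = st.rs0.getPrimary().getCollection(ns).find().itcount();
const onShard1 = st.rs1.getPrimary().getCollection(ns).find().itcount();
jsTest.log("Orphaned documents left behind: " + (onShard0 + onShard1 - throughMongos));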
diff --git a/jstests/sharding/disable_resumable_range_deleter_flag.js b/jstests/sharding/disable_resumable_range_deleter_flag.js
deleted file mode 100644
index 770747df3be..00000000000
--- a/jstests/sharding/disable_resumable_range_deleter_flag.js
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * Tests that migrations behave correctly when the resumable range deleter protocol is
- * disabled.
- *
- * requires_persistence because this test restarts shards and expects them to have their data files.
- * @tags: [requires_persistence]
- */
-
-(function() {
-"use strict";
-
-load("jstests/libs/fail_point_util.js");
-load('jstests/libs/parallel_shell_helpers.js');
-
-// This test runs a migration with 'disableResumableRangeDeleter=true', then restarts the shards,
-// so the orphans from that migration will never be cleaned up.
-TestData.skipCheckOrphans = true;
-
-const dbName = "test";
-
-let st = new ShardingTest({shards: 2});
-
-assert.commandWorked(st.s.adminCommand({enableSharding: dbName}));
-assert.commandWorked(st.s.adminCommand({movePrimary: dbName, to: st.shard0.shardName}));
-
-function getNewNs(dbName) {
- if (typeof getNewNs.counter == 'undefined') {
- getNewNs.counter = 0;
- }
- getNewNs.counter++;
- const collName = "ns" + getNewNs.counter;
- return [collName, dbName + "." + collName];
-}
-
-let moveChunk = function(ns, shard) {
- jsTestLog("Starting moveChunk " + ns + " " + shard);
-
- let adminDb = db.getSiblingDB("admin");
- assert.commandWorked(adminDb.runCommand({moveChunk: ns, find: {x: 50}, to: shard}));
-};
-
-function testBothDisabledSucceeds() {
- jsTestLog("Test that disabled donor and recipient succeeds migration");
-
- const [collName, ns] = getNewNs(dbName);
-
- assert.commandWorked(st.s.adminCommand({shardCollection: ns, key: {x: 1}}));
- assert.commandWorked(st.s.adminCommand({split: ns, middle: {x: 50}}));
-
- // Insert documents into both chunks on shard0.
- let testColl = st.s.getDB(dbName).getCollection(collName);
- for (let i = 0; i < 100; i++) {
- testColl.insert({x: i});
- }
-
- // Disable resumable range deleter on both shards.
- st.rs0.stopSet(null /* signal */, true /* forRestart */);
- st.rs0.startSet({restart: true, setParameter: {disableResumableRangeDeleter: true}});
- st.rs1.stopSet(null /* signal */, true /* forRestart */);
- st.rs1.startSet({restart: true, setParameter: {disableResumableRangeDeleter: true}});
-
- // Move chunk [50, inf) to shard1 should succeed.
- assert.commandWorked(
- st.s.adminCommand({moveChunk: ns, find: {x: 50}, to: st.shard1.shardName}));
-
- // Re-enable resumable range delete.
- st.rs0.stopSet(null /* signal */, true /* forRestart */);
- st.rs0.startSet({restart: true, setParameter: {disableResumableRangeDeleter: false}});
- st.rs1.stopSet(null /* signal */, true /* forRestart */);
- st.rs1.startSet({restart: true, setParameter: {disableResumableRangeDeleter: false}});
-}
-
-function testDisabledSourceFailsMigration() {
- jsTestLog("Test that disabled donor fails migration");
-
- const [collName, ns] = getNewNs(dbName);
-
- assert.commandWorked(st.s.adminCommand({shardCollection: ns, key: {x: 1}}));
- assert.commandWorked(st.s.adminCommand({split: ns, middle: {x: 50}}));
-
- // Insert documents into both chunks on shard0.
- let testColl = st.s.getDB(dbName).getCollection(collName);
- for (let i = 0; i < 100; i++) {
- testColl.insert({x: i});
- }
-
- // Disable resumable range deleter on shard0.
- st.rs0.stopSet(null /* signal */, true /* forRestart */);
- st.rs0.startSet({restart: true, setParameter: {disableResumableRangeDeleter: true}});
-
- // Move chunk [50, inf) to shard1 should fail since migration id is missing.
- assert.commandFailedWithCode(
- st.s.adminCommand({moveChunk: ns, find: {x: 50}, to: st.shard1.shardName}),
- [ErrorCodes.ConflictingOperationInProgress, ErrorCodes.IllegalOperation]);
-
- // Re-enable resumable range deleter on shard0.
- st.rs0.stopSet(null /* signal */, true /* forRestart */);
- st.rs0.startSet({restart: true, setParameter: {disableResumableRangeDeleter: false}});
-}
-
-function testDisabledRecipientSucceedsMigration() {
- jsTestLog("Test that disabled recipient succeeds migration");
-
- const [collName, ns] = getNewNs(dbName);
-
- assert.commandWorked(st.s.adminCommand({shardCollection: ns, key: {x: 1}}));
- assert.commandWorked(st.s.adminCommand({split: ns, middle: {x: 50}}));
-
- // Insert documents into both chunks on shard0.
- let testColl = st.s.getDB(dbName).getCollection(collName);
- for (let i = 0; i < 100; i++) {
- testColl.insert({x: i});
- }
-
- // Disable resumable range deleter on shard1.
- st.rs1.stopSet(null /* signal */, true /* forRestart */);
- st.rs1.startSet({restart: true, setParameter: {disableResumableRangeDeleter: true}});
-
- // Move chunk [50, inf) to shard1 should succeed.
- assert.commandWorked(
- st.s.adminCommand({moveChunk: ns, find: {x: 50}, to: st.shard1.shardName}));
-
- // Re-enable resumable range deleter on shard1.
- st.rs1.stopSet(null /* signal */, true /* forRestart */);
- st.rs1.startSet({restart: true, setParameter: {disableResumableRangeDeleter: false}});
-}
-
-testBothDisabledSucceeds();
-testDisabledSourceFailsMigration();
-testDisabledRecipientSucceedsMigration();
-
-st.stop();
-})();
diff --git a/jstests/sharding/migration_ignore_interrupts_3.js b/jstests/sharding/migration_ignore_interrupts_3.js
deleted file mode 100644
index 4b99fc2b537..00000000000
--- a/jstests/sharding/migration_ignore_interrupts_3.js
+++ /dev/null
@@ -1,98 +0,0 @@
-// If a donor aborts a migration to a recipient, the recipient does not realize the migration has
-// been aborted, and the donor moves on to a new migration, the original recipient will then fail to
-// clone documents from the donor.
-//
-// Note: don't use coll1 in this test after a coll1 migration is interrupted -- the distlock isn't
-// released promptly when interrupted.
-
-load('./jstests/libs/chunk_manipulation_util.js');
-
-(function() {
-"use strict";
-
-var staticMongod = MongoRunner.runMongod({}); // For startParallelOps.
-
-// This test only makes sense if resumable range deletion is off, because when it is on, it is not
-// possible for the donor to begin a new migration before having completed the previous one.
-var st = new ShardingTest(
- {shards: 3, shardOptions: {setParameter: {"disableResumableRangeDeleter": true}}});
-
-var mongos = st.s0, admin = mongos.getDB('admin'), dbName = "testDB", ns1 = dbName + ".foo",
- ns2 = dbName + ".bar", coll1 = mongos.getCollection(ns1), coll2 = mongos.getCollection(ns2),
- shard0 = st.shard0, shard1 = st.shard1, shard2 = st.shard2,
- shard0Coll1 = shard0.getCollection(ns1), shard1Coll1 = shard1.getCollection(ns1),
- shard2Coll1 = shard2.getCollection(ns1), shard0Coll2 = shard0.getCollection(ns2),
- shard1Coll2 = shard1.getCollection(ns2), shard2Coll2 = shard2.getCollection(ns2);
-
-assert.commandWorked(admin.runCommand({enableSharding: dbName}));
-st.ensurePrimaryShard(dbName, st.shard0.shardName);
-
-assert.commandWorked(admin.runCommand({shardCollection: ns1, key: {a: 1}}));
-assert.commandWorked(coll1.insert({a: 0}));
-assert.eq(1, shard0Coll1.find().itcount());
-assert.eq(0, shard1Coll1.find().itcount());
-assert.eq(0, shard2Coll1.find().itcount());
-assert.eq(1, coll1.find().itcount());
-
-assert.commandWorked(admin.runCommand({shardCollection: ns2, key: {a: 1}}));
-assert.commandWorked(coll2.insert({a: 0}));
-assert.eq(1, shard0Coll2.find().itcount());
-assert.eq(0, shard1Coll2.find().itcount());
-assert.eq(0, shard2Coll2.find().itcount());
-assert.eq(1, coll2.find().itcount());
-
-// Shard0:
-// coll1: [-inf, +inf)
-// coll2: [-inf, +inf)
-// Shard1:
-// Shard2:
-
-jsTest.log("Set up complete, now proceeding to test that migration interruption fails.");
-
-// Start coll1 migration to shard1: pause recipient after delete step, donor before interrupt
-// check.
-pauseMigrateAtStep(shard1, migrateStepNames.deletedPriorDataInRange);
-pauseMoveChunkAtStep(shard0, moveChunkStepNames.startedMoveChunk);
-const joinMoveChunk = moveChunkParallel(
- staticMongod, st.s0.host, {a: 0}, null, coll1.getFullName(), st.shard1.shardName);
-waitForMigrateStep(shard1, migrateStepNames.deletedPriorDataInRange);
-
-// Abort migration on donor side, recipient is unaware.
-killRunningMoveChunk(admin);
-
-unpauseMoveChunkAtStep(shard0, moveChunkStepNames.startedMoveChunk);
-assert.throws(function() {
- joinMoveChunk();
-});
-
-// Start coll2 migration to shard2, pause recipient after delete step.
-pauseMigrateAtStep(shard2, migrateStepNames.deletedPriorDataInRange);
-const joinMoveChunk2 = moveChunkParallel(
- staticMongod, st.s0.host, {a: 0}, null, coll2.getFullName(), st.shard2.shardName);
-waitForMigrateStep(shard2, migrateStepNames.deletedPriorDataInRange);
-
-jsTest.log('Releasing coll1 migration recipient, whose clone command should fail....');
-unpauseMigrateAtStep(shard1, migrateStepNames.deletedPriorDataInRange);
-assert.soon(function() {
- // Wait for the destination shard to report that it is not in an active migration.
- var res = shard1.adminCommand({'_recvChunkStatus': 1});
- return (res.active == false);
-}, "coll1 migration recipient didn't abort migration in clone phase.", 2 * 60 * 1000);
-assert.eq(1, shard0Coll1.find().itcount(), "donor shard0 completed a migration that it aborted.");
-assert.eq(
- 0, shard1Coll1.find().itcount(), "shard1 cloned documents despite donor migration abortion.");
-
-jsTest.log('Finishing coll2 migration, which should succeed....');
-unpauseMigrateAtStep(shard2, migrateStepNames.deletedPriorDataInRange);
-assert.doesNotThrow(function() {
- joinMoveChunk2();
-});
-
-assert.eq(0,
- shard0Coll2.find().itcount(),
- "donor shard0 failed to complete a migration after aborting a prior migration.");
-assert.eq(1, shard2Coll2.find().itcount(), "shard2 failed to complete migration.");
-
-st.stop();
-MongoRunner.stopMongod(staticMongod);
-})();
diff --git a/jstests/sharding/migration_ignore_interrupts_4.js b/jstests/sharding/migration_ignore_interrupts_4.js
deleted file mode 100644
index d28d809178d..00000000000
--- a/jstests/sharding/migration_ignore_interrupts_4.js
+++ /dev/null
@@ -1,101 +0,0 @@
-// If a donor aborts a migration to a recipient, the recipient does not realize the migration has
-// been aborted, and the donor moves on to a new migration, the original recipient will then fail to
-// retrieve transferMods from the donor's xfermods log.
-//
-// Note: don't use coll1 in this test after a coll1 migration is interrupted -- the distlock isn't
-// released promptly when interrupted.
-
-load('./jstests/libs/chunk_manipulation_util.js');
-
-(function() {
-"use strict";
-
-var staticMongod = MongoRunner.runMongod({}); // For startParallelOps.
-
-// This test only makes sense if resumable range deletion is off, because when it is on, it is not
-// possible for the donor to begin a new migration before having completed the previous one.
-var st = new ShardingTest(
- {shards: 3, shardOptions: {setParameter: {"disableResumableRangeDeleter": true}}});
-
-var mongos = st.s0, admin = mongos.getDB('admin'), dbName = "testDB", ns1 = dbName + ".foo",
- ns2 = dbName + ".bar", coll1 = mongos.getCollection(ns1), coll2 = mongos.getCollection(ns2),
- shard0 = st.shard0, shard1 = st.shard1, shard2 = st.shard2,
- shard0Coll1 = shard0.getCollection(ns1), shard1Coll1 = shard1.getCollection(ns1),
- shard2Coll1 = shard2.getCollection(ns1), shard0Coll2 = shard0.getCollection(ns2),
- shard1Coll2 = shard1.getCollection(ns2), shard2Coll2 = shard2.getCollection(ns2);
-
-assert.commandWorked(admin.runCommand({enableSharding: dbName}));
-st.ensurePrimaryShard(dbName, st.shard0.shardName);
-
-assert.commandWorked(admin.runCommand({shardCollection: ns1, key: {a: 1}}));
-assert.commandWorked(coll1.insert({a: 0}));
-assert.eq(1, shard0Coll1.find().itcount());
-assert.eq(0, shard1Coll1.find().itcount());
-assert.eq(0, shard2Coll1.find().itcount());
-assert.eq(1, coll1.find().itcount());
-
-assert.commandWorked(admin.runCommand({shardCollection: ns2, key: {a: 1}}));
-assert.commandWorked(coll2.insert({a: 0}));
-assert.eq(1, shard0Coll2.find().itcount());
-assert.eq(0, shard1Coll2.find().itcount());
-assert.eq(0, shard2Coll2.find().itcount());
-assert.eq(1, coll2.find().itcount());
-
-// Shard0:
-// coll1: [-inf, +inf)
-// coll2: [-inf, +inf)
-// Shard1:
-// Shard2:
-
-jsTest.log("Set up complete, now proceeding to test that migration interruption fails.");
-
-// Start coll1 migration to shard1: pause recipient after cloning, donor before interrupt check
-pauseMigrateAtStep(shard1, migrateStepNames.cloned);
-pauseMoveChunkAtStep(shard0, moveChunkStepNames.startedMoveChunk);
-const joinMoveChunk = moveChunkParallel(
- staticMongod, st.s0.host, {a: 0}, null, coll1.getFullName(), st.shard1.shardName);
-waitForMigrateStep(shard1, migrateStepNames.cloned);
-
-// Abort migration on donor side, recipient is unaware
-killRunningMoveChunk(admin);
-
-unpauseMoveChunkAtStep(shard0, moveChunkStepNames.startedMoveChunk);
-
-assert.throws(function() {
- joinMoveChunk();
-});
-
-// Start coll2 migration to shard2, pause recipient after cloning step.
-pauseMigrateAtStep(shard2, migrateStepNames.cloned);
-const joinMoveChunk2 = moveChunkParallel(
- staticMongod, st.s0.host, {a: 0}, null, coll2.getFullName(), st.shard2.shardName);
-waitForMigrateStep(shard2, migrateStepNames.cloned);
-
-// Populate donor (shard0) xfermods log.
-assert.commandWorked(coll2.insert({a: 1}));
-assert.commandWorked(coll2.insert({a: 2}));
-assert.eq(3, coll2.find().itcount(), "Failed to insert documents into coll2.");
-assert.eq(3, shard0Coll2.find().itcount());
-
-jsTest.log('Releasing coll1 migration recipient, whose transferMods command should fail....');
-unpauseMigrateAtStep(shard1, migrateStepNames.cloned);
-assert.soon(function() {
- // Wait for the destination shard to report that it is not in an active migration.
- var res = shard1.adminCommand({'_recvChunkStatus': 1});
- return (res.active == false);
-}, "coll1 migration recipient didn't abort migration in catchup phase.", 2 * 60 * 1000);
-assert.eq(1, shard0Coll1.find().itcount(), "donor shard0 completed a migration that it aborted.");
-
-jsTest.log('Finishing coll2 migration, which should succeed....');
-unpauseMigrateAtStep(shard2, migrateStepNames.cloned);
-assert.doesNotThrow(function() {
- joinMoveChunk2();
-});
-assert.eq(0,
- shard0Coll2.find().itcount(),
- "donor shard0 failed to complete a migration after aborting a prior migration.");
-assert.eq(3, shard2Coll2.find().itcount(), "shard2 failed to complete migration.");
-
-st.stop();
-MongoRunner.stopMongod(staticMongod);
-})();
diff --git a/jstests/sharding/move_jumbo_chunk.js b/jstests/sharding/move_jumbo_chunk.js
index 95da8dee66a..f470960910a 100644
--- a/jstests/sharding/move_jumbo_chunk.js
+++ b/jstests/sharding/move_jumbo_chunk.js
@@ -1,17 +1,18 @@
/**
 * Test that a jumbo chunk can be moved both manually and by the balancer when the
 * 'forceJumbo' option is set to true.
+ *
+ * TODO (SERVER-46420): Fix test to allow it to work with the resumable range deleter enabled.
+ * @tags: [__TEMPORARILY_DISABLED__]
*/
(function() {
'use strict';
-// TODO(SERVER-46420): Update test to run with resumable range deleter enabled.
let st = new ShardingTest({
shards: 2,
mongos: 1,
other: {chunkSize: 1},
- shardOptions: {setParameter: {"disableResumableRangeDeleter": true}}
});
let kDbName = "test";
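As a rough sketch of the behavior this test covers (the namespace and shard key value are hypothetical), a jumbo chunk can be moved manually by passing the forceJumbo option to moveChunk:

// Sketch only: assumes moveChunk accepts a boolean 'forceJumbo' field, as the test
// description above states; "test.foo" and {x: 0} are placeholder values.
assert.commandWorked(st.s.adminCommand(
    {moveChunk: "test.foo", find: {x: 0}, to: st.shard1.shardName, forceJumbo: true}));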
diff --git a/jstests/sharding/range_deleter_interacts_correctly_with_refine_shard_key.js b/jstests/sharding/range_deleter_interacts_correctly_with_refine_shard_key.js
index cc7ce024724..efb5387cf24 100644
--- a/jstests/sharding/range_deleter_interacts_correctly_with_refine_shard_key.js
+++ b/jstests/sharding/range_deleter_interacts_correctly_with_refine_shard_key.js
@@ -68,7 +68,6 @@ function test(st, description, testBody) {
jsTest.log(`Finished Running Test: ${description}`);
}
-// Tests with resumable range deleter enabled.
(() => {
const st = new ShardingTest({shards: {rs0: {nodes: 3}, rs1: {nodes: 3}}});
test(st,
@@ -304,185 +303,4 @@ function test(st, description, testBody) {
st.stop();
})();
-
-// Tests with resumable range deleter disabled.
-(() => {
- const st = new ShardingTest(
- {shards: 2, shardOptions: {setParameter: {"disableResumableRangeDeleter": true}}});
-
- test(st,
- "Refining the shard key does not prevent removal of orphaned documents on a donor" +
- " shard after a successful migration",
- () => {
- // Enable failpoint which will cause range deletion to hang indefinitely.
- let suspendRangeDeletionFailpoint =
- configureFailPoint(st.rs0.getPrimary(), "suspendRangeDeletion");
-
- // Note that _waitForDelete has to be absent/false since we're suspending range
- // deletion.
- assert.commandWorked(st.s.adminCommand(
- {moveChunk: ns, find: shardKeyValueInChunk, to: st.shard1.shardName}));
-
- jsTestLog("Waiting for the suspendRangeDeletion failpoint to be hit");
-
- suspendRangeDeletionFailpoint.wait();
-
- jsTestLog("Refining the shard key");
-
- // Create an index on the refined shard key.
- assert.commandWorked(st.s.getCollection(ns).createIndex(refinedShardKey));
-
- // Refine the shard key from just the field 'x' to 'x' and 'y'.
- assert.commandWorked(
- st.s.adminCommand({refineCollectionShardKey: ns, key: refinedShardKey}));
-
- // The index on the original shard key shouldn't be required anymore.
- assert.commandWorked(st.s.getCollection(ns).dropIndex(originalShardKey));
-
- // Allow range deletion to continue.
- suspendRangeDeletionFailpoint.off();
-
- jsTestLog("Waiting for orphans to be removed from shard 0");
-
- // The range deletion should eventually succeed in the background.
- assert.soon(() => {
- return st.rs0.getPrimary().getCollection(ns).find().itcount() == 0;
- });
- });
-
- test(st,
- "Chunks with a refined shard key cannot migrate back onto a shard with " +
- "an orphaned range created with the prior shard key",
- () => {
- // Enable failpoint which will cause range deletion to hang indefinitely.
- let suspendRangeDeletionFailpoint =
- configureFailPoint(st.rs0.getPrimary(), "suspendRangeDeletion");
-
- // Note that _waitForDelete has to be absent/false since we're suspending range
- // deletion.
- assert.commandWorked(st.s.adminCommand(
- {moveChunk: ns, find: shardKeyValueInChunk, to: st.shard1.shardName}));
-
- jsTestLog("Waiting for the suspendRangeDeletion failpoint to be hit");
-
- suspendRangeDeletionFailpoint.wait();
-
- jsTestLog("Refining the shard key");
-
- // Create an index on the refined shard key.
- assert.commandWorked(st.s.getCollection(ns).createIndex(refinedShardKey));
-
- // Refine the shard key from just the field 'x' to 'x' and 'y'.
- assert.commandWorked(
- st.s.adminCommand({refineCollectionShardKey: ns, key: refinedShardKey}));
-
- // The index on the original shard key shouldn't be required anymore.
- assert.commandWorked(st.s.getCollection(ns).dropIndex(originalShardKey));
-
- // We will use this to wait until the following migration has completed, since we
- // expect the client to time out. Waiting for this failpoint technically just waits for
- // the recipient side of the migration to complete, but it's expected that if the
- // migration can get to that point, then it should be able to succeed overall.
- let hangDonorAtEndOfMigration =
- configureFailPoint(st.rs1.getPrimary(), "moveChunkHangAtStep6");
-
- jsTestLog("Attempting to move the chunk back to shard 0");
-
- // Attempt to move the chunk back to shard 0, sending it with maxTimeMS. Since there
- // will be orphaned documents still on shard 0 (because range deletion is paused), we
- // expected this command to time out. This will NOT fail the migration, however, since
- // that occurs in a background OperationContext.
- assert.commandFailedWithCode(st.s.adminCommand({
- moveChunk: ns,
- find: refinedShardKeyValueInChunk,
- to: st.shard0.shardName,
- maxTimeMS: 1000
- }),
- ErrorCodes.MaxTimeMSExpired);
-
- // Hang after waiting for orphan cleanup so that in the test we can check for orphans
- // on disk before documents begin migrating.
- let hangRecipient =
- configureFailPoint(st.rs0.getPrimary(), "migrateThreadHangAtStep1");
-
- // Allow range deletion to continue.
- suspendRangeDeletionFailpoint.off();
-
- jsTestLog("Waiting for orphans to be removed from shard 0");
-
- // The range deletion should eventually succeed in the background.
- assert.soon(() => {
- return st.rs0.getPrimary().getCollection(ns).find().itcount() == 0;
- });
-
- hangRecipient.off();
-
- // Wait for the previous migration to complete before continuing.
- hangDonorAtEndOfMigration.wait();
- hangDonorAtEndOfMigration.off();
-
- // TODO (SERVER-47003): There will be a left-over entry in config.migrations after the
- // previous moveChunk fails with MaxTimeMSExpired, so we clear the collection.
- // Otherwise future migrations would receive a DuplicateKeyError when trying to update
- // config.migrations.
- st.config.getSiblingDB('config').migrations.remove({}, false /* justOne */);
- });
-
- test(st,
- "Refining the shard key does not prevent removal of orphaned documents on a recipient" +
- " shard after a failed migration",
- () => {
- let hangRecipientAfterCloningDocuments =
- configureFailPoint(st.rs1.getPrimary(), "migrateThreadHangAtStep3");
-
- // Attempt to move the chunk to shard 1. This will clone all documents from shard 0 to
- // shard 1 and then block behind the hangRecipientAfterCloningDocuments failpoint.
- // Then, when the index is created on the refined shard key, the migration will be
- // interrupted, causing it to fail with error code Interrupted.
- const awaitResult =
- startParallelShell(funWithArgs(function(ns, shardKeyValueInChunk, toShardName) {
- assert.commandFailedWithCode(db.adminCommand({
- moveChunk: ns,
- find: shardKeyValueInChunk,
- to: toShardName,
- _waitForDelete: true
- }),
- ErrorCodes.Interrupted);
- jsTestLog("Recipient failed in parallel shell");
- }, ns, shardKeyValueInChunk, st.shard1.shardName), st.s.port);
-
- jsTestLog("Waiting for recipient to finish cloning documents");
-
- hangRecipientAfterCloningDocuments.wait();
-
- jsTestLog("Refining the shard key");
-
- // Create an index on the refined shard key.
- assert.commandWorked(st.s.getCollection(ns).createIndex(refinedShardKey));
-
- // Refine the shard key from just the field 'x' to 'x' and 'y'.
- assert.commandWorked(
- st.s.adminCommand({refineCollectionShardKey: ns, key: refinedShardKey}));
-
- // The index on the original shard key shouldn't be required anymore.
- assert.commandWorked(st.s.getCollection(ns).dropIndex(originalShardKey));
-
- // Turn off failpoint and wait for recipient to fail.
- hangRecipientAfterCloningDocuments.off();
- awaitResult();
-
- // TODO (SERVER-47025): Without creating this index, the range deleter will hang
- // indefinitely looking for a shard key index.
- assert.commandWorked(st.shard1.getCollection(ns).createIndex(refinedShardKey));
-
- jsTestLog("Waiting for orphans to be removed from shard 1");
-
- // The range deletion should eventually succeed in the background on the recipient.
- assert.soon(() => {
- return st.rs1.getPrimary().getCollection(ns).find().itcount() == 0;
- });
- });
-
- st.stop();
-})();
})();
diff --git a/jstests/ssl/libs/ssl_helpers.js b/jstests/ssl/libs/ssl_helpers.js
index 6e1c9756420..2045cc464b9 100644
--- a/jstests/ssl/libs/ssl_helpers.js
+++ b/jstests/ssl/libs/ssl_helpers.js
@@ -110,7 +110,7 @@ function testShardedLookup(shardingTest) {
* Takes in two mongod/mongos configuration options and runs a basic
* sharding test to see if they can work together...
*/
-function mixedShardTest(options1, options2, shouldSucceed, disableResumableRangeDeleter) {
+function mixedShardTest(options1, options2, shouldSucceed) {
let authSucceeded = false;
try {
// Start ShardingTest with enableBalancer because ShardingTest attempts to turn
@@ -123,19 +123,12 @@ function mixedShardTest(options1, options2, shouldSucceed, disableResumableRange
//
// Once SERVER-14017 is fixed the "enableBalancer" line can be removed.
// TODO: SERVER-43899 Make sharding_with_x509.js and mixed_mode_sharded_transition.js start
- // shards as replica sets and remove disableResumableRangeDeleter parameter.
- let otherOptions = {enableBalancer: true};
-
- if (disableResumableRangeDeleter) {
- otherOptions.shardAsReplicaSet = false;
- otherOptions.shardOptions = {setParameter: {"disableResumableRangeDeleter": true}};
- }
-
+ // shards as replica sets.
var st = new ShardingTest({
mongos: [options1],
config: [options1],
shards: [options1, options2],
- other: otherOptions
+ other: {enableBalancer: true, shardAsReplicaSet: false},
});
// Create admin user in case the options include auth
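After this change a caller passes only the two option sets and the expected outcome, for example:

// Usage sketch: the fourth disableResumableRangeDeleter argument is gone.
mixedShardTest(options1, options2, true /* shouldSucceed */);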
diff --git a/jstests/ssl/mixed_mode_sharded.js b/jstests/ssl/mixed_mode_sharded.js
index 40271d2d2d3..523393bff07 100644
--- a/jstests/ssl/mixed_mode_sharded.js
+++ b/jstests/ssl/mixed_mode_sharded.js
@@ -1,6 +1,9 @@
/**
* This test checks if different mixtures of ssl modes
* in a sharded cluster can or cannot function
+ *
+ * TODO (SERVER-48261): Fix test to allow it to work with the resumable range deleter enabled.
+ * @tags: [__TEMPORARILY_DISABLED__]
*/
load("jstests/ssl/libs/ssl_helpers.js");
diff --git a/jstests/ssl/mixed_mode_sharded_transition.js b/jstests/ssl/mixed_mode_sharded_transition.js
index f6b0d7bbaec..9cfd86495ed 100644
--- a/jstests/ssl/mixed_mode_sharded_transition.js
+++ b/jstests/ssl/mixed_mode_sharded_transition.js
@@ -4,6 +4,9 @@
*
* NOTE: This test is similar to the mixed_mode_sharded_transition.js in the sslSpecial
* test suite. This suite must use ssl so it cannot test modes without ssl.
+ *
+ * TODO (SERVER-48261): Fix test to allow it to work with the resumable range deleter enabled.
+ * @tags: [__TEMPORARILY_DISABLED__]
*/
load('jstests/ssl/libs/ssl_helpers.js');
@@ -11,8 +14,6 @@ load('jstests/ssl/libs/ssl_helpers.js');
(function() {
'use strict';
-const disableResumableRangeDeleter = true;
-
var transitionToX509AllowSSL =
Object.merge(allowSSL, {transitionToAuth: '', clusterAuthMode: 'x509'});
var transitionToX509PreferSSL =
@@ -20,16 +21,15 @@ var transitionToX509PreferSSL =
var x509RequireSSL = Object.merge(requireSSL, {clusterAuthMode: 'x509'});
function testCombos(opt1, opt2, shouldSucceed) {
- mixedShardTest(opt1, opt2, shouldSucceed, disableResumableRangeDeleter);
- mixedShardTest(opt2, opt1, shouldSucceed, disableResumableRangeDeleter);
+ mixedShardTest(opt1, opt2, shouldSucceed);
+ mixedShardTest(opt2, opt1, shouldSucceed);
}
print('=== Testing transitionToAuth/allowSSL - transitionToAuth/preferSSL cluster ===');
testCombos(transitionToX509AllowSSL, transitionToX509PreferSSL, true);
print('=== Testing transitionToAuth/preferSSL - transitionToAuth/preferSSL cluster ===');
-mixedShardTest(
- transitionToX509PreferSSL, transitionToX509PreferSSL, true, disableResumableRangeDeleter);
+mixedShardTest(transitionToX509PreferSSL, transitionToX509PreferSSL, true);
print('=== Testing transitionToAuth/preferSSL - x509/requireSSL cluster ===');
testCombos(transitionToX509PreferSSL, x509RequireSSL, true);
diff --git a/jstests/sslSpecial/mixed_mode_sharded_nossl.js b/jstests/sslSpecial/mixed_mode_sharded_nossl.js
index 78e48c2098c..7cd73689b02 100644
--- a/jstests/sslSpecial/mixed_mode_sharded_nossl.js
+++ b/jstests/sslSpecial/mixed_mode_sharded_nossl.js
@@ -1,6 +1,9 @@
/**
* This test checks if different mixtures of ssl modes
 * in a sharded cluster can or cannot function
+ *
+ * TODO (SERVER-48261): Fix test to allow it to work with the resumable range deleter enabled.
+ * @tags: [__TEMPORARILY_DISABLED__]
*/
load("jstests/ssl/libs/ssl_helpers.js");
diff --git a/jstests/sslSpecial/mixed_mode_sharded_transition_nossl.js b/jstests/sslSpecial/mixed_mode_sharded_transition_nossl.js
index 59f2f90b4e5..f01680a4d5d 100644
--- a/jstests/sslSpecial/mixed_mode_sharded_transition_nossl.js
+++ b/jstests/sslSpecial/mixed_mode_sharded_transition_nossl.js
@@ -4,6 +4,9 @@
*
* NOTE: This test is similar to the mixed_mode_sharded_transition.js in the ssl
* test suite. This suite does not use ssl so it cannot test modes with ssl.
+ *
+ * TODO (SERVER-48261): Fix test to allow it to work with the resumable range deleter enabled.
+ * @tags: [__TEMPORARILY_DISABLED__]
*/
// Test setup randomly has auth/no auth setting on shards, which makes hooks targeting shard
diff --git a/src/mongo/base/error_codes.yml b/src/mongo/base/error_codes.yml
index 29de011915d..ec378cb034a 100644
--- a/src/mongo/base/error_codes.yml
+++ b/src/mongo/base/error_codes.yml
@@ -363,6 +363,8 @@ error_codes:
- {code: 311,name: TransactionCoordinatorCanceled}
- {code: 312,name: OperationIsKilledAndDelisted,categories: [CancelationError]}
+ - {code: 313,name: ResumableRangeDeleterDisabled}
+
# Error codes 4000-8999 are reserved.
# Non-sequential error codes for compatibility only)
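As a hedged illustration of how the new code surfaces to tests (namespace and key are hypothetical), a migration rejected by a shard with the parameter set can be asserted on by name:

// Sketch only: ErrorCodes.ResumableRangeDeleterDisabled is generated from this entry;
// "test.foo" and {x: 0} are placeholders.
assert.commandFailedWithCode(
    st.s.adminCommand({moveChunk: "test.foo", find: {x: 0}, to: st.shard1.shardName}),
    ErrorCodes.ResumableRangeDeleterDisabled);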
diff --git a/src/mongo/db/s/cleanup_orphaned_cmd.cpp b/src/mongo/db/s/cleanup_orphaned_cmd.cpp
index 3725ab951d6..97b78e600ba 100644
--- a/src/mongo/db/s/cleanup_orphaned_cmd.cpp
+++ b/src/mongo/db/s/cleanup_orphaned_cmd.cpp
@@ -59,14 +59,8 @@ namespace {
enum class CleanupResult { kDone, kContinue, kError };
/**
- * If the resumable range deleter is disabled:
- * Cleans up one range of orphaned data starting from a range that overlaps or starts at
- * 'startingFromKey'. If empty, startingFromKey is the minimum key of the sharded range.
- *
- * If the resumable range deleter is enabled:
* Waits for all possibly orphaned ranges on 'nss' to be cleaned up.
*
- * @return CleanupResult::kContinue and 'stoppedAtKey' if orphaned range was found and cleaned
* @return CleanupResult::kDone if no orphaned ranges remain
* @return CleanupResult::kError and 'errMsg' if an error occurred
*
@@ -75,205 +69,86 @@ enum class CleanupResult { kDone, kContinue, kError };
CleanupResult cleanupOrphanedData(OperationContext* opCtx,
const NamespaceString& ns,
const BSONObj& startingFromKeyConst,
- BSONObj* stoppedAtKey,
std::string* errMsg) {
- // Note that 'disableResumableRangeDeleter' is a startup-only parameter, so it cannot change
- // while this process is running.
- if (!disableResumableRangeDeleter.load()) {
- boost::optional<ChunkRange> range;
- boost::optional<UUID> collectionUuid;
- {
- AutoGetCollection autoColl(opCtx, ns, MODE_IX);
- if (!autoColl.getCollection()) {
- LOGV2(4416000,
- "cleanupOrphaned skipping waiting for orphaned data cleanup because "
- "{namespace} does not exist",
- "cleanupOrphaned skipping waiting for orphaned data cleanup because "
- "collection does not exist",
- "namespace"_attr = ns.ns());
- return CleanupResult::kDone;
- }
- collectionUuid.emplace(autoColl.getCollection()->uuid());
-
- auto* const css = CollectionShardingRuntime::get(opCtx, ns);
- const auto collDesc = css->getCollectionDescription();
- if (!collDesc.isSharded()) {
- LOGV2(4416001,
- "cleanupOrphaned skipping waiting for orphaned data cleanup because "
- "{namespace} is not sharded",
- "cleanupOrphaned skipping waiting for orphaned data cleanup because "
- "collection is not sharded",
- "namespace"_attr = ns.ns());
- return CleanupResult::kDone;
- }
- range.emplace(collDesc.getMinKey(), collDesc.getMaxKey());
-
- // Though the 'startingFromKey' parameter is not used as the min key of the range to
- // wait for, we still validate that 'startingFromKey' in the same way as the original
- // cleanupOrphaned logic did if 'startingFromKey' is present.
- BSONObj keyPattern = collDesc.getKeyPattern();
- if (!startingFromKeyConst.isEmpty() && !collDesc.isValidKey(startingFromKeyConst)) {
- LOGV2_ERROR_OPTIONS(
- 4416002,
- {logv2::UserAssertAfterLog(ErrorCodes::OrphanedRangeCleanUpFailed)},
- "Could not cleanup orphaned data because start key does not match shard key "
- "pattern",
- "startKey"_attr = redact(startingFromKeyConst),
- "shardKeyPattern"_attr = keyPattern);
- }
+ boost::optional<ChunkRange> range;
+ boost::optional<UUID> collectionUuid;
+ {
+ AutoGetCollection autoColl(opCtx, ns, MODE_IX);
+ if (!autoColl.getCollection()) {
+ LOGV2(4416000,
+ "cleanupOrphaned skipping waiting for orphaned data cleanup because "
+ "{namespace} does not exist",
+ "cleanupOrphaned skipping waiting for orphaned data cleanup because "
+ "collection does not exist",
+ "namespace"_attr = ns.ns());
+ return CleanupResult::kDone;
}
-
- // We actually want to wait until there are no range deletion tasks for this namespace/UUID,
- // but we don't have a good way to wait for that event, so instead we wait for there to be
- // no tasks being processed in memory for this namespace/UUID.
- // However, it's possible this node has recently stepped up, and the stepup recovery task to
- // resubmit range deletion tasks for processing has not yet completed. In that case,
- // waitForClean will return though there are still tasks in config.rangeDeletions, so we
- // sleep for a short time and then try waitForClean again.
- while (auto numRemainingDeletionTasks =
- migrationutil::checkForConflictingDeletions(opCtx, *range, *collectionUuid)) {
- LOGV2(4416003,
- "cleanupOrphaned going to wait for range deletion tasks to complete",
- "namespace"_attr = ns.ns(),
- "collectionUUID"_attr = *collectionUuid,
- "numRemainingDeletionTasks"_attr = numRemainingDeletionTasks);
-
- auto status =
- CollectionShardingRuntime::waitForClean(opCtx, ns, *collectionUuid, *range);
-
- if (!status.isOK()) {
- *errMsg = status.reason();
- return CleanupResult::kError;
- }
-
- opCtx->sleepFor(Milliseconds(1000));
+ collectionUuid.emplace(autoColl.getCollection()->uuid());
+
+ auto* const css = CollectionShardingRuntime::get(opCtx, ns);
+ const auto collDesc = css->getCollectionDescription();
+ if (!collDesc.isSharded()) {
+ LOGV2(4416001,
+ "cleanupOrphaned skipping waiting for orphaned data cleanup because "
+ "{namespace} is not sharded",
+ "cleanupOrphaned skipping waiting for orphaned data cleanup because "
+ "collection is not sharded",
+ "namespace"_attr = ns.ns());
+ return CleanupResult::kDone;
}
-
- return CleanupResult::kDone;
- } else {
-
- BSONObj startingFromKey = startingFromKeyConst;
- boost::optional<ChunkRange> targetRange;
- SharedSemiFuture<void> cleanupCompleteFuture;
-
- {
- AutoGetCollection autoColl(opCtx, ns, MODE_IX);
- auto* const css = CollectionShardingRuntime::get(opCtx, ns);
- // Keep the collection metadata from changing for the rest of this scope.
- auto csrLock = CollectionShardingRuntime::CSRLock::lockShared(opCtx, css);
- const auto collDesc = css->getCollectionDescription();
- if (!collDesc.isSharded()) {
- LOGV2(21911,
- "cleanupOrphaned skipping orphaned data cleanup because collection is not "
- "sharded",
- "namespace"_attr = ns.ns());
- return CleanupResult::kDone;
- }
-
- BSONObj keyPattern = collDesc.getKeyPattern();
- if (!startingFromKey.isEmpty()) {
- if (!collDesc.isValidKey(startingFromKey)) {
- LOGV2_ERROR_OPTIONS(
- 21912,
- {logv2::UserAssertAfterLog(ErrorCodes::OrphanedRangeCleanUpFailed)},
- "Could not cleanup orphaned data, start key {startKey} does not match "
- "shard key pattern {shardKeyPattern}",
- "Could not cleanup orphaned data because start key does not match shard "
- "key pattern",
- "startKey"_attr = redact(startingFromKey),
- "shardKeyPattern"_attr = keyPattern);
- }
- } else {
- startingFromKey = collDesc.getMinKey();
- }
-
- targetRange = css->getNextOrphanRange(startingFromKey);
- if (!targetRange) {
- LOGV2_DEBUG(21913,
- 1,
- "cleanupOrphaned returning because no orphan ranges remain",
- "namespace"_attr = ns.toString(),
- "startingFromKey"_attr = redact(startingFromKey));
-
- return CleanupResult::kDone;
- }
-
- *stoppedAtKey = targetRange->getMax();
-
- cleanupCompleteFuture =
- css->cleanUpRange(*targetRange, boost::none, CollectionShardingRuntime::kNow);
+ range.emplace(collDesc.getMinKey(), collDesc.getMaxKey());
+
+ // Though the 'startingFromKey' parameter is not used as the min key of the range to
+    // wait for, we still validate 'startingFromKey' in the same way as the original
+ // cleanupOrphaned logic did if 'startingFromKey' is present.
+ BSONObj keyPattern = collDesc.getKeyPattern();
+ if (!startingFromKeyConst.isEmpty() && !collDesc.isValidKey(startingFromKeyConst)) {
+ LOGV2_ERROR_OPTIONS(
+ 4416002,
+ {logv2::UserAssertAfterLog(ErrorCodes::OrphanedRangeCleanUpFailed)},
+ "Could not cleanup orphaned data because start key does not match shard key "
+ "pattern",
+ "startKey"_attr = redact(startingFromKeyConst),
+ "shardKeyPattern"_attr = keyPattern);
}
+ }
- // Sleep waiting for our own deletion. We don't actually care about any others, so there is
- // no need to call css::waitForClean() here.
-
- LOGV2_DEBUG(21914,
- 1,
- "cleanupOrphaned requested for {namespace} starting from {startingFromKey}, "
- "removing next orphan range {targetRange}; waiting...",
- "cleanupOrphaned requested",
- "namespace"_attr = ns.toString(),
- "startingFromKey"_attr = redact(startingFromKey),
- "targetRange"_attr = redact(targetRange->toString()));
-
- Status result = cleanupCompleteFuture.getNoThrow(opCtx);
-
- LOGV2_DEBUG(21915,
- 1,
- "Finished waiting for last {namespace} orphan range cleanup",
- "Finished waiting for last orphan range cleanup in collection",
- "namespace"_attr = ns.toString());
-
- if (!result.isOK()) {
- LOGV2_ERROR_OPTIONS(21916,
- {logv2::UserAssertAfterLog(result.code())},
- "Error waiting for last {namespace} orphan range cleanup: {error}",
- "Error waiting for last orphan range cleanup in collection",
- "namespace"_attr = ns.ns(),
- "error"_attr = redact(result.reason()));
+ // We actually want to wait until there are no range deletion tasks for this namespace/UUID,
+ // but we don't have a good way to wait for that event, so instead we wait for there to be
+ // no tasks being processed in memory for this namespace/UUID.
+ // However, it's possible this node has recently stepped up, and the stepup recovery task to
+ // resubmit range deletion tasks for processing has not yet completed. In that case,
+    // waitForClean will return even though there are still tasks in config.rangeDeletions, so we
+ // sleep for a short time and then try waitForClean again.
+ while (auto numRemainingDeletionTasks =
+ migrationutil::checkForConflictingDeletions(opCtx, *range, *collectionUuid)) {
+ uassert(ErrorCodes::ResumableRangeDeleterDisabled,
+ "Failing cleanupOrphaned because the disableResumableRangeDeleter server parameter "
+ "is set to true and this shard contains range deletion tasks for the collection.",
+ !disableResumableRangeDeleter.load());
+
+ LOGV2(4416003,
+ "cleanupOrphaned going to wait for range deletion tasks to complete",
+ "namespace"_attr = ns.ns(),
+ "collectionUUID"_attr = *collectionUuid,
+ "numRemainingDeletionTasks"_attr = numRemainingDeletionTasks);
+
+ auto status = CollectionShardingRuntime::waitForClean(opCtx, ns, *collectionUuid, *range);
+
+ if (!status.isOK()) {
+ *errMsg = status.reason();
+ return CleanupResult::kError;
}
- return CleanupResult::kContinue;
+ opCtx->sleepFor(Milliseconds(1000));
}
+
+ return CleanupResult::kDone;
}
/**
- * If 'disableResumableRangeDeleter=true':
- *
- * Called on a particular namespace, and if the collection is sharded will clean up a single
- * orphaned data range which overlaps or starts after a passed-in 'startingFromKey'. Returns true
- * and a 'stoppedAtKey' (which will start a search for the next orphaned range if the command is
- * called again) or no key if there are no more orphaned ranges in the collection.
- *
- * If the collection is not sharded, returns true but no 'stoppedAtKey'.
- * On failure, returns false and an error message.
- *
- * Calling this command repeatedly until no 'stoppedAtKey' is returned ensures that the
- * full collection range is searched for orphaned documents, but since sharding state may
- * change between calls there is no guarantee that all orphaned documents were found unless
- * the balancer is off.
- *
- * Safe to call with the balancer on.
- *
- * Format:
- *
- * {
- * cleanupOrphaned: <ns>,
- * // optional parameters:
- * startingAtKey: { <shardKeyValue> }, // defaults to lowest value
- * secondaryThrottle: <bool>, // defaults to true
- * // defaults to { w: "majority", wtimeout: 60000 }. Applies to individual writes.
- * writeConcern: { <writeConcern options> }
- * }
- *
- * If 'disableResumableRangeDeleter=false':
- *
* Called on a particular namespace, and if the collection is sharded will wait for the number of
- * range deletion tasks on the collection on this shard to reach zero. Returns true on completion,
- * but never returns 'stoppedAtKey', since it always returns once there are no more orphaned ranges.
- *
- * If the collection is not sharded, returns true and no 'stoppedAtKey'.
- * On failure, returns false and an error message.
+ * range deletion tasks on the collection on this shard to reach zero.
*
* Since the sharding state may change after this call returns, there is no guarantee that orphans
 * won't re-appear as a result of migrations that commit subsequently.
@@ -310,9 +185,6 @@ public:
static BSONField<std::string> nsField;
static BSONField<BSONObj> startingFromKeyField;
- // Output
- static BSONField<BSONObj> stoppedAtKeyField;
-
bool errmsgRun(OperationContext* opCtx,
std::string const& db,
const BSONObj& cmdObj,
@@ -343,19 +215,12 @@ public:
forceShardFilteringMetadataRefresh(opCtx, nss, true /* forceRefreshFromThisThread */);
- BSONObj stoppedAtKey;
- CleanupResult cleanupResult =
- cleanupOrphanedData(opCtx, nss, startingFromKey, &stoppedAtKey, &errmsg);
+ CleanupResult cleanupResult = cleanupOrphanedData(opCtx, nss, startingFromKey, &errmsg);
if (cleanupResult == CleanupResult::kError) {
return false;
}
-
- if (cleanupResult == CleanupResult::kContinue) {
- result.append(stoppedAtKeyField(), stoppedAtKey);
- } else {
- dassert(cleanupResult == CleanupResult::kDone);
- }
+ dassert(cleanupResult == CleanupResult::kDone);
return true;
}
@@ -364,7 +229,6 @@ public:
BSONField<std::string> CleanupOrphanedCommand::nsField("cleanupOrphaned");
BSONField<BSONObj> CleanupOrphanedCommand::startingFromKeyField("startingFromKey");
-BSONField<BSONObj> CleanupOrphanedCommand::stoppedAtKeyField("stoppedAtKey");
} // namespace
} // namespace mongo
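With this change a single invocation waits for all remaining range deletion tasks on the collection, so the typical shell usage reduces to one call (the namespace is hypothetical):

// Sketch only: run against a shard primary; 'stoppedAtKey' is no longer returned, so no
// caller-side loop over repeated calls is needed.
const res = st.shard0.getDB("admin").runCommand({cleanupOrphaned: "test.foo"});
assert.commandWorked(res);
assert(!res.hasOwnProperty("stoppedAtKey"));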
diff --git a/src/mongo/db/s/migration_chunk_cloner_source.h b/src/mongo/db/s/migration_chunk_cloner_source.h
index c06a9d0a5a1..00e17e8fe3a 100644
--- a/src/mongo/db/s/migration_chunk_cloner_source.h
+++ b/src/mongo/db/s/migration_chunk_cloner_source.h
@@ -77,8 +77,7 @@ public:
virtual Status startClone(OperationContext* opCtx,
const UUID& migrationId,
const LogicalSessionId& lsid,
- TxnNumber txnNumber,
- bool resumableRangeDeleterDisabled) = 0;
+ TxnNumber txnNumber) = 0;
/**
* Blocking method, which uses some custom selected logic for deciding whether it is appropriate
diff --git a/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp b/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp
index f64e96fcd48..40a1703318b 100644
--- a/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp
+++ b/src/mongo/db/s/migration_chunk_cloner_source_legacy.cpp
@@ -242,8 +242,7 @@ MigrationChunkClonerSourceLegacy::~MigrationChunkClonerSourceLegacy() {
Status MigrationChunkClonerSourceLegacy::startClone(OperationContext* opCtx,
const UUID& migrationId,
const LogicalSessionId& lsid,
- TxnNumber txnNumber,
- bool resumableRangeDeleterDisabled) {
+ TxnNumber txnNumber) {
invariant(_state == kNew);
invariant(!opCtx->lockState()->isLocked());
@@ -296,8 +295,7 @@ Status MigrationChunkClonerSourceLegacy::startClone(OperationContext* opCtx,
_args.getMinKey(),
_args.getMaxKey(),
_shardKeyPattern.toBSON(),
- _args.getSecondaryThrottle(),
- resumableRangeDeleterDisabled);
+ _args.getSecondaryThrottle());
// Commands sent to shards that accept writeConcern, must always have writeConcern. So if the
// StartChunkCloneRequest didn't add writeConcern (from secondaryThrottle), then we add the
diff --git a/src/mongo/db/s/migration_chunk_cloner_source_legacy.h b/src/mongo/db/s/migration_chunk_cloner_source_legacy.h
index 957615b6e29..8e34e2033f3 100644
--- a/src/mongo/db/s/migration_chunk_cloner_source_legacy.h
+++ b/src/mongo/db/s/migration_chunk_cloner_source_legacy.h
@@ -93,8 +93,7 @@ public:
Status startClone(OperationContext* opCtx,
const UUID& migrationId,
const LogicalSessionId& lsid,
- TxnNumber txnNumber,
- bool resumableRangeDeleterDisabled) override;
+ TxnNumber txnNumber) override;
Status awaitUntilCriticalSectionIsAppropriate(OperationContext* opCtx,
Milliseconds maxTimeToWait) override;
diff --git a/src/mongo/db/s/migration_chunk_cloner_source_legacy_test.cpp b/src/mongo/db/s/migration_chunk_cloner_source_legacy_test.cpp
index 44bd0c01b5b..730a2d4af3b 100644
--- a/src/mongo/db/s/migration_chunk_cloner_source_legacy_test.cpp
+++ b/src/mongo/db/s/migration_chunk_cloner_source_legacy_test.cpp
@@ -252,7 +252,7 @@ TEST_F(MigrationChunkClonerSourceLegacyTest, CorrectDocumentsFetched) {
onCommand([&](const RemoteCommandRequest& request) { return BSON("ok" << true); });
});
- ASSERT_OK(cloner.startClone(operationContext(), UUID::gen(), _lsid, _txnNumber, false));
+ ASSERT_OK(cloner.startClone(operationContext(), UUID::gen(), _lsid, _txnNumber));
futureStartClone.default_timed_get();
}
@@ -350,7 +350,7 @@ TEST_F(MigrationChunkClonerSourceLegacyTest, CollectionNotFound) {
kDonorConnStr,
kRecipientConnStr.getServers()[0]);
- ASSERT_NOT_OK(cloner.startClone(operationContext(), UUID::gen(), _lsid, _txnNumber, false));
+ ASSERT_NOT_OK(cloner.startClone(operationContext(), UUID::gen(), _lsid, _txnNumber));
cloner.cancelClone(operationContext());
}
@@ -363,7 +363,7 @@ TEST_F(MigrationChunkClonerSourceLegacyTest, ShardKeyIndexNotFound) {
kDonorConnStr,
kRecipientConnStr.getServers()[0]);
- ASSERT_NOT_OK(cloner.startClone(operationContext(), UUID::gen(), _lsid, _txnNumber, false));
+ ASSERT_NOT_OK(cloner.startClone(operationContext(), UUID::gen(), _lsid, _txnNumber));
cloner.cancelClone(operationContext());
}
@@ -390,7 +390,7 @@ TEST_F(MigrationChunkClonerSourceLegacyTest, FailedToEngageRecipientShard) {
});
auto startCloneStatus =
- cloner.startClone(operationContext(), UUID::gen(), _lsid, _txnNumber, false);
+ cloner.startClone(operationContext(), UUID::gen(), _lsid, _txnNumber);
ASSERT_EQ(ErrorCodes::NetworkTimeout, startCloneStatus.code());
futureStartClone.default_timed_get();
}
diff --git a/src/mongo/db/s/migration_destination_manager.cpp b/src/mongo/db/s/migration_destination_manager.cpp
index af0888d8992..fc5b7258138 100644
--- a/src/mongo/db/s/migration_destination_manager.cpp
+++ b/src/mongo/db/s/migration_destination_manager.cpp
@@ -345,24 +345,13 @@ Status MigrationDestinationManager::start(OperationContext* opCtx,
invariant(!_sessionId);
invariant(!_scopedReceiveChunk);
- _enableResumableRangeDeleter = !disableResumableRangeDeleter.load();
-
_state = READY;
_stateChangedCV.notify_all();
_errmsg = "";
- if (_enableResumableRangeDeleter) {
- uassert(ErrorCodes::IllegalOperation,
- str::stream() << "Did not receive migrationId in _recvChunkStart, but this node "
- "has'disableResumableRangeDeleter=false'. Does the donor shard "
- << cloneRequest.getFromShardId()
- << " have 'disableResumableRangeDeleter=true'?",
- cloneRequest.hasMigrationId());
-
- _migrationId = cloneRequest.getMigrationId();
- _lsid = cloneRequest.getLsid();
- _txnNumber = cloneRequest.getTxnNumber();
- }
+ _migrationId = cloneRequest.getMigrationId();
+ _lsid = cloneRequest.getLsid();
+ _txnNumber = cloneRequest.getTxnNumber();
_nss = nss;
_fromShard = cloneRequest.getFromShardId();
@@ -784,31 +773,21 @@ void MigrationDestinationManager::_migrateThread() {
// txnNumber on this session while this node is still executing the recipient side
//(which is important because otherwise, this node may create orphans after the
// range deletion task on this node has been processed).
- if (_enableResumableRangeDeleter) {
- opCtx->setLogicalSessionId(_lsid);
- opCtx->setTxnNumber(_txnNumber);
-
- MongoDOperationContextSession sessionTxnState(opCtx);
-
- auto txnParticipant = TransactionParticipant::get(opCtx);
- txnParticipant.beginOrContinue(opCtx,
- *opCtx->getTxnNumber(),
- boost::none /* autocommit */,
- boost::none /* startTransaction */);
- _migrateDriver(opCtx);
- } else {
- _migrateDriver(opCtx);
- }
+ opCtx->setLogicalSessionId(_lsid);
+ opCtx->setTxnNumber(_txnNumber);
+
+ MongoDOperationContextSession sessionTxnState(opCtx);
+
+ auto txnParticipant = TransactionParticipant::get(opCtx);
+ txnParticipant.beginOrContinue(opCtx,
+ *opCtx->getTxnNumber(),
+ boost::none /* autocommit */,
+ boost::none /* startTransaction */);
+ _migrateDriver(opCtx);
} catch (...) {
_setStateFail(str::stream() << "migrate failed: " << redact(exceptionToStatus()));
}
- if (!_enableResumableRangeDeleter) {
- if (getState() != DONE) {
- _forgetPending(opCtx, ChunkRange(_min, _max));
- }
- }
-
stdx::lock_guard<Latch> lk(_mutex);
_sessionId.reset();
_collUuid.reset();
@@ -833,7 +812,7 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx) {
"fromShard"_attr = _fromShard,
"epoch"_attr = _epoch,
"sessionId"_attr = *_sessionId,
- "migrationId"_attr = _enableResumableRangeDeleter ? _migrationId->toBSON() : BSONObj());
+ "migrationId"_attr = _migrationId.toBSON());
MoveTimingHelper timing(
outerOpCtx, "to", _nss.ns(), _min, _max, 6 /* steps */, &_errmsg, _toShard, _fromShard);
@@ -843,8 +822,7 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx) {
if (initialState == ABORT) {
LOGV2_ERROR(22013,
"Migration abort requested before the migration started",
- "migrationId"_attr =
- _enableResumableRangeDeleter ? _migrationId->toBSON() : BSONObj());
+ "migrationId"_attr = _migrationId.toBSON());
return;
}
@@ -861,67 +839,44 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx) {
// 2. Ensure any data which might have been left orphaned in the range being moved has been
// deleted.
- if (_enableResumableRangeDeleter) {
- while (migrationutil::checkForConflictingDeletions(
- outerOpCtx, range, donorCollectionOptionsAndIndexes.uuid)) {
- LOGV2(22001,
- "Migration paused because the requested range {range} for {namespace} "
- "overlaps with a range already scheduled for deletion",
- "Migration paused because the requested range overlaps with a range already "
- "scheduled for deletion",
- "namespace"_attr = _nss.ns(),
- "range"_attr = redact(range.toString()),
- "migrationId"_attr = *_migrationId);
-
- auto status = CollectionShardingRuntime::waitForClean(
- outerOpCtx, _nss, donorCollectionOptionsAndIndexes.uuid, range);
-
- if (!status.isOK()) {
- _setStateFail(redact(status.reason()));
- return;
- }
-
- outerOpCtx->sleepFor(Milliseconds(1000));
- }
+ while (migrationutil::checkForConflictingDeletions(
+ outerOpCtx, range, donorCollectionOptionsAndIndexes.uuid)) {
+ uassert(ErrorCodes::ResumableRangeDeleterDisabled,
+ "Failing migration because the disableResumableRangeDeleter server "
+ "parameter is set to true on the recipient shard, which contains range "
+ "deletion tasks overlapping the incoming range.",
+ !disableResumableRangeDeleter.load());
+
+ LOGV2(22001,
+ "Migration paused because the requested range {range} for {namespace} "
+ "overlaps with a range already scheduled for deletion",
+ "Migration paused because the requested range overlaps with a range already "
+ "scheduled for deletion",
+ "namespace"_attr = _nss.ns(),
+ "range"_attr = redact(range.toString()),
+ "migrationId"_attr = _migrationId.toBSON());
- RangeDeletionTask recipientDeletionTask(*_migrationId,
- _nss,
- donorCollectionOptionsAndIndexes.uuid,
- _fromShard,
- range,
- CleanWhenEnum::kNow);
- recipientDeletionTask.setPending(true);
+ auto status = CollectionShardingRuntime::waitForClean(
+ outerOpCtx, _nss, donorCollectionOptionsAndIndexes.uuid, range);
- migrationutil::persistRangeDeletionTaskLocally(outerOpCtx, recipientDeletionTask);
- } else {
- // Synchronously delete any data which might have been left orphaned in the range
- // being moved, and wait for completion
-
- // Needed for _forgetPending to make sure the collection has the same UUID at the end of
- // an aborted migration as at the beginning. Must be set before calling _notePending.
- _collUuid = donorCollectionOptionsAndIndexes.uuid;
- auto cleanupCompleteFuture = _notePending(outerOpCtx, range);
- auto cleanupStatus = cleanupCompleteFuture.getNoThrow(outerOpCtx);
- // Wait for the range deletion to report back. Swallow
- // RangeDeletionAbandonedBecauseCollectionWithUUIDDoesNotExist error since the
- // collection could either never exist or get dropped directly from the shard after the
- // range deletion task got scheduled.
- if (!cleanupStatus.isOK() &&
- cleanupStatus !=
- ErrorCodes::RangeDeletionAbandonedBecauseCollectionWithUUIDDoesNotExist) {
- _setStateFail(redact(cleanupStatus.reason()));
+ if (!status.isOK()) {
+ _setStateFail(redact(status.reason()));
return;
}
- // Wait for any other, overlapping queued deletions to drain
- cleanupStatus = CollectionShardingRuntime::waitForClean(
- outerOpCtx, _nss, donorCollectionOptionsAndIndexes.uuid, range);
- if (!cleanupStatus.isOK()) {
- _setStateFail(redact(cleanupStatus.reason()));
- return;
- }
+ outerOpCtx->sleepFor(Milliseconds(1000));
}
+ // Insert a pending range deletion task for the incoming range.
+ RangeDeletionTask recipientDeletionTask(_migrationId,
+ _nss,
+ donorCollectionOptionsAndIndexes.uuid,
+ _fromShard,
+ range,
+ CleanWhenEnum::kNow);
+ recipientDeletionTask.setPending(true);
+ migrationutil::persistRangeDeletionTaskLocally(outerOpCtx, recipientDeletionTask);
+
timing.done(1);
migrateThreadHangAtStep1.pauseWhileSet();
}
@@ -1015,9 +970,7 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx) {
if (replStatus.status.code() == ErrorCodes::WriteConcernFailed) {
LOGV2_WARNING(22011,
"secondaryThrottle on, but doc insert timed out; continuing",
- "migrationId"_attr = _enableResumableRangeDeleter
- ? _migrationId->toBSON()
- : BSONObj());
+ "migrationId"_attr = _migrationId.toBSON());
} else {
uassertStatusOK(replStatus.status);
}
@@ -1093,8 +1046,7 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx) {
if (getState() == ABORT) {
LOGV2(22002,
"Migration aborted while waiting for replication at catch up stage",
- "migrationId"_attr =
- _enableResumableRangeDeleter ? _migrationId->toBSON() : BSONObj());
+ "migrationId"_attr = _migrationId.toBSON());
return;
}
@@ -1104,8 +1056,7 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx) {
if (i > 100) {
LOGV2(22003,
"secondaries having hard time keeping up with migrate",
- "migrationId"_attr =
- _enableResumableRangeDeleter ? _migrationId->toBSON() : BSONObj());
+ "migrationId"_attr = _migrationId.toBSON());
}
sleepmillis(20);
@@ -1127,8 +1078,7 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx) {
LOGV2(22004,
"Waiting for replication to catch up before entering critical section",
- "migrationId"_attr =
- _enableResumableRangeDeleter ? _migrationId->toBSON() : BSONObj());
+ "migrationId"_attr = _migrationId.toBSON());
auto awaitReplicationResult = repl::ReplicationCoordinator::get(opCtx)->awaitReplication(
opCtx, lastOpApplied, _writeConcern);
@@ -1137,8 +1087,7 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx) {
LOGV2(22005,
"Chunk data replicated successfully.",
- "migrationId"_attr =
- _enableResumableRangeDeleter ? _migrationId->toBSON() : BSONObj());
+ "migrationId"_attr = _migrationId.toBSON());
}
{
@@ -1177,8 +1126,7 @@ void MigrationDestinationManager::_migrateDriver(OperationContext* outerOpCtx) {
if (getState() == ABORT) {
LOGV2(22006,
"Migration aborted while transferring mods",
- "migrationId"_attr =
- _enableResumableRangeDeleter ? _migrationId->toBSON() : BSONObj());
+ "migrationId"_attr = _migrationId.toBSON());
return;
}
@@ -1300,9 +1248,6 @@ bool MigrationDestinationManager::_applyMigrateOp(OperationContext* opCtx,
autoColl.getDb(),
updatedDoc,
&localDoc)) {
- const auto migrationId =
- _enableResumableRangeDeleter ? _migrationId->toBSON() : BSONObj();
-
// Exception will abort migration cleanly
LOGV2_ERROR_OPTIONS(
16977,
@@ -1313,7 +1258,7 @@ bool MigrationDestinationManager::_applyMigrateOp(OperationContext* opCtx,
"reloaded remote document",
"localDoc"_attr = redact(localDoc),
"remoteDoc"_attr = redact(updatedDoc),
- "migrationId"_attr = migrationId);
+ "migrationId"_attr = _migrationId.toBSON());
}
// We are in write lock here, so sure we aren't killing
@@ -1344,8 +1289,7 @@ bool MigrationDestinationManager::_flushPendingWrites(OperationContext* opCtx,
"chunkMin"_attr = redact(_min),
"chunkMax"_attr = redact(_max),
"lastOpApplied"_attr = op,
- "migrationId"_attr =
- _enableResumableRangeDeleter ? _migrationId->toBSON() : BSONObj());
+ "migrationId"_attr = _migrationId.toBSON());
}
return false;
}
@@ -1356,7 +1300,7 @@ bool MigrationDestinationManager::_flushPendingWrites(OperationContext* opCtx,
"namespace"_attr = _nss.ns(),
"chunkMin"_attr = redact(_min),
"chunkMax"_attr = redact(_max),
- "migrationId"_attr = _enableResumableRangeDeleter ? _migrationId->toBSON() : BSONObj());
+ "migrationId"_attr = _migrationId.toBSON());
return true;
}
diff --git a/src/mongo/db/s/migration_destination_manager.h b/src/mongo/db/s/migration_destination_manager.h
index d2310ddffe2..62e6b6fc99b 100644
--- a/src/mongo/db/s/migration_destination_manager.h
+++ b/src/mongo/db/s/migration_destination_manager.h
@@ -201,11 +201,7 @@ private:
stdx::thread _migrateThreadHandle;
- // Whether to use the resumable range deleter. This decision is based on whether the FCV 4.2 or
- // FCV 4.4 protocol are in use and the disableResumableRangeDeleter option is off.
- bool _enableResumableRangeDeleter{true};
-
- boost::optional<UUID> _migrationId;
+ UUID _migrationId;
LogicalSessionId _lsid;
TxnNumber _txnNumber;
NamespaceString _nss;
diff --git a/src/mongo/db/s/migration_source_manager.cpp b/src/mongo/db/s/migration_source_manager.cpp
index 7424b8078a5..13c0546313c 100644
--- a/src/mongo/db/s/migration_source_manager.cpp
+++ b/src/mongo/db/s/migration_source_manager.cpp
@@ -51,7 +51,6 @@
#include "mongo/db/s/shard_filtering_metadata_refresh.h"
#include "mongo/db/s/shard_metadata_util.h"
#include "mongo/db/s/sharding_logging.h"
-#include "mongo/db/s/sharding_runtime_d_params_gen.h"
#include "mongo/db/s/sharding_state.h"
#include "mongo/db/s/sharding_state_recovery.h"
#include "mongo/db/s/sharding_statistics.h"
@@ -134,8 +133,6 @@ MigrationSourceManager::MigrationSourceManager(OperationContext* opCtx,
_stats(ShardingStatistics::get(_opCtx)) {
invariant(!_opCtx->lockState()->isLocked());
- _enableResumableRangeDeleter = !disableResumableRangeDeleter.load();
-
// Disallow moving a chunk to ourselves
uassert(ErrorCodes::InvalidOptions,
"Destination shard cannot be the same as source",
@@ -146,8 +143,7 @@ MigrationSourceManager::MigrationSourceManager(OperationContext* opCtx,
"{collectionEpoch}",
"Starting chunk migration donation",
"requestParameters"_attr = redact(_args.toString()),
- "collectionEpoch"_attr = _args.getVersionEpoch(),
- "resumableRangeDeleterEnabled"_attr = _enableResumableRangeDeleter);
+ "collectionEpoch"_attr = _args.getVersionEpoch());
// Force refresh of the metadata to ensure we have the latest
forceShardFilteringMetadataRefresh(_opCtx, getNss());
@@ -287,15 +283,12 @@ Status MigrationSourceManager::startClone() {
_opCtx, readConcernArgs, PrepareConflictBehavior::kEnforce);
}
- if (_enableResumableRangeDeleter) {
- _coordinator->startMigration(_opCtx);
- }
+ _coordinator->startMigration(_opCtx);
Status startCloneStatus = _cloneDriver->startClone(_opCtx,
_coordinator->getMigrationId(),
_coordinator->getLsid(),
- _coordinator->getTxnNumber(),
- !_enableResumableRangeDeleter);
+ _coordinator->getTxnNumber());
if (!startCloneStatus.isOK()) {
return startCloneStatus;
}
@@ -468,10 +461,7 @@ Status MigrationSourceManager::commitChunkMetadataOnConfig() {
"have re-received the chunk"};
}
- if (_enableResumableRangeDeleter) {
- _coordinator->setMigrationDecision(
- migrationutil::MigrationCoordinator::Decision::kAborted);
- }
+ _coordinator->setMigrationDecision(migrationutil::MigrationCoordinator::Decision::kAborted);
// The chunk modification was not applied, so report the original error
return migrationCommitStatus.withContext("Chunk move was not successful");
@@ -484,10 +474,7 @@ Status MigrationSourceManager::commitChunkMetadataOnConfig() {
"updatedCollectionVersion"_attr = refreshedMetadata.getCollVersion(),
"migrationId"_attr = _coordinator->getMigrationId());
- if (_enableResumableRangeDeleter) {
- _coordinator->setMigrationDecision(
- migrationutil::MigrationCoordinator::Decision::kCommitted);
- }
+ _coordinator->setMigrationDecision(migrationutil::MigrationCoordinator::Decision::kCommitted);
hangBeforeLeavingCriticalSection.pauseWhileSet();
@@ -521,61 +508,20 @@ Status MigrationSourceManager::commitChunkMetadataOnConfig() {
<< "Moved chunks successfully but failed to clean up " << getNss().ns() << " range "
<< redact(range.toString()) << " due to: ";
- if (_enableResumableRangeDeleter) {
- if (_args.getWaitForDelete()) {
- LOGV2(22019,
- "Waiting for migration cleanup after chunk commit for the namespace {namespace} "
- "and range {range}",
- "Waiting for migration cleanup after chunk commit",
- "namespace"_attr = getNss().ns(),
- "range"_attr = redact(range.toString()),
- "migrationId"_attr = _coordinator->getMigrationId());
-
- invariant(_cleanupCompleteFuture);
- auto deleteStatus = _cleanupCompleteFuture->getNoThrow(_opCtx);
- if (!deleteStatus.isOK()) {
- return {ErrorCodes::OrphanedRangeCleanUpFailed,
- orphanedRangeCleanUpErrMsg + redact(deleteStatus)};
- }
- }
- } else {
- auto cleanupCompleteFuture = [&] {
- auto const whenToClean = _args.getWaitForDelete() ? CollectionShardingRuntime::kNow
- : CollectionShardingRuntime::kDelayed;
- UninterruptibleLockGuard noInterrupt(_opCtx->lockState());
- AutoGetCollection autoColl(_opCtx, getNss(), MODE_IS);
- return CollectionShardingRuntime::get(_opCtx, getNss())
- ->cleanUpRange(range, boost::none, whenToClean);
- }();
-
- if (_args.getWaitForDelete()) {
- LOGV2(22020,
- "Waiting for migration cleanup after chunk commit for the namespace {namespace} "
- "and range {range}",
- "Waiting for migration cleanup after chunk commit",
- "namespace"_attr = getNss().ns(),
- "range"_attr = redact(range.toString()));
-
- auto deleteStatus = cleanupCompleteFuture.getNoThrow(_opCtx);
-
- if (!deleteStatus.isOK()) {
- return {ErrorCodes::OrphanedRangeCleanUpFailed,
- orphanedRangeCleanUpErrMsg + redact(deleteStatus)};
- }
-
- return Status::OK();
- }
-
- if (cleanupCompleteFuture.isReady() && !cleanupCompleteFuture.getNoThrow(_opCtx).isOK()) {
+ if (_args.getWaitForDelete()) {
+ LOGV2(22019,
+ "Waiting for migration cleanup after chunk commit for the namespace {namespace} "
+ "and range {range}",
+ "Waiting for migration cleanup after chunk commit",
+ "namespace"_attr = getNss().ns(),
+ "range"_attr = redact(range.toString()),
+ "migrationId"_attr = _coordinator->getMigrationId());
+
+ invariant(_cleanupCompleteFuture);
+ auto deleteStatus = _cleanupCompleteFuture->getNoThrow(_opCtx);
+ if (!deleteStatus.isOK()) {
return {ErrorCodes::OrphanedRangeCleanUpFailed,
- orphanedRangeCleanUpErrMsg + redact(cleanupCompleteFuture.getNoThrow(_opCtx))};
- } else {
- LOGV2(22021,
- "Leaving migration cleanup after chunk commit to complete in background; "
- "namespace: {namespace}, range: {range}",
- "Leaving migration cleanup after chunk commit to complete in background",
- "namespace"_attr = getNss().ns(),
- "range"_attr = redact(range.toString()));
+ orphanedRangeCleanUpErrMsg + redact(deleteStatus)};
}
}
@@ -727,26 +673,24 @@ void MigrationSourceManager::_cleanup() {
ShardingStateRecovery::endMetadataOp(_opCtx);
}
- if (_enableResumableRangeDeleter) {
- if (_state >= kCloning) {
- invariant(_coordinator);
- if (_state < kCommittingOnConfig) {
- _coordinator->setMigrationDecision(
- migrationutil::MigrationCoordinator::Decision::kAborted);
- }
- // This can be called on an exception path after the OperationContext has been
- // interrupted, so use a new OperationContext. Note, it's valid to call
- // getServiceContext on an interrupted OperationContext.
- auto newClient = _opCtx->getServiceContext()->makeClient("MigrationCoordinator");
- {
- stdx::lock_guard<Client> lk(*newClient.get());
- newClient->setSystemOperationKillable(lk);
- }
- AlternativeClientRegion acr(newClient);
- auto newOpCtxPtr = cc().makeOperationContext();
- auto newOpCtx = newOpCtxPtr.get();
- _cleanupCompleteFuture = _coordinator->completeMigration(newOpCtx);
+ if (_state >= kCloning) {
+ invariant(_coordinator);
+ if (_state < kCommittingOnConfig) {
+ _coordinator->setMigrationDecision(
+ migrationutil::MigrationCoordinator::Decision::kAborted);
+ }
+ // This can be called on an exception path after the OperationContext has been
+ // interrupted, so use a new OperationContext. Note, it's valid to call
+ // getServiceContext on an interrupted OperationContext.
+ auto newClient = _opCtx->getServiceContext()->makeClient("MigrationCoordinator");
+ {
+ stdx::lock_guard<Client> lk(*newClient.get());
+ newClient->setSystemOperationKillable(lk);
}
+ AlternativeClientRegion acr(newClient);
+ auto newOpCtxPtr = cc().makeOperationContext();
+ auto newOpCtx = newOpCtxPtr.get();
+ _cleanupCompleteFuture = _coordinator->completeMigration(newOpCtx);
}
_state = kDone;
diff --git a/src/mongo/db/s/migration_source_manager.h b/src/mongo/db/s/migration_source_manager.h
index 923b1d8876e..5f43c9f9780 100644
--- a/src/mongo/db/s/migration_source_manager.h
+++ b/src/mongo/db/s/migration_source_manager.h
@@ -252,10 +252,6 @@ private:
// collection doesn't have UUID.
boost::optional<UUID> _collectionUuid;
- // Whether to use the resumable range deleter. This decision is based on whether the FCV 4.2 or
- // FCV 4.4 protocol are in use and the disableResumableRangeDeleter option is off.
- bool _enableResumableRangeDeleter;
-
// Contains logic for ensuring the donor's and recipient's config.rangeDeletions entries are
// correctly updated based on whether the migration committed or aborted.
std::unique_ptr<migrationutil::MigrationCoordinator> _coordinator;
diff --git a/src/mongo/db/s/migration_util.cpp b/src/mongo/db/s/migration_util.cpp
index fa8c0f89365..57bbe3f635a 100644
--- a/src/mongo/db/s/migration_util.cpp
+++ b/src/mongo/db/s/migration_util.cpp
@@ -53,6 +53,7 @@
#include "mongo/db/s/collection_sharding_runtime.h"
#include "mongo/db/s/migration_coordinator.h"
#include "mongo/db/s/shard_filtering_metadata_refresh.h"
+#include "mongo/db/s/sharding_runtime_d_params_gen.h"
#include "mongo/db/s/sharding_statistics.h"
#include "mongo/db/s/wait_for_majority_service.h"
#include "mongo/db/write_concern.h"
@@ -294,6 +295,13 @@ ExecutorFuture<void> submitRangeDeletionTask(OperationContext* opCtx,
auto uniqueOpCtx = tc->makeOperationContext();
auto opCtx = uniqueOpCtx.get();
+ uassert(
+ ErrorCodes::ResumableRangeDeleterDisabled,
+ str::stream()
+ << "Not submitting range deletion task " << redact(deletionTask.toBSON())
+ << " because the disableResumableRangeDeleter server parameter is set to true",
+ !disableResumableRangeDeleter.load());
+
// Make sure the collection metadata is up-to-date.
{
boost::optional<AutoGetCollection> autoColl;
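A hedged sketch of the observable effect on a donor shard with the parameter set (namespace, key, and count are hypothetical): the deletion task is persisted but never submitted, so its document stays in config.rangeDeletions and the orphans remain on disk:

// Sketch only: after a successful moveChunk off this shard, the range deletion task is
// left pending rather than executed.
assert.commandWorked(
    st.s.adminCommand({moveChunk: "test.foo", find: {x: 0}, to: st.shard1.shardName}));
assert.eq(1, st.shard0.getDB("config").rangeDeletions.count());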
diff --git a/src/mongo/db/s/sharding_runtime_d_params.idl b/src/mongo/db/s/sharding_runtime_d_params.idl
index 4533c957adf..0d92e71aae6 100644
--- a/src/mongo/db/s/sharding_runtime_d_params.idl
+++ b/src/mongo/db/s/sharding_runtime_d_params.idl
@@ -92,7 +92,10 @@ server_parameters:
default: 900
disableResumableRangeDeleter:
- description: 'Disable the resumable range deleter and revert to prior behavior.'
+ description: >-
+ Disable the resumable range deleter. Ranges will not be submitted for deletion, and if an
+ incoming migration sees orphans in the incoming range, the migration will fail. The
+ resumable range deleter replaced the old non-durable range deleter as of FCV 4.4.
set_at: [startup]
cpp_vartype: AtomicWord<bool>
cpp_varname : disableResumableRangeDeleter
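Since the parameter is startup-only, a test has to pass it when starting the shard nodes, as the removed suites above did; a minimal sketch:

// Sketch only: startup-only server parameter, so it goes in the shard startup options.
const st = new ShardingTest(
    {shards: 2, shardOptions: {setParameter: {disableResumableRangeDeleter: true}}});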
diff --git a/src/mongo/db/s/start_chunk_clone_request.cpp b/src/mongo/db/s/start_chunk_clone_request.cpp
index e81f5754b90..86567dce161 100644
--- a/src/mongo/db/s/start_chunk_clone_request.cpp
+++ b/src/mongo/db/s/start_chunk_clone_request.cpp
@@ -152,12 +152,10 @@ StatusWith<StartChunkCloneRequest> StartChunkCloneRequest::createFromCommand(Nam
}
}
- if (obj.getField("uuid")) {
- request._migrationId = UUID::parse(obj);
- request._lsid = LogicalSessionId::parse(IDLParserErrorContext("StartChunkCloneRequest"),
- obj[kLsid].Obj());
- request._txnNumber = obj.getField(kTxnNumber).Long();
- }
+ request._migrationId = UUID::parse(obj);
+ request._lsid =
+ LogicalSessionId::parse(IDLParserErrorContext("StartChunkCloneRequest"), obj[kLsid].Obj());
+ request._txnNumber = obj.getField(kTxnNumber).Long();
return request;
}
@@ -175,19 +173,16 @@ void StartChunkCloneRequest::appendAsCommand(
const BSONObj& chunkMinKey,
const BSONObj& chunkMaxKey,
const BSONObj& shardKeyPattern,
- const MigrationSecondaryThrottleOptions& secondaryThrottle,
- bool resumableRangeDeleterDisabled) {
+ const MigrationSecondaryThrottleOptions& secondaryThrottle) {
invariant(builder->asTempObj().isEmpty());
invariant(nss.isValid());
invariant(fromShardConnectionString.isValid());
builder->append(kRecvChunkStart, nss.ns());
- if (!resumableRangeDeleterDisabled) {
- migrationId.appendToBuilder(builder, kMigrationId);
- builder->append(kLsid, lsid.toBSON());
- builder->append(kTxnNumber, txnNumber);
- }
+ migrationId.appendToBuilder(builder, kMigrationId);
+ builder->append(kLsid, lsid.toBSON());
+ builder->append(kTxnNumber, txnNumber);
sessionId.append(builder);
builder->append(kFromShardConnectionString, fromShardConnectionString.toString());
diff --git a/src/mongo/db/s/start_chunk_clone_request.h b/src/mongo/db/s/start_chunk_clone_request.h
index f1ff3222b03..171639c0c8a 100644
--- a/src/mongo/db/s/start_chunk_clone_request.h
+++ b/src/mongo/db/s/start_chunk_clone_request.h
@@ -72,8 +72,7 @@ public:
const BSONObj& chunkMinKey,
const BSONObj& chunkMaxKey,
const BSONObj& shardKeyPattern,
- const MigrationSecondaryThrottleOptions& secondaryThrottle,
- bool resumableRangeDeleterDisabled);
+ const MigrationSecondaryThrottleOptions& secondaryThrottle);
const NamespaceString& getNss() const {
return _nss;
@@ -128,10 +127,6 @@ public:
return _secondaryThrottle;
}
- bool resumableRangeDeleterDisabled() const {
- return _resumableRangeDeleterDisabled;
- }
-
private:
StartChunkCloneRequest(NamespaceString nss,
MigrationSessionId sessionId,
@@ -163,8 +158,6 @@ private:
// The parsed secondary throttle options
MigrationSecondaryThrottleOptions _secondaryThrottle;
-
- bool _resumableRangeDeleterDisabled{false};
};
} // namespace mongo
diff --git a/src/mongo/db/s/start_chunk_clone_request_test.cpp b/src/mongo/db/s/start_chunk_clone_request_test.cpp
index 9272c53eb25..f498be0e366 100644
--- a/src/mongo/db/s/start_chunk_clone_request_test.cpp
+++ b/src/mongo/db/s/start_chunk_clone_request_test.cpp
@@ -70,8 +70,7 @@ TEST(StartChunkCloneRequest, CreateAsCommandComplete) {
BSON("Key" << -100),
BSON("Key" << 100),
BSON("Key" << 1),
- MigrationSecondaryThrottleOptions::create(MigrationSecondaryThrottleOptions::kOff),
- false);
+ MigrationSecondaryThrottleOptions::create(MigrationSecondaryThrottleOptions::kOff));
BSONObj cmdObj = builder.obj();