-rw-r--r-- buildscripts/resmokeconfig/suites/sharding_auth.yml | 2
-rw-r--r-- buildscripts/resmokeconfig/suites/sharding_auth_audit.yml | 2
-rw-r--r-- buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml | 4
-rw-r--r-- buildscripts/resmokeconfig/suites/sharding_misc.yml | 8
-rw-r--r-- jstests/libs/cleanup_orphaned_util.js | 6
-rw-r--r-- jstests/sharding/cleanup_orphaned.js | 19
-rw-r--r-- jstests/sharding/cleanup_orphaned_auth.js | 56
-rw-r--r-- jstests/sharding/cleanup_orphaned_basic.js | 124
-rw-r--r-- jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js | 163
-rw-r--r-- jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js | 133
-rw-r--r-- jstests/sharding/cleanup_orphaned_cmd_hashed.js | 80
-rw-r--r-- jstests/sharding/cleanup_orphaned_cmd_prereload.js | 98
-rw-r--r-- jstests/sharding/cleanup_orphaned_compound.js | 20
13 files changed, 714 insertions(+), 1 deletion(-)
diff --git a/buildscripts/resmokeconfig/suites/sharding_auth.yml b/buildscripts/resmokeconfig/suites/sharding_auth.yml
index 8cdc924883a..5f361dd80cb 100644
--- a/buildscripts/resmokeconfig/suites/sharding_auth.yml
+++ b/buildscripts/resmokeconfig/suites/sharding_auth.yml
@@ -17,6 +17,8 @@ selector:
   # Skip these additional tests when running with auth enabled.
   - jstests/sharding/parallel.js # Skip the testcases that do not have auth bypass when running ops in parallel.
+  - jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js # SERVER-21713
+  - jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js # SERVER-21713
   - jstests/sharding/migration_ignore_interrupts_1.js # SERVER-21713
   - jstests/sharding/migration_ignore_interrupts_2.js # SERVER-21713
   - jstests/sharding/migration_ignore_interrupts_3.js # SERVER-21713
diff --git a/buildscripts/resmokeconfig/suites/sharding_auth_audit.yml b/buildscripts/resmokeconfig/suites/sharding_auth_audit.yml
index 7813b446927..25cf393d692 100644
--- a/buildscripts/resmokeconfig/suites/sharding_auth_audit.yml
+++ b/buildscripts/resmokeconfig/suites/sharding_auth_audit.yml
@@ -17,6 +17,8 @@ selector:
   # Skip these additional tests when running with auth enabled.
   - jstests/sharding/parallel.js # Skip the testcases that do not have auth bypass when running ops in parallel.
+  - jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js # SERVER-21713
+  - jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js # SERVER-21713
   - jstests/sharding/migration_with_source_ops.js # SERVER-21713
   - jstests/sharding/migration_sets_fromMigrate_flag.js # SERVER-21713
   - jstests/sharding/migration_ignore_interrupts_1.js # SERVER-21713
diff --git a/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml b/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml
index 7bcbeab0bfb..e23481be51c 100644
--- a/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml
+++ b/buildscripts/resmokeconfig/suites/sharding_continuous_config_stepdown.yml
@@ -6,6 +6,7 @@ selector:
   exclude_files:
   # Auth tests require authentication on the stepdown thread's connection
   - jstests/sharding/auth*.js
+  - jstests/sharding/cleanup_orphaned_auth.js
   - jstests/sharding/localhostAuthBypass.js
   - jstests/sharding/kill_sessions.js
   - jstests/sharding/mongos_rs_auth_shard_failure_tolerance.js
@@ -137,6 +138,9 @@ selector:
   # ShardingTest is never used, so continuous step down thread never starts
   - jstests/sharding/config_rs_change.js
   - jstests/sharding/empty_cluster_init.js
+  # Temporarily blacklisted until more robust
+  - jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js
+  - jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js
   # Expects same secondaries for entire test
   - jstests/sharding/commands_that_write_accept_wc_configRS.js
   - jstests/sharding/commands_that_write_accept_wc_shards.js
diff --git a/buildscripts/resmokeconfig/suites/sharding_misc.yml b/buildscripts/resmokeconfig/suites/sharding_misc.yml
index 23f5460dc4d..f7446f41245 100644
--- a/buildscripts/resmokeconfig/suites/sharding_misc.yml
+++ b/buildscripts/resmokeconfig/suites/sharding_misc.yml
@@ -122,6 +122,7 @@ selector:
   - jstests/sharding/shard3.js
   - jstests/sharding/merge_chunks_test.js
   - jstests/sharding/move_stale_mongos.js
+  - jstests/sharding/cleanup_orphaned_basic.js
   - jstests/sharding/validate_collection.js
   - jstests/sharding/change_stream_enforce_max_time_ms_on_mongos.js
   - jstests/sharding/unowned_doc_filtering.js
@@ -173,11 +174,15 @@ selector:
   - jstests/sharding/causal_consistency_shell_support.js
   - jstests/sharding/change_streams_establishment_finds_new_shards.js
   - jstests/sharding/retryable_writes.js
+  - jstests/sharding/cleanup_orphaned_cmd_prereload.js
   - jstests/sharding/basic_merge.js
   - jstests/sharding/migration_critical_section_concurrency.js
   - jstests/sharding/sort1.js
+  - jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js
   - jstests/sharding/upsert_sharded.js
+  - jstests/sharding/cleanup_orphaned_cmd_hashed.js
   - jstests/sharding/addshard5.js
+  - jstests/sharding/cleanup_orphaned_compound.js
   - jstests/sharding/agg_sort.js
   - jstests/sharding/remove1.js
   - jstests/sharding/shard_targeting.js
@@ -194,6 +199,8 @@ selector:
   - jstests/sharding/count2.js
   - jstests/sharding/no_empty_reset.js
   - jstests/sharding/kill_pinned_cursor.js
+  - jstests/sharding/cleanup_orphaned.js
+  - jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js
   - jstests/sharding/sharded_limit_batchsize.js
   - jstests/sharding/migration_sets_fromMigrate_flag.js
   - jstests/sharding/change_stream_metadata_notifications.js
@@ -301,6 +308,7 @@ selector:
   - jstests/sharding/hash_single_shard.js
   - jstests/sharding/version2.js
   - jstests/sharding/advance_logical_time_with_valid_signature.js
+  - jstests/sharding/cleanup_orphaned_auth.js
   - jstests/sharding/mrShardedOutputAuth.js
   - jstests/sharding/split_against_shard_with_invalid_split_points.js
   - jstests/sharding/version1.js
diff --git a/jstests/libs/cleanup_orphaned_util.js b/jstests/libs/cleanup_orphaned_util.js
index 3990c148df4..a88c9e8dbf7 100644
--- a/jstests/libs/cleanup_orphaned_util.js
+++ b/jstests/libs/cleanup_orphaned_util.js
@@ -36,7 +36,11 @@ function cleanupOrphaned(shardConnection, ns, expectedIterations) {
 // keyGen: function() { return [{ a: 'foo', b: 1 }, { a: 'bar', b: 2 }]; }
 // }
 function testCleanupOrphaned(options) {
-    var st = new ShardingTest({shards: 2, mongos: 2});
+    var st = new ShardingTest({
+        shards: 2,
+        mongos: 2,
+        shardOptions: {setParameter: {"disableResumableRangeDeleter": true}}
+    });
     var mongos = st.s0, admin = mongos.getDB('admin'),
         shards = mongos.getCollection('config.shards').find().toArray(),
diff --git a/jstests/sharding/cleanup_orphaned.js b/jstests/sharding/cleanup_orphaned.js
new file mode 100644
index 00000000000..6359d4be110
--- /dev/null
+++ b/jstests/sharding/cleanup_orphaned.js
@@ -0,0 +1,19 @@
+//
+// Shards data from the key range, then inserts orphan documents, runs cleanupOrphaned
+// and makes sure that orphans are removed. Uses _id as the shard key.
+//
+// requires_fcv_44 because the 'disableResumableRangeDeleter' parameter was introduced in v4.4.
+// @tags: [requires_fcv_44]
+
+load('./jstests/libs/cleanup_orphaned_util.js');
+
+testCleanupOrphaned({
+    shardKey: {_id: 1},
+    keyGen: function() {
+        var ids = [];
+        for (var i = -50; i < 50; i++) {
+            ids.push({_id: i});
+        }
+        return ids;
+    }
+});
diff --git a/jstests/sharding/cleanup_orphaned_auth.js b/jstests/sharding/cleanup_orphaned_auth.js
new file mode 100644
index 00000000000..8b0fac00f5d
--- /dev/null
+++ b/jstests/sharding/cleanup_orphaned_auth.js
@@ -0,0 +1,56 @@
+//
+// Tests of cleanupOrphaned command permissions.
+//
+
+(function() {
+'use strict';
+
+// TODO SERVER-35447: Multiple users cannot be authenticated on one connection within a session.
+TestData.disableImplicitSessions = true;
+
+function assertUnauthorized(res, msg) {
+    if (assert._debug && msg)
+        print("in assert for: " + msg);
+
+    if (res.ok == 0 &&
+        (res.errmsg.startsWith('not authorized') || res.errmsg.match(/requires authentication/)))
+        return;
+
+    var finalMsg = "command worked when it should have been unauthorized: " + tojson(res);
+    if (msg) {
+        finalMsg += " : " + msg;
+    }
+    doassert(finalMsg);
+}
+
+var st = new ShardingTest({auth: true, other: {keyFile: 'jstests/libs/key1', useHostname: false}});
+
+var shardAdmin = st.shard0.getDB('admin');
+shardAdmin.createUser({user: 'admin', pwd: 'x', roles: ['clusterAdmin', 'userAdminAnyDatabase']});
+shardAdmin.auth('admin', 'x');
+
+var mongos = st.s0;
+var mongosAdmin = mongos.getDB('admin');
+var coll = mongos.getCollection('foo.bar');
+
+mongosAdmin.createUser({user: 'admin', pwd: 'x', roles: ['clusterAdmin', 'userAdminAnyDatabase']});
+mongosAdmin.auth('admin', 'x');
+
+assert.commandWorked(mongosAdmin.runCommand({enableSharding: coll.getDB().getName()}));
+
+assert.commandWorked(
+    mongosAdmin.runCommand({shardCollection: coll.getFullName(), key: {_id: 'hashed'}}));
+
+// cleanupOrphaned requires auth as admin user.
+assert.commandWorked(shardAdmin.logout());
+assertUnauthorized(shardAdmin.runCommand({cleanupOrphaned: 'foo.bar'}));
+
+var fooDB = st.shard0.getDB('foo');
+shardAdmin.auth('admin', 'x');
+fooDB.createUser({user: 'user', pwd: 'x', roles: ['readWrite', 'dbAdmin']});
+shardAdmin.logout();
+fooDB.auth('user', 'x');
+assertUnauthorized(shardAdmin.runCommand({cleanupOrphaned: 'foo.bar'}));
+
+st.stop();
+})();
diff --git a/jstests/sharding/cleanup_orphaned_basic.js b/jstests/sharding/cleanup_orphaned_basic.js
new file mode 100644
index 00000000000..9e785300050
--- /dev/null
+++ b/jstests/sharding/cleanup_orphaned_basic.js
@@ -0,0 +1,124 @@
+//
+// Basic tests of cleanupOrphaned. Validates that disallowed uses of the cleanupOrphaned
+// command fail.
+//
+// requires_fcv_44 because the 'disableResumableRangeDeleter' parameter was introduced in v4.4.
+// @tags: [requires_fcv_44]
+
+(function() {
+"use strict";
+
+/*****************************************************************************
+ * Unsharded mongod.
+ ****************************************************************************/
+
+// cleanupOrphaned fails against unsharded mongod.
+var mongod = MongoRunner.runMongod();
+assert.commandFailed(mongod.getDB('admin').runCommand({cleanupOrphaned: 'foo.bar'}));
+
+/*****************************************************************************
+ * Bad invocations of cleanupOrphaned command.
+ ****************************************************************************/
+
+var st = new ShardingTest({
+    other: {rs: true, rsOptions: {nodes: 2, setParameter: {"disableResumableRangeDeleter": true}}}
+});
+
+var mongos = st.s0;
+var mongosAdmin = mongos.getDB('admin');
+var dbName = 'foo';
+var collectionName = 'bar';
+var ns = dbName + '.' + collectionName;
+var coll = mongos.getCollection(ns);
+
+// cleanupOrphaned fails against mongos ('no such command'): it must be run
+// on mongod.
+assert.commandFailed(mongosAdmin.runCommand({cleanupOrphaned: ns}));
+
+// cleanupOrphaned must be run on admin DB.
+var shardFooDB = st.shard0.getDB(dbName);
+assert.commandFailed(shardFooDB.runCommand({cleanupOrphaned: ns}));
+
+// Must be run on primary.
+var secondaryAdmin = st.rs0.getSecondary().getDB('admin');
+var response = secondaryAdmin.runCommand({cleanupOrphaned: ns});
+print('cleanupOrphaned on secondary:');
+printjson(response);
+assert.commandFailed(response);
+
+var shardAdmin = st.shard0.getDB('admin');
+var badNS = ' \\/."*<>:|?';
+assert.commandFailed(shardAdmin.runCommand({cleanupOrphaned: badNS}));
+
+// cleanupOrphaned works on sharded collection.
+assert.commandWorked(mongosAdmin.runCommand({enableSharding: coll.getDB().getName()}));
+
+st.ensurePrimaryShard(coll.getDB().getName(), st.shard0.shardName);
+
+assert.commandWorked(mongosAdmin.runCommand({shardCollection: ns, key: {_id: 1}}));
+
+assert.commandWorked(shardAdmin.runCommand({cleanupOrphaned: ns}));
+
+/*****************************************************************************
+ * Empty shard.
+ ****************************************************************************/
+
+// Ping shard[1] so it will be aware that it is sharded. Otherwise cleanupOrphaned
+// may fail.
+assert.commandWorked(mongosAdmin.runCommand({
+    moveChunk: coll.getFullName(),
+    find: {_id: 1},
+    to: st.shard1.shardName,
+    _waitForDelete: true
+}));
+
+assert.commandWorked(mongosAdmin.runCommand({
+    moveChunk: coll.getFullName(),
+    find: {_id: 1},
+    to: st.shard0.shardName,
+    _waitForDelete: true
+}));
+
+// Collection's home is shard0, there are no chunks assigned to shard1.
+st.shard1.getCollection(ns).insert({});
+assert.eq(null, st.shard1.getDB(dbName).getLastError());
+assert.eq(1, st.shard1.getCollection(ns).count());
+response = st.shard1.getDB('admin').runCommand({cleanupOrphaned: ns});
+assert.commandWorked(response);
+assert.eq({_id: {$maxKey: 1}}, response.stoppedAtKey);
+assert.eq(
+    0, st.shard1.getCollection(ns).count(), "cleanupOrphaned didn't delete orphan on empty shard.");
+
+/*****************************************************************************
+ * Bad startingFromKeys.
+ ****************************************************************************/
+
+// startingFromKey of MaxKey.
+response = shardAdmin.runCommand({cleanupOrphaned: ns, startingFromKey: {_id: MaxKey}});
+assert.commandWorked(response);
+assert.eq(null, response.stoppedAtKey);
+
+// startingFromKey doesn't match number of fields in shard key.
+assert.commandFailed(shardAdmin.runCommand(
+    {cleanupOrphaned: ns, startingFromKey: {someKey: 'someValue', someOtherKey: 1}}));
+
+// startingFromKey matches number of fields in shard key but not field names.
+assert.commandFailed(
+    shardAdmin.runCommand({cleanupOrphaned: ns, startingFromKey: {someKey: 'someValue'}}));
+
+var coll2 = mongos.getCollection('foo.baz');
+
+assert.commandWorked(
+    mongosAdmin.runCommand({shardCollection: coll2.getFullName(), key: {a: 1, b: 1}}));
+
+// startingFromKey doesn't match number of fields in shard key.
+assert.commandFailed(shardAdmin.runCommand(
+    {cleanupOrphaned: coll2.getFullName(), startingFromKey: {someKey: 'someValue'}}));
+
+// startingFromKey matches number of fields in shard key but not field names.
+assert.commandFailed(shardAdmin.runCommand(
+    {cleanupOrphaned: coll2.getFullName(), startingFromKey: {a: 'someValue', c: 1}}));
+
+st.stop();
+MongoRunner.stopMongod(mongod);
+})();
diff --git a/jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js b/jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js
new file mode 100644
index 00000000000..6f3ae3ba8ac
--- /dev/null
+++ b/jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js
@@ -0,0 +1,163 @@
+//
+// Tests cleanupOrphaned concurrent with moveChunk.
+// Inserts orphan documents to the donor and recipient shards during the moveChunk and
+// verifies that cleanupOrphaned removes orphans.
+//
+// requires_fcv_44 because the 'disableResumableRangeDeleter' parameter was introduced in v4.4.
+// @tags: [requires_fcv_44]
+
+load('./jstests/libs/chunk_manipulation_util.js');
+load('./jstests/libs/cleanup_orphaned_util.js');
+
+(function() {
+"use strict";
+
+var staticMongod = MongoRunner.runMongod({});  // For startParallelOps.
+var st = new ShardingTest({
+    shards: 2,
+    other: {separateConfig: true},
+    shardOptions: {setParameter: {"disableResumableRangeDeleter": true}}
+});
+
+var mongos = st.s0, admin = mongos.getDB('admin'), dbName = 'foo', ns = dbName + '.bar',
+    coll = mongos.getCollection(ns), donor = st.shard0, recipient = st.shard1,
+    donorColl = donor.getCollection(ns), recipientColl = st.shard1.getCollection(ns);
+
+// Three chunks of 10 documents each, with ids -20, -18, -16, ..., 38.
+// Donor:     [minKey, 0) [0, 20)
+// Recipient: [20, maxKey)
+assert.commandWorked(admin.runCommand({enableSharding: dbName}));
+printjson(admin.runCommand({movePrimary: dbName, to: st.shard0.shardName}));
+assert.commandWorked(admin.runCommand({shardCollection: ns, key: {_id: 1}}));
+assert.commandWorked(admin.runCommand({split: ns, middle: {_id: 0}}));
+assert.commandWorked(admin.runCommand({split: ns, middle: {_id: 20}}));
+assert.commandWorked(admin.runCommand(
+    {moveChunk: ns, find: {_id: 20}, to: st.shard1.shardName, _waitForDelete: true}));
+
+jsTest.log('Inserting 20 docs into shard 0....');
+for (var i = -20; i < 20; i += 2) {
+    coll.insert({_id: i});
+}
+assert.eq(null, coll.getDB().getLastError());
+assert.eq(20, donorColl.count());
+
+jsTest.log('Inserting 10 docs into shard 1....');
+for (i = 20; i < 40; i += 2) {
+    coll.insert({_id: i});
+}
+assert.eq(null, coll.getDB().getLastError());
+assert.eq(10, recipientColl.count());
+
+//
+// Start a moveChunk in the background. Move chunk [0, 20), which has 10 docs,
+// from shard 0 to shard 1. Pause it at some points in the donor's and
+// recipient's work flows, and test cleanupOrphaned on shard 0 and shard 1.
+//
+
+jsTest.log('setting failpoint startedMoveChunk (donor) and cloned (recipient)');
+pauseMoveChunkAtStep(donor, moveChunkStepNames.startedMoveChunk);
+pauseMigrateAtStep(recipient, migrateStepNames.cloned);
+var joinMoveChunk = moveChunkParallel(
+    staticMongod, st.s0.host, {_id: 0}, null, coll.getFullName(), st.shard1.shardName);
+
+waitForMoveChunkStep(donor, moveChunkStepNames.startedMoveChunk);
+waitForMigrateStep(recipient, migrateStepNames.cloned);
+// Recipient has run _recvChunkStart and begun its migration thread; docs have
+// been cloned and chunk [0, 20) is noted as 'pending' on recipient.
+
+// Donor:               [minKey, 0) [0, 20)
+// Recipient (pending): [0, 20)
+// Recipient:           [20, maxKey)
+
+// Create orphans. I'll show an orphaned doc on donor with _id 26 like {26}:
+//
+// Donor:               [minKey, 0) [0, 20) {26}
+// Recipient (pending): [0, 20)
+// Recipient:           {-1} [20, maxKey)
+donorColl.insert([{_id: 26}]);
+assert.eq(null, donorColl.getDB().getLastError());
+assert.eq(21, donorColl.count());
+recipientColl.insert([{_id: -1}]);
+assert.eq(null, recipientColl.getDB().getLastError());
+assert.eq(21, recipientColl.count());
+
+cleanupOrphaned(donor, ns, 2);
+assert.eq(20, donorColl.count());
+cleanupOrphaned(recipient, ns, 2);
+assert.eq(20, recipientColl.count());
+
+jsTest.log('Inserting document on donor side');
+// Inserted a new document (not an orphan) with id 19, which belongs in the
+// [0, 20) chunk.
+donorColl.insert({_id: 19});
+assert.eq(null, coll.getDB().getLastError());
+assert.eq(21, donorColl.count());
+
+// Recipient transfers this modification.
+jsTest.log('Let migrate proceed to transferredMods');
+proceedToMigrateStep(recipient, migrateStepNames.catchup);
+jsTest.log('Done letting migrate proceed to transferredMods');
+
+assert.eq(21, recipientColl.count(), "Recipient didn't transfer inserted document.");
+
+cleanupOrphaned(donor, ns, 2);
+assert.eq(21, donorColl.count());
+cleanupOrphaned(recipient, ns, 2);
+assert.eq(21, recipientColl.count());
+
+// Create orphans.
+donorColl.insert([{_id: 26}]);
+assert.eq(null, donorColl.getDB().getLastError());
+assert.eq(22, donorColl.count());
+recipientColl.insert([{_id: -1}]);
+assert.eq(null, recipientColl.getDB().getLastError());
+assert.eq(22, recipientColl.count());
+
+cleanupOrphaned(donor, ns, 2);
+assert.eq(21, donorColl.count());
+cleanupOrphaned(recipient, ns, 2);
+assert.eq(21, recipientColl.count());
+
+// Recipient has been waiting for donor to call _recvChunkCommit.
+pauseMoveChunkAtStep(donor, moveChunkStepNames.committed);
+unpauseMoveChunkAtStep(donor, moveChunkStepNames.startedMoveChunk);
+proceedToMigrateStep(recipient, migrateStepNames.steady);
+proceedToMigrateStep(recipient, migrateStepNames.done);
+
+// Create orphans.
+donorColl.insert([{_id: 26}]);
+assert.eq(null, donorColl.getDB().getLastError());
+assert.eq(22, donorColl.count());
+recipientColl.insert([{_id: -1}]);
+assert.eq(null, recipientColl.getDB().getLastError());
+assert.eq(22, recipientColl.count());
+
+cleanupOrphaned(donor, ns, 2);
+assert.eq(21, donorColl.count());
+cleanupOrphaned(recipient, ns, 2);
+assert.eq(21, recipientColl.count());
+
+// Let recipient side of the migration finish so that the donor can proceed with the commit.
+unpauseMigrateAtStep(recipient, migrateStepNames.done);
+waitForMoveChunkStep(donor, moveChunkStepNames.committed);
+
+// Donor is paused after the migration chunk commit, but before it finishes the cleanup that
+// includes running the range deleter. Thus it technically has orphaned data -- commit is
+// complete, but moved data is still present. cleanupOrphaned can remove the data the donor
+// would otherwise clean up itself in its post-move delete phase.
+cleanupOrphaned(donor, ns, 2);
+assert.eq(10, donorColl.count());
+
+// Let the donor migration finish.
+unpauseMoveChunkAtStep(donor, moveChunkStepNames.committed);
+joinMoveChunk();
+
+// Donor has finished post-move delete, which had nothing to remove with the range deleter
+// because of the preemptive cleanupOrphaned call.
+assert.eq(10, donorColl.count());
+assert.eq(21, recipientColl.count());
+assert.eq(31, coll.count());
+
+st.stop();
+MongoRunner.stopMongod(staticMongod);
+})();
diff --git a/jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js b/jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js
new file mode 100644
index 00000000000..2ab59df4a54
--- /dev/null
+++ b/jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js
@@ -0,0 +1,133 @@
+//
+//
+// Tests cleanupOrphaned concurrent with moveChunk with a hashed shard key.
+// Inserts orphan documents to the donor and recipient shards during the moveChunk and
+// verifies that cleanupOrphaned removes orphans.
+//
+// requires_fcv_44 because the 'disableResumableRangeDeleter' parameter was introduced in v4.4.
+// @tags: [requires_fcv_44]
+
+load('./jstests/libs/chunk_manipulation_util.js');
+load('./jstests/libs/cleanup_orphaned_util.js');
+
+(function() {
+"use strict";
+
+var staticMongod = MongoRunner.runMongod({});  // For startParallelOps.
+var st = new ShardingTest({
+    shards: 2,
+    other: {separateConfig: true},
+    shardOptions: {setParameter: {"disableResumableRangeDeleter": true}}
+});
+
+var mongos = st.s0, admin = mongos.getDB('admin'), dbName = 'foo', ns = dbName + '.bar',
+    coll = mongos.getCollection(ns);
+
+assert.commandWorked(admin.runCommand({enableSharding: dbName}));
+printjson(admin.runCommand({movePrimary: dbName, to: st.shard0.shardName}));
+assert.commandWorked(admin.runCommand({shardCollection: ns, key: {key: 'hashed'}}));
+
+// Makes four chunks by default, two on each shard.
+var chunks = st.config.chunks.find({ns: ns}).sort({min: 1}).toArray();
+assert.eq(4, chunks.length);
+
+var chunkWithDoc = chunks[1];
+print('Trying to make doc that hashes to this chunk: ' + tojson(chunkWithDoc));
+
+var found = false;
+for (var i = 0; i < 10000; i++) {
+    var doc = {key: ObjectId()}, hash = mongos.adminCommand({_hashBSONElement: doc.key}).out;
+
+    print('doc.key ' + doc.key + ' hashes to ' + hash);
+
+    if (mongos.getCollection('config.chunks')
+            .findOne({_id: chunkWithDoc._id, 'min.key': {$lte: hash}, 'max.key': {$gt: hash}})) {
+        found = true;
+        break;
+    }
+}
+
+assert(found, "Couldn't make doc that belongs to chunk 1.");
+print('Doc: ' + tojson(doc));
+coll.insert(doc);
+assert.eq(null, coll.getDB().getLastError());
+
+//
+// Start a moveChunk in the background from shard 0 to shard 1. Pause it at
+// some points in the donor's and recipient's work flows, and test
+// cleanupOrphaned.
+//
+
+var donor, recip;
+if (chunkWithDoc.shard == st.shard0.shardName) {
+    donor = st.shard0;
+    recip = st.shard1;
+} else {
+    recip = st.shard0;
+    donor = st.shard1;
+}
+
+jsTest.log('setting failpoint startedMoveChunk (donor) and cloned (recipient)');
+pauseMoveChunkAtStep(donor, moveChunkStepNames.startedMoveChunk);
+pauseMigrateAtStep(recip, migrateStepNames.cloned);
+
+var joinMoveChunk = moveChunkParallel(staticMongod,
+                                      st.s0.host,
+                                      null,
+                                      [chunkWithDoc.min, chunkWithDoc.max],  // bounds
+                                      coll.getFullName(),
+                                      recip.shardName);
+
+waitForMoveChunkStep(donor, moveChunkStepNames.startedMoveChunk);
+waitForMigrateStep(recip, migrateStepNames.cloned);
+proceedToMigrateStep(recip, migrateStepNames.catchup);
+// recipient has run _recvChunkStart and begun its migration thread;
+// 'doc' has been cloned and chunkWithDoc is noted as 'pending' on recipient.
+
+var donorColl = donor.getCollection(ns), recipColl = recip.getCollection(ns);
+
+assert.eq(1, donorColl.count());
+assert.eq(1, recipColl.count());
+
+// cleanupOrphaned should go through two iterations, since the default chunk
+// setup leaves two unowned ranges on each shard.
+cleanupOrphaned(donor, ns, 2);
+cleanupOrphaned(recip, ns, 2);
+assert.eq(1, donorColl.count());
+assert.eq(1, recipColl.count());
+
+// recip has been waiting for donor to call _recvChunkCommit.
+pauseMoveChunkAtStep(donor, moveChunkStepNames.committed);
+unpauseMoveChunkAtStep(donor, moveChunkStepNames.startedMoveChunk);
+proceedToMigrateStep(recip, migrateStepNames.steady);
+proceedToMigrateStep(recip, migrateStepNames.done);
+
+cleanupOrphaned(donor, ns, 2);
+assert.eq(1, donorColl.count());
+cleanupOrphaned(recip, ns, 2);
+assert.eq(1, recipColl.count());
+
+// Let recip side of the migration finish so that the donor proceeds with the commit.
+unpauseMigrateAtStep(recip, migrateStepNames.done);
+waitForMoveChunkStep(donor, moveChunkStepNames.committed);
+
+// Donor is paused after the migration chunk commit, but before it finishes the cleanup that
+// includes running the range deleter. Thus it technically has orphaned data -- commit is
+// complete, but moved data is still present. cleanupOrphaned can remove the data the donor
+// would otherwise clean up itself in its post-move delete phase.
+cleanupOrphaned(donor, ns, 2);
+assert.eq(0, donorColl.count());
+
+// Let migration thread complete.
+unpauseMoveChunkAtStep(donor, moveChunkStepNames.committed);
+joinMoveChunk();
+
+// donor has finished post-move delete, which had nothing to remove with the range deleter
+// because of the preemptive cleanupOrphaned call.
+assert.eq(0, donorColl.count());
+assert.eq(1, recipColl.count());
+assert.eq(1, coll.count());
+
+MongoRunner.stopMongod(staticMongod);
+st.stop();
+})();
diff --git a/jstests/sharding/cleanup_orphaned_cmd_hashed.js b/jstests/sharding/cleanup_orphaned_cmd_hashed.js
new file mode 100644
index 00000000000..5efa4ad6c40
--- /dev/null
+++ b/jstests/sharding/cleanup_orphaned_cmd_hashed.js
@@ -0,0 +1,80 @@
+//
+// Tests cleanup of orphaned data in hashed sharded coll via the orphaned data cleanup command
+//
+// requires_fcv_44 because the 'disableResumableRangeDeleter' parameter was introduced in v4.4.
+// @tags: [requires_fcv_44]
+
+(function() {
+"use strict";
+
+var st = new ShardingTest(
+    {shards: 2, mongos: 1, shardOptions: {setParameter: {"disableResumableRangeDeleter": true}}});
+
+var mongos = st.s0;
+var admin = mongos.getDB("admin");
+var coll = mongos.getCollection("foo.bar");
+
+assert.commandWorked(admin.runCommand({enableSharding: coll.getDB() + ""}));
+printjson(admin.runCommand({movePrimary: coll.getDB() + "", to: st.shard0.shardName}));
+assert.commandWorked(admin.runCommand({shardCollection: coll + "", key: {_id: "hashed"}}));
+
+// Create two orphaned data holes, one bounded by min or max on each shard
+
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: NumberLong(-100)}}));
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: NumberLong(-50)}}));
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: NumberLong(50)}}));
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: NumberLong(100)}}));
+assert.commandWorked(admin.runCommand({
+    moveChunk: coll + "",
+    bounds: [{_id: NumberLong(-100)}, {_id: NumberLong(-50)}],
+    to: st.shard1.shardName,
+    _waitForDelete: true
+}));
+assert.commandWorked(admin.runCommand({
+    moveChunk: coll + "",
+    bounds: [{_id: NumberLong(50)}, {_id: NumberLong(100)}],
+    to: st.shard0.shardName,
+    _waitForDelete: true
+}));
+st.printShardingStatus();
+
+jsTest.log("Inserting some docs on each shard, so 1/2 will be orphaned...");
+
+for (var s = 0; s < 2; s++) {
+    var shardColl = (s == 0 ? st.shard0 : st.shard1).getCollection(coll + "");
+    var bulk = shardColl.initializeUnorderedBulkOp();
+    for (var i = 0; i < 100; i++)
+        bulk.insert({_id: i});
+    assert.commandWorked(bulk.execute());
+}
+
+assert.eq(200,
+          st.shard0.getCollection(coll + "").find().itcount() +
+              st.shard1.getCollection(coll + "").find().itcount());
+assert.eq(100, coll.find().itcount());
+
+jsTest.log("Cleaning up orphaned data in hashed coll...");
+
+for (var s = 0; s < 2; s++) {
+    var shardAdmin = (s == 0 ? st.shard0 : st.shard1).getDB("admin");
+
+    var result = shardAdmin.runCommand({cleanupOrphaned: coll + ""});
+    while (result.ok && result.stoppedAtKey) {
+        printjson(result);
+        result = shardAdmin.runCommand(
+            {cleanupOrphaned: coll + "", startingFromKey: result.stoppedAtKey});
+    }
+
+    printjson(result);
+    assert(result.ok);
+}
+
+assert.eq(100,
+          st.shard0.getCollection(coll + "").find().itcount() +
+              st.shard1.getCollection(coll + "").find().itcount());
+assert.eq(100, coll.find().itcount());
+
+jsTest.log("DONE!");
+
+st.stop();
+})();
diff --git a/jstests/sharding/cleanup_orphaned_cmd_prereload.js b/jstests/sharding/cleanup_orphaned_cmd_prereload.js
new file mode 100644
index 00000000000..33aa8bca9c8
--- /dev/null
+++ b/jstests/sharding/cleanup_orphaned_cmd_prereload.js
@@ -0,0 +1,98 @@
+//
+// Tests failed cleanup of orphaned data when we have pending chunks
+//
+// requires_fcv_44 because the 'disableResumableRangeDeleter' parameter was introduced in v4.4.
+// @tags: [requires_fcv_44]
+
+var st = new ShardingTest(
+    {shards: 2, shardOptions: {setParameter: {"disableResumableRangeDeleter": true}}});
+
+var mongos = st.s0;
+var admin = mongos.getDB("admin");
+var coll = mongos.getCollection("foo.bar");
+
+assert.commandWorked(admin.runCommand({enableSharding: coll.getDB() + ""}));
+printjson(admin.runCommand({movePrimary: coll.getDB() + "", to: st.shard0.shardName}));
+assert.commandWorked(admin.runCommand({shardCollection: coll + "", key: {_id: 1}}));
+
+// Turn off best-effort recipient metadata refresh post-migration commit on both shards because it
+// would clean up the pending chunks on migration recipients.
+assert.commandWorked(st.shard0.getDB('admin').runCommand(
+    {configureFailPoint: 'doNotRefreshRecipientAfterCommit', mode: 'alwaysOn'}));
+assert.commandWorked(st.shard1.getDB('admin').runCommand(
+    {configureFailPoint: 'doNotRefreshRecipientAfterCommit', mode: 'alwaysOn'}));
+
+jsTest.log("Moving some chunks to shard1...");
+
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 0}}));
+assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: 1}}));
+
+assert.commandWorked(admin.runCommand(
+    {moveChunk: coll + "", find: {_id: 0}, to: st.shard1.shardName, _waitForDelete: true}));
+assert.commandWorked(admin.runCommand(
+    {moveChunk: coll + "", find: {_id: 1}, to: st.shard1.shardName, _waitForDelete: true}));
+
+var metadata =
+    st.shard1.getDB("admin").runCommand({getShardVersion: coll + "", fullMetadata: true}).metadata;
+
+printjson(metadata);
+
+assert.eq(metadata.pending[0][0]._id, 1);
+assert.eq(metadata.pending[0][1]._id, MaxKey);
+
+jsTest.log("Ensuring we won't remove orphaned data in pending chunk...");
+
+assert(!st.shard1.getDB("admin")
+            .runCommand({cleanupOrphaned: coll + "", startingFromKey: {_id: 1}})
+            .stoppedAtKey);
+
+jsTest.log("Moving some chunks back to shard0 after empty...");
+
+assert.commandWorked(admin.runCommand(
+    {moveChunk: coll + "", find: {_id: -1}, to: st.shard1.shardName, _waitForDelete: true}));
+
+var metadata =
+    st.shard0.getDB("admin").runCommand({getShardVersion: coll + "", fullMetadata: true}).metadata;
+
+printjson(metadata);
+
+assert.eq(metadata.shardVersion.t, 0);
+assert.neq(metadata.collVersion.t, 0);
+assert.eq(metadata.pending.length, 0);
+
+assert.commandWorked(admin.runCommand(
+    {moveChunk: coll + "", find: {_id: 1}, to: st.shard0.shardName, _waitForDelete: true}));
+
+var metadata =
+    st.shard0.getDB("admin").runCommand({getShardVersion: coll + "", fullMetadata: true}).metadata;
+
+printjson(metadata);
+assert.eq(metadata.shardVersion.t, 0);
+assert.neq(metadata.collVersion.t, 0);
+assert.eq(metadata.pending[0][0]._id, 1);
+assert.eq(metadata.pending[0][1]._id, MaxKey);
+
+jsTest.log("Ensuring again we won't remove orphaned data in pending chunk...");
+
+assert(!st.shard0.getDB("admin")
+            .runCommand({cleanupOrphaned: coll + "", startingFromKey: {_id: 1}})
+            .stoppedAtKey);
+
+jsTest.log("Checking that pending chunk is promoted on reload...");
+
+assert.eq(null, coll.findOne({_id: 1}));
+
+var metadata =
+    st.shard0.getDB("admin").runCommand({getShardVersion: coll + "", fullMetadata: true}).metadata;
+
+printjson(metadata);
+assert.neq(metadata.shardVersion.t, 0);
+assert.neq(metadata.collVersion.t, 0);
+assert.eq(metadata.chunks[0][0]._id, 1);
+assert.eq(metadata.chunks[0][1]._id, MaxKey);
+
+st.printShardingStatus();
+
+jsTest.log("DONE!");
+
+st.stop();
diff --git a/jstests/sharding/cleanup_orphaned_compound.js b/jstests/sharding/cleanup_orphaned_compound.js
new file mode 100644
index 00000000000..931c08e4387
--- /dev/null
+++ b/jstests/sharding/cleanup_orphaned_compound.js
@@ -0,0 +1,20 @@
+//
+// Shards data from the key range, then inserts orphan documents, runs cleanupOrphaned
+// and makes sure that orphans are removed. Uses a compound shard key.
+//
+// requires_fcv_44 because the 'disableResumableRangeDeleter' parameter was introduced in v4.4.
+// @tags: [requires_fcv_44]
+
+load('./jstests/libs/cleanup_orphaned_util.js');
+
+testCleanupOrphaned({
+    shardKey: {a: 1, b: 1},
+    keyGen: function() {
+        var ids = [];
+        for (var i = -50; i < 50; i++) {
+            ids.push({a: i, b: Math.random()});
+        }
+
+        return ids;
+    }
+});