author | Kaloian Manassiev <kaloian.manassiev@mongodb.com> | 2016-03-30 17:25:51 -0400
committer | Kaloian Manassiev <kaloian.manassiev@mongodb.com> | 2016-03-31 17:58:17 -0400
commit | 3edc84475b10154a76f268edb5e80ac6ca609411 (patch)
tree | e05f42b49a69820a0d30a9fcdaf664a16e0cf3a8
parent | 3ce338f6fc95322141bbf35f982513a831bb74ca (diff)
download | mongo-3edc84475b10154a76f268edb5e80ac6ca609411.tar.gz
SERVER-23425 Port 3.2 sharding move chunk unit tests
-rwxr-xr-x | buildscripts/smoke.py | 13
-rw-r--r-- | jstests/libs/chunk_manipulation_util.js | 214
-rw-r--r-- | jstests/libs/cleanup_orphaned_util.js | 131
-rw-r--r-- | jstests/sharding/cleanup_orphaned.js | 17
-rw-r--r-- | jstests/sharding/cleanup_orphaned_auth.js | 51
-rw-r--r-- | jstests/sharding/cleanup_orphaned_basic.js | 113
-rw-r--r-- | jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js | 148
-rw-r--r-- | jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js | 124
-rw-r--r-- | jstests/sharding/cleanup_orphaned_cmd_hashed.js | 133
-rw-r--r-- | jstests/sharding/cleanup_orphaned_cmd_prereload.js | 121
-rw-r--r-- | jstests/sharding/cleanup_orphaned_compound.js | 18
-rw-r--r-- | jstests/sharding/migration_sets_fromMigrate_flag.js | 167
-rw-r--r-- | jstests/sharding/migration_with_source_ops.js | 146
-rw-r--r-- | src/mongo/s/d_migrate.cpp | 17
14 files changed, 1278 insertions, 135 deletions
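As a quick orientation to the utilities this commit introduces, the sketch below shows the pattern the new tests follow: start a moveChunk in the background, hold the donor and recipient at known steps via the new failpoint helpers, exercise cleanupOrphaned while the migration is parked, then release the failpoints and join. The helper names and signatures are taken from jstests/libs/chunk_manipulation_util.js and jstests/libs/cleanup_orphaned_util.js in this diff; the collection setup is elided and the namespace foo.bar is illustrative only, not part of the patch.

```javascript
// Illustrative sketch, not part of the commit: pause a migration mid-flight,
// run cleanupOrphaned on the donor, then let the migration finish.
load('./jstests/libs/chunk_manipulation_util.js');
load('./jstests/libs/cleanup_orphaned_util.js');

var staticMongod = MongoRunner.runMongod({});  // communication channel for startParallelOps
var st = new ShardingTest({shards: 2});
var ns = 'foo.bar';

// ... enableSharding/shardCollection/split so the chunk containing {_id: 0}
// lives on st.shard0 (elided; see the new tests below for the full setup) ...

// Hold the donor right after it has started the migration, and the recipient
// right after it has cloned the chunk's documents.
pauseMoveChunkAtStep(st.shard0, moveChunkStepNames.startedMoveChunk);
pauseMigrateAtStep(st.shard1, migrateStepNames.cloned);

var joinMoveChunk =
    moveChunkParallel(staticMongod, st.s0.host, {_id: 0}, null, ns, st.shard1.shardName);
waitForMoveChunkStep(st.shard0, moveChunkStepNames.startedMoveChunk);
waitForMigrateStep(st.shard1, migrateStepNames.cloned);

// With both sides parked, orphaned documents can be inserted and cleaned up; the
// last argument is the expected number of iterations before stoppedAtKey is null.
cleanupOrphaned(st.shard0, ns, 2);

// Release the failpoints and wait for the background moveChunk to complete.
unpauseMigrateAtStep(st.shard1, migrateStepNames.cloned);
unpauseMoveChunkAtStep(st.shard0, moveChunkStepNames.startedMoveChunk);
joinMoveChunk();

st.stop();
```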
diff --git a/buildscripts/smoke.py b/buildscripts/smoke.py index 6d95b45d610..22eb828f3d3 100755 --- a/buildscripts/smoke.py +++ b/buildscripts/smoke.py @@ -469,14 +469,15 @@ def skipTest(path): authTestsToSkip = [("jstests", "drop2.js"), # SERVER-8589, ("jstests", "killop.js"), # SERVER-10128 + ("sharding", "cleanup_orphaned_cmd_during_movechunk.js"), # SERVER-21713 + ("sharding", "cleanup_orphaned_cmd_during_movechunk_hashed.js"), # SERVER-21713 + ("sharding", "copydb_from_mongos.js"), # SERVER-13080 + ("sharding", "migration_ignore_interrupts.js"), # SERVER-21713 + ("sharding", "migration_sets_fromMigrate_flag.js"), # SERVER-21713 + ("sharding", "migration_with_source_ops.js"), # SERVER-21713 + ("sharding", "parallel.js"), ("sharding", "sync3.js"), # SERVER-6388 for this and those below ("sharding", "sync6.js"), - ("sharding", "parallel.js"), - ("sharding", "copydb_from_mongos.js"), # SERVER-13080 - ("jstests", "bench_test1.js"), - ("jstests", "bench_test2.js"), - ("jstests", "bench_test3.js"), - ("jstests", "bench_test_insert.js"), ("core", "bench_test1.js"), ("core", "bench_test2.js"), ("core", "bench_test3.js"), diff --git a/jstests/libs/chunk_manipulation_util.js b/jstests/libs/chunk_manipulation_util.js new file mode 100644 index 00000000000..a334cbe8aec --- /dev/null +++ b/jstests/libs/chunk_manipulation_util.js @@ -0,0 +1,214 @@ +// +// Utilities for testing chunk manipulation: moveChunk, mergeChunks, etc. +// + +load('./jstests/libs/test_background_ops.js'); + +// +// Start a background moveChunk. +// staticMongod: Server to use for communication, use +// "MongoRunner.runMongod({})" to make one. +// mongosURL: Like 'localhost:27017'. +// findCriteria: Like { _id: 1 }, passed to moveChunk's "find" option. +// bounds: Array of two documents that specify the lower and upper +// shard key values of a chunk to move. Specify either the +// bounds field or the find field but not both. +// ns: Like 'dbName.collectionName'. +// toShardId: Like 'shard0001'. +// +// Returns a join function; call it to wait for moveChunk to complete. +// + +function moveChunkParallel(staticMongod, mongosURL, findCriteria, bounds, ns, toShardId) { + assert((findCriteria || bounds) && !(findCriteria && bounds), + 'Specify either findCriteria or bounds, but not both.'); + + function runMoveChunk(mongosURL, findCriteria, bounds, ns, toShardId) { + assert(mongosURL && ns && toShardId, 'Missing arguments.'); + assert((findCriteria || bounds) && !(findCriteria && bounds), + 'Specify either findCriteria or bounds, but not both.'); + + var mongos = new Mongo(mongosURL), admin = mongos.getDB('admin'), cmd = { + moveChunk: ns + }; + + if (findCriteria) { + cmd.find = findCriteria; + } else { + cmd.bounds = bounds; + } + + cmd.to = toShardId; + cmd._waitForDelete = true; + + printjson(cmd); + var result = admin.runCommand(cmd); + printjson(result); + assert(result.ok); + } + + // Return the join function. + return startParallelOps( + staticMongod, runMoveChunk, [mongosURL, findCriteria, bounds, ns, toShardId]); +} + +// moveChunk starts at step 0 and proceeds to 1 (it has *finished* parsing +// options), 2 (it has reloaded config and got distributed lock) and so on. 
+var moveChunkStepNames = { + parsedOptions: 1, + gotDistLock: 2, + startedMoveChunk: 3, // called _recvChunkStart on recipient + reachedSteadyState: 4, // recipient reports state is "steady" + committed: 5, + done: 6 +}; + +function numberToName(names, stepNumber) { + for (var name in names) { + if (names.hasOwnProperty(name) && names[name] == stepNumber) { + return name; + } + } + + assert(false); +} + +// +// Configure a failpoint to make moveChunk hang at a step. +// +function pauseMoveChunkAtStep(shardConnection, stepNumber) { + configureMoveChunkFailPoint(shardConnection, stepNumber, 'alwaysOn'); +} + +// +// Allow moveChunk to proceed past a step. +// +function unpauseMoveChunkAtStep(shardConnection, stepNumber) { + configureMoveChunkFailPoint(shardConnection, stepNumber, 'off'); +} + +function proceedToMoveChunkStep(shardConnection, stepNumber) { + jsTest.log('moveChunk proceeding from step "' + + numberToName(moveChunkStepNames, stepNumber - 1) + '" to "' + + numberToName(moveChunkStepNames, stepNumber) + '".'); + + pauseMoveChunkAtStep(shardConnection, stepNumber); + unpauseMoveChunkAtStep(shardConnection, stepNumber - 1); + waitForMoveChunkStep(shardConnection, stepNumber); +} + +function configureMoveChunkFailPoint(shardConnection, stepNumber, mode) { + assert.between(migrateStepNames.copiedIndexes, + stepNumber, + migrateStepNames.done, + "incorrect stepNumber", + true); + var admin = shardConnection.getDB('admin'); + admin.runCommand({configureFailPoint: 'moveChunkHangAtStep' + stepNumber, mode: mode}); +} + +// +// Wait for moveChunk to reach a step (1 through 6). Assumes only one moveChunk +// is in mongos's currentOp. +// +function waitForMoveChunkStep(shardConnection, stepNumber) { + var searchString = 'step ' + stepNumber, admin = shardConnection.getDB('admin'); + + assert.between(migrateStepNames.copiedIndexes, + stepNumber, + migrateStepNames.done, + "incorrect stepNumber", + true); + + var msg = ('moveChunk on ' + shardConnection.shardName + ' never reached step "' + + numberToName(moveChunkStepNames, stepNumber) + '".'); + + assert.soon(function() { + var in_progress = admin.currentOp().inprog; + for (var i = 0; i < in_progress.length; ++i) { + var op = in_progress[i]; + if (op.query && op.query.moveChunk) { + return op.msg && op.msg.startsWith(searchString); + } + } + + return false; + }, msg); +} + +var migrateStepNames = { + copiedIndexes: 1, + deletedPriorDataInRange: 2, + cloned: 3, + catchup: 4, // About to enter steady state. + steady: 5, + done: 6 +}; + +// +// Configure a failpoint to make migration thread hang at a step (1 through 5). +// +function pauseMigrateAtStep(shardConnection, stepNumber) { + configureMigrateFailPoint(shardConnection, stepNumber, 'alwaysOn'); +} + +// +// Allow _recvChunkStart to proceed past a step. 
+// +function unpauseMigrateAtStep(shardConnection, stepNumber) { + configureMigrateFailPoint(shardConnection, stepNumber, 'off'); +} + +function proceedToMigrateStep(shardConnection, stepNumber) { + jsTest.log('Migration thread proceeding from step "' + + numberToName(migrateStepNames, stepNumber - 1) + '" to "' + + numberToName(migrateStepNames, stepNumber) + '".'); + + pauseMigrateAtStep(shardConnection, stepNumber); + unpauseMigrateAtStep(shardConnection, stepNumber - 1); + waitForMigrateStep(shardConnection, stepNumber); +} + +function configureMigrateFailPoint(shardConnection, stepNumber, mode) { + assert.between(migrateStepNames.copiedIndexes, + stepNumber, + migrateStepNames.done, + "incorrect stepNumber", + true); + + var admin = shardConnection.getDB('admin'); + admin.runCommand({configureFailPoint: 'migrateThreadHangAtStep' + stepNumber, mode: mode}); +} + +// +// Wait for moveChunk to reach a step (1 through 6). +// +function waitForMigrateStep(shardConnection, stepNumber) { + var searchString = 'step ' + stepNumber, admin = shardConnection.getDB('admin'); + + assert.between(migrateStepNames.copiedIndexes, + stepNumber, + migrateStepNames.done, + "incorrect stepNumber", + true); + + var msg = ('Migrate thread on ' + shardConnection.shardName + ' never reached step "' + + numberToName(migrateStepNames, stepNumber) + '".'); + + assert.soon(function() { + // verbose = True so we see the migration thread. + var in_progress = admin.currentOp(true).inprog; + for (var i = 0; i < in_progress.length; ++i) { + var op = in_progress[i]; + if (op.desc && op.desc === 'migrateThread') { + if (op.hasOwnProperty('msg')) { + return op.msg.startsWith(searchString); + } else { + return false; + } + } + } + + return false; + }, msg); +} diff --git a/jstests/libs/cleanup_orphaned_util.js b/jstests/libs/cleanup_orphaned_util.js new file mode 100644 index 00000000000..cfd69ab128f --- /dev/null +++ b/jstests/libs/cleanup_orphaned_util.js @@ -0,0 +1,131 @@ +// +// Utilities for testing cleanupOrphaned command. +// + +// +// Run cleanupOrphaned on a shard, and assert cleanupOrphaned runs the +// expected number of times before stopping. +// +function cleanupOrphaned(shardConnection, ns, expectedIterations) { + var admin = shardConnection.getDB('admin'), result = admin.runCommand({cleanupOrphaned: ns}), + iterations = 1; + + if (!result.ok) { + printjson(result); + } + assert(result.ok); + while (result.stoppedAtKey) { + result = admin.runCommand({cleanupOrphaned: ns, startingFromKey: result.stoppedAtKey}); + + assert(result.ok); + ++iterations; + } + + assert.eq(iterations, + expectedIterations, + 'Expected to run ' + + 'cleanupOrphaned' + expectedIterations + ' times, but it only ran ' + iterations + + ' times before stoppedAtKey was null.'); +} + +// Shards data from key range, then inserts orphan documents, runs cleanupOrphans +// and makes sure that orphans are removed. 
+// Pass an options object like: +// { +// shardKey: { a: 1, b: 1 }, +// keyGen: function() { return [{ a: 'foo', b: 1 }, { a: 'bar', b: 2 }]; } +// } +function testCleanupOrphaned(options) { + var st = new ShardingTest({shards: 2, mongos: 2}); + + var mongos = st.s0, admin = mongos.getDB('admin'), + shards = mongos.getCollection('config.shards').find().toArray(), + coll = mongos.getCollection('foo.bar'), + shard0Coll = st.shard0.getCollection(coll.getFullName()), keys = options.keyGen(), + beginning = keys[0], oneQuarter = keys[Math.round(keys.length / 4)], + middle = keys[Math.round(keys.length / 2)], + threeQuarters = keys[Math.round(3 * keys.length / 4)]; + + assert.commandWorked(admin.runCommand({enableSharding: coll.getDB().getName()})); + + printjson(admin.runCommand({movePrimary: coll.getDB() + "", to: shards[0]._id})); + + assert.commandWorked( + admin.runCommand({shardCollection: coll.getFullName(), key: options.shardKey})); + + st.printShardingStatus(); + + jsTest.log('Inserting some regular docs...'); + + assert.commandWorked(admin.runCommand({split: coll.getFullName(), middle: middle})); + + assert.commandWorked(admin.runCommand( + {moveChunk: coll.getFullName(), find: middle, to: shards[1]._id, _waitForDelete: true})); + + for (var i = 0; i < keys.length; i++) + coll.insert(keys[i]); + assert.eq(null, coll.getDB().getLastError()); + + // Half of the data is on each shard: + // shard 0: [beginning, middle) + // shard 1: [middle, end) + // + assert.eq(keys.length / 2, shard0Coll.count()); + assert.eq(keys.length, coll.find().itcount()); + + jsTest.log('Inserting some orphaned docs...'); + + shard0Coll.insert(threeQuarters); + + // I'll represent the orphan doc like {threeQuarters}, in this diagram: + // + // shard 0: [beginning, middle) {threeQuarters} + // shard 1: [middle, end) + assert.eq(null, shard0Coll.getDB().getLastError()); + assert.eq(1 + keys.length / 2, shard0Coll.count()); + + jsTest.log('Cleaning up orphaned data...'); + + cleanupOrphaned(st.shard0, coll.getFullName(), 2); + assert.eq(keys.length / 2, shard0Coll.count()); + assert.eq(keys.length, coll.find().itcount()); + + jsTest.log('Moving half the data out again (making a hole)...'); + + assert.commandWorked(admin.runCommand({split: coll.getFullName(), middle: oneQuarter})); + + assert.commandWorked(admin.runCommand({ + moveChunk: coll.getFullName(), + find: beginning, + to: shards[1]._id, + _waitForDelete: true + })); + + // 1/4 of the data is on the first shard. + // shard 0: [threeQuarters, middle) + // shard 1: [beginning, threeQuarters) [middle, end) + assert.eq(Math.round(keys.length / 4), shard0Coll.count()); + assert.eq(keys.length, coll.find().itcount()); + + jsTest.log('Inserting some more orphaned docs...'); + + shard0Coll.insert(beginning); + shard0Coll.insert(middle); + assert.eq(null, shard0Coll.getDB().getLastError()); + + // shard 0: {beginning} [threeQuarters, middle) {middle} + // shard 1: [beginning, threeQuarters) [middle, end) + assert.eq(2 + Math.round(keys.length / 4), shard0Coll.count()); + assert.eq(100, coll.find().itcount()); + + jsTest.log('Cleaning up more orphaned data...'); + + // Now cleanupOrphaned must iterate over 3 regions, not 2. 
+ cleanupOrphaned(st.shard0, coll.getFullName(), 3); + assert.eq(Math.round(keys.length / 4), shard0Coll.count()); + assert.eq(keys.length, coll.find().itcount()); + + jsTest.log('DONE!'); + + st.stop(); +} diff --git a/jstests/sharding/cleanup_orphaned.js b/jstests/sharding/cleanup_orphaned.js new file mode 100644 index 00000000000..a63991f7a23 --- /dev/null +++ b/jstests/sharding/cleanup_orphaned.js @@ -0,0 +1,17 @@ +// +// Shards data from the key range, then inserts orphan documents, runs cleanupOrphans +// and makes sure that orphans are removed. Uses an _id as a shard key. +// + +load('./jstests/libs/cleanup_orphaned_util.js'); + +testCleanupOrphaned({ + shardKey: {_id: 1}, + keyGen: function() { + var ids = []; + for (var i = -50; i < 50; i++) { + ids.push({_id: i}); + } + return ids; + } +}); diff --git a/jstests/sharding/cleanup_orphaned_auth.js b/jstests/sharding/cleanup_orphaned_auth.js new file mode 100644 index 00000000000..4e9a3e0214c --- /dev/null +++ b/jstests/sharding/cleanup_orphaned_auth.js @@ -0,0 +1,51 @@ +// +// Tests of cleanupOrphaned command permissions. +// + +(function() { + "use strict"; + + function assertUnauthorized(res, msg) { + if (assert._debug && msg) + print("in assert for: " + msg); + + if (res.ok == 0 && res.errmsg.startsWith('not authorized')) + return; + + var finalMsg = "command worked when it should have been unauthorized: " + tojson(res); + if (msg) { + finalMsg += " : " + msg; + } + doassert(finalMsg); + } + + var st = + new ShardingTest({auth: true, keyFile: 'jstests/libs/key1', other: {useHostname: false}}); + + var mongosAdmin = st.s0.getDB('admin'); + var coll = st.s0.getCollection('foo.bar'); + + mongosAdmin.createUser( + {user: 'admin', pwd: 'x', roles: ['clusterAdmin', 'userAdminAnyDatabase']}); + mongosAdmin.auth('admin', 'x'); + + assert.commandWorked(mongosAdmin.runCommand({enableSharding: coll.getDB().getName()})); + + assert.commandWorked( + mongosAdmin.runCommand({shardCollection: coll.getFullName(), key: {_id: 'hashed'}})); + + // cleanupOrphaned requires auth as admin user. + var shardAdmin = st.shard0.getDB('admin'); + assert.commandWorked(shardAdmin.logout()); + assertUnauthorized(shardAdmin.runCommand({cleanupOrphaned: 'foo.bar'})); + + var fooDB = st.shard0.getDB('foo'); + shardAdmin.auth('admin', 'x'); + fooDB.createUser({user: 'user', pwd: 'x', roles: ['readWrite', 'dbAdmin']}); + shardAdmin.logout(); + fooDB.auth('user', 'x'); + assertUnauthorized(shardAdmin.runCommand({cleanupOrphaned: 'foo.bar'})); + + st.stop(); + +})(); diff --git a/jstests/sharding/cleanup_orphaned_basic.js b/jstests/sharding/cleanup_orphaned_basic.js new file mode 100644 index 00000000000..e03e093fd82 --- /dev/null +++ b/jstests/sharding/cleanup_orphaned_basic.js @@ -0,0 +1,113 @@ +// +// Basic tests of cleanupOrphaned. Validates that non allowed uses of the cleanupOrphaned +// command fail. +// + +(function() { + "use strict"; + + /***************************************************************************** + * Unsharded mongod. + ****************************************************************************/ + + // cleanupOrphaned fails against unsharded mongod. + var mongod = MongoRunner.runMongod(); + assert.commandFailed(mongod.getDB('admin').runCommand({cleanupOrphaned: 'foo.bar'})); + + /***************************************************************************** + * Bad invocations of cleanupOrphaned command. 
+ ****************************************************************************/ + + var st = new ShardingTest({other: {rs: true, rsOptions: {nodes: 2}}}); + + var mongos = st.s0; + var mongosAdmin = mongos.getDB('admin'); + var dbName = 'foo'; + var collectionName = 'bar'; + var ns = dbName + '.' + collectionName; + var coll = mongos.getCollection(ns); + + // cleanupOrphaned fails against mongos ('no such command'): it must be run + // on mongod. + assert.commandFailed(mongosAdmin.runCommand({cleanupOrphaned: ns})); + + // cleanupOrphaned must be run on admin DB. + var shardFooDB = st.shard0.getDB(dbName); + assert.commandFailed(shardFooDB.runCommand({cleanupOrphaned: ns})); + + // Must be run on primary. + var secondaryAdmin = st.rs0.getSecondary().getDB('admin'); + var response = secondaryAdmin.runCommand({cleanupOrphaned: ns}); + print('cleanupOrphaned on secondary:'); + printjson(response); + assert.commandFailed(response); + + var shardAdmin = st.shard0.getDB('admin'); + var badNS = ' \\/."*<>:|?'; + assert.commandFailed(shardAdmin.runCommand({cleanupOrphaned: badNS})); + + // cleanupOrphaned works on sharded collection. + assert.commandWorked(mongosAdmin.runCommand({enableSharding: coll.getDB().getName()})); + + st.ensurePrimaryShard(coll.getDB().getName(), st.shard0.shardName); + + assert.commandWorked(mongosAdmin.runCommand({shardCollection: ns, key: {_id: 1}})); + + assert.commandWorked(shardAdmin.runCommand({cleanupOrphaned: ns})); + + /***************************************************************************** + * Empty shard. + ****************************************************************************/ + + // Ping shard[1] so it will be aware that it is sharded. Otherwise cleanupOrphaned + // may fail. + assert.commandWorked(mongosAdmin.runCommand( + {moveChunk: coll.getFullName(), find: {_id: 1}, to: st.shard1.shardName})); + + assert.commandWorked(mongosAdmin.runCommand( + {moveChunk: coll.getFullName(), find: {_id: 1}, to: st.shard0.shardName})); + + // Collection's home is shard0, there are no chunks assigned to shard1. + st.shard1.getCollection(ns).insert({}); + assert.eq(null, st.shard1.getDB(dbName).getLastError()); + assert.eq(1, st.shard1.getCollection(ns).count()); + response = st.shard1.getDB('admin').runCommand({cleanupOrphaned: ns}); + assert.commandWorked(response); + assert.eq({_id: {$maxKey: 1}}, response.stoppedAtKey); + assert.eq(0, + st.shard1.getCollection(ns).count(), + "cleanupOrphaned didn't delete orphan on empty shard."); + + /***************************************************************************** + * Bad startingFromKeys. + ****************************************************************************/ + + // startingFromKey of MaxKey. + response = shardAdmin.runCommand({cleanupOrphaned: ns, startingFromKey: {_id: MaxKey}}); + assert.commandWorked(response); + assert.eq(null, response.stoppedAtKey); + + // startingFromKey doesn't match number of fields in shard key. + assert.commandFailed(shardAdmin.runCommand( + {cleanupOrphaned: ns, startingFromKey: {someKey: 'someValue', someOtherKey: 1}})); + + // startingFromKey matches number of fields in shard key but not field names. + assert.commandFailed( + shardAdmin.runCommand({cleanupOrphaned: ns, startingFromKey: {someKey: 'someValue'}})); + + var coll2 = mongos.getCollection('foo.baz'); + + assert.commandWorked( + mongosAdmin.runCommand({shardCollection: coll2.getFullName(), key: {a: 1, b: 1}})); + + // startingFromKey doesn't match number of fields in shard key. 
+ assert.commandFailed(shardAdmin.runCommand( + {cleanupOrphaned: coll2.getFullName(), startingFromKey: {someKey: 'someValue'}})); + + // startingFromKey matches number of fields in shard key but not field names. + assert.commandFailed(shardAdmin.runCommand( + {cleanupOrphaned: coll2.getFullName(), startingFromKey: {a: 'someValue', c: 1}})); + + st.stop(); + +})(); diff --git a/jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js b/jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js new file mode 100644 index 00000000000..68cb7688981 --- /dev/null +++ b/jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js @@ -0,0 +1,148 @@ +// +// Tests cleanupOrphaned concurrent with moveChunk. +// Inserts orphan documents to the donor and recipient shards during the moveChunk and +// verifies that cleanupOrphaned removes orphans. +// + +load('./jstests/libs/chunk_manipulation_util.js'); +load('./jstests/libs/cleanup_orphaned_util.js'); + +(function() { + "use strict"; + + var staticMongod = MongoRunner.runMongod({}); // For startParallelOps. + var st = new ShardingTest({shards: 2, other: {separateConfig: true}}); + + var mongos = st.s0, admin = mongos.getDB('admin'), dbName = 'foo', ns = dbName + '.bar', + coll = mongos.getCollection(ns), donor = st.shard0, recipient = st.shard1, + donorColl = donor.getCollection(ns), recipientColl = st.shard1.getCollection(ns); + + // Three chunks of 10 documents each, with ids -20, -18, -16, ..., 38. + // Donor: [minKey, 0) [0, 20) + // Recipient: [20, maxKey) + assert.commandWorked(admin.runCommand({enableSharding: dbName})); + printjson(admin.runCommand({movePrimary: dbName, to: st.shard0.shardName})); + assert.commandWorked(admin.runCommand({shardCollection: ns, key: {_id: 1}})); + assert.commandWorked(admin.runCommand({split: ns, middle: {_id: 0}})); + assert.commandWorked(admin.runCommand({split: ns, middle: {_id: 20}})); + assert.commandWorked(admin.runCommand( + {moveChunk: ns, find: {_id: 20}, to: st.shard1.shardName, _waitForDelete: true})); + + jsTest.log('Inserting 40 docs into shard 0....'); + for (var i = -20; i < 20; i += 2) + coll.insert({_id: i}); + assert.eq(null, coll.getDB().getLastError()); + assert.eq(20, donorColl.count()); + + jsTest.log('Inserting 25 docs into shard 1....'); + for (i = 20; i < 40; i += 2) + coll.insert({_id: i}); + assert.eq(null, coll.getDB().getLastError()); + assert.eq(10, recipientColl.count()); + + // + // Start a moveChunk in the background. Move chunk [0, 20), which has 10 docs, + // from shard 0 to shard 1. Pause it at some points in the donor's and + // recipient's work flows, and test cleanupOrphaned on shard 0 and shard 1. + // + + jsTest.log('setting failpoint startedMoveChunk'); + pauseMoveChunkAtStep(donor, moveChunkStepNames.startedMoveChunk); + pauseMigrateAtStep(recipient, migrateStepNames.cloned); + var joinMoveChunk = moveChunkParallel( + staticMongod, st.s0.host, {_id: 0}, null, coll.getFullName(), st.shard1.shardName); + + waitForMoveChunkStep(donor, moveChunkStepNames.startedMoveChunk); + waitForMigrateStep(recipient, migrateStepNames.cloned); + // Recipient has run _recvChunkStart and begun its migration thread; docs have + // been cloned and chunk [0, 20) is noted as 'pending' on recipient. + + // Donor: [minKey, 0) [0, 20) + // Recipient (pending): [0, 20) + // Recipient: [20, maxKey) + + // Create orphans. 
I'll show an orphaned doc on donor with _id 26 like {26}: + // + // Donor: [minKey, 0) [0, 20) {26} + // Recipient (pending): [0, 20) + // Recipient: {-1} [20, maxKey) + donorColl.insert([{_id: 26}]); + assert.eq(null, donorColl.getDB().getLastError()); + assert.eq(21, donorColl.count()); + recipientColl.insert([{_id: -1}]); + assert.eq(null, recipientColl.getDB().getLastError()); + assert.eq(21, recipientColl.count()); + + cleanupOrphaned(donor, ns, 2); + assert.eq(20, donorColl.count()); + cleanupOrphaned(recipient, ns, 2); + assert.eq(20, recipientColl.count()); + + jsTest.log('Inserting document on donor side'); + // Inserted a new document (not an orphan) with id 19, which belongs in the + // [0, 20) chunk. + donorColl.insert({_id: 19}); + assert.eq(null, coll.getDB().getLastError()); + assert.eq(21, donorColl.count()); + + // Recipient transfers this modification. + jsTest.log('Let migrate proceed to transferredMods'); + pauseMigrateAtStep(recipient, migrateStepNames.catchup); + unpauseMigrateAtStep(recipient, migrateStepNames.cloned); + waitForMigrateStep(recipient, migrateStepNames.catchup); + jsTest.log('Done letting migrate proceed to transferredMods'); + + assert.eq(21, recipientColl.count(), "Recipient didn't transfer inserted document."); + + cleanupOrphaned(donor, ns, 2); + assert.eq(21, donorColl.count()); + cleanupOrphaned(recipient, ns, 2); + assert.eq(21, recipientColl.count()); + + // Create orphans. + donorColl.insert([{_id: 26}]); + assert.eq(null, donorColl.getDB().getLastError()); + assert.eq(22, donorColl.count()); + recipientColl.insert([{_id: -1}]); + assert.eq(null, recipientColl.getDB().getLastError()); + assert.eq(22, recipientColl.count()); + + cleanupOrphaned(donor, ns, 2); + assert.eq(21, donorColl.count()); + cleanupOrphaned(recipient, ns, 2); + assert.eq(21, recipientColl.count()); + + // Recipient has been waiting for donor to call _recvChunkCommit. + pauseMoveChunkAtStep(donor, moveChunkStepNames.committed); + unpauseMoveChunkAtStep(donor, moveChunkStepNames.startedMoveChunk); + proceedToMigrateStep(recipient, migrateStepNames.steady); + proceedToMigrateStep(recipient, migrateStepNames.done); + + // Create orphans. + donorColl.insert([{_id: 26}]); + assert.eq(null, donorColl.getDB().getLastError()); + assert.eq(22, donorColl.count()); + recipientColl.insert([{_id: -1}]); + assert.eq(null, recipientColl.getDB().getLastError()); + assert.eq(22, recipientColl.count()); + + // cleanupOrphaned should still fail on donor, but should work on the recipient + cleanupOrphaned(donor, ns, 2); + assert.eq(10, donorColl.count()); + cleanupOrphaned(recipient, ns, 2); + assert.eq(21, recipientColl.count()); + + // Let migration thread complete. + unpauseMigrateAtStep(recipient, migrateStepNames.done); + unpauseMoveChunkAtStep(donor, moveChunkStepNames.committed); + joinMoveChunk(); + + // Donor has finished post-move delete. + cleanupOrphaned(donor, ns, 2); // this is necessary for the count to not be 11 + assert.eq(10, donorColl.count()); + assert.eq(21, recipientColl.count()); + assert.eq(31, coll.count()); + + st.stop(); + +})(); diff --git a/jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js b/jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js new file mode 100644 index 00000000000..58ea9e806fd --- /dev/null +++ b/jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js @@ -0,0 +1,124 @@ +// +// +// Tests cleanupOrphaned concurrent with moveChunk with a hashed shard key. 
+// Inserts orphan documents to the donor and recipient shards during the moveChunk and +// verifies that cleanupOrphaned removes orphans. +// + +load('./jstests/libs/chunk_manipulation_util.js'); +load('./jstests/libs/cleanup_orphaned_util.js'); + +(function() { + "use strict"; + + var staticMongod = MongoRunner.runMongod({}); // For startParallelOps. + var st = new ShardingTest({shards: 2, other: {separateConfig: true}}); + + var mongos = st.s0, admin = mongos.getDB('admin'), + shards = mongos.getCollection('config.shards').find().toArray(), dbName = 'foo', + ns = dbName + '.bar', coll = mongos.getCollection(ns); + + assert.commandWorked(admin.runCommand({enableSharding: dbName})); + printjson(admin.runCommand({movePrimary: dbName, to: shards[0]._id})); + assert.commandWorked(admin.runCommand({shardCollection: ns, key: {key: 'hashed'}})); + + // Makes four chunks by default, two on each shard. + var chunks = st.config.chunks.find().sort({min: 1}).toArray(); + assert.eq(4, chunks.length); + + var chunkWithDoc = chunks[1]; + print('Trying to make doc that hashes to this chunk: ' + tojson(chunkWithDoc)); + + var found = false; + for (var i = 0; i < 10000; i++) { + var doc = + { + key: ObjectId() + }, + hash = mongos.adminCommand({_hashBSONElement: doc.key}).out; + + print('doc.key ' + doc.key + ' hashes to ' + hash); + + if (mongos.getCollection('config.chunks') + .findOne( + {_id: chunkWithDoc._id, 'min.key': {$lte: hash}, 'max.key': {$gt: hash}})) { + found = true; + break; + } + } + + assert(found, "Couldn't make doc that belongs to chunk 1."); + print('Doc: ' + tojson(doc)); + coll.insert(doc); + assert.eq(null, coll.getDB().getLastError()); + + // + // Start a moveChunk in the background from shard 0 to shard 1. Pause it at + // some points in the donor's and recipient's work flows, and test + // cleanupOrphaned. + // + + var donor, recip; + if (chunkWithDoc.shard == st.shard0.shardName) { + donor = st.shard0; + recip = st.shard1; + } else { + recip = st.shard0; + donor = st.shard1; + } + + jsTest.log('setting failpoint startedMoveChunk'); + pauseMoveChunkAtStep(donor, moveChunkStepNames.startedMoveChunk); + pauseMigrateAtStep(recip, migrateStepNames.cloned); + + var joinMoveChunk = moveChunkParallel(staticMongod, + st.s0.host, + null, + [chunkWithDoc.min, chunkWithDoc.max], // bounds + coll.getFullName(), + recip.shardName); + + waitForMoveChunkStep(donor, moveChunkStepNames.startedMoveChunk); + waitForMigrateStep(recip, migrateStepNames.cloned); + proceedToMigrateStep(recip, migrateStepNames.catchup); + // recipient has run _recvChunkStart and begun its migration thread; + // 'doc' has been cloned and chunkWithDoc is noted as 'pending' on recipient. + + var donorColl = donor.getCollection(ns), recipColl = recip.getCollection(ns); + + assert.eq(1, donorColl.count()); + assert.eq(1, recipColl.count()); + + // cleanupOrphaned should go through two iterations, since the default chunk + // setup leaves two unowned ranges on each shard. + cleanupOrphaned(donor, ns, 2); + cleanupOrphaned(recip, ns, 2); + assert.eq(1, donorColl.count()); + assert.eq(1, recipColl.count()); + + // recip has been waiting for donor to call _recvChunkCommit. + pauseMoveChunkAtStep(donor, moveChunkStepNames.committed); + unpauseMoveChunkAtStep(donor, moveChunkStepNames.startedMoveChunk); + proceedToMigrateStep(recip, migrateStepNames.steady); + proceedToMigrateStep(recip, migrateStepNames.done); + + // cleanupOrphaned removes migrated data from donor. 
The donor would + // otherwise clean them up itself, in the post-move delete phase. + cleanupOrphaned(donor, ns, 2); + assert.eq(0, donorColl.count()); + cleanupOrphaned(recip, ns, 2); + assert.eq(1, recipColl.count()); + + // Let migration thread complete. + unpauseMigrateAtStep(recip, migrateStepNames.done); + unpauseMoveChunkAtStep(donor, moveChunkStepNames.committed); + joinMoveChunk(); + + // donor has finished post-move delete. + assert.eq(0, donorColl.count()); + assert.eq(1, recipColl.count()); + assert.eq(1, coll.count()); + + st.stop(); + +})(); diff --git a/jstests/sharding/cleanup_orphaned_cmd_hashed.js b/jstests/sharding/cleanup_orphaned_cmd_hashed.js index 52cc1657c1d..a4f9cfb25eb 100644 --- a/jstests/sharding/cleanup_orphaned_cmd_hashed.js +++ b/jstests/sharding/cleanup_orphaned_cmd_hashed.js @@ -1,71 +1,78 @@ // // Tests cleanup of orphaned data in hashed sharded coll via the orphaned data cleanup command -// @tags : [ hashed ] // -var options = { separateConfig : true, shardOptions : { verbose : 2 } }; - -var st = new ShardingTest({ shards : 2, mongos : 1, other : options }); -st.stopBalancer(); - -var mongos = st.s0; -var admin = mongos.getDB( "admin" ); -var shards = mongos.getCollection( "config.shards" ).find().toArray(); -var coll = mongos.getCollection( "foo.bar" ); - -assert( admin.runCommand({ enableSharding : coll.getDB() + "" }).ok ); -printjson( admin.runCommand({ movePrimary : coll.getDB() + "", to : shards[0]._id }) ); -assert( admin.runCommand({ shardCollection : coll + "", key : { _id : "hashed" } }).ok ); - -// Create two orphaned data holes, one bounded by min or max on each shard - -assert( admin.runCommand({ split : coll + "", middle : { _id : NumberLong(-100) } }).ok ); -assert( admin.runCommand({ split : coll + "", middle : { _id : NumberLong(-50) } }).ok ); -assert( admin.runCommand({ split : coll + "", middle : { _id : NumberLong(50) } }).ok ); -assert( admin.runCommand({ split : coll + "", middle : { _id : NumberLong(100) } }).ok ); -assert( admin.runCommand({ moveChunk : coll + "", bounds : [{ _id : NumberLong(-100) }, - { _id : NumberLong(-50) }], - to : shards[1]._id, - _waitForDelete : true }).ok ); -assert( admin.runCommand({ moveChunk : coll + "", bounds : [{ _id : NumberLong(50) }, - { _id : NumberLong(100) }], - to : shards[0]._id, - _waitForDelete : true }).ok ); -st.printShardingStatus(); - -jsTest.log( "Inserting some docs on each shard, so 1/2 will be orphaned..." ); - -for ( var s = 0; s < 2; s++ ) { - var shardColl = ( s == 0 ? st.shard0 : st.shard1 ).getCollection( coll + "" ); - var bulk = shardColl.initializeUnorderedBulkOp(); - for ( var i = 0; i < 100; i++ ) bulk.insert({ _id : i }); - assert.writeOK(bulk.execute()); -} - -assert.eq( 200, st.shard0.getCollection( coll + "" ).find().itcount() + - st.shard1.getCollection( coll + "" ).find().itcount() ); -assert.eq( 100, coll.find().itcount() ); - -jsTest.log( "Cleaning up orphaned data in hashed coll..." ); - -for ( var s = 0; s < 2; s++ ) { - var shardAdmin = ( s == 0 ? 
st.shard0 : st.shard1 ).getDB( "admin" ); - - var result = shardAdmin.runCommand({ cleanupOrphaned : coll + "" }); - while ( result.ok && result.stoppedAtKey ) { - printjson( result ); - result = shardAdmin.runCommand({ cleanupOrphaned : coll + "", - startingFromKey : result.stoppedAtKey }); +(function() { + "use strict"; + + var st = new ShardingTest({shards: 2, mongos: 1}); + + var mongos = st.s0; + var admin = mongos.getDB("admin"); + var coll = mongos.getCollection("foo.bar"); + + assert.commandWorked(admin.runCommand({enableSharding: coll.getDB() + ""})); + printjson(admin.runCommand({movePrimary: coll.getDB() + "", to: st.shard0.shardName})); + assert.commandWorked(admin.runCommand({shardCollection: coll + "", key: {_id: "hashed"}})); + + // Create two orphaned data holes, one bounded by min or max on each shard + + assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: NumberLong(-100)}})); + assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: NumberLong(-50)}})); + assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: NumberLong(50)}})); + assert.commandWorked(admin.runCommand({split: coll + "", middle: {_id: NumberLong(100)}})); + assert.commandWorked(admin.runCommand({ + moveChunk: coll + "", + bounds: [{_id: NumberLong(-100)}, {_id: NumberLong(-50)}], + to: st.shard1.shardName, + _waitForDelete: true + })); + assert.commandWorked(admin.runCommand({ + moveChunk: coll + "", + bounds: [{_id: NumberLong(50)}, {_id: NumberLong(100)}], + to: st.shard0.shardName, + _waitForDelete: true + })); + st.printShardingStatus(); + + jsTest.log("Inserting some docs on each shard, so 1/2 will be orphaned..."); + + for (var s = 0; s < 2; s++) { + var shardColl = (s == 0 ? st.shard0 : st.shard1).getCollection(coll + ""); + var bulk = shardColl.initializeUnorderedBulkOp(); + for (var i = 0; i < 100; i++) + bulk.insert({_id: i}); + assert.writeOK(bulk.execute()); } - - printjson( result ); - assert( result.ok ); -} -assert.eq( 100, st.shard0.getCollection( coll + "" ).find().itcount() + - st.shard1.getCollection( coll + "" ).find().itcount() ); -assert.eq( 100, coll.find().itcount() ); + assert.eq(200, + st.shard0.getCollection(coll + "").find().itcount() + + st.shard1.getCollection(coll + "").find().itcount()); + assert.eq(100, coll.find().itcount()); + + jsTest.log("Cleaning up orphaned data in hashed coll..."); + + for (var s = 0; s < 2; s++) { + var shardAdmin = (s == 0 ? st.shard0 : st.shard1).getDB("admin"); + + var result = shardAdmin.runCommand({cleanupOrphaned: coll + ""}); + while (result.ok && result.stoppedAtKey) { + printjson(result); + result = shardAdmin.runCommand( + {cleanupOrphaned: coll + "", startingFromKey: result.stoppedAtKey}); + } + + printjson(result); + assert(result.ok); + } + + assert.eq(100, + st.shard0.getCollection(coll + "").find().itcount() + + st.shard1.getCollection(coll + "").find().itcount()); + assert.eq(100, coll.find().itcount()); + + jsTest.log("DONE!"); -jsTest.log( "DONE!" 
); + st.stop(); -st.stop(); +})(); diff --git a/jstests/sharding/cleanup_orphaned_cmd_prereload.js b/jstests/sharding/cleanup_orphaned_cmd_prereload.js index 7c3170268f3..7155baea970 100644 --- a/jstests/sharding/cleanup_orphaned_cmd_prereload.js +++ b/jstests/sharding/cleanup_orphaned_cmd_prereload.js @@ -2,97 +2,90 @@ // Tests failed cleanup of orphaned data when we have pending chunks // -var options = { separateConfig : true, shardOptions : { verbose : 2 } }; - -var st = new ShardingTest({ shards : 2, mongos : 2, other : options }); -st.stopBalancer(); +var st = new ShardingTest({shards: 2}); var mongos = st.s0; -var admin = mongos.getDB( "admin" ); -var shards = mongos.getCollection( "config.shards" ).find().toArray(); -var coll = mongos.getCollection( "foo.bar" ); +var admin = mongos.getDB("admin"); +var shards = mongos.getCollection("config.shards").find().toArray(); +var coll = mongos.getCollection("foo.bar"); -assert( admin.runCommand({ enableSharding : coll.getDB() + "" }).ok ); -printjson( admin.runCommand({ movePrimary : coll.getDB() + "", to : shards[0]._id }) ); -assert( admin.runCommand({ shardCollection : coll + "", key : { _id : 1 } }).ok ); +assert(admin.runCommand({enableSharding: coll.getDB() + ""}).ok); +printjson(admin.runCommand({movePrimary: coll.getDB() + "", to: shards[0]._id})); +assert(admin.runCommand({shardCollection: coll + "", key: {_id: 1}}).ok); -jsTest.log( "Moving some chunks to shard1..." ); +jsTest.log("Moving some chunks to shard1..."); -assert( admin.runCommand({ split : coll + "", middle : { _id : 0 } }).ok ); -assert( admin.runCommand({ split : coll + "", middle : { _id : 1 } }).ok ); +assert(admin.runCommand({split: coll + "", middle: {_id: 0}}).ok); +assert(admin.runCommand({split: coll + "", middle: {_id: 1}}).ok); -assert( admin.runCommand({ moveChunk : coll + "", - find : { _id : 0 }, - to : shards[1]._id, - _waitForDelete : true }).ok ); -assert( admin.runCommand({ moveChunk : coll + "", - find : { _id : 1 }, - to : shards[1]._id, - _waitForDelete : true }).ok ); +assert(admin.runCommand( + {moveChunk: coll + "", find: {_id: 0}, to: shards[1]._id, _waitForDelete: true}) + .ok); +assert(admin.runCommand( + {moveChunk: coll + "", find: {_id: 1}, to: shards[1]._id, _waitForDelete: true}) + .ok); -var metadata = st.shard1.getDB( "admin" ) - .runCommand({ getShardVersion : coll + "", fullMetadata : true }).metadata; +var metadata = + st.shard1.getDB("admin").runCommand({getShardVersion: coll + "", fullMetadata: true}).metadata; -printjson( metadata ); +printjson(metadata); -assert.eq( metadata.pending[0][0]._id, 1 ); -assert.eq( metadata.pending[0][1]._id, MaxKey ); +assert.eq(metadata.pending[0][0]._id, 1); +assert.eq(metadata.pending[0][1]._id, MaxKey); -jsTest.log( "Ensuring we won't remove orphaned data in pending chunk..." ); +jsTest.log("Ensuring we won't remove orphaned data in pending chunk..."); -assert( !st.shard1.getDB( "admin" ) - .runCommand({ cleanupOrphaned : coll + "", startingFromKey : { _id : 1 } }).stoppedAtKey ); +assert(!st.shard1.getDB("admin") + .runCommand({cleanupOrphaned: coll + "", startingFromKey: {_id: 1}}) + .stoppedAtKey); -jsTest.log( "Moving some chunks back to shard0 after empty..." 
); +jsTest.log("Moving some chunks back to shard0 after empty..."); -admin.runCommand({ moveChunk : coll + "", - find : { _id : -1 }, - to : shards[1]._id, - _waitForDelete : true }); +admin.runCommand({moveChunk: coll + "", find: {_id: -1}, to: shards[1]._id, _waitForDelete: true}); -var metadata = st.shard0.getDB( "admin" ) - .runCommand({ getShardVersion : coll + "", fullMetadata : true }).metadata; +var metadata = + st.shard0.getDB("admin").runCommand({getShardVersion: coll + "", fullMetadata: true}).metadata; -printjson( metadata ); +printjson(metadata); -assert.eq( metadata.shardVersion.t, 0 ); -assert.neq( metadata.collVersion.t, 0 ); -assert.eq( metadata.pending.length, 0 ); +assert.eq(metadata.shardVersion.t, 0); +assert.neq(metadata.collVersion.t, 0); +assert.eq(metadata.pending.length, 0); -assert( admin.runCommand({ moveChunk : coll + "", - find : { _id : 1 }, - to : shards[0]._id, - _waitForDelete : true }).ok ); +assert(admin.runCommand( + {moveChunk: coll + "", find: {_id: 1}, to: shards[0]._id, _waitForDelete: true}) + .ok); -var metadata = st.shard0.getDB( "admin" ) - .runCommand({ getShardVersion : coll + "", fullMetadata : true }).metadata; +var metadata = + st.shard0.getDB("admin").runCommand({getShardVersion: coll + "", fullMetadata: true}).metadata; -printjson( metadata ); -assert.eq( metadata.shardVersion.t, 0 ); -assert.neq( metadata.collVersion.t, 0 ); -assert.eq( metadata.pending[0][0]._id, 1 ); -assert.eq( metadata.pending[0][1]._id, MaxKey ); +printjson(metadata); +assert.eq(metadata.shardVersion.t, 0); +assert.neq(metadata.collVersion.t, 0); +assert.eq(metadata.pending[0][0]._id, 1); +assert.eq(metadata.pending[0][1]._id, MaxKey); -jsTest.log( "Ensuring again we won't remove orphaned data in pending chunk..." ); +jsTest.log("Ensuring again we won't remove orphaned data in pending chunk..."); -assert( !st.shard0.getDB( "admin" ) - .runCommand({ cleanupOrphaned : coll + "", startingFromKey : { _id : 1 } }).stoppedAtKey ); +assert(!st.shard0.getDB("admin") + .runCommand({cleanupOrphaned: coll + "", startingFromKey: {_id: 1}}) + .stoppedAtKey); -jsTest.log( "Checking that pending chunk is promoted on reload..." ); +jsTest.log("Checking that pending chunk is promoted on reload..."); -assert.eq( null, coll.findOne({ _id : 1 }) ); +assert.eq(null, coll.findOne({_id: 1})); -var metadata = st.shard0.getDB( "admin" ) - .runCommand({ getShardVersion : coll + "", fullMetadata : true }).metadata; +var metadata = + st.shard0.getDB("admin").runCommand({getShardVersion: coll + "", fullMetadata: true}).metadata; -printjson( metadata ); -assert.neq( metadata.shardVersion.t, 0 ); -assert.neq( metadata.collVersion.t, 0 ); -assert.eq( metadata.chunks[0][0]._id, 1 ); -assert.eq( metadata.chunks[0][1]._id, MaxKey ); +printjson(metadata); +assert.neq(metadata.shardVersion.t, 0); +assert.neq(metadata.collVersion.t, 0); +assert.eq(metadata.chunks[0][0]._id, 1); +assert.eq(metadata.chunks[0][1]._id, MaxKey); st.printShardingStatus(); -jsTest.log( "DONE!" ); +jsTest.log("DONE!"); st.stop(); diff --git a/jstests/sharding/cleanup_orphaned_compound.js b/jstests/sharding/cleanup_orphaned_compound.js new file mode 100644 index 00000000000..ebf7163c77d --- /dev/null +++ b/jstests/sharding/cleanup_orphaned_compound.js @@ -0,0 +1,18 @@ +// +// Shards data from the key range, then inserts orphan documents, runs cleanupOrphans +// and makes sure that orphans are removed. Uses a compound shard key. 
+// + +load('./jstests/libs/cleanup_orphaned_util.js'); + +testCleanupOrphaned({ + shardKey: {a: 1, b: 1}, + keyGen: function() { + var ids = []; + for (var i = -50; i < 50; i++) { + ids.push({a: i, b: Math.random()}); + } + + return ids; + } +}); diff --git a/jstests/sharding/migration_sets_fromMigrate_flag.js b/jstests/sharding/migration_sets_fromMigrate_flag.js new file mode 100644 index 00000000000..55dbca8b5fa --- /dev/null +++ b/jstests/sharding/migration_sets_fromMigrate_flag.js @@ -0,0 +1,167 @@ +// +// Tests whether the fromMigrate flag is correctly set during migrations. +// +// Tests: +// #1 (delete op) fromMigrate is set when recipient shard deletes all documents locally +// in the chunk range it is about to receive from the donor shard. +// #2 (delete op) fromMigrate is set when the donor shard deletes documents that have +// been migrated to another shard. +// #3 (insert op) fromMigrate is set when the recipient shard receives chunk migration +// data and inserts it. +// #4 (update op) fromMigrate is set when an update occurs in the donor shard during +// migration and is sent to the recipient via the transfer logs. +// #5 fromMigrate is NOT set on donor shard and IS set on the recipient shard when real +// delete op is done during chunk migration within the chunk range. +// + +load('./jstests/libs/chunk_manipulation_util.js'); + +(function() { + "use strict"; + + var staticMongod = MongoRunner.runMongod({}); // For startParallelOps. + + /** + * Start up new sharded cluster, stop balancer that would interfere in manual chunk management. + */ + + var st = new ShardingTest({shards: 2, mongos: 1, rs: {nodes: 3}}); + st.stopBalancer(); + + var mongos = st.s0, admin = mongos.getDB('admin'), + shards = mongos.getCollection('config.shards').find().toArray(), dbName = "testDB", + ns = dbName + ".foo", coll = mongos.getCollection(ns), donor = st.shard0, + recipient = st.shard1, donorColl = donor.getCollection(ns), + recipientColl = recipient.getCollection(ns), donorLocal = donor.getDB('local'), + recipientLocal = recipient.getDB('local'); + + // Two chunks + // Donor: [0, 2) [2, 5) + // Recipient: + jsTest.log('Enable sharding of the collection and pre-split into two chunks....'); + + assert.commandWorked(admin.runCommand({enableSharding: dbName})); + st.ensurePrimaryShard(dbName, shards[0]._id); + assert.commandWorked(donorColl.createIndex({_id: 1})); + assert.commandWorked(admin.runCommand({shardCollection: ns, key: {_id: 1}})); + assert.commandWorked(admin.runCommand({split: ns, middle: {_id: 2}})); + + // 6 documents, + // donor: 2 in the first chunk, 3 in the second. + // recipient: 1 document (shardkey overlaps with a doc in second chunk of donor shard) + jsTest.log('Inserting 5 docs into donor shard, 1 doc into the recipient shard....'); + + for (var i = 0; i < 5; ++i) + assert.writeOK(coll.insert({_id: i})); + assert.eq(5, donorColl.count()); + + for (var i = 2; i < 3; ++i) + assert.writeOK(recipientColl.insert({_id: i})); + assert.eq(1, recipientColl.count()); + + /** + * Set failpoint: recipient will pause migration after cloning chunk data from donor, + * before checking transfer mods log on donor. + */ + + jsTest.log('setting recipient failpoint cloned'); + pauseMigrateAtStep(recipient, migrateStepNames.cloned); + + /** + * Start moving chunk [2, 5) from donor shard to recipient shard, run in the background. 
+ */ + + // Donor: [0, 2) + // Recipient: [2, 5) + jsTest.log('Starting chunk migration, pause after cloning...'); + + var joinMoveChunk = moveChunkParallel( + staticMongod, st.s0.host, {_id: 2}, null, coll.getFullName(), shards[1]._id); + + /** + * Wait for recipient to finish cloning. + * THEN update 1 document {_id: 3} on donor within the currently migrating chunk. + * AND delete 1 document {_id: 4} on donor within the currently migrating chunk. + */ + + waitForMigrateStep(recipient, migrateStepNames.cloned); + + jsTest.log('Update 1 doc and delete 1 doc on donor within the currently migrating chunk...'); + + assert.writeOK(coll.update({_id: 3}, {_id: 3, a: "updated doc"})); + assert.writeOK(coll.remove({_id: 4})); + + /** + * Finish migration. Unpause recipient migration, wait for it to collect + * the transfer mods log from donor and finish migration. + */ + + jsTest.log('Continuing and finishing migration...'); + unpauseMigrateAtStep(recipient, migrateStepNames.cloned); + joinMoveChunk(); + + /** + * Check documents are where they should be: 2 docs in donor chunk, 2 docs in recipient chunk + * (because third doc in recipient shard's chunk got deleted on the donor shard during + * migration). + */ + + jsTest.log('Checking that documents are on the shards they should be...'); + + assert.eq(2, recipientColl.count(), "Recipient shard doesn't have exactly 2 documents!"); + assert.eq(2, donorColl.count(), "Donor shard doesn't have exactly 2 documents!"); + assert.eq(4, coll.count(), "Collection total is not 4!"); + + /** + * Check that the fromMigrate flag has been set correctly in donor and recipient oplogs, + */ + + jsTest.log('Checking donor and recipient oplogs for correct fromMigrate flags...'); + + var donorOplogRes = donorLocal.oplog.rs.find({op: 'd', fromMigrate: true, 'o._id': 2}).count(); + assert.eq(1, + donorOplogRes, + "fromMigrate flag wasn't set on the donor shard's oplog for " + + "migrating delete op on {_id: 2}! Test #2 failed."); + + donorOplogRes = + donorLocal.oplog.rs.find({op: 'd', fromMigrate: {$exists: false}, 'o._id': 4}).count(); + assert.eq(1, + donorOplogRes, + "Real delete of {_id: 4} on donor shard incorrectly set the " + + "fromMigrate flag in the oplog! Test #5 failed."); + + var recipientOplogRes = + recipientLocal.oplog.rs.find({op: 'i', fromMigrate: true, 'o._id': 2}).count(); + assert.eq(1, + recipientOplogRes, + "fromMigrate flag wasn't set on the recipient shard's " + + "oplog for migrating insert op on {_id: 2}! Test #3 failed."); + + recipientOplogRes = + recipientLocal.oplog.rs.find({op: 'd', fromMigrate: true, 'o._id': 2}).count(); + assert.eq(1, + recipientOplogRes, + "fromMigrate flag wasn't set on the recipient shard's " + + "oplog for delete op on the old {_id: 2} that overlapped " + + "with the chunk about to be copied! Test #1 failed."); + + recipientOplogRes = + recipientLocal.oplog.rs.find({op: 'u', fromMigrate: true, 'o._id': 3}).count(); + assert.eq(1, + recipientOplogRes, + "fromMigrate flag wasn't set on the recipient shard's " + + "oplog for update op on {_id: 3}! Test #4 failed."); + + recipientOplogRes = + recipientLocal.oplog.rs.find({op: 'd', fromMigrate: true, 'o._id': 4}).count(); + assert.eq(1, + recipientOplogRes, + "fromMigrate flag wasn't set on the recipient shard's " + + "oplog for delete op on {_id: 4} that occurred during " + + "migration! 
Test #5 failed."); + + jsTest.log('DONE!'); + st.stop(); + +})(); diff --git a/jstests/sharding/migration_with_source_ops.js b/jstests/sharding/migration_with_source_ops.js new file mode 100644 index 00000000000..31b6fff75e9 --- /dev/null +++ b/jstests/sharding/migration_with_source_ops.js @@ -0,0 +1,146 @@ +// +// Tests during chunk migration that the recipient does not receive out of range operations from +// the donor. +// +// Pauses the migration on the recipient shard after the initial data chunk cloning is finished. +// This allows time for the donor shard to perform inserts/deletes/updates, half of which are on +// the migrating chunk. The recipient is then set to continue, collecting the mods from the +// donor's transfer mods log, and finishes the migration. A failpoint is set prior to resuming +// in the recipient shard to fail if it receives an out of chunk range insert/delete/update from +// the donor's transfer mods log. +// +// The idea is that the recipient shard should not be collecting inserts/deletes/updates from +// the donor shard's transfer mods log that are not in range and will unnecessarily prevent the +// migration from finishing: the migration can only end when donor's log of mods for the migrating +// chunk is empty. +// + +load('./jstests/libs/chunk_manipulation_util.js'); + +(function() { + "use strict"; + + var staticMongod = MongoRunner.runMongod({}); // For startParallelOps. + + /** + * Start up new sharded cluster, stop balancer that would interfere in manual chunk management. + */ + + var st = new ShardingTest({shards: 2, mongos: 1}); + st.stopBalancer(); + + var mongos = st.s0, admin = mongos.getDB('admin'), + shards = mongos.getCollection('config.shards').find().toArray(), dbName = "testDB", + ns = dbName + ".foo", coll = mongos.getCollection(ns), donor = st.shard0, + recipient = st.shard1, donorColl = donor.getCollection(ns), + recipientColl = recipient.getCollection(ns); + + /** + * Exable sharding, and split collection into two chunks. + */ + + // Two chunks + // Donor: [0, 20) [20, 40) + // Recipient: + jsTest.log('Enabling sharding of the collection and pre-splitting into two chunks....'); + assert.commandWorked(admin.runCommand({enableSharding: dbName})); + st.ensurePrimaryShard(dbName, shards[0]._id); + assert.commandWorked(admin.runCommand({shardCollection: ns, key: {a: 1}})); + assert.commandWorked(admin.runCommand({split: ns, middle: {a: 20}})); + + /** + * Insert data into collection + */ + + // 10 documents in each chunk on the donor + jsTest.log('Inserting 20 docs into donor shard, 10 in each chunk....'); + for (var i = 0; i < 10; ++i) + assert.writeOK(coll.insert({a: i})); + for (var i = 20; i < 30; ++i) + assert.writeOK(coll.insert({a: i})); + assert.eq(20, coll.count()); + + /** + * Set failpoints. Recipient will crash if an out of chunk range data op is + * received from donor. Recipient will pause migration after cloning chunk data from donor, + * before checking transfer mods log on donor. + */ + + jsTest.log('Setting failpoint failMigrationReceivedOutOfRangeOperation'); + assert.commandWorked(recipient.getDB('admin').runCommand( + {configureFailPoint: 'failMigrationReceivedOutOfRangeOperation', mode: 'alwaysOn'})); + + jsTest.log( + 'Setting chunk migration recipient failpoint so that it pauses after bulk clone step'); + pauseMigrateAtStep(recipient, migrateStepNames.cloned); + + /** + * Start a moveChunk in the background. Move chunk [20, 40), which has 10 docs in the + * range, from shard 0 (donor) to shard 1 (recipient). 
Migration will pause after + * cloning step (when it reaches the recipient failpoint). + */ + + // Donor: [0, 20) + // Recipient: [20, 40) + jsTest.log('Starting migration, pause after cloning...'); + var joinMoveChunk = moveChunkParallel( + staticMongod, st.s0.host, {a: 20}, null, coll.getFullName(), shards[1]._id); + + /** + * Wait for recipient to finish cloning step. + * THEN delete 10 documents on the donor shard, 5 in the migrating chunk and 5 in the remaining + *chunk. + * AND insert 2 documents on the donor shard, 1 in the migrating chunk and 1 in the remaining + *chunk. + * AND update 2 documents on the donor shard, 1 in the migrating chunk and 1 in the remaining + *chunk. + * + * This will populate the migration transfer mods log, which the recipient will collect when it + * is unpaused. + */ + + waitForMigrateStep(recipient, migrateStepNames.cloned); + + jsTest.log('Deleting 5 docs from each chunk, migrating chunk and remaining chunk...'); + assert.writeOK(coll.remove({$and: [{a: {$gte: 5}}, {a: {$lt: 25}}]})); + + jsTest.log('Inserting 1 in the migrating chunk range and 1 in the remaining chunk range...'); + assert.writeOK(coll.insert({a: 10})); + assert.writeOK(coll.insert({a: 30})); + + jsTest.log('Updating 1 in the migrating chunk range and 1 in the remaining chunk range...'); + assert.writeOK(coll.update({a: 0}, {a: 0, updatedData: "updated"})); + assert.writeOK(coll.update({a: 25}, {a: 25, updatedData: "updated"})); + + /** + * Finish migration. Unpause recipient migration, wait for it to collect + * the new ops from the donor shard's migration transfer mods log, and finish. + */ + + jsTest.log('Continuing and finishing migration...'); + unpauseMigrateAtStep(recipient, migrateStepNames.cloned); + joinMoveChunk(); + + /** + * Check documents are where they should be: 6 docs in each shard's respective chunk. + */ + + jsTest.log('Checking that documents are on the shards they should be...'); + assert.eq(6, donorColl.count()); + assert.eq(6, recipientColl.count()); + assert.eq(12, coll.count()); + + /** + * Check that the updated documents are where they should be, one on each shard. 
+ */ + + jsTest.log('Checking that documents were updated correctly...'); + var donorCollUpdatedNum = donorColl.find({updatedData: "updated"}).count(); + assert.eq(1, donorCollUpdatedNum, "Update failed on donor shard during migration!"); + var recipientCollUpdatedNum = recipientColl.find({updatedData: "updated"}).count(); + assert.eq(1, recipientCollUpdatedNum, "Update failed on recipient shard during migration!"); + + jsTest.log('DONE!'); + st.stop(); + +})(); diff --git a/src/mongo/s/d_migrate.cpp b/src/mongo/s/d_migrate.cpp index fdcd48f0ef6..5fa87e71784 100644 --- a/src/mongo/s/d_migrate.cpp +++ b/src/mongo/s/d_migrate.cpp @@ -1827,6 +1827,9 @@ MONGO_FP_DECLARE(migrateThreadHangAtStep2); MONGO_FP_DECLARE(migrateThreadHangAtStep3); MONGO_FP_DECLARE(migrateThreadHangAtStep4); MONGO_FP_DECLARE(migrateThreadHangAtStep5); +MONGO_FP_DECLARE(migrateThreadHangAtStep6); + +MONGO_FP_DECLARE(failMigrationReceivedOutOfRangeOperation); class MigrateStatus { public: @@ -1948,7 +1951,7 @@ public: << epoch.toString() << endl; string errmsg; - MoveTimingHelper timing(txn, "to", ns, min, max, 5 /* steps */, &errmsg, "", ""); + MoveTimingHelper timing(txn, "to", ns, min, max, 6 /* steps */, &errmsg, "", ""); ScopedDbConnection conn(fromShard); conn->getLastError(); // just test connection @@ -2358,6 +2361,8 @@ public: } setState(DONE); + timing.done(6); + MONGO_FP_PAUSE_WHILE(migrateThreadHangAtStep6); conn.done(); } @@ -2415,8 +2420,11 @@ public: BSONObj fullObj; if (Helpers::findById(txn, ctx.db(), ns.c_str(), id, fullObj)) { if (!isInRange(fullObj, min, max, shardKeyPattern)) { - log() << "not applying out of range deletion: " << fullObj << migrateLog; + if (MONGO_FAIL_POINT(failMigrationReceivedOutOfRangeOperation)) { + invariant(false); + } + log() << "not applying out of range deletion: " << fullObj << migrateLog; continue; } } @@ -2447,6 +2455,11 @@ public: BSONObj updatedDoc = i.next().Obj(); + if (MONGO_FAIL_POINT(failMigrationReceivedOutOfRangeOperation) && + !isInRange(updatedDoc, min, max, shardKeyPattern)) { + invariant(false); + } + BSONObj localDoc; if (willOverrideLocalId( txn, ns, min, max, shardKeyPattern, cx.ctx().db(), updatedDoc, &localDoc)) { |
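The d_migrate.cpp hunk above wires two new failpoints into the recipient's migration thread. The snippet below is a hedged sketch of how a test drives them directly with configureFailPoint, mirroring migration_with_source_ops.js; it assumes a running ShardingTest named st, as in the tests added by this commit.

```javascript
// Illustrative sketch, not part of the commit: toggling the failpoints declared above.
var recipientAdmin = st.shard1.getDB('admin');

// Make the recipient's migration thread crash if the donor ever forwards an
// out-of-range insert/update/delete through the transfer-mods log.
assert.commandWorked(recipientAdmin.runCommand(
    {configureFailPoint: 'failMigrationReceivedOutOfRangeOperation', mode: 'alwaysOn'}));

// migrateThreadHangAtStep6 corresponds to migrateStepNames.done in
// chunk_manipulation_util.js; pauseMigrateAtStep(st.shard1, migrateStepNames.done)
// issues this same command.
assert.commandWorked(recipientAdmin.runCommand(
    {configureFailPoint: 'migrateThreadHangAtStep6', mode: 'alwaysOn'}));

// ... run the migration and the assertions of interest ...

assert.commandWorked(recipientAdmin.runCommand(
    {configureFailPoint: 'migrateThreadHangAtStep6', mode: 'off'}));
```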