| author | Dianna Hohensee <dianna.hohensee@10gen.com> | 2015-12-23 14:29:21 -0500 |
| --- | --- | --- |
| committer | Dianna Hohensee <dianna.hohensee@10gen.com> | 2016-01-20 08:32:45 -0500 |
| commit | 8bef9a14e742978df4c5db15968b796e7d63f1f5 (patch) | |
| tree | 4ebd856b95d5851e8916696a920497e67c5e4aaf | |
| parent | f6716ef64ff42a71aa351dc4d10aebbbf893bb6c (diff) | |
| download | mongo-8bef9a14e742978df4c5db15968b796e7d63f1f5.tar.gz | |
SERVER-21945 adding testing coverage for inserts/updates during chunk migration
| -rw-r--r-- | buildscripts/resmokeconfig/suites/sharding_auth.yml | 2 |
| -rw-r--r-- | buildscripts/resmokeconfig/suites/sharding_auth_audit.yml | 2 |
| -rw-r--r-- | jstests/sharding/migration_with_source_deletes.js | 123 |
| -rw-r--r-- | jstests/sharding/migration_with_source_ops.js | 152 |
| -rw-r--r-- | src/mongo/db/s/migration_destination_manager.cpp | 18 |
| -rw-r--r-- | src/mongo/db/s/migration_source_manager.h | 2 |
6 files changed, 168 insertions, 131 deletions
diff --git a/buildscripts/resmokeconfig/suites/sharding_auth.yml b/buildscripts/resmokeconfig/suites/sharding_auth.yml
index a114442e28a..d2474b28526 100644
--- a/buildscripts/resmokeconfig/suites/sharding_auth.yml
+++ b/buildscripts/resmokeconfig/suites/sharding_auth.yml
@@ -21,7 +21,7 @@ selector:
   # Skip the testcases that do not have auth bypass when running ops in parallel.
   - jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js # SERVER-21713
   - jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js # SERVER-21713
-  - jstests/sharding/migration_with_source_deletes.js # SERVER-21713
+  - jstests/sharding/migration_with_source_ops.js # SERVER-21713
   - jstests/sharding/migration_sets_fromMigrate_flag.js # SERVER-21713

 executor:
diff --git a/buildscripts/resmokeconfig/suites/sharding_auth_audit.yml b/buildscripts/resmokeconfig/suites/sharding_auth_audit.yml
index b8845773d5c..9982507a546 100644
--- a/buildscripts/resmokeconfig/suites/sharding_auth_audit.yml
+++ b/buildscripts/resmokeconfig/suites/sharding_auth_audit.yml
@@ -21,7 +21,7 @@ selector:
   # Skip the testcases that do not have auth bypass when running ops in parallel.
   - jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js # SERVER-21713
   - jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js # SERVER-21713
-  - jstests/sharding/migration_with_source_deletes.js # SERVER-21713
+  - jstests/sharding/migration_with_source_ops.js # SERVER-21713
   - jstests/sharding/migration_sets_fromMigrate_flag.js # SERVER-21713

 executor:
diff --git a/jstests/sharding/migration_with_source_deletes.js b/jstests/sharding/migration_with_source_deletes.js
deleted file mode 100644
index afc00e513e2..00000000000
--- a/jstests/sharding/migration_with_source_deletes.js
+++ /dev/null
@@ -1,123 +0,0 @@
-//
-// Tests during chunk migration that the recipient does not receive out of range deletes from
-// the donor.
-//
-// Pauses the migration on the recipient shard after the initial data chunk cloning is finished.
-// This allows time for the donor shard to perform deletes, half of which are on the migrating
-// chunk. The recipient is then set to continue, collecting the delete mods from the donor, and
-// finishes the migration. A failpoint is set prior to resuming in the recipient shard to fail
-// if it receives an out of chunk range delete from the donor's delete mods log.
-//
-// The idea is that the recipient shard should not be collecting deletes from the donor shard
-// that are not in range and that will unnecessarily prevent the migration from finishing: the
-// migration can only end when donor's log of delete mods for the migrating chunk is empty.
-//
-
-load('./jstests/libs/chunk_manipulation_util.js');
-
-(function() {
-"use strict";
-
-var staticMongod = MongoRunner.runMongod({}); // For startParallelOps.
-
-/**
- * Start up new sharded cluster, stop balancer that would interfere in manual chunk management.
- */
-
-var st = new ShardingTest({ shards : 2, mongos : 1 });
-st.stopBalancer();
-
-var mongos = st.s0,
-    admin = mongos.getDB('admin'),
-    shards = mongos.getCollection('config.shards').find().toArray(),
-    dbName = "testDB",
-    ns = dbName + ".foo",
-    coll = mongos.getCollection(ns),
-    donor = st.shard0,
-    recipient = st.shard1,
-    donorColl = donor.getCollection(ns),
-    recipientColl = recipient.getCollection(ns);
-
-/**
- * Enable sharding, and split collection into two chunks.
- */
-
-// Two chunks
-// Donor:     [0, 10) [10, 20)
-// Recipient:
-jsTest.log('Enable sharding of the collection and pre-split into two chunks....');
-assert.commandWorked( admin.runCommand( {enableSharding: dbName} ) );
-st.ensurePrimaryShard(dbName, shards[0]._id);
-assert.commandWorked( admin.runCommand( {shardCollection: ns, key: {a: 1}} ) );
-assert.commandWorked( admin.runCommand( {split: ns, middle: {a: 10}} ) );
-
-/**
- * Insert data into collection
- */
-
-// 10 documents in each chunk on the donor
-jsTest.log('Inserting 20 docs into donor shard, 10 in each chunk....');
-for (var i = 0; i < 20; ++i) donorColl.insert( {a: i} );
-assert.eq(null, donorColl.getDB().getLastError());
-assert.eq(20, donorColl.count());
-
-/**
- * Set failpoints. Recipient will crash if an out of chunk range data delete is
- * received from donor. Recipient will pause migration after cloning chunk data from donor,
- * before checking delete mods log on donor.
- */
-
-jsTest.log('setting failpoint failMigrationReceivedOutOfRangeDelete');
-assert.commandWorked(recipient.getDB('admin').runCommand( {configureFailPoint: 'failMigrationReceivedOutOfRangeDelete', mode: 'alwaysOn'} ))
-
-jsTest.log('setting recipient failpoint cloned');
-pauseMigrateAtStep(recipient, migrateStepNames.cloned);
-
-/**
- * Start a moveChunk in the background. Move chunk [10, 20), which has 10 docs,
- * from shard 0 (donor) to shard 1 (recipient). Migration will pause after cloning
- * (when it reaches the recipient failpoint).
- */
-
-// Donor:     [0, 10)
-// Recipient: [10, 20)
-jsTest.log('starting migration, pause after cloning...');
-var joinMoveChunk = moveChunkParallel(
-    staticMongod,
-    st.s0.host,
-    {a: 10},
-    null,
-    coll.getFullName(),
-    shards[1]._id);
-
-/**
- * Wait for recipient to finish cloning.
- * THEN delete 10 documents on donor, 5 in the migrating chunk and 5 in the remaining chunk.
- */
-
-jsTest.log('Delete 5 docs from each chunk, migrating chunk and remaining chunk...');
-waitForMigrateStep(recipient, migrateStepNames.cloned);
-donorColl.remove( {$and : [ {a: {$gte: 5}}, {a: {$lt: 15}} ]} );
-
-/**
- * Finish migration. Unpause recipient migration, wait for it to collect
- * the delete diffs from donor and finish.
- */
-
-jsTest.log('Continuing and finishing migration...');
-unpauseMigrateAtStep(recipient, migrateStepNames.cloned);
-joinMoveChunk();
-
-/**
- * Check documents are where they should be: 5 docs in each shard's chunk.
- */
-
-jsTest.log('Checking that documents are on the shards they should be...');
-assert.eq(5, donorColl.count());
-assert.eq(5, recipientColl.count());
-assert.eq(10, coll.count());
-
-jsTest.log('DONE!');
-st.stop();
-
-})()
diff --git a/jstests/sharding/migration_with_source_ops.js b/jstests/sharding/migration_with_source_ops.js
new file mode 100644
index 00000000000..621b2de9b42
--- /dev/null
+++ b/jstests/sharding/migration_with_source_ops.js
@@ -0,0 +1,152 @@
+//
+// Tests during chunk migration that the recipient does not receive out of range operations from
+// the donor.
+//
+// Pauses the migration on the recipient shard after the initial data chunk cloning is finished.
+// This allows time for the donor shard to perform inserts/deletes/updates, half of which are on
+// the migrating chunk. The recipient is then set to continue, collecting the mods from the
+// donor's transfer mods log, and finishes the migration. A failpoint is set prior to resuming
+// in the recipient shard to fail if it receives an out of chunk range insert/delete/update from
+// the donor's transfer mods log.
+//
+// The idea is that the recipient shard should not be collecting inserts/deletes/updates from
+// the donor shard's transfer mods log that are not in range and will unnecessarily prevent the
+// migration from finishing: the migration can only end when donor's log of mods for the migrating
+// chunk is empty.
+//
+
+load('./jstests/libs/chunk_manipulation_util.js');
+
+(function() {
+"use strict";
+
+var staticMongod = MongoRunner.runMongod({}); // For startParallelOps.
+
+/**
+ * Start up new sharded cluster, stop balancer that would interfere in manual chunk management.
+ */
+
+var st = new ShardingTest({ shards : 2, mongos : 1 });
+st.stopBalancer();
+
+var mongos = st.s0,
+    admin = mongos.getDB('admin'),
+    shards = mongos.getCollection('config.shards').find().toArray(),
+    dbName = "testDB",
+    ns = dbName + ".foo",
+    coll = mongos.getCollection(ns),
+    donor = st.shard0,
+    recipient = st.shard1,
+    donorColl = donor.getCollection(ns),
+    recipientColl = recipient.getCollection(ns);
+
+/**
+ * Enable sharding, and split collection into two chunks.
+ */
+
+// Two chunks
+// Donor:     [0, 20) [20, 40)
+// Recipient:
+jsTest.log('Enabling sharding of the collection and pre-splitting into two chunks....');
+assert.commandWorked(admin.runCommand({enableSharding: dbName}));
+st.ensurePrimaryShard(dbName, shards[0]._id);
+assert.commandWorked(admin.runCommand({shardCollection: ns, key: {a: 1}}));
+assert.commandWorked(admin.runCommand({split: ns, middle: {a: 20}}));
+
+/**
+ * Insert data into collection
+ */
+
+// 10 documents in each chunk on the donor
+jsTest.log('Inserting 20 docs into donor shard, 10 in each chunk....');
+for (var i = 0; i < 10; ++i)
+    assert.writeOK(coll.insert({a: i}));
+for (var i = 20; i < 30; ++i)
+    assert.writeOK(coll.insert({a: i}));
+assert.eq(20, coll.count());
+
+/**
+ * Set failpoints. Recipient will crash if an out of chunk range data op is
+ * received from donor. Recipient will pause migration after cloning chunk data from donor,
+ * before checking transfer mods log on donor.
+ */
+
+jsTest.log('Setting failpoint failMigrationReceivedOutOfRangeOperation');
+assert.commandWorked(recipient.getDB('admin').runCommand(
+    {configureFailPoint: 'failMigrationReceivedOutOfRangeOperation', mode: 'alwaysOn'}));
+
+jsTest.log('Setting chunk migration recipient failpoint so that it pauses after bulk clone step');
+pauseMigrateAtStep(recipient, migrateStepNames.cloned);
+
+/**
+ * Start a moveChunk in the background. Move chunk [20, 40), which has 10 docs in the
+ * range, from shard 0 (donor) to shard 1 (recipient). Migration will pause after
+ * cloning step (when it reaches the recipient failpoint).
+ */
+
+// Donor:     [0, 20)
+// Recipient: [20, 40)
+jsTest.log('Starting migration, pause after cloning...');
+var joinMoveChunk = moveChunkParallel(
+    staticMongod,
+    st.s0.host,
+    {a: 20},
+    null,
+    coll.getFullName(),
+    shards[1]._id);
+
+/**
+ * Wait for recipient to finish cloning step.
+ * THEN delete 10 documents on the donor shard, 5 in the migrating chunk and 5 in the remaining chunk.
+ * AND insert 2 documents on the donor shard, 1 in the migrating chunk and 1 in the remaining chunk.
+ * AND update 2 documents on the donor shard, 1 in the migrating chunk and 1 in the remaining chunk.
+ *
+ * This will populate the migration transfer mods log, which the recipient will collect when it
+ * is unpaused.
+ */
+
+waitForMigrateStep(recipient, migrateStepNames.cloned);
+
+jsTest.log('Deleting 5 docs from each chunk, migrating chunk and remaining chunk...');
+assert.writeOK(coll.remove({$and : [ {a: {$gte: 5}}, {a: {$lt: 25}} ]}));
+
+jsTest.log('Inserting 1 in the migrating chunk range and 1 in the remaining chunk range...');
+assert.writeOK(coll.insert({a: 10}));
+assert.writeOK(coll.insert({a: 30}));
+
+jsTest.log('Updating 1 in the migrating chunk range and 1 in the remaining chunk range...');
+assert.writeOK(coll.update({a: 0}, {a: 0, updatedData: "updated"}));
+assert.writeOK(coll.update({a: 25}, {a: 25, updatedData: "updated"}));
+
+/**
+ * Finish migration. Unpause recipient migration, wait for it to collect
+ * the new ops from the donor shard's migration transfer mods log, and finish.
+ */
+
+jsTest.log('Continuing and finishing migration...');
+unpauseMigrateAtStep(recipient, migrateStepNames.cloned);
+joinMoveChunk();
+
+/**
+ * Check documents are where they should be: 6 docs in each shard's respective chunk.
+ */
+
+jsTest.log('Checking that documents are on the shards they should be...');
+assert.eq(6, donorColl.count());
+assert.eq(6, recipientColl.count());
+assert.eq(12, coll.count());
+
+/**
+ * Check that the updated documents are where they should be, one on each shard.
+ */
+
+jsTest.log('Checking that documents were updated correctly...');
+var donorCollUpdatedNum = donorColl.find({updatedData: "updated"}).count();
+assert.eq(1, donorCollUpdatedNum, "Update failed on donor shard during migration!");
+var recipientCollUpdatedNum = recipientColl.find({updatedData: "updated"}).count();
+assert.eq(1, recipientCollUpdatedNum, "Update failed on recipient shard during migration!");
+
+jsTest.log('DONE!');
+st.stop();
+
+})()
diff --git a/src/mongo/db/s/migration_destination_manager.cpp b/src/mongo/db/s/migration_destination_manager.cpp
index 27e767cd6a9..c354ee5dd41 100644
--- a/src/mongo/db/s/migration_destination_manager.cpp
+++ b/src/mongo/db/s/migration_destination_manager.cpp
@@ -161,7 +161,7 @@ bool opReplicatedEnough(OperationContext* txn,
     return majorityStatus.isOK() && userStatus.isOK();
 }

-MONGO_FP_DECLARE(failMigrationReceivedOutOfRangeDelete);
+MONGO_FP_DECLARE(failMigrationReceivedOutOfRangeOperation);

 } // namespace

@@ -816,18 +816,18 @@ bool MigrationDestinationManager::_applyMigrateOp(OperationContext* txn,
         Lock::DBLock dlk(txn->lockState(), nsToDatabaseSubstring(ns), MODE_IX);
         Helpers::RemoveSaver rs("moveChunk", ns, "removedDuring");

-        BSONObjIterator i(xfer["deleted"].Obj());
+        BSONObjIterator i(xfer["deleted"].Obj()); // deleted documents
         while (i.more()) {
             Lock::CollectionLock clk(txn->lockState(), ns, MODE_X);
             OldClientContext ctx(txn, ns);

             BSONObj id = i.next().Obj();

-            // do not apply deletes if they do not belong to the chunk being migrated
+            // do not apply delete if doc does not belong to the chunk being migrated
             BSONObj fullObj;
             if (Helpers::findById(txn, ctx.db(), ns.c_str(), id, fullObj)) {
                 if (!isInRange(fullObj, min, max, shardKeyPattern)) {
-                    if (MONGO_FAIL_POINT(failMigrationReceivedOutOfRangeDelete)) {
+                    if (MONGO_FAIL_POINT(failMigrationReceivedOutOfRangeOperation)) {
                         invariant(0);
                     }
                     continue;
@@ -852,13 +852,21 @@ bool MigrationDestinationManager::_applyMigrateOp(OperationContext* txn,
         }
     }

-    if (xfer["reload"].isABSONObj()) {
+    if (xfer["reload"].isABSONObj()) { // modified documents (insert/update)
         BSONObjIterator i(xfer["reload"].Obj());
         while (i.more()) {
             OldClientWriteContext cx(txn, ns);

             BSONObj updatedDoc = i.next().Obj();

+            // do not apply insert/update if doc does not belong to the chunk being migrated
+            if (!isInRange(updatedDoc, min, max, shardKeyPattern)) {
+                if (MONGO_FAIL_POINT(failMigrationReceivedOutOfRangeOperation)) {
+                    invariant(0);
+                }
+                continue;
+            }
+
             BSONObj localDoc;
             if (willOverrideLocalId(
                     txn, ns, min, max, shardKeyPattern, cx.db(), updatedDoc, &localDoc)) {
diff --git a/src/mongo/db/s/migration_source_manager.h b/src/mongo/db/s/migration_source_manager.h
index 64ed807b1d6..3f3c662e981 100644
--- a/src/mongo/db/s/migration_source_manager.h
+++ b/src/mongo/db/s/migration_source_manager.h
@@ -146,7 +146,7 @@ private:
     /**
      * Insert items from docIdList to a new array with the given fieldName in the given builder. If
-     * explode is true, the inserted object will be the full version of the document. Note that the
+     * explode is true, the inserted object will be the full version of the document. Note that
      * whenever an item from the docList is inserted to the array, it will also be removed from
      * docList.
      *
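For anyone wiring the renamed failpoint into another jstest, the pattern used by migration_with_source_ops.js above is the standard configureFailPoint admin command run against the recipient shard. The following is a minimal sketch, not part of the commit: it assumes a `recipient` connection to the recipient shard (such as `st.shard1` in the test above), and the `'off'` mode is the usual way to clear a failpoint once the migration has finished.

```javascript
// Sketch: guarding a chunk migration test with failMigrationReceivedOutOfRangeOperation.
// Assumes `recipient` is a connection to the recipient shard (e.g. st.shard1).
var recipientAdmin = recipient.getDB('admin');

// Make the recipient hit invariant(0) if the donor transfers any out-of-range op.
assert.commandWorked(recipientAdmin.runCommand(
    {configureFailPoint: 'failMigrationReceivedOutOfRangeOperation', mode: 'alwaysOn'}));

// ... run moveChunk and the concurrent inserts/updates/deletes here ...

// Clear the failpoint once the migration has completed.
assert.commandWorked(recipientAdmin.runCommand(
    {configureFailPoint: 'failMigrationReceivedOutOfRangeOperation', mode: 'off'}));
```

While the failpoint is active, any out-of-range insert, update, or delete received from the donor's transfer mods log trips `invariant(0)` on the recipient, which is exactly the failure signal the new test relies on.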