author      Dianna Hohensee <dianna.hohensee@10gen.com>    2015-12-23 14:29:21 -0500
committer   Dianna Hohensee <dianna.hohensee@10gen.com>    2016-01-20 08:32:45 -0500
commit      8bef9a14e742978df4c5db15968b796e7d63f1f5 (patch)
tree        4ebd856b95d5851e8916696a920497e67c5e4aaf
parent      f6716ef64ff42a71aa351dc4d10aebbbf893bb6c (diff)
download    mongo-8bef9a14e742978df4c5db15968b796e7d63f1f5.tar.gz
SERVER-21945 adding testing coverage for inserts/updates during chunk migration
-rw-r--r--   buildscripts/resmokeconfig/suites/sharding_auth.yml         2
-rw-r--r--   buildscripts/resmokeconfig/suites/sharding_auth_audit.yml   2
-rw-r--r--   jstests/sharding/migration_with_source_deletes.js           123
-rw-r--r--   jstests/sharding/migration_with_source_ops.js               152
-rw-r--r--   src/mongo/db/s/migration_destination_manager.cpp            18
-rw-r--r--   src/mongo/db/s/migration_source_manager.h                   2
6 files changed, 168 insertions, 131 deletions
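
For reference, a minimal sketch (not part of the patch) of how the failpoint renamed by this commit could be toggled from a test; 'recipient' is assumed to be a connection to the recipient shard, as in the new jstest below:

assert.commandWorked(recipient.getDB('admin').runCommand(
    {configureFailPoint: 'failMigrationReceivedOutOfRangeOperation', mode: 'alwaysOn'}));
// ... run the migration and the concurrent inserts/updates/deletes ...
assert.commandWorked(recipient.getDB('admin').runCommand(
    {configureFailPoint: 'failMigrationReceivedOutOfRangeOperation', mode: 'off'}));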
diff --git a/buildscripts/resmokeconfig/suites/sharding_auth.yml b/buildscripts/resmokeconfig/suites/sharding_auth.yml
index a114442e28a..d2474b28526 100644
--- a/buildscripts/resmokeconfig/suites/sharding_auth.yml
+++ b/buildscripts/resmokeconfig/suites/sharding_auth.yml
@@ -21,7 +21,7 @@ selector:
# Skip the testcases that do not have auth bypass when running ops in parallel.
- jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js # SERVER-21713
- jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js # SERVER-21713
- - jstests/sharding/migration_with_source_deletes.js # SERVER-21713
+ - jstests/sharding/migration_with_source_ops.js # SERVER-21713
- jstests/sharding/migration_sets_fromMigrate_flag.js # SERVER-21713
executor:
diff --git a/buildscripts/resmokeconfig/suites/sharding_auth_audit.yml b/buildscripts/resmokeconfig/suites/sharding_auth_audit.yml
index b8845773d5c..9982507a546 100644
--- a/buildscripts/resmokeconfig/suites/sharding_auth_audit.yml
+++ b/buildscripts/resmokeconfig/suites/sharding_auth_audit.yml
@@ -21,7 +21,7 @@ selector:
# Skip the testcases that do not have auth bypass when running ops in parallel.
- jstests/sharding/cleanup_orphaned_cmd_during_movechunk.js # SERVER-21713
- jstests/sharding/cleanup_orphaned_cmd_during_movechunk_hashed.js # SERVER-21713
- - jstests/sharding/migration_with_source_deletes.js # SERVER-21713
+ - jstests/sharding/migration_with_source_ops.js # SERVER-21713
- jstests/sharding/migration_sets_fromMigrate_flag.js # SERVER-21713
executor:
diff --git a/jstests/sharding/migration_with_source_deletes.js b/jstests/sharding/migration_with_source_deletes.js
deleted file mode 100644
index afc00e513e2..00000000000
--- a/jstests/sharding/migration_with_source_deletes.js
+++ /dev/null
@@ -1,123 +0,0 @@
-//
-// Tests during chunk migration that the recipient does not receive out of range deletes from
-// the donor.
-//
-// Pauses the migration on the recipient shard after the initial data chunk cloning is finished.
-// This allows time for the donor shard to perform deletes, half of which are on the migrating
-// chunk. The recipient is then set to continue, collecting the delete mods from the donor, and
-// finishes the migration. A failpoint is set prior to resuming in the recipient shard to fail
-// if it receives an out of chunk range delete from the donor's delete mods log.
-//
-// The idea is that the recipient shard should not be collecting deletes from the donor shard
-// that are not in range and that will unnecessarily prevent the migration from finishing: the
-// migration can only end when donor's log of delete mods for the migrating chunk is empty.
-//
-
-load('./jstests/libs/chunk_manipulation_util.js');
-
-(function() {
-"use strict";
-
-var staticMongod = MongoRunner.runMongod({}); // For startParallelOps.
-
-/**
- * Start up new sharded cluster, stop balancer that would interfere in manual chunk management.
- */
-
-var st = new ShardingTest({ shards : 2, mongos : 1 });
-st.stopBalancer();
-
-var mongos = st.s0,
- admin = mongos.getDB('admin'),
- shards = mongos.getCollection('config.shards').find().toArray(),
- dbName = "testDB",
- ns = dbName + ".foo",
- coll = mongos.getCollection(ns),
- donor = st.shard0,
- recipient = st.shard1,
- donorColl = donor.getCollection(ns),
- recipientColl = recipient.getCollection(ns);
-
-/**
- * Exable sharding, and split collection into two chunks.
- */
-
-// Two chunks
-// Donor: [0, 10) [10, 20)
-// Recipient:
-jsTest.log('Enable sharding of the collection and pre-split into two chunks....');
-assert.commandWorked( admin.runCommand( {enableSharding: dbName} ) );
-st.ensurePrimaryShard(dbName, shards[0]._id);
-assert.commandWorked( admin.runCommand( {shardCollection: ns, key: {a: 1}} ) );
-assert.commandWorked( admin.runCommand( {split: ns, middle: {a: 10}} ) );
-
-/**
- * Insert data into collection
- */
-
-// 10 documents in each chunk on the donor
-jsTest.log('Inserting 20 docs into donor shard, 10 in each chunk....');
-for (var i = 0; i < 20; ++i) donorColl.insert( {a: i} );
-assert.eq(null, donorColl.getDB().getLastError());
-assert.eq(20, donorColl.count());
-
-/**
- * Set failpoints. Recipient will crash if an out of chunk range data delete is
- * received from donor. Recipient will pause migration after cloning chunk data from donor,
- * before checking delete mods log on donor.
- */
-
-jsTest.log('setting failpoint failMigrationReceivedOutOfRangeDelete');
-assert.commandWorked(recipient.getDB('admin').runCommand( {configureFailPoint: 'failMigrationReceivedOutOfRangeDelete', mode: 'alwaysOn'} ))
-
-jsTest.log('setting recipient failpoint cloned');
-pauseMigrateAtStep(recipient, migrateStepNames.cloned);
-
-/**
- * Start a moveChunk in the background. Move chunk [10, 20), which has 10 docs,
- * from shard 0 (donor) to shard 1 (recipient). Migration will pause after cloning
- * (when it reaches the recipient failpoint).
- */
-
-// Donor: [0, 10)
-// Recipient: [10, 20)
-jsTest.log('starting migration, pause after cloning...');
-var joinMoveChunk = moveChunkParallel(
- staticMongod,
- st.s0.host,
- {a: 10},
- null,
- coll.getFullName(),
- shards[1]._id);
-
-/**
- * Wait for recipient to finish cloning.
- * THEN delete 10 documents on donor, 5 in the migrating chunk and the 5 in the remaining chunk.
- */
-
-jsTest.log('Delete 5 docs from each chunk, migrating chunk and remaining chunk...');
-waitForMigrateStep(recipient, migrateStepNames.cloned);
-donorColl.remove( {$and : [ {a: {$gte: 5}}, {a: {$lt: 15}} ]} );
-
-/**
- * Finish migration. Unpause recipient migration, wait for it to collect
- * the delete diffs from donor and finish.
- */
-
-jsTest.log('Continuing and finishing migration...');
-unpauseMigrateAtStep(recipient, migrateStepNames.cloned);
-joinMoveChunk();
-
-/**
- * Check documents are where they should be: 5 docs in each shard's chunk.
- */
-
-jsTest.log('Checking that documents are on the shards they should be...');
-assert.eq(5, donorColl.count());
-assert.eq(5, recipientColl.count());
-assert.eq(10, coll.count());
-
-jsTest.log('DONE!');
-st.stop();
-
-})()
diff --git a/jstests/sharding/migration_with_source_ops.js b/jstests/sharding/migration_with_source_ops.js
new file mode 100644
index 00000000000..621b2de9b42
--- /dev/null
+++ b/jstests/sharding/migration_with_source_ops.js
@@ -0,0 +1,152 @@
+//
+// Tests during chunk migration that the recipient does not receive out of range operations from
+// the donor.
+//
+// Pauses the migration on the recipient shard after the initial data chunk cloning is finished.
+// This allows time for the donor shard to perform inserts/deletes/updates, half of which are on
+// the migrating chunk. The recipient is then set to continue, collecting the mods from the
+// donor's transfer mods log, and finishes the migration. A failpoint is set on the recipient
+// shard prior to resuming so that it fails if it receives an out-of-range insert/delete/update
+// from the donor's transfer mods log.
+//
+// The idea is that the recipient shard should not collect inserts/deletes/updates from the
+// donor shard's transfer mods log that are out of range, since they would unnecessarily prevent
+// the migration from finishing: the migration can only end when the donor's log of mods for the
+// migrating chunk is empty.
+//
+
+load('./jstests/libs/chunk_manipulation_util.js');
+
+(function() {
+"use strict";
+
+var staticMongod = MongoRunner.runMongod({}); // For startParallelOps.
+
+/**
+ * Start up new sharded cluster, stop balancer that would interfere in manual chunk management.
+ */
+
+var st = new ShardingTest({ shards : 2, mongos : 1 });
+st.stopBalancer();
+
+var mongos = st.s0,
+ admin = mongos.getDB('admin'),
+ shards = mongos.getCollection('config.shards').find().toArray(),
+ dbName = "testDB",
+ ns = dbName + ".foo",
+ coll = mongos.getCollection(ns),
+ donor = st.shard0,
+ recipient = st.shard1,
+ donorColl = donor.getCollection(ns),
+ recipientColl = recipient.getCollection(ns);
+
+/**
+ * Enable sharding, and split the collection into two chunks.
+ */
+
+// Two chunks
+// Donor: [0, 20) [20, 40)
+// Recipient:
+jsTest.log('Enabling sharding of the collection and pre-splitting into two chunks....');
+assert.commandWorked(admin.runCommand({enableSharding: dbName}));
+st.ensurePrimaryShard(dbName, shards[0]._id);
+assert.commandWorked(admin.runCommand({shardCollection: ns, key: {a: 1}}));
+assert.commandWorked(admin.runCommand({split: ns, middle: {a: 20}}));
+
+/**
+ * Insert data into collection
+ */
+
+// 10 documents in each chunk on the donor
+jsTest.log('Inserting 20 docs into donor shard, 10 in each chunk....');
+for (var i = 0; i < 10; ++i)
+ assert.writeOK(coll.insert({a: i}));
+for (var i = 20; i < 30; ++i)
+ assert.writeOK(coll.insert({a: i}));
+assert.eq(20, coll.count());
+
+/**
+ * Set failpoints. Recipient will crash if an out of chunk range data op is
+ * received from donor. Recipient will pause migration after cloning chunk data from donor,
+ * before checking transfer mods log on donor.
+ */
+
+jsTest.log('Setting failpoint failMigrationReceivedOutOfRangeOperation');
+assert.commandWorked(recipient.getDB('admin').runCommand(
+ {configureFailPoint: 'failMigrationReceivedOutOfRangeOperation', mode: 'alwaysOn'}));
+
+jsTest.log('Setting chunk migration recipient failpoint so that it pauses after bulk clone step');
+pauseMigrateAtStep(recipient, migrateStepNames.cloned);
+
+/**
+ * Start a moveChunk in the background. Move chunk [20, 40), which has 10 docs in the
+ * range, from shard 0 (donor) to shard 1 (recipient). Migration will pause after
+ * cloning step (when it reaches the recipient failpoint).
+ */
+
+// Donor: [0, 20)
+// Recipient: [20, 40)
+jsTest.log('Starting migration, pause after cloning...');
+var joinMoveChunk = moveChunkParallel(
+ staticMongod,
+ st.s0.host,
+ {a: 20},
+ null,
+ coll.getFullName(),
+ shards[1]._id);
+
+/**
+ * Wait for recipient to finish cloning step.
+ * THEN delete 10 documents on the donor shard, 5 in the migrating chunk and 5 in the remaining chunk.
+ * AND insert 2 documents on the donor shard, 1 in the migrating chunk and 1 in the remaining chunk.
+ * AND update 2 documents on the donor shard, 1 in the migrating chunk and 1 in the remaining chunk.
+ *
+ * This will populate the migration transfer mods log, which the recipient will collect when it
+ * is unpaused.
+ */
+
+waitForMigrateStep(recipient, migrateStepNames.cloned);
+
+jsTest.log('Deleting 5 docs from each chunk, migrating chunk and remaining chunk...');
+assert.writeOK(coll.remove({$and : [ {a: {$gte: 5}}, {a: {$lt: 25}} ]}));
+
+jsTest.log('Inserting 1 in the migrating chunk range and 1 in the remaining chunk range...');
+assert.writeOK(coll.insert({a: 10}));
+assert.writeOK(coll.insert({a: 30}));
+
+jsTest.log('Updating 1 in the migrating chunk range and 1 in the remaining chunk range...');
+assert.writeOK(coll.update({a: 0}, {a: 0, updatedData: "updated"}));
+assert.writeOK(coll.update({a: 25}, {a: 25, updatedData: "updated"}));
+
+/**
+ * Finish migration. Unpause recipient migration, wait for it to collect
+ * the new ops from the donor shard's migration transfer mods log, and finish.
+ */
+
+jsTest.log('Continuing and finishing migration...');
+unpauseMigrateAtStep(recipient, migrateStepNames.cloned);
+joinMoveChunk();
+
+/**
+ * Check documents are where they should be: 6 docs in each shard's respective chunk.
+ */
+
+jsTest.log('Checking that documents are on the shards they should be...');
+assert.eq(6, donorColl.count());
+assert.eq(6, recipientColl.count());
+assert.eq(12, coll.count());
+
+/**
+ * Check that the updated documents are where they should be, one on each shard.
+ */
+
+jsTest.log('Checking that documents were updated correctly...');
+var donorCollUpdatedNum = donorColl.find({updatedData: "updated"}).count();
+assert.eq(1, donorCollUpdatedNum, "Update failed on donor shard during migration!");
+var recipientCollUpdatedNum = recipientColl.find({updatedData: "updated"}).count();
+assert.eq(1, recipientCollUpdatedNum, "Update failed on recipient shard during migration!");
+
+jsTest.log('DONE!');
+st.stop();
+
+})()
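
As a quick sanity check, one could also confirm that the config metadata agrees with the document counts asserted at the end of the test. A minimal sketch, assuming the mongos/ns/shards variables from the test above, run after joinMoveChunk() returns and before st.stop():

// The chunk whose lower bound is {a: 20} should now be owned by the recipient shard.
var migratedChunk = mongos.getDB('config').chunks.findOne({ns: ns, 'min.a': 20});
assert.neq(null, migratedChunk);
assert.eq(shards[1]._id, migratedChunk.shard);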
diff --git a/src/mongo/db/s/migration_destination_manager.cpp b/src/mongo/db/s/migration_destination_manager.cpp
index 27e767cd6a9..c354ee5dd41 100644
--- a/src/mongo/db/s/migration_destination_manager.cpp
+++ b/src/mongo/db/s/migration_destination_manager.cpp
@@ -161,7 +161,7 @@ bool opReplicatedEnough(OperationContext* txn,
return majorityStatus.isOK() && userStatus.isOK();
}
-MONGO_FP_DECLARE(failMigrationReceivedOutOfRangeDelete);
+MONGO_FP_DECLARE(failMigrationReceivedOutOfRangeOperation);
} // namespace
@@ -816,18 +816,18 @@ bool MigrationDestinationManager::_applyMigrateOp(OperationContext* txn,
Lock::DBLock dlk(txn->lockState(), nsToDatabaseSubstring(ns), MODE_IX);
Helpers::RemoveSaver rs("moveChunk", ns, "removedDuring");
- BSONObjIterator i(xfer["deleted"].Obj());
+ BSONObjIterator i(xfer["deleted"].Obj()); // deleted documents
while (i.more()) {
Lock::CollectionLock clk(txn->lockState(), ns, MODE_X);
OldClientContext ctx(txn, ns);
BSONObj id = i.next().Obj();
- // do not apply deletes if they do not belong to the chunk being migrated
+ // do not apply delete if doc does not belong to the chunk being migrated
BSONObj fullObj;
if (Helpers::findById(txn, ctx.db(), ns.c_str(), id, fullObj)) {
if (!isInRange(fullObj, min, max, shardKeyPattern)) {
- if (MONGO_FAIL_POINT(failMigrationReceivedOutOfRangeDelete)) {
+ if (MONGO_FAIL_POINT(failMigrationReceivedOutOfRangeOperation)) {
invariant(0);
}
continue;
@@ -852,13 +852,21 @@ bool MigrationDestinationManager::_applyMigrateOp(OperationContext* txn,
}
}
- if (xfer["reload"].isABSONObj()) {
+ if (xfer["reload"].isABSONObj()) { // modified documents (insert/update)
BSONObjIterator i(xfer["reload"].Obj());
while (i.more()) {
OldClientWriteContext cx(txn, ns);
BSONObj updatedDoc = i.next().Obj();
+ // do not apply insert/update if doc does not belong to the chunk being migrated
+ if (!isInRange(updatedDoc, min, max, shardKeyPattern)) {
+ if (MONGO_FAIL_POINT(failMigrationReceivedOutOfRangeOperation)) {
+ invariant(0);
+ }
+ continue;
+ }
+
BSONObj localDoc;
if (willOverrideLocalId(
txn, ns, min, max, shardKeyPattern, cx.db(), updatedDoc, &localDoc)) {
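
The recipient-side filter added above means that inserts/updates in the donor's transfer mods log that fall outside the migrating chunk are skipped rather than applied. One minimal way to observe this from the jstest, assuming the recipientColl variable and the chunk ranges used above, is a direct query against the recipient shard after the migration completes:

// No documents below the migrated chunk's lower bound should have been applied
// on the recipient via the transfer mods log.
assert.eq(0, recipientColl.find({a: {$lt: 20}}).itcount());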
diff --git a/src/mongo/db/s/migration_source_manager.h b/src/mongo/db/s/migration_source_manager.h
index 64ed807b1d6..3f3c662e981 100644
--- a/src/mongo/db/s/migration_source_manager.h
+++ b/src/mongo/db/s/migration_source_manager.h
@@ -146,7 +146,7 @@ private:
/**
* Insert items from docIdList to a new array with the given fieldName in the given builder. If
- * explode is true, the inserted object will be the full version of the document. Note that the
+ * explode is true, the inserted object will be the full version of the document. Note that
* whenever an item from the docList is inserted to the array, it will also be removed from
* docList.
*