Diffstat (limited to 'jstests/sharding/remove2.js')
 -rw-r--r--  jstests/sharding/remove2.js | 360
 1 file changed, 184 insertions, 176 deletions
diff --git a/jstests/sharding/remove2.js b/jstests/sharding/remove2.js
index c74daa28c59..e862167fb12 100644
--- a/jstests/sharding/remove2.js
+++ b/jstests/sharding/remove2.js
@@ -2,188 +2,196 @@ load("jstests/replsets/rslib.js");
-seedString = function(replTest) {
-    members = replTest.getReplSetConfig().members.map(function(elem) {
-        return elem.host;
-    });
-    return replTest.name + '/' + members.join(',');
-};
-
-removeShard = function(st, replTest) {
-    jsTest.log("Removing shard with name: " + replTest.name);
-    var res = st.s.adminCommand({removeShard: replTest.name});
-    assert.commandWorked(res);
-    assert.eq('started', res.state);
-    assert.soon(function() {
-        res = st.s.adminCommand({removeShard: replTest.name});
+(function() {
+    'use strict';
+
+    function seedString(replTest) {
+        var members = replTest.getReplSetConfig().members.map(function(elem) {
+            return elem.host;
+        });
+        return replTest.name + '/' + members.join(',');
+    };
+
+    function removeShard(st, replTest) {
+        jsTest.log("Removing shard with name: " + replTest.name);
+        var res = st.s.adminCommand({removeShard: replTest.name});
         assert.commandWorked(res);
-        return ('completed' === res.state);
-    }, "failed to remove shard: " + tojson(res));
-
-    // Drop the database so the shard can be re-added.
-    assert.commandWorked(replTest.getPrimary().getDB(coll.getDB().getName()).dropDatabase());
-};
-
-addShard = function(st, replTest) {
-    seed = seedString(replTest);
-    print("Adding shard with seed: " + seed);
-    try {
-        assert.eq(true, st.adminCommand({addshard: seed}));
-    } catch (e) {
-        print("First attempt to addShard failed, trying again");
-        // transport error on first attempt is expected. Make sure second attempt goes through
-        assert.eq(true, st.adminCommand({addshard: seed}));
+        assert.eq('started', res.state);
+        assert.soon(function() {
+            res = st.s.adminCommand({removeShard: replTest.name});
+            assert.commandWorked(res);
+            return ('completed' === res.state);
+        }, "failed to remove shard: " + tojson(res));
+
+        // Drop the database so the shard can be re-added.
+        assert.commandWorked(replTest.getPrimary().getDB(coll.getDB().getName()).dropDatabase());
+    };
+
+    function addShard(st, replTest) {
+        var seed = seedString(replTest);
+        print("Adding shard with seed: " + seed);
+        try {
+            assert.eq(true, st.adminCommand({addshard: seed}));
+        } catch (e) {
+            print("First attempt to addShard failed, trying again");
+            // transport error on first attempt is expected. Make sure second attempt goes through
+            assert.eq(true, st.adminCommand({addshard: seed}));
+        }
+        awaitRSClientHosts(
+            new Mongo(st.s.host), replTest.getSecondaries(), {ok: true, secondary: true});
+
+        assert.soon(function() {
+            var x = st.chunkDiff(coll.getName(), coll.getDB().getName());
+            print("chunk diff: " + x);
+            return x < 2;
+        }, "no balance happened", 30 * 60 * 1000);
+
+        try {
+            assert.eq(300, coll.find().itcount());
+        } catch (e) {
+            // Expected. First query might get transport error and need to reconnect.
+            printjson(e);
+            assert.eq(300, coll.find().itcount());
+        }
+        print("Shard added successfully");
+    };
+
+    var st = new ShardingTest(
+        {shards: {rs0: {nodes: 2}, rs1: {nodes: 2}}, other: {chunkSize: 1, enableBalancer: true}});
+
+    // Pending resolution of SERVER-8598, we need to wait for deletion after chunk migrations to
+    // avoid a pending delete re-creating a database after it was dropped.
+    st.s.getDB("config").settings.update({_id: "balancer"}, {$set: {_waitForDelete: true}}, true);
+
+    var conn = new Mongo(st.s.host);
+    var coll = conn.getCollection("test.remove2");
+    coll.drop();
+
+    assert.commandWorked(st.s0.adminCommand({enableSharding: coll.getDB().getName()}));
+    st.ensurePrimaryShard(coll.getDB().getName(), st.shard0.shardName);
+    assert.commandWorked(st.s0.adminCommand({shardCollection: coll.getFullName(), key: {i: 1}}));
+
+    // Setup initial data
+    var str = 'a';
+    while (str.length < 1024 * 16) {
+        str += str;
+    }
+
+    var bulk = coll.initializeUnorderedBulkOp();
+    for (var i = 0; i < 300; i++) {
+        bulk.insert({i: i % 10, str: str});
     }
-    awaitRSClientHosts(
-        new Mongo(st.s.host), replTest.getSecondaries(), {ok: true, secondary: true});
+    assert.writeOK(bulk.execute());
+
+    assert.eq(300, coll.find().itcount());
 
     assert.soon(function() {
-        var x = st.chunkDiff(coll.getName(), coll.getDB().getName());
+        var x = st.chunkDiff('remove2', "test");
         print("chunk diff: " + x);
         return x < 2;
     }, "no balance happened", 30 * 60 * 1000);
 
-    try {
-        assert.eq(300, coll.find().itcount());
-    } catch (e) {
-        // Expected. First query might get transport error and need to reconnect.
-        printjson(e);
-        assert.eq(300, coll.find().itcount());
-    }
-    print("Shard added successfully");
-};
-
-var st = new ShardingTest(
-    {shards: {rs0: {nodes: 2}, rs1: {nodes: 2}}, other: {chunkSize: 1, enableBalancer: true}});
-
-// Pending resolution of SERVER-8598, we need to wait for deletion after chunk migrations to avoid
-// a pending delete re-creating a database after it was dropped.
-st.s.getDB("config").settings.update({_id: "balancer"}, {$set: {_waitForDelete: true}}, true);
-
-var rst0 = st._rs[0].test;
-var rst1 = st._rs[1].test;
-
-var conn = new Mongo(st.s.host);
-var coll = conn.getCollection("test.remove2");
-coll.drop();
-
-st.admin.runCommand({enableSharding: coll.getDB().getName()});
-st.ensurePrimaryShard(coll.getDB().getName(), st.shard0.shardName);
-st.admin.runCommand({shardCollection: coll.getFullName(), key: {i: 1}});
-
-// Setup initial data
-var str = 'a';
-while (str.length < 1024 * 16) {
-    str += str;
-}
-
-var bulk = coll.initializeUnorderedBulkOp();
-for (var i = 0; i < 300; i++) {
-    bulk.insert({i: i % 10, str: str});
-}
-assert.writeOK(bulk.execute());
-
-assert.eq(300, coll.find().itcount());
-
-assert.soon(function() {
-    var x = st.chunkDiff('remove2', "test");
-    print("chunk diff: " + x);
-    return x < 2;
-}, "no balance happened", 30 * 60 * 1000);
-
-assert.eq(300, coll.find().itcount());
-
-st.printShardingStatus();
-
-// Remove shard and add it back in, without shutting it down.
-jsTestLog("Attempting to remove shard and add it back in");
-removeShard(st, rst1);
-addShard(st, rst1);
-
-// Remove shard, restart set, then add it back in.
-jsTestLog("Attempting to remove shard, restart the set, and then add it back in");
-originalSeed = seedString(rst1);
-
-removeShard(st, rst1);
-rst1.stopSet();
-print("Sleeping for 20 seconds to let the other shard's ReplicaSetMonitor time out");
-sleep(20000);  // 1 failed check should take 10 seconds, sleep for 20 just to be safe
-
-rst1.startSet({restart: true});
-rst1.initiate();
-rst1.awaitReplication();
-
-assert.eq(originalSeed, seedString(rst1), "Set didn't come back up with the same hosts as before");
-addShard(st, rst1);
-
-// Shut down shard and wait for its ReplicaSetMonitor to be cleaned up, then start it back up and
-// use it.
-// TODO: test this both with AND without waiting for the ReplicaSetMonitor to be cleaned up.
-// This part doesn't pass, even without cleaning up the ReplicaSetMonitor - see SERVER-5900.
-/*printjson( conn.getDB('admin').runCommand({movePrimary : 'test2', to : rst1.name}) );
-printjson( conn.getDB('admin').runCommand({setParameter : 1, replMonitorMaxFailedChecks : 5}) );
-jsTestLog( "Shutting down set" )
-rst1.stopSet();
-jsTestLog( "sleeping for 20 seconds to make sure ReplicaSetMonitor gets cleaned up");
-sleep(20000); // 1 failed check should take 10 seconds, sleep for 20 just to be safe
-
-// Should fail since rst1 is the primary for test2
-assert.throws(function() {conn.getDB('test2').foo.find().itcount()});
-jsTestLog( "Bringing set back up" );
-rst1.startSet();
-rst1.initiate();
-rst1.awaitReplication();
-
-jsTestLog( "Checking that set is usable again" );
-//conn.getDB('admin').runCommand({flushRouterConfig:1}); // Uncommenting this makes test pass
-conn.getDB('test2').foo.insert({a:1});
-gle = conn.getDB('test2').runCommand('getLastError');
-if ( !gle.ok ) {
-    // Expected. First write will fail and need to re-connect
-    print( "write failed" );
-    printjson( gle );
+    assert.eq(300, coll.find().itcount());
+
+    st.printShardingStatus();
+
+    var rst1 = st.rs1;
+    // Remove shard and add it back in, without shutting it down.
+    jsTestLog("Attempting to remove shard and add it back in");
+    removeShard(st, rst1);
+    addShard(st, rst1);
+
+    // Remove shard, restart set, then add it back in.
+    jsTestLog("Attempting to remove shard, restart the set, and then add it back in");
+    var originalSeed = seedString(rst1);
+
+    removeShard(st, rst1);
+    rst1.stopSet();
+    print("Sleeping for 20 seconds to let the other shard's ReplicaSetMonitor time out");
+    sleep(20000);  // 1 failed check should take 10 seconds, sleep for 20 just to be safe
+
+    rst1.startSet({restart: true});
+    rst1.initiate();
+    rst1.awaitReplication();
+
+    assert.eq(
+        originalSeed, seedString(rst1), "Set didn't come back up with the same hosts as before");
+    addShard(st, rst1);
+
+    // Shut down shard and wait for its ReplicaSetMonitor to be cleaned up, then start it back up
+    // and use it.
+    //
+    // TODO: test this both with AND without waiting for the ReplicaSetMonitor to be cleaned up.
+    //
+    // This part doesn't pass, even without cleaning up the ReplicaSetMonitor - see SERVER-5900.
+    /*
+    printjson( conn.getDB('admin').runCommand({movePrimary : 'test2', to : rst1.name}) );
+    printjson( conn.getDB('admin').runCommand({setParameter : 1, replMonitorMaxFailedChecks : 5}) );
+    jsTestLog( "Shutting down set" )
+    rst1.stopSet();
+    jsTestLog( "sleeping for 20 seconds to make sure ReplicaSetMonitor gets cleaned up");
+    sleep(20000); // 1 failed check should take 10 seconds, sleep for 20 just to be safe
+
+    // Should fail since rst1 is the primary for test2
+    assert.throws(function() {conn.getDB('test2').foo.find().itcount()});
+    jsTestLog( "Bringing set back up" );
+    rst1.startSet();
+    rst1.initiate();
+    rst1.awaitReplication();
+
+    jsTestLog( "Checking that set is usable again" );
+    //conn.getDB('admin').runCommand({flushRouterConfig:1}); // Uncommenting this makes test pass
     conn.getDB('test2').foo.insert({a:1});
-    assert( conn.getDB('test2').getLastErrorObj().ok );
-}
-
-assert.eq( 1, conn.getDB('test2').foo.find().itcount() );
-assert( conn.getDB('test2').dropDatabase().ok );*/
-
-// Remove shard and add a new shard with the same replica set and shard name, but different ports.
-jsTestLog("Attempt removing shard and adding a new shard with the same Replica Set name");
-removeShard(st, rst1);
-rst1.stopSet();
-print("Sleeping for 60 seconds to let the other shards restart their ReplicaSetMonitors");
-sleep(60000);
-
-var rst2 = new ReplSetTest({name: rst1.name, nodes: 2, useHostName: true});
-rst2.startSet({shardsvr: ""});
-rst2.initiate();
-rst2.awaitReplication();
-
-addShard(st, rst2);
-printjson(st.admin.runCommand({movePrimary: 'test2', to: rst2.name}));
-
-assert.eq(300, coll.find().itcount());
-conn.getDB('test2').foo.insert({a: 1});
-assert.eq(1, conn.getDB('test2').foo.find().itcount());
-
-// Can't shut down with rst2 in the set or ShardingTest will fail trying to cleanup on shutdown.
-// Have to take out rst2 and put rst1 back into the set so that it can clean up.
-jsTestLog("Putting ShardingTest back to state it expects");
-printjson(st.admin.runCommand({movePrimary: 'test2', to: rst0.name}));
-removeShard(st, rst2);
-rst2.stopSet();
-print("Sleeping for 60 seconds to let the other shards restart their ReplicaSetMonitors");
-sleep(60000);
-
-rst1.startSet({restart: true});
-rst1.initiate();
-rst1.awaitReplication();
-
-assert.eq(originalSeed, seedString(rst1), "Set didn't come back up with the same hosts as before");
-addShard(st, rst1);
-
-jsTestLog("finishing!");
-// this should be fixed by SERVER-22176
-st.stop({allowedExitCodes: [MongoRunner.EXIT_ABRUPT]});
+    gle = conn.getDB('test2').runCommand('getLastError');
+    if ( !gle.ok ) {
+        // Expected. First write will fail and need to re-connect
+        print( "write failed" );
+        printjson( gle );
+        conn.getDB('test2').foo.insert({a:1});
+        assert( conn.getDB('test2').getLastErrorObj().ok );
+    }
+
+    assert.eq( 1, conn.getDB('test2').foo.find().itcount() );
+    assert( conn.getDB('test2').dropDatabase().ok );
+    */
+
+    // Remove shard and add a new shard with the same replica set and shard name, but different
+    // ports
+    jsTestLog("Attempt removing shard and adding a new shard with the same Replica Set name");
+    removeShard(st, rst1);
+    rst1.stopSet();
+    print("Sleeping for 60 seconds to let the other shards restart their ReplicaSetMonitors");
+    sleep(60000);
+
+    var rst2 = new ReplSetTest({name: rst1.name, nodes: 2, useHostName: true});
+    rst2.startSet({shardsvr: ""});
+    rst2.initiate();
+    rst2.awaitReplication();
+
+    addShard(st, rst2);
+    printjson(st.admin.runCommand({movePrimary: 'test2', to: rst2.name}));
+
+    assert.eq(300, coll.find().itcount());
+    conn.getDB('test2').foo.insert({a: 1});
+    assert.eq(1, conn.getDB('test2').foo.find().itcount());
+
+    // Can't shut down with rst2 in the set or ShardingTest will fail trying to cleanup on shutdown.
+    // Have to take out rst2 and put rst1 back into the set so that it can clean up.
+    jsTestLog("Putting ShardingTest back to state it expects");
+    printjson(st.admin.runCommand({movePrimary: 'test2', to: st.rs0.name}));
+    removeShard(st, rst2);
+    rst2.stopSet();
+
+    print("Sleeping for 60 seconds to let the other shards restart their ReplicaSetMonitors");
+    sleep(60000);
+
+    rst1.startSet({restart: true});
+    rst1.initiate();
+    rst1.awaitReplication();
+
+    assert.eq(
+        originalSeed, seedString(rst1), "Set didn't come back up with the same hosts as before");
+    addShard(st, rst1);
+
+    st.stop();
+})();
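The core of the change is the removeShard()/addShard() helper pair now scoped inside the IIFE. removeShard is a two-phase operation: the first command only starts draining chunks off the shard (it returns state 'started'), so the helper re-issues the command until the state reaches 'completed'. A minimal standalone sketch of that polling pattern in shell JavaScript follows; the function name and the 30-second timeout are illustrative, not taken from the test.

    // Sketch: poll removeShard until the drain finishes. Assumes a mongos
    // connection ("mongos") and a replica-set shard name ("shardName").
    function waitForShardRemoval(mongos, shardName) {
        // The first call only kicks off the drain.
        var res = mongos.adminCommand({removeShard: shardName});
        assert.commandWorked(res);
        assert.eq('started', res.state);

        // Subsequent calls report progress until the shard is fully removed.
        assert.soon(function() {
            res = mongos.adminCommand({removeShard: shardName});
            assert.commandWorked(res);
            return res.state === 'completed';
        }, "shard " + shardName + " never finished draining", 30 * 1000);
    }

    // Re-adding the shard afterwards uses the "setName/host1,host2" seed string
    // that the test's seedString() helper rebuilds, e.g. (hosts hypothetical):
    //     assert.commandWorked(mongos.adminCommand({addShard: 'rs1/h1:27018,h2:27019'}));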