summaryrefslogtreecommitdiff
path: root/jstests/sharding/remove2.js
diff options
context:
space:
mode:
authorSpencer T Brody <spencer@10gen.com>2012-03-26 14:31:33 -0400
committerSpencer T Brody <spencer@10gen.com>2012-06-12 16:51:25 -0400
commit9ab21eeb9443c41455a18f3ff7016166a16a6425 (patch)
tree4aca471161d3d3fa8b3fd49111f467f4566625ce /jstests/sharding/remove2.js
parent665c1cfeac637465bb4ed8d24b7b76cec835fbea (diff)
downloadmongo-9ab21eeb9443c41455a18f3ff7016166a16a6425.tar.gz
Clean up ReplicaSetMonitor when the whole set has been down for a long time. SERVER-4581
Diffstat (limited to 'jstests/sharding/remove2.js')
-rw-r--r--jstests/sharding/remove2.js178
1 files changed, 178 insertions, 0 deletions
diff --git a/jstests/sharding/remove2.js b/jstests/sharding/remove2.js
new file mode 100644
index 00000000000..76a94d6853c
--- /dev/null
+++ b/jstests/sharding/remove2.js
@@ -0,0 +1,178 @@
+// Test that removing and re-adding shard works correctly.
+
// Build the connection seed string for a replica set, in the form
// "setName/host1:port,host2:port,...", from the set's current config.
// `replTest` is a ReplSetTest-like object exposing `name` and getReplSetConfig().
var seedString = function(replTest) {
    // Collect the host:port of every configured member.
    var members = replTest.getReplSetConfig().members.map(function(elem) { return elem.host; });
    return replTest.name + '/' + members.join(',');
};
+
// Drain and remove the given replica-set shard from the cluster, then drop the
// test database on the removed shard so it can be cleanly re-added later.
// Blocks (via assert.soon) until the drain completes.
var removeShard = function(st, replTest) {
    print( "Removing shard with name: " + replTest.name );
    // First removeshard call starts draining; it must succeed immediately.
    var res = st.admin.runCommand( { removeshard: replTest.name } );
    printjson(res);
    assert( res.ok , "failed to start draining shard" );
    // Poll removeshard until the balancer has migrated all chunks away.
    var checkRemoveShard = function() {
        var pollRes = st.admin.runCommand( { removeshard: replTest.name } );
        printjson(pollRes);
        return pollRes.ok && pollRes.msg == 'removeshard completed successfully';
    };
    assert.soon( checkRemoveShard , "failed to remove shard" );
    // Drop leftover data on the removed shard so a later re-add starts clean.
    replTest.getPrimary().getDB( coll.getDB().getName() ).dropDatabase();
    print( "Shard removed successfully" );
};
+
// Add the given replica set back into the cluster as a shard, tolerating the
// single expected transport error from a stale mongos connection. Waits for the
// mongos ReplicaSetMonitor to see the secondaries, for the balancer to even out
// chunks, and verifies all 300 seeded documents are visible through mongos.
var addShard = function(st, replTest) {
    var seed = seedString(replTest);
    print( "Adding shard with seed: " + seed );
    try {
        assert.eq(true, st.adminCommand({ addshard : seed }));
    } catch (e) {
        print("First attempt to addShard failed, trying again");
        // transport error on first attempt is expected. Make sure second attempt goes through
        assert.eq(true, st.adminCommand({ addshard : seed }));
    }
    // Wait until mongos's ReplicaSetMonitor reports the secondaries healthy.
    ReplSetTest.awaitRSClientHosts( new Mongo( st.s.host ),
                                    replTest.getSecondaries(),
                                    {ok : true, secondary : true} );

    // Wait for the balancer to spread chunks back across both shards.
    assert.soon( function() {
        var x = st.chunkDiff( coll.getName() , coll.getDB().getName() );
        print( "chunk diff: " + x );
        return x < 2;
    } , "no balance happened", 60000 );

    try {
        assert.eq( 300, coll.find().itcount() );
    } catch (e) {
        // Expected. First query might get transport error and need to reconnect.
        printjson(e);
        assert.eq( 300, coll.find().itcount() );
    }
    print( "Shard added successfully" );
};
+
// Spin up a 2-shard cluster where each shard is a 2-node replica set.
// chunkSize of 1MB keeps the chunk count high enough for the balancer to act.
var st = new ShardingTest( "remove2",   // testName
                           2,           // numShards
                           0,           // verboseLevel
                           1,           // numMongos
                           { chunkSize : 1,
                             rs : true,
                             rs0 : { nodes : 2 },
                             rs1 : { nodes : 2 }
                           });

var rst0 = st._rs[0].test;
var rst1 = st._rs[1].test;

var conn = new Mongo( st.s.host );
var coll = conn.getCollection( "test.remove2" );
coll.drop();

// Decrease how long it will take for rst0 to time out its ReplicaSetMonitor
// for rst1 when rst1 is shut down.
for( var i = 0; i < rst0.nodes.length; i++ ) {
    var node = rst0.nodes[i];
    var res = node.getDB('admin').runCommand({ setParameter : 1, replMonitorMaxFailedChecks : 1 });
    printjson( res );
    assert( res.ok );
}

st.admin.runCommand({ enableSharding : coll.getDB().getName() });
st.admin.runCommand({ shardCollection : coll.getFullName(), key: { i : 1 }});

// Seed 300 ~16KB documents so the collection splits into multiple chunks
// and gives the balancer something to move.
var str = 'a';
while( str.length < 1024 * 16 ) {
    str += str;
}
for( var i = 0; i < 300; i++ ){
    coll.insert( { i : i % 10, str : str } );
}

// Wait for the initial balance across the two shards.
assert.soon( function() {
    var x = st.chunkDiff( 'remove2' , "test" );
    print( "chunk diff: " + x );
    return x < 2;
} , "no balance happened" );

assert.eq( 300, coll.find().itcount() );

st.admin.printShardingStatus();
+
// Scenario 1: remove the shard and add it back in, without shutting it down.
jsTestLog( "Attempting to remove shard and add it back in" );
removeShard( st, rst1 );
addShard( st, rst1 );


// Scenario 2: remove the shard, restart the whole set, then add it back in.
jsTestLog( "Attempting to remove shard, restart the set, and then add it back in" );
var originalSeed = seedString(rst1);

removeShard( st, rst1 );
rst1.stopSet();
print( "Sleeping for 20 seconds to let the other shard's ReplicaSetMonitor time out" );
sleep( 20000 ); // 1 failed check should take 10 seconds, sleep for 20 just to be safe

rst1.startSet();
rst1.initiate();
rst1.awaitReplication();

// The restarted set must have the same hosts, otherwise re-adding by the
// original seed string would not exercise the monitor-cleanup path.
assert.eq( originalSeed, seedString(rst1), "Set didn't come back up with the same hosts as before" );
addShard( st, rst1 );
+
+
+// Shut down shard and wait for its ReplicaSetMonitor to be cleaned up, then start it back up and use it.
+// TODO: test this both with AND without waiting for the ReplicaSetMonitor to be cleaned up.
+// This part doesn't pass, even without cleaning up the ReplicaSetMonitor - see SERVER-5900.
+/*printjson( conn.getDB('admin').runCommand({movePrimary : 'test2', to : rst1.name}) );
+printjson( conn.getDB('admin').runCommand({setParameter : 1, replMonitorMaxFailedChecks : 5}) );
+jsTestLog( "Shutting down set" )
+rst1.stopSet();
+jsTestLog( "sleeping for 20 seconds to make sure ReplicaSetMonitor gets cleaned up");
+sleep(20000); // 1 failed check should take 10 seconds, sleep for 20 just to be safe
+
+// Should fail since rst1 is the primary for test2
+assert.throws(function() {conn.getDB('test2').foo.find().itcount()});
+jsTestLog( "Bringing set back up" );
+rst1.startSet();
+rst1.initiate();
+rst1.awaitReplication();
+
+jsTestLog( "Checking that set is usable again" );
+//conn.getDB('admin').runCommand({flushRouterConfig:1}); // Uncommenting this makes test pass
+conn.getDB('test2').foo.insert({a:1});
+gle = conn.getDB('test2').runCommand('getLastError');
+if ( !gle.ok ) {
+ // Expected. First write will fail and need to re-connect
+ print( "write failed" );
+ printjson( gle );
+ conn.getDB('test2').foo.insert({a:1});
+ assert( conn.getDB('test2').getLastErrorObj().ok );
+}
+
+assert.eq( 1, conn.getDB('test2').foo.find().itcount() );
+assert( conn.getDB('test2').dropDatabase().ok );*/
+
+
// Scenario 3: remove the shard, then add a NEW replica set that reuses the same
// set/shard name but runs on different ports. The cluster must pick up the new
// members instead of reusing stale host information from the old monitor.
jsTestLog( "Attempt removing shard and adding a new shard with the same Replica Set name" );
removeShard( st, rst1 );
rst1.stopSet();
print( "Sleeping for 20 seconds to let the other shard's ReplicaSetMonitor time out" );
sleep( 20000 );


// Offset the ports so none of the new members collide with the old ones.
var rst2 = new ReplSetTest({name : rst1.name, nodes : 2, startPort : rst1.startPort + 1500, useHostName : true});
rst2.startSet();
rst2.initiate();
rst2.awaitReplication();

addShard( st, rst2 );
printjson( st.admin.runCommand({movePrimary : 'test2', to : rst2.name}) );

// Sharded data is still fully readable, and the new shard accepts writes
// as the primary shard for 'test2'.
assert.eq( 300, coll.find().itcount() );
conn.getDB('test2').foo.insert({a:1});
assert.eq( 1, conn.getDB('test2').foo.find().itcount() );

jsTestLog( "finishing!" );
st.stop();