diff options
author | Kevin Pulo <kevin.pulo@mongodb.com> | 2015-05-07 14:43:27 +1000 |
---|---|---|
committer | Spencer T Brody <spencer@mongodb.com> | 2015-05-07 11:07:20 -0400 |
commit | 7c51c3a17457f46aa55c4c419c15add471d4e232 (patch) | |
tree | dd48f58ef136a25a29da9db1a93d067e472dc3c8 | |
parent | bb112233c8b7717a8f83501fc8d992c07731f817 (diff) | |
download | mongo-7c51c3a17457f46aa55c4c419c15add471d4e232.tar.gz |
SERVER-18096: don't try to reuse closed socket fds
Signed-off-by: Spencer T Brody <spencer@mongodb.com>
-rw-r--r-- | jstests/sharding/shard_primary_relinquish_migrate.js | 137 | ||||
-rw-r--r-- | src/mongo/util/net/sock.cpp | 6 |
2 files changed, 143 insertions, 0 deletions
diff --git a/jstests/sharding/shard_primary_relinquish_migrate.js b/jstests/sharding/shard_primary_relinquish_migrate.js new file mode 100644 index 00000000000..1c476888656 --- /dev/null +++ b/jstests/sharding/shard_primary_relinquish_migrate.js @@ -0,0 +1,137 @@ +/* + * Test that migrations still succeed after a shard primary steps down and is + * re-elected again. + * + * This test is composed of 3 sub-tests: + * - When the "from" shard has stepped down and been re-elected. + * - (*) When the "to" shard has stepped down and been re-elected. + * - (*) When both the "from" and "to" shards have stepped down and been re-elected. + * + * (*) Not in v2.6, due to SERVER-15022. + * + * Each sub-test does: + * - Setup basic sharded collection, 2 shards, 2 chunks. + * - Force a brief stepdown and re-election of the "from" and/or "to" shard primary. + * - Migrate a chunk. + */ + +(function() { +"use strict"; + +var testShardPrimaryRelinquishMigrate = function(opts) { + jsTestLog("START testShardPrimaryRelinquishMigrate(" + tojson(opts) + ")"); + + // The shards need to have specific priorities in the replset configs. + // Hence, create the ShardingTest with no shards, and then manually add + // each shard (with the correct replset config). 
+ var st = new ShardingTest({ shards: [], mongos: 1, config: 3, + other: { smallfiles: true } }); + + var mongos = st.s0; + var admin = mongos.getDB( "admin" ); + + var doAddShard = function (shardNum) { + var rs = new ReplSetTest({ name: "shard" + shardNum, + nodes: 2, + startPort: 31100 + ( shardNum * 100 ), + useHostName: false, + shardSvr: true }); + rs.startSet(); + var cfg = rs.getReplSetConfig(); + cfg.members[1].priority = 0; + rs.initiate(cfg); + assert.commandWorked( admin.runCommand( { addShard: rs.getURL() } ) ); + st["rs" + shardNum] = rs; + }; + doAddShard(0); + doAddShard(1); + + var shards = mongos.getCollection( "config.shards" ).find().toArray(); + var databases = mongos.getCollection( "config.databases" ); + var coll = mongos.getCollection( "foo.bar" ); + var collName = coll.getFullName(); + var dbName = coll.getDB().getName(); + + st.stopBalancer(); + + assert.commandWorked( admin.runCommand({ enableSharding: dbName }) ); + var dbRecord = databases.find({_id: dbName}).next(); + assert(dbRecord && "primary" in dbRecord); + if (dbRecord.primary != shards[0]._id) { + assert.commandWorked( admin.runCommand({ movePrimary: dbName, + to: shards[0]._id }) ); + } + assert.commandWorked( admin.runCommand({ shardCollection: collName, + key: { _id: 1 } }) ); + assert.commandWorked( admin.runCommand({ split: collName, + middle: { _id: 0 } }) ); + + coll.insert({_id: -1}); + coll.insert({_id: 1}); + assert.eq( coll.count(), 2 ); + + // Move chunk there and back to initialise sharding (and this sharded + // collection) on both shards. + assert.commandWorked( admin.runCommand({ moveChunk: collName, + find: { _id: 0 }, + to: shards[1]._id, + _waitForDelete: true }) ); + + assert.commandWorked( admin.runCommand({ moveChunk: collName, + find: { _id: 0 }, + to: shards[0]._id, + _waitForDelete: true }) ); + + // Force the primary to step down briefly, and wait for it to come back + // (since the other member is priority:0). 
+ var bounce = function(name, rs) { + jsTestLog("START bouncing " + name + " shard"); + var primary = rs.getPrimary(); + assert(primary, "rs.getPrimary() failed"); + var res; + try { + // replSetStepDown should cause an exception + // (when the primary drops the connection). + res = primary.adminCommand({ replSetStepDown: 1, + secondaryCatchUpPeriodSecs: 0, + force: true }); + } catch(e) { + print("Expected exception for replSetStepdown: " + e); + } + // Check if replSetStepDown has returned anything + // (rather than throwing the expected exception). + if (typeof(res) != "undefined") { + throw("Unexpected return from replSetStepDown: " + tojson(res)); + } + rs.waitForMaster(); + jsTestLog("END bouncing " + name + " shard"); + }; + + if (opts.bounceFrom) { + bounce("FROM", st.rs0); + } + + if (opts.bounceTo) { + bounce("TO", st.rs1); + } + + jsTestLog("START migration"); + assert.commandWorked( admin.runCommand({ moveChunk: collName, + find: { _id: 0 }, + to: shards[1]._id, + _waitForDelete: true }) ); + jsTestLog("END migration"); + + st.stop(); + + jsTestLog("END testShardPrimaryRelinquishMigrate(" + tojson(opts) + ")"); +}; + +testShardPrimaryRelinquishMigrate({ bounceFrom: true, bounceTo: false }); + +// These two sub-tests are disabled in v2.6, because bouncing the To shard +// fails due to SERVER-15022 (which hasn't been backported). +//testShardPrimaryRelinquishMigrate({ bounceFrom: false, bounceTo: true }); +//testShardPrimaryRelinquishMigrate({ bounceFrom: true, bounceTo: true }); + +}()); diff --git a/src/mongo/util/net/sock.cpp b/src/mongo/util/net/sock.cpp index 73c4a0a9c6f..aa08d89df53 100644 --- a/src/mongo/util/net/sock.cpp +++ b/src/mongo/util/net/sock.cpp @@ -790,6 +790,12 @@ namespace mongo { // isStillConnected() polls the socket at max every Socket::errorPollIntervalSecs to determine // if any disconnection-type events have happened on the socket. 
bool Socket::isStillConnected() { + if (_fd == -1) { + // According to the man page, poll will respond with POLLNVAL for invalid or + // unopened descriptors, but it doesn't seem to be properly implemented on + // some platforms - it can return 0 events and 0 in revents. Hence this workaround. + return false; + } if ( errorPollIntervalSecs < 0 ) return true; if ( ! isPollSupported() ) return true; // nothing we can do |