diff options
author | matt dannenberg <matt.dannenberg@10gen.com> | 2014-04-07 09:08:08 -0400 |
---|---|---|
committer | Matt Kangas <matt.kangas@mongodb.com> | 2014-04-10 11:48:15 -0400 |
commit | be1905c24c7e5ea258e537fbf0d2c502c4fc6de2 (patch) | |
tree | 5756f7b3671def6ec6192f10e6d546a14d230329 | |
parent | a280cb5a78e94229598c3a540a933e18037dbf5c (diff) | |
download | mongo-be1905c24c7e5ea258e537fbf0d2c502c4fc6de2.tar.gz |
SERVER-13500 prevent syncSourceFeedback segfault by not allowing NULL members to be added to _members map
(cherry picked from commit ba3823f2a7c08a022bebbe8accebba8893582e09)
-rw-r--r-- | jstests/replsets/remove_node_without_shutdown.js | 33 | ||||
-rw-r--r-- | src/mongo/db/client.cpp | 8 | ||||
-rw-r--r-- | src/mongo/db/client.h | 2 | ||||
-rw-r--r-- | src/mongo/db/repl/replset_commands.cpp | 11 | ||||
-rw-r--r-- | src/mongo/db/repl/rs.cpp | 13 | ||||
-rw-r--r-- | src/mongo/db/repl/rs.h | 2 | ||||
-rw-r--r-- | src/mongo/db/repl/sync_source_feedback.cpp | 5 | ||||
-rw-r--r-- | src/mongo/db/repl/sync_source_feedback.h | 4 |
8 files changed, 65 insertions, 13 deletions
diff --git a/jstests/replsets/remove_node_without_shutdown.js b/jstests/replsets/remove_node_without_shutdown.js new file mode 100644 index 00000000000..be3c051944b --- /dev/null +++ b/jstests/replsets/remove_node_without_shutdown.js @@ -0,0 +1,33 @@ +// Ensure removing a node without shutting it down does not cause a segfault (SERVER-13500) + +// start a three node set +var name = "removeWithoutShutdown"; +var replTest = new ReplSetTest({name: name, nodes: 3}); +var nodes = replTest.startSet(); +replTest.initiate(); + +// remove node 3 from node 2's config by editing the document in system.replset +nodes[1].getDB("local").system.replset.update({}, {$pop: {members: 1}}); +// restart to load that version of the config +replTest.stop(1); +replTest.restart(1); +// assert that a member is gone from the config +assert.eq(2, nodes[1].getDB("local").system.replset.findOne().members.length, + "incorrect number of nodes in config after reconfig"); + +// cause node 3 to sync from node 2 +replTest.awaitSecondaryNodes(); +assert.soon(function () { + nodes[2].getDB("admin").runCommand({replSetSyncFrom: nodes[1].name}); + return replTest.status().members[2].syncingTo === nodes[1].name; +}); +// this sleep is to wait for the actual connection and the problems it would cause +// sadly the above assert.soon does not suffice +sleep(2000); + +// ensure node 2 is still up and happy (pre SERVER-13500 it would fall over and be sad) +assert.soon(function () { + return nodes[1].getDB("local").system.replset.findOne().members.length == 2; +}); + +replTest.stopSet(); diff --git a/src/mongo/db/client.cpp b/src/mongo/db/client.cpp index 98120cbcd7b..cec9ecc6e19 100644 --- a/src/mongo/db/client.cpp +++ b/src/mongo/db/client.cpp @@ -410,7 +410,7 @@ namespace mongo { return c->toString(); } - void Client::gotHandshake( const BSONObj& o ) { + bool Client::gotHandshake( const BSONObj& o ) { BSONObjIterator i(o); { @@ -427,9 +427,11 @@ namespace mongo { _handshake = b.obj(); - if (theReplSet && o.hasField("member")) { - theReplSet->registerSlave(_remoteId, o["member"].Int()); + if (!theReplSet || !o.hasField("member")) { + return false; } + + return theReplSet->registerSlave(_remoteId, o["member"].Int()); } bool ClientBasic::hasCurrent() { diff --git a/src/mongo/db/client.h b/src/mongo/db/client.h index aa744743e15..9c7f594f8aa 100644 --- a/src/mongo/db/client.h +++ b/src/mongo/db/client.h @@ -109,7 +109,7 @@ namespace mongo { bool isGod() const { return _god; } /* this is for map/reduce writes */ bool setGod(bool newVal) { const bool prev = _god; _god = newVal; return prev; } string toString() const; - void gotHandshake( const BSONObj& o ); + bool gotHandshake( const BSONObj& o ); BSONObj getRemoteID() const { return _remoteId; } BSONObj getHandshake() const { return _handshake; } ConnectionId getConnectionId() const { return _connectionId; } diff --git a/src/mongo/db/repl/replset_commands.cpp b/src/mongo/db/repl/replset_commands.cpp index 0331f5f6af5..b0b8048976c 100644 --- a/src/mongo/db/repl/replset_commands.cpp +++ b/src/mongo/db/repl/replset_commands.cpp @@ -431,7 +431,10 @@ namespace mongo { if (cmdObj.hasField("handshake")) { // we have received a handshake, not an update message // handshakes are done here to ensure the receiving end supports the update command - cc().gotHandshake(cmdObj["handshake"].embeddedObject()); + if (!cc().gotHandshake(cmdObj["handshake"].embeddedObject())) { + errmsg = "node could not be found in replica set config during handshake"; + return false; + } // if we aren't primary, pass the handshake along if (!theReplSet->isPrimary() && theReplSet->syncSourceFeedback.supportsUpdater()) { theReplSet->syncSourceFeedback.forwardSlaveHandshake(); @@ -442,7 +445,11 @@ namespace mongo { uassert(16888, "optimes field should be an array with an object for each secondary", cmdObj["optimes"].type() == Array); BSONArray newTimes = BSONArray(cmdObj["optimes"].Obj()); - return updateSlaveLocations(newTimes); + if (!updateSlaveLocations(newTimes)) { + errmsg = "could not update position upstream; will retry"; + return false; + } + return true; } } cmdReplSetUpdatePosition; diff --git a/src/mongo/db/repl/rs.cpp b/src/mongo/db/repl/rs.cpp index 775a4ed76a5..0e733b7f489 100644 --- a/src/mongo/db/repl/rs.cpp +++ b/src/mongo/db/repl/rs.cpp @@ -1029,14 +1029,23 @@ namespace { return OpTime(); } - void ReplSetImpl::registerSlave(const BSONObj& rid, const int memberId) { + bool ReplSetImpl::registerSlave(const BSONObj& rid, const int memberId) { // To prevent race conditions with clearing the cache at reconfig time, // we lock the replset mutex here. + Member* member = NULL; { lock lk(this); ghost->associateSlave(rid, memberId); + member = getMutableMember(memberId); } - syncSourceFeedback.associateMember(rid, memberId); + + // it is possible that a node that was removed in a reconfig tried to handshake this node + // in that case, the Member will no longer be in the _members List and member will be NULL + if (!member) { + return false; + } + syncSourceFeedback.associateMember(rid, member); + return true; } class ReplIndexPrefetch : public ServerParameter { diff --git a/src/mongo/db/repl/rs.h b/src/mongo/db/repl/rs.h index a066ab6ae9c..46bed5b53a1 100644 --- a/src/mongo/db/repl/rs.h +++ b/src/mongo/db/repl/rs.h @@ -545,7 +545,7 @@ namespace mongo { bool setMaintenanceMode(const bool inc); // Records a new slave's id in the GhostSlave map, at handshake time. - void registerSlave(const BSONObj& rid, const int memberId); + bool registerSlave(const BSONObj& rid, const int memberId); private: Member* head() const { return _members.head(); } public: diff --git a/src/mongo/db/repl/sync_source_feedback.cpp b/src/mongo/db/repl/sync_source_feedback.cpp index 8e40774efd4..57ea73731df 100644 --- a/src/mongo/db/repl/sync_source_feedback.cpp +++ b/src/mongo/db/repl/sync_source_feedback.cpp @@ -43,11 +43,12 @@ namespace mongo { // used in replAuthenticate static const BSONObj userReplQuery = fromjson("{\"user\":\"repl\"}"); - void SyncSourceFeedback::associateMember(const BSONObj& id, const int memberId) { + void SyncSourceFeedback::associateMember(const BSONObj& id, Member* member) { + invariant(member); const OID rid = id["_id"].OID(); boost::unique_lock<boost::mutex> lock(_mtx); _handshakeNeeded = true; - _members[rid] = theReplSet->getMutableMember(memberId); + _members[rid] = member; _cond.notify_all(); } diff --git a/src/mongo/db/repl/sync_source_feedback.h b/src/mongo/db/repl/sync_source_feedback.h index 859e3efef66..1cd4d703f2d 100644 --- a/src/mongo/db/repl/sync_source_feedback.h +++ b/src/mongo/db/repl/sync_source_feedback.h @@ -50,8 +50,8 @@ namespace mongo { delete _oplogReader; } - /// Adds an entry to _member for a secondary that has connected to us. - void associateMember(const BSONObj& id, const int memberId); + /// Adds an entry to _members for a secondary that has connected to us. + void associateMember(const BSONObj& id, Member* member); /// Ensures local.me is populated and populates it if not. void ensureMe(); |