summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormatt dannenberg <matt.dannenberg@10gen.com>2014-04-07 09:08:08 -0400
committerMatt Kangas <matt.kangas@mongodb.com>2014-04-10 11:48:15 -0400
commitbe1905c24c7e5ea258e537fbf0d2c502c4fc6de2 (patch)
tree5756f7b3671def6ec6192f10e6d546a14d230329
parenta280cb5a78e94229598c3a540a933e18037dbf5c (diff)
downloadmongo-be1905c24c7e5ea258e537fbf0d2c502c4fc6de2.tar.gz
SERVER-13500 prevent syncSourceFeedback segfault by not allowing NULL members to be added to _members map
(cherry picked from commit ba3823f2a7c08a022bebbe8accebba8893582e09)
-rw-r--r--jstests/replsets/remove_node_without_shutdown.js33
-rw-r--r--src/mongo/db/client.cpp8
-rw-r--r--src/mongo/db/client.h2
-rw-r--r--src/mongo/db/repl/replset_commands.cpp11
-rw-r--r--src/mongo/db/repl/rs.cpp13
-rw-r--r--src/mongo/db/repl/rs.h2
-rw-r--r--src/mongo/db/repl/sync_source_feedback.cpp5
-rw-r--r--src/mongo/db/repl/sync_source_feedback.h4
8 files changed, 65 insertions, 13 deletions
diff --git a/jstests/replsets/remove_node_without_shutdown.js b/jstests/replsets/remove_node_without_shutdown.js
new file mode 100644
index 00000000000..be3c051944b
--- /dev/null
+++ b/jstests/replsets/remove_node_without_shutdown.js
@@ -0,0 +1,33 @@
+// Ensure removing a node without shutting it down does not cause a segfault (SERVER-13500)
+
+// start a three node set
+var name = "removeWithoutShutdown";
+var replTest = new ReplSetTest({name: name, nodes: 3});
+var nodes = replTest.startSet();
+replTest.initiate();
+
+// remove node 3 from node 2's config by editing the document in system.replset
+nodes[1].getDB("local").system.replset.update({}, {$pop: {members: 1}});
+// restart to load that version of the config
+replTest.stop(1);
+replTest.restart(1);
+// assert that a member is gone from the config
+assert.eq(2, nodes[1].getDB("local").system.replset.findOne().members.length,
+ "incorrect number of nodes in config after reconfig");
+
+// cause node 3 to sync from node 2
+replTest.awaitSecondaryNodes();
+assert.soon(function () {
+ nodes[2].getDB("admin").runCommand({replSetSyncFrom: nodes[1].name});
+ return replTest.status().members[2].syncingTo === nodes[1].name;
+});
+// this sleep is to wait for the actual connection and the problems it would cause
+// sadly the above assert.soon does not suffice
+sleep(2000);
+
+// ensure node 2 is still up and happy (pre SERVER-13500 it would fall over and be sad)
+assert.soon(function () {
+ return nodes[1].getDB("local").system.replset.findOne().members.length == 2;
+});
+
+replTest.stopSet();
diff --git a/src/mongo/db/client.cpp b/src/mongo/db/client.cpp
index 98120cbcd7b..cec9ecc6e19 100644
--- a/src/mongo/db/client.cpp
+++ b/src/mongo/db/client.cpp
@@ -410,7 +410,7 @@ namespace mongo {
return c->toString();
}
- void Client::gotHandshake( const BSONObj& o ) {
+ bool Client::gotHandshake( const BSONObj& o ) {
BSONObjIterator i(o);
{
@@ -427,9 +427,11 @@ namespace mongo {
_handshake = b.obj();
- if (theReplSet && o.hasField("member")) {
- theReplSet->registerSlave(_remoteId, o["member"].Int());
+ if (!theReplSet || !o.hasField("member")) {
+ return false;
}
+
+ return theReplSet->registerSlave(_remoteId, o["member"].Int());
}
bool ClientBasic::hasCurrent() {
diff --git a/src/mongo/db/client.h b/src/mongo/db/client.h
index aa744743e15..9c7f594f8aa 100644
--- a/src/mongo/db/client.h
+++ b/src/mongo/db/client.h
@@ -109,7 +109,7 @@ namespace mongo {
bool isGod() const { return _god; } /* this is for map/reduce writes */
bool setGod(bool newVal) { const bool prev = _god; _god = newVal; return prev; }
string toString() const;
- void gotHandshake( const BSONObj& o );
+ bool gotHandshake( const BSONObj& o );
BSONObj getRemoteID() const { return _remoteId; }
BSONObj getHandshake() const { return _handshake; }
ConnectionId getConnectionId() const { return _connectionId; }
diff --git a/src/mongo/db/repl/replset_commands.cpp b/src/mongo/db/repl/replset_commands.cpp
index 0331f5f6af5..b0b8048976c 100644
--- a/src/mongo/db/repl/replset_commands.cpp
+++ b/src/mongo/db/repl/replset_commands.cpp
@@ -431,7 +431,10 @@ namespace mongo {
if (cmdObj.hasField("handshake")) {
// we have received a handshake, not an update message
// handshakes are done here to ensure the receiving end supports the update command
- cc().gotHandshake(cmdObj["handshake"].embeddedObject());
+ if (!cc().gotHandshake(cmdObj["handshake"].embeddedObject())) {
+ errmsg = "node could not be found in replica set config during handshake";
+ return false;
+ }
// if we aren't primary, pass the handshake along
if (!theReplSet->isPrimary() && theReplSet->syncSourceFeedback.supportsUpdater()) {
theReplSet->syncSourceFeedback.forwardSlaveHandshake();
@@ -442,7 +445,11 @@ namespace mongo {
uassert(16888, "optimes field should be an array with an object for each secondary",
cmdObj["optimes"].type() == Array);
BSONArray newTimes = BSONArray(cmdObj["optimes"].Obj());
- return updateSlaveLocations(newTimes);
+ if (!updateSlaveLocations(newTimes)) {
+ errmsg = "could not update position upstream; will retry";
+ return false;
+ }
+ return true;
}
} cmdReplSetUpdatePosition;
diff --git a/src/mongo/db/repl/rs.cpp b/src/mongo/db/repl/rs.cpp
index 775a4ed76a5..0e733b7f489 100644
--- a/src/mongo/db/repl/rs.cpp
+++ b/src/mongo/db/repl/rs.cpp
@@ -1029,14 +1029,23 @@ namespace {
return OpTime();
}
- void ReplSetImpl::registerSlave(const BSONObj& rid, const int memberId) {
+ bool ReplSetImpl::registerSlave(const BSONObj& rid, const int memberId) {
// To prevent race conditions with clearing the cache at reconfig time,
// we lock the replset mutex here.
+ Member* member = NULL;
{
lock lk(this);
ghost->associateSlave(rid, memberId);
+ member = getMutableMember(memberId);
}
- syncSourceFeedback.associateMember(rid, memberId);
+
+ // it is possible that a node that was removed in a reconfig tried to handshake this node
+ // in that case, the Member will no longer be in the _members List and member will be NULL
+ if (!member) {
+ return false;
+ }
+ syncSourceFeedback.associateMember(rid, member);
+ return true;
}
class ReplIndexPrefetch : public ServerParameter {
diff --git a/src/mongo/db/repl/rs.h b/src/mongo/db/repl/rs.h
index a066ab6ae9c..46bed5b53a1 100644
--- a/src/mongo/db/repl/rs.h
+++ b/src/mongo/db/repl/rs.h
@@ -545,7 +545,7 @@ namespace mongo {
bool setMaintenanceMode(const bool inc);
// Records a new slave's id in the GhostSlave map, at handshake time.
- void registerSlave(const BSONObj& rid, const int memberId);
+ bool registerSlave(const BSONObj& rid, const int memberId);
private:
Member* head() const { return _members.head(); }
public:
diff --git a/src/mongo/db/repl/sync_source_feedback.cpp b/src/mongo/db/repl/sync_source_feedback.cpp
index 8e40774efd4..57ea73731df 100644
--- a/src/mongo/db/repl/sync_source_feedback.cpp
+++ b/src/mongo/db/repl/sync_source_feedback.cpp
@@ -43,11 +43,12 @@ namespace mongo {
// used in replAuthenticate
static const BSONObj userReplQuery = fromjson("{\"user\":\"repl\"}");
- void SyncSourceFeedback::associateMember(const BSONObj& id, const int memberId) {
+ void SyncSourceFeedback::associateMember(const BSONObj& id, Member* member) {
+ invariant(member);
const OID rid = id["_id"].OID();
boost::unique_lock<boost::mutex> lock(_mtx);
_handshakeNeeded = true;
- _members[rid] = theReplSet->getMutableMember(memberId);
+ _members[rid] = member;
_cond.notify_all();
}
diff --git a/src/mongo/db/repl/sync_source_feedback.h b/src/mongo/db/repl/sync_source_feedback.h
index 859e3efef66..1cd4d703f2d 100644
--- a/src/mongo/db/repl/sync_source_feedback.h
+++ b/src/mongo/db/repl/sync_source_feedback.h
@@ -50,8 +50,8 @@ namespace mongo {
delete _oplogReader;
}
- /// Adds an entry to _member for a secondary that has connected to us.
- void associateMember(const BSONObj& id, const int memberId);
+ /// Adds an entry to _members for a secondary that has connected to us.
+ void associateMember(const BSONObj& id, Member* member);
/// Ensures local.me is populated and populates it if not.
void ensureMe();