diff options
author | Kristina <kristina@10gen.com> | 2011-10-05 16:55:55 -0400 |
---|---|---|
committer | Kristina <kristina@10gen.com> | 2011-10-24 15:00:33 -0400 |
commit | e65e110553c725939f0706ca42a952855fea388b (patch) | |
tree | d8e2369841dfeafe5bb6d4b930d3173833599fa8 | |
parent | f74345924b6e2381f351337312401539d3156591 (diff) | |
download | mongo-e65e110553c725939f0706ca42a952855fea388b.tar.gz |
Make secondaries go into recovering state when auth is wrong SERVER-3715
-rw-r--r-- | db/repl/health.cpp | 5 | ||||
-rw-r--r-- | db/repl/heartbeat.cpp | 225 | ||||
-rw-r--r-- | db/repl/manager.cpp | 36 | ||||
-rw-r--r-- | db/repl/rs.cpp | 1 | ||||
-rw-r--r-- | db/repl/rs.h | 6 | ||||
-rw-r--r-- | db/repl/rs_member.h | 4 | ||||
-rw-r--r-- | db/repl/rs_sync.cpp | 11 | ||||
-rw-r--r-- | jstests/replsets/auth2.js | 98 | ||||
-rwxr-xr-x | shell/servers.js | 6 |
9 files changed, 303 insertions, 89 deletions
diff --git a/db/repl/health.cpp b/db/repl/health.cpp index 711b457dd8c..7e5a39f1ddb 100644 --- a/db/repl/health.cpp +++ b/db/repl/health.cpp @@ -402,6 +402,11 @@ namespace mongo { string s = m->lhb(); if( !s.empty() ) bb.append("errmsg", s); + + if (m->hbinfo().authIssue) { + bb.append("authenticated", false); + } + v.push_back(bb.obj()); m = m->next(); } diff --git a/db/repl/heartbeat.cpp b/db/repl/heartbeat.cpp index 7d3f78c73b5..a1780c38740 100644 --- a/db/repl/heartbeat.cpp +++ b/db/repl/heartbeat.cpp @@ -51,11 +51,14 @@ namespace mongo { /* { replSetHeartbeat : <setname> } */ class CmdReplSetHeartbeat : public ReplSetCommand { public: - virtual bool adminOnly() const { return false; } CmdReplSetHeartbeat() : ReplSetCommand("replSetHeartbeat") { } virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { - if( replSetBlind ) + if( replSetBlind ) { + if (theReplSet) { + errmsg = str::stream() << theReplSet->selfFullName() << " is blind"; + } return false; + } /* we don't call ReplSetCommand::check() here because heartbeat checks many things that are pre-initialization. */ @@ -123,32 +126,54 @@ namespace mongo { } } cmdReplSetHeartbeat; - /* throws dbexception */ - bool requestHeartbeat(string setName, string from, string memberFullName, BSONObj& result, int myCfgVersion, int& theirCfgVersion, bool checkEmpty) { + bool requestHeartbeat(string setName, string from, string memberFullName, BSONObj& result, + int myCfgVersion, int& theirCfgVersion, bool checkEmpty) { if( replSetBlind ) { - //sleepmillis( rand() ); return false; } - BSONObj cmd = BSON( "replSetHeartbeat" << setName << "v" << myCfgVersion << "pv" << 1 << "checkEmpty" << checkEmpty << "from" << from ); - - // we might be talking to ourself - generally not a great idea to do outbound waiting calls in a write lock - assert( !dbMutex.isWriteLocked() ); + BSONObj cmd = BSON( "replSetHeartbeat" << setName << + "v" << myCfgVersion << + "pv" << 1 << + "checkEmpty" << checkEmpty << + "from" << from ); - // these are slow (multisecond to respond), so generally we don't want to be locked, at least not without + // generally not a great idea to do outbound waiting calls in a + // write lock. heartbeats can be slow (multisecond to respond), so + // generally we don't want to be locked, at least not without // thinking acarefully about it first. - assert( theReplSet == 0 || !theReplSet->lockedByMe() ); + uassert(15900, "can't heartbeat: too much lock", + !dbMutex.isWriteLocked() || theReplSet == 0 || !theReplSet->lockedByMe() ); ScopedConn conn(memberFullName); return conn.runCommand("admin", cmd, result, 0); } - /* poll every other set member to check its status */ + /** + * Poll every other set member to check its status. + * + * A detail about local machines and authentication: suppose we have 2 + * members, A and B, on the same machine using different keyFiles. A is + * primary. If we're just starting the set, there are no admin users, so A + * and B can access each other because it's local access. + * + * Then we add a user to A. B cannot sync this user from A, because as soon + * as we add a an admin user, A requires auth. However, A can still + * heartbeat B, because B *doesn't* have an admin user. So A can reach B + * but B cannot reach A. + * + * Once B is restarted with the correct keyFile, everything should work as + * expected. + */ class ReplSetHealthPollTask : public task::Task { + private: HostAndPort h; HeartbeatInfo m; + int tries; + const int threshold; public: - ReplSetHealthPollTask(const HostAndPort& hh, const HeartbeatInfo& mm) : h(hh), m(mm) { } + ReplSetHealthPollTask(const HostAndPort& hh, const HeartbeatInfo& mm) + : h(hh), m(mm), tries(0), threshold(15) { } string name() const { return "rsHealthPoll"; } void doWork() { @@ -163,16 +188,7 @@ namespace mongo { BSONObj info; int theirConfigVersion = -10000; - Timer timer; - - bool ok = requestHeartbeat(theReplSet->name(), theReplSet->selfFullName(), h.toString(), info, theReplSet->config().version, theirConfigVersion); - - mem.ping = (unsigned int)timer.millis(); - - time_t before = timer.startTime() / 1000000; - // we set this on any response - we don't get this far if - // couldn't connect because exception is thrown - time_t after = mem.lastHeartbeat = before + (mem.ping / 1000); + bool ok = _requestHeartbeat(mem, info, theirConfigVersion); // weight new ping with old pings // on the first ping, just use the ping value @@ -180,68 +196,12 @@ namespace mongo { mem.ping = (unsigned int)((old.ping * .8) + (mem.ping * .2)); } - if ( info["time"].isNumber() ) { - long long t = info["time"].numberLong(); - if( t > after ) - mem.skew = (int) (t - after); - else if( t < before ) - mem.skew = (int) (t - before); // negative - } - else { - // it won't be there if remote hasn't initialized yet - if( info.hasElement("time") ) - warning() << "heatbeat.time isn't a number: " << info << endl; - mem.skew = INT_MIN; - } - - { - be state = info["state"]; - if( state.ok() ) - mem.hbstate = MemberState(state.Int()); - } if( ok ) { - HeartbeatInfo::numPings++; - - if( mem.upSince == 0 ) { - log() << "replSet info member " << h.toString() << " is up" << rsLog; - mem.upSince = mem.lastHeartbeat; - } - mem.health = 1.0; - mem.lastHeartbeatMsg = info["hbmsg"].String(); - if( info.hasElement("opTime") ) - mem.opTime = info["opTime"].Date(); - - // see if this member is in the electable set - if( info["e"].eoo() ) { - // for backwards compatibility - const Member *member = theReplSet->findById(mem.id()); - if (member && member->config().potentiallyHot()) { - theReplSet->addToElectable(mem.id()); - } - else { - theReplSet->rmFromElectable(mem.id()); - } - } - // add this server to the electable set if it is within 10 - // seconds of the latest optime we know of - else if( info["e"].trueValue() && - mem.opTime >= theReplSet->lastOpTimeWritten.getSecs() - 10) { - unsigned lastOp = theReplSet->lastOtherOpTime().getSecs(); - if (lastOp > 0 && mem.opTime >= lastOp - 10) { - theReplSet->addToElectable(mem.id()); - } - } - else { - theReplSet->rmFromElectable(mem.id()); - } - - be cfg = info["config"]; - if( cfg.ok() ) { - // received a new config - boost::function<void()> f = - boost::bind(&Manager::msgReceivedNewConfig, theReplSet->mgr, cfg.Obj().copy()); - theReplSet->mgr->send(f); - } + up(info, mem); + } + else if (!info["errmsg"].eoo() && + info["errmsg"].str() == "need to login") { + authIssue(mem); } else { down(mem, info.getStringField("errmsg")); @@ -271,7 +231,58 @@ namespace mongo { } private: + bool _requestHeartbeat(HeartbeatInfo& mem, BSONObj& info, int& theirConfigVersion) { + if (tries++ % threshold == (threshold - 1)) { + ScopedConn conn(h.toString()); + conn.reconnect(); + } + + Timer timer; + + bool ok = requestHeartbeat(theReplSet->name(), theReplSet->selfFullName(), + h.toString(), info, theReplSet->config().version, theirConfigVersion); + + mem.ping = (unsigned int)timer.millis(); + + time_t before = timer.startTime() / 1000000; + // we set this on any response - we don't get this far if + // couldn't connect because exception is thrown + time_t after = mem.lastHeartbeat = before + (mem.ping / 1000); + + if ( info["time"].isNumber() ) { + long long t = info["time"].numberLong(); + if( t > after ) + mem.skew = (int) (t - after); + else if( t < before ) + mem.skew = (int) (t - before); // negative + } + else { + // it won't be there if remote hasn't initialized yet + if( info.hasElement("time") ) + warning() << "heatbeat.time isn't a number: " << info << endl; + mem.skew = INT_MIN; + } + + { + be state = info["state"]; + if( state.ok() ) + mem.hbstate = MemberState(state.Int()); + } + + return ok; + } + + void authIssue(HeartbeatInfo& mem) { + mem.authIssue = true; + mem.hbstate = MemberState::RS_UNKNOWN; + + // set health to 0 so that this doesn't count towards majority + mem.health = 0.0; + theReplSet->rmFromElectable(mem.id()); + } + void down(HeartbeatInfo& mem, string msg) { + mem.authIssue = false; mem.health = 0.0; mem.ping = 0; if( mem.upSince || mem.downSince == 0 ) { @@ -283,6 +294,52 @@ namespace mongo { mem.lastHeartbeatMsg = msg; theReplSet->rmFromElectable(mem.id()); } + + void up(const BSONObj& info, HeartbeatInfo& mem) { + HeartbeatInfo::numPings++; + mem.authIssue = false; + + if( mem.upSince == 0 ) { + log() << "replSet member " << h.toString() << " is up" << rsLog; + mem.upSince = mem.lastHeartbeat; + } + mem.health = 1.0; + mem.lastHeartbeatMsg = info["hbmsg"].String(); + if( info.hasElement("opTime") ) + mem.opTime = info["opTime"].Date(); + + // see if this member is in the electable set + if( info["e"].eoo() ) { + // for backwards compatibility + const Member *member = theReplSet->findById(mem.id()); + if (member && member->config().potentiallyHot()) { + theReplSet->addToElectable(mem.id()); + } + else { + theReplSet->rmFromElectable(mem.id()); + } + } + // add this server to the electable set if it is within 10 + // seconds of the latest optime we know of + else if( info["e"].trueValue() && + mem.opTime >= theReplSet->lastOpTimeWritten.getSecs() - 10) { + unsigned lastOp = theReplSet->lastOtherOpTime().getSecs(); + if (lastOp > 0 && mem.opTime >= lastOp - 10) { + theReplSet->addToElectable(mem.id()); + } + } + else { + theReplSet->rmFromElectable(mem.id()); + } + + be cfg = info["config"]; + if( cfg.ok() ) { + // received a new config + boost::function<void()> f = + boost::bind(&Manager::msgReceivedNewConfig, theReplSet->mgr, cfg.Obj().copy()); + theReplSet->mgr->send(f); + } + } }; void ReplSetImpl::endOldHealthTasks() { diff --git a/db/repl/manager.cpp b/db/repl/manager.cpp index 3c4c0ebf94c..c91adc322a1 100644 --- a/db/repl/manager.cpp +++ b/db/repl/manager.cpp @@ -119,6 +119,39 @@ namespace mongo { } } + void Manager::checkAuth() { + int down = 0, authIssue = 0, total = 0; + + for( Member *m = rs->head(); m; m=m->next() ) { + total++; + + // all authIssue servers will also be not up + if (!m->hbinfo().up()) { + down++; + if (m->hbinfo().authIssue) { + authIssue++; + } + } + } + + // if all nodes are down or failed auth AND at least one failed + // auth, go into recovering. If all nodes are down, stay a + // secondary. + if (authIssue > 0 && down == total) { + log() << "replset error could not reach/authenticate against any members" << endl; + + if (rs->box.getPrimary() == rs->_self) { + log() << "auth problems, relinquishing primary" << rsLog; + rs->relinquish(); + } + + rs->blockSync(true); + } + else { + rs->blockSync(false); + } + } + /** called as the health threads get new results */ void Manager::msgCheckNewState() { { @@ -130,7 +163,8 @@ namespace mongo { if( busyWithElectSelf ) return; checkElectableSet(); - + checkAuth(); + const Member *p = rs->box.getPrimary(); if( p && p != rs->_self ) { if( !p->hbinfo().up() || diff --git a/db/repl/rs.cpp b/db/repl/rs.cpp index 1fbbc103747..f827291c3f3 100644 --- a/db/repl/rs.cpp +++ b/db/repl/rs.cpp @@ -329,6 +329,7 @@ namespace mongo { ReplSetImpl::ReplSetImpl(ReplSetCmdline& replSetCmdline) : elect(this), _currentSyncTarget(0), + _blockSync(false), _hbmsgTime(0), _self(0), _maintenanceMode(0), diff --git a/db/repl/rs.h b/db/repl/rs.h index ba6a0c90829..2b3ea9be425 100644 --- a/db/repl/rs.h +++ b/db/repl/rs.h @@ -93,6 +93,7 @@ namespace mongo { void noteARemoteIsPrimary(const Member *); void checkElectableSet(); + void checkAuth(); virtual void starting(); public: Manager(ReplSetImpl *rs); @@ -348,6 +349,9 @@ namespace mongo { const Member* getMemberToSyncTo(); Member* _currentSyncTarget; + bool _blockSync; + void blockSync(bool block); + // set of electable members' _ids set<unsigned> _electableSet; protected: @@ -577,7 +581,7 @@ namespace mongo { * that still need to be checked for auth. */ bool checkAuth(string& errmsg, BSONObjBuilder& result) { - if( !noauth && adminOnly() ) { + if( !noauth ) { AuthenticationInfo *ai = cc().getAuthenticationInfo(); if (!ai->isAuthorizedForLock("admin", locktype())) { errmsg = "replSet command unauthorized"; diff --git a/db/repl/rs_member.h b/db/repl/rs_member.h index d60bb5261e9..3455805e045 100644 --- a/db/repl/rs_member.h +++ b/db/repl/rs_member.h @@ -69,7 +69,8 @@ namespace mongo { class HeartbeatInfo { unsigned _id; public: - HeartbeatInfo() : _id(0xffffffff),hbstate(MemberState::RS_UNKNOWN),health(-1.0),downSince(0),skew(INT_MIN) { } + HeartbeatInfo() : _id(0xffffffff), hbstate(MemberState::RS_UNKNOWN), health(-1.0), + downSince(0), skew(INT_MIN), authIssue(false) { } HeartbeatInfo(unsigned id); unsigned id() const { return _id; } MemberState hbstate; @@ -80,6 +81,7 @@ namespace mongo { DiagStr lastHeartbeatMsg; OpTime opTime; int skew; + bool authIssue; unsigned int ping; // milliseconds static unsigned int numPings; diff --git a/db/repl/rs_sync.cpp b/db/repl/rs_sync.cpp index b65a624e1af..a3f57514337 100644 --- a/db/repl/rs_sync.cpp +++ b/db/repl/rs_sync.cpp @@ -501,7 +501,7 @@ namespace mongo { sleepsecs(1); return; } - if( sp.state.fatal() || sp.state.startup() ) { + if( _blockSync || sp.state.fatal() || sp.state.startup() ) { sleepsecs(5); return; } @@ -573,6 +573,15 @@ namespace mongo { replLocalAuth(); } + void ReplSetImpl::blockSync(bool block) { + _blockSync = block; + if (_blockSync) { + // syncing is how we get into SECONDARY state, so we'll be stuck in + // RECOVERING until we unblock + changeState(MemberState::RS_RECOVERING); + } + } + void GhostSync::associateSlave(const BSONObj& id, const int memberId) { const OID rid = id["_id"].OID(); rwlock lk( _lock , true ); diff --git a/jstests/replsets/auth2.js b/jstests/replsets/auth2.js new file mode 100644 index 00000000000..353b0373004 --- /dev/null +++ b/jstests/replsets/auth2.js @@ -0,0 +1,98 @@ +var name = "rs_auth2"; +var port = allocatePorts(3); +var path = "jstests/libs/"; + +print("change permissions on #1 & #2"); +run("chmod", "600", path+"key1"); +run("chmod", "600", path+"key2"); + +var setupReplSet = function() { + print("start up rs"); + var rs = new ReplSetTest({"name" : name, "nodes" : 3, "startPort" : port[0]}); + rs.startSet(); + rs.initiate(); + + print("getting master"); + rs.getMaster(); + + print("getting secondaries"); + assert.soon(function() { + var result1 = rs.nodes[1].getDB("admin").runCommand({isMaster: 1}); + var result2 = rs.nodes[2].getDB("admin").runCommand({isMaster: 1}); + return result1.secondary && result2.secondary; + }); + + return rs; +}; + +var checkNoAuth = function() { + print("without an admin user, things should work"); + + master.getDB("foo").bar.insert({x:1}); + var result = master.getDB("admin").runCommand({getLastError:1}); + + printjson(result); + assert.eq(result.err, null); +} + +var checkInvalidAuthStates = function() { + print("check that 0 is in recovering"); + assert.soon(function() { + var result = m.getDB("admin").runCommand({isMaster: 1}); + return !result.ismaster && !result.secondary; + }); + + print("shut down 1, 0 still in recovering."); + rs.stop(1); + sleep(5); + + assert.soon(function() { + var result = m.getDB("admin").runCommand({isMaster: 1}); + return !result.ismaster && !result.secondary; + }); + + print("shut down 2, 0 becomes a secondary."); + rs.stop(2); + + assert.soon(function() { + var result = m.getDB("admin").runCommand({isMaster: 1}); + return result.secondary; + }); + + rs.restart(1, {"keyFile" : path+"key1"}); + rs.restart(2, {"keyFile" : path+"key1"}); +}; + +var checkValidAuthState = function() { + assert.soon(function() { + var result = m.getDB("admin").runCommand({isMaster : 1}); + return result.secondary; + }); +}; + +var rs = setupReplSet(); +var master = rs.getMaster(); + +print("add an admin user"); +master.getDB("admin").addUser("foo","bar"); +m = rs.nodes[0]; + +print("starting 1 and 2 with key file"); +rs.stop(1); +rs.restart(1, {"keyFile" : path+"key1"}); +rs.stop(2); +rs.restart(2, {"keyFile" : path+"key1"}); + +checkInvalidAuthStates(); + +print("restart mongod with bad keyFile"); + +rs.stop(0); +m = rs.restart(0, {"keyFile" : path+"key2"}); + +checkInvalidAuthStates(); + +rs.stop(0); +m = rs.restart(0, {"keyFile" : path+"key1"}); + +print("0 becomes a secondary"); diff --git a/shell/servers.js b/shell/servers.js index 886e7d8ebcf..efbd9b66dae 100755 --- a/shell/servers.js +++ b/shell/servers.js @@ -1860,7 +1860,11 @@ ReplSetTest.prototype.waitForIndicator = function( node, states, ind, timeout ){ printjson( status ) lastTime = new Date().getTime() } - + + if (typeof status.members == 'undefined') { + return false; + } + for( var i = 0; i < status.members.length; i++ ){ if( status.members[i].name == node.host ){ for( var j = 0; j < states.length; j++ ){ |