summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Kristina <kristina@10gen.com> 2011-10-05 16:55:55 -0400
committer Kristina <kristina@10gen.com> 2011-10-24 15:00:33 -0400
commit e65e110553c725939f0706ca42a952855fea388b (patch)
tree d8e2369841dfeafe5bb6d4b930d3173833599fa8
parent f74345924b6e2381f351337312401539d3156591 (diff)
download mongo-e65e110553c725939f0706ca42a952855fea388b.tar.gz
Make secondaries go into recovering state when auth is wrong SERVER-3715
-rw-r--r-- db/repl/health.cpp 5
-rw-r--r-- db/repl/heartbeat.cpp 225
-rw-r--r-- db/repl/manager.cpp 36
-rw-r--r-- db/repl/rs.cpp 1
-rw-r--r-- db/repl/rs.h 6
-rw-r--r-- db/repl/rs_member.h 4
-rw-r--r-- db/repl/rs_sync.cpp 11
-rw-r--r-- jstests/replsets/auth2.js 98
-rwxr-xr-x shell/servers.js 6
9 files changed, 303 insertions, 89 deletions
diff --git a/db/repl/health.cpp b/db/repl/health.cpp
index 711b457dd8c..7e5a39f1ddb 100644
--- a/db/repl/health.cpp
+++ b/db/repl/health.cpp
@@ -402,6 +402,11 @@ namespace mongo {
string s = m->lhb();
if( !s.empty() )
bb.append("errmsg", s);
+
+ if (m->hbinfo().authIssue) {
+ bb.append("authenticated", false);
+ }
+
v.push_back(bb.obj());
m = m->next();
}
diff --git a/db/repl/heartbeat.cpp b/db/repl/heartbeat.cpp
index 7d3f78c73b5..a1780c38740 100644
--- a/db/repl/heartbeat.cpp
+++ b/db/repl/heartbeat.cpp
@@ -51,11 +51,14 @@ namespace mongo {
/* { replSetHeartbeat : <setname> } */
class CmdReplSetHeartbeat : public ReplSetCommand {
public:
- virtual bool adminOnly() const { return false; }
CmdReplSetHeartbeat() : ReplSetCommand("replSetHeartbeat") { }
virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
- if( replSetBlind )
+ if( replSetBlind ) {
+ if (theReplSet) {
+ errmsg = str::stream() << theReplSet->selfFullName() << " is blind";
+ }
return false;
+ }
/* we don't call ReplSetCommand::check() here because heartbeat
checks many things that are pre-initialization. */
@@ -123,32 +126,54 @@ namespace mongo {
}
} cmdReplSetHeartbeat;
- /* throws dbexception */
- bool requestHeartbeat(string setName, string from, string memberFullName, BSONObj& result, int myCfgVersion, int& theirCfgVersion, bool checkEmpty) {
+ bool requestHeartbeat(string setName, string from, string memberFullName, BSONObj& result,
+ int myCfgVersion, int& theirCfgVersion, bool checkEmpty) {
if( replSetBlind ) {
- //sleepmillis( rand() );
return false;
}
- BSONObj cmd = BSON( "replSetHeartbeat" << setName << "v" << myCfgVersion << "pv" << 1 << "checkEmpty" << checkEmpty << "from" << from );
-
- // we might be talking to ourself - generally not a great idea to do outbound waiting calls in a write lock
- assert( !dbMutex.isWriteLocked() );
+ BSONObj cmd = BSON( "replSetHeartbeat" << setName <<
+ "v" << myCfgVersion <<
+ "pv" << 1 <<
+ "checkEmpty" << checkEmpty <<
+ "from" << from );
- // these are slow (multisecond to respond), so generally we don't want to be locked, at least not without
+ // generally not a great idea to do outbound waiting calls in a
+ // write lock. heartbeats can be slow (multisecond to respond), so
+ // generally we don't want to be locked, at least not without
// thinking carefully about it first.
- assert( theReplSet == 0 || !theReplSet->lockedByMe() );
+ uassert(15900, "can't heartbeat: too much lock",
+ !dbMutex.isWriteLocked() || theReplSet == 0 || !theReplSet->lockedByMe() );
ScopedConn conn(memberFullName);
return conn.runCommand("admin", cmd, result, 0);
}
- /* poll every other set member to check its status */
+ /**
+ * Poll every other set member to check its status.
+ *
+ * A detail about local machines and authentication: suppose we have 2
+ * members, A and B, on the same machine using different keyFiles. A is
+ * primary. If we're just starting the set, there are no admin users, so A
+ * and B can access each other because it's local access.
+ *
+ * Then we add a user to A. B cannot sync this user from A, because as soon
+ * as we add an admin user, A requires auth. However, A can still
+ * heartbeat B, because B *doesn't* have an admin user. So A can reach B
+ * but B cannot reach A.
+ *
+ * Once B is restarted with the correct keyFile, everything should work as
+ * expected.
+ */
class ReplSetHealthPollTask : public task::Task {
+ private:
HostAndPort h;
HeartbeatInfo m;
+ int tries;
+ const int threshold;
public:
- ReplSetHealthPollTask(const HostAndPort& hh, const HeartbeatInfo& mm) : h(hh), m(mm) { }
+ ReplSetHealthPollTask(const HostAndPort& hh, const HeartbeatInfo& mm)
+ : h(hh), m(mm), tries(0), threshold(15) { }
string name() const { return "rsHealthPoll"; }
void doWork() {
@@ -163,16 +188,7 @@ namespace mongo {
BSONObj info;
int theirConfigVersion = -10000;
- Timer timer;
-
- bool ok = requestHeartbeat(theReplSet->name(), theReplSet->selfFullName(), h.toString(), info, theReplSet->config().version, theirConfigVersion);
-
- mem.ping = (unsigned int)timer.millis();
-
- time_t before = timer.startTime() / 1000000;
- // we set this on any response - we don't get this far if
- // couldn't connect because exception is thrown
- time_t after = mem.lastHeartbeat = before + (mem.ping / 1000);
+ bool ok = _requestHeartbeat(mem, info, theirConfigVersion);
// weight new ping with old pings
// on the first ping, just use the ping value
@@ -180,68 +196,12 @@ namespace mongo {
mem.ping = (unsigned int)((old.ping * .8) + (mem.ping * .2));
}
- if ( info["time"].isNumber() ) {
- long long t = info["time"].numberLong();
- if( t > after )
- mem.skew = (int) (t - after);
- else if( t < before )
- mem.skew = (int) (t - before); // negative
- }
- else {
- // it won't be there if remote hasn't initialized yet
- if( info.hasElement("time") )
- warning() << "heatbeat.time isn't a number: " << info << endl;
- mem.skew = INT_MIN;
- }
-
- {
- be state = info["state"];
- if( state.ok() )
- mem.hbstate = MemberState(state.Int());
- }
if( ok ) {
- HeartbeatInfo::numPings++;
-
- if( mem.upSince == 0 ) {
- log() << "replSet info member " << h.toString() << " is up" << rsLog;
- mem.upSince = mem.lastHeartbeat;
- }
- mem.health = 1.0;
- mem.lastHeartbeatMsg = info["hbmsg"].String();
- if( info.hasElement("opTime") )
- mem.opTime = info["opTime"].Date();
-
- // see if this member is in the electable set
- if( info["e"].eoo() ) {
- // for backwards compatibility
- const Member *member = theReplSet->findById(mem.id());
- if (member && member->config().potentiallyHot()) {
- theReplSet->addToElectable(mem.id());
- }
- else {
- theReplSet->rmFromElectable(mem.id());
- }
- }
- // add this server to the electable set if it is within 10
- // seconds of the latest optime we know of
- else if( info["e"].trueValue() &&
- mem.opTime >= theReplSet->lastOpTimeWritten.getSecs() - 10) {
- unsigned lastOp = theReplSet->lastOtherOpTime().getSecs();
- if (lastOp > 0 && mem.opTime >= lastOp - 10) {
- theReplSet->addToElectable(mem.id());
- }
- }
- else {
- theReplSet->rmFromElectable(mem.id());
- }
-
- be cfg = info["config"];
- if( cfg.ok() ) {
- // received a new config
- boost::function<void()> f =
- boost::bind(&Manager::msgReceivedNewConfig, theReplSet->mgr, cfg.Obj().copy());
- theReplSet->mgr->send(f);
- }
+ up(info, mem);
+ }
+ else if (!info["errmsg"].eoo() &&
+ info["errmsg"].str() == "need to login") {
+ authIssue(mem);
}
else {
down(mem, info.getStringField("errmsg"));
@@ -271,7 +231,58 @@ namespace mongo {
}
private:
+ bool _requestHeartbeat(HeartbeatInfo& mem, BSONObj& info, int& theirConfigVersion) {
+ if (tries++ % threshold == (threshold - 1)) {
+ ScopedConn conn(h.toString());
+ conn.reconnect();
+ }
+
+ Timer timer;
+
+ bool ok = requestHeartbeat(theReplSet->name(), theReplSet->selfFullName(),
+ h.toString(), info, theReplSet->config().version, theirConfigVersion);
+
+ mem.ping = (unsigned int)timer.millis();
+
+ time_t before = timer.startTime() / 1000000;
+ // we set this on any response - we don't get this far if we
+ // couldn't connect, because an exception is thrown
+ time_t after = mem.lastHeartbeat = before + (mem.ping / 1000);
+
+ if ( info["time"].isNumber() ) {
+ long long t = info["time"].numberLong();
+ if( t > after )
+ mem.skew = (int) (t - after);
+ else if( t < before )
+ mem.skew = (int) (t - before); // negative
+ }
+ else {
+ // it won't be there if remote hasn't initialized yet
+ if( info.hasElement("time") )
+ warning() << "heatbeat.time isn't a number: " << info << endl;
+ mem.skew = INT_MIN;
+ }
+
+ {
+ be state = info["state"];
+ if( state.ok() )
+ mem.hbstate = MemberState(state.Int());
+ }
+
+ return ok;
+ }
+
+ void authIssue(HeartbeatInfo& mem) {
+ mem.authIssue = true;
+ mem.hbstate = MemberState::RS_UNKNOWN;
+
+ // set health to 0 so that this doesn't count towards majority
+ mem.health = 0.0;
+ theReplSet->rmFromElectable(mem.id());
+ }
+
void down(HeartbeatInfo& mem, string msg) {
+ mem.authIssue = false;
mem.health = 0.0;
mem.ping = 0;
if( mem.upSince || mem.downSince == 0 ) {
@@ -283,6 +294,52 @@ namespace mongo {
mem.lastHeartbeatMsg = msg;
theReplSet->rmFromElectable(mem.id());
}
+
+ void up(const BSONObj& info, HeartbeatInfo& mem) {
+ HeartbeatInfo::numPings++;
+ mem.authIssue = false;
+
+ if( mem.upSince == 0 ) {
+ log() << "replSet member " << h.toString() << " is up" << rsLog;
+ mem.upSince = mem.lastHeartbeat;
+ }
+ mem.health = 1.0;
+ mem.lastHeartbeatMsg = info["hbmsg"].String();
+ if( info.hasElement("opTime") )
+ mem.opTime = info["opTime"].Date();
+
+ // see if this member is in the electable set
+ if( info["e"].eoo() ) {
+ // for backwards compatibility
+ const Member *member = theReplSet->findById(mem.id());
+ if (member && member->config().potentiallyHot()) {
+ theReplSet->addToElectable(mem.id());
+ }
+ else {
+ theReplSet->rmFromElectable(mem.id());
+ }
+ }
+ // add this server to the electable set if it is within 10
+ // seconds of the latest optime we know of
+ else if( info["e"].trueValue() &&
+ mem.opTime >= theReplSet->lastOpTimeWritten.getSecs() - 10) {
+ unsigned lastOp = theReplSet->lastOtherOpTime().getSecs();
+ if (lastOp > 0 && mem.opTime >= lastOp - 10) {
+ theReplSet->addToElectable(mem.id());
+ }
+ }
+ else {
+ theReplSet->rmFromElectable(mem.id());
+ }
+
+ be cfg = info["config"];
+ if( cfg.ok() ) {
+ // received a new config
+ boost::function<void()> f =
+ boost::bind(&Manager::msgReceivedNewConfig, theReplSet->mgr, cfg.Obj().copy());
+ theReplSet->mgr->send(f);
+ }
+ }
};
void ReplSetImpl::endOldHealthTasks() {
diff --git a/db/repl/manager.cpp b/db/repl/manager.cpp
index 3c4c0ebf94c..c91adc322a1 100644
--- a/db/repl/manager.cpp
+++ b/db/repl/manager.cpp
@@ -119,6 +119,39 @@ namespace mongo {
}
}
+ void Manager::checkAuth() {
+ int down = 0, authIssue = 0, total = 0;
+
+ for( Member *m = rs->head(); m; m=m->next() ) {
+ total++;
+
+ // all authIssue servers will also be not up
+ if (!m->hbinfo().up()) {
+ down++;
+ if (m->hbinfo().authIssue) {
+ authIssue++;
+ }
+ }
+ }
+
+ // if all nodes are down or failed auth AND at least one failed
+ // auth, go into recovering. If all nodes are merely down (none
+ // failed auth), stay a secondary.
+ if (authIssue > 0 && down == total) {
+ log() << "replset error could not reach/authenticate against any members" << endl;
+
+ if (rs->box.getPrimary() == rs->_self) {
+ log() << "auth problems, relinquishing primary" << rsLog;
+ rs->relinquish();
+ }
+
+ rs->blockSync(true);
+ }
+ else {
+ rs->blockSync(false);
+ }
+ }
+
/** called as the health threads get new results */
void Manager::msgCheckNewState() {
{
@@ -130,7 +163,8 @@ namespace mongo {
if( busyWithElectSelf ) return;
checkElectableSet();
-
+ checkAuth();
+
const Member *p = rs->box.getPrimary();
if( p && p != rs->_self ) {
if( !p->hbinfo().up() ||
diff --git a/db/repl/rs.cpp b/db/repl/rs.cpp
index 1fbbc103747..f827291c3f3 100644
--- a/db/repl/rs.cpp
+++ b/db/repl/rs.cpp
@@ -329,6 +329,7 @@ namespace mongo {
ReplSetImpl::ReplSetImpl(ReplSetCmdline& replSetCmdline) : elect(this),
_currentSyncTarget(0),
+ _blockSync(false),
_hbmsgTime(0),
_self(0),
_maintenanceMode(0),
diff --git a/db/repl/rs.h b/db/repl/rs.h
index ba6a0c90829..2b3ea9be425 100644
--- a/db/repl/rs.h
+++ b/db/repl/rs.h
@@ -93,6 +93,7 @@ namespace mongo {
void noteARemoteIsPrimary(const Member *);
void checkElectableSet();
+ void checkAuth();
virtual void starting();
public:
Manager(ReplSetImpl *rs);
@@ -348,6 +349,9 @@ namespace mongo {
const Member* getMemberToSyncTo();
Member* _currentSyncTarget;
+ bool _blockSync;
+ void blockSync(bool block);
+
// set of electable members' _ids
set<unsigned> _electableSet;
protected:
@@ -577,7 +581,7 @@ namespace mongo {
* that still need to be checked for auth.
*/
bool checkAuth(string& errmsg, BSONObjBuilder& result) {
- if( !noauth && adminOnly() ) {
+ if( !noauth ) {
AuthenticationInfo *ai = cc().getAuthenticationInfo();
if (!ai->isAuthorizedForLock("admin", locktype())) {
errmsg = "replSet command unauthorized";
diff --git a/db/repl/rs_member.h b/db/repl/rs_member.h
index d60bb5261e9..3455805e045 100644
--- a/db/repl/rs_member.h
+++ b/db/repl/rs_member.h
@@ -69,7 +69,8 @@ namespace mongo {
class HeartbeatInfo {
unsigned _id;
public:
- HeartbeatInfo() : _id(0xffffffff),hbstate(MemberState::RS_UNKNOWN),health(-1.0),downSince(0),skew(INT_MIN) { }
+ HeartbeatInfo() : _id(0xffffffff), hbstate(MemberState::RS_UNKNOWN), health(-1.0),
+ downSince(0), skew(INT_MIN), authIssue(false) { }
HeartbeatInfo(unsigned id);
unsigned id() const { return _id; }
MemberState hbstate;
@@ -80,6 +81,7 @@ namespace mongo {
DiagStr lastHeartbeatMsg;
OpTime opTime;
int skew;
+ bool authIssue;
unsigned int ping; // milliseconds
static unsigned int numPings;
diff --git a/db/repl/rs_sync.cpp b/db/repl/rs_sync.cpp
index b65a624e1af..a3f57514337 100644
--- a/db/repl/rs_sync.cpp
+++ b/db/repl/rs_sync.cpp
@@ -501,7 +501,7 @@ namespace mongo {
sleepsecs(1);
return;
}
- if( sp.state.fatal() || sp.state.startup() ) {
+ if( _blockSync || sp.state.fatal() || sp.state.startup() ) {
sleepsecs(5);
return;
}
@@ -573,6 +573,15 @@ namespace mongo {
replLocalAuth();
}
+ void ReplSetImpl::blockSync(bool block) {
+ _blockSync = block;
+ if (_blockSync) {
+ // syncing is how we get into SECONDARY state, so we'll be stuck in
+ // RECOVERING until we unblock
+ changeState(MemberState::RS_RECOVERING);
+ }
+ }
+
void GhostSync::associateSlave(const BSONObj& id, const int memberId) {
const OID rid = id["_id"].OID();
rwlock lk( _lock , true );
diff --git a/jstests/replsets/auth2.js b/jstests/replsets/auth2.js
new file mode 100644
index 00000000000..353b0373004
--- /dev/null
+++ b/jstests/replsets/auth2.js
@@ -0,0 +1,98 @@
+var name = "rs_auth2";
+var port = allocatePorts(3);
+var path = "jstests/libs/";
+
+print("change permissions on #1 & #2");
+run("chmod", "600", path+"key1");
+run("chmod", "600", path+"key2");
+
+var setupReplSet = function() {
+ print("start up rs");
+ var rs = new ReplSetTest({"name" : name, "nodes" : 3, "startPort" : port[0]});
+ rs.startSet();
+ rs.initiate();
+
+ print("getting master");
+ rs.getMaster();
+
+ print("getting secondaries");
+ assert.soon(function() {
+ var result1 = rs.nodes[1].getDB("admin").runCommand({isMaster: 1});
+ var result2 = rs.nodes[2].getDB("admin").runCommand({isMaster: 1});
+ return result1.secondary && result2.secondary;
+ });
+
+ return rs;
+};
+
+var checkNoAuth = function() {
+ print("without an admin user, things should work");
+
+ master.getDB("foo").bar.insert({x:1});
+ var result = master.getDB("admin").runCommand({getLastError:1});
+
+ printjson(result);
+ assert.eq(result.err, null);
+}
+
+var checkInvalidAuthStates = function() {
+ print("check that 0 is in recovering");
+ assert.soon(function() {
+ var result = m.getDB("admin").runCommand({isMaster: 1});
+ return !result.ismaster && !result.secondary;
+ });
+
+ print("shut down 1, 0 still in recovering.");
+ rs.stop(1);
+ sleep(5);
+
+ assert.soon(function() {
+ var result = m.getDB("admin").runCommand({isMaster: 1});
+ return !result.ismaster && !result.secondary;
+ });
+
+ print("shut down 2, 0 becomes a secondary.");
+ rs.stop(2);
+
+ assert.soon(function() {
+ var result = m.getDB("admin").runCommand({isMaster: 1});
+ return result.secondary;
+ });
+
+ rs.restart(1, {"keyFile" : path+"key1"});
+ rs.restart(2, {"keyFile" : path+"key1"});
+};
+
+var checkValidAuthState = function() {
+ assert.soon(function() {
+ var result = m.getDB("admin").runCommand({isMaster : 1});
+ return result.secondary;
+ });
+};
+
+var rs = setupReplSet();
+var master = rs.getMaster();
+
+print("add an admin user");
+master.getDB("admin").addUser("foo","bar");
+m = rs.nodes[0];
+
+print("starting 1 and 2 with key file");
+rs.stop(1);
+rs.restart(1, {"keyFile" : path+"key1"});
+rs.stop(2);
+rs.restart(2, {"keyFile" : path+"key1"});
+
+checkInvalidAuthStates();
+
+print("restart mongod with bad keyFile");
+
+rs.stop(0);
+m = rs.restart(0, {"keyFile" : path+"key2"});
+
+checkInvalidAuthStates();
+
+rs.stop(0);
+m = rs.restart(0, {"keyFile" : path+"key1"});
+
+print("0 becomes a secondary");
diff --git a/shell/servers.js b/shell/servers.js
index 886e7d8ebcf..efbd9b66dae 100755
--- a/shell/servers.js
+++ b/shell/servers.js
@@ -1860,7 +1860,11 @@ ReplSetTest.prototype.waitForIndicator = function( node, states, ind, timeout ){
printjson( status )
lastTime = new Date().getTime()
}
-
+
+ if (typeof status.members == 'undefined') {
+ return false;
+ }
+
for( var i = 0; i < status.members.length; i++ ){
if( status.members[i].name == node.host ){
for( var j = 0; j < states.length; j++ ){