diff options
Diffstat (limited to 'src/mongo/db/repl/manager.cpp')
-rw-r--r-- | src/mongo/db/repl/manager.cpp | 274 |
1 files changed, 274 insertions, 0 deletions
diff --git a/src/mongo/db/repl/manager.cpp b/src/mongo/db/repl/manager.cpp new file mode 100644 index 00000000000..91648a1b506 --- /dev/null +++ b/src/mongo/db/repl/manager.cpp @@ -0,0 +1,274 @@ +/* @file manager.cpp +*/ + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful,b +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include "pch.h" +#include "rs.h" +#include "connections.h" +#include "../client.h" + +namespace mongo { + + enum { + NOPRIMARY = -2, + SELFPRIMARY = -1 + }; + + /* check members OTHER THAN US to see if they think they are primary */ + const Member * Manager::findOtherPrimary(bool& two) { + two = false; + Member *m = rs->head(); + Member *p = 0; + while( m ) { + DEV assert( m != rs->_self ); + if( m->state().primary() && m->hbinfo().up() ) { + if( p ) { + two = true; + return 0; + } + p = m; + } + m = m->next(); + } + if( p ) + noteARemoteIsPrimary(p); + return p; + } + + Manager::Manager(ReplSetImpl *_rs) : + task::Server("rsMgr"), rs(_rs), busyWithElectSelf(false), _primary(NOPRIMARY) { + } + + Manager::~Manager() { + /* we don't destroy the replset object we sit in; however, the destructor could have thrown on init. + the log message below is just a reminder to come back one day and review this code more, and to + make it cleaner. + */ + log() << "info: ~Manager called" << rsLog; + rs->mgr = 0; + } + + void Manager::starting() { + Client::initThread("rsMgr"); + replLocalAuth(); + } + + void Manager::noteARemoteIsPrimary(const Member *m) { + if( rs->box.getPrimary() == m ) + return; + rs->_self->lhb() = ""; + if( rs->iAmArbiterOnly() ) { + rs->box.set(MemberState::RS_ARBITER, m); + } + else { + rs->box.noteRemoteIsPrimary(m); + } + } + + void Manager::checkElectableSet() { + unsigned otherOp = rs->lastOtherOpTime().getSecs(); + + // make sure the electable set is up-to-date + if (rs->elect.aMajoritySeemsToBeUp() && + rs->iAmPotentiallyHot() && + (otherOp == 0 || rs->lastOpTimeWritten.getSecs() >= otherOp - 10)) { + theReplSet->addToElectable(rs->selfId()); + } + else { + theReplSet->rmFromElectable(rs->selfId()); + } + + // check if we should ask the primary (possibly ourselves) to step down + const Member *highestPriority = theReplSet->getMostElectable(); + const Member *primary = rs->box.getPrimary(); + + if (primary && highestPriority && + highestPriority->config().priority > primary->config().priority) { + log() << "stepping down " << primary->fullName() << endl; + + if (primary->h().isSelf()) { + // replSetStepDown tries to acquire the same lock + // msgCheckNewState takes, so we can't call replSetStepDown on + // ourselves. + rs->relinquish(); + } + else { + BSONObj cmd = BSON( "replSetStepDown" << 1 ); + ScopedConn conn(primary->fullName()); + BSONObj result; + if (!conn.runCommand("admin", cmd, result, 0)) { + log() << "stepping down " << primary->fullName() + << " failed: " << result << endl; + } + } + } + } + + void Manager::checkAuth() { + int down = 0, authIssue = 0, total = 0; + + for( Member *m = rs->head(); m; m=m->next() ) { + total++; + + // all authIssue servers will also be not up + if (!m->hbinfo().up()) { + down++; + if (m->hbinfo().authIssue) { + authIssue++; + } + } + } + + // if all nodes are down or failed auth AND at least one failed + // auth, go into recovering. If all nodes are down, stay a + // secondary. + if (authIssue > 0 && down == total) { + log() << "replset error could not reach/authenticate against any members" << endl; + + if (rs->box.getPrimary() == rs->_self) { + log() << "auth problems, relinquishing primary" << rsLog; + rs->relinquish(); + } + + rs->blockSync(true); + } + else { + rs->blockSync(false); + } + } + + /** called as the health threads get new results */ + void Manager::msgCheckNewState() { + { + theReplSet->assertValid(); + rs->assertValid(); + + RSBase::lock lk(rs); + + if( busyWithElectSelf ) return; + + checkElectableSet(); + checkAuth(); + + const Member *p = rs->box.getPrimary(); + if( p && p != rs->_self ) { + if( !p->hbinfo().up() || + !p->hbinfo().hbstate.primary() ) { + p = 0; + rs->box.setOtherPrimary(0); + } + } + + const Member *p2; + { + bool two; + p2 = findOtherPrimary(two); + if( two ) { + /* two other nodes think they are primary (asynchronously polled) -- wait for things to settle down. */ + log() << "replSet info two primaries (transiently)" << rsLog; + return; + } + } + + if( p2 ) { + /* someone else thinks they are primary. */ + if( p == p2 ) { + // we thought the same; all set. + return; + } + if( p == 0 ) { + noteARemoteIsPrimary(p2); + return; + } + // todo xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + if( p != rs->_self ) { + // switch primary from oldremotep->newremotep2 + noteARemoteIsPrimary(p2); + return; + } + /* we thought we were primary, yet now someone else thinks they are. */ + if( !rs->elect.aMajoritySeemsToBeUp() ) { + /* we can't see a majority. so the other node is probably the right choice. */ + noteARemoteIsPrimary(p2); + return; + } + /* ignore for now, keep thinking we are master. + this could just be timing (we poll every couple seconds) or could indicate + a problem? if it happens consistently for a duration of time we should + alert the sysadmin. + */ + return; + } + + /* didn't find anyone who wants to be primary */ + + if( p ) { + /* we are already primary */ + + if( p != rs->_self ) { + rs->sethbmsg("error p != rs->self in checkNewState"); + log() << "replSet " << p->fullName() << rsLog; + log() << "replSet " << rs->_self->fullName() << rsLog; + return; + } + + if( rs->elect.shouldRelinquish() ) { + log() << "can't see a majority of the set, relinquishing primary" << rsLog; + rs->relinquish(); + } + + return; + } + + if( !rs->iAmPotentiallyHot() ) { // if not we never try to be primary + OCCASIONALLY log() << "replSet I don't see a primary and I can't elect myself" << endl; + return; + } + + /* no one seems to be primary. shall we try to elect ourself? */ + if( !rs->elect.aMajoritySeemsToBeUp() ) { + static time_t last; + static int n; + int ll = 0; + if( ++n > 5 ) ll++; + if( last + 60 > time(0 ) ) ll++; + log(ll) << "replSet can't see a majority, will not try to elect self" << rsLog; + last = time(0); + return; + } + + if( !rs->iAmElectable() ) { + return; + } + + busyWithElectSelf = true; // don't try to do further elections & such while we are already working on one. + } + try { + rs->elect.electSelf(); + } + catch(RetryAfterSleepException&) { + /* we want to process new inbounds before trying this again. so we just put a checkNewstate in the queue for eval later. */ + requeue(); + } + catch(...) { + log() << "replSet error unexpected assertion in rs manager" << rsLog; + } + busyWithElectSelf = false; + } + +} |