diff options
author | Eliot Horowitz <eliot@10gen.com> | 2011-08-08 19:00:59 -0400 |
---|---|---|
committer | Eliot Horowitz <eliot@10gen.com> | 2011-08-08 19:00:59 -0400 |
commit | 760f0b28d4b6540923f51ad7a4e33bae43d343ff (patch) | |
tree | 1bc156cc274b43ac57d1e9ee2048bdf77f9a45af /client | |
parent | 5657637bd93afcf4f5d5a1a964a6c433375e70ca (diff) | |
download | mongo-760f0b28d4b6540923f51ad7a4e33bae43d343ff.tar.gz |
update hidden status and check secondary more accurately SERVER-3432
Diffstat (limited to 'client')
-rw-r--r-- | client/dbclient_rs.cpp | 94 | ||||
-rw-r--r-- | client/dbclient_rs.h | 31 |
2 files changed, 89 insertions, 36 deletions
diff --git a/client/dbclient_rs.cpp b/client/dbclient_rs.cpp index cd184b763f5..996c664a770 100644 --- a/client/dbclient_rs.cpp +++ b/client/dbclient_rs.cpp @@ -54,9 +54,9 @@ namespace mongo { void run() { log() << "starting" << endl; while ( ! inShutdown() ) { - sleepsecs( 20 ); + sleepsecs( 10 ); try { - ReplicaSetMonitor::checkAll(); + ReplicaSetMonitor::checkAll( true ); } catch ( std::exception& e ) { error() << "check failed: " << e.what() << endl; @@ -99,17 +99,14 @@ namespace mongo { } _nodes.push_back( Node( servers[i] , conn.release() ) ); - + + int myLoc = _nodes.size() - 1; string maybePrimary; - if (_checkConnection( _nodes[_nodes.size()-1].conn , maybePrimary, false)) { - break; - } + _checkConnection( _nodes[myLoc].conn.get() , maybePrimary, false, myLoc ); } } ReplicaSetMonitor::~ReplicaSetMonitor() { - for ( unsigned i=0; i<_nodes.size(); i++ ) - delete _nodes[i].conn; _nodes.clear(); _master = -1; } @@ -134,7 +131,7 @@ namespace mongo { } - void ReplicaSetMonitor::checkAll() { + void ReplicaSetMonitor::checkAll( bool checkAllSecondaries ) { set<string> seen; while ( true ) { @@ -155,7 +152,7 @@ namespace mongo { if ( ! m ) break; - m->check(); + m->check( checkAllSecondaries ); } @@ -211,7 +208,7 @@ namespace mongo { return _nodes[_master].addr; } - _check(); + _check( false ); scoped_lock lk( _lock ); uassert( 10009 , str::stream() << "ReplicaSetMonitor no master found for set: " << _name , _master >= 0 ); @@ -233,7 +230,7 @@ namespace mongo { wasFound = true; - if ( _nodes[i].ok ) + if ( _nodes[i].okForSecondaryQueries() ) return prev; break; @@ -262,7 +259,7 @@ namespace mongo { LOG(2) << "not selecting " << _nodes[_nextSlave].addr << " as it is the current master" << endl; continue; } - if ( _nodes[ _nextSlave ].ok ) + if ( _nodes[ _nextSlave ].okForSecondaryQueries() ) return _nodes[ _nextSlave ].addr; LOG(2) << "not selecting " << _nodes[_nextSlave].addr << " as it is not ok to use" << endl; @@ -342,16 +339,34 @@ namespace mongo { - bool ReplicaSetMonitor::_checkConnection( DBClientConnection * c , string& maybePrimary , bool verbose ) { + bool ReplicaSetMonitor::_checkConnection( DBClientConnection * c , string& maybePrimary , bool verbose , int nodesOffset ) { scoped_lock lk( _checkConnectionLock ); bool isMaster = false; bool changed = false; try { + Timer t; BSONObj o; c->isMaster(isMaster, &o); + + if ( o["setName"].type() != String || o["setName"].String() != _name ) { + warning() << "node: " << c->getServerAddress() << " isn't a part of set: " << _name + << " ismaster: " << o << endl; + if ( nodesOffset >= 0 ) + _nodes[nodesOffset].ok = false; + return false; + } - log( ! verbose ) << "ReplicaSetMonitor::_checkConnection: " << c->toString() << ' ' << o << endl; + if ( nodesOffset >= 0 ) { + _nodes[nodesOffset].pingTimeMillis = t.millis(); + _nodes[nodesOffset].hidden = o["hidden"].trueValue(); + _nodes[nodesOffset].secondary = o["secondary"].trueValue(); + _nodes[nodesOffset].ismaster = o["ismaster"].trueValue(); + + _nodes[nodesOffset].lastIsMaster = o.copy(); + } + log( ! verbose ) << "ReplicaSetMonitor::_checkConnection: " << c->toString() << ' ' << o << endl; + // add other nodes if ( o["hosts"].type() == Array ) { if ( o["primary"].type() == String ) @@ -362,8 +377,10 @@ namespace mongo { if (o.hasField("passives") && o["passives"].type() == Array) { _checkHosts(o["passives"].Obj(), changed); } - + _checkStatus(c); + + } catch ( std::exception& e ) { log( ! verbose ) << "ReplicaSetMonitor::_checkConnection: caught exception " << c->toString() << ' ' << e.what() << endl; @@ -375,24 +392,28 @@ namespace mongo { return isMaster; } - void ReplicaSetMonitor::_check() { + void ReplicaSetMonitor::_check( bool checkAllSecondaries ) { bool triedQuickCheck = false; LOG(1) << "_check : " << getServerAddress() << endl; + int newMaster = -1; + for ( int retry = 0; retry < 2; retry++ ) { for ( unsigned i=0; i<_nodes.size(); i++ ) { - DBClientConnection * c; + shared_ptr<DBClientConnection> c; { scoped_lock lk( _lock ); c = _nodes[i].conn; } string maybePrimary; - if ( _checkConnection( c , maybePrimary , retry ) ) { + if ( _checkConnection( c.get() , maybePrimary , retry , i ) ) { _master = i; - return; + newMaster = i; + if ( ! checkAllSecondaries ) + return; } if ( ! triedQuickCheck && maybePrimary.size() ) { @@ -400,36 +421,44 @@ namespace mongo { if ( x >= 0 ) { triedQuickCheck = true; string dummy; - DBClientConnection * testConn; + shared_ptr<DBClientConnection> testConn; { scoped_lock lk( _lock ); testConn = _nodes[x].conn; } - if ( _checkConnection( testConn , dummy , false ) ) { + if ( _checkConnection( testConn.get() , dummy , false , x ) ) { _master = x; - return; + newMaster = x; + if ( ! checkAllSecondaries ) + return; } } } } + + if ( newMaster >= 0 ) + return; + sleepsecs(1); } } - void ReplicaSetMonitor::check() { + void ReplicaSetMonitor::check( bool checkAllSecondaries ) { // first see if the current master is fine if ( _master >= 0 ) { string temp; - if ( _checkConnection( _nodes[_master].conn , temp , false ) ) { - // current master is fine, so we're done - return; + if ( _checkConnection( _nodes[_master].conn.get() , temp , false , _master ) ) { + if ( ! checkAllSecondaries ) { + // current master is fine, so we're done + return; + } } } // we either have no master, or the current is dead - _check(); + _check( checkAllSecondaries ); } int ReplicaSetMonitor::_find( const string& server ) const { @@ -452,13 +481,18 @@ namespace mongo { return i; return -1; } - + void ReplicaSetMonitor::appendInfo( BSONObjBuilder& b ) const { scoped_lock lk( _lock ); BSONArrayBuilder hosts( b.subarrayStart( "hosts" ) ); for ( unsigned i=0; i<_nodes.size(); i++ ) { hosts.append( BSON( "addr" << _nodes[i].addr << - "ok" << _nodes[i].ok ) ); + // "lastIsMaster" << _nodes[i].lastIsMaster << // this is a potential race, so only used when debugging + "ok" << _nodes[i].ok << + "ismaster" << _nodes[i].ismaster << + "hidden" << _nodes[i].hidden << + "secondary" << _nodes[i].secondary << + "pingTimeMillis" << _nodes[i].pingTimeMillis ) ); } hosts.done(); diff --git a/client/dbclient_rs.h b/client/dbclient_rs.h index 0bc61b7a272..2bef9ccfafa 100644 --- a/client/dbclient_rs.h +++ b/client/dbclient_rs.h @@ -52,7 +52,7 @@ namespace mongo { * checks all sets for current master and new secondaries * usually only called from a BackgroundJob */ - static void checkAll(); + static void checkAll( bool checkAllSecondaries ); /** * this is called whenever the config of any repclia set changes @@ -87,7 +87,7 @@ namespace mongo { /** * checks for current master and new secondaries */ - void check(); + void check( bool checkAllSecondaries ); string getName() const { return _name; } @@ -106,7 +106,7 @@ namespace mongo { */ ReplicaSetMonitor( const string& name , const vector<HostAndPort>& servers ); - void _check(); + void _check( bool checkAllSecondaries ); /** * Use replSetGetStatus command to make sure hosts in host list are up @@ -127,9 +127,10 @@ namespace mongo { * @param c the connection to check * @param maybePrimary OUT * @param verbose + * @param nodesOffset - offset into _nodes array, -1 for not in it * @return if the connection is good */ - bool _checkConnection( DBClientConnection * c , string& maybePrimary , bool verbose ); + bool _checkConnection( DBClientConnection * c , string& maybePrimary , bool verbose , int nodesOffset ); int _find( const string& server ) const ; int _find_inlock( const string& server ) const ; @@ -140,14 +141,32 @@ namespace mongo { string _name; struct Node { - Node( const HostAndPort& a , DBClientConnection* c ) : addr( a ) , conn(c) , ok(true) {} + Node( const HostAndPort& a , DBClientConnection* c ) + : addr( a ) , conn(c) , ok(true) , + ismaster(false), secondary( false ) , hidden( false ) , pingTimeMillis(0) { + } + + bool okForSecondaryQueries() const { + return ok && ( ismaster || ( secondary && ! hidden ) ); + } + HostAndPort addr; - DBClientConnection* conn; + shared_ptr<DBClientConnection> conn; // if this node is in a failure state // used for slave routing // this is too simple, should make it better bool ok; + + // as reported by ismaster + BSONObj lastIsMaster; + + bool ismaster; + bool secondary; + bool hidden; + + int pingTimeMillis; + }; /** |