diff options
author | Eliot Horowitz <eliot@10gen.com> | 2010-07-13 14:50:53 -0400 |
---|---|---|
committer | Eliot Horowitz <eliot@10gen.com> | 2010-07-13 14:50:53 -0400 |
commit | 130f70d3d4113e5d5f04433072dfc3cda560bb35 (patch) | |
tree | 2e922b529f6bfb5f67d1d4c80cd877d6049fe059 /s/balance.cpp | |
parent | 22653fdefeea36317cbe2557b9cb039e64512ce6 (diff) | |
download | mongo-130f70d3d4113e5d5f04433072dfc3cda560bb35.tar.gz |
use distlock for balancer SERVER-1354
Diffstat (limited to 's/balance.cpp')
-rw-r--r-- | s/balance.cpp | 82 |
1 files changed, 8 insertions, 74 deletions
diff --git a/s/balance.cpp b/s/balance.cpp index 6ecbf61f6ac..19a8673a396 100644 --- a/s/balance.cpp +++ b/s/balance.cpp @@ -21,6 +21,8 @@ #include "../db/jsobj.h" #include "../db/cmdline.h" +#include "../client/distlock.h" + #include "balance.h" #include "server.h" #include "shard.h" @@ -37,78 +39,6 @@ namespace mongo { delete _policy; } - bool Balancer::_shouldIBalance( DBClientBase& conn ){ - BSONObj x = conn.findOne( ShardNS::settings , BSON( "_id" << "balancer" ) ); - log(2) << "balancer configDB entry: " << x << endl; - - if ( ! x.isEmpty() ){ - - if ( x["who"].String() == _myid ){ - log(2) << "I'm the current balancer" << endl; - - // If need be, we can stop the balancer by creating a 'stopped : true' field in its - // entry. - if ( x["stopped"].type() && x["stopped"].Bool() ){ - log() << "stopped flag true" << endl; - return false; - } - - return true; - } - - BSONObj other = conn.findOne( ShardNS::mongos , x["who"].wrap( "_id" ) ); - // TODO: if it can't find it for 10 minutes, should reset - massert( 13125 , (string)"can't find mongos: " + x["who"].String() , ! other.isEmpty() ); - - int secsSincePing = (int)(( jsTime() - other["ping"].Date() ) / 1000 ); - ostringstream msgPing; - msgPing << secsSincePing << "secs since last ping from balancer in charge: " << other << endl; - if ( secsSincePing < ( 60 * 10 ) ){ - log(2) << msgPing.str(); - return false; - } - - log() << msgPing.str(); - log() << "will try to take over: " << x << endl; - // we want to take over, so fall through to below - } - - // Taking over means replacing 'who' with this balancer's address. Note that - // to avoid any races, we use a compare-and-set strategy relying on the - // incarnation of the previous balancer (the key 'x'). - - OID incarnation; - incarnation.init(); - - BSONObjBuilder updateQuery; - updateQuery.append( "_id" , "balancer" ); - if ( x["x"].type() ){ - updateQuery.append( x["x"] ); - - // Carry on the stopped flag, if it existed. - if ( ! x["stopped"].type() ){ - updateQuery.append( "stopped" , x["stopped"].Bool() ); - } - - } else { - updateQuery.append( "x" , BSON( "$exists" << false ) ); - } - - conn.update( ShardNS::settings , - updateQuery.obj() , - BSON( "$set" << BSON( "who" << _myid << "x" << incarnation ) ) , - true ); - - // If another balancer beats this one to the punch, the following query will see - // the incarnation for that other guy. - - x = conn.findOne( ShardNS::settings , BSON( "_id" << "balancer" ) ); - bool takeOver = _myid == x["who"].String() && incarnation == x["x"].OID(); - log() << ( takeOver ? "" : "un" ) << "successful takeover. Current balancer is : " << x << endl; - - return takeOver; - } - int Balancer::_moveChunks( const vector<CandidateChunkPtr>* candidateChunks ) { int movedCount = 0; @@ -320,11 +250,14 @@ namespace mongo { _ping(); _checkOIDs(); + ConnectionString config = configServer.getConnectionString(); + DistributedLock balanceLock( config , "balancer" ); + while ( ! inShutdown() ){ sleepsecs( 10 ); try { - ScopedDbConnection conn( configServer.getPrimary() ); + ScopedDbConnection conn( config ); _ping( conn.conn() ); if ( ! _checkOIDs() ){ @@ -332,7 +265,8 @@ namespace mongo { } vector<CandidateChunkPtr> candidateChunks; - if ( _shouldIBalance( conn.conn() ) ){ + dist_lock_try lk( &balanceLock , "doing balance round" ); + if ( lk.got() ){ log(1) << "*** start balancing round" << endl; |