summaryrefslogtreecommitdiff
path: root/s/balance.cpp
diff options
context:
space:
mode:
authorEliot Horowitz <eliot@10gen.com>2010-07-13 14:50:53 -0400
committerEliot Horowitz <eliot@10gen.com>2010-07-13 14:50:53 -0400
commit130f70d3d4113e5d5f04433072dfc3cda560bb35 (patch)
tree2e922b529f6bfb5f67d1d4c80cd877d6049fe059 /s/balance.cpp
parent22653fdefeea36317cbe2557b9cb039e64512ce6 (diff)
downloadmongo-130f70d3d4113e5d5f04433072dfc3cda560bb35.tar.gz
use distlock for balancer SERVER-1354
Diffstat (limited to 's/balance.cpp')
-rw-r--r--s/balance.cpp82
1 files changed, 8 insertions, 74 deletions
diff --git a/s/balance.cpp b/s/balance.cpp
index 6ecbf61f6ac..19a8673a396 100644
--- a/s/balance.cpp
+++ b/s/balance.cpp
@@ -21,6 +21,8 @@
#include "../db/jsobj.h"
#include "../db/cmdline.h"
+#include "../client/distlock.h"
+
#include "balance.h"
#include "server.h"
#include "shard.h"
@@ -37,78 +39,6 @@ namespace mongo {
delete _policy;
}
- bool Balancer::_shouldIBalance( DBClientBase& conn ){
- BSONObj x = conn.findOne( ShardNS::settings , BSON( "_id" << "balancer" ) );
- log(2) << "balancer configDB entry: " << x << endl;
-
- if ( ! x.isEmpty() ){
-
- if ( x["who"].String() == _myid ){
- log(2) << "I'm the current balancer" << endl;
-
- // If need be, we can stop the balancer by creating a 'stopped : true' field in its
- // entry.
- if ( x["stopped"].type() && x["stopped"].Bool() ){
- log() << "stopped flag true" << endl;
- return false;
- }
-
- return true;
- }
-
- BSONObj other = conn.findOne( ShardNS::mongos , x["who"].wrap( "_id" ) );
- // TODO: if it can't find it for 10 minutes, should reset
- massert( 13125 , (string)"can't find mongos: " + x["who"].String() , ! other.isEmpty() );
-
- int secsSincePing = (int)(( jsTime() - other["ping"].Date() ) / 1000 );
- ostringstream msgPing;
- msgPing << secsSincePing << "secs since last ping from balancer in charge: " << other << endl;
- if ( secsSincePing < ( 60 * 10 ) ){
- log(2) << msgPing.str();
- return false;
- }
-
- log() << msgPing.str();
- log() << "will try to take over: " << x << endl;
- // we want to take over, so fall through to below
- }
-
- // Taking over means replacing 'who' with this balancer's address. Note that
- // to avoid any races, we use a compare-and-set strategy relying on the
- // incarnation of the previous balancer (the key 'x').
-
- OID incarnation;
- incarnation.init();
-
- BSONObjBuilder updateQuery;
- updateQuery.append( "_id" , "balancer" );
- if ( x["x"].type() ){
- updateQuery.append( x["x"] );
-
- // Carry on the stopped flag, if it existed.
- if ( ! x["stopped"].type() ){
- updateQuery.append( "stopped" , x["stopped"].Bool() );
- }
-
- } else {
- updateQuery.append( "x" , BSON( "$exists" << false ) );
- }
-
- conn.update( ShardNS::settings ,
- updateQuery.obj() ,
- BSON( "$set" << BSON( "who" << _myid << "x" << incarnation ) ) ,
- true );
-
- // If another balancer beats this one to the punch, the following query will see
- // the incarnation for that other guy.
-
- x = conn.findOne( ShardNS::settings , BSON( "_id" << "balancer" ) );
- bool takeOver = _myid == x["who"].String() && incarnation == x["x"].OID();
- log() << ( takeOver ? "" : "un" ) << "successful takeover. Current balancer is : " << x << endl;
-
- return takeOver;
- }
-
int Balancer::_moveChunks( const vector<CandidateChunkPtr>* candidateChunks ) {
int movedCount = 0;
@@ -320,11 +250,14 @@ namespace mongo {
_ping();
_checkOIDs();
+ ConnectionString config = configServer.getConnectionString();
+ DistributedLock balanceLock( config , "balancer" );
+
while ( ! inShutdown() ){
sleepsecs( 10 );
try {
- ScopedDbConnection conn( configServer.getPrimary() );
+ ScopedDbConnection conn( config );
_ping( conn.conn() );
if ( ! _checkOIDs() ){
@@ -332,7 +265,8 @@ namespace mongo {
}
vector<CandidateChunkPtr> candidateChunks;
- if ( _shouldIBalance( conn.conn() ) ){
+ dist_lock_try lk( &balanceLock , "doing balance round" );
+ if ( lk.got() ){
log(1) << "*** start balancing round" << endl;