From e9ae388f308071c90f883c36c08277bc6ca990a6 Mon Sep 17 00:00:00 2001 From: Kristina Date: Tue, 3 May 2011 11:29:30 -0400 Subject: added more log messages and checks for m/s repl errors SERVER-3029 --- db/repl.cpp | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/db/repl.cpp b/db/repl.cpp index e90d6ca8085..c60c420d3fc 100644 --- a/db/repl.cpp +++ b/db/repl.cpp @@ -436,6 +436,7 @@ namespace mongo { SourceVector sources; ReplSource::loadAll(sources); for( SourceVector::iterator i = sources.begin(); i != sources.end(); ++i ) { + log() << requester << " forcing resync from " << (*i)->hostName << endl; (*i)->forceResync( requester ); } replAllDead = 0; @@ -445,7 +446,9 @@ namespace mongo { BSONObj info; { dbtemprelease t; - oplogReader.connect(hostName); + if (!oplogReader.connect(hostName)) { + msgassertedNoTrace( 14051 , "unable to connect to resync"); + } /* todo use getDatabaseNames() method here */ bool ok = oplogReader.conn()->runCommand( "admin", BSON( "listDatabases" << 1 ), info ); massert( 10385 , "Unable to get database list", ok ); @@ -1236,8 +1239,11 @@ namespace mongo { { dblock lk; if ( replAllDead ) { - if ( !replSettings.autoresync || !ReplSource::throttledForceResyncDead( "auto" ) ) + // throttledForceResyncDead can throw + if ( !replSettings.autoresync || !ReplSource::throttledForceResyncDead( "auto" ) ) { + log() << "all sources dead: " << replAllDead << ", sleeping for 5 seconds" << endl; break; + } } assert( syncing == 0 ); // i.e., there is only one sync thread running. we will want to change/fix this. syncing++; @@ -1271,7 +1277,7 @@ namespace mongo { if ( s ) { stringstream ss; - ss << "repl: sleep " << s << "sec before next pass"; + ss << "repl: sleep " << s << " sec before next pass"; string msg = ss.str(); if ( ! cmdLine.quiet ) log() << msg << endl; @@ -1281,8 +1287,6 @@ namespace mongo { } } - int debug_stop_repl = 0; - static void replMasterThread() { sleepsecs(4); Client::initThread("replmaster"); @@ -1331,8 +1335,6 @@ namespace mongo { while ( 1 ) { try { replMain(); - if ( debug_stop_repl ) - break; sleepsecs(5); } catch ( AssertionException& ) { @@ -1340,6 +1342,15 @@ namespace mongo { problem() << "Assertion in replSlaveThread(): sleeping 5 minutes before retry" << endl; sleepsecs(300); } + catch ( DBException& e ) { + problem() << "exception in replSlaveThread(): " << e.what() + << ", sleeping 5 minutes before retry" << endl; + sleepsecs(300); + } + catch ( ... ) { + problem() << "error in replSlaveThread(): sleeping 5 minutes before retry" << endl; + sleepsecs(300); + } } } -- cgit v1.2.1 From 5eaaddba616acf65c55bb4c0ca0b7f9c48f27999 Mon Sep 17 00:00:00 2001 From: Kristina Date: Tue, 3 May 2011 12:01:31 -0400 Subject: more debugging info on sync errors SERVER-3031 --- db/repl/rs_sync.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/db/repl/rs_sync.cpp b/db/repl/rs_sync.cpp index bb21ba7bea6..e59a4d140b9 100644 --- a/db/repl/rs_sync.cpp +++ b/db/repl/rs_sync.cpp @@ -462,7 +462,7 @@ namespace mongo { _syncThread(); } catch(DBException& e) { - sethbmsg("syncThread: " + e.toString()); + sethbmsg("syncThread: " + e.toString() + ", last op: " + lastOpTimeWritten.toString()); sleepsecs(10); } catch(...) { -- cgit v1.2.1