diff options
author | Dwight <dmerriman@gmail.com> | 2009-02-02 18:23:57 -0500 |
---|---|---|
committer | Dwight <dmerriman@gmail.com> | 2009-02-02 18:23:57 -0500 |
commit | 86fa7463e92b290280f21e834b088edc303fa690 (patch) | |
tree | 3a40636faa402bc432b558ddf0bb781ce9a1a29a /db | |
parent | d1ba9dfb497031952f774c7972fdeea4d2992189 (diff) | |
parent | 84a10094d0317fce4572e2aa1adb219c7a621a70 (diff) | |
download | mongo-86fa7463e92b290280f21e834b088edc303fa690.tar.gz |
Merge branch 'master' of git.10gen.com:/data/gitroot/p
Conflicts:
db/instance.cpp
Diffstat (limited to 'db')
-rw-r--r-- | db/db.cpp | 82 | ||||
-rw-r--r-- | db/instance.cpp | 35 | ||||
-rw-r--r-- | db/jsobj.h | 15 | ||||
-rw-r--r-- | db/json.cpp | 2 | ||||
-rw-r--r-- | db/repl.cpp | 103 | ||||
-rw-r--r-- | db/repl.h | 7 | ||||
-rw-r--r-- | db/replset.h | 2 |
7 files changed, 159 insertions, 87 deletions
diff --git a/db/db.cpp b/db/db.cpp index f234ccd3d54..ce9f8783751 100644 --- a/db/db.cpp +++ b/db/db.cpp @@ -37,6 +37,7 @@ namespace mongo { extern int port; extern int curOp; + extern bool autoresync; extern string dashDashSource; extern int opLogging; extern long long oplogSize; @@ -316,10 +317,6 @@ namespace mongo { RecCache::tempStore.init("/data/db/indexes.dat", BucketSize); #if !defined(_WIN32) - assert( signal(SIGSEGV, segvhandler) != SIG_ERR ); -#endif - -#if !defined(_WIN32) pid_t pid = 0; pid = getpid(); #else @@ -332,12 +329,11 @@ namespace mongo { #if !defined(NOJNI) if ( useJNI ) { JavaJS = new JavaJSImpl(appserverLoc); - javajstest(); + // This takes a bit of time, so comenting +// javajstest(); } #endif - - setupSignals(); - + repairDatabases(); /* this is for security on certain platforms */ @@ -348,7 +344,6 @@ namespace mongo { int test2(); void testClient(); - void pipeSigHandler( int signal ); } // namespace mongo @@ -356,6 +351,8 @@ using namespace mongo; int main(int argc, char* argv[], char *envp[] ) { + setupSignals(); + dbExecCommand = argv[0]; srand(curTimeMicros()); @@ -371,10 +368,7 @@ int main(int argc, char* argv[], char *envp[] ) } DEV out() << "warning: DEV mode enabled\n"; - -#if !defined(_WIN32) - signal(SIGPIPE, pipeSigHandler); -#endif + UnitTest::runTests(); if ( argc >= 2 ) { @@ -459,6 +453,8 @@ int main(int argc, char* argv[], char *envp[] ) master = true; else if ( s == "--slave" ) slave = true; + else if ( s == "--autoresync" ) + autoresync = true; else if ( s == "--help" || s == "-?" || s == "--?" ) goto usage; else if ( s == "--quiet" ) @@ -469,10 +465,10 @@ int main(int argc, char* argv[], char *envp[] ) noauth = true; else if ( s == "--auth" ) noauth = false; - else if( s == "--sysinfo" ) { - sysInfo(); - return 0; - } + else if( s == "--sysinfo" ) { + sysInfo(); + return 0; + } else if ( s == "--verbose" ) logLevel = 1; else if ( s.find( "-v" ) == 0 ){ @@ -496,7 +492,7 @@ int main(int argc, char* argv[], char *envp[] ) appsrvPath = argv[++i]; else if ( s == "--nocursors" ) useCursors = false; - else if ( strncmp(s.c_str(), "--oplogSize", 11) == 0 ) { + else if ( s == "--oplogSize" ) { long x = strtol( argv[ ++i ], 0, 10 ); uassert("bad arg", x > 0); oplogSize = x * 1024 * 1024; @@ -546,13 +542,14 @@ usage: out() << " --nocursors diagnostic/debugging option\n"; out() << " --nojni" << endl; out() << " --oplog<n> 0=off 1=W 2=R 3=both 7=W+some reads" << endl; - out() << " --oplogSize <size_in_megabytes> custom size for replication operation log" << endl; + out() << " --oplogSize <size_in_megabytes> custom size if creating new replication operation log" << endl; out() << " --sysinfo print out some diagnostic system information\n"; out() << "\nReplication:" << endl; out() << " --master\n"; out() << " --slave" << endl; out() << " --source <server:port>" << endl; out() << " --pairwith <server:port> <arbiter>" << endl; + out() << " --autoresync" << endl; out() << endl; return 0; @@ -583,37 +580,40 @@ namespace mongo { #endif } - int segvs = 0; - void segvhandler(int x) { - if ( ++segvs > 1 ) { - signal(x, SIG_DFL); - if ( segvs == 2 ) { - out() << "\n\n\n got 2nd SIGSEGV" << endl; - sayDbContext(); - } - return; - } - out() << "got SIGSEGV " << x << ", terminating :-(" << endl; - sayDbContext(); -// closeAllSockets(); -// MemoryMappedFile::closeAllFiles(); -// flushOpLog(); - dbexit(14); + void abruptQuit(int x) { + ostringstream oss; + oss << "Got signal: " << x << ", printing backtrace:" << endl; + printStackTrace( oss ); + rawOut( oss.str() ); + exit(14); } - void mysighandler(int x) { - signal(x, SIG_IGN); - out() << "got kill or ctrl c signal " << x << ", will terminate after current cmd ends" << endl; + sigset_t asyncSignals; + // The above signals will be processed by this thread only, in order to + // ensure the db and log mutexes aren't held. + void interruptThread() { + int x; + sigwait( &asyncSignals, &x ); + log() << "got kill or ctrl c signal " << x << ", will terminate after current cmd ends" << endl; { dblock lk; log() << "now exiting" << endl; exit(12); } } - + void setupSignals() { - assert( signal(SIGINT, mysighandler) != SIG_ERR ); - assert( signal(SIGTERM, mysighandler) != SIG_ERR ); + assert( signal(SIGSEGV, abruptQuit) != SIG_ERR ); + assert( signal(SIGFPE, abruptQuit) != SIG_ERR ); + assert( signal(SIGABRT, abruptQuit) != SIG_ERR ); + assert( signal(SIGBUS, abruptQuit) != SIG_ERR ); + assert( signal(SIGPIPE, pipeSigHandler) != SIG_ERR ); + + sigemptyset( &asyncSignals ); + sigaddset( &asyncSignals, SIGINT ); + sigaddset( &asyncSignals, SIGTERM ); + pthread_sigmask( SIG_SETMASK, &asyncSignals, 0 ); + boost::thread it( interruptThread ); } #else diff --git a/db/instance.cpp b/db/instance.cpp index 43fccf77a57..77ebb102478 100644 --- a/db/instance.cpp +++ b/db/instance.cpp @@ -40,7 +40,8 @@ namespace mongo { bool slave = false; bool master = false; // true means keep an op log extern int curOp; - + bool autoresync = false; + boost::mutex &dbMutex( *(new boost::mutex) ); MutexInfo dbMutexInfo; //int dbLocked = 0; @@ -594,18 +595,36 @@ namespace mongo { void recCacheCloseAll(); - /* not using log() herein in case we are called from segvhandler and we were already locked */ -#undef exit - void dbexit(int rc, const char *why) { - if( why && *why ) out() << "dbexit: " << why << endl; + + boost::mutex &exitMutex( *( new boost::mutex ) ); + bool firstExit = true; + + /* not using log() herein in case we are already locked */ + void dbexit(int rc, const char *why) { + { + boostlock lk( exitMutex ); + if ( !firstExit ) { + stringstream ss; + ss << "dbexit: " << why << "; exiting immediately" << endl; + rawOut( ss.str() ); + ::exit( rc ); + } + firstExit = false; + } + + stringstream ss; + ss << "dbexit: " << why << "; flushing op log and files" << endl; + rawOut( ss.str() ); + flushOpLog(); /* must do this before unmapping mem or you may get a seg fault */ closeAllSockets(); - - MemoryMappedFile::closeAllFiles(); + stringstream ss2; + MemoryMappedFile::closeAllFiles( ss2 ); recCacheCloseAll(); - out() << "dbexit: really exiting now" << endl; + rawOut( ss2.str() ); + rawOut( "dbexit: really exiting now\n" ); ::exit(rc); } diff --git a/db/jsobj.h b/db/jsobj.h index 0bd4ade2e40..30b73e0dc83 100644 --- a/db/jsobj.h +++ b/db/jsobj.h @@ -694,6 +694,7 @@ namespace mongo { BSONObjBuilder& operator<<( const string& v ) { return (*this << v.c_str()); } BSONObjBuilder& operator<<( const int value ); BSONObjBuilder& operator<<( const double value ); + BSONObjBuilder& operator<<( const unsigned long value ){ return (*this << (double)value); } private: const char * _fieldName; @@ -817,7 +818,7 @@ namespace mongo { b.append( (char) MaxKey ); b.append( fieldName ); } - + /* Deprecated (but supported) */ void appendDBRef( const char *fieldName, const char *ns, const OID &oid ) { b.append( (char) DBRef ); @@ -834,6 +835,18 @@ namespace mongo { b.append( (char) type ); b.append( (void *) data, len ); } + + /** + @param len the length of data + */ + void appendBinDataArray( const char * fieldName , const char * data , int len ){ + b.append( (char) BinData ); + b.append( fieldName ); + b.append( len + 4 ); + b.append( (char)0x2 ); + b.append( len ); + b.append( (void *) data, len ); + } /** Append to the BSON object a field of type CodeWScope. This is a javascript code fragment accompanied by some scope that goes with it. diff --git a/db/json.cpp b/db/json.cpp index 92e8ed882f8..207334745e1 100644 --- a/db/json.cpp +++ b/db/json.cpp @@ -525,6 +525,8 @@ public: }; BSONObj fromjson( const char *str ) { + if ( ! strlen(str) ) + return emptyObj; ObjectBuilder b; JsonGrammar parser( b ); massert( "Unable to parse JSON string", parse( str, parser, space_p ).full ); diff --git a/db/repl.cpp b/db/repl.cpp index 6e71f7dbc66..11a9d45f724 100644 --- a/db/repl.cpp +++ b/db/repl.cpp @@ -68,6 +68,9 @@ namespace mongo { */ const char *allDead = 0; + extern bool autoresync; + time_t lastForcedResync = 0; + } // namespace mongo #include "replset.h" @@ -108,7 +111,7 @@ namespace mongo { ReplInfo r("arbitrate"); if ( arbHost == "-" ) { - // no arbiter. we are up, let's assume he is down and network is not partitioned. + // no arbiter. we are up, let's assume partner is down and network is not partitioned. setMasterLocked(State_Master, "remote unreachable"); return; } @@ -120,15 +123,7 @@ namespace mongo { return; } - /* todo: make an arbitrate command we send to the arbiter instead of this */ - bool is_master; - bool ok = conn->isMaster(is_master); - if ( !ok ) { - setMasterLocked(State_CantArb, "can't arb 2"); - return; - } - - setMasterLocked(State_Master, "remote down, arbiter reached"); + negotiate( conn.get(), "arbiter" ); } /* --------------------------------------------- */ @@ -209,14 +204,9 @@ namespace mongo { errmsg = "not dead, no need to resync"; return false; } - vector<ReplSource*> sources; - ReplSource::loadAll(sources); - for( vector< ReplSource * >::iterator i = sources.begin(); i != sources.end(); ++i ) { - (*i)->userResync(); - } - allDead = 0; + ReplSource::forceResyncDead( "user" ); result.append( "info", "triggered resync for all sources" ); - return true; + return true; } } cmdResync; @@ -290,9 +280,32 @@ namespace mongo { virtual bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { if ( replPair == 0 ) { - problem() << "got negotiatemaster cmd but we are not in paired mode." << endl; - errmsg = "not paired"; - return false; + // assume that we are an arbiter and should forward the request + string host = cmdObj.getStringField("your_name"); + int port = cmdObj.getIntField( "your_port" ); + if ( port == INT_MIN ) { + errmsg = "no port specified"; + problem() << errmsg << endl; + return false; + } + stringstream ss; + ss << host << ":" << port; + string remote = ss.str(); + auto_ptr<DBClientConnection> conn( new DBClientConnection() ); + if ( !conn->connect( remote.c_str(), errmsg ) ) { + result.append( "you_are", ReplPair::State_Master ); + return true; + } + BSONObj ret = conn->findOne( "admin.$cmd", cmdObj ); + BSONObjIterator i( ret ); + while( i.more() ) { + BSONElement e = i.next(); + if ( e.eoo() ) + break; + if ( e.fieldName() != string( "ok" ) ) + result.append( e ); + } + return ( ret.getIntField("ok") == 1 ); } int was = cmdObj.getIntField("i_was"); @@ -329,30 +342,31 @@ namespace mongo { return true; } } cmdnegotiatemaster; - - void ReplPair::negotiate(DBClientConnection *conn) { + + void ReplPair::negotiate(DBClientConnection *conn, string method) { BSONObjBuilder b; b.append("negotiatemaster",1); b.append("i_was", state); b.append("your_name", remoteHost); + b.append("your_port", remotePort); BSONObj cmd = b.done(); BSONObj res = conn->findOne("admin.$cmd", cmd); if ( res.getIntField("ok") != 1 ) { - problem() << "negotiate fails: " << res.toString() << '\n'; - setMasterLocked(State_Confused); + string message = method + " negotiate failed"; + problem() << message << ": " << res.toString() << '\n'; + setMasterLocked(State_Confused, message.c_str()); return; } int x = res.getIntField("you_are"); // State_Negotiating means the remote node is not dominant and cannot // choose who is master. if ( x != State_Slave && x != State_Master && x != State_Negotiating ) { - problem() << "negotiate: bad you_are value " << res.toString() << endl; + problem() << method << " negotiate: bad you_are value " << res.toString() << endl; return; } if ( x != State_Negotiating ) { - // Don't actually have to lock here, since we only get here if not the - // dominant node. - setMaster(x); + string message = method + " negotiation"; + setMasterLocked(x, message.c_str()); } } @@ -557,9 +571,29 @@ namespace mongo { BSONObj opTimeQuery = fromjson("{\"getoptime\":1}"); - void ReplSource::userResync() { + bool ReplSource::throttledForceResyncDead( const char *requester ) { + if ( time( 0 ) - lastForcedResync > 600 ) { + forceResyncDead( requester ); + lastForcedResync = time( 0 ); + return true; + } + return false; + } + + void ReplSource::forceResyncDead( const char *requester ) { + if ( !allDead ) + return; + vector<ReplSource*> sources; + ReplSource::loadAll(sources); + for( vector< ReplSource * >::iterator i = sources.begin(); i != sources.end(); ++i ) { + (*i)->forceResync( requester ); + } + allDead = 0; + } + + void ReplSource::forceResync( const char *requester ) { for( set< string >::iterator i = dbs.begin(); i != dbs.end(); ++i ) { - log() << "user resync: dropping database " << *i << endl; + log() << requester << " resync: dropping database " << *i << endl; string dummyns = *i + "."; setClientTempNs( dummyns.c_str() ); assert( database->name == *i ); @@ -941,6 +975,7 @@ namespace mongo { log() << "pull: " << sourceName() << '@' << hostName << endl; nClonedThisPass = 0; + // FIXME Handle cases where this db isn't on default port, or default port is spec'd in hostName. if ( (string("localhost") == hostName || string("127.0.0.1") == hostName) && port == DBPort ) { log() << "pull: can't sync from self (localhost). sources configuration may be wrong." << endl; sleepsecs(5); @@ -967,7 +1002,7 @@ namespace mongo { } if ( paired ) - replPair->negotiate(conn.get()); + replPair->negotiate(conn.get(), "direct"); /* // get current mtime at the server. @@ -1125,8 +1160,10 @@ namespace mongo { int s = 0; { dblock lk; - if ( allDead ) - break; + if ( allDead ) { + if ( !autoresync || !ReplSource::throttledForceResyncDead( "auto" ) ) + break; + } assert( syncing == 0 ); syncing++; } diff --git a/db/repl.h b/db/repl.h index 3185db1071a..c2c6f8d2f65 100644 --- a/db/repl.h +++ b/db/repl.h @@ -36,7 +36,7 @@ namespace mongo { class DBClientCursor; extern bool slave; extern bool master; - + bool cloneFrom(const char *masterHost, string& errmsg, const string& fromdb, bool logForReplication, bool slaveOk, bool useReplAuth); @@ -176,8 +176,9 @@ namespace mongo { return !addDbNextPass.empty(); } - // Trigger a resync, at user's request. - void userResync(); + static bool throttledForceResyncDead( const char *requester ); + static void forceResyncDead( const char *requester ); + void forceResync( const char *requester ); }; /* Write operation to the log (local.oplog.$main) diff --git a/db/replset.h b/db/replset.h index c323224c974..795dbc5193c 100644 --- a/db/replset.h +++ b/db/replset.h @@ -86,7 +86,7 @@ namespace mongo { void setMaster(int n, const char *_comment = ""); /* negotiate with our peer who is master */ - void negotiate(DBClientConnection *conn); + void negotiate(DBClientConnection *conn, string method); /* peer unreachable, try our arbitrator */ void arbitrate(); |