summary | refs | log | tree | commit | diff
path: root/db
diff options
context:
space:
mode:
author Dwight <dmerriman@gmail.com> 2009-02-02 18:23:57 -0500
committer Dwight <dmerriman@gmail.com> 2009-02-02 18:23:57 -0500
commit 86fa7463e92b290280f21e834b088edc303fa690 (patch)
tree 3a40636faa402bc432b558ddf0bb781ce9a1a29a /db
parent d1ba9dfb497031952f774c7972fdeea4d2992189 (diff)
parent 84a10094d0317fce4572e2aa1adb219c7a621a70 (diff)
download mongo-86fa7463e92b290280f21e834b088edc303fa690.tar.gz
Merge branch 'master' of git.10gen.com:/data/gitroot/p
Conflicts: db/instance.cpp
Diffstat (limited to 'db')
-rw-r--r-- db/db.cpp 82
-rw-r--r-- db/instance.cpp 35
-rw-r--r-- db/jsobj.h 15
-rw-r--r-- db/json.cpp 2
-rw-r--r-- db/repl.cpp 103
-rw-r--r-- db/repl.h 7
-rw-r--r-- db/replset.h 2
7 files changed, 159 insertions, 87 deletions
diff --git a/db/db.cpp b/db/db.cpp
index f234ccd3d54..ce9f8783751 100644
--- a/db/db.cpp
+++ b/db/db.cpp
@@ -37,6 +37,7 @@ namespace mongo {
extern int port;
extern int curOp;
+ extern bool autoresync;
extern string dashDashSource;
extern int opLogging;
extern long long oplogSize;
@@ -316,10 +317,6 @@ namespace mongo {
RecCache::tempStore.init("/data/db/indexes.dat", BucketSize);
#if !defined(_WIN32)
- assert( signal(SIGSEGV, segvhandler) != SIG_ERR );
-#endif
-
-#if !defined(_WIN32)
pid_t pid = 0;
pid = getpid();
#else
@@ -332,12 +329,11 @@ namespace mongo {
#if !defined(NOJNI)
if ( useJNI ) {
JavaJS = new JavaJSImpl(appserverLoc);
- javajstest();
+ // This takes a bit of time, so comenting
+// javajstest();
}
#endif
-
- setupSignals();
-
+
repairDatabases();
/* this is for security on certain platforms */
@@ -348,7 +344,6 @@ namespace mongo {
int test2();
void testClient();
- void pipeSigHandler( int signal );
} // namespace mongo
@@ -356,6 +351,8 @@ using namespace mongo;
int main(int argc, char* argv[], char *envp[] )
{
+ setupSignals();
+
dbExecCommand = argv[0];
srand(curTimeMicros());
@@ -371,10 +368,7 @@ int main(int argc, char* argv[], char *envp[] )
}
DEV out() << "warning: DEV mode enabled\n";
-
-#if !defined(_WIN32)
- signal(SIGPIPE, pipeSigHandler);
-#endif
+
UnitTest::runTests();
if ( argc >= 2 ) {
@@ -459,6 +453,8 @@ int main(int argc, char* argv[], char *envp[] )
master = true;
else if ( s == "--slave" )
slave = true;
+ else if ( s == "--autoresync" )
+ autoresync = true;
else if ( s == "--help" || s == "-?" || s == "--?" )
goto usage;
else if ( s == "--quiet" )
@@ -469,10 +465,10 @@ int main(int argc, char* argv[], char *envp[] )
noauth = true;
else if ( s == "--auth" )
noauth = false;
- else if( s == "--sysinfo" ) {
- sysInfo();
- return 0;
- }
+ else if( s == "--sysinfo" ) {
+ sysInfo();
+ return 0;
+ }
else if ( s == "--verbose" )
logLevel = 1;
else if ( s.find( "-v" ) == 0 ){
@@ -496,7 +492,7 @@ int main(int argc, char* argv[], char *envp[] )
appsrvPath = argv[++i];
else if ( s == "--nocursors" )
useCursors = false;
- else if ( strncmp(s.c_str(), "--oplogSize", 11) == 0 ) {
+ else if ( s == "--oplogSize" ) {
long x = strtol( argv[ ++i ], 0, 10 );
uassert("bad arg", x > 0);
oplogSize = x * 1024 * 1024;
@@ -546,13 +542,14 @@ usage:
out() << " --nocursors diagnostic/debugging option\n";
out() << " --nojni" << endl;
out() << " --oplog<n> 0=off 1=W 2=R 3=both 7=W+some reads" << endl;
- out() << " --oplogSize <size_in_megabytes> custom size for replication operation log" << endl;
+ out() << " --oplogSize <size_in_megabytes> custom size if creating new replication operation log" << endl;
out() << " --sysinfo print out some diagnostic system information\n";
out() << "\nReplication:" << endl;
out() << " --master\n";
out() << " --slave" << endl;
out() << " --source <server:port>" << endl;
out() << " --pairwith <server:port> <arbiter>" << endl;
+ out() << " --autoresync" << endl;
out() << endl;
return 0;
@@ -583,37 +580,40 @@ namespace mongo {
#endif
}
- int segvs = 0;
- void segvhandler(int x) {
- if ( ++segvs > 1 ) {
- signal(x, SIG_DFL);
- if ( segvs == 2 ) {
- out() << "\n\n\n got 2nd SIGSEGV" << endl;
- sayDbContext();
- }
- return;
- }
- out() << "got SIGSEGV " << x << ", terminating :-(" << endl;
- sayDbContext();
-// closeAllSockets();
-// MemoryMappedFile::closeAllFiles();
-// flushOpLog();
- dbexit(14);
+ void abruptQuit(int x) {
+ ostringstream oss;
+ oss << "Got signal: " << x << ", printing backtrace:" << endl;
+ printStackTrace( oss );
+ rawOut( oss.str() );
+ exit(14);
}
- void mysighandler(int x) {
- signal(x, SIG_IGN);
- out() << "got kill or ctrl c signal " << x << ", will terminate after current cmd ends" << endl;
+ sigset_t asyncSignals;
+ // The above signals will be processed by this thread only, in order to
+ // ensure the db and log mutexes aren't held.
+ void interruptThread() {
+ int x;
+ sigwait( &asyncSignals, &x );
+ log() << "got kill or ctrl c signal " << x << ", will terminate after current cmd ends" << endl;
{
dblock lk;
log() << "now exiting" << endl;
exit(12);
}
}
-
+
void setupSignals() {
- assert( signal(SIGINT, mysighandler) != SIG_ERR );
- assert( signal(SIGTERM, mysighandler) != SIG_ERR );
+ assert( signal(SIGSEGV, abruptQuit) != SIG_ERR );
+ assert( signal(SIGFPE, abruptQuit) != SIG_ERR );
+ assert( signal(SIGABRT, abruptQuit) != SIG_ERR );
+ assert( signal(SIGBUS, abruptQuit) != SIG_ERR );
+ assert( signal(SIGPIPE, pipeSigHandler) != SIG_ERR );
+
+ sigemptyset( &asyncSignals );
+ sigaddset( &asyncSignals, SIGINT );
+ sigaddset( &asyncSignals, SIGTERM );
+ pthread_sigmask( SIG_SETMASK, &asyncSignals, 0 );
+ boost::thread it( interruptThread );
}
#else
diff --git a/db/instance.cpp b/db/instance.cpp
index 43fccf77a57..77ebb102478 100644
--- a/db/instance.cpp
+++ b/db/instance.cpp
@@ -40,7 +40,8 @@ namespace mongo {
bool slave = false;
bool master = false; // true means keep an op log
extern int curOp;
-
+ bool autoresync = false;
+
boost::mutex &dbMutex( *(new boost::mutex) );
MutexInfo dbMutexInfo;
//int dbLocked = 0;
@@ -594,18 +595,36 @@ namespace mongo {
void recCacheCloseAll();
- /* not using log() herein in case we are called from segvhandler and we were already locked */
-#undef exit
- void dbexit(int rc, const char *why) {
- if( why && *why ) out() << "dbexit: " << why << endl;
+
+ boost::mutex &exitMutex( *( new boost::mutex ) );
+ bool firstExit = true;
+
+ /* not using log() herein in case we are already locked */
+ void dbexit(int rc, const char *why) {
+ {
+ boostlock lk( exitMutex );
+ if ( !firstExit ) {
+ stringstream ss;
+ ss << "dbexit: " << why << "; exiting immediately" << endl;
+ rawOut( ss.str() );
+ ::exit( rc );
+ }
+ firstExit = false;
+ }
+
+ stringstream ss;
+ ss << "dbexit: " << why << "; flushing op log and files" << endl;
+ rawOut( ss.str() );
+
flushOpLog();
/* must do this before unmapping mem or you may get a seg fault */
closeAllSockets();
-
- MemoryMappedFile::closeAllFiles();
+ stringstream ss2;
+ MemoryMappedFile::closeAllFiles( ss2 );
recCacheCloseAll();
- out() << "dbexit: really exiting now" << endl;
+ rawOut( ss2.str() );
+ rawOut( "dbexit: really exiting now\n" );
::exit(rc);
}
diff --git a/db/jsobj.h b/db/jsobj.h
index 0bd4ade2e40..30b73e0dc83 100644
--- a/db/jsobj.h
+++ b/db/jsobj.h
@@ -694,6 +694,7 @@ namespace mongo {
BSONObjBuilder& operator<<( const string& v ) { return (*this << v.c_str()); }
BSONObjBuilder& operator<<( const int value );
BSONObjBuilder& operator<<( const double value );
+ BSONObjBuilder& operator<<( const unsigned long value ){ return (*this << (double)value); }
private:
const char * _fieldName;
@@ -817,7 +818,7 @@ namespace mongo {
b.append( (char) MaxKey );
b.append( fieldName );
}
-
+
/* Deprecated (but supported) */
void appendDBRef( const char *fieldName, const char *ns, const OID &oid ) {
b.append( (char) DBRef );
@@ -834,6 +835,18 @@ namespace mongo {
b.append( (char) type );
b.append( (void *) data, len );
}
+
+ /**
+ @param len the length of data
+ */
+ void appendBinDataArray( const char * fieldName , const char * data , int len ){
+ b.append( (char) BinData );
+ b.append( fieldName );
+ b.append( len + 4 );
+ b.append( (char)0x2 );
+ b.append( len );
+ b.append( (void *) data, len );
+ }
/** Append to the BSON object a field of type CodeWScope. This is a javascript code
fragment accompanied by some scope that goes with it.
diff --git a/db/json.cpp b/db/json.cpp
index 92e8ed882f8..207334745e1 100644
--- a/db/json.cpp
+++ b/db/json.cpp
@@ -525,6 +525,8 @@ public:
};
BSONObj fromjson( const char *str ) {
+ if ( ! strlen(str) )
+ return emptyObj;
ObjectBuilder b;
JsonGrammar parser( b );
massert( "Unable to parse JSON string", parse( str, parser, space_p ).full );
diff --git a/db/repl.cpp b/db/repl.cpp
index 6e71f7dbc66..11a9d45f724 100644
--- a/db/repl.cpp
+++ b/db/repl.cpp
@@ -68,6 +68,9 @@ namespace mongo {
*/
const char *allDead = 0;
+ extern bool autoresync;
+ time_t lastForcedResync = 0;
+
} // namespace mongo
#include "replset.h"
@@ -108,7 +111,7 @@ namespace mongo {
ReplInfo r("arbitrate");
if ( arbHost == "-" ) {
- // no arbiter. we are up, let's assume he is down and network is not partitioned.
+ // no arbiter. we are up, let's assume partner is down and network is not partitioned.
setMasterLocked(State_Master, "remote unreachable");
return;
}
@@ -120,15 +123,7 @@ namespace mongo {
return;
}
- /* todo: make an arbitrate command we send to the arbiter instead of this */
- bool is_master;
- bool ok = conn->isMaster(is_master);
- if ( !ok ) {
- setMasterLocked(State_CantArb, "can't arb 2");
- return;
- }
-
- setMasterLocked(State_Master, "remote down, arbiter reached");
+ negotiate( conn.get(), "arbiter" );
}
/* --------------------------------------------- */
@@ -209,14 +204,9 @@ namespace mongo {
errmsg = "not dead, no need to resync";
return false;
}
- vector<ReplSource*> sources;
- ReplSource::loadAll(sources);
- for( vector< ReplSource * >::iterator i = sources.begin(); i != sources.end(); ++i ) {
- (*i)->userResync();
- }
- allDead = 0;
+ ReplSource::forceResyncDead( "user" );
result.append( "info", "triggered resync for all sources" );
- return true;
+ return true;
}
} cmdResync;
@@ -290,9 +280,32 @@ namespace mongo {
virtual bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
if ( replPair == 0 ) {
- problem() << "got negotiatemaster cmd but we are not in paired mode." << endl;
- errmsg = "not paired";
- return false;
+ // assume that we are an arbiter and should forward the request
+ string host = cmdObj.getStringField("your_name");
+ int port = cmdObj.getIntField( "your_port" );
+ if ( port == INT_MIN ) {
+ errmsg = "no port specified";
+ problem() << errmsg << endl;
+ return false;
+ }
+ stringstream ss;
+ ss << host << ":" << port;
+ string remote = ss.str();
+ auto_ptr<DBClientConnection> conn( new DBClientConnection() );
+ if ( !conn->connect( remote.c_str(), errmsg ) ) {
+ result.append( "you_are", ReplPair::State_Master );
+ return true;
+ }
+ BSONObj ret = conn->findOne( "admin.$cmd", cmdObj );
+ BSONObjIterator i( ret );
+ while( i.more() ) {
+ BSONElement e = i.next();
+ if ( e.eoo() )
+ break;
+ if ( e.fieldName() != string( "ok" ) )
+ result.append( e );
+ }
+ return ( ret.getIntField("ok") == 1 );
}
int was = cmdObj.getIntField("i_was");
@@ -329,30 +342,31 @@ namespace mongo {
return true;
}
} cmdnegotiatemaster;
-
- void ReplPair::negotiate(DBClientConnection *conn) {
+
+ void ReplPair::negotiate(DBClientConnection *conn, string method) {
BSONObjBuilder b;
b.append("negotiatemaster",1);
b.append("i_was", state);
b.append("your_name", remoteHost);
+ b.append("your_port", remotePort);
BSONObj cmd = b.done();
BSONObj res = conn->findOne("admin.$cmd", cmd);
if ( res.getIntField("ok") != 1 ) {
- problem() << "negotiate fails: " << res.toString() << '\n';
- setMasterLocked(State_Confused);
+ string message = method + " negotiate failed";
+ problem() << message << ": " << res.toString() << '\n';
+ setMasterLocked(State_Confused, message.c_str());
return;
}
int x = res.getIntField("you_are");
// State_Negotiating means the remote node is not dominant and cannot
// choose who is master.
if ( x != State_Slave && x != State_Master && x != State_Negotiating ) {
- problem() << "negotiate: bad you_are value " << res.toString() << endl;
+ problem() << method << " negotiate: bad you_are value " << res.toString() << endl;
return;
}
if ( x != State_Negotiating ) {
- // Don't actually have to lock here, since we only get here if not the
- // dominant node.
- setMaster(x);
+ string message = method + " negotiation";
+ setMasterLocked(x, message.c_str());
}
}
@@ -557,9 +571,29 @@ namespace mongo {
BSONObj opTimeQuery = fromjson("{\"getoptime\":1}");
- void ReplSource::userResync() {
+ bool ReplSource::throttledForceResyncDead( const char *requester ) {
+ if ( time( 0 ) - lastForcedResync > 600 ) {
+ forceResyncDead( requester );
+ lastForcedResync = time( 0 );
+ return true;
+ }
+ return false;
+ }
+
+ void ReplSource::forceResyncDead( const char *requester ) {
+ if ( !allDead )
+ return;
+ vector<ReplSource*> sources;
+ ReplSource::loadAll(sources);
+ for( vector< ReplSource * >::iterator i = sources.begin(); i != sources.end(); ++i ) {
+ (*i)->forceResync( requester );
+ }
+ allDead = 0;
+ }
+
+ void ReplSource::forceResync( const char *requester ) {
for( set< string >::iterator i = dbs.begin(); i != dbs.end(); ++i ) {
- log() << "user resync: dropping database " << *i << endl;
+ log() << requester << " resync: dropping database " << *i << endl;
string dummyns = *i + ".";
setClientTempNs( dummyns.c_str() );
assert( database->name == *i );
@@ -941,6 +975,7 @@ namespace mongo {
log() << "pull: " << sourceName() << '@' << hostName << endl;
nClonedThisPass = 0;
+ // FIXME Handle cases where this db isn't on default port, or default port is spec'd in hostName.
if ( (string("localhost") == hostName || string("127.0.0.1") == hostName) && port == DBPort ) {
log() << "pull: can't sync from self (localhost). sources configuration may be wrong." << endl;
sleepsecs(5);
@@ -967,7 +1002,7 @@ namespace mongo {
}
if ( paired )
- replPair->negotiate(conn.get());
+ replPair->negotiate(conn.get(), "direct");
/*
// get current mtime at the server.
@@ -1125,8 +1160,10 @@ namespace mongo {
int s = 0;
{
dblock lk;
- if ( allDead )
- break;
+ if ( allDead ) {
+ if ( !autoresync || !ReplSource::throttledForceResyncDead( "auto" ) )
+ break;
+ }
assert( syncing == 0 );
syncing++;
}
diff --git a/db/repl.h b/db/repl.h
index 3185db1071a..c2c6f8d2f65 100644
--- a/db/repl.h
+++ b/db/repl.h
@@ -36,7 +36,7 @@ namespace mongo {
class DBClientCursor;
extern bool slave;
extern bool master;
-
+
bool cloneFrom(const char *masterHost, string& errmsg, const string& fromdb, bool logForReplication,
bool slaveOk, bool useReplAuth);
@@ -176,8 +176,9 @@ namespace mongo {
return !addDbNextPass.empty();
}
- // Trigger a resync, at user's request.
- void userResync();
+ static bool throttledForceResyncDead( const char *requester );
+ static void forceResyncDead( const char *requester );
+ void forceResync( const char *requester );
};
/* Write operation to the log (local.oplog.$main)
diff --git a/db/replset.h b/db/replset.h
index c323224c974..795dbc5193c 100644
--- a/db/replset.h
+++ b/db/replset.h
@@ -86,7 +86,7 @@ namespace mongo {
void setMaster(int n, const char *_comment = "");
/* negotiate with our peer who is master */
- void negotiate(DBClientConnection *conn);
+ void negotiate(DBClientConnection *conn, string method);
/* peer unreachable, try our arbitrator */
void arbitrate();