diff options
Diffstat (limited to 'db')
89 files changed, 2567 insertions, 1371 deletions
diff --git a/db/btree.cpp b/db/btree.cpp index 232ac615470..e4753bef696 100644 --- a/db/btree.cpp +++ b/db/btree.cpp @@ -44,7 +44,7 @@ namespace mongo { } /** data check. like assert, but gives a reasonable error message to the user. */ -#define check(expr) _IF(!(expr)) { checkFailed(__LINE__); } +#define check(expr) if(!(expr) ) { checkFailed(__LINE__); } #define VERIFYTHISLOC dassert( thisLoc.btree<V>() == this ); diff --git a/db/btree.h b/db/btree.h index 2e47d69a221..9ffa54cddc0 100644 --- a/db/btree.h +++ b/db/btree.h @@ -1071,7 +1071,7 @@ namespace mongo { * Our btrees may (rarely) have "unused" keys when items are deleted. * Skip past them. */ - virtual bool skipUnusedKeys( bool mayJump ) = 0; + virtual bool skipUnusedKeys() = 0; bool skipOutOfRangeKeysAndCheckEnd(); void skipAndCheck(); diff --git a/db/btreecursor.cpp b/db/btreecursor.cpp index cd145ef861f..f39d5bb0535 100644 --- a/db/btreecursor.cpp +++ b/db/btreecursor.cpp @@ -68,7 +68,7 @@ namespace mongo { return !currKeyNode().prevChildBucket.isNull(); } - bool skipUnusedKeys( bool mayJump ) { + bool skipUnusedKeys() { int u = 0; while ( 1 ) { if ( !ok() ) @@ -80,9 +80,6 @@ namespace mongo { u++; //don't include unused keys in nscanned //++_nscanned; - if ( mayJump && ( u % 10 == 0 ) ) { - skipOutOfRangeKeysAndCheckEnd(); - } } if ( u > 10 ) OCCASIONALLY log() << "btree unused skipped:" << u << '\n'; @@ -114,13 +111,13 @@ namespace mongo { while( 1 ) { // if ( b->keyAt(keyOfs).woEqual(keyAtKeyOfs) && // b->k(keyOfs).recordLoc == locAtKeyOfs ) { - if ( keyAt(keyOfs).shallowEqual(keyAtKeyOfs) ) { + if ( keyAt(keyOfs).binaryEqual(keyAtKeyOfs) ) { const _KeyNode& kn = keyNode(keyOfs); if( kn.recordLoc == locAtKeyOfs ) { if ( !kn.isUsed() ) { // we were deleted but still exist as an unused // marker key. advance. - skipUnusedKeys( false ); + skipUnusedKeys(); } return; } @@ -149,7 +146,7 @@ namespace mongo { bucket = _locate(keyAtKeyOfs, locAtKeyOfs); RARELY log() << "key seems to have moved in the index, refinding. " << bucket.toString() << endl; if ( ! bucket.isNull() ) - skipUnusedKeys( false ); + skipUnusedKeys(); } @@ -329,18 +326,24 @@ namespace mongo { if ( ok() ) { _nscanned = 1; } - skipUnusedKeys( false ); + skipUnusedKeys(); checkEnd(); } void BtreeCursor::skipAndCheck() { - skipUnusedKeys( true ); + int startNscanned = _nscanned; + skipUnusedKeys(); while( 1 ) { if ( !skipOutOfRangeKeysAndCheckEnd() ) { break; } - while( skipOutOfRangeKeysAndCheckEnd() ); - if ( !skipUnusedKeys( true ) ) { + do { + if ( _nscanned > startNscanned + 20 ) { + skipUnusedKeys(); + return; + } + } while( skipOutOfRangeKeysAndCheckEnd() ); + if ( !skipUnusedKeys() ) { break; } } @@ -395,7 +398,7 @@ namespace mongo { bucket = _advance(bucket, keyOfs, _direction, "BtreeCursor::advance"); if ( !_independentFieldRanges ) { - skipUnusedKeys( false ); + skipUnusedKeys(); checkEnd(); if ( ok() ) { ++_nscanned; diff --git a/db/client.cpp b/db/client.cpp index be5dba9ae56..bf3aead75a6 100644 --- a/db/client.cpp +++ b/db/client.cpp @@ -122,10 +122,13 @@ namespace mongo { error() << "Client::shutdown not called: " << _desc << endl; } - scoped_lock bl(clientsMutex); - if ( ! _shutdown ) - clients.erase(this); - delete _curOp; + if ( ! inShutdown() ) { + // we can't clean up safely once we're in shutdown + scoped_lock bl(clientsMutex); + if ( ! _shutdown ) + clients.erase(this); + delete _curOp; + } } bool Client::shutdown() { @@ -469,7 +472,7 @@ namespace mongo { virtual LockType locktype() const { return NONE; } virtual bool slaveOk() const { return true; } virtual bool adminOnly() const { return false; } - virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { Client& c = cc(); c.gotHandshake( cmdObj ); return 1; @@ -688,11 +691,14 @@ namespace mongo { #define OPDEBUG_APPEND_NUMBER(x) if( x ) b.append( #x , (x) ) #define OPDEBUG_APPEND_BOOL(x) if( x ) b.appendBool( #x , (x) ) - void OpDebug::append( BSONObjBuilder& b ) const { + void OpDebug::append( const CurOp& curop, BSONObjBuilder& b ) const { b.append( "op" , iscommand ? "command" : opToString( op ) ); b.append( "ns" , ns.toString() ); if ( ! query.isEmpty() ) b.append( iscommand ? "command" : "query" , query ); + else if ( ! iscommand && curop.haveQuery() ) + curop.appendQuery( b , "query" ); + if ( ! updateobj.isEmpty() ) b.append( "updateobj" , updateobj ); diff --git a/db/clientcursor.cpp b/db/clientcursor.cpp index 615616e7a7c..e803afd459c 100644 --- a/db/clientcursor.cpp +++ b/db/clientcursor.cpp @@ -447,16 +447,29 @@ namespace mongo { return rec; } - bool ClientCursor::yieldSometimes( RecordNeeds need ) { + bool ClientCursor::yieldSometimes( RecordNeeds need, bool *yielded ) { + if ( yielded ) { + *yielded = false; + } if ( ! _yieldSometimesTracker.ping() ) { Record* rec = _recordForYield( need ); - if ( rec ) + if ( rec ) { + if ( yielded ) { + *yielded = true; + } return yield( yieldSuggest() , rec ); + } return true; } int micros = yieldSuggest(); - return ( micros > 0 ) ? yield( micros , _recordForYield( need ) ) : true; + if ( micros > 0 ) { + if ( yielded ) { + *yielded = true; + } + return yield( micros , _recordForYield( need ) ); + } + return true; } void ClientCursor::staticYield( int micros , const StringData& ns , Record * rec ) { @@ -616,7 +629,7 @@ namespace mongo { help << " example: { cursorInfo : 1 }"; } virtual LockType locktype() const { return NONE; } - bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { ClientCursor::appendStats( result ); return true; } diff --git a/db/clientcursor.h b/db/clientcursor.h index b3bd996c768..75c7da85cc6 100644 --- a/db/clientcursor.h +++ b/db/clientcursor.h @@ -186,9 +186,10 @@ namespace mongo { /** * @param needRecord whether or not the next record has to be read from disk for sure * if this is true, will yield of next record isn't in memory + * @param yielded true if a yield occurred, and potentially if a yield did not occur * @return same as yield() */ - bool yieldSometimes( RecordNeeds need ); + bool yieldSometimes( RecordNeeds need, bool *yielded = 0 ); static int yieldSuggest(); static void staticYield( int micros , const StringData& ns , Record * rec ); diff --git a/db/cloner.cpp b/db/cloner.cpp index 2a46ea22cb4..8956133daa3 100644 --- a/db/cloner.cpp +++ b/db/cloner.cpp @@ -460,7 +460,7 @@ namespace mongo { help << "{ clone : \"host13\" }"; } CmdClone() : Command("clone") { } - virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { string from = cmdObj.getStringField("clone"); if ( from.empty() ) return false; @@ -486,7 +486,7 @@ namespace mongo { "Warning: the local copy of 'ns' is emptied before the copying begins. Any existing data will be lost there." ; } - virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { string fromhost = cmdObj.getStringField("from"); if ( fromhost.empty() ) { errmsg = "missing 'from' parameter"; @@ -538,7 +538,7 @@ namespace mongo { help << "get a nonce for subsequent copy db request from secure server\n"; help << "usage: {copydbgetnonce: 1, fromhost: <hostname>}"; } - virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { string fromhost = cmdObj.getStringField("fromhost"); if ( fromhost.empty() ) { /* copy from self */ @@ -579,7 +579,7 @@ namespace mongo { help << "copy a database from another host to this host\n"; help << "usage: {copydb: 1, fromhost: <hostname>, fromdb: <db>, todb: <db>[, slaveOk: <bool>, username: <username>, nonce: <nonce>, key: <key>]}"; } - virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { bool slaveOk = cmdObj["slaveOk"].trueValue(); string fromhost = cmdObj.getStringField("fromhost"); if ( fromhost.empty() ) { @@ -633,7 +633,7 @@ namespace mongo { virtual void help( stringstream &help ) const { help << " example: { renameCollection: foo.a, to: bar.b }"; } - virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { string source = cmdObj.getStringField( name.c_str() ); string target = cmdObj.getStringField( "to" ); if ( source.empty() || target.empty() ) { @@ -671,6 +671,8 @@ namespace mongo { nsToDatabase( target.c_str(), to ); if ( strcmp( from, to ) == 0 ) { renameNamespace( source.c_str(), target.c_str() ); + // make sure we drop counters etc + Top::global.collectionDropped( source ); return true; } } diff --git a/db/cmdline.cpp b/db/cmdline.cpp index d0b80455ff2..06880c98829 100644 --- a/db/cmdline.cpp +++ b/db/cmdline.cpp @@ -19,6 +19,7 @@ #include "pch.h" #include "cmdline.h" #include "commands.h" +#include "../util/password.h" #include "../util/processinfo.h" #include "../util/net/listen.h" #include "security_common.h" @@ -27,6 +28,8 @@ #include <direct.h> #endif +#define MAX_LINE_LENGTH 256 + namespace po = boost::program_options; namespace fs = boost::filesystem; @@ -64,6 +67,14 @@ namespace mongo { ("fork" , "fork server process" ) #endif ; + + hidden.add_options() +#ifdef MONGO_SSL + ("sslOnNormalPorts" , "use ssl on configured ports" ) + ("sslPEMKeyFile" , po::value<string>(&cmdLine.sslPEMKeyFile), "PEM file for ssl" ) + ("sslPEMKeyPassword" , new PasswordValue(&cmdLine.sslPEMKeyPassword) , "PEM file password" ) +#endif + ; } @@ -85,6 +96,32 @@ namespace mongo { } #endif + void CmdLine::parseConfigFile( istream &f, stringstream &ss ) { + string s; + char line[MAX_LINE_LENGTH]; + + while ( f ) { + f.getline(line, MAX_LINE_LENGTH); + s = line; + std::remove(s.begin(), s.end(), ' '); + std::remove(s.begin(), s.end(), '\t'); + boost::to_upper(s); + + if ( s.find( "FASTSYNC" ) != string::npos ) + cout << "warning \"fastsync\" should not be put in your configuration file" << endl; + + if ( s.c_str()[0] == '#' ) { + // skipping commented line + } else if ( s.find( "=FALSE" ) == string::npos ) { + ss << line << endl; + } else { + cout << "warning: remove or comment out this line by starting it with \'#\', skipping now : " << line << endl; + } + } + return; + } + + bool CmdLine::store( int argc , char ** argv , boost::program_options::options_description& visible, @@ -141,7 +178,9 @@ namespace mongo { return false; } - po::store( po::parse_config_file( f , all ) , params ); + stringstream ss; + CmdLine::parseConfigFile( f, ss ); + po::store( po::parse_config_file( ss , all ) , params ); f.close(); } @@ -287,7 +326,25 @@ namespace mongo { noauth = false; } +#ifdef MONGO_SSL + if (params.count("sslOnNormalPorts") ) { + cmdLine.sslOnNormalPorts = true; + if ( cmdLine.sslPEMKeyPassword.size() == 0 ) { + log() << "need sslPEMKeyPassword" << endl; + dbexit(EXIT_BADOPTIONS); + } + + if ( cmdLine.sslPEMKeyFile.size() == 0 ) { + log() << "need sslPEMKeyFile" << endl; + dbexit(EXIT_BADOPTIONS); + } + + cmdLine.sslServerManager = new SSLManager( false ); + cmdLine.sslServerManager->setupPEM( cmdLine.sslPEMKeyFile , cmdLine.sslPEMKeyPassword ); + } +#endif + { BSONObjBuilder b; for (po::variables_map::const_iterator it(params.begin()), end(params.end()); it != end; it++){ @@ -354,7 +411,7 @@ namespace mongo { virtual bool adminOnly() const { return true; } virtual bool slaveOk() const { return true; } - virtual bool run(const string&, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string&, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { result.append("argv", argvArray); result.append("parsed", parsedOpts); return true; diff --git a/db/cmdline.h b/db/cmdline.h index 7b6d12a2e04..60eb668a735 100644 --- a/db/cmdline.h +++ b/db/cmdline.h @@ -21,6 +21,12 @@ namespace mongo { +#ifdef MONGO_SSL + class SSLManager; +#endif + + + /* command line options */ /* concurrency: OK/READ */ @@ -63,6 +69,7 @@ namespace mongo { bool quiet; // --quiet bool noTableScan; // --notablescan no table scans allowed bool prealloc; // --noprealloc no preallocation of data files + bool preallocj; // --nopreallocj no preallocation of journal files bool smallfiles; // --smallfiles allocate smaller data files bool configsvr; // --configsvr @@ -71,7 +78,8 @@ namespace mongo { int quotaFiles; // --quotaFiles bool cpu; // --cpu show cpu time periodically - bool dur; // --dur durability (now --journal) + bool dur; // --dur durability (now --journal) + unsigned journalCommitInterval; // group/batch commit interval ms /** --durOptions 7 dump journal and terminate without doing anything further --durOptions 4 recover and terminate without listening @@ -99,6 +107,14 @@ namespace mongo { bool noUnixSocket; // --nounixsocket string socket; // UNIX domain socket directory +#ifdef MONGO_SSL + bool sslOnNormalPorts; // --sslOnNormalPorts + string sslPEMKeyFile; // --sslPEMKeyFile + string sslPEMKeyPassword; // --sslPEMKeyPassword + + SSLManager* sslServerManager; // currently leaks on close +#endif + static void addGlobalOptions( boost::program_options::options_description& general , boost::program_options::options_description& hidden ); @@ -106,6 +122,7 @@ namespace mongo { boost::program_options::options_description& hidden ); + static void parseConfigFile( istream &f, stringstream &ss); /** * @return true if should run program, false if should exit */ @@ -116,18 +133,28 @@ namespace mongo { boost::program_options::variables_map &output ); }; + // todo move to cmdline.cpp? inline CmdLine::CmdLine() : - port(DefaultDBPort), rest(false), jsonp(false), quiet(false), noTableScan(false), prealloc(true), smallfiles(sizeof(int*) == 4), + port(DefaultDBPort), rest(false), jsonp(false), quiet(false), noTableScan(false), prealloc(true), preallocj(true), smallfiles(sizeof(int*) == 4), configsvr(false), quota(false), quotaFiles(8), cpu(false), durOptions(0), objcheck(false), oplogSize(0), defaultProfile(0), slowMS(100), pretouch(0), moveParanoia( true ), syncdelay(60), noUnixSocket(false), socket("/tmp") { - // default may change for this later. + journalCommitInterval = 0; // 0 means use default + dur = false; #if defined(_DURABLEDEFAULTON) dur = true; -#else +#endif + if( sizeof(void*) == 8 ) + dur = true; +#if defined(_DURABLEDEFAULTOFF) dur = false; #endif + +#ifdef MONGO_SSL + sslOnNormalPorts = false; + sslServerManager = 0; +#endif } extern CmdLine cmdLine; diff --git a/db/commands.h b/db/commands.h index 454e2277e06..c18621828f2 100644 --- a/db/commands.h +++ b/db/commands.h @@ -20,6 +20,7 @@ #include "../pch.h" #include "jsobj.h" #include "../util/timer.h" +#include "../client/dbclient.h" namespace mongo { @@ -45,7 +46,7 @@ namespace mongo { return value is true if succeeded. if false, set errmsg text. */ - virtual bool run(const string& db, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) = 0; + virtual bool run(const string& db, BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result, bool fromRepl = false ) = 0; /* note: logTheTop() MUST be false if READ @@ -94,6 +95,11 @@ namespace mongo { */ virtual bool requiresAuth() { return true; } + /* Return true if a replica set secondary should go into "recovering" + (unreadable) state while running this command. + */ + virtual bool maintenanceMode() const { return false; } + /** @param webUI expose the command in the web ui as localhost:28017/<name> @param oldName an optional old, deprecated name for the command */ @@ -120,7 +126,7 @@ namespace mongo { static const map<string,Command*>* commandsByBestName() { return _commandsByBestName; } static const map<string,Command*>* webCommands() { return _webCommands; } /** @return if command was found and executed */ - static bool runAgainstRegistered(const char *ns, BSONObj& jsobj, BSONObjBuilder& anObjBuilder); + static bool runAgainstRegistered(const char *ns, BSONObj& jsobj, BSONObjBuilder& anObjBuilder, int queryOptions = 0); static LockType locktype( const string& name ); static Command * findCommand( const string& name ); }; @@ -139,7 +145,7 @@ namespace mongo { virtual LockType locktype() const { return NONE; } virtual void help( stringstream& help ) const; CmdShutdown() : Command("shutdown") {} - bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl); + bool run(const string& dbname, BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result, bool fromRepl); private: bool shutdownHelper(); }; diff --git a/db/commands/distinct.cpp b/db/commands/distinct.cpp index 9a10e69d5a8..48f44050e49 100644 --- a/db/commands/distinct.cpp +++ b/db/commands/distinct.cpp @@ -32,7 +32,7 @@ namespace mongo { help << "{ distinct : 'collection name' , key : 'a.b' , query : {} }"; } - bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { Timer t; string ns = dbname + '.' + cmdObj.firstElement().valuestr(); diff --git a/db/commands/find_and_modify.cpp b/db/commands/find_and_modify.cpp index 2856ab3d3f1..0cf766fcf87 100644 --- a/db/commands/find_and_modify.cpp +++ b/db/commands/find_and_modify.cpp @@ -37,7 +37,7 @@ namespace mongo { virtual bool logTheOp() { return false; } // the modifications will be logged directly virtual bool slaveOk() const { return false; } virtual LockType locktype() const { return WRITE; } - virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) { static DBDirectClient db; string ns = dbname + '.' + cmdObj.firstElement().valuestr(); diff --git a/db/commands/group.cpp b/db/commands/group.cpp index 9d7acbdf7d4..d3e5839748c 100644 --- a/db/commands/group.cpp +++ b/db/commands/group.cpp @@ -20,6 +20,7 @@ #include "../instance.h" #include "../queryoptimizer.h" #include "../../scripting/engine.h" +#include "../clientcursor.h" namespace mongo { @@ -44,7 +45,7 @@ namespace mongo { uassert( 10042 , "return of $key has to be an object" , type == Object ); return s->getObject( "return" ); } - return obj.extractFields( keyPattern , true ); + return obj.extractFields( keyPattern , true ).getOwned(); } bool group( string realdbname , const string& ns , const BSONObj& query , @@ -88,14 +89,27 @@ namespace mongo { list<BSONObj> blah; shared_ptr<Cursor> cursor = NamespaceDetailsTransient::getCursor(ns.c_str() , query); + ClientCursor::CleanupPointer ccPointer; + ccPointer.reset( new ClientCursor( QueryOption_NoCursorTimeout, cursor, ns ) ); while ( cursor->ok() ) { + + if ( !ccPointer->yieldSometimes( ClientCursor::MaybeCovered ) || + !cursor->ok() ) { + break; + } + if ( ( cursor->matcher() && !cursor->matcher()->matchesCurrent( cursor.get() ) ) || cursor->getsetdup( cursor->currLoc() ) ) { cursor->advance(); continue; } + if ( !ccPointer->yieldSometimes( ClientCursor::WillNeed ) || + !cursor->ok() ) { + break; + } + BSONObj obj = cursor->current(); cursor->advance(); @@ -117,6 +131,7 @@ namespace mongo { throw UserException( 9010 , (string)"reduce invoke failed: " + s->getError() ); } } + ccPointer.reset(); if (!finalize.empty()) { s->exec( "$finalize = " + finalize , "finalize define" , false , true , true , 100 ); @@ -140,7 +155,7 @@ namespace mongo { return true; } - bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { if ( !globalScriptEngine ) { errmsg = "server-side JavaScript execution is disabled"; diff --git a/db/commands/isself.cpp b/db/commands/isself.cpp index cac8380dc20..5a868de919f 100644 --- a/db/commands/isself.cpp +++ b/db/commands/isself.cpp @@ -130,7 +130,7 @@ namespace mongo { help << "{ _isSelf : 1 } INTERNAL ONLY"; } - bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { init(); result.append( "id" , _id ); return true; diff --git a/db/commands/mr.cpp b/db/commands/mr.cpp index 75f5615b9f6..56e9770dff2 100644 --- a/db/commands/mr.cpp +++ b/db/commands/mr.cpp @@ -879,8 +879,6 @@ namespace mongo { } } -// boost::thread_specific_ptr<State*> _tl; - /** * emit that will be called by js function */ @@ -932,7 +930,7 @@ namespace mongo { help << "http://www.mongodb.org/display/DOCS/MapReduce"; } virtual LockType locktype() const { return NONE; } - bool run(const string& dbname , BSONObj& cmd, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + bool run(const string& dbname , BSONObj& cmd, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { Timer t; Client::GodScope cg; Client& client = cc(); @@ -968,12 +966,6 @@ namespace mongo { state.init(); state.prepTempCollection(); - { - State** s = new State*(); - s[0] = &state; -// _tl.reset( s ); - } - wassert( config.limit < 0x4000000 ); // see case on next line to 32 bit unsigned ProgressMeterHolder pm( op->setMessage( "m/r: (1/3) emit phase" , state.incomingDocuments() ) ); long long mapTime = 0; @@ -988,7 +980,9 @@ namespace mongo { // obtain cursor on data to apply mr to, sorted shared_ptr<Cursor> temp = NamespaceDetailsTransient::getCursor( config.ns.c_str(), config.filter, config.sort ); + uassert( 15876, str::stream() << "could not create cursor over " << config.ns << " for query : " << config.filter << " sort : " << config.sort, temp.get() ); auto_ptr<ClientCursor> cursor( new ClientCursor( QueryOption_NoCursorTimeout , temp , config.ns.c_str() ) ); + uassert( 15877, str::stream() << "could not create client cursor over " << config.ns << " for query : " << config.filter << " sort : " << config.sort, cursor.get() ); Timer mt; // go through each doc @@ -1065,11 +1059,19 @@ namespace mongo { countsBuilder.appendNumber( "reduce" , state.numReduces() ); timingBuilder.append( "reduceTime" , inReduce / 1000 ); timingBuilder.append( "mode" , state.jsMode() ? "js" : "mixed" ); - -// _tl.reset(); + } + // TODO: The error handling code for queries is v. fragile, + // *requires* rethrow AssertionExceptions - should probably fix. + catch ( AssertionException& e ){ + log() << "mr failed, removing collection" << causedBy(e) << endl; + throw e; + } + catch ( std::exception& e ){ + log() << "mr failed, removing collection" << causedBy(e) << endl; + throw e; } catch ( ... ) { - log() << "mr failed, removing collection" << endl; + log() << "mr failed for unknown reason, removing collection" << endl; throw; } @@ -1116,7 +1118,7 @@ namespace mongo { virtual bool slaveOverrideOk() { return true; } virtual LockType locktype() const { return NONE; } - bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) { string shardedOutputCollection = cmdObj["shardedOutputCollection"].valuestrsafe(); string postProcessCollection = cmdObj["postProcessCollection"].valuestrsafe(); bool postProcessOnly = !(postProcessCollection.empty()); diff --git a/db/compact.cpp b/db/compact.cpp index a1197460f4f..c6e5f77ee0e 100644 --- a/db/compact.cpp +++ b/db/compact.cpp @@ -263,6 +263,7 @@ namespace mongo { virtual LockType locktype() const { return NONE; } virtual bool adminOnly() const { return false; } virtual bool slaveOk() const { return true; } + virtual bool maintenanceMode() const { return true; } virtual bool logTheOp() { return false; } virtual void help( stringstream& help ) const { help << "compact collection\n" @@ -274,7 +275,7 @@ namespace mongo { virtual bool requiresAuth() { return true; } CompactCmd() : Command("compact") { } - virtual bool run(const string& db, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& db, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { string coll = cmdObj.firstElement().valuestr(); if( coll.empty() || db.empty() ) { errmsg = "no collection name specified"; diff --git a/db/curop.h b/db/curop.h index f261812d383..2717d78cc62 100644 --- a/db/curop.h +++ b/db/curop.h @@ -28,6 +28,8 @@ namespace mongo { + class CurOp; + /* lifespan is different than CurOp because of recursives with DBDirectClient */ class OpDebug { public: @@ -36,7 +38,7 @@ namespace mongo { void reset(); string toString() const; - void append( BSONObjBuilder& b ) const; + void append( const CurOp& curop, BSONObjBuilder& b ) const; // ------------------- @@ -119,7 +121,7 @@ namespace mongo { int size() const { return *_size; } bool have() const { return size() > 0; } - BSONObj get() { + BSONObj get() const { _lock.lock(); BSONObj o; try { @@ -133,7 +135,7 @@ namespace mongo { return o; } - void append( BSONObjBuilder& b , const StringData& name ) { + void append( BSONObjBuilder& b , const StringData& name ) const { scoped_spinlock lk(_lock); BSONObj temp = _get(); b.append( name , temp ); @@ -141,7 +143,7 @@ namespace mongo { private: /** you have to be locked when you call this */ - BSONObj _get() { + BSONObj _get() const { int sz = size(); if ( sz == 0 ) return BSONObj(); @@ -153,7 +155,7 @@ namespace mongo { /** you have to be locked when you call this */ void _reset( int sz ) { _size[0] = sz; } - SpinLock _lock; + mutable SpinLock _lock; int * _size; char _buf[512]; }; @@ -168,7 +170,8 @@ namespace mongo { bool haveQuery() const { return _query.have(); } BSONObj query() { return _query.get(); } - + void appendQuery( BSONObjBuilder& b , const StringData& name ) const { _query.append( b , name ); } + void ensureStarted() { if ( _start == 0 ) _start = _checkpoint = curTimeMicros64(); diff --git a/db/cursor.h b/db/cursor.h index ff9c9821ada..9639b2677b1 100644 --- a/db/cursor.h +++ b/db/cursor.h @@ -132,6 +132,8 @@ namespace mongo { virtual void setMatcher( shared_ptr< CoveredIndexMatcher > matcher ) { massert( 13285, "manual matcher config not allowed", false ); } + + virtual void explainDetails( BSONObjBuilder& b ) { return; } }; // strategy object implementing direction of traversal. diff --git a/db/database.cpp b/db/database.cpp index 7906e9b435a..97b3fa011cb 100644 --- a/db/database.cpp +++ b/db/database.cpp @@ -192,22 +192,31 @@ namespace mongo { return ret; } + bool fileIndexExceedsQuota( const char *ns, int fileIndex, bool enforceQuota ) { + return + cmdLine.quota && + enforceQuota && + fileIndex >= cmdLine.quotaFiles && + // we don't enforce the quota on "special" namespaces as that could lead to problems -- e.g. + // rejecting an index insert after inserting the main record. + !NamespaceString::special( ns ) && + NamespaceString( ns ).db != "local"; + } + MongoDataFile* Database::suitableFile( const char *ns, int sizeNeeded, bool preallocate, bool enforceQuota ) { // check existing files for ( int i=numFiles()-1; i>=0; i-- ) { MongoDataFile* f = getFile( i ); if ( f->getHeader()->unusedLength >= sizeNeeded ) { - // we don't enforce the quota on "special" namespaces as that could lead to problems -- e.g. - // rejecting an index insert after inserting the main record. - if( cmdLine.quota && enforceQuota && i > cmdLine.quotaFiles && !NamespaceString::special(ns) ) + if ( fileIndexExceedsQuota( ns, i-1, enforceQuota ) ) // NOTE i-1 is the value used historically for this check. ; else return f; } } - if( cmdLine.quota && enforceQuota && numFiles() >= cmdLine.quotaFiles && !NamespaceString::special(ns) ) + if ( fileIndexExceedsQuota( ns, numFiles(), enforceQuota ) ) uasserted(12501, "quota exceeded"); // allocate files until we either get one big enough or hit maxSize @@ -261,8 +270,8 @@ namespace mongo { log() << "creating profile collection: " << profileName << endl; BSONObjBuilder spec; spec.appendBool( "capped", true ); - spec.append( "size", 131072.0 ); - if ( ! userCreateNS( profileName.c_str(), spec.done(), errmsg , true ) ) { + spec.append( "size", 1024*1024 ); + if ( ! userCreateNS( profileName.c_str(), spec.done(), errmsg , false /* we don't replica profile messages */ ) ) { return false; } } diff --git a/db/db.cpp b/db/db.cpp index 9f90b9ddd02..2d4970af044 100644 --- a/db/db.cpp +++ b/db/db.cpp @@ -70,7 +70,6 @@ namespace mongo { extern string repairpath; void setupSignals( bool inFork ); - void startReplSets(ReplSetCmdline*); void startReplication(); void exitCleanly( ExitCode code ); @@ -216,8 +215,6 @@ namespace mongo { void listen(int port) { //testTheDb(); - log() << "waiting for connections on port " << port << endl; - MessageServer::Options options; options.port = port; options.ipList = cmdLine.bind_ip; @@ -483,12 +480,6 @@ namespace mongo { clientCursorMonitor.go(); PeriodicTask::theRunner->go(); - if( !cmdLine._replSet.empty() ) { - replSet = true; - ReplSetCmdline *replSetCmdline = new ReplSetCmdline(cmdLine._replSet); - boost::thread t( boost::bind( &startReplSets, replSetCmdline) ); - } - listen(listenPort); // listen() will return when exit code closes its socket. @@ -575,10 +566,12 @@ int main(int argc, char* argv[]) { ("directoryperdb", "each database will be stored in a separate directory") ("journal", "enable journaling") ("journalOptions", po::value<int>(), "journal diagnostic options") + ("journalCommitInterval", po::value<unsigned>(), "how often to group/batch commit (ms)") ("ipv6", "enable IPv6 support (disabled by default)") ("jsonp","allow JSONP access via http (has security implications)") ("noauth", "run without security") ("nohttpinterface", "disable http interface") + ("nojournal", "disable journaling (journaling is on by default for 64 bit)") ("noprealloc", "disable data file preallocation - will often hurt performance") ("noscripting", "disable scripting engine") ("notablescan", "do not allow table scans") @@ -631,12 +624,11 @@ int main(int argc, char* argv[]) { ("pretouch", po::value<int>(), "n pretouch threads for applying replicationed operations") ("command", po::value< vector<string> >(), "command") ("cacheSize", po::value<long>(), "cache size (in MB) for rec store") - // these move to unhidden later: ("nodur", "disable journaling (currently the default)") - ("nojournal", "disable journaling (currently the default)") // things we don't want people to use ("nocursors", "diagnostic/debugging option that turns off cursors DO NOT USE IN PRODUCTION") ("nohints", "ignore query hints") + ("nopreallocj", "don't preallocate journal files") ("dur", "enable journaling") // deprecated version ("durOptions", po::value<int>(), "durability diagnostic options") // deprecated version // deprecated pairing command line options @@ -745,6 +737,15 @@ int main(int argc, char* argv[]) { if (params.count("durOptions")) { cmdLine.durOptions = params["durOptions"].as<int>(); } + if( params.count("journalCommitInterval") ) { + // don't check if dur is false here as many will just use the default, and will default to off on win32. + // ie no point making life a little more complex by giving an error on a dev environment. + cmdLine.journalCommitInterval = params["journalCommitInterval"].as<unsigned>(); + if( cmdLine.journalCommitInterval <= 1 || cmdLine.journalCommitInterval > 300 ) { + out() << "--journalCommitInterval out of allowed range (0-300ms)" << endl; + dbexit( EXIT_BADOPTIONS ); + } + } if (params.count("journalOptions")) { cmdLine.durOptions = params["journalOptions"].as<int>(); } @@ -761,6 +762,9 @@ int main(int argc, char* argv[]) { if (params.count("nohints")) { useHints = false; } + if (params.count("nopreallocj")) { + cmdLine.preallocj = false; + } if (params.count("nohttpinterface")) { noHttpInterface = true; } diff --git a/db/db.vcxproj b/db/db.vcxproj index 685015ed7f6..8f831cb8559 100755 --- a/db/db.vcxproj +++ b/db/db.vcxproj @@ -459,9 +459,27 @@ <ClCompile Include="..\s\shard.cpp" />
<ClCompile Include="..\s\shardconnection.cpp" />
<ClCompile Include="..\s\shardkey.cpp" />
+ <ClCompile Include="..\third_party\snappy\snappy-sinksource.cc">
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">NotUsing</PrecompiledHeader>
+ </ClCompile>
+ <ClCompile Include="..\third_party\snappy\snappy.cc">
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">NotUsing</PrecompiledHeader>
+ </ClCompile>
<ClCompile Include="..\util\alignedbuilder.cpp">
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">NotUsing</PrecompiledHeader>
</ClCompile>
+ <ClCompile Include="..\util\compress.cpp">
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">NotUsing</PrecompiledHeader>
+ </ClCompile>
<ClCompile Include="..\util\concurrency\spin_lock.cpp" />
<ClCompile Include="..\util\concurrency\synchronization.cpp" />
<ClCompile Include="..\util\concurrency\task.cpp" />
@@ -561,6 +579,7 @@ <ClCompile Include="..\client\parallel.cpp" />
<ClCompile Include="pdfile.cpp" />
<ClCompile Include="queryoptimizer.cpp" />
+ <ClCompile Include="scanandorder.cpp" />
<ClCompile Include="security.cpp" />
<ClCompile Include="security_commands.cpp" />
<ClCompile Include="security_common.cpp" />
@@ -652,6 +671,8 @@ <ClInclude Include="..\targetver.h" />
<ClInclude Include="..\pcre-7.4\config.h" />
<ClInclude Include="..\pcre-7.4\pcre.h" />
+ <ClInclude Include="..\third_party\snappy\config.h" />
+ <ClInclude Include="..\third_party\snappy\snappy.h" />
<ClInclude Include="..\util\alignedbuilder.h" />
<ClInclude Include="..\util\concurrency\mutexdebugger.h" />
<ClInclude Include="..\util\concurrency\race.h" />
diff --git a/db/db.vcxproj.filters b/db/db.vcxproj.filters index d9e9def86f8..36b0df1ddc2 100755 --- a/db/db.vcxproj.filters +++ b/db/db.vcxproj.filters @@ -1,4 +1,4 @@ -<?xml version="1.0" encoding="utf-8"?>
+<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<ClCompile Include="..\bson\oid.cpp" />
@@ -166,6 +166,16 @@ <ClCompile Include="..\util\net\message_port.cpp" />
<ClCompile Include="dbmessage.cpp" />
<ClCompile Include="commands\find_and_modify.cpp" />
+ <ClCompile Include="..\util\compress.cpp">
+ <Filter>snappy</Filter>
+ </ClCompile>
+ <ClCompile Include="..\third_party\snappy\snappy-sinksource.cc">
+ <Filter>snappy</Filter>
+ </ClCompile>
+ <ClCompile Include="..\third_party\snappy\snappy.cc">
+ <Filter>snappy</Filter>
+ </ClCompile>
+ <ClCompile Include="scanandorder.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\client\dbclientcursor.h" />
@@ -315,6 +325,12 @@ <ClInclude Include="..\util\net\sock.h" />
<ClInclude Include="..\util\concurrency\rwlockimpl.h" />
<ClInclude Include="..\util\concurrency\mutexdebugger.h" />
+ <ClInclude Include="..\third_party\snappy\config.h">
+ <Filter>snappy</Filter>
+ </ClInclude>
+ <ClInclude Include="..\third_party\snappy\snappy.h">
+ <Filter>snappy</Filter>
+ </ClInclude>
</ItemGroup>
<ItemGroup>
<ResourceCompile Include="db.rc" />
@@ -349,4 +365,9 @@ <Library Include="..\..\js\js64d.lib" />
<Library Include="..\..\js\js64r.lib" />
</ItemGroup>
+ <ItemGroup>
+ <Filter Include="snappy">
+ <UniqueIdentifier>{bb99c086-7926-4f50-838d-f5f0c18397c0}</UniqueIdentifier>
+ </Filter>
+ </ItemGroup>
</Project>
\ No newline at end of file diff --git a/db/dbcommands.cpp b/db/dbcommands.cpp index 73c1004d4f2..2edd7684ff8 100644 --- a/db/dbcommands.cpp +++ b/db/dbcommands.cpp @@ -31,6 +31,7 @@ #include "../util/lruishmap.h" #include "../util/md5.hpp" #include "../util/processinfo.h" +#include "../util/ramlog.h" #include "json.h" #include "repl.h" #include "repl_block.h" @@ -53,14 +54,16 @@ namespace mongo { namespace dur { void setAgeOutJournalFiles(bool rotate); } + /** @return true if fields found */ bool setParmsMongodSpecific(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { BSONElement e = cmdObj["ageOutJournalFiles"]; if( !e.eoo() ) { bool r = e.trueValue(); log() << "ageOutJournalFiles " << r << endl; dur::setAgeOutJournalFiles(r); + return true; } - return true; + return false; } void flushDiagLog(); @@ -85,7 +88,7 @@ namespace mongo { help << "reset error state (used with getpreverror)"; } CmdResetError() : Command("resetError", false, "reseterror") {} - bool run(const string& db, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool run(const string& db, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { LastError *le = lastError.get(); assert( le ); le->reset(); @@ -116,7 +119,7 @@ namespace mongo { << " { w:n } - await replication to n servers (including self) before returning\n" << " { wtimeout:m} - timeout for w in m milliseconds"; } - bool run(const string& dbname, BSONObj& _cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool run(const string& dbname, BSONObj& _cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { LastError *le = lastError.disableForCommand(); bool err = false; @@ -246,7 +249,7 @@ namespace mongo { return true; } CmdGetPrevError() : Command("getPrevError", false, "getpreverror") {} - bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { LastError *le = lastError.disableForCommand(); le->appendSelf( result ); if ( le->valid ) @@ -268,14 +271,14 @@ namespace mongo { << "N to wait N seconds for other members to catch up."; } - bool CmdShutdown::run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool CmdShutdown::run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { bool force = cmdObj.hasField("force") && cmdObj["force"].trueValue(); if (!force && theReplSet && theReplSet->isPrimary()) { - int timeout, now, start; + long long timeout, now, start; timeout = now = start = curTimeMicros64()/1000000; if (cmdObj.hasField("timeoutSecs")) { - timeout += cmdObj["timeoutSecs"].numberInt(); + timeout += cmdObj["timeoutSecs"].numberLong(); } OpTime lastOp = theReplSet->lastOpTimeWritten; @@ -329,7 +332,7 @@ namespace mongo { } virtual LockType locktype() const { return WRITE; } CmdDropDatabase() : Command("dropDatabase") {} - bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { BSONElement e = cmdObj.firstElement(); log() << "dropDatabase " << dbname << endl; int p = (int) e.number(); @@ -349,12 +352,13 @@ namespace mongo { virtual bool slaveOk() const { return true; } + virtual bool maintenanceMode() const { return true; } virtual void help( stringstream& help ) const { help << "repair database. also compacts. note: slow."; } virtual LockType locktype() const { return WRITE; } CmdRepairDatabase() : Command("repairDatabase") {} - bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { BSONElement e = cmdObj.firstElement(); log() << "repairDatabase " << dbname << endl; int p = (int) e.number(); @@ -388,7 +392,7 @@ namespace mongo { } virtual LockType locktype() const { return WRITE; } CmdProfile() : Command("profile") {} - bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { BSONElement e = cmdObj.firstElement(); result.append("was", cc().database()->profile); result.append("slowms", cmdLine.slowMS ); @@ -425,7 +429,7 @@ namespace mongo { help << "returns lots of administrative server statistics"; } - bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { long long start = Listener::getElapsedTimeMillis(); BSONObjBuilder timeBuilder(128); @@ -596,6 +600,21 @@ namespace mongo { timeBuilder.appendNumber( "after dur" , Listener::getElapsedTimeMillis() - start ); + { + RamLog* rl = RamLog::get( "warnings" ); + verify(15880, rl); + + if (rl->lastWrite() >= time(0)-(10*60)){ // only show warnings from last 10 minutes + vector<const char*> lines; + rl->get( lines ); + + BSONArrayBuilder arr( result.subarrayStart( "warnings" ) ); + for ( unsigned i=std::max(0,(int)lines.size()-10); i<lines.size(); i++ ) + arr.append( lines[i] ); + arr.done(); + } + } + if ( ! authed ) result.append( "note" , "run against admin for more info" ); @@ -619,7 +638,7 @@ namespace mongo { virtual void help( stringstream& help ) const { help << "internal"; } virtual LockType locktype() const { return NONE; } CmdGetOpTime() : Command("getoptime") { } - bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { writelock l( "" ); result.appendDate("optime", OpTime::now().asDate()); return true; @@ -648,7 +667,7 @@ namespace mongo { } void help(stringstream& h) const { h << "http://www.mongodb.org/display/DOCS/Monitoring+and+Diagnostics#MonitoringandDiagnostics-DatabaseRecord%2FReplay"; } virtual LockType locktype() const { return WRITE; } - bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) { int was = _diaglog.setLevel( cmdObj.firstElement().numberInt() ); flushDiagLog(); if ( !cmdLine.quiet ) @@ -771,7 +790,7 @@ namespace mongo { } virtual void help( stringstream& help ) const { help << "drop a collection\n{drop : <collectionName>}"; } virtual LockType locktype() const { return WRITE; } - virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + virtual bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) { string nsToDrop = dbname + '.' + cmdObj.firstElement().valuestr(); NamespaceDetails *d = nsdetails(nsToDrop.c_str()); if ( !cmdLine.quiet ) @@ -805,7 +824,7 @@ namespace mongo { return false; } virtual void help( stringstream& help ) const { help << "count objects in collection"; } - virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) { string ns = dbname + '.' + cmdObj.firstElement().valuestr(); string err; long long n = runCount(ns.c_str(), cmdObj, err); @@ -844,7 +863,8 @@ namespace mongo { help << "create a collection explicitly\n" "{ create: <ns>[, capped: <bool>, size: <collSizeInBytes>, max: <nDocs>] }"; } - virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + virtual bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + uassert(15888, "must pass name of collection to create", cmdObj.firstElement().valuestrsafe()[0] != '\0'); string ns = dbname + '.' + cmdObj.firstElement().valuestr(); string err; uassert(14832, "specify size:<n> when capped is true", !cmdObj["capped"].trueValue() || cmdObj["size"].isNumber() || cmdObj.hasField("$nExtents")); @@ -869,7 +889,7 @@ namespace mongo { help << "drop indexes for a collection"; } CmdDropIndexes() : Command("dropIndexes", false, "deleteIndexes") { } - bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& anObjBuilder, bool /*fromRepl*/) { + bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& anObjBuilder, bool /*fromRepl*/) { BSONElement e = jsobj.firstElement(); string toDeleteNs = dbname + '.' + e.valuestr(); NamespaceDetails *d = nsdetails(toDeleteNs.c_str()); @@ -914,7 +934,7 @@ namespace mongo { help << "re-index a collection"; } CmdReIndex() : Command("reIndex") { } - bool run(const string& dbname , BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) { + bool run(const string& dbname , BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) { static DBDirectClient db; BSONElement e = jsobj.firstElement(); @@ -969,7 +989,7 @@ namespace mongo { virtual LockType locktype() const { return NONE; } virtual void help( stringstream& help ) const { help << "list databases on this server"; } CmdListDatabases() : Command("listDatabases" , true ) {} - bool run(const string& dbname , BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) { + bool run(const string& dbname , BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) { vector< string > dbNames; getDatabaseNames( dbNames ); vector< BSONObj > dbInfos; @@ -1038,7 +1058,7 @@ namespace mongo { virtual LockType locktype() const { return WRITE; } CmdCloseAllDatabases() : Command( "closeAllDatabases" ) {} - bool run(const string& dbname , BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) { + bool run(const string& dbname , BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) { bool ok; try { ok = dbHolder.closeAll( dbpath , result, false ); @@ -1065,7 +1085,7 @@ namespace mongo { help << " example: { filemd5 : ObjectId(aaaaaaa) , root : \"fs\" }"; } virtual LockType locktype() const { return READ; } - bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { string ns = dbname; ns += "."; { @@ -1164,7 +1184,7 @@ namespace mongo { "\nkeyPattern, min, and max parameters are optional." "\nnote: This command may take a while to run"; } - bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { Timer timer; string ns = jsobj.firstElement().String(); @@ -1282,7 +1302,7 @@ namespace mongo { help << "{ collStats:\"blog.posts\" , scale : 1 } scale divides sizes e.g. for KB use 1024\n" " avgObjSize - in bytes"; } - bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { string ns = dbname + "." + jsobj.firstElement().valuestr(); Client::Context cx( ns ); @@ -1351,7 +1371,7 @@ namespace mongo { "Get stats on a database. Not instantaneous. Slower for databases with large .ns files.\n" << "Example: { dbStats:1, scale:1 }"; } - bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { int scale = 1; if ( jsobj["scale"].isNumber() ) { scale = jsobj["scale"].numberInt(); @@ -1426,7 +1446,7 @@ namespace mongo { virtual void help( stringstream &help ) const { help << "{ cloneCollectionAsCapped:<fromName>, toCollection:<toName>, size:<sizeInBytes> }"; } - bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { string from = jsobj.getStringField( "cloneCollectionAsCapped" ); string to = jsobj.getStringField( "toCollection" ); long long size = (long long)jsobj.getField( "size" ).number(); @@ -1488,7 +1508,7 @@ namespace mongo { virtual void help( stringstream &help ) const { help << "{ convertToCapped:<fromCollectionName>, size:<sizeInBytes> }"; } - bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { BackgroundOperation::assertNoBgOpInProgForDb(dbname.c_str()); string from = jsobj.getStringField( "convertToCapped" ); @@ -1544,7 +1564,7 @@ namespace mongo { virtual void help( stringstream &help ) const { help << "{whatsmyuri:1}"; } - virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) { BSONObj info = cc().curop()->infoNoauth(); result << "you" << info[ "client" ]; return true; @@ -1559,7 +1579,7 @@ namespace mongo { return true; } virtual bool slaveOk() const { - return false; + return true; } virtual LockType locktype() const { return WRITE; } virtual bool requiresAuth() { @@ -1568,7 +1588,7 @@ namespace mongo { virtual void help( stringstream &help ) const { help << "internal. for testing only."; } - virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) { string coll = cmdObj[ "godinsert" ].valuestrsafe(); uassert( 13049, "godinsert must specify a collection", !coll.empty() ); string ns = dbname + "." + coll; @@ -1583,7 +1603,7 @@ namespace mongo { DBHashCmd() : Command( "dbHash", false, "dbhash" ) {} virtual bool slaveOk() const { return true; } virtual LockType locktype() const { return READ; } - virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + virtual bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) { list<string> colls; Database* db = cc().database(); if ( db ) @@ -1629,9 +1649,8 @@ namespace mongo { cursor = findTableScan( c.c_str() , BSONObj() ); } else { - bb.done(); - errmsg = (string)"can't find _id index for: " + c; - return 0; + log() << "can't find _id index for: " << c << endl; + continue; } md5_state_t st; @@ -1677,7 +1696,7 @@ namespace mongo { help << "w:true write lock. secs:<seconds>"; } CmdSleep() : Command("sleep") { } - bool run(const string& ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool run(const string& ns, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { int secs = 100; if ( cmdObj["secs"].isNumber() ) secs = cmdObj["secs"].numberInt(); @@ -1700,7 +1719,7 @@ namespace mongo { virtual bool slaveOk() const { return false; } virtual LockType locktype() const { return WRITE; } virtual bool requiresAuth() { return true; } - virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + virtual bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) { string coll = cmdObj[ "captrunc" ].valuestrsafe(); uassert( 13416, "captrunc must specify a collection", !coll.empty() ); string ns = dbname + "." + coll; @@ -1727,7 +1746,7 @@ namespace mongo { virtual bool slaveOk() const { return false; } virtual LockType locktype() const { return WRITE; } virtual bool requiresAuth() { return true; } - virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + virtual bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) { string coll = cmdObj[ "emptycapped" ].valuestrsafe(); uassert( 13428, "emptycapped must specify a collection", !coll.empty() ); string ns = dbname + "." + coll; @@ -1792,13 +1811,22 @@ namespace mongo { if ( c->adminOnly() ) log( 2 ) << "command: " << cmdObj << endl; + if (c->maintenanceMode() && theReplSet && theReplSet->isSecondary()) { + theReplSet->setMaintenanceMode(true); + } + if ( c->locktype() == Command::NONE ) { // we also trust that this won't crash client.curop()->ensureStarted(); string errmsg; - int ok = c->run( dbname , cmdObj , errmsg , result , fromRepl ); + int ok = c->run( dbname , cmdObj , queryOptions, errmsg , result , fromRepl ); if ( ! ok ) result.append( "errmsg" , errmsg ); + + if (c->maintenanceMode() && theReplSet) { + theReplSet->setMaintenanceMode(false); + } + return ok; } @@ -1812,11 +1840,13 @@ namespace mongo { client.curop()->ensureStarted(); Client::Context ctx( dbname , dbpath , &lk , c->requiresAuth() ); + bool retval = true; + try { string errmsg; - if ( ! c->run(dbname, cmdObj, errmsg, result, fromRepl ) ) { + if ( ! c->run(dbname, cmdObj, queryOptions, errmsg, result, fromRepl ) ) { result.append( "errmsg" , errmsg ); - return false; + retval = false; } } catch ( DBException& e ) { @@ -1824,14 +1854,18 @@ namespace mongo { ss << "exception: " << e.what(); result.append( "errmsg" , ss.str() ); result.append( "code" , e.getCode() ); - return false; + retval = false; } - if ( c->logTheOp() && ! fromRepl ) { + if ( retval && c->logTheOp() && ! fromRepl ) { logOp("c", cmdns, cmdObj); } - return true; + if (c->maintenanceMode() && theReplSet) { + theReplSet->setMaintenanceMode(false); + } + + return retval; } diff --git a/db/dbcommands_admin.cpp b/db/dbcommands_admin.cpp index 47f6c691ab4..566027fc594 100644 --- a/db/dbcommands_admin.cpp +++ b/db/dbcommands_admin.cpp @@ -47,7 +47,7 @@ namespace mongo { virtual void help(stringstream& h) const { h << "internal"; } - bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { string dropns = dbname + "." + cmdObj.firstElement().valuestrsafe(); if ( !cmdLine.quiet ) @@ -82,7 +82,7 @@ namespace mongo { virtual bool adminOnly() const { return true; } virtual void help(stringstream& h) const { h << "test how long to write and fsync to a test file in the journal/ directory"; } - bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { filesystem::path p = dur::getJournalDir(); p /= "journalLatencyTest"; @@ -157,7 +157,7 @@ namespace mongo { virtual LockType locktype() const { return READ; } //{ validate: "collectionnamewithoutthedbpart" [, scandata: <bool>] [, full: <bool> } */ - bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { string ns = dbname + "." + cmdObj.firstElement().valuestrsafe(); NamespaceDetails * d = nsdetails( ns.c_str() ); if ( !cmdLine.quiet ) @@ -473,7 +473,7 @@ namespace mongo { return !x.empty(); }*/ virtual void help(stringstream& h) const { h << url(); } - virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { bool sync = !cmdObj["async"].trueValue(); // async means do an fsync, but return immediately bool lock = cmdObj["lock"].trueValue(); log() << "CMD fsync: sync:" << sync << " lock:" << lock << endl; diff --git a/db/dbcommands_generic.cpp b/db/dbcommands_generic.cpp index 2e025b500ea..a9e13eab741 100644 --- a/db/dbcommands_generic.cpp +++ b/db/dbcommands_generic.cpp @@ -79,7 +79,7 @@ namespace mongo { virtual void help( stringstream &help ) const { help << "internal command facilitating running in certain cloud computing environments"; } - bool run(const string& dbname, BSONObj& obj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + bool run(const string& dbname, BSONObj& obj, int options, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { if( !obj.hasElement("servers") ) { vector<string> ips; obj["servers"].Obj().Vals(ips); @@ -106,7 +106,7 @@ namespace mongo { help << "get version #, etc.\n"; help << "{ buildinfo:1 }"; } - bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { result << "version" << versionString << "gitVersion" << gitVersion() << "sysInfo" << sysInfo(); result << "versionArray" << versionArray; result << "bits" << ( sizeof( int* ) == 4 ? 32 : 64 ); @@ -137,7 +137,7 @@ namespace mongo { help << " syncdelay\n"; help << "{ getParameter:'*' } to get everything\n"; } - bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { bool all = *cmdObj.firstElement().valuestrsafe() == '*'; int before = result.len(); @@ -166,11 +166,6 @@ namespace mongo { } } cmdGet; - // dev - experimental. so only in set command for now. may go away or change - namespace dur { - int groupCommitIntervalMs = 100; - } - // tempish bool setParmsMongodSpecific(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ); @@ -184,23 +179,24 @@ namespace mongo { help << "set administrative option(s)\n"; help << "{ setParameter:1, <param>:<value> }\n"; help << "supported so far:\n"; - help << " notablescan\n"; + help << " journalCommitInterval\n"; help << " logLevel\n"; + help << " notablescan\n"; help << " quiet\n"; help << " syncdelay\n"; } - bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { int s = 0; - setParmsMongodSpecific(dbname, cmdObj, errmsg, result, fromRepl); - if( cmdObj.hasElement("groupCommitIntervalMs") ) { + bool found = setParmsMongodSpecific(dbname, cmdObj, errmsg, result, fromRepl); + if( cmdObj.hasElement("journalCommitInterval") ) { if( !cmdLine.dur ) { errmsg = "journaling is off"; return false; } - int x = (int) cmdObj["groupCommitIntervalMs"].Number(); - assert( x > 0 && x < 500 ); - dur::groupCommitIntervalMs = x; - log() << "groupCommitIntervalMs " << x << endl; + int x = (int) cmdObj["journalCommitInterval"].Number(); + assert( x > 1 && x < 500 ); + cmdLine.journalCommitInterval = x; + log() << "setParameter journalCommitInterval=" << x << endl; s++; } if( cmdObj.hasElement("notablescan") ) { @@ -241,7 +237,7 @@ namespace mongo { s++; } - if( s == 0 ) { + if( s == 0 && !found ) { errmsg = "no option found to set, use help:true to see options "; return false; } @@ -257,7 +253,7 @@ namespace mongo { virtual void help( stringstream &help ) const { help << "a way to check that the server is alive. responds immediately even if server is in a db lock."; } virtual LockType locktype() const { return NONE; } virtual bool requiresAuth() { return false; } - virtual bool run(const string& badns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + virtual bool run(const string& badns, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) { // IMPORTANT: Don't put anything in here that might lock db - including authentication return true; } @@ -270,7 +266,7 @@ namespace mongo { virtual bool slaveOk() const { return true; } virtual bool readOnly() { return true; } virtual LockType locktype() const { return NONE; } - virtual bool run(const string& ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& ns, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { if ( globalScriptEngine ) { BSONObjBuilder bb( result.subobjStart( "js" ) ); result.append( "utf8" , globalScriptEngine->utf8Ok() ); @@ -292,7 +288,7 @@ namespace mongo { virtual LockType locktype() const { return NONE; } virtual bool slaveOk() const { return true; } virtual bool adminOnly() const { return true; } - virtual bool run(const string& ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& ns, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { rotateLogs(); return 1; } @@ -306,7 +302,7 @@ namespace mongo { virtual LockType locktype() const { return NONE; } virtual bool slaveOk() const { return true; } virtual bool adminOnly() const { return false; } - virtual bool run(const string& ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& ns, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { BSONObjBuilder b( result.subobjStart( "commands" ) ); for ( map<string,Command*>::iterator i=_commands->begin(); i!=_commands->end(); ++i ) { Command * c = i->second; @@ -361,7 +357,7 @@ namespace mongo { } virtual LockType locktype() const { return NONE; } CmdForceError() : Command("forceerror") {} - bool run(const string& dbnamne, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool run(const string& dbnamne, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { uassert( 10038 , "forced error", false); return true; } @@ -373,7 +369,7 @@ namespace mongo { virtual bool slaveOk() const { return true; } virtual LockType locktype() const { return NONE; } virtual bool requiresAuth() { return false; } - virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + virtual bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) { result << "options" << QueryOption_AllSupported; return true; } @@ -393,7 +389,7 @@ namespace mongo { help << "{ getLog : '*' } OR { getLog : 'global' }"; } - virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + virtual bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) { string p = cmdObj.firstElement().String(); if ( p == "*" ) { vector<string> names; diff --git a/db/dbeval.cpp b/db/dbeval.cpp index 3a53200a49f..5fe137fc3a3 100644 --- a/db/dbeval.cpp +++ b/db/dbeval.cpp @@ -121,7 +121,7 @@ namespace mongo { } virtual LockType locktype() const { return NONE; } CmdEval() : Command("eval", false, "$eval") { } - bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { AuthenticationInfo *ai = cc().getAuthenticationInfo(); uassert( 12598 , "$eval reads unauthorized", ai->isAuthorizedReads(dbname.c_str()) ); diff --git a/db/dbmessage.h b/db/dbmessage.h index a14d4cf5142..a789bff849c 100644 --- a/db/dbmessage.h +++ b/db/dbmessage.h @@ -122,7 +122,7 @@ namespace mongo { /** the 32 bit field before the ns * track all bit usage here as its cross op - * 0: InsertOption_KeepGoing + * 0: InsertOption_ContinueOnError * 1: fromWriteback */ int& reservedField() { return *reserved; } @@ -233,7 +233,7 @@ namespace mongo { public: enum ReservedOptions { - Reserved_InsertOption_KeepGoing = 1 << 0 , + Reserved_InsertOption_ContinueOnError = 1 << 0 , Reserved_FromWriteback = 1 << 1 }; }; diff --git a/db/dbwebserver.cpp b/db/dbwebserver.cpp index 40950a8ccb3..50a59fa1267 100644 --- a/db/dbwebserver.cpp +++ b/db/dbwebserver.cpp @@ -61,7 +61,7 @@ namespace mongo { class DbWebServer : public MiniWebServer { public: DbWebServer(const string& ip, int port, const AdminAccess* webUsers) - : MiniWebServer(ip, port), _webUsers(webUsers) { + : MiniWebServer("admin web console", ip, port), _webUsers(webUsers) { WebStatusPlugin::initAll(); } @@ -424,7 +424,7 @@ namespace mongo { string errmsg; BSONObjBuilder sub; - if ( ! c->run( "admin.$cmd" , co , errmsg , sub , false ) ) + if ( ! c->run( "admin.$cmd" , co , 0, errmsg , sub , false ) ) buf.append( cmd , errmsg ); else buf.append( cmd , sub.obj() ); @@ -531,7 +531,6 @@ namespace mongo { Client::initThread("websvr"); const int p = cmdLine.port + 1000; DbWebServer mini(cmdLine.bind_ip, p, adminAccessPtr.get()); - log() << "web admin interface listening on port " << p << endl; mini.initAndListen(); cc().shutdown(); } diff --git a/db/driverHelpers.cpp b/db/driverHelpers.cpp index d98a33b25c5..12aa01886c4 100644 --- a/db/driverHelpers.cpp +++ b/db/driverHelpers.cpp @@ -46,7 +46,7 @@ namespace mongo { class ObjectIdTest : public BasicDriverHelper { public: ObjectIdTest() : BasicDriverHelper( "driverOIDTest" ) {} - virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { if ( cmdObj.firstElement().type() != jstOID ) { errmsg = "not oid"; return false; diff --git a/db/dur.cpp b/db/dur.cpp index 6cb69ac5ac2..dfa36f95224 100644 --- a/db/dur.cpp +++ b/db/dur.cpp @@ -62,11 +62,11 @@ #include "dur_journal.h" #include "dur_commitjob.h" #include "dur_recover.h" +#include "dur_stats.h" #include "../util/concurrency/race.h" #include "../util/mongoutils/hash.h" #include "../util/mongoutils/str.h" #include "../util/timer.h" -#include "dur_stats.h" using namespace mongoutils; @@ -74,8 +74,9 @@ namespace mongo { namespace dur { - void WRITETODATAFILES(); - void PREPLOGBUFFER(); + void PREPLOGBUFFER(JSectHeader& outParm); + void WRITETOJOURNAL(JSectHeader h, AlignedBuilder& uncompressed); + void WRITETODATAFILES(const JSectHeader& h, AlignedBuilder& uncompressed); /** declared later in this file only used in this file -- use DurableInterface::commitNow() outside @@ -129,6 +130,7 @@ namespace mongo { "commits" << _commits << "journaledMB" << _journaledBytes / 1000000.0 << "writeToDataFilesMB" << _writeToDataFilesBytes / 1000000.0 << + "compression" << _journaledBytes / (_uncompressedBytes+1.0) << "commitsInWriteLock" << _commitsInWriteLock << "earlyCommits" << _earlyCommits << "timeMs" << @@ -143,6 +145,8 @@ namespace mongo { b << "ageOutJournalFiles" << "mutex timeout"; if( r == 0 ) b << "ageOutJournalFiles" << false; + if( cmdLine.journalCommitInterval != 0 ) + b << "journalCommitIntervalMs" << cmdLine.journalCommitInterval; return b.obj(); } @@ -269,6 +273,9 @@ namespace mongo { } bool DurableImpl::commitIfNeeded() { + if ( ! dbMutex.isWriteLocked() ) // we implicitly commit if needed when releasing write lock + return false; + DEV commitJob._nSinceCommitIfNeededCall = 0; if (commitJob.bytes() > UncommittedBytesLimit) { // should this also fire if CmdLine::DurAlwaysCommit? stats.curr->_earlyCommits++; @@ -325,15 +332,6 @@ namespace mongo { } #endif - /** write the buffer we have built to the journal and fsync it. - outside of lock as that could be slow. - */ - static void WRITETOJOURNAL(AlignedBuilder& ab) { - Timer t; - journal(ab); - stats.curr->_writeToJournalMicros += t.micros(); - } - // Functor to be called over all MongoFiles class validateSingleMapMatches { @@ -486,6 +484,7 @@ namespace mongo { stats.curr->_remapPrivateViewMicros += t.micros(); } + // lock order: dbMutex first, then this mutex groupCommitMutex("groupCommit"); bool _groupCommitWithLimitedLocks() { @@ -502,8 +501,8 @@ namespace mongo { commitJob.notifyCommitted(); return true; } - - PREPLOGBUFFER(); + JSectHeader h; + PREPLOGBUFFER(h); RWLockRecursive::Shared lk3(MongoFile::mmmutex); @@ -515,16 +514,15 @@ namespace mongo { lk1.reset(); // ****** now other threads can do writes ****** - - WRITETOJOURNAL(commitJob._ab); + WRITETOJOURNAL(h, commitJob._ab); assert( abLen == commitJob._ab.len() ); // a check that no one touched the builder while we were doing work. if so, our locking is wrong. // data is now in the journal, which is sufficient for acknowledging getLastError. // (ok to crash after that) commitJob.notifyCommitted(); - WRITETODATAFILES(); - assert( abLen == commitJob._ab.len() ); // WRITETODATAFILES uses _ab also + WRITETODATAFILES(h, commitJob._ab); + assert( abLen == commitJob._ab.len() ); // check again wasn't modded commitJob._ab.reset(); // can't : dbMutex._remapPrivateViewRequested = true; @@ -570,18 +568,19 @@ namespace mongo { // (and we are only read locked in the dbMutex, so it could happen) scoped_lock lk(groupCommitMutex); - PREPLOGBUFFER(); + JSectHeader h; + PREPLOGBUFFER(h); // todo : write to the journal outside locks, as this write can be slow. // however, be careful then about remapprivateview as that cannot be done // if new writes are then pending in the private maps. - WRITETOJOURNAL(commitJob._ab); + WRITETOJOURNAL(h, commitJob._ab); // data is now in the journal, which is sufficient for acknowledging getLastError. // (ok to crash after that) commitJob.notifyCommitted(); - WRITETODATAFILES(); + WRITETODATAFILES(h, commitJob._ab); debugValidateAllMapsMatch(); commitJob.reset(); @@ -613,6 +612,7 @@ namespace mongo { } /** locking: in read lock when called + or, for early commits (commitIfNeeded), in write lock @see MongoMMF::close() */ static void groupCommit() { @@ -686,29 +686,41 @@ namespace mongo { } extern int groupCommitIntervalMs; + filesystem::path getJournalDir(); void durThread() { Client::initThread("journal"); + + bool samePartition = true; + try { + const string dbpathDir = boost::filesystem::path(dbpath).native_directory_string(); + samePartition = onSamePartition(getJournalDir().string(), dbpathDir); + } + catch(...) { + } + while( !inShutdown() ) { RACECHECK + + unsigned ms = cmdLine.journalCommitInterval; + if( ms == 0 ) { + // use default + ms = samePartition ? 100 : 30; + } + + unsigned oneThird = (ms / 3) + 1; // +1 so never zero + try { - int millis = groupCommitIntervalMs; - { - stats.rotate(); - { - Timer t; - journalRotate(); // note we do this part outside of mongomutex - millis -= t.millis(); - wassert( millis <= groupCommitIntervalMs ); // race if groupCommitIntervalMs was changing by another thread so wassert - if( millis < 2 ) - millis = 2; - } + stats.rotate(); - // we do this in a couple blocks, which makes it a tiny bit faster (only a little) on throughput, - // but is likely also less spiky on our cpu usage, which is good: - sleepmillis(millis/2); - commitJob.wi()._deferred.invoke(); - sleepmillis(millis/2); + // we do this in a couple blocks (the invoke()), which makes it a tiny bit faster (only a little) on throughput, + // but is likely also less spiky on our cpu usage, which is good. + + // commit sooner if one or more getLastError j:true is pending + for( unsigned i = 1; i <= 2; i++ ) { + sleepmillis(oneThird); + if( commitJob._notify.nWaiting() ) + break; commitJob.wi()._deferred.invoke(); } @@ -772,6 +784,13 @@ namespace mongo { void DurableImpl::syncDataAndTruncateJournal() { dbMutex.assertWriteLocked(); + // a commit from the commit thread won't begin while we are in the write lock, + // but it may already be in progress and the end of that work is done outside + // (dbMutex) locks. This line waits for that to complete if already underway. + { + scoped_lock lk(groupCommitMutex); + } + groupCommit(); MongoFile::flushAll(true); journalCleanup(); diff --git a/db/dur_journal.cpp b/db/dur_journal.cpp index f85dda32b51..0a1bc5ebbad 100644 --- a/db/dur_journal.cpp +++ b/db/dur_journal.cpp @@ -34,6 +34,7 @@ #include "../util/file.h" #include "../util/checksum.h" #include "../util/concurrency/race.h" +#include "../util/compress.h" using namespace mongoutils; @@ -92,6 +93,11 @@ namespace mongo { assert(false); } + JSectFooter::JSectFooter() { + memset(this, 0, sizeof(*this)); + sentinel = JEntry::OpCode_Footer; + } + JSectFooter::JSectFooter(const void* begin, int len) { // needs buffer to compute hash sentinel = JEntry::OpCode_Footer; reserved = 0; @@ -103,6 +109,10 @@ namespace mongo { } bool JSectFooter::checkHash(const void* begin, int len) const { + if( !magicOk() ) { + log() << "journal footer not valid" << endl; + return false; + } Checksum c; c.gen(begin, len); DEV log() << "checkHash len:" << len << " hash:" << toHex(hash, 16) << " current:" << toHex(c.bytes, 16) << endl; @@ -317,13 +327,13 @@ namespace mongo { void preallocateFiles() { if( exists(getJournalDir()/"prealloc.0") || // if enabled previously, keep using - exists(getJournalDir()/"prealloc.1") || - preallocateIsFaster() ) { + exists(getJournalDir()/"prealloc.1") || + ( cmdLine.preallocj && preallocateIsFaster() ) ) { usingPreallocate = true; try { _preallocateFiles(); } - catch(...) { + catch(...) { log() << "warning caught exception in preallocateFiles, continuing" << endl; } } @@ -343,10 +353,12 @@ namespace mongo { { // zero the header File f; - f.open(temppath.string().c_str(), false, true); + f.open(temppath.string().c_str(), false, false); char buf[8192]; memset(buf, 0, 8192); f.write(0, buf, 8192); + f.truncate(DataLimitPerJournalFile); + f.fsync(); } boost::filesystem::rename(temppath, filepath); return; @@ -471,12 +483,6 @@ namespace mongo { /** called during recovery (the error message text below assumes that) */ unsigned long long journalReadLSN() { - if( !debug ) { - // in nondebug build, for now, be conservative until more tests written, and apply the whole journal. - // however we will still write the lsn file to exercise that code, and use in _DEBUG build. - return 0; - } - if( !MemoryMappedFile::exists(lsnPath()) ) { log() << "info no lsn file in journal/ directory" << endl; return 0; @@ -595,15 +601,7 @@ namespace mongo { j._ageOut = a; } - /** check if time to rotate files. assure a file is open. - done separately from the journal() call as we can do this part - outside of lock. - thread: durThread() - */ - void journalRotate() { - j.rotate(); - } - void Journal::rotate() { + void Journal::_rotate() { assert( !dbMutex.atLeastReadLocked() ); RACECHECK @@ -618,6 +616,7 @@ namespace mongo { return; if( _curLogFile ) { + _curLogFile->truncate(); closeCurrentJournalFile(); removeUnneededJournalFiles(); } @@ -636,24 +635,74 @@ namespace mongo { } } - /** write to journal + /** write (append) the buffer we have built to the journal and fsync it. + outside of dbMutex lock as this could be slow. + @param uncompressed - a buffer that will be written to the journal after compression + will not return until on disk */ - void journal(const AlignedBuilder& b) { - j.journal(b); + void WRITETOJOURNAL(JSectHeader h, AlignedBuilder& uncompressed) { + Timer t; + j.journal(h, uncompressed); + stats.curr->_writeToJournalMicros += t.micros(); } - void Journal::journal(const AlignedBuilder& b) { + void Journal::journal(const JSectHeader& h, const AlignedBuilder& uncompressed) { + RACECHECK + static AlignedBuilder b(32*1024*1024); + /* buffer to journal will be + JSectHeader + compressed operations + JSectFooter + */ + const unsigned headTailSize = sizeof(JSectHeader) + sizeof(JSectFooter); + const unsigned max = maxCompressedLength(uncompressed.len()) + headTailSize; + b.reset(max); + + { + dassert( h.sectionLen() == (unsigned) 0xffffffff ); // we will backfill later + b.appendStruct(h); + } + + size_t compressedLength = 0; + rawCompress(uncompressed.buf(), uncompressed.len(), b.cur(), &compressedLength); + assert( compressedLength < 0xffffffff ); + assert( compressedLength < max ); + b.skip(compressedLength); + + // footer + unsigned L = 0xffffffff; + { + // pad to alignment, and set the total section length in the JSectHeader + assert( 0xffffe000 == (~(Alignment-1)) ); + unsigned lenUnpadded = b.len() + sizeof(JSectFooter); + L = (lenUnpadded + Alignment-1) & (~(Alignment-1)); + dassert( L >= lenUnpadded ); + + ((JSectHeader*)b.atOfs(0))->setSectionLen(lenUnpadded); + + JSectFooter f(b.buf(), b.len()); // computes checksum + b.appendStruct(f); + dassert( b.len() == lenUnpadded ); + + b.skip(L - lenUnpadded); + dassert( b.len() % Alignment == 0 ); + } + try { mutex::scoped_lock lk(_curLogFileMutex); // must already be open -- so that _curFileId is correct for previous buffer building assert( _curLogFile ); - stats.curr->_journaledBytes += b.len(); - _written += b.len(); - _curLogFile->synchronousAppend((void *) b.buf(), b.len()); + stats.curr->_uncompressedBytes += b.len(); + unsigned w = b.len(); + _written += w; + assert( w <= L ); + stats.curr->_journaledBytes += L; + _curLogFile->synchronousAppend((const void *) b.buf(), L); + _rotate(); } catch(std::exception& e) { - log() << "warning exception in dur::journal " << e.what() << endl; + log() << "error exception in dur::journal " << e.what() << endl; throw; } } diff --git a/db/dur_journal.h b/db/dur_journal.h index e8e3dfd1465..664f63942e0 100644 --- a/db/dur_journal.h +++ b/db/dur_journal.h @@ -28,7 +28,8 @@ namespace mongo { extern bool okToCleanUp; /** at termination after db files closed & fsynced - also after covery + also after recovery + closes and removes journal files @param log report in log that we are cleaning up if we actually do any work */ void journalCleanup(bool log = false); @@ -43,12 +44,6 @@ namespace mongo { */ void journalRotate(); - /** write/append to journal file * - @param buf - a buffer that will be written to the journal. - will not return until on disk - */ - void journal(const AlignedBuilder& buf); - /** flag that something has gone wrong during writing to the journal (not for recovery mode) */ @@ -67,5 +62,7 @@ namespace mongo { // in case disk controller buffers writes const long long ExtraKeepTimeMs = 10000; + const unsigned JournalCommitIntervalDefault = 100; + } } diff --git a/db/dur_journalformat.h b/db/dur_journalformat.h index 72587ccd7b6..10ed8487b71 100644 --- a/db/dur_journalformat.h +++ b/db/dur_journalformat.h @@ -22,6 +22,8 @@ namespace mongo { namespace dur { + const unsigned Alignment = 8192; + #pragma pack(1) /** beginning header for a journal/j._<n> file there is nothing important int this header at this time. except perhaps version #. @@ -34,7 +36,11 @@ namespace mongo { // x4142 is asci--readable if you look at the file with head/less -- thus the starting values were near // that. simply incrementing the version # is safe on a fwd basis. +#if defined(_NOCOMPRESS) enum { CurrentVersion = 0x4148 }; +#else + enum { CurrentVersion = 0x4149 }; +#endif unsigned short _version; // these are just for diagnostic ease (make header more useful as plain text) @@ -55,11 +61,25 @@ namespace mongo { /** "Section" header. A section corresponds to a group commit. len is length of the entire section including header and footer. + header and footer are not compressed, just the stuff in between. */ struct JSectHeader { - unsigned len; // length in bytes of the whole section + private: + unsigned _sectionLen; // unpadded length in bytes of the whole section + public: unsigned long long seqNumber; // sequence number that can be used on recovery to not do too much work unsigned long long fileId; // matches JHeader::fileId + unsigned sectionLen() const { return _sectionLen; } + + // we store the unpadded length so we can use that when we uncompress. to + // get the true total size this must be rounded up to the Alignment. + void setSectionLen(unsigned lenUnpadded) { _sectionLen = lenUnpadded; } + + unsigned sectionLenWithPadding() const { + unsigned x = (sectionLen() + (Alignment-1)) & (~(Alignment-1)); + dassert( x % Alignment == 0 ); + return x; + } }; /** an individual write operation within a group commit section. Either the entire section should @@ -111,6 +131,7 @@ namespace mongo { /** group commit section footer. md5 is a key field. */ struct JSectFooter { + JSectFooter(); JSectFooter(const void* begin, int len); // needs buffer to compute hash unsigned sentinel; unsigned char hash[16]; @@ -123,6 +144,8 @@ namespace mongo { @return true if buffer looks valid */ bool checkHash(const void* begin, int len) const; + + bool magicOk() const { return *((unsigned*)magic) == 0x0a0a0a0a; } }; /** declares "the next entry(s) are for this database / file path prefix" */ diff --git a/db/dur_journalimpl.h b/db/dur_journalimpl.h index e436eae45f1..bf771c5d768 100644 --- a/db/dur_journalimpl.h +++ b/db/dur_journalimpl.h @@ -18,6 +18,7 @@ #pragma once +#include "dur_journalformat.h" #include "../util/logfile.h" namespace mongo { @@ -40,14 +41,14 @@ namespace mongo { */ void rotate(); - /** write to journal + /** append to the journal file */ - void journal(const AlignedBuilder& b); + void journal(const JSectHeader& h, const AlignedBuilder& b); boost::filesystem::path getFilePathFor(int filenumber) const; unsigned long long lastFlushTime() const { return _lastFlushTime; } - void cleanup(bool log); + void cleanup(bool log); // closes and removes journal files unsigned long long curFileId() const { return _curFileId; } @@ -61,6 +62,11 @@ namespace mongo { void open(); private: + /** check if time to rotate files. assure a file is open. + * internally called with every commit + */ + void _rotate(); + void _open(); void closeCurrentJournalFile(); void removeUnneededJournalFiles(); diff --git a/db/dur_preplogbuffer.cpp b/db/dur_preplogbuffer.cpp index 5851e415408..0d8ef3688db 100644 --- a/db/dur_preplogbuffer.cpp +++ b/db/dur_preplogbuffer.cpp @@ -60,7 +60,7 @@ namespace mongo { size_t ofs = 1; MongoMMF *mmf = findMMF_inlock(i->start(), /*out*/ofs); - _IF( !mmf->willNeedRemap() ) { + if( unlikely(!mmf->willNeedRemap()) ) { // tag this mmf as needed a remap of its private view later. // usually it will already be dirty/already set, so we do the if above first // to avoid possibility of cpu cache line contention @@ -97,7 +97,7 @@ namespace mongo { #endif bb.appendBuf(i->start(), e.len); - _IF (e.len != (unsigned)i->length()) { + if (unlikely(e.len != (unsigned)i->length())) { log() << "journal info splitting prepBasicWrite at boundary" << endl; // This only happens if we write to the last byte in a file and @@ -120,40 +120,25 @@ namespace mongo { // each time events switch to a different database we journal a JDbContext RelativePath lastDbPath; - set<WriteIntent>::iterator i = commitJob.writes().begin(); - - const WriteIntent *w = &(*i); - while(1) { - i++; - const WriteIntent *next = 0; - IF( i != commitJob.writes().end() ) { - next = &(*i); - PREFETCH(next); - } - prepBasicWrite_inlock(bb, w, lastDbPath); - _IF( next == 0 ) - break; - w = next; - }; + for( set<WriteIntent>::iterator i = commitJob.writes().begin(); i != commitJob.writes().end(); i++ ) { + prepBasicWrite_inlock(bb, &(*i), lastDbPath); + } } - void resetLogBuffer(AlignedBuilder& bb) { + void resetLogBuffer(/*out*/JSectHeader& h, AlignedBuilder& bb) { bb.reset(); - // JSectHeader - JSectHeader h; - h.len = (unsigned) 0xffffffff; // total length, will fill in later + h.setSectionLen(0xffffffff); // total length, will fill in later h.seqNumber = getLastDataFileFlushTime(); h.fileId = j.curFileId(); - - bb.appendStruct(h); } /** we will build an output buffer ourself and then use O_DIRECT we could be in read lock for this caller handles locking + @return partially populated sectheader and _ab set */ - void _PREPLOGBUFFER() { + void _PREPLOGBUFFER(JSectHeader& h) { assert( cmdLine.dur ); { @@ -165,7 +150,7 @@ namespace mongo { } AlignedBuilder& bb = commitJob._ab; - resetLogBuffer(bb); + resetLogBuffer(h, bb); // adds JSectHeader // ops other than basic writes (DurOp's) { @@ -174,34 +159,14 @@ namespace mongo { } } - { - prepBasicWrites(bb); - } - - // pad to alignment, and set the total section length in the JSectHeader - assert( 0xffffe000 == (~(Alignment-1)) ); - unsigned lenWillBe = bb.len() + sizeof(JSectFooter); - unsigned L = (lenWillBe + Alignment-1) & (~(Alignment-1)); - dassert( L >= lenWillBe ); - *((unsigned*)bb.atOfs(0)) = L; - - { - JSectFooter f(bb.buf(), bb.len()); - bb.appendStruct(f); - } - - { - unsigned padding = L - bb.len(); - bb.skip(padding); - dassert( bb.len() % Alignment == 0 ); - } + prepBasicWrites(bb); return; } - void PREPLOGBUFFER() { + void PREPLOGBUFFER(/*out*/ JSectHeader& h) { Timer t; j.assureLogFileOpen(); // so fileId is set - _PREPLOGBUFFER(); + _PREPLOGBUFFER(h); stats.curr->_prepLogBufferMicros += t.micros(); } diff --git a/db/dur_recover.cpp b/db/dur_recover.cpp index 2e1516914f1..1e719c0070d 100644 --- a/db/dur_recover.cpp +++ b/db/dur_recover.cpp @@ -27,6 +27,7 @@ #include "namespace.h" #include "../util/mongoutils/str.h" #include "../util/bufreader.h" +#include "../util/concurrency/race.h" #include "pdfile.h" #include "database.h" #include "db.h" @@ -35,6 +36,7 @@ #include "cmdline.h" #include "curop.h" #include "mongommf.h" +#include "../util/compress.h" #include <sys/stat.h> #include <fcntl.h> @@ -92,59 +94,73 @@ namespace mongo { throws */ class JournalSectionIterator : boost::noncopyable { + auto_ptr<BufReader> _entries; + const JSectHeader _h; + const char *_lastDbName; // pointer into mmaped journal file + const bool _doDurOps; + string _uncompressed; public: - JournalSectionIterator(const void *p, unsigned len, bool doDurOps) - : _br(p, len) - , _sectHead(static_cast<const JSectHeader*>(_br.skip(sizeof(JSectHeader)))) - , _lastDbName(NULL) - , _doDurOps(doDurOps) - {} + JournalSectionIterator(const JSectHeader& h, const void *compressed, unsigned compressedLen, bool doDurOpsRecovering) : + _h(h), + _lastDbName(0) + , _doDurOps(doDurOpsRecovering) + { + assert( doDurOpsRecovering ); + bool ok = uncompress((const char *)compressed, compressedLen, &_uncompressed); + if( !ok ) { + // it should always be ok (i think?) as there is a previous check to see that the JSectFooter is ok + log() << "couldn't uncompress journal section" << endl; + msgasserted(15874, "couldn't uncompress journal section"); + } + const char *p = _uncompressed.c_str(); + assert( compressedLen == _h.sectionLen() - sizeof(JSectFooter) - sizeof(JSectHeader) ); + _entries = auto_ptr<BufReader>( new BufReader(p, _uncompressed.size()) ); + } + + // we work with the uncompressed buffer when doing a WRITETODATAFILES (for speed) + JournalSectionIterator(const JSectHeader &h, const void *p, unsigned len) : + _entries( new BufReader((const char *) p, len) ), + _h(h), + _lastDbName(0) + , _doDurOps(false) - bool atEof() const { return _br.atEof(); } + { } - unsigned long long seqNumber() const { return _sectHead->seqNumber; } + bool atEof() const { return _entries->atEof(); } + + unsigned long long seqNumber() const { return _h.seqNumber; } /** get the next entry from the log. this function parses and combines JDbContext and JEntry's. - * @return true if got an entry. false at successful end of section (and no entry returned). * throws on premature end of section. */ - bool next(ParsedJournalEntry& e) { + void next(ParsedJournalEntry& e) { unsigned lenOrOpCode; - _br.read(lenOrOpCode); + _entries->read(lenOrOpCode); if (lenOrOpCode > JEntry::OpCode_Min) { switch( lenOrOpCode ) { case JEntry::OpCode_Footer: { - if (_doDurOps) { - const char* pos = (const char*) _br.pos(); - pos -= sizeof(lenOrOpCode); // rewind to include OpCode - const JSectFooter& footer = *(const JSectFooter*)pos; - int len = pos - (char*)_sectHead; - if (!footer.checkHash(_sectHead, len)) { - massert(13594, "journal checksum doesn't match", false); - } - } - return false; // false return value denotes end of section + assert( false ); } case JEntry::OpCode_FileCreated: case JEntry::OpCode_DropDb: { e.dbName = 0; - boost::shared_ptr<DurOp> op = DurOp::read(lenOrOpCode, _br); + boost::shared_ptr<DurOp> op = DurOp::read(lenOrOpCode, *_entries); if (_doDurOps) { e.op = op; } - return true; + return; } case JEntry::OpCode_DbContext: { - _lastDbName = (const char*) _br.pos(); - const unsigned limit = std::min((unsigned)Namespace::MaxNsLen, _br.remaining()); + _lastDbName = (const char*) _entries->pos(); + const unsigned limit = std::min((unsigned)Namespace::MaxNsLen, _entries->remaining()); const unsigned len = strnlen(_lastDbName, limit); massert(13533, "problem processing journal file during recovery", _lastDbName[len] == '\0'); - _br.skip(len+1); // skip '\0' too - _br.read(lenOrOpCode); + _entries->skip(len+1); // skip '\0' too + _entries->read(lenOrOpCode); // read this for the fall through } // fall through as a basic operation always follows jdbcontext, and we don't have anything to return yet @@ -156,18 +172,13 @@ namespace mongo { // JEntry - a basic write assert( lenOrOpCode && lenOrOpCode < JEntry::OpCode_Min ); - _br.rewind(4); - e.e = (JEntry *) _br.skip(sizeof(JEntry)); + _entries->rewind(4); + e.e = (JEntry *) _entries->skip(sizeof(JEntry)); e.dbName = e.e->isLocalDbContext() ? "local" : _lastDbName; assert( e.e->len == lenOrOpCode ); - _br.skip(e.e->len); - return true; + _entries->skip(e.e->len); } - private: - BufReader _br; - const JSectHeader* _sectHead; - const char *_lastDbName; // pointer into mmaped journal file - const bool _doDurOps; + }; static string fileName(const char* dbName, int fileNo) { @@ -289,27 +300,64 @@ namespace mongo { log() << "END section" << endl; } - void RecoveryJob::processSection(const void *p, unsigned len) { + void RecoveryJob::processSection(const JSectHeader *h, const void *p, unsigned len, const JSectFooter *f) { scoped_lock lk(_mx); + RACECHECK + + /** todo: we should really verify the checksum to see that seqNumber is ok? + that is expensive maybe there is some sort of checksum of just the header + within the header itself + */ + if( _recovering && _lastDataSyncedFromLastRun > h->seqNumber + ExtraKeepTimeMs ) { + if( h->seqNumber != _lastSeqMentionedInConsoleLog ) { + static int n; + if( ++n < 10 ) { + log() << "recover skipping application of section seq:" << h->seqNumber << " < lsn:" << _lastDataSyncedFromLastRun << endl; + } + else if( n == 10 ) { + log() << "recover skipping application of section more..." << endl; + } + _lastSeqMentionedInConsoleLog = h->seqNumber; + } + return; + } - vector<ParsedJournalEntry> entries; - JournalSectionIterator i(p, len, _recovering); + auto_ptr<JournalSectionIterator> i; + if( _recovering ) { + i = auto_ptr<JournalSectionIterator>(new JournalSectionIterator(*h, p, len, _recovering)); + } + else { + i = auto_ptr<JournalSectionIterator>(new JournalSectionIterator(*h, /*after header*/p, /*w/out header*/len)); + } - //DEV log() << "recovery processSection seq:" << i.seqNumber() << endl; - if( _recovering && _lastDataSyncedFromLastRun > i.seqNumber() + ExtraKeepTimeMs ) { - if( i.seqNumber() != _lastSeqMentionedInConsoleLog ) { - log() << "recover skipping application of section seq:" << i.seqNumber() << " < lsn:" << _lastDataSyncedFromLastRun << endl; - _lastSeqMentionedInConsoleLog = i.seqNumber(); + // we use a static so that we don't have to reallocate every time through. occasionally we + // go back to a small allocation so that if there were a spiky growth it won't stick forever. + static vector<ParsedJournalEntry> entries; + entries.clear(); +/** TEMP uncomment + RARELY OCCASIONALLY { + if( entries.capacity() > 2048 ) { + entries.shrink_to_fit(); + entries.reserve(2048); } - return; } +*/ // first read all entries to make sure this section is valid ParsedJournalEntry e; - while( i.next(e) ) { + while( !i->atEof() ) { + i->next(e); entries.push_back(e); } + // after the entries check the footer checksum + if( _recovering ) { + assert( ((const char *)h) + sizeof(JSectHeader) == p ); + if( !f->checkHash(h, len + sizeof(JSectHeader)) ) { + msgasserted(13594, "journal checksum doesn't match"); + } + } + // got all the entries for one group commit. apply them: applyEntries(entries); } @@ -345,11 +393,16 @@ namespace mongo { if( h.fileId != fileId ) { if( debug || (cmdLine.durOptions & CmdLine::DurDumpJournal) ) { log() << "Ending processFileBuffer at differing fileId want:" << fileId << " got:" << h.fileId << endl; - log() << " sect len:" << h.len << " seqnum:" << h.seqNumber << endl; + log() << " sect len:" << h.sectionLen() << " seqnum:" << h.seqNumber << endl; } return true; } - processSection(br.skip(h.len), h.len); + unsigned slen = h.sectionLen(); + unsigned dataLen = slen - sizeof(JSectHeader) - sizeof(JSectFooter); + const char *hdr = (const char *) br.skip(h.sectionLenWithPadding()); + const char *data = hdr + sizeof(JSectHeader); + const char *footer = data + dataLen; + processSection((const JSectHeader*) hdr, data, dataLen, (const JSectFooter*) footer); // ctrl c check killCurrentOp.checkForInterrupt(false); @@ -367,6 +420,17 @@ namespace mongo { /** apply a specific journal file */ bool RecoveryJob::processFile(path journalfile) { log() << "recover " << journalfile.string() << endl; + + try { + if( boost::filesystem::file_size( journalfile.string() ) == 0 ) { + log() << "recover info " << journalfile.string() << " has zero length" << endl; + return true; + } + } catch(...) { + // if something weird like a permissions problem keep going so the massert down below can happen (presumably) + log() << "recover exception checking filesize" << endl; + } + MemoryMappedFile f; void *p = f.mapWithOptions(journalfile.string().c_str(), MongoFile::READONLY | MongoFile::SEQUENTIAL); massert(13544, str::stream() << "recover error couldn't open " << journalfile.string(), p); @@ -382,13 +446,19 @@ namespace mongo { _lastDataSyncedFromLastRun = journalReadLSN(); log() << "recover lsn: " << _lastDataSyncedFromLastRun << endl; + // todo: we could truncate the journal file at rotation time to the right length, then this abruptEnd + // check can be turned back on. this is relevant when prealloc is being used. for( unsigned i = 0; i != files.size(); ++i ) { - /*bool abruptEnd = */processFile(files[i]); - /*if( abruptEnd && i+1 < files.size() ) { + bool abruptEnd = processFile(files[i]); + if( abruptEnd && i+1 < files.size() ) { +#if 1 // Leaving this as a warning for now. TODO: make this an error post 2.0 + log() << "recover warning: abrupt end to file " << files[i].string() << ", yet it isn't the last journal file" << endl; +#else log() << "recover error: abrupt end to file " << files[i].string() << ", yet it isn't the last journal file" << endl; close(); uasserted(13535, "recover abrupt journal file end"); - }*/ +#endif + } } close(); diff --git a/db/dur_recover.h b/db/dur_recover.h index b5a922b498a..955e730ea05 100644 --- a/db/dur_recover.h +++ b/db/dur_recover.h @@ -2,6 +2,7 @@ #pragma once +#include "dur_journalformat.h" #include "../util/concurrency/mutex.h" #include "../util/file.h" @@ -15,10 +16,14 @@ namespace mongo { */ class RecoveryJob : boost::noncopyable { public: - RecoveryJob() :_lastDataSyncedFromLastRun(0), _mx("recovery"), _recovering(false) { _lastSeqMentionedInConsoleLog = 1; } + RecoveryJob() : _lastDataSyncedFromLastRun(0), + _mx("recovery"), _recovering(false) { _lastSeqMentionedInConsoleLog = 1; } void go(vector<path>& files); ~RecoveryJob(); - void processSection(const void *, unsigned len); + + /** @param data data between header and footer. compressed if recovering. */ + void processSection(const JSectHeader *h, const void *data, unsigned len, const JSectFooter *f); + void close(); // locks and calls _close() static RecoveryJob & get() { return _instance; } diff --git a/db/dur_stats.h b/db/dur_stats.h index d4943c01cb3..50a26d1f215 100644 --- a/db/dur_stats.h +++ b/db/dur_stats.h @@ -20,6 +20,7 @@ namespace mongo { unsigned _commits; unsigned _earlyCommits; // count of early commits from commitIfNeeded() or from getDur().commitNow() unsigned long long _journaledBytes; + unsigned long long _uncompressedBytes; unsigned long long _writeToDataFilesBytes; unsigned long long _prepLogBufferMicros; diff --git a/db/dur_writetodatafiles.cpp b/db/dur_writetodatafiles.cpp index cdccb018d83..6724f0731aa 100644 --- a/db/dur_writetodatafiles.cpp +++ b/db/dur_writetodatafiles.cpp @@ -47,9 +47,9 @@ namespace mongo { @see https://docs.google.com/drawings/edit?id=1TklsmZzm7ohIZkwgeK6rMvsdaR13KjtJYMsfLr175Zc&hl=en */ - void WRITETODATAFILES_Impl1() { + void WRITETODATAFILES_Impl1(const JSectHeader& h, AlignedBuilder& uncompressed) { RWLockRecursive::Shared lk(MongoFile::mmmutex); - RecoveryJob::get().processSection(commitJob._ab.buf(), commitJob._ab.len()); + RecoveryJob::get().processSection(&h, uncompressed.buf(), uncompressed.len(), 0); } #if 0 @@ -81,16 +81,14 @@ namespace mongo { #endif // concurrency: in mmmutex, not necessarily in dbMutex - void WRITETODATAFILES() { + void WRITETODATAFILES(const JSectHeader& h, AlignedBuilder& uncompressed) { Timer t; #if defined(_EXPERIMENTAL) WRITETODATAFILES_Impl3(); #else - WRITETODATAFILES_Impl1(); + WRITETODATAFILES_Impl1(h, uncompressed); #endif stats.curr->_writeToDataFilesMicros += t.micros(); - - } } diff --git a/db/durop.h b/db/durop.h index c4574c2e3cb..9ab1bfcbede 100644 --- a/db/durop.h +++ b/db/durop.h @@ -28,8 +28,6 @@ namespace mongo { namespace dur { - const unsigned Alignment = 8192; - /** DurOp - Operations we journal that aren't just basic writes. * * Basic writes are logged as JEntry's, and indicated in ram temporarily as struct dur::WriteIntent. diff --git a/db/geo/2d.cpp b/db/geo/2d.cpp index 21b0eaa6601..9b762b260de 100644 --- a/db/geo/2d.cpp +++ b/db/geo/2d.cpp @@ -138,7 +138,11 @@ namespace mongo { GeoHash b = a; b.move(1, 1); - _error = distance(a, b); + // Epsilon is 1/100th of a bucket size + // TODO: Can we actually find error bounds for the sqrt function? + double epsilon = 0.001 / _scaling; + _error = distance(a, b) + epsilon; + // Error in radians _errorSphere = deg2rad( _error ); } @@ -293,6 +297,14 @@ namespace mongo { } + BSONObj _fromBSONHash( const BSONElement& e ) const { + return _unhash( _tohash( e ) ); + } + + BSONObj _fromBSONHash( const BSONObj& o ) const { + return _unhash( _tohash( o.firstElement() ) ); + } + GeoHash _tohash( const BSONElement& e ) const { if ( e.isABSONObj() ) return _hash( e.embeddedObject() ); @@ -368,6 +380,10 @@ namespace mongo { } double sizeEdge( const GeoHash& a ) const { + + if( ! a.constrains() ) + return _max - _min; + double ax,ay,bx,by; GeoHash b = a; b.move( 1 , 1 ); @@ -443,6 +459,10 @@ namespace mongo { Box() {} + BSONArray toBSON() const { + return BSON_ARRAY( BSON_ARRAY( _min._x << _min._y ) << BSON_ARRAY( _max._x << _max._y ) ); + } + string toString() const { StringBuilder buf(64); buf << _min.toString() << " -->> " << _max.toString(); @@ -630,8 +650,8 @@ namespace mongo { } else if( fudge == 0 ){ - if( p._y == p1._y && p._x == p1._x ) return true; - else if( p._y == p2._y && p._x == p2._x ) return true; + if( p._y == p1._y && p._x == p1._x ) return true; + else if( p._y == p2._y && p._x == p2._x ) return true; } // Normal intersection test. @@ -742,293 +762,96 @@ namespace mongo { geo2dplugin.getName(); } - struct GeoUnitTest : public UnitTest { - - int round( double d ) { - return (int)(.5+(d*1000)); - } - -#define GEOHEQ(a,b) if ( a.toString() != b ){ cout << "[" << a.toString() << "] != [" << b << "]" << endl; assert( a == GeoHash(b) ); } - - void run() { - assert( ! GeoHash::isBitSet( 0 , 0 ) ); - assert( ! GeoHash::isBitSet( 0 , 31 ) ); - assert( GeoHash::isBitSet( 1 , 31 ) ); - - IndexSpec i( BSON( "loc" << "2d" ) ); - Geo2dType g( &geo2dplugin , &i ); - { - double x = 73.01212; - double y = 41.352964; - BSONObj in = BSON( "x" << x << "y" << y ); - GeoHash h = g._hash( in ); - BSONObj out = g._unhash( h ); - assert( round(x) == round( out["x"].number() ) ); - assert( round(y) == round( out["y"].number() ) ); - assert( round( in["x"].number() ) == round( out["x"].number() ) ); - assert( round( in["y"].number() ) == round( out["y"].number() ) ); - } - - { - double x = -73.01212; - double y = 41.352964; - BSONObj in = BSON( "x" << x << "y" << y ); - GeoHash h = g._hash( in ); - BSONObj out = g._unhash( h ); - assert( round(x) == round( out["x"].number() ) ); - assert( round(y) == round( out["y"].number() ) ); - assert( round( in["x"].number() ) == round( out["x"].number() ) ); - assert( round( in["y"].number() ) == round( out["y"].number() ) ); - } - - { - GeoHash h( "0000" ); - h.move( 0 , 1 ); - GEOHEQ( h , "0001" ); - h.move( 0 , -1 ); - GEOHEQ( h , "0000" ); - - h.init( "0001" ); - h.move( 0 , 1 ); - GEOHEQ( h , "0100" ); - h.move( 0 , -1 ); - GEOHEQ( h , "0001" ); - - - h.init( "0000" ); - h.move( 1 , 0 ); - GEOHEQ( h , "0010" ); - } - - { - Box b( 5 , 5 , 2 ); - assert( "(5,5) -->> (7,7)" == b.toString() ); - } - - { - GeoHash a = g.hash( 1 , 1 ); - GeoHash b = g.hash( 4 , 5 ); - assert( 5 == (int)(g.distance( a , b ) ) ); - a = g.hash( 50 , 50 ); - b = g.hash( 42 , 44 ); - assert( round(10) == round(g.distance( a , b )) ); - } - - { - GeoHash x("0000"); - assert( 0 == x.getHash() ); - x.init( 0 , 1 , 32 ); - GEOHEQ( x , "0000000000000000000000000000000000000000000000000000000000000001" ) - - assert( GeoHash( "1100").hasPrefix( GeoHash( "11" ) ) ); - assert( ! GeoHash( "1000").hasPrefix( GeoHash( "11" ) ) ); - } - - { - GeoHash x("1010"); - GEOHEQ( x , "1010" ); - GeoHash y = x + "01"; - GEOHEQ( y , "101001" ); - } - - { - - GeoHash a = g.hash( 5 , 5 ); - GeoHash b = g.hash( 5 , 7 ); - GeoHash c = g.hash( 100 , 100 ); - /* - cout << "a: " << a << endl; - cout << "b: " << b << endl; - cout << "c: " << c << endl; - - cout << "a: " << a.toStringHex1() << endl; - cout << "b: " << b.toStringHex1() << endl; - cout << "c: " << c.toStringHex1() << endl; - */ - BSONObj oa = a.wrap(); - BSONObj ob = b.wrap(); - BSONObj oc = c.wrap(); - /* - cout << "a: " << oa.hexDump() << endl; - cout << "b: " << ob.hexDump() << endl; - cout << "c: " << oc.hexDump() << endl; - */ - assert( oa.woCompare( ob ) < 0 ); - assert( oa.woCompare( oc ) < 0 ); - - } - - { - GeoHash x( "000000" ); - x.move( -1 , 0 ); - GEOHEQ( x , "101010" ); - x.move( 1 , -1 ); - GEOHEQ( x , "010101" ); - x.move( 0 , 1 ); - GEOHEQ( x , "000000" ); - } - { - GeoHash prefix( "110011000000" ); - GeoHash entry( "1100110000011100000111000001110000011100000111000001000000000000" ); - assert( ! entry.hasPrefix( prefix ) ); - entry = GeoHash("1100110000001100000111000001110000011100000111000001000000000000"); - assert( entry.toString().find( prefix.toString() ) == 0 ); - assert( entry.hasPrefix( GeoHash( "1100" ) ) ); - assert( entry.hasPrefix( prefix ) ); - } - - { - GeoHash a = g.hash( 50 , 50 ); - GeoHash b = g.hash( 48 , 54 ); - assert( round( 4.47214 ) == round( g.distance( a , b ) ) ); - } - - - { - Box b( Point( 29.762283 , -95.364271 ) , Point( 29.764283000000002 , -95.36227099999999 ) ); - assert( b.inside( 29.763 , -95.363 ) ); - assert( ! b.inside( 32.9570255 , -96.1082497 ) ); - assert( ! b.inside( 32.9570255 , -96.1082497 , .01 ) ); - } - - { - GeoHash a( "11001111" ); - assert( GeoHash( "11" ) == a.commonPrefix( GeoHash("11") ) ); - assert( GeoHash( "11" ) == a.commonPrefix( GeoHash("11110000") ) ); - } - - { - int N = 10000; - { - Timer t; - for ( int i=0; i<N; i++ ) { - unsigned x = (unsigned)rand(); - unsigned y = (unsigned)rand(); - GeoHash h( x , y ); - unsigned a,b; - h.unhash_slow( a,b ); - assert( a == x ); - assert( b == y ); - } - //cout << "slow: " << t.millis() << endl; - } - - { - Timer t; - for ( int i=0; i<N; i++ ) { - unsigned x = (unsigned)rand(); - unsigned y = (unsigned)rand(); - GeoHash h( x , y ); - unsigned a,b; - h.unhash_fast( a,b ); - assert( a == x ); - assert( b == y ); - } - //cout << "fast: " << t.millis() << endl; - } - - } - - { - // see http://en.wikipedia.org/wiki/Great-circle_distance#Worked_example - - { - Point BNA (-86.67, 36.12); - Point LAX (-118.40, 33.94); + class GeoHopper; - double dist1 = spheredist_deg(BNA, LAX); - double dist2 = spheredist_deg(LAX, BNA); + class GeoPoint { + public: - // target is 0.45306 - assert( 0.45305 <= dist1 && dist1 <= 0.45307 ); - assert( 0.45305 <= dist2 && dist2 <= 0.45307 ); - } - { - Point BNA (-1.5127, 0.6304); - Point LAX (-2.0665, 0.5924); + GeoPoint() : _distance( -1 ), _exact( false ) + {} - double dist1 = spheredist_rad(BNA, LAX); - double dist2 = spheredist_rad(LAX, BNA); + //// Distance not used //// - // target is 0.45306 - assert( 0.45305 <= dist1 && dist1 <= 0.45307 ); - assert( 0.45305 <= dist2 && dist2 <= 0.45307 ); - } - { - Point JFK (-73.77694444, 40.63861111 ); - Point LAX (-118.40, 33.94); + GeoPoint( const GeoKeyNode& node ) + : _key( node._key ) , _loc( node.recordLoc ) , _o( node.recordLoc.obj() ), _distance( -1 ) , _exact( false ) { + } - double dist = spheredist_deg(JFK, LAX) * EARTH_RADIUS_MILES; - assert( dist > 2469 && dist < 2470 ); - } + //// Immediate initialization of distance //// - { - Point BNA (-86.67, 36.12); - Point LAX (-118.40, 33.94); - Point JFK (-73.77694444, 40.63861111 ); - assert( spheredist_deg(BNA, BNA) < 1e-6); - assert( spheredist_deg(LAX, LAX) < 1e-6); - assert( spheredist_deg(JFK, JFK) < 1e-6); + GeoPoint( const GeoKeyNode& node, double distance, bool exact ) + : _key( node._key ) , _loc( node.recordLoc ) , _o( node.recordLoc.obj() ), _distance( distance ), _exact( exact ) { + } - Point zero (0, 0); - Point antizero (0,-180); + GeoPoint( const GeoPoint& pt, double distance, bool exact ) + : _key( pt.key() ) , _loc( pt.loc() ) , _o( pt.obj() ), _distance( distance ), _exact( exact ) { + } - // these were known to cause NaN - assert( spheredist_deg(zero, zero) < 1e-6); - assert( fabs(M_PI-spheredist_deg(zero, antizero)) < 1e-6); - assert( fabs(M_PI-spheredist_deg(antizero, zero)) < 1e-6); - } - } + bool operator<( const GeoPoint& other ) const { + if( _distance != other._distance ) return _distance < other._distance; + if( _exact != other._exact ) return _exact < other._exact; + return _loc < other._loc; } - } geoUnitTest; - class GeoHopper; + double distance() const { + return _distance; + } - class GeoPoint { - public: - GeoPoint() { } + bool isExact() const { + return _exact; + } - //// Distance not used //// + BSONObj key() const { + return _key; + } - GeoPoint( const GeoKeyNode& node ) - : _key( node._key ) , _loc( node.recordLoc ) , _o( node.recordLoc.obj() ) , _exactDistance( -1 ), _exactWithin( false ) { + DiskLoc loc() const { + return _loc; } - - //// Immediate initialization of exact distance //// - GeoPoint( const GeoKeyNode& node , double exactDistance, bool exactWithin ) - : _key( node._key ) , _loc( node.recordLoc ) , _o( node.recordLoc.obj() ), _exactDistance( exactDistance ), _exactWithin( exactWithin ) { + BSONObj obj() const { + return _o; } - bool operator<( const GeoPoint& other ) const { - return _exactDistance < other._exactDistance; + BSONObj pt() const { + return _pt; } - bool isEmpty() const { + bool isEmpty() { return _o.isEmpty(); } string toString() const { - return str::stream() << "Point from " << _o.toString() << " dist : " << _exactDistance << " within ? " << _exactWithin; + return str::stream() << "Point from " << _o << " dist : " << _distance << ( _exact ? " (ex)" : " (app)" ); } BSONObj _key; DiskLoc _loc; BSONObj _o; + BSONObj _pt; - double _exactDistance; - bool _exactWithin; + double _distance; + bool _exact; }; // GeoBrowse subclasses this class GeoAccumulator { public: - GeoAccumulator( const Geo2dType * g , const BSONObj& filter ) - : _g(g) , _lookedAt(0) , _objectsLoaded(0) , _found(0) { + GeoAccumulator( const Geo2dType * g , const BSONObj& filter, bool uniqueDocs, bool needDistance ) + : _g(g) , + _keysChecked(0) , + _lookedAt(0) , + _matchesPerfd(0) , + _objectsLoaded(0) , + _pointsLoaded(0) , + _found(0) , + _uniqueDocs( uniqueDocs ) , + _needDistance( needDistance ) + { if ( ! filter.isEmpty() ) { _matcher.reset( new CoveredIndexMatcher( filter , g->keyPattern() ) ); + GEODEBUG( "Matcher is now " << _matcher->docMatcher().toString() ); } } @@ -1042,6 +865,9 @@ namespace mongo { set< pair<DiskLoc,int> > _seen; public: bool seen(DiskLoc bucket, int pos) { + + _keysChecked++; + pair< set<pair<DiskLoc,int> >::iterator, bool > seenBefore = _seen.insert( make_pair(bucket,pos) ); if ( ! seenBefore.second ) { GEODEBUG( "\t\t\t\t already seen : " << bucket.toString() << ' ' << pos ); // node.key.toString() << " @ " << Point( _g, GeoHash( node.key.firstElement() ) ).toString() << " with " << node.recordLoc.obj()["_id"] ); @@ -1050,29 +876,43 @@ namespace mongo { return false; } - void add( const GeoKeyNode& node ) { + enum KeyResult { BAD, BORDER, GOOD }; + + virtual void add( const GeoKeyNode& node ) { - GEODEBUG( "\t\t\t\t checking key " << node.key.toString() ) + GEODEBUG( "\t\t\t\t checking key " << node._key.toString() ) _lookedAt++; - // distance check - double d = 0; - if ( ! checkDistance( node , d ) ) { - GEODEBUG( "\t\t\t\t bad distance : " << node.recordLoc.obj() << "\t" << d ); + //// + // Approximate distance check using key data + //// + double keyD = 0; + Point keyP( _g, GeoHash( node._key.firstElement(), _g->_bits ) ); + KeyResult keyOk = approxKeyCheck( keyP, keyD ); + if ( keyOk == BAD ) { + GEODEBUG( "\t\t\t\t bad distance : " << node.recordLoc.obj() << "\t" << keyD ); return; } - GEODEBUG( "\t\t\t\t good distance : " << node.recordLoc.obj() << "\t" << d ); + GEODEBUG( "\t\t\t\t good distance : " << node.recordLoc.obj() << "\t" << keyD ); + //// + // Check for match using other key (and potentially doc) criteria + //// // Remember match results for each object map<DiskLoc, bool>::iterator match = _matched.find( node.recordLoc ); bool newDoc = match == _matched.end(); if( newDoc ) { + GEODEBUG( "\t\t\t\t matching new doc with " << (_matcher ? _matcher->docMatcher().toString() : "(empty)" ) ); + // matcher MatchDetails details; if ( _matcher.get() ) { bool good = _matcher->matchesWithSingleKeyIndex( node._key , node.recordLoc , &details ); + + _matchesPerfd++; + if ( details._loadedObject ) _objectsLoaded++; @@ -1094,12 +934,50 @@ namespace mongo { return; } - addSpecific( node , d, newDoc ); - _found++; + //// + // Exact check with particular data fields + //// + // Can add multiple points + int diff = addSpecific( node , keyP, keyOk == BORDER, keyD, newDoc ); + if( diff > 0 ) _found += diff; + else _found -= -diff; + + } + + virtual void getPointsFor( const BSONObj& key, const BSONObj& obj, vector< BSONObj >& locsForNode, bool allPoints = false ){ + + // Find all the location objects from the keys + vector< BSONObj > locs; + _g->getKeys( obj, allPoints ? locsForNode : locs ); + _pointsLoaded++; + + if( allPoints ) return; + if( locs.size() == 1 ){ + locsForNode.push_back( locs[0] ); + return; + } + + // Find the particular location we want + GeoHash keyHash( key.firstElement(), _g->_bits ); + + // log() << "Hash: " << node.key << " and " << keyHash.getHash() << " unique " << _uniqueDocs << endl; + for( vector< BSONObj >::iterator i = locs.begin(); i != locs.end(); ++i ) { + + // Ignore all locations not hashed to the key's hash, since we may see + // those later + if( _g->_hash( *i ) != keyHash ) continue; + + locsForNode.push_back( *i ); + + } + } - virtual void addSpecific( const GeoKeyNode& node , double d, bool newDoc ) = 0; - virtual bool checkDistance( const GeoKeyNode& node , double& d ) = 0; + virtual int addSpecific( const GeoKeyNode& node, const Point& p , bool inBounds, double d, bool newDoc ) = 0; + virtual KeyResult approxKeyCheck( const Point& p , double& keyD ) = 0; + virtual bool exactDocCheck( const Point& p , double& d ) = 0; + virtual bool expensiveExactCheck(){ return false; } + long long found() const { return _found; @@ -1109,9 +987,16 @@ namespace mongo { map<DiskLoc, bool> _matched; shared_ptr<CoveredIndexMatcher> _matcher; + long long _keysChecked; long long _lookedAt; + long long _matchesPerfd; long long _objectsLoaded; + long long _pointsLoaded; long long _found; + + bool _uniqueDocs; + bool _needDistance; + }; struct BtreeLocation { @@ -1264,8 +1149,8 @@ namespace mongo { DONE } _state; - GeoBrowse( const Geo2dType * g , string type , BSONObj filter = BSONObj() ) - : GeoCursorBase( g ), GeoAccumulator( g , filter ) , + GeoBrowse( const Geo2dType * g , string type , BSONObj filter = BSONObj(), bool uniqueDocs = true, bool needDistance = false ) + : GeoCursorBase( g ), GeoAccumulator( g , filter, uniqueDocs, needDistance ) , _type( type ) , _filter( filter ) , _firstCall(true), _nscanned(), _centerPrefix(0, 0, 0) { // Set up the initial expand state @@ -1350,11 +1235,9 @@ namespace mongo { virtual void fillStack( int maxToCheck, int maxToAdd = -1, bool onlyExpand = false ) { #ifdef GEODEBUGGING - - int s = _state; log() << "Filling stack with maximum of " << maxToCheck << ", state : " << (int) _state << endl; - #endif + if( maxToAdd < 0 ) maxToAdd = maxToCheck; int maxFound = _foundInExp + maxToCheck; assert( maxToCheck > 0 ); @@ -1395,7 +1278,6 @@ namespace mongo { while ( true ) { GEODEBUG( "box prefix [" << _prefix << "]" ); - #ifdef GEODEBUGGING if( _prefix.constrains() ) { log() << "current expand box : " << Box( _g, _prefix ).toString() << endl; @@ -1407,6 +1289,9 @@ namespace mongo { GEODEBUG( "expanding box points... "); + // Record the prefix we're actively exploring... + _expPrefix.reset( new GeoHash( _prefix ) ); + // Find points inside this prefix while ( _min.hasPrefix( _prefix ) && _min.advance( -1 , _foundInExp , this ) && _foundInExp < maxFound && _found < maxAdded ); while ( _max.hasPrefix( _prefix ) && _max.advance( 1 , _foundInExp , this ) && _foundInExp < maxFound && _found < maxAdded ); @@ -1419,7 +1304,7 @@ namespace mongo { #endif - GEODEBUG( "finished expand, found : " << ( maxToCheck - ( maxFound - _found ) ) ); + GEODEBUG( "finished expand, found : " << ( maxToAdd - ( maxAdded - _found ) ) ); if( _foundInExp >= maxFound || _found >= maxAdded ) return; // We've searched this prefix fully, remember @@ -1429,6 +1314,7 @@ namespace mongo { if ( ! _prefix.constrains() ) { GEODEBUG( "box exhausted" ); _state = DONE; + notePrefix(); return; } @@ -1453,8 +1339,9 @@ namespace mongo { break; } - } + notePrefix(); + } // If we doeighbors if( onlyExpand ) return; @@ -1495,7 +1382,7 @@ namespace mongo { GeoHash _neighborPrefix = _centerPrefix; _neighborPrefix.move( i, j ); - GEODEBUG( "moving to " << i << " , " << j ); + GEODEBUG( "moving to " << i << " , " << j << " fringe : " << _fringe.size() ); PREFIXDEBUG( _centerPrefix, _g ); PREFIXDEBUG( _neighborPrefix , _g ); while( _fringe.size() > 0 ) { @@ -1542,7 +1429,7 @@ namespace mongo { // be entirely done. Max recurse depth is < 8 * 16. // If we're maxed out on points, return - if( _foundInExp >= maxFound ) { + if( _foundInExp >= maxFound || _found >= maxAdded ) { // Make sure we'll come back to add more points assert( _state == DOING_EXPAND ); return; @@ -1571,14 +1458,63 @@ namespace mongo { // The amount the current box overlaps our search area virtual double intersectsBox( Box& cur ) = 0; - virtual void addSpecific( const GeoKeyNode& node , double d, bool newDoc ) { + virtual int addSpecific( const GeoKeyNode& node , const Point& keyP , bool onBounds , double keyD , bool newDoc ) { - if( ! newDoc ) return; + int found = 0; - if ( _cur.isEmpty() ) - _cur = GeoPoint( node ); - else - _stack.push_back( GeoPoint( node ) ); + // We need to handle every possible point in this method, even those not in the key value, to + // avoid us tracking which hashes we've already seen. + if( ! newDoc ){ + // log() << "Already handled doc!" << endl; + return 0; + } + + if( _uniqueDocs && ! onBounds ) { + // log() << "Added ind to " << _type << endl; + _stack.push_front( GeoPoint( node ) ); + found++; + } + else { + // We now handle every possible point in the document, even those not in the key value, + // since we're iterating through them anyway - prevents us from having to save the hashes + // we've seen per-doc + + // If we're filtering by hash, get the original + bool expensiveExact = expensiveExactCheck(); + + vector< BSONObj > locs; + getPointsFor( node._key, node.recordLoc.obj(), locs, true ); + for( vector< BSONObj >::iterator i = locs.begin(); i != locs.end(); ++i ){ + + double d = -1; + Point p( *i ); + + // We can avoid exact document checks by redoing approx checks, + // if the exact checks are more expensive. + bool needExact = true; + if( expensiveExact ){ + assert( false ); + KeyResult result = approxKeyCheck( p, d ); + if( result == BAD ) continue; + else if( result == GOOD ) needExact = false; + } + + if( ! needExact || exactDocCheck( p, d ) ){ + // log() << "Added mult to " << _type << endl; + _stack.push_front( GeoPoint( node ) ); + found++; + // If returning unique, just exit after first point is added + if( _uniqueDocs ) break; + } + } + } + + if ( _cur.isEmpty() && _stack.size() > 0 ){ + _cur = _stack.front(); + _stack.pop_front(); + } + + return found; } virtual long long nscanned() { @@ -1588,6 +1524,35 @@ namespace mongo { return _nscanned; } + virtual void explainDetails( BSONObjBuilder& b ){ + b << "keysChecked" << _keysChecked; + b << "lookedAt" << _lookedAt; + b << "matchesPerfd" << _matchesPerfd; + b << "objectsLoaded" << _objectsLoaded; + b << "pointsLoaded" << _pointsLoaded; + } + + virtual BSONObj prettyIndexBounds() const { + + vector<GeoHash>::const_iterator i = _expPrefixes.end(); + if( _expPrefixes.size() > 0 && *(--i) != *( _expPrefix.get() ) ) + _expPrefixes.push_back( *( _expPrefix.get() ) ); + + BSONObjBuilder bob; + BSONArrayBuilder bab; + for( i = _expPrefixes.begin(); i != _expPrefixes.end(); ++i ){ + bab << Box( _g, *i ).toBSON(); + } + bob << _g->_geo << bab.arr(); + + return bob.obj(); + + } + + void notePrefix() { + _expPrefixes.push_back( _prefix ); + } + string _type; BSONObj _filter; list<GeoPoint> _stack; @@ -1616,6 +1581,9 @@ namespace mongo { BtreeLocation _min; BtreeLocation _max; + shared_ptr<GeoHash> _expPrefix; + mutable vector<GeoHash> _expPrefixes; + }; @@ -1623,133 +1591,148 @@ namespace mongo { public: typedef multiset<GeoPoint> Holder; - GeoHopper( const Geo2dType * g , unsigned max , const Point& n , const BSONObj& filter = BSONObj() , double maxDistance = numeric_limits<double>::max() , GeoDistType type=GEO_PLAIN ) - : GeoBrowse( g, "search", filter ), _max( max ) , _near( n ), _maxDistance( maxDistance ), _type( type ), _distError( type == GEO_PLAIN ? g->_error : g->_errorSphere ), _farthest(0) + GeoHopper( const Geo2dType * g , unsigned max , const Point& n , const BSONObj& filter = BSONObj() , double maxDistance = numeric_limits<double>::max() , GeoDistType type=GEO_PLAIN, bool uniqueDocs = false, bool needDistance = true ) + : GeoBrowse( g, "search", filter, uniqueDocs, needDistance ), _max( max ) , _near( n ), _maxDistance( maxDistance ), _type( type ), _distError( type == GEO_PLAIN ? g->_error : g->_errorSphere ), _farthest(0) {} - virtual bool checkDistance( const GeoKeyNode& node, double& d ) { + virtual KeyResult approxKeyCheck( const Point& p, double& d ) { // Always check approximate distance, since it lets us avoid doing // checks of the rest of the object if it succeeds - // TODO: Refactor so that we can check exact distance and within if we are going to - // anyway. - d = approxDistance( node ); - assert( d >= 0 ); - // Out of the error range, see how close we are to the furthest points - bool good = d <= _maxDistance + 2 * _distError /* In error range */ - && ( _points.size() < _max /* need more points */ - || d <= farthest() + 2 * _distError /* could be closer than previous points */ ); + switch (_type) { + case GEO_PLAIN: + d = _near.distance( p ); + break; + case GEO_SPHERE: + checkEarthBounds( p ); + d = spheredist_deg( _near, p ); + break; + default: assert( false ); + } + assert( d >= 0 ); GEODEBUG( "\t\t\t\t\t\t\t checkDistance " << _near.toString() - << "\t" << GeoHash( node.key.firstElement() ) << "\t" << d - << " ok: " << good << " farthest: " << farthest() ); + << "\t" << p.toString() << "\t" << d + << " farthest: " << farthest() ); - return good; - } + // If we need more points + double borderDist = ( _points.size() < _max ? _maxDistance : farthest() ); + + if( d >= borderDist - 2 * _distError && d <= borderDist + 2 * _distError ) return BORDER; + else return d < borderDist ? GOOD : BAD; - double approxDistance( const GeoKeyNode& node ) { - return approxDistance( GeoHash( node._key.firstElement() ) ); } - double approxDistance( const GeoHash& h ) { + virtual bool exactDocCheck( const Point& p, double& d ){ - double approxDistance = -1; - Point p( _g, h ); - switch (_type) { + bool within = false; + + // Get the appropriate distance for the type + switch ( _type ) { case GEO_PLAIN: - approxDistance = _near.distance( p ); + d = _near.distance( p ); + within = _near.distanceWithin( p, _maxDistance ); break; case GEO_SPHERE: checkEarthBounds( p ); - approxDistance = spheredist_deg( _near, p ); + d = spheredist_deg( _near, p ); + within = ( d <= _maxDistance ); break; default: assert( false ); } - return approxDistance; + return within; } - double exactDistances( const GeoKeyNode& node ) { - - GEODEBUG( "Finding exact distance for " << node.key.toString() << " and " << node.recordLoc.obj().toString() ); - - // Find all the location objects from the keys - vector< BSONObj > locs; - _g->getKeys( node.recordLoc.obj(), locs ); + // Always in distance units, whether radians or normal + double farthest() const { + return _farthest; + } - double maxDistance = -1; + virtual int addSpecific( const GeoKeyNode& node, const Point& keyP, bool onBounds, double keyD, bool newDoc ) { - // Find the particular location we want - BSONObj loc; - GeoHash keyHash( node._key.firstElement(), _g->_bits ); - for( vector< BSONObj >::iterator i = locs.begin(); i != locs.end(); ++i ) { + // Unique documents - loc = *i; + GeoPoint newPoint( node, keyD, false ); - // Ignore all locations not hashed to the key's hash, since we may see - // those later - if( _g->_hash( loc ) != keyHash ) continue; + int prevSize = _points.size(); - double exactDistance = -1; - bool exactWithin = false; + // STEP 1 : Remove old duplicate points from the set if needed + if( _uniqueDocs ){ - Point p( loc ); + // Lookup old point with same doc + map< DiskLoc , Holder::iterator >::iterator oldPointIt = _seenPts.find( newPoint.loc() ); - // Get the appropriate distance for the type - switch ( _type ) { - case GEO_PLAIN: - exactDistance = _near.distance( p ); - exactWithin = _near.distanceWithin( p, _maxDistance ); - break; - case GEO_SPHERE: - checkEarthBounds( p ); - exactDistance = spheredist_deg( _near, p ); - exactWithin = ( exactDistance <= _maxDistance ); - break; - default: assert( false ); + if( oldPointIt != _seenPts.end() ){ + const GeoPoint& oldPoint = *(oldPointIt->second); + // We don't need to care if we've already seen this same approx pt or better, + // or we've already gone to disk once for the point + if( oldPoint < newPoint ){ + GEODEBUG( "\t\tOld point closer than new point" ); + return 0; + } + GEODEBUG( "\t\tErasing old point " << oldPointIt->first.obj() ); + _points.erase( oldPointIt->second ); } + } - assert( exactDistance >= 0 ); - if( !exactWithin ) continue; + Holder::iterator newIt = _points.insert( newPoint ); + if( _uniqueDocs ) _seenPts[ newPoint.loc() ] = newIt; - GEODEBUG( "Inserting exact point: " << GeoPoint( node , exactDistance, exactWithin ).toString() ); + GEODEBUG( "\t\tInserted new point " << newPoint.toString() << " approx : " << keyD ); - // Add a point for this location - _points.insert( GeoPoint( node , exactDistance, exactWithin ) ); + assert( _max > 0 ); - if( exactDistance > maxDistance ) maxDistance = exactDistance; - } + Holder::iterator lastPtIt = _points.end(); + lastPtIt--; + _farthest = lastPtIt->distance() + 2 * _distError; - return maxDistance; + return _points.size() - prevSize; } - // Always in distance units, whether radians or normal - double farthest() const { - return _farthest; - } + // Removes extra points from end of _points set. + // Check can be a bit costly if we have lots of exact points near borders, + // so we'll do this every once and awhile. + void processExtraPoints(){ - bool inErrorBounds( double approxD ) const { - return approxD >= _maxDistance - _distError && approxD <= _maxDistance + _distError; - } + if( _points.size() == 0 ) return; - virtual void addSpecific( const GeoKeyNode& node , double d, bool newDoc ) { + int prevSize = _points.size(); - GEODEBUG( "\t\t" << GeoHash( node.key.firstElement() ) << "\t" << node.recordLoc.obj() << "\t" << d ); + // Erase all points from the set with a position >= _max *and* + // whose distance isn't close to the _max - 1 position distance - double maxDistance = exactDistances( node ); - if( maxDistance >= 0 ){ + int numToErase = _points.size() - _max; + if( numToErase < 0 ) numToErase = 0; - // Recalculate the current furthest point. - int numToErase = _points.size() - _max; - while( numToErase-- > 0 ){ - _points.erase( --_points.end() ); - } + // Get the first point definitely in the _points array + Holder::iterator startErase = _points.end(); + for( int i = 0; i < numToErase + 1; i++ ) startErase--; + _farthest = startErase->distance() + 2 * _distError; - _farthest = boost::next( _points.end(), -1 )->_exactDistance; + GEODEBUG( "\t\tPotentially erasing " << numToErase << " points, " << " size : " << _points.size() << " max : " << _max << " dist : " << startErase->distance() << " farthest dist : " << _farthest << " from error : " << _distError ); + startErase++; + while( numToErase > 0 && startErase->distance() <= _farthest ){ + GEODEBUG( "\t\tNot erasing point " << startErase->toString() ); + numToErase--; + startErase++; + assert( startErase != _points.end() || numToErase == 0 ); } + + if( _uniqueDocs ){ + for( Holder::iterator i = startErase; i != _points.end(); ++i ) + _seenPts.erase( i->loc() ); + } + + _points.erase( startErase, _points.end() ); + + int diff = _points.size() - prevSize; + if( diff > 0 ) _found += diff; + else _found -= -diff; + } unsigned _max; @@ -1760,17 +1743,20 @@ namespace mongo { double _distError; double _farthest; + map< DiskLoc , Holder::iterator > _seenPts; + }; class GeoSearch : public GeoHopper { public: - GeoSearch( const Geo2dType * g , const Point& startPt , int numWanted=100 , BSONObj filter=BSONObj() , double maxDistance = numeric_limits<double>::max() , GeoDistType type=GEO_PLAIN ) - : GeoHopper( g , numWanted , startPt , filter , maxDistance, type ), + GeoSearch( const Geo2dType * g , const Point& startPt , int numWanted=100 , BSONObj filter=BSONObj() , double maxDistance = numeric_limits<double>::max() , GeoDistType type=GEO_PLAIN, bool uniqueDocs = false, bool needDistance = false ) + : GeoHopper( g , numWanted , startPt , filter , maxDistance, type, uniqueDocs, needDistance ), _start( g->hash( startPt._x, startPt._y ) ), - _numWanted( numWanted ), - _type(type) + // TODO: Remove numWanted... + _numWanted( numWanted ), + _type(type) { assert( g->getDetails() ); @@ -1795,6 +1781,8 @@ namespace mongo { void exec() { + if( _numWanted == 0 ) return; + /* * Search algorithm * 1) use geohash prefix to find X items @@ -1805,7 +1793,7 @@ namespace mongo { #ifdef GEODEBUGGING - log() << "start near search for points near " << _near << " (max dist " << _maxDistance << ")" << endl; + log() << "start near search for " << _numWanted << " points near " << _near << " (max dist " << _maxDistance << ")" << endl; #endif @@ -1815,13 +1803,16 @@ namespace mongo { long long f = found(); assert( f <= 0x7fffffff ); fillStack( maxPointsHeuristic, _numWanted - static_cast<int>(f) , true ); + processExtraPoints(); } while( _state != DONE && _state != DONE_NEIGHBOR && found() < _numWanted && (! _prefix.constrains() || _g->sizeEdge( _prefix ) <= _scanDistance ) ); // If we couldn't scan or scanned everything, we're done - if( _state == DONE ) return; - + if( _state == DONE ){ + expandEndPoints(); + return; + } } #ifdef GEODEBUGGING @@ -1856,6 +1847,8 @@ namespace mongo { _want = Box( _near._x - farDist , _near._y - farDist , farDist * 2 ); GEODEBUGPRINT( _want.toString() ); + // log() << "Found : " << found() << " wanted : " << _numWanted << " Far distance : " << farDist << " box : " << _want << endl; + // Remember the far distance for further scans _scanDistance = farDist; @@ -1874,15 +1867,195 @@ namespace mongo { // Do regular search in the full region do { fillStack( maxPointsHeuristic ); + processExtraPoints(); } while( _state != DONE ); } - GEODEBUG( "done near search" ) + GEODEBUG( "done near search with " << _points.size() << " points " ); + + expandEndPoints(); } + void addExactPoints( const GeoPoint& pt, Holder& points, bool force ){ + int before, after; + addExactPoints( pt, points, before, after, force ); + } + + void addExactPoints( const GeoPoint& pt, Holder& points, int& before, int& after, bool force ){ + + before = 0; + after = 0; + + GEODEBUG( "Adding exact points for " << pt.toString() ); + + if( pt.isExact() ){ + if( force ) points.insert( pt ); + return; + } + + vector<BSONObj> locs; + getPointsFor( pt.key(), pt.obj(), locs, _uniqueDocs ); + + GeoPoint nearestPt( pt, -1, true ); + + for( vector<BSONObj>::iterator i = locs.begin(); i != locs.end(); i++ ){ + + Point loc( *i ); + + double d; + if( ! exactDocCheck( loc, d ) ) continue; + + if( _uniqueDocs && ( nearestPt.distance() < 0 || d < nearestPt.distance() ) ){ + nearestPt._distance = d; + nearestPt._pt = *i; + continue; + } + else if( ! _uniqueDocs ){ + GeoPoint exactPt( pt, d, true ); + exactPt._pt = *i; + GEODEBUG( "Inserting exact pt " << exactPt.toString() << " for " << pt.toString() << " exact : " << d << " is less? " << ( exactPt < pt ) << " bits : " << _g->_bits ); + points.insert( exactPt ); + exactPt < pt ? before++ : after++; + } + + } + + if( _uniqueDocs && nearestPt.distance() >= 0 ){ + GEODEBUG( "Inserting unique exact pt " << nearestPt.toString() << " for " << pt.toString() << " exact : " << nearestPt.distance() << " is less? " << ( nearestPt < pt ) << " bits : " << _g->_bits ); + points.insert( nearestPt ); + if( nearestPt < pt ) before++; + else after++; + } + + } + + // TODO: Refactor this back into holder class, allow to run periodically when we are seeing a lot of pts + void expandEndPoints( bool finish = true ){ + + processExtraPoints(); + + // All points in array *could* be in maxDistance + + // Step 1 : Trim points to max size + // TODO: This check will do little for now, but is skeleton for future work in incremental $near + // searches + if( _max > 0 ){ + + int numToErase = _points.size() - _max; + + if( numToErase > 0 ){ + + Holder tested; + + // Work backward through all points we're not sure belong in the set + Holder::iterator maybePointIt = _points.end(); + maybePointIt--; + double approxMin = maybePointIt->distance() - 2 * _distError; + + GEODEBUG( "\t\tNeed to erase " << numToErase << " max : " << _max << " min dist " << approxMin << " error : " << _distError << " starting from : " << (*maybePointIt).toString() ); + + // Insert all + int erased = 0; + while( _points.size() > 0 && ( maybePointIt->distance() >= approxMin || erased < numToErase ) ){ + + Holder::iterator current = maybePointIt--; + + addExactPoints( *current, tested, true ); + _points.erase( current ); + erased++; + + if( tested.size() ) + approxMin = tested.begin()->distance() - 2 * _distError; + + } + + GEODEBUG( "\t\tEnding search at point " << ( _points.size() == 0 ? "(beginning)" : maybePointIt->toString() ) ); + + int numToAddBack = erased - numToErase; + assert( numToAddBack >= 0 ); + + GEODEBUG( "\t\tNum tested valid : " << tested.size() << " erased : " << erased << " added back : " << numToAddBack ); + +#ifdef GEODEBUGGING + for( Holder::iterator it = tested.begin(); it != tested.end(); it++ ){ + log() << "Tested Point: " << *it << endl; + } +#endif + Holder::iterator testedIt = tested.begin(); + for( int i = 0; i < numToAddBack && testedIt != tested.end(); i++ ){ + _points.insert( *testedIt ); + testedIt++; + } + } + } + +#ifdef GEODEBUGGING + for( Holder::iterator it = _points.begin(); it != _points.end(); it++ ){ + log() << "Point: " << *it << endl; + } +#endif + // We've now trimmed first set of unneeded points + + GEODEBUG( "\t\t Start expanding, num points : " << _points.size() << " max : " << _max ); + + // Step 2: iterate through all points and add as needed + + unsigned expandedPoints = 0; + Holder::iterator it = _points.begin(); + double expandWindowEnd = -1; + while( it != _points.end() ){ + const GeoPoint& currPt = *it; + + // TODO: If one point is exact, maybe not 2 * _distError + + // See if we're in an expand window + bool inWindow = currPt.distance() <= expandWindowEnd; + // If we're not, and we're done with points, break + if( ! inWindow && expandedPoints >= _max ) break; + + bool expandApprox = ! currPt.isExact() && ( ! _uniqueDocs || ( finish && _needDistance ) || inWindow ); + + if( expandApprox ){ + + // Add new point(s) + // These will only be added in a radius of 2 * _distError around the current point, + // so should not affect previously valid points. + int before, after; + addExactPoints( currPt, _points, before, after, false ); + expandedPoints += before; + + if( _max > 0 && expandedPoints < _max ) + expandWindowEnd = currPt.distance() + 2 * _distError; + + // Iterate to the next point + Holder::iterator current = it++; + // Erase the current point + _points.erase( current ); + + } + else{ + expandedPoints++; + it++; + } + } + + GEODEBUG( "\t\tFinished expanding, num points : " << _points.size() << " max : " << _max ); + + // Finish + // TODO: Don't really need to trim? + for( ; expandedPoints > _max; expandedPoints-- ) it--; + _points.erase( it, _points.end() ); + +#ifdef GEODEBUGGING + for( Holder::iterator it = _points.begin(); it != _points.end(); it++ ){ + log() << "Point: " << *it << endl; + } +#endif + } + virtual GeoHash expandStartHash(){ return _start; } @@ -1915,7 +2088,7 @@ namespace mongo { : GeoCursorBase( s->_spec ) , _s( s ) , _cur( s->_points.begin() ) , _end( s->_points.end() ), _nscanned() { if ( _cur != _end ) { - ++_nscanned; + ++_nscanned; } } @@ -1975,8 +2148,8 @@ namespace mongo { class GeoCircleBrowse : public GeoBrowse { public: - GeoCircleBrowse( const Geo2dType * g , const BSONObj& circle , BSONObj filter = BSONObj() , const string& type="$center") - : GeoBrowse( g , "circle" , filter ) { + GeoCircleBrowse( const Geo2dType * g , const BSONObj& circle , BSONObj filter = BSONObj() , const string& type="$center", bool uniqueDocs = true ) + : GeoBrowse( g , "circle" , filter, uniqueDocs ) { uassert( 13060 , "$center needs 2 fields (middle,max distance)" , circle.nFields() == 2 ); @@ -2040,19 +2213,16 @@ namespace mongo { return cur.intersects( _bBox ); } - virtual bool checkDistance( const GeoKeyNode& node, double& d ) { - - GeoHash h( node._key.firstElement(), _g->_bits ); + virtual KeyResult approxKeyCheck( const Point& p, double& d ) { // Inexact hash distance checks. double error = 0; switch (_type) { case GEO_PLAIN: - d = _g->distance( _start , h ); + d = _startPt.distance( p ); error = _g->_error; break; case GEO_SPHERE: { - Point p( _g, h ); checkEarthBounds( p ); d = spheredist_deg( _startPt, p ); error = _g->_errorSphere; @@ -2062,40 +2232,25 @@ namespace mongo { } // If our distance is in the error bounds... - if( d >= _maxDistance - error && d <= _maxDistance + error ) { - - // Do exact check - vector< BSONObj > locs; - _g->getKeys( node.recordLoc.obj(), locs ); - - for( vector< BSONObj >::iterator i = locs.begin(); i != locs.end(); ++i ) { - - GEODEBUG( "Inexact distance : " << d << " vs " << _maxDistance << " from " << ( *i ).toString() << " due to error " << error ); - - Point p( *i ); - // Exact distance checks. - switch (_type) { - case GEO_PLAIN: { - if( _startPt.distanceWithin( p, _maxDistance ) ) return true; - break; - } - case GEO_SPHERE: - // Ignore all locations not hashed to the key's hash, since spherical calcs are - // more expensive. - if( _g->_hash( *i ) != h ) break; - checkEarthBounds( p ); - if( spheredist_deg( _startPt , p ) <= _maxDistance ) return true; - break; - default: assert( false ); - } + if( d >= _maxDistance - error && d <= _maxDistance + error ) return BORDER; + return d > _maxDistance ? BAD : GOOD; + } - } + virtual bool exactDocCheck( const Point& p, double& d ){ - return false; + switch (_type) { + case GEO_PLAIN: { + if( _startPt.distanceWithin( p, _maxDistance ) ) return true; + break; + } + case GEO_SPHERE: + checkEarthBounds( p ); + if( spheredist_deg( _startPt , p ) <= _maxDistance ) return true; + break; + default: assert( false ); } - GEODEBUG( "\t " << h << "\t" << d ); - return d <= _maxDistance; + return false; } GeoDistType _type; @@ -2111,8 +2266,8 @@ namespace mongo { class GeoBoxBrowse : public GeoBrowse { public: - GeoBoxBrowse( const Geo2dType * g , const BSONObj& box , BSONObj filter = BSONObj() ) - : GeoBrowse( g , "box" , filter ) { + GeoBoxBrowse( const Geo2dType * g , const BSONObj& box , BSONObj filter = BSONObj(), bool uniqueDocs = true ) + : GeoBrowse( g , "box" , filter, uniqueDocs ) { uassert( 13063 , "$box needs 2 fields (bottomLeft,topRight)" , box.nFields() == 2 ); @@ -2133,7 +2288,7 @@ namespace mongo { _fudge = _g->_error; _wantLen = _fudge + std::max( ( _want._max._x - _want._min._x ) , - ( _want._max._y - _want._min._y ) ); + ( _want._max._y - _want._min._y ) ) / 2; ok(); } @@ -2171,39 +2326,14 @@ namespace mongo { return cur.intersects( _want ); } - virtual bool checkDistance( const GeoKeyNode& node, double& d ) { - - GeoHash h( node._key.firstElement() ); - Point approxPt( _g, h ); - - bool approxInside = _want.inside( approxPt, _fudge ); + virtual KeyResult approxKeyCheck( const Point& p, double& d ) { + if( _want.onBoundary( p, _fudge ) ) return BORDER; + else return _want.inside( p, _fudge ) ? GOOD : BAD; - if( approxInside && _want.onBoundary( approxPt, _fudge ) ) { - - // Do exact check - vector< BSONObj > locs; - _g->getKeys( node.recordLoc.obj(), locs ); - - for( vector< BSONObj >::iterator i = locs.begin(); i != locs.end(); ++i ) { - if( _want.inside( Point( *i ) ) ) { - - GEODEBUG( "found exact point : " << _want.toString() - << " exact point : " << Point( *i ).toString() - << " approx point : " << approxPt.toString() - << " because of error: " << _fudge ); - - return true; - } - } - - return false; - } - - GEODEBUG( "checking point : " << _want.toString() - << " point: " << approxPt.toString() - << " in : " << _want.inside( approxPt, _fudge ) ); + } - return approxInside; + virtual bool exactDocCheck( const Point& p, double& d ){ + return _want.inside( p ); } Box _want; @@ -2218,7 +2348,7 @@ namespace mongo { public: GeoPolygonBrowse( const Geo2dType* g , const BSONObj& polyPoints , - BSONObj filter = BSONObj() ) : GeoBrowse( g , "polygon" , filter ) { + BSONObj filter = BSONObj(), bool uniqueDocs = true ) : GeoBrowse( g , "polygon" , filter, uniqueDocs ) { GEODEBUG( "In Polygon" ) @@ -2233,7 +2363,7 @@ namespace mongo { uassert( 14030, "polygon must be defined by three points or more", _poly.size() >= 3 ); _bounds = _poly.bounds(); - _maxDim = _bounds.maxDim(); + _maxDim = _g->_error + _bounds.maxDim() / 2; ok(); } @@ -2253,51 +2383,17 @@ namespace mongo { return cur.intersects( _bounds ); } - virtual bool checkDistance( const GeoKeyNode& node, double& d ) { - - GeoHash h( node._key.firstElement(), _g->_bits ); - Point p( _g, h ); + virtual KeyResult approxKeyCheck( const Point& p, double& d ) { int in = _poly.contains( p, _g->_error ); - if( in != 0 ) { - - if ( in > 0 ) { - GEODEBUG( "Point: [" << p._x << ", " << p._y << "] approx in polygon" ); - } - else { - GEODEBUG( "Point: [" << p._x << ", " << p._y << "] approx not in polygon" ); - } - - if( in != 0 ) return in > 0; - } - - // Do exact check, since to approximate check was inconclusive - vector< BSONObj > locs; - _g->getKeys( node.recordLoc.obj(), locs ); - - for( vector< BSONObj >::iterator i = locs.begin(); i != locs.end(); ++i ) { - - Point p( *i ); - // Ignore all points not hashed to the current value - // This implicitly assumes hashing is less costly than the polygon check, which - // may or may not be true. - if( _g->hash( p ) != h ) continue; + if( in == 0 ) return BORDER; + else return in > 0 ? GOOD : BAD; - // Use the point in polygon algorithm to see if the point - // is contained in the polygon. - bool in = _poly.contains( p ); - if ( in ) { - GEODEBUG( "Point: [" << p._x << ", " << p._y << "] exactly in polygon" ); - } - else { - GEODEBUG( "Point: [" << p._x << ", " << p._y << "] exactly not in polygon" ); - } - if( in ) return in; - - } + } - return false; + virtual bool exactDocCheck( const Point& p, double& d ){ + return _poly.contains( p ); } private: @@ -2324,7 +2420,7 @@ namespace mongo { if ( e.type() == Array ) { // If we get an array query, assume it is a location, and do a $within { $center : [[x, y], 0] } search - shared_ptr<Cursor> c( new GeoCircleBrowse( this , BSON( "0" << e.embeddedObjectUserCheck() << "1" << 0 ), query.filterFieldsUndotted( BSON( _geo << "" ), false ) ) ); + shared_ptr<Cursor> c( new GeoCircleBrowse( this , BSON( "0" << e.embeddedObjectUserCheck() << "1" << 0 ), query.filterFieldsUndotted( BSON( _geo << "" ), false ), "$center", true ) ); return c; } else if ( e.type() == Object ) { @@ -2364,33 +2460,44 @@ namespace mongo { if ( e.isNumber() ) maxDistance = e.numberDouble(); } - shared_ptr<GeoSearch> s( new GeoSearch( this , Point( e ) , numWanted , query , maxDistance, type ) ); + + bool uniqueDocs = false; + if( ! n["$uniqueDocs"].eoo() ) uniqueDocs = n["$uniqueDocs"].trueValue(); + + shared_ptr<GeoSearch> s( new GeoSearch( this , Point( e ) , numWanted , query , maxDistance, type, uniqueDocs ) ); s->exec(); shared_ptr<Cursor> c; c.reset( new GeoSearchCursor( s ) ); return c; } case BSONObj::opWITHIN: { + e = e.embeddedObject().firstElement(); uassert( 13057 , "$within has to take an object or array" , e.isABSONObj() ); + + BSONObj context = e.embeddedObject(); e = e.embeddedObject().firstElement(); string type = e.fieldName(); + + bool uniqueDocs = true; + if( ! context["$uniqueDocs"].eoo() ) uniqueDocs = context["$uniqueDocs"].trueValue(); + if ( startsWith(type, "$center") ) { uassert( 13059 , "$center has to take an object or array" , e.isABSONObj() ); - shared_ptr<Cursor> c( new GeoCircleBrowse( this , e.embeddedObjectUserCheck() , query , type) ); + shared_ptr<Cursor> c( new GeoCircleBrowse( this , e.embeddedObjectUserCheck() , query , type, uniqueDocs ) ); return c; } else if ( type == "$box" ) { uassert( 13065 , "$box has to take an object or array" , e.isABSONObj() ); - shared_ptr<Cursor> c( new GeoBoxBrowse( this , e.embeddedObjectUserCheck() , query ) ); + shared_ptr<Cursor> c( new GeoBoxBrowse( this , e.embeddedObjectUserCheck() , query, uniqueDocs ) ); return c; } else if ( startsWith( type, "$poly" ) ) { uassert( 14029 , "$polygon has to take an object or array" , e.isABSONObj() ); - shared_ptr<Cursor> c( new GeoPolygonBrowse( this , e.embeddedObjectUserCheck() , query ) ); + shared_ptr<Cursor> c( new GeoPolygonBrowse( this , e.embeddedObjectUserCheck() , query, uniqueDocs ) ); return c; } - throw UserException( 13058 , (string)"unknown $with type: " + type ); + throw UserException( 13058 , (string)"unknown $within type: " + type ); } default: // Otherwise... assume the object defines a point, and we want to do a zero-radius $within $center @@ -2414,7 +2521,7 @@ namespace mongo { bool slaveOk() const { return true; } void help(stringstream& h) const { h << "http://www.mongodb.org/display/DOCS/Geospatial+Indexing#GeospatialIndexing-geoNearCommand"; } bool slaveOverrideOk() { return true; } - bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { string ns = dbname + "." + cmdObj.firstElement().valuestr(); NamespaceDetails * d = nsdetails( ns.c_str() ); @@ -2450,6 +2557,12 @@ namespace mongo { assert( numWanted >= 0 ); } + bool uniqueDocs = false; + if( ! cmdObj["uniqueDocs"].eoo() ) uniqueDocs = cmdObj["uniqueDocs"].trueValue(); + + bool includeLocs = false; + if( ! cmdObj["includeLocs"].eoo() ) includeLocs = cmdObj["includeLocs"].trueValue(); + uassert(13046, "'near' param missing/invalid", !cmdObj["near"].eoo()); const Point n( cmdObj["near"] ); result.append( "near" , g->_tohash( cmdObj["near"] ).toString() ); @@ -2466,7 +2579,7 @@ namespace mongo { if ( cmdObj["spherical"].trueValue() ) type = GEO_SPHERE; - GeoSearch gs( g , n , numWanted , filter , maxDistance , type ); + GeoSearch gs( g , n , numWanted , filter , maxDistance , type, uniqueDocs, true ); if ( cmdObj["start"].type() == String) { GeoHash start ((string) cmdObj["start"].valuestr()); @@ -2486,11 +2599,12 @@ namespace mongo { for ( GeoHopper::Holder::iterator i=gs._points.begin(); i!=gs._points.end(); i++ ) { const GeoPoint& p = *i; - double dis = distanceMultiplier * p._exactDistance; + double dis = distanceMultiplier * p.distance(); totalDistance += dis; BSONObjBuilder bb( arr.subobjStart( BSONObjBuilder::numStr( x++ ) ) ); bb.append( "dis" , dis ); + if( includeLocs ) bb.append( "loc" , p._pt ); bb.append( "obj" , p._o ); bb.done(); } @@ -2516,7 +2630,7 @@ namespace mongo { virtual LockType locktype() const { return READ; } bool slaveOk() const { return true; } bool slaveOverrideOk() { return true; } - bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { string ns = dbname + "." + cmdObj.firstElement().valuestr(); NamespaceDetails * d = nsdetails( ns.c_str() ); @@ -2571,4 +2685,248 @@ namespace mongo { } geoWalkCmd; + struct GeoUnitTest : public UnitTest { + + int round( double d ) { + return (int)(.5+(d*1000)); + } + +#define GEOHEQ(a,b) if ( a.toString() != b ){ cout << "[" << a.toString() << "] != [" << b << "]" << endl; assert( a == GeoHash(b) ); } + + void run() { + assert( ! GeoHash::isBitSet( 0 , 0 ) ); + assert( ! GeoHash::isBitSet( 0 , 31 ) ); + assert( GeoHash::isBitSet( 1 , 31 ) ); + + IndexSpec i( BSON( "loc" << "2d" ) ); + Geo2dType g( &geo2dplugin , &i ); + { + double x = 73.01212; + double y = 41.352964; + BSONObj in = BSON( "x" << x << "y" << y ); + GeoHash h = g._hash( in ); + BSONObj out = g._unhash( h ); + assert( round(x) == round( out["x"].number() ) ); + assert( round(y) == round( out["y"].number() ) ); + assert( round( in["x"].number() ) == round( out["x"].number() ) ); + assert( round( in["y"].number() ) == round( out["y"].number() ) ); + } + + { + double x = -73.01212; + double y = 41.352964; + BSONObj in = BSON( "x" << x << "y" << y ); + GeoHash h = g._hash( in ); + BSONObj out = g._unhash( h ); + assert( round(x) == round( out["x"].number() ) ); + assert( round(y) == round( out["y"].number() ) ); + assert( round( in["x"].number() ) == round( out["x"].number() ) ); + assert( round( in["y"].number() ) == round( out["y"].number() ) ); + } + + { + GeoHash h( "0000" ); + h.move( 0 , 1 ); + GEOHEQ( h , "0001" ); + h.move( 0 , -1 ); + GEOHEQ( h , "0000" ); + + h.init( "0001" ); + h.move( 0 , 1 ); + GEOHEQ( h , "0100" ); + h.move( 0 , -1 ); + GEOHEQ( h , "0001" ); + + + h.init( "0000" ); + h.move( 1 , 0 ); + GEOHEQ( h , "0010" ); + } + + { + Box b( 5 , 5 , 2 ); + assert( "(5,5) -->> (7,7)" == b.toString() ); + } + + { + GeoHash a = g.hash( 1 , 1 ); + GeoHash b = g.hash( 4 , 5 ); + assert( 5 == (int)(g.distance( a , b ) ) ); + a = g.hash( 50 , 50 ); + b = g.hash( 42 , 44 ); + assert( round(10) == round(g.distance( a , b )) ); + } + + { + GeoHash x("0000"); + assert( 0 == x.getHash() ); + x.init( 0 , 1 , 32 ); + GEOHEQ( x , "0000000000000000000000000000000000000000000000000000000000000001" ) + + assert( GeoHash( "1100").hasPrefix( GeoHash( "11" ) ) ); + assert( ! GeoHash( "1000").hasPrefix( GeoHash( "11" ) ) ); + } + + { + GeoHash x("1010"); + GEOHEQ( x , "1010" ); + GeoHash y = x + "01"; + GEOHEQ( y , "101001" ); + } + + { + + GeoHash a = g.hash( 5 , 5 ); + GeoHash b = g.hash( 5 , 7 ); + GeoHash c = g.hash( 100 , 100 ); + /* + cout << "a: " << a << endl; + cout << "b: " << b << endl; + cout << "c: " << c << endl; + + cout << "a: " << a.toStringHex1() << endl; + cout << "b: " << b.toStringHex1() << endl; + cout << "c: " << c.toStringHex1() << endl; + */ + BSONObj oa = a.wrap(); + BSONObj ob = b.wrap(); + BSONObj oc = c.wrap(); + /* + cout << "a: " << oa.hexDump() << endl; + cout << "b: " << ob.hexDump() << endl; + cout << "c: " << oc.hexDump() << endl; + */ + assert( oa.woCompare( ob ) < 0 ); + assert( oa.woCompare( oc ) < 0 ); + + } + + { + GeoHash x( "000000" ); + x.move( -1 , 0 ); + GEOHEQ( x , "101010" ); + x.move( 1 , -1 ); + GEOHEQ( x , "010101" ); + x.move( 0 , 1 ); + GEOHEQ( x , "000000" ); + } + + { + GeoHash prefix( "110011000000" ); + GeoHash entry( "1100110000011100000111000001110000011100000111000001000000000000" ); + assert( ! entry.hasPrefix( prefix ) ); + + entry = GeoHash("1100110000001100000111000001110000011100000111000001000000000000"); + assert( entry.toString().find( prefix.toString() ) == 0 ); + assert( entry.hasPrefix( GeoHash( "1100" ) ) ); + assert( entry.hasPrefix( prefix ) ); + } + + { + GeoHash a = g.hash( 50 , 50 ); + GeoHash b = g.hash( 48 , 54 ); + assert( round( 4.47214 ) == round( g.distance( a , b ) ) ); + } + + + { + Box b( Point( 29.762283 , -95.364271 ) , Point( 29.764283000000002 , -95.36227099999999 ) ); + assert( b.inside( 29.763 , -95.363 ) ); + assert( ! b.inside( 32.9570255 , -96.1082497 ) ); + assert( ! b.inside( 32.9570255 , -96.1082497 , .01 ) ); + } + + { + GeoHash a( "11001111" ); + assert( GeoHash( "11" ) == a.commonPrefix( GeoHash("11") ) ); + assert( GeoHash( "11" ) == a.commonPrefix( GeoHash("11110000") ) ); + } + + { + int N = 10000; + { + Timer t; + for ( int i=0; i<N; i++ ) { + unsigned x = (unsigned)rand(); + unsigned y = (unsigned)rand(); + GeoHash h( x , y ); + unsigned a,b; + h.unhash_slow( a,b ); + assert( a == x ); + assert( b == y ); + } + //cout << "slow: " << t.millis() << endl; + } + + { + Timer t; + for ( int i=0; i<N; i++ ) { + unsigned x = (unsigned)rand(); + unsigned y = (unsigned)rand(); + GeoHash h( x , y ); + unsigned a,b; + h.unhash_fast( a,b ); + assert( a == x ); + assert( b == y ); + } + //cout << "fast: " << t.millis() << endl; + } + + } + + { + // see http://en.wikipedia.org/wiki/Great-circle_distance#Worked_example + + { + Point BNA (-86.67, 36.12); + Point LAX (-118.40, 33.94); + + double dist1 = spheredist_deg(BNA, LAX); + double dist2 = spheredist_deg(LAX, BNA); + + // target is 0.45306 + assert( 0.45305 <= dist1 && dist1 <= 0.45307 ); + assert( 0.45305 <= dist2 && dist2 <= 0.45307 ); + } + { + Point BNA (-1.5127, 0.6304); + Point LAX (-2.0665, 0.5924); + + double dist1 = spheredist_rad(BNA, LAX); + double dist2 = spheredist_rad(LAX, BNA); + + // target is 0.45306 + assert( 0.45305 <= dist1 && dist1 <= 0.45307 ); + assert( 0.45305 <= dist2 && dist2 <= 0.45307 ); + } + { + Point JFK (-73.77694444, 40.63861111 ); + Point LAX (-118.40, 33.94); + + double dist = spheredist_deg(JFK, LAX) * EARTH_RADIUS_MILES; + assert( dist > 2469 && dist < 2470 ); + } + + { + Point BNA (-86.67, 36.12); + Point LAX (-118.40, 33.94); + Point JFK (-73.77694444, 40.63861111 ); + assert( spheredist_deg(BNA, BNA) < 1e-6); + assert( spheredist_deg(LAX, LAX) < 1e-6); + assert( spheredist_deg(JFK, JFK) < 1e-6); + + Point zero (0, 0); + Point antizero (0,-180); + + // these were known to cause NaN + assert( spheredist_deg(zero, zero) < 1e-6); + assert( fabs(M_PI-spheredist_deg(zero, antizero)) < 1e-6); + assert( fabs(M_PI-spheredist_deg(antizero, zero)) < 1e-6); + } + } + } + } geoUnitTest; + + } + diff --git a/db/geo/core.h b/db/geo/core.h index 74f4b6e8269..b77997844f2 100644 --- a/db/geo/core.h +++ b/db/geo/core.h @@ -278,14 +278,19 @@ namespace mongo { return *this; } - bool operator==(const GeoHash& h ) { + bool operator==(const GeoHash& h ) const { return _hash == h._hash && _bits == h._bits; } - bool operator!=(const GeoHash& h ) { + bool operator!=(const GeoHash& h ) const { return !( *this == h ); } + bool operator<(const GeoHash& h ) const { + if( _hash != h._hash ) return _hash < h._hash; + return _bits < h._bits; + } + GeoHash& operator+=( const char * s ) { unsigned pos = _bits * 2; _bits += strlen(s) / 2; diff --git a/db/geo/haystack.cpp b/db/geo/haystack.cpp index fd6b2392d6a..a5dd478f625 100644 --- a/db/geo/haystack.cpp +++ b/db/geo/haystack.cpp @@ -264,7 +264,7 @@ namespace mongo { virtual LockType locktype() const { return READ; } bool slaveOk() const { return true; } bool slaveOverrideOk() const { return true; } - bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { string ns = dbname + "." + cmdObj.firstElement().valuestr(); diff --git a/db/index.cpp b/db/index.cpp index 8aebef45e8e..67a0d44e444 100644 --- a/db/index.cpp +++ b/db/index.cpp @@ -27,11 +27,6 @@ namespace mongo { - /** old (<= v1.8) : 0 - 1 is new version - */ - const int DefaultIndexVersionNumber = 1; - template< class V > class IndexInterfaceImpl : public IndexInterface { public: diff --git a/db/index.h b/db/index.h index debe2aa9c26..54b06394435 100644 --- a/db/index.h +++ b/db/index.h @@ -150,14 +150,18 @@ namespace mongo { return io.getStringField("ns"); } - int version() const { - BSONElement e = info.obj()["v"]; + static int versionForIndexObj( const BSONObj &obj ) { + BSONElement e = obj["v"]; if( e.type() == NumberInt ) return e._numberInt(); // should normally be an int. this is for backward compatibility int v = e.numberInt(); uassert(14802, "index v field should be Integer type", v == 0); - return v; + return v; + } + + int version() const { + return versionForIndexObj( info.obj() ); } /** @return true if index has unique constraint */ diff --git a/db/indexkey.cpp b/db/indexkey.cpp index cc2cd43daf5..6d6fcc58cae 100644 --- a/db/indexkey.cpp +++ b/db/indexkey.cpp @@ -22,9 +22,15 @@ #include "btree.h" #include "ops/query.h" #include "background.h" +#include "../util/text.h" namespace mongo { + /** old (<= v1.8) : 0 + 1 is new version + */ + const int DefaultIndexVersionNumber = 1; + map<string,IndexPlugin*> * IndexPlugin::_plugins; IndexType::IndexType( const IndexPlugin * plugin , const IndexSpec * spec ) @@ -100,6 +106,14 @@ namespace mongo { } { + // _undefinedElt + BSONObjBuilder b; + b.appendUndefined( "" ); + _undefinedObj = b.obj(); + _undefinedElt = _undefinedObj.firstElement(); + } + + { // handle plugins string pluginName = IndexPlugin::findPluginName( keyPattern ); if ( pluginName.size() ) { @@ -116,131 +130,289 @@ namespace mongo { _finishedInit = true; } - - void IndexSpec::getKeys( const BSONObj &obj, BSONObjSet &keys ) const { - if ( _indexType.get() ) { //plugin (eg geo) - _indexType->getKeys( obj , keys ); - return; - } - vector<const char*> fieldNames( _fieldNames ); - vector<BSONElement> fixed( _fixed ); - _getKeys( fieldNames , fixed , obj, keys ); - if ( keys.empty() && ! _sparse ) - keys.insert( _nullKey ); + void assertParallelArrays( const char *first, const char *second ) { + stringstream ss; + ss << "cannot index parallel arrays [" << first << "] [" << second << "]"; + uasserted( 10088 , ss.str() ); } - - void IndexSpec::_getKeys( vector<const char*> fieldNames , vector<BSONElement> fixed , const BSONObj &obj, BSONObjSet &keys ) const { - BSONElement arrElt; - unsigned arrIdx = ~0; - int numNotFound = 0; - - for( unsigned i = 0; i < fieldNames.size(); ++i ) { - if ( *fieldNames[ i ] == '\0' ) - continue; - - BSONElement e = obj.getFieldDottedOrArray( fieldNames[ i ] ); - - if ( e.eoo() ) { - e = _nullElt; // no matching field - numNotFound++; + + class KeyGeneratorV0 { + public: + KeyGeneratorV0( const IndexSpec &spec ) : _spec( spec ) {} + + void getKeys( const BSONObj &obj, BSONObjSet &keys ) const { + if ( _spec._indexType.get() ) { //plugin (eg geo) + _spec._indexType->getKeys( obj , keys ); + return; } - - if ( e.type() != Array ) - fieldNames[ i ] = ""; // no matching field or non-array match - - if ( *fieldNames[ i ] == '\0' ) - fixed[ i ] = e; // no need for further object expansion (though array expansion still possible) - - if ( e.type() == Array && arrElt.eoo() ) { // we only expand arrays on a single path -- track the path here - arrIdx = i; - arrElt = e; + vector<const char*> fieldNames( _spec._fieldNames ); + vector<BSONElement> fixed( _spec._fixed ); + _getKeys( fieldNames , fixed , obj, keys ); + if ( keys.empty() && ! _spec._sparse ) + keys.insert( _spec._nullKey ); + } + + private: + void _getKeys( vector<const char*> fieldNames , vector<BSONElement> fixed , const BSONObj &obj, BSONObjSet &keys ) const { + BSONElement arrElt; + unsigned arrIdx = ~0; + int numNotFound = 0; + + for( unsigned i = 0; i < fieldNames.size(); ++i ) { + if ( *fieldNames[ i ] == '\0' ) + continue; + + BSONElement e = obj.getFieldDottedOrArray( fieldNames[ i ] ); + + if ( e.eoo() ) { + e = _spec._nullElt; // no matching field + numNotFound++; + } + + if ( e.type() != Array ) + fieldNames[ i ] = ""; // no matching field or non-array match + + if ( *fieldNames[ i ] == '\0' ) + fixed[ i ] = e; // no need for further object expansion (though array expansion still possible) + + if ( e.type() == Array && arrElt.eoo() ) { // we only expand arrays on a single path -- track the path here + arrIdx = i; + arrElt = e; + } + + // enforce single array path here + if ( e.type() == Array && e.rawdata() != arrElt.rawdata() ) { + assertParallelArrays( e.fieldName(), arrElt.fieldName() ); + } } - - // enforce single array path here - if ( e.type() == Array && e.rawdata() != arrElt.rawdata() ) { - stringstream ss; - ss << "cannot index parallel arrays [" << e.fieldName() << "] [" << arrElt.fieldName() << "]"; - uasserted( 10088 , ss.str() ); + + bool allFound = true; // have we found elements for all field names in the key spec? + for( vector<const char*>::const_iterator i = fieldNames.begin(); i != fieldNames.end(); ++i ) { + if ( **i != '\0' ) { + allFound = false; + break; + } } - } - - bool allFound = true; // have we found elements for all field names in the key spec? - for( vector<const char*>::const_iterator i = fieldNames.begin(); i != fieldNames.end(); ++i ) { - if ( **i != '\0' ) { - allFound = false; - break; + + if ( _spec._sparse && numNotFound == _spec._nFields ) { + // we didn't find any fields + // so we're not going to index this document + return; } - } - - if ( _sparse && numNotFound == _nFields ) { - // we didn't find any fields - // so we're not going to index this document - return; - } - - bool insertArrayNull = false; - - if ( allFound ) { - if ( arrElt.eoo() ) { - // no terminal array element to expand - BSONObjBuilder b(_sizeTracker); - for( vector< BSONElement >::iterator i = fixed.begin(); i != fixed.end(); ++i ) - b.appendAs( *i, "" ); - keys.insert( b.obj() ); + + bool insertArrayNull = false; + + if ( allFound ) { + if ( arrElt.eoo() ) { + // no terminal array element to expand + BSONObjBuilder b(_spec._sizeTracker); + for( vector< BSONElement >::iterator i = fixed.begin(); i != fixed.end(); ++i ) + b.appendAs( *i, "" ); + keys.insert( b.obj() ); + } + else { + // terminal array element to expand, so generate all keys + BSONObjIterator i( arrElt.embeddedObject() ); + if ( i.more() ) { + while( i.more() ) { + BSONObjBuilder b(_spec._sizeTracker); + for( unsigned j = 0; j < fixed.size(); ++j ) { + if ( j == arrIdx ) + b.appendAs( i.next(), "" ); + else + b.appendAs( fixed[ j ], "" ); + } + keys.insert( b.obj() ); + } + } + else if ( fixed.size() > 1 ) { + insertArrayNull = true; + } + } } else { - // terminal array element to expand, so generate all keys + // nonterminal array element to expand, so recurse + assert( !arrElt.eoo() ); BSONObjIterator i( arrElt.embeddedObject() ); if ( i.more() ) { while( i.more() ) { - BSONObjBuilder b(_sizeTracker); - for( unsigned j = 0; j < fixed.size(); ++j ) { - if ( j == arrIdx ) - b.appendAs( i.next(), "" ); - else - b.appendAs( fixed[ j ], "" ); + BSONElement e = i.next(); + if ( e.type() == Object ) { + _getKeys( fieldNames, fixed, e.embeddedObject(), keys ); } - keys.insert( b.obj() ); } } - else if ( fixed.size() > 1 ) { + else { insertArrayNull = true; } } - } - else { - // nonterminal array element to expand, so recurse - assert( !arrElt.eoo() ); - BSONObjIterator i( arrElt.embeddedObject() ); - if ( i.more() ) { - while( i.more() ) { - BSONElement e = i.next(); - if ( e.type() == Object ) { - _getKeys( fieldNames, fixed, e.embeddedObject(), keys ); + + if ( insertArrayNull ) { + // x : [] - need to insert undefined + BSONObjBuilder b(_spec._sizeTracker); + for( unsigned j = 0; j < fixed.size(); ++j ) { + if ( j == arrIdx ) { + b.appendUndefined( "" ); + } + else { + BSONElement e = fixed[j]; + if ( e.eoo() ) + b.appendNull( "" ); + else + b.appendAs( e , "" ); } } + keys.insert( b.obj() ); } - else { - insertArrayNull = true; + } + + const IndexSpec &_spec; + }; + + class KeyGeneratorV1 { + public: + KeyGeneratorV1( const IndexSpec &spec ) : _spec( spec ) {} + + void getKeys( const BSONObj &obj, BSONObjSet &keys ) const { + if ( _spec._indexType.get() ) { //plugin (eg geo) + _spec._indexType->getKeys( obj , keys ); + return; + } + vector<const char*> fieldNames( _spec._fieldNames ); + vector<BSONElement> fixed( _spec._fixed ); + _getKeys( fieldNames , fixed , obj, keys ); + if ( keys.empty() && ! _spec._sparse ) + keys.insert( _spec._nullKey ); + } + + private: + /** + * @param arrayNestedArray - set if the returned element is an array nested directly within arr. + */ + BSONElement extractNextElement( const BSONObj &obj, const BSONObj &arr, const char *&field, bool &arrayNestedArray ) const { + string firstField = mongoutils::str::before( field, '.' ); + bool haveObjField = !obj.getField( firstField ).eoo(); + BSONElement arrField = arr.getField( firstField ); + bool haveArrField = !arrField.eoo(); + + // An index component field name cannot exist in both a document array and one of that array's children. + uassert( 15855 , "Parallel references while expanding indexed field in array", !haveObjField || !haveArrField ); + + arrayNestedArray = false; + if ( haveObjField ) { + return obj.getFieldDottedOrArray( field ); + } + else if ( haveArrField ) { + if ( arrField.type() == Array ) { + arrayNestedArray = true; + } + return arr.getFieldDottedOrArray( field ); } + return BSONElement(); } - - if ( insertArrayNull ) { - // x : [] - need to insert undefined - BSONObjBuilder b(_sizeTracker); - for( unsigned j = 0; j < fixed.size(); ++j ) { - if ( j == arrIdx ) { - b.appendUndefined( "" ); + + void _getKeysArrEltFixed( vector<const char*> &fieldNames , vector<BSONElement> &fixed , const BSONElement &arrEntry, BSONObjSet &keys, int numNotFound, const BSONElement &arrObjElt, const set< unsigned > &arrIdxs, bool mayExpandArrayUnembedded ) const { + // set up any terminal array values + for( set<unsigned>::const_iterator j = arrIdxs.begin(); j != arrIdxs.end(); ++j ) { + if ( *fieldNames[ *j ] == '\0' ) { + fixed[ *j ] = mayExpandArrayUnembedded ? arrEntry : arrObjElt; + } + } + // recurse + _getKeys( fieldNames, fixed, ( arrEntry.type() == Object ) ? arrEntry.embeddedObject() : BSONObj(), keys, numNotFound, arrObjElt.embeddedObject() ); + } + + /** + * @param fieldNames - fields to index, may be postfixes in recursive calls + * @param fixed - values that have already been identified for their index fields + * @param obj - object from which keys should be extracted, based on names in fieldNames + * @param keys - set where index keys are written + * @param numNotFound - number of index fields that have already been identified as missing + * @param array - array from which keys should be extracted, based on names in fieldNames + * If obj and array are both nonempty, obj will be one of the elements of array. + */ + void _getKeys( vector<const char*> fieldNames , vector<BSONElement> fixed , const BSONObj &obj, BSONObjSet &keys, int numNotFound = 0, const BSONObj &array = BSONObj() ) const { + BSONElement arrElt; + set<unsigned> arrIdxs; + bool mayExpandArrayUnembedded = true; + for( unsigned i = 0; i < fieldNames.size(); ++i ) { + if ( *fieldNames[ i ] == '\0' ) { + continue; + } + + bool arrayNestedArray; + // Extract element matching fieldName[ i ] from object xor array. + BSONElement e = extractNextElement( obj, array, fieldNames[ i ], arrayNestedArray ); + + if ( e.eoo() ) { + // if field not present, set to null + fixed[ i ] = _spec._nullElt; + // done expanding this field name + fieldNames[ i ] = ""; + numNotFound++; + } + else if ( e.type() == Array ) { + arrIdxs.insert( i ); + if ( arrElt.eoo() ) { + // we only expand arrays on a single path -- track the path here + arrElt = e; + } + else if ( e.rawdata() != arrElt.rawdata() ) { + // enforce single array path here + assertParallelArrays( e.fieldName(), arrElt.fieldName() ); + } + if ( arrayNestedArray ) { + mayExpandArrayUnembedded = false; + } } else { - BSONElement e = fixed[j]; - if ( e.eoo() ) - b.appendNull( "" ); - else - b.appendAs( e , "" ); + // not an array - no need for further expansion + fixed[ i ] = e; + } + } + + if ( arrElt.eoo() ) { + // No array, so generate a single key. + if ( _spec._sparse && numNotFound == _spec._nFields ) { + return; + } + BSONObjBuilder b(_spec._sizeTracker); + for( vector< BSONElement >::iterator i = fixed.begin(); i != fixed.end(); ++i ) { + b.appendAs( *i, "" ); + } + keys.insert( b.obj() ); + } + else if ( arrElt.embeddedObject().firstElement().eoo() ) { + // Empty array, so set matching fields to undefined. + _getKeysArrEltFixed( fieldNames, fixed, _spec._undefinedElt, keys, numNotFound, arrElt, arrIdxs, true ); + } + else { + // Non empty array that can be expanded, so generate a key for each member. + BSONObj arrObj = arrElt.embeddedObject(); + BSONObjIterator i( arrObj ); + while( i.more() ) { + _getKeysArrEltFixed( fieldNames, fixed, i.next(), keys, numNotFound, arrElt, arrIdxs, mayExpandArrayUnembedded ); } } - keys.insert( b.obj() ); + } + + const IndexSpec &_spec; + }; + + void IndexSpec::getKeys( const BSONObj &obj, BSONObjSet &keys ) const { + switch( indexVersion() ) { + case 0: { + KeyGeneratorV0 g( *this ); + g.getKeys( obj, keys ); + break; + } + case 1: { + KeyGeneratorV1 g( *this ); + g.getKeys( obj, keys ); + break; + } + default: + massert( 15869, "Invalid index version for key generation.", false ); } } @@ -275,6 +447,13 @@ namespace mongo { IndexSuitability IndexType::suitability( const BSONObj& query , const BSONObj& order ) const { return _spec->_suitability( query , order ); } + + int IndexSpec::indexVersion() const { + if ( !info.hasField( "v" ) ) { + return DefaultIndexVersionNumber; + } + return IndexDetails::versionForIndexObj( info ); + } bool IndexType::scanAndOrderRequired( const BSONObj& query , const BSONObj& order ) const { return ! order.isEmpty(); diff --git a/db/indexkey.h b/db/indexkey.h index 4a755f8a4e8..c04cd6396f6 100644 --- a/db/indexkey.h +++ b/db/indexkey.h @@ -25,6 +25,8 @@ namespace mongo { + extern const int DefaultIndexVersionNumber; + class Cursor; class IndexSpec; class IndexType; // TODO: this name sucks @@ -161,16 +163,21 @@ namespace mongo { protected: + int indexVersion() const; + IndexSuitability _suitability( const BSONObj& query , const BSONObj& order ) const ; - void _getKeys( vector<const char*> fieldNames , vector<BSONElement> fixed , const BSONObj &obj, BSONObjSet &keys ) const; - BSONSizeTracker _sizeTracker; vector<const char*> _fieldNames; vector<BSONElement> _fixed; + BSONObj _nullKey; // a full key with all fields null BSONObj _nullObj; // only used for _nullElt BSONElement _nullElt; // jstNull + + BSONObj _undefinedObj; // only used for _undefinedElt + BSONElement _undefinedElt; // undefined + int _nFields; // number of fields in the index bool _sparse; // if the index is sparse shared_ptr<IndexType> _indexType; @@ -179,6 +186,8 @@ namespace mongo { void _init(); friend class IndexType; + friend class KeyGeneratorV0; + friend class KeyGeneratorV1; public: bool _finishedInit; }; diff --git a/db/instance.cpp b/db/instance.cpp index ede433d652b..971cd2e7b38 100644 --- a/db/instance.cpp +++ b/db/instance.cpp @@ -587,7 +587,7 @@ namespace mongo { } NOINLINE_DECL void insertMulti(DbMessage& d, const char *ns, const BSONObj& _js) { - const bool keepGoing = d.reservedField() & InsertOption_KeepGoing; + const bool keepGoing = d.reservedField() & InsertOption_ContinueOnError; int n = 0; BSONObj js(_js); while( 1 ) { diff --git a/db/instance.h b/db/instance.h index 2b86eb44fce..422c77d5ffa 100644 --- a/db/instance.h +++ b/db/instance.h @@ -147,6 +147,8 @@ namespace mongo { virtual ConnectionString::ConnectionType type() const { return ConnectionString::MASTER; } double getSoTimeout() const { return 0; } + + virtual bool lazySupported() const { return true; } private: static HostAndPort _clientHost; }; diff --git a/db/introspect.cpp b/db/introspect.cpp index ca65710b3fc..7e1d19ce2f3 100644 --- a/db/introspect.cpp +++ b/db/introspect.cpp @@ -40,7 +40,7 @@ namespace mongo { profileBufBuilder.reset(); BSONObjBuilder b(profileBufBuilder); b.appendDate("ts", jsTime()); - currentOp.debug().append( b ); + currentOp.debug().append( currentOp , b ); b.append("client", c.clientAddress() ); @@ -49,6 +49,26 @@ namespace mongo { BSONObj p = b.done(); + if (p.objsize() > 100*1024){ + string small = p.toString(/*isArray*/false, /*full*/false); + + warning() << "can't add full line to system.profile: " << small; + + // rebuild with limited info + BSONObjBuilder b(profileBufBuilder); + b.appendDate("ts", jsTime()); + b.append("client", c.clientAddress() ); + if ( c.getAuthenticationInfo() ) + b.append( "user" , c.getAuthenticationInfo()->getUser( nsToDatabase( ns ) ) ); + + b.append("err", "profile line too large (max is 100KB)"); + if (small.size() < 100*1024){ // should be much smaller but if not don't break anything + b.append("abbreviated", small); + } + + p = b.done(); + } + // write: not replicated NamespaceDetails *d = db->namespaceIndex.details(ns); if( d ) { diff --git a/db/jsobj.cpp b/db/jsobj.cpp index 53c2329bd35..dcb77447873 100644 --- a/db/jsobj.cpp +++ b/db/jsobj.cpp @@ -45,7 +45,7 @@ BOOST_STATIC_ASSERT( sizeof(mongo::OID) == 12 ); namespace mongo { - BSONElement nullElement; + BSONElement eooElement; GENOIDLabeler GENOID; @@ -508,6 +508,12 @@ namespace mongo { } BSONObj staticNull = fromjson( "{'':null}" ); + BSONObj makeUndefined() { + BSONObjBuilder b; + b.appendUndefined( "" ); + return b.obj(); + } + BSONObj staticUndefined = makeUndefined(); /* well ordered compare */ int BSONObj::woSortOrder(const BSONObj& other, const BSONObj& sortKey , bool useDotted ) const { @@ -613,13 +619,13 @@ namespace mongo { } if ( sub.eoo() ) - return nullElement; - else if ( sub.type() == Array || name[0] == '\0') + return eooElement; + else if ( sub.type() == Array || name[0] == '\0' ) return sub; else if ( sub.type() == Object ) return sub.embeddedObject().getFieldDottedOrArray( name ); else - return nullElement; + return eooElement; } /** @@ -919,7 +925,7 @@ namespace mongo { c.appendRegex("x", "goo"); BSONObj p = c.done(); - assert( !o.shallowEqual( p ) ); + assert( !o.binaryEqual( p ) ); assert( o.woCompare( p ) < 0 ); } @@ -1024,7 +1030,7 @@ namespace mongo { BSONObj a = A.done(); BSONObj b = B.done(); BSONObj c = C.done(); - assert( !a.shallowEqual( b ) ); // comments on operator== + assert( !a.binaryEqual( b ) ); // comments on operator== int cmp = a.woCompare(b); assert( cmp == 0 ); cmp = a.woCompare(c); @@ -1167,13 +1173,9 @@ namespace mongo { while (l.more() && r.more()){ if (strcmp(l.next().fieldName(), r.next().fieldName())) { - PRINTFL; return false; } } - PRINT(l.more()); - PRINT(r.more()); - PRINT(l.more() || r.more()); return !(l.more() || r.more()); // false if lhs and rhs have diff nFields() } diff --git a/db/key.cpp b/db/key.cpp index ddc2d593350..648502ebf17 100644 --- a/db/key.cpp +++ b/db/key.cpp @@ -264,15 +264,17 @@ namespace mongo { if( (t & 0x78) == 0 && t != ByteArrayDeprecated ) { int len; const char * d = e.binData(len); - int code = BinDataLengthToCode[len]; - if( code >= 0 ) { - if( t >= 128 ) - t = (t-128) | 0x08; - dassert( (code&t) == 0 ); - b.appendUChar( cbindata|bits ); - b.appendUChar( code | t ); - b.appendBuf(d, len); - break; + if( len <= BinDataLenMax ) { + int code = BinDataLengthToCode[len]; + if( code >= 0 ) { + if( t >= 128 ) + t = (t-128) | 0x08; + dassert( (code&t) == 0 ); + b.appendUChar( cbindata|bits ); + b.appendUChar( code | t ); + b.appendBuf(d, len); + break; + } } } traditional(obj); diff --git a/db/matcher.cpp b/db/matcher.cpp index 23d5a7057bf..2b92d5797c3 100644 --- a/db/matcher.cpp +++ b/db/matcher.cpp @@ -64,8 +64,14 @@ namespace mongo { } ~Where() { - if ( scope.get() ) - scope->execSetup( "_mongo.readOnly = false;" , "make not read only" ); + if ( scope.get() ){ + try { + scope->execSetup( "_mongo.readOnly = false;" , "make not read only" ); + } + catch( DBException& e ){ + warning() << "javascript scope cleanup interrupted" << causedBy( e ) << endl; + } + } if ( jsScope ) { delete jsScope; @@ -148,6 +154,9 @@ namespace mongo { rm._prefix = prefix; } else { + uassert( 15882, "$elemMatch not allowed within $in", + ie.type() != Object || + ie.embeddedObject().firstElement().getGtLtOp() != BSONObj::opELEM_MATCH ); _myset->insert(ie); } } diff --git a/db/modules/mms.cpp b/db/modules/mms.cpp index 28fc225477f..40abb391dfb 100644 --- a/db/modules/mms.cpp +++ b/db/modules/mms.cpp @@ -142,7 +142,7 @@ namespace mongo { string errmsg; BSONObjBuilder sub; - if ( ! c->run( "admin.$cmd" , co , errmsg , sub , false ) ) + if ( ! c->run( "admin.$cmd" , co , 0 , errmsg , sub , false ) ) postData.append( cmd , errmsg ); else postData.append( cmd , sub.obj() ); diff --git a/db/mongommf.h b/db/mongommf.h index b347e4ff259..0c4e8e4a19d 100644 --- a/db/mongommf.h +++ b/db/mongommf.h @@ -75,7 +75,7 @@ namespace mongo { fileSuffixNo() is 3 if the suffix is "ns", fileSuffixNo -1 */ - RelativePath relativePath() const { + const RelativePath& relativePath() const { DEV assert( !_p._p.empty() ); return _p; } diff --git a/db/namespace.cpp b/db/namespace.cpp index 927f56b6e7b..2bc7409e56c 100644 --- a/db/namespace.cpp +++ b/db/namespace.cpp @@ -604,6 +604,17 @@ namespace mongo { } } + void NamespaceDetailsTransient::eraseForPrefix(const char *prefix) { + assertInWriteLock(); + vector< string > found; + for( ouriter i = _map.begin(); i != _map.end(); ++i ) + if ( strncmp( i->first.c_str(), prefix, strlen( prefix ) ) == 0 ) + found.push_back( i->first ); + for( vector< string >::iterator i = found.begin(); i != found.end(); ++i ) { + _map.erase(*i); + } + } + void NamespaceDetailsTransient::computeIndexKeys() { _keysComputed = true; _indexKeys.clear(); @@ -657,7 +668,7 @@ namespace mongo { // index details across commands are in cursors and nsd // transient (including query cache) so clear these. ClientCursor::invalidate( from ); - NamespaceDetailsTransient::clearForPrefix( from ); + NamespaceDetailsTransient::eraseForPrefix( from ); NamespaceDetails *details = ni->details( from ); ni->add_ns( to, *details ); diff --git a/db/namespace.h b/db/namespace.h index a1b7c2274bc..3dfb3f33767 100644 --- a/db/namespace.h +++ b/db/namespace.h @@ -454,6 +454,7 @@ namespace mongo { Can be useful as index namespaces share the same start as the regular collection. SLOW - sequential scan of all NamespaceDetailsTransient objects */ static void clearForPrefix(const char *prefix); + static void eraseForPrefix(const char *prefix); /** * @return a cursor interface to the query optimizer. The implementation may diff --git a/db/oplog.cpp b/db/oplog.cpp index 7286fd9053c..dc9db76d9d5 100644 --- a/db/oplog.cpp +++ b/db/oplog.cpp @@ -473,9 +473,9 @@ namespace mongo { return _qp.nsd()->capFirstNewRecord; } - void assertExtentNonempty( const Extent *e ) { + void wassertExtentNonempty( const Extent *e ) { // TODO ensure this requirement is clearly enforced, or fix. - massert( 14834, "empty extent found during finding start scan", !e->firstRecord.isNull() ); + wassert( !e->firstRecord.isNull() ); } DiskLoc FindingStartCursor::prevExtentFirstLoc( const DiskLoc &rec ) { @@ -488,14 +488,14 @@ namespace mongo { e = e->xprev.ext(); } if ( e->myLoc != _qp.nsd()->capExtent ) { - assertExtentNonempty( e ); + wassertExtentNonempty( e ); return e->firstRecord; } } else { if ( !e->xprev.isNull() ) { e = e->xprev.ext(); - assertExtentNonempty( e ); + wassertExtentNonempty( e ); return e->firstRecord; } } @@ -506,20 +506,30 @@ namespace mongo { shared_ptr<Cursor> c = _qp.newCursor( startLoc ); _findingStartCursor.reset( new ClientCursor(QueryOption_NoCursorTimeout, c, _qp.ns()) ); } + + bool FindingStartCursor::firstDocMatchesOrEmpty() const { + shared_ptr<Cursor> c = _qp.newCursor(); + return !c->ok() || _matcher->matchesCurrent( c.get() ); + } void FindingStartCursor::init() { - // Use a ClientCursor here so we can release db mutex while scanning - // oplog (can take quite a while with large oplogs). - shared_ptr<Cursor> c = _qp.newReverseCursor(); - _findingStartCursor.reset( new ClientCursor(QueryOption_NoCursorTimeout, c, _qp.ns(), BSONObj()) ); - _findingStartTimer.reset(); - _findingStartMode = Initial; BSONElement tsElt = _qp.originalQuery()[ "ts" ]; massert( 13044, "no ts field in query", !tsElt.eoo() ); BSONObjBuilder b; b.append( tsElt ); BSONObj tsQuery = b.obj(); _matcher.reset(new CoveredIndexMatcher(tsQuery, _qp.indexKey())); + if ( firstDocMatchesOrEmpty() ) { + _c = _qp.newCursor(); + _findingStart = false; + return; + } + // Use a ClientCursor here so we can release db mutex while scanning + // oplog (can take quite a while with large oplogs). + shared_ptr<Cursor> c = _qp.newReverseCursor(); + _findingStartCursor.reset( new ClientCursor(QueryOption_NoCursorTimeout, c, _qp.ns(), BSONObj()) ); + _findingStartTimer.reset(); + _findingStartMode = Initial; } // ------------------------------------- @@ -704,7 +714,7 @@ namespace mongo { virtual void help( stringstream &help ) const { help << "internal (sharding)\n{ applyOps : [ ] , preCondition : [ { ns : ... , q : ... , res : ... } ] }"; } - virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { if ( cmdObj.firstElement().type() != Array ) { errmsg = "ops has to be an array"; diff --git a/db/oplog.h b/db/oplog.h index f87a1c85e04..79fb01b0a4d 100644 --- a/db/oplog.h +++ b/db/oplog.h @@ -118,6 +118,7 @@ namespace mongo { _findingStartCursor.reset( 0 ); } void init(); + bool firstDocMatchesOrEmpty() const; }; void pretouchOperation(const BSONObj& op); diff --git a/db/ops/query.cpp b/db/ops/query.cpp index 120382fa7d8..f13b6e5ea4b 100644 --- a/db/ops/query.cpp +++ b/db/ops/query.cpp @@ -36,6 +36,7 @@ #include "../lasterror.h" #include "../../s/d_logic.h" #include "../repl_block.h" +#include "../../server.h" namespace mongo { @@ -92,21 +93,15 @@ namespace mongo { ClientCursor::Pointer p(cursorid); ClientCursor *cc = p.c(); - int bufSize = 512; - if ( cc ) { - bufSize += sizeof( QueryResult ); - bufSize += MaxBytesToReturnToClientAtOnce; - } + int bufSize = 512 + sizeof( QueryResult ) + MaxBytesToReturnToClientAtOnce; BufBuilder b( bufSize ); - b.skip(sizeof(QueryResult)); - int resultFlags = ResultFlag_AwaitCapable; int start = 0; int n = 0; - if ( !cc ) { + if ( unlikely(!cc) ) { log() << "getMore: cursorid not found " << ns << " " << cursorid << endl; cursorid = 0; resultFlags = ResultFlag_CursorNotFound; @@ -420,6 +415,8 @@ namespace mongo { *_b << "indexBounds" << c->prettyIndexBounds(); + c->explainDetails( *_b ); + if ( !hint ) { *_b << "allPlans" << _a->arr(); } @@ -899,9 +896,6 @@ namespace mongo { if ( ! (explain || pq.showDiskLoc()) && isSimpleIdQuery( query ) && !pq.hasOption( QueryOption_CursorTailable ) ) { - //NamespaceDetails* d = nsdetails(ns); - //uassert(14820, "capped collections have no _id index by default, can only query by _id if one added", d == NULL || d->haveIdIndex() ); - bool nsFound = false; bool indexFound = false; diff --git a/db/ops/update.cpp b/db/ops/update.cpp index 3221fe0f277..d70048d2cc2 100644 --- a/db/ops/update.cpp +++ b/db/ops/update.cpp @@ -1060,11 +1060,10 @@ namespace mongo { debug.updateobj = updateobj; - /* idea with these here it to make them loop invariant for multi updates, and thus be a bit faster for that case */ - /* NOTE: when yield() is added herein, these must be refreshed after each call to yield! */ + // idea with these here it to make them loop invariant for multi updates, and thus be a bit faster for that case + // The pointers may be left invalid on a failed or terminal yield recovery. NamespaceDetails *d = nsdetails(ns); // can be null if an upsert... NamespaceDetailsTransient *nsdt = &NamespaceDetailsTransient::get_w(ns); - /* end note */ auto_ptr<ModSet> mods; bool isOperatorUpdate = updateobj.firstElementFieldName()[0] == '$'; @@ -1105,6 +1104,9 @@ namespace mongo { shared_ptr< MultiCursor::CursorOp > opPtr( new UpdateOp( mods.get() && mods->hasDynamicArray() ) ); shared_ptr< MultiCursor > c( new MultiCursor( ns, patternOrig, BSONObj(), opPtr, true ) ); + d = nsdetails(ns); + nsdt = &NamespaceDetailsTransient::get_w(ns); + if( c->ok() ) { set<DiskLoc> seenObjects; MatchDetails details; @@ -1114,20 +1116,28 @@ namespace mongo { bool atomic = c->matcher()->docMatcher().atomic(); - // ***************** - if ( cc.get() == 0 ) { - shared_ptr< Cursor > cPtr = c; - cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , cPtr , ns ) ); - } - - if ( ! cc->yieldSometimes( ClientCursor::WillNeed ) ) { - cc.release(); - break; - } - if ( !c->ok() ) { - break; + if ( !atomic ) { + // ***************** + if ( cc.get() == 0 ) { + shared_ptr< Cursor > cPtr = c; + cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , cPtr , ns ) ); + } + + bool didYield; + if ( ! cc->yieldSometimes( ClientCursor::WillNeed, &didYield ) ) { + cc.release(); + break; + } + if ( !c->ok() ) { + break; + } + + if ( didYield ) { + d = nsdetails(ns); + nsdt = &NamespaceDetailsTransient::get_w(ns); + } + // ***************** } - // ***************** // May have already matched in UpdateOp, but do again to get details set correctly if ( ! c->matcher()->matchesCurrent( c.get(), &details ) ) { @@ -1146,6 +1156,8 @@ namespace mongo { if ( !c->ok() ) { break; } + d = nsdetails(ns); + nsdt = &NamespaceDetailsTransient::get_w(ns); } continue; } @@ -1276,10 +1288,11 @@ namespace mongo { if ( !c->ok() ) { break; } + d = nsdetails(ns); + nsdt = &NamespaceDetailsTransient::get_w(ns); } - if (atomic) - getDur().commitIfNeeded(); + getDur().commitIfNeeded(); continue; } diff --git a/db/pdfile.cpp b/db/pdfile.cpp index 0b7a5b0830d..0569ba6868e 100644 --- a/db/pdfile.cpp +++ b/db/pdfile.cpp @@ -869,6 +869,7 @@ namespace mongo { result.append("ns", name.c_str()); ClientCursor::invalidate(name.c_str()); Top::global.collectionDropped( name ); + NamespaceDetailsTransient::eraseForPrefix( name.c_str() ); dropNS(name); } @@ -967,7 +968,7 @@ namespace mongo { } } - void DataFileMgr::deleteRecord(const char *ns, Record *todelete, const DiskLoc& dl, bool cappedOK, bool noWarn) { + void DataFileMgr::deleteRecord(const char *ns, Record *todelete, const DiskLoc& dl, bool cappedOK, bool noWarn, bool doLog ) { dassert( todelete == dl.rec() ); NamespaceDetails* d = nsdetails(ns); @@ -976,6 +977,14 @@ namespace mongo { uassert( 10089 , "can't remove from a capped collection" , 0 ); return; } + + BSONObj toDelete; + if ( doLog ) { + BSONElement e = dl.obj()["_id"]; + if ( e.type() ) { + toDelete = e.wrap(); + } + } /* check if any cursors point to us. if so, advance them. */ ClientCursor::aboutToDelete(dl); @@ -984,6 +993,10 @@ namespace mongo { _deleteRecord(d, ns, todelete, dl); NamespaceDetailsTransient::get_w( ns ).notifyOfWriteOp(); + + if ( ! toDelete.isEmpty() ) { + logOp( "d" , ns , toDelete ); + } } @@ -1181,7 +1194,13 @@ namespace mongo { BSONObjExternalSorter::Data d = i->next(); try { - btBuilder.addKey(d.first, d.second); + if ( !dupsAllowed && dropDups ) { + LastError::Disabled led( lastError.get() ); + btBuilder.addKey(d.first, d.second); + } + else { + btBuilder.addKey(d.first, d.second); + } } catch( AssertionException& e ) { if ( dupsAllowed ) { @@ -1189,8 +1208,9 @@ namespace mongo { throw; } - if( e.interrupted() ) - throw; + if( e.interrupted() ) { + killCurrentOp.checkForInterrupt(); + } if ( ! dropDups ) throw; @@ -1276,7 +1296,7 @@ namespace mongo { log(1) << "\t fastBuildIndex dupsToDrop:" << dupsToDrop.size() << endl; for( list<DiskLoc>::iterator i = dupsToDrop.begin(); i != dupsToDrop.end(); i++ ){ - theDataFileMgr.deleteRecord( ns, i->rec(), *i, false, true ); + theDataFileMgr.deleteRecord( ns, i->rec(), *i, false, true , true ); getDur().commitIfNeeded(); } @@ -1302,18 +1322,27 @@ namespace mongo { while ( cc->ok() ) { BSONObj js = cc->current(); try { - _indexRecord(d, idxNo, js, cc->currLoc(), dupsAllowed); + { + if ( !dupsAllowed && dropDups ) { + LastError::Disabled led( lastError.get() ); + _indexRecord(d, idxNo, js, cc->currLoc(), dupsAllowed); + } + else { + _indexRecord(d, idxNo, js, cc->currLoc(), dupsAllowed); + } + } cc->advance(); } catch( AssertionException& e ) { - if( e.interrupted() ) - throw; + if( e.interrupted() ) { + killCurrentOp.checkForInterrupt(); + } if ( dropDups ) { DiskLoc toDelete = cc->currLoc(); bool ok = cc->advance(); cc->updateLocation(); - theDataFileMgr.deleteRecord( ns, toDelete.rec(), toDelete, false, true ); + theDataFileMgr.deleteRecord( ns, toDelete.rec(), toDelete, false, true , true ); if( ClientCursor::find(id, false) == 0 ) { cc.release(); if( !ok ) { diff --git a/db/pdfile.h b/db/pdfile.h index 0f45e6d337e..64dba68ca41 100644 --- a/db/pdfile.h +++ b/db/pdfile.h @@ -142,7 +142,7 @@ namespace mongo { static Record* getRecord(const DiskLoc& dl); static DeletedRecord* makeDeletedRecord(const DiskLoc& dl, int len); - void deleteRecord(const char *ns, Record *todelete, const DiskLoc& dl, bool cappedOK = false, bool noWarn = false); + void deleteRecord(const char *ns, Record *todelete, const DiskLoc& dl, bool cappedOK = false, bool noWarn = false, bool logOp=false); /* does not clean up indexes, etc. : just deletes the record in the pdfile. use deleteRecord() to unindex */ void _deleteRecord(NamespaceDetails *d, const char *ns, Record *todelete, const DiskLoc& dl); diff --git a/db/queryoptimizer.cpp b/db/queryoptimizer.cpp index 4173eaaa2cd..e49e9b11ecb 100644 --- a/db/queryoptimizer.cpp +++ b/db/queryoptimizer.cpp @@ -52,7 +52,7 @@ namespace mongo { QueryPlan::QueryPlan( NamespaceDetails *d, int idxNo, - const FieldRangeSetPair &frsp, const FieldRangeSetPair &originalFrsp, const BSONObj &originalQuery, const BSONObj &order, const BSONObj &startKey, const BSONObj &endKey , string special ) : + const FieldRangeSetPair &frsp, const FieldRangeSetPair *originalFrsp, const BSONObj &originalQuery, const BSONObj &order, const BSONObj &startKey, const BSONObj &endKey , string special ) : _d(d), _idxNo(idxNo), _frs( frsp.frsForIndex( _d, _idxNo ) ), _frsMulti( frsp.frsForIndex( _d, -1 ) ), @@ -166,12 +166,17 @@ doneCheckOrder: _optimal = true; if ( exactIndexedQueryCount == _frs.nNontrivialRanges() && orderFieldsUnindexed.size() == 0 && - exactIndexedQueryCount == _index->keyPattern().nFields() && + exactIndexedQueryCount == idxKey.nFields() && exactIndexedQueryCount == _originalQuery.nFields() ) { _exactKeyMatch = true; } _frv.reset( new FieldRangeVector( _frs, idxSpec, _direction ) ); - _originalFrv.reset( new FieldRangeVector( originalFrsp.frsForIndex( _d, _idxNo ), idxSpec, _direction ) ); + if ( originalFrsp ) { + _originalFrv.reset( new FieldRangeVector( originalFrsp->frsForIndex( _d, _idxNo ), idxSpec, _direction ) ); + } + else { + _originalFrv = _frv; + } if ( _startOrEndSpec ) { BSONObj newStart, newEnd; if ( !startKey.isEmpty() ) @@ -206,8 +211,25 @@ doneCheckOrder: } if ( willScanTable() ) { - if ( _frs.nNontrivialRanges() ) + if ( _frs.nNontrivialRanges() ) { checkTableScanAllowed( _frs.ns() ); + + // if we are doing a table scan on _id + // and its a capped collection + // we disallow as its a common user error + // .system. and local collections are exempt + if ( _d && _d->capped && _frs.range( "_id" ).nontrivial() ) { + if ( cc().isSyncThread() || + str::contains( _frs.ns() , ".system." ) || + str::startsWith( _frs.ns() , "local." ) ) { + // ok + } + else { + warning() << "_id query on capped collection without an _id index, performance will be poor collection: " << _frs.ns() << endl; + //uassert( 14820, str::stream() << "doing _id query on a capped collection without an index is not allowed: " << _frs.ns() , + } + } + } return findTableScan( _frs.ns(), _order, startLoc ); } @@ -328,7 +350,7 @@ doneCheckOrder: massert( 10365 , errmsg, indexDetailsForRange( _frsp->ns(), errmsg, _min, _max, keyPattern ) ); } NamespaceDetails *d = nsdetails(_ns); - _plans.push_back( QueryPlanPtr( new QueryPlan( d, d->idxNo(id), *_frsp, *_originalFrsp, _originalQuery, _order, _min, _max ) ) ); + _plans.push_back( QueryPlanPtr( new QueryPlan( d, d->idxNo(id), *_frsp, _originalFrsp.get(), _originalQuery, _order, _min, _max ) ) ); } // returns an IndexDetails * for a hint, 0 if hint is $natural. @@ -374,7 +396,7 @@ doneCheckOrder: NamespaceDetails *d = nsdetails( ns ); if ( !d || !_frsp->matchPossible() ) { // Table scan plan, when no matches are possible - _plans.push_back( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, *_originalFrsp, _originalQuery, _order ) ) ); + _plans.push_back( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, _originalFrsp.get(), _originalQuery, _order ) ) ); return; } @@ -388,7 +410,7 @@ doneCheckOrder: else { massert( 10366 , "natural order cannot be specified with $min/$max", _min.isEmpty() && _max.isEmpty() ); // Table scan plan - _plans.push_back( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, *_originalFrsp, _originalQuery, _order ) ) ); + _plans.push_back( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, _originalFrsp.get(), _originalQuery, _order ) ) ); } return; } @@ -398,7 +420,7 @@ doneCheckOrder: BSONObj keyPattern; IndexDetails *idx = indexDetailsForRange( ns, errmsg, _min, _max, keyPattern ); massert( 10367 , errmsg, idx ); - _plans.push_back( QueryPlanPtr( new QueryPlan( d, d->idxNo(*idx), *_frsp, *_originalFrsp, _originalQuery, _order, _min, _max ) ) ); + _plans.push_back( QueryPlanPtr( new QueryPlan( d, d->idxNo(*idx), *_frsp, _originalFrsp.get(), _originalQuery, _order, _min, _max ) ) ); return; } @@ -407,13 +429,13 @@ doneCheckOrder: if ( idx >= 0 ) { _usingPrerecordedPlan = true; _mayRecordPlan = false; - _plans.push_back( QueryPlanPtr( new QueryPlan( d , idx , *_frsp , *_originalFrsp , _originalQuery, _order ) ) ); + _plans.push_back( QueryPlanPtr( new QueryPlan( d , idx , *_frsp , _originalFrsp.get() , _originalQuery, _order ) ) ); return; } } if ( _originalQuery.isEmpty() && _order.isEmpty() ) { - _plans.push_back( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, *_originalFrsp, _originalQuery, _order ) ) ); + _plans.push_back( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, _originalFrsp.get(), _originalQuery, _order ) ) ); return; } @@ -428,7 +450,7 @@ doneCheckOrder: if ( spec.getTypeName() == _special && spec.suitability( _originalQuery , _order ) ) { _usingPrerecordedPlan = true; _mayRecordPlan = false; - _plans.push_back( QueryPlanPtr( new QueryPlan( d , j , *_frsp , *_originalFrsp , _originalQuery, _order , + _plans.push_back( QueryPlanPtr( new QueryPlan( d , j , *_frsp , _originalFrsp.get() , _originalQuery, _order , BSONObj() , BSONObj() , _special ) ) ); return; } @@ -445,7 +467,7 @@ doneCheckOrder: _oldNScanned = oldNScanned; if ( !strcmp( bestIndex.firstElementFieldName(), "$natural" ) ) { // Table scan plan - p.reset( new QueryPlan( d, -1, *_frsp, *_originalFrsp, _originalQuery, _order ) ); + p.reset( new QueryPlan( d, -1, *_frsp, _originalFrsp.get(), _originalQuery, _order ) ); } NamespaceDetails::IndexIterator i = d->ii(); @@ -453,7 +475,7 @@ doneCheckOrder: int j = i.pos(); IndexDetails& ii = i.next(); if( ii.keyPattern().woCompare(bestIndex) == 0 ) { - p.reset( new QueryPlan( d, j, *_frsp, *_originalFrsp, _originalQuery, _order ) ); + p.reset( new QueryPlan( d, j, *_frsp, _originalFrsp.get(), _originalQuery, _order ) ); } } @@ -480,7 +502,7 @@ doneCheckOrder: if ( !_frsp->matchPossible() || ( _frsp->noNontrivialRanges() && _order.isEmpty() ) || ( !_order.isEmpty() && !strcmp( _order.firstElementFieldName(), "$natural" ) ) ) { // Table scan plan - addPlan( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, *_originalFrsp, _originalQuery, _order ) ), checkFirst ); + addPlan( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, _originalFrsp.get(), _originalQuery, _order ) ), checkFirst ); return; } @@ -490,10 +512,11 @@ doneCheckOrder: QueryPlanPtr optimalPlan; for( int i = 0; i < d->nIndexes; ++i ) { if ( normalQuery ) { - if ( !_frsp->matchPossibleForIndex( d, i, d->idx( i ).keyPattern() ) ) { + BSONObj keyPattern = d->idx( i ).keyPattern(); + if ( !_frsp->matchPossibleForIndex( d, i, keyPattern ) ) { // If no match is possible, only generate a trival plan that won't // scan any documents. - QueryPlanPtr p( new QueryPlan( d, i, *_frsp, *_originalFrsp, _originalQuery, _order ) ); + QueryPlanPtr p( new QueryPlan( d, i, *_frsp, _originalFrsp.get(), _originalQuery, _order ) ); addPlan( p, checkFirst ); return; } @@ -502,7 +525,7 @@ doneCheckOrder: } } - QueryPlanPtr p( new QueryPlan( d, i, *_frsp, *_originalFrsp, _originalQuery, _order ) ); + QueryPlanPtr p( new QueryPlan( d, i, *_frsp, _originalFrsp.get(), _originalQuery, _order ) ); if ( p->optimal() ) { if ( !optimalPlan.get() ) { optimalPlan = p; @@ -520,7 +543,7 @@ doneCheckOrder: addPlan( *i, checkFirst ); // Table scan plan - addPlan( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, *_originalFrsp, _originalQuery, _order ) ), checkFirst ); + addPlan( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, _originalFrsp.get(), _originalQuery, _order ) ), checkFirst ); } shared_ptr<QueryOp> QueryPlanSet::runOp( QueryOp &op ) { @@ -538,7 +561,7 @@ doneCheckOrder: return r.runUntilFirstCompletes(); } - shared_ptr<QueryOp> QueryPlanSet::nextOp( QueryOp &originalOp ) { + shared_ptr<QueryOp> QueryPlanSet::nextOp( QueryOp &originalOp, bool retried ) { if ( !_runner ) { _runner.reset( new Runner( *this, originalOp ) ); shared_ptr<QueryOp> op = _runner->init(); @@ -553,10 +576,14 @@ doneCheckOrder: if ( !_usingPrerecordedPlan || _bestGuessOnly || _plans.size() > 1 ) { return op; } + + // Avoid an infinite loop here + uassert( 15878, str::stream() << "query plans not successful even with no constraints, potentially due to additional sort", ! retried ); + // Retry with all candidate plans. QueryUtilIndexed::clearIndexesForPatterns( *_frsp, _order ); init(); - return nextOp( originalOp ); + return nextOp( originalOp, true ); } bool QueryPlanSet::prepareToYield() { @@ -815,24 +842,29 @@ doneCheckOrder: _ns( ns ), _or( !query.getField( "$or" ).eoo() ), _query( query.getOwned() ), - _org( ns, _query ), _i(), _honorRecordedPlan( honorRecordedPlan ), _bestGuessOnly( bestGuessOnly ), _hint( ( hint && !hint->eoo() ) ? hint->wrap() : BSONObj() ), _mayYield( mayYield ), _tableScanned() { - if ( !order.isEmpty() || !min.isEmpty() || !max.isEmpty() || !_org.getSpecial().empty() ) { + if ( !order.isEmpty() || !min.isEmpty() || !max.isEmpty() ) { _or = false; } - if ( _or && uselessOr( _hint.firstElement() ) ) { - _or = false; + if ( _or ) { + // Only construct an OrRangeGenerator if we may handle $or clauses. + _org.reset( new OrRangeGenerator( ns, _query ) ); + if ( !_org->getSpecial().empty() ) { + _or = false; + } + else if ( uselessOr( _hint.firstElement() ) ) { + _or = false; + } } // if _or == false, don't use or clauses for index selection if ( !_or ) { auto_ptr<FieldRangeSetPair> frsp( new FieldRangeSetPair( ns, _query, true ) ); - auto_ptr<FieldRangeSetPair> oldFrsp( new FieldRangeSetPair( *frsp ) ); - _currentQps.reset( new QueryPlanSet( ns, frsp, oldFrsp, _query, order, hint, honorRecordedPlan, min, max, _bestGuessOnly, _mayYield ) ); + _currentQps.reset( new QueryPlanSet( ns, frsp, auto_ptr<FieldRangeSetPair>(), _query, order, hint, honorRecordedPlan, min, max, _bestGuessOnly, _mayYield ) ); } else { BSONElement e = _query.getField( "$or" ); @@ -847,8 +879,8 @@ doneCheckOrder: return _currentQps->runOp( op ); } ++_i; - auto_ptr<FieldRangeSetPair> frsp( _org.topFrsp() ); - auto_ptr<FieldRangeSetPair> originalFrsp( _org.topFrspOriginal() ); + auto_ptr<FieldRangeSetPair> frsp( _org->topFrsp() ); + auto_ptr<FieldRangeSetPair> originalFrsp( _org->topFrspOriginal() ); BSONElement hintElt = _hint.firstElement(); _currentQps.reset( new QueryPlanSet( _ns, frsp, originalFrsp, _query, BSONObj(), &hintElt, _honorRecordedPlan, BSONObj(), BSONObj(), _bestGuessOnly, _mayYield ) ); shared_ptr<QueryOp> ret( _currentQps->runOp( op ) ); @@ -856,7 +888,7 @@ doneCheckOrder: _tableScanned = true; } else { // If the full table was scanned, don't bother popping the last or clause. - _org.popOrClause( ret->qp().nsd(), ret->qp().idxNo(), ret->qp().indexed() ? ret->qp().indexKey() : BSONObj() ); + _org->popOrClause( ret->qp().nsd(), ret->qp().idxNo(), ret->qp().indexed() ? ret->qp().indexKey() : BSONObj() ); } return ret; } @@ -877,7 +909,7 @@ doneCheckOrder: if ( op->qp().willScanTable() ) { _tableScanned = true; } else { - _org.popOrClause( op->qp().nsd(), op->qp().idxNo(), op->qp().indexed() ? op->qp().indexKey() : BSONObj() ); + _org->popOrClause( op->qp().nsd(), op->qp().idxNo(), op->qp().indexed() ? op->qp().indexKey() : BSONObj() ); } return op; } @@ -887,8 +919,8 @@ doneCheckOrder: shared_ptr<QueryOp> op; while( mayRunMore() ) { ++_i; - auto_ptr<FieldRangeSetPair> frsp( _org.topFrsp() ); - auto_ptr<FieldRangeSetPair> originalFrsp( _org.topFrspOriginal() ); + auto_ptr<FieldRangeSetPair> frsp( _org->topFrsp() ); + auto_ptr<FieldRangeSetPair> originalFrsp( _org->topFrspOriginal() ); BSONElement hintElt = _hint.firstElement(); _currentQps.reset( new QueryPlanSet( _ns, frsp, originalFrsp, _query, BSONObj(), &hintElt, _honorRecordedPlan, BSONObj(), BSONObj(), _bestGuessOnly, _mayYield ) ); op = nextOpHandleEndOfClause(); @@ -954,9 +986,9 @@ doneCheckOrder: if ( !id ) { return true; } - return QueryUtilIndexed::uselessOr( _org, nsd, nsd->idxNo( *id ) ); + return QueryUtilIndexed::uselessOr( *_org, nsd, nsd->idxNo( *id ) ); } - return QueryUtilIndexed::uselessOr( _org, nsd, -1 ); + return QueryUtilIndexed::uselessOr( *_org, nsd, -1 ); } MultiCursor::MultiCursor( const char *ns, const BSONObj &pattern, const BSONObj &order, shared_ptr<CursorOp> op, bool mayYield ) @@ -1199,12 +1231,13 @@ doneCheckOrder: } bool QueryUtilIndexed::indexUseful( const FieldRangeSetPair &frsp, NamespaceDetails *d, int idxNo, const BSONObj &order ) { - frsp.assertValidIndex( d, idxNo ); - if ( !frsp.matchPossibleForIndex( d, idxNo, d->idx( idxNo ).keyPattern() ) ) { + DEV frsp.assertValidIndex( d, idxNo ); + BSONObj keyPattern = d->idx( idxNo ).keyPattern(); + if ( !frsp.matchPossibleForIndex( d, idxNo, keyPattern ) ) { // No matches are possible in the index so the index may be useful. return true; } - return d->idx( idxNo ).getSpec().suitability( frsp.simplifiedQueryForIndex( d, idxNo, d->idx( idxNo ).keyPattern() ), order ) != USELESS; + return d->idx( idxNo ).getSpec().suitability( frsp.simplifiedQueryForIndex( d, idxNo, keyPattern ), order ) != USELESS; } void QueryUtilIndexed::clearIndexesForPatterns( const FieldRangeSetPair &frsp, const BSONObj &order ) { diff --git a/db/queryoptimizer.h b/db/queryoptimizer.h index e55e791e1ca..ad6b985ab1f 100644 --- a/db/queryoptimizer.h +++ b/db/queryoptimizer.h @@ -35,10 +35,13 @@ namespace mongo { class QueryPlan : boost::noncopyable { public: + /** + * @param originalFrsp - original constraints for this query clause. If null, frsp will be used instead. + */ QueryPlan(NamespaceDetails *d, int idxNo, // -1 = no index const FieldRangeSetPair &frsp, - const FieldRangeSetPair &originalFrsp, + const FieldRangeSetPair *originalFrsp, const BSONObj &originalQuery, const BSONObj &order, const BSONObj &startKey = BSONObj(), @@ -245,6 +248,9 @@ namespace mongo { typedef boost::shared_ptr<QueryPlan> QueryPlanPtr; typedef vector<QueryPlanPtr> PlanSet; + /** + * @param originalFrsp - original constraints for this query clause; if null, frsp will be used. + */ QueryPlanSet( const char *ns, auto_ptr<FieldRangeSetPair> frsp, auto_ptr<FieldRangeSetPair> originalFrsp, @@ -272,7 +278,7 @@ namespace mongo { } /** Initialize or iterate a runner generated from @param originalOp. */ - shared_ptr<QueryOp> nextOp( QueryOp &originalOp ); + shared_ptr<QueryOp> nextOp( QueryOp &originalOp, bool retried = false ); /** Yield the runner member. */ @@ -290,7 +296,7 @@ namespace mongo { //for testing const FieldRangeSetPair &frsp() const { return *_frsp; } - const FieldRangeSetPair &originalFrsp() const { return *_originalFrsp; } + const FieldRangeSetPair *originalFrsp() const { return _originalFrsp.get(); } bool modifiedKeys() const; bool hasMultiKey() const; @@ -420,7 +426,7 @@ namespace mongo { shared_ptr<Cursor> singleCursor() const; /** @return true iff more $or clauses need to be scanned. */ - bool mayRunMore() const { return _or ? ( !_tableScanned && !_org.orFinished() ) : _i == 0; } + bool mayRunMore() const { return _or ? ( !_tableScanned && !_org->orFinished() ) : _i == 0; } /** @return non-$or version of explain output. */ BSONObj oldExplain() const { assertNotOr(); return _currentQps->explain(); } /** @return true iff this is not a $or query and a plan is selected based on previous success of this plan. */ @@ -445,7 +451,7 @@ namespace mongo { const char * _ns; bool _or; BSONObj _query; - OrRangeGenerator _org; + shared_ptr<OrRangeGenerator> _org; // May be null in certain non $or query cases. auto_ptr<QueryPlanSet> _currentQps; int _i; bool _honorRecordedPlan; diff --git a/db/queryutil-inl.h b/db/queryutil-inl.h index 2c3a757b385..d0fc212cef9 100644 --- a/db/queryutil-inl.h +++ b/db/queryutil-inl.h @@ -130,5 +130,24 @@ namespace mongo { } return ret; } + + inline bool FieldRangeSetPair::matchPossibleForIndex( NamespaceDetails *d, int idxNo, const BSONObj &keyPattern ) const { + assertValidIndexOrNoIndex( d, idxNo ); + if ( !matchPossible() ) { + return false; + } + if ( idxNo < 0 ) { + // multi key matchPossible() is true, so return true. + return true; + } + return frsForIndex( d, idxNo ).matchPossibleForIndex( keyPattern ); + } + inline void FieldRangeSetPair::assertValidIndexOrNoIndex( const NamespaceDetails *d, int idxNo ) const { + massert( 14049, "FieldRangeSetPair invalid index specified", idxNo >= -1 ); + if ( idxNo >= 0 ) { + assertValidIndex( d, idxNo ); + } + } + } // namespace mongo diff --git a/db/queryutil.cpp b/db/queryutil.cpp index ec9ee693511..717eac816b8 100644 --- a/db/queryutil.cpp +++ b/db/queryutil.cpp @@ -28,6 +28,7 @@ namespace mongo { extern BSONObj staticNull; + extern BSONObj staticUndefined; /** returns a string that when used as a matcher, would match a super set of regex() returns "" for complex regular expressions @@ -79,6 +80,10 @@ namespace mongo { r = r.substr( 0 , r.size() - 1 ); return r; //breaking here fails with /^a?/ } + else if (c == '|') { + // whole match so far is optional. Nothing we can do here. + return string(); + } else if (c == '\\') { c = *(regex++); if (c == 'Q'){ @@ -107,7 +112,7 @@ namespace mongo { ss << c; } } - else if (strchr("^$.[|()+{", c)) { + else if (strchr("^$.[()+{", c)) { // list of "metacharacters" from man pcrepattern r = ss.str(); break; @@ -153,25 +158,33 @@ namespace mongo { FieldRange::FieldRange( const BSONElement &e, bool singleKey, bool isNot, bool optimize ) : _singleKey( singleKey ) { + int op = e.getGtLtOp(); + // NOTE with $not, we could potentially form a complementary set of intervals. - if ( !isNot && !e.eoo() && e.type() != RegEx && e.getGtLtOp() == BSONObj::opIN ) { + if ( !isNot && !e.eoo() && e.type() != RegEx && op == BSONObj::opIN ) { set<BSONElement,element_lt> vals; vector<FieldRange> regexes; uassert( 12580 , "invalid query" , e.isABSONObj() ); BSONObjIterator i( e.embeddedObject() ); while( i.more() ) { BSONElement ie = i.next(); + uassert( 15881, "$elemMatch not allowed within $in", + ie.type() != Object || + ie.embeddedObject().firstElement().getGtLtOp() != BSONObj::opELEM_MATCH ); if ( ie.type() == RegEx ) { regexes.push_back( FieldRange( ie, singleKey, false, optimize ) ); } else { - // A document array may be indexed by its first element, or - // as a full array if it is embedded within another array. + // A document array may be indexed by its first element, by undefined + // if it is empty, or as a full array if it is embedded within another + // array. vals.insert( ie ); if ( ie.type() == Array ) { - if ( !ie.embeddedObject().firstElement().eoo() ) { - vals.insert( ie.embeddedObject().firstElement() ); - } + BSONElement temp = ie.embeddedObject().firstElement(); + if ( temp.eoo() ) { + temp = staticUndefined.firstElement(); + } + vals.insert( temp ); } } } @@ -185,17 +198,21 @@ namespace mongo { return; } - // A document array may be indexed by its first element, or - // as a full array if it is embedded within another array. - if ( e.type() == Array && e.getGtLtOp() == BSONObj::Equality ) { + // A document array may be indexed by its first element, by undefined + // if it is empty, or as a full array if it is embedded within another + // array. + if ( e.type() == Array && op == BSONObj::Equality ) { _intervals.push_back( FieldInterval(e) ); - const BSONElement& temp = e.embeddedObject().firstElement(); - if ( ! temp.eoo() ) { - if ( temp < e ) - _intervals.insert( _intervals.begin() , temp ); - else - _intervals.push_back( FieldInterval(temp) ); + BSONElement temp = e.embeddedObject().firstElement(); + if ( temp.eoo() ) { + temp = staticUndefined.firstElement(); + } + if ( temp < e ) { + _intervals.insert( _intervals.begin() , temp ); + } + else { + _intervals.push_back( FieldInterval(temp) ); } return; @@ -215,8 +232,6 @@ namespace mongo { if ( e.eoo() ) return; - int op = e.getGtLtOp(); - bool existsSpec = false; if ( op == BSONObj::opEXISTS ) { existsSpec = e.trueValue(); @@ -622,6 +637,27 @@ namespace mongo { return o; } + string FieldInterval::toString() const { + StringBuilder buf; + buf << ( _lower._inclusive ? "[" : "(" ); + buf << _lower._bound; + buf << " , "; + buf << _upper._bound; + buf << ( _upper._inclusive ? "]" : ")" ); + return buf.str(); + } + + string FieldRange::toString() const { + StringBuilder buf; + buf << "(FieldRange special: " << _special << " singleKey: " << _special << " intervals: "; + for( vector<FieldInterval>::const_iterator i = _intervals.begin(); i != _intervals.end(); ++i ) { + buf << i->toString(); + } + + buf << ")"; + return buf.str(); + } + string FieldRangeSet::getSpecial() const { string s = ""; for ( map<string,FieldRange>::const_iterator i=_ranges.begin(); i!=_ranges.end(); i++ ) { @@ -773,30 +809,32 @@ namespace mongo { } void FieldRangeSet::processQueryField( const BSONElement &e, bool optimize ) { - if ( strcmp( e.fieldName(), "$and" ) == 0 ) { - uassert( 14816 , "$and expression must be a nonempty array" , e.type() == Array && e.embeddedObject().nFields() > 0 ); - BSONObjIterator i( e.embeddedObject() ); - while( i.more() ) { - BSONElement e = i.next(); - uassert( 14817 , "$and elements must be objects" , e.type() == Object ); - BSONObjIterator j( e.embeddedObject() ); - while( j.more() ) { - processQueryField( j.next(), optimize ); - } - } - } + if ( e.fieldName()[ 0 ] == '$' ) { + if ( strcmp( e.fieldName(), "$and" ) == 0 ) { + uassert( 14816 , "$and expression must be a nonempty array" , e.type() == Array && e.embeddedObject().nFields() > 0 ); + BSONObjIterator i( e.embeddedObject() ); + while( i.more() ) { + BSONElement e = i.next(); + uassert( 14817 , "$and elements must be objects" , e.type() == Object ); + BSONObjIterator j( e.embeddedObject() ); + while( j.more() ) { + processQueryField( j.next(), optimize ); + } + } + } - if ( strcmp( e.fieldName(), "$where" ) == 0 ) { - return; - } + if ( strcmp( e.fieldName(), "$where" ) == 0 ) { + return; + } - if ( strcmp( e.fieldName(), "$or" ) == 0 ) { - return; - } + if ( strcmp( e.fieldName(), "$or" ) == 0 ) { + return; + } - if ( strcmp( e.fieldName(), "$nor" ) == 0 ) { - return; - } + if ( strcmp( e.fieldName(), "$nor" ) == 0 ) { + return; + } + } bool equality = ( getGtLtOp( e ) == BSONObj::Equality ); if ( equality && e.type() == Object ) { @@ -1055,32 +1093,11 @@ namespace mongo { return ret; } - const FieldRangeSet &FieldRangeSetPair::frsForIndex( const NamespaceDetails* nsd, int idxNo ) const { - assertValidIndexOrNoIndex( nsd, idxNo ); - if ( idxNo < 0 ) { - // An unindexed cursor cannot have a "single key" constraint. - return _multiKey; - } - return nsd->isMultikey( idxNo ) ? _multiKey : _singleKey; - } - bool FieldRangeSetPair::noNontrivialRanges() const { return _singleKey.matchPossible() && _singleKey.nNontrivialRanges() == 0 && _multiKey.matchPossible() && _multiKey.nNontrivialRanges() == 0; } - bool FieldRangeSetPair::matchPossibleForIndex( NamespaceDetails *d, int idxNo, const BSONObj &keyPattern ) const { - assertValidIndexOrNoIndex( d, idxNo ); - if ( !matchPossible() ) { - return false; - } - if ( idxNo < 0 ) { - // multi key matchPossible() is true, so return true. - return true; - } - return frsForIndex( d, idxNo ).matchPossibleForIndex( keyPattern ); - } - FieldRangeSetPair &FieldRangeSetPair::operator&=( const FieldRangeSetPair &other ) { _singleKey &= other._singleKey; _multiKey &= other._multiKey; @@ -1093,21 +1110,23 @@ namespace mongo { return *this; } + BSONObj FieldRangeSetPair::simplifiedQueryForIndex( NamespaceDetails *d, int idxNo, const BSONObj &keyPattern ) const { + return frsForIndex( d, idxNo ).simplifiedQuery( keyPattern ); + } + void FieldRangeSetPair::assertValidIndex( const NamespaceDetails *d, int idxNo ) const { massert( 14048, "FieldRangeSetPair invalid index specified", idxNo >= 0 && idxNo < d->nIndexes ); } - - void FieldRangeSetPair::assertValidIndexOrNoIndex( const NamespaceDetails *d, int idxNo ) const { - massert( 14049, "FieldRangeSetPair invalid index specified", idxNo >= -1 ); - if ( idxNo >= 0 ) { - assertValidIndex( d, idxNo ); + + const FieldRangeSet &FieldRangeSetPair::frsForIndex( const NamespaceDetails* nsd, int idxNo ) const { + assertValidIndexOrNoIndex( nsd, idxNo ); + if ( idxNo < 0 ) { + // An unindexed cursor cannot have a "single key" constraint. + return _multiKey; } + return nsd->isMultikey( idxNo ) ? _multiKey : _singleKey; } - - BSONObj FieldRangeSetPair::simplifiedQueryForIndex( NamespaceDetails *d, int idxNo, const BSONObj &keyPattern ) const { - return frsForIndex( d, idxNo ).simplifiedQuery( keyPattern ); - } - + bool FieldRangeVector::matchesElement( const BSONElement &e, int i, bool forward ) const { bool eq; int l = matchingLowElement( e, i, forward, eq ); diff --git a/db/queryutil.h b/db/queryutil.h index 00d2d264961..104cde28e4a 100644 --- a/db/queryutil.h +++ b/db/queryutil.h @@ -53,6 +53,8 @@ namespace mongo { /** @return true iff the interval is an equality constraint. */ bool equality() const; mutable int _cachedEquality; + + string toString() const; }; /** @@ -103,6 +105,8 @@ namespace mongo { * NOTE the resulting intervals might not be strictValid(). */ void reverse( FieldRange &ret ) const; + + string toString() const; private: BSONObj addObj( const BSONObj &o ); void finishOperation( const vector<FieldInterval> &newIntervals, const FieldRange &other ); diff --git a/db/record.cpp b/db/record.cpp index f5fa972227a..18be9c75fe2 100644 --- a/db/record.cpp +++ b/db/record.cpp @@ -120,14 +120,16 @@ namespace mongo { /** * after this call, we assume the page is in ram + * @param doHalf if this is a known good access, want to put in first half * @return whether we know the page is in ram */ - bool access( size_t region , short offset ) { + bool access( size_t region , short offset , bool doHalf ) { int regionHash = hash(region); scoped_spinlock lk( _lock ); - - RARELY { + + static int rarely_count = 0; + if ( rarely_count++ % 2048 == 0 ) { long long now = Listener::getElapsedTimeMillis(); RARELY if ( now == 0 ) { tlog() << "warning Listener::getElapsedTimeMillis returning 0ms" << endl; @@ -137,8 +139,8 @@ namespace mongo { _rotate(); } } - - for ( int i=0; i<NumSlices; i++ ) { + + for ( int i=0; i<NumSlices / ( doHalf ? 2 : 1 ); i++ ) { int pos = (_curSlice+i)%NumSlices; State s = _slices[pos].get( regionHash , region , offset ); @@ -205,7 +207,7 @@ namespace mongo { const size_t region = page >> 6; const size_t offset = page & 0x3f; - if ( ps::rolling.access( region , offset ) ) + if ( ps::rolling.access( region , offset , false ) ) return true; if ( ! blockSupported ) @@ -214,14 +216,11 @@ namespace mongo { } Record* Record::accessed() { - if ( ! MemoryTrackingEnabled ) - return this; - const size_t page = (size_t)data >> 12; const size_t region = page >> 6; const size_t offset = page & 0x3f; - - ps::rolling.access( region , offset ); + + ps::rolling.access( region , offset , true ); return this; } diff --git a/db/repl.cpp b/db/repl.cpp index a4ab6e4f0ea..3d08f2324c0 100644 --- a/db/repl.cpp +++ b/db/repl.cpp @@ -95,7 +95,7 @@ namespace mongo { virtual LockType locktype() const { return WRITE; } void help(stringstream&h) const { h << "resync (from scratch) an out of date replica slave.\nhttp://www.mongodb.org/display/DOCS/Master+Slave"; } CmdResync() : Command("resync") { } - virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { if( cmdLine.usingReplSets() ) { errmsg = "resync command not currently supported with replica sets. See RS102 info in the mongodb documentations"; result.append("info", "http://www.mongodb.org/display/DOCS/Resyncing+a+Very+Stale+Replica+Set+Member"); @@ -232,7 +232,7 @@ namespace mongo { } virtual LockType locktype() const { return NONE; } CmdIsMaster() : Command("isMaster", true, "ismaster") { } - virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) { + virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) { /* currently request to arbiter is (somewhat arbitrarily) an ismaster request that is not authenticated. we allow unauthenticated ismaster but we aren't as verbose informationally if @@ -1407,6 +1407,7 @@ namespace mongo { void newRepl(); void oldRepl(); + void startReplSets(ReplSetCmdline*); void startReplication() { /* if we are going to be a replica set, we aren't doing other forms of replication. */ if( !cmdLine._replSet.empty() ) { @@ -1416,6 +1417,11 @@ namespace mongo { log() << "***" << endl; } newRepl(); + + replSet = true; + ReplSetCmdline *replSetCmdline = new ReplSetCmdline(cmdLine._replSet); + boost::thread t( boost::bind( &startReplSets, replSetCmdline) ); + return; } diff --git a/db/repl/consensus.cpp b/db/repl/consensus.cpp index 3a4dd9b5b3d..07ee2fa80a3 100644 --- a/db/repl/consensus.cpp +++ b/db/repl/consensus.cpp @@ -25,6 +25,7 @@ namespace mongo { public: CmdReplSetFresh() : ReplSetCommand("replSetFresh") { } private: + bool shouldVeto(const BSONObj& cmdObj, string& errmsg) { unsigned id = cmdObj["id"].Int(); const Member* primary = theReplSet->box.getPrimary(); @@ -66,7 +67,7 @@ namespace mongo { return false; } - virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { if( !check(errmsg, result) ) return false; @@ -101,7 +102,7 @@ namespace mongo { public: CmdReplSetElect() : ReplSetCommand("replSetElect") { } private: - virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { if( !check(errmsg, result) ) return false; theReplSet->elect.electCmdReceived(cmdObj, &result); @@ -152,7 +153,7 @@ namespace mongo { LastYea &L = this->ly.ref(lk); time_t now = time(0); if( L.when + LeaseTime >= now && L.who != memberId ) { - log(1) << "replSet not voting yea for " << memberId << + LOG(1) << "replSet not voting yea for " << memberId << " voted for " << L.who << ' ' << now-L.when << " secs ago" << rsLog; throw VoteException(); } @@ -176,7 +177,7 @@ namespace mongo { void Consensus::electCmdReceived(BSONObj cmd, BSONObjBuilder* _b) { BSONObjBuilder& b = *_b; DEV log() << "replSet received elect msg " << cmd.toString() << rsLog; - else log(2) << "replSet received elect msg " << cmd.toString() << rsLog; + else LOG(2) << "replSet received elect msg " << cmd.toString() << rsLog; string set = cmd["set"].String(); unsigned whoid = cmd["whoid"].Int(); int cfgver = cmd["cfgver"].Int(); @@ -309,7 +310,7 @@ namespace mongo { allUp = false; } } - log(1) << "replSet dev we are freshest of up nodes, nok:" << nok << " nTies:" << nTies << rsLog; + LOG(1) << "replSet dev we are freshest of up nodes, nok:" << nok << " nTies:" << nTies << rsLog; assert( ord <= theReplSet->lastOpTimeWritten ); // <= as this may change while we are working... return true; } diff --git a/db/repl/heartbeat.cpp b/db/repl/heartbeat.cpp index 6247b4b1d13..7d3f78c73b5 100644 --- a/db/repl/heartbeat.cpp +++ b/db/repl/heartbeat.cpp @@ -39,6 +39,8 @@ namespace mongo { extern bool replSetBlind; extern ReplSettings replSettings; + unsigned int HeartbeatInfo::numPings; + long long HeartbeatInfo::timeDown() const { if( up() ) return 0; if( downSince == 0 ) @@ -51,7 +53,7 @@ namespace mongo { public: virtual bool adminOnly() const { return false; } CmdReplSetHeartbeat() : ReplSetCommand("replSetHeartbeat") { } - virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { if( replSetBlind ) return false; @@ -62,6 +64,10 @@ namespace mongo { return false; } + if (!checkAuth(errmsg, result)) { + return false; + } + /* we want to keep heartbeat connections open when relinquishing primary. tag them here. */ { AbstractMessagingPort *mp = cc().port(); @@ -147,7 +153,7 @@ namespace mongo { string name() const { return "rsHealthPoll"; } void doWork() { if ( !theReplSet ) { - log(2) << "replSet not initialized yet, skipping health poll this round" << rsLog; + LOG(2) << "replSet not initialized yet, skipping health poll this round" << rsLog; return; } @@ -169,7 +175,10 @@ namespace mongo { time_t after = mem.lastHeartbeat = before + (mem.ping / 1000); // weight new ping with old pings - mem.ping = (unsigned int)((old.ping * .8) + (mem.ping * .2)); + // on the first ping, just use the ping value + if (old.ping != 0) { + mem.ping = (unsigned int)((old.ping * .8) + (mem.ping * .2)); + } if ( info["time"].isNumber() ) { long long t = info["time"].numberLong(); @@ -191,6 +200,8 @@ namespace mongo { mem.hbstate = MemberState(state.Int()); } if( ok ) { + HeartbeatInfo::numPings++; + if( mem.upSince == 0 ) { log() << "replSet info member " << h.toString() << " is up" << rsLog; mem.upSince = mem.lastHeartbeat; @@ -262,6 +273,7 @@ namespace mongo { private: void down(HeartbeatInfo& mem, string msg) { mem.health = 0.0; + mem.ping = 0; if( mem.upSince || mem.downSince == 0 ) { mem.upSince = 0; mem.downSince = jsTime(); diff --git a/db/repl/replset_commands.cpp b/db/repl/replset_commands.cpp index 79639acd567..68dab7eb3c1 100644 --- a/db/repl/replset_commands.cpp +++ b/db/repl/replset_commands.cpp @@ -45,14 +45,18 @@ namespace mongo { help << "Just for regression tests.\n"; } CmdReplSetTest() : ReplSetCommand("replSetTest") { } - virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { log() << "replSet replSetTest command received: " << cmdObj.toString() << rsLog; + + if (!checkAuth(errmsg, result)) { + return false; + } + if( cmdObj.hasElement("forceInitialSyncFailure") ) { replSetForceInitialSyncFailure = (unsigned) cmdObj["forceInitialSyncFailure"].Number(); return true; } - // may not need this, but if removed check all tests still work: if( !check(errmsg, result) ) return false; @@ -76,11 +80,11 @@ namespace mongo { help << "internal"; } CmdReplSetGetRBID() : ReplSetCommand("replSetGetRBID") { - // this is ok but micros or combo with some rand() and/or 64 bits might be better -- + // this is ok but micros or combo with some rand() and/or 64 bits might be better -- // imagine a restart and a clock correction simultaneously (very unlikely but possible...) rbid = (int) curTimeMillis64(); } - virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { if( !check(errmsg, result) ) return false; result.append("rbid",rbid); @@ -108,7 +112,7 @@ namespace mongo { help << "\nhttp://www.mongodb.org/display/DOCS/Replica+Set+Commands"; } CmdReplSetGetStatus() : ReplSetCommand("replSetGetStatus", true) { } - virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { if ( cmdObj["forShell"].trueValue() ) lastError.disableForCommand(); @@ -128,17 +132,21 @@ namespace mongo { help << "\nhttp://www.mongodb.org/display/DOCS/Replica+Set+Commands"; } CmdReplSetReconfig() : ReplSetCommand("replSetReconfig"), mutex("rsreconfig") { } - virtual bool run(const string& a, BSONObj& b, string& errmsg, BSONObjBuilder& c, bool d) { + virtual bool run(const string& a, BSONObj& b, int e, string& errmsg, BSONObjBuilder& c, bool d) { try { rwlock_try_write lk(mutex); - return _run(a,b,errmsg,c,d); + return _run(a,b,e,errmsg,c,d); } catch(rwlock_try_write::exception&) { } errmsg = "a replSetReconfig is already in progress"; return false; } private: - bool _run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool _run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + if ( !checkAuth(errmsg, result) ) { + return false; + } + if( cmdObj["replSetReconfig"].type() != Object ) { errmsg = "no configuration specified"; return false; @@ -209,7 +217,7 @@ namespace mongo { } CmdReplSetFreeze() : ReplSetCommand("replSetFreeze") { } - virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { if( !check(errmsg, result) ) return false; int secs = (int) cmdObj.firstElement().numberInt(); @@ -233,7 +241,7 @@ namespace mongo { } CmdReplSetStepDown() : ReplSetCommand("replSetStepDown") { } - virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { if( !check(errmsg, result) ) return false; if( !theReplSet->box.getState().primary() ) { @@ -252,19 +260,19 @@ namespace mongo { long long int diff = lastOp - closest; result.append("closest", closest); result.append("difference", diff); - + if (diff < 0) { // not our problem, but we'll wait until thing settle down errmsg = "someone is ahead of the primary?"; return false; } - + if (diff > 10) { errmsg = "no secondaries within 10 seconds of my optime"; return false; } } - + int secs = (int) cmdObj.firstElement().numberInt(); if( secs == 0 ) secs = 60; diff --git a/db/repl/rs.cpp b/db/repl/rs.cpp index 84b92fe9297..243e087eff1 100644 --- a/db/repl/rs.cpp +++ b/db/repl/rs.cpp @@ -24,9 +24,12 @@ #include "rs.h" #include "connections.h" #include "../repl.h" +#include "../instance.h" -namespace mongo { +using namespace std; +namespace mongo { + using namespace bson; bool replSet = false; @@ -60,18 +63,43 @@ namespace mongo { } void ReplSetImpl::assumePrimary() { - log(2) << "assuming primary" << endl; + LOG(2) << "replSet assuming primary" << endl; assert( iAmPotentiallyHot() ); writelock lk("admin."); // so we are synchronized with _logOp() + + // Make sure that new OpTimes are higher than existing ones even with clock skew + DBDirectClient c; + BSONObj lastOp = c.findOne( "local.oplog.rs", Query().sort(reverseNaturalObj), NULL, QueryOption_SlaveOk ); + if ( !lastOp.isEmpty() ) { + OpTime::setLast( lastOp[ "ts" ].date() ); + } + changeState(MemberState::RS_PRIMARY); } void ReplSetImpl::changeState(MemberState s) { box.change(s, _self); } + void ReplSetImpl::setMaintenanceMode(const bool inc) { + lock lk(this); + + if (inc) { + log() << "replSet going into maintenance mode (" << _maintenanceMode << " other tasks)" << rsLog; + + _maintenanceMode++; + changeState(MemberState::RS_RECOVERING); + } + else { + _maintenanceMode--; + // no need to change state, syncTail will try to go live as a secondary soon + + log() << "leaving maintenance mode (" << _maintenanceMode << " other tasks)" << rsLog; + } + } + Member* ReplSetImpl::getMostElectable() { lock lk(this); - - Member *max = 0; + + Member *max = 0; for (set<unsigned>::iterator it = _electableSet.begin(); it != _electableSet.end(); it++) { const Member *temp = findById(*it); @@ -91,7 +119,7 @@ namespace mongo { const bool closeOnRelinquish = true; void ReplSetImpl::relinquish() { - log(2) << "attempting to relinquish" << endl; + LOG(2) << "replSet attempting to relinquish" << endl; if( box.getState().primary() ) { { writelock lk("admin."); // so we are synchronized with _logOp() @@ -239,7 +267,7 @@ namespace mongo { if( myConfig().arbiterOnly ) b.append("arbiterOnly", true); - if( myConfig().priority == 0 ) + if( myConfig().priority == 0 && !myConfig().arbiterOnly) b.append("passive", true); if( myConfig().slaveDelay ) b.append("slaveDelay", myConfig().slaveDelay); @@ -296,8 +324,10 @@ namespace mongo { _currentSyncTarget(0), _hbmsgTime(0), _self(0), + _maintenanceMode(0), mgr( new Manager(this) ), ghost( new GhostSync(this) ) { + _cfg = 0; memset(_hbmsg, 0, sizeof(_hbmsg)); strcpy( _hbmsg , "initial startup" ); @@ -306,7 +336,7 @@ namespace mongo { _seeds = &replSetCmdline.seeds; - log(1) << "replSet beginning startup..." << rsLog; + LOG(1) << "replSet beginning startup..." << rsLog; loadConfig(); @@ -317,7 +347,7 @@ namespace mongo { for( set<HostAndPort>::iterator i = replSetCmdline.seedSet.begin(); i != replSetCmdline.seedSet.end(); i++ ) { if( i->isSelf() ) { if( sss == 1 ) - log(1) << "replSet warning self is listed in the seed list and there are no other seeds listed did you intend that?" << rsLog; + LOG(1) << "replSet warning self is listed in the seed list and there are no other seeds listed did you intend that?" << rsLog; } else log() << "replSet warning command line seed " << i->toString() << " is not present in the current repl set config" << rsLog; @@ -382,7 +412,7 @@ namespace mongo { getLastErrorDefault = new BSONObj( c.getLastErrorDefaults ); } - list<const ReplSetConfig::MemberCfg*> newOnes; + list<ReplSetConfig::MemberCfg*> newOnes; // additive short-cuts the new config setup. If we are just adding a // node/nodes and nothing else is changing, this is additive. If it's // not a reconfig, we're not adding anything @@ -391,8 +421,8 @@ namespace mongo { unsigned nfound = 0; int me = 0; for( vector<ReplSetConfig::MemberCfg>::iterator i = c.members.begin(); i != c.members.end(); i++ ) { - const ReplSetConfig::MemberCfg& m = *i; + ReplSetConfig::MemberCfg& m = *i; if( m.h.isSelf() ) { me++; } @@ -443,8 +473,8 @@ namespace mongo { // this is a shortcut for simple changes if( additive ) { log() << "replSet info : additive change to configuration" << rsLog; - for( list<const ReplSetConfig::MemberCfg*>::const_iterator i = newOnes.begin(); i != newOnes.end(); i++ ) { - const ReplSetConfig::MemberCfg* m = *i; + for( list<ReplSetConfig::MemberCfg*>::const_iterator i = newOnes.begin(); i != newOnes.end(); i++ ) { + ReplSetConfig::MemberCfg *m = *i; Member *mi = new Member(m->h, m->_id, m, false); /** we will indicate that new members are up() initially so that we don't relinquish our @@ -456,6 +486,11 @@ namespace mongo { _members.push(mi); startHealthTaskFor(mi); } + + // if we aren't creating new members, we may have to update the + // groups for the current ones + _cfg->updateMembers(_members); + return true; } @@ -479,7 +514,7 @@ namespace mongo { string members = ""; for( vector<ReplSetConfig::MemberCfg>::iterator i = _cfg->members.begin(); i != _cfg->members.end(); i++ ) { - const ReplSetConfig::MemberCfg& m = *i; + ReplSetConfig::MemberCfg& m = *i; Member *mi; members += ( members == "" ? "" : ", " ) + m.h.toString(); if( m.h.isSelf() ) { @@ -594,7 +629,7 @@ namespace mongo { if( ++once == 1 ) log() << "replSet info you may need to run replSetInitiate -- rs.initiate() in the shell -- if that is not already done" << rsLog; if( _seeds->size() == 0 ) - log(1) << "replSet info no seed hosts were specified on the --replSet command line" << rsLog; + LOG(1) << "replSet info no seed hosts were specified on the --replSet command line" << rsLog; } else { startupStatus = EMPTYUNREACHABLE; diff --git a/db/repl/rs.h b/db/repl/rs.h index 7654597a930..14c630d27a2 100644 --- a/db/repl/rs.h +++ b/db/repl/rs.h @@ -58,10 +58,11 @@ namespace mongo { ~Member(); // intentionally unimplemented as should never be called -- see List1<>::Base. Member(const Member&); public: - Member(HostAndPort h, unsigned ord, const ReplSetConfig::MemberCfg *c, bool self); + Member(HostAndPort h, unsigned ord, ReplSetConfig::MemberCfg *c, bool self); string fullName() const { return h().toString(); } const ReplSetConfig::MemberCfg& config() const { return _config; } + ReplSetConfig::MemberCfg& configw() { return _config; } const HeartbeatInfo& hbinfo() const { return _hbinfo; } HeartbeatInfo& get_hbinfo() { return _hbinfo; } string lhb() const { return _hbinfo.lastHeartbeatMsg; } @@ -74,7 +75,7 @@ namespace mongo { private: friend class ReplSetImpl; - const ReplSetConfig::MemberCfg _config; + ReplSetConfig::MemberCfg _config; const HostAndPort _h; HeartbeatInfo _hbinfo; }; @@ -242,13 +243,19 @@ namespace mongo { const Member *primary; }; const SP get() { - scoped_lock lk(m); + rwlock lk(m, false); return sp; } - MemberState getState() const { return sp.state; } - const Member* getPrimary() const { return sp.primary; } + MemberState getState() const { + rwlock lk(m, false); + return sp.state; + } + const Member* getPrimary() const { + rwlock lk(m, false); + return sp.primary; + } void change(MemberState s, const Member *self) { - scoped_lock lk(m); + rwlock lk(m, true); if( sp.state != s ) { log() << "replSet " << s.toString() << rsLog; } @@ -262,24 +269,25 @@ namespace mongo { } } void set(MemberState s, const Member *p) { - scoped_lock lk(m); - sp.state = s; sp.primary = p; + rwlock lk(m, true); + sp.state = s; + sp.primary = p; } void setSelfPrimary(const Member *self) { change(MemberState::RS_PRIMARY, self); } void setOtherPrimary(const Member *mem) { - scoped_lock lk(m); + rwlock lk(m, true); assert( !sp.state.primary() ); sp.primary = mem; } void noteRemoteIsPrimary(const Member *remote) { - scoped_lock lk(m); + rwlock lk(m, true); if( !sp.state.secondary() && !sp.state.fatal() ) sp.state = MemberState::RS_RECOVERING; sp.primary = remote; } StateBox() : m("StateBox") { } private: - mongo::mutex m; + RWLock m; SP sp; }; @@ -446,11 +454,20 @@ namespace mongo { List1<Member> _members; // all members of the set EXCEPT _self. ReplSetConfig::MemberCfg _config; // config of _self unsigned _id; // _id of _self + + int _maintenanceMode; // if we should stay in recovering state public: // this is called from within a writelock in logOpRS unsigned selfId() const { return _id; } Manager *mgr; GhostSync *ghost; + /** + * This forces a secondary to go into recovering state and stay there + * until this is called again, passing in "false". Multiple threads can + * call this and it will leave maintenance mode once all of the callers + * have called it again, passing in false. + */ + void setMaintenanceMode(const bool inc); private: Member* head() const { return _members.head(); } public: @@ -553,11 +570,29 @@ namespace mongo { virtual bool logTheOp() { return false; } virtual LockType locktype() const { return NONE; } virtual void help( stringstream &help ) const { help << "internal"; } + + /** + * Some replica set commands call this and then call check(). This is + * intentional, as they might do things before theReplSet is initialized + * that still need to be checked for auth. + */ + bool checkAuth(string& errmsg, BSONObjBuilder& result) { + if( !noauth && adminOnly() ) { + AuthenticationInfo *ai = cc().getAuthenticationInfo(); + if (!ai->isAuthorizedForLock("admin", locktype())) { + errmsg = "replSet command unauthorized"; + return false; + } + } + return true; + } + bool check(string& errmsg, BSONObjBuilder& result) { if( !replSet ) { errmsg = "not running with --replSet"; return false; } + if( theReplSet == 0 ) { result.append("startupStatus", ReplSet::startupStatus); string s; @@ -566,7 +601,8 @@ namespace mongo { result.append("info", "run rs.initiate(...) if not yet done for the set"); return false; } - return true; + + return checkAuth(errmsg, result); } }; @@ -578,7 +614,7 @@ namespace mongo { /** inlines ----------------- */ - inline Member::Member(HostAndPort h, unsigned ord, const ReplSetConfig::MemberCfg *c, bool self) : + inline Member::Member(HostAndPort h, unsigned ord, ReplSetConfig::MemberCfg *c, bool self) : _config(*c), _h(h), _hbinfo(ord) { assert(c); if( self ) diff --git a/db/repl/rs_config.cpp b/db/repl/rs_config.cpp index 4d6c7b59bba..745d60b537c 100644 --- a/db/repl/rs_config.cpp +++ b/db/repl/rs_config.cpp @@ -83,14 +83,24 @@ namespace mongo { if( hidden ) b << "hidden" << hidden; if( !buildIndexes ) b << "buildIndexes" << buildIndexes; if( !tags.empty() ) { - BSONArrayBuilder a; - for( set<string>::const_iterator i = tags.begin(); i != tags.end(); i++ ) - a.append(*i); - b.appendArray("tags", a.done()); + BSONObjBuilder a; + for( map<string,string>::const_iterator i = tags.begin(); i != tags.end(); i++ ) + a.append((*i).first, (*i).second); + b.append("tags", a.done()); } return b.obj(); } + void ReplSetConfig::updateMembers(List1<Member> &dest) { + for (vector<MemberCfg>::iterator source = members.begin(); source < members.end(); source++) { + for( Member *d = dest.head(); d; d = d->next() ) { + if (d->fullName() == (*source).h.toString()) { + d->configw().groupsw() = (*source).groups(); + } + } + } + } + bo ReplSetConfig::asBson() const { bob b; b.append("_id", _id).append("version", version); @@ -307,85 +317,39 @@ namespace mongo { } void ReplSetConfig::_populateTagMap(map<string,TagClause> &tagMap) { - // stage 1: create subgroups for each server corresponding to each of - // its tags. If a server has three tags, we want it to end up in three - // subgroups, e.g.: A is tagged with ["A", "dc.ny", "m"]. At the end of - // this step, tagMap will contain: - // "A" => {"A.A" : A} - // "dc.ny" => {"dc.ny.A" : A} - // "m" => {"m.A" : A} - // If we have more than one server with the same tag, we end up with - // something like "x.y.z" => [{"x.y.z.A" : A},{"x.y.z.B" : B}] (if A - // and B were tagged with "x.y.z"). + // create subgroups for each server corresponding to each of + // its tags. E.g.: + // + // A is tagged with {"server" : "A", "dc" : "ny"} + // B is tagged with {"server" : "B", "dc" : "ny"} + // + // At the end of this step, tagMap will contain: + // + // "server" => {"A" : [A], "B" : [B]} + // "dc" => {"ny" : [A,B]} + for (unsigned i=0; i<members.size(); i++) { MemberCfg member = members[i]; - for (set<string>::iterator tag = member.tags.begin(); tag != member.tags.end(); tag++) { - TagClause& clause = tagMap[*tag]; - clause.name = *tag; + for (map<string,string>::iterator tag = member.tags.begin(); tag != member.tags.end(); tag++) { + string label = (*tag).first; + string value = (*tag).second; - // we also populate the map, to be used by step 2... I think - // this is correct, as step 2 condenses the groups anyway - string perServerName = *tag+"."+members[i].h.toString(); + TagClause& clause = tagMap[label]; + clause.name = label; TagSubgroup* subgroup; - if (clause.subgroups.find(perServerName) == clause.subgroups.end()) { - clause.subgroups[perServerName] = subgroup = new TagSubgroup(perServerName); + // search for "ny" in "dc"'s clause + if (clause.subgroups.find(value) == clause.subgroups.end()) { + clause.subgroups[value] = subgroup = new TagSubgroup(value); } else { - subgroup = clause.subgroups[perServerName]; + subgroup = clause.subgroups[value]; } subgroup->m.insert(&members[i]); } } - - // stage 2: generate all parent tags. If we have "x.y.z", this - // generates "x.y" and "x" and creates a map for each clause, e.g., - // "x"'s clause might have a map that looks like: - // "x.y" => {A, B} {C} - // "x.w" => {D} {E, F} - for (map<string,TagClause>::iterator baseClause = tagMap.begin(); baseClause != tagMap.end(); baseClause++) { - string prevPrefix = (*baseClause).first; - const char *dot = strrchr(prevPrefix.c_str(), '.'); - - while (dot) { - // get x.y - string xyTag = string(prevPrefix.c_str(), dot - prevPrefix.c_str()); - log(1) << "generating tag " << xyTag << rsLog; - TagClause& xyClause = tagMap[xyTag]; - xyClause.name = xyTag; - - // get all of x.y.z's subgroups, add them as a single subgroup of x.y - TagSubgroup* condensedSubgroup;; - if (xyClause.subgroups.find(prevPrefix) == xyClause.subgroups.end()) { - // label this subgroup one higher than the current, e.g., - // "x.y.z" if we're creating the "x.y" clause - condensedSubgroup = new TagSubgroup(prevPrefix); - xyClause.subgroups[prevPrefix] = condensedSubgroup; - } - else { - condensedSubgroup = xyClause.subgroups[prevPrefix]; - assert(condensedSubgroup->name == prevPrefix); - } - - TagClause& xyzClause = tagMap[prevPrefix]; - - for (map<string,TagSubgroup*>::iterator xyzSubgroup = xyzClause.subgroups.begin(); - xyzSubgroup != xyzClause.subgroups.end(); xyzSubgroup++) { - for (set<MemberCfg*>::const_iterator xyzMember = (*xyzSubgroup).second->m.begin(); - xyzMember != (*xyzSubgroup).second->m.end(); xyzMember++) { - condensedSubgroup->m.insert(*xyzMember); - // we'll link the member back with the group later, to - // avoid creating extra link-backs - } - } - - // advance: if we were handling "x.y", now do "x" - prevPrefix = xyTag; - dot = strrchr(prevPrefix.c_str(), '.'); - } - } } void ReplSetConfig::parseRules(const BSONObj& modes) { @@ -442,7 +406,7 @@ namespace mongo { for (set<MemberCfg *>::iterator cfg = (*sgs).second->m.begin(); !foundMe && cfg != (*sgs).second->m.end(); cfg++) { - (*cfg)->groupsw(this).insert((*sgs).second); + (*cfg)->groupsw().insert((*sgs).second); } } @@ -463,7 +427,7 @@ namespace mongo { } // if we got here, this is a valid rule - log(1) << "new rule " << rule.fieldName() << ": " << r->toString() << rsLog; + LOG(1) << "replSet new rule " << rule.fieldName() << ": " << r->toString() << rsLog; rules[rule.fieldName()] = r; } } @@ -532,9 +496,10 @@ namespace mongo { if( mobj.hasElement("votes") ) m.votes = (unsigned) mobj["votes"].Number(); if( mobj.hasElement("tags") ) { - vector<BSONElement> v = mobj["tags"].Array(); - for( unsigned i = 0; i < v.size(); i++ ) - m.tags.insert( v[i].String() ); + const BSONObj &t = mobj["tags"].Obj(); + for (BSONObj::iterator c = t.begin(); c.more(); c.next()) { + m.tags[(*c).fieldName()] = (*c).String(); + } } m.check(); } diff --git a/db/repl/rs_config.h b/db/repl/rs_config.h index d9c9d97ed4d..4e0d1e862c0 100644 --- a/db/repl/rs_config.h +++ b/db/repl/rs_config.h @@ -25,7 +25,7 @@ #include "health.h" namespace mongo { - + class Member; const string rsConfigNs = "local.system.replset"; class ReplSetConfig { @@ -61,15 +61,14 @@ namespace mongo { int slaveDelay; /* seconds. int rather than unsigned for convenient to/front bson conversion. */ bool hidden; /* if set, don't advertise to drives in isMaster. for non-primaries (priority 0) */ bool buildIndexes; /* if false, do not create any non-_id indexes */ - set<string> tags; /* tagging for data center, rack, etc. */ + map<string,string> tags; /* tagging for data center, rack, etc. */ private: set<TagSubgroup*> _groups; // the subgroups this member belongs to public: const set<TagSubgroup*>& groups() const { return _groups; } - set<TagSubgroup*>& groupsw(ReplSetConfig *c) { - assert(!c->_constructed); + set<TagSubgroup*>& groupsw() { return _groups; } void check() const; /* check validity, assert if not. */ @@ -114,6 +113,11 @@ namespace mongo { void saveConfigLocally(BSONObj comment); // to local db string saveConfigEverywhere(); // returns textual info on what happened + /** + * Update members' groups when the config changes but members stay the same. + */ + void updateMembers(List1<Member> &dest); + BSONObj asBson() const; bool _constructed; diff --git a/db/repl/rs_initialsync.cpp b/db/repl/rs_initialsync.cpp index 814bb1d0bf8..142878ab478 100644 --- a/db/repl/rs_initialsync.cpp +++ b/db/repl/rs_initialsync.cpp @@ -75,7 +75,7 @@ namespace mongo { if( d && d->stats.nrecords == 0 ) return; // already empty, ok. - log(1) << "replSet empty oplog" << rsLog; + LOG(1) << "replSet empty oplog" << rsLog; d->emptyCappedCollection(rsoplog); } @@ -85,6 +85,7 @@ namespace mongo { // find the member with the lowest ping time that has more data than me for (Member *m = _members.head(); m; m = m->next()) { if (m->hbinfo().up() && + HeartbeatInfo::numPings > config().members.size()*2 && (m->state() == MemberState::RS_PRIMARY || (m->state() == MemberState::RS_SECONDARY && m->hbinfo().opTime > lastOpTimeWritten)) && (!closest || m->hbinfo().ping < closest->hbinfo().ping)) { diff --git a/db/repl/rs_initiate.cpp b/db/repl/rs_initiate.cpp index 5dd0ab23d24..0a796e1e445 100644 --- a/db/repl/rs_initiate.cpp +++ b/db/repl/rs_initiate.cpp @@ -150,7 +150,7 @@ namespace mongo { h << "Initiate/christen a replica set."; h << "\nhttp://www.mongodb.org/display/DOCS/Replica+Set+Commands"; } - virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { log() << "replSet replSetInitiate admin command received from client" << rsLog; if( !replSet ) { diff --git a/db/repl/rs_member.h b/db/repl/rs_member.h index 8e5a8ad9da3..d60bb5261e9 100644 --- a/db/repl/rs_member.h +++ b/db/repl/rs_member.h @@ -80,7 +80,8 @@ namespace mongo { DiagStr lastHeartbeatMsg; OpTime opTime; int skew; - unsigned int ping; // microseconds + unsigned int ping; // milliseconds + static unsigned int numPings; bool up() const { return health > 0; } diff --git a/db/repl/rs_rollback.cpp b/db/repl/rs_rollback.cpp index 67d6cc26f07..cce5c091074 100644 --- a/db/repl/rs_rollback.cpp +++ b/db/repl/rs_rollback.cpp @@ -574,7 +574,7 @@ namespace mongo { sethbmsg("rollback 6"); // clean up oplog - log(2) << "replSet rollback truncate oplog after " << h.commonPoint.toStringPretty() << rsLog; + LOG(2) << "replSet rollback truncate oplog after " << h.commonPoint.toStringPretty() << rsLog; // todo: fatal error if this throws? oplogDetails->cappedTruncateAfter(rsoplog, h.commonPointOurDiskloc, false); diff --git a/db/repl/rs_sync.cpp b/db/repl/rs_sync.cpp index 95bbe2040a6..5fe3075c0f7 100644 --- a/db/repl/rs_sync.cpp +++ b/db/repl/rs_sync.cpp @@ -188,6 +188,16 @@ namespace mongo { */ bool ReplSetImpl::tryToGoLiveAsASecondary(OpTime& /*out*/ minvalid) { bool golive = false; + + { + lock lk( this ); + + if (_maintenanceMode > 0) { + // we're not actually going live + return true; + } + } + { readlock lk("local.replset.minvalid"); BSONObj mv; @@ -211,7 +221,7 @@ namespace mongo { BSONObj remoteOldestOp = r.findOne(rsoplog, Query()); OpTime ts = remoteOldestOp["ts"]._opTime(); DEV log() << "replSet remoteOldestOp: " << ts.toStringLong() << rsLog; - else log(3) << "replSet remoteOldestOp: " << ts.toStringLong() << rsLog; + else LOG(3) << "replSet remoteOldestOp: " << ts.toStringLong() << rsLog; DEV { log() << "replSet lastOpTimeWritten: " << lastOpTimeWritten.toStringLong() << rsLog; log() << "replSet our state: " << state().toString() << rsLog; @@ -251,7 +261,7 @@ namespace mongo { assert(r.conn() == 0); if( !r.connect(hn) ) { - log(2) << "replSet can't connect to " << hn << " to read operations" << rsLog; + LOG(2) << "replSet can't connect to " << hn << " to read operations" << rsLog; r.resetConnection(); return false; } @@ -407,7 +417,7 @@ namespace mongo { } - { + try { writelock lk(""); /* if we have become primary, we dont' want to apply things from elsewhere @@ -421,11 +431,16 @@ namespace mongo { syncApply(o); _logOpObjRS(o); // with repl sets we write the ops to our oplog too } + catch (DBException& e) { + sethbmsg(str::stream() << "syncTail: " << e.toString() << ", syncing: " << o); + sleepsecs(30); + return; + } } } r.tailCheck(); if( !r.haveCursor() ) { - log(1) << "replSet end syncTail pass with " << hn << rsLog; + LOG(1) << "replSet end syncTail pass with " << hn << rsLog; // TODO : reuse our connection to the primary. return; } @@ -475,9 +490,7 @@ namespace mongo { _syncThread(); } catch(DBException& e) { - sethbmsg(str::stream() << "syncThread: " << e.toString() << - ", try 'use local; db.oplog.rs.findOne({ts : {$gt : new Timestamp(" << - lastOpTimeWritten.getSecs() << "000," << lastOpTimeWritten.getInc() << ")}});' on the primary"); + sethbmsg(str::stream() << "syncThread: " << e.toString()); sleepsecs(10); } catch(...) { @@ -580,7 +593,7 @@ namespace mongo { // the target might end up with a new Member, but s.slave never // changes so we'll compare the names || target == slave->slave || target->fullName() == slave->slave->fullName()) { - log(1) << "replica set ghost target no good" << endl; + LOG(1) << "replica set ghost target no good" << endl; return; } @@ -593,8 +606,7 @@ namespace mongo { slave->reader.ghostQueryGTE(rsoplog, last); } - log(1) << "last: " << slave->last.toString() << " to " << last.toString() << rsLog; - + LOG(1) << "replSet last: " << slave->last.toString() << " to " << last.toString() << rsLog; if (slave->last > last) { return; } @@ -608,11 +620,11 @@ namespace mongo { BSONObj o = slave->reader.nextSafe(); slave->last = o["ts"]._opTime(); } - log(2) << "now last is " << slave->last.toString() << rsLog; + LOG(2) << "now last is " << slave->last.toString() << rsLog; } catch (DBException& e) { // we'll be back - log(2) << "replSet ghost sync error: " << e.what() << " for " + LOG(2) << "replSet ghost sync error: " << e.what() << " for " << slave->slave->fullName() << rsLog; slave->reader.resetConnection(); } diff --git a/db/scanandorder.cpp b/db/scanandorder.cpp new file mode 100644 index 00000000000..efa9c8d7f13 --- /dev/null +++ b/db/scanandorder.cpp @@ -0,0 +1,93 @@ +/* scanandorder.cpp + Order results (that aren't already indexes and in order.) +*/ + +/** + * Copyright (C) 2008 10gen Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "pch.h" +#include "scanandorder.h" + +namespace mongo { + + const unsigned ScanAndOrder::MaxScanAndOrderBytes = 32 * 1024 * 1024; + + void ScanAndOrder::_add(BSONObj& k, BSONObj o, DiskLoc* loc) { + if (!loc) { + _best.insert(make_pair(k.getOwned(),o.getOwned())); + } + else { + BSONObjBuilder b; + b.appendElements(o); + b.append("$diskLoc", loc->toBSONObj()); + _best.insert(make_pair(k.getOwned(), b.obj().getOwned())); + } + } + + void ScanAndOrder::_addIfBetter(BSONObj& k, BSONObj o, BestMap::iterator i, DiskLoc* loc) { + /* todo : we don't correct _approxSize here. */ + const BSONObj& worstBestKey = i->first; + int c = worstBestKey.woCompare(k, _order._spec.keyPattern); + if ( c > 0 ) { + // k is better, 'upgrade' + _best.erase(i); + _add(k, o, loc); + } + } + + + void ScanAndOrder::add(BSONObj o, DiskLoc* loc) { + assert( o.isValid() ); + BSONObj k = _order.getKeyFromObject(o); + if ( k.isEmpty() ) { + return; + } + if ( (int) _best.size() < _limit ) { + _approxSize += k.objsize(); + _approxSize += o.objsize(); + + /* note : adjust when bson return limit adjusts. note this limit should be a bit higher. */ + uassert( 10128 , "too much data for sort() with no index. add an index or specify a smaller limit", _approxSize < MaxScanAndOrderBytes ); + + _add(k, o, loc); + return; + } + BestMap::iterator i; + assert( _best.end() != _best.begin() ); + i = _best.end(); + i--; + _addIfBetter(k, o, i, loc); + } + + + void ScanAndOrder::fill(BufBuilder& b, Projection *filter, int& nout ) const { + int n = 0; + int nFilled = 0; + for ( BestMap::const_iterator i = _best.begin(); i != _best.end(); i++ ) { + n++; + if ( n <= _startFrom ) + continue; + const BSONObj& o = i->second; + fillQueryResultFromObj(b, filter, o); + nFilled++; + if ( nFilled >= _limit ) + break; + uassert( 10129 , "too much data for sort() with no index", b.len() < (int)MaxScanAndOrderBytes ); // appserver limit + } + nout = nFilled; + } + +} // namespace mongo diff --git a/db/scanandorder.h b/db/scanandorder.h index 2957ae60245..33e76f61f67 100644 --- a/db/scanandorder.h +++ b/db/scanandorder.h @@ -22,6 +22,7 @@ #include "indexkey.h" #include "queryutil.h" +#include "projection.h" namespace mongo { @@ -76,30 +77,9 @@ namespace mongo { typedef multimap<BSONObj,BSONObj,BSONObjCmp> BestMap; class ScanAndOrder { - void _add(BSONObj& k, BSONObj o, DiskLoc* loc) { - if (!loc) { - _best.insert(make_pair(k.getOwned(),o.getOwned())); - } - else { - BSONObjBuilder b; - b.appendElements(o); - b.append("$diskLoc", loc->toBSONObj()); - _best.insert(make_pair(k.getOwned(), b.obj().getOwned())); - } - } - - void _addIfBetter(BSONObj& k, BSONObj o, BestMap::iterator i, DiskLoc* loc) { - /* todo : we don't correct _approxSize here. */ - const BSONObj& worstBestKey = i->first; - int c = worstBestKey.woCompare(k, _order._spec.keyPattern); - if ( c > 0 ) { - // k is better, 'upgrade' - _best.erase(i); - _add(k, o, loc); - } - } - public: + static const unsigned MaxScanAndOrderBytes; + ScanAndOrder(int startFrom, int limit, BSONObj order, const FieldRangeSet &frs) : _best( BSONObjCmp( order ) ), _startFrom(startFrom), _order(order, frs) { @@ -107,60 +87,25 @@ namespace mongo { _approxSize = 0; } - int size() const { - return _best.size(); - } - - void add(BSONObj o, DiskLoc* loc) { - assert( o.isValid() ); - BSONObj k = _order.getKeyFromObject(o); - if ( k.isEmpty() ) { - return; - } - if ( (int) _best.size() < _limit ) { - _approxSize += k.objsize(); - _approxSize += o.objsize(); - - /* note : adjust when bson return limit adjusts. note this limit should be a bit higher. */ - uassert( 10128 , "too much data for sort() with no index. add an index or specify a smaller limit", _approxSize < 32 * 1024 * 1024 ); - - _add(k, o, loc); - return; - } - BestMap::iterator i; - assert( _best.end() != _best.begin() ); - i = _best.end(); - i--; - _addIfBetter(k, o, i, loc); - } + int size() const { return _best.size(); } - void _fill(BufBuilder& b, Projection *filter, int& nout, BestMap::iterator begin, BestMap::iterator end) { - int n = 0; - int nFilled = 0; - for ( BestMap::iterator i = begin; i != end; i++ ) { - n++; - if ( n <= _startFrom ) - continue; - BSONObj& o = i->second; - fillQueryResultFromObj(b, filter, o); - nFilled++; - if ( nFilled >= _limit ) - break; - uassert( 10129 , "too much data for sort() with no index", b.len() < 4000000 ); // appserver limit - } - nout = nFilled; - } + void add(BSONObj o, DiskLoc* loc); /* scanning complete. stick the query result in b for n objects. */ - void fill(BufBuilder& b, Projection *filter, int& nout) { - _fill(b, filter, nout, _best.begin(), _best.end()); - } - + void fill(BufBuilder& b, Projection *filter, int& nout ) const; + + private: + + void _add(BSONObj& k, BSONObj o, DiskLoc* loc); + + void _addIfBetter(BSONObj& k, BSONObj o, BestMap::iterator i, DiskLoc* loc); + BestMap _best; // key -> full object int _startFrom; int _limit; // max to send back. KeyType _order; unsigned _approxSize; + }; } // namespace mongo diff --git a/db/security.cpp b/db/security.cpp index 4a6f32600aa..b57326a8233 100644 --- a/db/security.cpp +++ b/db/security.cpp @@ -30,7 +30,7 @@ namespace mongo { bool AuthenticationInfo::_warned = false; - + /* void AuthenticationInfo::print() const { cout << "AuthenticationInfo: " << this << '\n'; for ( MA::const_iterator i=_dbs.begin(); i!=_dbs.end(); i++ ) { @@ -38,7 +38,7 @@ namespace mongo { } cout << "END" << endl; } - + */ string AuthenticationInfo::getUser( const string& dbname ) const { scoped_spinlock lk(_lock); @@ -78,9 +78,9 @@ namespace mongo { pwd = internalSecurity.pwd; } else { - static BSONObj userPattern = fromjson("{\"user\":1}"); + // static BSONObj userPattern = fromjson("{\"user\":1}"); string systemUsers = dbname + ".system.users"; - OCCASIONALLY Helpers::ensureIndex(systemUsers.c_str(), userPattern, false, "user_1"); + // OCCASIONALLY Helpers::ensureIndex(systemUsers.c_str(), userPattern, false, "user_1"); { BSONObjBuilder b; b << "user" << user; @@ -107,7 +107,7 @@ namespace mongo { } } - bool CmdLogout::run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool CmdLogout::run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { AuthenticationInfo *ai = cc().getAuthenticationInfo(); ai->logout(dbname); return true; diff --git a/db/security.h b/db/security.h index 2937ef29f80..2937ef29f80 100644..100755 --- a/db/security.h +++ b/db/security.h diff --git a/db/security_commands.cpp b/db/security_commands.cpp index 16face7fc32..2db96802404 100644 --- a/db/security_commands.cpp +++ b/db/security_commands.cpp @@ -56,7 +56,7 @@ namespace mongo { void help(stringstream& h) const { h << "internal"; } virtual LockType locktype() const { return NONE; } CmdGetNonce() : Command("getnonce") {} - bool run(const string&, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool run(const string&, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { nonce64 *n = new nonce64(Security::getNonce()); stringstream ss; ss << hex << *n; @@ -68,7 +68,7 @@ namespace mongo { CmdLogout cmdLogout; - bool CmdAuthenticate::run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool CmdAuthenticate::run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { log() << " authenticate: " << cmdObj << endl; string user = cmdObj.getStringField("user"); diff --git a/db/security_common.h b/db/security_common.h index 3af70cc7b97..2f2565f3ce0 100644 --- a/db/security_common.h +++ b/db/security_common.h @@ -57,10 +57,10 @@ namespace mongo { virtual bool slaveOk() const { return true; } - virtual LockType locktype() const { return WRITE; } + virtual LockType locktype() const { return READ; } virtual void help(stringstream& ss) const { ss << "internal"; } CmdAuthenticate() : Command("authenticate") {} - bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl); + bool run(const string& dbname , BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result, bool fromRepl); private: bool getUserObj(const string& dbname, const string& user, BSONObj& userObj, string& pwd); void authenticate(const string& dbname, const string& user, const bool readOnly); @@ -77,7 +77,7 @@ namespace mongo { void help(stringstream& h) const { h << "de-authenticate"; } virtual LockType locktype() const { return NONE; } CmdLogout() : Command("logout") {} - bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl); + bool run(const string& dbname , BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result, bool fromRepl); }; } // namespace mongo diff --git a/db/stats/top.cpp b/db/stats/top.cpp index 51a270c8c8c..f5b6ee42f1c 100644 --- a/db/stats/top.cpp +++ b/db/stats/top.cpp @@ -156,7 +156,7 @@ namespace mongo { virtual LockType locktype() const { return READ; } virtual void help( stringstream& help ) const { help << "usage by collection, in micros "; } - virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { { BSONObjBuilder b( result.subobjStart( "totals" ) ); b.append( "note" , "all times in microseconds" ); |