Diffstat (limited to 'db')
-rw-r--r--  db/btree.cpp | 2
-rw-r--r--  db/btree.h | 2
-rw-r--r--  db/btreecursor.cpp | 27
-rw-r--r--  db/client.cpp | 18
-rw-r--r--  db/clientcursor.cpp | 21
-rw-r--r--  db/clientcursor.h | 3
-rw-r--r--  db/cloner.cpp | 12
-rw-r--r--  db/cmdline.cpp | 61
-rw-r--r--  db/cmdline.h | 35
-rw-r--r--  db/commands.h | 12
-rw-r--r--  db/commands/distinct.cpp | 2
-rw-r--r--  db/commands/find_and_modify.cpp | 2
-rw-r--r--  db/commands/group.cpp | 19
-rw-r--r--  db/commands/isself.cpp | 2
-rw-r--r--  db/commands/mr.cpp | 28
-rw-r--r--  db/compact.cpp | 3
-rw-r--r--  db/curop.h | 15
-rw-r--r--  db/cursor.h | 2
-rw-r--r--  db/database.cpp | 21
-rw-r--r--  db/db.cpp | 26
-rwxr-xr-x  db/db.vcxproj | 21
-rwxr-xr-x  db/db.vcxproj.filters | 23
-rw-r--r--  db/dbcommands.cpp | 118
-rw-r--r--  db/dbcommands_admin.cpp | 8
-rw-r--r--  db/dbcommands_generic.cpp | 44
-rw-r--r--  db/dbeval.cpp | 2
-rw-r--r--  db/dbmessage.h | 4
-rw-r--r--  db/dbwebserver.cpp | 5
-rw-r--r--  db/driverHelpers.cpp | 2
-rw-r--r--  db/dur.cpp | 93
-rw-r--r--  db/dur_journal.cpp | 103
-rw-r--r--  db/dur_journal.h | 11
-rw-r--r--  db/dur_journalformat.h | 25
-rw-r--r--  db/dur_journalimpl.h | 12
-rw-r--r--  db/dur_preplogbuffer.cpp | 61
-rw-r--r--  db/dur_recover.cpp | 172
-rw-r--r--  db/dur_recover.h | 9
-rw-r--r--  db/dur_stats.h | 1
-rw-r--r--  db/dur_writetodatafiles.cpp | 10
-rw-r--r--  db/durop.h | 2
-rw-r--r--  db/geo/2d.cpp | 1340
-rw-r--r--  db/geo/core.h | 9
-rw-r--r--  db/geo/haystack.cpp | 2
-rw-r--r--  db/index.cpp | 5
-rw-r--r--  db/index.h | 10
-rw-r--r--  db/indexkey.cpp | 379
-rw-r--r--  db/indexkey.h | 13
-rw-r--r--  db/instance.cpp | 2
-rw-r--r--  db/instance.h | 2
-rw-r--r--  db/introspect.cpp | 22
-rw-r--r--  db/jsobj.cpp | 22
-rw-r--r--  db/key.cpp | 20
-rw-r--r--  db/matcher.cpp | 13
-rw-r--r--  db/modules/mms.cpp | 2
-rw-r--r--  db/mongommf.h | 2
-rw-r--r--  db/namespace.cpp | 13
-rw-r--r--  db/namespace.h | 1
-rw-r--r--  db/oplog.cpp | 32
-rw-r--r--  db/oplog.h | 1
-rw-r--r--  db/ops/query.cpp | 16
-rw-r--r--  db/ops/update.cpp | 49
-rw-r--r--  db/pdfile.cpp | 47
-rw-r--r--  db/pdfile.h | 2
-rw-r--r--  db/queryoptimizer.cpp | 107
-rw-r--r--  db/queryoptimizer.h | 16
-rw-r--r--  db/queryutil-inl.h | 19
-rw-r--r--  db/queryutil.cpp | 159
-rw-r--r--  db/queryutil.h | 4
-rw-r--r--  db/record.cpp | 21
-rw-r--r--  db/repl.cpp | 10
-rw-r--r--  db/repl/consensus.cpp | 11
-rw-r--r--  db/repl/heartbeat.cpp | 18
-rw-r--r--  db/repl/replset_commands.cpp | 34
-rw-r--r--  db/repl/rs.cpp | 63
-rw-r--r--  db/repl/rs.h | 62
-rw-r--r--  db/repl/rs_config.cpp | 115
-rw-r--r--  db/repl/rs_config.h | 12
-rw-r--r--  db/repl/rs_initialsync.cpp | 3
-rw-r--r--  db/repl/rs_initiate.cpp | 2
-rw-r--r--  db/repl/rs_member.h | 3
-rw-r--r--  db/repl/rs_rollback.cpp | 2
-rw-r--r--  db/repl/rs_sync.cpp | 36
-rw-r--r--  db/scanandorder.cpp | 93
-rw-r--r--  db/scanandorder.h | 83
-rw-r--r--  db/security.cpp | 10
-rwxr-xr-x [-rw-r--r--]  db/security.h | 0
-rw-r--r--  db/security_commands.cpp | 4
-rw-r--r--  db/security_common.h | 6
-rw-r--r--  db/stats/top.cpp | 2
89 files changed, 2567 insertions(+), 1371 deletions(-)
diff --git a/db/btree.cpp b/db/btree.cpp
index 232ac615470..e4753bef696 100644
--- a/db/btree.cpp
+++ b/db/btree.cpp
@@ -44,7 +44,7 @@ namespace mongo {
}
/** data check. like assert, but gives a reasonable error message to the user. */
-#define check(expr) if(!(expr)) { checkFailed(__LINE__); }
+#define check(expr) if(!(expr) ) { checkFailed(__LINE__); }
#define VERIFYTHISLOC dassert( thisLoc.btree<V>() == this );
diff --git a/db/btree.h b/db/btree.h
index 2e47d69a221..9ffa54cddc0 100644
--- a/db/btree.h
+++ b/db/btree.h
@@ -1071,7 +1071,7 @@ namespace mongo {
* Our btrees may (rarely) have "unused" keys when items are deleted.
* Skip past them.
*/
- virtual bool skipUnusedKeys( bool mayJump ) = 0;
+ virtual bool skipUnusedKeys() = 0;
bool skipOutOfRangeKeysAndCheckEnd();
void skipAndCheck();
diff --git a/db/btreecursor.cpp b/db/btreecursor.cpp
index cd145ef861f..f39d5bb0535 100644
--- a/db/btreecursor.cpp
+++ b/db/btreecursor.cpp
@@ -68,7 +68,7 @@ namespace mongo {
return !currKeyNode().prevChildBucket.isNull();
}
- bool skipUnusedKeys( bool mayJump ) {
+ bool skipUnusedKeys() {
int u = 0;
while ( 1 ) {
if ( !ok() )
@@ -80,9 +80,6 @@ namespace mongo {
u++;
//don't include unused keys in nscanned
//++_nscanned;
- if ( mayJump && ( u % 10 == 0 ) ) {
- skipOutOfRangeKeysAndCheckEnd();
- }
}
if ( u > 10 )
OCCASIONALLY log() << "btree unused skipped:" << u << '\n';
@@ -114,13 +111,13 @@ namespace mongo {
while( 1 ) {
// if ( b->keyAt(keyOfs).woEqual(keyAtKeyOfs) &&
// b->k(keyOfs).recordLoc == locAtKeyOfs ) {
- if ( keyAt(keyOfs).shallowEqual(keyAtKeyOfs) ) {
+ if ( keyAt(keyOfs).binaryEqual(keyAtKeyOfs) ) {
const _KeyNode& kn = keyNode(keyOfs);
if( kn.recordLoc == locAtKeyOfs ) {
if ( !kn.isUsed() ) {
// we were deleted but still exist as an unused
// marker key. advance.
- skipUnusedKeys( false );
+ skipUnusedKeys();
}
return;
}
@@ -149,7 +146,7 @@ namespace mongo {
bucket = _locate(keyAtKeyOfs, locAtKeyOfs);
RARELY log() << "key seems to have moved in the index, refinding. " << bucket.toString() << endl;
if ( ! bucket.isNull() )
- skipUnusedKeys( false );
+ skipUnusedKeys();
}
@@ -329,18 +326,24 @@ namespace mongo {
if ( ok() ) {
_nscanned = 1;
}
- skipUnusedKeys( false );
+ skipUnusedKeys();
checkEnd();
}
void BtreeCursor::skipAndCheck() {
- skipUnusedKeys( true );
+ int startNscanned = _nscanned;
+ skipUnusedKeys();
while( 1 ) {
if ( !skipOutOfRangeKeysAndCheckEnd() ) {
break;
}
- while( skipOutOfRangeKeysAndCheckEnd() );
- if ( !skipUnusedKeys( true ) ) {
+ do {
+ if ( _nscanned > startNscanned + 20 ) {
+ skipUnusedKeys();
+ return;
+ }
+ } while( skipOutOfRangeKeysAndCheckEnd() );
+ if ( !skipUnusedKeys() ) {
break;
}
}
@@ -395,7 +398,7 @@ namespace mongo {
bucket = _advance(bucket, keyOfs, _direction, "BtreeCursor::advance");
if ( !_independentFieldRanges ) {
- skipUnusedKeys( false );
+ skipUnusedKeys();
checkEnd();
if ( ok() ) {
++_nscanned;
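
The rewritten skipAndCheck() above bounds how far the cursor will chase out-of-range keys past its starting _nscanned (here, 20) before handing control back, replacing the old mayJump heuristic of re-checking every 10 unused keys. A self-contained sketch of the same budget idea, with purely illustrative names:

    // Bounded skipping: stop chasing skippable entries once a fixed scan
    // budget is exhausted, so a single advance can't stall its caller.
    #include <cstddef>
    #include <vector>

    bool skipWithBudget(const std::vector<int>& keys, size_t& pos, size_t budget) {
        size_t start = pos;
        while (pos < keys.size() && keys[pos] < 0) {   // negative = "unused" entry
            if (pos > start + budget)
                return false;        // budget exhausted; caller should yield and retry
            ++pos;
        }
        return pos < keys.size();    // true if we landed on a usable entry
    }
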
diff --git a/db/client.cpp b/db/client.cpp
index be5dba9ae56..bf3aead75a6 100644
--- a/db/client.cpp
+++ b/db/client.cpp
@@ -122,10 +122,13 @@ namespace mongo {
error() << "Client::shutdown not called: " << _desc << endl;
}
- scoped_lock bl(clientsMutex);
- if ( ! _shutdown )
- clients.erase(this);
- delete _curOp;
+ if ( ! inShutdown() ) {
+ // we can't clean up safely once we're in shutdown
+ scoped_lock bl(clientsMutex);
+ if ( ! _shutdown )
+ clients.erase(this);
+ delete _curOp;
+ }
}
bool Client::shutdown() {
@@ -469,7 +472,7 @@ namespace mongo {
virtual LockType locktype() const { return NONE; }
virtual bool slaveOk() const { return true; }
virtual bool adminOnly() const { return false; }
- virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
Client& c = cc();
c.gotHandshake( cmdObj );
return 1;
@@ -688,11 +691,14 @@ namespace mongo {
#define OPDEBUG_APPEND_NUMBER(x) if( x ) b.append( #x , (x) )
#define OPDEBUG_APPEND_BOOL(x) if( x ) b.appendBool( #x , (x) )
- void OpDebug::append( BSONObjBuilder& b ) const {
+ void OpDebug::append( const CurOp& curop, BSONObjBuilder& b ) const {
b.append( "op" , iscommand ? "command" : opToString( op ) );
b.append( "ns" , ns.toString() );
if ( ! query.isEmpty() )
b.append( iscommand ? "command" : "query" , query );
+ else if ( ! iscommand && curop.haveQuery() )
+ curop.appendQuery( b , "query" );
+
if ( ! updateobj.isEmpty() )
b.append( "updateobj" , updateobj );
diff --git a/db/clientcursor.cpp b/db/clientcursor.cpp
index 615616e7a7c..e803afd459c 100644
--- a/db/clientcursor.cpp
+++ b/db/clientcursor.cpp
@@ -447,16 +447,29 @@ namespace mongo {
return rec;
}
- bool ClientCursor::yieldSometimes( RecordNeeds need ) {
+ bool ClientCursor::yieldSometimes( RecordNeeds need, bool *yielded ) {
+ if ( yielded ) {
+ *yielded = false;
+ }
if ( ! _yieldSometimesTracker.ping() ) {
Record* rec = _recordForYield( need );
- if ( rec )
+ if ( rec ) {
+ if ( yielded ) {
+ *yielded = true;
+ }
return yield( yieldSuggest() , rec );
+ }
return true;
}
int micros = yieldSuggest();
- return ( micros > 0 ) ? yield( micros , _recordForYield( need ) ) : true;
+ if ( micros > 0 ) {
+ if ( yielded ) {
+ *yielded = true;
+ }
+ return yield( micros , _recordForYield( need ) );
+ }
+ return true;
}
void ClientCursor::staticYield( int micros , const StringData& ns , Record * rec ) {
@@ -616,7 +629,7 @@ namespace mongo {
help << " example: { cursorInfo : 1 }";
}
virtual LockType locktype() const { return NONE; }
- bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
ClientCursor::appendStats( result );
return true;
}
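
yieldSometimes() now reports, through an optional out-parameter, whether a yield was actually attempted, so callers can tell when the cursor may have moved or been invalidated underneath them. A hedged sketch of the intended call pattern (the caller shown is illustrative, not part of this patch):

    bool yielded = false;
    if ( !cc->yieldSometimes( ClientCursor::WillNeed, &yielded ) )
        return;                  // the ClientCursor was freed while yielding
    if ( yielded ) {
        // anything cached before the yield (current location, matcher state,
        // namespace details) must be re-validated here
    }
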
diff --git a/db/clientcursor.h b/db/clientcursor.h
index b3bd996c768..75c7da85cc6 100644
--- a/db/clientcursor.h
+++ b/db/clientcursor.h
@@ -186,9 +186,10 @@ namespace mongo {
/**
* @param needRecord whether or not the next record has to be read from disk for sure
* if this is true, will yield if next record isn't in memory
+ * @param yielded true if a yield occurred, and potentially if a yield did not occur
* @return same as yield()
*/
- bool yieldSometimes( RecordNeeds need );
+ bool yieldSometimes( RecordNeeds need, bool *yielded = 0 );
static int yieldSuggest();
static void staticYield( int micros , const StringData& ns , Record * rec );
diff --git a/db/cloner.cpp b/db/cloner.cpp
index 2a46ea22cb4..8956133daa3 100644
--- a/db/cloner.cpp
+++ b/db/cloner.cpp
@@ -460,7 +460,7 @@ namespace mongo {
help << "{ clone : \"host13\" }";
}
CmdClone() : Command("clone") { }
- virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
string from = cmdObj.getStringField("clone");
if ( from.empty() )
return false;
@@ -486,7 +486,7 @@ namespace mongo {
"Warning: the local copy of 'ns' is emptied before the copying begins. Any existing data will be lost there."
;
}
- virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
string fromhost = cmdObj.getStringField("from");
if ( fromhost.empty() ) {
errmsg = "missing 'from' parameter";
@@ -538,7 +538,7 @@ namespace mongo {
help << "get a nonce for subsequent copy db request from secure server\n";
help << "usage: {copydbgetnonce: 1, fromhost: <hostname>}";
}
- virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
string fromhost = cmdObj.getStringField("fromhost");
if ( fromhost.empty() ) {
/* copy from self */
@@ -579,7 +579,7 @@ namespace mongo {
help << "copy a database from another host to this host\n";
help << "usage: {copydb: 1, fromhost: <hostname>, fromdb: <db>, todb: <db>[, slaveOk: <bool>, username: <username>, nonce: <nonce>, key: <key>]}";
}
- virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
bool slaveOk = cmdObj["slaveOk"].trueValue();
string fromhost = cmdObj.getStringField("fromhost");
if ( fromhost.empty() ) {
@@ -633,7 +633,7 @@ namespace mongo {
virtual void help( stringstream &help ) const {
help << " example: { renameCollection: foo.a, to: bar.b }";
}
- virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
string source = cmdObj.getStringField( name.c_str() );
string target = cmdObj.getStringField( "to" );
if ( source.empty() || target.empty() ) {
@@ -671,6 +671,8 @@ namespace mongo {
nsToDatabase( target.c_str(), to );
if ( strcmp( from, to ) == 0 ) {
renameNamespace( source.c_str(), target.c_str() );
+ // make sure we drop counters etc
+ Top::global.collectionDropped( source );
return true;
}
}
diff --git a/db/cmdline.cpp b/db/cmdline.cpp
index d0b80455ff2..06880c98829 100644
--- a/db/cmdline.cpp
+++ b/db/cmdline.cpp
@@ -19,6 +19,7 @@
#include "pch.h"
#include "cmdline.h"
#include "commands.h"
+#include "../util/password.h"
#include "../util/processinfo.h"
#include "../util/net/listen.h"
#include "security_common.h"
@@ -27,6 +28,8 @@
#include <direct.h>
#endif
+#define MAX_LINE_LENGTH 256
+
namespace po = boost::program_options;
namespace fs = boost::filesystem;
@@ -64,6 +67,14 @@ namespace mongo {
("fork" , "fork server process" )
#endif
;
+
+ hidden.add_options()
+#ifdef MONGO_SSL
+ ("sslOnNormalPorts" , "use ssl on configured ports" )
+ ("sslPEMKeyFile" , po::value<string>(&cmdLine.sslPEMKeyFile), "PEM file for ssl" )
+ ("sslPEMKeyPassword" , new PasswordValue(&cmdLine.sslPEMKeyPassword) , "PEM file password" )
+#endif
+ ;
}
@@ -85,6 +96,32 @@ namespace mongo {
}
#endif
+ void CmdLine::parseConfigFile( istream &f, stringstream &ss ) {
+ string s;
+ char line[MAX_LINE_LENGTH];
+
+ while ( f ) {
+ f.getline(line, MAX_LINE_LENGTH);
+ s = line;
+ std::remove(s.begin(), s.end(), ' ');
+ std::remove(s.begin(), s.end(), '\t');
+ boost::to_upper(s);
+
+ if ( s.find( "FASTSYNC" ) != string::npos )
+ cout << "warning \"fastsync\" should not be put in your configuration file" << endl;
+
+ if ( s.c_str()[0] == '#' ) {
+ // skipping commented line
+ } else if ( s.find( "=FALSE" ) == string::npos ) {
+ ss << line << endl;
+ } else {
+ cout << "warning: remove or comment out this line by starting it with \'#\', skipping now : " << line << endl;
+ }
+ }
+ return;
+ }
+
+
bool CmdLine::store( int argc , char ** argv ,
boost::program_options::options_description& visible,
@@ -141,7 +178,9 @@ namespace mongo {
return false;
}
- po::store( po::parse_config_file( f , all ) , params );
+ stringstream ss;
+ CmdLine::parseConfigFile( f, ss );
+ po::store( po::parse_config_file( ss , all ) , params );
f.close();
}
@@ -287,7 +326,25 @@ namespace mongo {
noauth = false;
}
+#ifdef MONGO_SSL
+ if (params.count("sslOnNormalPorts") ) {
+ cmdLine.sslOnNormalPorts = true;
+ if ( cmdLine.sslPEMKeyPassword.size() == 0 ) {
+ log() << "need sslPEMKeyPassword" << endl;
+ dbexit(EXIT_BADOPTIONS);
+ }
+
+ if ( cmdLine.sslPEMKeyFile.size() == 0 ) {
+ log() << "need sslPEMKeyFile" << endl;
+ dbexit(EXIT_BADOPTIONS);
+ }
+
+ cmdLine.sslServerManager = new SSLManager( false );
+ cmdLine.sslServerManager->setupPEM( cmdLine.sslPEMKeyFile , cmdLine.sslPEMKeyPassword );
+ }
+#endif
+
{
BSONObjBuilder b;
for (po::variables_map::const_iterator it(params.begin()), end(params.end()); it != end; it++){
@@ -354,7 +411,7 @@ namespace mongo {
virtual bool adminOnly() const { return true; }
virtual bool slaveOk() const { return true; }
- virtual bool run(const string&, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string&, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
result.append("argv", argvArray);
result.append("parsed", parsedOpts);
return true;
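
parseConfigFile() pre-filters the config file because boost::program_options generally treats the mere presence of a switch option as enabling it, so a line like "journal = false" would paradoxically turn journaling on; lines whose value is false are therefore dropped with a warning instead of being passed through. Given an illustrative config file:

    # mongod.conf (illustrative)
    journal = false     # dropped with a warning: the value would otherwise be
                        # read as the switch simply being present, i.e. true
    quiet = true        # passed through to boost::program_options unchanged

The fastsync check is a similar guard: that flag is meant for one-off startup use, so finding it in a persistent config file earns its own warning.
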
diff --git a/db/cmdline.h b/db/cmdline.h
index 7b6d12a2e04..60eb668a735 100644
--- a/db/cmdline.h
+++ b/db/cmdline.h
@@ -21,6 +21,12 @@
namespace mongo {
+#ifdef MONGO_SSL
+ class SSLManager;
+#endif
+
+
+
/* command line options
*/
/* concurrency: OK/READ */
@@ -63,6 +69,7 @@ namespace mongo {
bool quiet; // --quiet
bool noTableScan; // --notablescan no table scans allowed
bool prealloc; // --noprealloc no preallocation of data files
+ bool preallocj; // --nopreallocj no preallocation of journal files
bool smallfiles; // --smallfiles allocate smaller data files
bool configsvr; // --configsvr
@@ -71,7 +78,8 @@ namespace mongo {
int quotaFiles; // --quotaFiles
bool cpu; // --cpu show cpu time periodically
- bool dur; // --dur durability (now --journal)
+ bool dur; // --dur durability (now --journal)
+ unsigned journalCommitInterval; // group/batch commit interval ms
/** --durOptions 7 dump journal and terminate without doing anything further
--durOptions 4 recover and terminate without listening
@@ -99,6 +107,14 @@ namespace mongo {
bool noUnixSocket; // --nounixsocket
string socket; // UNIX domain socket directory
+#ifdef MONGO_SSL
+ bool sslOnNormalPorts; // --sslOnNormalPorts
+ string sslPEMKeyFile; // --sslPEMKeyFile
+ string sslPEMKeyPassword; // --sslPEMKeyPassword
+
+ SSLManager* sslServerManager; // currently leaks on close
+#endif
+
static void addGlobalOptions( boost::program_options::options_description& general ,
boost::program_options::options_description& hidden );
@@ -106,6 +122,7 @@ namespace mongo {
boost::program_options::options_description& hidden );
+ static void parseConfigFile( istream &f, stringstream &ss);
/**
* @return true if should run program, false if should exit
*/
@@ -116,18 +133,28 @@ namespace mongo {
boost::program_options::variables_map &output );
};
+ // todo move to cmdline.cpp?
inline CmdLine::CmdLine() :
- port(DefaultDBPort), rest(false), jsonp(false), quiet(false), noTableScan(false), prealloc(true), smallfiles(sizeof(int*) == 4),
+ port(DefaultDBPort), rest(false), jsonp(false), quiet(false), noTableScan(false), prealloc(true), preallocj(true), smallfiles(sizeof(int*) == 4),
configsvr(false),
quota(false), quotaFiles(8), cpu(false), durOptions(0), objcheck(false), oplogSize(0), defaultProfile(0), slowMS(100), pretouch(0), moveParanoia( true ),
syncdelay(60), noUnixSocket(false), socket("/tmp")
{
- // default may change for this later.
+ journalCommitInterval = 0; // 0 means use default
+ dur = false;
#if defined(_DURABLEDEFAULTON)
dur = true;
-#else
+#endif
+ if( sizeof(void*) == 8 )
+ dur = true;
+#if defined(_DURABLEDEFAULTOFF)
dur = false;
#endif
+
+#ifdef MONGO_SSL
+ sslOnNormalPorts = false;
+ sslServerManager = 0;
+#endif
}
extern CmdLine cmdLine;
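
The reworked constructor resolves the journaling default in a fixed order: off as the base, on for 64-bit builds, with the compile-time macros able to force either end. The same precedence, restated as a self-contained sketch:

    static bool defaultJournaling() {
        bool dur = false;              // base default
    #if defined(_DURABLEDEFAULTON)
        dur = true;                    // compile-time force-on
    #endif
        if ( sizeof(void*) == 8 )
            dur = true;                // 64-bit builds journal by default
    #if defined(_DURABLEDEFAULTOFF)
        dur = false;                   // compile-time force-off is applied last
    #endif
        return dur;
    }
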
diff --git a/db/commands.h b/db/commands.h
index 454e2277e06..c18621828f2 100644
--- a/db/commands.h
+++ b/db/commands.h
@@ -20,6 +20,7 @@
#include "../pch.h"
#include "jsobj.h"
#include "../util/timer.h"
+#include "../client/dbclient.h"
namespace mongo {
@@ -45,7 +46,7 @@ namespace mongo {
return value is true if succeeded. if false, set errmsg text.
*/
- virtual bool run(const string& db, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) = 0;
+ virtual bool run(const string& db, BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result, bool fromRepl = false ) = 0;
/*
note: logTheTop() MUST be false if READ
@@ -94,6 +95,11 @@ namespace mongo {
*/
virtual bool requiresAuth() { return true; }
+ /* Return true if a replica set secondary should go into "recovering"
+ (unreadable) state while running this command.
+ */
+ virtual bool maintenanceMode() const { return false; }
+
/** @param webUI expose the command in the web ui as localhost:28017/<name>
@param oldName an optional old, deprecated name for the command
*/
@@ -120,7 +126,7 @@ namespace mongo {
static const map<string,Command*>* commandsByBestName() { return _commandsByBestName; }
static const map<string,Command*>* webCommands() { return _webCommands; }
/** @return if command was found and executed */
- static bool runAgainstRegistered(const char *ns, BSONObj& jsobj, BSONObjBuilder& anObjBuilder);
+ static bool runAgainstRegistered(const char *ns, BSONObj& jsobj, BSONObjBuilder& anObjBuilder, int queryOptions = 0);
static LockType locktype( const string& name );
static Command * findCommand( const string& name );
};
@@ -139,7 +145,7 @@ namespace mongo {
virtual LockType locktype() const { return NONE; }
virtual void help( stringstream& help ) const;
CmdShutdown() : Command("shutdown") {}
- bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl);
+ bool run(const string& dbname, BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result, bool fromRepl);
private:
bool shutdownHelper();
};
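
Command::run() gains an int parameter carrying the wire-protocol query options (threaded through runAgainstRegistered()'s new queryOptions argument, default 0), which is why every command in this patch picks up an extra, often unnamed, int. A minimal sketch of a command written against the new signature (the command itself is hypothetical):

    class CmdExample : public Command {
    public:
        CmdExample() : Command("example") {}
        virtual LockType locktype() const { return NONE; }
        virtual bool slaveOk() const { return true; }
        virtual bool run(const string& dbname, BSONObj& cmdObj, int options,
                         string& errmsg, BSONObjBuilder& result, bool fromRepl) {
            // 'options' carries QueryOption_* flags from the client request
            result.append("example", 1);
            return true;
        }
    } cmdExample;
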
diff --git a/db/commands/distinct.cpp b/db/commands/distinct.cpp
index 9a10e69d5a8..48f44050e49 100644
--- a/db/commands/distinct.cpp
+++ b/db/commands/distinct.cpp
@@ -32,7 +32,7 @@ namespace mongo {
help << "{ distinct : 'collection name' , key : 'a.b' , query : {} }";
}
- bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
Timer t;
string ns = dbname + '.' + cmdObj.firstElement().valuestr();
diff --git a/db/commands/find_and_modify.cpp b/db/commands/find_and_modify.cpp
index 2856ab3d3f1..0cf766fcf87 100644
--- a/db/commands/find_and_modify.cpp
+++ b/db/commands/find_and_modify.cpp
@@ -37,7 +37,7 @@ namespace mongo {
virtual bool logTheOp() { return false; } // the modifications will be logged directly
virtual bool slaveOk() const { return false; }
virtual LockType locktype() const { return WRITE; }
- virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
static DBDirectClient db;
string ns = dbname + '.' + cmdObj.firstElement().valuestr();
diff --git a/db/commands/group.cpp b/db/commands/group.cpp
index 9d7acbdf7d4..d3e5839748c 100644
--- a/db/commands/group.cpp
+++ b/db/commands/group.cpp
@@ -20,6 +20,7 @@
#include "../instance.h"
#include "../queryoptimizer.h"
#include "../../scripting/engine.h"
+#include "../clientcursor.h"
namespace mongo {
@@ -44,7 +45,7 @@ namespace mongo {
uassert( 10042 , "return of $key has to be an object" , type == Object );
return s->getObject( "return" );
}
- return obj.extractFields( keyPattern , true );
+ return obj.extractFields( keyPattern , true ).getOwned();
}
bool group( string realdbname , const string& ns , const BSONObj& query ,
@@ -88,14 +89,27 @@ namespace mongo {
list<BSONObj> blah;
shared_ptr<Cursor> cursor = NamespaceDetailsTransient::getCursor(ns.c_str() , query);
+ ClientCursor::CleanupPointer ccPointer;
+ ccPointer.reset( new ClientCursor( QueryOption_NoCursorTimeout, cursor, ns ) );
while ( cursor->ok() ) {
+
+ if ( !ccPointer->yieldSometimes( ClientCursor::MaybeCovered ) ||
+ !cursor->ok() ) {
+ break;
+ }
+
if ( ( cursor->matcher() && !cursor->matcher()->matchesCurrent( cursor.get() ) ) ||
cursor->getsetdup( cursor->currLoc() ) ) {
cursor->advance();
continue;
}
+ if ( !ccPointer->yieldSometimes( ClientCursor::WillNeed ) ||
+ !cursor->ok() ) {
+ break;
+ }
+
BSONObj obj = cursor->current();
cursor->advance();
@@ -117,6 +131,7 @@ namespace mongo {
throw UserException( 9010 , (string)"reduce invoke failed: " + s->getError() );
}
}
+ ccPointer.reset();
if (!finalize.empty()) {
s->exec( "$finalize = " + finalize , "finalize define" , false , true , true , 100 );
@@ -140,7 +155,7 @@ namespace mongo {
return true;
}
- bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
if ( !globalScriptEngine ) {
errmsg = "server-side JavaScript execution is disabled";
diff --git a/db/commands/isself.cpp b/db/commands/isself.cpp
index cac8380dc20..5a868de919f 100644
--- a/db/commands/isself.cpp
+++ b/db/commands/isself.cpp
@@ -130,7 +130,7 @@ namespace mongo {
help << "{ _isSelf : 1 } INTERNAL ONLY";
}
- bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
init();
result.append( "id" , _id );
return true;
diff --git a/db/commands/mr.cpp b/db/commands/mr.cpp
index 75f5615b9f6..56e9770dff2 100644
--- a/db/commands/mr.cpp
+++ b/db/commands/mr.cpp
@@ -879,8 +879,6 @@ namespace mongo {
}
}
-// boost::thread_specific_ptr<State*> _tl;
-
/**
* emit that will be called by js function
*/
@@ -932,7 +930,7 @@ namespace mongo {
help << "http://www.mongodb.org/display/DOCS/MapReduce";
}
virtual LockType locktype() const { return NONE; }
- bool run(const string& dbname , BSONObj& cmd, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname , BSONObj& cmd, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
Timer t;
Client::GodScope cg;
Client& client = cc();
@@ -968,12 +966,6 @@ namespace mongo {
state.init();
state.prepTempCollection();
- {
- State** s = new State*();
- s[0] = &state;
-// _tl.reset( s );
- }
-
wassert( config.limit < 0x4000000 ); // see case on next line to 32 bit unsigned
ProgressMeterHolder pm( op->setMessage( "m/r: (1/3) emit phase" , state.incomingDocuments() ) );
long long mapTime = 0;
@@ -988,7 +980,9 @@ namespace mongo {
// obtain cursor on data to apply mr to, sorted
shared_ptr<Cursor> temp = NamespaceDetailsTransient::getCursor( config.ns.c_str(), config.filter, config.sort );
+ uassert( 15876, str::stream() << "could not create cursor over " << config.ns << " for query : " << config.filter << " sort : " << config.sort, temp.get() );
auto_ptr<ClientCursor> cursor( new ClientCursor( QueryOption_NoCursorTimeout , temp , config.ns.c_str() ) );
+ uassert( 15877, str::stream() << "could not create client cursor over " << config.ns << " for query : " << config.filter << " sort : " << config.sort, cursor.get() );
Timer mt;
// go through each doc
@@ -1065,11 +1059,19 @@ namespace mongo {
countsBuilder.appendNumber( "reduce" , state.numReduces() );
timingBuilder.append( "reduceTime" , inReduce / 1000 );
timingBuilder.append( "mode" , state.jsMode() ? "js" : "mixed" );
-
-// _tl.reset();
+ }
+ // TODO: The error handling code for queries is v. fragile,
+ // *requires* rethrow AssertionExceptions - should probably fix.
+ catch ( AssertionException& e ){
+ log() << "mr failed, removing collection" << causedBy(e) << endl;
+ throw e;
+ }
+ catch ( std::exception& e ){
+ log() << "mr failed, removing collection" << causedBy(e) << endl;
+ throw e;
}
catch ( ... ) {
- log() << "mr failed, removing collection" << endl;
+ log() << "mr failed for unknown reason, removing collection" << endl;
throw;
}
@@ -1116,7 +1118,7 @@ namespace mongo {
virtual bool slaveOverrideOk() { return true; }
virtual LockType locktype() const { return NONE; }
- bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
string shardedOutputCollection = cmdObj["shardedOutputCollection"].valuestrsafe();
string postProcessCollection = cmdObj["postProcessCollection"].valuestrsafe();
bool postProcessOnly = !(postProcessCollection.empty());
diff --git a/db/compact.cpp b/db/compact.cpp
index a1197460f4f..c6e5f77ee0e 100644
--- a/db/compact.cpp
+++ b/db/compact.cpp
@@ -263,6 +263,7 @@ namespace mongo {
virtual LockType locktype() const { return NONE; }
virtual bool adminOnly() const { return false; }
virtual bool slaveOk() const { return true; }
+ virtual bool maintenanceMode() const { return true; }
virtual bool logTheOp() { return false; }
virtual void help( stringstream& help ) const {
help << "compact collection\n"
@@ -274,7 +275,7 @@ namespace mongo {
virtual bool requiresAuth() { return true; }
CompactCmd() : Command("compact") { }
- virtual bool run(const string& db, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& db, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
string coll = cmdObj.firstElement().valuestr();
if( coll.empty() || db.empty() ) {
errmsg = "no collection name specified";
diff --git a/db/curop.h b/db/curop.h
index f261812d383..2717d78cc62 100644
--- a/db/curop.h
+++ b/db/curop.h
@@ -28,6 +28,8 @@
namespace mongo {
+ class CurOp;
+
/* lifespan is different than CurOp because of recursives with DBDirectClient */
class OpDebug {
public:
@@ -36,7 +38,7 @@ namespace mongo {
void reset();
string toString() const;
- void append( BSONObjBuilder& b ) const;
+ void append( const CurOp& curop, BSONObjBuilder& b ) const;
// -------------------
@@ -119,7 +121,7 @@ namespace mongo {
int size() const { return *_size; }
bool have() const { return size() > 0; }
- BSONObj get() {
+ BSONObj get() const {
_lock.lock();
BSONObj o;
try {
@@ -133,7 +135,7 @@ namespace mongo {
return o;
}
- void append( BSONObjBuilder& b , const StringData& name ) {
+ void append( BSONObjBuilder& b , const StringData& name ) const {
scoped_spinlock lk(_lock);
BSONObj temp = _get();
b.append( name , temp );
@@ -141,7 +143,7 @@ namespace mongo {
private:
/** you have to be locked when you call this */
- BSONObj _get() {
+ BSONObj _get() const {
int sz = size();
if ( sz == 0 )
return BSONObj();
@@ -153,7 +155,7 @@ namespace mongo {
/** you have to be locked when you call this */
void _reset( int sz ) { _size[0] = sz; }
- SpinLock _lock;
+ mutable SpinLock _lock;
int * _size;
char _buf[512];
};
@@ -168,7 +170,8 @@ namespace mongo {
bool haveQuery() const { return _query.have(); }
BSONObj query() { return _query.get(); }
-
+ void appendQuery( BSONObjBuilder& b , const StringData& name ) const { _query.append( b , name ); }
+
void ensureStarted() {
if ( _start == 0 )
_start = _checkpoint = curTimeMicros64();
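
The accessors above become const, which in turn forces the SpinLock member to be mutable: locking is synchronization, not logical object state, so const readers must still be able to take the lock. The idiom in isolation (type names illustrative, reusing the SpinLock/scoped_spinlock utilities seen above):

    class SafeValue {
        mutable SpinLock _lock;   // mutable so const readers can still lock
        int _value;
    public:
        int get() const {
            scoped_spinlock lk( _lock );
            return _value;
        }
        void set( int v ) {
            scoped_spinlock lk( _lock );
            _value = v;
        }
    };
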
diff --git a/db/cursor.h b/db/cursor.h
index ff9c9821ada..9639b2677b1 100644
--- a/db/cursor.h
+++ b/db/cursor.h
@@ -132,6 +132,8 @@ namespace mongo {
virtual void setMatcher( shared_ptr< CoveredIndexMatcher > matcher ) {
massert( 13285, "manual matcher config not allowed", false );
}
+
+ virtual void explainDetails( BSONObjBuilder& b ) { return; }
};
// strategy object implementing direction of traversal.
diff --git a/db/database.cpp b/db/database.cpp
index 7906e9b435a..97b3fa011cb 100644
--- a/db/database.cpp
+++ b/db/database.cpp
@@ -192,22 +192,31 @@ namespace mongo {
return ret;
}
+ bool fileIndexExceedsQuota( const char *ns, int fileIndex, bool enforceQuota ) {
+ return
+ cmdLine.quota &&
+ enforceQuota &&
+ fileIndex >= cmdLine.quotaFiles &&
+ // we don't enforce the quota on "special" namespaces as that could lead to problems -- e.g.
+ // rejecting an index insert after inserting the main record.
+ !NamespaceString::special( ns ) &&
+ NamespaceString( ns ).db != "local";
+ }
+
MongoDataFile* Database::suitableFile( const char *ns, int sizeNeeded, bool preallocate, bool enforceQuota ) {
// check existing files
for ( int i=numFiles()-1; i>=0; i-- ) {
MongoDataFile* f = getFile( i );
if ( f->getHeader()->unusedLength >= sizeNeeded ) {
- // we don't enforce the quota on "special" namespaces as that could lead to problems -- e.g.
- // rejecting an index insert after inserting the main record.
- if( cmdLine.quota && enforceQuota && i > cmdLine.quotaFiles && !NamespaceString::special(ns) )
+ if ( fileIndexExceedsQuota( ns, i-1, enforceQuota ) ) // NOTE i-1 is the value used historically for this check.
;
else
return f;
}
}
- if( cmdLine.quota && enforceQuota && numFiles() >= cmdLine.quotaFiles && !NamespaceString::special(ns) )
+ if ( fileIndexExceedsQuota( ns, numFiles(), enforceQuota ) )
uasserted(12501, "quota exceeded");
// allocate files until we either get one big enough or hit maxSize
@@ -261,8 +270,8 @@ namespace mongo {
log() << "creating profile collection: " << profileName << endl;
BSONObjBuilder spec;
spec.appendBool( "capped", true );
- spec.append( "size", 131072.0 );
- if ( ! userCreateNS( profileName.c_str(), spec.done(), errmsg , true ) ) {
+ spec.append( "size", 1024*1024 );
+ if ( ! userCreateNS( profileName.c_str(), spec.done(), errmsg , false /* we don't replicate profile messages */ ) ) {
return false;
}
}
diff --git a/db/db.cpp b/db/db.cpp
index 9f90b9ddd02..2d4970af044 100644
--- a/db/db.cpp
+++ b/db/db.cpp
@@ -70,7 +70,6 @@ namespace mongo {
extern string repairpath;
void setupSignals( bool inFork );
- void startReplSets(ReplSetCmdline*);
void startReplication();
void exitCleanly( ExitCode code );
@@ -216,8 +215,6 @@ namespace mongo {
void listen(int port) {
//testTheDb();
- log() << "waiting for connections on port " << port << endl;
-
MessageServer::Options options;
options.port = port;
options.ipList = cmdLine.bind_ip;
@@ -483,12 +480,6 @@ namespace mongo {
clientCursorMonitor.go();
PeriodicTask::theRunner->go();
- if( !cmdLine._replSet.empty() ) {
- replSet = true;
- ReplSetCmdline *replSetCmdline = new ReplSetCmdline(cmdLine._replSet);
- boost::thread t( boost::bind( &startReplSets, replSetCmdline) );
- }
-
listen(listenPort);
// listen() will return when exit code closes its socket.
@@ -575,10 +566,12 @@ int main(int argc, char* argv[]) {
("directoryperdb", "each database will be stored in a separate directory")
("journal", "enable journaling")
("journalOptions", po::value<int>(), "journal diagnostic options")
+ ("journalCommitInterval", po::value<unsigned>(), "how often to group/batch commit (ms)")
("ipv6", "enable IPv6 support (disabled by default)")
("jsonp","allow JSONP access via http (has security implications)")
("noauth", "run without security")
("nohttpinterface", "disable http interface")
+ ("nojournal", "disable journaling (journaling is on by default for 64 bit)")
("noprealloc", "disable data file preallocation - will often hurt performance")
("noscripting", "disable scripting engine")
("notablescan", "do not allow table scans")
@@ -631,12 +624,11 @@ int main(int argc, char* argv[]) {
("pretouch", po::value<int>(), "n pretouch threads for applying replicationed operations")
("command", po::value< vector<string> >(), "command")
("cacheSize", po::value<long>(), "cache size (in MB) for rec store")
- // these move to unhidden later:
("nodur", "disable journaling (currently the default)")
- ("nojournal", "disable journaling (currently the default)")
// things we don't want people to use
("nocursors", "diagnostic/debugging option that turns off cursors DO NOT USE IN PRODUCTION")
("nohints", "ignore query hints")
+ ("nopreallocj", "don't preallocate journal files")
("dur", "enable journaling") // deprecated version
("durOptions", po::value<int>(), "durability diagnostic options") // deprecated version
// deprecated pairing command line options
@@ -745,6 +737,15 @@ int main(int argc, char* argv[]) {
if (params.count("durOptions")) {
cmdLine.durOptions = params["durOptions"].as<int>();
}
+ if( params.count("journalCommitInterval") ) {
+ // don't check if dur is false here as many will just use the default, and will default to off on win32.
+ // ie no point making life a little more complex by giving an error on a dev environment.
+ cmdLine.journalCommitInterval = params["journalCommitInterval"].as<unsigned>();
+ if( cmdLine.journalCommitInterval <= 1 || cmdLine.journalCommitInterval > 300 ) {
+ out() << "--journalCommitInterval out of allowed range (0-300ms)" << endl;
+ dbexit( EXIT_BADOPTIONS );
+ }
+ }
if (params.count("journalOptions")) {
cmdLine.durOptions = params["journalOptions"].as<int>();
}
@@ -761,6 +762,9 @@ int main(int argc, char* argv[]) {
if (params.count("nohints")) {
useHints = false;
}
+ if (params.count("nopreallocj")) {
+ cmdLine.preallocj = false;
+ }
if (params.count("nohttpinterface")) {
noHttpInterface = true;
}
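
--journalCommitInterval thus becomes a validated first-class startup option: per the range check above, values from 2 to 300 milliseconds are accepted (0 meaning "use the default" internally), and --nopreallocj disables journal file preallocation. An illustrative invocation:

    mongod --dbpath /data/db --journal --journalCommitInterval 50
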
diff --git a/db/db.vcxproj b/db/db.vcxproj
index 685015ed7f6..8f831cb8559 100755
--- a/db/db.vcxproj
+++ b/db/db.vcxproj
@@ -459,9 +459,27 @@
<ClCompile Include="..\s\shard.cpp" />
<ClCompile Include="..\s\shardconnection.cpp" />
<ClCompile Include="..\s\shardkey.cpp" />
+ <ClCompile Include="..\third_party\snappy\snappy-sinksource.cc">
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">NotUsing</PrecompiledHeader>
+ </ClCompile>
+ <ClCompile Include="..\third_party\snappy\snappy.cc">
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">NotUsing</PrecompiledHeader>
+ </ClCompile>
<ClCompile Include="..\util\alignedbuilder.cpp">
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">NotUsing</PrecompiledHeader>
</ClCompile>
+ <ClCompile Include="..\util\compress.cpp">
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">NotUsing</PrecompiledHeader>
+ </ClCompile>
<ClCompile Include="..\util\concurrency\spin_lock.cpp" />
<ClCompile Include="..\util\concurrency\synchronization.cpp" />
<ClCompile Include="..\util\concurrency\task.cpp" />
@@ -561,6 +579,7 @@
<ClCompile Include="..\client\parallel.cpp" />
<ClCompile Include="pdfile.cpp" />
<ClCompile Include="queryoptimizer.cpp" />
+ <ClCompile Include="scanandorder.cpp" />
<ClCompile Include="security.cpp" />
<ClCompile Include="security_commands.cpp" />
<ClCompile Include="security_common.cpp" />
@@ -652,6 +671,8 @@
<ClInclude Include="..\targetver.h" />
<ClInclude Include="..\pcre-7.4\config.h" />
<ClInclude Include="..\pcre-7.4\pcre.h" />
+ <ClInclude Include="..\third_party\snappy\config.h" />
+ <ClInclude Include="..\third_party\snappy\snappy.h" />
<ClInclude Include="..\util\alignedbuilder.h" />
<ClInclude Include="..\util\concurrency\mutexdebugger.h" />
<ClInclude Include="..\util\concurrency\race.h" />
diff --git a/db/db.vcxproj.filters b/db/db.vcxproj.filters
index d9e9def86f8..36b0df1ddc2 100755
--- a/db/db.vcxproj.filters
+++ b/db/db.vcxproj.filters
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="utf-8"?>
+<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<ClCompile Include="..\bson\oid.cpp" />
@@ -166,6 +166,16 @@
<ClCompile Include="..\util\net\message_port.cpp" />
<ClCompile Include="dbmessage.cpp" />
<ClCompile Include="commands\find_and_modify.cpp" />
+ <ClCompile Include="..\util\compress.cpp">
+ <Filter>snappy</Filter>
+ </ClCompile>
+ <ClCompile Include="..\third_party\snappy\snappy-sinksource.cc">
+ <Filter>snappy</Filter>
+ </ClCompile>
+ <ClCompile Include="..\third_party\snappy\snappy.cc">
+ <Filter>snappy</Filter>
+ </ClCompile>
+ <ClCompile Include="scanandorder.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\client\dbclientcursor.h" />
@@ -315,6 +325,12 @@
<ClInclude Include="..\util\net\sock.h" />
<ClInclude Include="..\util\concurrency\rwlockimpl.h" />
<ClInclude Include="..\util\concurrency\mutexdebugger.h" />
+ <ClInclude Include="..\third_party\snappy\config.h">
+ <Filter>snappy</Filter>
+ </ClInclude>
+ <ClInclude Include="..\third_party\snappy\snappy.h">
+ <Filter>snappy</Filter>
+ </ClInclude>
</ItemGroup>
<ItemGroup>
<ResourceCompile Include="db.rc" />
@@ -349,4 +365,9 @@
<Library Include="..\..\js\js64d.lib" />
<Library Include="..\..\js\js64r.lib" />
</ItemGroup>
+ <ItemGroup>
+ <Filter Include="snappy">
+ <UniqueIdentifier>{bb99c086-7926-4f50-838d-f5f0c18397c0}</UniqueIdentifier>
+ </Filter>
+ </ItemGroup>
</Project> \ No newline at end of file
diff --git a/db/dbcommands.cpp b/db/dbcommands.cpp
index 73c1004d4f2..2edd7684ff8 100644
--- a/db/dbcommands.cpp
+++ b/db/dbcommands.cpp
@@ -31,6 +31,7 @@
#include "../util/lruishmap.h"
#include "../util/md5.hpp"
#include "../util/processinfo.h"
+#include "../util/ramlog.h"
#include "json.h"
#include "repl.h"
#include "repl_block.h"
@@ -53,14 +54,16 @@ namespace mongo {
namespace dur {
void setAgeOutJournalFiles(bool rotate);
}
+ /** @return true if fields found */
bool setParmsMongodSpecific(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
BSONElement e = cmdObj["ageOutJournalFiles"];
if( !e.eoo() ) {
bool r = e.trueValue();
log() << "ageOutJournalFiles " << r << endl;
dur::setAgeOutJournalFiles(r);
+ return true;
}
- return true;
+ return false;
}
void flushDiagLog();
@@ -85,7 +88,7 @@ namespace mongo {
help << "reset error state (used with getpreverror)";
}
CmdResetError() : Command("resetError", false, "reseterror") {}
- bool run(const string& db, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool run(const string& db, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
LastError *le = lastError.get();
assert( le );
le->reset();
@@ -116,7 +119,7 @@ namespace mongo {
<< " { w:n } - await replication to n servers (including self) before returning\n"
<< " { wtimeout:m} - timeout for w in m milliseconds";
}
- bool run(const string& dbname, BSONObj& _cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool run(const string& dbname, BSONObj& _cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
LastError *le = lastError.disableForCommand();
bool err = false;
@@ -246,7 +249,7 @@ namespace mongo {
return true;
}
CmdGetPrevError() : Command("getPrevError", false, "getpreverror") {}
- bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
LastError *le = lastError.disableForCommand();
le->appendSelf( result );
if ( le->valid )
@@ -268,14 +271,14 @@ namespace mongo {
<< "N to wait N seconds for other members to catch up.";
}
- bool CmdShutdown::run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool CmdShutdown::run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
bool force = cmdObj.hasField("force") && cmdObj["force"].trueValue();
if (!force && theReplSet && theReplSet->isPrimary()) {
- int timeout, now, start;
+ long long timeout, now, start;
timeout = now = start = curTimeMicros64()/1000000;
if (cmdObj.hasField("timeoutSecs")) {
- timeout += cmdObj["timeoutSecs"].numberInt();
+ timeout += cmdObj["timeoutSecs"].numberLong();
}
OpTime lastOp = theReplSet->lastOpTimeWritten;
@@ -329,7 +332,7 @@ namespace mongo {
}
virtual LockType locktype() const { return WRITE; }
CmdDropDatabase() : Command("dropDatabase") {}
- bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
BSONElement e = cmdObj.firstElement();
log() << "dropDatabase " << dbname << endl;
int p = (int) e.number();
@@ -349,12 +352,13 @@ namespace mongo {
virtual bool slaveOk() const {
return true;
}
+ virtual bool maintenanceMode() const { return true; }
virtual void help( stringstream& help ) const {
help << "repair database. also compacts. note: slow.";
}
virtual LockType locktype() const { return WRITE; }
CmdRepairDatabase() : Command("repairDatabase") {}
- bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
BSONElement e = cmdObj.firstElement();
log() << "repairDatabase " << dbname << endl;
int p = (int) e.number();
@@ -388,7 +392,7 @@ namespace mongo {
}
virtual LockType locktype() const { return WRITE; }
CmdProfile() : Command("profile") {}
- bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
BSONElement e = cmdObj.firstElement();
result.append("was", cc().database()->profile);
result.append("slowms", cmdLine.slowMS );
@@ -425,7 +429,7 @@ namespace mongo {
help << "returns lots of administrative server statistics";
}
- bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
long long start = Listener::getElapsedTimeMillis();
BSONObjBuilder timeBuilder(128);
@@ -596,6 +600,21 @@ namespace mongo {
timeBuilder.appendNumber( "after dur" , Listener::getElapsedTimeMillis() - start );
+ {
+ RamLog* rl = RamLog::get( "warnings" );
+ verify(15880, rl);
+
+ if (rl->lastWrite() >= time(0)-(10*60)){ // only show warnings from last 10 minutes
+ vector<const char*> lines;
+ rl->get( lines );
+
+ BSONArrayBuilder arr( result.subarrayStart( "warnings" ) );
+ for ( unsigned i=std::max(0,(int)lines.size()-10); i<lines.size(); i++ )
+ arr.append( lines[i] );
+ arr.done();
+ }
+ }
+
if ( ! authed )
result.append( "note" , "run against admin for more info" );
@@ -619,7 +638,7 @@ namespace mongo {
virtual void help( stringstream& help ) const { help << "internal"; }
virtual LockType locktype() const { return NONE; }
CmdGetOpTime() : Command("getoptime") { }
- bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
writelock l( "" );
result.appendDate("optime", OpTime::now().asDate());
return true;
@@ -648,7 +667,7 @@ namespace mongo {
}
void help(stringstream& h) const { h << "http://www.mongodb.org/display/DOCS/Monitoring+and+Diagnostics#MonitoringandDiagnostics-DatabaseRecord%2FReplay"; }
virtual LockType locktype() const { return WRITE; }
- bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
int was = _diaglog.setLevel( cmdObj.firstElement().numberInt() );
flushDiagLog();
if ( !cmdLine.quiet )
@@ -771,7 +790,7 @@ namespace mongo {
}
virtual void help( stringstream& help ) const { help << "drop a collection\n{drop : <collectionName>}"; }
virtual LockType locktype() const { return WRITE; }
- virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ virtual bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
string nsToDrop = dbname + '.' + cmdObj.firstElement().valuestr();
NamespaceDetails *d = nsdetails(nsToDrop.c_str());
if ( !cmdLine.quiet )
@@ -805,7 +824,7 @@ namespace mongo {
return false;
}
virtual void help( stringstream& help ) const { help << "count objects in collection"; }
- virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
string ns = dbname + '.' + cmdObj.firstElement().valuestr();
string err;
long long n = runCount(ns.c_str(), cmdObj, err);
@@ -844,7 +863,8 @@ namespace mongo {
help << "create a collection explicitly\n"
"{ create: <ns>[, capped: <bool>, size: <collSizeInBytes>, max: <nDocs>] }";
}
- virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ virtual bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ uassert(15888, "must pass name of collection to create", cmdObj.firstElement().valuestrsafe()[0] != '\0');
string ns = dbname + '.' + cmdObj.firstElement().valuestr();
string err;
uassert(14832, "specify size:<n> when capped is true", !cmdObj["capped"].trueValue() || cmdObj["size"].isNumber() || cmdObj.hasField("$nExtents"));
@@ -869,7 +889,7 @@ namespace mongo {
help << "drop indexes for a collection";
}
CmdDropIndexes() : Command("dropIndexes", false, "deleteIndexes") { }
- bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& anObjBuilder, bool /*fromRepl*/) {
+ bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& anObjBuilder, bool /*fromRepl*/) {
BSONElement e = jsobj.firstElement();
string toDeleteNs = dbname + '.' + e.valuestr();
NamespaceDetails *d = nsdetails(toDeleteNs.c_str());
@@ -914,7 +934,7 @@ namespace mongo {
help << "re-index a collection";
}
CmdReIndex() : Command("reIndex") { }
- bool run(const string& dbname , BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) {
+ bool run(const string& dbname , BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) {
static DBDirectClient db;
BSONElement e = jsobj.firstElement();
@@ -969,7 +989,7 @@ namespace mongo {
virtual LockType locktype() const { return NONE; }
virtual void help( stringstream& help ) const { help << "list databases on this server"; }
CmdListDatabases() : Command("listDatabases" , true ) {}
- bool run(const string& dbname , BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) {
+ bool run(const string& dbname , BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) {
vector< string > dbNames;
getDatabaseNames( dbNames );
vector< BSONObj > dbInfos;
@@ -1038,7 +1058,7 @@ namespace mongo {
virtual LockType locktype() const { return WRITE; }
CmdCloseAllDatabases() : Command( "closeAllDatabases" ) {}
- bool run(const string& dbname , BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) {
+ bool run(const string& dbname , BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) {
bool ok;
try {
ok = dbHolder.closeAll( dbpath , result, false );
@@ -1065,7 +1085,7 @@ namespace mongo {
help << " example: { filemd5 : ObjectId(aaaaaaa) , root : \"fs\" }";
}
virtual LockType locktype() const { return READ; }
- bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
string ns = dbname;
ns += ".";
{
@@ -1164,7 +1184,7 @@ namespace mongo {
"\nkeyPattern, min, and max parameters are optional."
"\nnote: This command may take a while to run";
}
- bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
Timer timer;
string ns = jsobj.firstElement().String();
@@ -1282,7 +1302,7 @@ namespace mongo {
help << "{ collStats:\"blog.posts\" , scale : 1 } scale divides sizes e.g. for KB use 1024\n"
" avgObjSize - in bytes";
}
- bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
string ns = dbname + "." + jsobj.firstElement().valuestr();
Client::Context cx( ns );
@@ -1351,7 +1371,7 @@ namespace mongo {
"Get stats on a database. Not instantaneous. Slower for databases with large .ns files.\n" <<
"Example: { dbStats:1, scale:1 }";
}
- bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
int scale = 1;
if ( jsobj["scale"].isNumber() ) {
scale = jsobj["scale"].numberInt();
@@ -1426,7 +1446,7 @@ namespace mongo {
virtual void help( stringstream &help ) const {
help << "{ cloneCollectionAsCapped:<fromName>, toCollection:<toName>, size:<sizeInBytes> }";
}
- bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
string from = jsobj.getStringField( "cloneCollectionAsCapped" );
string to = jsobj.getStringField( "toCollection" );
long long size = (long long)jsobj.getField( "size" ).number();
@@ -1488,7 +1508,7 @@ namespace mongo {
virtual void help( stringstream &help ) const {
help << "{ convertToCapped:<fromCollectionName>, size:<sizeInBytes> }";
}
- bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
BackgroundOperation::assertNoBgOpInProgForDb(dbname.c_str());
string from = jsobj.getStringField( "convertToCapped" );
@@ -1544,7 +1564,7 @@ namespace mongo {
virtual void help( stringstream &help ) const {
help << "{whatsmyuri:1}";
}
- virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
BSONObj info = cc().curop()->infoNoauth();
result << "you" << info[ "client" ];
return true;
@@ -1559,7 +1579,7 @@ namespace mongo {
return true;
}
virtual bool slaveOk() const {
- return false;
+ return true;
}
virtual LockType locktype() const { return WRITE; }
virtual bool requiresAuth() {
@@ -1568,7 +1588,7 @@ namespace mongo {
virtual void help( stringstream &help ) const {
help << "internal. for testing only.";
}
- virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
string coll = cmdObj[ "godinsert" ].valuestrsafe();
uassert( 13049, "godinsert must specify a collection", !coll.empty() );
string ns = dbname + "." + coll;
@@ -1583,7 +1603,7 @@ namespace mongo {
DBHashCmd() : Command( "dbHash", false, "dbhash" ) {}
virtual bool slaveOk() const { return true; }
virtual LockType locktype() const { return READ; }
- virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ virtual bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
list<string> colls;
Database* db = cc().database();
if ( db )
@@ -1629,9 +1649,8 @@ namespace mongo {
cursor = findTableScan( c.c_str() , BSONObj() );
}
else {
- bb.done();
- errmsg = (string)"can't find _id index for: " + c;
- return 0;
+ log() << "can't find _id index for: " << c << endl;
+ continue;
}
md5_state_t st;
@@ -1677,7 +1696,7 @@ namespace mongo {
help << "w:true write lock. secs:<seconds>";
}
CmdSleep() : Command("sleep") { }
- bool run(const string& ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool run(const string& ns, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
int secs = 100;
if ( cmdObj["secs"].isNumber() )
secs = cmdObj["secs"].numberInt();
@@ -1700,7 +1719,7 @@ namespace mongo {
virtual bool slaveOk() const { return false; }
virtual LockType locktype() const { return WRITE; }
virtual bool requiresAuth() { return true; }
- virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ virtual bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
string coll = cmdObj[ "captrunc" ].valuestrsafe();
uassert( 13416, "captrunc must specify a collection", !coll.empty() );
string ns = dbname + "." + coll;
@@ -1727,7 +1746,7 @@ namespace mongo {
virtual bool slaveOk() const { return false; }
virtual LockType locktype() const { return WRITE; }
virtual bool requiresAuth() { return true; }
- virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ virtual bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
string coll = cmdObj[ "emptycapped" ].valuestrsafe();
uassert( 13428, "emptycapped must specify a collection", !coll.empty() );
string ns = dbname + "." + coll;
@@ -1792,13 +1811,22 @@ namespace mongo {
if ( c->adminOnly() )
log( 2 ) << "command: " << cmdObj << endl;
+ if (c->maintenanceMode() && theReplSet && theReplSet->isSecondary()) {
+ theReplSet->setMaintenanceMode(true);
+ }
+
if ( c->locktype() == Command::NONE ) {
// we also trust that this won't crash
client.curop()->ensureStarted();
string errmsg;
- int ok = c->run( dbname , cmdObj , errmsg , result , fromRepl );
+ int ok = c->run( dbname , cmdObj , queryOptions, errmsg , result , fromRepl );
if ( ! ok )
result.append( "errmsg" , errmsg );
+
+ if (c->maintenanceMode() && theReplSet) {
+ theReplSet->setMaintenanceMode(false);
+ }
+
return ok;
}
@@ -1812,11 +1840,13 @@ namespace mongo {
client.curop()->ensureStarted();
Client::Context ctx( dbname , dbpath , &lk , c->requiresAuth() );
+ bool retval = true;
+
try {
string errmsg;
- if ( ! c->run(dbname, cmdObj, errmsg, result, fromRepl ) ) {
+ if ( ! c->run(dbname, cmdObj, queryOptions, errmsg, result, fromRepl ) ) {
result.append( "errmsg" , errmsg );
- return false;
+ retval = false;
}
}
catch ( DBException& e ) {
@@ -1824,14 +1854,18 @@ namespace mongo {
ss << "exception: " << e.what();
result.append( "errmsg" , ss.str() );
result.append( "code" , e.getCode() );
- return false;
+ retval = false;
}
- if ( c->logTheOp() && ! fromRepl ) {
+ if ( retval && c->logTheOp() && ! fromRepl ) {
logOp("c", cmdns, cmdObj);
}
- return true;
+ if (c->maintenanceMode() && theReplSet) {
+ theReplSet->setMaintenanceMode(false);
+ }
+
+ return retval;
}
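
The execCommand changes above bracket a command's execution with replica-set maintenance mode: set on entry when the node is a secondary, cleared on every exit path, including errors. A minimal RAII-style sketch of that pattern, with a hypothetical setMaintenanceMode stub (the real code toggles the flag manually rather than via a guard):

    #include <iostream>

    // hypothetical stub standing in for theReplSet->setMaintenanceMode()
    static void setMaintenanceMode(bool on) { std::cout << "maintenance=" << on << "\n"; }

    // sketch only: clears the flag even if the command body throws,
    // matching the intent of the manual true/false pairs in the hunks above
    struct MaintenanceModeGuard {
        bool _active;
        explicit MaintenanceModeGuard(bool wanted) : _active(wanted) {
            if (_active) setMaintenanceMode(true);
        }
        ~MaintenanceModeGuard() {
            if (_active) setMaintenanceMode(false);
        }
    };

    int main() {
        MaintenanceModeGuard guard(true);
        // ... run the command body here; the destructor clears the flag ...
        return 0;
    }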
diff --git a/db/dbcommands_admin.cpp b/db/dbcommands_admin.cpp
index 47f6c691ab4..566027fc594 100644
--- a/db/dbcommands_admin.cpp
+++ b/db/dbcommands_admin.cpp
@@ -47,7 +47,7 @@ namespace mongo {
virtual void help(stringstream& h) const { h << "internal"; }
- bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
string dropns = dbname + "." + cmdObj.firstElement().valuestrsafe();
if ( !cmdLine.quiet )
@@ -82,7 +82,7 @@ namespace mongo {
virtual bool adminOnly() const { return true; }
virtual void help(stringstream& h) const { h << "test how long to write and fsync to a test file in the journal/ directory"; }
- bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
filesystem::path p = dur::getJournalDir();
p /= "journalLatencyTest";
@@ -157,7 +157,7 @@ namespace mongo {
virtual LockType locktype() const { return READ; }
//{ validate: "collectionnamewithoutthedbpart" [, scandata: <bool>] [, full: <bool>] }
- bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
string ns = dbname + "." + cmdObj.firstElement().valuestrsafe();
NamespaceDetails * d = nsdetails( ns.c_str() );
if ( !cmdLine.quiet )
@@ -473,7 +473,7 @@ namespace mongo {
return !x.empty();
}*/
virtual void help(stringstream& h) const { h << url(); }
- virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
bool sync = !cmdObj["async"].trueValue(); // async means do an fsync, but return immediately
bool lock = cmdObj["lock"].trueValue();
log() << "CMD fsync: sync:" << sync << " lock:" << lock << endl;
diff --git a/db/dbcommands_generic.cpp b/db/dbcommands_generic.cpp
index 2e025b500ea..a9e13eab741 100644
--- a/db/dbcommands_generic.cpp
+++ b/db/dbcommands_generic.cpp
@@ -79,7 +79,7 @@ namespace mongo {
virtual void help( stringstream &help ) const {
help << "internal command facilitating running in certain cloud computing environments";
}
- bool run(const string& dbname, BSONObj& obj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname, BSONObj& obj, int options, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
if( obj.hasElement("servers") ) {
vector<string> ips;
obj["servers"].Obj().Vals(ips);
@@ -106,7 +106,7 @@ namespace mongo {
help << "get version #, etc.\n";
help << "{ buildinfo:1 }";
}
- bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
result << "version" << versionString << "gitVersion" << gitVersion() << "sysInfo" << sysInfo();
result << "versionArray" << versionArray;
result << "bits" << ( sizeof( int* ) == 4 ? 32 : 64 );
@@ -137,7 +137,7 @@ namespace mongo {
help << " syncdelay\n";
help << "{ getParameter:'*' } to get everything\n";
}
- bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
bool all = *cmdObj.firstElement().valuestrsafe() == '*';
int before = result.len();
@@ -166,11 +166,6 @@ namespace mongo {
}
} cmdGet;
- // dev - experimental. so only in set command for now. may go away or change
- namespace dur {
- int groupCommitIntervalMs = 100;
- }
-
// tempish
bool setParmsMongodSpecific(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl );
@@ -184,23 +179,24 @@ namespace mongo {
help << "set administrative option(s)\n";
help << "{ setParameter:1, <param>:<value> }\n";
help << "supported so far:\n";
- help << " notablescan\n";
+ help << " journalCommitInterval\n";
help << " logLevel\n";
+ help << " notablescan\n";
help << " quiet\n";
help << " syncdelay\n";
}
- bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
int s = 0;
- setParmsMongodSpecific(dbname, cmdObj, errmsg, result, fromRepl);
- if( cmdObj.hasElement("groupCommitIntervalMs") ) {
+ bool found = setParmsMongodSpecific(dbname, cmdObj, errmsg, result, fromRepl);
+ if( cmdObj.hasElement("journalCommitInterval") ) {
if( !cmdLine.dur ) {
errmsg = "journaling is off";
return false;
}
- int x = (int) cmdObj["groupCommitIntervalMs"].Number();
- assert( x > 0 && x < 500 );
- dur::groupCommitIntervalMs = x;
- log() << "groupCommitIntervalMs " << x << endl;
+ int x = (int) cmdObj["journalCommitInterval"].Number();
+ assert( x > 1 && x < 500 );
+ cmdLine.journalCommitInterval = x;
+ log() << "setParameter journalCommitInterval=" << x << endl;
s++;
}
if( cmdObj.hasElement("notablescan") ) {
@@ -241,7 +237,7 @@ namespace mongo {
s++;
}
- if( s == 0 ) {
+ if( s == 0 && !found ) {
errmsg = "no option found to set, use help:true to see options ";
return false;
}
@@ -257,7 +253,7 @@ namespace mongo {
virtual void help( stringstream &help ) const { help << "a way to check that the server is alive. responds immediately even if server is in a db lock."; }
virtual LockType locktype() const { return NONE; }
virtual bool requiresAuth() { return false; }
- virtual bool run(const string& badns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ virtual bool run(const string& badns, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
// IMPORTANT: Don't put anything in here that might lock db - including authentication
return true;
}
@@ -270,7 +266,7 @@ namespace mongo {
virtual bool slaveOk() const { return true; }
virtual bool readOnly() { return true; }
virtual LockType locktype() const { return NONE; }
- virtual bool run(const string& ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& ns, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
if ( globalScriptEngine ) {
BSONObjBuilder bb( result.subobjStart( "js" ) );
result.append( "utf8" , globalScriptEngine->utf8Ok() );
@@ -292,7 +288,7 @@ namespace mongo {
virtual LockType locktype() const { return NONE; }
virtual bool slaveOk() const { return true; }
virtual bool adminOnly() const { return true; }
- virtual bool run(const string& ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& ns, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
rotateLogs();
return 1;
}
@@ -306,7 +302,7 @@ namespace mongo {
virtual LockType locktype() const { return NONE; }
virtual bool slaveOk() const { return true; }
virtual bool adminOnly() const { return false; }
- virtual bool run(const string& ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& ns, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
BSONObjBuilder b( result.subobjStart( "commands" ) );
for ( map<string,Command*>::iterator i=_commands->begin(); i!=_commands->end(); ++i ) {
Command * c = i->second;
@@ -361,7 +357,7 @@ namespace mongo {
}
virtual LockType locktype() const { return NONE; }
CmdForceError() : Command("forceerror") {}
- bool run(const string& dbnamne, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool run(const string& dbnamne, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
uassert( 10038 , "forced error", false);
return true;
}
@@ -373,7 +369,7 @@ namespace mongo {
virtual bool slaveOk() const { return true; }
virtual LockType locktype() const { return NONE; }
virtual bool requiresAuth() { return false; }
- virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ virtual bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
result << "options" << QueryOption_AllSupported;
return true;
}
@@ -393,7 +389,7 @@ namespace mongo {
help << "{ getLog : '*' } OR { getLog : 'global' }";
}
- virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ virtual bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
string p = cmdObj.firstElement().String();
if ( p == "*" ) {
vector<string> names;
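diff note: the setParameter hunk above replaces the experimental groupCommitIntervalMs with journalCommitInterval and tightens its validation. A standalone sketch of that check, using a hypothetical fakeCmdLine rather than the real globals:

    #include <string>

    struct FakeCmdLine { bool dur; unsigned journalCommitInterval; };
    static FakeCmdLine fakeCmdLine = { true, 0 };

    // mirrors the accepted range in the hunk: journaling must be on,
    // and the interval must satisfy x > 1 && x < 500 (milliseconds)
    bool setJournalCommitInterval(int x, std::string& errmsg) {
        if (!fakeCmdLine.dur) { errmsg = "journaling is off"; return false; }
        if (x <= 1 || x >= 500) { errmsg = "interval out of range"; return false; }
        fakeCmdLine.journalCommitInterval = (unsigned)x;
        return true;
    }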
diff --git a/db/dbeval.cpp b/db/dbeval.cpp
index 3a53200a49f..5fe137fc3a3 100644
--- a/db/dbeval.cpp
+++ b/db/dbeval.cpp
@@ -121,7 +121,7 @@ namespace mongo {
}
virtual LockType locktype() const { return NONE; }
CmdEval() : Command("eval", false, "$eval") { }
- bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
AuthenticationInfo *ai = cc().getAuthenticationInfo();
uassert( 12598 , "$eval reads unauthorized", ai->isAuthorizedReads(dbname.c_str()) );
diff --git a/db/dbmessage.h b/db/dbmessage.h
index a14d4cf5142..a789bff849c 100644
--- a/db/dbmessage.h
+++ b/db/dbmessage.h
@@ -122,7 +122,7 @@ namespace mongo {
/** the 32 bit field before the ns
* track all bit usage here as it's cross-op
- * 0: InsertOption_KeepGoing
+ * 0: InsertOption_ContinueOnError
* 1: fromWriteback
*/
int& reservedField() { return *reserved; }
@@ -233,7 +233,7 @@ namespace mongo {
public:
enum ReservedOptions {
- Reserved_InsertOption_KeepGoing = 1 << 0 ,
+ Reserved_InsertOption_ContinueOnError = 1 << 0 ,
Reserved_FromWriteback = 1 << 1
};
};
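
The rename above only touches the label; the bit layout of the reserved field is unchanged. A small self-contained example of how such flag bits are set and tested:

    #include <cassert>

    enum ReservedOptions {
        Reserved_InsertOption_ContinueOnError = 1 << 0,  // bit 0, per the comment above
        Reserved_FromWriteback                = 1 << 1   // bit 1
    };

    int main() {
        int reserved = 0;
        reserved |= Reserved_InsertOption_ContinueOnError;           // set bit 0
        assert(   reserved & Reserved_InsertOption_ContinueOnError );
        assert( !(reserved & Reserved_FromWriteback) );              // bit 1 still clear
        return 0;
    }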
diff --git a/db/dbwebserver.cpp b/db/dbwebserver.cpp
index 40950a8ccb3..50a59fa1267 100644
--- a/db/dbwebserver.cpp
+++ b/db/dbwebserver.cpp
@@ -61,7 +61,7 @@ namespace mongo {
class DbWebServer : public MiniWebServer {
public:
DbWebServer(const string& ip, int port, const AdminAccess* webUsers)
- : MiniWebServer(ip, port), _webUsers(webUsers) {
+ : MiniWebServer("admin web console", ip, port), _webUsers(webUsers) {
WebStatusPlugin::initAll();
}
@@ -424,7 +424,7 @@ namespace mongo {
string errmsg;
BSONObjBuilder sub;
- if ( ! c->run( "admin.$cmd" , co , errmsg , sub , false ) )
+ if ( ! c->run( "admin.$cmd" , co , 0, errmsg , sub , false ) )
buf.append( cmd , errmsg );
else
buf.append( cmd , sub.obj() );
@@ -531,7 +531,6 @@ namespace mongo {
Client::initThread("websvr");
const int p = cmdLine.port + 1000;
DbWebServer mini(cmdLine.bind_ip, p, adminAccessPtr.get());
- log() << "web admin interface listening on port " << p << endl;
mini.initAndListen();
cc().shutdown();
}
diff --git a/db/driverHelpers.cpp b/db/driverHelpers.cpp
index d98a33b25c5..12aa01886c4 100644
--- a/db/driverHelpers.cpp
+++ b/db/driverHelpers.cpp
@@ -46,7 +46,7 @@ namespace mongo {
class ObjectIdTest : public BasicDriverHelper {
public:
ObjectIdTest() : BasicDriverHelper( "driverOIDTest" ) {}
- virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
if ( cmdObj.firstElement().type() != jstOID ) {
errmsg = "not oid";
return false;
diff --git a/db/dur.cpp b/db/dur.cpp
index 6cb69ac5ac2..dfa36f95224 100644
--- a/db/dur.cpp
+++ b/db/dur.cpp
@@ -62,11 +62,11 @@
#include "dur_journal.h"
#include "dur_commitjob.h"
#include "dur_recover.h"
+#include "dur_stats.h"
#include "../util/concurrency/race.h"
#include "../util/mongoutils/hash.h"
#include "../util/mongoutils/str.h"
#include "../util/timer.h"
-#include "dur_stats.h"
using namespace mongoutils;
@@ -74,8 +74,9 @@ namespace mongo {
namespace dur {
- void WRITETODATAFILES();
- void PREPLOGBUFFER();
+ void PREPLOGBUFFER(JSectHeader& outParm);
+ void WRITETOJOURNAL(JSectHeader h, AlignedBuilder& uncompressed);
+ void WRITETODATAFILES(const JSectHeader& h, AlignedBuilder& uncompressed);
/** declared later in this file
only used in this file -- use DurableInterface::commitNow() outside
@@ -129,6 +130,7 @@ namespace mongo {
"commits" << _commits <<
"journaledMB" << _journaledBytes / 1000000.0 <<
"writeToDataFilesMB" << _writeToDataFilesBytes / 1000000.0 <<
+ "compression" << _journaledBytes / (_uncompressedBytes+1.0) <<
"commitsInWriteLock" << _commitsInWriteLock <<
"earlyCommits" << _earlyCommits <<
"timeMs" <<
@@ -143,6 +145,8 @@ namespace mongo {
b << "ageOutJournalFiles" << "mutex timeout";
if( r == 0 )
b << "ageOutJournalFiles" << false;
+ if( cmdLine.journalCommitInterval != 0 )
+ b << "journalCommitIntervalMs" << cmdLine.journalCommitInterval;
return b.obj();
}
@@ -269,6 +273,9 @@ namespace mongo {
}
bool DurableImpl::commitIfNeeded() {
+ if ( ! dbMutex.isWriteLocked() ) // we implicitly commit if needed when releasing write lock
+ return false;
+
DEV commitJob._nSinceCommitIfNeededCall = 0;
if (commitJob.bytes() > UncommittedBytesLimit) { // should this also fire if CmdLine::DurAlwaysCommit?
stats.curr->_earlyCommits++;
@@ -325,15 +332,6 @@ namespace mongo {
}
#endif
- /** write the buffer we have built to the journal and fsync it.
- outside of lock as that could be slow.
- */
- static void WRITETOJOURNAL(AlignedBuilder& ab) {
- Timer t;
- journal(ab);
- stats.curr->_writeToJournalMicros += t.micros();
- }
-
// Functor to be called over all MongoFiles
class validateSingleMapMatches {
@@ -486,6 +484,7 @@ namespace mongo {
stats.curr->_remapPrivateViewMicros += t.micros();
}
+ // lock order: dbMutex first, then this
mutex groupCommitMutex("groupCommit");
bool _groupCommitWithLimitedLocks() {
@@ -502,8 +501,8 @@ namespace mongo {
commitJob.notifyCommitted();
return true;
}
-
- PREPLOGBUFFER();
+ JSectHeader h;
+ PREPLOGBUFFER(h);
RWLockRecursive::Shared lk3(MongoFile::mmmutex);
@@ -515,16 +514,15 @@ namespace mongo {
lk1.reset();
// ****** now other threads can do writes ******
-
- WRITETOJOURNAL(commitJob._ab);
+ WRITETOJOURNAL(h, commitJob._ab);
assert( abLen == commitJob._ab.len() ); // a check that no one touched the builder while we were doing work. if so, our locking is wrong.
// data is now in the journal, which is sufficient for acknowledging getLastError.
// (ok to crash after that)
commitJob.notifyCommitted();
- WRITETODATAFILES();
- assert( abLen == commitJob._ab.len() ); // WRITETODATAFILES uses _ab also
+ WRITETODATAFILES(h, commitJob._ab);
+ assert( abLen == commitJob._ab.len() ); // check again wasn't modded
commitJob._ab.reset();
// can't : dbMutex._remapPrivateViewRequested = true;
@@ -570,18 +568,19 @@ namespace mongo {
// (and we are only read locked in the dbMutex, so it could happen)
scoped_lock lk(groupCommitMutex);
- PREPLOGBUFFER();
+ JSectHeader h;
+ PREPLOGBUFFER(h);
// todo : write to the journal outside locks, as this write can be slow.
// however, be careful then about remapprivateview as that cannot be done
// if new writes are then pending in the private maps.
- WRITETOJOURNAL(commitJob._ab);
+ WRITETOJOURNAL(h, commitJob._ab);
// data is now in the journal, which is sufficient for acknowledging getLastError.
// (ok to crash after that)
commitJob.notifyCommitted();
- WRITETODATAFILES();
+ WRITETODATAFILES(h, commitJob._ab);
debugValidateAllMapsMatch();
commitJob.reset();
@@ -613,6 +612,7 @@ namespace mongo {
}
/** locking: in read lock when called
+ or, for early commits (commitIfNeeded), in write lock
@see MongoMMF::close()
*/
static void groupCommit() {
@@ -686,29 +686,41 @@ namespace mongo {
}
extern int groupCommitIntervalMs;
+ filesystem::path getJournalDir();
void durThread() {
Client::initThread("journal");
+
+ bool samePartition = true;
+ try {
+ const string dbpathDir = boost::filesystem::path(dbpath).native_directory_string();
+ samePartition = onSamePartition(getJournalDir().string(), dbpathDir);
+ }
+ catch(...) {
+ }
+
while( !inShutdown() ) {
RACECHECK
+
+ unsigned ms = cmdLine.journalCommitInterval;
+ if( ms == 0 ) {
+ // use default
+ ms = samePartition ? 100 : 30;
+ }
+
+ unsigned oneThird = (ms / 3) + 1; // +1 so never zero
+
try {
- int millis = groupCommitIntervalMs;
- {
- stats.rotate();
- {
- Timer t;
- journalRotate(); // note we do this part outside of mongomutex
- millis -= t.millis();
- wassert( millis <= groupCommitIntervalMs ); // race if groupCommitIntervalMs was changing by another thread so wassert
- if( millis < 2 )
- millis = 2;
- }
+ stats.rotate();
- // we do this in a couple blocks, which makes it a tiny bit faster (only a little) on throughput,
- // but is likely also less spiky on our cpu usage, which is good:
- sleepmillis(millis/2);
- commitJob.wi()._deferred.invoke();
- sleepmillis(millis/2);
+ // we do this in a couple blocks (the invoke()), which makes it a tiny bit faster (only a little) on throughput,
+ // but is likely also less spiky on our cpu usage, which is good.
+
+ // commit sooner if one or more getLastError j:true is pending
+ for( unsigned i = 1; i <= 2; i++ ) {
+ sleepmillis(oneThird);
+ if( commitJob._notify.nWaiting() )
+ break;
commitJob.wi()._deferred.invoke();
}
@@ -772,6 +784,13 @@ namespace mongo {
void DurableImpl::syncDataAndTruncateJournal() {
dbMutex.assertWriteLocked();
+ // a commit from the commit thread won't begin while we are in the write lock,
+ // but it may already be in progress and the end of that work is done outside
+ // (dbMutex) locks. This line waits for that to complete if already underway.
+ {
+ scoped_lock lk(groupCommitMutex);
+ }
+
groupCommit();
MongoFile::flushAll(true);
journalCleanup();
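
durThread now derives its commit interval at runtime: an explicit journalCommitInterval wins; otherwise the default is 100 ms when journal and data files share a partition and 30 ms when they do not, and each interval is walked in thirds so a pending j:true waiter triggers an earlier commit. A simplified sketch of that loop with hypothetical stand-ins for the sleep, waiter check, and commit:

    #include <chrono>
    #include <thread>

    static void sleepFor(unsigned ms) { std::this_thread::sleep_for(std::chrono::milliseconds(ms)); }
    static bool waiterPending()  { return false; }  // stand-in for commitJob._notify.nWaiting()
    static void groupCommitNow() { }                // stand-in for the real group commit

    unsigned effectiveIntervalMs(unsigned configured, bool samePartition) {
        if (configured != 0)
            return configured;             // explicit setting wins
        return samePartition ? 100 : 30;   // defaults chosen in durThread()
    }

    void durLoopOnce(unsigned ms) {
        unsigned oneThird = (ms / 3) + 1;  // +1 so never zero
        for (unsigned i = 1; i <= 2; i++) {
            sleepFor(oneThird);
            if (waiterPending())           // a j:true getLastError is blocked
                break;                     // commit sooner
        }
        groupCommitNow();
    }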
diff --git a/db/dur_journal.cpp b/db/dur_journal.cpp
index f85dda32b51..0a1bc5ebbad 100644
--- a/db/dur_journal.cpp
+++ b/db/dur_journal.cpp
@@ -34,6 +34,7 @@
#include "../util/file.h"
#include "../util/checksum.h"
#include "../util/concurrency/race.h"
+#include "../util/compress.h"
using namespace mongoutils;
@@ -92,6 +93,11 @@ namespace mongo {
assert(false);
}
+ JSectFooter::JSectFooter() {
+ memset(this, 0, sizeof(*this));
+ sentinel = JEntry::OpCode_Footer;
+ }
+
JSectFooter::JSectFooter(const void* begin, int len) { // needs buffer to compute hash
sentinel = JEntry::OpCode_Footer;
reserved = 0;
@@ -103,6 +109,10 @@ namespace mongo {
}
bool JSectFooter::checkHash(const void* begin, int len) const {
+ if( !magicOk() ) {
+ log() << "journal footer not valid" << endl;
+ return false;
+ }
Checksum c;
c.gen(begin, len);
DEV log() << "checkHash len:" << len << " hash:" << toHex(hash, 16) << " current:" << toHex(c.bytes, 16) << endl;
@@ -317,13 +327,13 @@ namespace mongo {
void preallocateFiles() {
if( exists(getJournalDir()/"prealloc.0") || // if enabled previously, keep using
- exists(getJournalDir()/"prealloc.1") ||
- preallocateIsFaster() ) {
+ exists(getJournalDir()/"prealloc.1") ||
+ ( cmdLine.preallocj && preallocateIsFaster() ) ) {
usingPreallocate = true;
try {
_preallocateFiles();
}
- catch(...) {
+ catch(...) {
log() << "warning caught exception in preallocateFiles, continuing" << endl;
}
}
@@ -343,10 +353,12 @@ namespace mongo {
{
// zero the header
File f;
- f.open(temppath.string().c_str(), false, true);
+ f.open(temppath.string().c_str(), false, false);
char buf[8192];
memset(buf, 0, 8192);
f.write(0, buf, 8192);
+ f.truncate(DataLimitPerJournalFile);
+ f.fsync();
}
boost::filesystem::rename(temppath, filepath);
return;
@@ -471,12 +483,6 @@ namespace mongo {
/** called during recovery (the error message text below assumes that)
*/
unsigned long long journalReadLSN() {
- if( !debug ) {
- // in nondebug build, for now, be conservative until more tests written, and apply the whole journal.
- // however we will still write the lsn file to exercise that code, and use in _DEBUG build.
- return 0;
- }
-
if( !MemoryMappedFile::exists(lsnPath()) ) {
log() << "info no lsn file in journal/ directory" << endl;
return 0;
@@ -595,15 +601,7 @@ namespace mongo {
j._ageOut = a;
}
- /** check if time to rotate files. assure a file is open.
- done separately from the journal() call as we can do this part
- outside of lock.
- thread: durThread()
- */
- void journalRotate() {
- j.rotate();
- }
- void Journal::rotate() {
+ void Journal::_rotate() {
assert( !dbMutex.atLeastReadLocked() );
RACECHECK
@@ -618,6 +616,7 @@ namespace mongo {
return;
if( _curLogFile ) {
+ _curLogFile->truncate();
closeCurrentJournalFile();
removeUnneededJournalFiles();
}
@@ -636,24 +635,74 @@ namespace mongo {
}
}
- /** write to journal
+ /** write (append) the buffer we have built to the journal and fsync it.
+ outside of dbMutex lock as this could be slow.
+ @param uncompressed - a buffer that will be written to the journal after compression
+ will not return until on disk
*/
- void journal(const AlignedBuilder& b) {
- j.journal(b);
+ void WRITETOJOURNAL(JSectHeader h, AlignedBuilder& uncompressed) {
+ Timer t;
+ j.journal(h, uncompressed);
+ stats.curr->_writeToJournalMicros += t.micros();
}
- void Journal::journal(const AlignedBuilder& b) {
+ void Journal::journal(const JSectHeader& h, const AlignedBuilder& uncompressed) {
+ RACECHECK
+ static AlignedBuilder b(32*1024*1024);
+ /* buffer to journal will be
+ JSectHeader
+ compressed operations
+ JSectFooter
+ */
+ const unsigned headTailSize = sizeof(JSectHeader) + sizeof(JSectFooter);
+ const unsigned max = maxCompressedLength(uncompressed.len()) + headTailSize;
+ b.reset(max);
+
+ {
+ dassert( h.sectionLen() == (unsigned) 0xffffffff ); // we will backfill later
+ b.appendStruct(h);
+ }
+
+ size_t compressedLength = 0;
+ rawCompress(uncompressed.buf(), uncompressed.len(), b.cur(), &compressedLength);
+ assert( compressedLength < 0xffffffff );
+ assert( compressedLength < max );
+ b.skip(compressedLength);
+
+ // footer
+ unsigned L = 0xffffffff;
+ {
+ // pad to alignment, and set the total section length in the JSectHeader
+ assert( 0xffffe000 == (~(Alignment-1)) );
+ unsigned lenUnpadded = b.len() + sizeof(JSectFooter);
+ L = (lenUnpadded + Alignment-1) & (~(Alignment-1));
+ dassert( L >= lenUnpadded );
+
+ ((JSectHeader*)b.atOfs(0))->setSectionLen(lenUnpadded);
+
+ JSectFooter f(b.buf(), b.len()); // computes checksum
+ b.appendStruct(f);
+ dassert( b.len() == lenUnpadded );
+
+ b.skip(L - lenUnpadded);
+ dassert( b.len() % Alignment == 0 );
+ }
+
try {
mutex::scoped_lock lk(_curLogFileMutex);
// must already be open -- so that _curFileId is correct for previous buffer building
assert( _curLogFile );
- stats.curr->_journaledBytes += b.len();
- _written += b.len();
- _curLogFile->synchronousAppend((void *) b.buf(), b.len());
+ stats.curr->_uncompressedBytes += b.len();
+ unsigned w = b.len();
+ _written += w;
+ assert( w <= L );
+ stats.curr->_journaledBytes += L;
+ _curLogFile->synchronousAppend((const void *) b.buf(), L);
+ _rotate();
}
catch(std::exception& e) {
- log() << "warning exception in dur::journal " << e.what() << endl;
+ log() << "error exception in dur::journal " << e.what() << endl;
throw;
}
}
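
Journal::journal above assembles JSectHeader + compressed body + JSectFooter and then pads the section to an 8 KB boundary. A worked example of just the rounding expression, assuming Alignment = 8192 as defined in dur_journalformat.h:

    #include <cassert>

    const unsigned Alignment = 8192;

    // same expression as in the hunk: round up to the next Alignment boundary
    unsigned padToAlignment(unsigned lenUnpadded) {
        return (lenUnpadded + Alignment - 1) & ~(Alignment - 1);
    }

    int main() {
        assert( padToAlignment(1)    == 8192  );
        assert( padToAlignment(8192) == 8192  );  // already aligned: unchanged
        assert( padToAlignment(8193) == 16384 );
        assert( ~(Alignment - 1) == 0xffffe000 ); // matches the assert in the code
        return 0;
    }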
diff --git a/db/dur_journal.h b/db/dur_journal.h
index e8e3dfd1465..664f63942e0 100644
--- a/db/dur_journal.h
+++ b/db/dur_journal.h
@@ -28,7 +28,8 @@ namespace mongo {
extern bool okToCleanUp;
/** at termination after db files closed & fsynced
- also after covery
+ also after recovery
+ closes and removes journal files
@param log report in log that we are cleaning up if we actually do any work
*/
void journalCleanup(bool log = false);
@@ -43,12 +44,6 @@ namespace mongo {
*/
void journalRotate();
- /** write/append to journal file *
- @param buf - a buffer that will be written to the journal.
- will not return until on disk
- */
- void journal(const AlignedBuilder& buf);
-
/** flag that something has gone wrong during writing to the journal
(not for recovery mode)
*/
@@ -67,5 +62,7 @@ namespace mongo {
// in case disk controller buffers writes
const long long ExtraKeepTimeMs = 10000;
+ const unsigned JournalCommitIntervalDefault = 100;
+
}
}
diff --git a/db/dur_journalformat.h b/db/dur_journalformat.h
index 72587ccd7b6..10ed8487b71 100644
--- a/db/dur_journalformat.h
+++ b/db/dur_journalformat.h
@@ -22,6 +22,8 @@ namespace mongo {
namespace dur {
+ const unsigned Alignment = 8192;
+
#pragma pack(1)
/** beginning header for a journal/j._<n> file
there is nothing important in this header at this time. except perhaps version #.
@@ -34,7 +36,11 @@ namespace mongo {
// x4142 is ascii-readable if you look at the file with head/less -- thus the starting values were near
// that. simply incrementing the version # is safe on a fwd basis.
+#if defined(_NOCOMPRESS)
enum { CurrentVersion = 0x4148 };
+#else
+ enum { CurrentVersion = 0x4149 };
+#endif
unsigned short _version;
// these are just for diagnostic ease (make header more useful as plain text)
@@ -55,11 +61,25 @@ namespace mongo {
/** "Section" header. A section corresponds to a group commit.
len is length of the entire section including header and footer.
+ header and footer are not compressed, just the stuff in between.
*/
struct JSectHeader {
- unsigned len; // length in bytes of the whole section
+ private:
+ unsigned _sectionLen; // unpadded length in bytes of the whole section
+ public:
unsigned long long seqNumber; // sequence number that can be used on recovery to not do too much work
unsigned long long fileId; // matches JHeader::fileId
+ unsigned sectionLen() const { return _sectionLen; }
+
+ // we store the unpadded length so we can use that when we uncompress. to
+ // get the true total size this must be rounded up to the Alignment.
+ void setSectionLen(unsigned lenUnpadded) { _sectionLen = lenUnpadded; }
+
+ unsigned sectionLenWithPadding() const {
+ unsigned x = (sectionLen() + (Alignment-1)) & (~(Alignment-1));
+ dassert( x % Alignment == 0 );
+ return x;
+ }
};
/** an individual write operation within a group commit section. Either the entire section should
@@ -111,6 +131,7 @@ namespace mongo {
/** group commit section footer. md5 is a key field. */
struct JSectFooter {
+ JSectFooter();
JSectFooter(const void* begin, int len); // needs buffer to compute hash
unsigned sentinel;
unsigned char hash[16];
@@ -123,6 +144,8 @@ namespace mongo {
@return true if buffer looks valid
*/
bool checkHash(const void* begin, int len) const;
+
+ bool magicOk() const { return *((unsigned*)magic) == 0x0a0a0a0a; }
};
/** declares "the next entry(s) are for this database / file path prefix" */
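
JSectFooter's new magicOk() compares the four magic bytes as a single unsigned against 0x0a0a0a0a, i.e. four '\n' characters, which reads the same under either endianness since all four bytes are equal. A tiny sketch of the same check using memcpy instead of the pointer cast:

    #include <cassert>
    #include <cstring>

    int main() {
        char magic[4] = { '\n', '\n', '\n', '\n' };  // 0x0a 0x0a 0x0a 0x0a
        unsigned v;
        std::memcpy(&v, magic, sizeof(v));  // avoids the aliasing cast in the original
        assert( v == 0x0a0a0a0a );          // endianness-independent: all bytes equal
        return 0;
    }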
diff --git a/db/dur_journalimpl.h b/db/dur_journalimpl.h
index e436eae45f1..bf771c5d768 100644
--- a/db/dur_journalimpl.h
+++ b/db/dur_journalimpl.h
@@ -18,6 +18,7 @@
#pragma once
+#include "dur_journalformat.h"
#include "../util/logfile.h"
namespace mongo {
@@ -40,14 +41,14 @@ namespace mongo {
*/
void rotate();
- /** write to journal
+ /** append to the journal file
*/
- void journal(const AlignedBuilder& b);
+ void journal(const JSectHeader& h, const AlignedBuilder& b);
boost::filesystem::path getFilePathFor(int filenumber) const;
unsigned long long lastFlushTime() const { return _lastFlushTime; }
- void cleanup(bool log);
+ void cleanup(bool log); // closes and removes journal files
unsigned long long curFileId() const { return _curFileId; }
@@ -61,6 +62,11 @@ namespace mongo {
void open();
private:
+ /** check if time to rotate files. assure a file is open.
+ * internally called with every commit
+ */
+ void _rotate();
+
void _open();
void closeCurrentJournalFile();
void removeUnneededJournalFiles();
diff --git a/db/dur_preplogbuffer.cpp b/db/dur_preplogbuffer.cpp
index 5851e415408..0d8ef3688db 100644
--- a/db/dur_preplogbuffer.cpp
+++ b/db/dur_preplogbuffer.cpp
@@ -60,7 +60,7 @@ namespace mongo {
size_t ofs = 1;
MongoMMF *mmf = findMMF_inlock(i->start(), /*out*/ofs);
- if( !mmf->willNeedRemap() ) {
+ if( unlikely(!mmf->willNeedRemap()) ) {
// tag this mmf as needed a remap of its private view later.
// usually it will already be dirty/already set, so we do the if above first
// to avoid possibility of cpu cache line contention
@@ -97,7 +97,7 @@ namespace mongo {
#endif
bb.appendBuf(i->start(), e.len);
- if (e.len != (unsigned)i->length()) {
+ if (unlikely(e.len != (unsigned)i->length())) {
log() << "journal info splitting prepBasicWrite at boundary" << endl;
// This only happens if we write to the last byte in a file and
@@ -120,40 +120,25 @@ namespace mongo {
// each time events switch to a different database we journal a JDbContext
RelativePath lastDbPath;
- set<WriteIntent>::iterator i = commitJob.writes().begin();
-
- const WriteIntent *w = &(*i);
- while(1) {
- i++;
- const WriteIntent *next = 0;
- if( i != commitJob.writes().end() ) {
- next = &(*i);
- PREFETCH(next);
- }
- prepBasicWrite_inlock(bb, w, lastDbPath);
- if( next == 0 )
- break;
- w = next;
- };
+ for( set<WriteIntent>::iterator i = commitJob.writes().begin(); i != commitJob.writes().end(); i++ ) {
+ prepBasicWrite_inlock(bb, &(*i), lastDbPath);
+ }
}
- void resetLogBuffer(AlignedBuilder& bb) {
+ void resetLogBuffer(/*out*/JSectHeader& h, AlignedBuilder& bb) {
bb.reset();
- // JSectHeader
- JSectHeader h;
- h.len = (unsigned) 0xffffffff; // total length, will fill in later
+ h.setSectionLen(0xffffffff); // total length, will fill in later
h.seqNumber = getLastDataFileFlushTime();
h.fileId = j.curFileId();
-
- bb.appendStruct(h);
}
/** we will build an output buffer ourself and then use O_DIRECT
we could be in read lock for this
caller handles locking
+ @return partially populated sectheader and _ab set
*/
- void _PREPLOGBUFFER() {
+ void _PREPLOGBUFFER(JSectHeader& h) {
assert( cmdLine.dur );
{
@@ -165,7 +150,7 @@ namespace mongo {
}
AlignedBuilder& bb = commitJob._ab;
- resetLogBuffer(bb);
+ resetLogBuffer(h, bb); // adds JSectHeader
// ops other than basic writes (DurOp's)
{
@@ -174,34 +159,14 @@ namespace mongo {
}
}
- {
- prepBasicWrites(bb);
- }
-
- // pad to alignment, and set the total section length in the JSectHeader
- assert( 0xffffe000 == (~(Alignment-1)) );
- unsigned lenWillBe = bb.len() + sizeof(JSectFooter);
- unsigned L = (lenWillBe + Alignment-1) & (~(Alignment-1));
- dassert( L >= lenWillBe );
- *((unsigned*)bb.atOfs(0)) = L;
-
- {
- JSectFooter f(bb.buf(), bb.len());
- bb.appendStruct(f);
- }
-
- {
- unsigned padding = L - bb.len();
- bb.skip(padding);
- dassert( bb.len() % Alignment == 0 );
- }
+ prepBasicWrites(bb);
return;
}
- void PREPLOGBUFFER() {
+ void PREPLOGBUFFER(/*out*/ JSectHeader& h) {
Timer t;
j.assureLogFileOpen(); // so fileId is set
- _PREPLOGBUFFER();
+ _PREPLOGBUFFER(h);
stats.curr->_prepLogBufferMicros += t.micros();
}
diff --git a/db/dur_recover.cpp b/db/dur_recover.cpp
index 2e1516914f1..1e719c0070d 100644
--- a/db/dur_recover.cpp
+++ b/db/dur_recover.cpp
@@ -27,6 +27,7 @@
#include "namespace.h"
#include "../util/mongoutils/str.h"
#include "../util/bufreader.h"
+#include "../util/concurrency/race.h"
#include "pdfile.h"
#include "database.h"
#include "db.h"
@@ -35,6 +36,7 @@
#include "cmdline.h"
#include "curop.h"
#include "mongommf.h"
+#include "../util/compress.h"
#include <sys/stat.h>
#include <fcntl.h>
@@ -92,59 +94,73 @@ namespace mongo {
throws
*/
class JournalSectionIterator : boost::noncopyable {
+ auto_ptr<BufReader> _entries;
+ const JSectHeader _h;
+ const char *_lastDbName; // pointer into mmaped journal file
+ const bool _doDurOps;
+ string _uncompressed;
public:
- JournalSectionIterator(const void *p, unsigned len, bool doDurOps)
- : _br(p, len)
- , _sectHead(static_cast<const JSectHeader*>(_br.skip(sizeof(JSectHeader))))
- , _lastDbName(NULL)
- , _doDurOps(doDurOps)
- {}
+ JournalSectionIterator(const JSectHeader& h, const void *compressed, unsigned compressedLen, bool doDurOpsRecovering) :
+ _h(h),
+ _lastDbName(0)
+ , _doDurOps(doDurOpsRecovering)
+ {
+ assert( doDurOpsRecovering );
+ bool ok = uncompress((const char *)compressed, compressedLen, &_uncompressed);
+ if( !ok ) {
+ // it should always be ok (i think?) as there is a previous check to see that the JSectFooter is ok
+ log() << "couldn't uncompress journal section" << endl;
+ msgasserted(15874, "couldn't uncompress journal section");
+ }
+ const char *p = _uncompressed.c_str();
+ assert( compressedLen == _h.sectionLen() - sizeof(JSectFooter) - sizeof(JSectHeader) );
+ _entries = auto_ptr<BufReader>( new BufReader(p, _uncompressed.size()) );
+ }
+
+ // we work with the uncompressed buffer when doing a WRITETODATAFILES (for speed)
+ JournalSectionIterator(const JSectHeader &h, const void *p, unsigned len) :
+ _entries( new BufReader((const char *) p, len) ),
+ _h(h),
+ _lastDbName(0)
+ , _doDurOps(false)
- bool atEof() const { return _br.atEof(); }
+ { }
- unsigned long long seqNumber() const { return _sectHead->seqNumber; }
+ bool atEof() const { return _entries->atEof(); }
+
+ unsigned long long seqNumber() const { return _h.seqNumber; }
/** get the next entry from the log. this function parses and combines JDbContext and JEntry's.
- * @return true if got an entry. false at successful end of section (and no entry returned).
* throws on premature end of section.
*/
- bool next(ParsedJournalEntry& e) {
+ void next(ParsedJournalEntry& e) {
unsigned lenOrOpCode;
- _br.read(lenOrOpCode);
+ _entries->read(lenOrOpCode);
if (lenOrOpCode > JEntry::OpCode_Min) {
switch( lenOrOpCode ) {
case JEntry::OpCode_Footer: {
- if (_doDurOps) {
- const char* pos = (const char*) _br.pos();
- pos -= sizeof(lenOrOpCode); // rewind to include OpCode
- const JSectFooter& footer = *(const JSectFooter*)pos;
- int len = pos - (char*)_sectHead;
- if (!footer.checkHash(_sectHead, len)) {
- massert(13594, "journal checksum doesn't match", false);
- }
- }
- return false; // false return value denotes end of section
+ assert( false );
}
case JEntry::OpCode_FileCreated:
case JEntry::OpCode_DropDb: {
e.dbName = 0;
- boost::shared_ptr<DurOp> op = DurOp::read(lenOrOpCode, _br);
+ boost::shared_ptr<DurOp> op = DurOp::read(lenOrOpCode, *_entries);
if (_doDurOps) {
e.op = op;
}
- return true;
+ return;
}
case JEntry::OpCode_DbContext: {
- _lastDbName = (const char*) _br.pos();
- const unsigned limit = std::min((unsigned)Namespace::MaxNsLen, _br.remaining());
+ _lastDbName = (const char*) _entries->pos();
+ const unsigned limit = std::min((unsigned)Namespace::MaxNsLen, _entries->remaining());
const unsigned len = strnlen(_lastDbName, limit);
massert(13533, "problem processing journal file during recovery", _lastDbName[len] == '\0');
- _br.skip(len+1); // skip '\0' too
- _br.read(lenOrOpCode);
+ _entries->skip(len+1); // skip '\0' too
+ _entries->read(lenOrOpCode); // read this for the fall through
}
// fall through as a basic operation always follows jdbcontext, and we don't have anything to return yet
@@ -156,18 +172,13 @@ namespace mongo {
// JEntry - a basic write
assert( lenOrOpCode && lenOrOpCode < JEntry::OpCode_Min );
- _br.rewind(4);
- e.e = (JEntry *) _br.skip(sizeof(JEntry));
+ _entries->rewind(4);
+ e.e = (JEntry *) _entries->skip(sizeof(JEntry));
e.dbName = e.e->isLocalDbContext() ? "local" : _lastDbName;
assert( e.e->len == lenOrOpCode );
- _br.skip(e.e->len);
- return true;
+ _entries->skip(e.e->len);
}
- private:
- BufReader _br;
- const JSectHeader* _sectHead;
- const char *_lastDbName; // pointer into mmaped journal file
- const bool _doDurOps;
+
};
static string fileName(const char* dbName, int fileNo) {
@@ -289,27 +300,64 @@ namespace mongo {
log() << "END section" << endl;
}
- void RecoveryJob::processSection(const void *p, unsigned len) {
+ void RecoveryJob::processSection(const JSectHeader *h, const void *p, unsigned len, const JSectFooter *f) {
scoped_lock lk(_mx);
+ RACECHECK
+
+ /** todo: we should really verify the checksum to see that seqNumber is ok?
+ that is expensive maybe there is some sort of checksum of just the header
+ within the header itself
+ */
+ if( _recovering && _lastDataSyncedFromLastRun > h->seqNumber + ExtraKeepTimeMs ) {
+ if( h->seqNumber != _lastSeqMentionedInConsoleLog ) {
+ static int n;
+ if( ++n < 10 ) {
+ log() << "recover skipping application of section seq:" << h->seqNumber << " < lsn:" << _lastDataSyncedFromLastRun << endl;
+ }
+ else if( n == 10 ) {
+ log() << "recover skipping application of section more..." << endl;
+ }
+ _lastSeqMentionedInConsoleLog = h->seqNumber;
+ }
+ return;
+ }
- vector<ParsedJournalEntry> entries;
- JournalSectionIterator i(p, len, _recovering);
+ auto_ptr<JournalSectionIterator> i;
+ if( _recovering ) {
+ i = auto_ptr<JournalSectionIterator>(new JournalSectionIterator(*h, p, len, _recovering));
+ }
+ else {
+ i = auto_ptr<JournalSectionIterator>(new JournalSectionIterator(*h, /*after header*/p, /*w/out header*/len));
+ }
- //DEV log() << "recovery processSection seq:" << i.seqNumber() << endl;
- if( _recovering && _lastDataSyncedFromLastRun > i.seqNumber() + ExtraKeepTimeMs ) {
- if( i.seqNumber() != _lastSeqMentionedInConsoleLog ) {
- log() << "recover skipping application of section seq:" << i.seqNumber() << " < lsn:" << _lastDataSyncedFromLastRun << endl;
- _lastSeqMentionedInConsoleLog = i.seqNumber();
+ // we use a static so that we don't have to reallocate every time through. occasionally we
+ // go back to a small allocation so that if there were a spiky growth it won't stick forever.
+ static vector<ParsedJournalEntry> entries;
+ entries.clear();
+/** TEMP uncomment
+ RARELY OCCASIONALLY {
+ if( entries.capacity() > 2048 ) {
+ entries.shrink_to_fit();
+ entries.reserve(2048);
}
- return;
}
+*/
// first read all entries to make sure this section is valid
ParsedJournalEntry e;
- while( i.next(e) ) {
+ while( !i->atEof() ) {
+ i->next(e);
entries.push_back(e);
}
+ // after the entries check the footer checksum
+ if( _recovering ) {
+ assert( ((const char *)h) + sizeof(JSectHeader) == p );
+ if( !f->checkHash(h, len + sizeof(JSectHeader)) ) {
+ msgasserted(13594, "journal checksum doesn't match");
+ }
+ }
+
// got all the entries for one group commit. apply them:
applyEntries(entries);
}
@@ -345,11 +393,16 @@ namespace mongo {
if( h.fileId != fileId ) {
if( debug || (cmdLine.durOptions & CmdLine::DurDumpJournal) ) {
log() << "Ending processFileBuffer at differing fileId want:" << fileId << " got:" << h.fileId << endl;
- log() << " sect len:" << h.len << " seqnum:" << h.seqNumber << endl;
+ log() << " sect len:" << h.sectionLen() << " seqnum:" << h.seqNumber << endl;
}
return true;
}
- processSection(br.skip(h.len), h.len);
+ unsigned slen = h.sectionLen();
+ unsigned dataLen = slen - sizeof(JSectHeader) - sizeof(JSectFooter);
+ const char *hdr = (const char *) br.skip(h.sectionLenWithPadding());
+ const char *data = hdr + sizeof(JSectHeader);
+ const char *footer = data + dataLen;
+ processSection((const JSectHeader*) hdr, data, dataLen, (const JSectFooter*) footer);
// ctrl c check
killCurrentOp.checkForInterrupt(false);
@@ -367,6 +420,17 @@ namespace mongo {
/** apply a specific journal file */
bool RecoveryJob::processFile(path journalfile) {
log() << "recover " << journalfile.string() << endl;
+
+ try {
+ if( boost::filesystem::file_size( journalfile.string() ) == 0 ) {
+ log() << "recover info " << journalfile.string() << " has zero length" << endl;
+ return true;
+ }
+ } catch(...) {
+ // if something weird like a permissions problem keep going so the massert down below can happen (presumably)
+ log() << "recover exception checking filesize" << endl;
+ }
+
MemoryMappedFile f;
void *p = f.mapWithOptions(journalfile.string().c_str(), MongoFile::READONLY | MongoFile::SEQUENTIAL);
massert(13544, str::stream() << "recover error couldn't open " << journalfile.string(), p);
@@ -382,13 +446,19 @@ namespace mongo {
_lastDataSyncedFromLastRun = journalReadLSN();
log() << "recover lsn: " << _lastDataSyncedFromLastRun << endl;
+ // todo: we could truncate the journal file at rotation time to the right length, then this abruptEnd
+ // check can be turned back on. this is relevant when prealloc is being used.
for( unsigned i = 0; i != files.size(); ++i ) {
- /*bool abruptEnd = */processFile(files[i]);
- /*if( abruptEnd && i+1 < files.size() ) {
+ bool abruptEnd = processFile(files[i]);
+ if( abruptEnd && i+1 < files.size() ) {
+#if 1 // Leaving this as a warning for now. TODO: make this an error post 2.0
+ log() << "recover warning: abrupt end to file " << files[i].string() << ", yet it isn't the last journal file" << endl;
+#else
log() << "recover error: abrupt end to file " << files[i].string() << ", yet it isn't the last journal file" << endl;
close();
uasserted(13535, "recover abrupt journal file end");
- }*/
+#endif
+ }
}
close();
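
processFileBuffer above now advances by sectionLenWithPadding() and hands processSection separate header, data, and footer pointers. A sketch of that carving with abbreviated stand-in structs (field layouts assumed, not the real definitions):

    // stand-ins only; the real structs live in dur_journalformat.h
    struct JSectHeader { unsigned _sectionLen; unsigned long long seqNumber; unsigned long long fileId; };
    struct JSectFooter { unsigned sentinel; unsigned char hash[16]; unsigned long long reserved; char magic[4]; };

    struct SectView {
        const JSectHeader* h;
        const char*        data;     // compressed ops between header and footer
        unsigned           dataLen;
        const JSectFooter* f;
    };

    // hdr points at the start of a section; sectionLen is the unpadded length
    SectView carveSection(const char* hdr, unsigned sectionLen) {
        SectView v;
        v.h       = (const JSectHeader*) hdr;
        v.dataLen = sectionLen - sizeof(JSectHeader) - sizeof(JSectFooter);
        v.data    = hdr + sizeof(JSectHeader);
        v.f       = (const JSectFooter*)(v.data + v.dataLen);
        return v;
    }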
diff --git a/db/dur_recover.h b/db/dur_recover.h
index b5a922b498a..955e730ea05 100644
--- a/db/dur_recover.h
+++ b/db/dur_recover.h
@@ -2,6 +2,7 @@
#pragma once
+#include "dur_journalformat.h"
#include "../util/concurrency/mutex.h"
#include "../util/file.h"
@@ -15,10 +16,14 @@ namespace mongo {
*/
class RecoveryJob : boost::noncopyable {
public:
- RecoveryJob() :_lastDataSyncedFromLastRun(0), _mx("recovery"), _recovering(false) { _lastSeqMentionedInConsoleLog = 1; }
+ RecoveryJob() : _lastDataSyncedFromLastRun(0),
+ _mx("recovery"), _recovering(false) { _lastSeqMentionedInConsoleLog = 1; }
void go(vector<path>& files);
~RecoveryJob();
- void processSection(const void *, unsigned len);
+
+ /** @param data data between header and footer. compressed if recovering. */
+ void processSection(const JSectHeader *h, const void *data, unsigned len, const JSectFooter *f);
+
void close(); // locks and calls _close()
static RecoveryJob & get() { return _instance; }
diff --git a/db/dur_stats.h b/db/dur_stats.h
index d4943c01cb3..50a26d1f215 100644
--- a/db/dur_stats.h
+++ b/db/dur_stats.h
@@ -20,6 +20,7 @@ namespace mongo {
unsigned _commits;
unsigned _earlyCommits; // count of early commits from commitIfNeeded() or from getDur().commitNow()
unsigned long long _journaledBytes;
+ unsigned long long _uncompressedBytes;
unsigned long long _writeToDataFilesBytes;
unsigned long long _prepLogBufferMicros;
diff --git a/db/dur_writetodatafiles.cpp b/db/dur_writetodatafiles.cpp
index cdccb018d83..6724f0731aa 100644
--- a/db/dur_writetodatafiles.cpp
+++ b/db/dur_writetodatafiles.cpp
@@ -47,9 +47,9 @@ namespace mongo {
@see https://docs.google.com/drawings/edit?id=1TklsmZzm7ohIZkwgeK6rMvsdaR13KjtJYMsfLr175Zc&hl=en
*/
- void WRITETODATAFILES_Impl1() {
+ void WRITETODATAFILES_Impl1(const JSectHeader& h, AlignedBuilder& uncompressed) {
RWLockRecursive::Shared lk(MongoFile::mmmutex);
- RecoveryJob::get().processSection(commitJob._ab.buf(), commitJob._ab.len());
+ RecoveryJob::get().processSection(&h, uncompressed.buf(), uncompressed.len(), 0);
}
#if 0
@@ -81,16 +81,14 @@ namespace mongo {
#endif
// concurrency: in mmmutex, not necessarily in dbMutex
- void WRITETODATAFILES() {
+ void WRITETODATAFILES(const JSectHeader& h, AlignedBuilder& uncompressed) {
Timer t;
#if defined(_EXPERIMENTAL)
WRITETODATAFILES_Impl3();
#else
- WRITETODATAFILES_Impl1();
+ WRITETODATAFILES_Impl1(h, uncompressed);
#endif
stats.curr->_writeToDataFilesMicros += t.micros();
-
-
}
}
diff --git a/db/durop.h b/db/durop.h
index c4574c2e3cb..9ab1bfcbede 100644
--- a/db/durop.h
+++ b/db/durop.h
@@ -28,8 +28,6 @@ namespace mongo {
namespace dur {
- const unsigned Alignment = 8192;
-
/** DurOp - Operations we journal that aren't just basic writes.
*
* Basic writes are logged as JEntry's, and indicated in ram temporarily as struct dur::WriteIntent.
diff --git a/db/geo/2d.cpp b/db/geo/2d.cpp
index 21b0eaa6601..9b762b260de 100644
--- a/db/geo/2d.cpp
+++ b/db/geo/2d.cpp
@@ -138,7 +138,11 @@ namespace mongo {
GeoHash b = a;
b.move(1, 1);
- _error = distance(a, b);
+ // Epsilon is 1/100th of a bucket size
+ // TODO: Can we actually find error bounds for the sqrt function?
+ double epsilon = 0.001 / _scaling;
+ _error = distance(a, b) + epsilon;
+
// Error in radians
_errorSphere = deg2rad( _error );
}
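
The hunk above widens the 2d error bound to the one-cell diagonal plus a small epsilon. A numeric sketch with a hypothetical _scaling value, showing the resulting bound strictly exceeds the bare diagonal:

    #include <cassert>
    #include <cmath>

    int main() {
        double scaling = 1 << 16;                // hypothetical buckets per coordinate unit
        double edge    = 1.0 / scaling;          // one bucket edge
        double diag    = std::sqrt(2.0) * edge;  // roughly distance(a, b) after b.move(1, 1)
        double epsilon = 0.001 / scaling;        // the fudge added in the hunk above
        assert( diag + epsilon > diag );         // bound strictly exceeds the diagonal
        return 0;
    }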
@@ -293,6 +297,14 @@ namespace mongo {
}
+ BSONObj _fromBSONHash( const BSONElement& e ) const {
+ return _unhash( _tohash( e ) );
+ }
+
+ BSONObj _fromBSONHash( const BSONObj& o ) const {
+ return _unhash( _tohash( o.firstElement() ) );
+ }
+
GeoHash _tohash( const BSONElement& e ) const {
if ( e.isABSONObj() )
return _hash( e.embeddedObject() );
@@ -368,6 +380,10 @@ namespace mongo {
}
double sizeEdge( const GeoHash& a ) const {
+
+ if( ! a.constrains() )
+ return _max - _min;
+
double ax,ay,bx,by;
GeoHash b = a;
b.move( 1 , 1 );
@@ -443,6 +459,10 @@ namespace mongo {
Box() {}
+ BSONArray toBSON() const {
+ return BSON_ARRAY( BSON_ARRAY( _min._x << _min._y ) << BSON_ARRAY( _max._x << _max._y ) );
+ }
+
string toString() const {
StringBuilder buf(64);
buf << _min.toString() << " -->> " << _max.toString();
@@ -630,8 +650,8 @@ namespace mongo {
}
else if( fudge == 0 ){
- if( p._y == p1._y && p._x == p1._x ) return true;
- else if( p._y == p2._y && p._x == p2._x ) return true;
+ if( p._y == p1._y && p._x == p1._x ) return true;
+ else if( p._y == p2._y && p._x == p2._x ) return true;
}
// Normal intersection test.
@@ -742,293 +762,96 @@ namespace mongo {
geo2dplugin.getName();
}
- struct GeoUnitTest : public UnitTest {
-
- int round( double d ) {
- return (int)(.5+(d*1000));
- }
-
-#define GEOHEQ(a,b) if ( a.toString() != b ){ cout << "[" << a.toString() << "] != [" << b << "]" << endl; assert( a == GeoHash(b) ); }
-
- void run() {
- assert( ! GeoHash::isBitSet( 0 , 0 ) );
- assert( ! GeoHash::isBitSet( 0 , 31 ) );
- assert( GeoHash::isBitSet( 1 , 31 ) );
-
- IndexSpec i( BSON( "loc" << "2d" ) );
- Geo2dType g( &geo2dplugin , &i );
- {
- double x = 73.01212;
- double y = 41.352964;
- BSONObj in = BSON( "x" << x << "y" << y );
- GeoHash h = g._hash( in );
- BSONObj out = g._unhash( h );
- assert( round(x) == round( out["x"].number() ) );
- assert( round(y) == round( out["y"].number() ) );
- assert( round( in["x"].number() ) == round( out["x"].number() ) );
- assert( round( in["y"].number() ) == round( out["y"].number() ) );
- }
-
- {
- double x = -73.01212;
- double y = 41.352964;
- BSONObj in = BSON( "x" << x << "y" << y );
- GeoHash h = g._hash( in );
- BSONObj out = g._unhash( h );
- assert( round(x) == round( out["x"].number() ) );
- assert( round(y) == round( out["y"].number() ) );
- assert( round( in["x"].number() ) == round( out["x"].number() ) );
- assert( round( in["y"].number() ) == round( out["y"].number() ) );
- }
-
- {
- GeoHash h( "0000" );
- h.move( 0 , 1 );
- GEOHEQ( h , "0001" );
- h.move( 0 , -1 );
- GEOHEQ( h , "0000" );
-
- h.init( "0001" );
- h.move( 0 , 1 );
- GEOHEQ( h , "0100" );
- h.move( 0 , -1 );
- GEOHEQ( h , "0001" );
-
-
- h.init( "0000" );
- h.move( 1 , 0 );
- GEOHEQ( h , "0010" );
- }
-
- {
- Box b( 5 , 5 , 2 );
- assert( "(5,5) -->> (7,7)" == b.toString() );
- }
-
- {
- GeoHash a = g.hash( 1 , 1 );
- GeoHash b = g.hash( 4 , 5 );
- assert( 5 == (int)(g.distance( a , b ) ) );
- a = g.hash( 50 , 50 );
- b = g.hash( 42 , 44 );
- assert( round(10) == round(g.distance( a , b )) );
- }
-
- {
- GeoHash x("0000");
- assert( 0 == x.getHash() );
- x.init( 0 , 1 , 32 );
- GEOHEQ( x , "0000000000000000000000000000000000000000000000000000000000000001" )
-
- assert( GeoHash( "1100").hasPrefix( GeoHash( "11" ) ) );
- assert( ! GeoHash( "1000").hasPrefix( GeoHash( "11" ) ) );
- }
-
- {
- GeoHash x("1010");
- GEOHEQ( x , "1010" );
- GeoHash y = x + "01";
- GEOHEQ( y , "101001" );
- }
-
- {
-
- GeoHash a = g.hash( 5 , 5 );
- GeoHash b = g.hash( 5 , 7 );
- GeoHash c = g.hash( 100 , 100 );
- /*
- cout << "a: " << a << endl;
- cout << "b: " << b << endl;
- cout << "c: " << c << endl;
-
- cout << "a: " << a.toStringHex1() << endl;
- cout << "b: " << b.toStringHex1() << endl;
- cout << "c: " << c.toStringHex1() << endl;
- */
- BSONObj oa = a.wrap();
- BSONObj ob = b.wrap();
- BSONObj oc = c.wrap();
- /*
- cout << "a: " << oa.hexDump() << endl;
- cout << "b: " << ob.hexDump() << endl;
- cout << "c: " << oc.hexDump() << endl;
- */
- assert( oa.woCompare( ob ) < 0 );
- assert( oa.woCompare( oc ) < 0 );
-
- }
-
- {
- GeoHash x( "000000" );
- x.move( -1 , 0 );
- GEOHEQ( x , "101010" );
- x.move( 1 , -1 );
- GEOHEQ( x , "010101" );
- x.move( 0 , 1 );
- GEOHEQ( x , "000000" );
- }
- {
- GeoHash prefix( "110011000000" );
- GeoHash entry( "1100110000011100000111000001110000011100000111000001000000000000" );
- assert( ! entry.hasPrefix( prefix ) );
- entry = GeoHash("1100110000001100000111000001110000011100000111000001000000000000");
- assert( entry.toString().find( prefix.toString() ) == 0 );
- assert( entry.hasPrefix( GeoHash( "1100" ) ) );
- assert( entry.hasPrefix( prefix ) );
- }
-
- {
- GeoHash a = g.hash( 50 , 50 );
- GeoHash b = g.hash( 48 , 54 );
- assert( round( 4.47214 ) == round( g.distance( a , b ) ) );
- }
-
-
- {
- Box b( Point( 29.762283 , -95.364271 ) , Point( 29.764283000000002 , -95.36227099999999 ) );
- assert( b.inside( 29.763 , -95.363 ) );
- assert( ! b.inside( 32.9570255 , -96.1082497 ) );
- assert( ! b.inside( 32.9570255 , -96.1082497 , .01 ) );
- }
-
- {
- GeoHash a( "11001111" );
- assert( GeoHash( "11" ) == a.commonPrefix( GeoHash("11") ) );
- assert( GeoHash( "11" ) == a.commonPrefix( GeoHash("11110000") ) );
- }
-
- {
- int N = 10000;
- {
- Timer t;
- for ( int i=0; i<N; i++ ) {
- unsigned x = (unsigned)rand();
- unsigned y = (unsigned)rand();
- GeoHash h( x , y );
- unsigned a,b;
- h.unhash_slow( a,b );
- assert( a == x );
- assert( b == y );
- }
- //cout << "slow: " << t.millis() << endl;
- }
-
- {
- Timer t;
- for ( int i=0; i<N; i++ ) {
- unsigned x = (unsigned)rand();
- unsigned y = (unsigned)rand();
- GeoHash h( x , y );
- unsigned a,b;
- h.unhash_fast( a,b );
- assert( a == x );
- assert( b == y );
- }
- //cout << "fast: " << t.millis() << endl;
- }
-
- }
-
- {
- // see http://en.wikipedia.org/wiki/Great-circle_distance#Worked_example
-
- {
- Point BNA (-86.67, 36.12);
- Point LAX (-118.40, 33.94);
+ class GeoHopper;
- double dist1 = spheredist_deg(BNA, LAX);
- double dist2 = spheredist_deg(LAX, BNA);
+ class GeoPoint {
+ public:
- // target is 0.45306
- assert( 0.45305 <= dist1 && dist1 <= 0.45307 );
- assert( 0.45305 <= dist2 && dist2 <= 0.45307 );
- }
- {
- Point BNA (-1.5127, 0.6304);
- Point LAX (-2.0665, 0.5924);
+ GeoPoint() : _distance( -1 ), _exact( false )
+ {}
- double dist1 = spheredist_rad(BNA, LAX);
- double dist2 = spheredist_rad(LAX, BNA);
+ //// Distance not used ////
- // target is 0.45306
- assert( 0.45305 <= dist1 && dist1 <= 0.45307 );
- assert( 0.45305 <= dist2 && dist2 <= 0.45307 );
- }
- {
- Point JFK (-73.77694444, 40.63861111 );
- Point LAX (-118.40, 33.94);
+ GeoPoint( const GeoKeyNode& node )
+ : _key( node._key ) , _loc( node.recordLoc ) , _o( node.recordLoc.obj() ), _distance( -1 ) , _exact( false ) {
+ }
- double dist = spheredist_deg(JFK, LAX) * EARTH_RADIUS_MILES;
- assert( dist > 2469 && dist < 2470 );
- }
+ //// Immediate initialization of distance ////
- {
- Point BNA (-86.67, 36.12);
- Point LAX (-118.40, 33.94);
- Point JFK (-73.77694444, 40.63861111 );
- assert( spheredist_deg(BNA, BNA) < 1e-6);
- assert( spheredist_deg(LAX, LAX) < 1e-6);
- assert( spheredist_deg(JFK, JFK) < 1e-6);
+ GeoPoint( const GeoKeyNode& node, double distance, bool exact )
+ : _key( node._key ) , _loc( node.recordLoc ) , _o( node.recordLoc.obj() ), _distance( distance ), _exact( exact ) {
+ }
- Point zero (0, 0);
- Point antizero (0,-180);
+ GeoPoint( const GeoPoint& pt, double distance, bool exact )
+ : _key( pt.key() ) , _loc( pt.loc() ) , _o( pt.obj() ), _distance( distance ), _exact( exact ) {
+ }
- // these were known to cause NaN
- assert( spheredist_deg(zero, zero) < 1e-6);
- assert( fabs(M_PI-spheredist_deg(zero, antizero)) < 1e-6);
- assert( fabs(M_PI-spheredist_deg(antizero, zero)) < 1e-6);
- }
- }
+ bool operator<( const GeoPoint& other ) const {
+ if( _distance != other._distance ) return _distance < other._distance;
+ if( _exact != other._exact ) return _exact < other._exact;
+ return _loc < other._loc;
}
- } geoUnitTest;
- class GeoHopper;
+ double distance() const {
+ return _distance;
+ }
- class GeoPoint {
- public:
- GeoPoint() { }
+ bool isExact() const {
+ return _exact;
+ }
- //// Distance not used ////
+ BSONObj key() const {
+ return _key;
+ }
- GeoPoint( const GeoKeyNode& node )
- : _key( node._key ) , _loc( node.recordLoc ) , _o( node.recordLoc.obj() ) , _exactDistance( -1 ), _exactWithin( false ) {
+ DiskLoc loc() const {
+ return _loc;
}
-
- //// Immediate initialization of exact distance ////
- GeoPoint( const GeoKeyNode& node , double exactDistance, bool exactWithin )
- : _key( node._key ) , _loc( node.recordLoc ) , _o( node.recordLoc.obj() ), _exactDistance( exactDistance ), _exactWithin( exactWithin ) {
+ BSONObj obj() const {
+ return _o;
}
- bool operator<( const GeoPoint& other ) const {
- return _exactDistance < other._exactDistance;
+ BSONObj pt() const {
+ return _pt;
}
- bool isEmpty() const {
+ bool isEmpty() {
return _o.isEmpty();
}
string toString() const {
- return str::stream() << "Point from " << _o.toString() << " dist : " << _exactDistance << " within ? " << _exactWithin;
+ return str::stream() << "Point from " << _o << " dist : " << _distance << ( _exact ? " (ex)" : " (app)" );
}
BSONObj _key;
DiskLoc _loc;
BSONObj _o;
+ BSONObj _pt;
- double _exactDistance;
- bool _exactWithin;
+ double _distance;
+ bool _exact;
};
// GeoBrowse subclasses this
class GeoAccumulator {
public:
- GeoAccumulator( const Geo2dType * g , const BSONObj& filter )
- : _g(g) , _lookedAt(0) , _objectsLoaded(0) , _found(0) {
+ GeoAccumulator( const Geo2dType * g , const BSONObj& filter, bool uniqueDocs, bool needDistance )
+ : _g(g) ,
+ _keysChecked(0) ,
+ _lookedAt(0) ,
+ _matchesPerfd(0) ,
+ _objectsLoaded(0) ,
+ _pointsLoaded(0) ,
+ _found(0) ,
+ _uniqueDocs( uniqueDocs ) ,
+ _needDistance( needDistance )
+ {
if ( ! filter.isEmpty() ) {
_matcher.reset( new CoveredIndexMatcher( filter , g->keyPattern() ) );
+ GEODEBUG( "Matcher is now " << _matcher->docMatcher().toString() );
}
}
@@ -1042,6 +865,9 @@ namespace mongo {
set< pair<DiskLoc,int> > _seen;
public:
bool seen(DiskLoc bucket, int pos) {
+
+ _keysChecked++;
+
pair< set<pair<DiskLoc,int> >::iterator, bool > seenBefore = _seen.insert( make_pair(bucket,pos) );
if ( ! seenBefore.second ) {
GEODEBUG( "\t\t\t\t already seen : " << bucket.toString() << ' ' << pos ); // node.key.toString() << " @ " << Point( _g, GeoHash( node.key.firstElement() ) ).toString() << " with " << node.recordLoc.obj()["_id"] );
@@ -1050,29 +876,43 @@ namespace mongo {
return false;
}
- void add( const GeoKeyNode& node ) {
+ enum KeyResult { BAD, BORDER, GOOD };
+
+ virtual void add( const GeoKeyNode& node ) {
- GEODEBUG( "\t\t\t\t checking key " << node.key.toString() )
+ GEODEBUG( "\t\t\t\t checking key " << node._key.toString() )
_lookedAt++;
- // distance check
- double d = 0;
- if ( ! checkDistance( node , d ) ) {
- GEODEBUG( "\t\t\t\t bad distance : " << node.recordLoc.obj() << "\t" << d );
+ ////
+ // Approximate distance check using key data
+ ////
+ double keyD = 0;
+ Point keyP( _g, GeoHash( node._key.firstElement(), _g->_bits ) );
+ KeyResult keyOk = approxKeyCheck( keyP, keyD );
+ if ( keyOk == BAD ) {
+ GEODEBUG( "\t\t\t\t bad distance : " << node.recordLoc.obj() << "\t" << keyD );
return;
}
- GEODEBUG( "\t\t\t\t good distance : " << node.recordLoc.obj() << "\t" << d );
+ GEODEBUG( "\t\t\t\t good distance : " << node.recordLoc.obj() << "\t" << keyD );
+ ////
+ // Check for match using other key (and potentially doc) criteria
+ ////
// Remember match results for each object
map<DiskLoc, bool>::iterator match = _matched.find( node.recordLoc );
bool newDoc = match == _matched.end();
if( newDoc ) {
+ GEODEBUG( "\t\t\t\t matching new doc with " << (_matcher ? _matcher->docMatcher().toString() : "(empty)" ) );
+
// matcher
MatchDetails details;
if ( _matcher.get() ) {
bool good = _matcher->matchesWithSingleKeyIndex( node._key , node.recordLoc , &details );
+
+ _matchesPerfd++;
+
if ( details._loadedObject )
_objectsLoaded++;
@@ -1094,12 +934,50 @@ namespace mongo {
return;
}
- addSpecific( node , d, newDoc );
- _found++;
+ ////
+ // Exact check with particular data fields
+ ////
+ // Can add multiple points
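+            // addSpecific returns the net change in points found; we tolerate a
+            // negative change (points removed) as well as additions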
+ int diff = addSpecific( node , keyP, keyOk == BORDER, keyD, newDoc );
+ if( diff > 0 ) _found += diff;
+ else _found -= -diff;
+
+ }
+
+ virtual void getPointsFor( const BSONObj& key, const BSONObj& obj, vector< BSONObj >& locsForNode, bool allPoints = false ){
+
+ // Find all the location objects from the keys
+ vector< BSONObj > locs;
+ _g->getKeys( obj, allPoints ? locsForNode : locs );
+ _pointsLoaded++;
+
+ if( allPoints ) return;
+ if( locs.size() == 1 ){
+ locsForNode.push_back( locs[0] );
+ return;
+ }
+
+ // Find the particular location we want
+ GeoHash keyHash( key.firstElement(), _g->_bits );
+
+ // log() << "Hash: " << node.key << " and " << keyHash.getHash() << " unique " << _uniqueDocs << endl;
+ for( vector< BSONObj >::iterator i = locs.begin(); i != locs.end(); ++i ) {
+
+ // Ignore all locations not hashed to the key's hash, since we may see
+ // those later
+ if( _g->_hash( *i ) != keyHash ) continue;
+
+ locsForNode.push_back( *i );
+
+ }
+
}
- virtual void addSpecific( const GeoKeyNode& node , double d, bool newDoc ) = 0;
- virtual bool checkDistance( const GeoKeyNode& node , double& d ) = 0;
+ virtual int addSpecific( const GeoKeyNode& node, const Point& p , bool inBounds, double d, bool newDoc ) = 0;
+ virtual KeyResult approxKeyCheck( const Point& p , double& keyD ) = 0;
+ virtual bool exactDocCheck( const Point& p , double& d ) = 0;
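+        // Subclasses return true when their exactDocCheck is costlier than redoing
+        // the approximate check (see addSpecific)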
+ virtual bool expensiveExactCheck(){ return false; }
+
long long found() const {
return _found;
@@ -1109,9 +987,16 @@ namespace mongo {
map<DiskLoc, bool> _matched;
shared_ptr<CoveredIndexMatcher> _matcher;
+ long long _keysChecked;
long long _lookedAt;
+ long long _matchesPerfd;
long long _objectsLoaded;
+ long long _pointsLoaded;
long long _found;
+
+ bool _uniqueDocs;
+ bool _needDistance;
+
};
struct BtreeLocation {
@@ -1264,8 +1149,8 @@ namespace mongo {
DONE
} _state;
- GeoBrowse( const Geo2dType * g , string type , BSONObj filter = BSONObj() )
- : GeoCursorBase( g ), GeoAccumulator( g , filter ) ,
+ GeoBrowse( const Geo2dType * g , string type , BSONObj filter = BSONObj(), bool uniqueDocs = true, bool needDistance = false )
+ : GeoCursorBase( g ), GeoAccumulator( g , filter, uniqueDocs, needDistance ) ,
_type( type ) , _filter( filter ) , _firstCall(true), _nscanned(), _centerPrefix(0, 0, 0) {
// Set up the initial expand state
@@ -1350,11 +1235,9 @@ namespace mongo {
virtual void fillStack( int maxToCheck, int maxToAdd = -1, bool onlyExpand = false ) {
#ifdef GEODEBUGGING
-
- int s = _state;
log() << "Filling stack with maximum of " << maxToCheck << ", state : " << (int) _state << endl;
-
#endif
+
if( maxToAdd < 0 ) maxToAdd = maxToCheck;
int maxFound = _foundInExp + maxToCheck;
assert( maxToCheck > 0 );
@@ -1395,7 +1278,6 @@ namespace mongo {
while ( true ) {
GEODEBUG( "box prefix [" << _prefix << "]" );
-
#ifdef GEODEBUGGING
if( _prefix.constrains() ) {
log() << "current expand box : " << Box( _g, _prefix ).toString() << endl;
@@ -1407,6 +1289,9 @@ namespace mongo {
GEODEBUG( "expanding box points... ");
+ // Record the prefix we're actively exploring...
+ _expPrefix.reset( new GeoHash( _prefix ) );
+
// Find points inside this prefix
while ( _min.hasPrefix( _prefix ) && _min.advance( -1 , _foundInExp , this ) && _foundInExp < maxFound && _found < maxAdded );
while ( _max.hasPrefix( _prefix ) && _max.advance( 1 , _foundInExp , this ) && _foundInExp < maxFound && _found < maxAdded );
@@ -1419,7 +1304,7 @@ namespace mongo {
#endif
- GEODEBUG( "finished expand, found : " << ( maxToCheck - ( maxFound - _found ) ) );
+ GEODEBUG( "finished expand, found : " << ( maxToAdd - ( maxAdded - _found ) ) );
if( _foundInExp >= maxFound || _found >= maxAdded ) return;
// We've searched this prefix fully, remember
@@ -1429,6 +1314,7 @@ namespace mongo {
if ( ! _prefix.constrains() ) {
GEODEBUG( "box exhausted" );
_state = DONE;
+ notePrefix();
return;
}
@@ -1453,8 +1339,9 @@ namespace mongo {
break;
}
- }
+ notePrefix();
+ }
        // If we're only expanding, we don't look at neighbors and are done
if( onlyExpand ) return;
@@ -1495,7 +1382,7 @@ namespace mongo {
GeoHash _neighborPrefix = _centerPrefix;
_neighborPrefix.move( i, j );
- GEODEBUG( "moving to " << i << " , " << j );
+ GEODEBUG( "moving to " << i << " , " << j << " fringe : " << _fringe.size() );
PREFIXDEBUG( _centerPrefix, _g );
PREFIXDEBUG( _neighborPrefix , _g );
while( _fringe.size() > 0 ) {
@@ -1542,7 +1429,7 @@ namespace mongo {
// be entirely done. Max recurse depth is < 8 * 16.
// If we're maxed out on points, return
- if( _foundInExp >= maxFound ) {
+ if( _foundInExp >= maxFound || _found >= maxAdded ) {
// Make sure we'll come back to add more points
assert( _state == DOING_EXPAND );
return;
@@ -1571,14 +1458,63 @@ namespace mongo {
// The amount the current box overlaps our search area
virtual double intersectsBox( Box& cur ) = 0;
- virtual void addSpecific( const GeoKeyNode& node , double d, bool newDoc ) {
+ virtual int addSpecific( const GeoKeyNode& node , const Point& keyP , bool onBounds , double keyD , bool newDoc ) {
- if( ! newDoc ) return;
+ int found = 0;
- if ( _cur.isEmpty() )
- _cur = GeoPoint( node );
- else
- _stack.push_back( GeoPoint( node ) );
+ // We need to handle every possible point in this method, even those not in the key value,
+ // to avoid having to track which hashes we've already seen.
+ if( ! newDoc ){
+ // log() << "Already handled doc!" << endl;
+ return 0;
+ }
+
+ if( _uniqueDocs && ! onBounds ) {
+ // log() << "Added ind to " << _type << endl;
+ _stack.push_front( GeoPoint( node ) );
+ found++;
+ }
+ else {
+ // We now handle every possible point in the document, even those not in the key value,
+ // since we're iterating through them anyway - prevents us from having to save the hashes
+ // we've seen per-doc
+
+ // If we're filtering by hash, get the original
+ bool expensiveExact = expensiveExactCheck();
+
+ vector< BSONObj > locs;
+ getPointsFor( node._key, node.recordLoc.obj(), locs, true );
+ for( vector< BSONObj >::iterator i = locs.begin(); i != locs.end(); ++i ){
+
+ double d = -1;
+ Point p( *i );
+
+ // We can avoid exact document checks by redoing approx checks,
+ // if the exact checks are more expensive.
+ bool needExact = true;
+ if( expensiveExact ){
+ assert( false );
+ KeyResult result = approxKeyCheck( p, d );
+ if( result == BAD ) continue;
+ else if( result == GOOD ) needExact = false;
+ }
+
+ if( ! needExact || exactDocCheck( p, d ) ){
+ // log() << "Added mult to " << _type << endl;
+ _stack.push_front( GeoPoint( node ) );
+ found++;
+ // If returning unique, just exit after first point is added
+ if( _uniqueDocs ) break;
+ }
+ }
+ }
+
+ if ( _cur.isEmpty() && _stack.size() > 0 ){
+ _cur = _stack.front();
+ _stack.pop_front();
+ }
+
+ return found;
}
virtual long long nscanned() {
@@ -1588,6 +1524,35 @@ namespace mongo {
return _nscanned;
}
+ virtual void explainDetails( BSONObjBuilder& b ){
+ b << "keysChecked" << _keysChecked;
+ b << "lookedAt" << _lookedAt;
+ b << "matchesPerfd" << _matchesPerfd;
+ b << "objectsLoaded" << _objectsLoaded;
+ b << "pointsLoaded" << _pointsLoaded;
+ }
+
+ virtual BSONObj prettyIndexBounds() const {
+
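+            // Flush the prefix we're currently exploring into the noted list, if it's new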
+ vector<GeoHash>::const_iterator i = _expPrefixes.end();
+ if( _expPrefixes.size() > 0 && *(--i) != *( _expPrefix.get() ) )
+ _expPrefixes.push_back( *( _expPrefix.get() ) );
+
+ BSONObjBuilder bob;
+ BSONArrayBuilder bab;
+ for( i = _expPrefixes.begin(); i != _expPrefixes.end(); ++i ){
+ bab << Box( _g, *i ).toBSON();
+ }
+ bob << _g->_geo << bab.arr();
+
+ return bob.obj();
+
+ }
+
+ void notePrefix() {
+ _expPrefixes.push_back( _prefix );
+ }
+
string _type;
BSONObj _filter;
list<GeoPoint> _stack;
@@ -1616,6 +1581,9 @@ namespace mongo {
BtreeLocation _min;
BtreeLocation _max;
+ shared_ptr<GeoHash> _expPrefix;
+ mutable vector<GeoHash> _expPrefixes;
+
};
@@ -1623,133 +1591,148 @@ namespace mongo {
public:
typedef multiset<GeoPoint> Holder;
- GeoHopper( const Geo2dType * g , unsigned max , const Point& n , const BSONObj& filter = BSONObj() , double maxDistance = numeric_limits<double>::max() , GeoDistType type=GEO_PLAIN )
- : GeoBrowse( g, "search", filter ), _max( max ) , _near( n ), _maxDistance( maxDistance ), _type( type ), _distError( type == GEO_PLAIN ? g->_error : g->_errorSphere ), _farthest(0)
+ GeoHopper( const Geo2dType * g , unsigned max , const Point& n , const BSONObj& filter = BSONObj() , double maxDistance = numeric_limits<double>::max() , GeoDistType type=GEO_PLAIN, bool uniqueDocs = false, bool needDistance = true )
+ : GeoBrowse( g, "search", filter, uniqueDocs, needDistance ), _max( max ) , _near( n ), _maxDistance( maxDistance ), _type( type ), _distError( type == GEO_PLAIN ? g->_error : g->_errorSphere ), _farthest(0)
{}
- virtual bool checkDistance( const GeoKeyNode& node, double& d ) {
+ virtual KeyResult approxKeyCheck( const Point& p, double& d ) {
// Always check approximate distance, since it lets us avoid doing
// checks of the rest of the object if it succeeds
- // TODO: Refactor so that we can check exact distance and within if we are going to
- // anyway.
- d = approxDistance( node );
- assert( d >= 0 );
- // Out of the error range, see how close we are to the furthest points
- bool good = d <= _maxDistance + 2 * _distError /* In error range */
- && ( _points.size() < _max /* need more points */
- || d <= farthest() + 2 * _distError /* could be closer than previous points */ );
+ switch (_type) {
+ case GEO_PLAIN:
+ d = _near.distance( p );
+ break;
+ case GEO_SPHERE:
+ checkEarthBounds( p );
+ d = spheredist_deg( _near, p );
+ break;
+ default: assert( false );
+ }
+ assert( d >= 0 );
GEODEBUG( "\t\t\t\t\t\t\t checkDistance " << _near.toString()
- << "\t" << GeoHash( node.key.firstElement() ) << "\t" << d
- << " ok: " << good << " farthest: " << farthest() );
+ << "\t" << p.toString() << "\t" << d
+ << " farthest: " << farthest() );
- return good;
- }
+ // Border distance: the max search distance if we still need points, otherwise the current farthest point
+ double borderDist = ( _points.size() < _max ? _maxDistance : farthest() );
+
+ if( d >= borderDist - 2 * _distError && d <= borderDist + 2 * _distError ) return BORDER;
+ else return d < borderDist ? GOOD : BAD;
- double approxDistance( const GeoKeyNode& node ) {
- return approxDistance( GeoHash( node._key.firstElement() ) );
}
- double approxDistance( const GeoHash& h ) {
+ virtual bool exactDocCheck( const Point& p, double& d ){
- double approxDistance = -1;
- Point p( _g, h );
- switch (_type) {
+ bool within = false;
+
+ // Get the appropriate distance for the type
+ switch ( _type ) {
case GEO_PLAIN:
- approxDistance = _near.distance( p );
+ d = _near.distance( p );
+ within = _near.distanceWithin( p, _maxDistance );
break;
case GEO_SPHERE:
checkEarthBounds( p );
- approxDistance = spheredist_deg( _near, p );
+ d = spheredist_deg( _near, p );
+ within = ( d <= _maxDistance );
break;
default: assert( false );
}
- return approxDistance;
+ return within;
}
- double exactDistances( const GeoKeyNode& node ) {
-
- GEODEBUG( "Finding exact distance for " << node.key.toString() << " and " << node.recordLoc.obj().toString() );
-
- // Find all the location objects from the keys
- vector< BSONObj > locs;
- _g->getKeys( node.recordLoc.obj(), locs );
+ // Always in distance units, whether radians or normal
+ double farthest() const {
+ return _farthest;
+ }
- double maxDistance = -1;
+ virtual int addSpecific( const GeoKeyNode& node, const Point& keyP, bool onBounds, double keyD, bool newDoc ) {
- // Find the particular location we want
- BSONObj loc;
- GeoHash keyHash( node._key.firstElement(), _g->_bits );
- for( vector< BSONObj >::iterator i = locs.begin(); i != locs.end(); ++i ) {
+ // Unique documents
- loc = *i;
+ GeoPoint newPoint( node, keyD, false );
- // Ignore all locations not hashed to the key's hash, since we may see
- // those later
- if( _g->_hash( loc ) != keyHash ) continue;
+ int prevSize = _points.size();
- double exactDistance = -1;
- bool exactWithin = false;
+ // STEP 1 : Remove old duplicate points from the set if needed
+ if( _uniqueDocs ){
- Point p( loc );
+ // Lookup old point with same doc
+ map< DiskLoc , Holder::iterator >::iterator oldPointIt = _seenPts.find( newPoint.loc() );
- // Get the appropriate distance for the type
- switch ( _type ) {
- case GEO_PLAIN:
- exactDistance = _near.distance( p );
- exactWithin = _near.distanceWithin( p, _maxDistance );
- break;
- case GEO_SPHERE:
- checkEarthBounds( p );
- exactDistance = spheredist_deg( _near, p );
- exactWithin = ( exactDistance <= _maxDistance );
- break;
- default: assert( false );
+ if( oldPointIt != _seenPts.end() ){
+ const GeoPoint& oldPoint = *(oldPointIt->second);
+ // We can skip this point if we've already seen an equal or closer approximate
+ // point for this doc, or have already gone to disk once for it
+ if( oldPoint < newPoint ){
+ GEODEBUG( "\t\tOld point closer than new point" );
+ return 0;
+ }
+ GEODEBUG( "\t\tErasing old point " << oldPointIt->first.obj() );
+ _points.erase( oldPointIt->second );
}
+ }
- assert( exactDistance >= 0 );
- if( !exactWithin ) continue;
+ Holder::iterator newIt = _points.insert( newPoint );
+ if( _uniqueDocs ) _seenPts[ newPoint.loc() ] = newIt;
- GEODEBUG( "Inserting exact point: " << GeoPoint( node , exactDistance, exactWithin ).toString() );
+ GEODEBUG( "\t\tInserted new point " << newPoint.toString() << " approx : " << keyD );
- // Add a point for this location
- _points.insert( GeoPoint( node , exactDistance, exactWithin ) );
+ assert( _max > 0 );
- if( exactDistance > maxDistance ) maxDistance = exactDistance;
- }
+ Holder::iterator lastPtIt = _points.end();
+ lastPtIt--;
+ _farthest = lastPtIt->distance() + 2 * _distError;
- return maxDistance;
+ return _points.size() - prevSize;
}
- // Always in distance units, whether radians or normal
- double farthest() const {
- return _farthest;
- }
+ // Removes extra points from end of _points set.
+ // Check can be a bit costly if we have lots of exact points near borders,
+ // so we'll do this every once in a while.
+ void processExtraPoints(){
- bool inErrorBounds( double approxD ) const {
- return approxD >= _maxDistance - _distError && approxD <= _maxDistance + _distError;
- }
+ if( _points.size() == 0 ) return;
- virtual void addSpecific( const GeoKeyNode& node , double d, bool newDoc ) {
+ int prevSize = _points.size();
- GEODEBUG( "\t\t" << GeoHash( node.key.firstElement() ) << "\t" << node.recordLoc.obj() << "\t" << d );
+ // Erase all points from the set with a position >= _max *and*
+ // whose distance isn't close to the _max - 1 position distance
- double maxDistance = exactDistances( node );
- if( maxDistance >= 0 ){
+ int numToErase = _points.size() - _max;
+ if( numToErase < 0 ) numToErase = 0;
- // Recalculate the current furthest point.
- int numToErase = _points.size() - _max;
- while( numToErase-- > 0 ){
- _points.erase( --_points.end() );
- }
+ // Get the first point definitely in the _points array
+ Holder::iterator startErase = _points.end();
+ for( int i = 0; i < numToErase + 1; i++ ) startErase--;
+ _farthest = startErase->distance() + 2 * _distError;
- _farthest = boost::next( _points.end(), -1 )->_exactDistance;
+ GEODEBUG( "\t\tPotentially erasing " << numToErase << " points, " << " size : " << _points.size() << " max : " << _max << " dist : " << startErase->distance() << " farthest dist : " << _farthest << " from error : " << _distError );
+ startErase++;
+ while( numToErase > 0 && startErase->distance() <= _farthest ){
+ GEODEBUG( "\t\tNot erasing point " << startErase->toString() );
+ numToErase--;
+ startErase++;
+ assert( startErase != _points.end() || numToErase == 0 );
}
+
+ if( _uniqueDocs ){
+ for( Holder::iterator i = startErase; i != _points.end(); ++i )
+ _seenPts.erase( i->loc() );
+ }
+
+ _points.erase( startErase, _points.end() );
+
+ int diff = _points.size() - prevSize;
+ if( diff > 0 ) _found += diff;
+ else _found -= -diff;
+
}
unsigned _max;
@@ -1760,17 +1743,20 @@ namespace mongo {
double _distError;
double _farthest;
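+        // Maps a document's DiskLoc to its current entry in _points; only
+        // maintained when _uniqueDocs is set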
+ map< DiskLoc , Holder::iterator > _seenPts;
+
};
class GeoSearch : public GeoHopper {
public:
- GeoSearch( const Geo2dType * g , const Point& startPt , int numWanted=100 , BSONObj filter=BSONObj() , double maxDistance = numeric_limits<double>::max() , GeoDistType type=GEO_PLAIN )
- : GeoHopper( g , numWanted , startPt , filter , maxDistance, type ),
+ GeoSearch( const Geo2dType * g , const Point& startPt , int numWanted=100 , BSONObj filter=BSONObj() , double maxDistance = numeric_limits<double>::max() , GeoDistType type=GEO_PLAIN, bool uniqueDocs = false, bool needDistance = false )
+ : GeoHopper( g , numWanted , startPt , filter , maxDistance, type, uniqueDocs, needDistance ),
_start( g->hash( startPt._x, startPt._y ) ),
- _numWanted( numWanted ),
- _type(type)
+ // TODO: Remove numWanted...
+ _numWanted( numWanted ),
+ _type(type)
{
assert( g->getDetails() );
@@ -1795,6 +1781,8 @@ namespace mongo {
void exec() {
+ if( _numWanted == 0 ) return;
+
/*
* Search algorithm
* 1) use geohash prefix to find X items
@@ -1805,7 +1793,7 @@ namespace mongo {
#ifdef GEODEBUGGING
- log() << "start near search for points near " << _near << " (max dist " << _maxDistance << ")" << endl;
+ log() << "start near search for " << _numWanted << " points near " << _near << " (max dist " << _maxDistance << ")" << endl;
#endif
@@ -1815,13 +1803,16 @@ namespace mongo {
long long f = found();
assert( f <= 0x7fffffff );
fillStack( maxPointsHeuristic, _numWanted - static_cast<int>(f) , true );
+ processExtraPoints();
} while( _state != DONE && _state != DONE_NEIGHBOR &&
found() < _numWanted &&
(! _prefix.constrains() || _g->sizeEdge( _prefix ) <= _scanDistance ) );
// If we couldn't scan or scanned everything, we're done
- if( _state == DONE ) return;
-
+ if( _state == DONE ){
+ expandEndPoints();
+ return;
+ }
}
#ifdef GEODEBUGGING
@@ -1856,6 +1847,8 @@ namespace mongo {
_want = Box( _near._x - farDist , _near._y - farDist , farDist * 2 );
GEODEBUGPRINT( _want.toString() );
+ // log() << "Found : " << found() << " wanted : " << _numWanted << " Far distance : " << farDist << " box : " << _want << endl;
+
// Remember the far distance for further scans
_scanDistance = farDist;
@@ -1874,15 +1867,195 @@ namespace mongo {
// Do regular search in the full region
do {
fillStack( maxPointsHeuristic );
+ processExtraPoints();
}
while( _state != DONE );
}
- GEODEBUG( "done near search" )
+ GEODEBUG( "done near search with " << _points.size() << " points " );
+
+ expandEndPoints();
}
+ void addExactPoints( const GeoPoint& pt, Holder& points, bool force ){
+ int before, after;
+ addExactPoints( pt, points, before, after, force );
+ }
+
+ void addExactPoints( const GeoPoint& pt, Holder& points, int& before, int& after, bool force ){
+
+ before = 0;
+ after = 0;
+
+ GEODEBUG( "Adding exact points for " << pt.toString() );
+
+ if( pt.isExact() ){
+ if( force ) points.insert( pt );
+ return;
+ }
+
+ vector<BSONObj> locs;
+ getPointsFor( pt.key(), pt.obj(), locs, _uniqueDocs );
+
+ GeoPoint nearestPt( pt, -1, true );
+
+ for( vector<BSONObj>::iterator i = locs.begin(); i != locs.end(); i++ ){
+
+ Point loc( *i );
+
+ double d;
+ if( ! exactDocCheck( loc, d ) ) continue;
+
+ if( _uniqueDocs && ( nearestPt.distance() < 0 || d < nearestPt.distance() ) ){
+ nearestPt._distance = d;
+ nearestPt._pt = *i;
+ continue;
+ }
+ else if( ! _uniqueDocs ){
+ GeoPoint exactPt( pt, d, true );
+ exactPt._pt = *i;
+ GEODEBUG( "Inserting exact pt " << exactPt.toString() << " for " << pt.toString() << " exact : " << d << " is less? " << ( exactPt < pt ) << " bits : " << _g->_bits );
+ points.insert( exactPt );
+ exactPt < pt ? before++ : after++;
+ }
+
+ }
+
+ if( _uniqueDocs && nearestPt.distance() >= 0 ){
+ GEODEBUG( "Inserting unique exact pt " << nearestPt.toString() << " for " << pt.toString() << " exact : " << nearestPt.distance() << " is less? " << ( nearestPt < pt ) << " bits : " << _g->_bits );
+ points.insert( nearestPt );
+ if( nearestPt < pt ) before++;
+ else after++;
+ }
+
+ }
+
+ // TODO: Refactor this back into the holder class; allow it to run periodically when we're seeing a lot of pts
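+ // Converts the accumulated approximate points into exact points: first trims the
+ // set down toward _max entries (re-checking borderline points exactly), then
+ // expands each remaining approximate point into its exact location(s)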
+ void expandEndPoints( bool finish = true ){
+
+ processExtraPoints();
+
+ // All points in the set *could* be within maxDistance
+
+ // Step 1 : Trim points to max size
+ // TODO: This check will do little for now, but is a skeleton for future work in incremental $near
+ // searches
+ if( _max > 0 ){
+
+ int numToErase = _points.size() - _max;
+
+ if( numToErase > 0 ){
+
+ Holder tested;
+
+ // Work backward through all points we're not sure belong in the set
+ Holder::iterator maybePointIt = _points.end();
+ maybePointIt--;
+ double approxMin = maybePointIt->distance() - 2 * _distError;
+
+ GEODEBUG( "\t\tNeed to erase " << numToErase << " max : " << _max << " min dist " << approxMin << " error : " << _distError << " starting from : " << (*maybePointIt).toString() );
+
+ // Move each uncertain point into the tested set as exact point(s)
+ int erased = 0;
+ while( _points.size() > 0 && ( maybePointIt->distance() >= approxMin || erased < numToErase ) ){
+
+ Holder::iterator current = maybePointIt--;
+
+ addExactPoints( *current, tested, true );
+ _points.erase( current );
+ erased++;
+
+ if( tested.size() )
+ approxMin = tested.begin()->distance() - 2 * _distError;
+
+ }
+
+ GEODEBUG( "\t\tEnding search at point " << ( _points.size() == 0 ? "(beginning)" : maybePointIt->toString() ) );
+
+ int numToAddBack = erased - numToErase;
+ assert( numToAddBack >= 0 );
+
+ GEODEBUG( "\t\tNum tested valid : " << tested.size() << " erased : " << erased << " added back : " << numToAddBack );
+
+#ifdef GEODEBUGGING
+ for( Holder::iterator it = tested.begin(); it != tested.end(); it++ ){
+ log() << "Tested Point: " << *it << endl;
+ }
+#endif
+ Holder::iterator testedIt = tested.begin();
+ for( int i = 0; i < numToAddBack && testedIt != tested.end(); i++ ){
+ _points.insert( *testedIt );
+ testedIt++;
+ }
+ }
+ }
+
+#ifdef GEODEBUGGING
+ for( Holder::iterator it = _points.begin(); it != _points.end(); it++ ){
+ log() << "Point: " << *it << endl;
+ }
+#endif
+ // We've now trimmed the first set of unneeded points
+
+ GEODEBUG( "\t\t Start expanding, num points : " << _points.size() << " max : " << _max );
+
+ // Step 2: iterate through all points and add as needed
+
+ unsigned expandedPoints = 0;
+ Holder::iterator it = _points.begin();
+ double expandWindowEnd = -1;
+ while( it != _points.end() ){
+ const GeoPoint& currPt = *it;
+
+ // TODO: If one point is exact, maybe not 2 * _distError
+
+ // See if we're in an expand window
+ bool inWindow = currPt.distance() <= expandWindowEnd;
+ // If we're not, and we're done with points, break
+ if( ! inWindow && expandedPoints >= _max ) break;
+
+ bool expandApprox = ! currPt.isExact() && ( ! _uniqueDocs || ( finish && _needDistance ) || inWindow );
+
+ if( expandApprox ){
+
+ // Add new point(s)
+ // These will only be added in a radius of 2 * _distError around the current point,
+ // so should not affect previously valid points.
+ int before, after;
+ addExactPoints( currPt, _points, before, after, false );
+ expandedPoints += before;
+
+ if( _max > 0 && expandedPoints < _max )
+ expandWindowEnd = currPt.distance() + 2 * _distError;
+
+ // Iterate to the next point
+ Holder::iterator current = it++;
+ // Erase the current point
+ _points.erase( current );
+
+ }
+ else{
+ expandedPoints++;
+ it++;
+ }
+ }
+
+ GEODEBUG( "\t\tFinished expanding, num points : " << _points.size() << " max : " << _max );
+
+ // Finish
+ // TODO: Don't really need to trim?
+ for( ; expandedPoints > _max; expandedPoints-- ) it--;
+ _points.erase( it, _points.end() );
+
+#ifdef GEODEBUGGING
+ for( Holder::iterator it = _points.begin(); it != _points.end(); it++ ){
+ log() << "Point: " << *it << endl;
+ }
+#endif
+ }
+
virtual GeoHash expandStartHash(){
return _start;
}
@@ -1915,7 +2088,7 @@ namespace mongo {
: GeoCursorBase( s->_spec ) ,
_s( s ) , _cur( s->_points.begin() ) , _end( s->_points.end() ), _nscanned() {
if ( _cur != _end ) {
- ++_nscanned;
+ ++_nscanned;
}
}
@@ -1975,8 +2148,8 @@ namespace mongo {
class GeoCircleBrowse : public GeoBrowse {
public:
- GeoCircleBrowse( const Geo2dType * g , const BSONObj& circle , BSONObj filter = BSONObj() , const string& type="$center")
- : GeoBrowse( g , "circle" , filter ) {
+ GeoCircleBrowse( const Geo2dType * g , const BSONObj& circle , BSONObj filter = BSONObj() , const string& type="$center", bool uniqueDocs = true )
+ : GeoBrowse( g , "circle" , filter, uniqueDocs ) {
uassert( 13060 , "$center needs 2 fields (middle,max distance)" , circle.nFields() == 2 );
@@ -2040,19 +2213,16 @@ namespace mongo {
return cur.intersects( _bBox );
}
- virtual bool checkDistance( const GeoKeyNode& node, double& d ) {
-
- GeoHash h( node._key.firstElement(), _g->_bits );
+ virtual KeyResult approxKeyCheck( const Point& p, double& d ) {
// Inexact hash distance checks.
double error = 0;
switch (_type) {
case GEO_PLAIN:
- d = _g->distance( _start , h );
+ d = _startPt.distance( p );
error = _g->_error;
break;
case GEO_SPHERE: {
- Point p( _g, h );
checkEarthBounds( p );
d = spheredist_deg( _startPt, p );
error = _g->_errorSphere;
@@ -2062,40 +2232,25 @@ namespace mongo {
}
// If our distance is in the error bounds...
- if( d >= _maxDistance - error && d <= _maxDistance + error ) {
-
- // Do exact check
- vector< BSONObj > locs;
- _g->getKeys( node.recordLoc.obj(), locs );
-
- for( vector< BSONObj >::iterator i = locs.begin(); i != locs.end(); ++i ) {
-
- GEODEBUG( "Inexact distance : " << d << " vs " << _maxDistance << " from " << ( *i ).toString() << " due to error " << error );
-
- Point p( *i );
- // Exact distance checks.
- switch (_type) {
- case GEO_PLAIN: {
- if( _startPt.distanceWithin( p, _maxDistance ) ) return true;
- break;
- }
- case GEO_SPHERE:
- // Ignore all locations not hashed to the key's hash, since spherical calcs are
- // more expensive.
- if( _g->_hash( *i ) != h ) break;
- checkEarthBounds( p );
- if( spheredist_deg( _startPt , p ) <= _maxDistance ) return true;
- break;
- default: assert( false );
- }
+ if( d >= _maxDistance - error && d <= _maxDistance + error ) return BORDER;
+ return d > _maxDistance ? BAD : GOOD;
+ }
- }
+ virtual bool exactDocCheck( const Point& p, double& d ){
- return false;
+ switch (_type) {
+ case GEO_PLAIN: {
+ if( _startPt.distanceWithin( p, _maxDistance ) ) return true;
+ break;
+ }
+ case GEO_SPHERE:
+ checkEarthBounds( p );
+ if( spheredist_deg( _startPt , p ) <= _maxDistance ) return true;
+ break;
+ default: assert( false );
}
- GEODEBUG( "\t " << h << "\t" << d );
- return d <= _maxDistance;
+ return false;
}
GeoDistType _type;
@@ -2111,8 +2266,8 @@ namespace mongo {
class GeoBoxBrowse : public GeoBrowse {
public:
- GeoBoxBrowse( const Geo2dType * g , const BSONObj& box , BSONObj filter = BSONObj() )
- : GeoBrowse( g , "box" , filter ) {
+ GeoBoxBrowse( const Geo2dType * g , const BSONObj& box , BSONObj filter = BSONObj(), bool uniqueDocs = true )
+ : GeoBrowse( g , "box" , filter, uniqueDocs ) {
uassert( 13063 , "$box needs 2 fields (bottomLeft,topRight)" , box.nFields() == 2 );
@@ -2133,7 +2288,7 @@ namespace mongo {
_fudge = _g->_error;
_wantLen = _fudge +
std::max( ( _want._max._x - _want._min._x ) ,
- ( _want._max._y - _want._min._y ) );
+ ( _want._max._y - _want._min._y ) ) / 2;
ok();
}
@@ -2171,39 +2326,14 @@ namespace mongo {
return cur.intersects( _want );
}
- virtual bool checkDistance( const GeoKeyNode& node, double& d ) {
-
- GeoHash h( node._key.firstElement() );
- Point approxPt( _g, h );
-
- bool approxInside = _want.inside( approxPt, _fudge );
+ virtual KeyResult approxKeyCheck( const Point& p, double& d ) {
+ if( _want.onBoundary( p, _fudge ) ) return BORDER;
+ else return _want.inside( p, _fudge ) ? GOOD : BAD;
- if( approxInside && _want.onBoundary( approxPt, _fudge ) ) {
-
- // Do exact check
- vector< BSONObj > locs;
- _g->getKeys( node.recordLoc.obj(), locs );
-
- for( vector< BSONObj >::iterator i = locs.begin(); i != locs.end(); ++i ) {
- if( _want.inside( Point( *i ) ) ) {
-
- GEODEBUG( "found exact point : " << _want.toString()
- << " exact point : " << Point( *i ).toString()
- << " approx point : " << approxPt.toString()
- << " because of error: " << _fudge );
-
- return true;
- }
- }
-
- return false;
- }
-
- GEODEBUG( "checking point : " << _want.toString()
- << " point: " << approxPt.toString()
- << " in : " << _want.inside( approxPt, _fudge ) );
+ }
- return approxInside;
+ virtual bool exactDocCheck( const Point& p, double& d ){
+ return _want.inside( p );
}
Box _want;
@@ -2218,7 +2348,7 @@ namespace mongo {
public:
GeoPolygonBrowse( const Geo2dType* g , const BSONObj& polyPoints ,
- BSONObj filter = BSONObj() ) : GeoBrowse( g , "polygon" , filter ) {
+ BSONObj filter = BSONObj(), bool uniqueDocs = true ) : GeoBrowse( g , "polygon" , filter, uniqueDocs ) {
GEODEBUG( "In Polygon" )
@@ -2233,7 +2363,7 @@ namespace mongo {
uassert( 14030, "polygon must be defined by three points or more", _poly.size() >= 3 );
_bounds = _poly.bounds();
- _maxDim = _bounds.maxDim();
+ _maxDim = _g->_error + _bounds.maxDim() / 2;
ok();
}
@@ -2253,51 +2383,17 @@ namespace mongo {
return cur.intersects( _bounds );
}
- virtual bool checkDistance( const GeoKeyNode& node, double& d ) {
-
- GeoHash h( node._key.firstElement(), _g->_bits );
- Point p( _g, h );
+ virtual KeyResult approxKeyCheck( const Point& p, double& d ) {
int in = _poly.contains( p, _g->_error );
- if( in != 0 ) {
-
- if ( in > 0 ) {
- GEODEBUG( "Point: [" << p._x << ", " << p._y << "] approx in polygon" );
- }
- else {
- GEODEBUG( "Point: [" << p._x << ", " << p._y << "] approx not in polygon" );
- }
-
- if( in != 0 ) return in > 0;
- }
-
- // Do exact check, since to approximate check was inconclusive
- vector< BSONObj > locs;
- _g->getKeys( node.recordLoc.obj(), locs );
-
- for( vector< BSONObj >::iterator i = locs.begin(); i != locs.end(); ++i ) {
-
- Point p( *i );
- // Ignore all points not hashed to the current value
- // This implicitly assumes hashing is less costly than the polygon check, which
- // may or may not be true.
- if( _g->hash( p ) != h ) continue;
+ if( in == 0 ) return BORDER;
+ else return in > 0 ? GOOD : BAD;
- // Use the point in polygon algorithm to see if the point
- // is contained in the polygon.
- bool in = _poly.contains( p );
- if ( in ) {
- GEODEBUG( "Point: [" << p._x << ", " << p._y << "] exactly in polygon" );
- }
- else {
- GEODEBUG( "Point: [" << p._x << ", " << p._y << "] exactly not in polygon" );
- }
- if( in ) return in;
-
- }
+ }
- return false;
+ virtual bool exactDocCheck( const Point& p, double& d ){
+ return _poly.contains( p );
}
private:
@@ -2324,7 +2420,7 @@ namespace mongo {
if ( e.type() == Array ) {
// If we get an array query, assume it is a location, and do a $within { $center : [[x, y], 0] } search
- shared_ptr<Cursor> c( new GeoCircleBrowse( this , BSON( "0" << e.embeddedObjectUserCheck() << "1" << 0 ), query.filterFieldsUndotted( BSON( _geo << "" ), false ) ) );
+ shared_ptr<Cursor> c( new GeoCircleBrowse( this , BSON( "0" << e.embeddedObjectUserCheck() << "1" << 0 ), query.filterFieldsUndotted( BSON( _geo << "" ), false ), "$center", true ) );
return c;
}
else if ( e.type() == Object ) {
@@ -2364,33 +2460,44 @@ namespace mongo {
if ( e.isNumber() )
maxDistance = e.numberDouble();
}
- shared_ptr<GeoSearch> s( new GeoSearch( this , Point( e ) , numWanted , query , maxDistance, type ) );
+
+ bool uniqueDocs = false;
+ if( ! n["$uniqueDocs"].eoo() ) uniqueDocs = n["$uniqueDocs"].trueValue();
+
+ shared_ptr<GeoSearch> s( new GeoSearch( this , Point( e ) , numWanted , query , maxDistance, type, uniqueDocs ) );
s->exec();
shared_ptr<Cursor> c;
c.reset( new GeoSearchCursor( s ) );
return c;
}
case BSONObj::opWITHIN: {
+
e = e.embeddedObject().firstElement();
uassert( 13057 , "$within has to take an object or array" , e.isABSONObj() );
+
+ BSONObj context = e.embeddedObject();
e = e.embeddedObject().firstElement();
string type = e.fieldName();
+
+ bool uniqueDocs = true;
+ if( ! context["$uniqueDocs"].eoo() ) uniqueDocs = context["$uniqueDocs"].trueValue();
+
if ( startsWith(type, "$center") ) {
uassert( 13059 , "$center has to take an object or array" , e.isABSONObj() );
- shared_ptr<Cursor> c( new GeoCircleBrowse( this , e.embeddedObjectUserCheck() , query , type) );
+ shared_ptr<Cursor> c( new GeoCircleBrowse( this , e.embeddedObjectUserCheck() , query , type, uniqueDocs ) );
return c;
}
else if ( type == "$box" ) {
uassert( 13065 , "$box has to take an object or array" , e.isABSONObj() );
- shared_ptr<Cursor> c( new GeoBoxBrowse( this , e.embeddedObjectUserCheck() , query ) );
+ shared_ptr<Cursor> c( new GeoBoxBrowse( this , e.embeddedObjectUserCheck() , query, uniqueDocs ) );
return c;
}
else if ( startsWith( type, "$poly" ) ) {
uassert( 14029 , "$polygon has to take an object or array" , e.isABSONObj() );
- shared_ptr<Cursor> c( new GeoPolygonBrowse( this , e.embeddedObjectUserCheck() , query ) );
+ shared_ptr<Cursor> c( new GeoPolygonBrowse( this , e.embeddedObjectUserCheck() , query, uniqueDocs ) );
return c;
}
- throw UserException( 13058 , (string)"unknown $with type: " + type );
+ throw UserException( 13058 , (string)"unknown $within type: " + type );
}
default:
// Otherwise... assume the object defines a point, and we want to do a zero-radius $within $center
@@ -2414,7 +2521,7 @@ namespace mongo {
bool slaveOk() const { return true; }
void help(stringstream& h) const { h << "http://www.mongodb.org/display/DOCS/Geospatial+Indexing#GeospatialIndexing-geoNearCommand"; }
bool slaveOverrideOk() { return true; }
- bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
string ns = dbname + "." + cmdObj.firstElement().valuestr();
NamespaceDetails * d = nsdetails( ns.c_str() );
@@ -2450,6 +2557,12 @@ namespace mongo {
assert( numWanted >= 0 );
}
+ bool uniqueDocs = false;
+ if( ! cmdObj["uniqueDocs"].eoo() ) uniqueDocs = cmdObj["uniqueDocs"].trueValue();
+
+ bool includeLocs = false;
+ if( ! cmdObj["includeLocs"].eoo() ) includeLocs = cmdObj["includeLocs"].trueValue();
+
uassert(13046, "'near' param missing/invalid", !cmdObj["near"].eoo());
const Point n( cmdObj["near"] );
result.append( "near" , g->_tohash( cmdObj["near"] ).toString() );
@@ -2466,7 +2579,7 @@ namespace mongo {
if ( cmdObj["spherical"].trueValue() )
type = GEO_SPHERE;
- GeoSearch gs( g , n , numWanted , filter , maxDistance , type );
+ GeoSearch gs( g , n , numWanted , filter , maxDistance , type, uniqueDocs, true );
if ( cmdObj["start"].type() == String) {
GeoHash start ((string) cmdObj["start"].valuestr());
@@ -2486,11 +2599,12 @@ namespace mongo {
for ( GeoHopper::Holder::iterator i=gs._points.begin(); i!=gs._points.end(); i++ ) {
const GeoPoint& p = *i;
- double dis = distanceMultiplier * p._exactDistance;
+ double dis = distanceMultiplier * p.distance();
totalDistance += dis;
BSONObjBuilder bb( arr.subobjStart( BSONObjBuilder::numStr( x++ ) ) );
bb.append( "dis" , dis );
+ if( includeLocs ) bb.append( "loc" , p._pt );
bb.append( "obj" , p._o );
bb.done();
}
@@ -2516,7 +2630,7 @@ namespace mongo {
virtual LockType locktype() const { return READ; }
bool slaveOk() const { return true; }
bool slaveOverrideOk() { return true; }
- bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
string ns = dbname + "." + cmdObj.firstElement().valuestr();
NamespaceDetails * d = nsdetails( ns.c_str() );
@@ -2571,4 +2685,248 @@ namespace mongo {
} geoWalkCmd;
+ struct GeoUnitTest : public UnitTest {
+
+ int round( double d ) {
+ return (int)(.5+(d*1000));
+ }
+
+#define GEOHEQ(a,b) if ( a.toString() != b ){ cout << "[" << a.toString() << "] != [" << b << "]" << endl; assert( a == GeoHash(b) ); }
+
+ void run() {
+ assert( ! GeoHash::isBitSet( 0 , 0 ) );
+ assert( ! GeoHash::isBitSet( 0 , 31 ) );
+ assert( GeoHash::isBitSet( 1 , 31 ) );
+
+ IndexSpec i( BSON( "loc" << "2d" ) );
+ Geo2dType g( &geo2dplugin , &i );
+ {
+ double x = 73.01212;
+ double y = 41.352964;
+ BSONObj in = BSON( "x" << x << "y" << y );
+ GeoHash h = g._hash( in );
+ BSONObj out = g._unhash( h );
+ assert( round(x) == round( out["x"].number() ) );
+ assert( round(y) == round( out["y"].number() ) );
+ assert( round( in["x"].number() ) == round( out["x"].number() ) );
+ assert( round( in["y"].number() ) == round( out["y"].number() ) );
+ }
+
+ {
+ double x = -73.01212;
+ double y = 41.352964;
+ BSONObj in = BSON( "x" << x << "y" << y );
+ GeoHash h = g._hash( in );
+ BSONObj out = g._unhash( h );
+ assert( round(x) == round( out["x"].number() ) );
+ assert( round(y) == round( out["y"].number() ) );
+ assert( round( in["x"].number() ) == round( out["x"].number() ) );
+ assert( round( in["y"].number() ) == round( out["y"].number() ) );
+ }
+
+ {
+ GeoHash h( "0000" );
+ h.move( 0 , 1 );
+ GEOHEQ( h , "0001" );
+ h.move( 0 , -1 );
+ GEOHEQ( h , "0000" );
+
+ h.init( "0001" );
+ h.move( 0 , 1 );
+ GEOHEQ( h , "0100" );
+ h.move( 0 , -1 );
+ GEOHEQ( h , "0001" );
+
+
+ h.init( "0000" );
+ h.move( 1 , 0 );
+ GEOHEQ( h , "0010" );
+ }
+
+ {
+ Box b( 5 , 5 , 2 );
+ assert( "(5,5) -->> (7,7)" == b.toString() );
+ }
+
+ {
+ GeoHash a = g.hash( 1 , 1 );
+ GeoHash b = g.hash( 4 , 5 );
+ assert( 5 == (int)(g.distance( a , b ) ) );
+ a = g.hash( 50 , 50 );
+ b = g.hash( 42 , 44 );
+ assert( round(10) == round(g.distance( a , b )) );
+ }
+
+ {
+ GeoHash x("0000");
+ assert( 0 == x.getHash() );
+ x.init( 0 , 1 , 32 );
+ GEOHEQ( x , "0000000000000000000000000000000000000000000000000000000000000001" )
+
+ assert( GeoHash( "1100").hasPrefix( GeoHash( "11" ) ) );
+ assert( ! GeoHash( "1000").hasPrefix( GeoHash( "11" ) ) );
+ }
+
+ {
+ GeoHash x("1010");
+ GEOHEQ( x , "1010" );
+ GeoHash y = x + "01";
+ GEOHEQ( y , "101001" );
+ }
+
+ {
+
+ GeoHash a = g.hash( 5 , 5 );
+ GeoHash b = g.hash( 5 , 7 );
+ GeoHash c = g.hash( 100 , 100 );
+ /*
+ cout << "a: " << a << endl;
+ cout << "b: " << b << endl;
+ cout << "c: " << c << endl;
+
+ cout << "a: " << a.toStringHex1() << endl;
+ cout << "b: " << b.toStringHex1() << endl;
+ cout << "c: " << c.toStringHex1() << endl;
+ */
+ BSONObj oa = a.wrap();
+ BSONObj ob = b.wrap();
+ BSONObj oc = c.wrap();
+ /*
+ cout << "a: " << oa.hexDump() << endl;
+ cout << "b: " << ob.hexDump() << endl;
+ cout << "c: " << oc.hexDump() << endl;
+ */
+ assert( oa.woCompare( ob ) < 0 );
+ assert( oa.woCompare( oc ) < 0 );
+
+ }
+
+ {
+ GeoHash x( "000000" );
+ x.move( -1 , 0 );
+ GEOHEQ( x , "101010" );
+ x.move( 1 , -1 );
+ GEOHEQ( x , "010101" );
+ x.move( 0 , 1 );
+ GEOHEQ( x , "000000" );
+ }
+
+ {
+ GeoHash prefix( "110011000000" );
+ GeoHash entry( "1100110000011100000111000001110000011100000111000001000000000000" );
+ assert( ! entry.hasPrefix( prefix ) );
+
+ entry = GeoHash("1100110000001100000111000001110000011100000111000001000000000000");
+ assert( entry.toString().find( prefix.toString() ) == 0 );
+ assert( entry.hasPrefix( GeoHash( "1100" ) ) );
+ assert( entry.hasPrefix( prefix ) );
+ }
+
+ {
+ GeoHash a = g.hash( 50 , 50 );
+ GeoHash b = g.hash( 48 , 54 );
+ assert( round( 4.47214 ) == round( g.distance( a , b ) ) );
+ }
+
+
+ {
+ Box b( Point( 29.762283 , -95.364271 ) , Point( 29.764283000000002 , -95.36227099999999 ) );
+ assert( b.inside( 29.763 , -95.363 ) );
+ assert( ! b.inside( 32.9570255 , -96.1082497 ) );
+ assert( ! b.inside( 32.9570255 , -96.1082497 , .01 ) );
+ }
+
+ {
+ GeoHash a( "11001111" );
+ assert( GeoHash( "11" ) == a.commonPrefix( GeoHash("11") ) );
+ assert( GeoHash( "11" ) == a.commonPrefix( GeoHash("11110000") ) );
+ }
+
+ {
+ int N = 10000;
+ {
+ Timer t;
+ for ( int i=0; i<N; i++ ) {
+ unsigned x = (unsigned)rand();
+ unsigned y = (unsigned)rand();
+ GeoHash h( x , y );
+ unsigned a,b;
+ h.unhash_slow( a,b );
+ assert( a == x );
+ assert( b == y );
+ }
+ //cout << "slow: " << t.millis() << endl;
+ }
+
+ {
+ Timer t;
+ for ( int i=0; i<N; i++ ) {
+ unsigned x = (unsigned)rand();
+ unsigned y = (unsigned)rand();
+ GeoHash h( x , y );
+ unsigned a,b;
+ h.unhash_fast( a,b );
+ assert( a == x );
+ assert( b == y );
+ }
+ //cout << "fast: " << t.millis() << endl;
+ }
+
+ }
+
+ {
+ // see http://en.wikipedia.org/wiki/Great-circle_distance#Worked_example
+
+ {
+ Point BNA (-86.67, 36.12);
+ Point LAX (-118.40, 33.94);
+
+ double dist1 = spheredist_deg(BNA, LAX);
+ double dist2 = spheredist_deg(LAX, BNA);
+
+ // target is 0.45306
+ assert( 0.45305 <= dist1 && dist1 <= 0.45307 );
+ assert( 0.45305 <= dist2 && dist2 <= 0.45307 );
+ }
+ {
+ Point BNA (-1.5127, 0.6304);
+ Point LAX (-2.0665, 0.5924);
+
+ double dist1 = spheredist_rad(BNA, LAX);
+ double dist2 = spheredist_rad(LAX, BNA);
+
+ // target is 0.45306
+ assert( 0.45305 <= dist1 && dist1 <= 0.45307 );
+ assert( 0.45305 <= dist2 && dist2 <= 0.45307 );
+ }
+ {
+ Point JFK (-73.77694444, 40.63861111 );
+ Point LAX (-118.40, 33.94);
+
+ double dist = spheredist_deg(JFK, LAX) * EARTH_RADIUS_MILES;
+ assert( dist > 2469 && dist < 2470 );
+ }
+
+ {
+ Point BNA (-86.67, 36.12);
+ Point LAX (-118.40, 33.94);
+ Point JFK (-73.77694444, 40.63861111 );
+ assert( spheredist_deg(BNA, BNA) < 1e-6);
+ assert( spheredist_deg(LAX, LAX) < 1e-6);
+ assert( spheredist_deg(JFK, JFK) < 1e-6);
+
+ Point zero (0, 0);
+ Point antizero (0,-180);
+
+ // these were known to cause NaN
+ assert( spheredist_deg(zero, zero) < 1e-6);
+ assert( fabs(M_PI-spheredist_deg(zero, antizero)) < 1e-6);
+ assert( fabs(M_PI-spheredist_deg(antizero, zero)) < 1e-6);
+ }
+ }
+ }
+ } geoUnitTest;
+
+
}
+
diff --git a/db/geo/core.h b/db/geo/core.h
index 74f4b6e8269..b77997844f2 100644
--- a/db/geo/core.h
+++ b/db/geo/core.h
@@ -278,14 +278,19 @@ namespace mongo {
return *this;
}
- bool operator==(const GeoHash& h ) {
+ bool operator==(const GeoHash& h ) const {
return _hash == h._hash && _bits == h._bits;
}
- bool operator!=(const GeoHash& h ) {
+ bool operator!=(const GeoHash& h ) const {
return !( *this == h );
}
+ bool operator<(const GeoHash& h ) const {
+ if( _hash != h._hash ) return _hash < h._hash;
+ return _bits < h._bits;
+ }
+
GeoHash& operator+=( const char * s ) {
unsigned pos = _bits * 2;
_bits += strlen(s) / 2;
diff --git a/db/geo/haystack.cpp b/db/geo/haystack.cpp
index fd6b2392d6a..a5dd478f625 100644
--- a/db/geo/haystack.cpp
+++ b/db/geo/haystack.cpp
@@ -264,7 +264,7 @@ namespace mongo {
virtual LockType locktype() const { return READ; }
bool slaveOk() const { return true; }
bool slaveOverrideOk() const { return true; }
- bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
string ns = dbname + "." + cmdObj.firstElement().valuestr();
diff --git a/db/index.cpp b/db/index.cpp
index 8aebef45e8e..67a0d44e444 100644
--- a/db/index.cpp
+++ b/db/index.cpp
@@ -27,11 +27,6 @@
namespace mongo {
- /** old (<= v1.8) : 0
- 1 is new version
- */
- const int DefaultIndexVersionNumber = 1;
-
template< class V >
class IndexInterfaceImpl : public IndexInterface {
public:
diff --git a/db/index.h b/db/index.h
index debe2aa9c26..54b06394435 100644
--- a/db/index.h
+++ b/db/index.h
@@ -150,14 +150,18 @@ namespace mongo {
return io.getStringField("ns");
}
- int version() const {
- BSONElement e = info.obj()["v"];
+ static int versionForIndexObj( const BSONObj &obj ) {
+ BSONElement e = obj["v"];
if( e.type() == NumberInt )
return e._numberInt();
// should normally be an int. this is for backward compatibility
int v = e.numberInt();
uassert(14802, "index v field should be Integer type", v == 0);
- return v;
+ return v;
+ }
+
+ int version() const {
+ return versionForIndexObj( info.obj() );
}
/** @return true if index has unique constraint */
diff --git a/db/indexkey.cpp b/db/indexkey.cpp
index cc2cd43daf5..6d6fcc58cae 100644
--- a/db/indexkey.cpp
+++ b/db/indexkey.cpp
@@ -22,9 +22,15 @@
#include "btree.h"
#include "ops/query.h"
#include "background.h"
+#include "../util/text.h"
namespace mongo {
+ /** old (<= v1.8) : 0
+ 1 is new version
+ */
+ const int DefaultIndexVersionNumber = 1;
+
map<string,IndexPlugin*> * IndexPlugin::_plugins;
IndexType::IndexType( const IndexPlugin * plugin , const IndexSpec * spec )
@@ -100,6 +106,14 @@ namespace mongo {
}
{
+ // _undefinedElt
+ BSONObjBuilder b;
+ b.appendUndefined( "" );
+ _undefinedObj = b.obj();
+ _undefinedElt = _undefinedObj.firstElement();
+ }
+
+ {
// handle plugins
string pluginName = IndexPlugin::findPluginName( keyPattern );
if ( pluginName.size() ) {
@@ -116,131 +130,289 @@ namespace mongo {
_finishedInit = true;
}
-
- void IndexSpec::getKeys( const BSONObj &obj, BSONObjSet &keys ) const {
- if ( _indexType.get() ) { //plugin (eg geo)
- _indexType->getKeys( obj , keys );
- return;
- }
- vector<const char*> fieldNames( _fieldNames );
- vector<BSONElement> fixed( _fixed );
- _getKeys( fieldNames , fixed , obj, keys );
- if ( keys.empty() && ! _sparse )
- keys.insert( _nullKey );
+ void assertParallelArrays( const char *first, const char *second ) {
+ stringstream ss;
+ ss << "cannot index parallel arrays [" << first << "] [" << second << "]";
+ uasserted( 10088 , ss.str() );
}
-
- void IndexSpec::_getKeys( vector<const char*> fieldNames , vector<BSONElement> fixed , const BSONObj &obj, BSONObjSet &keys ) const {
- BSONElement arrElt;
- unsigned arrIdx = ~0;
- int numNotFound = 0;
-
- for( unsigned i = 0; i < fieldNames.size(); ++i ) {
- if ( *fieldNames[ i ] == '\0' )
- continue;
-
- BSONElement e = obj.getFieldDottedOrArray( fieldNames[ i ] );
-
- if ( e.eoo() ) {
- e = _nullElt; // no matching field
- numNotFound++;
+
+ class KeyGeneratorV0 {
+ public:
+ KeyGeneratorV0( const IndexSpec &spec ) : _spec( spec ) {}
+
+ void getKeys( const BSONObj &obj, BSONObjSet &keys ) const {
+ if ( _spec._indexType.get() ) { //plugin (eg geo)
+ _spec._indexType->getKeys( obj , keys );
+ return;
}
-
- if ( e.type() != Array )
- fieldNames[ i ] = ""; // no matching field or non-array match
-
- if ( *fieldNames[ i ] == '\0' )
- fixed[ i ] = e; // no need for further object expansion (though array expansion still possible)
-
- if ( e.type() == Array && arrElt.eoo() ) { // we only expand arrays on a single path -- track the path here
- arrIdx = i;
- arrElt = e;
+ vector<const char*> fieldNames( _spec._fieldNames );
+ vector<BSONElement> fixed( _spec._fixed );
+ _getKeys( fieldNames , fixed , obj, keys );
+ if ( keys.empty() && ! _spec._sparse )
+ keys.insert( _spec._nullKey );
+ }
+
+ private:
+ void _getKeys( vector<const char*> fieldNames , vector<BSONElement> fixed , const BSONObj &obj, BSONObjSet &keys ) const {
+ BSONElement arrElt;
+ unsigned arrIdx = ~0;
+ int numNotFound = 0;
+
+ for( unsigned i = 0; i < fieldNames.size(); ++i ) {
+ if ( *fieldNames[ i ] == '\0' )
+ continue;
+
+ BSONElement e = obj.getFieldDottedOrArray( fieldNames[ i ] );
+
+ if ( e.eoo() ) {
+ e = _spec._nullElt; // no matching field
+ numNotFound++;
+ }
+
+ if ( e.type() != Array )
+ fieldNames[ i ] = ""; // no matching field or non-array match
+
+ if ( *fieldNames[ i ] == '\0' )
+ fixed[ i ] = e; // no need for further object expansion (though array expansion still possible)
+
+ if ( e.type() == Array && arrElt.eoo() ) { // we only expand arrays on a single path -- track the path here
+ arrIdx = i;
+ arrElt = e;
+ }
+
+ // enforce single array path here
+ if ( e.type() == Array && e.rawdata() != arrElt.rawdata() ) {
+ assertParallelArrays( e.fieldName(), arrElt.fieldName() );
+ }
}
-
- // enforce single array path here
- if ( e.type() == Array && e.rawdata() != arrElt.rawdata() ) {
- stringstream ss;
- ss << "cannot index parallel arrays [" << e.fieldName() << "] [" << arrElt.fieldName() << "]";
- uasserted( 10088 , ss.str() );
+
+ bool allFound = true; // have we found elements for all field names in the key spec?
+ for( vector<const char*>::const_iterator i = fieldNames.begin(); i != fieldNames.end(); ++i ) {
+ if ( **i != '\0' ) {
+ allFound = false;
+ break;
+ }
}
- }
-
- bool allFound = true; // have we found elements for all field names in the key spec?
- for( vector<const char*>::const_iterator i = fieldNames.begin(); i != fieldNames.end(); ++i ) {
- if ( **i != '\0' ) {
- allFound = false;
- break;
+
+ if ( _spec._sparse && numNotFound == _spec._nFields ) {
+ // we didn't find any fields
+ // so we're not going to index this document
+ return;
}
- }
-
- if ( _sparse && numNotFound == _nFields ) {
- // we didn't find any fields
- // so we're not going to index this document
- return;
- }
-
- bool insertArrayNull = false;
-
- if ( allFound ) {
- if ( arrElt.eoo() ) {
- // no terminal array element to expand
- BSONObjBuilder b(_sizeTracker);
- for( vector< BSONElement >::iterator i = fixed.begin(); i != fixed.end(); ++i )
- b.appendAs( *i, "" );
- keys.insert( b.obj() );
+
+ bool insertArrayNull = false;
+
+ if ( allFound ) {
+ if ( arrElt.eoo() ) {
+ // no terminal array element to expand
+ BSONObjBuilder b(_spec._sizeTracker);
+ for( vector< BSONElement >::iterator i = fixed.begin(); i != fixed.end(); ++i )
+ b.appendAs( *i, "" );
+ keys.insert( b.obj() );
+ }
+ else {
+ // terminal array element to expand, so generate all keys
+ BSONObjIterator i( arrElt.embeddedObject() );
+ if ( i.more() ) {
+ while( i.more() ) {
+ BSONObjBuilder b(_spec._sizeTracker);
+ for( unsigned j = 0; j < fixed.size(); ++j ) {
+ if ( j == arrIdx )
+ b.appendAs( i.next(), "" );
+ else
+ b.appendAs( fixed[ j ], "" );
+ }
+ keys.insert( b.obj() );
+ }
+ }
+ else if ( fixed.size() > 1 ) {
+ insertArrayNull = true;
+ }
+ }
}
else {
- // terminal array element to expand, so generate all keys
+ // nonterminal array element to expand, so recurse
+ assert( !arrElt.eoo() );
BSONObjIterator i( arrElt.embeddedObject() );
if ( i.more() ) {
while( i.more() ) {
- BSONObjBuilder b(_sizeTracker);
- for( unsigned j = 0; j < fixed.size(); ++j ) {
- if ( j == arrIdx )
- b.appendAs( i.next(), "" );
- else
- b.appendAs( fixed[ j ], "" );
+ BSONElement e = i.next();
+ if ( e.type() == Object ) {
+ _getKeys( fieldNames, fixed, e.embeddedObject(), keys );
}
- keys.insert( b.obj() );
}
}
- else if ( fixed.size() > 1 ) {
+ else {
insertArrayNull = true;
}
}
- }
- else {
- // nonterminal array element to expand, so recurse
- assert( !arrElt.eoo() );
- BSONObjIterator i( arrElt.embeddedObject() );
- if ( i.more() ) {
- while( i.more() ) {
- BSONElement e = i.next();
- if ( e.type() == Object ) {
- _getKeys( fieldNames, fixed, e.embeddedObject(), keys );
+
+ if ( insertArrayNull ) {
+ // x : [] - need to insert undefined
+ BSONObjBuilder b(_spec._sizeTracker);
+ for( unsigned j = 0; j < fixed.size(); ++j ) {
+ if ( j == arrIdx ) {
+ b.appendUndefined( "" );
+ }
+ else {
+ BSONElement e = fixed[j];
+ if ( e.eoo() )
+ b.appendNull( "" );
+ else
+ b.appendAs( e , "" );
}
}
+ keys.insert( b.obj() );
}
- else {
- insertArrayNull = true;
+ }
+
+ const IndexSpec &_spec;
+ };
+
+ class KeyGeneratorV1 {
+ public:
+ KeyGeneratorV1( const IndexSpec &spec ) : _spec( spec ) {}
+
+ void getKeys( const BSONObj &obj, BSONObjSet &keys ) const {
+ if ( _spec._indexType.get() ) { //plugin (eg geo)
+ _spec._indexType->getKeys( obj , keys );
+ return;
+ }
+ vector<const char*> fieldNames( _spec._fieldNames );
+ vector<BSONElement> fixed( _spec._fixed );
+ _getKeys( fieldNames , fixed , obj, keys );
+ if ( keys.empty() && ! _spec._sparse )
+ keys.insert( _spec._nullKey );
+ }
+
+ private:
+ /**
+ * @param arrayNestedArray - set if the returned element is an array nested directly within arr.
+ */
+ BSONElement extractNextElement( const BSONObj &obj, const BSONObj &arr, const char *&field, bool &arrayNestedArray ) const {
+ string firstField = mongoutils::str::before( field, '.' );
+ bool haveObjField = !obj.getField( firstField ).eoo();
+ BSONElement arrField = arr.getField( firstField );
+ bool haveArrField = !arrField.eoo();
+
+ // An index component field name cannot exist in both a document array and one of that array's children.
+ uassert( 15855 , "Parallel references while expanding indexed field in array", !haveObjField || !haveArrField );
+
+ arrayNestedArray = false;
+ if ( haveObjField ) {
+ return obj.getFieldDottedOrArray( field );
+ }
+ else if ( haveArrField ) {
+ if ( arrField.type() == Array ) {
+ arrayNestedArray = true;
+ }
+ return arr.getFieldDottedOrArray( field );
}
+ return BSONElement();
}
-
- if ( insertArrayNull ) {
- // x : [] - need to insert undefined
- BSONObjBuilder b(_sizeTracker);
- for( unsigned j = 0; j < fixed.size(); ++j ) {
- if ( j == arrIdx ) {
- b.appendUndefined( "" );
+
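+ /**
+ * Fix the array-matching fields to one member of the expanded array, then
+ * recurse to generate the keys for that member.
+ */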
+ void _getKeysArrEltFixed( vector<const char*> &fieldNames , vector<BSONElement> &fixed , const BSONElement &arrEntry, BSONObjSet &keys, int numNotFound, const BSONElement &arrObjElt, const set< unsigned > &arrIdxs, bool mayExpandArrayUnembedded ) const {
+ // set up any terminal array values
+ for( set<unsigned>::const_iterator j = arrIdxs.begin(); j != arrIdxs.end(); ++j ) {
+ if ( *fieldNames[ *j ] == '\0' ) {
+ fixed[ *j ] = mayExpandArrayUnembedded ? arrEntry : arrObjElt;
+ }
+ }
+ // recurse
+ _getKeys( fieldNames, fixed, ( arrEntry.type() == Object ) ? arrEntry.embeddedObject() : BSONObj(), keys, numNotFound, arrObjElt.embeddedObject() );
+ }
+
+ /**
+ * @param fieldNames - fields to index, may be postfixes in recursive calls
+ * @param fixed - values that have already been identified for their index fields
+ * @param obj - object from which keys should be extracted, based on names in fieldNames
+ * @param keys - set where index keys are written
+ * @param numNotFound - number of index fields that have already been identified as missing
+ * @param array - array from which keys should be extracted, based on names in fieldNames
+ * If obj and array are both nonempty, obj will be one of the elements of array.
+ */
+ void _getKeys( vector<const char*> fieldNames , vector<BSONElement> fixed , const BSONObj &obj, BSONObjSet &keys, int numNotFound = 0, const BSONObj &array = BSONObj() ) const {
+ BSONElement arrElt;
+ set<unsigned> arrIdxs;
+ bool mayExpandArrayUnembedded = true;
+ for( unsigned i = 0; i < fieldNames.size(); ++i ) {
+ if ( *fieldNames[ i ] == '\0' ) {
+ continue;
+ }
+
+ bool arrayNestedArray;
+ // Extract element matching fieldName[ i ] from object xor array.
+ BSONElement e = extractNextElement( obj, array, fieldNames[ i ], arrayNestedArray );
+
+ if ( e.eoo() ) {
+ // if field not present, set to null
+ fixed[ i ] = _spec._nullElt;
+ // done expanding this field name
+ fieldNames[ i ] = "";
+ numNotFound++;
+ }
+ else if ( e.type() == Array ) {
+ arrIdxs.insert( i );
+ if ( arrElt.eoo() ) {
+ // we only expand arrays on a single path -- track the path here
+ arrElt = e;
+ }
+ else if ( e.rawdata() != arrElt.rawdata() ) {
+ // enforce single array path here
+ assertParallelArrays( e.fieldName(), arrElt.fieldName() );
+ }
+ if ( arrayNestedArray ) {
+ mayExpandArrayUnembedded = false;
+ }
}
else {
- BSONElement e = fixed[j];
- if ( e.eoo() )
- b.appendNull( "" );
- else
- b.appendAs( e , "" );
+ // not an array - no need for further expansion
+ fixed[ i ] = e;
+ }
+ }
+
+ if ( arrElt.eoo() ) {
+ // No array, so generate a single key.
+ if ( _spec._sparse && numNotFound == _spec._nFields ) {
+ return;
+ }
+ BSONObjBuilder b(_spec._sizeTracker);
+ for( vector< BSONElement >::iterator i = fixed.begin(); i != fixed.end(); ++i ) {
+ b.appendAs( *i, "" );
+ }
+ keys.insert( b.obj() );
+ }
+ else if ( arrElt.embeddedObject().firstElement().eoo() ) {
+ // Empty array, so set matching fields to undefined.
+ _getKeysArrEltFixed( fieldNames, fixed, _spec._undefinedElt, keys, numNotFound, arrElt, arrIdxs, true );
+ }
+ else {
+ // Non-empty array that can be expanded, so generate a key for each member.
+ BSONObj arrObj = arrElt.embeddedObject();
+ BSONObjIterator i( arrObj );
+ while( i.more() ) {
+ _getKeysArrEltFixed( fieldNames, fixed, i.next(), keys, numNotFound, arrElt, arrIdxs, mayExpandArrayUnembedded );
}
}
- keys.insert( b.obj() );
+ }
+
+ const IndexSpec &_spec;
+ };
+
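+    // Key generation dispatches on the index version (the 'v' field of the index
+    // spec): 0 selects the legacy (<= v1.8) generator, 1 the current one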
+ void IndexSpec::getKeys( const BSONObj &obj, BSONObjSet &keys ) const {
+ switch( indexVersion() ) {
+ case 0: {
+ KeyGeneratorV0 g( *this );
+ g.getKeys( obj, keys );
+ break;
+ }
+ case 1: {
+ KeyGeneratorV1 g( *this );
+ g.getKeys( obj, keys );
+ break;
+ }
+ default:
+ massert( 15869, "Invalid index version for key generation.", false );
}
}
@@ -275,6 +447,13 @@ namespace mongo {
IndexSuitability IndexType::suitability( const BSONObj& query , const BSONObj& order ) const {
return _spec->_suitability( query , order );
}
+
+ int IndexSpec::indexVersion() const {
+ if ( !info.hasField( "v" ) ) {
+ return DefaultIndexVersionNumber;
+ }
+ return IndexDetails::versionForIndexObj( info );
+ }
bool IndexType::scanAndOrderRequired( const BSONObj& query , const BSONObj& order ) const {
return ! order.isEmpty();
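
The version dispatch above reads a per-index "v" field and falls back to a build default; a minimal sketch of that rule, with a toy stand-in for the BSON index spec:

    // If the index metadata carries no "v" field, the default applies.
    struct IndexInfoToy { bool hasV; int v; };
    const int kDefaultIndexVersionToy = 0; // illustrative value only
    int indexVersionToy( const IndexInfoToy &info ) {
        return info.hasV ? info.v : kDefaultIndexVersionToy;
    }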
diff --git a/db/indexkey.h b/db/indexkey.h
index 4a755f8a4e8..c04cd6396f6 100644
--- a/db/indexkey.h
+++ b/db/indexkey.h
@@ -25,6 +25,8 @@
namespace mongo {
+ extern const int DefaultIndexVersionNumber;
+
class Cursor;
class IndexSpec;
class IndexType; // TODO: this name sucks
@@ -161,16 +163,21 @@ namespace mongo {
protected:
+ int indexVersion() const;
+
IndexSuitability _suitability( const BSONObj& query , const BSONObj& order ) const ;
- void _getKeys( vector<const char*> fieldNames , vector<BSONElement> fixed , const BSONObj &obj, BSONObjSet &keys ) const;
-
BSONSizeTracker _sizeTracker;
vector<const char*> _fieldNames;
vector<BSONElement> _fixed;
+
BSONObj _nullKey; // a full key with all fields null
BSONObj _nullObj; // only used for _nullElt
BSONElement _nullElt; // jstNull
+
+ BSONObj _undefinedObj; // only used for _undefinedElt
+ BSONElement _undefinedElt; // undefined
+
int _nFields; // number of fields in the index
bool _sparse; // if the index is sparse
shared_ptr<IndexType> _indexType;
@@ -179,6 +186,8 @@ namespace mongo {
void _init();
friend class IndexType;
+ friend class KeyGeneratorV0;
+ friend class KeyGeneratorV1;
public:
bool _finishedInit;
};
diff --git a/db/instance.cpp b/db/instance.cpp
index ede433d652b..971cd2e7b38 100644
--- a/db/instance.cpp
+++ b/db/instance.cpp
@@ -587,7 +587,7 @@ namespace mongo {
}
NOINLINE_DECL void insertMulti(DbMessage& d, const char *ns, const BSONObj& _js) {
- const bool keepGoing = d.reservedField() & InsertOption_KeepGoing;
+ const bool keepGoing = d.reservedField() & InsertOption_ContinueOnError;
int n = 0;
BSONObj js(_js);
while( 1 ) {
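
The rename tracks the flag's final wire-protocol name; a hedged sketch of the bit test (the bit position shown is the conventional low bit, an assumption rather than a quote from the protocol headers):

    // ContinueOnError: when set on an insert message, a bulk insert keeps
    // going past per-document errors instead of aborting at the first one.
    enum { InsertOption_ContinueOnError_Toy = 1 << 0 };
    inline bool continueOnErrorToy( int reservedField ) {
        return ( reservedField & InsertOption_ContinueOnError_Toy ) != 0;
    }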
diff --git a/db/instance.h b/db/instance.h
index 2b86eb44fce..422c77d5ffa 100644
--- a/db/instance.h
+++ b/db/instance.h
@@ -147,6 +147,8 @@ namespace mongo {
virtual ConnectionString::ConnectionType type() const { return ConnectionString::MASTER; }
double getSoTimeout() const { return 0; }
+
+ virtual bool lazySupported() const { return true; }
private:
static HostAndPort _clientHost;
};
diff --git a/db/introspect.cpp b/db/introspect.cpp
index ca65710b3fc..7e1d19ce2f3 100644
--- a/db/introspect.cpp
+++ b/db/introspect.cpp
@@ -40,7 +40,7 @@ namespace mongo {
profileBufBuilder.reset();
BSONObjBuilder b(profileBufBuilder);
b.appendDate("ts", jsTime());
- currentOp.debug().append( b );
+ currentOp.debug().append( currentOp , b );
b.append("client", c.clientAddress() );
@@ -49,6 +49,26 @@ namespace mongo {
BSONObj p = b.done();
+ if (p.objsize() > 100*1024){
+ string small = p.toString(/*isArray*/false, /*full*/false);
+
+ warning() << "can't add full line to system.profile: " << small;
+
+ // rebuild with limited info
+ BSONObjBuilder b(profileBufBuilder);
+ b.appendDate("ts", jsTime());
+ b.append("client", c.clientAddress() );
+ if ( c.getAuthenticationInfo() )
+ b.append( "user" , c.getAuthenticationInfo()->getUser( nsToDatabase( ns ) ) );
+
+ b.append("err", "profile line too large (max is 100KB)");
+            if (small.size() < 100*1024){ // should be much smaller, but if not, don't break anything
+ b.append("abbreviated", small);
+ }
+
+ p = b.done();
+ }
+
// write: not replicated
NamespaceDetails *d = db->namespaceIndex.details(ns);
if( d ) {
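
A standalone sketch of the guard above, with plain strings in place of BSON; the 100KB limit and the stub fields follow the patch:

    #include <string>

    const size_t kMaxProfileBytesToy = 100 * 1024;

    // Oversized profile entries are replaced by a stub: an error marker
    // plus the abbreviated rendering when that itself fits.
    std::string capProfileEntryToy( const std::string &full,
                                    const std::string &abbreviated ) {
        if ( full.size() <= kMaxProfileBytesToy )
            return full;
        std::string stub = "profile line too large (max is 100KB)";
        if ( abbreviated.size() < kMaxProfileBytesToy )
            stub += " abbreviated: " + abbreviated;
        return stub;
    }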
diff --git a/db/jsobj.cpp b/db/jsobj.cpp
index 53c2329bd35..dcb77447873 100644
--- a/db/jsobj.cpp
+++ b/db/jsobj.cpp
@@ -45,7 +45,7 @@ BOOST_STATIC_ASSERT( sizeof(mongo::OID) == 12 );
namespace mongo {
- BSONElement nullElement;
+ BSONElement eooElement;
GENOIDLabeler GENOID;
@@ -508,6 +508,12 @@ namespace mongo {
}
BSONObj staticNull = fromjson( "{'':null}" );
+ BSONObj makeUndefined() {
+ BSONObjBuilder b;
+ b.appendUndefined( "" );
+ return b.obj();
+ }
+ BSONObj staticUndefined = makeUndefined();
/* well ordered compare */
int BSONObj::woSortOrder(const BSONObj& other, const BSONObj& sortKey , bool useDotted ) const {
@@ -613,13 +619,13 @@ namespace mongo {
}
if ( sub.eoo() )
- return nullElement;
- else if ( sub.type() == Array || name[0] == '\0')
+ return eooElement;
+ else if ( sub.type() == Array || name[0] == '\0' )
return sub;
else if ( sub.type() == Object )
return sub.embeddedObject().getFieldDottedOrArray( name );
else
- return nullElement;
+ return eooElement;
}
/**
@@ -919,7 +925,7 @@ namespace mongo {
c.appendRegex("x", "goo");
BSONObj p = c.done();
- assert( !o.shallowEqual( p ) );
+ assert( !o.binaryEqual( p ) );
assert( o.woCompare( p ) < 0 );
}
@@ -1024,7 +1030,7 @@ namespace mongo {
BSONObj a = A.done();
BSONObj b = B.done();
BSONObj c = C.done();
- assert( !a.shallowEqual( b ) ); // comments on operator==
+ assert( !a.binaryEqual( b ) ); // comments on operator==
int cmp = a.woCompare(b);
assert( cmp == 0 );
cmp = a.woCompare(c);
@@ -1167,13 +1173,9 @@ namespace mongo {
while (l.more() && r.more()){
if (strcmp(l.next().fieldName(), r.next().fieldName())) {
- PRINTFL;
return false;
}
}
- PRINT(l.more());
- PRINT(r.more());
- PRINT(l.more() || r.more());
return !(l.more() || r.more()); // false if lhs and rhs have diff nFields()
}
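
The reworked assertions lean on the distinction the rename makes explicit: woCompare() is a semantic ordering while binaryEqual() is byte-for-byte, so two values can compare equal yet not be binary-equal. A toy illustration with plain C++ types:

    #include <cassert>

    int main() {
        double a = 1.0; // think NumberDouble 1
        int b = 1;      // think NumberInt 1
        assert( static_cast<double>( b ) == a ); // semantically equal ("woCompare == 0")
        assert( sizeof( a ) != sizeof( b ) );    // different encodings: not binary-equal
        return 0;
    }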
diff --git a/db/key.cpp b/db/key.cpp
index ddc2d593350..648502ebf17 100644
--- a/db/key.cpp
+++ b/db/key.cpp
@@ -264,15 +264,17 @@ namespace mongo {
if( (t & 0x78) == 0 && t != ByteArrayDeprecated ) {
int len;
const char * d = e.binData(len);
- int code = BinDataLengthToCode[len];
- if( code >= 0 ) {
- if( t >= 128 )
- t = (t-128) | 0x08;
- dassert( (code&t) == 0 );
- b.appendUChar( cbindata|bits );
- b.appendUChar( code | t );
- b.appendBuf(d, len);
- break;
+ if( len <= BinDataLenMax ) {
+ int code = BinDataLengthToCode[len];
+ if( code >= 0 ) {
+ if( t >= 128 )
+ t = (t-128) | 0x08;
+ dassert( (code&t) == 0 );
+ b.appendUChar( cbindata|bits );
+ b.appendUChar( code | t );
+ b.appendBuf(d, len);
+ break;
+ }
}
}
traditional(obj);
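
The fix is a bounds check: the length must be validated before it indexes the length-to-code table, otherwise an oversized blob reads past the array. A reduced sketch (table contents illustrative):

    const int kBinDataLenMaxToy = 32;
    int binDataLengthToCodeToy[ kBinDataLenMaxToy + 1 ] = {}; // negative where no compact code

    // True when the compact key encoding applies; callers fall back to the
    // traditional (non-compact) encoding otherwise.
    bool compactBinDataPossibleToy( int len ) {
        if ( len > kBinDataLenMaxToy ) // the guard this patch adds
            return false;
        return binDataLengthToCodeToy[ len ] >= 0;
    }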
diff --git a/db/matcher.cpp b/db/matcher.cpp
index 23d5a7057bf..2b92d5797c3 100644
--- a/db/matcher.cpp
+++ b/db/matcher.cpp
@@ -64,8 +64,14 @@ namespace mongo {
}
~Where() {
- if ( scope.get() )
- scope->execSetup( "_mongo.readOnly = false;" , "make not read only" );
+ if ( scope.get() ){
+ try {
+ scope->execSetup( "_mongo.readOnly = false;" , "make not read only" );
+ }
+ catch( DBException& e ){
+ warning() << "javascript scope cleanup interrupted" << causedBy( e ) << endl;
+ }
+ }
if ( jsScope ) {
delete jsScope;
@@ -148,6 +154,9 @@ namespace mongo {
rm._prefix = prefix;
}
else {
+ uassert( 15882, "$elemMatch not allowed within $in",
+ ie.type() != Object ||
+ ie.embeddedObject().firstElement().getGtLtOp() != BSONObj::opELEM_MATCH );
_myset->insert(ie);
}
}
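
The paired uasserts (15882 here, 15881 in queryutil.cpp below) reject shapes like { a: { $in: [ { $elemMatch: { b: 1 } } ] } }. The rule, as a toy predicate:

    #include <string>
    #include <vector>

    // Stand-in for a BSONElement appearing inside a $in list.
    struct InListElemToy { bool isObject; std::string firstOp; };

    bool inListAllowedToy( const std::vector<InListElemToy> &in ) {
        for ( size_t i = 0; i < in.size(); ++i )
            if ( in[ i ].isObject && in[ i ].firstOp == "$elemMatch" )
                return false; // the server now uasserts here
        return true;
    }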
diff --git a/db/modules/mms.cpp b/db/modules/mms.cpp
index 28fc225477f..40abb391dfb 100644
--- a/db/modules/mms.cpp
+++ b/db/modules/mms.cpp
@@ -142,7 +142,7 @@ namespace mongo {
string errmsg;
BSONObjBuilder sub;
- if ( ! c->run( "admin.$cmd" , co , errmsg , sub , false ) )
+ if ( ! c->run( "admin.$cmd" , co , 0 , errmsg , sub , false ) )
postData.append( cmd , errmsg );
else
postData.append( cmd , sub.obj() );
diff --git a/db/mongommf.h b/db/mongommf.h
index b347e4ff259..0c4e8e4a19d 100644
--- a/db/mongommf.h
+++ b/db/mongommf.h
@@ -75,7 +75,7 @@ namespace mongo {
fileSuffixNo() is 3
if the suffix is "ns", fileSuffixNo() is -1
*/
- RelativePath relativePath() const {
+ const RelativePath& relativePath() const {
DEV assert( !_p._p.empty() );
return _p;
}
diff --git a/db/namespace.cpp b/db/namespace.cpp
index 927f56b6e7b..2bc7409e56c 100644
--- a/db/namespace.cpp
+++ b/db/namespace.cpp
@@ -604,6 +604,17 @@ namespace mongo {
}
}
+ void NamespaceDetailsTransient::eraseForPrefix(const char *prefix) {
+ assertInWriteLock();
+ vector< string > found;
+ for( ouriter i = _map.begin(); i != _map.end(); ++i )
+ if ( strncmp( i->first.c_str(), prefix, strlen( prefix ) ) == 0 )
+ found.push_back( i->first );
+ for( vector< string >::iterator i = found.begin(); i != found.end(); ++i ) {
+ _map.erase(*i);
+ }
+ }
+
void NamespaceDetailsTransient::computeIndexKeys() {
_keysComputed = true;
_indexKeys.clear();
@@ -657,7 +668,7 @@ namespace mongo {
// index details across commands are in cursors and nsd
// transient (including query cache) so clear these.
ClientCursor::invalidate( from );
- NamespaceDetailsTransient::clearForPrefix( from );
+ NamespaceDetailsTransient::eraseForPrefix( from );
NamespaceDetails *details = ni->details( from );
ni->add_ns( to, *details );
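
Unlike clearForPrefix(), eraseForPrefix() removes the map entries outright, and it collects the matching keys before erasing so the iterator is never invalidated mid-scan. The same shape on a plain std::map:

    #include <cstring>
    #include <map>
    #include <string>
    #include <vector>

    void eraseForPrefixToy( std::map<std::string,int> &m, const char *prefix ) {
        std::vector<std::string> found;
        for ( std::map<std::string,int>::const_iterator i = m.begin(); i != m.end(); ++i )
            if ( std::strncmp( i->first.c_str(), prefix, std::strlen( prefix ) ) == 0 )
                found.push_back( i->first );
        for ( size_t i = 0; i < found.size(); ++i )
            m.erase( found[ i ] ); // erase after the scan, not during it
    }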
diff --git a/db/namespace.h b/db/namespace.h
index a1b7c2274bc..3dfb3f33767 100644
--- a/db/namespace.h
+++ b/db/namespace.h
@@ -454,6 +454,7 @@ namespace mongo {
Can be useful as index namespaces share the same start as the regular collection.
SLOW - sequential scan of all NamespaceDetailsTransient objects */
static void clearForPrefix(const char *prefix);
+ static void eraseForPrefix(const char *prefix);
/**
* @return a cursor interface to the query optimizer. The implementation may
diff --git a/db/oplog.cpp b/db/oplog.cpp
index 7286fd9053c..dc9db76d9d5 100644
--- a/db/oplog.cpp
+++ b/db/oplog.cpp
@@ -473,9 +473,9 @@ namespace mongo {
return _qp.nsd()->capFirstNewRecord;
}
- void assertExtentNonempty( const Extent *e ) {
+ void wassertExtentNonempty( const Extent *e ) {
// TODO ensure this requirement is clearly enforced, or fix.
- massert( 14834, "empty extent found during finding start scan", !e->firstRecord.isNull() );
+ wassert( !e->firstRecord.isNull() );
}
DiskLoc FindingStartCursor::prevExtentFirstLoc( const DiskLoc &rec ) {
@@ -488,14 +488,14 @@ namespace mongo {
e = e->xprev.ext();
}
if ( e->myLoc != _qp.nsd()->capExtent ) {
- assertExtentNonempty( e );
+ wassertExtentNonempty( e );
return e->firstRecord;
}
}
else {
if ( !e->xprev.isNull() ) {
e = e->xprev.ext();
- assertExtentNonempty( e );
+ wassertExtentNonempty( e );
return e->firstRecord;
}
}
@@ -506,20 +506,30 @@ namespace mongo {
shared_ptr<Cursor> c = _qp.newCursor( startLoc );
_findingStartCursor.reset( new ClientCursor(QueryOption_NoCursorTimeout, c, _qp.ns()) );
}
+
+ bool FindingStartCursor::firstDocMatchesOrEmpty() const {
+ shared_ptr<Cursor> c = _qp.newCursor();
+ return !c->ok() || _matcher->matchesCurrent( c.get() );
+ }
void FindingStartCursor::init() {
- // Use a ClientCursor here so we can release db mutex while scanning
- // oplog (can take quite a while with large oplogs).
- shared_ptr<Cursor> c = _qp.newReverseCursor();
- _findingStartCursor.reset( new ClientCursor(QueryOption_NoCursorTimeout, c, _qp.ns(), BSONObj()) );
- _findingStartTimer.reset();
- _findingStartMode = Initial;
BSONElement tsElt = _qp.originalQuery()[ "ts" ];
massert( 13044, "no ts field in query", !tsElt.eoo() );
BSONObjBuilder b;
b.append( tsElt );
BSONObj tsQuery = b.obj();
_matcher.reset(new CoveredIndexMatcher(tsQuery, _qp.indexKey()));
+ if ( firstDocMatchesOrEmpty() ) {
+ _c = _qp.newCursor();
+ _findingStart = false;
+ return;
+ }
+ // Use a ClientCursor here so we can release db mutex while scanning
+ // oplog (can take quite a while with large oplogs).
+ shared_ptr<Cursor> c = _qp.newReverseCursor();
+ _findingStartCursor.reset( new ClientCursor(QueryOption_NoCursorTimeout, c, _qp.ns(), BSONObj()) );
+ _findingStartTimer.reset();
+ _findingStartMode = Initial;
}
// -------------------------------------
@@ -704,7 +714,7 @@ namespace mongo {
virtual void help( stringstream &help ) const {
help << "internal (sharding)\n{ applyOps : [ ] , preCondition : [ { ns : ... , q : ... , res : ... } ] }";
}
- virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
if ( cmdObj.firstElement().type() != Array ) {
errmsg = "ops has to be an array";
diff --git a/db/oplog.h b/db/oplog.h
index f87a1c85e04..79fb01b0a4d 100644
--- a/db/oplog.h
+++ b/db/oplog.h
@@ -118,6 +118,7 @@ namespace mongo {
_findingStartCursor.reset( 0 );
}
void init();
+ bool firstDocMatchesOrEmpty() const;
};
void pretouchOperation(const BSONObj& op);
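
The reordering in FindingStartCursor::init() above buys a fast path: the matcher is built first, and if the forward cursor's first document already matches (or the collection is empty) the expensive reverse "finding start" scan is skipped entirely. The predicate, reduced to toy types:

    // Sketch of the fast-path test added above.
    struct ToyCursor { bool ok; bool firstMatches; };
    bool firstDocMatchesOrEmptyToy( const ToyCursor &c ) {
        return !c.ok || c.firstMatches; // empty collection, or the start is already here
    }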
diff --git a/db/ops/query.cpp b/db/ops/query.cpp
index 120382fa7d8..f13b6e5ea4b 100644
--- a/db/ops/query.cpp
+++ b/db/ops/query.cpp
@@ -36,6 +36,7 @@
#include "../lasterror.h"
#include "../../s/d_logic.h"
#include "../repl_block.h"
+#include "../../server.h"
namespace mongo {
@@ -92,21 +93,15 @@ namespace mongo {
ClientCursor::Pointer p(cursorid);
ClientCursor *cc = p.c();
- int bufSize = 512;
- if ( cc ) {
- bufSize += sizeof( QueryResult );
- bufSize += MaxBytesToReturnToClientAtOnce;
- }
+ int bufSize = 512 + sizeof( QueryResult ) + MaxBytesToReturnToClientAtOnce;
BufBuilder b( bufSize );
-
b.skip(sizeof(QueryResult));
-
int resultFlags = ResultFlag_AwaitCapable;
int start = 0;
int n = 0;
- if ( !cc ) {
+ if ( unlikely(!cc) ) {
log() << "getMore: cursorid not found " << ns << " " << cursorid << endl;
cursorid = 0;
resultFlags = ResultFlag_CursorNotFound;
@@ -420,6 +415,8 @@ namespace mongo {
*_b << "indexBounds" << c->prettyIndexBounds();
+ c->explainDetails( *_b );
+
if ( !hint ) {
*_b << "allPlans" << _a->arr();
}
@@ -899,9 +896,6 @@ namespace mongo {
if ( ! (explain || pq.showDiskLoc()) && isSimpleIdQuery( query ) && !pq.hasOption( QueryOption_CursorTailable ) ) {
- //NamespaceDetails* d = nsdetails(ns);
- //uassert(14820, "capped collections have no _id index by default, can only query by _id if one added", d == NULL || d->haveIdIndex() );
-
bool nsFound = false;
bool indexFound = false;
diff --git a/db/ops/update.cpp b/db/ops/update.cpp
index 3221fe0f277..d70048d2cc2 100644
--- a/db/ops/update.cpp
+++ b/db/ops/update.cpp
@@ -1060,11 +1060,10 @@ namespace mongo {
debug.updateobj = updateobj;
- /* idea with these here it to make them loop invariant for multi updates, and thus be a bit faster for that case */
- /* NOTE: when yield() is added herein, these must be refreshed after each call to yield! */
+        // idea with these here is to make them loop invariant for multi updates, and thus be a bit faster for that case
+ // The pointers may be left invalid on a failed or terminal yield recovery.
NamespaceDetails *d = nsdetails(ns); // can be null if an upsert...
NamespaceDetailsTransient *nsdt = &NamespaceDetailsTransient::get_w(ns);
- /* end note */
auto_ptr<ModSet> mods;
bool isOperatorUpdate = updateobj.firstElementFieldName()[0] == '$';
@@ -1105,6 +1104,9 @@ namespace mongo {
shared_ptr< MultiCursor::CursorOp > opPtr( new UpdateOp( mods.get() && mods->hasDynamicArray() ) );
shared_ptr< MultiCursor > c( new MultiCursor( ns, patternOrig, BSONObj(), opPtr, true ) );
+ d = nsdetails(ns);
+ nsdt = &NamespaceDetailsTransient::get_w(ns);
+
if( c->ok() ) {
set<DiskLoc> seenObjects;
MatchDetails details;
@@ -1114,20 +1116,28 @@ namespace mongo {
bool atomic = c->matcher()->docMatcher().atomic();
- // *****************
- if ( cc.get() == 0 ) {
- shared_ptr< Cursor > cPtr = c;
- cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , cPtr , ns ) );
- }
-
- if ( ! cc->yieldSometimes( ClientCursor::WillNeed ) ) {
- cc.release();
- break;
- }
- if ( !c->ok() ) {
- break;
+ if ( !atomic ) {
+ // *****************
+ if ( cc.get() == 0 ) {
+ shared_ptr< Cursor > cPtr = c;
+ cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , cPtr , ns ) );
+ }
+
+ bool didYield;
+ if ( ! cc->yieldSometimes( ClientCursor::WillNeed, &didYield ) ) {
+ cc.release();
+ break;
+ }
+ if ( !c->ok() ) {
+ break;
+ }
+
+ if ( didYield ) {
+ d = nsdetails(ns);
+ nsdt = &NamespaceDetailsTransient::get_w(ns);
+ }
+ // *****************
}
- // *****************
// May have already matched in UpdateOp, but do again to get details set correctly
if ( ! c->matcher()->matchesCurrent( c.get(), &details ) ) {
@@ -1146,6 +1156,8 @@ namespace mongo {
if ( !c->ok() ) {
break;
}
+ d = nsdetails(ns);
+ nsdt = &NamespaceDetailsTransient::get_w(ns);
}
continue;
}
@@ -1276,10 +1288,11 @@ namespace mongo {
if ( !c->ok() ) {
break;
}
+ d = nsdetails(ns);
+ nsdt = &NamespaceDetailsTransient::get_w(ns);
}
- if (atomic)
- getDur().commitIfNeeded();
+ getDur().commitIfNeeded();
continue;
}
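
The repeated re-fetches of d and nsdt all serve one discipline: a raw pointer cached across a yield may be stale once the lock has been released and reacquired. A minimal sketch of the pattern, with toy names:

    struct DetailsToy { int generation; };
    static DetailsToy s_details = { 0 };
    DetailsToy *lookupDetailsToy() { return &s_details; } // stands in for nsdetails()

    void multiUpdatePassToy( bool didYield ) {
        DetailsToy *d = lookupDetailsToy(); // loop-invariant only until a yield
        if ( didYield )
            d = lookupDetailsToy();         // refresh, as update.cpp now does
        (void) d->generation;
    }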
diff --git a/db/pdfile.cpp b/db/pdfile.cpp
index 0b7a5b0830d..0569ba6868e 100644
--- a/db/pdfile.cpp
+++ b/db/pdfile.cpp
@@ -869,6 +869,7 @@ namespace mongo {
result.append("ns", name.c_str());
ClientCursor::invalidate(name.c_str());
Top::global.collectionDropped( name );
+ NamespaceDetailsTransient::eraseForPrefix( name.c_str() );
dropNS(name);
}
@@ -967,7 +968,7 @@ namespace mongo {
}
}
- void DataFileMgr::deleteRecord(const char *ns, Record *todelete, const DiskLoc& dl, bool cappedOK, bool noWarn) {
+ void DataFileMgr::deleteRecord(const char *ns, Record *todelete, const DiskLoc& dl, bool cappedOK, bool noWarn, bool doLog ) {
dassert( todelete == dl.rec() );
NamespaceDetails* d = nsdetails(ns);
@@ -976,6 +977,14 @@ namespace mongo {
uassert( 10089 , "can't remove from a capped collection" , 0 );
return;
}
+
+ BSONObj toDelete;
+ if ( doLog ) {
+ BSONElement e = dl.obj()["_id"];
+ if ( e.type() ) {
+ toDelete = e.wrap();
+ }
+ }
/* check if any cursors point to us. if so, advance them. */
ClientCursor::aboutToDelete(dl);
@@ -984,6 +993,10 @@ namespace mongo {
_deleteRecord(d, ns, todelete, dl);
NamespaceDetailsTransient::get_w( ns ).notifyOfWriteOp();
+
+ if ( ! toDelete.isEmpty() ) {
+ logOp( "d" , ns , toDelete );
+ }
}
@@ -1181,7 +1194,13 @@ namespace mongo {
BSONObjExternalSorter::Data d = i->next();
try {
- btBuilder.addKey(d.first, d.second);
+ if ( !dupsAllowed && dropDups ) {
+ LastError::Disabled led( lastError.get() );
+ btBuilder.addKey(d.first, d.second);
+ }
+ else {
+ btBuilder.addKey(d.first, d.second);
+ }
}
catch( AssertionException& e ) {
if ( dupsAllowed ) {
@@ -1189,8 +1208,9 @@ namespace mongo {
throw;
}
- if( e.interrupted() )
- throw;
+ if( e.interrupted() ) {
+ killCurrentOp.checkForInterrupt();
+ }
if ( ! dropDups )
throw;
@@ -1276,7 +1296,7 @@ namespace mongo {
log(1) << "\t fastBuildIndex dupsToDrop:" << dupsToDrop.size() << endl;
for( list<DiskLoc>::iterator i = dupsToDrop.begin(); i != dupsToDrop.end(); i++ ){
- theDataFileMgr.deleteRecord( ns, i->rec(), *i, false, true );
+ theDataFileMgr.deleteRecord( ns, i->rec(), *i, false, true , true );
getDur().commitIfNeeded();
}
@@ -1302,18 +1322,27 @@ namespace mongo {
while ( cc->ok() ) {
BSONObj js = cc->current();
try {
- _indexRecord(d, idxNo, js, cc->currLoc(), dupsAllowed);
+ {
+ if ( !dupsAllowed && dropDups ) {
+ LastError::Disabled led( lastError.get() );
+ _indexRecord(d, idxNo, js, cc->currLoc(), dupsAllowed);
+ }
+ else {
+ _indexRecord(d, idxNo, js, cc->currLoc(), dupsAllowed);
+ }
+ }
cc->advance();
}
catch( AssertionException& e ) {
- if( e.interrupted() )
- throw;
+ if( e.interrupted() ) {
+ killCurrentOp.checkForInterrupt();
+ }
if ( dropDups ) {
DiskLoc toDelete = cc->currLoc();
bool ok = cc->advance();
cc->updateLocation();
- theDataFileMgr.deleteRecord( ns, toDelete.rec(), toDelete, false, true );
+ theDataFileMgr.deleteRecord( ns, toDelete.rec(), toDelete, false, true , true );
if( ClientCursor::find(id, false) == 0 ) {
cc.release();
if( !ok ) {
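
With doLog set, ordering matters in deleteRecord(): the _id has to be captured before the record's storage is reclaimed, and the "d" (delete) entry is logged only after the delete succeeds. The sequence, sketched with toy types:

    #include <string>

    struct ToyRecord { std::string id; };

    void deleteRecordLoggedToy( ToyRecord &r, bool doLog, std::string &oplog ) {
        std::string toDelete;
        if ( doLog )
            toDelete = r.id;           // capture before deletion
        r.id.clear();                  // stands in for _deleteRecord()
        if ( !toDelete.empty() )
            oplog += " d:" + toDelete; // stands in for logOp( "d", ns, ... )
    }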
diff --git a/db/pdfile.h b/db/pdfile.h
index 0f45e6d337e..64dba68ca41 100644
--- a/db/pdfile.h
+++ b/db/pdfile.h
@@ -142,7 +142,7 @@ namespace mongo {
static Record* getRecord(const DiskLoc& dl);
static DeletedRecord* makeDeletedRecord(const DiskLoc& dl, int len);
- void deleteRecord(const char *ns, Record *todelete, const DiskLoc& dl, bool cappedOK = false, bool noWarn = false);
+ void deleteRecord(const char *ns, Record *todelete, const DiskLoc& dl, bool cappedOK = false, bool noWarn = false, bool logOp=false);
/* does not clean up indexes, etc. : just deletes the record in the pdfile. use deleteRecord() to unindex */
void _deleteRecord(NamespaceDetails *d, const char *ns, Record *todelete, const DiskLoc& dl);
diff --git a/db/queryoptimizer.cpp b/db/queryoptimizer.cpp
index 4173eaaa2cd..e49e9b11ecb 100644
--- a/db/queryoptimizer.cpp
+++ b/db/queryoptimizer.cpp
@@ -52,7 +52,7 @@ namespace mongo {
QueryPlan::QueryPlan(
NamespaceDetails *d, int idxNo,
- const FieldRangeSetPair &frsp, const FieldRangeSetPair &originalFrsp, const BSONObj &originalQuery, const BSONObj &order, const BSONObj &startKey, const BSONObj &endKey , string special ) :
+ const FieldRangeSetPair &frsp, const FieldRangeSetPair *originalFrsp, const BSONObj &originalQuery, const BSONObj &order, const BSONObj &startKey, const BSONObj &endKey , string special ) :
_d(d), _idxNo(idxNo),
_frs( frsp.frsForIndex( _d, _idxNo ) ),
_frsMulti( frsp.frsForIndex( _d, -1 ) ),
@@ -166,12 +166,17 @@ doneCheckOrder:
_optimal = true;
if ( exactIndexedQueryCount == _frs.nNontrivialRanges() &&
orderFieldsUnindexed.size() == 0 &&
- exactIndexedQueryCount == _index->keyPattern().nFields() &&
+ exactIndexedQueryCount == idxKey.nFields() &&
exactIndexedQueryCount == _originalQuery.nFields() ) {
_exactKeyMatch = true;
}
_frv.reset( new FieldRangeVector( _frs, idxSpec, _direction ) );
- _originalFrv.reset( new FieldRangeVector( originalFrsp.frsForIndex( _d, _idxNo ), idxSpec, _direction ) );
+ if ( originalFrsp ) {
+ _originalFrv.reset( new FieldRangeVector( originalFrsp->frsForIndex( _d, _idxNo ), idxSpec, _direction ) );
+ }
+ else {
+ _originalFrv = _frv;
+ }
if ( _startOrEndSpec ) {
BSONObj newStart, newEnd;
if ( !startKey.isEmpty() )
@@ -206,8 +211,25 @@ doneCheckOrder:
}
if ( willScanTable() ) {
- if ( _frs.nNontrivialRanges() )
+ if ( _frs.nNontrivialRanges() ) {
checkTableScanAllowed( _frs.ns() );
+
+ // if we are doing a table scan on _id
+ // and its a capped collection
+ // we disallow as its a common user error
+ // .system. and local collections are exempt
+ if ( _d && _d->capped && _frs.range( "_id" ).nontrivial() ) {
+ if ( cc().isSyncThread() ||
+ str::contains( _frs.ns() , ".system." ) ||
+ str::startsWith( _frs.ns() , "local." ) ) {
+ // ok
+ }
+ else {
+                    warning() << "_id query on capped collection without an _id index, performance will be poor; collection: " << _frs.ns() << endl;
+ //uassert( 14820, str::stream() << "doing _id query on a capped collection without an index is not allowed: " << _frs.ns() ,
+ }
+ }
+ }
return findTableScan( _frs.ns(), _order, startLoc );
}
@@ -328,7 +350,7 @@ doneCheckOrder:
massert( 10365 , errmsg, indexDetailsForRange( _frsp->ns(), errmsg, _min, _max, keyPattern ) );
}
NamespaceDetails *d = nsdetails(_ns);
- _plans.push_back( QueryPlanPtr( new QueryPlan( d, d->idxNo(id), *_frsp, *_originalFrsp, _originalQuery, _order, _min, _max ) ) );
+ _plans.push_back( QueryPlanPtr( new QueryPlan( d, d->idxNo(id), *_frsp, _originalFrsp.get(), _originalQuery, _order, _min, _max ) ) );
}
// returns an IndexDetails * for a hint, 0 if hint is $natural.
@@ -374,7 +396,7 @@ doneCheckOrder:
NamespaceDetails *d = nsdetails( ns );
if ( !d || !_frsp->matchPossible() ) {
// Table scan plan, when no matches are possible
- _plans.push_back( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, *_originalFrsp, _originalQuery, _order ) ) );
+ _plans.push_back( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, _originalFrsp.get(), _originalQuery, _order ) ) );
return;
}
@@ -388,7 +410,7 @@ doneCheckOrder:
else {
massert( 10366 , "natural order cannot be specified with $min/$max", _min.isEmpty() && _max.isEmpty() );
// Table scan plan
- _plans.push_back( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, *_originalFrsp, _originalQuery, _order ) ) );
+ _plans.push_back( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, _originalFrsp.get(), _originalQuery, _order ) ) );
}
return;
}
@@ -398,7 +420,7 @@ doneCheckOrder:
BSONObj keyPattern;
IndexDetails *idx = indexDetailsForRange( ns, errmsg, _min, _max, keyPattern );
massert( 10367 , errmsg, idx );
- _plans.push_back( QueryPlanPtr( new QueryPlan( d, d->idxNo(*idx), *_frsp, *_originalFrsp, _originalQuery, _order, _min, _max ) ) );
+ _plans.push_back( QueryPlanPtr( new QueryPlan( d, d->idxNo(*idx), *_frsp, _originalFrsp.get(), _originalQuery, _order, _min, _max ) ) );
return;
}
@@ -407,13 +429,13 @@ doneCheckOrder:
if ( idx >= 0 ) {
_usingPrerecordedPlan = true;
_mayRecordPlan = false;
- _plans.push_back( QueryPlanPtr( new QueryPlan( d , idx , *_frsp , *_originalFrsp , _originalQuery, _order ) ) );
+ _plans.push_back( QueryPlanPtr( new QueryPlan( d , idx , *_frsp , _originalFrsp.get() , _originalQuery, _order ) ) );
return;
}
}
if ( _originalQuery.isEmpty() && _order.isEmpty() ) {
- _plans.push_back( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, *_originalFrsp, _originalQuery, _order ) ) );
+ _plans.push_back( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, _originalFrsp.get(), _originalQuery, _order ) ) );
return;
}
@@ -428,7 +450,7 @@ doneCheckOrder:
if ( spec.getTypeName() == _special && spec.suitability( _originalQuery , _order ) ) {
_usingPrerecordedPlan = true;
_mayRecordPlan = false;
- _plans.push_back( QueryPlanPtr( new QueryPlan( d , j , *_frsp , *_originalFrsp , _originalQuery, _order ,
+ _plans.push_back( QueryPlanPtr( new QueryPlan( d , j , *_frsp , _originalFrsp.get() , _originalQuery, _order ,
BSONObj() , BSONObj() , _special ) ) );
return;
}
@@ -445,7 +467,7 @@ doneCheckOrder:
_oldNScanned = oldNScanned;
if ( !strcmp( bestIndex.firstElementFieldName(), "$natural" ) ) {
// Table scan plan
- p.reset( new QueryPlan( d, -1, *_frsp, *_originalFrsp, _originalQuery, _order ) );
+ p.reset( new QueryPlan( d, -1, *_frsp, _originalFrsp.get(), _originalQuery, _order ) );
}
NamespaceDetails::IndexIterator i = d->ii();
@@ -453,7 +475,7 @@ doneCheckOrder:
int j = i.pos();
IndexDetails& ii = i.next();
if( ii.keyPattern().woCompare(bestIndex) == 0 ) {
- p.reset( new QueryPlan( d, j, *_frsp, *_originalFrsp, _originalQuery, _order ) );
+ p.reset( new QueryPlan( d, j, *_frsp, _originalFrsp.get(), _originalQuery, _order ) );
}
}
@@ -480,7 +502,7 @@ doneCheckOrder:
if ( !_frsp->matchPossible() || ( _frsp->noNontrivialRanges() && _order.isEmpty() ) ||
( !_order.isEmpty() && !strcmp( _order.firstElementFieldName(), "$natural" ) ) ) {
// Table scan plan
- addPlan( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, *_originalFrsp, _originalQuery, _order ) ), checkFirst );
+ addPlan( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, _originalFrsp.get(), _originalQuery, _order ) ), checkFirst );
return;
}
@@ -490,10 +512,11 @@ doneCheckOrder:
QueryPlanPtr optimalPlan;
for( int i = 0; i < d->nIndexes; ++i ) {
if ( normalQuery ) {
- if ( !_frsp->matchPossibleForIndex( d, i, d->idx( i ).keyPattern() ) ) {
+ BSONObj keyPattern = d->idx( i ).keyPattern();
+ if ( !_frsp->matchPossibleForIndex( d, i, keyPattern ) ) {
+                    // If no match is possible, only generate a trivial plan that won't
// scan any documents.
- QueryPlanPtr p( new QueryPlan( d, i, *_frsp, *_originalFrsp, _originalQuery, _order ) );
+ QueryPlanPtr p( new QueryPlan( d, i, *_frsp, _originalFrsp.get(), _originalQuery, _order ) );
addPlan( p, checkFirst );
return;
}
@@ -502,7 +525,7 @@ doneCheckOrder:
}
}
- QueryPlanPtr p( new QueryPlan( d, i, *_frsp, *_originalFrsp, _originalQuery, _order ) );
+ QueryPlanPtr p( new QueryPlan( d, i, *_frsp, _originalFrsp.get(), _originalQuery, _order ) );
if ( p->optimal() ) {
if ( !optimalPlan.get() ) {
optimalPlan = p;
@@ -520,7 +543,7 @@ doneCheckOrder:
addPlan( *i, checkFirst );
// Table scan plan
- addPlan( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, *_originalFrsp, _originalQuery, _order ) ), checkFirst );
+ addPlan( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, _originalFrsp.get(), _originalQuery, _order ) ), checkFirst );
}
shared_ptr<QueryOp> QueryPlanSet::runOp( QueryOp &op ) {
@@ -538,7 +561,7 @@ doneCheckOrder:
return r.runUntilFirstCompletes();
}
- shared_ptr<QueryOp> QueryPlanSet::nextOp( QueryOp &originalOp ) {
+ shared_ptr<QueryOp> QueryPlanSet::nextOp( QueryOp &originalOp, bool retried ) {
if ( !_runner ) {
_runner.reset( new Runner( *this, originalOp ) );
shared_ptr<QueryOp> op = _runner->init();
@@ -553,10 +576,14 @@ doneCheckOrder:
if ( !_usingPrerecordedPlan || _bestGuessOnly || _plans.size() > 1 ) {
return op;
}
+
+ // Avoid an infinite loop here
+ uassert( 15878, str::stream() << "query plans not successful even with no constraints, potentially due to additional sort", ! retried );
+
// Retry with all candidate plans.
QueryUtilIndexed::clearIndexesForPatterns( *_frsp, _order );
init();
- return nextOp( originalOp );
+ return nextOp( originalOp, true );
}
bool QueryPlanSet::prepareToYield() {
@@ -815,24 +842,29 @@ doneCheckOrder:
_ns( ns ),
_or( !query.getField( "$or" ).eoo() ),
_query( query.getOwned() ),
- _org( ns, _query ),
_i(),
_honorRecordedPlan( honorRecordedPlan ),
_bestGuessOnly( bestGuessOnly ),
_hint( ( hint && !hint->eoo() ) ? hint->wrap() : BSONObj() ),
_mayYield( mayYield ),
_tableScanned() {
- if ( !order.isEmpty() || !min.isEmpty() || !max.isEmpty() || !_org.getSpecial().empty() ) {
+ if ( !order.isEmpty() || !min.isEmpty() || !max.isEmpty() ) {
_or = false;
}
- if ( _or && uselessOr( _hint.firstElement() ) ) {
- _or = false;
+ if ( _or ) {
+ // Only construct an OrRangeGenerator if we may handle $or clauses.
+ _org.reset( new OrRangeGenerator( ns, _query ) );
+ if ( !_org->getSpecial().empty() ) {
+ _or = false;
+ }
+ else if ( uselessOr( _hint.firstElement() ) ) {
+ _or = false;
+ }
}
// if _or == false, don't use or clauses for index selection
if ( !_or ) {
auto_ptr<FieldRangeSetPair> frsp( new FieldRangeSetPair( ns, _query, true ) );
- auto_ptr<FieldRangeSetPair> oldFrsp( new FieldRangeSetPair( *frsp ) );
- _currentQps.reset( new QueryPlanSet( ns, frsp, oldFrsp, _query, order, hint, honorRecordedPlan, min, max, _bestGuessOnly, _mayYield ) );
+ _currentQps.reset( new QueryPlanSet( ns, frsp, auto_ptr<FieldRangeSetPair>(), _query, order, hint, honorRecordedPlan, min, max, _bestGuessOnly, _mayYield ) );
}
else {
BSONElement e = _query.getField( "$or" );
@@ -847,8 +879,8 @@ doneCheckOrder:
return _currentQps->runOp( op );
}
++_i;
- auto_ptr<FieldRangeSetPair> frsp( _org.topFrsp() );
- auto_ptr<FieldRangeSetPair> originalFrsp( _org.topFrspOriginal() );
+ auto_ptr<FieldRangeSetPair> frsp( _org->topFrsp() );
+ auto_ptr<FieldRangeSetPair> originalFrsp( _org->topFrspOriginal() );
BSONElement hintElt = _hint.firstElement();
_currentQps.reset( new QueryPlanSet( _ns, frsp, originalFrsp, _query, BSONObj(), &hintElt, _honorRecordedPlan, BSONObj(), BSONObj(), _bestGuessOnly, _mayYield ) );
shared_ptr<QueryOp> ret( _currentQps->runOp( op ) );
@@ -856,7 +888,7 @@ doneCheckOrder:
_tableScanned = true;
} else {
// If the full table was scanned, don't bother popping the last or clause.
- _org.popOrClause( ret->qp().nsd(), ret->qp().idxNo(), ret->qp().indexed() ? ret->qp().indexKey() : BSONObj() );
+ _org->popOrClause( ret->qp().nsd(), ret->qp().idxNo(), ret->qp().indexed() ? ret->qp().indexKey() : BSONObj() );
}
return ret;
}
@@ -877,7 +909,7 @@ doneCheckOrder:
if ( op->qp().willScanTable() ) {
_tableScanned = true;
} else {
- _org.popOrClause( op->qp().nsd(), op->qp().idxNo(), op->qp().indexed() ? op->qp().indexKey() : BSONObj() );
+ _org->popOrClause( op->qp().nsd(), op->qp().idxNo(), op->qp().indexed() ? op->qp().indexKey() : BSONObj() );
}
return op;
}
@@ -887,8 +919,8 @@ doneCheckOrder:
shared_ptr<QueryOp> op;
while( mayRunMore() ) {
++_i;
- auto_ptr<FieldRangeSetPair> frsp( _org.topFrsp() );
- auto_ptr<FieldRangeSetPair> originalFrsp( _org.topFrspOriginal() );
+ auto_ptr<FieldRangeSetPair> frsp( _org->topFrsp() );
+ auto_ptr<FieldRangeSetPair> originalFrsp( _org->topFrspOriginal() );
BSONElement hintElt = _hint.firstElement();
_currentQps.reset( new QueryPlanSet( _ns, frsp, originalFrsp, _query, BSONObj(), &hintElt, _honorRecordedPlan, BSONObj(), BSONObj(), _bestGuessOnly, _mayYield ) );
op = nextOpHandleEndOfClause();
@@ -954,9 +986,9 @@ doneCheckOrder:
if ( !id ) {
return true;
}
- return QueryUtilIndexed::uselessOr( _org, nsd, nsd->idxNo( *id ) );
+ return QueryUtilIndexed::uselessOr( *_org, nsd, nsd->idxNo( *id ) );
}
- return QueryUtilIndexed::uselessOr( _org, nsd, -1 );
+ return QueryUtilIndexed::uselessOr( *_org, nsd, -1 );
}
MultiCursor::MultiCursor( const char *ns, const BSONObj &pattern, const BSONObj &order, shared_ptr<CursorOp> op, bool mayYield )
@@ -1199,12 +1231,13 @@ doneCheckOrder:
}
bool QueryUtilIndexed::indexUseful( const FieldRangeSetPair &frsp, NamespaceDetails *d, int idxNo, const BSONObj &order ) {
- frsp.assertValidIndex( d, idxNo );
- if ( !frsp.matchPossibleForIndex( d, idxNo, d->idx( idxNo ).keyPattern() ) ) {
+ DEV frsp.assertValidIndex( d, idxNo );
+ BSONObj keyPattern = d->idx( idxNo ).keyPattern();
+ if ( !frsp.matchPossibleForIndex( d, idxNo, keyPattern ) ) {
// No matches are possible in the index so the index may be useful.
return true;
}
- return d->idx( idxNo ).getSpec().suitability( frsp.simplifiedQueryForIndex( d, idxNo, d->idx( idxNo ).keyPattern() ), order ) != USELESS;
+ return d->idx( idxNo ).getSpec().suitability( frsp.simplifiedQueryForIndex( d, idxNo, keyPattern ), order ) != USELESS;
}
void QueryUtilIndexed::clearIndexesForPatterns( const FieldRangeSetPair &frsp, const BSONObj &order ) {
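
Making originalFrsp a pointer lets single-clause queries skip the defensive copy altogether; when it is null the plan simply shares its working range vector, as the _originalFrv fallback above shows. The same fallback in miniature:

    #include <memory>

    struct FrvToy {};

    std::shared_ptr<FrvToy> pickOriginalToy( const std::shared_ptr<FrvToy> &working,
                                             const FrvToy *original ) {
        if ( original )
            return std::shared_ptr<FrvToy>( new FrvToy( *original ) ); // build from the original
        return working;                                                // share the working vector
    }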
diff --git a/db/queryoptimizer.h b/db/queryoptimizer.h
index e55e791e1ca..ad6b985ab1f 100644
--- a/db/queryoptimizer.h
+++ b/db/queryoptimizer.h
@@ -35,10 +35,13 @@ namespace mongo {
class QueryPlan : boost::noncopyable {
public:
+ /**
+ * @param originalFrsp - original constraints for this query clause. If null, frsp will be used instead.
+ */
QueryPlan(NamespaceDetails *d,
int idxNo, // -1 = no index
const FieldRangeSetPair &frsp,
- const FieldRangeSetPair &originalFrsp,
+ const FieldRangeSetPair *originalFrsp,
const BSONObj &originalQuery,
const BSONObj &order,
const BSONObj &startKey = BSONObj(),
@@ -245,6 +248,9 @@ namespace mongo {
typedef boost::shared_ptr<QueryPlan> QueryPlanPtr;
typedef vector<QueryPlanPtr> PlanSet;
+ /**
+ * @param originalFrsp - original constraints for this query clause; if null, frsp will be used.
+ */
QueryPlanSet( const char *ns,
auto_ptr<FieldRangeSetPair> frsp,
auto_ptr<FieldRangeSetPair> originalFrsp,
@@ -272,7 +278,7 @@ namespace mongo {
}
/** Initialize or iterate a runner generated from @param originalOp. */
- shared_ptr<QueryOp> nextOp( QueryOp &originalOp );
+ shared_ptr<QueryOp> nextOp( QueryOp &originalOp, bool retried = false );
/** Yield the runner member. */
@@ -290,7 +296,7 @@ namespace mongo {
//for testing
const FieldRangeSetPair &frsp() const { return *_frsp; }
- const FieldRangeSetPair &originalFrsp() const { return *_originalFrsp; }
+ const FieldRangeSetPair *originalFrsp() const { return _originalFrsp.get(); }
bool modifiedKeys() const;
bool hasMultiKey() const;
@@ -420,7 +426,7 @@ namespace mongo {
shared_ptr<Cursor> singleCursor() const;
/** @return true iff more $or clauses need to be scanned. */
- bool mayRunMore() const { return _or ? ( !_tableScanned && !_org.orFinished() ) : _i == 0; }
+ bool mayRunMore() const { return _or ? ( !_tableScanned && !_org->orFinished() ) : _i == 0; }
/** @return non-$or version of explain output. */
BSONObj oldExplain() const { assertNotOr(); return _currentQps->explain(); }
/** @return true iff this is not a $or query and a plan is selected based on previous success of this plan. */
@@ -445,7 +451,7 @@ namespace mongo {
const char * _ns;
bool _or;
BSONObj _query;
- OrRangeGenerator _org;
+ shared_ptr<OrRangeGenerator> _org; // May be null in certain non $or query cases.
auto_ptr<QueryPlanSet> _currentQps;
int _i;
bool _honorRecordedPlan;
diff --git a/db/queryutil-inl.h b/db/queryutil-inl.h
index 2c3a757b385..d0fc212cef9 100644
--- a/db/queryutil-inl.h
+++ b/db/queryutil-inl.h
@@ -130,5 +130,24 @@ namespace mongo {
}
return ret;
}
+
+ inline bool FieldRangeSetPair::matchPossibleForIndex( NamespaceDetails *d, int idxNo, const BSONObj &keyPattern ) const {
+ assertValidIndexOrNoIndex( d, idxNo );
+ if ( !matchPossible() ) {
+ return false;
+ }
+ if ( idxNo < 0 ) {
+ // multi key matchPossible() is true, so return true.
+ return true;
+ }
+ return frsForIndex( d, idxNo ).matchPossibleForIndex( keyPattern );
+ }
+ inline void FieldRangeSetPair::assertValidIndexOrNoIndex( const NamespaceDetails *d, int idxNo ) const {
+ massert( 14049, "FieldRangeSetPair invalid index specified", idxNo >= -1 );
+ if ( idxNo >= 0 ) {
+ assertValidIndex( d, idxNo );
+ }
+ }
+
} // namespace mongo
diff --git a/db/queryutil.cpp b/db/queryutil.cpp
index ec9ee693511..717eac816b8 100644
--- a/db/queryutil.cpp
+++ b/db/queryutil.cpp
@@ -28,6 +28,7 @@
namespace mongo {
extern BSONObj staticNull;
+ extern BSONObj staticUndefined;
/** returns a string that when used as a matcher, would match a super set of regex()
returns "" for complex regular expressions
@@ -79,6 +80,10 @@ namespace mongo {
r = r.substr( 0 , r.size() - 1 );
return r; //breaking here fails with /^a?/
}
+ else if (c == '|') {
+ // whole match so far is optional. Nothing we can do here.
+ return string();
+ }
else if (c == '\\') {
c = *(regex++);
if (c == 'Q'){
@@ -107,7 +112,7 @@ namespace mongo {
ss << c;
}
}
- else if (strchr("^$.[|()+{", c)) {
+ else if (strchr("^$.[()+{", c)) {
// list of "metacharacters" from man pcrepattern
r = ss.str();
break;
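
Alternation defeats prefix extraction because everything matched so far becomes optional: /^ab|cd/ also matches "cd", so "ab" is not a safe index prefix. A reduced version of the rule (the real simpleRegex additionally handles escapes, \Q...\E, and quantifiers):

    #include <string>

    std::string simplePrefixToy( const std::string &re ) { // input is the part after '^'
        std::string out;
        for ( size_t i = 0; i < re.size(); ++i ) {
            char c = re[ i ];
            if ( c == '|' )
                return std::string(); // whole match so far is optional
            if ( std::string( "^$.[()+{\\*?" ).find( c ) != std::string::npos )
                break;                // other metacharacters end the prefix
            out += c;
        }
        return out; // simplePrefixToy("abc") == "abc", simplePrefixToy("ab|cd") == ""
    }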
@@ -153,25 +158,33 @@ namespace mongo {
FieldRange::FieldRange( const BSONElement &e, bool singleKey, bool isNot, bool optimize )
: _singleKey( singleKey ) {
+ int op = e.getGtLtOp();
+
// NOTE with $not, we could potentially form a complementary set of intervals.
- if ( !isNot && !e.eoo() && e.type() != RegEx && e.getGtLtOp() == BSONObj::opIN ) {
+ if ( !isNot && !e.eoo() && e.type() != RegEx && op == BSONObj::opIN ) {
set<BSONElement,element_lt> vals;
vector<FieldRange> regexes;
uassert( 12580 , "invalid query" , e.isABSONObj() );
BSONObjIterator i( e.embeddedObject() );
while( i.more() ) {
BSONElement ie = i.next();
+ uassert( 15881, "$elemMatch not allowed within $in",
+ ie.type() != Object ||
+ ie.embeddedObject().firstElement().getGtLtOp() != BSONObj::opELEM_MATCH );
if ( ie.type() == RegEx ) {
regexes.push_back( FieldRange( ie, singleKey, false, optimize ) );
}
else {
- // A document array may be indexed by its first element, or
- // as a full array if it is embedded within another array.
+ // A document array may be indexed by its first element, by undefined
+ // if it is empty, or as a full array if it is embedded within another
+ // array.
vals.insert( ie );
if ( ie.type() == Array ) {
- if ( !ie.embeddedObject().firstElement().eoo() ) {
- vals.insert( ie.embeddedObject().firstElement() );
- }
+ BSONElement temp = ie.embeddedObject().firstElement();
+ if ( temp.eoo() ) {
+ temp = staticUndefined.firstElement();
+ }
+ vals.insert( temp );
}
}
}
@@ -185,17 +198,21 @@ namespace mongo {
return;
}
- // A document array may be indexed by its first element, or
- // as a full array if it is embedded within another array.
- if ( e.type() == Array && e.getGtLtOp() == BSONObj::Equality ) {
+ // A document array may be indexed by its first element, by undefined
+ // if it is empty, or as a full array if it is embedded within another
+ // array.
+ if ( e.type() == Array && op == BSONObj::Equality ) {
_intervals.push_back( FieldInterval(e) );
- const BSONElement& temp = e.embeddedObject().firstElement();
- if ( ! temp.eoo() ) {
- if ( temp < e )
- _intervals.insert( _intervals.begin() , temp );
- else
- _intervals.push_back( FieldInterval(temp) );
+ BSONElement temp = e.embeddedObject().firstElement();
+ if ( temp.eoo() ) {
+ temp = staticUndefined.firstElement();
+ }
+ if ( temp < e ) {
+ _intervals.insert( _intervals.begin() , temp );
+ }
+ else {
+ _intervals.push_back( FieldInterval(temp) );
}
return;
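
An equality match against an array therefore probes the same points the key generator writes. For { a: [] } that now means two point intervals instead of one, sketched symbolically:

    #include <cassert>
    #include <string>
    #include <vector>

    int main() {
        // Point intervals for { a: { $eq: [] } }; undefined sorts before
        // the array value, matching the temp < e insertion above.
        std::vector<std::string> points;
        points.push_back( "undefined" ); // the key an empty array is indexed under
        points.push_back( "[]" );        // the whole-array key
        assert( points.size() == 2 );
        return 0;
    }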
@@ -215,8 +232,6 @@ namespace mongo {
if ( e.eoo() )
return;
- int op = e.getGtLtOp();
-
bool existsSpec = false;
if ( op == BSONObj::opEXISTS ) {
existsSpec = e.trueValue();
@@ -622,6 +637,27 @@ namespace mongo {
return o;
}
+ string FieldInterval::toString() const {
+ StringBuilder buf;
+ buf << ( _lower._inclusive ? "[" : "(" );
+ buf << _lower._bound;
+ buf << " , ";
+ buf << _upper._bound;
+ buf << ( _upper._inclusive ? "]" : ")" );
+ return buf.str();
+ }
+
+ string FieldRange::toString() const {
+ StringBuilder buf;
+ buf << "(FieldRange special: " << _special << " singleKey: " << _special << " intervals: ";
+ for( vector<FieldInterval>::const_iterator i = _intervals.begin(); i != _intervals.end(); ++i ) {
+ buf << i->toString();
+ }
+
+ buf << ")";
+ return buf.str();
+ }
+
string FieldRangeSet::getSpecial() const {
string s = "";
for ( map<string,FieldRange>::const_iterator i=_ranges.begin(); i!=_ranges.end(); i++ ) {
@@ -773,30 +809,32 @@ namespace mongo {
}
void FieldRangeSet::processQueryField( const BSONElement &e, bool optimize ) {
- if ( strcmp( e.fieldName(), "$and" ) == 0 ) {
- uassert( 14816 , "$and expression must be a nonempty array" , e.type() == Array && e.embeddedObject().nFields() > 0 );
- BSONObjIterator i( e.embeddedObject() );
- while( i.more() ) {
- BSONElement e = i.next();
- uassert( 14817 , "$and elements must be objects" , e.type() == Object );
- BSONObjIterator j( e.embeddedObject() );
- while( j.more() ) {
- processQueryField( j.next(), optimize );
- }
- }
- }
+ if ( e.fieldName()[ 0 ] == '$' ) {
+ if ( strcmp( e.fieldName(), "$and" ) == 0 ) {
+ uassert( 14816 , "$and expression must be a nonempty array" , e.type() == Array && e.embeddedObject().nFields() > 0 );
+ BSONObjIterator i( e.embeddedObject() );
+ while( i.more() ) {
+ BSONElement e = i.next();
+ uassert( 14817 , "$and elements must be objects" , e.type() == Object );
+ BSONObjIterator j( e.embeddedObject() );
+ while( j.more() ) {
+ processQueryField( j.next(), optimize );
+ }
+ }
+ }
- if ( strcmp( e.fieldName(), "$where" ) == 0 ) {
- return;
- }
+ if ( strcmp( e.fieldName(), "$where" ) == 0 ) {
+ return;
+ }
- if ( strcmp( e.fieldName(), "$or" ) == 0 ) {
- return;
- }
+ if ( strcmp( e.fieldName(), "$or" ) == 0 ) {
+ return;
+ }
- if ( strcmp( e.fieldName(), "$nor" ) == 0 ) {
- return;
- }
+ if ( strcmp( e.fieldName(), "$nor" ) == 0 ) {
+ return;
+ }
+ }
bool equality = ( getGtLtOp( e ) == BSONObj::Equality );
if ( equality && e.type() == Object ) {
@@ -1055,32 +1093,11 @@ namespace mongo {
return ret;
}
- const FieldRangeSet &FieldRangeSetPair::frsForIndex( const NamespaceDetails* nsd, int idxNo ) const {
- assertValidIndexOrNoIndex( nsd, idxNo );
- if ( idxNo < 0 ) {
- // An unindexed cursor cannot have a "single key" constraint.
- return _multiKey;
- }
- return nsd->isMultikey( idxNo ) ? _multiKey : _singleKey;
- }
-
bool FieldRangeSetPair::noNontrivialRanges() const {
return _singleKey.matchPossible() && _singleKey.nNontrivialRanges() == 0 &&
_multiKey.matchPossible() && _multiKey.nNontrivialRanges() == 0;
}
- bool FieldRangeSetPair::matchPossibleForIndex( NamespaceDetails *d, int idxNo, const BSONObj &keyPattern ) const {
- assertValidIndexOrNoIndex( d, idxNo );
- if ( !matchPossible() ) {
- return false;
- }
- if ( idxNo < 0 ) {
- // multi key matchPossible() is true, so return true.
- return true;
- }
- return frsForIndex( d, idxNo ).matchPossibleForIndex( keyPattern );
- }
-
FieldRangeSetPair &FieldRangeSetPair::operator&=( const FieldRangeSetPair &other ) {
_singleKey &= other._singleKey;
_multiKey &= other._multiKey;
@@ -1093,21 +1110,23 @@ namespace mongo {
return *this;
}
+ BSONObj FieldRangeSetPair::simplifiedQueryForIndex( NamespaceDetails *d, int idxNo, const BSONObj &keyPattern ) const {
+ return frsForIndex( d, idxNo ).simplifiedQuery( keyPattern );
+ }
+
void FieldRangeSetPair::assertValidIndex( const NamespaceDetails *d, int idxNo ) const {
massert( 14048, "FieldRangeSetPair invalid index specified", idxNo >= 0 && idxNo < d->nIndexes );
}
-
- void FieldRangeSetPair::assertValidIndexOrNoIndex( const NamespaceDetails *d, int idxNo ) const {
- massert( 14049, "FieldRangeSetPair invalid index specified", idxNo >= -1 );
- if ( idxNo >= 0 ) {
- assertValidIndex( d, idxNo );
+
+ const FieldRangeSet &FieldRangeSetPair::frsForIndex( const NamespaceDetails* nsd, int idxNo ) const {
+ assertValidIndexOrNoIndex( nsd, idxNo );
+ if ( idxNo < 0 ) {
+ // An unindexed cursor cannot have a "single key" constraint.
+ return _multiKey;
}
+ return nsd->isMultikey( idxNo ) ? _multiKey : _singleKey;
}
-
- BSONObj FieldRangeSetPair::simplifiedQueryForIndex( NamespaceDetails *d, int idxNo, const BSONObj &keyPattern ) const {
- return frsForIndex( d, idxNo ).simplifiedQuery( keyPattern );
- }
-
+
bool FieldRangeVector::matchesElement( const BSONElement &e, int i, bool forward ) const {
bool eq;
int l = matchingLowElement( e, i, forward, eq );
diff --git a/db/queryutil.h b/db/queryutil.h
index 00d2d264961..104cde28e4a 100644
--- a/db/queryutil.h
+++ b/db/queryutil.h
@@ -53,6 +53,8 @@ namespace mongo {
/** @return true iff the interval is an equality constraint. */
bool equality() const;
mutable int _cachedEquality;
+
+ string toString() const;
};
/**
@@ -103,6 +105,8 @@ namespace mongo {
* NOTE the resulting intervals might not be strictValid().
*/
void reverse( FieldRange &ret ) const;
+
+ string toString() const;
private:
BSONObj addObj( const BSONObj &o );
void finishOperation( const vector<FieldInterval> &newIntervals, const FieldRange &other );
diff --git a/db/record.cpp b/db/record.cpp
index f5fa972227a..18be9c75fe2 100644
--- a/db/record.cpp
+++ b/db/record.cpp
@@ -120,14 +120,16 @@ namespace mongo {
/**
* after this call, we assume the page is in ram
+ * @param doHalf if this is a known good access, want to put in first half
* @return whether we know the page is in ram
*/
- bool access( size_t region , short offset ) {
+ bool access( size_t region , short offset , bool doHalf ) {
int regionHash = hash(region);
scoped_spinlock lk( _lock );
-
- RARELY {
+
+ static int rarely_count = 0;
+ if ( rarely_count++ % 2048 == 0 ) {
long long now = Listener::getElapsedTimeMillis();
RARELY if ( now == 0 ) {
tlog() << "warning Listener::getElapsedTimeMillis returning 0ms" << endl;
@@ -137,8 +139,8 @@ namespace mongo {
_rotate();
}
}
-
- for ( int i=0; i<NumSlices; i++ ) {
+
+ for ( int i=0; i<NumSlices / ( doHalf ? 2 : 1 ); i++ ) {
int pos = (_curSlice+i)%NumSlices;
State s = _slices[pos].get( regionHash , region , offset );
@@ -205,7 +207,7 @@ namespace mongo {
const size_t region = page >> 6;
const size_t offset = page & 0x3f;
- if ( ps::rolling.access( region , offset ) )
+ if ( ps::rolling.access( region , offset , false ) )
return true;
if ( ! blockSupported )
@@ -214,14 +216,11 @@ namespace mongo {
}
Record* Record::accessed() {
- if ( ! MemoryTrackingEnabled )
- return this;
-
const size_t page = (size_t)data >> 12;
const size_t region = page >> 6;
const size_t offset = page & 0x3f;
-
- ps::rolling.access( region , offset );
+
+ ps::rolling.access( region , offset , true );
return this;
}
diff --git a/db/repl.cpp b/db/repl.cpp
index a4ab6e4f0ea..3d08f2324c0 100644
--- a/db/repl.cpp
+++ b/db/repl.cpp
@@ -95,7 +95,7 @@ namespace mongo {
virtual LockType locktype() const { return WRITE; }
void help(stringstream&h) const { h << "resync (from scratch) an out of date replica slave.\nhttp://www.mongodb.org/display/DOCS/Master+Slave"; }
CmdResync() : Command("resync") { }
- virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
if( cmdLine.usingReplSets() ) {
errmsg = "resync command not currently supported with replica sets. See RS102 info in the mongodb documentations";
result.append("info", "http://www.mongodb.org/display/DOCS/Resyncing+a+Very+Stale+Replica+Set+Member");
@@ -232,7 +232,7 @@ namespace mongo {
}
virtual LockType locktype() const { return NONE; }
CmdIsMaster() : Command("isMaster", true, "ismaster") { }
- virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) {
+ virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) {
/* currently request to arbiter is (somewhat arbitrarily) an ismaster request that is not
authenticated.
we allow unauthenticated ismaster but we aren't as verbose informationally if
@@ -1407,6 +1407,7 @@ namespace mongo {
void newRepl();
void oldRepl();
+ void startReplSets(ReplSetCmdline*);
void startReplication() {
/* if we are going to be a replica set, we aren't doing other forms of replication. */
if( !cmdLine._replSet.empty() ) {
@@ -1416,6 +1417,11 @@ namespace mongo {
log() << "***" << endl;
}
newRepl();
+
+ replSet = true;
+ ReplSetCmdline *replSetCmdline = new ReplSetCmdline(cmdLine._replSet);
+ boost::thread t( boost::bind( &startReplSets, replSetCmdline) );
+
return;
}
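
startReplication() now only spawns the replica-set machinery and returns, so a slow config load cannot stall the rest of startup. The shape of it, with std::thread standing in for boost::thread:

    #include <thread>

    struct ReplSetCmdlineToy {};
    void startReplSetsToy( ReplSetCmdlineToy * ) { /* load config, start manager */ }

    void startReplicationToy( ReplSetCmdlineToy *c ) {
        std::thread( startReplSetsToy, c ).detach(); // fire and forget, as above
    }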
diff --git a/db/repl/consensus.cpp b/db/repl/consensus.cpp
index 3a4dd9b5b3d..07ee2fa80a3 100644
--- a/db/repl/consensus.cpp
+++ b/db/repl/consensus.cpp
@@ -25,6 +25,7 @@ namespace mongo {
public:
CmdReplSetFresh() : ReplSetCommand("replSetFresh") { }
private:
+
bool shouldVeto(const BSONObj& cmdObj, string& errmsg) {
unsigned id = cmdObj["id"].Int();
const Member* primary = theReplSet->box.getPrimary();
@@ -66,7 +67,7 @@ namespace mongo {
return false;
}
- virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
if( !check(errmsg, result) )
return false;
@@ -101,7 +102,7 @@ namespace mongo {
public:
CmdReplSetElect() : ReplSetCommand("replSetElect") { }
private:
- virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
if( !check(errmsg, result) )
return false;
theReplSet->elect.electCmdReceived(cmdObj, &result);
@@ -152,7 +153,7 @@ namespace mongo {
LastYea &L = this->ly.ref(lk);
time_t now = time(0);
if( L.when + LeaseTime >= now && L.who != memberId ) {
- log(1) << "replSet not voting yea for " << memberId <<
+ LOG(1) << "replSet not voting yea for " << memberId <<
" voted for " << L.who << ' ' << now-L.when << " secs ago" << rsLog;
throw VoteException();
}
@@ -176,7 +177,7 @@ namespace mongo {
void Consensus::electCmdReceived(BSONObj cmd, BSONObjBuilder* _b) {
BSONObjBuilder& b = *_b;
DEV log() << "replSet received elect msg " << cmd.toString() << rsLog;
- else log(2) << "replSet received elect msg " << cmd.toString() << rsLog;
+ else LOG(2) << "replSet received elect msg " << cmd.toString() << rsLog;
string set = cmd["set"].String();
unsigned whoid = cmd["whoid"].Int();
int cfgver = cmd["cfgver"].Int();
@@ -309,7 +310,7 @@ namespace mongo {
allUp = false;
}
}
- log(1) << "replSet dev we are freshest of up nodes, nok:" << nok << " nTies:" << nTies << rsLog;
+ LOG(1) << "replSet dev we are freshest of up nodes, nok:" << nok << " nTies:" << nTies << rsLog;
assert( ord <= theReplSet->lastOpTimeWritten ); // <= as this may change while we are working...
return true;
}
diff --git a/db/repl/heartbeat.cpp b/db/repl/heartbeat.cpp
index 6247b4b1d13..7d3f78c73b5 100644
--- a/db/repl/heartbeat.cpp
+++ b/db/repl/heartbeat.cpp
@@ -39,6 +39,8 @@ namespace mongo {
extern bool replSetBlind;
extern ReplSettings replSettings;
+ unsigned int HeartbeatInfo::numPings;
+
long long HeartbeatInfo::timeDown() const {
if( up() ) return 0;
if( downSince == 0 )
@@ -51,7 +53,7 @@ namespace mongo {
public:
virtual bool adminOnly() const { return false; }
CmdReplSetHeartbeat() : ReplSetCommand("replSetHeartbeat") { }
- virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
if( replSetBlind )
return false;
@@ -62,6 +64,10 @@ namespace mongo {
return false;
}
+ if (!checkAuth(errmsg, result)) {
+ return false;
+ }
+
/* we want to keep heartbeat connections open when relinquishing primary. tag them here. */
{
AbstractMessagingPort *mp = cc().port();
@@ -147,7 +153,7 @@ namespace mongo {
string name() const { return "rsHealthPoll"; }
void doWork() {
if ( !theReplSet ) {
- log(2) << "replSet not initialized yet, skipping health poll this round" << rsLog;
+ LOG(2) << "replSet not initialized yet, skipping health poll this round" << rsLog;
return;
}
@@ -169,7 +175,10 @@ namespace mongo {
time_t after = mem.lastHeartbeat = before + (mem.ping / 1000);
// weight new ping with old pings
- mem.ping = (unsigned int)((old.ping * .8) + (mem.ping * .2));
+ // on the first ping, just use the ping value
+ if (old.ping != 0) {
+ mem.ping = (unsigned int)((old.ping * .8) + (mem.ping * .2));
+ }
if ( info["time"].isNumber() ) {
long long t = info["time"].numberLong();
@@ -191,6 +200,8 @@ namespace mongo {
mem.hbstate = MemberState(state.Int());
}
if( ok ) {
+ HeartbeatInfo::numPings++;
+
if( mem.upSince == 0 ) {
log() << "replSet info member " << h.toString() << " is up" << rsLog;
mem.upSince = mem.lastHeartbeat;
@@ -262,6 +273,7 @@ namespace mongo {
private:
void down(HeartbeatInfo& mem, string msg) {
mem.health = 0.0;
+ mem.ping = 0;
if( mem.upSince || mem.downSince == 0 ) {
mem.upSince = 0;
mem.downSince = jsTime();
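
Ping times are smoothed with an exponentially weighted average, and the first sample seeds it directly; since down() now zeroes the ping, the first heartbeat after an outage also reseeds rather than averaging against stale data. Numerically:

    unsigned smoothPingToy( unsigned oldPing, unsigned newPing ) {
        if ( oldPing == 0 )
            return newPing; // first ping, or first after the member was down
        return (unsigned)( oldPing * .8 + newPing * .2 );
    }
    // smoothPingToy( 0, 90 ) == 90; smoothPingToy( 90, 50 ) == 82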
diff --git a/db/repl/replset_commands.cpp b/db/repl/replset_commands.cpp
index 79639acd567..68dab7eb3c1 100644
--- a/db/repl/replset_commands.cpp
+++ b/db/repl/replset_commands.cpp
@@ -45,14 +45,18 @@ namespace mongo {
help << "Just for regression tests.\n";
}
CmdReplSetTest() : ReplSetCommand("replSetTest") { }
- virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
log() << "replSet replSetTest command received: " << cmdObj.toString() << rsLog;
+
+ if (!checkAuth(errmsg, result)) {
+ return false;
+ }
+
if( cmdObj.hasElement("forceInitialSyncFailure") ) {
replSetForceInitialSyncFailure = (unsigned) cmdObj["forceInitialSyncFailure"].Number();
return true;
}
- // may not need this, but if removed check all tests still work:
if( !check(errmsg, result) )
return false;
@@ -76,11 +80,11 @@ namespace mongo {
help << "internal";
}
CmdReplSetGetRBID() : ReplSetCommand("replSetGetRBID") {
- // this is ok but micros or combo with some rand() and/or 64 bits might be better --
+ // this is ok but micros or combo with some rand() and/or 64 bits might be better --
// imagine a restart and a clock correction simultaneously (very unlikely but possible...)
rbid = (int) curTimeMillis64();
}
- virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
if( !check(errmsg, result) )
return false;
result.append("rbid",rbid);
@@ -108,7 +112,7 @@ namespace mongo {
help << "\nhttp://www.mongodb.org/display/DOCS/Replica+Set+Commands";
}
CmdReplSetGetStatus() : ReplSetCommand("replSetGetStatus", true) { }
- virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
if ( cmdObj["forShell"].trueValue() )
lastError.disableForCommand();
@@ -128,17 +132,21 @@ namespace mongo {
help << "\nhttp://www.mongodb.org/display/DOCS/Replica+Set+Commands";
}
CmdReplSetReconfig() : ReplSetCommand("replSetReconfig"), mutex("rsreconfig") { }
- virtual bool run(const string& a, BSONObj& b, string& errmsg, BSONObjBuilder& c, bool d) {
+ virtual bool run(const string& a, BSONObj& b, int e, string& errmsg, BSONObjBuilder& c, bool d) {
try {
rwlock_try_write lk(mutex);
- return _run(a,b,errmsg,c,d);
+ return _run(a,b,e,errmsg,c,d);
}
catch(rwlock_try_write::exception&) { }
errmsg = "a replSetReconfig is already in progress";
return false;
}
private:
- bool _run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool _run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ if ( !checkAuth(errmsg, result) ) {
+ return false;
+ }
+
if( cmdObj["replSetReconfig"].type() != Object ) {
errmsg = "no configuration specified";
return false;
@@ -209,7 +217,7 @@ namespace mongo {
}
CmdReplSetFreeze() : ReplSetCommand("replSetFreeze") { }
- virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
if( !check(errmsg, result) )
return false;
int secs = (int) cmdObj.firstElement().numberInt();
@@ -233,7 +241,7 @@ namespace mongo {
}
CmdReplSetStepDown() : ReplSetCommand("replSetStepDown") { }
- virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
if( !check(errmsg, result) )
return false;
if( !theReplSet->box.getState().primary() ) {
@@ -252,19 +260,19 @@ namespace mongo {
long long int diff = lastOp - closest;
result.append("closest", closest);
result.append("difference", diff);
-
+
if (diff < 0) {
                // not our problem, but we'll wait until things settle down
errmsg = "someone is ahead of the primary?";
return false;
}
-
+
if (diff > 10) {
errmsg = "no secondaries within 10 seconds of my optime";
return false;
}
}
-
+
int secs = (int) cmdObj.firstElement().numberInt();
if( secs == 0 )
secs = 60;
diff --git a/db/repl/rs.cpp b/db/repl/rs.cpp
index 84b92fe9297..243e087eff1 100644
--- a/db/repl/rs.cpp
+++ b/db/repl/rs.cpp
@@ -24,9 +24,12 @@
#include "rs.h"
#include "connections.h"
#include "../repl.h"
+#include "../instance.h"
-namespace mongo {
+using namespace std;
+namespace mongo {
+
using namespace bson;
bool replSet = false;
@@ -60,18 +63,43 @@ namespace mongo {
}
void ReplSetImpl::assumePrimary() {
- log(2) << "assuming primary" << endl;
+ LOG(2) << "replSet assuming primary" << endl;
assert( iAmPotentiallyHot() );
writelock lk("admin."); // so we are synchronized with _logOp()
+
+ // Make sure that new OpTimes are higher than existing ones even with clock skew
+ DBDirectClient c;
+ BSONObj lastOp = c.findOne( "local.oplog.rs", Query().sort(reverseNaturalObj), NULL, QueryOption_SlaveOk );
+ if ( !lastOp.isEmpty() ) {
+ OpTime::setLast( lastOp[ "ts" ].date() );
+ }
+
changeState(MemberState::RS_PRIMARY);
}
void ReplSetImpl::changeState(MemberState s) { box.change(s, _self); }
+ void ReplSetImpl::setMaintenanceMode(const bool inc) {
+ lock lk(this);
+
+ if (inc) {
+ log() << "replSet going into maintenance mode (" << _maintenanceMode << " other tasks)" << rsLog;
+
+ _maintenanceMode++;
+ changeState(MemberState::RS_RECOVERING);
+ }
+ else {
+ _maintenanceMode--;
+ // no need to change state, syncTail will try to go live as a secondary soon
+
+ log() << "leaving maintenance mode (" << _maintenanceMode << " other tasks)" << rsLog;
+ }
+ }
+
Member* ReplSetImpl::getMostElectable() {
lock lk(this);
-
- Member *max = 0;
+
+ Member *max = 0;
for (set<unsigned>::iterator it = _electableSet.begin(); it != _electableSet.end(); it++) {
const Member *temp = findById(*it);
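
The findOne on local.oplog.rs in assumePrimary above seeds OpTime::setLast with the newest optime already on disk, so a freshly elected primary never generates optimes at or below ones that have already replicated, even if its wall clock is behind. A hedged sketch of that invariant; the generator below is illustrative, not mongo's OpTime:

    #include <algorithm>
    #include <cstdint>
    #include <ctime>

    // Illustrative monotonic optime source: setLast() raises the floor to the
    // newest value seen in the oplog; next() never returns a value at or below
    // that floor, falling back to last+1 when the clock has stepped backwards.
    class MonotonicOpTime {
        uint64_t _last;
    public:
        MonotonicOpTime() : _last(0) {}
        void setLast(uint64_t seenInOplog) { _last = std::max(_last, seenInOplog); }
        uint64_t next() {
            uint64_t now = (uint64_t)std::time(0) << 32; // seconds in high bits
            _last = std::max(now, _last + 1);            // skewed clock: last+1
            return _last;
        }
    };
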
@@ -91,7 +119,7 @@ namespace mongo {
const bool closeOnRelinquish = true;
void ReplSetImpl::relinquish() {
- log(2) << "attempting to relinquish" << endl;
+ LOG(2) << "replSet attempting to relinquish" << endl;
if( box.getState().primary() ) {
{
writelock lk("admin."); // so we are synchronized with _logOp()
@@ -239,7 +267,7 @@ namespace mongo {
if( myConfig().arbiterOnly )
b.append("arbiterOnly", true);
- if( myConfig().priority == 0 )
+ if( myConfig().priority == 0 && !myConfig().arbiterOnly)
b.append("passive", true);
if( myConfig().slaveDelay )
b.append("slaveDelay", myConfig().slaveDelay);
@@ -296,8 +324,10 @@ namespace mongo {
_currentSyncTarget(0),
_hbmsgTime(0),
_self(0),
+ _maintenanceMode(0),
mgr( new Manager(this) ),
ghost( new GhostSync(this) ) {
+
_cfg = 0;
memset(_hbmsg, 0, sizeof(_hbmsg));
strcpy( _hbmsg , "initial startup" );
@@ -306,7 +336,7 @@ namespace mongo {
_seeds = &replSetCmdline.seeds;
- log(1) << "replSet beginning startup..." << rsLog;
+ LOG(1) << "replSet beginning startup..." << rsLog;
loadConfig();
@@ -317,7 +347,7 @@ namespace mongo {
for( set<HostAndPort>::iterator i = replSetCmdline.seedSet.begin(); i != replSetCmdline.seedSet.end(); i++ ) {
if( i->isSelf() ) {
if( sss == 1 )
- log(1) << "replSet warning self is listed in the seed list and there are no other seeds listed did you intend that?" << rsLog;
+                    LOG(1) << "replSet warning self is listed in the seed list and there are no other seeds listed; did you intend that?" << rsLog;
}
else
log() << "replSet warning command line seed " << i->toString() << " is not present in the current repl set config" << rsLog;
@@ -382,7 +412,7 @@ namespace mongo {
getLastErrorDefault = new BSONObj( c.getLastErrorDefaults );
}
- list<const ReplSetConfig::MemberCfg*> newOnes;
+ list<ReplSetConfig::MemberCfg*> newOnes;
// additive short-cuts the new config setup. If we are just adding a
// node/nodes and nothing else is changing, this is additive. If it's
// not a reconfig, we're not adding anything
@@ -391,8 +421,8 @@ namespace mongo {
unsigned nfound = 0;
int me = 0;
for( vector<ReplSetConfig::MemberCfg>::iterator i = c.members.begin(); i != c.members.end(); i++ ) {
- const ReplSetConfig::MemberCfg& m = *i;
+ ReplSetConfig::MemberCfg& m = *i;
if( m.h.isSelf() ) {
me++;
}
@@ -443,8 +473,8 @@ namespace mongo {
// this is a shortcut for simple changes
if( additive ) {
log() << "replSet info : additive change to configuration" << rsLog;
- for( list<const ReplSetConfig::MemberCfg*>::const_iterator i = newOnes.begin(); i != newOnes.end(); i++ ) {
- const ReplSetConfig::MemberCfg* m = *i;
+ for( list<ReplSetConfig::MemberCfg*>::const_iterator i = newOnes.begin(); i != newOnes.end(); i++ ) {
+ ReplSetConfig::MemberCfg *m = *i;
Member *mi = new Member(m->h, m->_id, m, false);
/** we will indicate that new members are up() initially so that we don't relinquish our
@@ -456,6 +486,11 @@ namespace mongo {
_members.push(mi);
startHealthTaskFor(mi);
}
+
+ // if we aren't creating new members, we may have to update the
+ // groups for the current ones
+ _cfg->updateMembers(_members);
+
return true;
}
@@ -479,7 +514,7 @@ namespace mongo {
string members = "";
for( vector<ReplSetConfig::MemberCfg>::iterator i = _cfg->members.begin(); i != _cfg->members.end(); i++ ) {
- const ReplSetConfig::MemberCfg& m = *i;
+ ReplSetConfig::MemberCfg& m = *i;
Member *mi;
members += ( members == "" ? "" : ", " ) + m.h.toString();
if( m.h.isSelf() ) {
@@ -594,7 +629,7 @@ namespace mongo {
if( ++once == 1 )
log() << "replSet info you may need to run replSetInitiate -- rs.initiate() in the shell -- if that is not already done" << rsLog;
if( _seeds->size() == 0 )
- log(1) << "replSet info no seed hosts were specified on the --replSet command line" << rsLog;
+ LOG(1) << "replSet info no seed hosts were specified on the --replSet command line" << rsLog;
}
else {
startupStatus = EMPTYUNREACHABLE;
diff --git a/db/repl/rs.h b/db/repl/rs.h
index 7654597a930..14c630d27a2 100644
--- a/db/repl/rs.h
+++ b/db/repl/rs.h
@@ -58,10 +58,11 @@ namespace mongo {
~Member(); // intentionally unimplemented as should never be called -- see List1<>::Base.
Member(const Member&);
public:
- Member(HostAndPort h, unsigned ord, const ReplSetConfig::MemberCfg *c, bool self);
+ Member(HostAndPort h, unsigned ord, ReplSetConfig::MemberCfg *c, bool self);
string fullName() const { return h().toString(); }
const ReplSetConfig::MemberCfg& config() const { return _config; }
+ ReplSetConfig::MemberCfg& configw() { return _config; }
const HeartbeatInfo& hbinfo() const { return _hbinfo; }
HeartbeatInfo& get_hbinfo() { return _hbinfo; }
string lhb() const { return _hbinfo.lastHeartbeatMsg; }
@@ -74,7 +75,7 @@ namespace mongo {
private:
friend class ReplSetImpl;
- const ReplSetConfig::MemberCfg _config;
+ ReplSetConfig::MemberCfg _config;
const HostAndPort _h;
HeartbeatInfo _hbinfo;
};
@@ -242,13 +243,19 @@ namespace mongo {
const Member *primary;
};
const SP get() {
- scoped_lock lk(m);
+ rwlock lk(m, false);
return sp;
}
- MemberState getState() const { return sp.state; }
- const Member* getPrimary() const { return sp.primary; }
+ MemberState getState() const {
+ rwlock lk(m, false);
+ return sp.state;
+ }
+ const Member* getPrimary() const {
+ rwlock lk(m, false);
+ return sp.primary;
+ }
void change(MemberState s, const Member *self) {
- scoped_lock lk(m);
+ rwlock lk(m, true);
if( sp.state != s ) {
log() << "replSet " << s.toString() << rsLog;
}
@@ -262,24 +269,25 @@ namespace mongo {
}
}
void set(MemberState s, const Member *p) {
- scoped_lock lk(m);
- sp.state = s; sp.primary = p;
+ rwlock lk(m, true);
+ sp.state = s;
+ sp.primary = p;
}
void setSelfPrimary(const Member *self) { change(MemberState::RS_PRIMARY, self); }
void setOtherPrimary(const Member *mem) {
- scoped_lock lk(m);
+ rwlock lk(m, true);
assert( !sp.state.primary() );
sp.primary = mem;
}
void noteRemoteIsPrimary(const Member *remote) {
- scoped_lock lk(m);
+ rwlock lk(m, true);
if( !sp.state.secondary() && !sp.state.fatal() )
sp.state = MemberState::RS_RECOVERING;
sp.primary = remote;
}
StateBox() : m("StateBox") { }
private:
- mongo::mutex m;
+ RWLock m;
SP sp;
};
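
StateBox's mutex becomes an RWLock so concurrent readers of the state and primary pointer no longer serialize against each other; only mutations take the lock exclusively. A minimal analogue with a standard shared mutex (simplified to an int state; names are illustrative):

    #include <shared_mutex>

    // Read-mostly state holder: getters share the lock, mutators own it.
    class StateBoxSketch {
        mutable std::shared_mutex _m;
        int _state;
    public:
        StateBoxSketch() : _state(0) {}
        int getState() const {
            std::shared_lock<std::shared_mutex> lk(_m); // rwlock(m, false)
            return _state;
        }
        void change(int s) {
            std::unique_lock<std::shared_mutex> lk(_m); // rwlock(m, true)
            _state = s;
        }
    };
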
@@ -446,11 +454,20 @@ namespace mongo {
List1<Member> _members; // all members of the set EXCEPT _self.
ReplSetConfig::MemberCfg _config; // config of _self
unsigned _id; // _id of _self
+
+ int _maintenanceMode; // if we should stay in recovering state
public:
// this is called from within a writelock in logOpRS
unsigned selfId() const { return _id; }
Manager *mgr;
GhostSync *ghost;
+ /**
+ * This forces a secondary to go into recovering state and stay there
+ * until this is called again, passing in "false". Multiple threads can
+ * call this and it will leave maintenance mode once all of the callers
+ * have called it again, passing in false.
+ */
+ void setMaintenanceMode(const bool inc);
private:
Member* head() const { return _members.head(); }
public:
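
Because setMaintenanceMode is reference-counted, the true/false calls must balance across every caller and every exit path. One way a caller might guarantee that is an RAII guard; the guard below is hypothetical (the diff itself calls setMaintenanceMode directly), and ReplSet stands in for ReplSetImpl:

    // Hypothetical pairing helper for the maintenance-mode counter.
    struct ReplSet {
        virtual void setMaintenanceMode(bool inc) = 0;
        virtual ~ReplSet() {}
    };

    class MaintenanceModeGuard {
        ReplSet& _rs;
    public:
        explicit MaintenanceModeGuard(ReplSet& rs) : _rs(rs) {
            _rs.setMaintenanceMode(true);   // bump the counter, enter RECOVERING
        }
        ~MaintenanceModeGuard() {
            _rs.setMaintenanceMode(false);  // decrement; node may go live at zero
        }
    private:
        MaintenanceModeGuard(const MaintenanceModeGuard&); // non-copyable
    };
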
@@ -553,11 +570,29 @@ namespace mongo {
virtual bool logTheOp() { return false; }
virtual LockType locktype() const { return NONE; }
virtual void help( stringstream &help ) const { help << "internal"; }
+
+ /**
+ * Some replica set commands call this and then call check(). This is
+ * intentional, as they might do things before theReplSet is initialized
+ * that still need to be checked for auth.
+ */
+ bool checkAuth(string& errmsg, BSONObjBuilder& result) {
+ if( !noauth && adminOnly() ) {
+ AuthenticationInfo *ai = cc().getAuthenticationInfo();
+ if (!ai->isAuthorizedForLock("admin", locktype())) {
+ errmsg = "replSet command unauthorized";
+ return false;
+ }
+ }
+ return true;
+ }
+
bool check(string& errmsg, BSONObjBuilder& result) {
if( !replSet ) {
errmsg = "not running with --replSet";
return false;
}
+
if( theReplSet == 0 ) {
result.append("startupStatus", ReplSet::startupStatus);
string s;
@@ -566,7 +601,8 @@ namespace mongo {
result.append("info", "run rs.initiate(...) if not yet done for the set");
return false;
}
- return true;
+
+ return checkAuth(errmsg, result);
}
};
@@ -578,7 +614,7 @@ namespace mongo {
/** inlines ----------------- */
- inline Member::Member(HostAndPort h, unsigned ord, const ReplSetConfig::MemberCfg *c, bool self) :
+ inline Member::Member(HostAndPort h, unsigned ord, ReplSetConfig::MemberCfg *c, bool self) :
_config(*c), _h(h), _hbinfo(ord) {
assert(c);
if( self )
diff --git a/db/repl/rs_config.cpp b/db/repl/rs_config.cpp
index 4d6c7b59bba..745d60b537c 100644
--- a/db/repl/rs_config.cpp
+++ b/db/repl/rs_config.cpp
@@ -83,14 +83,24 @@ namespace mongo {
if( hidden ) b << "hidden" << hidden;
if( !buildIndexes ) b << "buildIndexes" << buildIndexes;
if( !tags.empty() ) {
- BSONArrayBuilder a;
- for( set<string>::const_iterator i = tags.begin(); i != tags.end(); i++ )
- a.append(*i);
- b.appendArray("tags", a.done());
+ BSONObjBuilder a;
+ for( map<string,string>::const_iterator i = tags.begin(); i != tags.end(); i++ )
+ a.append((*i).first, (*i).second);
+ b.append("tags", a.done());
}
return b.obj();
}
+ void ReplSetConfig::updateMembers(List1<Member> &dest) {
+ for (vector<MemberCfg>::iterator source = members.begin(); source < members.end(); source++) {
+ for( Member *d = dest.head(); d; d = d->next() ) {
+ if (d->fullName() == (*source).h.toString()) {
+ d->configw().groupsw() = (*source).groups();
+ }
+ }
+ }
+ }
+
bo ReplSetConfig::asBson() const {
bob b;
b.append("_id", _id).append("version", version);
@@ -307,85 +317,39 @@ namespace mongo {
}
void ReplSetConfig::_populateTagMap(map<string,TagClause> &tagMap) {
- // stage 1: create subgroups for each server corresponding to each of
- // its tags. If a server has three tags, we want it to end up in three
- // subgroups, e.g.: A is tagged with ["A", "dc.ny", "m"]. At the end of
- // this step, tagMap will contain:
- // "A" => {"A.A" : A}
- // "dc.ny" => {"dc.ny.A" : A}
- // "m" => {"m.A" : A}
- // If we have more than one server with the same tag, we end up with
- // something like "x.y.z" => [{"x.y.z.A" : A},{"x.y.z.B" : B}] (if A
- // and B were tagged with "x.y.z").
+ // create subgroups for each server corresponding to each of
+ // its tags. E.g.:
+ //
+ // A is tagged with {"server" : "A", "dc" : "ny"}
+ // B is tagged with {"server" : "B", "dc" : "ny"}
+ //
+ // At the end of this step, tagMap will contain:
+ //
+ // "server" => {"A" : [A], "B" : [B]}
+ // "dc" => {"ny" : [A,B]}
+
for (unsigned i=0; i<members.size(); i++) {
MemberCfg member = members[i];
- for (set<string>::iterator tag = member.tags.begin(); tag != member.tags.end(); tag++) {
- TagClause& clause = tagMap[*tag];
- clause.name = *tag;
+ for (map<string,string>::iterator tag = member.tags.begin(); tag != member.tags.end(); tag++) {
+ string label = (*tag).first;
+ string value = (*tag).second;
- // we also populate the map, to be used by step 2... I think
- // this is correct, as step 2 condenses the groups anyway
- string perServerName = *tag+"."+members[i].h.toString();
+ TagClause& clause = tagMap[label];
+ clause.name = label;
TagSubgroup* subgroup;
- if (clause.subgroups.find(perServerName) == clause.subgroups.end()) {
- clause.subgroups[perServerName] = subgroup = new TagSubgroup(perServerName);
+ // search for "ny" in "dc"'s clause
+ if (clause.subgroups.find(value) == clause.subgroups.end()) {
+ clause.subgroups[value] = subgroup = new TagSubgroup(value);
}
else {
- subgroup = clause.subgroups[perServerName];
+ subgroup = clause.subgroups[value];
}
subgroup->m.insert(&members[i]);
}
}
-
- // stage 2: generate all parent tags. If we have "x.y.z", this
- // generates "x.y" and "x" and creates a map for each clause, e.g.,
- // "x"'s clause might have a map that looks like:
- // "x.y" => {A, B} {C}
- // "x.w" => {D} {E, F}
- for (map<string,TagClause>::iterator baseClause = tagMap.begin(); baseClause != tagMap.end(); baseClause++) {
- string prevPrefix = (*baseClause).first;
- const char *dot = strrchr(prevPrefix.c_str(), '.');
-
- while (dot) {
- // get x.y
- string xyTag = string(prevPrefix.c_str(), dot - prevPrefix.c_str());
- log(1) << "generating tag " << xyTag << rsLog;
- TagClause& xyClause = tagMap[xyTag];
- xyClause.name = xyTag;
-
- // get all of x.y.z's subgroups, add them as a single subgroup of x.y
- TagSubgroup* condensedSubgroup;;
- if (xyClause.subgroups.find(prevPrefix) == xyClause.subgroups.end()) {
- // label this subgroup one higher than the current, e.g.,
- // "x.y.z" if we're creating the "x.y" clause
- condensedSubgroup = new TagSubgroup(prevPrefix);
- xyClause.subgroups[prevPrefix] = condensedSubgroup;
- }
- else {
- condensedSubgroup = xyClause.subgroups[prevPrefix];
- assert(condensedSubgroup->name == prevPrefix);
- }
-
- TagClause& xyzClause = tagMap[prevPrefix];
-
- for (map<string,TagSubgroup*>::iterator xyzSubgroup = xyzClause.subgroups.begin();
- xyzSubgroup != xyzClause.subgroups.end(); xyzSubgroup++) {
- for (set<MemberCfg*>::const_iterator xyzMember = (*xyzSubgroup).second->m.begin();
- xyzMember != (*xyzSubgroup).second->m.end(); xyzMember++) {
- condensedSubgroup->m.insert(*xyzMember);
- // we'll link the member back with the group later, to
- // avoid creating extra link-backs
- }
- }
-
- // advance: if we were handling "x.y", now do "x"
- prevPrefix = xyTag;
- dot = strrchr(prevPrefix.c_str(), '.');
- }
- }
}
void ReplSetConfig::parseRules(const BSONObj& modes) {
@@ -442,7 +406,7 @@ namespace mongo {
for (set<MemberCfg *>::iterator cfg = (*sgs).second->m.begin();
!foundMe && cfg != (*sgs).second->m.end(); cfg++) {
- (*cfg)->groupsw(this).insert((*sgs).second);
+ (*cfg)->groupsw().insert((*sgs).second);
}
}
@@ -463,7 +427,7 @@ namespace mongo {
}
// if we got here, this is a valid rule
- log(1) << "new rule " << rule.fieldName() << ": " << r->toString() << rsLog;
+ LOG(1) << "replSet new rule " << rule.fieldName() << ": " << r->toString() << rsLog;
rules[rule.fieldName()] = r;
}
}
@@ -532,9 +496,10 @@ namespace mongo {
if( mobj.hasElement("votes") )
m.votes = (unsigned) mobj["votes"].Number();
if( mobj.hasElement("tags") ) {
- vector<BSONElement> v = mobj["tags"].Array();
- for( unsigned i = 0; i < v.size(); i++ )
- m.tags.insert( v[i].String() );
+ const BSONObj &t = mobj["tags"].Obj();
+ for (BSONObj::iterator c = t.begin(); c.more(); c.next()) {
+ m.tags[(*c).fieldName()] = (*c).String();
+ }
}
m.check();
}
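
The reworked _populateTagMap drops the dotted-prefix expansion entirely: tags are now {label: value} pairs, and members are grouped per label, then per value. A self-contained model of that grouping, using the same A/B example as the comment above (stand-in types, not the real MemberCfg/TagClause):

    #include <map>
    #include <set>
    #include <string>
    #include <vector>

    struct MemberCfgSketch {
        std::string name;
        std::map<std::string, std::string> tags; // e.g. {"server":"A","dc":"ny"}
    };
    struct TagClauseSketch {
        // value -> members carrying that value for this label
        std::map<std::string, std::set<const MemberCfgSketch*> > subgroups;
    };

    // With A={"server":"A","dc":"ny"} and B={"server":"B","dc":"ny"}, this
    // leaves tagMap["dc"].subgroups["ny"] holding both members, matching the
    // comment in the diff.
    void populateTagMap(const std::vector<MemberCfgSketch>& members,
                        std::map<std::string, TagClauseSketch>& tagMap) {
        for (size_t i = 0; i < members.size(); i++) {
            typedef std::map<std::string, std::string>::const_iterator It;
            for (It t = members[i].tags.begin(); t != members[i].tags.end(); ++t)
                tagMap[t->first].subgroups[t->second].insert(&members[i]);
        }
    }
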
diff --git a/db/repl/rs_config.h b/db/repl/rs_config.h
index d9c9d97ed4d..4e0d1e862c0 100644
--- a/db/repl/rs_config.h
+++ b/db/repl/rs_config.h
@@ -25,7 +25,7 @@
#include "health.h"
namespace mongo {
-
+ class Member;
const string rsConfigNs = "local.system.replset";
class ReplSetConfig {
@@ -61,15 +61,14 @@ namespace mongo {
            int slaveDelay; /* seconds. int rather than unsigned for convenient to/from bson conversion. */
            bool hidden; /* if set, don't advertise to drivers in isMaster. for non-primaries (priority 0) */
bool buildIndexes; /* if false, do not create any non-_id indexes */
- set<string> tags; /* tagging for data center, rack, etc. */
+ map<string,string> tags; /* tagging for data center, rack, etc. */
private:
set<TagSubgroup*> _groups; // the subgroups this member belongs to
public:
const set<TagSubgroup*>& groups() const {
return _groups;
}
- set<TagSubgroup*>& groupsw(ReplSetConfig *c) {
- assert(!c->_constructed);
+ set<TagSubgroup*>& groupsw() {
return _groups;
}
void check() const; /* check validity, assert if not. */
@@ -114,6 +113,11 @@ namespace mongo {
void saveConfigLocally(BSONObj comment); // to local db
string saveConfigEverywhere(); // returns textual info on what happened
+ /**
+ * Update members' groups when the config changes but members stay the same.
+ */
+ void updateMembers(List1<Member> &dest);
+
BSONObj asBson() const;
bool _constructed;
diff --git a/db/repl/rs_initialsync.cpp b/db/repl/rs_initialsync.cpp
index 814bb1d0bf8..142878ab478 100644
--- a/db/repl/rs_initialsync.cpp
+++ b/db/repl/rs_initialsync.cpp
@@ -75,7 +75,7 @@ namespace mongo {
if( d && d->stats.nrecords == 0 )
return; // already empty, ok.
- log(1) << "replSet empty oplog" << rsLog;
+ LOG(1) << "replSet empty oplog" << rsLog;
d->emptyCappedCollection(rsoplog);
}
@@ -85,6 +85,7 @@ namespace mongo {
// find the member with the lowest ping time that has more data than me
for (Member *m = _members.head(); m; m = m->next()) {
if (m->hbinfo().up() &&
+ HeartbeatInfo::numPings > config().members.size()*2 &&
(m->state() == MemberState::RS_PRIMARY ||
(m->state() == MemberState::RS_SECONDARY && m->hbinfo().opTime > lastOpTimeWritten)) &&
(!closest || m->hbinfo().ping < closest->hbinfo().ping)) {
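
The added HeartbeatInfo::numPings condition delays sync-source selection until every member has been pinged a couple of times over, so the "lowest ping" comparison rests on real samples rather than a single measurement. A condensed model of the selection loop (stand-in types; the threshold matches the diff):

    #include <cstddef>
    #include <vector>

    struct Candidate {
        bool up;
        long long opTime;   // how far ahead this member is
        unsigned ping;      // milliseconds, averaged over numPings samples
    };

    // Pick the up, more-current member with the lowest ping, but only once we
    // have at least two pings per member overall (numPings > members*2).
    const Candidate* chooseSyncSource(const std::vector<Candidate>& members,
                                      unsigned numPings, long long myOpTime) {
        if (numPings <= members.size() * 2)
            return 0; // not enough samples yet
        const Candidate* closest = 0;
        for (size_t i = 0; i < members.size(); i++) {
            const Candidate& m = members[i];
            if (m.up && m.opTime > myOpTime &&
                (!closest || m.ping < closest->ping))
                closest = &m;
        }
        return closest;
    }
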
diff --git a/db/repl/rs_initiate.cpp b/db/repl/rs_initiate.cpp
index 5dd0ab23d24..0a796e1e445 100644
--- a/db/repl/rs_initiate.cpp
+++ b/db/repl/rs_initiate.cpp
@@ -150,7 +150,7 @@ namespace mongo {
h << "Initiate/christen a replica set.";
h << "\nhttp://www.mongodb.org/display/DOCS/Replica+Set+Commands";
}
- virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
log() << "replSet replSetInitiate admin command received from client" << rsLog;
if( !replSet ) {
diff --git a/db/repl/rs_member.h b/db/repl/rs_member.h
index 8e5a8ad9da3..d60bb5261e9 100644
--- a/db/repl/rs_member.h
+++ b/db/repl/rs_member.h
@@ -80,7 +80,8 @@ namespace mongo {
DiagStr lastHeartbeatMsg;
OpTime opTime;
int skew;
- unsigned int ping; // microseconds
+ unsigned int ping; // milliseconds
+ static unsigned int numPings;
bool up() const { return health > 0; }
diff --git a/db/repl/rs_rollback.cpp b/db/repl/rs_rollback.cpp
index 67d6cc26f07..cce5c091074 100644
--- a/db/repl/rs_rollback.cpp
+++ b/db/repl/rs_rollback.cpp
@@ -574,7 +574,7 @@ namespace mongo {
sethbmsg("rollback 6");
// clean up oplog
- log(2) << "replSet rollback truncate oplog after " << h.commonPoint.toStringPretty() << rsLog;
+ LOG(2) << "replSet rollback truncate oplog after " << h.commonPoint.toStringPretty() << rsLog;
// todo: fatal error if this throws?
oplogDetails->cappedTruncateAfter(rsoplog, h.commonPointOurDiskloc, false);
diff --git a/db/repl/rs_sync.cpp b/db/repl/rs_sync.cpp
index 95bbe2040a6..5fe3075c0f7 100644
--- a/db/repl/rs_sync.cpp
+++ b/db/repl/rs_sync.cpp
@@ -188,6 +188,16 @@ namespace mongo {
*/
bool ReplSetImpl::tryToGoLiveAsASecondary(OpTime& /*out*/ minvalid) {
bool golive = false;
+
+ {
+ lock lk( this );
+
+ if (_maintenanceMode > 0) {
+ // we're not actually going live
+ return true;
+ }
+ }
+
{
readlock lk("local.replset.minvalid");
BSONObj mv;
@@ -211,7 +221,7 @@ namespace mongo {
BSONObj remoteOldestOp = r.findOne(rsoplog, Query());
OpTime ts = remoteOldestOp["ts"]._opTime();
DEV log() << "replSet remoteOldestOp: " << ts.toStringLong() << rsLog;
- else log(3) << "replSet remoteOldestOp: " << ts.toStringLong() << rsLog;
+ else LOG(3) << "replSet remoteOldestOp: " << ts.toStringLong() << rsLog;
DEV {
log() << "replSet lastOpTimeWritten: " << lastOpTimeWritten.toStringLong() << rsLog;
log() << "replSet our state: " << state().toString() << rsLog;
@@ -251,7 +261,7 @@ namespace mongo {
assert(r.conn() == 0);
if( !r.connect(hn) ) {
- log(2) << "replSet can't connect to " << hn << " to read operations" << rsLog;
+ LOG(2) << "replSet can't connect to " << hn << " to read operations" << rsLog;
r.resetConnection();
return false;
}
@@ -407,7 +417,7 @@ namespace mongo {
}
- {
+ try {
writelock lk("");
                /* if we have become primary, we don't want to apply things from elsewhere
@@ -421,11 +431,16 @@ namespace mongo {
syncApply(o);
_logOpObjRS(o); // with repl sets we write the ops to our oplog too
}
+ catch (DBException& e) {
+ sethbmsg(str::stream() << "syncTail: " << e.toString() << ", syncing: " << o);
+ sleepsecs(30);
+ return;
+ }
}
}
r.tailCheck();
if( !r.haveCursor() ) {
- log(1) << "replSet end syncTail pass with " << hn << rsLog;
+ LOG(1) << "replSet end syncTail pass with " << hn << rsLog;
// TODO : reuse our connection to the primary.
return;
}
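
The try/catch added around the apply loop turns a failed batch into a status message plus a pause instead of an unhandled exception in the sync thread; the caller simply re-enters on the next pass. A sketch of that retry shape (applyBatch/setStatus/sleepFor are illustrative stand-ins, with the 30-second pause mirroring the sleepsecs(30) above):

    #include <stdexcept>
    #include <string>

    // Apply one batch; on failure, report and back off rather than crash.
    template <typename Apply, typename Status, typename Sleep>
    bool applyWithBackoff(Apply applyBatch, Status setStatus, Sleep sleepFor) {
        try {
            applyBatch();
            return true;
        }
        catch (const std::exception& e) {
            setStatus(std::string("syncTail: ") + e.what());
            sleepFor(30);  // seconds
            return false;  // caller retries on the next pass
        }
    }
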
@@ -475,9 +490,7 @@ namespace mongo {
_syncThread();
}
catch(DBException& e) {
- sethbmsg(str::stream() << "syncThread: " << e.toString() <<
- ", try 'use local; db.oplog.rs.findOne({ts : {$gt : new Timestamp(" <<
- lastOpTimeWritten.getSecs() << "000," << lastOpTimeWritten.getInc() << ")}});' on the primary");
+ sethbmsg(str::stream() << "syncThread: " << e.toString());
sleepsecs(10);
}
catch(...) {
@@ -580,7 +593,7 @@ namespace mongo {
// the target might end up with a new Member, but s.slave never
// changes so we'll compare the names
|| target == slave->slave || target->fullName() == slave->slave->fullName()) {
- log(1) << "replica set ghost target no good" << endl;
+ LOG(1) << "replica set ghost target no good" << endl;
return;
}
@@ -593,8 +606,7 @@ namespace mongo {
slave->reader.ghostQueryGTE(rsoplog, last);
}
- log(1) << "last: " << slave->last.toString() << " to " << last.toString() << rsLog;
-
+ LOG(1) << "replSet last: " << slave->last.toString() << " to " << last.toString() << rsLog;
if (slave->last > last) {
return;
}
@@ -608,11 +620,11 @@ namespace mongo {
BSONObj o = slave->reader.nextSafe();
slave->last = o["ts"]._opTime();
}
- log(2) << "now last is " << slave->last.toString() << rsLog;
+ LOG(2) << "now last is " << slave->last.toString() << rsLog;
}
catch (DBException& e) {
// we'll be back
- log(2) << "replSet ghost sync error: " << e.what() << " for "
+ LOG(2) << "replSet ghost sync error: " << e.what() << " for "
<< slave->slave->fullName() << rsLog;
slave->reader.resetConnection();
}
diff --git a/db/scanandorder.cpp b/db/scanandorder.cpp
new file mode 100644
index 00000000000..efa9c8d7f13
--- /dev/null
+++ b/db/scanandorder.cpp
@@ -0,0 +1,93 @@
+/* scanandorder.cpp
+   Order results (that aren't already indexed and in order).
+*/
+
+/**
+ * Copyright (C) 2008 10gen Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "pch.h"
+#include "scanandorder.h"
+
+namespace mongo {
+
+ const unsigned ScanAndOrder::MaxScanAndOrderBytes = 32 * 1024 * 1024;
+
+ void ScanAndOrder::_add(BSONObj& k, BSONObj o, DiskLoc* loc) {
+ if (!loc) {
+ _best.insert(make_pair(k.getOwned(),o.getOwned()));
+ }
+ else {
+ BSONObjBuilder b;
+ b.appendElements(o);
+ b.append("$diskLoc", loc->toBSONObj());
+ _best.insert(make_pair(k.getOwned(), b.obj().getOwned()));
+ }
+ }
+
+ void ScanAndOrder::_addIfBetter(BSONObj& k, BSONObj o, BestMap::iterator i, DiskLoc* loc) {
+ /* todo : we don't correct _approxSize here. */
+ const BSONObj& worstBestKey = i->first;
+ int c = worstBestKey.woCompare(k, _order._spec.keyPattern);
+ if ( c > 0 ) {
+ // k is better, 'upgrade'
+ _best.erase(i);
+ _add(k, o, loc);
+ }
+ }
+
+
+ void ScanAndOrder::add(BSONObj o, DiskLoc* loc) {
+ assert( o.isValid() );
+ BSONObj k = _order.getKeyFromObject(o);
+ if ( k.isEmpty() ) {
+ return;
+ }
+ if ( (int) _best.size() < _limit ) {
+ _approxSize += k.objsize();
+ _approxSize += o.objsize();
+
+ /* note : adjust when bson return limit adjusts. note this limit should be a bit higher. */
+ uassert( 10128 , "too much data for sort() with no index. add an index or specify a smaller limit", _approxSize < MaxScanAndOrderBytes );
+
+ _add(k, o, loc);
+ return;
+ }
+ BestMap::iterator i;
+ assert( _best.end() != _best.begin() );
+ i = _best.end();
+ i--;
+ _addIfBetter(k, o, i, loc);
+ }
+
+
+ void ScanAndOrder::fill(BufBuilder& b, Projection *filter, int& nout ) const {
+ int n = 0;
+ int nFilled = 0;
+ for ( BestMap::const_iterator i = _best.begin(); i != _best.end(); i++ ) {
+ n++;
+ if ( n <= _startFrom )
+ continue;
+ const BSONObj& o = i->second;
+ fillQueryResultFromObj(b, filter, o);
+ nFilled++;
+ if ( nFilled >= _limit )
+ break;
+ uassert( 10129 , "too much data for sort() with no index", b.len() < (int)MaxScanAndOrderBytes ); // appserver limit
+ }
+ nout = nFilled;
+ }
+
+} // namespace mongo
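
The algorithm this new file carries over from the header is a bounded top-N sort: keep at most _limit entries in an ordered multimap, and once full, admit a candidate only by evicting the current worst (the last element), which is exactly what add() and _addIfBetter do above. A stripped-down sketch with int keys, omitting the BSON keys, disk locations, and size accounting:

    #include <map>
    #include <utility>

    class TopNSketch {
        std::multimap<int, int> _best; // ordered: begin()=best, --end()=worst
        size_t _limit;
    public:
        explicit TopNSketch(size_t limit) : _limit(limit) {}
        void add(int key, int value) {
            if (_limit == 0) return; // mirrors the assert in the real add()
            if (_best.size() < _limit) {
                _best.insert(std::make_pair(key, value));
                return;
            }
            std::multimap<int, int>::iterator worst = --_best.end();
            if (key < worst->first) {   // candidate sorts before current worst
                _best.erase(worst);     // 'upgrade', as _addIfBetter puts it
                _best.insert(std::make_pair(key, value));
            }
        }
        size_t size() const { return _best.size(); }
    };
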
diff --git a/db/scanandorder.h b/db/scanandorder.h
index 2957ae60245..33e76f61f67 100644
--- a/db/scanandorder.h
+++ b/db/scanandorder.h
@@ -22,6 +22,7 @@
#include "indexkey.h"
#include "queryutil.h"
+#include "projection.h"
namespace mongo {
@@ -76,30 +77,9 @@ namespace mongo {
typedef multimap<BSONObj,BSONObj,BSONObjCmp> BestMap;
class ScanAndOrder {
- void _add(BSONObj& k, BSONObj o, DiskLoc* loc) {
- if (!loc) {
- _best.insert(make_pair(k.getOwned(),o.getOwned()));
- }
- else {
- BSONObjBuilder b;
- b.appendElements(o);
- b.append("$diskLoc", loc->toBSONObj());
- _best.insert(make_pair(k.getOwned(), b.obj().getOwned()));
- }
- }
-
- void _addIfBetter(BSONObj& k, BSONObj o, BestMap::iterator i, DiskLoc* loc) {
- /* todo : we don't correct _approxSize here. */
- const BSONObj& worstBestKey = i->first;
- int c = worstBestKey.woCompare(k, _order._spec.keyPattern);
- if ( c > 0 ) {
- // k is better, 'upgrade'
- _best.erase(i);
- _add(k, o, loc);
- }
- }
-
public:
+ static const unsigned MaxScanAndOrderBytes;
+
ScanAndOrder(int startFrom, int limit, BSONObj order, const FieldRangeSet &frs) :
_best( BSONObjCmp( order ) ),
_startFrom(startFrom), _order(order, frs) {
@@ -107,60 +87,25 @@ namespace mongo {
_approxSize = 0;
}
- int size() const {
- return _best.size();
- }
-
- void add(BSONObj o, DiskLoc* loc) {
- assert( o.isValid() );
- BSONObj k = _order.getKeyFromObject(o);
- if ( k.isEmpty() ) {
- return;
- }
- if ( (int) _best.size() < _limit ) {
- _approxSize += k.objsize();
- _approxSize += o.objsize();
-
- /* note : adjust when bson return limit adjusts. note this limit should be a bit higher. */
- uassert( 10128 , "too much data for sort() with no index. add an index or specify a smaller limit", _approxSize < 32 * 1024 * 1024 );
-
- _add(k, o, loc);
- return;
- }
- BestMap::iterator i;
- assert( _best.end() != _best.begin() );
- i = _best.end();
- i--;
- _addIfBetter(k, o, i, loc);
- }
+ int size() const { return _best.size(); }
- void _fill(BufBuilder& b, Projection *filter, int& nout, BestMap::iterator begin, BestMap::iterator end) {
- int n = 0;
- int nFilled = 0;
- for ( BestMap::iterator i = begin; i != end; i++ ) {
- n++;
- if ( n <= _startFrom )
- continue;
- BSONObj& o = i->second;
- fillQueryResultFromObj(b, filter, o);
- nFilled++;
- if ( nFilled >= _limit )
- break;
- uassert( 10129 , "too much data for sort() with no index", b.len() < 4000000 ); // appserver limit
- }
- nout = nFilled;
- }
+ void add(BSONObj o, DiskLoc* loc);
/* scanning complete. stick the query result in b for n objects. */
- void fill(BufBuilder& b, Projection *filter, int& nout) {
- _fill(b, filter, nout, _best.begin(), _best.end());
- }
-
+ void fill(BufBuilder& b, Projection *filter, int& nout ) const;
+
+ private:
+
+ void _add(BSONObj& k, BSONObj o, DiskLoc* loc);
+
+ void _addIfBetter(BSONObj& k, BSONObj o, BestMap::iterator i, DiskLoc* loc);
+
BestMap _best; // key -> full object
int _startFrom;
int _limit; // max to send back.
KeyType _order;
unsigned _approxSize;
+
};
} // namespace mongo
diff --git a/db/security.cpp b/db/security.cpp
index 4a6f32600aa..b57326a8233 100644
--- a/db/security.cpp
+++ b/db/security.cpp
@@ -30,7 +30,7 @@
namespace mongo {
bool AuthenticationInfo::_warned = false;
-
+ /*
void AuthenticationInfo::print() const {
cout << "AuthenticationInfo: " << this << '\n';
for ( MA::const_iterator i=_dbs.begin(); i!=_dbs.end(); i++ ) {
@@ -38,7 +38,7 @@ namespace mongo {
}
cout << "END" << endl;
}
-
+ */
string AuthenticationInfo::getUser( const string& dbname ) const {
scoped_spinlock lk(_lock);
@@ -78,9 +78,9 @@ namespace mongo {
pwd = internalSecurity.pwd;
}
else {
- static BSONObj userPattern = fromjson("{\"user\":1}");
+ // static BSONObj userPattern = fromjson("{\"user\":1}");
string systemUsers = dbname + ".system.users";
- OCCASIONALLY Helpers::ensureIndex(systemUsers.c_str(), userPattern, false, "user_1");
+ // OCCASIONALLY Helpers::ensureIndex(systemUsers.c_str(), userPattern, false, "user_1");
{
BSONObjBuilder b;
b << "user" << user;
@@ -107,7 +107,7 @@ namespace mongo {
}
}
- bool CmdLogout::run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool CmdLogout::run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
AuthenticationInfo *ai = cc().getAuthenticationInfo();
ai->logout(dbname);
return true;
diff --git a/db/security.h b/db/security.h
index 2937ef29f80..2937ef29f80 100644..100755
--- a/db/security.h
+++ b/db/security.h
diff --git a/db/security_commands.cpp b/db/security_commands.cpp
index 16face7fc32..2db96802404 100644
--- a/db/security_commands.cpp
+++ b/db/security_commands.cpp
@@ -56,7 +56,7 @@ namespace mongo {
void help(stringstream& h) const { h << "internal"; }
virtual LockType locktype() const { return NONE; }
CmdGetNonce() : Command("getnonce") {}
- bool run(const string&, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool run(const string&, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
nonce64 *n = new nonce64(Security::getNonce());
stringstream ss;
ss << hex << *n;
@@ -68,7 +68,7 @@ namespace mongo {
CmdLogout cmdLogout;
- bool CmdAuthenticate::run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool CmdAuthenticate::run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
log() << " authenticate: " << cmdObj << endl;
string user = cmdObj.getStringField("user");
diff --git a/db/security_common.h b/db/security_common.h
index 3af70cc7b97..2f2565f3ce0 100644
--- a/db/security_common.h
+++ b/db/security_common.h
@@ -57,10 +57,10 @@ namespace mongo {
virtual bool slaveOk() const {
return true;
}
- virtual LockType locktype() const { return WRITE; }
+ virtual LockType locktype() const { return READ; }
virtual void help(stringstream& ss) const { ss << "internal"; }
CmdAuthenticate() : Command("authenticate") {}
- bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl);
+ bool run(const string& dbname , BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result, bool fromRepl);
private:
bool getUserObj(const string& dbname, const string& user, BSONObj& userObj, string& pwd);
void authenticate(const string& dbname, const string& user, const bool readOnly);
@@ -77,7 +77,7 @@ namespace mongo {
void help(stringstream& h) const { h << "de-authenticate"; }
virtual LockType locktype() const { return NONE; }
CmdLogout() : Command("logout") {}
- bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl);
+ bool run(const string& dbname , BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result, bool fromRepl);
};
} // namespace mongo
diff --git a/db/stats/top.cpp b/db/stats/top.cpp
index 51a270c8c8c..f5b6ee42f1c 100644
--- a/db/stats/top.cpp
+++ b/db/stats/top.cpp
@@ -156,7 +156,7 @@ namespace mongo {
virtual LockType locktype() const { return READ; }
virtual void help( stringstream& help ) const { help << "usage by collection, in micros "; }
- virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
{
BSONObjBuilder b( result.subobjStart( "totals" ) );
b.append( "note" , "all times in microseconds" );