author     Dwight <dwight@10gen.com>   2011-08-17 16:55:13 -0400
committer  Dwight <dwight@10gen.com>   2011-08-17 16:55:13 -0400
commit     48977d2abc8ecffaa4c547d427603c7ff24895d3 (patch)
tree       d910534e1d623fdc72dbd063ebf4878b8efedd7c
parent     9ce68d36823c22f641f705928f1c1b22f6206d76 (diff)
parent     e4a084bdab0e2a61e81476068ed494e346715d41 (diff)
download   mongo-48977d2abc8ecffaa4c547d427603c7ff24895d3.tar.gz
merge
302 files changed, 10377 insertions(+), 2474 deletions(-)
diff --git a/.gitignore b/.gitignore
index 87449576069..8ffc0d3e496 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,7 @@
 .sconsign.dblite
 .sconf_temp
 perf.data
+massif.out.*
 *~
 *.swp
diff --git a/SConstruct b/SConstruct
index b8e8c3fc889..99e3189c2d0 100644
--- a/SConstruct
+++ b/SConstruct
@@ -126,10 +126,10 @@ add_option( "staticlibpath", "comma separated list of dirs to search for staticl
 add_option( "boost-compiler", "compiler used for boost (gcc41)" , 1 , True , "boostCompiler" )
 add_option( "boost-version", "boost version for linking(1_38)" , 1 , True , "boostVersion" )
-
 # experimental features
 add_option( "mm", "use main memory instead of memory mapped files" , 0 , True )
 add_option( "asio" , "Use Asynchronous IO (NOT READY YET)" , 0 , True )
+add_option( "ssl" , "Enable SSL" , 0 , True )
 
 # library choices
 add_option( "usesm" , "use spider monkey for javascript" , 0 , True )
@@ -138,12 +138,13 @@ add_option( "usev8" , "use v8 for javascript" , 0 , True )
 # mongo feature options
 add_option( "noshell", "don't build shell" , 0 , True )
 add_option( "safeshell", "don't let shell scripts run programs (still, don't run untrusted scripts)" , 0 , True )
-add_option( "osnew", "use newer operating system API features" , 0 , False )
+add_option( "win2008plus", "use newer operating system API features" , 0 , False )
 
 # dev tools
 add_option( "d", "debug build no optimization, etc..." , 0 , True , "debugBuild" )
 add_option( "dd", "debug build no optimization, additional debug logging, etc..." , 0 , False , "debugBuildAndLogging" )
 add_option( "durableDefaultOn" , "have durable default to on" , 0 , True )
+add_option( "durableDefaultOff" , "have durable default to off" , 0 , True )
 add_option( "pch" , "use precompiled headers to speed up the build (experimental)" , 0 , True , "usePCH" )
 add_option( "distcc" , "use distcc for distributing builds" , 0 , False )
@@ -234,6 +235,9 @@ if has_option( "safeshell" ):
 if has_option( "durableDefaultOn" ):
     env.Append( CPPDEFINES=[ "_DURABLEDEFAULTON" ] )
 
+if has_option( "durableDefaultOff" ):
+    env.Append( CPPDEFINES=[ "_DURABLEDEFAULTOFF" ] )
+
 boostCompiler = GetOption( "boostCompiler" )
 if boostCompiler is None:
     boostCompiler = ""
@@ -343,26 +347,28 @@ processInfoFiles = [ "util/processinfo.cpp" ]
 
 if os.path.exists( "util/processinfo_" + os.sys.platform + ".cpp" ):
     processInfoFiles += [ "util/processinfo_" + os.sys.platform + ".cpp" ]
+elif os.sys.platform == "linux3":
+    processInfoFiles += [ "util/processinfo_linux2.cpp" ]
 else:
     processInfoFiles += [ "util/processinfo_none.cpp" ]
 
 coreServerFiles += processInfoFiles
-
-
 if has_option( "asio" ):
     coreServerFiles += [ "util/net/message_server_asio.cpp" ]
 
 # mongod files - also files used in tools. present in dbtests, but not in mongos and not in client libs.
-serverOnlyFiles = Split( "db/key.cpp db/btreebuilder.cpp util/logfile.cpp util/alignedbuilder.cpp db/mongommf.cpp db/dur.cpp db/durop.cpp db/dur_writetodatafiles.cpp db/dur_preplogbuffer.cpp db/dur_commitjob.cpp db/dur_recover.cpp db/dur_journal.cpp db/introspect.cpp db/btree.cpp db/clientcursor.cpp db/tests.cpp db/repl.cpp db/repl/rs.cpp db/repl/consensus.cpp db/repl/rs_initiate.cpp db/repl/replset_commands.cpp db/repl/manager.cpp db/repl/health.cpp db/repl/heartbeat.cpp db/repl/rs_config.cpp db/repl/rs_rollback.cpp db/repl/rs_sync.cpp db/repl/rs_initialsync.cpp db/oplog.cpp db/repl_block.cpp db/btreecursor.cpp db/cloner.cpp db/namespace.cpp db/cap.cpp db/matcher_covered.cpp db/dbeval.cpp db/restapi.cpp db/dbhelpers.cpp db/instance.cpp db/client.cpp db/database.cpp db/pdfile.cpp db/record.cpp db/cursor.cpp db/security.cpp db/queryoptimizer.cpp db/queryoptimizercursor.cpp db/extsort.cpp db/cmdline.cpp" )
+serverOnlyFiles = Split( "util/compress.cpp db/key.cpp db/btreebuilder.cpp util/logfile.cpp util/alignedbuilder.cpp db/mongommf.cpp db/dur.cpp db/durop.cpp db/dur_writetodatafiles.cpp db/dur_preplogbuffer.cpp db/dur_commitjob.cpp db/dur_recover.cpp db/dur_journal.cpp db/introspect.cpp db/btree.cpp db/clientcursor.cpp db/tests.cpp db/repl.cpp db/repl/rs.cpp db/repl/consensus.cpp db/repl/rs_initiate.cpp db/repl/replset_commands.cpp db/repl/manager.cpp db/repl/health.cpp db/repl/heartbeat.cpp db/repl/rs_config.cpp db/repl/rs_rollback.cpp db/repl/rs_sync.cpp db/repl/rs_initialsync.cpp db/oplog.cpp db/repl_block.cpp db/btreecursor.cpp db/cloner.cpp db/namespace.cpp db/cap.cpp db/matcher_covered.cpp db/dbeval.cpp db/restapi.cpp db/dbhelpers.cpp db/instance.cpp db/client.cpp db/database.cpp db/pdfile.cpp db/record.cpp db/cursor.cpp db/security.cpp db/queryoptimizer.cpp db/queryoptimizercursor.cpp db/extsort.cpp db/cmdline.cpp" )
 
-serverOnlyFiles += [ "db/index.cpp" ] + Glob( "db/geo/*.cpp" ) + Glob( "db/ops/*.cpp" )
+serverOnlyFiles += [ "db/index.cpp" , "db/scanandorder.cpp" ] + Glob( "db/geo/*.cpp" ) + Glob( "db/ops/*.cpp" )
 
 serverOnlyFiles += [ "db/dbcommands.cpp" , "db/dbcommands_admin.cpp" ]
 serverOnlyFiles += Glob( "db/commands/*.cpp" )
 coreServerFiles += Glob( "db/stats/*.cpp" )
 serverOnlyFiles += [ "db/driverHelpers.cpp" ]
 
+snappyFiles = ["third_party/snappy/snappy.cc", "third_party/snappy/snappy-sinksource.cc"]
+
 scriptingFiles = [ "scripting/engine.cpp" , "scripting/utils.cpp" , "scripting/bench.cpp" ]
 
 if usesm:
@@ -474,7 +480,7 @@ if "darwin" == os.sys.platform:
     env.Append( CPPPATH=filterExists(["/sw/include" , "/opt/local/include"]) )
     env.Append( LIBPATH=filterExists(["/sw/lib/", "/opt/local/lib"]) )
 
-elif "linux2" == os.sys.platform:
+elif "linux2" == os.sys.platform or "linux3" == os.sys.platform:
     linux = True
     platform = "linux"
@@ -519,7 +525,7 @@ elif "win32" == os.sys.platform:
     #if force64:
     #    release = True
 
-    if has_option( "osnew" ):
+    if has_option( "win2008plus" ):
         env.Append( CPPDEFINES=[ "MONGO_USE_SRW_ON_WINDOWS" ] )
 
     for pathdir in env['ENV']['PATH'].split(os.pathsep):
@@ -689,6 +695,7 @@ if nix:
     if not has_option('clang'):
         env.Append( CPPFLAGS=" -fno-builtin-memcmp " ) # glibc's memcmp is faster than gcc's
 
+    env.Append( CPPDEFINES="_FILE_OFFSET_BITS=64" )
     env.Append( CXXFLAGS=" -Wnon-virtual-dtor " )
     env.Append( LINKFLAGS=" -fPIC -pthread -rdynamic" )
     env.Append( LIBS=[] )
@@ -704,7 +711,7 @@ if nix:
         env.Append( CPPFLAGS=" -O0 -fstack-protector " );
         env['ENV']['GLIBCXX_FORCE_NEW'] = 1; # play nice with valgrind
     else:
-        env.Append( CPPFLAGS=" -O3" )
+        env.Append( CPPFLAGS=" -O3 " )
         #env.Append( CPPFLAGS=" -fprofile-generate" )
         #env.Append( LINKFLAGS=" -fprofile-generate" )
         # then:
@@ -752,6 +759,10 @@ if "uname" in dir(os):
     if hacks is not None:
         hacks.insert( env , { "linux64" : linux64 } )
 
+if has_option( "ssl" ):
+    env.Append( CPPDEFINES=["MONGO_SSL"] )
+    env.Append( LIBS=["ssl"] )
+
 try:
     umask = os.umask(022)
 except OSError:
@@ -1107,6 +1118,12 @@ def checkErrorCodes():
 
 checkErrorCodes()
 
+snappyEnv = env.Clone()
+if not windows:
+    snappyEnv.Append(CPPFLAGS=" -Wno-sign-compare -Wno-unused-function ") #snappy doesn't compile cleanly
+serverOnlyFiles += [snappyEnv.Object(f) for f in snappyFiles]
+
+
 # main db target
 mongodOnlyFiles = [ "db/db.cpp", "db/compact.cpp" ]
 if windows:
diff --git a/bson/bson-inl.h b/bson/bson-inl.h
index 54431549852..b86d66784ed 100644
--- a/bson/bson-inl.h
+++ b/bson/bson-inl.h
@@ -172,7 +172,7 @@ dodouble:
     }
 
     inline BSONObj BSONElement::embeddedObjectUserCheck() const {
-        if ( isABSONObj() )
+        if ( MONGO_likely(isABSONObj()) )
             return BSONObj(value());
         stringstream ss;
         ss << "invalid parameter: expected an object (" << fieldName() << ")";
diff --git a/bson/bsonobj.h b/bson/bsonobj.h
index c65f1268cc4..b3258a2c1d7 100644
--- a/bson/bsonobj.h
+++ b/bson/bsonobj.h
@@ -308,7 +308,7 @@ namespace mongo {
        /** This is "shallow equality" -- ints and doubles won't match.
            for a deep equality test use woCompare (which is slower). */
-        bool shallowEqual(const BSONObj& r) const {
+        bool binaryEqual(const BSONObj& r) const {
            int os = objsize();
            if ( os == r.objsize() ) {
                return (os == 0 || memcmp(objdata(),r.objdata(),os)==0);
            }
diff --git a/bson/bsonobjbuilder.h b/bson/bsonobjbuilder.h
index 7d6965dd7fc..f61d45879f3 100644
--- a/bson/bsonobjbuilder.h
+++ b/bson/bsonobjbuilder.h
@@ -28,10 +28,10 @@
 #include "bsonobj.h"
 #include "bsonmisc.h"
 
-using namespace std;
-
 namespace mongo {
 
+    using namespace std;
+
 #if defined(_WIN32)
 // warning: 'this' : used in base member initializer list
 #pragma warning( disable : 4355 )
diff --git a/bson/bsonobjiterator.h b/bson/bsonobjiterator.h
index 0d2344e002e..39ae24d9b86 100644
--- a/bson/bsonobjiterator.h
+++ b/bson/bsonobjiterator.h
@@ -37,7 +37,7 @@ namespace mongo {
          */
        BSONObjIterator(const BSONObj& jso) {
            int sz = jso.objsize();
-            if ( sz == 0 ) {
+            if ( MONGO_unlikely(sz == 0) ) {
                _pos = _theend = 0;
                return;
            }
diff --git a/bson/inline_decls.h b/bson/inline_decls.h
index 433a67010cb..30da9b4560d 100644
--- a/bson/inline_decls.h
+++ b/bson/inline_decls.h
@@ -31,24 +31,38 @@
 #endif
 
+namespace mongo {
+
 /* Note: do not clutter code with these -- ONLY use in hot spots / significant loops. */
 
 #if !defined(__GNUC__)
 
-// branch prediction. indicate we expect to enter the if statement body
-# define MONGOIF(x) if( (x) )
+// branch prediction. indicate we expect to be true
+# define MONGO_likely(x) ((bool)(x))
 
-// branch prediction. indicate we expect to not enter the if statement body
-# define MONGO_IF(x) if( (x) )
+// branch prediction. indicate we expect to be false
+# define MONGO_unlikely(x) ((bool)(x))
 
-// prefetch data from memory
-# define MONGOPREFETCH(x) { /*just check we compile:*/ assert(sizeof(*x)); }
+# if defined(_WIN32)
+    // prefetch data from memory
+    inline void prefetch(const void *p) {
+#if defined(_MM_HINT_T0)
+        _mm_prefetch((char *) p, _MM_HINT_T0);
+#endif
+    }
+#else
+    inline void prefetch(void *p) { }
+#endif
 
 #else
 
-# define MONGOIF(x) if( __builtin_expect((x), 1) )
-# define MONGO_IF(x) if( __builtin_expect((x), 0) )
-# define MONGOPREFETCH(x) { /*just check we compile:*/ assert(sizeof(*x)); }
+# define MONGO_likely(x) ( __builtin_expect((bool)(x), 1) )
+# define MONGO_unlikely(x) ( __builtin_expect((bool)(x), 0) )
+
+    inline void prefetch(void *p) {
+        __builtin_prefetch(p);
+    }
 
 #endif
+
+}
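For readers unfamiliar with the pattern: on GCC/Clang, MONGO_likely/MONGO_unlikely expand to __builtin_expect, which only steers block layout and static branch prediction; program semantics are unchanged. A standalone sketch of typical use (illustrative only, names are hypothetical):

    // Same shape as the macros above, reduced to a self-contained example.
    #define LIKELY(x)   ( __builtin_expect((bool)(x), 1) )
    #define UNLIKELY(x) ( __builtin_expect((bool)(x), 0) )

    int sumValid(const int* v, int n) {
        int s = 0;
        for (int i = 0; i < n; ++i) {
            if (UNLIKELY(v[i] < 0))          // error path, expected to be rare
                continue;
            __builtin_prefetch(v + i + 16);  // like prefetch() above: a hint only
            s += v[i];
        }
        return s;
    }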
diff --git a/bson/stringdata.h b/bson/stringdata.h
index c4919e82d97..352dc51813f 100644
--- a/bson/stringdata.h
+++ b/bson/stringdata.h
@@ -60,7 +60,7 @@ namespace mongo {
            : _data(&val[0]), _size(N-1) {}
 
        // accessors
-        const char* const data() const { return _data; }
+        const char* data() const { return _data; }
        const unsigned size() const { return _size; }
 
    private:
diff --git a/buildscripts/errorcodes.py b/buildscripts/errorcodes.py
index ce1b3e465d0..dec1030ddad 100755
--- a/buildscripts/errorcodes.py
+++ b/buildscripts/errorcodes.py
@@ -32,9 +32,9 @@ def assignErrorCodes():
 codes = []
 
 def readErrorCodes( callback, replaceZero = False ):
-    ps = [ re.compile( "(([umsg]asser(t|ted))) *\( *(\d+)" ) ,
-           re.compile( "((User|Msg|MsgAssertion)Exceptio(n))\( *(\d+)" ) ,
-           re.compile( "(((verify))) *\( *(\d+)" )
+    ps = [ re.compile( "(([umsg]asser(t|ted))) *\(( *)(\d+)" ) ,
+           re.compile( "((User|Msg|MsgAssertion)Exceptio(n))\(( *)(\d+)" ) ,
+           re.compile( "(((verify))) *\(( *)(\d+)" )
           ]
 
    for x in utils.getAllSourceFiles():
@@ -52,7 +52,8 @@ def readErrorCodes( callback, replaceZero = False ):
            m = m.groups()
 
            start = m[0]
-            code = m[3]
+            spaces = m[3]
+            code = m[4]
            if code == '0' and replaceZero :
                code = getNextCode( lastCodes )
                lastCodes.append( code )
@@ -65,7 +66,7 @@ def readErrorCodes( callback, replaceZero = False ):
                codes.append( ( x , lineNum , line , code ) )
                callback( x , lineNum , line , code )
 
-            return start + "( " + code
+            return start + "(" + spaces + code
 
        line = re.sub( p, repl, line )
diff --git a/buildscripts/smoke.py b/buildscripts/smoke.py
index 128c01cc287..c46b5d1879d 100755
--- a/buildscripts/smoke.py
+++ b/buildscripts/smoke.py
@@ -110,7 +110,7 @@ class mongod(object):
        sock.connect(("localhost", int(port)))
        sock.close()
 
-    def did_mongod_start(self, port=mongod_port, timeout=90):
+    def did_mongod_start(self, port=mongod_port, timeout=300):
        while timeout > 0:
            time.sleep(1)
            try:
@@ -119,6 +119,7 @@ class mongod(object):
            except Exception,e:
                print >> sys.stderr, e
                timeout = timeout - 1
+        print >> sys.stderr, "timeout starting mongod"
        return False
 
    def start(self):
@@ -148,6 +149,10 @@ class mongod(object):
            argv += ["--master", "--oplogSize", "256"]
        if self.slave:
            argv += ['--slave', '--source', 'localhost:' + str(srcport)]
+        if self.kwargs.get('no_journal'):
+            argv += ['--nojournal']
+        if self.kwargs.get('no_preallocj'):
+            argv += ['--nopreallocj']
        print "running " + " ".join(argv)
        self.proc = Popen(argv)
        if not self.did_mongod_start(self.port):
@@ -280,6 +285,19 @@ def runTest(test):
    t1 = time.time()
    # FIXME: we don't handle the case where the subprocess
    # hangs... that's bad.
+    if argv[0].endswith( 'mongo' ) and not '--eval' in argv :
+        argv = argv + [ '--eval', 'TestData = new Object();' +
+                        'TestData.testPath = "' + path + '";' +
+                        'TestData.testFile = "' + os.path.basename( path ) + '";' +
+                        'TestData.testName = "' + re.sub( ".js$", "", os.path.basename( path ) ) + '";' +
+                        'TestData.noJournal = ' + ( 'true' if no_journal else 'false' ) + ";" +
+                        'TestData.noJournalPrealloc = ' + ( 'true' if no_preallocj else 'false' ) + ";" ]
+
+    if argv[0].endswith( 'test' ) and no_preallocj :
+        argv = argv + [ '--nopreallocj' ]
+
+
+    print argv
    r = call(argv, cwd=test_path)
    t2 = time.time()
    print " " + str((t2 - t1) * 1000) + "ms"
@@ -301,7 +319,7 @@ def run_tests(tests):
    # The reason we use with is so that we get __exit__ semantics
 
-    with mongod(small_oplog=small_oplog) as master:
+    with mongod(small_oplog=small_oplog,no_journal=no_journal,no_preallocj=no_preallocj) as master:
        with mongod(slave=True) if small_oplog else Nothing() as slave:
            if small_oplog:
                master.wait_for_repl()
@@ -421,7 +439,7 @@ def add_exe(e):
    return e
 
 def main():
-    global mongod_executable, mongod_port, shell_executable, continue_on_failure, small_oplog, smoke_db_prefix, test_path
+    global mongod_executable, mongod_port, shell_executable, continue_on_failure, small_oplog, no_journal, no_preallocj, smoke_db_prefix, test_path
    parser = OptionParser(usage="usage: smoke.py [OPTIONS] ARGS*")
    parser.add_option('--mode', dest='mode', default='suite',
                      help='If "files", ARGS are filenames; if "suite", ARGS are sets of tests (%default)')
@@ -447,6 +465,12 @@ def main():
    parser.add_option('--small-oplog', dest='small_oplog', default=False,
                      action="store_true",
                      help='Run tests with master/slave replication & use a small oplog')
+    parser.add_option('--nojournal', dest='no_journal', default=False,
+                      action="store_true",
+                      help='Do not turn on journaling in tests')
+    parser.add_option('--nopreallocj', dest='no_preallocj', default=False,
+                      action="store_true",
+                      help='Do not preallocate journal files in tests')
    global tests
    (options, tests) = parser.parse_args()
 
@@ -467,6 +491,8 @@ def main():
    continue_on_failure = options.continue_on_failure
    smoke_db_prefix = options.smoke_db_prefix
    small_oplog = options.small_oplog
+    no_journal = options.no_journal
+    no_preallocj = options.no_preallocj
 
    if options.File:
        if options.File == '-':
diff --git a/client/connpool.cpp b/client/connpool.cpp
index e94a78d1c45..2d7c37bfbda 100644
--- a/client/connpool.cpp
+++ b/client/connpool.cpp
@@ -238,13 +238,16 @@ namespace mongo {
    }
 
    void DBConnectionPool::appendInfo( BSONObjBuilder& b ) {
-        BSONObjBuilder bb( b.subobjStart( "hosts" ) );
+
        int avail = 0;
        long long created = 0;
 
        map<ConnectionString::ConnectionType,long long> createdByType;
 
+        set<string> replicaSets;
+
+        BSONObjBuilder bb( b.subobjStart( "hosts" ) );
        {
            scoped_lock lk( _mutex );
            for ( PoolMap::iterator i=_pools.begin(); i!=_pools.end(); ++i ) {
@@ -263,9 +266,33 @@ namespace mongo {
 
                long long& x = createdByType[i->second.type()];
                x += i->second.numCreated();
+
+                {
+                    string setName = i->first.ident;
+                    if ( setName.find( "/" ) != string::npos ) {
+                        setName = setName.substr( 0 , setName.find( "/" ) );
+                        replicaSets.insert( setName );
+                    }
+                }
            }
        }
        bb.done();
+
+
+        BSONObjBuilder setBuilder( b.subobjStart( "replicaSets" ) );
+        for ( set<string>::iterator i=replicaSets.begin(); i!=replicaSets.end(); ++i ) {
+            string rs = *i;
+            ReplicaSetMonitorPtr m = ReplicaSetMonitor::get( rs );
+            if ( ! m ) {
+                warning() << "no monitor for set: " << rs << endl;
+                continue;
+            }
+
+            BSONObjBuilder temp( setBuilder.subobjStart( rs ) );
+            m->appendInfo( temp );
+            temp.done();
+        }
+        setBuilder.done();
 
        {
            BSONObjBuilder temp( bb.subobjStart( "createdByType" ) );
@@ -280,20 +307,36 @@ namespace mongo {
    }
 
    bool DBConnectionPool::serverNameCompare::operator()( const string& a , const string& b ) const{
-        string ap = str::before( a , "/" );
-        string bp = str::before( b , "/" );
-
-        return ap < bp;
+        const char* ap = a.c_str();
+        const char* bp = b.c_str();
+
+        while (true){
+            if (*ap == '\0' || *ap == '/'){
+                if (*bp == '\0' || *bp == '/')
+                    return false; // equal strings
+                else
+                    return true; // a is shorter
+            }
+
+            if (*bp == '\0' || *bp == '/')
+                return false; // b is shorter
+
+            if ( *ap < *bp)
+                return true;
+            else if (*ap > *bp)
+                return false;
+
+            ++ap;
+            ++bp;
+        }
+        assert(false);
    }
 
    bool DBConnectionPool::poolKeyCompare::operator()( const PoolKey& a , const PoolKey& b ) const {
-        string ap = str::before( a.ident , "/" );
-        string bp = str::before( b.ident , "/" );
-
-        if ( ap < bp )
+        if (DBConnectionPool::serverNameCompare()( a.ident , b.ident ))
            return true;
-        if ( ap > bp )
+        if (DBConnectionPool::serverNameCompare()( b.ident , a.ident ))
            return false;
 
        return a.timeout < b.timeout;
@@ -366,7 +409,7 @@ namespace mongo {
        PoolFlushCmd() : Command( "connPoolSync" , false , "connpoolsync" ) {}
        virtual void help( stringstream &help ) const { help<<"internal"; }
        virtual LockType locktype() const { return NONE; }
-        virtual bool run(const string&, mongo::BSONObj&, std::string&, mongo::BSONObjBuilder& result, bool) {
+        virtual bool run(const string&, mongo::BSONObj&, int, std::string&, mongo::BSONObjBuilder& result, bool) {
            pool.flush();
            return true;
        }
@@ -381,7 +424,7 @@ namespace mongo {
        PoolStats() : Command( "connPoolStats" ) {}
        virtual void help( stringstream &help ) const { help<<"stats about connection pool"; }
        virtual LockType locktype() const { return NONE; }
-        virtual bool run(const string&, mongo::BSONObj&, std::string&, mongo::BSONObjBuilder& result, bool) {
+        virtual bool run(const string&, mongo::BSONObj&, int, std::string&, mongo::BSONObjBuilder& result, bool) {
            pool.appendInfo( result );
            result.append( "numDBClientConnection" , DBClientConnection::getNumConnections() );
            result.append( "numAScopedConnection" , AScopedConnection::getNumConnections() );
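The rewritten serverNameCompare above is an allocation-free equivalent of comparing the substrings before the first '/', so all pool keys for one replica set (e.g. "rs0/a,b" vs "rs0/b,a") compare equal and, in poolKeyCompare, then order by timeout. A behavioral sketch of the same ordering (illustrative, using std::string for clarity):

    #include <cassert>
    #include <string>

    // Equivalent ordering, written with substr instead of the raw pointer walk.
    bool setNameLess( const std::string& a , const std::string& b ) {
        return a.substr( 0 , a.find( '/' ) ) < b.substr( 0 , b.find( '/' ) );
    }

    int main() {
        // same set name => neither string orders before the other
        assert( ! setNameLess( "rs0/a:27017,b:27017" , "rs0/b:27017,a:27017" ) );
        assert( ! setNameLess( "rs0/b:27017,a:27017" , "rs0/a:27017,b:27017" ) );
        // different set names order lexicographically
        assert( setNameLess( "rs0/x:27017" , "rs1/x:27017" ) );
        return 0;
    }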
diff --git a/client/dbclient.cpp b/client/dbclient.cpp
index 9c907b01a00..dadf7e4f38a 100644
--- a/client/dbclient.cpp
+++ b/client/dbclient.cpp
@@ -80,7 +80,7 @@ namespace mongo {
        case PAIR:
        case SET: {
-            DBClientReplicaSet * set = new DBClientReplicaSet( _setName , _servers );
+            DBClientReplicaSet * set = new DBClientReplicaSet( _setName , _servers , socketTimeout );
            if( ! set->connect() ) {
                delete set;
                errmsg = "connect failed to set ";
@@ -589,6 +589,13 @@ namespace mongo {
            _failed = true;
            return false;
        }
+
+#ifdef MONGO_SSL
+        if ( cmdLine.sslOnNormalPorts ) {
+            p->secure( sslManager() );
+        }
+#endif
+
        return true;
    }
 
@@ -997,6 +1004,19 @@ namespace mongo {
        say(m);
    }
 
+#ifdef MONGO_SSL
+    SSLManager* DBClientConnection::sslManager() {
+        if ( _sslManager )
+            return _sslManager;
+
+        SSLManager* s = new SSLManager(true);
+        _sslManager = s;
+        return s;
+    }
+
+    SSLManager* DBClientConnection::_sslManager = 0;
+#endif
+
    AtomicUInt DBClientConnection::_numConnections;
    bool DBClientConnection::_lazyKillCursor = true;
diff --git a/client/dbclient.h b/client/dbclient.h
index f48f279e9f5..2b4bb857e2d 100644
--- a/client/dbclient.h
+++ b/client/dbclient.h
@@ -110,7 +110,7 @@ namespace mongo {
     */
    enum InsertOptions {
        /** With muli-insert keep processing inserts if one fails */
-        InsertOption_KeepGoing = 1 << 0
+        InsertOption_ContinueOnError = 1 << 0
    };
 
    class DBClientBase;
@@ -353,6 +353,7 @@ namespace mongo {
        virtual void checkResponse( const char* data, int nReturned, bool* retry = NULL, string* targetHost = NULL ) {
            if( retry ) *retry = false; if( targetHost ) *targetHost = "";
        }
+        virtual bool lazySupported() const = 0;
    };
 
    /**
@@ -921,13 +922,15 @@ namespace mongo {
        void setSoTimeout(double to) { _so_timeout = to; }
        double getSoTimeout() const { return _so_timeout; }
 
+        virtual bool lazySupported() const { return true; }
+
        static int getNumConnections() {
            return _numConnections;
        }
 
        static void setLazyKillCursor( bool lazy ) { _lazyKillCursor = lazy; }
        static bool getLazyKillCursor() { return _lazyKillCursor; }
-
+
    protected:
        friend class SyncClusterConnection;
        virtual void sayPiggyBack( Message &toSend );
@@ -951,6 +954,11 @@ namespace mongo {
 
        static AtomicUInt _numConnections;
        static bool _lazyKillCursor; // lazy means we piggy back kill cursors on next op
+
+#ifdef MONGO_SSL
+        static SSLManager* sslManager();
+        static SSLManager* _sslManager;
+#endif
    };
 
    /** pings server to check if it's up
diff --git a/client/dbclient_rs.cpp b/client/dbclient_rs.cpp
index bd108d75ba4..2cab1f7b0d5 100644
--- a/client/dbclient_rs.cpp
+++ b/client/dbclient_rs.cpp
@@ -54,9 +54,9 @@ namespace mongo {
        void run() {
            log() << "starting" << endl;
            while ( ! inShutdown() ) {
-                sleepsecs( 20 );
+                sleepsecs( 10 );
                try {
-                    ReplicaSetMonitor::checkAll();
+                    ReplicaSetMonitor::checkAll( true );
                }
                catch ( std::exception& e ) {
                    error() << "check failed: " << e.what() << endl;
@@ -99,17 +99,14 @@ namespace mongo {
            }
 
            _nodes.push_back( Node( servers[i] , conn.release() ) );
-
+
+            int myLoc = _nodes.size() - 1;
            string maybePrimary;
-            if (_checkConnection( _nodes[_nodes.size()-1].conn , maybePrimary, false)) {
-                break;
-            }
+            _checkConnection( _nodes[myLoc].conn.get() , maybePrimary, false, myLoc );
        }
    }
 
    ReplicaSetMonitor::~ReplicaSetMonitor() {
-        for ( unsigned i=0; i<_nodes.size(); i++ )
-            delete _nodes[i].conn;
        _nodes.clear();
        _master = -1;
    }
@@ -125,7 +122,16 @@ namespace mongo {
        return m;
    }
 
-    void ReplicaSetMonitor::checkAll() {
+    ReplicaSetMonitorPtr ReplicaSetMonitor::get( const string& name ) {
+        scoped_lock lk( _setsLock );
+        map<string,ReplicaSetMonitorPtr>::const_iterator i = _sets.find( name );
+        if ( i == _sets.end() )
+            return ReplicaSetMonitorPtr();
+        return i->second;
+    }
+
+
+    void ReplicaSetMonitor::checkAll( bool checkAllSecondaries ) {
        set<string> seen;
 
        while ( true ) {
@@ -146,7 +152,7 @@ namespace mongo {
            if ( ! m )
                break;
 
-            m->check();
+            m->check( checkAllSecondaries );
        }
 
    }
@@ -202,7 +208,7 @@ namespace mongo {
            return _nodes[_master].addr;
        }
 
-        _check();
+        _check( false );
 
        scoped_lock lk( _lock );
        uassert( 10009 , str::stream() << "ReplicaSetMonitor no master found for set: " << _name , _master >= 0 );
@@ -210,34 +216,70 @@ namespace mongo {
    }
 
    HostAndPort ReplicaSetMonitor::getSlave( const HostAndPort& prev ) {
-        // make sure its valid
-        if ( prev.port() > 0 ) {
+        // make sure its valid
+
+        bool wasFound = false;
+
+        // This is always true, since checked in port()
+        assert( prev.port() >= 0 );
+
+        if( prev.host().size() ){
            scoped_lock lk( _lock );
            for ( unsigned i=0; i<_nodes.size(); i++ ) {
                if ( prev != _nodes[i].addr )
                    continue;
 
-                if ( _nodes[i].ok )
+                wasFound = true;
+
+                if ( _nodes[i].okForSecondaryQueries() )
                    return prev;
+
                break;
            }
        }
 
+        if( prev.host().size() ){
+            if( wasFound ){ LOG(1) << "slave '" << prev << "' is no longer ok to use" << endl; }
+            else{ LOG(1) << "slave '" << prev << "' was not found in the replica set" << endl; }
+        }
+        else LOG(1) << "slave '" << prev << "' is not initialized or invalid" << endl;
+
        return getSlave();
    }
 
    HostAndPort ReplicaSetMonitor::getSlave() {
-        scoped_lock lk( _lock );
-        for ( unsigned i=0; i<_nodes.size(); i++ ) {
-            _nextSlave = ( _nextSlave + 1 ) % _nodes.size();
-            if ( _nextSlave == _master )
-                continue;
-            if ( _nodes[ _nextSlave ].ok )
-                return _nodes[ _nextSlave ].addr;
+        LOG(2) << "selecting new slave from replica set " << getServerAddress() << endl;
+
+        // Logic is to retry three times for any secondary node, if we can't find any secondary, we'll take
+        // any "ok" node
+        // TODO: Could this query hidden nodes?
+        const int MAX = 3;
+        for ( int xxx=0; xxx<MAX; xxx++ ) {
+
+            {
+                scoped_lock lk( _lock );
+
+                unsigned i = 0;
+                for ( ; i<_nodes.size(); i++ ) {
+                    _nextSlave = ( _nextSlave + 1 ) % _nodes.size();
+                    if ( _nextSlave == _master ){
+                        LOG(2) << "not selecting " << _nodes[_nextSlave] << " as it is the current master" << endl;
+                        continue;
+                    }
+                    if ( _nodes[ _nextSlave ].okForSecondaryQueries() || ( _nodes[ _nextSlave ].ok && ( xxx + 1 ) >= MAX ) )
+                        return _nodes[ _nextSlave ].addr;
+
+                    LOG(2) << "not selecting " << _nodes[_nextSlave] << " as it is not ok to use" << endl;
+                }
+
+            }
+
+            check(false);
        }
+
+        LOG(2) << "no suitable slave nodes found, returning default node " << _nodes[ 0 ] << endl;
 
-        return _nodes[ 0 ].addr;
+        return _nodes[0].addr;
    }
 
    /**
@@ -266,7 +308,7 @@ namespace mongo {
            string host = member["name"].String();
 
            int m = -1;
-            if ((m = _find(host)) <= 0) {
+            if ((m = _find(host)) < 0) {
                continue;
            }
 
@@ -309,16 +351,34 @@ namespace mongo {
 
 
-    bool ReplicaSetMonitor::_checkConnection( DBClientConnection * c , string& maybePrimary , bool verbose ) {
+    bool ReplicaSetMonitor::_checkConnection( DBClientConnection * c , string& maybePrimary , bool verbose , int nodesOffset ) {
        scoped_lock lk( _checkConnectionLock );
        bool isMaster = false;
        bool changed = false;
        try {
+            Timer t;
            BSONObj o;
            c->isMaster(isMaster, &o);
+
+            if ( o["setName"].type() != String || o["setName"].String() != _name ) {
+                warning() << "node: " << c->getServerAddress() << " isn't a part of set: " << _name
+                          << " ismaster: " << o << endl;
+                if ( nodesOffset >= 0 )
+                    _nodes[nodesOffset].ok = false;
+                return false;
+            }
 
-            log( ! verbose ) << "ReplicaSetMonitor::_checkConnection: " << c->toString() << ' ' << o << endl;
+            if ( nodesOffset >= 0 ) {
+                _nodes[nodesOffset].pingTimeMillis = t.millis();
+                _nodes[nodesOffset].hidden = o["hidden"].trueValue();
+                _nodes[nodesOffset].secondary = o["secondary"].trueValue();
+                _nodes[nodesOffset].ismaster = o["ismaster"].trueValue();
+
+                _nodes[nodesOffset].lastIsMaster = o.copy();
+            }
+
+            log( ! verbose ) << "ReplicaSetMonitor::_checkConnection: " << c->toString() << ' ' << o << endl;
+
            // add other nodes
            if ( o["hosts"].type() == Array ) {
                if ( o["primary"].type() == String )
@@ -329,11 +389,14 @@ namespace mongo {
            if (o.hasField("passives") && o["passives"].type() == Array) {
                _checkHosts(o["passives"].Obj(), changed);
            }
-
+
            _checkStatus(c);
+
+
        }
        catch ( std::exception& e ) {
            log( ! verbose ) << "ReplicaSetMonitor::_checkConnection: caught exception " << c->toString() << ' ' << e.what() << endl;
+            _nodes[nodesOffset].ok = false;
        }
 
        if ( changed && _hook )
@@ -342,24 +405,28 @@ namespace mongo {
        return isMaster;
    }
 
-    void ReplicaSetMonitor::_check() {
+    void ReplicaSetMonitor::_check( bool checkAllSecondaries ) {
 
        bool triedQuickCheck = false;
 
        LOG(1) << "_check : " << getServerAddress() << endl;
 
+        int newMaster = -1;
+
        for ( int retry = 0; retry < 2; retry++ ) {
            for ( unsigned i=0; i<_nodes.size(); i++ ) {
-                DBClientConnection * c;
+                shared_ptr<DBClientConnection> c;
                {
                    scoped_lock lk( _lock );
                    c = _nodes[i].conn;
                }
 
                string maybePrimary;
-                if ( _checkConnection( c , maybePrimary , retry ) ) {
+                if ( _checkConnection( c.get() , maybePrimary , retry , i ) ) {
                    _master = i;
-                    return;
+                    newMaster = i;
+                    if ( ! checkAllSecondaries )
+                        return;
                }
 
                if ( ! triedQuickCheck && maybePrimary.size() ) {
@@ -367,36 +434,44 @@ namespace mongo {
                    if ( x >= 0 ) {
                        triedQuickCheck = true;
                        string dummy;
-                        DBClientConnection * testConn;
+                        shared_ptr<DBClientConnection> testConn;
                        {
                            scoped_lock lk( _lock );
                            testConn = _nodes[x].conn;
                        }
-                        if ( _checkConnection( testConn , dummy , false ) ) {
+                        if ( _checkConnection( testConn.get() , dummy , false , x ) ) {
                            _master = x;
-                            return;
+                            newMaster = x;
+                            if ( ! checkAllSecondaries )
+                                return;
                        }
                    }
                }
 
            }
+
+            if ( newMaster >= 0 )
+                return;
+
            sleepsecs(1);
        }
 
    }
 
-    void ReplicaSetMonitor::check() {
+    void ReplicaSetMonitor::check( bool checkAllSecondaries ) {
        // first see if the current master is fine
        if ( _master >= 0 ) {
            string temp;
-            if ( _checkConnection( _nodes[_master].conn , temp , false ) ) {
-                // current master is fine, so we're done
-                return;
+            if ( _checkConnection( _nodes[_master].conn.get() , temp , false , _master ) ) {
+                if ( ! checkAllSecondaries ) {
+                    // current master is fine, so we're done
+                    return;
+                }
            }
        }
 
        // we either have no master, or the current is dead
-        _check();
+        _check( checkAllSecondaries );
    }
 
    int ReplicaSetMonitor::_find( const string& server ) const {
@@ -419,7 +494,26 @@ namespace mongo {
                return i;
        return -1;
    }
-
+
+    void ReplicaSetMonitor::appendInfo( BSONObjBuilder& b ) const {
+        scoped_lock lk( _lock );
+        BSONArrayBuilder hosts( b.subarrayStart( "hosts" ) );
+        for ( unsigned i=0; i<_nodes.size(); i++ ) {
+            hosts.append( BSON( "addr" << _nodes[i].addr <<
+                                // "lastIsMaster" << _nodes[i].lastIsMaster << // this is a potential race, so only used when debugging
+                                "ok" << _nodes[i].ok <<
+                                "ismaster" << _nodes[i].ismaster <<
+                                "hidden" << _nodes[i].hidden <<
+                                "secondary" << _nodes[i].secondary <<
+                                "pingTimeMillis" << _nodes[i].pingTimeMillis ) );
+
+        }
+        hosts.done();
+
+        b.append( "master" , _master );
+        b.append( "nextSlave" , _nextSlave );
+    }
+
    mongo::mutex ReplicaSetMonitor::_setsLock( "ReplicaSetMonitor" );
    map<string,ReplicaSetMonitorPtr> ReplicaSetMonitor::_sets;
 
@@ -428,8 +522,9 @@ namespace mongo {
    // ----- DBClientReplicaSet ---------
    // --------------------------------
 
-    DBClientReplicaSet::DBClientReplicaSet( const string& name , const vector<HostAndPort>& servers )
-        : _monitor( ReplicaSetMonitor::get( name , servers ) ) {
+    DBClientReplicaSet::DBClientReplicaSet( const string& name , const vector<HostAndPort>& servers, double so_timeout )
+        : _monitor( ReplicaSetMonitor::get( name , servers ) ),
+          _so_timeout( so_timeout ) {
    }
 
    DBClientReplicaSet::~DBClientReplicaSet() {
@@ -446,7 +541,7 @@ namespace mongo {
        }
 
        _masterHost = _monitor->getMaster();
-        _master.reset( new DBClientConnection( true , this ) );
+        _master.reset( new DBClientConnection( true , this , _so_timeout ) );
        string errmsg;
        if ( ! _master->connect( _masterHost , errmsg ) ) {
            _monitor->notifyFailure( _masterHost );
@@ -463,10 +558,13 @@ namespace mongo {
            if ( ! _slave->isFailed() )
                return _slave.get();
            _monitor->notifySlaveFailure( _slaveHost );
+            _slaveHost = _monitor->getSlave();
+        }
+        else {
+            _slaveHost = h;
        }
-
-        _slaveHost = _monitor->getSlave();
-        _slave.reset( new DBClientConnection( true , this ) );
+
+        _slave.reset( new DBClientConnection( true , this , _so_timeout ) );
        _slave->connect( _slaveHost );
        _auth( _slave.get() );
        return _slave.get();
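In sum, the new getSlave() walks the nodes round-robin, skipping the master and anything that fails okForSecondaryQueries() (healthy, secondary, not hidden), and only on the final pass falls back to any node still flagged ok. A condensed standalone sketch of that policy (illustrative; names are hypothetical and the node list is assumed non-empty):

    #include <string>
    #include <vector>

    struct Node {
        std::string addr;
        bool ok, secondary, hidden;
        bool okForSecondaryQueries() const { return ok && secondary && ! hidden; }
    };

    std::string pickSecondary( const std::vector<Node>& nodes , int master , int& next ) {
        const int MAX = 3; // passes, as in the patch
        for ( int pass = 0; pass < MAX; pass++ ) {
            for ( size_t i = 0; i < nodes.size(); i++ ) {
                next = ( next + 1 ) % (int)nodes.size();
                if ( next == master )
                    continue; // never route a secondary read to the master
                if ( nodes[next].okForSecondaryQueries() )
                    return nodes[next].addr;
                if ( nodes[next].ok && pass + 1 == MAX )
                    return nodes[next].addr; // last pass: settle for merely "ok"
            }
        }
        return nodes[0].addr; // give up, mirroring the default above
    }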
diff --git a/client/dbclient_rs.h b/client/dbclient_rs.h
index 4a0a832d9ca..b6948a05b80 100644
--- a/client/dbclient_rs.h
+++ b/client/dbclient_rs.h
@@ -43,10 +43,16 @@ namespace mongo {
        static ReplicaSetMonitorPtr get( const string& name , const vector<HostAndPort>& servers );
 
        /**
+         * gets a cached Monitor per name or will return none if it doesn't exist
+         */
+        static ReplicaSetMonitorPtr get( const string& name );
+
+
+        /**
         * checks all sets for current master and new secondaries
         * usually only called from a BackgroundJob
         */
-        static void checkAll();
+        static void checkAll( bool checkAllSecondaries );
 
        /**
         * this is called whenever the config of any repclia set changes
@@ -81,13 +87,15 @@ namespace mongo {
        /**
         * checks for current master and new secondaries
         */
-        void check();
+        void check( bool checkAllSecondaries );
 
        string getName() const { return _name; }
 
        string getServerAddress() const;
 
        bool contains( const string& server ) const;
+
+        void appendInfo( BSONObjBuilder& b ) const;
 
    private:
        /**
@@ -98,7 +106,7 @@ namespace mongo {
         */
        ReplicaSetMonitor( const string& name , const vector<HostAndPort>& servers );
 
-        void _check();
+        void _check( bool checkAllSecondaries );
 
        /**
         * Use replSetGetStatus command to make sure hosts in host list are up
@@ -119,9 +127,10 @@ namespace mongo {
         * @param c the connection to check
         * @param maybePrimary OUT
         * @param verbose
+         * @param nodesOffset - offset into _nodes array, -1 for not in it
         * @return if the connection is good
         */
-        bool _checkConnection( DBClientConnection * c , string& maybePrimary , bool verbose );
+        bool _checkConnection( DBClientConnection * c , string& maybePrimary , bool verbose , int nodesOffset );
 
        int _find( const string& server ) const ;
        int _find_inlock( const string& server ) const ;
@@ -132,14 +141,44 @@ namespace mongo {
        string _name;
 
        struct Node {
-            Node( const HostAndPort& a , DBClientConnection* c ) : addr( a ) , conn(c) , ok(true) {}
+            Node( const HostAndPort& a , DBClientConnection* c )
+                : addr( a ) , conn(c) , ok(true) ,
+                  ismaster(false), secondary( false ) , hidden( false ) , pingTimeMillis(0) {
+            }
+
+            bool okForSecondaryQueries() const {
+                return ok && secondary && ! hidden;
+            }
+
+            BSONObj toBSON() const {
+                return BSON( "addr" << addr.toString() <<
+                             "isMaster" << ismaster <<
+                             "secondary" << secondary <<
+                             "hidden" << hidden <<
+                             "ok" << ok );
+            }
+
+            string toString() const {
+                return toBSON().toString();
+            }
+
            HostAndPort addr;
-            DBClientConnection* conn;
+            shared_ptr<DBClientConnection> conn;
 
            // if this node is in a failure state
            // used for slave routing
            // this is too simple, should make it better
            bool ok;
+
+            // as reported by ismaster
+            BSONObj lastIsMaster;
+
+            bool ismaster;
+            bool secondary;
+            bool hidden;
+
+            int pingTimeMillis;
+
        };
 
        /**
@@ -168,7 +207,7 @@ namespace mongo {
    public:
        /** Call connect() after constructing. autoReconnect is
           always on for DBClientReplicaSet connections. */
-        DBClientReplicaSet( const string& name , const vector<HostAndPort>& servers );
+        DBClientReplicaSet( const string& name , const vector<HostAndPort>& servers, double so_timeout=0 );
        virtual ~DBClientReplicaSet();
 
        /** Returns false if nomember of the set were reachable, or neither is
@@ -228,16 +267,14 @@ namespace mongo {
 
        // ----- informational ----
 
-        /**
-         * timeout not supported in DBClientReplicaSet yet
-         */
-        double getSoTimeout() const { return 0; }
+        double getSoTimeout() const { return _so_timeout; }
 
        string toString() { return getServerAddress(); }
 
        string getServerAddress() const { return _monitor->getServerAddress(); }
 
        virtual ConnectionString::ConnectionType type() const { return ConnectionString::SET; }
+        virtual bool lazySupported() const { return true; }
 
        // ---- low level ------
 
@@ -265,6 +302,8 @@ namespace mongo {
 
        HostAndPort _slaveHost;
        scoped_ptr<DBClientConnection> _slave;
+
+        double _so_timeout;
 
        /**
         * for storing authentication info
diff --git a/client/dbclientcursor.cpp b/client/dbclientcursor.cpp
index f1685637311..5db360ef2c7 100644
--- a/client/dbclientcursor.cpp
+++ b/client/dbclientcursor.cpp
@@ -70,6 +70,7 @@ namespace mongo {
    }
 
    void DBClientCursor::initLazy( bool isRetry ) {
+        verify( 15875 , _client->lazySupported() );
        Message toSend;
        _assembleInit( toSend );
        _client->say( toSend, isRetry );
diff --git a/client/distlock.cpp b/client/distlock.cpp
index cd516494cf9..cb711590524 100644
--- a/client/distlock.cpp
+++ b/client/distlock.cpp
@@ -634,7 +634,9 @@ namespace mongo {
            // TODO: Clean up all the extra code to exit this method, probably with a refactor
            if ( ! errMsg.empty() || ! err["n"].type() || err["n"].numberInt() < 1 ) {
                ( errMsg.empty() ? log( logLvl - 1 ) : warning() ) << "Could not re-enter lock '" << lockName << "' "
-                        << ( !errMsg.empty() ? causedBy(errMsg) : string("(not sure lock is held)") ) << endl;
+                        << ( !errMsg.empty() ? causedBy(errMsg) : string("(not sure lock is held)") )
+                        << " gle: " << err
+                        << endl;
                *other = o; other->getOwned(); conn.done();
                return false;
            }
diff --git a/client/distlock_test.cpp b/client/distlock_test.cpp
index ab5183c7069..42a1c48cedb 100644
--- a/client/distlock_test.cpp
+++ b/client/distlock_test.cpp
@@ -86,7 +86,7 @@ namespace mongo {
            }
        }
 
-        bool run(const string&, BSONObj& cmdObj, string& errmsg,
+        bool run(const string&, BSONObj& cmdObj, int, string& errmsg,
                 BSONObjBuilder& result, bool) {
            Timer t;
            DistributedLock lk(ConnectionString(cmdObj["host"].String(),
@@ -288,7 +288,7 @@ namespace mongo {
            return;
        }
 
-        bool run(const string&, BSONObj& cmdObj, string& errmsg,
+        bool run(const string&, BSONObj& cmdObj, int, string& errmsg,
                 BSONObjBuilder& result, bool) {
 
            Timer t;
@@ -417,7 +417,7 @@ namespace mongo {
            return NONE;
        }
 
-        bool run(const string&, BSONObj& cmdObj, string& errmsg,
+        bool run(const string&, BSONObj& cmdObj, int, string& errmsg,
                 BSONObjBuilder& result, bool) {
 
            long long skew = (long long) number_field(cmdObj, "skew", 0);
diff --git a/client/examples/httpClientTest.cpp b/client/examples/httpClientTest.cpp
index fab3251ec49..4055d4492d5 100644
--- a/client/examples/httpClientTest.cpp
+++ b/client/examples/httpClientTest.cpp
@@ -22,20 +22,7 @@
 
 using namespace mongo;
 
-int main( int argc, const char **argv ) {
-
-    int port = 27017;
-    if ( argc != 1 ) {
-        if ( argc != 3 )
-            throw -12;
-        port = atoi( argv[ 2 ] );
-    }
-    port += 1000;
-
-    stringstream ss;
-    ss << "http://localhost:" << port << "/";
-    string url = ss.str();
-
+void play( string url ) {
    cout << "[" << url << "]" << endl;
 
    HttpClient c;
@@ -45,8 +32,27 @@ int main( int argc, const char **argv ) {
    HttpClient::Headers h = r.getHeaders();
    MONGO_assert( h["Content-Type"].find( "text/html" ) == 0 );
 
-    cout << "Headers" << endl;
+    cout << "\tHeaders" << endl;
    for ( HttpClient::Headers::iterator i = h.begin() ; i != h.end(); ++i ) {
-        cout << i->first << "\t" << i->second << endl;
+        cout << "\t\t" << i->first << "\t" << i->second << endl;
    }
+
+}
+
+int main( int argc, const char **argv ) {
+
+    int port = 27017;
+    if ( argc != 1 ) {
+        if ( argc != 3 )
+            throw -12;
+        port = atoi( argv[ 2 ] );
+    }
+    port += 1000;
+
+    play( str::stream() << "http://localhost:" << port << "/" );
+
+#ifdef MONGO_SSL
+    play( "https://www.10gen.com/" );
+#endif
+
 }
diff --git a/client/examples/rs.cpp b/client/examples/rs.cpp
index 65fff8d2948..3307d87b56b 100644
--- a/client/examples/rs.cpp
+++ b/client/examples/rs.cpp
@@ -57,14 +57,19 @@ int main( int argc , const char ** argv ) {
 
    unsigned nThreads = 1;
    bool print = false;
+    bool testTimeout = false;
 
    for ( int i=1; i<argc; i++ ) {
        if ( mongoutils::str::equals( "--threads" , argv[i] ) ) {
            nThreads = atoi( argv[++i] );
        }
-        else if ( mongoutils::str::equals( "--print" , argv[1] ) ) {
+        else if ( mongoutils::str::equals( "--print" , argv[i] ) ) {
            print = true;
        }
+        // Run a special mode to demonstrate the DBClientReplicaSet so_timeout option.
+        else if ( mongoutils::str::equals( "--testTimeout" , argv[i] ) ) {
+            testTimeout = true;
+        }
        else {
            cerr << "unknown option: " << argv[i] << endl;
            return 1;
@@ -79,7 +84,7 @@ int main( int argc , const char ** argv ) {
        return 1;
    }
 
-    DBClientReplicaSet * conn = (DBClientReplicaSet*)cs.connect( errmsg );
+    DBClientReplicaSet * conn = dynamic_cast<DBClientReplicaSet*>(cs.connect( errmsg, testTimeout ? 10 : 0 ));
    if ( ! conn ) {
        cout << "error connecting: " << errmsg << endl;
        return 2;
@@ -88,6 +93,17 @@ int main( int argc , const char ** argv ) {
    string collName = "test.rs1";
 
    conn->dropCollection( collName );
+
+    if ( testTimeout ) {
+        conn->insert( collName, BSONObj() );
+        try {
+            conn->count( collName, BSON( "$where" << "sleep(40000)" ) );
+        } catch( DBException& ) {
+            return 0;
+        }
+        cout << "expected socket exception" << endl;
+        return 1;
+    }
 
    vector<boost::shared_ptr<boost::thread> > threads;
    for ( unsigned i=0; i<nThreads; i++ ) {
diff --git a/client/parallel.cpp b/client/parallel.cpp
index f157927703f..76b0168be22 100644
--- a/client/parallel.cpp
+++ b/client/parallel.cpp
@@ -410,6 +410,7 @@ namespace mongo {
        }
    }
 
+    // TODO: Merge with futures API? We do a lot of error checking here that would be useful elsewhere.
    void ParallelSortClusteredCursor::_init() {
 
        // log() << "Starting parallel search..." << endl;
@@ -720,17 +721,23 @@ namespace mongo {
    // ----  Future  -----
    // -----------------
 
-    Future::CommandResult::CommandResult( const string& server , const string& db , const BSONObj& cmd , DBClientBase * conn )
-        :_server(server) ,_db(db) ,_cmd(cmd) ,_conn(conn) ,_done(false)
+    Future::CommandResult::CommandResult( const string& server , const string& db , const BSONObj& cmd , int options , DBClientBase * conn )
+        :_server(server) ,_db(db) , _options(options), _cmd(cmd) ,_conn(conn) ,_done(false)
    {
        try {
            if ( ! _conn ){
                _connHolder.reset( new ScopedDbConnection( _server ) );
                _conn = _connHolder->get();
            }
-
-            _cursor.reset( new DBClientCursor(_conn, _db + ".$cmd", _cmd, -1/*limit*/, 0, NULL, 0, 0));
-            _cursor->initLazy();
+
+            if ( _conn->lazySupported() ) {
+                _cursor.reset( new DBClientCursor(_conn, _db + ".$cmd", _cmd, -1/*limit*/, 0, NULL, _options, 0));
+                _cursor->initLazy();
+            }
+            else {
+                _done = true; // we set _done first because even if there is an error we're done
+                _ok = _conn->runCommand( db , cmd , _res , options );
+            }
        }
        catch ( std::exception& e ) {
            error() << "Future::spawnComand (part 1) exception: " << e.what() << endl;
@@ -768,8 +775,8 @@ namespace mongo {
        return _ok;
    }
 
-    shared_ptr<Future::CommandResult> Future::spawnCommand( const string& server , const string& db , const BSONObj& cmd , DBClientBase * conn ) {
-        shared_ptr<Future::CommandResult> res (new Future::CommandResult( server , db , cmd , conn ));
+    shared_ptr<Future::CommandResult> Future::spawnCommand( const string& server , const string& db , const BSONObj& cmd , int options , DBClientBase * conn ) {
+        shared_ptr<Future::CommandResult> res (new Future::CommandResult( server , db , cmd , options , conn ));
        return res;
    }
diff --git a/client/parallel.h b/client/parallel.h
index 332840edea1..869bff95a4a 100644
--- a/client/parallel.h
+++ b/client/parallel.h
@@ -280,10 +280,11 @@ namespace mongo {
 
        private:
 
-            CommandResult( const string& server , const string& db , const BSONObj& cmd , DBClientBase * conn );
+            CommandResult( const string& server , const string& db , const BSONObj& cmd , int options , DBClientBase * conn );
 
            string _server;
            string _db;
+            int _options;
            BSONObj _cmd;
            DBClientBase * _conn;
            scoped_ptr<ScopedDbConnection> _connHolder; // used if not provided a connection
@@ -304,7 +305,7 @@ namespace mongo {
         * @param cmd cmd to exec
         * @param conn optional connection to use. will use standard pooled if non-specified
         */
-        static shared_ptr<CommandResult> spawnCommand( const string& server , const string& db , const BSONObj& cmd , DBClientBase * conn = 0 );
+        static shared_ptr<CommandResult> spawnCommand( const string& server , const string& db , const BSONObj& cmd , int options , DBClientBase * conn = 0 );
    };
diff --git a/client/syncclusterconnection.h b/client/syncclusterconnection.h
index edd458fe683..68dd338a408 100644
--- a/client/syncclusterconnection.h
+++ b/client/syncclusterconnection.h
@@ -96,6 +96,7 @@ namespace mongo {
        virtual bool auth(const string &dbname, const string &username, const string &password_text, string& errmsg, bool digestPassword);
 
+        virtual bool lazySupported() const { return false; }
    private:
        SyncClusterConnection( SyncClusterConnection& prev, double socketTimeout = 0 );
        string _toString() const;
diff --git a/db/btree.cpp b/db/btree.cpp
index 232ac615470..e4753bef696 100644
--- a/db/btree.cpp
+++ b/db/btree.cpp
@@ -44,7 +44,7 @@ namespace mongo {
    }
 
    /** data check. like assert, but gives a reasonable error message to the user. */
-#define check(expr) _IF(!(expr)) { checkFailed(__LINE__); }
+#define check(expr) if(!(expr) ) { checkFailed(__LINE__); }
 
 #define VERIFYTHISLOC dassert( thisLoc.btree<V>() == this );
diff --git a/db/btree.h b/db/btree.h
index 2e47d69a221..9ffa54cddc0 100644
--- a/db/btree.h
+++ b/db/btree.h
@@ -1071,7 +1071,7 @@ namespace mongo {
         * Our btrees may (rarely) have "unused" keys when items are deleted.
         * Skip past them.
         */
-        virtual bool skipUnusedKeys( bool mayJump ) = 0;
+        virtual bool skipUnusedKeys() = 0;
 
        bool skipOutOfRangeKeysAndCheckEnd();
        void skipAndCheck();
diff --git a/db/btreecursor.cpp b/db/btreecursor.cpp
index cd145ef861f..f39d5bb0535 100644
--- a/db/btreecursor.cpp
+++ b/db/btreecursor.cpp
@@ -68,7 +68,7 @@ namespace mongo {
            return !currKeyNode().prevChildBucket.isNull();
        }
 
-        bool skipUnusedKeys( bool mayJump ) {
+        bool skipUnusedKeys() {
            int u = 0;
            while ( 1 ) {
                if ( !ok() )
@@ -80,9 +80,6 @@ namespace mongo {
                u++;
                //don't include unused keys in nscanned
                //++_nscanned;
-                if ( mayJump && ( u % 10 == 0 ) ) {
-                    skipOutOfRangeKeysAndCheckEnd();
-                }
            }
            if ( u > 10 )
                OCCASIONALLY log() << "btree unused skipped:" << u << '\n';
@@ -114,13 +111,13 @@ namespace mongo {
            while( 1 ) {
                //  if ( b->keyAt(keyOfs).woEqual(keyAtKeyOfs) &&
                //       b->k(keyOfs).recordLoc == locAtKeyOfs ) {
-                if ( keyAt(keyOfs).shallowEqual(keyAtKeyOfs) ) {
+                if ( keyAt(keyOfs).binaryEqual(keyAtKeyOfs) ) {
                    const _KeyNode& kn = keyNode(keyOfs);
                    if( kn.recordLoc == locAtKeyOfs ) {
                        if ( !kn.isUsed() ) {
                            // we were deleted but still exist as an unused
                            // marker key. advance.
-                            skipUnusedKeys( false );
+                            skipUnusedKeys();
                        }
                        return;
                    }
@@ -149,7 +146,7 @@ namespace mongo {
            bucket = _locate(keyAtKeyOfs, locAtKeyOfs);
            RARELY log() << "key seems to have moved in the index, refinding. " << bucket.toString() << endl;
            if ( ! bucket.isNull() )
-                skipUnusedKeys( false );
+                skipUnusedKeys();
 
        }
@@ -329,18 +326,24 @@ namespace mongo {
        if ( ok() ) {
            _nscanned = 1;
        }
-        skipUnusedKeys( false );
+        skipUnusedKeys();
        checkEnd();
    }
 
    void BtreeCursor::skipAndCheck() {
-        skipUnusedKeys( true );
+        int startNscanned = _nscanned;
+        skipUnusedKeys();
        while( 1 ) {
            if ( !skipOutOfRangeKeysAndCheckEnd() ) {
                break;
            }
-            while( skipOutOfRangeKeysAndCheckEnd() );
-            if ( !skipUnusedKeys( true ) ) {
+            do {
+                if ( _nscanned > startNscanned + 20 ) {
+                    skipUnusedKeys();
+                    return;
+                }
+            } while( skipOutOfRangeKeysAndCheckEnd() );
+            if ( !skipUnusedKeys() ) {
                break;
            }
        }
@@ -395,7 +398,7 @@ namespace mongo {
        bucket = _advance(bucket, keyOfs, _direction, "BtreeCursor::advance");
 
        if ( !_independentFieldRanges ) {
-            skipUnusedKeys( false );
+            skipUnusedKeys();
            checkEnd();
            if ( ok() ) {
                ++_nscanned;
diff --git a/db/client.cpp b/db/client.cpp
index be5dba9ae56..bf3aead75a6 100644
--- a/db/client.cpp
+++ b/db/client.cpp
@@ -122,10 +122,13 @@ namespace mongo {
            error() << "Client::shutdown not called: " << _desc << endl;
        }
 
-        scoped_lock bl(clientsMutex);
-        if ( ! _shutdown )
-            clients.erase(this);
-        delete _curOp;
+        if ( ! inShutdown() ) {
+            // we can't clean up safely once we're in shutdown
+            scoped_lock bl(clientsMutex);
+            if ( ! _shutdown )
+                clients.erase(this);
+            delete _curOp;
+        }
    }
 
    bool Client::shutdown() {
@@ -469,7 +472,7 @@ namespace mongo {
        virtual LockType locktype() const { return NONE; }
        virtual bool slaveOk() const { return true; }
        virtual bool adminOnly() const { return false; }
-        virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+        virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
            Client& c = cc();
            c.gotHandshake( cmdObj );
            return 1;
@@ -688,11 +691,14 @@ namespace mongo {
 #define OPDEBUG_APPEND_NUMBER(x) if( x ) b.append( #x , (x) )
 #define OPDEBUG_APPEND_BOOL(x) if( x ) b.appendBool( #x , (x) )
 
-    void OpDebug::append( BSONObjBuilder& b ) const {
+    void OpDebug::append( const CurOp& curop, BSONObjBuilder& b ) const {
        b.append( "op" , iscommand ? "command" : opToString( op ) );
        b.append( "ns" , ns.toString() );
        if ( ! query.isEmpty() )
            b.append( iscommand ? "command" : "query" , query );
+        else if ( ! iscommand && curop.haveQuery() )
+            curop.appendQuery( b , "query" );
+
        if ( ! updateobj.isEmpty() )
            b.append( "updateobj" , updateobj );
diff --git a/db/clientcursor.cpp b/db/clientcursor.cpp
index 615616e7a7c..e803afd459c 100644
--- a/db/clientcursor.cpp
+++ b/db/clientcursor.cpp
@@ -447,16 +447,29 @@ namespace mongo {
        return rec;
    }
 
-    bool ClientCursor::yieldSometimes( RecordNeeds need ) {
+    bool ClientCursor::yieldSometimes( RecordNeeds need, bool *yielded ) {
+        if ( yielded ) {
+            *yielded = false;
+        }
        if ( ! _yieldSometimesTracker.ping() ) {
            Record* rec = _recordForYield( need );
-            if ( rec )
+            if ( rec ) {
+                if ( yielded ) {
+                    *yielded = true;
+                }
                return yield( yieldSuggest() , rec );
+            }
            return true;
        }
 
        int micros = yieldSuggest();
-        return ( micros > 0 ) ? yield( micros , _recordForYield( need ) ) : true;
+        if ( micros > 0 ) {
+            if ( yielded ) {
+                *yielded = true;
+            }
+            return yield( micros , _recordForYield( need ) );
+        }
+        return true;
    }
 
    void ClientCursor::staticYield( int micros , const StringData& ns , Record * rec ) {
@@ -616,7 +629,7 @@ namespace mongo {
            help << " example: { cursorInfo : 1 }";
        }
        virtual LockType locktype() const { return NONE; }
-        bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+        bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
            ClientCursor::appendStats( result );
            return true;
        }
diff --git a/db/clientcursor.h b/db/clientcursor.h
index b3bd996c768..75c7da85cc6 100644
--- a/db/clientcursor.h
+++ b/db/clientcursor.h
@@ -186,9 +186,10 @@ namespace mongo {
        /**
         * @param needRecord whether or not the next record has to be read from disk for sure
         *                   if this is true, will yield of next record isn't in memory
+         * @param yielded true if a yield occurred, and potentially if a yield did not occur
         * @return same as yield()
         */
-        bool yieldSometimes( RecordNeeds need );
+        bool yieldSometimes( RecordNeeds need, bool *yielded = 0 );
 
        static int yieldSuggest();
        static void staticYield( int micros , const StringData& ns , Record * rec );
diff --git a/db/cloner.cpp b/db/cloner.cpp
index 2a46ea22cb4..8956133daa3 100644
--- a/db/cloner.cpp
+++ b/db/cloner.cpp
@@ -460,7 +460,7 @@ namespace mongo {
            help << "{ clone : \"host13\" }";
        }
        CmdClone() : Command("clone") { }
-        virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+        virtual bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
            string from = cmdObj.getStringField("clone");
            if ( from.empty() )
                return false;
@@ -486,7 +486,7 @@ namespace mongo {
                 "Warning: the local copy of 'ns' is emptied before the copying begins. Any existing data will be lost there."
                 ;
        }
-        virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+        virtual bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
            string fromhost = cmdObj.getStringField("from");
            if ( fromhost.empty() ) {
                errmsg = "missing 'from' parameter";
@@ -538,7 +538,7 @@ namespace mongo {
            help << "get a nonce for subsequent copy db request from secure server\n";
            help << "usage: {copydbgetnonce: 1, fromhost: <hostname>}";
        }
-        virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+        virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
            string fromhost = cmdObj.getStringField("fromhost");
            if ( fromhost.empty() ) {
                /* copy from self */
@@ -579,7 +579,7 @@ namespace mongo {
            help << "copy a database from another host to this host\n";
            help << "usage: {copydb: 1, fromhost: <hostname>, fromdb: <db>, todb: <db>[, slaveOk: <bool>, username: <username>, nonce: <nonce>, key: <key>]}";
        }
-        virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+        virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
            bool slaveOk = cmdObj["slaveOk"].trueValue();
            string fromhost = cmdObj.getStringField("fromhost");
            if ( fromhost.empty() ) {
@@ -633,7 +633,7 @@ namespace mongo {
        virtual void help( stringstream &help ) const {
            help << " example: { renameCollection: foo.a, to: bar.b }";
        }
-        virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+        virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
            string source = cmdObj.getStringField( name.c_str() );
            string target = cmdObj.getStringField( "to" );
            if ( source.empty() || target.empty() ) {
@@ -671,6 +671,8 @@ namespace mongo {
                nsToDatabase( target.c_str(), to );
                if ( strcmp( from, to ) == 0 ) {
                    renameNamespace( source.c_str(), target.c_str() );
+                    // make sure we drop counters etc
+                    Top::global.collectionDropped( source );
                    return true;
                }
            }
diff --git a/db/cmdline.cpp b/db/cmdline.cpp
index d0b80455ff2..06880c98829 100644
--- a/db/cmdline.cpp
+++ b/db/cmdline.cpp
@@ -19,6 +19,7 @@
 #include "pch.h"
 #include "cmdline.h"
 #include "commands.h"
+#include "../util/password.h"
 #include "../util/processinfo.h"
 #include "../util/net/listen.h"
 #include "security_common.h"
@@ -27,6 +28,8 @@
 #include <direct.h>
 #endif
 
+#define MAX_LINE_LENGTH 256
+
 namespace po = boost::program_options;
 namespace fs = boost::filesystem;
 
@@ -64,6 +67,14 @@ namespace mongo {
        ("fork" , "fork server process" )
 #endif
        ;
+
+        hidden.add_options()
+#ifdef MONGO_SSL
+        ("sslOnNormalPorts" , "use ssl on configured ports" )
+        ("sslPEMKeyFile" , po::value<string>(&cmdLine.sslPEMKeyFile), "PEM file for ssl" )
+        ("sslPEMKeyPassword" , new PasswordValue(&cmdLine.sslPEMKeyPassword) , "PEM file password" )
+#endif
+        ;
    }
@@ -85,6 +96,32 @@ namespace mongo {
    }
 #endif
 
+    void CmdLine::parseConfigFile( istream &f, stringstream &ss ) {
+        string s;
+        char line[MAX_LINE_LENGTH];
+
+        while ( f ) {
+            f.getline(line, MAX_LINE_LENGTH);
+            s = line;
+            std::remove(s.begin(), s.end(), ' ');
+            std::remove(s.begin(), s.end(), '\t');
+            boost::to_upper(s);
+
+            if ( s.find( "FASTSYNC" ) != string::npos )
+                cout << "warning \"fastsync\" should not be put in your configuration file" << endl;
+
+            if ( s.c_str()[0] == '#' ) {
+                // skipping commented line
+            } else if ( s.find( "=FALSE" ) == string::npos ) {
+                ss << line << endl;
+            } else {
+                cout << "warning: remove or comment out this line by starting it with \'#\', skipping now : " << line << endl;
+            }
+        }
+        return;
+    }
+
+
    bool CmdLine::store( int argc , char ** argv ,
                         boost::program_options::options_description& visible,
@@ -141,7 +178,9 @@ namespace mongo {
                return false;
            }
 
-            po::store( po::parse_config_file( f , all ) , params );
+            stringstream ss;
+            CmdLine::parseConfigFile( f, ss );
+            po::store( po::parse_config_file( ss , all ) , params );
            f.close();
        }
 
@@ -287,7 +326,25 @@ namespace mongo {
            noauth = false;
        }
 
+#ifdef MONGO_SSL
+        if (params.count("sslOnNormalPorts") ) {
+            cmdLine.sslOnNormalPorts = true;
+
+            if ( cmdLine.sslPEMKeyPassword.size() == 0 ) {
+                log() << "need sslPEMKeyPassword" << endl;
+                dbexit(EXIT_BADOPTIONS);
+            }
+
+            if ( cmdLine.sslPEMKeyFile.size() == 0 ) {
+                log() << "need sslPEMKeyFile" << endl;
+                dbexit(EXIT_BADOPTIONS);
+            }
+
+            cmdLine.sslServerManager = new SSLManager( false );
+            cmdLine.sslServerManager->setupPEM( cmdLine.sslPEMKeyFile , cmdLine.sslPEMKeyPassword );
+        }
+#endif
+
        {
            BSONObjBuilder b;
            for (po::variables_map::const_iterator it(params.begin()), end(params.end()); it != end; it++){
@@ -354,7 +411,7 @@ namespace mongo {
        virtual bool adminOnly() const { return true; }
        virtual bool slaveOk() const { return true; }
 
-        virtual bool run(const string&, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+        virtual bool run(const string&, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
            result.append("argv", argvArray);
            result.append("parsed", parsedOpts);
            return true;
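The parseConfigFile() hook above pre-filters the config file before boost::program_options sees it: commented lines are skipped and lines containing "=false" are dropped with a warning. A standalone restatement of the filter (illustrative; this sketch uses the full erase-remove idiom, whereas the patch calls std::remove alone and only reorders the scratch string):

    #include <algorithm>
    #include <cctype>
    #include <iostream>
    #include <sstream>
    #include <string>

    void filterConfig( std::istream& in , std::ostream& out ) {
        std::string line;
        while ( std::getline( in , line ) ) {
            std::string s = line;
            // strip whitespace from a scratch copy, then uppercase it
            s.erase( std::remove( s.begin() , s.end() , ' ' ) , s.end() );
            s.erase( std::remove( s.begin() , s.end() , '\t' ) , s.end() );
            std::transform( s.begin() , s.end() , s.begin() , ::toupper );

            if ( ! s.empty() && s[0] == '#' )
                continue; // commented line
            if ( s.find( "=FALSE" ) != std::string::npos ) {
                std::cerr << "warning: skipping: " << line << std::endl;
                continue; // "option = false" would still enable the option, so drop it
            }
            out << line << '\n'; // forward the original line unchanged
        }
    }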
todo move to cmdline.cpp? inline CmdLine::CmdLine() : - port(DefaultDBPort), rest(false), jsonp(false), quiet(false), noTableScan(false), prealloc(true), smallfiles(sizeof(int*) == 4), + port(DefaultDBPort), rest(false), jsonp(false), quiet(false), noTableScan(false), prealloc(true), preallocj(true), smallfiles(sizeof(int*) == 4), configsvr(false), quota(false), quotaFiles(8), cpu(false), durOptions(0), objcheck(false), oplogSize(0), defaultProfile(0), slowMS(100), pretouch(0), moveParanoia( true ), syncdelay(60), noUnixSocket(false), socket("/tmp") { - // default may change for this later. + journalCommitInterval = 0; // 0 means use default + dur = false; #if defined(_DURABLEDEFAULTON) dur = true; -#else +#endif + if( sizeof(void*) == 8 ) + dur = true; +#if defined(_DURABLEDEFAULTOFF) dur = false; #endif + +#ifdef MONGO_SSL + sslOnNormalPorts = false; + sslServerManager = 0; +#endif } extern CmdLine cmdLine; diff --git a/db/commands.h b/db/commands.h index 454e2277e06..c18621828f2 100644 --- a/db/commands.h +++ b/db/commands.h @@ -20,6 +20,7 @@ #include "../pch.h" #include "jsobj.h" #include "../util/timer.h" +#include "../client/dbclient.h" namespace mongo { @@ -45,7 +46,7 @@ namespace mongo { return value is true if succeeded. if false, set errmsg text. */ - virtual bool run(const string& db, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) = 0; + virtual bool run(const string& db, BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result, bool fromRepl = false ) = 0; /* note: logTheTop() MUST be false if READ @@ -94,6 +95,11 @@ namespace mongo { */ virtual bool requiresAuth() { return true; } + /* Return true if a replica set secondary should go into "recovering" + (unreadable) state while running this command. + */ + virtual bool maintenanceMode() const { return false; } + /** @param webUI expose the command in the web ui as localhost:28017/<name> @param oldName an optional old, deprecated name for the command */ @@ -120,7 +126,7 @@ namespace mongo { static const map<string,Command*>* commandsByBestName() { return _commandsByBestName; } static const map<string,Command*>* webCommands() { return _webCommands; } /** @return if command was found and executed */ - static bool runAgainstRegistered(const char *ns, BSONObj& jsobj, BSONObjBuilder& anObjBuilder); + static bool runAgainstRegistered(const char *ns, BSONObj& jsobj, BSONObjBuilder& anObjBuilder, int queryOptions = 0); static LockType locktype( const string& name ); static Command * findCommand( const string& name ); }; @@ -139,7 +145,7 @@ namespace mongo { virtual LockType locktype() const { return NONE; } virtual void help( stringstream& help ) const; CmdShutdown() : Command("shutdown") {} - bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl); + bool run(const string& dbname, BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result, bool fromRepl); private: bool shutdownHelper(); }; diff --git a/db/commands/distinct.cpp b/db/commands/distinct.cpp index 9a10e69d5a8..48f44050e49 100644 --- a/db/commands/distinct.cpp +++ b/db/commands/distinct.cpp @@ -32,7 +32,7 @@ namespace mongo { help << "{ distinct : 'collection name' , key : 'a.b' , query : {} }"; } - bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { Timer t; string ns = dbname + '.' 
+ cmdObj.firstElement().valuestr(); diff --git a/db/commands/find_and_modify.cpp b/db/commands/find_and_modify.cpp index 2856ab3d3f1..0cf766fcf87 100644 --- a/db/commands/find_and_modify.cpp +++ b/db/commands/find_and_modify.cpp @@ -37,7 +37,7 @@ namespace mongo { virtual bool logTheOp() { return false; } // the modifications will be logged directly virtual bool slaveOk() const { return false; } virtual LockType locktype() const { return WRITE; } - virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) { static DBDirectClient db; string ns = dbname + '.' + cmdObj.firstElement().valuestr(); diff --git a/db/commands/group.cpp b/db/commands/group.cpp index 9d7acbdf7d4..d3e5839748c 100644 --- a/db/commands/group.cpp +++ b/db/commands/group.cpp @@ -20,6 +20,7 @@ #include "../instance.h" #include "../queryoptimizer.h" #include "../../scripting/engine.h" +#include "../clientcursor.h" namespace mongo { @@ -44,7 +45,7 @@ namespace mongo { uassert( 10042 , "return of $key has to be an object" , type == Object ); return s->getObject( "return" ); } - return obj.extractFields( keyPattern , true ); + return obj.extractFields( keyPattern , true ).getOwned(); } bool group( string realdbname , const string& ns , const BSONObj& query , @@ -88,14 +89,27 @@ namespace mongo { list<BSONObj> blah; shared_ptr<Cursor> cursor = NamespaceDetailsTransient::getCursor(ns.c_str() , query); + ClientCursor::CleanupPointer ccPointer; + ccPointer.reset( new ClientCursor( QueryOption_NoCursorTimeout, cursor, ns ) ); while ( cursor->ok() ) { + + if ( !ccPointer->yieldSometimes( ClientCursor::MaybeCovered ) || + !cursor->ok() ) { + break; + } + if ( ( cursor->matcher() && !cursor->matcher()->matchesCurrent( cursor.get() ) ) || cursor->getsetdup( cursor->currLoc() ) ) { cursor->advance(); continue; } + if ( !ccPointer->yieldSometimes( ClientCursor::WillNeed ) || + !cursor->ok() ) { + break; + } + BSONObj obj = cursor->current(); cursor->advance(); @@ -117,6 +131,7 @@ namespace mongo { throw UserException( 9010 , (string)"reduce invoke failed: " + s->getError() ); } } + ccPointer.reset(); if (!finalize.empty()) { s->exec( "$finalize = " + finalize , "finalize define" , false , true , true , 100 ); @@ -140,7 +155,7 @@ namespace mongo { return true; } - bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { if ( !globalScriptEngine ) { errmsg = "server-side JavaScript execution is disabled"; diff --git a/db/commands/isself.cpp b/db/commands/isself.cpp index cac8380dc20..5a868de919f 100644 --- a/db/commands/isself.cpp +++ b/db/commands/isself.cpp @@ -130,7 +130,7 @@ namespace mongo { help << "{ _isSelf : 1 } INTERNAL ONLY"; } - bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { init(); result.append( "id" , _id ); return true; diff --git a/db/commands/mr.cpp b/db/commands/mr.cpp index 75f5615b9f6..56e9770dff2 100644 --- a/db/commands/mr.cpp +++ b/db/commands/mr.cpp @@ -879,8 +879,6 @@ namespace mongo { } } -// boost::thread_specific_ptr<State*> _tl; - /** * emit that will be called by js function */ @@ -932,7 +930,7 @@ namespace mongo { help << 
"http://www.mongodb.org/display/DOCS/MapReduce"; } virtual LockType locktype() const { return NONE; } - bool run(const string& dbname , BSONObj& cmd, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + bool run(const string& dbname , BSONObj& cmd, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { Timer t; Client::GodScope cg; Client& client = cc(); @@ -968,12 +966,6 @@ namespace mongo { state.init(); state.prepTempCollection(); - { - State** s = new State*(); - s[0] = &state; -// _tl.reset( s ); - } - wassert( config.limit < 0x4000000 ); // see case on next line to 32 bit unsigned ProgressMeterHolder pm( op->setMessage( "m/r: (1/3) emit phase" , state.incomingDocuments() ) ); long long mapTime = 0; @@ -988,7 +980,9 @@ namespace mongo { // obtain cursor on data to apply mr to, sorted shared_ptr<Cursor> temp = NamespaceDetailsTransient::getCursor( config.ns.c_str(), config.filter, config.sort ); + uassert( 15876, str::stream() << "could not create cursor over " << config.ns << " for query : " << config.filter << " sort : " << config.sort, temp.get() ); auto_ptr<ClientCursor> cursor( new ClientCursor( QueryOption_NoCursorTimeout , temp , config.ns.c_str() ) ); + uassert( 15877, str::stream() << "could not create client cursor over " << config.ns << " for query : " << config.filter << " sort : " << config.sort, cursor.get() ); Timer mt; // go through each doc @@ -1065,11 +1059,19 @@ namespace mongo { countsBuilder.appendNumber( "reduce" , state.numReduces() ); timingBuilder.append( "reduceTime" , inReduce / 1000 ); timingBuilder.append( "mode" , state.jsMode() ? "js" : "mixed" ); - -// _tl.reset(); + } + // TODO: The error handling code for queries is v. fragile, + // *requires* rethrow AssertionExceptions - should probably fix. + catch ( AssertionException& e ){ + log() << "mr failed, removing collection" << causedBy(e) << endl; + throw e; + } + catch ( std::exception& e ){ + log() << "mr failed, removing collection" << causedBy(e) << endl; + throw e; } catch ( ... 
) { - log() << "mr failed, removing collection" << endl; + log() << "mr failed for unknown reason, removing collection" << endl; throw; } @@ -1116,7 +1118,7 @@ namespace mongo { virtual bool slaveOverrideOk() { return true; } virtual LockType locktype() const { return NONE; } - bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) { string shardedOutputCollection = cmdObj["shardedOutputCollection"].valuestrsafe(); string postProcessCollection = cmdObj["postProcessCollection"].valuestrsafe(); bool postProcessOnly = !(postProcessCollection.empty()); diff --git a/db/compact.cpp b/db/compact.cpp index a1197460f4f..c6e5f77ee0e 100644 --- a/db/compact.cpp +++ b/db/compact.cpp @@ -263,6 +263,7 @@ namespace mongo { virtual LockType locktype() const { return NONE; } virtual bool adminOnly() const { return false; } virtual bool slaveOk() const { return true; } + virtual bool maintenanceMode() const { return true; } virtual bool logTheOp() { return false; } virtual void help( stringstream& help ) const { help << "compact collection\n" @@ -274,7 +275,7 @@ namespace mongo { virtual bool requiresAuth() { return true; } CompactCmd() : Command("compact") { } - virtual bool run(const string& db, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& db, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { string coll = cmdObj.firstElement().valuestr(); if( coll.empty() || db.empty() ) { errmsg = "no collection name specified"; diff --git a/db/curop.h b/db/curop.h index f261812d383..2717d78cc62 100644 --- a/db/curop.h +++ b/db/curop.h @@ -28,6 +28,8 @@ namespace mongo { + class CurOp; + /* lifespan is different than CurOp because of recursives with DBDirectClient */ class OpDebug { public: @@ -36,7 +38,7 @@ namespace mongo { void reset(); string toString() const; - void append( BSONObjBuilder& b ) const; + void append( const CurOp& curop, BSONObjBuilder& b ) const; // ------------------- @@ -119,7 +121,7 @@ namespace mongo { int size() const { return *_size; } bool have() const { return size() > 0; } - BSONObj get() { + BSONObj get() const { _lock.lock(); BSONObj o; try { @@ -133,7 +135,7 @@ namespace mongo { return o; } - void append( BSONObjBuilder& b , const StringData& name ) { + void append( BSONObjBuilder& b , const StringData& name ) const { scoped_spinlock lk(_lock); BSONObj temp = _get(); b.append( name , temp ); @@ -141,7 +143,7 @@ namespace mongo { private: /** you have to be locked when you call this */ - BSONObj _get() { + BSONObj _get() const { int sz = size(); if ( sz == 0 ) return BSONObj(); @@ -153,7 +155,7 @@ namespace mongo { /** you have to be locked when you call this */ void _reset( int sz ) { _size[0] = sz; } - SpinLock _lock; + mutable SpinLock _lock; int * _size; char _buf[512]; }; @@ -168,7 +170,8 @@ namespace mongo { bool haveQuery() const { return _query.have(); } BSONObj query() { return _query.get(); } - + void appendQuery( BSONObjBuilder& b , const StringData& name ) const { _query.append( b , name ); } + void ensureStarted() { if ( _start == 0 ) _start = _checkpoint = curTimeMicros64(); diff --git a/db/cursor.h b/db/cursor.h index ff9c9821ada..9639b2677b1 100644 --- a/db/cursor.h +++ b/db/cursor.h @@ -132,6 +132,8 @@ namespace mongo { virtual void setMatcher( shared_ptr< CoveredIndexMatcher > matcher ) { massert( 13285, "manual matcher config not allowed", 
false ); } + + virtual void explainDetails( BSONObjBuilder& b ) { return; } }; // strategy object implementing direction of traversal.
diff --git a/db/database.cpp b/db/database.cpp index 7906e9b435a..97b3fa011cb 100644 --- a/db/database.cpp +++ b/db/database.cpp @@ -192,22 +192,31 @@ namespace mongo { return ret; } + bool fileIndexExceedsQuota( const char *ns, int fileIndex, bool enforceQuota ) { + return + cmdLine.quota && + enforceQuota && + fileIndex >= cmdLine.quotaFiles && + // we don't enforce the quota on "special" namespaces as that could lead to problems -- e.g. + // rejecting an index insert after inserting the main record. + !NamespaceString::special( ns ) && + NamespaceString( ns ).db != "local"; + } + MongoDataFile* Database::suitableFile( const char *ns, int sizeNeeded, bool preallocate, bool enforceQuota ) { // check existing files for ( int i=numFiles()-1; i>=0; i-- ) { MongoDataFile* f = getFile( i ); if ( f->getHeader()->unusedLength >= sizeNeeded ) { - // we don't enforce the quota on "special" namespaces as that could lead to problems -- e.g. - // rejecting an index insert after inserting the main record. - if( cmdLine.quota && enforceQuota && i > cmdLine.quotaFiles && !NamespaceString::special(ns) ) + if ( fileIndexExceedsQuota( ns, i-1, enforceQuota ) ) // NOTE i-1 is the value used historically for this check. ; else return f; } } - if( cmdLine.quota && enforceQuota && numFiles() >= cmdLine.quotaFiles && !NamespaceString::special(ns) ) + if ( fileIndexExceedsQuota( ns, numFiles(), enforceQuota ) ) uasserted(12501, "quota exceeded"); // allocate files until we either get one big enough or hit maxSize @@ -261,8 +270,8 @@ namespace mongo { log() << "creating profile collection: " << profileName << endl; BSONObjBuilder spec; spec.appendBool( "capped", true ); - spec.append( "size", 131072.0 ); - if ( ! userCreateNS( profileName.c_str(), spec.done(), errmsg , true ) ) { + spec.append( "size", 1024*1024 ); + if ( ! userCreateNS( profileName.c_str(), spec.done(), errmsg , false /* we don't replicate profile messages */ ) ) { return false; } }
diff --git a/db/db.cpp b/db/db.cpp index 9f90b9ddd02..2d4970af044 100644 --- a/db/db.cpp +++ b/db/db.cpp @@ -70,7 +70,6 @@ namespace mongo { extern string repairpath; void setupSignals( bool inFork ); - void startReplSets(ReplSetCmdline*); void startReplication(); void exitCleanly( ExitCode code ); @@ -216,8 +215,6 @@ namespace mongo { void listen(int port) { //testTheDb(); - log() << "waiting for connections on port " << port << endl; - MessageServer::Options options; options.port = port; options.ipList = cmdLine.bind_ip; @@ -483,12 +480,6 @@ namespace mongo { clientCursorMonitor.go(); PeriodicTask::theRunner->go(); - if( !cmdLine._replSet.empty() ) { - replSet = true; - ReplSetCmdline *replSetCmdline = new ReplSetCmdline(cmdLine._replSet); - boost::thread t( boost::bind( &startReplSets, replSetCmdline) ); - } - listen(listenPort); // listen() will return when exit code closes its socket.
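Aside: the quota change above is a small refactor with a subtle edge. The two inline checks in Database::suitableFile are folded into one predicate, fileIndexExceedsQuota(), and the first call site passes i-1 to preserve the value used historically. A minimal standalone sketch of the same decision logic; QuotaConfig stands in for the real cmdLine globals, and the namespace tests are reduced to precomputed arguments:

    #include <iostream>
    #include <string>

    // Stand-in for the cmdLine fields the real predicate reads (assumption).
    struct QuotaConfig {
        bool quota;       // --quota enabled
        int  quotaFiles;  // --quotaFiles n
    };

    // Mirrors the predicate added to db/database.cpp: over quota only when quotas
    // are enabled and enforced, the file index has reached the limit, and the
    // namespace is neither "special" (e.g. index namespaces) nor in "local".
    bool fileIndexExceedsQuota( const QuotaConfig& cfg, bool specialNs,
                                const std::string& db, int fileIndex, bool enforceQuota ) {
        return cfg.quota &&
               enforceQuota &&
               fileIndex >= cfg.quotaFiles &&
               !specialNs &&
               db != "local";
    }

    int main() {
        QuotaConfig cfg = { true, 8 };
        // the 9th file (index 8) of an ordinary collection trips the quota:
        std::cout << fileIndexExceedsQuota( cfg, false, "test", 8, true ) << std::endl;  // 1
        // but the "local" database is exempt:
        std::cout << fileIndexExceedsQuota( cfg, false, "local", 8, true ) << std::endl; // 0
        return 0;
    }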
@@ -575,10 +566,12 @@ int main(int argc, char* argv[]) { ("directoryperdb", "each database will be stored in a separate directory") ("journal", "enable journaling") ("journalOptions", po::value<int>(), "journal diagnostic options") + ("journalCommitInterval", po::value<unsigned>(), "how often to group/batch commit (ms)") ("ipv6", "enable IPv6 support (disabled by default)") ("jsonp","allow JSONP access via http (has security implications)") ("noauth", "run without security") ("nohttpinterface", "disable http interface") + ("nojournal", "disable journaling (journaling is on by default for 64 bit)") ("noprealloc", "disable data file preallocation - will often hurt performance") ("noscripting", "disable scripting engine") ("notablescan", "do not allow table scans") @@ -631,12 +624,11 @@ int main(int argc, char* argv[]) { ("pretouch", po::value<int>(), "n pretouch threads for applying replicated operations") ("command", po::value< vector<string> >(), "command") ("cacheSize", po::value<long>(), "cache size (in MB) for rec store") - // these move to unhidden later: ("nodur", "disable journaling (currently the default)") - ("nojournal", "disable journaling (currently the default)") // things we don't want people to use ("nocursors", "diagnostic/debugging option that turns off cursors DO NOT USE IN PRODUCTION") ("nohints", "ignore query hints") + ("nopreallocj", "don't preallocate journal files") ("dur", "enable journaling") // deprecated version ("durOptions", po::value<int>(), "durability diagnostic options") // deprecated version // deprecated pairing command line options @@ -745,6 +737,15 @@ int main(int argc, char* argv[]) { if (params.count("durOptions")) { cmdLine.durOptions = params["durOptions"].as<int>(); } + if( params.count("journalCommitInterval") ) { + // don't check if dur is false here as many will just use the default, and will default to off on win32. + // ie no point making life a little more complex by giving an error on a dev environment. + cmdLine.journalCommitInterval = params["journalCommitInterval"].as<unsigned>(); + if( cmdLine.journalCommitInterval <= 1 || cmdLine.journalCommitInterval > 300 ) { + out() << "--journalCommitInterval out of allowed range (2-300ms)" << endl; + dbexit( EXIT_BADOPTIONS ); + } + } if (params.count("journalOptions")) { cmdLine.durOptions = params["journalOptions"].as<int>(); } @@ -761,6 +762,9 @@ int main(int argc, char* argv[]) { if (params.count("nohints")) { useHints = false; } + if (params.count("nopreallocj")) { + cmdLine.preallocj = false; + } if (params.count("nohttpinterface")) { noHttpInterface = true; }
diff --git a/db/db.vcxproj b/db/db.vcxproj index 685015ed7f6..8f831cb8559 100755 --- a/db/db.vcxproj +++ b/db/db.vcxproj @@ -459,9 +459,27 @@ <ClCompile Include="..\s\shard.cpp" />
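Aside: two related changes meet in the hunks above. The CmdLine constructor (db/cmdline.h, earlier in this diff) now defaults journaling on for 64-bit builds, and --journalCommitInterval is range-checked to 2-300 ms, with 0 reserved to mean "use the default" (chosen later by the journal thread). A compilable sketch of how those rules compose; the function and flag names here are illustrative, not the real option parser:

    #include <cstdlib>
    #include <iostream>

    struct JournalOpts {
        bool dur;                        // journaling enabled?
        unsigned journalCommitInterval;  // ms; 0 => "use default" (100ms, or 30ms when
                                         // journal and data sit on different partitions
                                         // -- see durThread later in this diff)
    };

    // Layered default, mirroring the order in the CmdLine constructor:
    // build flag, then word size, then build flag again, then the command line.
    JournalOpts resolveJournalOpts( bool durableDefaultOn, bool durableDefaultOff,
                                    bool nojournalFlag, unsigned intervalMs ) {
        JournalOpts o;
        o.dur = false;
        if( durableDefaultOn ) o.dur = true;
        if( sizeof(void*) == 8 ) o.dur = true;   // 64 bit: on by default
        if( durableDefaultOff ) o.dur = false;
        if( nojournalFlag ) o.dur = false;       // --nojournal wins at runtime
        if( intervalMs != 0 && ( intervalMs <= 1 || intervalMs > 300 ) ) {
            std::cerr << "--journalCommitInterval out of allowed range (2-300ms)" << std::endl;
            std::exit( EXIT_FAILURE );
        }
        o.journalCommitInterval = intervalMs;
        return o;
    }

    int main() {
        JournalOpts o = resolveJournalOpts( false, false, false, 0 );
        std::cout << "dur=" << o.dur << " interval=" << o.journalCommitInterval << std::endl;
        return 0;
    }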
<ClCompile Include="..\s\shardconnection.cpp" />
<ClCompile Include="..\s\shardkey.cpp" />
+ <ClCompile Include="..\third_party\snappy\snappy-sinksource.cc">
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">NotUsing</PrecompiledHeader>
+ </ClCompile>
+ <ClCompile Include="..\third_party\snappy\snappy.cc">
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">NotUsing</PrecompiledHeader>
+ </ClCompile>
<ClCompile Include="..\util\alignedbuilder.cpp">
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">NotUsing</PrecompiledHeader>
</ClCompile>
+ <ClCompile Include="..\util\compress.cpp">
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">NotUsing</PrecompiledHeader>
+ </ClCompile>
<ClCompile Include="..\util\concurrency\spin_lock.cpp" />
<ClCompile Include="..\util\concurrency\synchronization.cpp" />
<ClCompile Include="..\util\concurrency\task.cpp" />
@@ -561,6 +579,7 @@ <ClCompile Include="..\client\parallel.cpp" />
<ClCompile Include="pdfile.cpp" />
<ClCompile Include="queryoptimizer.cpp" />
+ <ClCompile Include="scanandorder.cpp" />
<ClCompile Include="security.cpp" />
<ClCompile Include="security_commands.cpp" />
<ClCompile Include="security_common.cpp" />
@@ -652,6 +671,8 @@ <ClInclude Include="..\targetver.h" />
<ClInclude Include="..\pcre-7.4\config.h" />
<ClInclude Include="..\pcre-7.4\pcre.h" />
+ <ClInclude Include="..\third_party\snappy\config.h" />
+ <ClInclude Include="..\third_party\snappy\snappy.h" />
<ClInclude Include="..\util\alignedbuilder.h" />
<ClInclude Include="..\util\concurrency\mutexdebugger.h" />
<ClInclude Include="..\util\concurrency\race.h" />
diff --git a/db/db.vcxproj.filters b/db/db.vcxproj.filters index d9e9def86f8..36b0df1ddc2 100755 --- a/db/db.vcxproj.filters +++ b/db/db.vcxproj.filters @@ -1,4 +1,4 @@ -{UTF-8 BOM}<?xml version="1.0" encoding="utf-8"?>
+<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<ClCompile Include="..\bson\oid.cpp" />
@@ -166,6 +166,16 @@ <ClCompile Include="..\util\net\message_port.cpp" />
<ClCompile Include="dbmessage.cpp" />
<ClCompile Include="commands\find_and_modify.cpp" />
+ <ClCompile Include="..\util\compress.cpp">
+ <Filter>snappy</Filter>
+ </ClCompile>
+ <ClCompile Include="..\third_party\snappy\snappy-sinksource.cc">
+ <Filter>snappy</Filter>
+ </ClCompile>
+ <ClCompile Include="..\third_party\snappy\snappy.cc">
+ <Filter>snappy</Filter>
+ </ClCompile>
+ <ClCompile Include="scanandorder.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\client\dbclientcursor.h" />
@@ -315,6 +325,12 @@ <ClInclude Include="..\util\net\sock.h" />
<ClInclude Include="..\util\concurrency\rwlockimpl.h" />
<ClInclude Include="..\util\concurrency\mutexdebugger.h" />
+ <ClInclude Include="..\third_party\snappy\config.h">
+ <Filter>snappy</Filter>
+ </ClInclude>
+ <ClInclude Include="..\third_party\snappy\snappy.h">
+ <Filter>snappy</Filter>
+ </ClInclude>
</ItemGroup>
<ItemGroup>
<ResourceCompile Include="db.rc" />
@@ -349,4 +365,9 @@ <Library Include="..\..\js\js64d.lib" />
<Library Include="..\..\js\js64r.lib" />
</ItemGroup>
+ <ItemGroup>
+ <Filter Include="snappy">
+ <UniqueIdentifier>{bb99c086-7926-4f50-838d-f5f0c18397c0}</UniqueIdentifier>
+ </Filter>
+ </ItemGroup>
</Project>
\ No newline at end of file diff --git a/db/dbcommands.cpp b/db/dbcommands.cpp index 73c1004d4f2..2edd7684ff8 100644 --- a/db/dbcommands.cpp +++ b/db/dbcommands.cpp @@ -31,6 +31,7 @@ #include "../util/lruishmap.h" #include "../util/md5.hpp" #include "../util/processinfo.h" +#include "../util/ramlog.h" #include "json.h" #include "repl.h" #include "repl_block.h" @@ -53,14 +54,16 @@ namespace mongo { namespace dur { void setAgeOutJournalFiles(bool rotate); } + /** @return true if fields found */ bool setParmsMongodSpecific(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { BSONElement e = cmdObj["ageOutJournalFiles"]; if( !e.eoo() ) { bool r = e.trueValue(); log() << "ageOutJournalFiles " << r << endl; dur::setAgeOutJournalFiles(r); + return true; } - return true; + return false; } void flushDiagLog(); @@ -85,7 +88,7 @@ namespace mongo { help << "reset error state (used with getpreverror)"; } CmdResetError() : Command("resetError", false, "reseterror") {} - bool run(const string& db, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool run(const string& db, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { LastError *le = lastError.get(); assert( le ); le->reset(); @@ -116,7 +119,7 @@ namespace mongo { << " { w:n } - await replication to n servers (including self) before returning\n" << " { wtimeout:m} - timeout for w in m milliseconds"; } - bool run(const string& dbname, BSONObj& _cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool run(const string& dbname, BSONObj& _cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { LastError *le = lastError.disableForCommand(); bool err = false; @@ -246,7 +249,7 @@ namespace mongo { return true; } CmdGetPrevError() : Command("getPrevError", false, "getpreverror") {} - bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { LastError *le = lastError.disableForCommand(); le->appendSelf( result ); if ( le->valid ) @@ -268,14 +271,14 @@ namespace mongo { << "N to wait N seconds for other members to catch up."; } - bool CmdShutdown::run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool CmdShutdown::run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { bool force = cmdObj.hasField("force") && cmdObj["force"].trueValue(); if (!force && theReplSet && theReplSet->isPrimary()) { - int timeout, now, start; + long long timeout, now, start; timeout = now = start = curTimeMicros64()/1000000; if (cmdObj.hasField("timeoutSecs")) { - timeout += cmdObj["timeoutSecs"].numberInt(); + timeout += cmdObj["timeoutSecs"].numberLong(); } OpTime lastOp = theReplSet->lastOpTimeWritten; @@ -329,7 +332,7 @@ namespace mongo { } virtual LockType locktype() const { return WRITE; } CmdDropDatabase() : Command("dropDatabase") {} - bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { BSONElement e = cmdObj.firstElement(); log() << "dropDatabase " << dbname << endl; int p = (int) e.number(); @@ -349,12 +352,13 @@ namespace mongo { virtual bool slaveOk() const { return true; } + virtual bool maintenanceMode() const { return true; } 
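Aside: maintenanceMode() is the new Command hook (db/commands.h, earlier in this diff) that repairDatabase and compact opt into: while such a command runs, a replica-set secondary is put into the unreadable "recovering" state. The patch toggles theReplSet->setMaintenanceMode(true/false) by hand on each exit path of execCommand; a hypothetical RAII guard expressing the same protocol, with ReplSet stubbed in for the real global:

    // Hypothetical sketch, not part of the patch.
    struct ReplSet {                              // stub for the real replica set object
        bool isSecondary() const { return true; }
        void setMaintenanceMode( bool ) {}
    };
    static ReplSet* theReplSet = 0;               // stands in for the real global

    class MaintenanceModeGuard {
        bool _active;
    public:
        // enter maintenance mode only if the command asks for it and we are a secondary
        explicit MaintenanceModeGuard( bool commandWantsIt ) : _active(false) {
            if ( commandWantsIt && theReplSet && theReplSet->isSecondary() ) {
                theReplSet->setMaintenanceMode( true );
                _active = true;
            }
        }
        // leaves maintenance mode on every path out of the command, including throws
        ~MaintenanceModeGuard() {
            if ( _active )
                theReplSet->setMaintenanceMode( false );
        }
    };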
virtual void help( stringstream& help ) const { help << "repair database. also compacts. note: slow."; } virtual LockType locktype() const { return WRITE; } CmdRepairDatabase() : Command("repairDatabase") {} - bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { BSONElement e = cmdObj.firstElement(); log() << "repairDatabase " << dbname << endl; int p = (int) e.number(); @@ -388,7 +392,7 @@ namespace mongo { } virtual LockType locktype() const { return WRITE; } CmdProfile() : Command("profile") {} - bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { BSONElement e = cmdObj.firstElement(); result.append("was", cc().database()->profile); result.append("slowms", cmdLine.slowMS ); @@ -425,7 +429,7 @@ namespace mongo { help << "returns lots of administrative server statistics"; } - bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { long long start = Listener::getElapsedTimeMillis(); BSONObjBuilder timeBuilder(128); @@ -596,6 +600,21 @@ namespace mongo { timeBuilder.appendNumber( "after dur" , Listener::getElapsedTimeMillis() - start ); + { + RamLog* rl = RamLog::get( "warnings" ); + verify(15880, rl); + + if (rl->lastWrite() >= time(0)-(10*60)){ // only show warnings from last 10 minutes + vector<const char*> lines; + rl->get( lines ); + + BSONArrayBuilder arr( result.subarrayStart( "warnings" ) ); + for ( unsigned i=std::max(0,(int)lines.size()-10); i<lines.size(); i++ ) + arr.append( lines[i] ); + arr.done(); + } + } + if ( ! authed ) result.append( "note" , "run against admin for more info" ); @@ -619,7 +638,7 @@ namespace mongo { virtual void help( stringstream& help ) const { help << "internal"; } virtual LockType locktype() const { return NONE; } CmdGetOpTime() : Command("getoptime") { } - bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { writelock l( "" ); result.appendDate("optime", OpTime::now().asDate()); return true; @@ -648,7 +667,7 @@ namespace mongo { } void help(stringstream& h) const { h << "http://www.mongodb.org/display/DOCS/Monitoring+and+Diagnostics#MonitoringandDiagnostics-DatabaseRecord%2FReplay"; } virtual LockType locktype() const { return WRITE; } - bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) { int was = _diaglog.setLevel( cmdObj.firstElement().numberInt() ); flushDiagLog(); if ( !cmdLine.quiet ) @@ -771,7 +790,7 @@ namespace mongo { } virtual void help( stringstream& help ) const { help << "drop a collection\n{drop : <collectionName>}"; } virtual LockType locktype() const { return WRITE; } - virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + virtual bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) { string nsToDrop = dbname + '.' 
+ cmdObj.firstElement().valuestr(); NamespaceDetails *d = nsdetails(nsToDrop.c_str()); if ( !cmdLine.quiet ) @@ -805,7 +824,7 @@ namespace mongo { return false; } virtual void help( stringstream& help ) const { help << "count objects in collection"; } - virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) { string ns = dbname + '.' + cmdObj.firstElement().valuestr(); string err; long long n = runCount(ns.c_str(), cmdObj, err); @@ -844,7 +863,8 @@ namespace mongo { help << "create a collection explicitly\n" "{ create: <ns>[, capped: <bool>, size: <collSizeInBytes>, max: <nDocs>] }"; } - virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + virtual bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + uassert(15888, "must pass name of collection to create", cmdObj.firstElement().valuestrsafe()[0] != '\0'); string ns = dbname + '.' + cmdObj.firstElement().valuestr(); string err; uassert(14832, "specify size:<n> when capped is true", !cmdObj["capped"].trueValue() || cmdObj["size"].isNumber() || cmdObj.hasField("$nExtents")); @@ -869,7 +889,7 @@ namespace mongo { help << "drop indexes for a collection"; } CmdDropIndexes() : Command("dropIndexes", false, "deleteIndexes") { } - bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& anObjBuilder, bool /*fromRepl*/) { + bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& anObjBuilder, bool /*fromRepl*/) { BSONElement e = jsobj.firstElement(); string toDeleteNs = dbname + '.' + e.valuestr(); NamespaceDetails *d = nsdetails(toDeleteNs.c_str()); @@ -914,7 +934,7 @@ namespace mongo { help << "re-index a collection"; } CmdReIndex() : Command("reIndex") { } - bool run(const string& dbname , BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) { + bool run(const string& dbname , BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) { static DBDirectClient db; BSONElement e = jsobj.firstElement(); @@ -969,7 +989,7 @@ namespace mongo { virtual LockType locktype() const { return NONE; } virtual void help( stringstream& help ) const { help << "list databases on this server"; } CmdListDatabases() : Command("listDatabases" , true ) {} - bool run(const string& dbname , BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) { + bool run(const string& dbname , BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) { vector< string > dbNames; getDatabaseNames( dbNames ); vector< BSONObj > dbInfos; @@ -1038,7 +1058,7 @@ namespace mongo { virtual LockType locktype() const { return WRITE; } CmdCloseAllDatabases() : Command( "closeAllDatabases" ) {} - bool run(const string& dbname , BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) { + bool run(const string& dbname , BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) { bool ok; try { ok = dbHolder.closeAll( dbpath , result, false ); @@ -1065,7 +1085,7 @@ namespace mongo { help << " example: { filemd5 : ObjectId(aaaaaaa) , root : \"fs\" }"; } virtual LockType locktype() const { return READ; } - bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + bool run(const string& dbname, BSONObj& 
jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { string ns = dbname; ns += "."; { @@ -1164,7 +1184,7 @@ namespace mongo { "\nkeyPattern, min, and max parameters are optional." "\nnote: This command may take a while to run"; } - bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { Timer timer; string ns = jsobj.firstElement().String(); @@ -1282,7 +1302,7 @@ namespace mongo { help << "{ collStats:\"blog.posts\" , scale : 1 } scale divides sizes e.g. for KB use 1024\n" " avgObjSize - in bytes"; } - bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { string ns = dbname + "." + jsobj.firstElement().valuestr(); Client::Context cx( ns ); @@ -1351,7 +1371,7 @@ namespace mongo { "Get stats on a database. Not instantaneous. Slower for databases with large .ns files.\n" << "Example: { dbStats:1, scale:1 }"; } - bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { int scale = 1; if ( jsobj["scale"].isNumber() ) { scale = jsobj["scale"].numberInt(); @@ -1426,7 +1446,7 @@ namespace mongo { virtual void help( stringstream &help ) const { help << "{ cloneCollectionAsCapped:<fromName>, toCollection:<toName>, size:<sizeInBytes> }"; } - bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { string from = jsobj.getStringField( "cloneCollectionAsCapped" ); string to = jsobj.getStringField( "toCollection" ); long long size = (long long)jsobj.getField( "size" ).number(); @@ -1488,7 +1508,7 @@ namespace mongo { virtual void help( stringstream &help ) const { help << "{ convertToCapped:<fromCollectionName>, size:<sizeInBytes> }"; } - bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { BackgroundOperation::assertNoBgOpInProgForDb(dbname.c_str()); string from = jsobj.getStringField( "convertToCapped" ); @@ -1544,7 +1564,7 @@ namespace mongo { virtual void help( stringstream &help ) const { help << "{whatsmyuri:1}"; } - virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) { BSONObj info = cc().curop()->infoNoauth(); result << "you" << info[ "client" ]; return true; @@ -1559,7 +1579,7 @@ namespace mongo { return true; } virtual bool slaveOk() const { - return false; + return true; } virtual LockType locktype() const { return WRITE; } virtual bool requiresAuth() { @@ -1568,7 +1588,7 @@ namespace mongo { virtual void help( stringstream &help ) const { help << "internal. 
for testing only."; } - virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) { string coll = cmdObj[ "godinsert" ].valuestrsafe(); uassert( 13049, "godinsert must specify a collection", !coll.empty() ); string ns = dbname + "." + coll; @@ -1583,7 +1603,7 @@ namespace mongo { DBHashCmd() : Command( "dbHash", false, "dbhash" ) {} virtual bool slaveOk() const { return true; } virtual LockType locktype() const { return READ; } - virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + virtual bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) { list<string> colls; Database* db = cc().database(); if ( db ) @@ -1629,9 +1649,8 @@ namespace mongo { cursor = findTableScan( c.c_str() , BSONObj() ); } else { - bb.done(); - errmsg = (string)"can't find _id index for: " + c; - return 0; + log() << "can't find _id index for: " << c << endl; + continue; } md5_state_t st; @@ -1677,7 +1696,7 @@ namespace mongo { help << "w:true write lock. secs:<seconds>"; } CmdSleep() : Command("sleep") { } - bool run(const string& ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool run(const string& ns, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { int secs = 100; if ( cmdObj["secs"].isNumber() ) secs = cmdObj["secs"].numberInt(); @@ -1700,7 +1719,7 @@ namespace mongo { virtual bool slaveOk() const { return false; } virtual LockType locktype() const { return WRITE; } virtual bool requiresAuth() { return true; } - virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + virtual bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) { string coll = cmdObj[ "captrunc" ].valuestrsafe(); uassert( 13416, "captrunc must specify a collection", !coll.empty() ); string ns = dbname + "." + coll; @@ -1727,7 +1746,7 @@ namespace mongo { virtual bool slaveOk() const { return false; } virtual LockType locktype() const { return WRITE; } virtual bool requiresAuth() { return true; } - virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + virtual bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) { string coll = cmdObj[ "emptycapped" ].valuestrsafe(); uassert( 13428, "emptycapped must specify a collection", !coll.empty() ); string ns = dbname + "." + coll; @@ -1792,13 +1811,22 @@ namespace mongo { if ( c->adminOnly() ) log( 2 ) << "command: " << cmdObj << endl; + if (c->maintenanceMode() && theReplSet && theReplSet->isSecondary()) { + theReplSet->setMaintenanceMode(true); + } + if ( c->locktype() == Command::NONE ) { // we also trust that this won't crash client.curop()->ensureStarted(); string errmsg; - int ok = c->run( dbname , cmdObj , errmsg , result , fromRepl ); + int ok = c->run( dbname , cmdObj , queryOptions, errmsg , result , fromRepl ); if ( ! ok ) result.append( "errmsg" , errmsg ); + + if (c->maintenanceMode() && theReplSet) { + theReplSet->setMaintenanceMode(false); + } + return ok; } @@ -1812,11 +1840,13 @@ namespace mongo { client.curop()->ensureStarted(); Client::Context ctx( dbname , dbpath , &lk , c->requiresAuth() ); + bool retval = true; + try { string errmsg; - if ( ! 
c->run(dbname, cmdObj, errmsg, result, fromRepl ) ) { + if ( ! c->run(dbname, cmdObj, queryOptions, errmsg, result, fromRepl ) ) { result.append( "errmsg" , errmsg ); - return false; + retval = false; } } catch ( DBException& e ) { @@ -1824,14 +1854,18 @@ namespace mongo { ss << "exception: " << e.what(); result.append( "errmsg" , ss.str() ); result.append( "code" , e.getCode() ); - return false; + retval = false; } - if ( c->logTheOp() && ! fromRepl ) { + if ( retval && c->logTheOp() && ! fromRepl ) { logOp("c", cmdns, cmdObj); } - return true; + if (c->maintenanceMode() && theReplSet) { + theReplSet->setMaintenanceMode(false); + } + + return retval; } diff --git a/db/dbcommands_admin.cpp b/db/dbcommands_admin.cpp index 47f6c691ab4..566027fc594 100644 --- a/db/dbcommands_admin.cpp +++ b/db/dbcommands_admin.cpp @@ -47,7 +47,7 @@ namespace mongo { virtual void help(stringstream& h) const { h << "internal"; } - bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { string dropns = dbname + "." + cmdObj.firstElement().valuestrsafe(); if ( !cmdLine.quiet ) @@ -82,7 +82,7 @@ namespace mongo { virtual bool adminOnly() const { return true; } virtual void help(stringstream& h) const { h << "test how long to write and fsync to a test file in the journal/ directory"; } - bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { filesystem::path p = dur::getJournalDir(); p /= "journalLatencyTest"; @@ -157,7 +157,7 @@ namespace mongo { virtual LockType locktype() const { return READ; } //{ validate: "collectionnamewithoutthedbpart" [, scandata: <bool>] [, full: <bool> } */ - bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { string ns = dbname + "." 
+ cmdObj.firstElement().valuestrsafe(); NamespaceDetails * d = nsdetails( ns.c_str() ); if ( !cmdLine.quiet ) @@ -473,7 +473,7 @@ namespace mongo { return !x.empty(); }*/ virtual void help(stringstream& h) const { h << url(); } - virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { bool sync = !cmdObj["async"].trueValue(); // async means do an fsync, but return immediately bool lock = cmdObj["lock"].trueValue(); log() << "CMD fsync: sync:" << sync << " lock:" << lock << endl; diff --git a/db/dbcommands_generic.cpp b/db/dbcommands_generic.cpp index 2e025b500ea..a9e13eab741 100644 --- a/db/dbcommands_generic.cpp +++ b/db/dbcommands_generic.cpp @@ -79,7 +79,7 @@ namespace mongo { virtual void help( stringstream &help ) const { help << "internal command facilitating running in certain cloud computing environments"; } - bool run(const string& dbname, BSONObj& obj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + bool run(const string& dbname, BSONObj& obj, int options, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { if( !obj.hasElement("servers") ) { vector<string> ips; obj["servers"].Obj().Vals(ips); @@ -106,7 +106,7 @@ namespace mongo { help << "get version #, etc.\n"; help << "{ buildinfo:1 }"; } - bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { result << "version" << versionString << "gitVersion" << gitVersion() << "sysInfo" << sysInfo(); result << "versionArray" << versionArray; result << "bits" << ( sizeof( int* ) == 4 ? 32 : 64 ); @@ -137,7 +137,7 @@ namespace mongo { help << " syncdelay\n"; help << "{ getParameter:'*' } to get everything\n"; } - bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { bool all = *cmdObj.firstElement().valuestrsafe() == '*'; int before = result.len(); @@ -166,11 +166,6 @@ namespace mongo { } } cmdGet; - // dev - experimental. so only in set command for now. 
may go away or change - namespace dur { - int groupCommitIntervalMs = 100; - } - // tempish bool setParmsMongodSpecific(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ); @@ -184,23 +179,24 @@ namespace mongo { help << "set administrative option(s)\n"; help << "{ setParameter:1, <param>:<value> }\n"; help << "supported so far:\n"; - help << " notablescan\n"; + help << " journalCommitInterval\n"; help << " logLevel\n"; + help << " notablescan\n"; help << " quiet\n"; help << " syncdelay\n"; } - bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { + bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) { int s = 0; - setParmsMongodSpecific(dbname, cmdObj, errmsg, result, fromRepl); - if( cmdObj.hasElement("groupCommitIntervalMs") ) { + bool found = setParmsMongodSpecific(dbname, cmdObj, errmsg, result, fromRepl); + if( cmdObj.hasElement("journalCommitInterval") ) { if( !cmdLine.dur ) { errmsg = "journaling is off"; return false; } - int x = (int) cmdObj["groupCommitIntervalMs"].Number(); - assert( x > 0 && x < 500 ); - dur::groupCommitIntervalMs = x; - log() << "groupCommitIntervalMs " << x << endl; + int x = (int) cmdObj["journalCommitInterval"].Number(); + assert( x > 1 && x < 500 ); + cmdLine.journalCommitInterval = x; + log() << "setParameter journalCommitInterval=" << x << endl; s++; } if( cmdObj.hasElement("notablescan") ) { @@ -241,7 +237,7 @@ namespace mongo { s++; } - if( s == 0 ) { + if( s == 0 && !found ) { errmsg = "no option found to set, use help:true to see options "; return false; } @@ -257,7 +253,7 @@ namespace mongo { virtual void help( stringstream &help ) const { help << "a way to check that the server is alive. 
responds immediately even if server is in a db lock."; } virtual LockType locktype() const { return NONE; } virtual bool requiresAuth() { return false; } - virtual bool run(const string& badns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + virtual bool run(const string& badns, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) { // IMPORTANT: Don't put anything in here that might lock db - including authentication return true; } @@ -270,7 +266,7 @@ namespace mongo { virtual bool slaveOk() const { return true; } virtual bool readOnly() { return true; } virtual LockType locktype() const { return NONE; } - virtual bool run(const string& ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& ns, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { if ( globalScriptEngine ) { BSONObjBuilder bb( result.subobjStart( "js" ) ); result.append( "utf8" , globalScriptEngine->utf8Ok() ); @@ -292,7 +288,7 @@ namespace mongo { virtual LockType locktype() const { return NONE; } virtual bool slaveOk() const { return true; } virtual bool adminOnly() const { return true; } - virtual bool run(const string& ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& ns, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { rotateLogs(); return 1; } @@ -306,7 +302,7 @@ namespace mongo { virtual LockType locktype() const { return NONE; } virtual bool slaveOk() const { return true; } virtual bool adminOnly() const { return false; } - virtual bool run(const string& ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& ns, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { BSONObjBuilder b( result.subobjStart( "commands" ) ); for ( map<string,Command*>::iterator i=_commands->begin(); i!=_commands->end(); ++i ) { Command * c = i->second; @@ -361,7 +357,7 @@ namespace mongo { } virtual LockType locktype() const { return NONE; } CmdForceError() : Command("forceerror") {} - bool run(const string& dbnamne, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool run(const string& dbnamne, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { uassert( 10038 , "forced error", false); return true; } @@ -373,7 +369,7 @@ namespace mongo { virtual bool slaveOk() const { return true; } virtual LockType locktype() const { return NONE; } virtual bool requiresAuth() { return false; } - virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + virtual bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) { result << "options" << QueryOption_AllSupported; return true; } @@ -393,7 +389,7 @@ namespace mongo { help << "{ getLog : '*' } OR { getLog : 'global' }"; } - virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) { + virtual bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) { string p = cmdObj.firstElement().String(); if ( p == "*" ) { vector<string> names; diff --git a/db/dbeval.cpp b/db/dbeval.cpp index 3a53200a49f..5fe137fc3a3 100644 --- a/db/dbeval.cpp +++ b/db/dbeval.cpp @@ -121,7 +121,7 @@ namespace mongo { } virtual LockType locktype() const { return NONE; } CmdEval() : Command("eval", false, "$eval") { } - 
bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { AuthenticationInfo *ai = cc().getAuthenticationInfo(); uassert( 12598 , "$eval reads unauthorized", ai->isAuthorizedReads(dbname.c_str()) ); diff --git a/db/dbmessage.h b/db/dbmessage.h index a14d4cf5142..a789bff849c 100644 --- a/db/dbmessage.h +++ b/db/dbmessage.h @@ -122,7 +122,7 @@ namespace mongo { /** the 32 bit field before the ns * track all bit usage here as its cross op - * 0: InsertOption_KeepGoing + * 0: InsertOption_ContinueOnError * 1: fromWriteback */ int& reservedField() { return *reserved; } @@ -233,7 +233,7 @@ namespace mongo { public: enum ReservedOptions { - Reserved_InsertOption_KeepGoing = 1 << 0 , + Reserved_InsertOption_ContinueOnError = 1 << 0 , Reserved_FromWriteback = 1 << 1 }; }; diff --git a/db/dbwebserver.cpp b/db/dbwebserver.cpp index 40950a8ccb3..50a59fa1267 100644 --- a/db/dbwebserver.cpp +++ b/db/dbwebserver.cpp @@ -61,7 +61,7 @@ namespace mongo { class DbWebServer : public MiniWebServer { public: DbWebServer(const string& ip, int port, const AdminAccess* webUsers) - : MiniWebServer(ip, port), _webUsers(webUsers) { + : MiniWebServer("admin web console", ip, port), _webUsers(webUsers) { WebStatusPlugin::initAll(); } @@ -424,7 +424,7 @@ namespace mongo { string errmsg; BSONObjBuilder sub; - if ( ! c->run( "admin.$cmd" , co , errmsg , sub , false ) ) + if ( ! c->run( "admin.$cmd" , co , 0, errmsg , sub , false ) ) buf.append( cmd , errmsg ); else buf.append( cmd , sub.obj() ); @@ -531,7 +531,6 @@ namespace mongo { Client::initThread("websvr"); const int p = cmdLine.port + 1000; DbWebServer mini(cmdLine.bind_ip, p, adminAccessPtr.get()); - log() << "web admin interface listening on port " << p << endl; mini.initAndListen(); cc().shutdown(); } diff --git a/db/driverHelpers.cpp b/db/driverHelpers.cpp index d98a33b25c5..12aa01886c4 100644 --- a/db/driverHelpers.cpp +++ b/db/driverHelpers.cpp @@ -46,7 +46,7 @@ namespace mongo { class ObjectIdTest : public BasicDriverHelper { public: ObjectIdTest() : BasicDriverHelper( "driverOIDTest" ) {} - virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { if ( cmdObj.firstElement().type() != jstOID ) { errmsg = "not oid"; return false; diff --git a/db/dur.cpp b/db/dur.cpp index 6cb69ac5ac2..dfa36f95224 100644 --- a/db/dur.cpp +++ b/db/dur.cpp @@ -62,11 +62,11 @@ #include "dur_journal.h" #include "dur_commitjob.h" #include "dur_recover.h" +#include "dur_stats.h" #include "../util/concurrency/race.h" #include "../util/mongoutils/hash.h" #include "../util/mongoutils/str.h" #include "../util/timer.h" -#include "dur_stats.h" using namespace mongoutils; @@ -74,8 +74,9 @@ namespace mongo { namespace dur { - void WRITETODATAFILES(); - void PREPLOGBUFFER(); + void PREPLOGBUFFER(JSectHeader& outParm); + void WRITETOJOURNAL(JSectHeader h, AlignedBuilder& uncompressed); + void WRITETODATAFILES(const JSectHeader& h, AlignedBuilder& uncompressed); /** declared later in this file only used in this file -- use DurableInterface::commitNow() outside @@ -129,6 +130,7 @@ namespace mongo { "commits" << _commits << "journaledMB" << _journaledBytes / 1000000.0 << "writeToDataFilesMB" << _writeToDataFilesBytes / 1000000.0 << + "compression" << _journaledBytes / 
(_uncompressedBytes+1.0) << "commitsInWriteLock" << _commitsInWriteLock << "earlyCommits" << _earlyCommits << "timeMs" << @@ -143,6 +145,8 @@ namespace mongo { b << "ageOutJournalFiles" << "mutex timeout"; if( r == 0 ) b << "ageOutJournalFiles" << false; + if( cmdLine.journalCommitInterval != 0 ) + b << "journalCommitIntervalMs" << cmdLine.journalCommitInterval; return b.obj(); } @@ -269,6 +273,9 @@ namespace mongo { } bool DurableImpl::commitIfNeeded() { + if ( ! dbMutex.isWriteLocked() ) // we implicitly commit if needed when releasing write lock + return false; + DEV commitJob._nSinceCommitIfNeededCall = 0; if (commitJob.bytes() > UncommittedBytesLimit) { // should this also fire if CmdLine::DurAlwaysCommit? stats.curr->_earlyCommits++; @@ -325,15 +332,6 @@ namespace mongo { } #endif - /** write the buffer we have built to the journal and fsync it. - outside of lock as that could be slow. - */ - static void WRITETOJOURNAL(AlignedBuilder& ab) { - Timer t; - journal(ab); - stats.curr->_writeToJournalMicros += t.micros(); - } - // Functor to be called over all MongoFiles class validateSingleMapMatches { @@ -486,6 +484,7 @@ namespace mongo { stats.curr->_remapPrivateViewMicros += t.micros(); } + // lock order: dbMutex first, then this mutex groupCommitMutex("groupCommit"); bool _groupCommitWithLimitedLocks() { @@ -502,8 +501,8 @@ namespace mongo { commitJob.notifyCommitted(); return true; } - - PREPLOGBUFFER(); + JSectHeader h; + PREPLOGBUFFER(h); RWLockRecursive::Shared lk3(MongoFile::mmmutex); @@ -515,16 +514,15 @@ namespace mongo { lk1.reset(); // ****** now other threads can do writes ****** - - WRITETOJOURNAL(commitJob._ab); + WRITETOJOURNAL(h, commitJob._ab); assert( abLen == commitJob._ab.len() ); // a check that no one touched the builder while we were doing work. if so, our locking is wrong. // data is now in the journal, which is sufficient for acknowledging getLastError. // (ok to crash after that) commitJob.notifyCommitted(); - WRITETODATAFILES(); - assert( abLen == commitJob._ab.len() ); // WRITETODATAFILES uses _ab also + WRITETODATAFILES(h, commitJob._ab); + assert( abLen == commitJob._ab.len() ); // check again wasn't modded commitJob._ab.reset(); // can't : dbMutex._remapPrivateViewRequested = true; @@ -570,18 +568,19 @@ namespace mongo { // (and we are only read locked in the dbMutex, so it could happen) scoped_lock lk(groupCommitMutex); - PREPLOGBUFFER(); + JSectHeader h; + PREPLOGBUFFER(h); // todo : write to the journal outside locks, as this write can be slow. // however, be careful then about remapprivateview as that cannot be done // if new writes are then pending in the private maps. - WRITETOJOURNAL(commitJob._ab); + WRITETOJOURNAL(h, commitJob._ab); // data is now in the journal, which is sufficient for acknowledging getLastError. // (ok to crash after that) commitJob.notifyCommitted(); - WRITETODATAFILES(); + WRITETODATAFILES(h, commitJob._ab); debugValidateAllMapsMatch(); commitJob.reset(); @@ -613,6 +612,7 @@ namespace mongo { } /** locking: in read lock when called + or, for early commits (commitIfNeeded), in write lock @see MongoMMF::close() */ static void groupCommit() { @@ -686,29 +686,41 @@ namespace mongo { } extern int groupCommitIntervalMs; + filesystem::path getJournalDir(); void durThread() { Client::initThread("journal"); + + bool samePartition = true; + try { + const string dbpathDir = boost::filesystem::path(dbpath).native_directory_string(); + samePartition = onSamePartition(getJournalDir().string(), dbpathDir); + } + catch(...) 
{ + } + while( !inShutdown() ) { RACECHECK + + unsigned ms = cmdLine.journalCommitInterval; + if( ms == 0 ) { + // use default + ms = samePartition ? 100 : 30; + } + + unsigned oneThird = (ms / 3) + 1; // +1 so never zero + try { - int millis = groupCommitIntervalMs; - { - stats.rotate(); - { - Timer t; - journalRotate(); // note we do this part outside of mongomutex - millis -= t.millis(); - wassert( millis <= groupCommitIntervalMs ); // race if groupCommitIntervalMs was changing by another thread so wassert - if( millis < 2 ) - millis = 2; - } + stats.rotate(); - // we do this in a couple blocks, which makes it a tiny bit faster (only a little) on throughput, - // but is likely also less spiky on our cpu usage, which is good: - sleepmillis(millis/2); - commitJob.wi()._deferred.invoke(); - sleepmillis(millis/2); + // we do this in a couple blocks (the invoke()), which makes it a tiny bit faster (only a little) on throughput, + // but is likely also less spiky on our cpu usage, which is good. + + // commit sooner if one or more getLastError j:true is pending + for( unsigned i = 1; i <= 2; i++ ) { + sleepmillis(oneThird); + if( commitJob._notify.nWaiting() ) + break; commitJob.wi()._deferred.invoke(); } @@ -772,6 +784,13 @@ namespace mongo { void DurableImpl::syncDataAndTruncateJournal() { dbMutex.assertWriteLocked(); + // a commit from the commit thread won't begin while we are in the write lock, + // but it may already be in progress and the end of that work is done outside + // (dbMutex) locks. This line waits for that to complete if already underway. + { + scoped_lock lk(groupCommitMutex); + } + groupCommit(); MongoFile::flushAll(true); journalCleanup(); diff --git a/db/dur_journal.cpp b/db/dur_journal.cpp index f85dda32b51..0a1bc5ebbad 100644 --- a/db/dur_journal.cpp +++ b/db/dur_journal.cpp @@ -34,6 +34,7 @@ #include "../util/file.h" #include "../util/checksum.h" #include "../util/concurrency/race.h" +#include "../util/compress.h" using namespace mongoutils; @@ -92,6 +93,11 @@ namespace mongo { assert(false); } + JSectFooter::JSectFooter() { + memset(this, 0, sizeof(*this)); + sentinel = JEntry::OpCode_Footer; + } + JSectFooter::JSectFooter(const void* begin, int len) { // needs buffer to compute hash sentinel = JEntry::OpCode_Footer; reserved = 0; @@ -103,6 +109,10 @@ namespace mongo { } bool JSectFooter::checkHash(const void* begin, int len) const { + if( !magicOk() ) { + log() << "journal footer not valid" << endl; + return false; + } Checksum c; c.gen(begin, len); DEV log() << "checkHash len:" << len << " hash:" << toHex(hash, 16) << " current:" << toHex(c.bytes, 16) << endl; @@ -317,13 +327,13 @@ namespace mongo { void preallocateFiles() { if( exists(getJournalDir()/"prealloc.0") || // if enabled previously, keep using - exists(getJournalDir()/"prealloc.1") || - preallocateIsFaster() ) { + exists(getJournalDir()/"prealloc.1") || + ( cmdLine.preallocj && preallocateIsFaster() ) ) { usingPreallocate = true; try { _preallocateFiles(); } - catch(...) { + catch(...) 
{ log() << "warning caught exception in preallocateFiles, continuing" << endl; } } @@ -343,10 +353,12 @@ namespace mongo { { // zero the header File f; - f.open(temppath.string().c_str(), false, true); + f.open(temppath.string().c_str(), false, false); char buf[8192]; memset(buf, 0, 8192); f.write(0, buf, 8192); + f.truncate(DataLimitPerJournalFile); + f.fsync(); } boost::filesystem::rename(temppath, filepath); return; @@ -471,12 +483,6 @@ namespace mongo { /** called during recovery (the error message text below assumes that) */ unsigned long long journalReadLSN() { - if( !debug ) { - // in nondebug build, for now, be conservative until more tests written, and apply the whole journal. - // however we will still write the lsn file to exercise that code, and use in _DEBUG build. - return 0; - } - if( !MemoryMappedFile::exists(lsnPath()) ) { log() << "info no lsn file in journal/ directory" << endl; return 0; @@ -595,15 +601,7 @@ namespace mongo { j._ageOut = a; } - /** check if time to rotate files. assure a file is open. - done separately from the journal() call as we can do this part - outside of lock. - thread: durThread() - */ - void journalRotate() { - j.rotate(); - } - void Journal::rotate() { + void Journal::_rotate() { assert( !dbMutex.atLeastReadLocked() ); RACECHECK @@ -618,6 +616,7 @@ namespace mongo { return; if( _curLogFile ) { + _curLogFile->truncate(); closeCurrentJournalFile(); removeUnneededJournalFiles(); } @@ -636,24 +635,74 @@ namespace mongo { } } - /** write to journal + /** write (append) the buffer we have built to the journal and fsync it. + outside of dbMutex lock as this could be slow. + @param uncompressed - a buffer that will be written to the journal after compression + will not return until on disk */ - void journal(const AlignedBuilder& b) { - j.journal(b); + void WRITETOJOURNAL(JSectHeader h, AlignedBuilder& uncompressed) { + Timer t; + j.journal(h, uncompressed); + stats.curr->_writeToJournalMicros += t.micros(); } - void Journal::journal(const AlignedBuilder& b) { + void Journal::journal(const JSectHeader& h, const AlignedBuilder& uncompressed) { + RACECHECK + static AlignedBuilder b(32*1024*1024); + /* buffer to journal will be + JSectHeader + compressed operations + JSectFooter + */ + const unsigned headTailSize = sizeof(JSectHeader) + sizeof(JSectFooter); + const unsigned max = maxCompressedLength(uncompressed.len()) + headTailSize; + b.reset(max); + + { + dassert( h.sectionLen() == (unsigned) 0xffffffff ); // we will backfill later + b.appendStruct(h); + } + + size_t compressedLength = 0; + rawCompress(uncompressed.buf(), uncompressed.len(), b.cur(), &compressedLength); + assert( compressedLength < 0xffffffff ); + assert( compressedLength < max ); + b.skip(compressedLength); + + // footer + unsigned L = 0xffffffff; + { + // pad to alignment, and set the total section length in the JSectHeader + assert( 0xffffe000 == (~(Alignment-1)) ); + unsigned lenUnpadded = b.len() + sizeof(JSectFooter); + L = (lenUnpadded + Alignment-1) & (~(Alignment-1)); + dassert( L >= lenUnpadded ); + + ((JSectHeader*)b.atOfs(0))->setSectionLen(lenUnpadded); + + JSectFooter f(b.buf(), b.len()); // computes checksum + b.appendStruct(f); + dassert( b.len() == lenUnpadded ); + + b.skip(L - lenUnpadded); + dassert( b.len() % Alignment == 0 ); + } + try { mutex::scoped_lock lk(_curLogFileMutex); // must already be open -- so that _curFileId is correct for previous buffer building assert( _curLogFile ); - stats.curr->_journaledBytes += b.len(); - _written += b.len(); - 
_curLogFile->synchronousAppend((void *) b.buf(), b.len()); + stats.curr->_uncompressedBytes += b.len(); + unsigned w = b.len(); + _written += w; + assert( w <= L ); + stats.curr->_journaledBytes += L; + _curLogFile->synchronousAppend((const void *) b.buf(), L); + _rotate(); } catch(std::exception& e) { - log() << "warning exception in dur::journal " << e.what() << endl; + log() << "error exception in dur::journal " << e.what() << endl; throw; } } diff --git a/db/dur_journal.h b/db/dur_journal.h index e8e3dfd1465..664f63942e0 100644 --- a/db/dur_journal.h +++ b/db/dur_journal.h @@ -28,7 +28,8 @@ namespace mongo { extern bool okToCleanUp; /** at termination after db files closed & fsynced - also after covery + also after recovery + closes and removes journal files @param log report in log that we are cleaning up if we actually do any work */ void journalCleanup(bool log = false); @@ -43,12 +44,6 @@ namespace mongo { */ void journalRotate(); - /** write/append to journal file * - @param buf - a buffer that will be written to the journal. - will not return until on disk - */ - void journal(const AlignedBuilder& buf); - /** flag that something has gone wrong during writing to the journal (not for recovery mode) */ @@ -67,5 +62,7 @@ namespace mongo { // in case disk controller buffers writes const long long ExtraKeepTimeMs = 10000; + const unsigned JournalCommitIntervalDefault = 100; + } } diff --git a/db/dur_journalformat.h b/db/dur_journalformat.h index 72587ccd7b6..10ed8487b71 100644 --- a/db/dur_journalformat.h +++ b/db/dur_journalformat.h @@ -22,6 +22,8 @@ namespace mongo { namespace dur { + const unsigned Alignment = 8192; + #pragma pack(1) /** beginning header for a journal/j._<n> file there is nothing important int this header at this time. except perhaps version #. @@ -34,7 +36,11 @@ namespace mongo { // x4142 is asci--readable if you look at the file with head/less -- thus the starting values were near // that. simply incrementing the version # is safe on a fwd basis. +#if defined(_NOCOMPRESS) enum { CurrentVersion = 0x4148 }; +#else + enum { CurrentVersion = 0x4149 }; +#endif unsigned short _version; // these are just for diagnostic ease (make header more useful as plain text) @@ -55,11 +61,25 @@ namespace mongo { /** "Section" header. A section corresponds to a group commit. len is length of the entire section including header and footer. + header and footer are not compressed, just the stuff in between. */ struct JSectHeader { - unsigned len; // length in bytes of the whole section + private: + unsigned _sectionLen; // unpadded length in bytes of the whole section + public: unsigned long long seqNumber; // sequence number that can be used on recovery to not do too much work unsigned long long fileId; // matches JHeader::fileId + unsigned sectionLen() const { return _sectionLen; } + + // we store the unpadded length so we can use that when we uncompress. to + // get the true total size this must be rounded up to the Alignment. + void setSectionLen(unsigned lenUnpadded) { _sectionLen = lenUnpadded; } + + unsigned sectionLenWithPadding() const { + unsigned x = (sectionLen() + (Alignment-1)) & (~(Alignment-1)); + dassert( x % Alignment == 0 ); + return x; + } }; /** an individual write operation within a group commit section. Either the entire section should @@ -111,6 +131,7 @@ namespace mongo { /** group commit section footer. md5 is a key field. 
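       [editor's sketch, not part of the patch] with compression enabled
       (CurrentVersion 0x4149 above), a section on disk is laid out as
           JSectHeader | snappy-compressed JEntry/DurOp data | JSectFooter | pad to Alignment
       and the writer computes, for a given compressedLen,
           unsigned lenUnpadded = sizeof(JSectHeader) + compressedLen + sizeof(JSectFooter);
           unsigned onDisk      = ( lenUnpadded + Alignment - 1 ) & ( ~(Alignment-1) );   // multiple of 8KB
       setSectionLen() records the unpadded value, so recovery can locate the
       footer and the exact compressed extent before uncompressing.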
*/ struct JSectFooter { + JSectFooter(); JSectFooter(const void* begin, int len); // needs buffer to compute hash unsigned sentinel; unsigned char hash[16]; @@ -123,6 +144,8 @@ namespace mongo { @return true if buffer looks valid */ bool checkHash(const void* begin, int len) const; + + bool magicOk() const { return *((unsigned*)magic) == 0x0a0a0a0a; } }; /** declares "the next entry(s) are for this database / file path prefix" */ diff --git a/db/dur_journalimpl.h b/db/dur_journalimpl.h index e436eae45f1..bf771c5d768 100644 --- a/db/dur_journalimpl.h +++ b/db/dur_journalimpl.h @@ -18,6 +18,7 @@ #pragma once +#include "dur_journalformat.h" #include "../util/logfile.h" namespace mongo { @@ -40,14 +41,14 @@ namespace mongo { */ void rotate(); - /** write to journal + /** append to the journal file */ - void journal(const AlignedBuilder& b); + void journal(const JSectHeader& h, const AlignedBuilder& b); boost::filesystem::path getFilePathFor(int filenumber) const; unsigned long long lastFlushTime() const { return _lastFlushTime; } - void cleanup(bool log); + void cleanup(bool log); // closes and removes journal files unsigned long long curFileId() const { return _curFileId; } @@ -61,6 +62,11 @@ namespace mongo { void open(); private: + /** check if time to rotate files. assure a file is open. + * internally called with every commit + */ + void _rotate(); + void _open(); void closeCurrentJournalFile(); void removeUnneededJournalFiles(); diff --git a/db/dur_preplogbuffer.cpp b/db/dur_preplogbuffer.cpp index 5851e415408..0d8ef3688db 100644 --- a/db/dur_preplogbuffer.cpp +++ b/db/dur_preplogbuffer.cpp @@ -60,7 +60,7 @@ namespace mongo { size_t ofs = 1; MongoMMF *mmf = findMMF_inlock(i->start(), /*out*/ofs); - _IF( !mmf->willNeedRemap() ) { + if( unlikely(!mmf->willNeedRemap()) ) { // tag this mmf as needed a remap of its private view later. 
// usually it will already be dirty/already set, so we do the if above first // to avoid possibility of cpu cache line contention @@ -97,7 +97,7 @@ namespace mongo { #endif bb.appendBuf(i->start(), e.len); - _IF (e.len != (unsigned)i->length()) { + if (unlikely(e.len != (unsigned)i->length())) { log() << "journal info splitting prepBasicWrite at boundary" << endl; // This only happens if we write to the last byte in a file and @@ -120,40 +120,25 @@ namespace mongo { // each time events switch to a different database we journal a JDbContext RelativePath lastDbPath; - set<WriteIntent>::iterator i = commitJob.writes().begin(); - - const WriteIntent *w = &(*i); - while(1) { - i++; - const WriteIntent *next = 0; - IF( i != commitJob.writes().end() ) { - next = &(*i); - PREFETCH(next); - } - prepBasicWrite_inlock(bb, w, lastDbPath); - _IF( next == 0 ) - break; - w = next; - }; + for( set<WriteIntent>::iterator i = commitJob.writes().begin(); i != commitJob.writes().end(); i++ ) { + prepBasicWrite_inlock(bb, &(*i), lastDbPath); + } } - void resetLogBuffer(AlignedBuilder& bb) { + void resetLogBuffer(/*out*/JSectHeader& h, AlignedBuilder& bb) { bb.reset(); - // JSectHeader - JSectHeader h; - h.len = (unsigned) 0xffffffff; // total length, will fill in later + h.setSectionLen(0xffffffff); // total length, will fill in later h.seqNumber = getLastDataFileFlushTime(); h.fileId = j.curFileId(); - - bb.appendStruct(h); } /** we will build an output buffer ourself and then use O_DIRECT we could be in read lock for this caller handles locking + @return partially populated sectheader and _ab set */ - void _PREPLOGBUFFER() { + void _PREPLOGBUFFER(JSectHeader& h) { assert( cmdLine.dur ); { @@ -165,7 +150,7 @@ namespace mongo { } AlignedBuilder& bb = commitJob._ab; - resetLogBuffer(bb); + resetLogBuffer(h, bb); // adds JSectHeader // ops other than basic writes (DurOp's) { @@ -174,34 +159,14 @@ namespace mongo { } } - { - prepBasicWrites(bb); - } - - // pad to alignment, and set the total section length in the JSectHeader - assert( 0xffffe000 == (~(Alignment-1)) ); - unsigned lenWillBe = bb.len() + sizeof(JSectFooter); - unsigned L = (lenWillBe + Alignment-1) & (~(Alignment-1)); - dassert( L >= lenWillBe ); - *((unsigned*)bb.atOfs(0)) = L; - - { - JSectFooter f(bb.buf(), bb.len()); - bb.appendStruct(f); - } - - { - unsigned padding = L - bb.len(); - bb.skip(padding); - dassert( bb.len() % Alignment == 0 ); - } + prepBasicWrites(bb); return; } - void PREPLOGBUFFER() { + void PREPLOGBUFFER(/*out*/ JSectHeader& h) { Timer t; j.assureLogFileOpen(); // so fileId is set - _PREPLOGBUFFER(); + _PREPLOGBUFFER(h); stats.curr->_prepLogBufferMicros += t.micros(); } diff --git a/db/dur_recover.cpp b/db/dur_recover.cpp index 2e1516914f1..1e719c0070d 100644 --- a/db/dur_recover.cpp +++ b/db/dur_recover.cpp @@ -27,6 +27,7 @@ #include "namespace.h" #include "../util/mongoutils/str.h" #include "../util/bufreader.h" +#include "../util/concurrency/race.h" #include "pdfile.h" #include "database.h" #include "db.h" @@ -35,6 +36,7 @@ #include "cmdline.h" #include "curop.h" #include "mongommf.h" +#include "../util/compress.h" #include <sys/stat.h> #include <fcntl.h> @@ -92,59 +94,73 @@ namespace mongo { throws */ class JournalSectionIterator : boost::noncopyable { + auto_ptr<BufReader> _entries; + const JSectHeader _h; + const char *_lastDbName; // pointer into mmaped journal file + const bool _doDurOps; + string _uncompressed; public: - JournalSectionIterator(const void *p, unsigned len, bool doDurOps) - : _br(p, len) - , 
_sectHead(static_cast<const JSectHeader*>(_br.skip(sizeof(JSectHeader)))) - , _lastDbName(NULL) - , _doDurOps(doDurOps) - {} + JournalSectionIterator(const JSectHeader& h, const void *compressed, unsigned compressedLen, bool doDurOpsRecovering) : + _h(h), + _lastDbName(0) + , _doDurOps(doDurOpsRecovering) + { + assert( doDurOpsRecovering ); + bool ok = uncompress((const char *)compressed, compressedLen, &_uncompressed); + if( !ok ) { + // it should always be ok (i think?) as there is a previous check to see that the JSectFooter is ok + log() << "couldn't uncompress journal section" << endl; + msgasserted(15874, "couldn't uncompress journal section"); + } + const char *p = _uncompressed.c_str(); + assert( compressedLen == _h.sectionLen() - sizeof(JSectFooter) - sizeof(JSectHeader) ); + _entries = auto_ptr<BufReader>( new BufReader(p, _uncompressed.size()) ); + } + + // we work with the uncompressed buffer when doing a WRITETODATAFILES (for speed) + JournalSectionIterator(const JSectHeader &h, const void *p, unsigned len) : + _entries( new BufReader((const char *) p, len) ), + _h(h), + _lastDbName(0) + , _doDurOps(false) - bool atEof() const { return _br.atEof(); } + { } - unsigned long long seqNumber() const { return _sectHead->seqNumber; } + bool atEof() const { return _entries->atEof(); } + + unsigned long long seqNumber() const { return _h.seqNumber; } /** get the next entry from the log. this function parses and combines JDbContext and JEntry's. - * @return true if got an entry. false at successful end of section (and no entry returned). * throws on premature end of section. */ - bool next(ParsedJournalEntry& e) { + void next(ParsedJournalEntry& e) { unsigned lenOrOpCode; - _br.read(lenOrOpCode); + _entries->read(lenOrOpCode); if (lenOrOpCode > JEntry::OpCode_Min) { switch( lenOrOpCode ) { case JEntry::OpCode_Footer: { - if (_doDurOps) { - const char* pos = (const char*) _br.pos(); - pos -= sizeof(lenOrOpCode); // rewind to include OpCode - const JSectFooter& footer = *(const JSectFooter*)pos; - int len = pos - (char*)_sectHead; - if (!footer.checkHash(_sectHead, len)) { - massert(13594, "journal checksum doesn't match", false); - } - } - return false; // false return value denotes end of section + assert( false ); } case JEntry::OpCode_FileCreated: case JEntry::OpCode_DropDb: { e.dbName = 0; - boost::shared_ptr<DurOp> op = DurOp::read(lenOrOpCode, _br); + boost::shared_ptr<DurOp> op = DurOp::read(lenOrOpCode, *_entries); if (_doDurOps) { e.op = op; } - return true; + return; } case JEntry::OpCode_DbContext: { - _lastDbName = (const char*) _br.pos(); - const unsigned limit = std::min((unsigned)Namespace::MaxNsLen, _br.remaining()); + _lastDbName = (const char*) _entries->pos(); + const unsigned limit = std::min((unsigned)Namespace::MaxNsLen, _entries->remaining()); const unsigned len = strnlen(_lastDbName, limit); massert(13533, "problem processing journal file during recovery", _lastDbName[len] == '\0'); - _br.skip(len+1); // skip '\0' too - _br.read(lenOrOpCode); + _entries->skip(len+1); // skip '\0' too + _entries->read(lenOrOpCode); // read this for the fall through } // fall through as a basic operation always follows jdbcontext, and we don't have anything to return yet @@ -156,18 +172,13 @@ namespace mongo { // JEntry - a basic write assert( lenOrOpCode && lenOrOpCode < JEntry::OpCode_Min ); - _br.rewind(4); - e.e = (JEntry *) _br.skip(sizeof(JEntry)); + _entries->rewind(4); + e.e = (JEntry *) _entries->skip(sizeof(JEntry)); e.dbName = e.e->isLocalDbContext() ? 
"local" : _lastDbName; assert( e.e->len == lenOrOpCode ); - _br.skip(e.e->len); - return true; + _entries->skip(e.e->len); } - private: - BufReader _br; - const JSectHeader* _sectHead; - const char *_lastDbName; // pointer into mmaped journal file - const bool _doDurOps; + }; static string fileName(const char* dbName, int fileNo) { @@ -289,27 +300,64 @@ namespace mongo { log() << "END section" << endl; } - void RecoveryJob::processSection(const void *p, unsigned len) { + void RecoveryJob::processSection(const JSectHeader *h, const void *p, unsigned len, const JSectFooter *f) { scoped_lock lk(_mx); + RACECHECK + + /** todo: we should really verify the checksum to see that seqNumber is ok? + that is expensive maybe there is some sort of checksum of just the header + within the header itself + */ + if( _recovering && _lastDataSyncedFromLastRun > h->seqNumber + ExtraKeepTimeMs ) { + if( h->seqNumber != _lastSeqMentionedInConsoleLog ) { + static int n; + if( ++n < 10 ) { + log() << "recover skipping application of section seq:" << h->seqNumber << " < lsn:" << _lastDataSyncedFromLastRun << endl; + } + else if( n == 10 ) { + log() << "recover skipping application of section more..." << endl; + } + _lastSeqMentionedInConsoleLog = h->seqNumber; + } + return; + } - vector<ParsedJournalEntry> entries; - JournalSectionIterator i(p, len, _recovering); + auto_ptr<JournalSectionIterator> i; + if( _recovering ) { + i = auto_ptr<JournalSectionIterator>(new JournalSectionIterator(*h, p, len, _recovering)); + } + else { + i = auto_ptr<JournalSectionIterator>(new JournalSectionIterator(*h, /*after header*/p, /*w/out header*/len)); + } - //DEV log() << "recovery processSection seq:" << i.seqNumber() << endl; - if( _recovering && _lastDataSyncedFromLastRun > i.seqNumber() + ExtraKeepTimeMs ) { - if( i.seqNumber() != _lastSeqMentionedInConsoleLog ) { - log() << "recover skipping application of section seq:" << i.seqNumber() << " < lsn:" << _lastDataSyncedFromLastRun << endl; - _lastSeqMentionedInConsoleLog = i.seqNumber(); + // we use a static so that we don't have to reallocate every time through. occasionally we + // go back to a small allocation so that if there were a spiky growth it won't stick forever. + static vector<ParsedJournalEntry> entries; + entries.clear(); +/** TEMP uncomment + RARELY OCCASIONALLY { + if( entries.capacity() > 2048 ) { + entries.shrink_to_fit(); + entries.reserve(2048); } - return; } +*/ // first read all entries to make sure this section is valid ParsedJournalEntry e; - while( i.next(e) ) { + while( !i->atEof() ) { + i->next(e); entries.push_back(e); } + // after the entries check the footer checksum + if( _recovering ) { + assert( ((const char *)h) + sizeof(JSectHeader) == p ); + if( !f->checkHash(h, len + sizeof(JSectHeader)) ) { + msgasserted(13594, "journal checksum doesn't match"); + } + } + // got all the entries for one group commit. 
apply them: applyEntries(entries); } @@ -345,11 +393,16 @@ namespace mongo { if( h.fileId != fileId ) { if( debug || (cmdLine.durOptions & CmdLine::DurDumpJournal) ) { log() << "Ending processFileBuffer at differing fileId want:" << fileId << " got:" << h.fileId << endl; - log() << " sect len:" << h.len << " seqnum:" << h.seqNumber << endl; + log() << " sect len:" << h.sectionLen() << " seqnum:" << h.seqNumber << endl; } return true; } - processSection(br.skip(h.len), h.len); + unsigned slen = h.sectionLen(); + unsigned dataLen = slen - sizeof(JSectHeader) - sizeof(JSectFooter); + const char *hdr = (const char *) br.skip(h.sectionLenWithPadding()); + const char *data = hdr + sizeof(JSectHeader); + const char *footer = data + dataLen; + processSection((const JSectHeader*) hdr, data, dataLen, (const JSectFooter*) footer); // ctrl c check killCurrentOp.checkForInterrupt(false); @@ -367,6 +420,17 @@ namespace mongo { /** apply a specific journal file */ bool RecoveryJob::processFile(path journalfile) { log() << "recover " << journalfile.string() << endl; + + try { + if( boost::filesystem::file_size( journalfile.string() ) == 0 ) { + log() << "recover info " << journalfile.string() << " has zero length" << endl; + return true; + } + } catch(...) { + // if something weird like a permissions problem keep going so the massert down below can happen (presumably) + log() << "recover exception checking filesize" << endl; + } + MemoryMappedFile f; void *p = f.mapWithOptions(journalfile.string().c_str(), MongoFile::READONLY | MongoFile::SEQUENTIAL); massert(13544, str::stream() << "recover error couldn't open " << journalfile.string(), p); @@ -382,13 +446,19 @@ namespace mongo { _lastDataSyncedFromLastRun = journalReadLSN(); log() << "recover lsn: " << _lastDataSyncedFromLastRun << endl; + // todo: we could truncate the journal file at rotation time to the right length, then this abruptEnd + // check can be turned back on. this is relevant when prealloc is being used. for( unsigned i = 0; i != files.size(); ++i ) { - /*bool abruptEnd = */processFile(files[i]); - /*if( abruptEnd && i+1 < files.size() ) { + bool abruptEnd = processFile(files[i]); + if( abruptEnd && i+1 < files.size() ) { +#if 1 // Leaving this as a warning for now. TODO: make this an error post 2.0 + log() << "recover warning: abrupt end to file " << files[i].string() << ", yet it isn't the last journal file" << endl; +#else log() << "recover error: abrupt end to file " << files[i].string() << ", yet it isn't the last journal file" << endl; close(); uasserted(13535, "recover abrupt journal file end"); - }*/ +#endif + } } close(); diff --git a/db/dur_recover.h b/db/dur_recover.h index b5a922b498a..955e730ea05 100644 --- a/db/dur_recover.h +++ b/db/dur_recover.h @@ -2,6 +2,7 @@ #pragma once +#include "dur_journalformat.h" #include "../util/concurrency/mutex.h" #include "../util/file.h" @@ -15,10 +16,14 @@ namespace mongo { */ class RecoveryJob : boost::noncopyable { public: - RecoveryJob() :_lastDataSyncedFromLastRun(0), _mx("recovery"), _recovering(false) { _lastSeqMentionedInConsoleLog = 1; } + RecoveryJob() : _lastDataSyncedFromLastRun(0), + _mx("recovery"), _recovering(false) { _lastSeqMentionedInConsoleLog = 1; } void go(vector<path>& files); ~RecoveryJob(); - void processSection(const void *, unsigned len); + + /** @param data data between header and footer. compressed if recovering. 
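       [editor's sketch, not part of the patch] during recovery a section is
       expanded and validated roughly as follows; nothing is applied unless the
       md5 in the footer matches:
           string body;
           if( !uncompress( (const char *) data, len, &body ) )          // snappy wrapper, util/compress.h
               msgasserted(15874, "couldn't uncompress journal section");
           // ... parse JEntry / DurOp records out of body ...
           if( !f->checkHash(h, len + sizeof(JSectHeader)) )             // md5 over header + compressed data
               msgasserted(13594, "journal checksum doesn't match");
           // only then applyEntries(...)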
*/ + void processSection(const JSectHeader *h, const void *data, unsigned len, const JSectFooter *f); + void close(); // locks and calls _close() static RecoveryJob & get() { return _instance; } diff --git a/db/dur_stats.h b/db/dur_stats.h index d4943c01cb3..50a26d1f215 100644 --- a/db/dur_stats.h +++ b/db/dur_stats.h @@ -20,6 +20,7 @@ namespace mongo { unsigned _commits; unsigned _earlyCommits; // count of early commits from commitIfNeeded() or from getDur().commitNow() unsigned long long _journaledBytes; + unsigned long long _uncompressedBytes; unsigned long long _writeToDataFilesBytes; unsigned long long _prepLogBufferMicros; diff --git a/db/dur_writetodatafiles.cpp b/db/dur_writetodatafiles.cpp index cdccb018d83..6724f0731aa 100644 --- a/db/dur_writetodatafiles.cpp +++ b/db/dur_writetodatafiles.cpp @@ -47,9 +47,9 @@ namespace mongo { @see https://docs.google.com/drawings/edit?id=1TklsmZzm7ohIZkwgeK6rMvsdaR13KjtJYMsfLr175Zc&hl=en */ - void WRITETODATAFILES_Impl1() { + void WRITETODATAFILES_Impl1(const JSectHeader& h, AlignedBuilder& uncompressed) { RWLockRecursive::Shared lk(MongoFile::mmmutex); - RecoveryJob::get().processSection(commitJob._ab.buf(), commitJob._ab.len()); + RecoveryJob::get().processSection(&h, uncompressed.buf(), uncompressed.len(), 0); } #if 0 @@ -81,16 +81,14 @@ namespace mongo { #endif // concurrency: in mmmutex, not necessarily in dbMutex - void WRITETODATAFILES() { + void WRITETODATAFILES(const JSectHeader& h, AlignedBuilder& uncompressed) { Timer t; #if defined(_EXPERIMENTAL) WRITETODATAFILES_Impl3(); #else - WRITETODATAFILES_Impl1(); + WRITETODATAFILES_Impl1(h, uncompressed); #endif stats.curr->_writeToDataFilesMicros += t.micros(); - - } } diff --git a/db/durop.h b/db/durop.h index c4574c2e3cb..9ab1bfcbede 100644 --- a/db/durop.h +++ b/db/durop.h @@ -28,8 +28,6 @@ namespace mongo { namespace dur { - const unsigned Alignment = 8192; - /** DurOp - Operations we journal that aren't just basic writes. * * Basic writes are logged as JEntry's, and indicated in ram temporarily as struct dur::WriteIntent. diff --git a/db/geo/2d.cpp b/db/geo/2d.cpp index 21b0eaa6601..9b762b260de 100644 --- a/db/geo/2d.cpp +++ b/db/geo/2d.cpp @@ -138,7 +138,11 @@ namespace mongo { GeoHash b = a; b.move(1, 1); - _error = distance(a, b); + // Epsilon is 1/100th of a bucket size + // TODO: Can we actually find error bounds for the sqrt function? + double epsilon = 0.001 / _scaling; + _error = distance(a, b) + epsilon; + // Error in radians _errorSphere = deg2rad( _error ); } @@ -293,6 +297,14 @@ namespace mongo { } + BSONObj _fromBSONHash( const BSONElement& e ) const { + return _unhash( _tohash( e ) ); + } + + BSONObj _fromBSONHash( const BSONObj& o ) const { + return _unhash( _tohash( o.firstElement() ) ); + } + GeoHash _tohash( const BSONElement& e ) const { if ( e.isABSONObj() ) return _hash( e.embeddedObject() ); @@ -368,6 +380,10 @@ namespace mongo { } double sizeEdge( const GeoHash& a ) const { + + if( ! 
a.constrains() ) + return _max - _min; + double ax,ay,bx,by; GeoHash b = a; b.move( 1 , 1 ); @@ -443,6 +459,10 @@ namespace mongo { Box() {} + BSONArray toBSON() const { + return BSON_ARRAY( BSON_ARRAY( _min._x << _min._y ) << BSON_ARRAY( _max._x << _max._y ) ); + } + string toString() const { StringBuilder buf(64); buf << _min.toString() << " -->> " << _max.toString(); @@ -630,8 +650,8 @@ namespace mongo { } else if( fudge == 0 ){ - if( p._y == p1._y && p._x == p1._x ) return true; - else if( p._y == p2._y && p._x == p2._x ) return true; + if( p._y == p1._y && p._x == p1._x ) return true; + else if( p._y == p2._y && p._x == p2._x ) return true; } // Normal intersection test. @@ -742,293 +762,96 @@ namespace mongo { geo2dplugin.getName(); } - struct GeoUnitTest : public UnitTest { - - int round( double d ) { - return (int)(.5+(d*1000)); - } - -#define GEOHEQ(a,b) if ( a.toString() != b ){ cout << "[" << a.toString() << "] != [" << b << "]" << endl; assert( a == GeoHash(b) ); } - - void run() { - assert( ! GeoHash::isBitSet( 0 , 0 ) ); - assert( ! GeoHash::isBitSet( 0 , 31 ) ); - assert( GeoHash::isBitSet( 1 , 31 ) ); - - IndexSpec i( BSON( "loc" << "2d" ) ); - Geo2dType g( &geo2dplugin , &i ); - { - double x = 73.01212; - double y = 41.352964; - BSONObj in = BSON( "x" << x << "y" << y ); - GeoHash h = g._hash( in ); - BSONObj out = g._unhash( h ); - assert( round(x) == round( out["x"].number() ) ); - assert( round(y) == round( out["y"].number() ) ); - assert( round( in["x"].number() ) == round( out["x"].number() ) ); - assert( round( in["y"].number() ) == round( out["y"].number() ) ); - } - - { - double x = -73.01212; - double y = 41.352964; - BSONObj in = BSON( "x" << x << "y" << y ); - GeoHash h = g._hash( in ); - BSONObj out = g._unhash( h ); - assert( round(x) == round( out["x"].number() ) ); - assert( round(y) == round( out["y"].number() ) ); - assert( round( in["x"].number() ) == round( out["x"].number() ) ); - assert( round( in["y"].number() ) == round( out["y"].number() ) ); - } - - { - GeoHash h( "0000" ); - h.move( 0 , 1 ); - GEOHEQ( h , "0001" ); - h.move( 0 , -1 ); - GEOHEQ( h , "0000" ); - - h.init( "0001" ); - h.move( 0 , 1 ); - GEOHEQ( h , "0100" ); - h.move( 0 , -1 ); - GEOHEQ( h , "0001" ); - - - h.init( "0000" ); - h.move( 1 , 0 ); - GEOHEQ( h , "0010" ); - } - - { - Box b( 5 , 5 , 2 ); - assert( "(5,5) -->> (7,7)" == b.toString() ); - } - - { - GeoHash a = g.hash( 1 , 1 ); - GeoHash b = g.hash( 4 , 5 ); - assert( 5 == (int)(g.distance( a , b ) ) ); - a = g.hash( 50 , 50 ); - b = g.hash( 42 , 44 ); - assert( round(10) == round(g.distance( a , b )) ); - } - - { - GeoHash x("0000"); - assert( 0 == x.getHash() ); - x.init( 0 , 1 , 32 ); - GEOHEQ( x , "0000000000000000000000000000000000000000000000000000000000000001" ) - - assert( GeoHash( "1100").hasPrefix( GeoHash( "11" ) ) ); - assert( ! 
GeoHash( "1000").hasPrefix( GeoHash( "11" ) ) ); - } - - { - GeoHash x("1010"); - GEOHEQ( x , "1010" ); - GeoHash y = x + "01"; - GEOHEQ( y , "101001" ); - } - - { - - GeoHash a = g.hash( 5 , 5 ); - GeoHash b = g.hash( 5 , 7 ); - GeoHash c = g.hash( 100 , 100 ); - /* - cout << "a: " << a << endl; - cout << "b: " << b << endl; - cout << "c: " << c << endl; - - cout << "a: " << a.toStringHex1() << endl; - cout << "b: " << b.toStringHex1() << endl; - cout << "c: " << c.toStringHex1() << endl; - */ - BSONObj oa = a.wrap(); - BSONObj ob = b.wrap(); - BSONObj oc = c.wrap(); - /* - cout << "a: " << oa.hexDump() << endl; - cout << "b: " << ob.hexDump() << endl; - cout << "c: " << oc.hexDump() << endl; - */ - assert( oa.woCompare( ob ) < 0 ); - assert( oa.woCompare( oc ) < 0 ); - - } - - { - GeoHash x( "000000" ); - x.move( -1 , 0 ); - GEOHEQ( x , "101010" ); - x.move( 1 , -1 ); - GEOHEQ( x , "010101" ); - x.move( 0 , 1 ); - GEOHEQ( x , "000000" ); - } - { - GeoHash prefix( "110011000000" ); - GeoHash entry( "1100110000011100000111000001110000011100000111000001000000000000" ); - assert( ! entry.hasPrefix( prefix ) ); - entry = GeoHash("1100110000001100000111000001110000011100000111000001000000000000"); - assert( entry.toString().find( prefix.toString() ) == 0 ); - assert( entry.hasPrefix( GeoHash( "1100" ) ) ); - assert( entry.hasPrefix( prefix ) ); - } - - { - GeoHash a = g.hash( 50 , 50 ); - GeoHash b = g.hash( 48 , 54 ); - assert( round( 4.47214 ) == round( g.distance( a , b ) ) ); - } - - - { - Box b( Point( 29.762283 , -95.364271 ) , Point( 29.764283000000002 , -95.36227099999999 ) ); - assert( b.inside( 29.763 , -95.363 ) ); - assert( ! b.inside( 32.9570255 , -96.1082497 ) ); - assert( ! b.inside( 32.9570255 , -96.1082497 , .01 ) ); - } - - { - GeoHash a( "11001111" ); - assert( GeoHash( "11" ) == a.commonPrefix( GeoHash("11") ) ); - assert( GeoHash( "11" ) == a.commonPrefix( GeoHash("11110000") ) ); - } - - { - int N = 10000; - { - Timer t; - for ( int i=0; i<N; i++ ) { - unsigned x = (unsigned)rand(); - unsigned y = (unsigned)rand(); - GeoHash h( x , y ); - unsigned a,b; - h.unhash_slow( a,b ); - assert( a == x ); - assert( b == y ); - } - //cout << "slow: " << t.millis() << endl; - } - - { - Timer t; - for ( int i=0; i<N; i++ ) { - unsigned x = (unsigned)rand(); - unsigned y = (unsigned)rand(); - GeoHash h( x , y ); - unsigned a,b; - h.unhash_fast( a,b ); - assert( a == x ); - assert( b == y ); - } - //cout << "fast: " << t.millis() << endl; - } - - } - - { - // see http://en.wikipedia.org/wiki/Great-circle_distance#Worked_example - - { - Point BNA (-86.67, 36.12); - Point LAX (-118.40, 33.94); + class GeoHopper; - double dist1 = spheredist_deg(BNA, LAX); - double dist2 = spheredist_deg(LAX, BNA); + class GeoPoint { + public: - // target is 0.45306 - assert( 0.45305 <= dist1 && dist1 <= 0.45307 ); - assert( 0.45305 <= dist2 && dist2 <= 0.45307 ); - } - { - Point BNA (-1.5127, 0.6304); - Point LAX (-2.0665, 0.5924); + GeoPoint() : _distance( -1 ), _exact( false ) + {} - double dist1 = spheredist_rad(BNA, LAX); - double dist2 = spheredist_rad(LAX, BNA); + //// Distance not used //// - // target is 0.45306 - assert( 0.45305 <= dist1 && dist1 <= 0.45307 ); - assert( 0.45305 <= dist2 && dist2 <= 0.45307 ); - } - { - Point JFK (-73.77694444, 40.63861111 ); - Point LAX (-118.40, 33.94); + GeoPoint( const GeoKeyNode& node ) + : _key( node._key ) , _loc( node.recordLoc ) , _o( node.recordLoc.obj() ), _distance( -1 ) , _exact( false ) { + } - double dist = spheredist_deg(JFK, LAX) * 
EARTH_RADIUS_MILES; - assert( dist > 2469 && dist < 2470 ); - } + //// Immediate initialization of distance //// - { - Point BNA (-86.67, 36.12); - Point LAX (-118.40, 33.94); - Point JFK (-73.77694444, 40.63861111 ); - assert( spheredist_deg(BNA, BNA) < 1e-6); - assert( spheredist_deg(LAX, LAX) < 1e-6); - assert( spheredist_deg(JFK, JFK) < 1e-6); + GeoPoint( const GeoKeyNode& node, double distance, bool exact ) + : _key( node._key ) , _loc( node.recordLoc ) , _o( node.recordLoc.obj() ), _distance( distance ), _exact( exact ) { + } - Point zero (0, 0); - Point antizero (0,-180); + GeoPoint( const GeoPoint& pt, double distance, bool exact ) + : _key( pt.key() ) , _loc( pt.loc() ) , _o( pt.obj() ), _distance( distance ), _exact( exact ) { + } - // these were known to cause NaN - assert( spheredist_deg(zero, zero) < 1e-6); - assert( fabs(M_PI-spheredist_deg(zero, antizero)) < 1e-6); - assert( fabs(M_PI-spheredist_deg(antizero, zero)) < 1e-6); - } - } + bool operator<( const GeoPoint& other ) const { + if( _distance != other._distance ) return _distance < other._distance; + if( _exact != other._exact ) return _exact < other._exact; + return _loc < other._loc; } - } geoUnitTest; - class GeoHopper; + double distance() const { + return _distance; + } - class GeoPoint { - public: - GeoPoint() { } + bool isExact() const { + return _exact; + } - //// Distance not used //// + BSONObj key() const { + return _key; + } - GeoPoint( const GeoKeyNode& node ) - : _key( node._key ) , _loc( node.recordLoc ) , _o( node.recordLoc.obj() ) , _exactDistance( -1 ), _exactWithin( false ) { + DiskLoc loc() const { + return _loc; } - - //// Immediate initialization of exact distance //// - GeoPoint( const GeoKeyNode& node , double exactDistance, bool exactWithin ) - : _key( node._key ) , _loc( node.recordLoc ) , _o( node.recordLoc.obj() ), _exactDistance( exactDistance ), _exactWithin( exactWithin ) { + BSONObj obj() const { + return _o; } - bool operator<( const GeoPoint& other ) const { - return _exactDistance < other._exactDistance; + BSONObj pt() const { + return _pt; } - bool isEmpty() const { + bool isEmpty() { return _o.isEmpty(); } string toString() const { - return str::stream() << "Point from " << _o.toString() << " dist : " << _exactDistance << " within ? " << _exactWithin; + return str::stream() << "Point from " << _o << " dist : " << _distance << ( _exact ? " (ex)" : " (app)" ); } BSONObj _key; DiskLoc _loc; BSONObj _o; + BSONObj _pt; - double _exactDistance; - bool _exactWithin; + double _distance; + bool _exact; }; // GeoBrowse subclasses this class GeoAccumulator { public: - GeoAccumulator( const Geo2dType * g , const BSONObj& filter ) - : _g(g) , _lookedAt(0) , _objectsLoaded(0) , _found(0) { + GeoAccumulator( const Geo2dType * g , const BSONObj& filter, bool uniqueDocs, bool needDistance ) + : _g(g) , + _keysChecked(0) , + _lookedAt(0) , + _matchesPerfd(0) , + _objectsLoaded(0) , + _pointsLoaded(0) , + _found(0) , + _uniqueDocs( uniqueDocs ) , + _needDistance( needDistance ) + { if ( ! filter.isEmpty() ) { _matcher.reset( new CoveredIndexMatcher( filter , g->keyPattern() ) ); + GEODEBUG( "Matcher is now " << _matcher->docMatcher().toString() ); } } @@ -1042,6 +865,9 @@ namespace mongo { set< pair<DiskLoc,int> > _seen; public: bool seen(DiskLoc bucket, int pos) { + + _keysChecked++; + pair< set<pair<DiskLoc,int> >::iterator, bool > seenBefore = _seen.insert( make_pair(bucket,pos) ); if ( ! 
seenBefore.second ) { GEODEBUG( "\t\t\t\t already seen : " << bucket.toString() << ' ' << pos ); // node.key.toString() << " @ " << Point( _g, GeoHash( node.key.firstElement() ) ).toString() << " with " << node.recordLoc.obj()["_id"] ); @@ -1050,29 +876,43 @@ namespace mongo { return false; } - void add( const GeoKeyNode& node ) { + enum KeyResult { BAD, BORDER, GOOD }; + + virtual void add( const GeoKeyNode& node ) { - GEODEBUG( "\t\t\t\t checking key " << node.key.toString() ) + GEODEBUG( "\t\t\t\t checking key " << node._key.toString() ) _lookedAt++; - // distance check - double d = 0; - if ( ! checkDistance( node , d ) ) { - GEODEBUG( "\t\t\t\t bad distance : " << node.recordLoc.obj() << "\t" << d ); + //// + // Approximate distance check using key data + //// + double keyD = 0; + Point keyP( _g, GeoHash( node._key.firstElement(), _g->_bits ) ); + KeyResult keyOk = approxKeyCheck( keyP, keyD ); + if ( keyOk == BAD ) { + GEODEBUG( "\t\t\t\t bad distance : " << node.recordLoc.obj() << "\t" << keyD ); return; } - GEODEBUG( "\t\t\t\t good distance : " << node.recordLoc.obj() << "\t" << d ); + GEODEBUG( "\t\t\t\t good distance : " << node.recordLoc.obj() << "\t" << keyD ); + //// + // Check for match using other key (and potentially doc) criteria + //// // Remember match results for each object map<DiskLoc, bool>::iterator match = _matched.find( node.recordLoc ); bool newDoc = match == _matched.end(); if( newDoc ) { + GEODEBUG( "\t\t\t\t matching new doc with " << (_matcher ? _matcher->docMatcher().toString() : "(empty)" ) ); + // matcher MatchDetails details; if ( _matcher.get() ) { bool good = _matcher->matchesWithSingleKeyIndex( node._key , node.recordLoc , &details ); + + _matchesPerfd++; + if ( details._loadedObject ) _objectsLoaded++; @@ -1094,12 +934,50 @@ namespace mongo { return; } - addSpecific( node , d, newDoc ); - _found++; + //// + // Exact check with particular data fields + //// + // Can add multiple points + int diff = addSpecific( node , keyP, keyOk == BORDER, keyD, newDoc ); + if( diff > 0 ) _found += diff; + else _found -= -diff; + + } + + virtual void getPointsFor( const BSONObj& key, const BSONObj& obj, vector< BSONObj >& locsForNode, bool allPoints = false ){ + + // Find all the location objects from the keys + vector< BSONObj > locs; + _g->getKeys( obj, allPoints ? 
locsForNode : locs ); + _pointsLoaded++; + + if( allPoints ) return; + if( locs.size() == 1 ){ + locsForNode.push_back( locs[0] ); + return; + } + + // Find the particular location we want + GeoHash keyHash( key.firstElement(), _g->_bits ); + + // log() << "Hash: " << node.key << " and " << keyHash.getHash() << " unique " << _uniqueDocs << endl; + for( vector< BSONObj >::iterator i = locs.begin(); i != locs.end(); ++i ) { + + // Ignore all locations not hashed to the key's hash, since we may see + // those later + if( _g->_hash( *i ) != keyHash ) continue; + + locsForNode.push_back( *i ); + + } + } - virtual void addSpecific( const GeoKeyNode& node , double d, bool newDoc ) = 0; - virtual bool checkDistance( const GeoKeyNode& node , double& d ) = 0; + virtual int addSpecific( const GeoKeyNode& node, const Point& p , bool inBounds, double d, bool newDoc ) = 0; + virtual KeyResult approxKeyCheck( const Point& p , double& keyD ) = 0; + virtual bool exactDocCheck( const Point& p , double& d ) = 0; + virtual bool expensiveExactCheck(){ return false; } + long long found() const { return _found; @@ -1109,9 +987,16 @@ namespace mongo { map<DiskLoc, bool> _matched; shared_ptr<CoveredIndexMatcher> _matcher; + long long _keysChecked; long long _lookedAt; + long long _matchesPerfd; long long _objectsLoaded; + long long _pointsLoaded; long long _found; + + bool _uniqueDocs; + bool _needDistance; + }; struct BtreeLocation { @@ -1264,8 +1149,8 @@ namespace mongo { DONE } _state; - GeoBrowse( const Geo2dType * g , string type , BSONObj filter = BSONObj() ) - : GeoCursorBase( g ), GeoAccumulator( g , filter ) , + GeoBrowse( const Geo2dType * g , string type , BSONObj filter = BSONObj(), bool uniqueDocs = true, bool needDistance = false ) + : GeoCursorBase( g ), GeoAccumulator( g , filter, uniqueDocs, needDistance ) , _type( type ) , _filter( filter ) , _firstCall(true), _nscanned(), _centerPrefix(0, 0, 0) { // Set up the initial expand state @@ -1350,11 +1235,9 @@ namespace mongo { virtual void fillStack( int maxToCheck, int maxToAdd = -1, bool onlyExpand = false ) { #ifdef GEODEBUGGING - - int s = _state; log() << "Filling stack with maximum of " << maxToCheck << ", state : " << (int) _state << endl; - #endif + if( maxToAdd < 0 ) maxToAdd = maxToCheck; int maxFound = _foundInExp + maxToCheck; assert( maxToCheck > 0 ); @@ -1395,7 +1278,6 @@ namespace mongo { while ( true ) { GEODEBUG( "box prefix [" << _prefix << "]" ); - #ifdef GEODEBUGGING if( _prefix.constrains() ) { log() << "current expand box : " << Box( _g, _prefix ).toString() << endl; @@ -1407,6 +1289,9 @@ namespace mongo { GEODEBUG( "expanding box points... "); + // Record the prefix we're actively exploring... + _expPrefix.reset( new GeoHash( _prefix ) ); + // Find points inside this prefix while ( _min.hasPrefix( _prefix ) && _min.advance( -1 , _foundInExp , this ) && _foundInExp < maxFound && _found < maxAdded ); while ( _max.hasPrefix( _prefix ) && _max.advance( 1 , _foundInExp , this ) && _foundInExp < maxFound && _found < maxAdded ); @@ -1419,7 +1304,7 @@ namespace mongo { #endif - GEODEBUG( "finished expand, found : " << ( maxToCheck - ( maxFound - _found ) ) ); + GEODEBUG( "finished expand, found : " << ( maxToAdd - ( maxAdded - _found ) ) ); if( _foundInExp >= maxFound || _found >= maxAdded ) return; // We've searched this prefix fully, remember @@ -1429,6 +1314,7 @@ namespace mongo { if ( ! 
_prefix.constrains() ) { GEODEBUG( "box exhausted" ); _state = DONE; + notePrefix(); return; } @@ -1453,8 +1339,9 @@ break; } - } + notePrefix(); + } // If we're only expanding the central box, don't examine neighbors if( onlyExpand ) return; @@ -1495,7 +1382,7 @@ GeoHash _neighborPrefix = _centerPrefix; _neighborPrefix.move( i, j ); - GEODEBUG( "moving to " << i << " , " << j ); + GEODEBUG( "moving to " << i << " , " << j << " fringe : " << _fringe.size() ); PREFIXDEBUG( _centerPrefix, _g ); PREFIXDEBUG( _neighborPrefix , _g ); while( _fringe.size() > 0 ) { @@ -1542,7 +1429,7 @@ // be entirely done. Max recurse depth is < 8 * 16. // If we're maxed out on points, return - if( _foundInExp >= maxFound ) { + if( _foundInExp >= maxFound || _found >= maxAdded ) { // Make sure we'll come back to add more points assert( _state == DOING_EXPAND ); return; } @@ -1571,14 +1458,63 @@ // The amount the current box overlaps our search area virtual double intersectsBox( Box& cur ) = 0; - virtual void addSpecific( const GeoKeyNode& node , double d, bool newDoc ) { + virtual int addSpecific( const GeoKeyNode& node , const Point& keyP , bool onBounds , double keyD , bool newDoc ) { - if( ! newDoc ) return; + int found = 0; - if ( _cur.isEmpty() ) - _cur = GeoPoint( node ); - else - _stack.push_back( GeoPoint( node ) ); + // We need to handle every possible point in this method, even those not in the key value, to + // avoid us tracking which hashes we've already seen. + if( ! newDoc ){ + // log() << "Already handled doc!" << endl; + return 0; + } + + if( _uniqueDocs && ! onBounds ) { + // log() << "Added ind to " << _type << endl; + _stack.push_front( GeoPoint( node ) ); + found++; + } + else { + // We now handle every possible point in the document, even those not in the key value, + // since we're iterating through them anyway - prevents us from having to save the hashes + // we've seen per-doc + + // If we're filtering by hash, get the original + bool expensiveExact = expensiveExactCheck(); + + vector< BSONObj > locs; + getPointsFor( node._key, node.recordLoc.obj(), locs, true ); + for( vector< BSONObj >::iterator i = locs.begin(); i != locs.end(); ++i ){ + + double d = -1; + Point p( *i ); + + // We can avoid exact document checks by redoing approx checks, + // if the exact checks are more expensive. + bool needExact = true; + if( expensiveExact ){ + assert( false ); + KeyResult result = approxKeyCheck( p, d ); + if( result == BAD ) continue; + else if( result == GOOD ) needExact = false; + } + + if( !
needExact || exactDocCheck( p, d ) ){ + // log() << "Added mult to " << _type << endl; + _stack.push_front( GeoPoint( node ) ); + found++; + // If returning unique, just exit after first point is added + if( _uniqueDocs ) break; + } + } + } + + if ( _cur.isEmpty() && _stack.size() > 0 ){ + _cur = _stack.front(); + _stack.pop_front(); + } + + return found; } virtual long long nscanned() { @@ -1588,6 +1524,35 @@ namespace mongo { return _nscanned; } + virtual void explainDetails( BSONObjBuilder& b ){ + b << "keysChecked" << _keysChecked; + b << "lookedAt" << _lookedAt; + b << "matchesPerfd" << _matchesPerfd; + b << "objectsLoaded" << _objectsLoaded; + b << "pointsLoaded" << _pointsLoaded; + } + + virtual BSONObj prettyIndexBounds() const { + + vector<GeoHash>::const_iterator i = _expPrefixes.end(); + if( _expPrefixes.size() > 0 && *(--i) != *( _expPrefix.get() ) ) + _expPrefixes.push_back( *( _expPrefix.get() ) ); + + BSONObjBuilder bob; + BSONArrayBuilder bab; + for( i = _expPrefixes.begin(); i != _expPrefixes.end(); ++i ){ + bab << Box( _g, *i ).toBSON(); + } + bob << _g->_geo << bab.arr(); + + return bob.obj(); + + } + + void notePrefix() { + _expPrefixes.push_back( _prefix ); + } + string _type; BSONObj _filter; list<GeoPoint> _stack; @@ -1616,6 +1581,9 @@ namespace mongo { BtreeLocation _min; BtreeLocation _max; + shared_ptr<GeoHash> _expPrefix; + mutable vector<GeoHash> _expPrefixes; + }; @@ -1623,133 +1591,148 @@ namespace mongo { public: typedef multiset<GeoPoint> Holder; - GeoHopper( const Geo2dType * g , unsigned max , const Point& n , const BSONObj& filter = BSONObj() , double maxDistance = numeric_limits<double>::max() , GeoDistType type=GEO_PLAIN ) - : GeoBrowse( g, "search", filter ), _max( max ) , _near( n ), _maxDistance( maxDistance ), _type( type ), _distError( type == GEO_PLAIN ? g->_error : g->_errorSphere ), _farthest(0) + GeoHopper( const Geo2dType * g , unsigned max , const Point& n , const BSONObj& filter = BSONObj() , double maxDistance = numeric_limits<double>::max() , GeoDistType type=GEO_PLAIN, bool uniqueDocs = false, bool needDistance = true ) + : GeoBrowse( g, "search", filter, uniqueDocs, needDistance ), _max( max ) , _near( n ), _maxDistance( maxDistance ), _type( type ), _distError( type == GEO_PLAIN ? g->_error : g->_errorSphere ), _farthest(0) {} - virtual bool checkDistance( const GeoKeyNode& node, double& d ) { + virtual KeyResult approxKeyCheck( const Point& p, double& d ) { // Always check approximate distance, since it lets us avoid doing // checks of the rest of the object if it succeeds - // TODO: Refactor so that we can check exact distance and within if we are going to - // anyway. 
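// [editor's note, not part of the patch] the boolean 'good' in the removed code
// below collapses three cases that the new approxKeyCheck() keeps apart: a key
// distance within 2 * _distError of the current cutoff cannot be decided from
// the lossy geohash alone and is returned as BORDER for exactDocCheck() to
// settle. Worked example, assuming _maxDistance = 1.0 and _distError = 0.01
// (cutoff 1.0, ambiguity band [0.98, 1.02]):
//     d = 0.97  -> GOOD      (truly inside even if the key overestimates)
//     d = 0.985 -> BORDER    (needs the exact per-document check)
//     d = 1.015 -> BORDER    (likewise)
//     d = 1.03  -> BAD       (truly outside even if the key underestimates)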
- d = approxDistance( node ); - assert( d >= 0 ); - // Out of the error range, see how close we are to the furthest points - bool good = d <= _maxDistance + 2 * _distError /* In error range */ - && ( _points.size() < _max /* need more points */ - || d <= farthest() + 2 * _distError /* could be closer than previous points */ ); + switch (_type) { + case GEO_PLAIN: + d = _near.distance( p ); + break; + case GEO_SPHERE: + checkEarthBounds( p ); + d = spheredist_deg( _near, p ); + break; + default: assert( false ); + } + assert( d >= 0 ); GEODEBUG( "\t\t\t\t\t\t\t checkDistance " << _near.toString() - << "\t" << GeoHash( node.key.firstElement() ) << "\t" << d - << " ok: " << good << " farthest: " << farthest() ); + << "\t" << p.toString() << "\t" << d + << " farthest: " << farthest() ); - return good; - } + // If we need more points + double borderDist = ( _points.size() < _max ? _maxDistance : farthest() ); + + if( d >= borderDist - 2 * _distError && d <= borderDist + 2 * _distError ) return BORDER; + else return d < borderDist ? GOOD : BAD; - double approxDistance( const GeoKeyNode& node ) { - return approxDistance( GeoHash( node._key.firstElement() ) ); } - double approxDistance( const GeoHash& h ) { + virtual bool exactDocCheck( const Point& p, double& d ){ - double approxDistance = -1; - Point p( _g, h ); - switch (_type) { + bool within = false; + + // Get the appropriate distance for the type + switch ( _type ) { case GEO_PLAIN: - approxDistance = _near.distance( p ); + d = _near.distance( p ); + within = _near.distanceWithin( p, _maxDistance ); break; case GEO_SPHERE: checkEarthBounds( p ); - approxDistance = spheredist_deg( _near, p ); + d = spheredist_deg( _near, p ); + within = ( d <= _maxDistance ); break; default: assert( false ); } - return approxDistance; + return within; } - double exactDistances( const GeoKeyNode& node ) { - - GEODEBUG( "Finding exact distance for " << node.key.toString() << " and " << node.recordLoc.obj().toString() ); - - // Find all the location objects from the keys - vector< BSONObj > locs; - _g->getKeys( node.recordLoc.obj(), locs ); + // Always in distance units, whether radians or normal + double farthest() const { + return _farthest; + } - double maxDistance = -1; + virtual int addSpecific( const GeoKeyNode& node, const Point& keyP, bool onBounds, double keyD, bool newDoc ) { - // Find the particular location we want - BSONObj loc; - GeoHash keyHash( node._key.firstElement(), _g->_bits ); - for( vector< BSONObj >::iterator i = locs.begin(); i != locs.end(); ++i ) { + // Unique documents - loc = *i; + GeoPoint newPoint( node, keyD, false ); - // Ignore all locations not hashed to the key's hash, since we may see - // those later - if( _g->_hash( loc ) != keyHash ) continue; + int prevSize = _points.size(); - double exactDistance = -1; - bool exactWithin = false; + // STEP 1 : Remove old duplicate points from the set if needed + if( _uniqueDocs ){ - Point p( loc ); + // Lookup old point with same doc + map< DiskLoc , Holder::iterator >::iterator oldPointIt = _seenPts.find( newPoint.loc() ); - // Get the appropriate distance for the type - switch ( _type ) { - case GEO_PLAIN: - exactDistance = _near.distance( p ); - exactWithin = _near.distanceWithin( p, _maxDistance ); - break; - case GEO_SPHERE: - checkEarthBounds( p ); - exactDistance = spheredist_deg( _near, p ); - exactWithin = ( exactDistance <= _maxDistance ); - break; - default: assert( false ); + if( oldPointIt != _seenPts.end() ){ + const GeoPoint& oldPoint = *(oldPointIt->second); + // We 
don't need to care if we've already seen this same approx pt or better, + // or we've already gone to disk once for the point + if( oldPoint < newPoint ){ + GEODEBUG( "\t\tOld point closer than new point" ); + return 0; + } + GEODEBUG( "\t\tErasing old point " << oldPointIt->first.obj() ); + _points.erase( oldPointIt->second ); } - assert( exactDistance >= 0 ); - if( !exactWithin ) continue; + Holder::iterator newIt = _points.insert( newPoint ); + if( _uniqueDocs ) _seenPts[ newPoint.loc() ] = newIt; - GEODEBUG( "Inserting exact point: " << GeoPoint( node , exactDistance, exactWithin ).toString() ); + GEODEBUG( "\t\tInserted new point " << newPoint.toString() << " approx : " << keyD ); - // Add a point for this location - _points.insert( GeoPoint( node , exactDistance, exactWithin ) ); + assert( _max > 0 ); - if( exactDistance > maxDistance ) maxDistance = exactDistance; - } + Holder::iterator lastPtIt = _points.end(); + lastPtIt--; + _farthest = lastPtIt->distance() + 2 * _distError; - return maxDistance; + return _points.size() - prevSize; } - // Always in distance units, whether radians or normal - double farthest() const { - return _farthest; - } + // Removes extra points from end of _points set. + // Check can be a bit costly if we have lots of exact points near borders, + // so we'll do this every once in a while. + void processExtraPoints(){ - bool inErrorBounds( double approxD ) const { - return approxD >= _maxDistance - _distError && approxD <= _maxDistance + _distError; - } + if( _points.size() == 0 ) return; - virtual void addSpecific( const GeoKeyNode& node , double d, bool newDoc ) { + int prevSize = _points.size(); - GEODEBUG( "\t\t" << GeoHash( node.key.firstElement() ) << "\t" << node.recordLoc.obj() << "\t" << d ); + // Erase all points from the set with a position >= _max *and* + // whose distance isn't close to the _max - 1 position distance - double maxDistance = exactDistances( node ); - if( maxDistance >= 0 ){ + int numToErase = _points.size() - _max; + if( numToErase < 0 ) numToErase = 0; - // Recalculate the current furthest point. 
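// [editor's note, not part of the patch] the removed code below kept exact
// distances, so the pruning cutoff could be read straight off the last point;
// the new addSpecific() stores approximate key distances, hence the padded
// cutoff set above:
//     _farthest = lastPtIt->distance() + 2 * _distError;
// without that slack, a point whose key distance slightly overestimates its
// true distance could be pruned even though its exact distance qualifies.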
- int numToErase = _points.size() - _max; - while( numToErase-- > 0 ){ - _points.erase( --_points.end() ); - } + // Get the first point definitely in the _points array + Holder::iterator startErase = _points.end(); + for( int i = 0; i < numToErase + 1; i++ ) startErase--; + _farthest = startErase->distance() + 2 * _distError; - _farthest = boost::next( _points.end(), -1 )->_exactDistance; + GEODEBUG( "\t\tPotentially erasing " << numToErase << " points, " << " size : " << _points.size() << " max : " << _max << " dist : " << startErase->distance() << " farthest dist : " << _farthest << " from error : " << _distError ); + startErase++; + while( numToErase > 0 && startErase->distance() <= _farthest ){ + GEODEBUG( "\t\tNot erasing point " << startErase->toString() ); + numToErase--; + startErase++; + assert( startErase != _points.end() || numToErase == 0 ); } + + if( _uniqueDocs ){ + for( Holder::iterator i = startErase; i != _points.end(); ++i ) + _seenPts.erase( i->loc() ); + } + + _points.erase( startErase, _points.end() ); + + int diff = _points.size() - prevSize; + if( diff > 0 ) _found += diff; + else _found -= -diff; + } unsigned _max; @@ -1760,17 +1743,20 @@ namespace mongo { double _distError; double _farthest; + map< DiskLoc , Holder::iterator > _seenPts; + }; class GeoSearch : public GeoHopper { public: - GeoSearch( const Geo2dType * g , const Point& startPt , int numWanted=100 , BSONObj filter=BSONObj() , double maxDistance = numeric_limits<double>::max() , GeoDistType type=GEO_PLAIN ) - : GeoHopper( g , numWanted , startPt , filter , maxDistance, type ), + GeoSearch( const Geo2dType * g , const Point& startPt , int numWanted=100 , BSONObj filter=BSONObj() , double maxDistance = numeric_limits<double>::max() , GeoDistType type=GEO_PLAIN, bool uniqueDocs = false, bool needDistance = false ) + : GeoHopper( g , numWanted , startPt , filter , maxDistance, type, uniqueDocs, needDistance ), _start( g->hash( startPt._x, startPt._y ) ), - _numWanted( numWanted ), - _type(type) + // TODO: Remove numWanted... + _numWanted( numWanted ), + _type(type) { assert( g->getDetails() ); @@ -1795,6 +1781,8 @@ namespace mongo { void exec() { + if( _numWanted == 0 ) return; + /* * Search algorithm * 1) use geohash prefix to find X items @@ -1805,7 +1793,7 @@ namespace mongo { #ifdef GEODEBUGGING - log() << "start near search for points near " << _near << " (max dist " << _maxDistance << ")" << endl; + log() << "start near search for " << _numWanted << " points near " << _near << " (max dist " << _maxDistance << ")" << endl; #endif @@ -1815,13 +1803,16 @@ namespace mongo { long long f = found(); assert( f <= 0x7fffffff ); fillStack( maxPointsHeuristic, _numWanted - static_cast<int>(f) , true ); + processExtraPoints(); } while( _state != DONE && _state != DONE_NEIGHBOR && found() < _numWanted && (! 
_prefix.constrains() || _g->sizeEdge( _prefix ) <= _scanDistance ) ); // If we couldn't scan or scanned everything, we're done - if( _state == DONE ) return; - + if( _state == DONE ){ + expandEndPoints(); + return; + } } #ifdef GEODEBUGGING @@ -1856,6 +1847,8 @@ namespace mongo { _want = Box( _near._x - farDist , _near._y - farDist , farDist * 2 ); GEODEBUGPRINT( _want.toString() ); + // log() << "Found : " << found() << " wanted : " << _numWanted << " Far distance : " << farDist << " box : " << _want << endl; + // Remember the far distance for further scans _scanDistance = farDist; @@ -1874,15 +1867,195 @@ namespace mongo { // Do regular search in the full region do { fillStack( maxPointsHeuristic ); + processExtraPoints(); } while( _state != DONE ); } - GEODEBUG( "done near search" ) + GEODEBUG( "done near search with " << _points.size() << " points " ); + + expandEndPoints(); } + void addExactPoints( const GeoPoint& pt, Holder& points, bool force ){ + int before, after; + addExactPoints( pt, points, before, after, force ); + } + + void addExactPoints( const GeoPoint& pt, Holder& points, int& before, int& after, bool force ){ + + before = 0; + after = 0; + + GEODEBUG( "Adding exact points for " << pt.toString() ); + + if( pt.isExact() ){ + if( force ) points.insert( pt ); + return; + } + + vector<BSONObj> locs; + getPointsFor( pt.key(), pt.obj(), locs, _uniqueDocs ); + + GeoPoint nearestPt( pt, -1, true ); + + for( vector<BSONObj>::iterator i = locs.begin(); i != locs.end(); i++ ){ + + Point loc( *i ); + + double d; + if( ! exactDocCheck( loc, d ) ) continue; + + if( _uniqueDocs && ( nearestPt.distance() < 0 || d < nearestPt.distance() ) ){ + nearestPt._distance = d; + nearestPt._pt = *i; + continue; + } + else if( ! _uniqueDocs ){ + GeoPoint exactPt( pt, d, true ); + exactPt._pt = *i; + GEODEBUG( "Inserting exact pt " << exactPt.toString() << " for " << pt.toString() << " exact : " << d << " is less? " << ( exactPt < pt ) << " bits : " << _g->_bits ); + points.insert( exactPt ); + exactPt < pt ? before++ : after++; + } + + } + + if( _uniqueDocs && nearestPt.distance() >= 0 ){ + GEODEBUG( "Inserting unique exact pt " << nearestPt.toString() << " for " << pt.toString() << " exact : " << nearestPt.distance() << " is less? 
" << ( nearestPt < pt ) << " bits : " << _g->_bits ); + points.insert( nearestPt ); + if( nearestPt < pt ) before++; + else after++; + } + + } + + // TODO: Refactor this back into holder class, allow to run periodically when we are seeing a lot of pts + void expandEndPoints( bool finish = true ){ + + processExtraPoints(); + + // All points in array *could* be in maxDistance + + // Step 1 : Trim points to max size + // TODO: This check will do little for now, but is skeleton for future work in incremental $near + // searches + if( _max > 0 ){ + + int numToErase = _points.size() - _max; + + if( numToErase > 0 ){ + + Holder tested; + + // Work backward through all points we're not sure belong in the set + Holder::iterator maybePointIt = _points.end(); + maybePointIt--; + double approxMin = maybePointIt->distance() - 2 * _distError; + + GEODEBUG( "\t\tNeed to erase " << numToErase << " max : " << _max << " min dist " << approxMin << " error : " << _distError << " starting from : " << (*maybePointIt).toString() ); + + // Insert all + int erased = 0; + while( _points.size() > 0 && ( maybePointIt->distance() >= approxMin || erased < numToErase ) ){ + + Holder::iterator current = maybePointIt--; + + addExactPoints( *current, tested, true ); + _points.erase( current ); + erased++; + + if( tested.size() ) + approxMin = tested.begin()->distance() - 2 * _distError; + + } + + GEODEBUG( "\t\tEnding search at point " << ( _points.size() == 0 ? "(beginning)" : maybePointIt->toString() ) ); + + int numToAddBack = erased - numToErase; + assert( numToAddBack >= 0 ); + + GEODEBUG( "\t\tNum tested valid : " << tested.size() << " erased : " << erased << " added back : " << numToAddBack ); + +#ifdef GEODEBUGGING + for( Holder::iterator it = tested.begin(); it != tested.end(); it++ ){ + log() << "Tested Point: " << *it << endl; + } +#endif + Holder::iterator testedIt = tested.begin(); + for( int i = 0; i < numToAddBack && testedIt != tested.end(); i++ ){ + _points.insert( *testedIt ); + testedIt++; + } + } + } + +#ifdef GEODEBUGGING + for( Holder::iterator it = _points.begin(); it != _points.end(); it++ ){ + log() << "Point: " << *it << endl; + } +#endif + // We've now trimmed first set of unneeded points + + GEODEBUG( "\t\t Start expanding, num points : " << _points.size() << " max : " << _max ); + + // Step 2: iterate through all points and add as needed + + unsigned expandedPoints = 0; + Holder::iterator it = _points.begin(); + double expandWindowEnd = -1; + while( it != _points.end() ){ + const GeoPoint& currPt = *it; + + // TODO: If one point is exact, maybe not 2 * _distError + + // See if we're in an expand window + bool inWindow = currPt.distance() <= expandWindowEnd; + // If we're not, and we're done with points, break + if( ! inWindow && expandedPoints >= _max ) break; + + bool expandApprox = ! currPt.isExact() && ( ! _uniqueDocs || ( finish && _needDistance ) || inWindow ); + + if( expandApprox ){ + + // Add new point(s) + // These will only be added in a radius of 2 * _distError around the current point, + // so should not affect previously valid points. 
+ int before, after; + addExactPoints( currPt, _points, before, after, false ); + expandedPoints += before; + + if( _max > 0 && expandedPoints < _max ) + expandWindowEnd = currPt.distance() + 2 * _distError; + + // Iterate to the next point + Holder::iterator current = it++; + // Erase the current point + _points.erase( current ); + + } + else{ + expandedPoints++; + it++; + } + } + + GEODEBUG( "\t\tFinished expanding, num points : " << _points.size() << " max : " << _max ); + + // Finish + // TODO: Don't really need to trim? + for( ; expandedPoints > _max; expandedPoints-- ) it--; + _points.erase( it, _points.end() ); + +#ifdef GEODEBUGGING + for( Holder::iterator it = _points.begin(); it != _points.end(); it++ ){ + log() << "Point: " << *it << endl; + } +#endif + } + virtual GeoHash expandStartHash(){ return _start; } @@ -1915,7 +2088,7 @@ namespace mongo { : GeoCursorBase( s->_spec ) , _s( s ) , _cur( s->_points.begin() ) , _end( s->_points.end() ), _nscanned() { if ( _cur != _end ) { - ++_nscanned; + ++_nscanned; } } @@ -1975,8 +2148,8 @@ namespace mongo { class GeoCircleBrowse : public GeoBrowse { public: - GeoCircleBrowse( const Geo2dType * g , const BSONObj& circle , BSONObj filter = BSONObj() , const string& type="$center") - : GeoBrowse( g , "circle" , filter ) { + GeoCircleBrowse( const Geo2dType * g , const BSONObj& circle , BSONObj filter = BSONObj() , const string& type="$center", bool uniqueDocs = true ) + : GeoBrowse( g , "circle" , filter, uniqueDocs ) { uassert( 13060 , "$center needs 2 fields (middle,max distance)" , circle.nFields() == 2 ); @@ -2040,19 +2213,16 @@ namespace mongo { return cur.intersects( _bBox ); } - virtual bool checkDistance( const GeoKeyNode& node, double& d ) { - - GeoHash h( node._key.firstElement(), _g->_bits ); + virtual KeyResult approxKeyCheck( const Point& p, double& d ) { // Inexact hash distance checks. double error = 0; switch (_type) { case GEO_PLAIN: - d = _g->distance( _start , h ); + d = _startPt.distance( p ); error = _g->_error; break; case GEO_SPHERE: { - Point p( _g, h ); checkEarthBounds( p ); d = spheredist_deg( _startPt, p ); error = _g->_errorSphere; @@ -2062,40 +2232,25 @@ namespace mongo { } // If our distance is in the error bounds... - if( d >= _maxDistance - error && d <= _maxDistance + error ) { - - // Do exact check - vector< BSONObj > locs; - _g->getKeys( node.recordLoc.obj(), locs ); - - for( vector< BSONObj >::iterator i = locs.begin(); i != locs.end(); ++i ) { - - GEODEBUG( "Inexact distance : " << d << " vs " << _maxDistance << " from " << ( *i ).toString() << " due to error " << error ); - - Point p( *i ); - // Exact distance checks. - switch (_type) { - case GEO_PLAIN: { - if( _startPt.distanceWithin( p, _maxDistance ) ) return true; - break; - } - case GEO_SPHERE: - // Ignore all locations not hashed to the key's hash, since spherical calcs are - // more expensive. - if( _g->_hash( *i ) != h ) break; - checkEarthBounds( p ); - if( spheredist_deg( _startPt , p ) <= _maxDistance ) return true; - break; - default: assert( false ); - } + if( d >= _maxDistance - error && d <= _maxDistance + error ) return BORDER; + return d > _maxDistance ? 
BAD : GOOD; + } - } + virtual bool exactDocCheck( const Point& p, double& d ){ - return false; + switch (_type) { + case GEO_PLAIN: { + if( _startPt.distanceWithin( p, _maxDistance ) ) return true; + break; + } + case GEO_SPHERE: + checkEarthBounds( p ); + if( spheredist_deg( _startPt , p ) <= _maxDistance ) return true; + break; + default: assert( false ); } - GEODEBUG( "\t " << h << "\t" << d ); - return d <= _maxDistance; + return false; } GeoDistType _type; @@ -2111,8 +2266,8 @@ namespace mongo { class GeoBoxBrowse : public GeoBrowse { public: - GeoBoxBrowse( const Geo2dType * g , const BSONObj& box , BSONObj filter = BSONObj() ) - : GeoBrowse( g , "box" , filter ) { + GeoBoxBrowse( const Geo2dType * g , const BSONObj& box , BSONObj filter = BSONObj(), bool uniqueDocs = true ) + : GeoBrowse( g , "box" , filter, uniqueDocs ) { uassert( 13063 , "$box needs 2 fields (bottomLeft,topRight)" , box.nFields() == 2 ); @@ -2133,7 +2288,7 @@ namespace mongo { _fudge = _g->_error; _wantLen = _fudge + std::max( ( _want._max._x - _want._min._x ) , - ( _want._max._y - _want._min._y ) ); + ( _want._max._y - _want._min._y ) ) / 2; ok(); } @@ -2171,39 +2326,14 @@ namespace mongo { return cur.intersects( _want ); } - virtual bool checkDistance( const GeoKeyNode& node, double& d ) { - - GeoHash h( node._key.firstElement() ); - Point approxPt( _g, h ); - - bool approxInside = _want.inside( approxPt, _fudge ); + virtual KeyResult approxKeyCheck( const Point& p, double& d ) { + if( _want.onBoundary( p, _fudge ) ) return BORDER; + else return _want.inside( p, _fudge ) ? GOOD : BAD; - if( approxInside && _want.onBoundary( approxPt, _fudge ) ) { - - // Do exact check - vector< BSONObj > locs; - _g->getKeys( node.recordLoc.obj(), locs ); - - for( vector< BSONObj >::iterator i = locs.begin(); i != locs.end(); ++i ) { - if( _want.inside( Point( *i ) ) ) { - - GEODEBUG( "found exact point : " << _want.toString() - << " exact point : " << Point( *i ).toString() - << " approx point : " << approxPt.toString() - << " because of error: " << _fudge ); - - return true; - } - } - - return false; - } - - GEODEBUG( "checking point : " << _want.toString() - << " point: " << approxPt.toString() - << " in : " << _want.inside( approxPt, _fudge ) ); + } - return approxInside; + virtual bool exactDocCheck( const Point& p, double& d ){ + return _want.inside( p ); } Box _want; @@ -2218,7 +2348,7 @@ namespace mongo { public: GeoPolygonBrowse( const Geo2dType* g , const BSONObj& polyPoints , - BSONObj filter = BSONObj() ) : GeoBrowse( g , "polygon" , filter ) { + BSONObj filter = BSONObj(), bool uniqueDocs = true ) : GeoBrowse( g , "polygon" , filter, uniqueDocs ) { GEODEBUG( "In Polygon" ) @@ -2233,7 +2363,7 @@ namespace mongo { uassert( 14030, "polygon must be defined by three points or more", _poly.size() >= 3 ); _bounds = _poly.bounds(); - _maxDim = _bounds.maxDim(); + _maxDim = _g->_error + _bounds.maxDim() / 2; ok(); } @@ -2253,51 +2383,17 @@ namespace mongo { return cur.intersects( _bounds ); } - virtual bool checkDistance( const GeoKeyNode& node, double& d ) { - - GeoHash h( node._key.firstElement(), _g->_bits ); - Point p( _g, h ); + virtual KeyResult approxKeyCheck( const Point& p, double& d ) { int in = _poly.contains( p, _g->_error ); - if( in != 0 ) { - - if ( in > 0 ) { - GEODEBUG( "Point: [" << p._x << ", " << p._y << "] approx in polygon" ); - } - else { - GEODEBUG( "Point: [" << p._x << ", " << p._y << "] approx not in polygon" ); - } - - if( in != 0 ) return in > 0; - } - - // Do exact check, since to approximate 
check was inconclusive - vector< BSONObj > locs; - _g->getKeys( node.recordLoc.obj(), locs ); - - for( vector< BSONObj >::iterator i = locs.begin(); i != locs.end(); ++i ) { - - Point p( *i ); - // Ignore all points not hashed to the current value - // This implicitly assumes hashing is less costly than the polygon check, which - // may or may not be true. - if( _g->hash( p ) != h ) continue; + if( in == 0 ) return BORDER; + else return in > 0 ? GOOD : BAD; - // Use the point in polygon algorithm to see if the point - // is contained in the polygon. - bool in = _poly.contains( p ); - if ( in ) { - GEODEBUG( "Point: [" << p._x << ", " << p._y << "] exactly in polygon" ); - } - else { - GEODEBUG( "Point: [" << p._x << ", " << p._y << "] exactly not in polygon" ); - } - if( in ) return in; - - } + } - return false; + virtual bool exactDocCheck( const Point& p, double& d ){ + return _poly.contains( p ); } private: @@ -2324,7 +2420,7 @@ namespace mongo { if ( e.type() == Array ) { // If we get an array query, assume it is a location, and do a $within { $center : [[x, y], 0] } search - shared_ptr<Cursor> c( new GeoCircleBrowse( this , BSON( "0" << e.embeddedObjectUserCheck() << "1" << 0 ), query.filterFieldsUndotted( BSON( _geo << "" ), false ) ) ); + shared_ptr<Cursor> c( new GeoCircleBrowse( this , BSON( "0" << e.embeddedObjectUserCheck() << "1" << 0 ), query.filterFieldsUndotted( BSON( _geo << "" ), false ), "$center", true ) ); return c; } else if ( e.type() == Object ) { @@ -2364,33 +2460,44 @@ namespace mongo { if ( e.isNumber() ) maxDistance = e.numberDouble(); } - shared_ptr<GeoSearch> s( new GeoSearch( this , Point( e ) , numWanted , query , maxDistance, type ) ); + + bool uniqueDocs = false; + if( ! n["$uniqueDocs"].eoo() ) uniqueDocs = n["$uniqueDocs"].trueValue(); + + shared_ptr<GeoSearch> s( new GeoSearch( this , Point( e ) , numWanted , query , maxDistance, type, uniqueDocs ) ); s->exec(); shared_ptr<Cursor> c; c.reset( new GeoSearchCursor( s ) ); return c; } case BSONObj::opWITHIN: { + e = e.embeddedObject().firstElement(); uassert( 13057 , "$within has to take an object or array" , e.isABSONObj() ); + + BSONObj context = e.embeddedObject(); e = e.embeddedObject().firstElement(); string type = e.fieldName(); + + bool uniqueDocs = true; + if( ! context["$uniqueDocs"].eoo() ) uniqueDocs = context["$uniqueDocs"].trueValue(); + if ( startsWith(type, "$center") ) { uassert( 13059 , "$center has to take an object or array" , e.isABSONObj() ); - shared_ptr<Cursor> c( new GeoCircleBrowse( this , e.embeddedObjectUserCheck() , query , type) ); + shared_ptr<Cursor> c( new GeoCircleBrowse( this , e.embeddedObjectUserCheck() , query , type, uniqueDocs ) ); return c; } else if ( type == "$box" ) { uassert( 13065 , "$box has to take an object or array" , e.isABSONObj() ); - shared_ptr<Cursor> c( new GeoBoxBrowse( this , e.embeddedObjectUserCheck() , query ) ); + shared_ptr<Cursor> c( new GeoBoxBrowse( this , e.embeddedObjectUserCheck() , query, uniqueDocs ) ); return c; } else if ( startsWith( type, "$poly" ) ) { uassert( 14029 , "$polygon has to take an object or array" , e.isABSONObj() ); - shared_ptr<Cursor> c( new GeoPolygonBrowse( this , e.embeddedObjectUserCheck() , query ) ); + shared_ptr<Cursor> c( new GeoPolygonBrowse( this , e.embeddedObjectUserCheck() , query, uniqueDocs ) ); return c; } - throw UserException( 13058 , (string)"unknown $with type: " + type ); + throw UserException( 13058 , (string)"unknown $within type: " + type ); } default: // Otherwise... 
assume the object defines a point, and we want to do a zero-radius $within $center @@ -2414,7 +2521,7 @@ namespace mongo { bool slaveOk() const { return true; } void help(stringstream& h) const { h << "http://www.mongodb.org/display/DOCS/Geospatial+Indexing#GeospatialIndexing-geoNearCommand"; } bool slaveOverrideOk() { return true; } - bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { string ns = dbname + "." + cmdObj.firstElement().valuestr(); NamespaceDetails * d = nsdetails( ns.c_str() ); @@ -2450,6 +2557,12 @@ namespace mongo { assert( numWanted >= 0 ); } + bool uniqueDocs = false; + if( ! cmdObj["uniqueDocs"].eoo() ) uniqueDocs = cmdObj["uniqueDocs"].trueValue(); + + bool includeLocs = false; + if( ! cmdObj["includeLocs"].eoo() ) includeLocs = cmdObj["includeLocs"].trueValue(); + uassert(13046, "'near' param missing/invalid", !cmdObj["near"].eoo()); const Point n( cmdObj["near"] ); result.append( "near" , g->_tohash( cmdObj["near"] ).toString() ); @@ -2466,7 +2579,7 @@ namespace mongo { if ( cmdObj["spherical"].trueValue() ) type = GEO_SPHERE; - GeoSearch gs( g , n , numWanted , filter , maxDistance , type ); + GeoSearch gs( g , n , numWanted , filter , maxDistance , type, uniqueDocs, true ); if ( cmdObj["start"].type() == String) { GeoHash start ((string) cmdObj["start"].valuestr()); @@ -2486,11 +2599,12 @@ namespace mongo { for ( GeoHopper::Holder::iterator i=gs._points.begin(); i!=gs._points.end(); i++ ) { const GeoPoint& p = *i; - double dis = distanceMultiplier * p._exactDistance; + double dis = distanceMultiplier * p.distance(); totalDistance += dis; BSONObjBuilder bb( arr.subobjStart( BSONObjBuilder::numStr( x++ ) ) ); bb.append( "dis" , dis ); + if( includeLocs ) bb.append( "loc" , p._pt ); bb.append( "obj" , p._o ); bb.done(); } @@ -2516,7 +2630,7 @@ namespace mongo { virtual LockType locktype() const { return READ; } bool slaveOk() const { return true; } bool slaveOverrideOk() { return true; } - bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { string ns = dbname + "." + cmdObj.firstElement().valuestr(); NamespaceDetails * d = nsdetails( ns.c_str() ); @@ -2571,4 +2685,248 @@ namespace mongo { } geoWalkCmd; + struct GeoUnitTest : public UnitTest { + + int round( double d ) { + return (int)(.5+(d*1000)); + } + +#define GEOHEQ(a,b) if ( a.toString() != b ){ cout << "[" << a.toString() << "] != [" << b << "]" << endl; assert( a == GeoHash(b) ); } + + void run() { + assert( ! GeoHash::isBitSet( 0 , 0 ) ); + assert( ! 
GeoHash::isBitSet( 0 , 31 ) ); + assert( GeoHash::isBitSet( 1 , 31 ) ); + + IndexSpec i( BSON( "loc" << "2d" ) ); + Geo2dType g( &geo2dplugin , &i ); + { + double x = 73.01212; + double y = 41.352964; + BSONObj in = BSON( "x" << x << "y" << y ); + GeoHash h = g._hash( in ); + BSONObj out = g._unhash( h ); + assert( round(x) == round( out["x"].number() ) ); + assert( round(y) == round( out["y"].number() ) ); + assert( round( in["x"].number() ) == round( out["x"].number() ) ); + assert( round( in["y"].number() ) == round( out["y"].number() ) ); + } + + { + double x = -73.01212; + double y = 41.352964; + BSONObj in = BSON( "x" << x << "y" << y ); + GeoHash h = g._hash( in ); + BSONObj out = g._unhash( h ); + assert( round(x) == round( out["x"].number() ) ); + assert( round(y) == round( out["y"].number() ) ); + assert( round( in["x"].number() ) == round( out["x"].number() ) ); + assert( round( in["y"].number() ) == round( out["y"].number() ) ); + } + + { + GeoHash h( "0000" ); + h.move( 0 , 1 ); + GEOHEQ( h , "0001" ); + h.move( 0 , -1 ); + GEOHEQ( h , "0000" ); + + h.init( "0001" ); + h.move( 0 , 1 ); + GEOHEQ( h , "0100" ); + h.move( 0 , -1 ); + GEOHEQ( h , "0001" ); + + + h.init( "0000" ); + h.move( 1 , 0 ); + GEOHEQ( h , "0010" ); + } + + { + Box b( 5 , 5 , 2 ); + assert( "(5,5) -->> (7,7)" == b.toString() ); + } + + { + GeoHash a = g.hash( 1 , 1 ); + GeoHash b = g.hash( 4 , 5 ); + assert( 5 == (int)(g.distance( a , b ) ) ); + a = g.hash( 50 , 50 ); + b = g.hash( 42 , 44 ); + assert( round(10) == round(g.distance( a , b )) ); + } + + { + GeoHash x("0000"); + assert( 0 == x.getHash() ); + x.init( 0 , 1 , 32 ); + GEOHEQ( x , "0000000000000000000000000000000000000000000000000000000000000001" ) + + assert( GeoHash( "1100").hasPrefix( GeoHash( "11" ) ) ); + assert( ! GeoHash( "1000").hasPrefix( GeoHash( "11" ) ) ); + } + + { + GeoHash x("1010"); + GEOHEQ( x , "1010" ); + GeoHash y = x + "01"; + GEOHEQ( y , "101001" ); + } + + { + + GeoHash a = g.hash( 5 , 5 ); + GeoHash b = g.hash( 5 , 7 ); + GeoHash c = g.hash( 100 , 100 ); + /* + cout << "a: " << a << endl; + cout << "b: " << b << endl; + cout << "c: " << c << endl; + + cout << "a: " << a.toStringHex1() << endl; + cout << "b: " << b.toStringHex1() << endl; + cout << "c: " << c.toStringHex1() << endl; + */ + BSONObj oa = a.wrap(); + BSONObj ob = b.wrap(); + BSONObj oc = c.wrap(); + /* + cout << "a: " << oa.hexDump() << endl; + cout << "b: " << ob.hexDump() << endl; + cout << "c: " << oc.hexDump() << endl; + */ + assert( oa.woCompare( ob ) < 0 ); + assert( oa.woCompare( oc ) < 0 ); + + } + + { + GeoHash x( "000000" ); + x.move( -1 , 0 ); + GEOHEQ( x , "101010" ); + x.move( 1 , -1 ); + GEOHEQ( x , "010101" ); + x.move( 0 , 1 ); + GEOHEQ( x , "000000" ); + } + + { + GeoHash prefix( "110011000000" ); + GeoHash entry( "1100110000011100000111000001110000011100000111000001000000000000" ); + assert( ! entry.hasPrefix( prefix ) ); + + entry = GeoHash("1100110000001100000111000001110000011100000111000001000000000000"); + assert( entry.toString().find( prefix.toString() ) == 0 ); + assert( entry.hasPrefix( GeoHash( "1100" ) ) ); + assert( entry.hasPrefix( prefix ) ); + } + + { + GeoHash a = g.hash( 50 , 50 ); + GeoHash b = g.hash( 48 , 54 ); + assert( round( 4.47214 ) == round( g.distance( a , b ) ) ); + } + + + { + Box b( Point( 29.762283 , -95.364271 ) , Point( 29.764283000000002 , -95.36227099999999 ) ); + assert( b.inside( 29.763 , -95.363 ) ); + assert( ! b.inside( 32.9570255 , -96.1082497 ) ); + assert( ! 
b.inside( 32.9570255 , -96.1082497 , .01 ) ); + } + + { + GeoHash a( "11001111" ); + assert( GeoHash( "11" ) == a.commonPrefix( GeoHash("11") ) ); + assert( GeoHash( "11" ) == a.commonPrefix( GeoHash("11110000") ) ); + } + + { + int N = 10000; + { + Timer t; + for ( int i=0; i<N; i++ ) { + unsigned x = (unsigned)rand(); + unsigned y = (unsigned)rand(); + GeoHash h( x , y ); + unsigned a,b; + h.unhash_slow( a,b ); + assert( a == x ); + assert( b == y ); + } + //cout << "slow: " << t.millis() << endl; + } + + { + Timer t; + for ( int i=0; i<N; i++ ) { + unsigned x = (unsigned)rand(); + unsigned y = (unsigned)rand(); + GeoHash h( x , y ); + unsigned a,b; + h.unhash_fast( a,b ); + assert( a == x ); + assert( b == y ); + } + //cout << "fast: " << t.millis() << endl; + } + + } + + { + // see http://en.wikipedia.org/wiki/Great-circle_distance#Worked_example + + { + Point BNA (-86.67, 36.12); + Point LAX (-118.40, 33.94); + + double dist1 = spheredist_deg(BNA, LAX); + double dist2 = spheredist_deg(LAX, BNA); + + // target is 0.45306 + assert( 0.45305 <= dist1 && dist1 <= 0.45307 ); + assert( 0.45305 <= dist2 && dist2 <= 0.45307 ); + } + { + Point BNA (-1.5127, 0.6304); + Point LAX (-2.0665, 0.5924); + + double dist1 = spheredist_rad(BNA, LAX); + double dist2 = spheredist_rad(LAX, BNA); + + // target is 0.45306 + assert( 0.45305 <= dist1 && dist1 <= 0.45307 ); + assert( 0.45305 <= dist2 && dist2 <= 0.45307 ); + } + { + Point JFK (-73.77694444, 40.63861111 ); + Point LAX (-118.40, 33.94); + + double dist = spheredist_deg(JFK, LAX) * EARTH_RADIUS_MILES; + assert( dist > 2469 && dist < 2470 ); + } + + { + Point BNA (-86.67, 36.12); + Point LAX (-118.40, 33.94); + Point JFK (-73.77694444, 40.63861111 ); + assert( spheredist_deg(BNA, BNA) < 1e-6); + assert( spheredist_deg(LAX, LAX) < 1e-6); + assert( spheredist_deg(JFK, JFK) < 1e-6); + + Point zero (0, 0); + Point antizero (0,-180); + + // these were known to cause NaN + assert( spheredist_deg(zero, zero) < 1e-6); + assert( fabs(M_PI-spheredist_deg(zero, antizero)) < 1e-6); + assert( fabs(M_PI-spheredist_deg(antizero, zero)) < 1e-6); + } + } + } + } geoUnitTest; + + } + diff --git a/db/geo/core.h b/db/geo/core.h index 74f4b6e8269..b77997844f2 100644 --- a/db/geo/core.h +++ b/db/geo/core.h @@ -278,14 +278,19 @@ namespace mongo { return *this; } - bool operator==(const GeoHash& h ) { + bool operator==(const GeoHash& h ) const { return _hash == h._hash && _bits == h._bits; } - bool operator!=(const GeoHash& h ) { + bool operator!=(const GeoHash& h ) const { return !( *this == h ); } + bool operator<(const GeoHash& h ) const { + if( _hash != h._hash ) return _hash < h._hash; + return _bits < h._bits; + } + GeoHash& operator+=( const char * s ) { unsigned pos = _bits * 2; _bits += strlen(s) / 2; diff --git a/db/geo/haystack.cpp b/db/geo/haystack.cpp index fd6b2392d6a..a5dd478f625 100644 --- a/db/geo/haystack.cpp +++ b/db/geo/haystack.cpp @@ -264,7 +264,7 @@ namespace mongo { virtual LockType locktype() const { return READ; } bool slaveOk() const { return true; } bool slaveOverrideOk() const { return true; } - bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { string ns = dbname + "." 
+ cmdObj.firstElement().valuestr(); diff --git a/db/index.cpp b/db/index.cpp index 8aebef45e8e..67a0d44e444 100644 --- a/db/index.cpp +++ b/db/index.cpp @@ -27,11 +27,6 @@ namespace mongo { - /** old (<= v1.8) : 0 - 1 is new version - */ - const int DefaultIndexVersionNumber = 1; - template< class V > class IndexInterfaceImpl : public IndexInterface { public: diff --git a/db/index.h b/db/index.h index debe2aa9c26..54b06394435 100644 --- a/db/index.h +++ b/db/index.h @@ -150,14 +150,18 @@ namespace mongo { return io.getStringField("ns"); } - int version() const { - BSONElement e = info.obj()["v"]; + static int versionForIndexObj( const BSONObj &obj ) { + BSONElement e = obj["v"]; if( e.type() == NumberInt ) return e._numberInt(); // should normally be an int. this is for backward compatibility int v = e.numberInt(); uassert(14802, "index v field should be Integer type", v == 0); - return v; + return v; + } + + int version() const { + return versionForIndexObj( info.obj() ); } /** @return true if index has unique constraint */ diff --git a/db/indexkey.cpp b/db/indexkey.cpp index cc2cd43daf5..6d6fcc58cae 100644 --- a/db/indexkey.cpp +++ b/db/indexkey.cpp @@ -22,9 +22,15 @@ #include "btree.h" #include "ops/query.h" #include "background.h" +#include "../util/text.h" namespace mongo { + /** old (<= v1.8) : 0 + 1 is new version + */ + const int DefaultIndexVersionNumber = 1; + map<string,IndexPlugin*> * IndexPlugin::_plugins; IndexType::IndexType( const IndexPlugin * plugin , const IndexSpec * spec ) @@ -100,6 +106,14 @@ namespace mongo { } { + // _undefinedElt + BSONObjBuilder b; + b.appendUndefined( "" ); + _undefinedObj = b.obj(); + _undefinedElt = _undefinedObj.firstElement(); + } + + { // handle plugins string pluginName = IndexPlugin::findPluginName( keyPattern ); if ( pluginName.size() ) { @@ -116,131 +130,289 @@ namespace mongo { _finishedInit = true; } - - void IndexSpec::getKeys( const BSONObj &obj, BSONObjSet &keys ) const { - if ( _indexType.get() ) { //plugin (eg geo) - _indexType->getKeys( obj , keys ); - return; - } - vector<const char*> fieldNames( _fieldNames ); - vector<BSONElement> fixed( _fixed ); - _getKeys( fieldNames , fixed , obj, keys ); - if ( keys.empty() && ! 
_sparse ) - keys.insert( _nullKey ); + void assertParallelArrays( const char *first, const char *second ) { + stringstream ss; + ss << "cannot index parallel arrays [" << first << "] [" << second << "]"; + uasserted( 10088 , ss.str() ); } - - void IndexSpec::_getKeys( vector<const char*> fieldNames , vector<BSONElement> fixed , const BSONObj &obj, BSONObjSet &keys ) const { - BSONElement arrElt; - unsigned arrIdx = ~0; - int numNotFound = 0; - - for( unsigned i = 0; i < fieldNames.size(); ++i ) { - if ( *fieldNames[ i ] == '\0' ) - continue; - - BSONElement e = obj.getFieldDottedOrArray( fieldNames[ i ] ); - - if ( e.eoo() ) { - e = _nullElt; // no matching field - numNotFound++; + + class KeyGeneratorV0 { + public: + KeyGeneratorV0( const IndexSpec &spec ) : _spec( spec ) {} + + void getKeys( const BSONObj &obj, BSONObjSet &keys ) const { + if ( _spec._indexType.get() ) { //plugin (eg geo) + _spec._indexType->getKeys( obj , keys ); + return; } - - if ( e.type() != Array ) - fieldNames[ i ] = ""; // no matching field or non-array match - - if ( *fieldNames[ i ] == '\0' ) - fixed[ i ] = e; // no need for further object expansion (though array expansion still possible) - - if ( e.type() == Array && arrElt.eoo() ) { // we only expand arrays on a single path -- track the path here - arrIdx = i; - arrElt = e; + vector<const char*> fieldNames( _spec._fieldNames ); + vector<BSONElement> fixed( _spec._fixed ); + _getKeys( fieldNames , fixed , obj, keys ); + if ( keys.empty() && ! _spec._sparse ) + keys.insert( _spec._nullKey ); + } + + private: + void _getKeys( vector<const char*> fieldNames , vector<BSONElement> fixed , const BSONObj &obj, BSONObjSet &keys ) const { + BSONElement arrElt; + unsigned arrIdx = ~0; + int numNotFound = 0; + + for( unsigned i = 0; i < fieldNames.size(); ++i ) { + if ( *fieldNames[ i ] == '\0' ) + continue; + + BSONElement e = obj.getFieldDottedOrArray( fieldNames[ i ] ); + + if ( e.eoo() ) { + e = _spec._nullElt; // no matching field + numNotFound++; + } + + if ( e.type() != Array ) + fieldNames[ i ] = ""; // no matching field or non-array match + + if ( *fieldNames[ i ] == '\0' ) + fixed[ i ] = e; // no need for further object expansion (though array expansion still possible) + + if ( e.type() == Array && arrElt.eoo() ) { // we only expand arrays on a single path -- track the path here + arrIdx = i; + arrElt = e; + } + + // enforce single array path here + if ( e.type() == Array && e.rawdata() != arrElt.rawdata() ) { + assertParallelArrays( e.fieldName(), arrElt.fieldName() ); + } } - - // enforce single array path here - if ( e.type() == Array && e.rawdata() != arrElt.rawdata() ) { - stringstream ss; - ss << "cannot index parallel arrays [" << e.fieldName() << "] [" << arrElt.fieldName() << "]"; - uasserted( 10088 , ss.str() ); + + bool allFound = true; // have we found elements for all field names in the key spec? + for( vector<const char*>::const_iterator i = fieldNames.begin(); i != fieldNames.end(); ++i ) { + if ( **i != '\0' ) { + allFound = false; + break; + } } - } - - bool allFound = true; // have we found elements for all field names in the key spec? 
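The expansion rule both key generators implement can be hand-checked: for an { a : 1, b : 1 } index over { a : [ 1, 2 ], b : 3 }, the single array field is cross-producted with the fixed fields, yielding keys { "" : 1, "" : 3 } and { "" : 2, "" : 3 }, while a second array path would trip the 10088 parallel-arrays assertion above. A toy, self-contained model of that cross-product (std types only, not the tree's BSON machinery):

    #include <cassert>
    #include <cstdio>
    #include <utility>
    #include <vector>

    int main() {
        // a : [ 1, 2 ] is the one field allowed to be an array; b : 3 is fixed.
        int arr[] = { 1, 2 };
        int fixedB = 3;
        std::vector< std::pair<int,int> > keys;
        // One key per array element, pairing it with the fixed value.
        for ( unsigned i = 0; i < sizeof( arr ) / sizeof( arr[0] ); ++i )
            keys.push_back( std::make_pair( arr[i], fixedB ) );
        assert( keys.size() == 2 );   // { "":1, "":3 } and { "":2, "":3 }
        printf( "generated %u keys\n", (unsigned)keys.size() );
        return 0;
    }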
- for( vector<const char*>::const_iterator i = fieldNames.begin(); i != fieldNames.end(); ++i ) { - if ( **i != '\0' ) { - allFound = false; - break; + + if ( _spec._sparse && numNotFound == _spec._nFields ) { + // we didn't find any fields + // so we're not going to index this document + return; } - } - - if ( _sparse && numNotFound == _nFields ) { - // we didn't find any fields - // so we're not going to index this document - return; - } - - bool insertArrayNull = false; - - if ( allFound ) { - if ( arrElt.eoo() ) { - // no terminal array element to expand - BSONObjBuilder b(_sizeTracker); - for( vector< BSONElement >::iterator i = fixed.begin(); i != fixed.end(); ++i ) - b.appendAs( *i, "" ); - keys.insert( b.obj() ); + + bool insertArrayNull = false; + + if ( allFound ) { + if ( arrElt.eoo() ) { + // no terminal array element to expand + BSONObjBuilder b(_spec._sizeTracker); + for( vector< BSONElement >::iterator i = fixed.begin(); i != fixed.end(); ++i ) + b.appendAs( *i, "" ); + keys.insert( b.obj() ); + } + else { + // terminal array element to expand, so generate all keys + BSONObjIterator i( arrElt.embeddedObject() ); + if ( i.more() ) { + while( i.more() ) { + BSONObjBuilder b(_spec._sizeTracker); + for( unsigned j = 0; j < fixed.size(); ++j ) { + if ( j == arrIdx ) + b.appendAs( i.next(), "" ); + else + b.appendAs( fixed[ j ], "" ); + } + keys.insert( b.obj() ); + } + } + else if ( fixed.size() > 1 ) { + insertArrayNull = true; + } + } } else { - // terminal array element to expand, so generate all keys + // nonterminal array element to expand, so recurse + assert( !arrElt.eoo() ); BSONObjIterator i( arrElt.embeddedObject() ); if ( i.more() ) { while( i.more() ) { - BSONObjBuilder b(_sizeTracker); - for( unsigned j = 0; j < fixed.size(); ++j ) { - if ( j == arrIdx ) - b.appendAs( i.next(), "" ); - else - b.appendAs( fixed[ j ], "" ); + BSONElement e = i.next(); + if ( e.type() == Object ) { + _getKeys( fieldNames, fixed, e.embeddedObject(), keys ); } - keys.insert( b.obj() ); } } - else if ( fixed.size() > 1 ) { + else { insertArrayNull = true; } } - } - else { - // nonterminal array element to expand, so recurse - assert( !arrElt.eoo() ); - BSONObjIterator i( arrElt.embeddedObject() ); - if ( i.more() ) { - while( i.more() ) { - BSONElement e = i.next(); - if ( e.type() == Object ) { - _getKeys( fieldNames, fixed, e.embeddedObject(), keys ); + + if ( insertArrayNull ) { + // x : [] - need to insert undefined + BSONObjBuilder b(_spec._sizeTracker); + for( unsigned j = 0; j < fixed.size(); ++j ) { + if ( j == arrIdx ) { + b.appendUndefined( "" ); + } + else { + BSONElement e = fixed[j]; + if ( e.eoo() ) + b.appendNull( "" ); + else + b.appendAs( e , "" ); } } + keys.insert( b.obj() ); } - else { - insertArrayNull = true; + } + + const IndexSpec &_spec; + }; + + class KeyGeneratorV1 { + public: + KeyGeneratorV1( const IndexSpec &spec ) : _spec( spec ) {} + + void getKeys( const BSONObj &obj, BSONObjSet &keys ) const { + if ( _spec._indexType.get() ) { //plugin (eg geo) + _spec._indexType->getKeys( obj , keys ); + return; + } + vector<const char*> fieldNames( _spec._fieldNames ); + vector<BSONElement> fixed( _spec._fixed ); + _getKeys( fieldNames , fixed , obj, keys ); + if ( keys.empty() && ! _spec._sparse ) + keys.insert( _spec._nullKey ); + } + + private: + /** + * @param arrayNestedArray - set if the returned element is an array nested directly within arr. 
+ */ + BSONElement extractNextElement( const BSONObj &obj, const BSONObj &arr, const char *&field, bool &arrayNestedArray ) const { + string firstField = mongoutils::str::before( field, '.' ); + bool haveObjField = !obj.getField( firstField ).eoo(); + BSONElement arrField = arr.getField( firstField ); + bool haveArrField = !arrField.eoo(); + + // An index component field name cannot exist in both a document array and one of that array's children. + uassert( 15855 , "Parallel references while expanding indexed field in array", !haveObjField || !haveArrField ); + + arrayNestedArray = false; + if ( haveObjField ) { + return obj.getFieldDottedOrArray( field ); + } + else if ( haveArrField ) { + if ( arrField.type() == Array ) { + arrayNestedArray = true; + } + return arr.getFieldDottedOrArray( field ); } + return BSONElement(); } - - if ( insertArrayNull ) { - // x : [] - need to insert undefined - BSONObjBuilder b(_sizeTracker); - for( unsigned j = 0; j < fixed.size(); ++j ) { - if ( j == arrIdx ) { - b.appendUndefined( "" ); + + void _getKeysArrEltFixed( vector<const char*> &fieldNames , vector<BSONElement> &fixed , const BSONElement &arrEntry, BSONObjSet &keys, int numNotFound, const BSONElement &arrObjElt, const set< unsigned > &arrIdxs, bool mayExpandArrayUnembedded ) const { + // set up any terminal array values + for( set<unsigned>::const_iterator j = arrIdxs.begin(); j != arrIdxs.end(); ++j ) { + if ( *fieldNames[ *j ] == '\0' ) { + fixed[ *j ] = mayExpandArrayUnembedded ? arrEntry : arrObjElt; + } + } + // recurse + _getKeys( fieldNames, fixed, ( arrEntry.type() == Object ) ? arrEntry.embeddedObject() : BSONObj(), keys, numNotFound, arrObjElt.embeddedObject() ); + } + + /** + * @param fieldNames - fields to index, may be postfixes in recursive calls + * @param fixed - values that have already been identified for their index fields + * @param obj - object from which keys should be extracted, based on names in fieldNames + * @param keys - set where index keys are written + * @param numNotFound - number of index fields that have already been identified as missing + * @param array - array from which keys should be extracted, based on names in fieldNames + * If obj and array are both nonempty, obj will be one of the elements of array. + */ + void _getKeys( vector<const char*> fieldNames , vector<BSONElement> fixed , const BSONObj &obj, BSONObjSet &keys, int numNotFound = 0, const BSONObj &array = BSONObj() ) const { + BSONElement arrElt; + set<unsigned> arrIdxs; + bool mayExpandArrayUnembedded = true; + for( unsigned i = 0; i < fieldNames.size(); ++i ) { + if ( *fieldNames[ i ] == '\0' ) { + continue; + } + + bool arrayNestedArray; + // Extract element matching fieldName[ i ] from object xor array. 
+ BSONElement e = extractNextElement( obj, array, fieldNames[ i ], arrayNestedArray ); + + if ( e.eoo() ) { + // if field not present, set to null + fixed[ i ] = _spec._nullElt; + // done expanding this field name + fieldNames[ i ] = ""; + numNotFound++; + } + else if ( e.type() == Array ) { + arrIdxs.insert( i ); + if ( arrElt.eoo() ) { + // we only expand arrays on a single path -- track the path here + arrElt = e; + } + else if ( e.rawdata() != arrElt.rawdata() ) { + // enforce single array path here + assertParallelArrays( e.fieldName(), arrElt.fieldName() ); + } + if ( arrayNestedArray ) { + mayExpandArrayUnembedded = false; + } } else { - BSONElement e = fixed[j]; - if ( e.eoo() ) - b.appendNull( "" ); - else - b.appendAs( e , "" ); + // not an array - no need for further expansion + fixed[ i ] = e; + } + } + + if ( arrElt.eoo() ) { + // No array, so generate a single key. + if ( _spec._sparse && numNotFound == _spec._nFields ) { + return; + } + BSONObjBuilder b(_spec._sizeTracker); + for( vector< BSONElement >::iterator i = fixed.begin(); i != fixed.end(); ++i ) { + b.appendAs( *i, "" ); + } + keys.insert( b.obj() ); + } + else if ( arrElt.embeddedObject().firstElement().eoo() ) { + // Empty array, so set matching fields to undefined. + _getKeysArrEltFixed( fieldNames, fixed, _spec._undefinedElt, keys, numNotFound, arrElt, arrIdxs, true ); + } + else { + // Non empty array that can be expanded, so generate a key for each member. + BSONObj arrObj = arrElt.embeddedObject(); + BSONObjIterator i( arrObj ); + while( i.more() ) { + _getKeysArrEltFixed( fieldNames, fixed, i.next(), keys, numNotFound, arrElt, arrIdxs, mayExpandArrayUnembedded ); } } - keys.insert( b.obj() ); + } + + const IndexSpec &_spec; + }; + + void IndexSpec::getKeys( const BSONObj &obj, BSONObjSet &keys ) const { + switch( indexVersion() ) { + case 0: { + KeyGeneratorV0 g( *this ); + g.getKeys( obj, keys ); + break; + } + case 1: { + KeyGeneratorV1 g( *this ); + g.getKeys( obj, keys ); + break; + } + default: + massert( 15869, "Invalid index version for key generation.", false ); } } @@ -275,6 +447,13 @@ namespace mongo { IndexSuitability IndexType::suitability( const BSONObj& query , const BSONObj& order ) const { return _spec->_suitability( query , order ); } + + int IndexSpec::indexVersion() const { + if ( !info.hasField( "v" ) ) { + return DefaultIndexVersionNumber; + } + return IndexDetails::versionForIndexObj( info ); + } bool IndexType::scanAndOrderRequired( const BSONObj& query , const BSONObj& order ) const { return ! 
order.isEmpty(); diff --git a/db/indexkey.h b/db/indexkey.h index 4a755f8a4e8..c04cd6396f6 100644 --- a/db/indexkey.h +++ b/db/indexkey.h @@ -25,6 +25,8 @@ namespace mongo { + extern const int DefaultIndexVersionNumber; + class Cursor; class IndexSpec; class IndexType; // TODO: this name sucks @@ -161,16 +163,21 @@ namespace mongo { protected: + int indexVersion() const; + IndexSuitability _suitability( const BSONObj& query , const BSONObj& order ) const ; - void _getKeys( vector<const char*> fieldNames , vector<BSONElement> fixed , const BSONObj &obj, BSONObjSet &keys ) const; - BSONSizeTracker _sizeTracker; vector<const char*> _fieldNames; vector<BSONElement> _fixed; + BSONObj _nullKey; // a full key with all fields null BSONObj _nullObj; // only used for _nullElt BSONElement _nullElt; // jstNull + + BSONObj _undefinedObj; // only used for _undefinedElt + BSONElement _undefinedElt; // undefined + int _nFields; // number of fields in the index bool _sparse; // if the index is sparse shared_ptr<IndexType> _indexType; @@ -179,6 +186,8 @@ namespace mongo { void _init(); friend class IndexType; + friend class KeyGeneratorV0; + friend class KeyGeneratorV1; public: bool _finishedInit; }; diff --git a/db/instance.cpp b/db/instance.cpp index ede433d652b..971cd2e7b38 100644 --- a/db/instance.cpp +++ b/db/instance.cpp @@ -587,7 +587,7 @@ namespace mongo { } NOINLINE_DECL void insertMulti(DbMessage& d, const char *ns, const BSONObj& _js) { - const bool keepGoing = d.reservedField() & InsertOption_KeepGoing; + const bool keepGoing = d.reservedField() & InsertOption_ContinueOnError; int n = 0; BSONObj js(_js); while( 1 ) { diff --git a/db/instance.h b/db/instance.h index 2b86eb44fce..422c77d5ffa 100644 --- a/db/instance.h +++ b/db/instance.h @@ -147,6 +147,8 @@ namespace mongo { virtual ConnectionString::ConnectionType type() const { return ConnectionString::MASTER; } double getSoTimeout() const { return 0; } + + virtual bool lazySupported() const { return true; } private: static HostAndPort _clientHost; }; diff --git a/db/introspect.cpp b/db/introspect.cpp index ca65710b3fc..7e1d19ce2f3 100644 --- a/db/introspect.cpp +++ b/db/introspect.cpp @@ -40,7 +40,7 @@ namespace mongo { profileBufBuilder.reset(); BSONObjBuilder b(profileBufBuilder); b.appendDate("ts", jsTime()); - currentOp.debug().append( b ); + currentOp.debug().append( currentOp , b ); b.append("client", c.clientAddress() ); @@ -49,6 +49,26 @@ namespace mongo { BSONObj p = b.done(); + if (p.objsize() > 100*1024){ + string small = p.toString(/*isArray*/false, /*full*/false); + + warning() << "can't add full line to system.profile: " << small; + + // rebuild with limited info + BSONObjBuilder b(profileBufBuilder); + b.appendDate("ts", jsTime()); + b.append("client", c.clientAddress() ); + if ( c.getAuthenticationInfo() ) + b.append( "user" , c.getAuthenticationInfo()->getUser( nsToDatabase( ns ) ) ); + + b.append("err", "profile line too large (max is 100KB)"); + if (small.size() < 100*1024){ // should be much smaller but if not don't break anything + b.append("abbreviated", small); + } + + p = b.done(); + } + // write: not replicated NamespaceDetails *d = db->namespaceIndex.details(ns); if( d ) { diff --git a/db/jsobj.cpp b/db/jsobj.cpp index 53c2329bd35..dcb77447873 100644 --- a/db/jsobj.cpp +++ b/db/jsobj.cpp @@ -45,7 +45,7 @@ BOOST_STATIC_ASSERT( sizeof(mongo::OID) == 12 ); namespace mongo { - BSONElement nullElement; + BSONElement eooElement; GENOIDLabeler GENOID; @@ -508,6 +508,12 @@ namespace mongo { } BSONObj staticNull = fromjson( 
"{'':null}" ); + BSONObj makeUndefined() { + BSONObjBuilder b; + b.appendUndefined( "" ); + return b.obj(); + } + BSONObj staticUndefined = makeUndefined(); /* well ordered compare */ int BSONObj::woSortOrder(const BSONObj& other, const BSONObj& sortKey , bool useDotted ) const { @@ -613,13 +619,13 @@ namespace mongo { } if ( sub.eoo() ) - return nullElement; - else if ( sub.type() == Array || name[0] == '\0') + return eooElement; + else if ( sub.type() == Array || name[0] == '\0' ) return sub; else if ( sub.type() == Object ) return sub.embeddedObject().getFieldDottedOrArray( name ); else - return nullElement; + return eooElement; } /** @@ -919,7 +925,7 @@ namespace mongo { c.appendRegex("x", "goo"); BSONObj p = c.done(); - assert( !o.shallowEqual( p ) ); + assert( !o.binaryEqual( p ) ); assert( o.woCompare( p ) < 0 ); } @@ -1024,7 +1030,7 @@ namespace mongo { BSONObj a = A.done(); BSONObj b = B.done(); BSONObj c = C.done(); - assert( !a.shallowEqual( b ) ); // comments on operator== + assert( !a.binaryEqual( b ) ); // comments on operator== int cmp = a.woCompare(b); assert( cmp == 0 ); cmp = a.woCompare(c); @@ -1167,13 +1173,9 @@ namespace mongo { while (l.more() && r.more()){ if (strcmp(l.next().fieldName(), r.next().fieldName())) { - PRINTFL; return false; } } - PRINT(l.more()); - PRINT(r.more()); - PRINT(l.more() || r.more()); return !(l.more() || r.more()); // false if lhs and rhs have diff nFields() } diff --git a/db/key.cpp b/db/key.cpp index ddc2d593350..648502ebf17 100644 --- a/db/key.cpp +++ b/db/key.cpp @@ -264,15 +264,17 @@ namespace mongo { if( (t & 0x78) == 0 && t != ByteArrayDeprecated ) { int len; const char * d = e.binData(len); - int code = BinDataLengthToCode[len]; - if( code >= 0 ) { - if( t >= 128 ) - t = (t-128) | 0x08; - dassert( (code&t) == 0 ); - b.appendUChar( cbindata|bits ); - b.appendUChar( code | t ); - b.appendBuf(d, len); - break; + if( len <= BinDataLenMax ) { + int code = BinDataLengthToCode[len]; + if( code >= 0 ) { + if( t >= 128 ) + t = (t-128) | 0x08; + dassert( (code&t) == 0 ); + b.appendUChar( cbindata|bits ); + b.appendUChar( code | t ); + b.appendBuf(d, len); + break; + } } } traditional(obj); diff --git a/db/matcher.cpp b/db/matcher.cpp index 23d5a7057bf..2b92d5797c3 100644 --- a/db/matcher.cpp +++ b/db/matcher.cpp @@ -64,8 +64,14 @@ namespace mongo { } ~Where() { - if ( scope.get() ) - scope->execSetup( "_mongo.readOnly = false;" , "make not read only" ); + if ( scope.get() ){ + try { + scope->execSetup( "_mongo.readOnly = false;" , "make not read only" ); + } + catch( DBException& e ){ + warning() << "javascript scope cleanup interrupted" << causedBy( e ) << endl; + } + } if ( jsScope ) { delete jsScope; @@ -148,6 +154,9 @@ namespace mongo { rm._prefix = prefix; } else { + uassert( 15882, "$elemMatch not allowed within $in", + ie.type() != Object || + ie.embeddedObject().firstElement().getGtLtOp() != BSONObj::opELEM_MATCH ); _myset->insert(ie); } } diff --git a/db/modules/mms.cpp b/db/modules/mms.cpp index 28fc225477f..40abb391dfb 100644 --- a/db/modules/mms.cpp +++ b/db/modules/mms.cpp @@ -142,7 +142,7 @@ namespace mongo { string errmsg; BSONObjBuilder sub; - if ( ! c->run( "admin.$cmd" , co , errmsg , sub , false ) ) + if ( ! 
c->run( "admin.$cmd" , co , 0 , errmsg , sub , false ) ) postData.append( cmd , errmsg ); else postData.append( cmd , sub.obj() ); diff --git a/db/mongommf.h b/db/mongommf.h index b347e4ff259..0c4e8e4a19d 100644 --- a/db/mongommf.h +++ b/db/mongommf.h @@ -75,7 +75,7 @@ namespace mongo { fileSuffixNo() is 3 if the suffix is "ns", fileSuffixNo -1 */ - RelativePath relativePath() const { + const RelativePath& relativePath() const { DEV assert( !_p._p.empty() ); return _p; } diff --git a/db/namespace.cpp b/db/namespace.cpp index 927f56b6e7b..2bc7409e56c 100644 --- a/db/namespace.cpp +++ b/db/namespace.cpp @@ -604,6 +604,17 @@ namespace mongo { } } + void NamespaceDetailsTransient::eraseForPrefix(const char *prefix) { + assertInWriteLock(); + vector< string > found; + for( ouriter i = _map.begin(); i != _map.end(); ++i ) + if ( strncmp( i->first.c_str(), prefix, strlen( prefix ) ) == 0 ) + found.push_back( i->first ); + for( vector< string >::iterator i = found.begin(); i != found.end(); ++i ) { + _map.erase(*i); + } + } + void NamespaceDetailsTransient::computeIndexKeys() { _keysComputed = true; _indexKeys.clear(); @@ -657,7 +668,7 @@ namespace mongo { // index details across commands are in cursors and nsd // transient (including query cache) so clear these. ClientCursor::invalidate( from ); - NamespaceDetailsTransient::clearForPrefix( from ); + NamespaceDetailsTransient::eraseForPrefix( from ); NamespaceDetails *details = ni->details( from ); ni->add_ns( to, *details ); diff --git a/db/namespace.h b/db/namespace.h index a1b7c2274bc..3dfb3f33767 100644 --- a/db/namespace.h +++ b/db/namespace.h @@ -454,6 +454,7 @@ namespace mongo { Can be useful as index namespaces share the same start as the regular collection. SLOW - sequential scan of all NamespaceDetailsTransient objects */ static void clearForPrefix(const char *prefix); + static void eraseForPrefix(const char *prefix); /** * @return a cursor interface to the query optimizer. The implementation may diff --git a/db/oplog.cpp b/db/oplog.cpp index 7286fd9053c..dc9db76d9d5 100644 --- a/db/oplog.cpp +++ b/db/oplog.cpp @@ -473,9 +473,9 @@ namespace mongo { return _qp.nsd()->capFirstNewRecord; } - void assertExtentNonempty( const Extent *e ) { + void wassertExtentNonempty( const Extent *e ) { // TODO ensure this requirement is clearly enforced, or fix. - massert( 14834, "empty extent found during finding start scan", !e->firstRecord.isNull() ); + wassert( !e->firstRecord.isNull() ); } DiskLoc FindingStartCursor::prevExtentFirstLoc( const DiskLoc &rec ) { @@ -488,14 +488,14 @@ namespace mongo { e = e->xprev.ext(); } if ( e->myLoc != _qp.nsd()->capExtent ) { - assertExtentNonempty( e ); + wassertExtentNonempty( e ); return e->firstRecord; } } else { if ( !e->xprev.isNull() ) { e = e->xprev.ext(); - assertExtentNonempty( e ); + wassertExtentNonempty( e ); return e->firstRecord; } } @@ -506,20 +506,30 @@ namespace mongo { shared_ptr<Cursor> c = _qp.newCursor( startLoc ); _findingStartCursor.reset( new ClientCursor(QueryOption_NoCursorTimeout, c, _qp.ns()) ); } + + bool FindingStartCursor::firstDocMatchesOrEmpty() const { + shared_ptr<Cursor> c = _qp.newCursor(); + return !c->ok() || _matcher->matchesCurrent( c.get() ); + } void FindingStartCursor::init() { - // Use a ClientCursor here so we can release db mutex while scanning - // oplog (can take quite a while with large oplogs). 
- shared_ptr<Cursor> c = _qp.newReverseCursor(); - _findingStartCursor.reset( new ClientCursor(QueryOption_NoCursorTimeout, c, _qp.ns(), BSONObj()) ); - _findingStartTimer.reset(); - _findingStartMode = Initial; BSONElement tsElt = _qp.originalQuery()[ "ts" ]; massert( 13044, "no ts field in query", !tsElt.eoo() ); BSONObjBuilder b; b.append( tsElt ); BSONObj tsQuery = b.obj(); _matcher.reset(new CoveredIndexMatcher(tsQuery, _qp.indexKey())); + if ( firstDocMatchesOrEmpty() ) { + _c = _qp.newCursor(); + _findingStart = false; + return; + } + // Use a ClientCursor here so we can release db mutex while scanning + // oplog (can take quite a while with large oplogs). + shared_ptr<Cursor> c = _qp.newReverseCursor(); + _findingStartCursor.reset( new ClientCursor(QueryOption_NoCursorTimeout, c, _qp.ns(), BSONObj()) ); + _findingStartTimer.reset(); + _findingStartMode = Initial; } // ------------------------------------- @@ -704,7 +714,7 @@ namespace mongo { virtual void help( stringstream &help ) const { help << "internal (sharding)\n{ applyOps : [ ] , preCondition : [ { ns : ... , q : ... , res : ... } ] }"; } - virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { if ( cmdObj.firstElement().type() != Array ) { errmsg = "ops has to be an array"; diff --git a/db/oplog.h b/db/oplog.h index f87a1c85e04..79fb01b0a4d 100644 --- a/db/oplog.h +++ b/db/oplog.h @@ -118,6 +118,7 @@ namespace mongo { _findingStartCursor.reset( 0 ); } void init(); + bool firstDocMatchesOrEmpty() const; }; void pretouchOperation(const BSONObj& op); diff --git a/db/ops/query.cpp b/db/ops/query.cpp index 120382fa7d8..f13b6e5ea4b 100644 --- a/db/ops/query.cpp +++ b/db/ops/query.cpp @@ -36,6 +36,7 @@ #include "../lasterror.h" #include "../../s/d_logic.h" #include "../repl_block.h" +#include "../../server.h" namespace mongo { @@ -92,21 +93,15 @@ namespace mongo { ClientCursor::Pointer p(cursorid); ClientCursor *cc = p.c(); - int bufSize = 512; - if ( cc ) { - bufSize += sizeof( QueryResult ); - bufSize += MaxBytesToReturnToClientAtOnce; - } + int bufSize = 512 + sizeof( QueryResult ) + MaxBytesToReturnToClientAtOnce; BufBuilder b( bufSize ); - b.skip(sizeof(QueryResult)); - int resultFlags = ResultFlag_AwaitCapable; int start = 0; int n = 0; - if ( !cc ) { + if ( unlikely(!cc) ) { log() << "getMore: cursorid not found " << ns << " " << cursorid << endl; cursorid = 0; resultFlags = ResultFlag_CursorNotFound; @@ -420,6 +415,8 @@ namespace mongo { *_b << "indexBounds" << c->prettyIndexBounds(); + c->explainDetails( *_b ); + if ( !hint ) { *_b << "allPlans" << _a->arr(); } @@ -899,9 +896,6 @@ namespace mongo { if ( ! (explain || pq.showDiskLoc()) && isSimpleIdQuery( query ) && !pq.hasOption( QueryOption_CursorTailable ) ) { - //NamespaceDetails* d = nsdetails(ns); - //uassert(14820, "capped collections have no _id index by default, can only query by _id if one added", d == NULL || d->haveIdIndex() ); - bool nsFound = false; bool indexFound = false; diff --git a/db/ops/update.cpp b/db/ops/update.cpp index 3221fe0f277..d70048d2cc2 100644 --- a/db/ops/update.cpp +++ b/db/ops/update.cpp @@ -1060,11 +1060,10 @@ namespace mongo { debug.updateobj = updateobj; - /* idea with these here it to make them loop invariant for multi updates, and thus be a bit faster for that case */ - /* NOTE: when yield() is added herein, these must be refreshed after each call to yield! 
*/ + // idea with these here is to make them loop invariant for multi updates, and thus be a bit faster for that case + // The pointers may be left invalid on a failed or terminal yield recovery. NamespaceDetails *d = nsdetails(ns); // can be null if an upsert... NamespaceDetailsTransient *nsdt = &NamespaceDetailsTransient::get_w(ns); - /* end note */ auto_ptr<ModSet> mods; bool isOperatorUpdate = updateobj.firstElementFieldName()[0] == '$'; @@ -1105,6 +1104,9 @@ namespace mongo { shared_ptr< MultiCursor::CursorOp > opPtr( new UpdateOp( mods.get() && mods->hasDynamicArray() ) ); shared_ptr< MultiCursor > c( new MultiCursor( ns, patternOrig, BSONObj(), opPtr, true ) ); + d = nsdetails(ns); + nsdt = &NamespaceDetailsTransient::get_w(ns); + if( c->ok() ) { set<DiskLoc> seenObjects; MatchDetails details; @@ -1114,20 +1116,28 @@ bool atomic = c->matcher()->docMatcher().atomic(); - // ***************** - if ( cc.get() == 0 ) { - shared_ptr< Cursor > cPtr = c; - cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , cPtr , ns ) ); - } - - if ( ! cc->yieldSometimes( ClientCursor::WillNeed ) ) { - cc.release(); - break; - } - if ( !c->ok() ) { - break; + if ( !atomic ) { + // ***************** + if ( cc.get() == 0 ) { + shared_ptr< Cursor > cPtr = c; + cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , cPtr , ns ) ); + } + + bool didYield; + if ( ! cc->yieldSometimes( ClientCursor::WillNeed, &didYield ) ) { + cc.release(); + break; + } + if ( !c->ok() ) { + break; + } + + if ( didYield ) { + d = nsdetails(ns); + nsdt = &NamespaceDetailsTransient::get_w(ns); + } + // ***************** } - // ***************** // May have already matched in UpdateOp, but do again to get details set correctly if ( ! c->matcher()->matchesCurrent( c.get(), &details ) ) { @@ -1146,6 +1156,8 @@ if ( !c->ok() ) { break; } + d = nsdetails(ns); + nsdt = &NamespaceDetailsTransient::get_w(ns); } continue; } @@ -1276,10 +1288,11 @@ if ( !c->ok() ) { break; } + d = nsdetails(ns); + nsdt = &NamespaceDetailsTransient::get_w(ns); } - if (atomic) - getDur().commitIfNeeded(); + getDur().commitIfNeeded(); continue; } diff --git a/db/pdfile.cpp b/db/pdfile.cpp index 0b7a5b0830d..0569ba6868e 100644 --- a/db/pdfile.cpp +++ b/db/pdfile.cpp @@ -869,6 +869,7 @@ namespace mongo { result.append("ns", name.c_str()); ClientCursor::invalidate(name.c_str()); Top::global.collectionDropped( name ); + NamespaceDetailsTransient::eraseForPrefix( name.c_str() ); dropNS(name); } @@ -967,7 +968,7 @@ namespace mongo { } } - void DataFileMgr::deleteRecord(const char *ns, Record *todelete, const DiskLoc& dl, bool cappedOK, bool noWarn) { + void DataFileMgr::deleteRecord(const char *ns, Record *todelete, const DiskLoc& dl, bool cappedOK, bool noWarn, bool doLog ) { dassert( todelete == dl.rec() ); NamespaceDetails* d = nsdetails(ns); @@ -976,6 +977,14 @@ if ( d->capped && !cappedOK ) { uassert( 10089 , "can't remove from a capped collection" , 0 ); return; } + + BSONObj toDelete; + if ( doLog ) { + BSONElement e = dl.obj()["_id"]; + if ( e.type() ) { + toDelete = e.wrap(); + } + } /* check if any cursors point to us. if so, advance them. */ ClientCursor::aboutToDelete(dl); @@ -984,6 +993,10 @@ _deleteRecord(d, ns, todelete, dl); NamespaceDetailsTransient::get_w( ns ).notifyOfWriteOp(); + + if ( ! 
toDelete.isEmpty() ) { + logOp( "d" , ns , toDelete ); + } } @@ -1181,7 +1194,13 @@ namespace mongo { BSONObjExternalSorter::Data d = i->next(); try { - btBuilder.addKey(d.first, d.second); + if ( !dupsAllowed && dropDups ) { + LastError::Disabled led( lastError.get() ); + btBuilder.addKey(d.first, d.second); + } + else { + btBuilder.addKey(d.first, d.second); + } } catch( AssertionException& e ) { if ( dupsAllowed ) { @@ -1189,8 +1208,9 @@ namespace mongo { throw; } - if( e.interrupted() ) - throw; + if( e.interrupted() ) { + killCurrentOp.checkForInterrupt(); + } if ( ! dropDups ) throw; @@ -1276,7 +1296,7 @@ namespace mongo { log(1) << "\t fastBuildIndex dupsToDrop:" << dupsToDrop.size() << endl; for( list<DiskLoc>::iterator i = dupsToDrop.begin(); i != dupsToDrop.end(); i++ ){ - theDataFileMgr.deleteRecord( ns, i->rec(), *i, false, true ); + theDataFileMgr.deleteRecord( ns, i->rec(), *i, false, true , true ); getDur().commitIfNeeded(); } @@ -1302,18 +1322,27 @@ namespace mongo { while ( cc->ok() ) { BSONObj js = cc->current(); try { - _indexRecord(d, idxNo, js, cc->currLoc(), dupsAllowed); + { + if ( !dupsAllowed && dropDups ) { + LastError::Disabled led( lastError.get() ); + _indexRecord(d, idxNo, js, cc->currLoc(), dupsAllowed); + } + else { + _indexRecord(d, idxNo, js, cc->currLoc(), dupsAllowed); + } + } cc->advance(); } catch( AssertionException& e ) { - if( e.interrupted() ) - throw; + if( e.interrupted() ) { + killCurrentOp.checkForInterrupt(); + } if ( dropDups ) { DiskLoc toDelete = cc->currLoc(); bool ok = cc->advance(); cc->updateLocation(); - theDataFileMgr.deleteRecord( ns, toDelete.rec(), toDelete, false, true ); + theDataFileMgr.deleteRecord( ns, toDelete.rec(), toDelete, false, true , true ); if( ClientCursor::find(id, false) == 0 ) { cc.release(); if( !ok ) { diff --git a/db/pdfile.h b/db/pdfile.h index 0f45e6d337e..64dba68ca41 100644 --- a/db/pdfile.h +++ b/db/pdfile.h @@ -142,7 +142,7 @@ namespace mongo { static Record* getRecord(const DiskLoc& dl); static DeletedRecord* makeDeletedRecord(const DiskLoc& dl, int len); - void deleteRecord(const char *ns, Record *todelete, const DiskLoc& dl, bool cappedOK = false, bool noWarn = false); + void deleteRecord(const char *ns, Record *todelete, const DiskLoc& dl, bool cappedOK = false, bool noWarn = false, bool logOp=false); /* does not clean up indexes, etc. : just deletes the record in the pdfile. 
       use deleteRecord() to unindex */
     void _deleteRecord(NamespaceDetails *d, const char *ns, Record *todelete, const DiskLoc& dl);
diff --git a/db/queryoptimizer.cpp b/db/queryoptimizer.cpp
index 4173eaaa2cd..e49e9b11ecb 100644
--- a/db/queryoptimizer.cpp
+++ b/db/queryoptimizer.cpp
@@ -52,7 +52,7 @@ namespace mongo {
     QueryPlan::QueryPlan( NamespaceDetails *d, int idxNo,
-                          const FieldRangeSetPair &frsp, const FieldRangeSetPair &originalFrsp, const BSONObj &originalQuery, const BSONObj &order, const BSONObj &startKey, const BSONObj &endKey , string special ) :
+                          const FieldRangeSetPair &frsp, const FieldRangeSetPair *originalFrsp, const BSONObj &originalQuery, const BSONObj &order, const BSONObj &startKey, const BSONObj &endKey , string special ) :
         _d(d), _idxNo(idxNo),
         _frs( frsp.frsForIndex( _d, _idxNo ) ),
         _frsMulti( frsp.frsForIndex( _d, -1 ) ),
@@ -166,12 +171,17 @@ doneCheckOrder:
             _optimal = true;
         if ( exactIndexedQueryCount == _frs.nNontrivialRanges() &&
             orderFieldsUnindexed.size() == 0 &&
-            exactIndexedQueryCount == _index->keyPattern().nFields() &&
+            exactIndexedQueryCount == idxKey.nFields() &&
             exactIndexedQueryCount == _originalQuery.nFields() ) {
             _exactKeyMatch = true;
         }
         _frv.reset( new FieldRangeVector( _frs, idxSpec, _direction ) );
-        _originalFrv.reset( new FieldRangeVector( originalFrsp.frsForIndex( _d, _idxNo ), idxSpec, _direction ) );
+        if ( originalFrsp ) {
+            _originalFrv.reset( new FieldRangeVector( originalFrsp->frsForIndex( _d, _idxNo ), idxSpec, _direction ) );
+        }
+        else {
+            _originalFrv = _frv;
+        }
         if ( _startOrEndSpec ) {
             BSONObj newStart, newEnd;
             if ( !startKey.isEmpty() )
@@ -206,8 +211,25 @@ doneCheckOrder:
         }
         if ( willScanTable() ) {
-            if ( _frs.nNontrivialRanges() )
+            if ( _frs.nNontrivialRanges() ) {
                 checkTableScanAllowed( _frs.ns() );
+
+                // if we are doing a table scan on _id
+                // and it's a capped collection
+                // we warn, as it's a common user error
+                // .system. and local collections are exempt
+                if ( _d && _d->capped && _frs.range( "_id" ).nontrivial() ) {
+                    if ( cc().isSyncThread() ||
+                        str::contains( _frs.ns() , ".system." ) ||
+                        str::startsWith( _frs.ns() , "local." ) ) {
+                        // ok
+                    }
+                    else {
+                        warning() << "_id query on capped collection without an _id index, performance will be poor; collection: " << _frs.ns() << endl;
+                        //uassert( 14820, str::stream() << "doing _id query on a capped collection without an index is not allowed: " << _frs.ns() ,
+                    }
+                }
+            }
             return findTableScan( _frs.ns(), _order, startLoc );
         }
@@ -328,7 +350,7 @@ doneCheckOrder:
             massert( 10365 , errmsg, indexDetailsForRange( _frsp->ns(), errmsg, _min, _max, keyPattern ) );
         }
         NamespaceDetails *d = nsdetails(_ns);
-        _plans.push_back( QueryPlanPtr( new QueryPlan( d, d->idxNo(id), *_frsp, *_originalFrsp, _originalQuery, _order, _min, _max ) ) );
+        _plans.push_back( QueryPlanPtr( new QueryPlan( d, d->idxNo(id), *_frsp, _originalFrsp.get(), _originalQuery, _order, _min, _max ) ) );
     }
     // returns an IndexDetails * for a hint, 0 if hint is $natural.
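The constructor change above is the crux of this hunk: originalFrsp becomes a nullable pointer, and passing null now means "the original clause constraints are the same as the effective ones", letting _originalFrv simply share _frv instead of building a second FieldRangeVector. A rough standalone sketch of that share-on-null pattern (hypothetical names; std::shared_ptr standing in for the boost pointers the tree actually uses):

    #include <memory>

    struct Ranges { };  // stand-in for FieldRangeVector

    struct PlanSketch {
        std::shared_ptr<Ranges> frv;          // ranges the plan actually scans
        std::shared_ptr<Ranges> originalFrv;  // ranges before $or-clause elimination

        explicit PlanSketch(const Ranges* originalOrNull) : frv(new Ranges()) {
            if (originalOrNull) {
                // caller supplied distinct original constraints: copy them
                originalFrv.reset(new Ranges(*originalOrNull));
            }
            else {
                // null means "same as effective": share, don't copy
                originalFrv = frv;
            }
        }
    };

The non-$or path of MultiPlanScanner further down takes advantage of exactly this, passing an empty auto_ptr rather than cloning the FieldRangeSetPair it just built.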
@@ -374,7 +396,7 @@ doneCheckOrder: NamespaceDetails *d = nsdetails( ns ); if ( !d || !_frsp->matchPossible() ) { // Table scan plan, when no matches are possible - _plans.push_back( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, *_originalFrsp, _originalQuery, _order ) ) ); + _plans.push_back( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, _originalFrsp.get(), _originalQuery, _order ) ) ); return; } @@ -388,7 +410,7 @@ doneCheckOrder: else { massert( 10366 , "natural order cannot be specified with $min/$max", _min.isEmpty() && _max.isEmpty() ); // Table scan plan - _plans.push_back( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, *_originalFrsp, _originalQuery, _order ) ) ); + _plans.push_back( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, _originalFrsp.get(), _originalQuery, _order ) ) ); } return; } @@ -398,7 +420,7 @@ doneCheckOrder: BSONObj keyPattern; IndexDetails *idx = indexDetailsForRange( ns, errmsg, _min, _max, keyPattern ); massert( 10367 , errmsg, idx ); - _plans.push_back( QueryPlanPtr( new QueryPlan( d, d->idxNo(*idx), *_frsp, *_originalFrsp, _originalQuery, _order, _min, _max ) ) ); + _plans.push_back( QueryPlanPtr( new QueryPlan( d, d->idxNo(*idx), *_frsp, _originalFrsp.get(), _originalQuery, _order, _min, _max ) ) ); return; } @@ -407,13 +429,13 @@ doneCheckOrder: if ( idx >= 0 ) { _usingPrerecordedPlan = true; _mayRecordPlan = false; - _plans.push_back( QueryPlanPtr( new QueryPlan( d , idx , *_frsp , *_originalFrsp , _originalQuery, _order ) ) ); + _plans.push_back( QueryPlanPtr( new QueryPlan( d , idx , *_frsp , _originalFrsp.get() , _originalQuery, _order ) ) ); return; } } if ( _originalQuery.isEmpty() && _order.isEmpty() ) { - _plans.push_back( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, *_originalFrsp, _originalQuery, _order ) ) ); + _plans.push_back( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, _originalFrsp.get(), _originalQuery, _order ) ) ); return; } @@ -428,7 +450,7 @@ doneCheckOrder: if ( spec.getTypeName() == _special && spec.suitability( _originalQuery , _order ) ) { _usingPrerecordedPlan = true; _mayRecordPlan = false; - _plans.push_back( QueryPlanPtr( new QueryPlan( d , j , *_frsp , *_originalFrsp , _originalQuery, _order , + _plans.push_back( QueryPlanPtr( new QueryPlan( d , j , *_frsp , _originalFrsp.get() , _originalQuery, _order , BSONObj() , BSONObj() , _special ) ) ); return; } @@ -445,7 +467,7 @@ doneCheckOrder: _oldNScanned = oldNScanned; if ( !strcmp( bestIndex.firstElementFieldName(), "$natural" ) ) { // Table scan plan - p.reset( new QueryPlan( d, -1, *_frsp, *_originalFrsp, _originalQuery, _order ) ); + p.reset( new QueryPlan( d, -1, *_frsp, _originalFrsp.get(), _originalQuery, _order ) ); } NamespaceDetails::IndexIterator i = d->ii(); @@ -453,7 +475,7 @@ doneCheckOrder: int j = i.pos(); IndexDetails& ii = i.next(); if( ii.keyPattern().woCompare(bestIndex) == 0 ) { - p.reset( new QueryPlan( d, j, *_frsp, *_originalFrsp, _originalQuery, _order ) ); + p.reset( new QueryPlan( d, j, *_frsp, _originalFrsp.get(), _originalQuery, _order ) ); } } @@ -480,7 +502,7 @@ doneCheckOrder: if ( !_frsp->matchPossible() || ( _frsp->noNontrivialRanges() && _order.isEmpty() ) || ( !_order.isEmpty() && !strcmp( _order.firstElementFieldName(), "$natural" ) ) ) { // Table scan plan - addPlan( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, *_originalFrsp, _originalQuery, _order ) ), checkFirst ); + addPlan( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, _originalFrsp.get(), _originalQuery, _order ) ), checkFirst ); return; } @@ -490,10 +512,11 @@ doneCheckOrder: QueryPlanPtr 
optimalPlan;
         for( int i = 0; i < d->nIndexes; ++i ) {
             if ( normalQuery ) {
-                if ( !_frsp->matchPossibleForIndex( d, i, d->idx( i ).keyPattern() ) ) {
+                BSONObj keyPattern = d->idx( i ).keyPattern();
+                if ( !_frsp->matchPossibleForIndex( d, i, keyPattern ) ) {
                     // If no match is possible, only generate a trivial plan that won't
                     // scan any documents.
-                    QueryPlanPtr p( new QueryPlan( d, i, *_frsp, *_originalFrsp, _originalQuery, _order ) );
+                    QueryPlanPtr p( new QueryPlan( d, i, *_frsp, _originalFrsp.get(), _originalQuery, _order ) );
                     addPlan( p, checkFirst );
                     return;
                 }
@@ -502,7 +525,7 @@ doneCheckOrder:
                 }
             }
-            QueryPlanPtr p( new QueryPlan( d, i, *_frsp, *_originalFrsp, _originalQuery, _order ) );
+            QueryPlanPtr p( new QueryPlan( d, i, *_frsp, _originalFrsp.get(), _originalQuery, _order ) );
             if ( p->optimal() ) {
                 if ( !optimalPlan.get() ) {
                     optimalPlan = p;
@@ -520,7 +543,7 @@ doneCheckOrder:
             addPlan( *i, checkFirst );
         // Table scan plan
-        addPlan( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, *_originalFrsp, _originalQuery, _order ) ), checkFirst );
+        addPlan( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, _originalFrsp.get(), _originalQuery, _order ) ), checkFirst );
     }
     shared_ptr<QueryOp> QueryPlanSet::runOp( QueryOp &op ) {
@@ -538,7 +561,7 @@ doneCheckOrder:
         return r.runUntilFirstCompletes();
     }
-    shared_ptr<QueryOp> QueryPlanSet::nextOp( QueryOp &originalOp ) {
+    shared_ptr<QueryOp> QueryPlanSet::nextOp( QueryOp &originalOp, bool retried ) {
         if ( !_runner ) {
             _runner.reset( new Runner( *this, originalOp ) );
             shared_ptr<QueryOp> op = _runner->init();
@@ -553,10 +576,14 @@ doneCheckOrder:
         if ( !_usingPrerecordedPlan || _bestGuessOnly || _plans.size() > 1 ) {
             return op;
         }
+
+        // Avoid an infinite loop here
+        uassert( 15878, str::stream() << "query plans not successful even with no constraints, potentially due to additional sort", ! retried );
+
         // Retry with all candidate plans.
         QueryUtilIndexed::clearIndexesForPatterns( *_frsp, _order );
         init();
-        return nextOp( originalOp );
+        return nextOp( originalOp, true );
     }
     bool QueryPlanSet::prepareToYield() {
@@ -815,24 +842,29 @@ doneCheckOrder:
         _ns( ns ),
         _or( !query.getField( "$or" ).eoo() ),
         _query( query.getOwned() ),
-        _org( ns, _query ),
         _i(),
         _honorRecordedPlan( honorRecordedPlan ),
         _bestGuessOnly( bestGuessOnly ),
         _hint( ( hint && !hint->eoo() ) ? hint->wrap() : BSONObj() ),
         _mayYield( mayYield ),
         _tableScanned() {
-        if ( !order.isEmpty() || !min.isEmpty() || !max.isEmpty() || !_org.getSpecial().empty() ) {
+        if ( !order.isEmpty() || !min.isEmpty() || !max.isEmpty() ) {
             _or = false;
         }
-        if ( _or && uselessOr( _hint.firstElement() ) ) {
-            _or = false;
+        if ( _or ) {
+            // Only construct an OrRangeGenerator if we may handle $or clauses.
+ _org.reset( new OrRangeGenerator( ns, _query ) ); + if ( !_org->getSpecial().empty() ) { + _or = false; + } + else if ( uselessOr( _hint.firstElement() ) ) { + _or = false; + } } // if _or == false, don't use or clauses for index selection if ( !_or ) { auto_ptr<FieldRangeSetPair> frsp( new FieldRangeSetPair( ns, _query, true ) ); - auto_ptr<FieldRangeSetPair> oldFrsp( new FieldRangeSetPair( *frsp ) ); - _currentQps.reset( new QueryPlanSet( ns, frsp, oldFrsp, _query, order, hint, honorRecordedPlan, min, max, _bestGuessOnly, _mayYield ) ); + _currentQps.reset( new QueryPlanSet( ns, frsp, auto_ptr<FieldRangeSetPair>(), _query, order, hint, honorRecordedPlan, min, max, _bestGuessOnly, _mayYield ) ); } else { BSONElement e = _query.getField( "$or" ); @@ -847,8 +879,8 @@ doneCheckOrder: return _currentQps->runOp( op ); } ++_i; - auto_ptr<FieldRangeSetPair> frsp( _org.topFrsp() ); - auto_ptr<FieldRangeSetPair> originalFrsp( _org.topFrspOriginal() ); + auto_ptr<FieldRangeSetPair> frsp( _org->topFrsp() ); + auto_ptr<FieldRangeSetPair> originalFrsp( _org->topFrspOriginal() ); BSONElement hintElt = _hint.firstElement(); _currentQps.reset( new QueryPlanSet( _ns, frsp, originalFrsp, _query, BSONObj(), &hintElt, _honorRecordedPlan, BSONObj(), BSONObj(), _bestGuessOnly, _mayYield ) ); shared_ptr<QueryOp> ret( _currentQps->runOp( op ) ); @@ -856,7 +888,7 @@ doneCheckOrder: _tableScanned = true; } else { // If the full table was scanned, don't bother popping the last or clause. - _org.popOrClause( ret->qp().nsd(), ret->qp().idxNo(), ret->qp().indexed() ? ret->qp().indexKey() : BSONObj() ); + _org->popOrClause( ret->qp().nsd(), ret->qp().idxNo(), ret->qp().indexed() ? ret->qp().indexKey() : BSONObj() ); } return ret; } @@ -877,7 +909,7 @@ doneCheckOrder: if ( op->qp().willScanTable() ) { _tableScanned = true; } else { - _org.popOrClause( op->qp().nsd(), op->qp().idxNo(), op->qp().indexed() ? op->qp().indexKey() : BSONObj() ); + _org->popOrClause( op->qp().nsd(), op->qp().idxNo(), op->qp().indexed() ? 
op->qp().indexKey() : BSONObj() ); } return op; } @@ -887,8 +919,8 @@ doneCheckOrder: shared_ptr<QueryOp> op; while( mayRunMore() ) { ++_i; - auto_ptr<FieldRangeSetPair> frsp( _org.topFrsp() ); - auto_ptr<FieldRangeSetPair> originalFrsp( _org.topFrspOriginal() ); + auto_ptr<FieldRangeSetPair> frsp( _org->topFrsp() ); + auto_ptr<FieldRangeSetPair> originalFrsp( _org->topFrspOriginal() ); BSONElement hintElt = _hint.firstElement(); _currentQps.reset( new QueryPlanSet( _ns, frsp, originalFrsp, _query, BSONObj(), &hintElt, _honorRecordedPlan, BSONObj(), BSONObj(), _bestGuessOnly, _mayYield ) ); op = nextOpHandleEndOfClause(); @@ -954,9 +986,9 @@ doneCheckOrder: if ( !id ) { return true; } - return QueryUtilIndexed::uselessOr( _org, nsd, nsd->idxNo( *id ) ); + return QueryUtilIndexed::uselessOr( *_org, nsd, nsd->idxNo( *id ) ); } - return QueryUtilIndexed::uselessOr( _org, nsd, -1 ); + return QueryUtilIndexed::uselessOr( *_org, nsd, -1 ); } MultiCursor::MultiCursor( const char *ns, const BSONObj &pattern, const BSONObj &order, shared_ptr<CursorOp> op, bool mayYield ) @@ -1199,12 +1231,13 @@ doneCheckOrder: } bool QueryUtilIndexed::indexUseful( const FieldRangeSetPair &frsp, NamespaceDetails *d, int idxNo, const BSONObj &order ) { - frsp.assertValidIndex( d, idxNo ); - if ( !frsp.matchPossibleForIndex( d, idxNo, d->idx( idxNo ).keyPattern() ) ) { + DEV frsp.assertValidIndex( d, idxNo ); + BSONObj keyPattern = d->idx( idxNo ).keyPattern(); + if ( !frsp.matchPossibleForIndex( d, idxNo, keyPattern ) ) { // No matches are possible in the index so the index may be useful. return true; } - return d->idx( idxNo ).getSpec().suitability( frsp.simplifiedQueryForIndex( d, idxNo, d->idx( idxNo ).keyPattern() ), order ) != USELESS; + return d->idx( idxNo ).getSpec().suitability( frsp.simplifiedQueryForIndex( d, idxNo, keyPattern ), order ) != USELESS; } void QueryUtilIndexed::clearIndexesForPatterns( const FieldRangeSetPair &frsp, const BSONObj &order ) { diff --git a/db/queryoptimizer.h b/db/queryoptimizer.h index e55e791e1ca..ad6b985ab1f 100644 --- a/db/queryoptimizer.h +++ b/db/queryoptimizer.h @@ -35,10 +35,13 @@ namespace mongo { class QueryPlan : boost::noncopyable { public: + /** + * @param originalFrsp - original constraints for this query clause. If null, frsp will be used instead. + */ QueryPlan(NamespaceDetails *d, int idxNo, // -1 = no index const FieldRangeSetPair &frsp, - const FieldRangeSetPair &originalFrsp, + const FieldRangeSetPair *originalFrsp, const BSONObj &originalQuery, const BSONObj &order, const BSONObj &startKey = BSONObj(), @@ -245,6 +248,9 @@ namespace mongo { typedef boost::shared_ptr<QueryPlan> QueryPlanPtr; typedef vector<QueryPlanPtr> PlanSet; + /** + * @param originalFrsp - original constraints for this query clause; if null, frsp will be used. + */ QueryPlanSet( const char *ns, auto_ptr<FieldRangeSetPair> frsp, auto_ptr<FieldRangeSetPair> originalFrsp, @@ -272,7 +278,7 @@ namespace mongo { } /** Initialize or iterate a runner generated from @param originalOp. */ - shared_ptr<QueryOp> nextOp( QueryOp &originalOp ); + shared_ptr<QueryOp> nextOp( QueryOp &originalOp, bool retried = false ); /** Yield the runner member. 
*/ @@ -290,7 +296,7 @@ namespace mongo { //for testing const FieldRangeSetPair &frsp() const { return *_frsp; } - const FieldRangeSetPair &originalFrsp() const { return *_originalFrsp; } + const FieldRangeSetPair *originalFrsp() const { return _originalFrsp.get(); } bool modifiedKeys() const; bool hasMultiKey() const; @@ -420,7 +426,7 @@ namespace mongo { shared_ptr<Cursor> singleCursor() const; /** @return true iff more $or clauses need to be scanned. */ - bool mayRunMore() const { return _or ? ( !_tableScanned && !_org.orFinished() ) : _i == 0; } + bool mayRunMore() const { return _or ? ( !_tableScanned && !_org->orFinished() ) : _i == 0; } /** @return non-$or version of explain output. */ BSONObj oldExplain() const { assertNotOr(); return _currentQps->explain(); } /** @return true iff this is not a $or query and a plan is selected based on previous success of this plan. */ @@ -445,7 +451,7 @@ namespace mongo { const char * _ns; bool _or; BSONObj _query; - OrRangeGenerator _org; + shared_ptr<OrRangeGenerator> _org; // May be null in certain non $or query cases. auto_ptr<QueryPlanSet> _currentQps; int _i; bool _honorRecordedPlan; diff --git a/db/queryutil-inl.h b/db/queryutil-inl.h index 2c3a757b385..d0fc212cef9 100644 --- a/db/queryutil-inl.h +++ b/db/queryutil-inl.h @@ -130,5 +130,24 @@ namespace mongo { } return ret; } + + inline bool FieldRangeSetPair::matchPossibleForIndex( NamespaceDetails *d, int idxNo, const BSONObj &keyPattern ) const { + assertValidIndexOrNoIndex( d, idxNo ); + if ( !matchPossible() ) { + return false; + } + if ( idxNo < 0 ) { + // multi key matchPossible() is true, so return true. + return true; + } + return frsForIndex( d, idxNo ).matchPossibleForIndex( keyPattern ); + } + inline void FieldRangeSetPair::assertValidIndexOrNoIndex( const NamespaceDetails *d, int idxNo ) const { + massert( 14049, "FieldRangeSetPair invalid index specified", idxNo >= -1 ); + if ( idxNo >= 0 ) { + assertValidIndex( d, idxNo ); + } + } + } // namespace mongo diff --git a/db/queryutil.cpp b/db/queryutil.cpp index ec9ee693511..717eac816b8 100644 --- a/db/queryutil.cpp +++ b/db/queryutil.cpp @@ -28,6 +28,7 @@ namespace mongo { extern BSONObj staticNull; + extern BSONObj staticUndefined; /** returns a string that when used as a matcher, would match a super set of regex() returns "" for complex regular expressions @@ -79,6 +80,10 @@ namespace mongo { r = r.substr( 0 , r.size() - 1 ); return r; //breaking here fails with /^a?/ } + else if (c == '|') { + // whole match so far is optional. Nothing we can do here. + return string(); + } else if (c == '\\') { c = *(regex++); if (c == 'Q'){ @@ -107,7 +112,7 @@ namespace mongo { ss << c; } } - else if (strchr("^$.[|()+{", c)) { + else if (strchr("^$.[()+{", c)) { // list of "metacharacters" from man pcrepattern r = ss.str(); break; @@ -153,25 +158,33 @@ namespace mongo { FieldRange::FieldRange( const BSONElement &e, bool singleKey, bool isNot, bool optimize ) : _singleKey( singleKey ) { + int op = e.getGtLtOp(); + // NOTE with $not, we could potentially form a complementary set of intervals. 
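The two regex tweaks above ('|' removed from the metacharacter list and given an early bail-out) both serve prefix extraction: an alternation makes everything matched so far optional, so no literal prefix can safely seed an index range. A toy illustration of the idea, assuming a much-simplified scanner rather than the server's real regex handling (literalPrefix is a made-up name):

    #include <cstring>
    #include <string>

    // Return the literal prefix a pattern guarantees, or "" when none is usable.
    std::string literalPrefix(const char* regex) {
        if (*regex++ != '^')
            return "";                            // unanchored: no usable prefix
        std::string prefix;
        for (char c = *regex; c != 0 && c != '$'; c = *++regex) {
            if (c == '|')
                return "";                        // whole match so far is optional
            if (c == '*' || c == '?') {           // previous char became optional:
                if (!prefix.empty())
                    prefix.erase(prefix.size() - 1);
                break;                            // drop it and stop extending
            }
            if (strchr("^.[()+{\\", c))
                break;                            // other metacharacters end the prefix
            prefix += c;
        }
        return prefix;
    }

For example "^abc" yields "abc" while "^a|b" yields "", which is exactly why the '|' case must return an empty string rather than merely stopping.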
- if ( !isNot && !e.eoo() && e.type() != RegEx && e.getGtLtOp() == BSONObj::opIN ) { + if ( !isNot && !e.eoo() && e.type() != RegEx && op == BSONObj::opIN ) { set<BSONElement,element_lt> vals; vector<FieldRange> regexes; uassert( 12580 , "invalid query" , e.isABSONObj() ); BSONObjIterator i( e.embeddedObject() ); while( i.more() ) { BSONElement ie = i.next(); + uassert( 15881, "$elemMatch not allowed within $in", + ie.type() != Object || + ie.embeddedObject().firstElement().getGtLtOp() != BSONObj::opELEM_MATCH ); if ( ie.type() == RegEx ) { regexes.push_back( FieldRange( ie, singleKey, false, optimize ) ); } else { - // A document array may be indexed by its first element, or - // as a full array if it is embedded within another array. + // A document array may be indexed by its first element, by undefined + // if it is empty, or as a full array if it is embedded within another + // array. vals.insert( ie ); if ( ie.type() == Array ) { - if ( !ie.embeddedObject().firstElement().eoo() ) { - vals.insert( ie.embeddedObject().firstElement() ); - } + BSONElement temp = ie.embeddedObject().firstElement(); + if ( temp.eoo() ) { + temp = staticUndefined.firstElement(); + } + vals.insert( temp ); } } } @@ -185,17 +198,21 @@ namespace mongo { return; } - // A document array may be indexed by its first element, or - // as a full array if it is embedded within another array. - if ( e.type() == Array && e.getGtLtOp() == BSONObj::Equality ) { + // A document array may be indexed by its first element, by undefined + // if it is empty, or as a full array if it is embedded within another + // array. + if ( e.type() == Array && op == BSONObj::Equality ) { _intervals.push_back( FieldInterval(e) ); - const BSONElement& temp = e.embeddedObject().firstElement(); - if ( ! temp.eoo() ) { - if ( temp < e ) - _intervals.insert( _intervals.begin() , temp ); - else - _intervals.push_back( FieldInterval(temp) ); + BSONElement temp = e.embeddedObject().firstElement(); + if ( temp.eoo() ) { + temp = staticUndefined.firstElement(); + } + if ( temp < e ) { + _intervals.insert( _intervals.begin() , temp ); + } + else { + _intervals.push_back( FieldInterval(temp) ); } return; @@ -215,8 +232,6 @@ namespace mongo { if ( e.eoo() ) return; - int op = e.getGtLtOp(); - bool existsSpec = false; if ( op == BSONObj::opEXISTS ) { existsSpec = e.trueValue(); @@ -622,6 +637,27 @@ namespace mongo { return o; } + string FieldInterval::toString() const { + StringBuilder buf; + buf << ( _lower._inclusive ? "[" : "(" ); + buf << _lower._bound; + buf << " , "; + buf << _upper._bound; + buf << ( _upper._inclusive ? 
"]" : ")" ); + return buf.str(); + } + + string FieldRange::toString() const { + StringBuilder buf; + buf << "(FieldRange special: " << _special << " singleKey: " << _special << " intervals: "; + for( vector<FieldInterval>::const_iterator i = _intervals.begin(); i != _intervals.end(); ++i ) { + buf << i->toString(); + } + + buf << ")"; + return buf.str(); + } + string FieldRangeSet::getSpecial() const { string s = ""; for ( map<string,FieldRange>::const_iterator i=_ranges.begin(); i!=_ranges.end(); i++ ) { @@ -773,30 +809,32 @@ namespace mongo { } void FieldRangeSet::processQueryField( const BSONElement &e, bool optimize ) { - if ( strcmp( e.fieldName(), "$and" ) == 0 ) { - uassert( 14816 , "$and expression must be a nonempty array" , e.type() == Array && e.embeddedObject().nFields() > 0 ); - BSONObjIterator i( e.embeddedObject() ); - while( i.more() ) { - BSONElement e = i.next(); - uassert( 14817 , "$and elements must be objects" , e.type() == Object ); - BSONObjIterator j( e.embeddedObject() ); - while( j.more() ) { - processQueryField( j.next(), optimize ); - } - } - } + if ( e.fieldName()[ 0 ] == '$' ) { + if ( strcmp( e.fieldName(), "$and" ) == 0 ) { + uassert( 14816 , "$and expression must be a nonempty array" , e.type() == Array && e.embeddedObject().nFields() > 0 ); + BSONObjIterator i( e.embeddedObject() ); + while( i.more() ) { + BSONElement e = i.next(); + uassert( 14817 , "$and elements must be objects" , e.type() == Object ); + BSONObjIterator j( e.embeddedObject() ); + while( j.more() ) { + processQueryField( j.next(), optimize ); + } + } + } - if ( strcmp( e.fieldName(), "$where" ) == 0 ) { - return; - } + if ( strcmp( e.fieldName(), "$where" ) == 0 ) { + return; + } - if ( strcmp( e.fieldName(), "$or" ) == 0 ) { - return; - } + if ( strcmp( e.fieldName(), "$or" ) == 0 ) { + return; + } - if ( strcmp( e.fieldName(), "$nor" ) == 0 ) { - return; - } + if ( strcmp( e.fieldName(), "$nor" ) == 0 ) { + return; + } + } bool equality = ( getGtLtOp( e ) == BSONObj::Equality ); if ( equality && e.type() == Object ) { @@ -1055,32 +1093,11 @@ namespace mongo { return ret; } - const FieldRangeSet &FieldRangeSetPair::frsForIndex( const NamespaceDetails* nsd, int idxNo ) const { - assertValidIndexOrNoIndex( nsd, idxNo ); - if ( idxNo < 0 ) { - // An unindexed cursor cannot have a "single key" constraint. - return _multiKey; - } - return nsd->isMultikey( idxNo ) ? _multiKey : _singleKey; - } - bool FieldRangeSetPair::noNontrivialRanges() const { return _singleKey.matchPossible() && _singleKey.nNontrivialRanges() == 0 && _multiKey.matchPossible() && _multiKey.nNontrivialRanges() == 0; } - bool FieldRangeSetPair::matchPossibleForIndex( NamespaceDetails *d, int idxNo, const BSONObj &keyPattern ) const { - assertValidIndexOrNoIndex( d, idxNo ); - if ( !matchPossible() ) { - return false; - } - if ( idxNo < 0 ) { - // multi key matchPossible() is true, so return true. 
- return true; - } - return frsForIndex( d, idxNo ).matchPossibleForIndex( keyPattern ); - } - FieldRangeSetPair &FieldRangeSetPair::operator&=( const FieldRangeSetPair &other ) { _singleKey &= other._singleKey; _multiKey &= other._multiKey; @@ -1093,21 +1110,23 @@ namespace mongo { return *this; } + BSONObj FieldRangeSetPair::simplifiedQueryForIndex( NamespaceDetails *d, int idxNo, const BSONObj &keyPattern ) const { + return frsForIndex( d, idxNo ).simplifiedQuery( keyPattern ); + } + void FieldRangeSetPair::assertValidIndex( const NamespaceDetails *d, int idxNo ) const { massert( 14048, "FieldRangeSetPair invalid index specified", idxNo >= 0 && idxNo < d->nIndexes ); } - - void FieldRangeSetPair::assertValidIndexOrNoIndex( const NamespaceDetails *d, int idxNo ) const { - massert( 14049, "FieldRangeSetPair invalid index specified", idxNo >= -1 ); - if ( idxNo >= 0 ) { - assertValidIndex( d, idxNo ); + + const FieldRangeSet &FieldRangeSetPair::frsForIndex( const NamespaceDetails* nsd, int idxNo ) const { + assertValidIndexOrNoIndex( nsd, idxNo ); + if ( idxNo < 0 ) { + // An unindexed cursor cannot have a "single key" constraint. + return _multiKey; } + return nsd->isMultikey( idxNo ) ? _multiKey : _singleKey; } - - BSONObj FieldRangeSetPair::simplifiedQueryForIndex( NamespaceDetails *d, int idxNo, const BSONObj &keyPattern ) const { - return frsForIndex( d, idxNo ).simplifiedQuery( keyPattern ); - } - + bool FieldRangeVector::matchesElement( const BSONElement &e, int i, bool forward ) const { bool eq; int l = matchingLowElement( e, i, forward, eq ); diff --git a/db/queryutil.h b/db/queryutil.h index 00d2d264961..104cde28e4a 100644 --- a/db/queryutil.h +++ b/db/queryutil.h @@ -53,6 +53,8 @@ namespace mongo { /** @return true iff the interval is an equality constraint. */ bool equality() const; mutable int _cachedEquality; + + string toString() const; }; /** @@ -103,6 +105,8 @@ namespace mongo { * NOTE the resulting intervals might not be strictValid(). */ void reverse( FieldRange &ret ) const; + + string toString() const; private: BSONObj addObj( const BSONObj &o ); void finishOperation( const vector<FieldInterval> &newIntervals, const FieldRange &other ); diff --git a/db/record.cpp b/db/record.cpp index f5fa972227a..18be9c75fe2 100644 --- a/db/record.cpp +++ b/db/record.cpp @@ -120,14 +120,16 @@ namespace mongo { /** * after this call, we assume the page is in ram + * @param doHalf if this is a known good access, want to put in first half * @return whether we know the page is in ram */ - bool access( size_t region , short offset ) { + bool access( size_t region , short offset , bool doHalf ) { int regionHash = hash(region); scoped_spinlock lk( _lock ); - - RARELY { + + static int rarely_count = 0; + if ( rarely_count++ % 2048 == 0 ) { long long now = Listener::getElapsedTimeMillis(); RARELY if ( now == 0 ) { tlog() << "warning Listener::getElapsedTimeMillis returning 0ms" << endl; @@ -137,8 +139,8 @@ namespace mongo { _rotate(); } } - - for ( int i=0; i<NumSlices; i++ ) { + + for ( int i=0; i<NumSlices / ( doHalf ? 2 : 1 ); i++ ) { int pos = (_curSlice+i)%NumSlices; State s = _slices[pos].get( regionHash , region , offset ); @@ -205,7 +207,7 @@ namespace mongo { const size_t region = page >> 6; const size_t offset = page & 0x3f; - if ( ps::rolling.access( region , offset ) ) + if ( ps::rolling.access( region , offset , false ) ) return true; if ( ! blockSupported ) @@ -214,14 +216,11 @@ namespace mongo { } Record* Record::accessed() { - if ( ! 
MemoryTrackingEnabled ) - return this; - const size_t page = (size_t)data >> 12; const size_t region = page >> 6; const size_t offset = page & 0x3f; - - ps::rolling.access( region , offset ); + + ps::rolling.access( region , offset , true ); return this; } diff --git a/db/repl.cpp b/db/repl.cpp index a4ab6e4f0ea..3d08f2324c0 100644 --- a/db/repl.cpp +++ b/db/repl.cpp @@ -95,7 +95,7 @@ namespace mongo { virtual LockType locktype() const { return WRITE; } void help(stringstream&h) const { h << "resync (from scratch) an out of date replica slave.\nhttp://www.mongodb.org/display/DOCS/Master+Slave"; } CmdResync() : Command("resync") { } - virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { if( cmdLine.usingReplSets() ) { errmsg = "resync command not currently supported with replica sets. See RS102 info in the mongodb documentations"; result.append("info", "http://www.mongodb.org/display/DOCS/Resyncing+a+Very+Stale+Replica+Set+Member"); @@ -232,7 +232,7 @@ namespace mongo { } virtual LockType locktype() const { return NONE; } CmdIsMaster() : Command("isMaster", true, "ismaster") { } - virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) { + virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) { /* currently request to arbiter is (somewhat arbitrarily) an ismaster request that is not authenticated. we allow unauthenticated ismaster but we aren't as verbose informationally if @@ -1407,6 +1407,7 @@ namespace mongo { void newRepl(); void oldRepl(); + void startReplSets(ReplSetCmdline*); void startReplication() { /* if we are going to be a replica set, we aren't doing other forms of replication. 
*/ if( !cmdLine._replSet.empty() ) { @@ -1416,6 +1417,11 @@ namespace mongo { log() << "***" << endl; } newRepl(); + + replSet = true; + ReplSetCmdline *replSetCmdline = new ReplSetCmdline(cmdLine._replSet); + boost::thread t( boost::bind( &startReplSets, replSetCmdline) ); + return; } diff --git a/db/repl/consensus.cpp b/db/repl/consensus.cpp index 3a4dd9b5b3d..07ee2fa80a3 100644 --- a/db/repl/consensus.cpp +++ b/db/repl/consensus.cpp @@ -25,6 +25,7 @@ namespace mongo { public: CmdReplSetFresh() : ReplSetCommand("replSetFresh") { } private: + bool shouldVeto(const BSONObj& cmdObj, string& errmsg) { unsigned id = cmdObj["id"].Int(); const Member* primary = theReplSet->box.getPrimary(); @@ -66,7 +67,7 @@ namespace mongo { return false; } - virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { if( !check(errmsg, result) ) return false; @@ -101,7 +102,7 @@ namespace mongo { public: CmdReplSetElect() : ReplSetCommand("replSetElect") { } private: - virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { if( !check(errmsg, result) ) return false; theReplSet->elect.electCmdReceived(cmdObj, &result); @@ -152,7 +153,7 @@ namespace mongo { LastYea &L = this->ly.ref(lk); time_t now = time(0); if( L.when + LeaseTime >= now && L.who != memberId ) { - log(1) << "replSet not voting yea for " << memberId << + LOG(1) << "replSet not voting yea for " << memberId << " voted for " << L.who << ' ' << now-L.when << " secs ago" << rsLog; throw VoteException(); } @@ -176,7 +177,7 @@ namespace mongo { void Consensus::electCmdReceived(BSONObj cmd, BSONObjBuilder* _b) { BSONObjBuilder& b = *_b; DEV log() << "replSet received elect msg " << cmd.toString() << rsLog; - else log(2) << "replSet received elect msg " << cmd.toString() << rsLog; + else LOG(2) << "replSet received elect msg " << cmd.toString() << rsLog; string set = cmd["set"].String(); unsigned whoid = cmd["whoid"].Int(); int cfgver = cmd["cfgver"].Int(); @@ -309,7 +310,7 @@ namespace mongo { allUp = false; } } - log(1) << "replSet dev we are freshest of up nodes, nok:" << nok << " nTies:" << nTies << rsLog; + LOG(1) << "replSet dev we are freshest of up nodes, nok:" << nok << " nTies:" << nTies << rsLog; assert( ord <= theReplSet->lastOpTimeWritten ); // <= as this may change while we are working... 
return true; } diff --git a/db/repl/heartbeat.cpp b/db/repl/heartbeat.cpp index 6247b4b1d13..7d3f78c73b5 100644 --- a/db/repl/heartbeat.cpp +++ b/db/repl/heartbeat.cpp @@ -39,6 +39,8 @@ namespace mongo { extern bool replSetBlind; extern ReplSettings replSettings; + unsigned int HeartbeatInfo::numPings; + long long HeartbeatInfo::timeDown() const { if( up() ) return 0; if( downSince == 0 ) @@ -51,7 +53,7 @@ namespace mongo { public: virtual bool adminOnly() const { return false; } CmdReplSetHeartbeat() : ReplSetCommand("replSetHeartbeat") { } - virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { if( replSetBlind ) return false; @@ -62,6 +64,10 @@ namespace mongo { return false; } + if (!checkAuth(errmsg, result)) { + return false; + } + /* we want to keep heartbeat connections open when relinquishing primary. tag them here. */ { AbstractMessagingPort *mp = cc().port(); @@ -147,7 +153,7 @@ namespace mongo { string name() const { return "rsHealthPoll"; } void doWork() { if ( !theReplSet ) { - log(2) << "replSet not initialized yet, skipping health poll this round" << rsLog; + LOG(2) << "replSet not initialized yet, skipping health poll this round" << rsLog; return; } @@ -169,7 +175,10 @@ namespace mongo { time_t after = mem.lastHeartbeat = before + (mem.ping / 1000); // weight new ping with old pings - mem.ping = (unsigned int)((old.ping * .8) + (mem.ping * .2)); + // on the first ping, just use the ping value + if (old.ping != 0) { + mem.ping = (unsigned int)((old.ping * .8) + (mem.ping * .2)); + } if ( info["time"].isNumber() ) { long long t = info["time"].numberLong(); @@ -191,6 +200,8 @@ namespace mongo { mem.hbstate = MemberState(state.Int()); } if( ok ) { + HeartbeatInfo::numPings++; + if( mem.upSince == 0 ) { log() << "replSet info member " << h.toString() << " is up" << rsLog; mem.upSince = mem.lastHeartbeat; @@ -262,6 +273,7 @@ namespace mongo { private: void down(HeartbeatInfo& mem, string msg) { mem.health = 0.0; + mem.ping = 0; if( mem.upSince || mem.downSince == 0 ) { mem.upSince = 0; mem.downSince = jsTime(); diff --git a/db/repl/replset_commands.cpp b/db/repl/replset_commands.cpp index 79639acd567..68dab7eb3c1 100644 --- a/db/repl/replset_commands.cpp +++ b/db/repl/replset_commands.cpp @@ -45,14 +45,18 @@ namespace mongo { help << "Just for regression tests.\n"; } CmdReplSetTest() : ReplSetCommand("replSetTest") { } - virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { log() << "replSet replSetTest command received: " << cmdObj.toString() << rsLog; + + if (!checkAuth(errmsg, result)) { + return false; + } + if( cmdObj.hasElement("forceInitialSyncFailure") ) { replSetForceInitialSyncFailure = (unsigned) cmdObj["forceInitialSyncFailure"].Number(); return true; } - // may not need this, but if removed check all tests still work: if( !check(errmsg, result) ) return false; @@ -76,11 +80,11 @@ namespace mongo { help << "internal"; } CmdReplSetGetRBID() : ReplSetCommand("replSetGetRBID") { - // this is ok but micros or combo with some rand() and/or 64 bits might be better -- + // this is ok but micros or combo with some rand() and/or 64 bits might be better -- // imagine a restart and a clock correction simultaneously (very unlikely but possible...) 
rbid = (int) curTimeMillis64(); } - virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { if( !check(errmsg, result) ) return false; result.append("rbid",rbid); @@ -108,7 +112,7 @@ namespace mongo { help << "\nhttp://www.mongodb.org/display/DOCS/Replica+Set+Commands"; } CmdReplSetGetStatus() : ReplSetCommand("replSetGetStatus", true) { } - virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { if ( cmdObj["forShell"].trueValue() ) lastError.disableForCommand(); @@ -128,17 +132,21 @@ namespace mongo { help << "\nhttp://www.mongodb.org/display/DOCS/Replica+Set+Commands"; } CmdReplSetReconfig() : ReplSetCommand("replSetReconfig"), mutex("rsreconfig") { } - virtual bool run(const string& a, BSONObj& b, string& errmsg, BSONObjBuilder& c, bool d) { + virtual bool run(const string& a, BSONObj& b, int e, string& errmsg, BSONObjBuilder& c, bool d) { try { rwlock_try_write lk(mutex); - return _run(a,b,errmsg,c,d); + return _run(a,b,e,errmsg,c,d); } catch(rwlock_try_write::exception&) { } errmsg = "a replSetReconfig is already in progress"; return false; } private: - bool _run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + bool _run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + if ( !checkAuth(errmsg, result) ) { + return false; + } + if( cmdObj["replSetReconfig"].type() != Object ) { errmsg = "no configuration specified"; return false; @@ -209,7 +217,7 @@ namespace mongo { } CmdReplSetFreeze() : ReplSetCommand("replSetFreeze") { } - virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { if( !check(errmsg, result) ) return false; int secs = (int) cmdObj.firstElement().numberInt(); @@ -233,7 +241,7 @@ namespace mongo { } CmdReplSetStepDown() : ReplSetCommand("replSetStepDown") { } - virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { if( !check(errmsg, result) ) return false; if( !theReplSet->box.getState().primary() ) { @@ -252,19 +260,19 @@ namespace mongo { long long int diff = lastOp - closest; result.append("closest", closest); result.append("difference", diff); - + if (diff < 0) { // not our problem, but we'll wait until thing settle down errmsg = "someone is ahead of the primary?"; return false; } - + if (diff > 10) { errmsg = "no secondaries within 10 seconds of my optime"; return false; } } - + int secs = (int) cmdObj.firstElement().numberInt(); if( secs == 0 ) secs = 60; diff --git a/db/repl/rs.cpp b/db/repl/rs.cpp index 84b92fe9297..243e087eff1 100644 --- a/db/repl/rs.cpp +++ b/db/repl/rs.cpp @@ -24,9 +24,12 @@ #include "rs.h" #include "connections.h" #include "../repl.h" +#include "../instance.h" -namespace mongo { +using namespace std; +namespace mongo { + using namespace bson; bool replSet = false; @@ -60,18 +63,43 @@ namespace mongo { } void ReplSetImpl::assumePrimary() { - log(2) << "assuming primary" << endl; + LOG(2) << "replSet 
assuming primary" << endl; assert( iAmPotentiallyHot() ); writelock lk("admin."); // so we are synchronized with _logOp() + + // Make sure that new OpTimes are higher than existing ones even with clock skew + DBDirectClient c; + BSONObj lastOp = c.findOne( "local.oplog.rs", Query().sort(reverseNaturalObj), NULL, QueryOption_SlaveOk ); + if ( !lastOp.isEmpty() ) { + OpTime::setLast( lastOp[ "ts" ].date() ); + } + changeState(MemberState::RS_PRIMARY); } void ReplSetImpl::changeState(MemberState s) { box.change(s, _self); } + void ReplSetImpl::setMaintenanceMode(const bool inc) { + lock lk(this); + + if (inc) { + log() << "replSet going into maintenance mode (" << _maintenanceMode << " other tasks)" << rsLog; + + _maintenanceMode++; + changeState(MemberState::RS_RECOVERING); + } + else { + _maintenanceMode--; + // no need to change state, syncTail will try to go live as a secondary soon + + log() << "leaving maintenance mode (" << _maintenanceMode << " other tasks)" << rsLog; + } + } + Member* ReplSetImpl::getMostElectable() { lock lk(this); - - Member *max = 0; + + Member *max = 0; for (set<unsigned>::iterator it = _electableSet.begin(); it != _electableSet.end(); it++) { const Member *temp = findById(*it); @@ -91,7 +119,7 @@ namespace mongo { const bool closeOnRelinquish = true; void ReplSetImpl::relinquish() { - log(2) << "attempting to relinquish" << endl; + LOG(2) << "replSet attempting to relinquish" << endl; if( box.getState().primary() ) { { writelock lk("admin."); // so we are synchronized with _logOp() @@ -239,7 +267,7 @@ namespace mongo { if( myConfig().arbiterOnly ) b.append("arbiterOnly", true); - if( myConfig().priority == 0 ) + if( myConfig().priority == 0 && !myConfig().arbiterOnly) b.append("passive", true); if( myConfig().slaveDelay ) b.append("slaveDelay", myConfig().slaveDelay); @@ -296,8 +324,10 @@ namespace mongo { _currentSyncTarget(0), _hbmsgTime(0), _self(0), + _maintenanceMode(0), mgr( new Manager(this) ), ghost( new GhostSync(this) ) { + _cfg = 0; memset(_hbmsg, 0, sizeof(_hbmsg)); strcpy( _hbmsg , "initial startup" ); @@ -306,7 +336,7 @@ namespace mongo { _seeds = &replSetCmdline.seeds; - log(1) << "replSet beginning startup..." << rsLog; + LOG(1) << "replSet beginning startup..." << rsLog; loadConfig(); @@ -317,7 +347,7 @@ namespace mongo { for( set<HostAndPort>::iterator i = replSetCmdline.seedSet.begin(); i != replSetCmdline.seedSet.end(); i++ ) { if( i->isSelf() ) { if( sss == 1 ) - log(1) << "replSet warning self is listed in the seed list and there are no other seeds listed did you intend that?" << rsLog; + LOG(1) << "replSet warning self is listed in the seed list and there are no other seeds listed did you intend that?" << rsLog; } else log() << "replSet warning command line seed " << i->toString() << " is not present in the current repl set config" << rsLog; @@ -382,7 +412,7 @@ namespace mongo { getLastErrorDefault = new BSONObj( c.getLastErrorDefaults ); } - list<const ReplSetConfig::MemberCfg*> newOnes; + list<ReplSetConfig::MemberCfg*> newOnes; // additive short-cuts the new config setup. If we are just adding a // node/nodes and nothing else is changing, this is additive. 
If it's // not a reconfig, we're not adding anything @@ -391,8 +421,8 @@ namespace mongo { unsigned nfound = 0; int me = 0; for( vector<ReplSetConfig::MemberCfg>::iterator i = c.members.begin(); i != c.members.end(); i++ ) { - const ReplSetConfig::MemberCfg& m = *i; + ReplSetConfig::MemberCfg& m = *i; if( m.h.isSelf() ) { me++; } @@ -443,8 +473,8 @@ namespace mongo { // this is a shortcut for simple changes if( additive ) { log() << "replSet info : additive change to configuration" << rsLog; - for( list<const ReplSetConfig::MemberCfg*>::const_iterator i = newOnes.begin(); i != newOnes.end(); i++ ) { - const ReplSetConfig::MemberCfg* m = *i; + for( list<ReplSetConfig::MemberCfg*>::const_iterator i = newOnes.begin(); i != newOnes.end(); i++ ) { + ReplSetConfig::MemberCfg *m = *i; Member *mi = new Member(m->h, m->_id, m, false); /** we will indicate that new members are up() initially so that we don't relinquish our @@ -456,6 +486,11 @@ namespace mongo { _members.push(mi); startHealthTaskFor(mi); } + + // if we aren't creating new members, we may have to update the + // groups for the current ones + _cfg->updateMembers(_members); + return true; } @@ -479,7 +514,7 @@ namespace mongo { string members = ""; for( vector<ReplSetConfig::MemberCfg>::iterator i = _cfg->members.begin(); i != _cfg->members.end(); i++ ) { - const ReplSetConfig::MemberCfg& m = *i; + ReplSetConfig::MemberCfg& m = *i; Member *mi; members += ( members == "" ? "" : ", " ) + m.h.toString(); if( m.h.isSelf() ) { @@ -594,7 +629,7 @@ namespace mongo { if( ++once == 1 ) log() << "replSet info you may need to run replSetInitiate -- rs.initiate() in the shell -- if that is not already done" << rsLog; if( _seeds->size() == 0 ) - log(1) << "replSet info no seed hosts were specified on the --replSet command line" << rsLog; + LOG(1) << "replSet info no seed hosts were specified on the --replSet command line" << rsLog; } else { startupStatus = EMPTYUNREACHABLE; diff --git a/db/repl/rs.h b/db/repl/rs.h index 7654597a930..14c630d27a2 100644 --- a/db/repl/rs.h +++ b/db/repl/rs.h @@ -58,10 +58,11 @@ namespace mongo { ~Member(); // intentionally unimplemented as should never be called -- see List1<>::Base. 
Member(const Member&); public: - Member(HostAndPort h, unsigned ord, const ReplSetConfig::MemberCfg *c, bool self); + Member(HostAndPort h, unsigned ord, ReplSetConfig::MemberCfg *c, bool self); string fullName() const { return h().toString(); } const ReplSetConfig::MemberCfg& config() const { return _config; } + ReplSetConfig::MemberCfg& configw() { return _config; } const HeartbeatInfo& hbinfo() const { return _hbinfo; } HeartbeatInfo& get_hbinfo() { return _hbinfo; } string lhb() const { return _hbinfo.lastHeartbeatMsg; } @@ -74,7 +75,7 @@ namespace mongo { private: friend class ReplSetImpl; - const ReplSetConfig::MemberCfg _config; + ReplSetConfig::MemberCfg _config; const HostAndPort _h; HeartbeatInfo _hbinfo; }; @@ -242,13 +243,19 @@ namespace mongo { const Member *primary; }; const SP get() { - scoped_lock lk(m); + rwlock lk(m, false); return sp; } - MemberState getState() const { return sp.state; } - const Member* getPrimary() const { return sp.primary; } + MemberState getState() const { + rwlock lk(m, false); + return sp.state; + } + const Member* getPrimary() const { + rwlock lk(m, false); + return sp.primary; + } void change(MemberState s, const Member *self) { - scoped_lock lk(m); + rwlock lk(m, true); if( sp.state != s ) { log() << "replSet " << s.toString() << rsLog; } @@ -262,24 +269,25 @@ namespace mongo { } } void set(MemberState s, const Member *p) { - scoped_lock lk(m); - sp.state = s; sp.primary = p; + rwlock lk(m, true); + sp.state = s; + sp.primary = p; } void setSelfPrimary(const Member *self) { change(MemberState::RS_PRIMARY, self); } void setOtherPrimary(const Member *mem) { - scoped_lock lk(m); + rwlock lk(m, true); assert( !sp.state.primary() ); sp.primary = mem; } void noteRemoteIsPrimary(const Member *remote) { - scoped_lock lk(m); + rwlock lk(m, true); if( !sp.state.secondary() && !sp.state.fatal() ) sp.state = MemberState::RS_RECOVERING; sp.primary = remote; } StateBox() : m("StateBox") { } private: - mongo::mutex m; + RWLock m; SP sp; }; @@ -446,11 +454,20 @@ namespace mongo { List1<Member> _members; // all members of the set EXCEPT _self. ReplSetConfig::MemberCfg _config; // config of _self unsigned _id; // _id of _self + + int _maintenanceMode; // if we should stay in recovering state public: // this is called from within a writelock in logOpRS unsigned selfId() const { return _id; } Manager *mgr; GhostSync *ghost; + /** + * This forces a secondary to go into recovering state and stay there + * until this is called again, passing in "false". Multiple threads can + * call this and it will leave maintenance mode once all of the callers + * have called it again, passing in false. + */ + void setMaintenanceMode(const bool inc); private: Member* head() const { return _members.head(); } public: @@ -553,11 +570,29 @@ namespace mongo { virtual bool logTheOp() { return false; } virtual LockType locktype() const { return NONE; } virtual void help( stringstream &help ) const { help << "internal"; } + + /** + * Some replica set commands call this and then call check(). This is + * intentional, as they might do things before theReplSet is initialized + * that still need to be checked for auth. 
+ */ + bool checkAuth(string& errmsg, BSONObjBuilder& result) { + if( !noauth && adminOnly() ) { + AuthenticationInfo *ai = cc().getAuthenticationInfo(); + if (!ai->isAuthorizedForLock("admin", locktype())) { + errmsg = "replSet command unauthorized"; + return false; + } + } + return true; + } + bool check(string& errmsg, BSONObjBuilder& result) { if( !replSet ) { errmsg = "not running with --replSet"; return false; } + if( theReplSet == 0 ) { result.append("startupStatus", ReplSet::startupStatus); string s; @@ -566,7 +601,8 @@ namespace mongo { result.append("info", "run rs.initiate(...) if not yet done for the set"); return false; } - return true; + + return checkAuth(errmsg, result); } }; @@ -578,7 +614,7 @@ namespace mongo { /** inlines ----------------- */ - inline Member::Member(HostAndPort h, unsigned ord, const ReplSetConfig::MemberCfg *c, bool self) : + inline Member::Member(HostAndPort h, unsigned ord, ReplSetConfig::MemberCfg *c, bool self) : _config(*c), _h(h), _hbinfo(ord) { assert(c); if( self ) diff --git a/db/repl/rs_config.cpp b/db/repl/rs_config.cpp index 4d6c7b59bba..745d60b537c 100644 --- a/db/repl/rs_config.cpp +++ b/db/repl/rs_config.cpp @@ -83,14 +83,24 @@ namespace mongo { if( hidden ) b << "hidden" << hidden; if( !buildIndexes ) b << "buildIndexes" << buildIndexes; if( !tags.empty() ) { - BSONArrayBuilder a; - for( set<string>::const_iterator i = tags.begin(); i != tags.end(); i++ ) - a.append(*i); - b.appendArray("tags", a.done()); + BSONObjBuilder a; + for( map<string,string>::const_iterator i = tags.begin(); i != tags.end(); i++ ) + a.append((*i).first, (*i).second); + b.append("tags", a.done()); } return b.obj(); } + void ReplSetConfig::updateMembers(List1<Member> &dest) { + for (vector<MemberCfg>::iterator source = members.begin(); source < members.end(); source++) { + for( Member *d = dest.head(); d; d = d->next() ) { + if (d->fullName() == (*source).h.toString()) { + d->configw().groupsw() = (*source).groups(); + } + } + } + } + bo ReplSetConfig::asBson() const { bob b; b.append("_id", _id).append("version", version); @@ -307,85 +317,39 @@ namespace mongo { } void ReplSetConfig::_populateTagMap(map<string,TagClause> &tagMap) { - // stage 1: create subgroups for each server corresponding to each of - // its tags. If a server has three tags, we want it to end up in three - // subgroups, e.g.: A is tagged with ["A", "dc.ny", "m"]. At the end of - // this step, tagMap will contain: - // "A" => {"A.A" : A} - // "dc.ny" => {"dc.ny.A" : A} - // "m" => {"m.A" : A} - // If we have more than one server with the same tag, we end up with - // something like "x.y.z" => [{"x.y.z.A" : A},{"x.y.z.B" : B}] (if A - // and B were tagged with "x.y.z"). + // create subgroups for each server corresponding to each of + // its tags. E.g.: + // + // A is tagged with {"server" : "A", "dc" : "ny"} + // B is tagged with {"server" : "B", "dc" : "ny"} + // + // At the end of this step, tagMap will contain: + // + // "server" => {"A" : [A], "B" : [B]} + // "dc" => {"ny" : [A,B]} + for (unsigned i=0; i<members.size(); i++) { MemberCfg member = members[i]; - for (set<string>::iterator tag = member.tags.begin(); tag != member.tags.end(); tag++) { - TagClause& clause = tagMap[*tag]; - clause.name = *tag; + for (map<string,string>::iterator tag = member.tags.begin(); tag != member.tags.end(); tag++) { + string label = (*tag).first; + string value = (*tag).second; - // we also populate the map, to be used by step 2... 
I think - // this is correct, as step 2 condenses the groups anyway - string perServerName = *tag+"."+members[i].h.toString(); + TagClause& clause = tagMap[label]; + clause.name = label; TagSubgroup* subgroup; - if (clause.subgroups.find(perServerName) == clause.subgroups.end()) { - clause.subgroups[perServerName] = subgroup = new TagSubgroup(perServerName); + // search for "ny" in "dc"'s clause + if (clause.subgroups.find(value) == clause.subgroups.end()) { + clause.subgroups[value] = subgroup = new TagSubgroup(value); } else { - subgroup = clause.subgroups[perServerName]; + subgroup = clause.subgroups[value]; } subgroup->m.insert(&members[i]); } } - - // stage 2: generate all parent tags. If we have "x.y.z", this - // generates "x.y" and "x" and creates a map for each clause, e.g., - // "x"'s clause might have a map that looks like: - // "x.y" => {A, B} {C} - // "x.w" => {D} {E, F} - for (map<string,TagClause>::iterator baseClause = tagMap.begin(); baseClause != tagMap.end(); baseClause++) { - string prevPrefix = (*baseClause).first; - const char *dot = strrchr(prevPrefix.c_str(), '.'); - - while (dot) { - // get x.y - string xyTag = string(prevPrefix.c_str(), dot - prevPrefix.c_str()); - log(1) << "generating tag " << xyTag << rsLog; - TagClause& xyClause = tagMap[xyTag]; - xyClause.name = xyTag; - - // get all of x.y.z's subgroups, add them as a single subgroup of x.y - TagSubgroup* condensedSubgroup;; - if (xyClause.subgroups.find(prevPrefix) == xyClause.subgroups.end()) { - // label this subgroup one higher than the current, e.g., - // "x.y.z" if we're creating the "x.y" clause - condensedSubgroup = new TagSubgroup(prevPrefix); - xyClause.subgroups[prevPrefix] = condensedSubgroup; - } - else { - condensedSubgroup = xyClause.subgroups[prevPrefix]; - assert(condensedSubgroup->name == prevPrefix); - } - - TagClause& xyzClause = tagMap[prevPrefix]; - - for (map<string,TagSubgroup*>::iterator xyzSubgroup = xyzClause.subgroups.begin(); - xyzSubgroup != xyzClause.subgroups.end(); xyzSubgroup++) { - for (set<MemberCfg*>::const_iterator xyzMember = (*xyzSubgroup).second->m.begin(); - xyzMember != (*xyzSubgroup).second->m.end(); xyzMember++) { - condensedSubgroup->m.insert(*xyzMember); - // we'll link the member back with the group later, to - // avoid creating extra link-backs - } - } - - // advance: if we were handling "x.y", now do "x" - prevPrefix = xyTag; - dot = strrchr(prevPrefix.c_str(), '.'); - } - } } void ReplSetConfig::parseRules(const BSONObj& modes) { @@ -442,7 +406,7 @@ namespace mongo { for (set<MemberCfg *>::iterator cfg = (*sgs).second->m.begin(); !foundMe && cfg != (*sgs).second->m.end(); cfg++) { - (*cfg)->groupsw(this).insert((*sgs).second); + (*cfg)->groupsw().insert((*sgs).second); } } @@ -463,7 +427,7 @@ namespace mongo { } // if we got here, this is a valid rule - log(1) << "new rule " << rule.fieldName() << ": " << r->toString() << rsLog; + LOG(1) << "replSet new rule " << rule.fieldName() << ": " << r->toString() << rsLog; rules[rule.fieldName()] = r; } } @@ -532,9 +496,10 @@ namespace mongo { if( mobj.hasElement("votes") ) m.votes = (unsigned) mobj["votes"].Number(); if( mobj.hasElement("tags") ) { - vector<BSONElement> v = mobj["tags"].Array(); - for( unsigned i = 0; i < v.size(); i++ ) - m.tags.insert( v[i].String() ); + const BSONObj &t = mobj["tags"].Obj(); + for (BSONObj::iterator c = t.begin(); c.more(); c.next()) { + m.tags[(*c).fieldName()] = (*c).String(); + } } m.check(); } diff --git a/db/repl/rs_config.h b/db/repl/rs_config.h index 
d9c9d97ed4d..4e0d1e862c0 100644 --- a/db/repl/rs_config.h +++ b/db/repl/rs_config.h @@ -25,7 +25,7 @@ #include "health.h" namespace mongo { - + class Member; const string rsConfigNs = "local.system.replset"; class ReplSetConfig { @@ -61,15 +61,14 @@ namespace mongo { int slaveDelay; /* seconds. int rather than unsigned for convenient to/front bson conversion. */ bool hidden; /* if set, don't advertise to drives in isMaster. for non-primaries (priority 0) */ bool buildIndexes; /* if false, do not create any non-_id indexes */ - set<string> tags; /* tagging for data center, rack, etc. */ + map<string,string> tags; /* tagging for data center, rack, etc. */ private: set<TagSubgroup*> _groups; // the subgroups this member belongs to public: const set<TagSubgroup*>& groups() const { return _groups; } - set<TagSubgroup*>& groupsw(ReplSetConfig *c) { - assert(!c->_constructed); + set<TagSubgroup*>& groupsw() { return _groups; } void check() const; /* check validity, assert if not. */ @@ -114,6 +113,11 @@ namespace mongo { void saveConfigLocally(BSONObj comment); // to local db string saveConfigEverywhere(); // returns textual info on what happened + /** + * Update members' groups when the config changes but members stay the same. + */ + void updateMembers(List1<Member> &dest); + BSONObj asBson() const; bool _constructed; diff --git a/db/repl/rs_initialsync.cpp b/db/repl/rs_initialsync.cpp index 814bb1d0bf8..142878ab478 100644 --- a/db/repl/rs_initialsync.cpp +++ b/db/repl/rs_initialsync.cpp @@ -75,7 +75,7 @@ namespace mongo { if( d && d->stats.nrecords == 0 ) return; // already empty, ok. - log(1) << "replSet empty oplog" << rsLog; + LOG(1) << "replSet empty oplog" << rsLog; d->emptyCappedCollection(rsoplog); } @@ -85,6 +85,7 @@ namespace mongo { // find the member with the lowest ping time that has more data than me for (Member *m = _members.head(); m; m = m->next()) { if (m->hbinfo().up() && + HeartbeatInfo::numPings > config().members.size()*2 && (m->state() == MemberState::RS_PRIMARY || (m->state() == MemberState::RS_SECONDARY && m->hbinfo().opTime > lastOpTimeWritten)) && (!closest || m->hbinfo().ping < closest->hbinfo().ping)) { diff --git a/db/repl/rs_initiate.cpp b/db/repl/rs_initiate.cpp index 5dd0ab23d24..0a796e1e445 100644 --- a/db/repl/rs_initiate.cpp +++ b/db/repl/rs_initiate.cpp @@ -150,7 +150,7 @@ namespace mongo { h << "Initiate/christen a replica set."; h << "\nhttp://www.mongodb.org/display/DOCS/Replica+Set+Commands"; } - virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) { + virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { log() << "replSet replSetInitiate admin command received from client" << rsLog; if( !replSet ) { diff --git a/db/repl/rs_member.h b/db/repl/rs_member.h index 8e5a8ad9da3..d60bb5261e9 100644 --- a/db/repl/rs_member.h +++ b/db/repl/rs_member.h @@ -80,7 +80,8 @@ namespace mongo { DiagStr lastHeartbeatMsg; OpTime opTime; int skew; - unsigned int ping; // microseconds + unsigned int ping; // milliseconds + static unsigned int numPings; bool up() const { return health > 0; } diff --git a/db/repl/rs_rollback.cpp b/db/repl/rs_rollback.cpp index 67d6cc26f07..cce5c091074 100644 --- a/db/repl/rs_rollback.cpp +++ b/db/repl/rs_rollback.cpp @@ -574,7 +574,7 @@ namespace mongo { sethbmsg("rollback 6"); // clean up oplog - log(2) << "replSet rollback truncate oplog after " << h.commonPoint.toStringPretty() << rsLog; + LOG(2) << "replSet rollback truncate oplog 
after " << h.commonPoint.toStringPretty() << rsLog; // todo: fatal error if this throws? oplogDetails->cappedTruncateAfter(rsoplog, h.commonPointOurDiskloc, false); diff --git a/db/repl/rs_sync.cpp b/db/repl/rs_sync.cpp index 95bbe2040a6..5fe3075c0f7 100644 --- a/db/repl/rs_sync.cpp +++ b/db/repl/rs_sync.cpp @@ -188,6 +188,16 @@ namespace mongo { */ bool ReplSetImpl::tryToGoLiveAsASecondary(OpTime& /*out*/ minvalid) { bool golive = false; + + { + lock lk( this ); + + if (_maintenanceMode > 0) { + // we're not actually going live + return true; + } + } + { readlock lk("local.replset.minvalid"); BSONObj mv; @@ -211,7 +221,7 @@ namespace mongo { BSONObj remoteOldestOp = r.findOne(rsoplog, Query()); OpTime ts = remoteOldestOp["ts"]._opTime(); DEV log() << "replSet remoteOldestOp: " << ts.toStringLong() << rsLog; - else log(3) << "replSet remoteOldestOp: " << ts.toStringLong() << rsLog; + else LOG(3) << "replSet remoteOldestOp: " << ts.toStringLong() << rsLog; DEV { log() << "replSet lastOpTimeWritten: " << lastOpTimeWritten.toStringLong() << rsLog; log() << "replSet our state: " << state().toString() << rsLog; @@ -251,7 +261,7 @@ namespace mongo { assert(r.conn() == 0); if( !r.connect(hn) ) { - log(2) << "replSet can't connect to " << hn << " to read operations" << rsLog; + LOG(2) << "replSet can't connect to " << hn << " to read operations" << rsLog; r.resetConnection(); return false; } @@ -407,7 +417,7 @@ namespace mongo { } - { + try { writelock lk(""); /* if we have become primary, we dont' want to apply things from elsewhere @@ -421,11 +431,16 @@ namespace mongo { syncApply(o); _logOpObjRS(o); // with repl sets we write the ops to our oplog too } + catch (DBException& e) { + sethbmsg(str::stream() << "syncTail: " << e.toString() << ", syncing: " << o); + sleepsecs(30); + return; + } } } r.tailCheck(); if( !r.haveCursor() ) { - log(1) << "replSet end syncTail pass with " << hn << rsLog; + LOG(1) << "replSet end syncTail pass with " << hn << rsLog; // TODO : reuse our connection to the primary. return; } @@ -475,9 +490,7 @@ namespace mongo { _syncThread(); } catch(DBException& e) { - sethbmsg(str::stream() << "syncThread: " << e.toString() << - ", try 'use local; db.oplog.rs.findOne({ts : {$gt : new Timestamp(" << - lastOpTimeWritten.getSecs() << "000," << lastOpTimeWritten.getInc() << ")}});' on the primary"); + sethbmsg(str::stream() << "syncThread: " << e.toString()); sleepsecs(10); } catch(...) 
diff --git a/db/scanandorder.cpp b/db/scanandorder.cpp
new file mode 100644
index 00000000000..efa9c8d7f13
--- /dev/null
+++ b/db/scanandorder.cpp
@@ -0,0 +1,93 @@
+/* scanandorder.cpp
+   Order results (that aren't already indexed and in order).
+*/
+
+/**
+ *    Copyright (C) 2008 10gen Inc.
+ *
+ *    This program is free software: you can redistribute it and/or modify
+ *    it under the terms of the GNU Affero General Public License, version 3,
+ *    as published by the Free Software Foundation.
+ *
+ *    This program is distributed in the hope that it will be useful,
+ *    but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *    GNU Affero General Public License for more details.
+ *
+ *    You should have received a copy of the GNU Affero General Public License
+ *    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "pch.h"
+#include "scanandorder.h"
+
+namespace mongo {
+
+    const unsigned ScanAndOrder::MaxScanAndOrderBytes = 32 * 1024 * 1024;
+
+    void ScanAndOrder::_add(BSONObj& k, BSONObj o, DiskLoc* loc) {
+        if (!loc) {
+            _best.insert(make_pair(k.getOwned(),o.getOwned()));
+        }
+        else {
+            BSONObjBuilder b;
+            b.appendElements(o);
+            b.append("$diskLoc", loc->toBSONObj());
+            _best.insert(make_pair(k.getOwned(), b.obj().getOwned()));
+        }
+    }
+
+    void ScanAndOrder::_addIfBetter(BSONObj& k, BSONObj o, BestMap::iterator i, DiskLoc* loc) {
+        /* todo : we don't correct _approxSize here. */
+        const BSONObj& worstBestKey = i->first;
+        int c = worstBestKey.woCompare(k, _order._spec.keyPattern);
+        if ( c > 0 ) {
+            // k is better, 'upgrade'
+            _best.erase(i);
+            _add(k, o, loc);
+        }
+    }
+
+
+    void ScanAndOrder::add(BSONObj o, DiskLoc* loc) {
+        assert( o.isValid() );
+        BSONObj k = _order.getKeyFromObject(o);
+        if ( k.isEmpty() ) {
+            return;
+        }
+        if ( (int) _best.size() < _limit ) {
+            _approxSize += k.objsize();
+            _approxSize += o.objsize();
+
+            /* note : adjust when bson return limit adjusts. note this limit should be a bit higher. */
+            uassert( 10128 , "too much data for sort() with no index.  add an index or specify a smaller limit", _approxSize < MaxScanAndOrderBytes );
+
+            _add(k, o, loc);
+            return;
+        }
+        BestMap::iterator i;
+        assert( _best.end() != _best.begin() );
+        i = _best.end();
+        i--;
+        _addIfBetter(k, o, i, loc);
+    }
+
+
+    void ScanAndOrder::fill(BufBuilder& b, Projection *filter, int& nout ) const {
+        int n = 0;
+        int nFilled = 0;
+        for ( BestMap::const_iterator i = _best.begin(); i != _best.end(); i++ ) {
+            n++;
+            if ( n <= _startFrom )
+                continue;
+            const BSONObj& o = i->second;
+            fillQueryResultFromObj(b, filter, o);
+            nFilled++;
+            if ( nFilled >= _limit )
+                break;
+            uassert( 10129 , "too much data for sort() with no index", b.len() < (int)MaxScanAndOrderBytes ); // appserver limit
+        }
+        nout = nFilled;
+    }
+
+} // namespace mongo
diff --git a/db/scanandorder.h b/db/scanandorder.h
index 2957ae60245..33e76f61f67 100644
--- a/db/scanandorder.h
+++ b/db/scanandorder.h
@@ -22,6 +22,7 @@
 
 #include "indexkey.h"
 #include "queryutil.h"
+#include "projection.h"
 
 namespace mongo {
 
@@ -76,30 +77,9 @@ namespace mongo {
     typedef multimap<BSONObj,BSONObj,BSONObjCmp> BestMap;
     class ScanAndOrder {
-        void _add(BSONObj& k, BSONObj o, DiskLoc* loc) {
-            if (!loc) {
-                _best.insert(make_pair(k.getOwned(),o.getOwned()));
-            }
-            else {
-                BSONObjBuilder b;
-                b.appendElements(o);
-                b.append("$diskLoc", loc->toBSONObj());
-                _best.insert(make_pair(k.getOwned(), b.obj().getOwned()));
-            }
-        }
-
-        void _addIfBetter(BSONObj& k, BSONObj o, BestMap::iterator i, DiskLoc* loc) {
-            /* todo : we don't correct _approxSize here. */
-            const BSONObj& worstBestKey = i->first;
-            int c = worstBestKey.woCompare(k, _order._spec.keyPattern);
-            if ( c > 0 ) {
-                // k is better, 'upgrade'
-                _best.erase(i);
-                _add(k, o, loc);
-            }
-        }
-
     public:
+        static const unsigned MaxScanAndOrderBytes;
+
         ScanAndOrder(int startFrom, int limit, BSONObj order, const FieldRangeSet &frs) :
             _best( BSONObjCmp( order ) ),
             _startFrom(startFrom), _order(order, frs) {
@@ -107,60 +87,25 @@ namespace mongo {
             _approxSize = 0;
         }
 
-        int size() const {
-            return _best.size();
-        }
-
-        void add(BSONObj o, DiskLoc* loc) {
-            assert( o.isValid() );
-            BSONObj k = _order.getKeyFromObject(o);
-            if ( k.isEmpty() ) {
-                return;
-            }
-            if ( (int) _best.size() < _limit ) {
-                _approxSize += k.objsize();
-                _approxSize += o.objsize();
-
-                /* note : adjust when bson return limit adjusts. note this limit should be a bit higher. */
-                uassert( 10128 , "too much data for sort() with no index.  add an index or specify a smaller limit", _approxSize < 32 * 1024 * 1024 );
-
-                _add(k, o, loc);
-                return;
-            }
-            BestMap::iterator i;
-            assert( _best.end() != _best.begin() );
-            i = _best.end();
-            i--;
-            _addIfBetter(k, o, i, loc);
-        }
+        int size() const { return _best.size(); }
 
-        void _fill(BufBuilder& b, Projection *filter, int& nout, BestMap::iterator begin, BestMap::iterator end) {
-            int n = 0;
-            int nFilled = 0;
-            for ( BestMap::iterator i = begin; i != end; i++ ) {
-                n++;
-                if ( n <= _startFrom )
-                    continue;
-                BSONObj& o = i->second;
-                fillQueryResultFromObj(b, filter, o);
-                nFilled++;
-                if ( nFilled >= _limit )
-                    break;
-                uassert( 10129 , "too much data for sort() with no index", b.len() < 4000000 ); // appserver limit
-            }
-            nout = nFilled;
-        }
+        void add(BSONObj o, DiskLoc* loc);
 
         /* scanning complete. stick the query result in b for n objects. */
-        void fill(BufBuilder& b, Projection *filter, int& nout) {
-            _fill(b, filter, nout, _best.begin(), _best.end());
-        }
-
+        void fill(BufBuilder& b, Projection *filter, int& nout ) const;
+
+    private:
+
+        void _add(BSONObj& k, BSONObj o, DiskLoc* loc);
+
+        void _addIfBetter(BSONObj& k, BSONObj o, BestMap::iterator i, DiskLoc* loc);
+
         BestMap _best; // key -> full object
         int _startFrom;
         int _limit;   // max to send back.
         KeyType _order;
         unsigned _approxSize;
+
     };
 
 } // namespace mongo
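ScanAndOrder, moved out of the header above, is a bounded top-k structure: it keeps at most _limit documents ordered by sort key and caps memory at MaxScanAndOrderBytes; once full, a new document only enters by evicting the current worst. The core idea in isolation (simplified stand-in types, not the server's code):

    #include <iostream>
    #include <map>
    #include <string>

    typedef std::multimap<int, std::string> BestMap; // sort key -> payload

    void addTopK(BestMap& best, size_t limit, int key, const std::string& doc) {
        if (best.size() < limit) {
            best.insert(std::make_pair(key, doc));
            return;
        }
        BestMap::iterator worst = --best.end();   // largest key is worst for ascending order
        if (key < worst->first) {                 // new doc beats the worst kept one: 'upgrade'
            best.erase(worst);
            best.insert(std::make_pair(key, doc));
        }
    }

    int main() {
        BestMap best;
        int keys[] = { 9, 3, 7, 1, 8, 2 };
        for (int i = 0; i < 6; ++i)
            addTopK(best, 3, keys[i], "doc");
        for (BestMap::iterator i = best.begin(); i != best.end(); ++i)
            std::cout << i->first << '\n';        // prints 1 2 3
        return 0;
    }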
diff --git a/db/security.cpp b/db/security.cpp
index 4a6f32600aa..b57326a8233 100644
--- a/db/security.cpp
+++ b/db/security.cpp
@@ -30,7 +30,7 @@ namespace mongo {
 
     bool AuthenticationInfo::_warned = false;
-
+    /*
     void AuthenticationInfo::print() const {
         cout << "AuthenticationInfo: " << this << '\n';
         for ( MA::const_iterator i=_dbs.begin(); i!=_dbs.end(); i++ ) {
@@ -38,7 +38,7 @@ namespace mongo {
         }
         cout << "END" << endl;
     }
-
+    */
 
     string AuthenticationInfo::getUser( const string& dbname ) const {
         scoped_spinlock lk(_lock);
@@ -78,9 +78,9 @@ namespace mongo {
             pwd = internalSecurity.pwd;
         }
         else {
-            static BSONObj userPattern = fromjson("{\"user\":1}");
+            // static BSONObj userPattern = fromjson("{\"user\":1}");
             string systemUsers = dbname + ".system.users";
-            OCCASIONALLY Helpers::ensureIndex(systemUsers.c_str(), userPattern, false, "user_1");
+            // OCCASIONALLY Helpers::ensureIndex(systemUsers.c_str(), userPattern, false, "user_1");
             {
                 BSONObjBuilder b;
                 b << "user" << user;
@@ -107,7 +107,7 @@ namespace mongo {
         }
     }
 
-    bool CmdLogout::run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+    bool CmdLogout::run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
         AuthenticationInfo *ai = cc().getAuthenticationInfo();
         ai->logout(dbname);
         return true;
diff --git a/db/security.h b/db/security.h
index 2937ef29f80..2937ef29f80 100644..100755
--- a/db/security.h
+++ b/db/security.h
diff --git a/db/security_commands.cpp b/db/security_commands.cpp
index 16face7fc32..2db96802404 100644
--- a/db/security_commands.cpp
+++ b/db/security_commands.cpp
@@ -56,7 +56,7 @@ namespace mongo {
         void help(stringstream& h) const { h << "internal"; }
         virtual LockType locktype() const { return NONE; }
         CmdGetNonce() : Command("getnonce") {}
-        bool run(const string&, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+        bool run(const string&, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
             nonce64 *n = new nonce64(Security::getNonce());
             stringstream ss;
             ss << hex << *n;
@@ -68,7 +68,7 @@ namespace mongo {
 
     CmdLogout cmdLogout;
 
-    bool CmdAuthenticate::run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+    bool CmdAuthenticate::run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
         log() << " authenticate: " << cmdObj << endl;
 
         string user = cmdObj.getStringField("user");
diff --git a/db/security_common.h b/db/security_common.h
index 3af70cc7b97..2f2565f3ce0 100644
--- a/db/security_common.h
+++ b/db/security_common.h
@@ -57,10 +57,10 @@ namespace mongo {
         virtual bool slaveOk() const {
             return true;
         }
-        virtual LockType locktype() const { return WRITE; }
+        virtual LockType locktype() const { return READ; }
         virtual void help(stringstream& ss) const { ss << "internal"; }
         CmdAuthenticate() : Command("authenticate") {}
-        bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl);
+        bool run(const string& dbname , BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result, bool fromRepl);
     private:
         bool getUserObj(const string& dbname, const string& user, BSONObj& userObj, string& pwd);
         void authenticate(const string& dbname, const string& user, const bool readOnly);
@@ -77,7 +77,7 @@ namespace mongo {
         void help(stringstream& h) const { h << "de-authenticate"; }
         virtual LockType locktype() const { return NONE; }
         CmdLogout() : Command("logout") {}
-        bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl);
+        bool run(const string& dbname , BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result, bool fromRepl);
     };
 
 } // namespace mongo
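Throughout this patch, Command::run implementations gain an int parameter (options) between cmdObj and errmsg, and CmdAuthenticate's lock type is relaxed from WRITE to READ. A schematic of the signature change, using a toy base class rather than the real Command API:

    #include <string>

    struct CommandSketch {
        // old: run(dbname, cmdObj, errmsg, result, fromRepl)
        // new: run(dbname, cmdObj, options, errmsg, result, fromRepl)
        virtual bool run(const std::string& dbname, int options, std::string& errmsg) = 0;
        virtual ~CommandSketch() {}
    };

    struct LogoutSketch : CommandSketch {
        // most commands, like CmdLogout above, simply ignore the new argument
        bool run(const std::string&, int /*options*/, std::string&) { return true; }
    };

    int main() {
        LogoutSketch cmd;
        std::string errmsg;
        return cmd.run("admin", 0, errmsg) ? 0 : 1;
    }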
diff --git a/db/stats/top.cpp b/db/stats/top.cpp
index 51a270c8c8c..f5b6ee42f1c 100644
--- a/db/stats/top.cpp
+++ b/db/stats/top.cpp
@@ -156,7 +156,7 @@ namespace mongo {
         virtual LockType locktype() const { return READ; }
         virtual void help( stringstream& help ) const { help << "usage by collection, in micros "; }
-        virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+        virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
             {
                 BSONObjBuilder b( result.subobjStart( "totals" ) );
                 b.append( "note" , "all times in microseconds" );
diff --git a/dbtests/basictests.cpp b/dbtests/basictests.cpp
index 299dc4352ad..80bd7d70892 100644
--- a/dbtests/basictests.cpp
+++ b/dbtests/basictests.cpp
@@ -26,6 +26,7 @@
 #include "../util/queue.h"
 #include "../util/paths.h"
 #include "../util/stringutils.h"
+#include "../util/compress.h"
 #include "../db/db.h"
 
 namespace BasicTests {
@@ -411,6 +412,21 @@ namespace BasicTests {
             ASSERT_EQUALS( -1 , lexNumCmp( "a.b.c.d0" , "a.b.c.d00" ) );
             ASSERT_EQUALS( 1 , lexNumCmp( "a.b.c.0.y" , "a.b.c.00.x" ) );
 
+            ASSERT_EQUALS( -1, lexNumCmp( "a", "a-" ) );
+            ASSERT_EQUALS( 1, lexNumCmp( "a-", "a" ) );
+            ASSERT_EQUALS( 0, lexNumCmp( "a-", "a-" ) );
+
+            ASSERT_EQUALS( -1, lexNumCmp( "a", "a-c" ) );
+            ASSERT_EQUALS( 1, lexNumCmp( "a-c", "a" ) );
+            ASSERT_EQUALS( 0, lexNumCmp( "a-c", "a-c" ) );
+
+            ASSERT_EQUALS( 1, lexNumCmp( "a-c.t", "a.t" ) );
+            ASSERT_EQUALS( -1, lexNumCmp( "a.t", "a-c.t" ) );
+            ASSERT_EQUALS( 0, lexNumCmp( "a-c.t", "a-c.t" ) );
+
+            ASSERT_EQUALS( 1, lexNumCmp( "ac.t", "a.t" ) );
+            ASSERT_EQUALS( -1, lexNumCmp( "a.t", "ac.t" ) );
+            ASSERT_EQUALS( 0, lexNumCmp( "ac.t", "ac.t" ) );
         }
     };
 
@@ -596,6 +612,40 @@ namespace BasicTests {
         }
     };
 
+    class CmdLineParseConfigTest {
+    public:
+        void run() {
+            stringstream ss1;
+            istringstream iss1("");
+            CmdLine::parseConfigFile( iss1, ss1 );
+            stringstream ss2;
+            istringstream iss2("password=\'foo bar baz\'");
+            CmdLine::parseConfigFile( iss2, ss2 );
+            stringstream ss3;
+            istringstream iss3("\t this = false \n#that = true\n #another = whocares\n\n other = monkeys ");
+            CmdLine::parseConfigFile( iss3, ss3 );
+
+            ASSERT( ss1.str().compare("\n") == 0 );
+            ASSERT( ss2.str().compare("password=\'foo bar baz\'\n\n") == 0 );
+            ASSERT( ss3.str().compare("\n other = monkeys \n\n") == 0 );
+        }
+    };
+
+    struct CompressionTest1 {
+        void run() {
+            const char * c = "this is a test";
+            std::string s;
+            size_t len = compress(c, strlen(c)+1, &s);
+            assert( len > 0 );
+
+            std::string out;
+            bool ok = uncompress(s.c_str(), s.size(), &out);
+            assert(ok);
+            assert( strcmp(out.c_str(), c) == 0 );
+        }
+    } ctest1;
+
+
     class All : public Suite {
     public:
         All() : Suite( "basic" ) {
@@ -632,6 +682,9 @@ namespace BasicTests {
             add< HostAndPortTests >();
             add< RelativePathTest >();
+            add< CmdLineParseConfigTest >();
+
+            add< CompressionTest1 >();
         }
     } myall;
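CompressionTest1 exercises the new compress/uncompress wrappers in util/compress, which appear to sit on top of the snappy sources bundled under third_party/snappy elsewhere in this change. A round trip against snappy's own API (assuming the bundled headers are on the include path) looks nearly identical:

    #include <snappy.h>
    #include <cassert>
    #include <cstring>
    #include <string>

    int main() {
        const char* c = "this is a test";
        std::string compressed;
        size_t len = snappy::Compress(c, strlen(c) + 1, &compressed);
        assert(len > 0 && len == compressed.size());

        std::string out;
        bool ok = snappy::Uncompress(compressed.data(), compressed.size(), &out);
        assert(ok && memcmp(out.data(), c, strlen(c) + 1) == 0);
        return 0;
    }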
diff --git a/dbtests/cursortests.cpp b/dbtests/cursortests.cpp
index 4d2de164165..cf661864b95 100644
--- a/dbtests/cursortests.cpp
+++ b/dbtests/cursortests.cpp
@@ -33,6 +33,7 @@ namespace CursorTests {
 
         class Base {
         protected:
+            static const char *ns() { return "unittests.cursortests.Base"; }
            FieldRangeVector *vec( int *vals, int len, int direction = 1 ) {
                FieldRangeSet s( "", BSON( "a" << 1 ), true );
                for( int i = 0; i < len; i += 2 ) {
@@ -49,6 +50,7 @@ namespace CursorTests {
                IndexSpec *idxSpec = new IndexSpec( BSON( "a" << 1 ) );
                return new FieldRangeVector( s, *idxSpec, direction );
            }
+            DBDirectClient _c;
        private:
            vector< BSONObj > _objs;
        };
@@ -258,6 +260,29 @@ namespace CursorTests {
            }
            virtual BSONObj idx() const { return BSON( "a" << 1 << "b" << 1 ); }
        };
+
+        class AbortImplicitScan : public Base {
+        public:
+            void run() {
+                dblock lk;
+                IndexSpec idx( BSON( "a" << 1 << "b" << 1 ) );
+                _c.ensureIndex( ns(), idx.keyPattern );
+                for( int i = 0; i < 300; ++i ) {
+                    _c.insert( ns(), BSON( "a" << i << "b" << 5 ) );
+                }
+                FieldRangeSet frs( ns(), BSON( "b" << 3 ), true );
+                boost::shared_ptr<FieldRangeVector> frv( new FieldRangeVector( frs, idx, 1 ) );
+                Client::Context ctx( ns() );
+                scoped_ptr<BtreeCursor> c( BtreeCursor::make( nsdetails( ns() ), 1, nsdetails( ns() )->idx(1), frv, 1 ) );
+                int initialNscanned = c->nscanned();
+                ASSERT( initialNscanned < 200 );
+                ASSERT( c->ok() );
+                c->advance();
+                ASSERT( c->nscanned() > initialNscanned );
+                ASSERT( c->nscanned() < 200 );
+                ASSERT( c->ok() );
+            }
+        };
 
    } // namespace BtreeCursorTests
 
@@ -274,6 +299,7 @@ namespace CursorTests {
            add< BtreeCursorTests::EqIn >();
            add< BtreeCursorTests::RangeEq >();
            add< BtreeCursorTests::RangeIn >();
+            add< BtreeCursorTests::AbortImplicitScan >();
        }
    } myall;
} // namespace CursorTests
diff --git a/dbtests/directclienttests.cpp b/dbtests/directclienttests.cpp
index 5b3bde70889..860eb7e7e5c 100644
--- a/dbtests/directclienttests.cpp
+++ b/dbtests/directclienttests.cpp
@@ -84,7 +84,7 @@ namespace DirectClientTests {
            ASSERT_EQUALS((int)client().count(ns), 1);
 
            client().dropCollection(ns);
-            client().insert(ns, objs, InsertOption_KeepGoing);
+            client().insert(ns, objs, InsertOption_ContinueOnError);
            ASSERT_EQUALS(client().getLastErrorDetailed()["code"].numberInt(), 11000);
            ASSERT_EQUALS((int)client().count(ns), 2);
        }
diff --git a/dbtests/framework.cpp b/dbtests/framework.cpp
index 99fcad51d97..95ed8b33668 100644
--- a/dbtests/framework.cpp
+++ b/dbtests/framework.cpp
@@ -209,6 +209,7 @@ namespace mongo {
 
            hidden_options.add_options()
            ("suites", po::value< vector<string> >(), "test suites to run")
+            ("nopreallocj", "disable journal prealloc")
            ;
 
            positional_options.add("suites", -1);
@@ -247,6 +248,10 @@ namespace mongo {
                cmdLine.dur = true;
            }
 
+            if( params.count("nopreallocj") ) {
+                cmdLine.preallocj = false;
+            }
+
            if (params.count("debug") || params.count("verbose") ) {
                logLevel = 1;
            }
diff --git a/dbtests/jsobjtests.cpp b/dbtests/jsobjtests.cpp
index 9f00d4cabce..034bb97c620 100644
--- a/dbtests/jsobjtests.cpp
+++ b/dbtests/jsobjtests.cpp
@@ -569,6 +569,13 @@ namespace JsobjTests {
            }
 
            {
+                BSONObjBuilder b;
+                b.appendBinData("f", 33, (BinDataType) 1, "123456789012345678901234567890123");
+                BSONObj o = b.obj();
+                keyTest( o, false );
+            }
+
+            {
                for( int i = 1; i <= 3; i++ ) {
                    for( int j = 1; j <= 3; j++ ) {
                        BSONObjBuilder b;
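The rename InsertOption_KeepGoing to InsertOption_ContinueOnError in directclienttests.cpp names the semantics the test asserts: a batch insert records the duplicate-key error (code 11000) but still applies the remaining documents. A toy model of that behavior, with a std::set standing in for the collection:

    #include <iostream>
    #include <set>
    #include <vector>

    int insertBatchContinueOnError(std::set<int>& coll, const std::vector<int>& batch) {
        int firstError = 0;
        for (size_t i = 0; i < batch.size(); ++i) {
            if (!coll.insert(batch[i]).second && firstError == 0)
                firstError = 11000;   // duplicate key; keep applying the rest anyway
        }
        return firstError;            // what getLastError would later report
    }

    int main() {
        std::set<int> coll;
        std::vector<int> batch;
        batch.push_back(1); batch.push_back(1); batch.push_back(2);
        std::cout << insertBatchContinueOnError(coll, batch)
                  << ' ' << coll.size() << '\n';  // 11000 2
        return 0;
    }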
diff --git a/dbtests/namespacetests.cpp b/dbtests/namespacetests.cpp
index 392917dd6d3..bbb8f5e596e 100644
--- a/dbtests/namespacetests.cpp
+++ b/dbtests/namespacetests.cpp
@@ -44,12 +44,13 @@ namespace NamespaceTests {
                ASSERT( theDataFileMgr.findAll( ns() )->eof() );
            }
        protected:
-            void create() {
+            void create( bool sparse = false ) {
                NamespaceDetailsTransient::get_w( ns() ).deletedIndex();
                BSONObjBuilder builder;
                builder.append( "ns", ns() );
                builder.append( "name", "testIndex" );
                builder.append( "key", key() );
+                builder.append( "sparse", sparse );
                BSONObj bobj = builder.done();
                id_.info = theDataFileMgr.insert( ns(), bobj.objdata(), bobj.objsize() );
                // head not needed for current tests
@@ -339,12 +340,13 @@ namespace NamespaceTests {
                    elts.push_back( simpleBC( i ) );
                BSONObjBuilder b;
                b.append( "a", elts );
-
+                BSONObj obj = b.obj();
+
                BSONObjSet keys;
-                id().getKeysFromObject( b.done(), keys );
+                id().getKeysFromObject( obj, keys );
                checkSize( 4, keys );
                BSONObjSet::iterator i = keys.begin();
-                assertEquals( nullObj(), *i++ );
+                assertEquals( nullObj(), *i++ ); // see SERVER-3377
                for ( int j = 1; j < 4; ++i, ++j ) {
                    BSONObjBuilder b;
                    b.append( "", j );
@@ -532,9 +534,49 @@ namespace NamespaceTests {
                id().getKeysFromObject( fromjson( "{a:[]}" ), keys );
                checkSize(1, keys );
+                ASSERT_EQUALS( Undefined, keys.begin()->firstElement().type() );
                keys.clear();
            }
        };
+
+        class DoubleArray : Base {
+        public:
+            void run() {
+                create();
+
+                BSONObjSet keys;
+                id().getKeysFromObject( fromjson( "{a:[1,2]}" ), keys );
+                checkSize(2, keys );
+                BSONObjSet::const_iterator i = keys.begin();
+                ASSERT_EQUALS( BSON( "" << 1 << "" << 1 ), *i );
+                ++i;
+                ASSERT_EQUALS( BSON( "" << 2 << "" << 2 ), *i );
+                keys.clear();
+            }
+
+        protected:
+            BSONObj key() const {
+                return BSON( "a" << 1 << "a" << 1 );
+            }
+        };
+
+        class DoubleEmptyArray : Base {
+        public:
+            void run() {
+                create();
+
+                BSONObjSet keys;
+                id().getKeysFromObject( fromjson( "{a:[]}" ), keys );
+                checkSize(1, keys );
+                ASSERT_EQUALS( fromjson( "{'':undefined,'':undefined}" ), *keys.begin() );
+                keys.clear();
+            }
+
+        protected:
+            BSONObj key() const {
+                return BSON( "a" << 1 << "a" << 1 );
+            }
+        };
 
        class MultiEmptyArray : Base {
        public:
@@ -558,7 +600,9 @@ namespace NamespaceTests {
                id().getKeysFromObject( fromjson( "{a:1,b:[]}" ), keys );
                checkSize(1, keys );
                //cout << "YO : " << *(keys.begin()) << endl;
-                ASSERT_EQUALS( NumberInt , keys.begin()->firstElement().type() );
+                BSONObjIterator i( *keys.begin() );
+                ASSERT_EQUALS( NumberInt , i.next().type() );
+                ASSERT_EQUALS( Undefined , i.next().type() );
                keys.clear();
            }
 
@@ -567,8 +611,313 @@ namespace NamespaceTests {
                return aAndB();
            }
        };
+
+        class NestedEmptyArray : Base {
+        public:
+            void run() {
+                create();
+
+                BSONObjSet keys;
+                id().getKeysFromObject( fromjson( "{a:[]}" ), keys );
+                checkSize( 1, keys );
+                ASSERT_EQUALS( fromjson( "{'':null}" ), *keys.begin() );
+                keys.clear();
+            }
+        protected:
+            BSONObj key() const { return BSON( "a.b" << 1 ); }
+        };
+
+        class MultiNestedEmptyArray : Base {
+        public:
+            void run() {
+                create();
+
+                BSONObjSet keys;
+                id().getKeysFromObject( fromjson( "{a:[]}" ), keys );
+                checkSize( 1, keys );
+                ASSERT_EQUALS( fromjson( "{'':null,'':null}" ), *keys.begin() );
+                keys.clear();
+            }
+        protected:
+            BSONObj key() const { return BSON( "a.b" << 1 << "a.c" << 1 ); }
+        };
+
+        class UnevenNestedEmptyArray : public Base {
+        public:
+            void run() {
+                create();
+
+                BSONObjSet keys;
+                id().getKeysFromObject( fromjson( "{a:[]}" ), keys );
+                checkSize( 1, keys );
+                ASSERT_EQUALS( fromjson( "{'':undefined,'':null}" ), *keys.begin() );
+                keys.clear();
+
+                id().getKeysFromObject( fromjson( "{a:[{b:1}]}" ), keys );
+                checkSize( 1, keys );
+                ASSERT_EQUALS( fromjson( "{'':{b:1},'':1}" ), *keys.begin() );
+                keys.clear();
+
+                id().getKeysFromObject( fromjson( "{a:[{b:[]}]}" ), keys );
+                checkSize( 1, keys );
+                ASSERT_EQUALS( fromjson( "{'':{b:[]},'':undefined}" ), *keys.begin() );
+                keys.clear();
+            }
+        protected:
+            BSONObj key() const { return BSON( "a" << 1 << "a.b" << 1 ); }
+        };
+
+        class ReverseUnevenNestedEmptyArray : public Base {
+        public:
+            void run() {
+                create();
+
+                BSONObjSet keys;
+                id().getKeysFromObject( fromjson( "{a:[]}" ), keys );
+                checkSize( 1, keys );
+                ASSERT_EQUALS( fromjson( "{'':null,'':undefined}" ), *keys.begin() );
+                keys.clear();
+            }
+        protected:
+            BSONObj key() const { return BSON( "a.b" << 1 << "a" << 1 ); }
+        };
+
+        class SparseReverseUnevenNestedEmptyArray : public Base {
+        public:
+            void run() {
+                create( true );
+
+                BSONObjSet keys;
+                id().getKeysFromObject( fromjson( "{a:[]}" ), keys );
+                checkSize( 1, keys );
+                ASSERT_EQUALS( fromjson( "{'':null,'':undefined}" ), *keys.begin() );
+                keys.clear();
+            }
+        protected:
+            BSONObj key() const { return BSON( "a.b" << 1 << "a" << 1 ); }
+        };
+
+        class SparseEmptyArray : public Base {
+        public:
+            void run() {
+                create( true );
+
+                BSONObjSet keys;
+                id().getKeysFromObject( fromjson( "{a:1}" ), keys );
+                checkSize( 0, keys );
+                keys.clear();
+
+                id().getKeysFromObject( fromjson( "{a:[]}" ), keys );
+                checkSize( 0, keys );
+                keys.clear();
+
+                id().getKeysFromObject( fromjson( "{a:[{c:1}]}" ), keys );
+                checkSize( 0, keys );
+                keys.clear();
+            }
+        protected:
+            BSONObj key() const { return BSON( "a.b" << 1 ); }
+        };
+
+        class SparseEmptyArraySecond : public Base {
+        public:
+            void run() {
+                create( true );
+
+                BSONObjSet keys;
+                id().getKeysFromObject( fromjson( "{a:1}" ), keys );
+                checkSize( 0, keys );
+                keys.clear();
+
+                id().getKeysFromObject( fromjson( "{a:[]}" ), keys );
+                checkSize( 0, keys );
+                keys.clear();
+
+                id().getKeysFromObject( fromjson( "{a:[{c:1}]}" ), keys );
+                checkSize( 0, keys );
+                keys.clear();
+            }
+        protected:
+            BSONObj key() const { return BSON( "z" << 1 << "a.b" << 1 ); }
+        };
+
+        class NonObjectMissingNestedField : public Base {
+        public:
+            void run() {
+                create();
+
+                BSONObjSet keys;
+                id().getKeysFromObject( fromjson( "{a:[]}" ), keys );
+                checkSize( 1, keys );
+                ASSERT_EQUALS( fromjson( "{'':null}" ), *keys.begin() );
+                keys.clear();
+
+                id().getKeysFromObject( fromjson( "{a:[1]}" ), keys );
+                checkSize( 1, keys );
+                ASSERT_EQUALS( fromjson( "{'':null}" ), *keys.begin() );
+                keys.clear();
+
+                id().getKeysFromObject( fromjson( "{a:[1,{b:1}]}" ), keys );
+                checkSize( 2, keys );
+                BSONObjSet::const_iterator c = keys.begin();
+                ASSERT_EQUALS( fromjson( "{'':null}" ), *c );
+                ++c;
+                ASSERT_EQUALS( fromjson( "{'':1}" ), *c );
+                keys.clear();
+            }
+        protected:
+            BSONObj key() const { return BSON( "a.b" << 1 ); }
+        };
+
+        class SparseNonObjectMissingNestedField : public Base {
+        public:
+            void run() {
+                create( true );
+
+                BSONObjSet keys;
+                id().getKeysFromObject( fromjson( "{a:[]}" ), keys );
+                checkSize( 0, keys );
+                keys.clear();
+
+                id().getKeysFromObject( fromjson( "{a:[1]}" ), keys );
+                checkSize( 0, keys );
+                keys.clear();
+
+                id().getKeysFromObject( fromjson( "{a:[1,{b:1}]}" ), keys );
+                checkSize( 1, keys );
+                ASSERT_EQUALS( fromjson( "{'':1}" ), *keys.begin() );
+                keys.clear();
+            }
+        protected:
+            BSONObj key() const { return BSON( "a.b" << 1 ); }
+        };
+
+        class IndexedArrayIndex : public Base {
+        public:
+            void run() {
+                create();
+
+                BSONObjSet keys;
+                id().getKeysFromObject( fromjson( "{a:[1]}" ), keys );
+                checkSize( 1, keys );
+                ASSERT_EQUALS( BSON( "" << 1 ), *keys.begin() );
+                keys.clear();
+
+                id().getKeysFromObject( fromjson( "{a:[[1]]}" ), keys );
+                checkSize( 1, keys );
+                ASSERT_EQUALS( fromjson( "{'':[1]}" ), *keys.begin() );
+                keys.clear();
+
+                id().getKeysFromObject( fromjson( "{a:[[]]}" ), keys );
+                checkSize( 1, keys );
+                ASSERT_EQUALS( fromjson( "{'':undefined}" ), *keys.begin() );
+                keys.clear();
+
+                id().getKeysFromObject( fromjson( "{a:{'0':1}}" ), keys );
+                checkSize( 1, keys );
+                ASSERT_EQUALS( BSON( "" << 1 ), *keys.begin() );
+                keys.clear();
+
+                ASSERT_EXCEPTION( id().getKeysFromObject( fromjson( "{a:[{'0':1}]}" ), keys ), UserException );
+
+                ASSERT_EXCEPTION( id().getKeysFromObject( fromjson( "{a:[1,{'0':2}]}" ), keys ), UserException );
+            }
+        protected:
+            BSONObj key() const { return BSON( "a.0" << 1 ); }
+        };
+
+        class DoubleIndexedArrayIndex : public Base {
+        public:
+            void run() {
+                create();
+
+                BSONObjSet keys;
+                id().getKeysFromObject( fromjson( "{a:[[1]]}" ), keys );
+                checkSize( 1, keys );
+                ASSERT_EQUALS( fromjson( "{'':1}" ), *keys.begin() );
+                keys.clear();
+
+                id().getKeysFromObject( fromjson( "{a:[[]]}" ), keys );
+                checkSize( 1, keys );
+                ASSERT_EQUALS( fromjson( "{'':null}" ), *keys.begin() );
+                keys.clear();
+
+                id().getKeysFromObject( fromjson( "{a:[]}" ), keys );
+                checkSize( 1, keys );
+                ASSERT_EQUALS( fromjson( "{'':null}" ), *keys.begin() );
+                keys.clear();
+
+                id().getKeysFromObject( fromjson( "{a:[[[]]]}" ), keys );
+                checkSize( 1, keys );
+                ASSERT_EQUALS( fromjson( "{'':undefined}" ), *keys.begin() );
+                keys.clear();
+            }
+        protected:
+            BSONObj key() const { return BSON( "a.0.0" << 1 ); }
+        };
+
+        class ObjectWithinArray : public Base {
+        public:
+            void run() {
+                create();
+
+                BSONObjSet keys;
+                id().getKeysFromObject( fromjson( "{a:[{b:1}]}" ), keys );
+                checkSize( 1, keys );
+                ASSERT_EQUALS( fromjson( "{'':1}" ), *keys.begin() );
+                keys.clear();
+
+                id().getKeysFromObject( fromjson( "{a:[{b:[1]}]}" ), keys );
+                checkSize( 1, keys );
+                ASSERT_EQUALS( fromjson( "{'':1}" ), *keys.begin() );
+                keys.clear();
+
+                id().getKeysFromObject( fromjson( "{a:[{b:[[1]]}]}" ), keys );
+                checkSize( 1, keys );
+                ASSERT_EQUALS( fromjson( "{'':[1]}" ), *keys.begin() );
+                keys.clear();
+
+                id().getKeysFromObject( fromjson( "{a:[[{b:1}]]}" ), keys );
+                checkSize( 1, keys );
+                ASSERT_EQUALS( fromjson( "{'':1}" ), *keys.begin() );
+                keys.clear();
+
+                id().getKeysFromObject( fromjson( "{a:[[{b:[1]}]]}" ), keys );
+                checkSize( 1, keys );
+                ASSERT_EQUALS( fromjson( "{'':1}" ), *keys.begin() );
+                keys.clear();
+
+                id().getKeysFromObject( fromjson( "{a:[[{b:[[1]]}]]}" ), keys );
+                checkSize( 1, keys );
+                ASSERT_EQUALS( fromjson( "{'':[1]}" ), *keys.begin() );
+                keys.clear();
+
+                id().getKeysFromObject( fromjson( "{a:[[{b:[]}]]}" ), keys );
+                checkSize( 1, keys );
+                ASSERT_EQUALS( fromjson( "{'':undefined}" ), *keys.begin() );
+                keys.clear();
+            }
+        protected:
+            BSONObj key() const { return BSON( "a.0.b" << 1 ); }
+        };
+
+        class ArrayWithinObjectWithinArray : public Base {
+        public:
+            void run() {
+                create();
+
+                BSONObjSet keys;
+                id().getKeysFromObject( fromjson( "{a:[{b:[1]}]}" ), keys );
+                checkSize( 1, keys );
+                ASSERT_EQUALS( fromjson( "{'':1}" ), *keys.begin() );
+                keys.clear();
+            }
+        protected:
+            BSONObj key() const { return BSON( "a.0.b.0" << 1 ); }
+        };
+
+        // also test numeric string field names
 
    } // namespace IndexDetailsTests
 
    namespace NamespaceDetailsTests {
@@ -862,7 +1211,22 @@ namespace NamespaceTests {
            add< IndexDetailsTests::AlternateMissing >();
            add< IndexDetailsTests::MultiComplex >();
            add< IndexDetailsTests::EmptyArray >();
+            add< IndexDetailsTests::DoubleArray >();
+            add< IndexDetailsTests::DoubleEmptyArray >();
            add< IndexDetailsTests::MultiEmptyArray >();
+            add< IndexDetailsTests::NestedEmptyArray >();
+            add< IndexDetailsTests::MultiNestedEmptyArray >();
+            add< IndexDetailsTests::UnevenNestedEmptyArray >();
+            add< IndexDetailsTests::ReverseUnevenNestedEmptyArray >();
+            add< IndexDetailsTests::SparseReverseUnevenNestedEmptyArray >();
+            add< IndexDetailsTests::SparseEmptyArray >();
+            add< IndexDetailsTests::SparseEmptyArraySecond >();
+            add< IndexDetailsTests::NonObjectMissingNestedField >();
+            add< IndexDetailsTests::SparseNonObjectMissingNestedField >();
+            add< IndexDetailsTests::IndexedArrayIndex >();
+            add< IndexDetailsTests::DoubleIndexedArrayIndex >();
+            add< IndexDetailsTests::ObjectWithinArray >();
+            add< IndexDetailsTests::ArrayWithinObjectWithinArray >();
            add< IndexDetailsTests::MissingField >();
            add< IndexDetailsTests::SubobjectMissing >();
            add< IndexDetailsTests::CompoundMissing >();
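The new IndexDetails tests above pin down multikey extraction: an array field normally yields one index key per element, while an empty array yields a single placeholder key (type Undefined) so the document remains reachable through the index. A toy illustration of just that rule (simplified types; the server's real extraction also handles nesting, sparse indexes, and compound keys as the tests show):

    #include <iostream>
    #include <sstream>
    #include <string>
    #include <vector>

    std::vector<std::string> keysForArrayField(const std::vector<int>& elems) {
        std::vector<std::string> keys;
        if (elems.empty()) {
            keys.push_back("undefined");   // {a:[]} -> one placeholder key
            return keys;
        }
        for (size_t i = 0; i < elems.size(); ++i) {  // {a:[1,2]} -> keys "1","2"
            std::ostringstream os;
            os << elems[i];
            keys.push_back(os.str());
        }
        return keys;
    }

    int main() {
        std::vector<int> empty;
        std::vector<int> two;
        two.push_back(1); two.push_back(2);
        std::cout << keysForArrayField(empty).size() << ' '   // 1
                  << keysForArrayField(two).size() << '\n';   // 2
        return 0;
    }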
{ } + } + + virtual unsigned batchSize() { return 50; } + void say(unsigned long long n, int ms, string s) { unsigned long long rps = n*1000/ms; cout << "stats " << setw(33) << left << s << ' ' << right << setw(9) << rps << ' ' << right << setw(5) << ms << "ms "; @@ -142,124 +203,70 @@ namespace PerfTests { cout << dur::stats.curr->_asCSV(); cout << endl; - /* if you want recording of the timings, place the password for the perf database - in ./../settings.py: - pstatspassword="<pwd>" - */ - const char *fn = "../../settings.py"; - static bool ok = true; - if( ok ) { - DEV { - // no writing to perf db if dev - } - else if( !exists(fn) ) { - static int once; - if( exists("settings.py") ) - fn = "settings.py"; - else if( once++ == 0 ) { - cout << "no ../../settings.py or ./settings.py file found. will not write perf stats to pstats db." << endl; - cout << "it is recommended this be enabled even on dev boxes" << endl; - } - } - else { - try { - if( conn == 0 ) { - MemoryMappedFile f; - const char *p = (const char *) f.mapWithOptions(fn, MongoFile::READONLY); - string pwd; - - { - const char *q = str::after(p, "pstatspassword=\""); - if( *q == 0 ) { - cout << "info perftests.cpp: no pstatspassword= in settings.py" << endl; - ok = false; - } - else { - pwd = str::before(q, '\"'); - } - } + connect(); - if( ok ) { - conn = new DBClientConnection(false, 0, 10); - string err; - if( conn->connect("mongo05.10gen.cust.cbici.net", err) ) { - if( !conn->auth("perf", "perf", pwd, err) ) { - cout << "info: authentication with stats db failed: " << err << endl; - assert(false); - } - } - else { - cout << err << " (to log perfstats)" << endl; - ok = false; + if( conn && !conn->isFailed() ) { + const char *ns = "perf.pstats"; + if( perfHist ) { + static bool needver = true; + try { + // try to report rps from last time */ + Query q; + { + BSONObjBuilder b; + b.append("host",getHostName()).append("test",s).append("dur",cmdLine.dur); + DEV { b.append("info.DEBUG",true); } + else b.appendNull("info.DEBUG"); + if( sizeof(int*) == 4 ) + b.append("info.bits", 32); + else + b.appendNull("info.bits"); + q = Query(b.obj()).sort("when",-1); + } + BSONObj fields = BSON( "rps" << 1 << "info" << 1 ); + vector<BSONObj> v; + conn->findN(v, ns, q, perfHist, 0, &fields); + for( vector<BSONObj>::iterator i = v.begin(); i != v.end(); i++ ) { + BSONObj o = *i; + double lastrps = o["rps"].Number(); + if( lastrps ) { + cout << "stats " << setw(33) << right << "new/old:" << ' ' << setw(9); + cout << fixed << setprecision(2) << rps / lastrps; + if( needver ) { + cout << " " << o.getFieldDotted("info.git").toString(); } + cout << '\n'; } } - if( conn && !conn->isFailed() ) { - const char *ns = "perf.pstats"; - if( perfHist ) { - static bool needver = true; - try { - // try to report rps from last time */ - Query q; - { - BSONObjBuilder b; - b.append("host",getHostName()).append("test",s).append("dur",cmdLine.dur); - DEV b.append("info.DEBUG",true); - else b.appendNull("info.DEBUG"); - if( sizeof(int*) == 4 ) b.append("info.bits", 32); - else b.appendNull("info.bits"); - q = Query(b.obj()).sort("when",-1); - } - //cout << q.toString() << endl; - BSONObj fields = BSON( "rps" << 1 << "info" << 1 ); - vector<BSONObj> v; - conn->findN(v, ns, q, perfHist, 0, &fields); - for( vector<BSONObj>::iterator i = v.begin(); i != v.end(); i++ ) { - BSONObj o = *i; - double lastrps = o["rps"].Number(); - if( lastrps ) { - cout << "stats " << setw(33) << right << "new/old:" << ' ' << setw(9); - cout << fixed << setprecision(2) << rps / lastrps; - if( 
needver ) { - cout << " " << o.getFieldDotted("info.git").toString(); - } - cout << '\n'; - } - } - } catch(...) { } - cout.flush(); - needver = false; - } - { - bob b; - b.append("host", getHostName()); - b.appendTimeT("when", time(0)); - b.append("test", s); - b.append("rps", (int) rps); - b.append("millis", ms); - b.appendBool("dur", cmdLine.dur); - if( showDurStats() && cmdLine.dur ) - b.append("durStats", dur::stats.curr->_asObj()); - { - bob inf; - inf.append("version", versionString); - if( sizeof(int*) == 4 ) inf.append("bits", 32); - DEV inf.append("DEBUG", true); + } catch(...) { } + cout.flush(); + needver = false; + } + { + bob b; + b.append("host", getHostName()); + b.appendTimeT("when", time(0)); + b.append("test", s); + b.append("rps", (int) rps); + b.append("millis", ms); + b.appendBool("dur", cmdLine.dur); + if( showDurStats() && cmdLine.dur ) + b.append("durStats", dur::stats.curr->_asObj()); + { + bob inf; + inf.append("version", versionString); + if( sizeof(int*) == 4 ) inf.append("bits", 32); + DEV inf.append("DEBUG", true); #if defined(_WIN32) - inf.append("os", "win"); + inf.append("os", "win"); #endif - inf.append("git", gitVersion()); - inf.append("boost", BOOST_VERSION); - b.append("info", inf.obj()); - } - BSONObj o = b.obj(); - //cout << "inserting " << o.toString() << endl; - conn->insert(ns, o); - } - } - } - catch(...) { + inf.append("git", gitVersion()); + inf.append("boost", BOOST_VERSION); + b.append("info", inf.obj()); } + BSONObj o = b.obj(); + //cout << "inserting " << o.toString() << endl; + conn->insert(ns, o); } } } @@ -277,9 +284,9 @@ namespace PerfTests { dur::stats._intervalMicros = 0; // no auto rotate dur::stats.curr->reset(); - Timer t; + mongo::Timer t; unsigned long long n = 0; - const unsigned Batch = 50; + const unsigned Batch = batchSize(); if( hlm == 0 ) { // means just do once @@ -314,10 +321,10 @@ namespace PerfTests { post(); { - const char *test2name = timed2(); - if( test2name ) { + string test2name = timed2(); + if( test2name.size() != 0 ) { dur::stats.curr->reset(); - Timer t; + mongo::Timer t; unsigned long long n = 0; while( 1 ) { unsigned i; @@ -335,6 +342,7 @@ namespace PerfTests { }; DBClientConnection *B::conn; + unsigned B::once; unsigned dontOptimizeOutHopefully; @@ -598,6 +606,48 @@ namespace PerfTests { virtual bool showDurStats() { return false; } }; + class Compress : public B { + public: + const unsigned sz; + void *p; + Compress() : sz(1024*1024*100+3) { } + virtual unsigned batchSize() { return 1; } + string name() { return "compress"; } + virtual bool showDurStats() { return false; } + virtual int howLongMillis() { return 4000; } + unsigned long long expectation() { return 1000000; } + void prep() { + p = malloc(sz); + // this isn't a fair test as it is mostly rands but we just want a rough perf check + static int last; + for (unsigned i = 0; i<sz; i++) { + int r = rand(); + if( (r & 0x300) == 0x300 ) + r = last; + ((char*)p)[i] = r; + last = r; + } + } + size_t last; + string res; + void timed() { + mongo::Timer t; + string out; + size_t len = compress((const char *) p, sz, &out); + bool ok = uncompress(out.c_str(), out.size(), &res); + ASSERT(ok); + static unsigned once; + if( once++ == 0 ) + cout << "compress round trip " << sz/(1024.0*1024) / (t.millis()/1000.0) << "MB/sec\n"; + //cout << len / (1024.0/1024) << " compressed" << endl; + (void)len; //fix unused error while above line is commented out + } + void post() { + ASSERT( memcmp(res.c_str(), p, sz) == 0 ); + free(p); + } + }; + // test speed of checksum 
method class ChecksumTest : public B { public: @@ -607,6 +657,7 @@ namespace PerfTests { virtual int howLongMillis() { return 2000; } int expectationTimeMillis() { return 5000; } virtual bool showDurStats() { return false; } + virtual unsigned batchSize() { return 1; } void *p; @@ -684,7 +735,7 @@ namespace PerfTests { void timed() { client().insert( ns(), x ); } - const char * timed2() { + string timed2() { client().findOne(ns(), query); return "findOne_by_id"; } @@ -753,7 +804,7 @@ namespace PerfTests { client().update(ns(), q, y, /*upsert*/true); } - const char * timed2() { + virtual string timed2() { static BSONObj I = BSON( "$inc" << BSON( "y" << 1 ) ); // test some $inc's @@ -762,8 +813,7 @@ namespace PerfTests { BSONObj q = BSON("x" << x); client().update(ns(), q, I); - static string s = name()+"-inc"; - return s.c_str(); + return name()+"-inc"; } unsigned long long expectation() { return 1000; } @@ -778,6 +828,16 @@ namespace PerfTests { this->client().ensureIndex(this->ns(), BSON("y"<<1)); this->client().ensureIndex(this->ns(), BSON("z"<<1)); } + + /* + virtual string timed2() { + string x = T::timed2(); + if ( x.size() == 0 ) + return x; + + return x + "-with-more-indexes"; + } + */ }; void t() { @@ -822,6 +882,8 @@ namespace PerfTests { } else { add< Dummy >(); + add< ChecksumTest >(); + add< Compress >(); add< TLS >(); add< Malloc >(); add< Timer >(); @@ -838,7 +900,6 @@ namespace PerfTests { add< BSONIter >(); add< BSONGetFields1 >(); add< BSONGetFields2 >(); - add< ChecksumTest >(); add< TaskQueueTest >(); add< InsertDup >(); add< Insert1 >(); diff --git a/dbtests/queryoptimizertests.cpp b/dbtests/queryoptimizertests.cpp index bd597572d52..83a2d267c57 100644 --- a/dbtests/queryoptimizertests.cpp +++ b/dbtests/queryoptimizertests.cpp @@ -104,7 +104,7 @@ namespace QueryOptimizerTests { auto_ptr< FieldRangeSetPair > FieldRangeSetPair_GLOBAL; #define FRSP(x) ( FieldRangeSetPair_GLOBAL.reset( new FieldRangeSetPair( ns(), x ) ), *FieldRangeSetPair_GLOBAL ) auto_ptr< FieldRangeSetPair > FieldRangeSetPair_GLOBAL2; -#define FRSP2(x) ( FieldRangeSetPair_GLOBAL2.reset( new FieldRangeSetPair( ns(), x ) ), *FieldRangeSetPair_GLOBAL2 ) +#define FRSP2(x) ( FieldRangeSetPair_GLOBAL2.reset( new FieldRangeSetPair( ns(), x ) ), FieldRangeSetPair_GLOBAL2.get() ) class NoIndex : public Base { public: @@ -886,7 +886,7 @@ namespace QueryOptimizerTests { } BSONObj hint = fromjson( "{$hint:{a:1,b:1}}" ); auto_ptr< FieldRangeSetPair > frsp( new FieldRangeSetPair( ns(), fromjson( "{a:5,b:{$in:[2,3,6,9,11]}}" ) ) ); - QueryPlan qp( nsd(), 1, *frsp, *frsp, fromjson( "{a:5,b:{$in:[2,3,6,9,11]}}" ), BSONObj() ); + QueryPlan qp( nsd(), 1, *frsp, frsp.get(), fromjson( "{a:5,b:{$in:[2,3,6,9,11]}}" ), BSONObj() ); boost::shared_ptr<Cursor> c = qp.newCursor(); double expected[] = { 2, 3, 6, 9 }; ASSERT( c->ok() ); @@ -908,7 +908,7 @@ namespace QueryOptimizerTests { } BSONObj hint = fromjson( "{$hint:{a:1,b:1}}" ); auto_ptr< FieldRangeSetPair > frsp( new FieldRangeSetPair( ns(), fromjson( "{a:{$gte:5},b:{$in:[2,3,6,9,11]}}" ) ) ); - QueryPlan qp( nsd(), 1, *frsp, *frsp, fromjson( "{a:{$gte:5},b:{$in:[2,3,6,9,11]}}" ), BSONObj() ); + QueryPlan qp( nsd(), 1, *frsp, frsp.get(), fromjson( "{a:{$gte:5},b:{$in:[2,3,6,9,11]}}" ), BSONObj() ); boost::shared_ptr<Cursor> c = qp.newCursor(); int matches[] = { 2, 3, 6, 9 }; for( int i = 0; i < 4; ++i, c->advance() ) { @@ -1900,18 +1900,19 @@ namespace QueryOptimizerTests { public: void run() { _cli.createCollection( ns(), 1000, true ); - _cli.insert( ns(), BSON( "_id" << 1 
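The Compress benchmark above prints round-trip throughput as size over elapsed time. The arithmetic it performs, pulled out for clarity (same formula, illustrative values):

    #include <cstdio>

    // MB/sec = (bytes / 2^20) / (milliseconds / 1000)
    double mbPerSec(double bytes, int millis) {
        return (bytes / (1024.0 * 1024.0)) / (millis / 1000.0);
    }

    int main() {
        // e.g. the benchmark's ~100MB buffer round-tripped in 2.5s -> 40.0 MB/sec
        std::printf("%.1f MB/sec\n", mbPerSec(100.0 * 1024 * 1024 + 3, 2500));
        return 0;
    }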
diff --git a/dbtests/queryoptimizertests.cpp b/dbtests/queryoptimizertests.cpp
index bd597572d52..83a2d267c57 100644
--- a/dbtests/queryoptimizertests.cpp
+++ b/dbtests/queryoptimizertests.cpp
@@ -104,7 +104,7 @@ namespace QueryOptimizerTests {
    auto_ptr< FieldRangeSetPair > FieldRangeSetPair_GLOBAL;
 #define FRSP(x) ( FieldRangeSetPair_GLOBAL.reset( new FieldRangeSetPair( ns(), x ) ), *FieldRangeSetPair_GLOBAL )
    auto_ptr< FieldRangeSetPair > FieldRangeSetPair_GLOBAL2;
-#define FRSP2(x) ( FieldRangeSetPair_GLOBAL2.reset( new FieldRangeSetPair( ns(), x ) ), *FieldRangeSetPair_GLOBAL2 )
+#define FRSP2(x) ( FieldRangeSetPair_GLOBAL2.reset( new FieldRangeSetPair( ns(), x ) ), FieldRangeSetPair_GLOBAL2.get() )
 
    class NoIndex : public Base {
    public:
@@ -886,7 +886,7 @@ namespace QueryOptimizerTests {
                }
                BSONObj hint = fromjson( "{$hint:{a:1,b:1}}" );
                auto_ptr< FieldRangeSetPair > frsp( new FieldRangeSetPair( ns(), fromjson( "{a:5,b:{$in:[2,3,6,9,11]}}" ) ) );
-                QueryPlan qp( nsd(), 1, *frsp, *frsp, fromjson( "{a:5,b:{$in:[2,3,6,9,11]}}" ), BSONObj() );
+                QueryPlan qp( nsd(), 1, *frsp, frsp.get(), fromjson( "{a:5,b:{$in:[2,3,6,9,11]}}" ), BSONObj() );
                boost::shared_ptr<Cursor> c = qp.newCursor();
                double expected[] = { 2, 3, 6, 9 };
                ASSERT( c->ok() );
@@ -908,7 +908,7 @@ namespace QueryOptimizerTests {
                }
                BSONObj hint = fromjson( "{$hint:{a:1,b:1}}" );
                auto_ptr< FieldRangeSetPair > frsp( new FieldRangeSetPair( ns(), fromjson( "{a:{$gte:5},b:{$in:[2,3,6,9,11]}}" ) ) );
-                QueryPlan qp( nsd(), 1, *frsp, *frsp, fromjson( "{a:{$gte:5},b:{$in:[2,3,6,9,11]}}" ), BSONObj() );
+                QueryPlan qp( nsd(), 1, *frsp, frsp.get(), fromjson( "{a:{$gte:5},b:{$in:[2,3,6,9,11]}}" ), BSONObj() );
                boost::shared_ptr<Cursor> c = qp.newCursor();
                int matches[] = { 2, 3, 6, 9 };
                for( int i = 0; i < 4; ++i, c->advance() ) {
@@ -1900,18 +1900,19 @@ namespace QueryOptimizerTests {
        public:
            void run() {
                _cli.createCollection( ns(), 1000, true );
-                _cli.insert( ns(), BSON( "_id" << 1 ) );
+                _cli.insert( ns(), BSON( "x" << 1 ) );
 
                {
                    dblock lk;
                    Client::Context ctx( ns() );
-                    setQueryOptimizerCursor( BSON( "_id" << GT << 0 ) );
-                    ASSERT_EQUALS( 1, current().getIntField( "_id" ) );
+                    setQueryOptimizerCursor( BSON( "x" << GT << 0 ) );
+                    ASSERT_EQUALS( 1, current().getIntField( "x" ) );
                    ASSERT( prepareToYield() );
                }
-
-                while( _cli.count( ns(), BSON( "_id" << 1 ) ) > 0 ) {
-                    _cli.insert( ns(), BSONObj() );
+
+                int x = 2;
+                while( _cli.count( ns(), BSON( "x" << 1 ) ) > 0 ) {
+                    _cli.insert( ns(), BSON( "x" << x++ ) );
                }
 
                {
@@ -2088,26 +2089,26 @@ namespace QueryOptimizerTests {
        public:
            void run() {
                _cli.createCollection( ns(), 1000, true );
-                _cli.insert( ns(), BSON( "_id" << 1 << "a" << 1 ) );
-                _cli.ensureIndex( ns(), BSON( "_id" << 1 ) );
+                _cli.insert( ns(), BSON( "a" << 1 << "b" << 1 ) );
+                _cli.ensureIndex( ns(), BSON( "a" << 1 ) );
 
                shared_ptr<Cursor> c;
                {
                    dblock lk;
                    Client::Context ctx( ns() );
-                    c = newQueryOptimizerCursor( ns(), BSON( "_id" << GT << 0 << "a" << GT << 0 ) );
-                    ASSERT_EQUALS( 1, c->current().getIntField( "_id" ) );
+                    c = newQueryOptimizerCursor( ns(), BSON( "a" << GT << 0 << "b" << GT << 0 ) );
+                    ASSERT_EQUALS( 1, c->current().getIntField( "a" ) );
                    ASSERT( !c->getsetdup( c->currLoc() ) );
                    c->advance();
-                    ASSERT_EQUALS( 1, c->current().getIntField( "_id" ) );
+                    ASSERT_EQUALS( 1, c->current().getIntField( "a" ) );
                    ASSERT( c->getsetdup( c->currLoc() ) );
                    ASSERT( c->prepareToYield() );
                }
 
                int i = 1;
-                while( _cli.count( ns(), BSON( "_id" << 1 ) ) > 0 ) {
+                while( _cli.count( ns(), BSON( "a" << 1 ) ) > 0 ) {
                    ++i;
-                    _cli.insert( ns(), BSON( "_id" << i << "a" << i ) );
+                    _cli.insert( ns(), BSON( "a" << i << "b" << i ) );
                }
 
                {
@@ -2116,7 +2117,7 @@ namespace QueryOptimizerTests {
                    c->recoverFromYield();
                    ASSERT( c->ok() );
                    // {$natural:1} plan does not recover, {_id:1} plan does.
-                    ASSERT( 1 < c->current().getIntField( "_id" ) );
+                    ASSERT( 1 < c->current().getIntField( "a" ) );
                }
            }
        };
diff --git a/dbtests/querytests.cpp b/dbtests/querytests.cpp
index a50eadfcd31..694053b10a8 100644
--- a/dbtests/querytests.cpp
+++ b/dbtests/querytests.cpp
@@ -361,6 +361,7 @@ namespace QueryTests {
        void insertA(const char* ns, int a) {
            BSONObjBuilder b;
            b.appendOID("_id", 0, true);
+            b.appendOID("value", 0, true);
            b.append("a", a);
            insert(ns, b.obj());
        }
@@ -374,7 +375,7 @@ namespace QueryTests {
            auto_ptr< DBClientCursor > c1 = client().query( ns, QUERY( "a" << GT << -1 ), 0, 0, 0, QueryOption_CursorTailable );
            OID id;
            id.init("000000000000000000000000");
-            auto_ptr< DBClientCursor > c2 = client().query( ns, QUERY( "_id" << GT << id ), 0, 0, 0, QueryOption_CursorTailable );
+            auto_ptr< DBClientCursor > c2 = client().query( ns, QUERY( "value" << GT << id ), 0, 0, 0, QueryOption_CursorTailable );
            c1->next();
            c1->next();
            ASSERT( !c1->more() );
@@ -399,7 +400,6 @@ namespace QueryTests {
        }
        void run() {
            const char *ns = "unittests.querytests.OplogReplayMode";
-            insert( ns, BSON( "ts" << 3 ) );
            insert( ns, BSON( "ts" << 0 ) );
            insert( ns, BSON( "ts" << 1 ) );
            insert( ns, BSON( "ts" << 2 ) );
@@ -407,6 +407,12 @@ namespace QueryTests {
            ASSERT( c->more() );
            ASSERT_EQUALS( 2, c->next().getIntField( "ts" ) );
            ASSERT( !c->more() );
+
+            insert( ns, BSON( "ts" << 3 ) );
+            c = client().query( ns, QUERY( "ts" << GT << 1 ).hint( BSON( "$natural" << 1 ) ), 0, 0, 0, QueryOption_OplogReplay );
+            ASSERT( c->more() );
+            ASSERT_EQUALS( 2, c->next().getIntField( "ts" ) );
+            ASSERT( c->more() );
        }
    };
 
@@ -1146,7 +1152,35 @@ namespace QueryTests {
    private:
        int _old;
    };
+
+    /**
+     * Check OplogReplay mode where query timestamp is earlier than the earliest
+     * entry in the collection.
+     */
+    class FindingStartStale : public CollectionBase {
+    public:
+        FindingStartStale() : CollectionBase( "findingstart" ) {}
+
+        void run() {
+            unsigned startNumCursors = ClientCursor::numCursors();
+
+            BSONObj info;
+            ASSERT( client().runCommand( "unittests", BSON( "create" << "querytests.findingstart" << "capped" << true << "$nExtents" << 5 << "autoIndexId" << false ), info ) );
+
+            // Check OplogReplay mode with empty collection.
+            auto_ptr< DBClientCursor > c = client().query( ns(), QUERY( "ts" << GTE << 50 ), 0, 0, 0, QueryOption_OplogReplay );
+            ASSERT( !c->more() );
+
+            // Check with some docs in the collection.
+            for( int i = 100; i < 150; client().insert( ns(), BSON( "ts" << i++ ) ) );
+            c = client().query( ns(), QUERY( "ts" << GTE << 50 ), 0, 0, 0, QueryOption_OplogReplay );
+            ASSERT( c->more() );
+            ASSERT_EQUALS( 100, c->next()[ "ts" ].numberInt() );
+
+            // Check that no persistent cursors outlast our queries above.
+            ASSERT_EQUALS( startNumCursors, ClientCursor::numCursors() );
+        }
+    };
 
    class WhatsMyUri : public CollectionBase {
    public:
@@ -1362,6 +1396,7 @@ namespace QueryTests {
        add< HelperTest >();
        add< HelperByIdTest >();
        add< FindingStartPartiallyFull >();
+        add< FindingStartStale >();
        add< WhatsMyUri >();
 
        add< parsedtests::basic1 >();
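FindingStartStale (above) and FindingStartCursorStale (below) both target the same optimization: for an oplog-style query {ts:{$gte:x}} on a capped collection, check the oldest entry first; if even it satisfies the predicate, start scanning from the head instead of walking backward from the tail. A simplified model of that decision:

    #include <deque>
    #include <iostream>

    int findStart(const std::deque<int>& oplogTs, int x) {
        if (oplogTs.empty() || oplogTs.front() >= x)
            return 0;                      // stale/early timestamp: start at the head
        // otherwise scan backward from the newest entry (simplified linear walk)
        int i = (int)oplogTs.size() - 1;
        while (i > 0 && oplogTs[i - 1] >= x)
            --i;
        return i;
    }

    int main() {
        std::deque<int> ts;
        for (int i = 100; i < 150; ++i) ts.push_back(i);
        std::cout << findStart(ts, 50) << '\n';   // 0: query ts earlier than oldest entry
        return 0;
    }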
diff --git a/dbtests/repltests.cpp b/dbtests/repltests.cpp
index ecaacf74874..2bf522555ab 100644
--- a/dbtests/repltests.cpp
+++ b/dbtests/repltests.cpp
@@ -25,6 +25,8 @@
 #include "../db/json.h"
 
 #include "dbtests.h"
+#include "../db/oplog.h"
+#include "../db/queryoptimizer.h"
 
 namespace mongo {
    void createOplog();
@@ -1049,6 +1051,31 @@ namespace ReplTests {
        }
    };
 
+    /**
+     * Check against oldest document in the oplog before scanning backward
+     * from the newest document.
+     */
+    class FindingStartCursorStale : public Base {
+    public:
+        void run() {
+            for( int i = 0; i < 10; ++i ) {
+                client()->insert( ns(), BSON( "_id" << i ) );
+            }
+            dblock lk;
+            Client::Context ctx( cllNS() );
+            NamespaceDetails *nsd = nsdetails( cllNS() );
+            BSONObjBuilder b;
+            b.appendTimestamp( "$gte" );
+            BSONObj query = BSON( "ts" << b.obj() );
+            FieldRangeSetPair frsp( cllNS(), query );
+            BSONObj order = BSON( "$natural" << 1 );
+            QueryPlan qp( nsd, -1, frsp, &frsp, query, order );
+            FindingStartCursor fsc( qp );
+            ASSERT( fsc.done() );
+            ASSERT_EQUALS( 0, fsc.cursor()->current()[ "o" ].Obj()[ "_id" ].Int() );
+        }
+    };
+
    class All : public Suite {
    public:
        All() : Suite( "repl" ) {
@@ -1103,6 +1130,7 @@ namespace ReplTests {
            add< DeleteOpIsIdBased >();
            add< DatabaseIgnorerBasic >();
            add< DatabaseIgnorerUpdate >();
+            add< FindingStartCursorStale >();
        }
    } myall;
 
diff --git a/dbtests/test.sln b/dbtests/test.sln
new file mode 100755
index 00000000000..3a1b741c716
--- /dev/null
+++ b/dbtests/test.sln
@@ -0,0 +1,26 @@
+
+Microsoft Visual Studio Solution File, Format Version 11.00
+# Visual Studio 2010
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test", "test.vcxproj", "{215B2D68-0A70-4D10-8E75-B33010C62A91}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|Win32 = Debug|Win32
+ Debug|x64 = Debug|x64
+ Release|Win32 = Release|Win32
+ Release|x64 = Release|x64
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {215B2D68-0A70-4D10-8E75-B33010C62A91}.Debug|Win32.ActiveCfg = Debug|Win32
+ {215B2D68-0A70-4D10-8E75-B33010C62A91}.Debug|Win32.Build.0 = Debug|Win32
+ {215B2D68-0A70-4D10-8E75-B33010C62A91}.Debug|x64.ActiveCfg = Debug|x64
+ {215B2D68-0A70-4D10-8E75-B33010C62A91}.Debug|x64.Build.0 = Debug|x64
+ {215B2D68-0A70-4D10-8E75-B33010C62A91}.Release|Win32.ActiveCfg = Release|Win32
+ {215B2D68-0A70-4D10-8E75-B33010C62A91}.Release|Win32.Build.0 = Release|Win32
+ {215B2D68-0A70-4D10-8E75-B33010C62A91}.Release|x64.ActiveCfg = Release|x64
+ {215B2D68-0A70-4D10-8E75-B33010C62A91}.Release|x64.Build.0 = Release|x64
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+EndGlobal
diff --git a/dbtests/test.vcxproj b/dbtests/test.vcxproj
index 1460e9d45d1..fde77d2d20b 100644
--- a/dbtests/test.vcxproj
+++ b/dbtests/test.vcxproj
@@ -259,8 +259,16 @@ <ClInclude Include="..\db\resource.h" />
<ClInclude Include="..\db\scanandorder.h" />
<ClInclude Include="..\db\security.h" />
+ <ClInclude Include="..\third_party\snappy\config.h" />
+ <ClInclude Include="..\third_party\snappy\snappy-c.h" />
+ <ClInclude Include="..\third_party\snappy\snappy-internal.h" />
+ <ClInclude Include="..\third_party\snappy\snappy-sinksource.h" />
+ <ClInclude Include="..\third_party\snappy\snappy-stubs-internal.h" />
+ <ClInclude Include="..\third_party\snappy\snappy-stubs-public.h" />
+ <ClInclude Include="..\third_party\snappy\snappy.h" />
<ClInclude Include="..\util\builder.h" />
<ClInclude Include="..\util\checksum.h" />
+ <ClInclude Include="..\util\compress.h" />
<ClInclude Include="..\util\concurrency\list.h" />
<ClInclude Include="..\util\concurrency\task.h" />
<ClInclude Include="..\util\concurrency\value.h" />
@@ -325,6 +333,7 @@ <ClCompile Include="..\db\repl\rs_rollback.cpp" />
<ClCompile Include="..\db\repl\rs_sync.cpp" />
<ClCompile Include="..\db\restapi.cpp" />
+ <ClCompile Include="..\db\scanandorder.cpp" />
<ClCompile Include="..\db\security_common.cpp" />
<ClCompile Include="..\pcre-7.4\pcrecpp.cc">
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
@@ -609,9 +618,27 @@ <ClCompile Include="..\s\shard.cpp" />
<ClCompile Include="..\s\shardconnection.cpp" />
<ClCompile Include="..\s\shardkey.cpp" />
+ <ClCompile Include="..\third_party\snappy\snappy-sinksource.cc">
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">NotUsing</PrecompiledHeader>
+ </ClCompile>
+ <ClCompile Include="..\third_party\snappy\snappy.cc">
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">NotUsing</PrecompiledHeader>
+ </ClCompile>
<ClCompile Include="..\util\alignedbuilder.cpp">
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">NotUsing</PrecompiledHeader>
</ClCompile>
+ <ClCompile Include="..\util\compress.cpp">
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">NotUsing</PrecompiledHeader>
+ </ClCompile>
<ClCompile Include="..\util\concurrency\spin_lock.cpp">
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
</ClCompile>
diff --git a/dbtests/test.vcxproj.filters b/dbtests/test.vcxproj.filters
index 1c832cd17ba..35e85fba416 100755
--- a/dbtests/test.vcxproj.filters
+++ b/dbtests/test.vcxproj.filters
@@ -56,6 +56,9 @@ <Filter Include="bson">
<UniqueIdentifier>{e6652333-c77f-420c-af8e-72d55bc095fe}</UniqueIdentifier>
</Filter>
+ <Filter Include="misc and third party\snappy">
+ <UniqueIdentifier>{fbc4416f-ca67-4e63-a1ea-49027de7e080}</UniqueIdentifier>
+ </Filter>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\boostw\boost_1_34_1\boost\config\auto_link.hpp">
@@ -304,6 +307,30 @@ <ClInclude Include="..\server.h">
<Filter>db\h</Filter>
</ClInclude>
+ <ClInclude Include="..\third_party\snappy\config.h">
+ <Filter>misc and third party\snappy</Filter>
+ </ClInclude>
+ <ClInclude Include="..\third_party\snappy\snappy.h">
+ <Filter>misc and third party\snappy</Filter>
+ </ClInclude>
+ <ClInclude Include="..\third_party\snappy\snappy-c.h">
+ <Filter>misc and third party\snappy</Filter>
+ </ClInclude>
+ <ClInclude Include="..\third_party\snappy\snappy-internal.h">
+ <Filter>misc and third party\snappy</Filter>
+ </ClInclude>
+ <ClInclude Include="..\third_party\snappy\snappy-sinksource.h">
+ <Filter>misc and third party\snappy</Filter>
+ </ClInclude>
+ <ClInclude Include="..\third_party\snappy\snappy-stubs-internal.h">
+ <Filter>misc and third party\snappy</Filter>
+ </ClInclude>
+ <ClInclude Include="..\third_party\snappy\snappy-stubs-public.h">
+ <Filter>misc and third party\snappy</Filter>
+ </ClInclude>
+ <ClInclude Include="..\util\compress.h">
+ <Filter>misc and third party</Filter>
+ </ClInclude>
</ItemGroup>
<ItemGroup>
<Library Include="..\..\js\js64r.lib">
@@ -857,6 +884,18 @@ <ClCompile Include="..\util\concurrency\spin_lock.cpp">
<Filter>util\concurrency</Filter>
</ClCompile>
+ <ClCompile Include="..\third_party\snappy\snappy.cc">
+ <Filter>misc and third party\snappy</Filter>
+ </ClCompile>
+ <ClCompile Include="..\util\compress.cpp">
+ <Filter>misc and third party</Filter>
+ </ClCompile>
+ <ClCompile Include="..\third_party\snappy\snappy-sinksource.cc">
+ <Filter>misc and third party\snappy</Filter>
+ </ClCompile>
+ <ClCompile Include="..\db\scanandorder.cpp">
+ <Filter>db\cpp</Filter>
+ </ClCompile>
</ItemGroup>
<ItemGroup>
<None Include="..\SConstruct">
diff --git a/debian/changelog b/debian/changelog
index abc4a2bce28..d1e37c93b1d 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,15 @@
+mongodb (1.9.2) unstable; urgency=low
+
+  * see http://jira.mongodb.org/browse/SERVER/fixforversion/10261
+
+ -- Richard Kreuter <richard@10gen.com>  Thu, 11 Aug 2011 16:56:28 -0500
+
+mongodb (1.9.1) unstable; urgency=low
+
+  * see http://jira.mongodb.org/browse/SERVER/fixforversion/10261
+
+ -- Richard Kreuter <richard@10gen.com>  Tue, 26 Jul 2011 16:56:28 -0500
+
 mongodb (1.9.0) unstable; urgency=low

   * see http://jira.mongodb.org/browse/SERVER/fixforversion/10232
diff --git a/distsrc/client/SConstruct b/distsrc/client/SConstruct
index c2d309a4e5a..54fc9437d3c 100755
--- a/distsrc/client/SConstruct
+++ b/distsrc/client/SConstruct
@@ -41,7 +41,7 @@ linux = False
 if "darwin" == os.sys.platform:
     addExtraLibs( "/opt/local/" )
     nix = True
-elif "linux2" == os.sys.platform:
+elif "linux2" == os.sys.platform or "linux3" == os.sys.platform:
     nix = True
     linux = True
diff --git a/doxygenConfig b/doxygenConfig
index 577ce0119c5..3d873903fe8 100644
--- a/doxygenConfig
+++ b/doxygenConfig
@@ -3,7 +3,7 @@
 #---------------------------------------------------------------------------
 DOXYFILE_ENCODING      = UTF-8
 PROJECT_NAME           = MongoDB
-PROJECT_NUMBER         = 1.9.1-pre-
+PROJECT_NUMBER         = 2.0.0-rc0-pre-
 OUTPUT_DIRECTORY       = docs/doxygen
 CREATE_SUBDIRS         = NO
 OUTPUT_LANGUAGE        = English
diff --git a/jstests/ageoutjournalfiles.js b/jstests/ageoutjournalfiles.js
new file mode 100644
index 00000000000..f7fe2275480
--- /dev/null
+++ b/jstests/ageoutjournalfiles.js
@@ -0,0 +1,16 @@
+if (db.serverStatus().dur) {
+
+    assert(db.serverStatus().dur.ageOutJournalFiles != false);
+
+    db.adminCommand({ setParameter: 1, ageOutJournalFiles: false });
+
+    assert(db.serverStatus().dur.ageOutJournalFiles == false);
+
+    db.adminCommand({ setParameter: 1, ageOutJournalFiles: true });
+
+    assert(db.serverStatus().dur.ageOutJournalFiles != false);
+
+}
+else {
+    // print("dur is off");
+}
\ No newline at end of file
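A minimal shell sketch of the parameter round trip the new test above exercises; it assumes a mongod started with journaling so that serverStatus().dur exists, and is illustrative only, not part of the commit:

    // toggle the runtime-settable parameter and read it back via serverStatus()
    var dur = db.serverStatus().dur;   // undefined when journaling is off
    if (dur) {
        db.adminCommand({ setParameter: 1, ageOutJournalFiles: false });
        assert.eq(false, db.serverStatus().dur.ageOutJournalFiles);
        db.adminCommand({ setParameter: 1, ageOutJournalFiles: true });
    }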
diff --git a/jstests/array_match3.js b/jstests/array_match3.js
index 06ee926a6a6..c8653430770 100644
--- a/jstests/array_match3.js
+++ b/jstests/array_match3.js
@@ -10,6 +10,4 @@ assert.eq( 2, t.count( {'a.0':5} ) );

 // Test with index.
 t.ensureIndex( {'a.0':1} );
-if ( 0 ) { // SERVER-2902
 assert.eq( 2, t.count( {'a.0':5} ) );
-}
diff --git a/jstests/arrayfind4.js b/jstests/arrayfind4.js
new file mode 100644
index 00000000000..b141425f2e9
--- /dev/null
+++ b/jstests/arrayfind4.js
@@ -0,0 +1,22 @@
+// Test query empty array SERVER-2258
+
+t = db.jstests_arrayfind4;
+t.drop();
+
+t.save( {a:[]} );
+t.ensureIndex( {a:1} );
+
+assert.eq( 1, t.find( {a:[]} ).hint( {$natural:1} ).itcount() );
+assert.eq( 1, t.find( {a:[]} ).hint( {a:1} ).itcount() );
+
+assert.eq( 1, t.find( {a:{$in:[[]]}} ).hint( {$natural:1} ).itcount() );
+assert.eq( 1, t.find( {a:{$in:[[]]}} ).hint( {a:1} ).itcount() );
+
+t.remove();
+t.save( {a:[[]]} );
+
+assert.eq( 1, t.find( {a:[]} ).hint( {$natural:1} ).itcount() );
+assert.eq( 1, t.find( {a:[]} ).hint( {a:1} ).itcount() );
+
+assert.eq( 1, t.find( {a:{$in:[[]]}} ).hint( {$natural:1} ).itcount() );
+assert.eq( 1, t.find( {a:{$in:[[]]}} ).hint( {a:1} ).itcount() );
diff --git a/jstests/arrayfind5.js b/jstests/arrayfind5.js
new file mode 100644
index 00000000000..083dc0622c8
--- /dev/null
+++ b/jstests/arrayfind5.js
@@ -0,0 +1,23 @@
+// Test indexed elemmatch of missing field.
+
+t = db.jstests_arrayfind5;
+t.drop();
+
+function check( nullElemMatch ) {
+    assert.eq( 1, t.find( {'a.b':1} ).itcount() );
+    assert.eq( 1, t.find( {a:{$elemMatch:{b:1}}} ).itcount() );
+    assert.eq( 0, t.find( {'a.b':null} ).itcount() );
+    assert.eq( nullElemMatch ? 1 : 0, t.find( {a:{$elemMatch:{b:null}}} ).itcount() ); // see SERVER-3377
+}
+
+t.save( {a:[{},{b:1}]} );
+check( true );
+t.ensureIndex( {'a.b':1} );
+check( true );
+
+t.drop();
+
+t.save( {a:[5,{b:1}]} );
+check( false );
+t.ensureIndex( {'a.b':1} );
+check( false );
diff --git a/jstests/capped2.js b/jstests/capped2.js
index 1f8bf1d01c6..65bb82f4c07 100644
--- a/jstests/capped2.js
+++ b/jstests/capped2.js
@@ -47,7 +47,7 @@ function checkDecreasing( i ) {

 for( i = 0 ;; ++i ) {
     debug( "capped 2: " + i );
-    tzz.save( val[ i ] );
+    tzz.insert( val[ i ] );
     if ( tzz.count() == 0 ) {
         assert( i > 100, "K" );
         break;
@@ -57,6 +57,6 @@ for( i = 0 ;; ++i ) {

 for( i = 600 ; i >= 0 ; --i ) {
     debug( "capped 2: " + i );
-    tzz.save( val[ i ] );
+    tzz.insert( val[ i ] );
     checkDecreasing( i );
 }
diff --git a/jstests/capped5.js b/jstests/capped5.js
index f56d2278a7e..be6c27d7256 100644
--- a/jstests/capped5.js
+++ b/jstests/capped5.js
@@ -9,7 +9,6 @@ db.createCollection( tn , {capped: true, size: 1024 * 1024 * 1 } );
 t.insert( { _id : 5 , x : 11 , z : 52 } );
 assert.eq( 0 , t.getIndexKeys().length , "A0" )
 assert.eq( 52 , t.findOne( { x : 11 } ).z , "A1" );
-assert.eq( 52 , t.findOne( { _id : 5, x : 11 } ).z , "A2" );

 t.ensureIndex( { _id : 1 } )
 t.ensureIndex( { x : 1 } )
diff --git a/jstests/capped6.js b/jstests/capped6.js
index 65798075208..098f667732f 100644
--- a/jstests/capped6.js
+++ b/jstests/capped6.js
@@ -52,7 +52,7 @@ var max = 0;
  */
 function doTest() {
     for( var i = max; i < oldMax; ++i ) {
-        tzz.save( val[ i ] );
+        tzz.insert( val[ i ] );
     }
     max = oldMax;
     count = tzz.count();
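The capped2/capped5/capped6 changes above switch save() to insert() on capped collections; all of these tests depend on the fixed-size, oldest-first eviction of capped collections. A minimal sketch of that behavior (collection name illustrative only, not part of the commit):

    // documents are evicted oldest-first once the fixed size fills up
    db.createCollection( "cappedDemo", { capped: true, size: 4096 } );
    var t = db.cappedDemo;
    for ( var i = 0; i < 1000; i++ )
        t.insert( { _id: i, pad: new Array( 100 ).toString() } );
    assert( t.count() < 1000 );  // older documents were aged out
    // the most recently inserted document is still present
    assert.eq( 999, t.find().sort( { $natural: -1 } ).limit( 1 ).next()._id );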
diff --git a/jstests/cappeda.js b/jstests/cappeda.js
new file mode 100644
index 00000000000..4a4b14a64e5
--- /dev/null
+++ b/jstests/cappeda.js
@@ -0,0 +1,33 @@
+
+t = db.scan_capped_id;
+t.drop()
+
+x = t.runCommand( "create" , { capped : true , size : 10000 } )
+assert( x.ok )
+
+for ( i=0; i<100; i++ )
+    t.insert( { _id : i , x : 1 } )
+
+function q() {
+    return t.findOne( { _id : 5 } )
+}
+
+function u() {
+    t.update( { _id : 5 } , { $set : { x : 2 } } );
+    var gle = db.getLastError();
+    if ( gle )
+        throw gle;
+}
+
+
+// SERVER-3064
+//assert.throws( q , [] , "A1" );
+//assert.throws( u , [] , "B1" );
+
+t.ensureIndex( { _id : 1 } )
+
+assert.eq( 1 , q().x )
+q()
+u()
+
+assert.eq( 2 , q().x )
diff --git a/jstests/date3.js b/jstests/date3.js
new file mode 100644
index 00000000000..81b385a8616
--- /dev/null
+++ b/jstests/date3.js
@@ -0,0 +1,29 @@
+// Check dates before Unix epoch - SERVER-405
+
+t = db.date3;
+t.drop()
+
+d1 = new Date(-1000)
+dz = new Date(0)
+d2 = new Date(1000)
+
+t.save( {x: 2, d: d2} )
+t.save( {x: 1, d: d1} )
+
+function test () {
+    var list = t.find( {d: {$lt: dz}} )
+    assert.eq ( 1, list.size() )
+    assert.eq ( 1, list[0].x )
+    assert.eq ( d1, list[0].d )
+    var list = t.find( {d: {$gt: dz}} )
+    assert.eq ( 1, list.size() )
+    assert.eq ( 2, list[0].x )
+    var list = t.find().sort( {d:1} )
+    assert.eq ( 2, list.size() )
+    assert.eq ( 1, list[0].x )
+    assert.eq ( 2, list[1].x )
+}
+
+test()
+t.ensureIndex( {d: 1} )
+test()
diff --git a/jstests/dbhash.js b/jstests/dbhash.js
index e9cbc944b5f..7fea4b4d50c 100644
--- a/jstests/dbhash.js
+++ b/jstests/dbhash.js
@@ -14,16 +14,22 @@ db.getCollectionNames().forEach( function( x ) {
     }
 } );

+function dbhash( mydb ) {
+    var ret = mydb.runCommand( "dbhash" );
+    assert.commandWorked( ret, "dbhash failure" );
+    return ret;
+}
+
 function gh( coll , mydb ){
     if ( ! mydb ) mydb = db;
-    var x = mydb.runCommand( "dbhash" ).collections[coll.getName()];
+    var x = dbhash( mydb ).collections[coll.getName()];
     if ( ! x )
         return "";
     return x;
 }

 function dbh( mydb ){
-    return mydb.runCommand( "dbhash" ).md5;
+    return dbhash( mydb ).md5;
 }

 assert.eq( gh( a ) , gh( b ) , "A1" );
diff --git a/jstests/disk/quota.js b/jstests/disk/quota.js
new file mode 100644
index 00000000000..d93e5eaafc0
--- /dev/null
+++ b/jstests/disk/quota.js
@@ -0,0 +1,47 @@
+// Check functioning of --quotaFiles parameter, including with respect to SERVER-3293 ('local' database).
+
+port = allocatePorts( 1 )[ 0 ];
+
+baseName = "jstests_disk_quota";
+dbpath = "/data/db/" + baseName;
+
+m = startMongod( "--port", port, "--dbpath", "/data/db/" + baseName, "--quotaFiles", "1", "--smallfiles" );
+db = m.getDB( baseName );
+
+big = new Array( 10000 ).toString();
+
+// Insert documents until quota is exhausted.
+while( !db.getLastError() ) {
+    db[ baseName ].save( {b:big} );
+}
+printjson( db.getLastError() );
+
+dotTwoDataFile = dbpath + "/" + baseName + ".2";
+files = listFiles( dbpath );
+for( i in files ) {
+    // Since only one data file is allowed, a .0 file is expected and a .1 file may be preallocated (SERVER-3410) but no .2 file is expected.
+    assert.neq( dotTwoDataFile, files[ i ].name );
+}
+
+dotTwoDataFile = dbpath + "/" + "local" + ".2";
+// Check that quota does not apply to local db, and a .2 file can be created.
+l = m.getDB( "local" )[ baseName ];
+for( i = 0; i < 10000; ++i ) {
+    l.save( {b:big} );
+    assert( !db.getLastError() );
+    dotTwoFound = false;
+    if ( i % 100 != 0 ) {
+        continue;
+    }
+    files = listFiles( dbpath );
+    for( f in files ) {
+        if ( files[ f ].name == dotTwoDataFile ) {
+            dotTwoFound = true;
+        }
+    }
+    if ( dotTwoFound ) {
+        break;
+    }
+}
+
+assert( dotTwoFound );
diff --git a/jstests/disk/quota2.js b/jstests/disk/quota2.js
new file mode 100644
index 00000000000..c0d30dfecbf
--- /dev/null
+++ b/jstests/disk/quota2.js
@@ -0,0 +1,38 @@
+// Test for quotaFiles off by one file limit issue - SERVER-3420.
+
+if ( 0 ) { // SERVER-3420
+
+port = allocatePorts( 1 )[ 0 ];
+
+baseName = "jstests_disk_quota2";
+dbpath = "/data/db/" + baseName;
+
+m = startMongod( "--port", port, "--dbpath", "/data/db/" + baseName, "--quotaFiles", "1", "--smallfiles" );
+db = m.getDB( baseName );
+
+big = new Array( 10000 ).toString();
+
+// Insert documents until quota is exhausted.
+while( !db.getLastError() ) {
+    db[ baseName ].save( {b:big} );
+}
+
+db.resetError();
+
+// Trigger allocation of an additional file for a 'special' namespace.
+for( n = 0; !db.getLastError(); ++n ) {
+    db.createCollection( '' + n );
+}
+
+print( n );
+
+// Check that new docs are saved in the .0 file.
+for( i = 0; i < n; ++i ) {
+    c = db[ ''+i ];
+    c.save( {b:big} );
+    if( !db.getLastError() ) {
+        assert.eq( 0, c.find()._addSpecial( "$showDiskLoc", true )[ 0 ].$diskLoc.file );
+    }
+}
+
+}
\ No newline at end of file
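quota.js and quota2.js above infer quota enforcement from the data files left on disk. A condensed sketch of the same check, using the startMongod/listFiles helpers the tests themselves use (port and paths illustrative only, not part of the commit):

    // with --quotaFiles 1, writes fail once the first data file fills,
    // and no <dbname>.2 data file should ever appear for that db
    var m = startMongod( "--port", 31017, "--dbpath", "/data/db/quotaDemo",
                         "--quotaFiles", "1", "--smallfiles" );
    var d = m.getDB( "quotaDemo" );
    while ( !d.getLastError() )
        d.c.save( { pad: new Array( 10000 ).toString() } );
    files = listFiles( "/data/db/quotaDemo" );
    for ( i in files )
        assert( files[ i ].name.indexOf( "quotaDemo.2" ) < 0 );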
diff --git a/jstests/drop2.js b/jstests/drop2.js
index a1d619df1b3..87e646e1ee9 100644
--- a/jstests/drop2.js
+++ b/jstests/drop2.js
@@ -26,7 +26,7 @@ function op( drop ) {
     return null;
 }

-s1 = startParallelShell( "db.jstests_drop2.count( { $where: function() { while( 1 ) { ; } } } )" );
+s1 = startParallelShell( "db.jstests_drop2.count( { $where: function() { while( 1 ) { sleep( 1 ); } } } )" );

 countOp = null;
 assert.soon( function() { countOp = op( false ); return countOp; } );
diff --git a/jstests/dur/diskfull.js b/jstests/dur/diskfull.js
index da45c20afd4..c123ea1541e 100644
--- a/jstests/dur/diskfull.js
+++ b/jstests/dur/diskfull.js
@@ -14,23 +14,23 @@ for ( i in files ) {

 if ( !doIt ) {
     print( "path " + startPath + " missing, skipping diskfull test" );
     doIt = false;
-}
-
-function checkNoJournalFiles(path, pass) {
- var files = listFiles(path);
- if (files.some(function (f) { return f.name.indexOf("prealloc") < 0; })) {
- if (pass == null) {
- // wait a bit longer for mongod to potentially finish if it is still running.
- sleep(10000);
- return checkNoJournalFiles(path, 1);
- }
- print("\n\n\n");
- print("FAIL path:" + path);
- print("unexpected files:");
- printjson(files);
- assert(false, "FAIL a journal/lsn file is present which is unexpected");
- }
-}
+}
+
+function checkNoJournalFiles(path, pass) {
+    var files = listFiles(path);
+    if (files.some(function (f) { return f.name.indexOf("prealloc") < 0; })) {
+        if (pass == null) {
+            // wait a bit longer for mongod to potentially finish if it is still running.
+            sleep(10000);
+            return checkNoJournalFiles(path, 1);
+        }
+        print("\n\n\n");
+        print("FAIL path:" + path);
+        print("unexpected files:");
+        printjson(files);
+        assert(false, "FAIL a journal/lsn file is present which is unexpected");
+    }
+}

 /** Clear dbpath without removing and recreating diskfulltest directory, as resetDbpath does */
 function clear() {
@@ -56,7 +56,9 @@ function work() {
             d.foo.insert( { _id:i, b:big } );
         }

-        d.getLastError();
+        gle = d.getLastError();
+        if ( gle )
+            throw gle;
     } catch ( e ) {
         print( e );
         raise( e );
@@ -86,9 +88,8 @@ function runFirstMongodAndFillDisk() {
     conn = startMongodNoReset("--port", 30001, "--dbpath", startPath, "--dur", "--smallfiles", "--durOptions", 8, "--noprealloc");

     assert.throws( work, null, "no exception thrown when exceeding disk capacity" );

-    waitMongoProgramOnPort( 30001 );
-
-    // the above wait doesn't work on windows
+    stopMongod( 30001 );
+
     sleep(5000);
 }

@@ -104,9 +105,9 @@ function runSecondMongdAndRecover() {
     // stopMongod seems to be asynchronous (hmmm) so we sleep here.
     sleep(5000);

-    // at this point, after clean shutdown, there should be no journal files
- log("check no journal files");
- checkNoJournalFiles(startPath + "/journal/");
+    // at this point, after clean shutdown, there should be no journal files
+    log("check no journal files");
+    checkNoJournalFiles(startPath + "/journal/");

     log();
 }
@@ -133,4 +134,4 @@ if ( doIt ) {

 print(testname + " SUCCESS");
-}
\ No newline at end of file
+}
diff --git a/jstests/evald.js b/jstests/evald.js
index 78cabb68045..7b18f3cc893 100644
--- a/jstests/evald.js
+++ b/jstests/evald.js
@@ -53,10 +53,10 @@ function doIt( ev, wait, where ) {

 }

-doIt( "db.jstests_evald.count( { $where: function() { while( 1 ) { ; } } } )", true, true );
-doIt( "db.jstests_evald.count( { $where: function() { while( 1 ) { ; } } } )", false, true );
-doIt( "while( true ) {;}", false );
-doIt( "while( true ) {;}", true );
+doIt( "db.jstests_evald.count( { $where: function() { while( 1 ) { sleep(1); } } } )", true, true );
+doIt( "db.jstests_evald.count( { $where: function() { while( 1 ) { sleep(1); } } } )", false, true );
+doIt( "while( true ) { sleep(1);}", false );
+doIt( "while( true ) { sleep(1);}", true );

 // the for loops are currently required, as a spawned op masks the parent op - see SERVER-1931
 doIt( "while( 1 ) { for( var i = 0; i < 10000; ++i ) {;} db.jstests_evald.count( {i:10} ); }", true );
@@ -65,4 +65,4 @@ doIt( "while( 1 ) { for( var i = 0; i < 10000; ++i ) {;} db.jstests_evald.count(
 doIt( "while( 1 ) { for( var i = 0; i < 10000; ++i ) {;} db.jstests_evald.count(); }", false );

 doIt( "while( 1 ) { for( var i = 0; i < 10000; ++i ) {;} try { db.jstests_evald.count( {i:10} ); } catch ( e ) { } }", true );
-doIt( "while( 1 ) { try { while( 1 ) { ; } } catch ( e ) { } }", true );
+doIt( "while( 1 ) { try { while( 1 ) { sleep(1); } } catch ( e ) { } }", true );
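The drop2.js and evald.js edits above replace empty busy-wait loops with sleep(1) so the server can interrupt the spawned operations. The kill pattern those tests rely on looks roughly like this (collection name illustrative only, not part of the commit):

    // an interruptible $where loop: sleeping briefly each iteration lets the
    // server act on a kill, where "while( 1 ) { ; }" may spin without yielding
    var join = startParallelShell(
        "db.demo.count( { $where: function() { while( 1 ) { sleep( 1 ); } } } )" );
    db.currentOp().inprog.forEach( function( op ) {
        if ( op.ns == "test.demo" )
            db.killOp( op.opid );
    } );
    join();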
diff --git a/jstests/exists9.js b/jstests/exists9.js
index 09695ac4203..66378d1b424 100644
--- a/jstests/exists9.js
+++ b/jstests/exists9.js
@@ -25,8 +25,7 @@ assert.eq( 1, t.count( {a:{$exists:false}} ) );
 t.ensureIndex( {a:1} );
 assert.eq( 1, t.find( {a:{$exists:true}} ).hint( {a:1} ).itcount() );
 assert.eq( 1, t.find( {a:{$exists:false}} ).hint( {a:1} ).itcount() );
-// The empty array will be scanned, but not returned.
-assert.eq( 2, t.find( {a:{$exists:false}} ).hint( {a:1} ).explain().nscanned );
+assert.eq( 1, t.find( {a:{$exists:false}} ).hint( {a:1} ).explain().nscanned );

 t.drop();

@@ -39,6 +38,4 @@ assert.eq( 1, t.count( {'a.0':{$exists:false}} ) );
 // With index.
 t.ensureIndex( {'a.0':1} );
 assert.eq( 1, t.find( {'a.0':{$exists:true}} ).hint( {'a.0':1} ).itcount() );
-if ( 0 ) { // SERVER-2902
 assert.eq( 1, t.find( {'a.0':{$exists:false}} ).hint( {'a.0':1} ).itcount() );
-}
diff --git a/jstests/geo_mapreduce2.js b/jstests/geo_mapreduce2.js
new file mode 100644
index 00000000000..9c393457c7b
--- /dev/null
+++ b/jstests/geo_mapreduce2.js
@@ -0,0 +1,36 @@
+// Geo mapreduce 2 from SERVER-3478
+
+var coll = db.geoMR2
+coll.drop()
+
+for( var i = 0; i < 300; i++ )
+    coll.insert({ i : i, location : [ 10, 20 ] })
+
+coll.ensureIndex({ location : "2d" })
+
+// map function
+m = function() {
+    emit( null, { count : this.i } )
+}
+
+// reduce function
+r = function( key, values ) {
+
+    var total = 0
+    for ( var i = 0; i < values.length; i++ ) {
+        total += values[i].count
+    }
+
+    return { count : total }
+};
+
+try{ coll.mapReduce( m, r,
+                     { out : coll.getName() + "_mr",
+                       sort : { _id : 1 },
+                       query : { 'location' : { $within : { $centerSphere : [[ 10, 20 ], 0.01 ] } } } })
+
+}
+catch( e ){
+    // This should occur, since we can't in-mem sort for mreduce
+    printjson( e )
+}
diff --git a/jstests/group7.js b/jstests/group7.js
new file mode 100644
index 00000000000..5bf9232577c
--- /dev/null
+++ b/jstests/group7.js
@@ -0,0 +1,43 @@
+// Test yielding group command SERVER-1395
+
+t = db.jstests_group7;
+t.drop();
+
+function checkForYield( docs, updates ) {
+    t.drop();
+    a = 0;
+    for( var i = 0; i < docs; ++i ) {
+        t.save( {a:a} );
+    }
+    db.getLastError();
+
+    // Iteratively update all a values atomically.
+    p = startParallelShell( 'for( a = 0; a < ' + updates + '; ++a ) { db.jstests_group7.update( {$atomic:true}, {$set:{a:a}}, false, true ); db.getLastError(); }' );
+
+    for( var i = 0; i < updates; ++i ) {
+        ret = t.group({key:{a:1},reduce:function(){},initial:{}});
+        // Check if group sees more than one a value, indicating that it yielded.
+        if ( ret.length > 1 ) {
+            p();
+            return true;
+        }
+        printjson( ret );
+    }
+
+    p();
+    return false;
+}
+
+var yielded = false;
+var docs = 1500;
+var updates = 50;
+for( var j = 1; j <= 6; ++j ) {
+    if ( checkForYield( docs, updates ) ) {
+        yielded = true;
+        break;
+    }
+    // Increase docs and updates to encourage yielding.
+    docs *= 2;
+    updates *= 2;
+}
+assert( yielded );
\ No newline at end of file
diff --git a/jstests/in9.js b/jstests/in9.js
index b0d70b6a4fc..34cefb8278a 100644
--- a/jstests/in9.js
+++ b/jstests/in9.js
@@ -31,5 +31,5 @@ function doTest() {
 doTest();

 // SERVER-1943 not fixed yet
-//t.ensureIndex( {key:1} );
-//doTest();
+t.ensureIndex( {key:1} );
+doTest();
diff --git a/jstests/ina.js b/jstests/ina.js
new file mode 100644
index 00000000000..cf614ab994d
--- /dev/null
+++ b/jstests/ina.js
@@ -0,0 +1,15 @@
+// Uassert when $elemMatch is attempted within $in SERVER-3545
+
+t = db.jstests_ina;
+t.drop();
+t.save( {} );
+
+assert.throws( function() { t.find( {a:{$in:[{$elemMatch:{b:1}}]}} ).itcount(); } );
+assert.throws( function() { t.find( {a:{$not:{$in:[{$elemMatch:{b:1}}]}}} ).itcount(); } );
+
+assert.throws( function() { t.find( {a:{$nin:[{$elemMatch:{b:1}}]}} ).itcount(); } );
+assert.throws( function() { t.find( {a:{$not:{$nin:[{$elemMatch:{b:1}}]}}} ).itcount(); } );
+
+// NOTE Above we don't check cases like {b:2,$elemMatch:{b:3,4}} - generally
+// we assume that the first key is $elemMatch if any key is, and validating
+// every key is expensive in some cases.
\ No newline at end of file
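ina.js above asserts that $elemMatch is rejected inside $in/$nin (SERVER-3545); the supported spelling keeps $elemMatch at the top level of the field's predicate. Illustrative only, not part of the commit:

    // supported: match array elements with a top-level $elemMatch
    db.demo.find( { a: { $elemMatch: { b: 1 } } } );
    // uasserts per SERVER-3545: $elemMatch nested inside $in
    // db.demo.find( { a: { $in: [ { $elemMatch: { b: 1 } } ] } } );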
diff --git a/jstests/indexbindata.js b/jstests/indexbindata.js
new file mode 100755
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/jstests/indexbindata.js
diff --git a/jstests/indexr.js b/jstests/indexr.js
index b900e8ccbd5..60ecfb13ed2 100644
--- a/jstests/indexr.js
+++ b/jstests/indexr.js
@@ -28,17 +28,13 @@ t.remove();
 t.save( { a: [ { b: 3, c: 6 }, { b: 1, c: 1 } ] } );
 assert.eq( 1, t.count( { 'a.b':{ $gt:2 }, 'a.c': { $lt:4 } } ) );
-if ( 0 ) { // SERVER-3005
 assert.eq( 1, t.count( { a:{ b:3, c:6 }, 'a.c': { $lt:4 } } ) );
-}
 assert.eq( [[{$minElement:1},{$maxElement:1}]], t.find( { 'a.b':{ $gt:2 }, 'a.c': { $lt:4 } } ).explain().indexBounds['a.c'] );
 assert.eq( [[{$minElement:1},{$maxElement:1}]], t.find( { a:{ b:3, c:6 }, 'a.c': { $lt:4 } } ).explain().indexBounds['a.c'] );

 // Check reverse direction.
 assert.eq( 1, t.find( { 'a.b':{ $gt:2 }, 'a.c': { $lt:4 } } ).sort( {'a.b':-1} ).itcount() );
-if ( 0 ) { // SERVER-3005
 assert.eq( 1, t.find( { a:{ b:3, c:6 }, 'a.c': { $lt:4 } } ).sort( {a:-1} ).itcount() );
-}

 assert.eq( [[{$maxElement:1},{$minElement:1}]], t.find( { 'a.b':{ $gt:2 }, 'a.c': { $lt:4 } } ).sort( {'a.b':-1} ).explain().indexBounds['a.c'] );
 assert.eq( [[{$maxElement:1},{$minElement:1}]], t.find( { a:{ b:3, c:6 }, 'a.c': { $lt:4 } } ).sort( {a:-1} ).explain().indexBounds['a.c'] );
diff --git a/jstests/indexs.js b/jstests/indexs.js
index 3a52584bfd3..609f912affe 100644
--- a/jstests/indexs.js
+++ b/jstests/indexs.js
@@ -17,7 +17,5 @@ t.drop();
 t.ensureIndex( {a:1,'a.b':1} );
 t.save( { a: [ { b: 3 } ] } );
 assert.eq( ib, t.find( { a:{ b:3 } } ).explain().indexBounds );
-if ( 0 ) { // SERVER-3005
 assert.eq( 1, t.find( { a:{ b:3 } } ).explain().nscanned );
 assert.eq( 1, t.count( { a:{ b:3 } } ) );
-}
\ No newline at end of file
diff --git a/jstests/indext.js b/jstests/indext.js
new file mode 100644
index 00000000000..e418dc2e959
--- /dev/null
+++ b/jstests/indext.js
@@ -0,0 +1,21 @@
+// Sparse indexes with arrays SERVER-3216
+
+t = db.jstests_indext;
+t.drop();
+
+t.ensureIndex( {'a.b':1}, {sparse:true} );
+t.save( {a:[]} );
+t.save( {a:1} );
+assert.eq( 0, t.find().hint( {'a.b':1} ).itcount() );
+assert.eq( 0, t.find().hint( {'a.b':1} ).explain().nscanned );
+
+t.ensureIndex( {'a.b':1,'a.c':1}, {sparse:true} );
+t.save( {a:[]} );
+t.save( {a:1} );
+assert.eq( 0, t.find().hint( {'a.b':1,'a.c':1} ).itcount() );
+assert.eq( 0, t.find().hint( {'a.b':1,'a.c':1} ).explain().nscanned );
+
+t.save( {a:[{b:1}]} );
+t.save( {a:1} );
+assert.eq( 1, t.find().hint( {'a.b':1,'a.c':1} ).itcount() );
+assert.eq( 1, t.find().hint( {'a.b':1,'a.c':1} ).explain().nscanned );
diff --git a/jstests/indexu.js b/jstests/indexu.js
new file mode 100644
index 00000000000..c7fa8ed3365
--- /dev/null
+++ b/jstests/indexu.js
@@ -0,0 +1,137 @@
+// Test index key generation with duplicate values addressed by array index and
+// object field. SERVER-2902
+
+t = db.jstests_indexu;
+t.drop();
+
+var dupDoc = {a:[{'0':1}]}; // There are two 'a.0' fields in this doc.
+var dupDoc2 = {a:[{'1':1},'c']};
+var noDupDoc = {a:[{'1':1}]};
+
+// Test that we can't index dupDoc.
+t.save( dupDoc );
+assert( !db.getLastError() );
+t.ensureIndex( {'a.0':1} );
+assert( db.getLastError() );
+
+t.remove();
+t.ensureIndex( {'a.0':1} );
+assert( !db.getLastError() );
+t.save( dupDoc );
+assert( db.getLastError() );
+
+// Test that we can't index dupDoc2.
+t.drop();
+t.save( dupDoc2 );
+assert( !db.getLastError() );
+t.ensureIndex( {'a.1':1} );
+assert( db.getLastError() );
+
+t.remove();
+t.ensureIndex( {'a.1':1} );
+assert( !db.getLastError() );
+t.save( dupDoc2 );
+assert( db.getLastError() );
+
+// Test that we can index dupDoc with a different index.
+t.drop();
+t.ensureIndex( {'a.b':1} );
+t.save( dupDoc );
+assert( !db.getLastError() );
+
+// Test number field starting with hyphen.
+t.drop();
+t.ensureIndex( {'a.-1':1} );
+t.save( {a:[{'-1':1}]} );
+assert( !db.getLastError() );
+
+// Test number field starting with zero.
+t.drop();
+t.ensureIndex( {'a.00':1} );
+t.save( {a:[{'00':1}]} );
+assert( !db.getLastError() );
+
+// Test multiple array indexes
+t.drop();
+t.ensureIndex( {'a.0':1,'a.1':1} );
+t.save( {a:[{'1':1}]} );
+assert( !db.getLastError() );
+t.save( {a:[{'1':1},4]} );
+assert( db.getLastError() );
+
+// Test that we can index noDupDoc.
+t.drop();
+t.save( noDupDoc );
+t.ensureIndex( {'a.0':1} );
+assert( !db.getLastError() );
+t.ensureIndex( {'a.1':1} );
+assert( !db.getLastError() );
+
+t.drop();
+t.ensureIndex( {'a.0':1} );
+t.ensureIndex( {'a.1':1} );
+t.save( noDupDoc );
+assert( !db.getLastError() );
+
+// Test that we can query noDupDoc.
+assert.eq( 1, t.find( {'a.1':1} ).hint( {'a.1':1} ).itcount() );
+assert.eq( 1, t.find( {'a.1':1} ).hint( {$natural:1} ).itcount() );
+assert.eq( 1, t.find( {'a.0':{'1':1}} ).hint( {'a.0':1} ).itcount() );
+assert.eq( 1, t.find( {'a.0':{'1':1}} ).hint( {$natural:1} ).itcount() );
+
+// Check multiple nested array fields.
+t.drop();
+t.save( {a:[[1]]} );
+t.ensureIndex( {'a.0.0':1} );
+assert( !db.getLastError() );
+assert.eq( 1, t.find( {'a.0.0':1} ).hint( {$natural:1} ).itcount() );
+assert.eq( 1, t.find( {'a.0.0':1} ).hint( {'a.0.0':1} ).itcount() );
+
+// Check where there is a duplicate for a partially addressed field but not for a fully addressed field.
+t.drop();
+t.save( {a:[[1],{'0':1}]} );
+t.ensureIndex( {'a.0.0':1} );
+assert( db.getLastError() );
+
+// Check where there is a duplicate for a fully addressed field.
+t.drop();
+t.save( {a:[[1],{'0':[1]}]} );
+assert( !db.getLastError() );
+t.ensureIndex( {'a.0.0':1} );
+assert( db.getLastError() );
+
+// Two ways of addressing parse to an array.
+t.drop();
+t.save( {a:[{'0':1}]} );
+t.ensureIndex( {'a.0.0':1} );
+assert( db.getLastError() );
+
+// Test several key depths - with same arrays being found.
+t.drop();
+t.save( {a:[{'0':[{'0':1}]}]} );
+t.ensureIndex( {'a.0.0.0.0.0.0':1} );
+assert( db.getLastError() );
+t.ensureIndex( {'a.0.0.0.0.0':1} );
+assert( db.getLastError() );
+t.ensureIndex( {'a.0.0.0.0':1} );
+assert( db.getLastError() );
+t.ensureIndex( {'a.0.0.0':1} );
+assert( db.getLastError() );
+t.ensureIndex( {'a.0.0':1} );
+assert( db.getLastError() );
+t.ensureIndex( {'a.0':1} );
+assert( db.getLastError() );
+t.ensureIndex( {'a':1} );
+assert( !db.getLastError() );
+
+// Two prefixes extract docs, but one terminates extraction before array.
+t.drop();
+t.save( {a:[{'0':{'c':[]}}]} );
+t.ensureIndex( {'a.0.c':1} );
+assert( db.getLastError() );
+
+t.drop();
+t.save( {a:[[{'b':1}]]} );
+assert.eq( 1, t.find( {'a.0.b':1} ).itcount() );
+t.ensureIndex( {'a.0.b':1} );
+assert.eq( 1, t.find( {'a.0.b':1} ).itcount() );
diff --git a/jstests/indexv.js b/jstests/indexv.js
new file mode 100644
index 00000000000..a69ff2a4664
--- /dev/null
+++ b/jstests/indexv.js
@@ -0,0 +1,18 @@
+// Check null key generation.
+
+t = db.jstests_indexv;
+t.drop();
+
+t.ensureIndex( {'a.b':1} );
+
+t.save( {a:[{},{b:1}]} );
+var e = t.find( {'a.b':null} ).explain();
+assert.eq( 0, e.n );
+assert.eq( 1, e.nscanned );
+
+t.drop();
+t.ensureIndex( {'a.b.c':1} );
+t.save( {a:[{b:[]},{b:{c:1}}]} );
+var e = t.find( {'a.b.c':null} ).explain();
+assert.eq( 0, e.n );
+assert.eq( 1, e.nscanned );
diff --git a/jstests/indexw.js b/jstests/indexw.js
new file mode 100644
index 00000000000..326443400d1
--- /dev/null
+++ b/jstests/indexw.js
@@ -0,0 +1,14 @@
+// Check that v0 keys are generated for v0 indexes SERVER-3375
+
+t = db.jstests_indexw;
+t.drop();
+
+t.save( {a:[]} );
+assert.eq( 1, t.count( {a:[]} ) );
+t.ensureIndex( {a:1} );
+assert.eq( 1, t.count( {a:[]} ) );
+t.dropIndexes();
+
+// The count result is incorrect - just checking here that v0 key generation is used.
+t.ensureIndex( {a:1}, {v:0} );
+assert.eq( 0, t.count( {a:[]} ) );
diff --git a/jstests/libs/testconfig b/jstests/libs/testconfig
new file mode 100644
index 00000000000..0c1fc871d61
--- /dev/null
+++ b/jstests/libs/testconfig
@@ -0,0 +1,4 @@
+fastsync = true
+#comment line
+#commentedflagwithan = false
+version = false
diff --git a/jstests/ork.js b/jstests/ork.js
new file mode 100644
index 00000000000..d6d40161e69
--- /dev/null
+++ b/jstests/ork.js
@@ -0,0 +1,11 @@
+// SERVER-2585 Test $or clauses within indexed top level $or clauses.
+
+t = db.jstests_ork;
+t.drop();
+
+t.ensureIndex( {a:1} );
+t.save( {a:[1,2],b:5} );
+t.save( {a:[2,4],b:5} );
+
+assert.eq( 2, t.find( {$or:[{a:1,$and:[{$or:[{a:2},{a:3}]},{$or:[{b:5}]}]},{a:2,$or:[{a:3},{a:4}]}]} ).itcount() );
+assert.eq( 1, t.find( {$or:[{a:1,$and:[{$or:[{a:2},{a:3}]},{$or:[{b:6}]}]},{a:2,$or:[{a:3},{a:4}]}]} ).itcount() );
diff --git a/jstests/orl.js b/jstests/orl.js
new file mode 100644
index 00000000000..2726975d5aa
--- /dev/null
+++ b/jstests/orl.js
@@ -0,0 +1,13 @@
+// SERVER-3445 Test using coarse multikey bounds for or range elimination.
+
+t = db.jstests_orl;
+t.drop();
+
+t.ensureIndex( {'a.b':1,'a.c':1} );
+// make the index multikey
+t.save( {a:{b:[1,2]}} );
+
+// SERVER-3445
+if ( 0 ) {
+assert( !t.find( {$or:[{'a.b':2,'a.c':3},{'a.b':2,'a.c':4}]} ).explain().clauses );
+}
\ No newline at end of file
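orl.js above inspects explain().clauses to see whether the $or optimizer kept separate per-clause plans. A sketch of that inspection (collection name illustrative only, not part of the commit):

    // when $or clauses are eliminated, explain() carries no "clauses"
    // array and the query runs as a single plan
    var exp = db.demo.find( { $or: [ { x: 1 }, { x: 2 } ] } ).explain();
    if ( exp.clauses )
        print( "ran " + exp.clauses.length + " $or clauses" );
    else
        print( "collapsed to a single plan: " + exp.cursor );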
diff --git a/jstests/orm.js b/jstests/orm.js
new file mode 100644
index 00000000000..83183f05a59
--- /dev/null
+++ b/jstests/orm.js
@@ -0,0 +1,26 @@
+// Test dropping during a $or yield SERVER-3555
+
+if ( 0 ) { // SERVER-3555
+
+t = db.jstests_orm;
+t.drop();
+
+clauses = [];
+for( i = 0; i < 10; ++i ) {
+    clauses.push( {a:{$lte:(i+1)*5000/10},i:49999} );
+    clauses.push( {b:{$lte:(i+1)*5000/10},i:49999} );
+}
+
+p = startParallelShell( 'for( i = 0; i < 30; ++i ) { sleep( 1000 ); db.jstests_orm.drop() }' );
+for( j = 0; j < 10; ++j ) {
+    for( i = 0; i < 5000; ++i ) {
+        t.save( {a:i,i:i} );
+        t.save( {b:i,i:i} );
+    }
+    t.ensureIndex( {a:1} );
+    t.ensureIndex( {b:1} );
+    t.find( {$or:clauses} ).itcount();
+}
+p();
+
+}
\ No newline at end of file
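orm.js (disabled until SERVER-3555 is fixed) uses the standard jstests pattern of racing a query against a parallel shell that drops the collection. The skeleton of that pattern, with sizes shrunk for illustration (not part of the commit):

    // startParallelShell returns a join function; the spawned shell drops
    // the collection while the main shell keeps inserting and querying
    var join = startParallelShell(
        "for ( var i = 0; i < 5; ++i ) { sleep( 1000 ); db.demo.drop(); }" );
    for ( var j = 0; j < 5; ++j ) {
        for ( var i = 0; i < 1000; ++i )
            db.demo.save( { a: i } );
        db.demo.find( { $or: [ { a: { $lte: 500 } } ] } ).itcount();
    }
    join();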
diff --git a/jstests/profile1.js b/jstests/profile1.js
index eed64f60ae2..9654357127f 100644
--- a/jstests/profile1.js
+++ b/jstests/profile1.js
@@ -1,3 +1,4 @@
+print("profile1.js BEGIN");

 try {

@@ -61,21 +62,50 @@ try {
     after = db.system.profile.count()
     assert.eq( before + 3 , after , "X1" )

+    /* sleep() could be inaccurate on certain platforms. let's check */
+    print("\nsleep 2 time actual:");
+    for (var i = 0; i < 4; i++) {
+        print(db.eval("var x = new Date(); sleep(2); return new Date() - x;"));
+    }
+    print();
+    print("\nsleep 20 times actual:");
+    for (var i = 0; i < 4; i++) {
+        print(db.eval("var x = new Date(); sleep(20); return new Date() - x;"));
+    }
+    print();
+    print("\nsleep 120 times actual:");
+    for (var i = 0; i < 4; i++) {
+        print(db.eval("var x = new Date(); sleep(120); return new Date() - x;"));
+    }
+    print();
+
+    function evalSleepMoreThan(millis,max){
+        var start = new Date();
+        db.eval("sleep("+millis+")");
+        var end = new Date();
+        var actual = end.getTime() - start.getTime();
+        if ( actual > ( millis + 5 ) ) {
+            print( "warning wanted to sleep for: " + millis + " but took: " + actual );
+        }
+        return actual >= max ? 1 : 0;
+    }
+
     db.setProfilingLevel(1,100);
     before = db.system.profile.count();
-    db.eval( "sleep(25)" )
-    db.eval( "sleep(120)" )
+    var delta = 0;
+    delta += evalSleepMoreThan( 15 , 100 );
+    delta += evalSleepMoreThan( 120 , 100 );
     after = db.system.profile.count()
-    assert.eq( before + 1 , after , "X2 : " + getProfileAString() )
+    assert.eq( before + delta , after , "X2 : " + getProfileAString() )

     db.setProfilingLevel(1,20);
     before = db.system.profile.count();
-    db.eval( "sleep(25)" )
-    db.eval( "sleep(120)" )
+    delta = 0;
+    delta += evalSleepMoreThan( 5 , 20 );
+    delta += evalSleepMoreThan( 120 , 20 );
     after = db.system.profile.count()
-    assert.eq( before + 2 , after , "X3 : " + getProfileAString() )
-
-
+    assert.eq( before + delta , after , "X3 : " + getProfileAString() )
+
     db.profile.drop();
     db.setProfilingLevel(2)
     var q = { _id : 5 };
@@ -85,7 +115,9 @@ try {
     assert.eq( q , r.query , "Y1" );
     assert.eq( u , r.updateobj , "Y2" );
     assert.eq( "update" , r.op , "Y3" );
-    assert.eq( "test.profile1" , r.ns , "Y4" );
+    assert.eq("test.profile1", r.ns, "Y4");
+
+    print("profile1.js SUCCESS OK");

 } finally {
     // disable profiling for subsequent tests
diff --git a/jstests/profile2.js b/jstests/profile2.js
new file mode 100644
index 00000000000..929b463ca3d
--- /dev/null
+++ b/jstests/profile2.js
@@ -0,0 +1,19 @@
+print("profile2.js BEGIN");
+
+try {
+
+    assert.commandWorked( db.runCommand( {profile:2} ) );
+
+    huge = 'huge';
+    while (huge.length < 2*1024*1024){
+        huge += huge;
+    }
+
+    db.profile2.count({huge:huge}) // would make a huge entry in db.system.profile
+
+    print("profile2.js SUCCESS OK");
+
+} finally {
+    // disable profiling for subsequent tests
+    assert.commandWorked( db.runCommand( {profile:0} ) );
+}
diff --git a/jstests/profile3.js b/jstests/profile3.js
new file mode 100644
index 00000000000..a6574b76f8a
--- /dev/null
+++ b/jstests/profile3.js
@@ -0,0 +1,26 @@
+
+t = db.profile3;
+t.drop();
+
+try {
+    db.setProfilingLevel(0);
+
+    db.system.profile.drop();
+    assert.eq( 0 , db.system.profile.count() )
+
+    db.setProfilingLevel(2);
+
+    t.insert( { x : 1 } );
+    t.findOne( { x : 1 } );
+    t.find( { x : 1 } ).count();
+
+    db.system.profile.find().forEach( printjson )
+
+    db.setProfilingLevel(0);
+    db.system.profile.drop();
+
+}
+finally {
+    db.setProfilingLevel(0);
+}
+
diff --git a/jstests/regexa.js b/jstests/regexa.js
index e9644627548..b0d47190e77 100644
--- a/jstests/regexa.js
+++ b/jstests/regexa.js
@@ -14,6 +14,6 @@ t.save( {a:'a'} );
 check();

 t.ensureIndex( {a:1} );
-if ( 0 ) { // SERVER-3298
+if ( 1 ) { // SERVER-3298
 check();
-}
\ No newline at end of file
+}
diff --git a/jstests/repl/basic1.js b/jstests/repl/basic1.js
index aaa07dc6cc1..4a6091d9755 100644
--- a/jstests/repl/basic1.js
+++ b/jstests/repl/basic1.js
@@ -160,6 +160,8 @@ assert.eq( 0 , as.system.profile.count() , "P2" )
 assert.eq( 1 , as.foo.findOne().x , "P3" );
 assert.eq( 0 , as.system.profile.count() , "P4" )

+assert( as.getCollectionNames().indexOf( "system.profile" ) < 0 , "P4.5" )
+
 as.setProfilingLevel(2)
 as.foo.findOne();
 assert.eq( 1 , as.system.profile.count() , "P5" )
diff --git a/jstests/repl/drop_dups.js b/jstests/repl/drop_dups.js
new file mode 100644
index 00000000000..1fa9984ea06
--- /dev/null
+++ b/jstests/repl/drop_dups.js
@@ -0,0 +1,63 @@
+
+var rt = new ReplTest( "drop_dups" );
+
+m = rt.start( true );
+s = rt.start( false );
+
+function block(){
+    am.runCommand( { getlasterror : 1 , w : 2 , wtimeout : 3000 } )
+}
+
+am = m.getDB( "foo" );
+as = s.getDB( "foo" );
+
+function run( createInBackground ) {
+
+    collName = "foo" + ( createInBackground ? "B" : "F" );
+
+    am[collName].drop();
+    am.blah.insert( { x : 1 } )
+    block();
+
+    for ( i=0; i<10; i++ ) {
+        am[collName].insert( { _id : i , x : Math.floor( i / 2 ) } )
+    }
+
+    block();
+
+    am.runCommand( { "godinsert" : collName , obj : { _id : 100 , x : 20 } } );
+    am.runCommand( { "godinsert" : collName , obj : { _id : 101 , x : 20 } } );
+
+    as.runCommand( { "godinsert" : collName , obj : { _id : 101 , x : 20 } } );
+    as.runCommand( { "godinsert" : collName , obj : { _id : 100 , x : 20 } } );
+
+    assert.eq( as[collName].count() , am[collName].count() );
+
+    function mymap(z) {
+        return z._id + ":" + z.x + ",";
+    }
+
+
+    if ( am.serverStatus().mem.bits == 64 ) {
+        assert.neq( tojson(am[collName].find().map(mymap)) ,
+                    tojson(as[collName].find().map(mymap)) , "order is not supposed to be same on master and slave but it is" );
+    }
+
+
+    am[collName].ensureIndex( { x : 1 } , { unique : true , dropDups : true , background : createInBackground } );
+    am.blah.insert( { x : 1 } )
+    block();
+
+    assert.eq( 2 , am[collName].getIndexKeys().length , "A1 : " + createInBackground )
+    assert.eq( 2 , as[collName].getIndexKeys().length , "A2 : " + createInBackground )
+
+    assert.eq( am[collName].find().sort( { _id : 1 } ).map(mymap) ,
+               as[collName].find().sort( { _id : 1 } ).map(mymap) , "different things dropped on master and slave" );
+
+
+}
+
+run( false )
+run( true )
+
+rt.stop()
diff --git a/jstests/repl/repl3.js b/jstests/repl/repl3.js
index d3c38486b19..5ace9b69d2f 100644
--- a/jstests/repl/repl3.js
+++ b/jstests/repl/repl3.js
@@ -10,38 +10,42 @@ soonCount = function( count ) {
     } );
 }

-doTest = function( signal ) {
-
-    rt = new ReplTest( "repl3tests" );
-
-    m = rt.start( true );
-    s = rt.start( false );
-
-    am = m.getDB( baseName ).a
-
-    am.save( { _id: new ObjectId() } );
-    soonCount( 1 );
-    rt.stop( false, signal );
-
-    big = new Array( 2000 ).toString();
-    for( i = 0; i < 1000; ++i )
-        am.save( { _id: new ObjectId(), i: i, b: big } );
-
-    s = rt.start( false, { autoresync: null }, true );
-
+doTest = function (signal) {
+
+    print("repl3.js doTest(" + signal + ")")
+
+    rt = new ReplTest("repl3tests");
+
+    m = rt.start(true);
+    s = rt.start(false);
+
+    am = m.getDB(baseName).a
+
+    am.save({ _id: new ObjectId() });
+    soonCount(1);
+    rt.stop(false, signal);
+
+    big = new Array(2000).toString();
+    for (i = 0; i < 1000; ++i)
+        am.save({ _id: new ObjectId(), i: i, b: big });
+
+    s = rt.start(false, { autoresync: null }, true);
+
     // after SyncException, mongod waits 10 secs.
-    sleep( 15000 );
-
+    sleep(15000);
+
     // Need the 2 additional seconds timeout, since commands don't work on an 'allDead' node.
-    soonCount( 1001 );
-    as = s.getDB( baseName ).a
-    assert.eq( 1, as.find( { i: 0 } ).count() );
-    assert.eq( 1, as.find( { i: 999 } ).count() );
-
-    assert.commandFailed( s.getDB( "admin" ).runCommand( { "resync" : 1 } ) );
+    soonCount(1001);
+    as = s.getDB(baseName).a
+    assert.eq(1, as.find({ i: 0 }).count());
+    assert.eq(1, as.find({ i: 999 }).count());
+
+    assert.commandFailed(s.getDB("admin").runCommand({ "resync": 1 }));

     rt.stop();
 }

 doTest( 15 ); // SIGTERM
 doTest( 9 ); // SIGKILL
+
+print("repl3.js OK")
diff --git a/jstests/replsets/auth1.js b/jstests/replsets/auth1.js
index e9765c08153..edc162cca16 100644
--- a/jstests/replsets/auth1.js
+++ b/jstests/replsets/auth1.js
@@ -81,6 +81,10 @@ function doQueryOn(p) {
 doQueryOn(slave);

 master.adminCommand({logout:1});
+
+print("unauthorized:");
+printjson(master.adminCommand({replSetGetStatus : 1}));
+
 doQueryOn(master);
diff --git a/jstests/replsets/downstream.js b/jstests/replsets/downstream.js
new file mode 100755
index 00000000000..795e6671d46
--- /dev/null
+++ b/jstests/replsets/downstream.js
@@ -0,0 +1,36 @@
+// BUG: [SERVER-1768] replica set getlasterror {w: 2} after 2000
+// inserts hangs while secondary servers log "replSet error RS102 too stale to catch up" every once in a while
+
+function newReplicaSet (name, numServers) {
+    var rs = new ReplSetTest({name: name, nodes: numServers})
+    rs.startSet()
+    rs.initiate()
+    rs.awaitReplication()
+    return rs
+}
+
+function go() {
+var N = 2000
+
+// ~1KB string
+var Text = ''
+for (var i = 0; i < 40; i++)
+    Text += 'abcdefghijklmnopqrstuvwxyz'
+
+// Create replica set of 3 servers
+var repset = newReplicaSet('repset', 3)
+var conn = repset.getMaster()
+var db = conn.getDB('test')
+
+// Add data to it
+for (var i = 0; i < N; i++)
+    db['foo'].insert({x: i, text: Text})
+
+// wait to be copied to at least one secondary (BUG hangs here)
+db.getLastError(2)
+
+print('getlasterror_w2.js SUCCESS')
+}
+
+// turn off until fixed
+//go();
diff --git a/jstests/replsets/fastsync.js b/jstests/replsets/fastsync.js
index 5ba978481cd..1c9c2152ebb 100644
--- a/jstests/replsets/fastsync.js
+++ b/jstests/replsets/fastsync.js
@@ -48,7 +48,7 @@ var admin = p.getDB("admin");
 var foo = p.getDB("foo");
 var local = p.getDB("local");

-var config = {_id : basename, members : [{_id : 0, host : hostname+":"+ports[0]}]};
+var config = {_id : basename, members : [{_id : 0, host : hostname+":"+ports[0], priority:2}]};
 printjson(config);
 var result = admin.runCommand({replSetInitiate : config});
 print("result:");
@@ -98,6 +98,7 @@ var startSlave = function(n) {
     config.members.push({_id:n, host:hostname+":"+ports[n]});

     result = admin.runCommand({replSetReconfig : config});
+    printjson(result);
     assert(result.ok, "reconfig worked");
     reconnect(p);

@@ -125,6 +126,10 @@ var startSlave = function(n) {

     assert.eq(status.members[n].state, 2);

+    assert.soon(function() {
+        return admin.runCommand({isMaster : 1}).ismaster;
+    });
+
     admin.foo.insert({x:1});
     assert.soon(function() {
         var last = local.oplog.rs.find().sort({$natural:-1}).limit(1).next();
diff --git a/jstests/replsets/maintenance.js b/jstests/replsets/maintenance.js
new file mode 100644
index 00000000000..5b068cd3d8e
--- /dev/null
+++ b/jstests/replsets/maintenance.js
@@ -0,0 +1,32 @@
+
+
+var replTest = new ReplSetTest( {name: 'unicomplex', nodes: 3} );
+var conns = replTest.startSet();
+replTest.initiate();
+
+// Make sure we have a master
+var master = replTest.getMaster();
+
+for (i=0;i<10000; i++) { master.getDB("bar").foo.insert({x:1,y:i,abc:123,str:"foo bar baz"}); }
+for (i=0;i<1000; i++) { master.getDB("bar").foo.update({y:i},{$push :{foo : "barrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr"}}); }
+
+replTest.awaitReplication();
+
+assert.soon(function() { return conns[2].getDB("admin").isMaster().secondary; });
+
+join = startParallelShell( "db.getSisterDB('bar').runCommand({compact : 'foo'});", replTest.ports[2] );
+
+print("check secondary goes to recovering");
+assert.soon(function() { return !conns[2].getDB("admin").isMaster().secondary; });
+
+print("joining");
+join();
+
+print("check secondary becomes a secondary again");
+var x = 0;
+assert.soon(function() {
+    var im = conns[2].getDB("admin").isMaster();
+    if (x++ % 5 == 0) printjson(im);
+    return im.secondary;
+});
+
diff --git a/jstests/replsets/remove1.js b/jstests/replsets/remove1.js
index 6b9cf5d3c30..f93fe9eb071 100644
--- a/jstests/replsets/remove1.js
+++ b/jstests/replsets/remove1.js
@@ -92,8 +92,12 @@ print("reconfig with minority");
 replTest.stop(1);

 assert.soon(function() {
-    reconnect(master);
-    return master.getDB("admin").runCommand({isMaster : 1}).secondary;
+    try {
+        return master.getDB("admin").runCommand({isMaster : 1}).secondary;
+    }
+    catch(e) {
+        print("trying to get master: "+e);
+    }
 });

 config.version = 4;
diff --git a/jstests/replsets/replset5.js b/jstests/replsets/replset5.js
index a861bd6ff04..67ce2d78bcd 100644
--- a/jstests/replsets/replset5.js
+++ b/jstests/replsets/replset5.js
@@ -23,53 +23,63 @@ doTest = function (signal) {

     master.getDB("barDB").bar.save({ a: 1 });
replTest.awaitReplication();
-    // These writes should be replicated immediately
-    var docNum = 5000;
-    for(var n=0; n<docNum; n++) {
-        master.getDB(testDB).foo.insert({ n: n });
-    }
-
-    // If you want to test failure, just add values for w and wtimeout
-    // to the following command. This will override the default set above and
-    // prevent replication from happening in time for the count tests below.
-    var result = master.getDB("admin").runCommand({getlasterror: 1});
-    printjson(result);
-
+    // These writes should be replicated immediately
+ var docNum = 5000;
+ for (var n = 0; n < docNum; n++) {
+ master.getDB(testDB).foo.insert({ n: n });
+ }
+
+ // should use the configured last error defaults from above, that's what we're testing.
+ //
+ // If you want to test failure, just add values for w and wtimeout (e.g. w=1)
+ // to the following command. This will override the default set above and
+ // prevent replication from happening in time for the count tests below.
+ //
+ var result = master.getDB("admin").runCommand({ getlasterror: 1 });
+ print("replset5.js getlasterror result:");
+ printjson(result);
+
+ if (result.err == "timeout") {
+        print("\nWARNING getLastError timed out and should not have.\nThis machine seems extremely slow. Stopping test without failing it\n")
+ replTest.stopSet(signal);
+        print("\nWARNING getLastError timed out and should not have.\nThis machine seems extremely slow. Stopping test without failing it\n")
+ return;
+ }
var slaves = replTest.liveNodes.slaves;
slaves[0].setSlaveOk();
slaves[1].setSlaveOk();
- print("Testing slave counts");
+ print("replset5.js Testing slave counts");
+
+ var slave0count = slaves[0].getDB(testDB).foo.count();
+ assert(slave0count == docNum, "Slave 0 has " + slave0count + " of " + docNum + " documents!");
+
+ var slave1count = slaves[1].getDB(testDB).foo.count();
+ assert(slave1count == docNum, "Slave 1 has " + slave1count + " of " + docNum + " documents!");
+
+ var master1count = master.getDB(testDB).foo.count();
+ assert(master1count == docNum, "Master has " + master1count + " of " + docNum + " documents!");
+
+ print("replset5.js reconfigure with hidden=1");
+ config = master.getDB("local").system.replset.findOne();
+ config.version++;
+ config.members[2].priority = 0;
+ config.members[2].hidden = 1;
+
+ try {
+ master.adminCommand({ replSetReconfig: config });
+ }
+ catch (e) {
+ print(e);
+ }
+
+ config = master.getDB("local").system.replset.findOne();
+ printjson(config);
+ assert.eq(config.members[2].hidden, true);
- var slave0count = slaves[0].getDB(testDB).foo.count(); - assert( slave0count == docNum, "Slave 0 has " + slave0count + " of " + docNum + " documents!"); - - var slave1count = slaves[1].getDB(testDB).foo.count(); - assert( slave1count == docNum, "Slave 1 has " + slave1count + " of " + docNum + " documents!"); - - var master1count = master.getDB(testDB).foo.count(); - assert( master1count == docNum, "Master has " + master1count + " of " + docNum + " documents!"); - - print("reconfigure with hidden=1"); - config = master.getDB("local").system.replset.findOne(); - config.version++; - config.members[2].priority = 0; - config.members[2].hidden = 1; - - try { - master.adminCommand({replSetReconfig : config}); - } - catch(e) { - print(e); - } - - config = master.getDB("local").system.replset.findOne(); - printjson(config); - assert.eq(config.members[2].hidden, true); - replTest.stopSet(signal);
}

-doTest( 15 );
-print("replset5.js success");
+doTest( 15 );
+print("replset5.js success");
diff --git a/jstests/replsets/replsetarb2.js b/jstests/replsets/replsetarb2.js
index a20c41b79c0..6f712cbc257 100644
--- a/jstests/replsets/replsetarb2.js
+++ b/jstests/replsets/replsetarb2.js
@@ -8,11 +8,11 @@ doTest = function( signal ) {
     print(tojson(nodes));

     var conns = replTest.startSet();
-    var r = replTest.initiate({"_id" : "unicomplex",
+    var r = replTest.initiate({"_id" : "unicomplex",
                   "members" : [
-                       {"_id" : 0, "host" : nodes[0] },
-                       {"_id" : 1, "host" : nodes[1], "arbiterOnly" : true, "votes": 1},
-                       {"_id" : 2, "host" : nodes[2] }]});
+                       {"_id" : 0, "host" : nodes[0] },
+                       {"_id" : 1, "host" : nodes[1], "arbiterOnly" : true, "votes": 1, "priority" : 0},
+                       {"_id" : 2, "host" : nodes[2] }]});

     // Make sure we have a master
     var master = replTest.getMaster();
@@ -24,6 +24,10 @@ doTest = function( signal ) {
         return res.myState == 7;
     }, "Aribiter failed to initialize.");

+    var result = conns[1].getDB("admin").runCommand({isMaster : 1});
+    assert(result.arbiterOnly);
+    assert(!result.passive);
+
     // Wait for initial replication
     master.getDB("foo").foo.insert({a: "foo"});
     replTest.awaitReplication();
diff --git a/jstests/replsets/rollback2.js b/jstests/replsets/rollback2.js
index 46fb548ccdf..7ab3c6bf4ee 100644
--- a/jstests/replsets/rollback2.js
+++ b/jstests/replsets/rollback2.js
@@ -202,9 +202,24 @@ doTest = function (signal) {
     wait(function () { return B.isMaster().ismaster || B.isMaster().secondary; });

     // everyone is up here...
-    assert(A.isMaster().ismaster || A.isMaster().secondary, "A up");
-    assert(B.isMaster().ismaster || B.isMaster().secondary, "B up");
     replTest.awaitReplication();
+
+    // theoretically, a read could slip in between StateBox::change() printing
+    // replSet SECONDARY
+    // and the replset actually becoming secondary
+    // so we're trying to wait for that here
+    print("waiting for secondary");
+    assert.soon(function() {
+        try {
+            var aim = A.isMaster();
+            var bim = B.isMaster();
+            return (aim.ismaster || aim.secondary) &&
+                (bim.ismaster || bim.secondary);
+        }
+        catch(e) {
+            print("checking A and B: "+e);
+        }
+    });

     verify(a);
diff --git a/jstests/replsets/tags.js b/jstests/replsets/tags.js
index 8ef8a0a12cc..4e738862afe 100644
--- a/jstests/replsets/tags.js
+++ b/jstests/replsets/tags.js
@@ -8,16 +8,16 @@ var nodes = replTest.startSet();
 var port = replTest.ports;

 replTest.initiate({_id : name, members :
        [
-        {_id:0, host : host+":"+port[0], tags : ["0", "dc.ny.rk1", "machine"]},
-        {_id:1, host : host+":"+port[1], tags : ["1", "dc.ny.rk1", "machine"]},
-        {_id:2, host : host+":"+port[2], tags : ["2", "dc.ny.rk2", "machine"]},
-        {_id:3, host : host+":"+port[3], tags : ["3", "dc.sf.rk1", "machine"]},
-        {_id:4, host : host+":"+port[4], tags : ["4", "dc.sf.rk2", "machine"]},
+        {_id:0, host : host+":"+port[0], tags : {"server" : "0", "dc" : "ny", "ny" : "1", "rack" : "ny.rk1"}},
+        {_id:1, host : host+":"+port[1], tags : {"server" : "1", "dc" : "ny", "ny" : "2", "rack" : "ny.rk1"}},
+        {_id:2, host : host+":"+port[2], tags : {"server" : "2", "dc" : "ny", "ny" : "3", "rack" : "ny.rk2", "2" : "this"}},
+        {_id:3, host : host+":"+port[3], tags : {"server" : "3", "dc" : "sf", "sf" : "1", "rack" : "sf.rk1"}},
+        {_id:4, host : host+":"+port[4], tags : {"server" : "4", "dc" : "sf", "sf" : "2", "rack" : "sf.rk2"}},
        ],
        settings : {
            getLastErrorModes : {
-               "important" : {"dc" : 2, "machine" : 3},
-               "a machine" : {"machine" : 1}
+               "important" : {"dc" : 2, "server" : 3},
+               "a machine" : {"server" : 1}
            }
        }});
@@ -29,14 +29,14 @@
 printjson(config);

 var modes = config.settings.getLastErrorModes;
 assert.eq(typeof modes, "object");
 assert.eq(modes.important.dc, 2);
-assert.eq(modes.important.machine, 3);
-assert.eq(modes["a machine"]["machine"], 1);
+assert.eq(modes.important.server, 3);
+assert.eq(modes["a machine"]["server"], 1);

 config.version++;
 config.members[1].priority = 1.5;
 config.members[2].priority = 2;
-modes.rack = {"dc.sf" : 1};
-modes.niceRack = {"dc.sf" : 2};
+modes.rack = {"sf" : 1};
+modes.niceRack = {"sf" : 2};
 modes["a machine"]["2"] = 1;
 modes.on2 = {"2" : 1}

@@ -57,10 +57,10 @@ printjson(config);
 modes = config.settings.getLastErrorModes;
 assert.eq(typeof modes, "object");
 assert.eq(modes.important.dc, 2);
-assert.eq(modes.important.machine, 3);
-assert.eq(modes["a machine"]["machine"], 1);
-assert.eq(modes.rack["dc.sf"], 1);
-assert.eq(modes.niceRack["dc.sf"], 2);
+assert.eq(modes.important.server, 3);
+assert.eq(modes["a machine"]["server"], 1);
+assert.eq(modes.rack["sf"], 1);
+assert.eq(modes.niceRack["sf"], 2);

 print("bridging");
 replTest.bridge();
@@ -75,8 +75,11 @@ replTest.partition(3, 4);
 print("done bridging");

 print("test1");
+print("2 should be primary");

 master = replTest.getMaster();

+printjson(master.getDB("admin").runCommand({replSetGetStatus:1}));
+
 var timeout = 20000;

 master.getDB("foo").bar.insert({x:1});
diff --git a/jstests/replsets/tags2.js b/jstests/replsets/tags2.js
new file mode 100644
index 00000000000..16dfcdf4983
--- /dev/null
+++ b/jstests/replsets/tags2.js
@@ -0,0 +1,44 @@
+// Change a getLastErrorMode from 2 to 3 servers
+
+var host = getHostName();
+var replTest = new ReplSetTest( {name: "rstag", nodes: 3, startPort: 31000} );
+var nodes = replTest.startSet();
+var ports = replTest.ports;
+var conf = {_id : "rstag", version: 1, members : [
+           {_id : 0, host : host+":"+ports[0], tags : {"backup" : "A"}},
+           {_id : 1, host : host+":"+ports[1], tags : {"backup" : "B"}},
+           {_id : 2, host : host+":"+ports[2], tags : {"backup" : "C"}} ],
+           settings : {getLastErrorModes : {
+               backedUp : {backup : 2} }} };
+replTest.initiate( conf );
+replTest.awaitReplication();
+
+master = replTest.getMaster();
+var db = master.getDB("test");
+db.foo.insert( {x:1} );
+var result = db.runCommand( {getLastError:1, w:"backedUp", wtimeout:20000} );
+assert.eq (result.err, null);
+
+conf.version = 2;
+conf.settings.getLastErrorModes.backedUp.backup = 3;
+master.getDB("admin").runCommand( {replSetReconfig: conf} );
+replTest.awaitReplication();
+
+master = replTest.getMaster();
+var db = master.getDB("test");
+db.foo.insert( {x:2} );
+var result = db.runCommand( {getLastError:1, w:"backedUp", wtimeout:20000} );
+assert.eq (result.err, null);
+
+conf.version = 3;
+conf.members[0].priority = 3;
+conf.members[2].priority = 0;
+master.getDB("admin").runCommand( {replSetReconfig: conf} );
+
+master = replTest.getMaster();
+var db = master.getDB("test");
+db.foo.insert( {x:3} );
+var result = db.runCommand( {getLastError:1, w:"backedUp", wtimeout:20000} );
+assert.eq (result.err, null);
+
+replTest.stopSet();
diff --git a/jstests/replsets/toostale.js b/jstests/replsets/toostale.js
index a1217a6a547..08b1a9c2c6f 100644
--- a/jstests/replsets/toostale.js
+++ b/jstests/replsets/toostale.js
@@ -111,16 +111,12 @@ replTest.restart(2);

 print("8: check s2.state == 3");
-status = master.getDB("admin").runCommand({replSetGetStatus:1});
-while (status.state == 0) {
-    print("state is 0: ");
-    printjson(status);
-    sleep(1000);
-    status = master.getDB("admin").runCommand({replSetGetStatus:1});
-}
+assert.soon(function() {
+    var status = master.getDB("admin").runCommand({replSetGetStatus:1});
+    printjson(status);
+    return status.members && status.members[2].state == 3;
+});

-printjson(status);
-assert.eq(status.members[2].state, 3, 'recovering');

 print("make sure s2 doesn't become primary");
 replTest.stop(0);
diff --git a/jstests/sharding/addshard4.js b/jstests/sharding/addshard4.js
index 81cc1f89e73..4a44b5537b2 100644
--- a/jstests/sharding/addshard4.js
+++ b/jstests/sharding/addshard4.js
@@ -2,14 +2,14 @@

 s = new ShardingTest( "addshard4", 2 , 0 , 1 , {useHostname : true});

-r = new ReplSetTest({name : "addshard4", nodes : 3, startPort : 34000});
+r = new ReplSetTest({name : "addshard4", nodes : 3, startPort : 31100});
 r.startSet();

 var config = r.getReplSetConfig();
 config.members[2].priority = 0;

 r.initiate(config);

-//Wait for replica set to be fully initialized - could take some time
+//Wait for replica set to be fully initialized - could take some time
 //to pre-allocate files on slow systems
 r.awaitReplication();
@@ -25,14 +25,14 @@ var result = s.adminCommand({"addshard" : shardName});
 printjson(result);
 assert.eq(result, true);

-r = new ReplSetTest({name : "addshard42", nodes : 3, startPort : 36000});
+r = new ReplSetTest({name : "addshard42", nodes : 3, startPort : 31200});
 r.startSet();

 config = r.getReplSetConfig();
 config.members[2].arbiterOnly = true;

 r.initiate(config);

-// Wait for replica set to be fully initialized - could take some time
+// Wait for replica set to be fully initialized - could take some time
 // to pre-allocate files on slow systems
 r.awaitReplication();
diff --git a/jstests/sharding/array_shard_key.js b/jstests/sharding/array_shard_key.js
new file mode 100644
index 00000000000..1ea61e8d3a8
--- /dev/null
+++ b/jstests/sharding/array_shard_key.js
@@ -0,0 +1,127 @@
+// Ensure you can't shard on an array key
+
+var st = new ShardingTest({ name : jsTestName(), shards : 3 })
+
+var mongos = st.s0
+
+var coll = mongos.getCollection( jsTestName() + ".foo" )
+
+st.shardColl( coll, { _id : 1, i : 1 }, { _id : ObjectId(), i : 1 } )
+
+printjson( mongos.getDB("config").chunks.find().toArray() )
+
+st.printShardingStatus()
+
+print( "1: insert some invalid data" )
+
+var value = null
+
+var checkError = function( shouldError ){
+    var error = coll.getDB().getLastError()
+
+    if( error != null ) printjson( error )
+
+    if( error == null && ! shouldError ) return
+    if( error != null && shouldError ) return
+
+    if( error == null ) print( "No error detected!" )
+    else print( "Unexpected error!" )
+
+    assert( false )
+}
+
+// Insert an object with invalid array key
+coll.insert({ i : [ 1, 2 ] })
+checkError( true )
+
+// Insert an object with valid array key
+coll.insert({ i : 1 })
+checkError( false )
+
+// Update the value with valid other field
+value = coll.findOne({ i : 1 })
+coll.update( value, { $set : { j : 2 } } )
+checkError( false )
+
+// Update the value with invalid other fields
+value = coll.findOne({ i : 1 })
+coll.update( value, Object.merge( value, { i : [ 3 ] } ) )
+checkError( true )
+
+// Multi-update the value with invalid other fields
+value = coll.findOne({ i : 1 })
+coll.update( value, Object.merge( value, { i : [ 3, 4 ] } ), false, true)
+checkError( true )
+
+// Single update the value with valid other fields
+value = coll.findOne({ i : 1 })
+coll.update( Object.merge( value, { i : [ 3, 4 ] } ), value )
+checkError( true )
+
+// Multi-update the value with other fields (won't work, but no error)
+value = coll.findOne({ i : 1 })
+coll.update( Object.merge( value, { i : [ 1, 1 ] } ), { $set : { k : 4 } }, false, true)
+checkError( false )
+
+// Query the value with other fields (won't work, but no error)
+value = coll.findOne({ i : 1 })
+coll.find( Object.merge( value, { i : [ 1, 1 ] } ) ).toArray()
+checkError( false )
+
+// Can't remove using multikey, but shouldn't error
+value = coll.findOne({ i : 1 })
+coll.remove( Object.extend( value, { i : [ 1, 2, 3, 4 ] } ) )
+checkError( false )
+
+// Can't remove using multikey, but shouldn't error
+value = coll.findOne({ i : 1 })
+coll.remove( Object.extend( value, { i : [ 1, 2, 3, 4, 5 ] } ) )
+error = coll.getDB().getLastError()
+assert.eq( error, null )
+assert.eq( coll.find().itcount(), 1 )
+
+value = coll.findOne({ i : 1 })
+coll.remove( Object.extend( value, { i : 1 } ) )
+error = coll.getDB().getLastError()
+assert.eq( error, null )
+assert.eq( coll.find().itcount(), 0 )
+
+printjson( "Sharding-then-inserting-multikey tested, now trying inserting-then-sharding-multikey" )
+
+// Insert a bunch of data then shard over key which is an array
+var coll = mongos.getCollection( "" + coll + "2" )
+for( var i = 0; i < 10; i++ ){
+    // TODO : does not check weird cases like [ i, i ]
+    coll.insert({ i : [ i, i + 1 ] })
+    checkError( false )
+}
+
+coll.ensureIndex({ _id : 1, i : 1 })
+
+try {
+    st.shardColl( coll, { _id : 1, i : 1 }, { _id : ObjectId(), i : 1 } )
+}
+catch( e ){
+    print( "Correctly threw error on sharding with multikey index." )
) +} + +st.printShardingStatus() + +// Insert a bunch of data then shard over key which is not an array +var coll = mongos.getCollection( "" + coll + "3" ) +for( var i = 0; i < 10; i++ ){ + // TODO : does not check weird cases like [ i, i ] + coll.insert({ i : i }) + checkError( false ) +} + +coll.ensureIndex({ _id : 1, i : 1 }) + +st.shardColl( coll, { _id : 1, i : 1 }, { _id : ObjectId(), i : 1 } ) + +st.printShardingStatus() + + + +// Finish +st.stop() diff --git a/jstests/sharding/auth.js b/jstests/sharding/auth.js index 559ec2c1fac..8d8d7d79dab 100644 --- a/jstests/sharding/auth.js +++ b/jstests/sharding/auth.js @@ -56,7 +56,7 @@ s.s = s._mongos[0] = s["s0"] = conn; login(adminUser); -d1 = new ReplSetTest({name : "d1", nodes : 3, startPort : 34000}); +d1 = new ReplSetTest({name : "d1", nodes : 3, startPort : 31100}); d1.startSet({keyFile : "jstests/libs/key2"}); d1.initiate(); @@ -102,6 +102,18 @@ s.getDB(testUser.db).addUser(testUser.username, testUser.password); logout(adminUser); +print("query try"); +var e = assert.throws(function() { + conn.getDB("foo").bar.findOne(); +}); +printjson(e); + +print("cmd try"); +e = assert.throws(function() { + conn.getDB("foo").runCommand({listdbs:1}); +}); +printjson(e); + print("insert try 1"); s.getDB("test").foo.insert({x:1}); result = s.getDB("test").runCommand({getLastError : 1}); @@ -118,7 +130,7 @@ assert.eq(result.err, null); logout(testUser); -d2 = new ReplSetTest({name : "d2", nodes : 3, startPort : 36000}); +d2 = new ReplSetTest({name : "d2", nodes : 3, startPort : 31200}); d2.startSet({keyFile : "jstests/libs/key1"}); d2.initiate(); @@ -156,4 +168,10 @@ while (cursor.hasNext()) { assert.eq(count, 501); +// check that dump doesn't get stuck with auth +var x = runMongoProgram( "mongodump", "--host", "127.0.0.1:31000", "-d", testUser.db, "-u", testUser.username, "-p", testUser.password); + +print("result: "+x); + + s.stop(); diff --git a/jstests/sharding/count_slaveok.js b/jstests/sharding/count_slaveok.js new file mode 100644 index 00000000000..075ab41c2ad --- /dev/null +++ b/jstests/sharding/count_slaveok.js @@ -0,0 +1,69 @@ +// Tests count and distinct using slaveOk + +var st = new ShardingTest( testName = "countSlaveOk", + numShards = 1, + verboseLevel = 0, + numMongos = 1, + { rs : true, + rs0 : { nodes : 2 } + }) + +var rst = st._rs[0].test + +// Insert data into replica set +var conn = new Mongo( st.s.host ) +conn.setLogLevel( 3 ) + +var coll = conn.getCollection( "test.countSlaveOk" ) +coll.drop() + +for( var i = 0; i < 300; i++ ){ + coll.insert( { i : i % 10 } ) +} + +var connA = conn +var connB = new Mongo( st.s.host ) +var connC = new Mongo( st.s.host ) + +// Make sure the writes get through, otherwise we can continue to error these one-at-a-time +coll.getDB().getLastError() + +st.printShardingStatus() + +// Wait for client to update itself and replication to finish +rst.awaitReplication() + +var primary = rst.getPrimary() +var sec = rst.getSecondary() + +// Data now inserted... 
stop the master, since only two in set, other will still be secondary
+rst.stop( rst.getMaster(), undefined, true )
+printjson( rst.status() )
+
+// Wait for the mongos to recognize the slave
+ReplSetTest.awaitRSClientHosts( conn, sec, { ok : true, secondary : true } )
+
+// Need to check slaveOk=true first, since slaveOk=false will destroy conn in pool when
+// master is down
+conn.setSlaveOk()
+
+// Should not throw exception, since slaveOk'd
+assert.eq( 30, coll.find({ i : 0 }).count() )
+assert.eq( 10, coll.distinct("i").length )
+
+try {
+
+    conn.setSlaveOk( false )
+    coll.find({ i : 0 }).count()
+
+    print( "Should not reach here!" )
+    printjson( coll.getDB().getLastError() )
+    assert( false )
+
+}
+catch( e ){
+    print( "Non-slaveOk'd connection failed." )
+}
+
+// Finish
+st.stop()
diff --git a/jstests/sharding/drop_sharded_db.js b/jstests/sharding/drop_sharded_db.js
new file mode 100644
index 00000000000..aedde8f5032
--- /dev/null
+++ b/jstests/sharding/drop_sharded_db.js
@@ -0,0 +1,62 @@
+// Tests the dropping of a sharded database SERVER-3471 SERVER-1726
+
+var st = new ShardingTest({ name : jsTestName() })
+
+var mongos = st.s0
+var config = mongos.getDB( "config" )
+
+var dbName = "buy"
+var dbA = mongos.getDB( dbName )
+var dbB = mongos.getDB( dbName + "_201107" )
+var dbC = mongos.getDB( dbName + "_201108" )
+
+print( "1: insert some data and colls into all dbs" )
+
+var numDocs = 3000;
+var numColls = 10;
+for( var i = 0; i < numDocs; i++ ){
+    dbA.getCollection( "data" + (i % numColls) ).insert({ _id : i })
+    dbB.getCollection( "data" + (i % numColls) ).insert({ _id : i })
+    dbC.getCollection( "data" + (i % numColls) ).insert({ _id : i })
+}
+
+print( "2: shard the colls ")
+
+for( var i = 0; i < numColls; i++ ){
+
+    var key = { _id : 1 }
+    st.shardColl( dbA.getCollection( "data" + i ), key )
+    st.shardColl( dbB.getCollection( "data" + i ), key )
+    st.shardColl( dbC.getCollection( "data" + i ), key )
+
+}
+
+print( "3: drop the non-suffixed db ")
+
+dbA.dropDatabase()
+
+
+print( "4: ensure only the non-suffixed db was dropped ")
+
+var dbs = mongos.getDBNames()
+for( var i = 0; i < dbs.length; i++ ){
+    assert.neq( dbs[i], "" + dbA )
+}
+
+assert.eq( 0, config.databases.find({ _id : "" + dbA }).toArray().length )
+assert.eq( 1, config.databases.find({ _id : "" + dbB }).toArray().length )
+assert.eq( 1, config.databases.find({ _id : "" + dbC }).toArray().length )
+
+assert.eq( numColls, config.collections.find({ _id : RegExp( "^" + dbA + "\\..*" ), dropped : true }).toArray().length )
+assert.eq( numColls, config.collections.find({ _id : RegExp( "^" + dbB + "\\..*" ), dropped : false }).toArray().length )
+assert.eq( numColls, config.collections.find({ _id : RegExp( "^" + dbC + "\\..*" ), dropped : false }).toArray().length )
+
+for( var i = 0; i < numColls; i++ ){
+
+    assert.eq( numDocs / numColls, dbB.getCollection( "data" + (i % numColls) ).find().itcount() )
+    assert.eq( numDocs / numColls, dbC.getCollection( "data" + (i % numColls) ).find().itcount() )
+
+}
+
+// Finish
+st.stop()
diff --git a/jstests/sharding/features3.js b/jstests/sharding/features3.js
index 6870bb70208..5277d22ac56
--- a/jstests/sharding/features3.js
+++ b/jstests/sharding/features3.js
@@ -17,54 +17,79 @@ for ( i=0; i<N; i++ )
    db.foo.insert( { _id : i } )
db.getLastError();
x = db.foo.stats();
+assert.eq( "test.foo" , x.ns , "basic1" )
+assert( x.sharded , "basic2" )
assert.eq( N , x.count , "total count" )
assert.eq( N / 2 , x.shards.shard0000.count , "count on shard0000" )
assert.eq( N / 2 , x.shards.shard0001.count , "count on shard0001" )
assert( x.totalIndexSize > 0 )
assert( x.numExtents > 0 )
+db.bar.insert( { x : 1 } )
+x = db.bar.stats();
+assert.eq( 1 , x.count , "XXX1" )
+assert.eq( "test.bar" , x.ns , "XXX2" )
+assert( ! x.sharded , "XXX3: " + tojson(x) )
+
+// Fork shell and start pulling back data
start = new Date()
print( "about to fork shell: " + Date() )
-join = startParallelShell( "db.foo.find( function(){ x = ''; for ( i=0; i<10000; i++ ){ x+=i; } return true; } ).itcount()" )
+
+// TODO: Still potential problem when our sampling of current ops misses when $where is active -
+// solution is to increase sleep time
+parallelCommand = "try { while(true){" +
+                  " db.foo.find( function(){ x = ''; for ( i=0; i<10000; i++ ){ x+=i; } sleep( 1000 ); return true; } ).itcount() " +
+                  "}} catch(e){ print('PShell execution ended:'); printjson( e ) }"
+
+join = startParallelShell( parallelCommand )
print( "after forking shell: " + Date() )
+// Get all current $where operations
function getMine( printInprog ){
+    var inprog = db.currentOp().inprog;
+    if ( printInprog ) printjson( inprog )
+
+    // Find all the where queries
    var mine = []
    for ( var x=0; x<inprog.length; x++ ){
        if ( inprog[x].query && inprog[x].query.$where ){
            mine.push( inprog[x] )
        }
    }
+
    return mine;
}
-state = 0; // 0 = not found, 1 = killed,
-killTime = null;
+var state = 0; // 0 = not found, 1 = killed, 2 = ended
+var killTime = null;
+var i = 0;
-for ( i=0; i<( 100* 1000 ); i++ ){
+assert.soon( function(){
+
+    // Get all the current operations
    mine = getMine( state == 0 && i > 20 );
-    if ( state == 0 ){
-        if ( mine.length == 0 ){
-            sleep(1);
-            continue;
-        }
+    i++;
+
+    // Wait for the queries to start
+    if ( state == 0 && mine.length > 0 ){
+        // Queries started
        state = 1;
+        // Kill all $where
        mine.forEach( function(z){ printjson( db.getSisterDB( "admin" ).killOp( z.opid ) ); } )
        killTime = new Date()
    }
-    else if ( state == 1 ){
-        if ( mine.length == 0 ){
-            state = 2;
-            break;
-        }
-        sleep(1)
-        continue;
+    // Wait for killed queries to end
+    else if ( state == 1 && mine.length == 0 ){
+        // Queries ended
+        state = 2;
+        return true;
    }
-}
+
+}, "Couldn't kill the $where operations.", 2 * 60 * 1000 )
print( "after loop: " + Date() );
assert( killTime , "timed out waiting to kill last mine:" + tojson(mine) )
diff --git a/jstests/sharding/group_slaveok.js b/jstests/sharding/group_slaveok.js
new file mode 100644
index 00000000000..3b7cec4910f
--- /dev/null
+++ b/jstests/sharding/group_slaveok.js
@@ -0,0 +1,68 @@
+// Tests group using slaveOk
+
+var st = new ShardingTest( testName = "groupSlaveOk",
+                           numShards = 1,
+                           verboseLevel = 0,
+                           numMongos = 1,
+                           { rs : true,
+                             rs0 : { nodes : 2 }
+                           })
+
+var rst = st._rs[0].test
+
+// Insert data into replica set
+var conn = new Mongo( st.s.host )
+conn.setLogLevel( 3 )
+
+var coll = conn.getCollection( "test.groupSlaveOk" )
+coll.drop()
+
+for( var i = 0; i < 300; i++ ){
+    coll.insert( { i : i % 10 } )
+}
+
+// Make sure the writes get through, otherwise we can continue to error these one-at-a-time
+coll.getDB().getLastError()
+
+st.printShardingStatus()
+
+// Wait for client to update itself and replication to finish
+rst.awaitReplication()
+
+var primary = rst.getPrimary()
+var sec = rst.getSecondary()
+
+// Data now inserted... 
stop the master, since only two in set, other will still be secondary +rst.stop( rst.getMaster(), undefined, true ) +printjson( rst.status() ) + +// Wait for the mongos to recognize the slave +ReplSetTest.awaitRSClientHosts( conn, sec, { ok : true, secondary : true } ) + +// Need to check slaveOk=true first, since slaveOk=false will destroy conn in pool when +// master is down +conn.setSlaveOk() + +// Should not throw exception, since slaveOk'd +assert.eq( 10, coll.group({ key : { i : true } , + reduce : function( obj, ctx ){ ctx.count += 1 } , + initial : { count : 0 } }).length ) + +try { + + conn.setSlaveOk( false ) + coll.group({ key : { i : true } , + reduce : function( obj, ctx ){ ctx.count += 1 } , + initial : { count : 0 } }) + + print( "Should not reach here!" ) + printjson( coll.getDB().getLastError() ) + assert( false ) + +} +catch( e ){ + print( "Non-slaveOk'd connection failed." ) +} + +// Finish +st.stop() diff --git a/jstests/sharding/parallel.js b/jstests/sharding/parallel.js new file mode 100644 index 00000000000..d35459c3730 --- /dev/null +++ b/jstests/sharding/parallel.js @@ -0,0 +1,38 @@ +numShards = 3 +s = new ShardingTest( "parallel" , numShards , 2 , 2 , { sync : true } ); + +s.adminCommand( { enablesharding : "test" } ); +s.adminCommand( { shardcollection : "test.foo" , key : { _id : 1 } } ); + +db = s.getDB( "test" ); + +N = 10000; + +for ( i=0; i<N; i+=(N/12) ) { + s.adminCommand( { split : "test.foo" , middle : { _id : i } } ) + sh.moveChunk( "test.foo", { _id : i } , "shard000" + Math.floor( Math.random() * numShards ) ) +} + + +for ( i=0; i<N; i++ ) + db.foo.insert( { _id : i } ) +db.getLastError(); + + +doCommand = function( dbname , cmd ) { + x = benchRun( { ops : [ { op : "findOne" , ns : dbname + ".$cmd" , query : cmd } ] , + host : db.getMongo().host , parallel : 2 , seconds : 2 } ) + printjson(x) + x = benchRun( { ops : [ { op : "findOne" , ns : dbname + ".$cmd" , query : cmd } ] , + host : s._mongos[1].host , parallel : 2 , seconds : 2 } ) + printjson(x) +} + +doCommand( "test" , { dbstats : 1 } ) +doCommand( "config" , { dbstats : 1 } ) + +x = s.getDB( "config" ).stats() +assert( x.ok , tojson(x) ) +printjson(x) + +s.stop() diff --git a/jstests/sharding/shard3.js b/jstests/sharding/shard3.js index 5f2c0b5148f..e27316e17b6 100644 --- a/jstests/sharding/shard3.js +++ b/jstests/sharding/shard3.js @@ -62,6 +62,7 @@ function doCounts( name , total , onlyItCounts ){ var total = doCounts( "before wrong save" ) secondary.save( { num : -3 } ); +printjson( secondary.getDB().getLastError() ) doCounts( "after wrong save" , total , true ) e = a.find().explain(); assert.eq( 3 , e.n , "ex1" ) diff --git a/jstests/sharding/sync6.js b/jstests/sharding/sync6.js index 0543837a822..233534bf1aa 100644 --- a/jstests/sharding/sync6.js +++ b/jstests/sharding/sync6.js @@ -17,10 +17,13 @@ commandConn.getDB( "admin" ).runCommand( { setParameter : 1, logLevel : 1 } ) // Have lots of threads, so use larger i // Can't test too many, we get socket exceptions... possibly due to the // javascript console. -for ( var i = 8; i < 12; i++ ) { +for ( var i = 8; i < 9; i++ ) { - // Our force time is 2 seconds - var takeoverMS = 2000; + // Our force time is 4 seconds + // Slower machines can't keep up the LockPinger rate, which can lead to lock failures + // since our locks are only valid if the LockPinger pings faster than the force time. 
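+    // (for example: with the 4 second force time below, a single ping delayed by more
+    // than ~4s can let another contender decide the holder is dead and force the lock)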
+    // Actual lock timeout is 15 minutes, so a few seconds is extremely aggressive
+    var takeoverMS = 4000;

    // Generate valid sleep and skew for this timeout
    var threadSleepWithLock = takeoverMS / 2;
diff --git a/jstests/slowNightly/command_line_parsing.js b/jstests/slowNightly/command_line_parsing.js
index 38c7324ddb9..ba7b1369627
--- a/jstests/slowNightly/command_line_parsing.js
+++ b/jstests/slowNightly/command_line_parsing.js
@@ -7,3 +7,15 @@ var baseName = "jstests_slowNightly_command_line_parsing";
var m = startMongod( "--port", port, "--dbpath", "/data/db/" + baseName, "--notablescan" );
m.getDB( baseName ).getCollection( baseName ).save( {a:1} );
assert.throws( function() { m.getDB( baseName ).getCollection( baseName ).find( {a:1} ).toArray() } );
+
+// test config file
+var m2 = startMongod( "--port", port+2, "--dbpath", "/data/db/" + baseName +"2", "--config", "jstests/libs/testconfig");
+var m2result = {
+    "parsed" : {
+        "config" : "jstests/libs/testconfig",
+        "dbpath" : "/data/db/jstests_slowNightly_command_line_parsing2",
+        "fastsync" : "true",
+        "port" : 31002
+    }
+};
+assert( friendlyEqual(m2result.parsed, m2.getDB("admin").runCommand( "getCmdLineOpts" ).parsed) );
diff --git a/jstests/slowNightly/dur_big_atomic_update.js b/jstests/slowNightly/dur_big_atomic_update.js
index ffb0d838cc2..800b4b831fb
--- a/jstests/slowNightly/dur_big_atomic_update.js
+++ b/jstests/slowNightly/dur_big_atomic_update.js
@@ -23,6 +23,23 @@ err = d.getLastErrorObj();
assert(err.err == null);
assert(err.n == 1024);
+d.dropDatabase();
+
+for (var i=0; i<1024; i++){
+    d.foo.insert({_id:i});
+}
+
+// Do it again but in a db.eval
+d.eval(
+    function(host, big_string) {
+        new Mongo(host).getDB("test").foo.update({}, {$set: {big_string: big_string}}, false, /*multi*/true)
+    }, conn.host, big_string); // Can't pass in connection or DB objects
+
+err = d.getLastErrorObj();
+
+assert(err.err == null);
+assert(err.n == 1024);
+
// free up space
d.dropDatabase();
diff --git a/jstests/slowNightly/replReads.js b/jstests/slowNightly/replReads.js
new file mode 100644
index 00000000000..4fe91305738
--- /dev/null
+++ b/jstests/slowNightly/replReads.js
@@ -0,0 +1,108 @@
+// Test that doing slaveOk reads from secondaries hits all the secondaries evenly
+function testReadLoadBalancing(numReplicas) {
+
+    s = new ShardingTest( "replReads" , 1 /* numShards */, 0 /* verboseLevel */, 1 /* numMongos */, { rs : true , numReplicas : numReplicas, chunksize : 1 } )
+
+    s.adminCommand({enablesharding : "test"})
+    s.config.settings.find().forEach(printjson)
+
+    s.adminCommand({shardcollection : "test.foo", key : {_id : 1}})
+
+    s.getDB("test").foo.insert({a : 123})
+
+    primary = s._rs[0].test.liveNodes.master
+    secondaries = s._rs[0].test.liveNodes.slaves
+
+    function rsStats() {
+        return s.getDB( "admin" ).runCommand( "connPoolStats" )["replicaSets"]["replReads-rs0"];
+    }
+
+    assert.eq( numReplicas , rsStats().hosts.length );
+
+    function isMasterOrSecondary( info ){
+        if ( ! info.ok )
+            return false;
+        if ( info.ismaster )
+            return true;
+        return info.secondary && ! info.hidden;
+    }
+
+    assert.soon(
+        function() {
+            var x = rsStats().hosts;
+            printjson(x)
+            for ( var i=0; i<x.length; i++ )
+                if ( ! isMasterOrSecondary( x[i] ) )
+                    return false;
+            return true;
+        }
+    );
+
+    for (var i = 0; i < secondaries.length; i++) {
+        assert.soon( function(){ return secondaries[i].getDB("test").foo.count() > 0; } )
+        secondaries[i].getDB('test').setProfilingLevel(2)
+    }
+
+    for (var i = 0; i < secondaries.length * 10; i++) {
+        conn = new Mongo(s._mongos[0].host)
+        conn.setSlaveOk()
+        conn.getDB('test').foo.findOne()
+    }
+
+    for (var i = 0; i < secondaries.length; i++) {
+        var profileCollection = secondaries[i].getDB('test').system.profile;
+        assert.eq(10, profileCollection.find().count(), "Wrong number of read queries sent to secondary " + i + " " + tojson( profileCollection.find().toArray() ))
+    }
+
+    db = primary.getDB( "test" );
+
+    printjson(rs.status());
+    c = rs.conf();
+    print( "config before: " + tojson(c) );
+    for ( i=0; i<c.members.length; i++ ) {
+        if ( c.members[i].host == db.runCommand( "ismaster" ).primary )
+            continue;
+        c.members[i].hidden = true;
+        c.members[i].priority = 0;
+        break;
+    }
+    rs.reconfig( c );
+    print( "config after: " + tojson( rs.conf() ) );
+
+    assert.soon(
+        function() {
+            var x = rsStats();
+            printjson(x);
+            var numOk = 0;
+            for ( var i=0; i<x.hosts.length; i++ )
+                if ( x.hosts[i].hidden )
+                    return true;
+            return false;
+        } , "one slave not ok" , 180000 , 5000
+    );
+
+    for (var i = 0; i < secondaries.length * 10; i++) {
+        conn = new Mongo(s._mongos[0].host)
+        conn.setSlaveOk()
+        conn.getDB('test').foo.findOne()
+    }
+
+    var counts = []
+    for (var i = 0; i < secondaries.length; i++) {
+        var profileCollection = secondaries[i].getDB('test').system.profile;
+        counts.push( profileCollection.find().count() );
+    }
+
+    counts = counts.sort(function(a, b){ return a - b }); // sort numerically - JS's default sort() compares as strings
+    assert.eq( 20 , counts[1] - counts[0] , "counts wrong: " + tojson( counts ) );
+
+    s.stop()
+}
+
+//for (var i = 1; i < 10; i++) {
+//    testReadLoadBalancing(i)
+//}
+
+// Is there a way that this can be run multiple times with different values? 
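+// One possible shape (hypothetical sketch, not part of this change): have the test
+// harness pass the replica count in, e.g.
+//     var n = ( typeof TestData != "undefined" && TestData.numReplicas ) || 3;
+//     testReadLoadBalancing( n );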
+testReadLoadBalancing(3) diff --git a/jstests/slowNightly/sharding_migrateBigObject.js b/jstests/slowNightly/sharding_migrateBigObject.js index d8ff740d81d..5ad9ed12a18 100644 --- a/jstests/slowNightly/sharding_migrateBigObject.js +++ b/jstests/slowNightly/sharding_migrateBigObject.js @@ -10,10 +10,8 @@ var admin = mongos.getDB("admin") admin.runCommand({ addshard : "localhost:30001" }) admin.runCommand({ addshard : "localhost:30002" }) - - -var coll = mongos.getDB("test").getCollection("stuff") -coll.drop() +db = mongos.getDB("test"); +var coll = db.getCollection("stuff") var data = "x" var nsq = 16 @@ -28,6 +26,9 @@ for( var i = 0; i < 40; i++ ) { if(i != 0 && i % 10 == 0) printjson( coll.stats() ) coll.save({ data : dataObj }) } +db.getLastError(); + +assert.eq( 40 , coll.count() , "prep1" ); printjson( coll.stats() ) @@ -37,6 +38,8 @@ admin.printShardingStatus() admin.runCommand({ shardcollection : "" + coll, key : { _id : 1 } }) +assert.lt( 5 , mongos.getDB( "config" ).chunks.find( { ns : "test.stuff" } ).count() , "not enough chunks" ); + assert.soon( function(){ res = mongos.getDB( "config" ).chunks.group( { cond : { ns : "test.stuff" } , @@ -45,7 +48,7 @@ assert.soon( initial : { nChunks : 0 } } ); printjson( res ); - return res.length > 1 && Math.abs( res[0].nChunks - res[1].nChunks ) <= 1; + return res.length > 1 && Math.abs( res[0].nChunks - res[1].nChunks ) <= 3; } , "never migrated" , 180000 , 1000 ); diff --git a/jstests/slowNightly/sharding_passthrough.js b/jstests/slowNightly/sharding_passthrough.js index 65d22059381..d81df685bc5 100644 --- a/jstests/slowNightly/sharding_passthrough.js +++ b/jstests/slowNightly/sharding_passthrough.js @@ -62,17 +62,17 @@ files.forEach( * clean (apitest_dbcollection) * logout and getnonce */ - if (/[\/\\](error3|capped.*|splitvector|apitest_db|cursor6|copydb-auth|profile1|dbhash|median|apitest_dbcollection|evalb|evald|eval_nolock|auth1|auth2|dropdb_race|unix_socket\d*)\.js$/.test(x.name)) { + if (/[\/\\](error3|capped.*|splitvector|apitest_db|cursor6|copydb-auth|profile\d*|dbhash|median|apitest_dbcollection|evalb|evald|eval_nolock|auth1|auth2|dropdb_race|unix_socket\d*)\.js$/.test(x.name)) { print(" !!!!!!!!!!!!!!! skipping test that has failed under sharding but might not anymore " + x.name) return; } // These are bugs (some might be fixed now): - if (/[\/\\](apply_ops1|count5|cursor8|or4|shellkillop|update4|profile1)\.js$/.test(x.name)) { + if (/[\/\\](apply_ops1|count5|cursor8|or4|shellkillop|update4|profile\d*)\.js$/.test(x.name)) { print(" !!!!!!!!!!!!!!! 
skipping test that has failed under sharding but might not anymore " + x.name) return; } // These aren't supposed to get run under sharding: - if (/[\/\\](dbadmin|error1|fsync|fsync2|geo.*|indexh|remove5|update4|notablescan|compact.*|check_shard_index|mr_replaceIntoDB)\.js$/.test(x.name)) { + if (/[\/\\](dbadmin|error1|fsync|fsync2|geo.*|indexh|remove5|update4|notablescan|compact.*|check_shard_index|bench_test.*|mr_replaceIntoDB)\.js$/.test(x.name)) { print(" >>>>>>>>>>>>>>> skipping test that would fail under sharding " + x.name) return; } diff --git a/jstests/slowNightly/sharding_rs1.js b/jstests/slowNightly/sharding_rs1.js index 01358e207de..f73e690d42e 100644 --- a/jstests/slowNightly/sharding_rs1.js +++ b/jstests/slowNightly/sharding_rs1.js @@ -59,6 +59,12 @@ assert.soon( function(){ s.config.settings.update( { _id: "balancer" }, { $set : { stopped: true } } , true ); +sleep( 1000 ); + +while ( sh.isBalancerRunning() ){ + sleep( 1000 ); +} + for ( i=0; i<s._rs.length; i++ ){ r = s._rs[i]; r.test.awaitReplication(); diff --git a/jstests/slowNightly/sharding_rs_arb1.js b/jstests/slowNightly/sharding_rs_arb1.js new file mode 100644 index 00000000000..be4c4dcd136 --- /dev/null +++ b/jstests/slowNightly/sharding_rs_arb1.js @@ -0,0 +1,40 @@ +x = 5 +name = "sharding_rs_arb1" +replTest = new ReplSetTest( { name : name , nodes : 3 , startPort : 31000 } ); +nodes = replTest.startSet(); +var port = replTest.ports; +replTest.initiate({_id : name, members : + [ + {_id:0, host : getHostName()+":"+port[0]}, + {_id:1, host : getHostName()+":"+port[1]}, + {_id:2, host : getHostName()+":"+port[2], arbiterOnly : true}, + ], + }); + +replTest.awaitReplication(); + +master = replTest.getMaster(); +db = master.getDB( "test" ); +printjson( rs.status() ); + +var config = startMongodEmpty("--configsvr", "--port", 29999, "--dbpath", "/data/db/" + name + "_config" ); + +var mongos = startMongos("--port", 30000, "--configdb", getHostName() + ":29999") +var admin = mongos.getDB("admin") +var url = name + "/"; +for ( i=0; i<port.length; i++ ) { + if ( i > 0 ) + url += ","; + url += getHostName() + ":" + port[i]; +} +print( url ) +res = admin.runCommand( { addshard : url } ) +printjson( res ) +assert( res.ok , tojson(res) ) + + + +stopMongod( 30000 ) +stopMongod( 29999 ) +replTest.stopSet(); + diff --git a/jstests/slowNightly/sync6_slow.js b/jstests/slowNightly/sync6_slow.js new file mode 100644 index 00000000000..63d6123833c --- /dev/null +++ b/jstests/slowNightly/sync6_slow.js @@ -0,0 +1,82 @@ +// More complete version of sharding/sync6.js +// Test that distributed lock forcing does not result in inconsistencies, using a +// fast timeout. + +// Note that this test will always have random factors, since we can't control the +// thread scheduling. + +test = new SyncCCTest( "sync6", { logpath : "/dev/null" } ) + +// Startup another process to handle our commands to the cluster, mostly so it's +// easier to read. +var commandConn = startMongodTest( 30000 + 4, "syncCommander", false, {})//{ logpath : "/dev/null" } )//{verbose : ""} ) +// { logpath : "/data/db/syncCommander/mongod.log" } ); + +// Up the log level for this test +commandConn.getDB( "admin" ).runCommand( { setParameter : 1, logLevel : 0 } ) + +// Have lots of threads, so use larger i +// Can't test too many, we get socket exceptions... possibly due to the +// javascript console. +// TODO: Figure out our max bounds here - use less threads now to avoid pinger starvation issues. 
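+// (hence the narrow loops below: repeat one conservative configuration four times via t,
+// with i pinned to a single value, instead of sweeping i over many thread counts)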
+for ( var t = 0; t < 4; t++ ) { +for ( var i = 4; i < 5; i++ ) { + + // Our force time is 6 seconds - slightly diff from sync6 to ensure exact time not important + var takeoverMS = 6000; + + // Generate valid sleep and skew for this timeout + var threadSleepWithLock = takeoverMS / 2; + var configServerTimeSkew = [ 0, 0, 0 ] + for ( var h = 0; h < 3; h++ ) { + // Skew by 1/30th the takeover time either way, at max + configServerTimeSkew[h] = ( i + h ) % Math.floor( takeoverMS / 60 ) + // Make skew pos or neg + configServerTimeSkew[h] *= ( ( i + h ) % 2 ) ? -1 : 1; + } + + // Build command + command = { _testDistLockWithSkew : 1 } + + // Basic test parameters + command["lockName"] = "TimeSkewFailNewTest_lock_" + i; + command["host"] = test.url + command["seed"] = i + command["numThreads"] = ( i % 50 ) + 1 + + // Critical values so we're sure of correct operation + command["takeoverMS"] = takeoverMS + command["wait"] = 4 * takeoverMS // so we must force the lock + command["skewHosts"] = configServerTimeSkew + command["threadWait"] = threadSleepWithLock + + // Less critical test params + + // 1/3 of threads will not release the lock + command["hangThreads"] = 3 + // Amount of time to wait before trying lock again + command["threadSleep"] = 1;// ( ( i + 1 ) * 100 ) % (takeoverMS / 4) + // Amount of total clock skew possible between locking threads (processes) + // This can be large now. + command["skewRange"] = ( command["takeoverMS"] * 3 ) * 60 * 1000 + + // Double-check our sleep, host skew, and takeoverMS values again + + // At maximum, our threads must sleep only half the lock timeout time. + assert( command["threadWait"] <= command["takeoverMS"] / 2 ) + for ( var h = 0; h < command["skewHosts"].length; h++ ) { + // At maximum, our config server time skew needs to be less than 1/30th + // the total time skew (1/60th either way). + assert( Math.abs( command["skewHosts"][h] ) <= ( command["takeoverMS"] / 60 ) ) + } + + result = commandConn.getDB( "admin" ).runCommand( command ) + printjson( result ) + printjson( command ) + assert( result.ok, "Skewed threads did not increment correctly." ); + +} +} + +stopMongoProgram( 30004 ) +test.stop(); diff --git a/jstests/slowWeekly/geo_full.js b/jstests/slowWeekly/geo_full.js index ab8715be6a6..9eb1b7a54bf 100644 --- a/jstests/slowWeekly/geo_full.js +++ b/jstests/slowWeekly/geo_full.js @@ -25,17 +25,25 @@ var randEnvironment = function(){ return { max : 180, min : -180, bits : Math.floor( Random.rand() * 32 ) + 1, - earth : true } + earth : true, + bucketSize : 360 / ( 4 * 1024 * 1024 * 1024 ) } } var scales = [ 0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000, 100000 ] var scale = scales[ Math.floor( Random.rand() * scales.length ) ] var offset = Random.rand() * scale - return { max : Random.rand() * scale + offset, - min : - Random.rand() * scale + offset, - bits : Math.floor( Random.rand() * 32 ) + 1, - earth : false } + var max = Random.rand() * scale + offset + var min = - Random.rand() * scale + offset + var bits = Math.floor( Random.rand() * 32 ) + 1 + var range = max - min + var bucketSize = range / ( 4 * 1024 * 1024 * 1024 ) + + return { max : max, + min : min, + bits : bits, + earth : false, + bucketSize : bucketSize } } @@ -271,6 +279,7 @@ var randYesQuery = function(){ var locArray = function( loc ){ if( loc.x ) return [ loc.x, loc.y ] + if( ! 
loc.length ) return [ loc[0], loc[1] ] return loc } @@ -287,32 +296,54 @@ var locsArray = function( locs ){ } } -var numTests = 30 +var minBoxSize = function( env, box ){ + return env.bucketSize * Math.pow( 2, minBucketScale( env, box ) ) +} + +var minBucketScale = function( env, box ){ + + if( box.length && box[0].length ) + box = [ box[0][0] - box[1][0], box[0][1] - box[1][1] ] + + if( box.length ) + box = Math.max( box[0], box[1] ) + + print( box ) + print( env.bucketSize ) + + return Math.ceil( Math.log( box / env.bucketSize ) / Math.log( 2 ) ) + +} + +// TODO: Add spherical $uniqueDocs tests +var numTests = 100 // Our seed will change every time this is run, but // each individual test will be reproducible given // that seed and test number -var seed = Math.floor( Random.rand() * ( 10 ^ 30) ) +var seed = new Date().getTime() for ( var test = 0; test < numTests; test++ ) { Random.srand( seed + test ); - + //Random.srand( 42240 ) + //Random.srand( 7344 ) var t = db.testAllGeo t.drop() print( "Generating test environment #" + test ) var env = randEnvironment() + //env.bits = 11 var query = randQuery( env ) var data = randDataType() - + //data.numDocs = 100; data.maxLocs = 3; var results = {} var totalPoints = 0 print( "Calculating target results for " + data.numDocs + " docs with max " + data.maxLocs + " locs " ) // Index after a random number of docs added var indexIt = Math.floor( Random.rand() * data.numDocs ) - + for ( var i = 0; i < data.numDocs; i++ ) { if( indexIt == i ){ @@ -346,7 +377,7 @@ for ( var test = 0; test < numTests; test++ ) { randQueryAdditions( doc, indResults ) //printjson( doc ) - + doc._id = i t.insert( doc ) } @@ -362,27 +393,33 @@ for ( var test = 0; test < numTests; test++ ) { // exact print( "Exact query..." ) assert.eq( results.exact.docsIn, t.find( { "locs.loc" : randLocType( query.exact ), "exact.docIn" : randYesQuery() } ).count() ) - + // $center print( "Center query..." ) - assert.eq( results.center.docsIn, t.find( { "locs.loc" : { $within : { $center : [ query.center, query.radius ] } }, "center.docIn" : randYesQuery() } ).count() ) + print( "Min box : " + minBoxSize( env, query.radius ) ) + assert.eq( results.center.docsIn, t.find( { "locs.loc" : { $within : { $center : [ query.center, query.radius ], $uniqueDocs : 1 } }, "center.docIn" : randYesQuery() } ).count() ) + assert.eq( results.center.locsIn, t.find( { "locs.loc" : { $within : { $center : [ query.center, query.radius ], $uniqueDocs : false } }, "center.docIn" : randYesQuery() } ).count() ) if( query.sphereRadius >= 0 ){ print( "Center sphere query...") // $centerSphere assert.eq( results.sphere.docsIn, t.find( { "locs.loc" : { $within : { $centerSphere : [ query.sphereCenter, query.sphereRadius ] } }, "sphere.docIn" : randYesQuery() } ).count() ) + assert.eq( results.sphere.locsIn, t.find( { "locs.loc" : { $within : { $centerSphere : [ query.sphereCenter, query.sphereRadius ], $uniqueDocs : 0.0 } }, "sphere.docIn" : randYesQuery() } ).count() ) } // $box print( "Box query..." ) - assert.eq( results.box.docsIn, t.find( { "locs.loc" : { $within : { $box : query.box } }, "box.docIn" : randYesQuery() } ).count() ) + assert.eq( results.box.docsIn, t.find( { "locs.loc" : { $within : { $box : query.box, $uniqueDocs : true } }, "box.docIn" : randYesQuery() } ).count() ) + assert.eq( results.box.locsIn, t.find( { "locs.loc" : { $within : { $box : query.box, $uniqueDocs : false } }, "box.docIn" : randYesQuery() } ).count() ) // $polygon print( "Polygon query..." 
) assert.eq( results.poly.docsIn, t.find( { "locs.loc" : { $within : { $polygon : query.boxPoly } }, "poly.docIn" : randYesQuery() } ).count() ) + assert.eq( results.poly.locsIn, t.find( { "locs.loc" : { $within : { $polygon : query.boxPoly, $uniqueDocs : 0 } }, "poly.docIn" : randYesQuery() } ).count() ) // $near print( "Near query..." ) assert.eq( results.center.locsIn > 100 ? 100 : results.center.locsIn, t.find( { "locs.loc" : { $near : query.center, $maxDistance : query.radius } } ).count( true ) ) + if( query.sphereRadius >= 0 ){ print( "Near sphere query...") // $centerSphere @@ -391,27 +428,39 @@ for ( var test = 0; test < numTests; test++ ) { // geoNear + // results limited by size of objects if( data.maxLocs < 100 ){ - + + // GeoNear query + print( "GeoNear query..." ) + assert.eq( results.center.locsIn > 100 ? 100 : results.center.locsIn, t.getDB().runCommand({ geoNear : "testAllGeo", near : query.center, maxDistance : query.radius }).results.length ) + // GeoNear query + assert.eq( results.center.docsIn > 100 ? 100 : results.center.docsIn, t.getDB().runCommand({ geoNear : "testAllGeo", near : query.center, maxDistance : query.radius, uniqueDocs : true }).results.length ) + + var num = 2 * results.center.locsIn; if( num > 200 ) num = 200; var output = db.runCommand( { geoNear : "testAllGeo", near : query.center, - maxDistance : query.radius , + maxDistance : query.radius , + includeLocs : true, num : num } ).results - + assert.eq( Math.min( 200, results.center.locsIn ), output.length ) var distance = 0; for ( var i = 0; i < output.length; i++ ) { var retDistance = output[i].dis - + var retLoc = locArray( output[i].loc ) + // print( "Dist from : " + results[i].loc + " to " + startPoint + " is " // + retDistance + " vs " + radius ) var arrLocs = locsArray( output[i].obj.locs ) + + assert.contains( retLoc, arrLocs ) // printjson( arrLocs ) @@ -422,6 +471,7 @@ for ( var test = 0; test < numTests; test++ ) { } assert( distInObj ) + assert.between( retDistance - 0.0001 , Geo.distance( locArray( query.center ), retLoc ), retDistance + 0.0001 ) assert.lte( retDistance, query.radius ) assert.gte( retDistance, distance ) distance = retDistance diff --git a/jstests/slowWeekly/geo_mnypts_plus_fields.js b/jstests/slowWeekly/geo_mnypts_plus_fields.js new file mode 100644 index 00000000000..f67e49ba930 --- /dev/null +++ b/jstests/slowWeekly/geo_mnypts_plus_fields.js @@ -0,0 +1,98 @@ +// Test sanity of geo queries with a lot of points + +var maxFields = 2; + +for( var fields = 1; fields < maxFields; fields++ ){ + + var coll = db.testMnyPts + coll.drop() + + var totalPts = 500 * 1000 + + // Add points in a 100x100 grid + for( var i = 0; i < totalPts; i++ ){ + var ii = i % 10000 + + var doc = { loc : [ ii % 100, Math.floor( ii / 100 ) ] } + + // Add fields with different kinds of data + for( var j = 0; j < fields; j++ ){ + + var field = null + + if( j % 3 == 0 ){ + // Make half the points not searchable + field = "abcdefg" + ( i % 2 == 0 ? 
"h" : "" ) + } + else if( j % 3 == 1 ){ + field = new Date() + } + else{ + field = true + } + + doc[ "field" + j ] = field + } + + coll.insert( doc ) + } + + // Create the query for the additional fields + queryFields = {} + for( var j = 0; j < fields; j++ ){ + + var field = null + + if( j % 3 == 0 ){ + field = "abcdefg" + } + else if( j % 3 == 1 ){ + field = { $lte : new Date() } + } + else{ + field = true + } + + queryFields[ "field" + j ] = field + } + + coll.ensureIndex({ loc : "2d" }) + + // Check that quarter of points in each quadrant + for( var i = 0; i < 4; i++ ){ + var x = i % 2 + var y = Math.floor( i / 2 ) + + var box = [[0, 0], [49, 49]] + box[0][0] += ( x == 1 ? 50 : 0 ) + box[1][0] += ( x == 1 ? 50 : 0 ) + box[0][1] += ( y == 1 ? 50 : 0 ) + box[1][1] += ( y == 1 ? 50 : 0 ) + + // Now only half of each result comes back + assert.eq( totalPts / ( 4 * 2 ), coll.find(Object.extend( { loc : { $within : { $box : box } } }, queryFields ) ).count() ) + assert.eq( totalPts / ( 4 * 2 ), coll.find(Object.extend( { loc : { $within : { $box : box } } }, queryFields ) ).itcount() ) + + } + + // Check that half of points in each half + for( var i = 0; i < 2; i++ ){ + + var box = [[0, 0], [49, 99]] + box[0][0] += ( i == 1 ? 50 : 0 ) + box[1][0] += ( i == 1 ? 50 : 0 ) + + assert.eq( totalPts / ( 2 * 2 ), coll.find(Object.extend( { loc : { $within : { $box : box } } }, queryFields ) ).count() ) + assert.eq( totalPts / ( 2 * 2 ), coll.find(Object.extend( { loc : { $within : { $box : box } } }, queryFields ) ).itcount() ) + + } + + // Check that all but corner set of points in radius + var circle = [[0, 0], (100 - 1) * Math.sqrt( 2 ) - 0.25 ] + + // All [99,x] pts are field0 : "abcdefg" + assert.eq( totalPts / 2 - totalPts / ( 100 * 100 ), coll.find(Object.extend( { loc : { $within : { $center : circle } } }, queryFields ) ).count() ) + assert.eq( totalPts / 2 - totalPts / ( 100 * 100 ), coll.find(Object.extend( { loc : { $within : { $center : circle } } }, queryFields ) ).itcount() ) + +} + diff --git a/jstests/slowWeekly/update_yield1.js b/jstests/slowWeekly/update_yield1.js index 7e95855adb1..5f7183064f3 100644 --- a/jstests/slowWeekly/update_yield1.js +++ b/jstests/slowWeekly/update_yield1.js @@ -54,7 +54,7 @@ while ( ( (new Date()).getTime() - start ) < ( time * 2 ) ){ assert.eq( 1 , x.inprog.length , "nothing in prog" ); } - assert.gt( 2000 , me ); + assert.gt( time / 3 , me ); } join(); diff --git a/jstests/sorta.js b/jstests/sorta.js index f5942d4bddd..7c82778a186 100644 --- a/jstests/sorta.js +++ b/jstests/sorta.js @@ -5,16 +5,17 @@ t.drop(); // Enable _allow_dot to try and bypass v8 field name checking. 
t.insert( {_id:0,a:MinKey}, true ); -t.save( {_id:1,a:null} ); -t.save( {_id:2,a:[]} ); +t.save( {_id:3,a:null} ); +t.save( {_id:1,a:[]} ); t.save( {_id:7,a:[2]} ); -t.save( {_id:3} ); -t.save( {_id:4,a:null} ); -t.save( {_id:5,a:[]} ); +t.save( {_id:4} ); +t.save( {_id:5,a:null} ); +t.save( {_id:2,a:[]} ); t.save( {_id:6,a:1} ); t.insert( {_id:8,a:MaxKey}, true ); function sorted( arr ) { + assert.eq( 9, arr.length ); for( i = 1; i < arr.length; ++i ) { assert.lte( arr[ i-1 ]._id, arr[ i ]._id ); } diff --git a/jstests/tool/csvexport1.js b/jstests/tool/csvexport1.js new file mode 100644 index 00000000000..eb4e6e38431 --- /dev/null +++ b/jstests/tool/csvexport1.js @@ -0,0 +1,45 @@ +// csvexport1.js + +t = new ToolTest( "csvexport1" ) + +c = t.startDB( "foo" ); + +assert.eq( 0 , c.count() , "setup1" ); + +objId = ObjectId() + +c.insert({ a : new NumberInt(1) , b : objId , c: [1, 2, 3], d : {a : "hello", b : "world"} , e: '-'}) +c.insert({ a : -2.0, c : MinKey, d : "Then he said, \"Hello World!\"", e : new NumberLong(3)}) +c.insert({ a : new BinData(0, "1234"), b : ISODate("2009-08-27"), c : new Timestamp(1234, 9876), d : /foo*\"bar\"/i, e : function foo() { print("Hello World!"); }}) + +assert.eq( 3 , c.count() , "setup2" ); + +t.runTool( "export" , "--out" , t.extFile , "-d" , t.baseName , "-c" , "foo" , "--csv", "-f", "a,b,c,d,e") + + +c.drop() + +assert.eq( 0 , c.count() , "after drop" ) + +t.runTool("import", "--file", t.extFile, "-d", t.baseName, "-c", "foo", "--type", "csv", "--headerline"); + +assert.soon ( 3 + " == c.count()", "after import"); + +// Note: Exporting and Importing to/from CSV is not designed to be round-trippable +expected = [] +expected.push({ a : 1, b : "ObjectID(" + objId.toString() + ")", c : "[ 1, 2, 3 ]", d : "{ \"a\" : \"hello\", \"b\" : \"world\" }", e : "-"}) +expected.push({ a : -2.0, b : "", c : "$MinKey", d : "Then he said, \"Hello World!\"", e : 3}) +expected.push({ a : "D76DF8", b : "2009-08-27T00:00:00Z", c : "{ \"t\" : 1000 , \"i\" : 9876 }", d : "/foo*\\\"bar\\\"/i", e : tojson(function foo() { print("Hello World!"); })}) + +actual = [] +actual.push(c.find({a : 1}).toArray()[0]); +actual.push(c.find({a : -2.0}).toArray()[0]); +actual.push(c.find({a : "D76DF8"}).toArray()[0]); + +for (i = 0; i < expected.length; i++) { + delete actual[i]._id + assert.eq( expected[i], actual[i], "CSV export " + i); +} + + +t.stop()
\ No newline at end of file
diff --git a/jstests/tool/csvexport2.js b/jstests/tool/csvexport2.js
new file mode 100644
index 00000000000..3e0dd2c6829
--- /dev/null
+++ b/jstests/tool/csvexport2.js
@@ -0,0 +1,31 @@
+// csvexport2.js
+
+t = new ToolTest( "csvexport2" )
+
+c = t.startDB( "foo" );
+
+// This test is designed to test exporting of a CodeWithScope object.
+// However, due to SERVER-3391, it is not possible to create a CodeWithScope object in the mongo shell,
+// so this test does not work. Once SERVER-3391 is resolved, this test should be uncommented.
+
+//assert.eq( 0 , c.count() , "setup1" );
+
+//c.insert({ a : 1 , b : Code("print(\"Hello \" + x);", {"x" : "World!"})})
+//assert.eq( 1 , c.count() , "setup2" );
+//t.runTool( "export" , "--out" , t.extFile , "-d" , t.baseName , "-c" , "foo" , "--csv", "-f", "a,b")
+
+
+//c.drop()
+
+//assert.eq( 0 , c.count() , "after drop" )
+//t.runTool("import", "--file", t.extFile, "-d", t.baseName, "-c", "foo", "--type", "csv", "--headerline");
+//assert.soon ( 1 + " == c.count()", "after import");
+
+//expected = { a : 1, b : "\"{ \"$code\" : print(\"Hello \" + x); , \"$scope\" : { \"x\" : \"World!\" } }"};
+//actual = c.findOne()
+
+//delete actual._id;
+//assert.eq( expected, actual );
+
+
+t.stop()
\ No newline at end of file diff --git a/jstests/tool/csvimport1.js b/jstests/tool/csvimport1.js new file mode 100644 index 00000000000..3bff1110cbe --- /dev/null +++ b/jstests/tool/csvimport1.js @@ -0,0 +1,40 @@ +// csvimport1.js + +t = new ToolTest( "csvimport1" ) + +c = t.startDB( "foo" ); + +base = [] +base.push({ a : 1, b : "this is some text.\nThis text spans multiple lines, and just for fun\ncontains a comma", "c" : "This has leading and trailing whitespace!" }) +base.push({a : 2, b : "When someone says something you \"put it in quotes\"", "c" : "I like embedded quotes/slashes\\backslashes" }) +base.push({a : 3, b : " This line contains the empty string and has leading and trailing whitespace inside the quotes! ", "c" : "" }) +base.push({a : 4, b : "", "c" : "How are empty entries handled?" }) +base.push({a : 5, b : "\"\"", c : "\"This string is in quotes and contains empty quotes (\"\")\""}) +base.push({ a : "a" , b : "b" , c : "c"}) + +assert.eq( 0 , c.count() , "setup" ); + +t.runTool( "import" , "--file" , "jstests/tool/data/csvimport1.csv" , "-d" , t.baseName , "-c" , "foo" , "--type" , "csv" , "-f" , "a,b,c" ); +assert.soon( base.length + " == c.count()" , "after import 1 " ); + +a = c.find().sort( { a : 1 } ).toArray(); +for (i = 0; i < base.length; i++ ) { + delete a[i]._id + assert.eq( tojson(base[i]), tojson(a[i]), "csv parse " + i) +} + +c.drop() +assert.eq( 0 , c.count() , "after drop" ) + +t.runTool( "import" , "--file" , "jstests/tool/data/csvimport1.csv" , "-d" , t.baseName , "-c" , "foo" , "--type" , "csv" , "--headerline" ) +assert.soon( "c.findOne()" , "no data after sleep" ); +assert.eq( base.length - 1 , c.count() , "after import 2" ); + +x = c.find().sort( { a : 1 } ).toArray(); +for (i = 0; i < base.length - 1; i++ ) { + delete x[i]._id + assert.eq( tojson(base[i]), tojson(x[i]), "csv parse with headerline " + i) +} + + +t.stop() diff --git a/jstests/tool/data/csvimport1.csv b/jstests/tool/data/csvimport1.csv new file mode 100644 index 00000000000..256d40a9184 --- /dev/null +++ b/jstests/tool/data/csvimport1.csv @@ -0,0 +1,8 @@ +a,b,c +1,"this is some text. +This text spans multiple lines, and just for fun +contains a comma", "This has leading and trailing whitespace!" +2, "When someone says something you ""put it in quotes""", I like embedded quotes/slashes\backslashes + 3 , " This line contains the empty string and has leading and trailing whitespace inside the quotes! ", "" + "4" ,, How are empty entries handled? 
+"5","""""", """This string is in quotes and contains empty quotes ("""")""" diff --git a/jstests/tool/dumprestore5.js b/jstests/tool/dumprestore5.js new file mode 100644 index 00000000000..ce28fea2027 --- /dev/null +++ b/jstests/tool/dumprestore5.js @@ -0,0 +1,36 @@ +// dumprestore5.js + +t = new ToolTest( "dumprestore5" ); + +t.startDB( "foo" ); + +db = t.db + +db.addUser('user','password') + +assert.eq(1, db.system.users.count(), "setup") +assert.eq(1, db.system.indexes.count(), "setup2") + +t.runTool( "dump" , "--out" , t.ext ); + +db.dropDatabase() + +assert.eq(0, db.system.users.count(), "didn't drop users") +assert.eq(0, db.system.indexes.count(), "didn't drop indexes") + +t.runTool("restore", "--dir", t.ext) + +assert.soon("db.system.users.findOne()", "no data after restore"); +assert.eq(1, db.system.users.find({user:'user'}).count(), "didn't restore users") +assert.eq(1, db.system.indexes.count(), "didn't restore indexes") + +db.removeUser('user') +db.addUser('user2', 'password2') + +t.runTool("restore", "--dir", t.ext, "--drop") + +assert.soon("1 == db.system.users.find({user:'user'}).count()", "didn't restore users 2") +assert.eq(0, db.system.users.find({user:'user2'}).count(), "didn't drop users") +assert.eq(1, db.system.indexes.count(), "didn't maintain indexes") + +t.stop(); diff --git a/jstests/unique2.js b/jstests/unique2.js index 42cf9fbd0ac..1c2828830f4 100644 --- a/jstests/unique2.js +++ b/jstests/unique2.js @@ -1,3 +1,11 @@ +// Test unique and dropDups index options. + +function checkNprev( np ) { + // getPrevError() is not available sharded. + if ( typeof( myShardingTest ) == 'undefined' ) { + assert.eq( np, db.getPrevError().nPrev ); + } +} t = db.jstests_unique2; @@ -21,7 +29,9 @@ t.ensureIndex({k:1}, {unique:true}); t.insert({k:3}); t.insert({k:[2,3]}); +assert( db.getLastError() ); t.insert({k:[4,3]}); +assert( db.getLastError() ); assert( t.count() == 1 ) ; assert( t.find().sort({k:1}).toArray().length == 1 ) ; @@ -33,9 +43,52 @@ t.insert({k:[2,3]}); t.insert({k:[4,3]}); assert( t.count() == 3 ) ; +// Trigger an error, so we can test n of getPrevError() later. +assert.throws( function() { t.find( {$where:'aaa'} ).itcount(); } ); +assert( db.getLastError() ); +checkNprev( 1 ); + t.ensureIndex({k:1}, {unique:true, dropDups:true}); +// Check error flag was not set SERVER-2054. +assert( !db.getLastError() ); +// Check that offset of previous error is correct. +checkNprev( 2 ); + +// Check the dups were dropped. +assert( t.count() == 1 ) ; +assert( t.find().sort({k:1}).toArray().length == 1 ) ; +assert( t.find().sort({k:1}).count() == 1 ) ; + +// Check that a new conflicting insert will cause an error. +t.insert({k:[2,3]}); +assert( db.getLastError() ); + +t.drop(); +t.insert({k:3}); +t.insert({k:[2,3]}); +t.insert({k:[4,3]}); +assert( t.count() == 3 ) ; + + +// Now try with a background index op. + +// Trigger an error, so we can test n of getPrevError() later. +assert.throws( function() { t.find( {$where:'aaa'} ).itcount(); } ); +assert( db.getLastError() ); +checkNprev( 1 ); + +t.ensureIndex({k:1}, {background:true, unique:true, dropDups:true}); +// Check error flag was not set SERVER-2054. +assert( !db.getLastError() ); +// Check that offset of pervious error is correct. +checkNprev( 2 ); + +// Check the dups were dropped. assert( t.count() == 1 ) ; assert( t.find().sort({k:1}).toArray().length == 1 ) ; assert( t.find().sort({k:1}).count() == 1 ) ; +// Check that a new conflicting insert will cause an error. 
+t.insert({k:[2,3]}); +assert( db.getLastError() ); diff --git a/jstests/uniqueness.js b/jstests/uniqueness.js index f1651b31c65..ce19ad08d82 100644 --- a/jstests/uniqueness.js +++ b/jstests/uniqueness.js @@ -26,8 +26,21 @@ db.jstests_uniqueness2.drop(); db.jstests_uniqueness2.insert({a:3}); db.jstests_uniqueness2.insert({a:3}); assert( db.jstests_uniqueness2.count() == 2 , 6) ; +db.resetError(); db.jstests_uniqueness2.ensureIndex({a:1}, true); assert( db.getLastError() , 7); +assert( db.getLastError().match( /E11000/ ) ); + +// Check for an error message when we index in the background and there are dups +db.jstests_uniqueness2.drop(); +db.jstests_uniqueness2.insert({a:3}); +db.jstests_uniqueness2.insert({a:3}); +assert( db.jstests_uniqueness2.count() == 2 , 6) ; +assert( !db.getLastError() ); +db.resetError(); +db.jstests_uniqueness2.ensureIndex({a:1}, {unique:true,background:true}); +assert( db.getLastError() , 7); +assert( db.getLastError().match( /E11000/ ) ); /* Check that if we update and remove _id, it gets added back by the DB */ diff --git a/jstests/updatef.js b/jstests/updatef.js new file mode 100644 index 00000000000..69425932f19 --- /dev/null +++ b/jstests/updatef.js @@ -0,0 +1,24 @@ +// Test unsafe management of nsdt on update command yield SERVER-3208 + +prefixNS = db.jstests_updatef; +prefixNS.save( {} ); + +t = db.jstests_updatef_actual; +t.drop(); + +t.save( {a:0,b:[]} ); +for( i = 0; i < 1000; ++i ) { + t.save( {a:100} ); +} +t.save( {a:0,b:[]} ); + +db.getLastError(); +// Repeatedly rename jstests_updatef to jstests_updatef_ and back. This will +// invalidate the jstests_updatef_actual NamespaceDetailsTransient object. +s = startParallelShell( "for( i=0; i < 100; ++i ) { db.jstests_updatef.renameCollection( 'jstests_updatef_' ); db.jstests_updatef_.renameCollection( 'jstests_updatef' ); }" ); + +for( i=0; i < 20; ++i ) { + t.update( {a:0}, {$push:{b:i}}, false, true ); +} + +s(); diff --git a/jstests/updateg.js b/jstests/updateg.js new file mode 100644 index 00000000000..f8d452f71b2 --- /dev/null +++ b/jstests/updateg.js @@ -0,0 +1,17 @@ +// SERVER-3370 check modifiers with field name characters comparing less than '.' character. 
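+// (in ASCII '-' is 0x2d and '.' is 0x2e, so "all-copy.t" sorts before "all.t" even
+// though "all" is a prefix of "all-copy"; that ordering edge is what this test exercises)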
+ +t = db.jstests_updateg; + +t.drop(); +t.update({}, { '$inc' : { 'all.t' : 1, 'all-copy.t' : 1 }}, true); +assert.eq( 1, t.count( {all:{t:1},'all-copy':{t:1}} ) ); + +t.drop(); +t.save({ 'all' : {}, 'all-copy' : {}}); +t.update({}, { '$inc' : { 'all.t' : 1, 'all-copy.t' : 1 }}); +assert.eq( 1, t.count( {all:{t:1},'all-copy':{t:1}} ) ); + +t.drop(); +t.save({ 'all11' : {}, 'all2' : {}}); +t.update({}, { '$inc' : { 'all11.t' : 1, 'all2.t' : 1 }}); +assert.eq( 1, t.count( {all11:{t:1},'all2':{t:1}} ) ); @@ -44,7 +44,20 @@ # include <windows.h> #endif +#if defined(__linux__) && defined(MONGO_EXPOSE_MACROS) +// glibc's optimized versions are better than g++ builtins +# define __builtin_strcmp strcmp +# define __builtin_strlen strlen +# define __builtin_memchr memchr +# define __builtin_memcmp memcmp +# define __builtin_memcpy memcpy +# define __builtin_memset memset +# define __builtin_memmove memmove +#endif + + #include <ctime> +#include <cstring> #include <sstream> #include <string> #include <memory> @@ -138,7 +151,11 @@ namespace mongo { void asserted(const char *msg, const char *file, unsigned line); } -#define MONGO_assert(_Expression) (void)( (!!(_Expression)) || (mongo::asserted(#_Expression, __FILE__, __LINE__), 0) ) + + +// TODO: Rework the headers so we don't need this craziness +#include "bson/inline_decls.h" +#define MONGO_assert(_Expression) (void)( MONGO_likely(!!(_Expression)) || (mongo::asserted(#_Expression, __FILE__, __LINE__), 0) ) #include "util/debug_util.h" #include "util/goodies.h" diff --git a/rpm/mongo.spec b/rpm/mongo.spec index 332c6d29c96..5287ec4a39a 100644 --- a/rpm/mongo.spec +++ b/rpm/mongo.spec @@ -1,5 +1,5 @@ Name: mongo -Version: 1.9.0 +Version: 1.9.2 Release: mongodb_1%{?dist} Summary: mongo client shell and tools License: AGPL 3.0 diff --git a/s/balance.cpp b/s/balance.cpp index da25f3362c2..0cb39ad038d 100644 --- a/s/balance.cpp +++ b/s/balance.cpp @@ -155,7 +155,7 @@ namespace mongo { cursor.reset(); if ( collections.empty() ) { - log(1) << "no collections to balance" << endl; + LOG(1) << "no collections to balance" << endl; return; } @@ -170,7 +170,7 @@ namespace mongo { vector<Shard> allShards; Shard::getAllShards( allShards ); if ( allShards.size() < 2) { - log(1) << "can't balance without more active shards" << endl; + LOG(1) << "can't balance without more active shards" << endl; return; } @@ -205,7 +205,7 @@ namespace mongo { cursor.reset(); if (shardToChunksMap.empty()) { - log(1) << "skipping empty collection (" << ns << ")"; + LOG(1) << "skipping empty collection (" << ns << ")"; continue; } @@ -282,7 +282,7 @@ namespace mongo { // now make sure we should even be running if ( ! grid.shouldBalance() ) { - log(1) << "skipping balancing round because balancing is disabled" << endl; + LOG(1) << "skipping balancing round because balancing is disabled" << endl; conn.done(); sleepsecs( 30 ); @@ -297,25 +297,25 @@ namespace mongo { { dist_lock_try lk( &balanceLock , "doing balance round" ); if ( ! 
lk.got() ) { - log(1) << "skipping balancing round because another balancer is active" << endl; + LOG(1) << "skipping balancing round because another balancer is active" << endl; conn.done(); sleepsecs( 30 ); // no need to wake up soon continue; } - log(1) << "*** start balancing round" << endl; + LOG(1) << "*** start balancing round" << endl; vector<CandidateChunkPtr> candidateChunks; _doBalanceRound( conn.conn() , &candidateChunks ); if ( candidateChunks.size() == 0 ) { - log(1) << "no need to move any chunk" << endl; + LOG(1) << "no need to move any chunk" << endl; } else { _balancedLastTime = _moveChunks( &candidateChunks ); } - log(1) << "*** end of balancing round" << endl; + LOG(1) << "*** end of balancing round" << endl; } conn.done(); @@ -326,7 +326,7 @@ namespace mongo { log() << "caught exception while doing balance: " << e.what() << endl; // Just to match the opening statement if in log level 1 - log(1) << "*** End of balancing round" << endl; + LOG(1) << "*** End of balancing round" << endl; sleepsecs( 30 ); // sleep a fair amount b/c of error continue; diff --git a/s/balancer_policy.cpp b/s/balancer_policy.cpp index efb0fb924af..f1b4bf14db1 100644 --- a/s/balancer_policy.cpp +++ b/s/balancer_policy.cpp @@ -96,13 +96,13 @@ namespace mongo { return NULL; } - log(1) << "collection : " << ns << endl; - log(1) << "donor : " << max.second << " chunks on " << max.first << endl; - log(1) << "receiver : " << min.second << " chunks on " << min.first << endl; + LOG(1) << "collection : " << ns << endl; + LOG(1) << "donor : " << max.second << " chunks on " << max.first << endl; + LOG(1) << "receiver : " << min.second << " chunks on " << min.first << endl; if ( ! drainingShards.empty() ) { string drainingStr; joinStringDelim( drainingShards, &drainingStr, ',' ); - log(1) << "draining : " << ! drainingShards.empty() << "(" << drainingShards.size() << ")" << endl; + LOG(1) << "draining : " << ! drainingShards.empty() << "(" << drainingShards.size() << ")" << endl; } // Solving imbalances takes a higher priority than draining shards. Many shards can diff --git a/s/chunk.cpp b/s/chunk.cpp index b1984179864..09dc994d961 100644 --- a/s/chunk.cpp +++ b/s/chunk.cpp @@ -208,7 +208,7 @@ namespace mongo { // no split points means there isn't enough data to split on // 1 split point means we have between half the chunk size to full chunk size // so we shouldn't split - log(1) << "chunk not full enough to trigger auto-split" << endl; + LOG(1) << "chunk not full enough to trigger auto-split" << endl; return BSONObj(); } @@ -350,7 +350,7 @@ namespace mongo { // this was implicit before since we did a splitVector on the same socket ShardConnection::sync(); - log(1) << "about to initiate autosplit: " << *this << " dataWritten: " << _dataWritten << " splitThreshold: " << splitThreshold << endl; + LOG(1) << "about to initiate autosplit: " << *this << " dataWritten: " << _dataWritten << " splitThreshold: " << splitThreshold << endl; _dataWritten = 0; // reset so we check often enough @@ -378,7 +378,7 @@ namespace mongo { Shard newLocation = Shard::pick( getShard() ); if ( getShard() == newLocation ) { // if this is the best shard, then we shouldn't do anything (Shard::pick already logged our shard). 
- log(1) << "recently split chunk: " << range << " already in the best shard: " << getShard() << endl; + LOG(1) << "recently split chunk: " << range << " already in the best shard: " << getShard() << endl; return true; // we did split even if we didn't migrate } @@ -386,7 +386,7 @@ namespace mongo { ChunkPtr toMove = cm->findChunk(min); if ( ! (toMove->getMin() == min && toMove->getMax() == max) ){ - log(1) << "recently split chunk: " << range << " modified before we could migrate " << toMove << endl; + LOG(1) << "recently split chunk: " << range << " modified before we could migrate " << toMove << endl; return true; } @@ -666,8 +666,10 @@ namespace mongo { } if ( c ) { - if ( c->contains( obj ) ) + if ( c->contains( key ) ){ + dassert(c->contains(key)); // doesn't use fast-path in extractKey return c; + } PRINT(foo); PRINT(*c); @@ -791,7 +793,7 @@ namespace mongo { set<Shard> seen; - log(1) << "ChunkManager::drop : " << _ns << endl; + LOG(1) << "ChunkManager::drop : " << _ns << endl; // lock all shards so no one can do a split/migrate for ( ChunkMap::const_iterator i=_chunkMap.begin(); i!=_chunkMap.end(); ++i ) { @@ -799,7 +801,7 @@ namespace mongo { seen.insert( c->getShard() ); } - log(1) << "ChunkManager::drop : " << _ns << "\t all locked" << endl; + LOG(1) << "ChunkManager::drop : " << _ns << "\t all locked" << endl; // delete data from mongod for ( set<Shard>::iterator i=seen.begin(); i!=seen.end(); i++ ) { @@ -808,13 +810,13 @@ namespace mongo { conn.done(); } - log(1) << "ChunkManager::drop : " << _ns << "\t removed shard data" << endl; + LOG(1) << "ChunkManager::drop : " << _ns << "\t removed shard data" << endl; // remove chunk data ScopedDbConnection conn( configServer.modelServer() ); conn->remove( Chunk::chunkMetadataNS , BSON( "ns" << _ns ) ); conn.done(); - log(1) << "ChunkManager::drop : " << _ns << "\t removed chunk data" << endl; + LOG(1) << "ChunkManager::drop : " << _ns << "\t removed chunk data" << endl; for ( set<Shard>::iterator i=seen.begin(); i!=seen.end(); i++ ) { ScopedDbConnection conn( *i ); @@ -830,7 +832,7 @@ namespace mongo { conn.done(); } - log(1) << "ChunkManager::drop : " << _ns << "\t DONE" << endl; + LOG(1) << "ChunkManager::drop : " << _ns << "\t DONE" << endl; configServer.logChange( "dropCollection" , _ns , BSONObj() ); } @@ -841,7 +843,7 @@ namespace mongo { vector<BSONObj> splitPoints; soleChunk->pickSplitVector( splitPoints , Chunk::MaxChunkSize ); if ( splitPoints.empty() ) { - log(1) << "not enough data to warrant chunking " << getns() << endl; + LOG(1) << "not enough data to warrant chunking " << getns() << endl; return; } @@ -983,7 +985,7 @@ namespace mongo { void run() { runShardChunkVersion(); - log(1) << "shardObjTest passed" << endl; + LOG(1) << "shardObjTest passed" << endl; } } shardObjTest; @@ -1008,7 +1010,7 @@ namespace mongo { cmdBuilder.append( "shardHost" , s.getConnString() ); BSONObj cmd = cmdBuilder.obj(); - log(1) << " setShardVersion " << s.getName() << " " << conn.getServerAddress() << " " << ns << " " << cmd << " " << &conn << endl; + LOG(1) << " setShardVersion " << s.getName() << " " << conn.getServerAddress() << " " << ns << " " << cmd << " " << &conn << endl; return conn.runCommand( "admin" , cmd , result ); } diff --git a/s/commands_admin.cpp b/s/commands_admin.cpp index 4cb30f99a3b..4568c4d3897 100644 --- a/s/commands_admin.cpp +++ b/s/commands_admin.cpp @@ -45,6 +45,7 @@ #include "stats.h" #include "writeback_listener.h" #include "client.h" +#include "../util/ramlog.h" namespace mongo { @@ -82,7 +83,7 @@ namespace 
diff --git a/s/commands_admin.cpp b/s/commands_admin.cpp
index 4cb30f99a3b..4568c4d3897 100644
--- a/s/commands_admin.cpp
+++ b/s/commands_admin.cpp
@@ -45,6 +45,7 @@
 #include "stats.h"
 #include "writeback_listener.h"
 #include "client.h"
+#include "../util/ramlog.h"
 namespace mongo {
@@ -82,7 +83,7 @@ namespace mongo {
        virtual void help( stringstream& help ) const {
            help << " shows status/reachability of servers in the cluster";
        }
-        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            result.append("configserver", configServer.getPrimary().getConnString() );
            result.append("isdbgrid", 1);
            return true;
@@ -95,7 +96,7 @@ namespace mongo {
        virtual void help( stringstream& help ) const {
            help << "flush all router config";
        }
-        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            grid.flushConfig();
            result.appendBool( "flushed" , true );
            return true;
@@ -112,7 +113,7 @@ namespace mongo {
        virtual bool slaveOk() const { return true; }
        virtual LockType locktype() const { return NONE; }
-        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+        bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
            result.append( "host" , prettyHostName() );
            result.append("version", versionString);
            result.append("process","mongos");
@@ -177,6 +178,20 @@ namespace mongo {
                bb.done();
            }
+            {
+                RamLog* rl = RamLog::get( "warnings" );
+                verify(15879, rl);
+
+                if (rl->lastWrite() >= time(0)-(10*60)){ // only show warnings from last 10 minutes
+                    vector<const char*> lines;
+                    rl->get( lines );
+
+                    BSONArrayBuilder arr( result.subarrayStart( "warnings" ) );
+                    for ( unsigned i=std::max(0,(int)lines.size()-10); i<lines.size(); i++ )
+                        arr.append( lines[i] );
+                    arr.done();
+                }
+            }
            return 1;
        }
@@ -187,7 +202,7 @@ namespace mongo {
    class FsyncCommand : public GridAdminCmd {
    public:
        FsyncCommand() : GridAdminCmd( "fsync" ) {}
-        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            if ( cmdObj["lock"].trueValue() ) {
                errmsg = "can't do lock through mongos";
                return false;
@@ -228,7 +243,7 @@ namespace mongo {
        virtual void help( stringstream& help ) const {
            help << " example: { moveprimary : 'foo' , to : 'localhost:9999' }";
        }
-        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            string dbname = cmdObj.firstElement().valuestrsafe();
            if ( dbname.size() == 0 ) {
@@ -323,7 +338,7 @@ namespace mongo {
                << "Enable sharding for a db. (Use 'shardcollection' command afterwards.)\n"
                << " { enablesharding : \"<dbname>\" }\n";
        }
-        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            string dbname = cmdObj.firstElement().valuestrsafe();
            if ( dbname.size() == 0 ) {
                errmsg = "no db";
@@ -368,7 +383,7 @@ namespace mongo {
                << " { enablesharding : \"<dbname>\" }\n";
        }
-        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            string ns = cmdObj.firstElement().valuestrsafe();
            if ( ns.size() == 0 ) {
                errmsg = "no ns";
@@ -517,7 +532,7 @@ namespace mongo {
            help << " example: { getShardVersion : 'alleyinsider.foo' } ";
        }
-        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            string ns = cmdObj.firstElement().valuestrsafe();
            if ( ns.size() == 0 ) {
                errmsg = "need to specify full namespace";
                return false;
            }
@@ -530,7 +545,7 @@ namespace mongo {
                return false;
            }
-            ChunkManagerPtr cm = config->getChunkManager( ns );
+            ChunkManagerPtr cm = config->getChunkManagerIfExists( ns );
            if ( ! cm ) {
                errmsg = "no chunk manager?";
                return false;
            }
@@ -555,7 +570,7 @@ namespace mongo {
                ;
        }
-        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            if ( ! okForConfigChanges( errmsg ) )
                return false;
@@ -633,7 +648,7 @@ namespace mongo {
        virtual void help( stringstream& help ) const {
            help << "{ movechunk : 'test.foo' , find : { num : 1 } , to : 'localhost:30001' }";
        }
-        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            if ( ! okForConfigChanges( errmsg ) )
                return false;
@@ -710,7 +725,7 @@ namespace mongo {
        virtual void help( stringstream& help ) const { help << "list all shards of the system"; }
-        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            ScopedDbConnection conn( configServer.getPrimary() );
            vector<BSONObj> all;
@@ -734,7 +749,7 @@ namespace mongo {
        virtual void help( stringstream& help ) const { help << "add a new shard to the system"; }
-        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            errmsg.clear();
            // get replica set component hosts
@@ -795,7 +810,7 @@ namespace mongo {
        virtual void help( stringstream& help ) const { help << "remove a shard from the system."; }
-        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            string target = cmdObj.firstElement().valuestrsafe();
            Shard s = Shard::make( target );
            if ( ! grid.knowAboutShard( s.getConnString() ) ) {
@@ -878,11 +893,12 @@ namespace mongo {
    class IsDbGridCmd : public Command {
    public:
        virtual LockType locktype() const { return NONE; }
+        virtual bool requiresAuth() { return false; }
        virtual bool slaveOk() const { return true; }
        IsDbGridCmd() : Command("isdbgrid") { }
-        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            result.append("isdbgrid", 1);
            result.append("hostname", getHostNameCached());
            return true;
@@ -900,7 +916,7 @@ namespace mongo {
            help << "test if this is master half of a replica pair";
        }
        CmdIsMaster() : Command("isMaster" , false , "ismaster") { }
-        virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            result.appendBool("ismaster", true );
            result.append("msg", "isdbgrid");
            result.appendNumber("maxBsonObjectSize", BSONObjMaxUserSize);
@@ -924,7 +940,7 @@ namespace mongo {
        virtual void help( stringstream &help ) const {
            help << "{whatsmyuri:1}";
        }
-        virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            result << "you" << ClientInfo::get()->getRemote();
            return true;
        }
@@ -942,7 +958,7 @@ namespace mongo {
            help << "get previous error (since last reseterror command)";
        }
        CmdShardingGetPrevError() : Command( "getPrevError" , false , "getpreverror") { }
-        virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            errmsg += "getpreverror not supported for sharded environments";
            return false;
        }
@@ -960,7 +976,7 @@ namespace mongo {
        }
        CmdShardingGetLastError() : Command("getLastError" , false , "getlasterror") { }
-        virtual bool run(const string& dbName, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        virtual bool run(const string& dbName, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            LastError *le = lastError.disableForCommand();
            {
                assert( le );
@@ -987,7 +1003,7 @@ namespace mongo {
            return true;
        }
-        bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) {
+        bool run(const string& dbName , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) {
            LastError *le = lastError.get();
            if ( le )
                le->reset();
@@ -1018,7 +1034,7 @@ namespace mongo {
        virtual LockType locktype() const { return NONE; }
        virtual void help( stringstream& help ) const { help << "list databases on cluster"; }
-        bool run(const string& , BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) {
+        bool run(const string& , BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) {
            vector<Shard> shards;
            Shard::getAllShards( shards );
@@ -1115,7 +1131,7 @@ namespace mongo {
        virtual LockType locktype() const { return NONE; }
        virtual void help( stringstream& help ) const { help << "Not supported sharded"; }
-        bool run(const string& , BSONObj& jsobj, string& errmsg, BSONObjBuilder& /*result*/, bool /*fromRepl*/) {
+        bool run(const string& , BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& /*result*/, bool /*fromRepl*/) {
            errmsg = "closeAllDatabases isn't supported through mongos";
            return false;
        }
@@ -1131,7 +1147,7 @@ namespace mongo {
        virtual LockType locktype() const { return NONE; }
        virtual void help( stringstream& help ) const { help << "Not supported through mongos"; }
-        bool run(const string& , BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) {
+        bool run(const string& , BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) {
            if ( jsobj["forShell"].trueValue() )
                lastError.disableForCommand();
@@ -1148,7 +1164,7 @@ namespace mongo {
            << "either (1) ran from localhost or (2) authenticated.";
    }
-    bool CmdShutdown::run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+    bool CmdShutdown::run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
        return shutdownHelper();
    }
diff --git a/s/commands_public.cpp b/s/commands_public.cpp
index 713b9489fc2..ef7110c7646 100644
--- a/s/commands_public.cpp
+++ b/s/commands_public.cpp
@@ -53,22 +53,34 @@ namespace mongo {
            return false;
        }
+        // Override if passthrough should also send query options
+        // Safer as off by default, can slowly enable as we add more tests
+        virtual bool passOptions() const { return false; }
+
        // all grid commands are designed not to lock
        virtual LockType locktype() const { return NONE; }
    protected:
+
        bool passthrough( DBConfigPtr conf, const BSONObj& cmdObj , BSONObjBuilder& result ) {
-            return _passthrough(conf->getName(), conf, cmdObj, result);
+            return _passthrough(conf->getName(), conf, cmdObj, 0, result);
        }
        bool adminPassthrough( DBConfigPtr conf, const BSONObj& cmdObj , BSONObjBuilder& result ) {
-            return _passthrough("admin", conf, cmdObj, result);
+            return _passthrough("admin", conf, cmdObj, 0, result);
+        }
+
+        bool passthrough( DBConfigPtr conf, const BSONObj& cmdObj , int options, BSONObjBuilder& result ) {
+            return _passthrough(conf->getName(), conf, cmdObj, options, result);
+        }
+        bool adminPassthrough( DBConfigPtr conf, const BSONObj& cmdObj , int options, BSONObjBuilder& result ) {
+            return _passthrough("admin", conf, cmdObj, options, result);
        }
    private:
-        bool _passthrough(const string& db, DBConfigPtr conf, const BSONObj& cmdObj , BSONObjBuilder& result ) {
+        bool _passthrough(const string& db, DBConfigPtr conf, const BSONObj& cmdObj , int options , BSONObjBuilder& result ) {
            ShardConnection conn( conf->getPrimary() , "" );
            BSONObj res;
-            bool ok = conn->runCommand( db , cmdObj , res );
+            bool ok = conn->runCommand( db , cmdObj , res , passOptions() ? options : 0 );
            if ( ! ok && res["code"].numberInt() == StaleConfigInContextCode ) {
                conn.done();
                throw StaleConfigException("foo","command failed because of stale config");
@@ -99,13 +111,14 @@ namespace mongo {
        virtual void aggregateResults(const vector<BSONObj>& results, BSONObjBuilder& output) {}
        // don't override
-        virtual bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& output, bool) {
+        virtual bool run(const string& dbName , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& output, bool) {
+            LOG(1) << "RunOnAllShardsCommand db: " << dbName << " cmd:" << cmdObj << endl;
            set<Shard> shards;
            getShards(dbName, cmdObj, shards);
            list< shared_ptr<Future::CommandResult> > futures;
            for ( set<Shard>::const_iterator i=shards.begin(), end=shards.end() ; i != end ; i++ ) {
-                futures.push_back( Future::spawnCommand( i->getConnString() , dbName , cmdObj ) );
+                futures.push_back( Future::spawnCommand( i->getConnString() , dbName , cmdObj, 0 ) );
            }
            vector<BSONObj> results;
@@ -159,13 +172,13 @@ namespace mongo {
        virtual string getFullNS( const string& dbName , const BSONObj& cmdObj ) = 0;
-        virtual bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        virtual bool run(const string& dbName , BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result, bool) {
            string fullns = getFullNS( dbName , cmdObj );
            DBConfigPtr conf = grid.getDBConfig( dbName , false );
            if ( ! conf || ! conf->isShardingEnabled() || ! conf->isSharded( fullns ) ) {
-                return passthrough( conf , cmdObj , result );
+                return passthrough( conf , cmdObj , options, result );
            }
            errmsg = "can't do command: " + name + " on sharded collection";
            return false;
@@ -184,6 +197,16 @@ namespace mongo {
        ReIndexCmd() : AllShardsCollectionCommand("reIndex") {}
    } reIndexCmd;
+    class ProfileCmd : public PublicGridCommand {
+    public:
+        ProfileCmd() : PublicGridCommand("profile") {}
+        virtual bool run(const string& dbName , BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result, bool) {
+            errmsg = "profile currently not supported via mongos";
+            return false;
+        }
+    } profileCmd;
+
+
    class ValidateCmd : public AllShardsCollectionCommand {
    public:
        ValidateCmd() : AllShardsCollectionCommand("validate") {}
@@ -255,7 +278,7 @@ namespace mongo {
    class DropCmd : public PublicGridCommand {
    public:
        DropCmd() : PublicGridCommand( "drop" ) {}
-        bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        bool run(const string& dbName , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            string collection = cmdObj.firstElement().valuestrsafe();
            string fullns = dbName + "." + collection;
@@ -280,7 +303,7 @@ namespace mongo {
    class DropDBCmd : public PublicGridCommand {
    public:
        DropDBCmd() : PublicGridCommand( "dropDatabase" ) {}
-        bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        bool run(const string& dbName , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            BSONElement e = cmdObj.firstElement();
@@ -309,7 +332,7 @@ namespace mongo {
    class RenameCollectionCmd : public PublicGridCommand {
    public:
        RenameCollectionCmd() : PublicGridCommand( "renameCollection" ) {}
-        bool run(const string& dbName, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        bool run(const string& dbName, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            string fullnsFrom = cmdObj.firstElement().valuestrsafe();
            string dbNameFrom = nsToDatabase( fullnsFrom.c_str() );
            DBConfigPtr confFrom = grid.getDBConfig( dbNameFrom , false );
@@ -334,7 +357,7 @@ namespace mongo {
    class CopyDBCmd : public PublicGridCommand {
    public:
        CopyDBCmd() : PublicGridCommand( "copydb" ) {}
-        bool run(const string& dbName, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        bool run(const string& dbName, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            string todb = cmdObj.getStringField("todb");
            uassert(13402, "need a todb argument", !todb.empty());
@@ -370,7 +393,8 @@ namespace mongo {
    class CountCmd : public PublicGridCommand {
    public:
        CountCmd() : PublicGridCommand("count") { }
-        bool run(const string& dbName, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool l) {
+        virtual bool passOptions() const { return true; }
+        bool run(const string& dbName, BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result, bool) {
            string collection = cmdObj.firstElement().valuestrsafe();
            string fullns = dbName + "." + collection;
@@ -379,12 +403,11 @@ namespace mongo {
                filter = cmdObj["query"].Obj();
            DBConfigPtr conf = grid.getDBConfig( dbName , false );
-
            if ( ! conf || ! conf->isShardingEnabled() || ! conf->isSharded( fullns ) ) {
                ShardConnection conn( conf->getPrimary() , fullns );
                BSONObj temp;
-                bool ok = conn->runCommand( dbName , cmdObj , temp );
+                bool ok = conn->runCommand( dbName , cmdObj , temp, options );
                conn.done();
                if ( ok ) {
@@ -399,7 +422,7 @@ namespace mongo {
                }
                // this collection got sharded
-                ChunkManagerPtr cm = conf->getChunkManager( fullns , true );
+                ChunkManagerPtr cm = conf->getChunkManagerIfExists( fullns , true );
                if ( ! cm ) {
                    errmsg = "should be sharded now";
                    result.append( "root" , temp );
@@ -410,11 +433,11 @@ namespace mongo {
            long long total = 0;
            map<string,long long> shardCounts;
-            ChunkManagerPtr cm = conf->getChunkManager( fullns );
+            ChunkManagerPtr cm = conf->getChunkManagerIfExists( fullns );
            while ( true ) {
                if ( ! cm ) {
                    // probably unsharded now
-                    return run( dbName , cmdObj , errmsg , result , l );
+                    return run( dbName , cmdObj , options , errmsg , result, false );
                }
                set<Shard> shards;
@@ -428,14 +451,14 @@ namespace mongo {
                    if ( conn.setVersion() ) {
                        total = 0;
                        shardCounts.clear();
-                        cm = conf->getChunkManager( fullns );
+                        cm = conf->getChunkManagerIfExists( fullns );
                        conn.done();
                        hadToBreak = true;
                        break;
                    }
                    BSONObj temp;
-                    bool ok = conn->runCommand( dbName , BSON( "count" << collection << "query" << filter ) , temp );
+                    bool ok = conn->runCommand( dbName , BSON( "count" << collection << "query" << filter ) , temp, options );
                    conn.done();
                    if ( ok ) {
@@ -449,7 +472,7 @@ namespace mongo {
                        // my version is old
                        total = 0;
                        shardCounts.clear();
-                        cm = conf->getChunkManager( fullns , true );
+                        cm = conf->getChunkManagerIfExists( fullns , true );
                        hadToBreak = true;
                        break;
                    }
@@ -476,14 +499,13 @@ namespace mongo {
    class CollectionStats : public PublicGridCommand {
    public:
        CollectionStats() : PublicGridCommand("collStats", "collstats") { }
-        bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        bool run(const string& dbName , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            string collection = cmdObj.firstElement().valuestrsafe();
            string fullns = dbName + "." + collection;
            DBConfigPtr conf = grid.getDBConfig( dbName , false );
            if ( ! conf || ! conf->isShardingEnabled() || ! conf->isSharded( fullns ) ) {
-
                result.append( "ns" , fullns );
                result.appendBool("sharded", false);
                result.append( "primary" , conf->getPrimary().getName() );
                return passthrough( conf , cmdObj , result);
@@ -602,7 +624,7 @@ namespace mongo {
    class FindAndModifyCmd : public PublicGridCommand {
    public:
        FindAndModifyCmd() : PublicGridCommand("findAndModify", "findandmodify") { }
-        bool run(const string& dbName, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        bool run(const string& dbName, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            string collection = cmdObj.firstElement().valuestrsafe();
            string fullns = dbName + "." + collection;
@@ -639,7 +661,7 @@ namespace mongo {
    class DataSizeCmd : public PublicGridCommand {
    public:
        DataSizeCmd() : PublicGridCommand("dataSize", "datasize") { }
-        bool run(const string& dbName, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        bool run(const string& dbName, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            string fullns = cmdObj.firstElement().String();
            DBConfigPtr conf = grid.getDBConfig( dbName , false );
@@ -703,7 +725,7 @@ namespace mongo {
    class GroupCmd : public NotAllowedOnShardedCollectionCmd {
    public:
        GroupCmd() : NotAllowedOnShardedCollectionCmd("group") {}
-
+        virtual bool passOptions() const { return true; }
        virtual string getFullNS( const string& dbName , const BSONObj& cmdObj ) {
            return dbName + "." + cmdObj.firstElement().embeddedObjectUserCheck()["ns"].valuestrsafe();
        }
@@ -716,14 +738,15 @@ namespace mongo {
        virtual void help( stringstream &help ) const {
            help << "{ distinct : 'collection name' , key : 'a.b' , query : {} }";
        }
-        bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        virtual bool passOptions() const { return true; }
+        bool run(const string& dbName , BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result, bool) {
            string collection = cmdObj.firstElement().valuestrsafe();
            string fullns = dbName + "." + collection;
            DBConfigPtr conf = grid.getDBConfig( dbName , false );
            if ( ! conf || ! conf->isShardingEnabled() || ! conf->isSharded( fullns ) ) {
-                return passthrough( conf , cmdObj , result );
+                return passthrough( conf , cmdObj , options, result );
            }
            ChunkManagerPtr cm = conf->getChunkManager( fullns );
@@ -739,7 +762,7 @@ namespace mongo {
            for ( set<Shard>::iterator i=shards.begin(), end=shards.end() ; i != end; ++i ) {
                ShardConnection conn( *i , fullns );
                BSONObj res;
-                bool ok = conn->runCommand( conf->getName() , cmdObj , res );
+                bool ok = conn->runCommand( conf->getName() , cmdObj , res, options );
                conn.done();
                if ( ! ok ) {
@@ -774,7 +797,7 @@ namespace mongo {
        virtual void help( stringstream &help ) const {
            help << " example: { filemd5 : ObjectId(aaaaaaa) , root : \"fs\" }";
        }
-        bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        bool run(const string& dbName , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            string fullns = dbName;
            fullns += ".";
            {
@@ -811,15 +834,15 @@ namespace mongo {
    public:
        Geo2dFindNearCmd() : PublicGridCommand( "geoNear" ) {}
        void help(stringstream& h) const { h << "http://www.mongodb.org/display/DOCS/Geospatial+Indexing#GeospatialIndexing-geoNearCommand"; }
-
-        bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        virtual bool passOptions() const { return true; }
+        bool run(const string& dbName , BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result, bool) {
            string collection = cmdObj.firstElement().valuestrsafe();
            string fullns = dbName + "." + collection;
            DBConfigPtr conf = grid.getDBConfig( dbName , false );
            if ( ! conf || ! conf->isShardingEnabled() || ! conf->isSharded( fullns ) ) {
-                return passthrough( conf , cmdObj , result );
+                return passthrough( conf , cmdObj , options, result );
            }
            ChunkManagerPtr cm = conf->getChunkManager( fullns );
@@ -836,7 +859,7 @@ namespace mongo {
            list< shared_ptr<Future::CommandResult> > futures;
            BSONArrayBuilder shardArray;
            for ( set<Shard>::const_iterator i=shards.begin(), end=shards.end() ; i != end ; i++ ) {
-                futures.push_back( Future::spawnCommand( i->getConnString() , dbName , cmdObj ) );
+                futures.push_back( Future::spawnCommand( i->getConnString() , dbName , cmdObj, options ) );
                shardArray.append(i->getName());
            }
@@ -946,7 +969,7 @@ namespace mongo {
            return b.obj();
        }
-        bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        bool run(const string& dbName , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            Timer t;
            string collection = cmdObj.firstElement().valuestrsafe();
@@ -1009,7 +1032,7 @@ namespace mongo {
            for ( set<Shard>::iterator i=shards.begin(), end=shards.end() ; i != end ; i++ ) {
                shared_ptr<ShardConnection> temp( new ShardConnection( i->getConnString() , fullns ) );
                assert( temp->get() );
-                futures.push_back( Future::spawnCommand( i->getConnString() , dbName , shardedCommand , temp->get() ) );
+                futures.push_back( Future::spawnCommand( i->getConnString() , dbName , shardedCommand , 0 , temp->get() ) );
                shardConns.push_back( temp );
            }
@@ -1096,7 +1119,7 @@ namespace mongo {
            mr_shard::Config config( dbName , cmdObj );
            mr_shard::State state(config);
-            log(1) << "mr sharded output ns: " << config.ns << endl;
+            LOG(1) << "mr sharded output ns: " << config.ns << endl;
            if (config.outType == mr_shard::Config::INMEMORY) {
                errmsg = "This Map Reduce mode is not supported with sharded output";
@@ -1200,7 +1223,7 @@ namespace mongo {
            BSONObj finalCmdObj = finalCmd.obj();
            for ( set<Shard>::iterator i=shards.begin(), end=shards.end() ; i != end ; i++ ) {
                shared_ptr<ShardConnection> temp( new ShardConnection( i->getConnString() , outns ) );
-                futures.push_back( Future::spawnCommand( i->getConnString() , dbName , finalCmdObj , temp->get() ) );
+                futures.push_back( Future::spawnCommand( i->getConnString() , dbName , finalCmdObj , 0 , temp->get() ) );
                shardConns.push_back( temp );
            }
@@ -1268,7 +1291,7 @@ namespace mongo {
    class ApplyOpsCmd : public PublicGridCommand {
    public:
        ApplyOpsCmd() : PublicGridCommand( "applyOps" ) {}
-        virtual bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        virtual bool run(const string& dbName , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            errmsg = "applyOps not allowed through mongos";
            return false;
        }
@@ -1277,7 +1300,7 @@ namespace mongo {
    class CompactCmd : public PublicGridCommand {
    public:
        CompactCmd() : PublicGridCommand( "compact" ) {}
-        virtual bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        virtual bool run(const string& dbName , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            errmsg = "compact not allowed through mongos";
            return false;
        }
@@ -1285,7 +1308,7 @@ namespace mongo {
    }
-    bool Command::runAgainstRegistered(const char *ns, BSONObj& jsobj, BSONObjBuilder& anObjBuilder) {
+    bool Command::runAgainstRegistered(const char *ns, BSONObj& jsobj, BSONObjBuilder& anObjBuilder, int queryOptions) {
        const char *p = strchr(ns, '.');
        if ( !p ) return false;
        if ( strcmp(p, ".$cmd") != 0 ) return false;
@@ -1326,7 +1349,7 @@ namespace mongo {
            anObjBuilder.append( "help" , help.str() );
        }
        else {
-            ok = c->run( nsToDatabase( ns ) , jsobj, errmsg, anObjBuilder, false);
+            ok = c->run( nsToDatabase( ns ) , jsobj, queryOptions, errmsg, anObjBuilder, false );
        }
        BSONObj tmp = anObjBuilder.asTempObj();
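The commands_public.cpp diff above is dominated by one mechanical change: Command::run() and runAgainstRegistered() gain an int options / queryOptions parameter, and passthrough commands forward those wire-protocol flags to the shards only when the new passOptions() override says so, keeping the behavior opt-in per command. A self-contained sketch of the gating pattern; the types below are stand-ins for the real BSONObj/ShardConnection machinery and the names are illustrative, not from the tree:

    #include <iostream>
    #include <string>

    struct StubConnection {
        bool runCommand( const std::string& db , const std::string& cmd , int options ) {
            std::cout << "run " << cmd << " on " << db << " options=" << options << "\n";
            return true;
        }
    };

    class GridCommandSketch {
    public:
        virtual ~GridCommandSketch() {}
        // off by default; commands opt in one at a time as they get tested
        virtual bool passOptions() const { return false; }
        bool passthrough( StubConnection& conn , const std::string& db ,
                          const std::string& cmd , int options ) {
            // the caller's query options reach the shard only on opt-in
            return conn.runCommand( db , cmd , passOptions() ? options : 0 );
        }
    };

    class CountSketch : public GridCommandSketch {
        virtual bool passOptions() const { return true; } // like CountCmd above
    };

    int main() {
        StubConnection conn;
        GridCommandSketch dflt;
        CountSketch count;
        dflt.passthrough( conn , "test" , "collStats" , 4 );  // sends options=0
        count.passthrough( conn , "test" , "count" , 4 );     // sends options=4
        return 0;
    }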
diff --git a/s/config.cpp b/s/config.cpp
index b65443cb0bd..f9e548235d3 100644
--- a/s/config.cpp
+++ b/s/config.cpp
@@ -185,6 +185,16 @@ namespace mongo {
        return true;
    }
+    ChunkManagerPtr DBConfig::getChunkManagerIfExists( const string& ns, bool shouldReload ){
+        try{
+            return getChunkManager( ns, shouldReload );
+        }
+        catch( AssertionException& e ){
+            warning() << "chunk manager not found for " << ns << causedBy( e ) << endl;
+            return ChunkManagerPtr();
+        }
+    }
+
    ChunkManagerPtr DBConfig::getChunkManager( const string& ns , bool shouldReload ) {
        BSONObj key;
        bool unique;
@@ -201,8 +211,8 @@ namespace mongo {
            _reload();
            ci = _collections[ns];
        }
-        massert( 10181 , (string)"not sharded:" + ns , ci.isSharded() || ci.wasDropped() );
-        assert( ci.wasDropped() || ! ci.key().isEmpty() );
+        massert( 10181 , (string)"not sharded:" + ns , ci.isSharded() );
+        assert( ! ci.key().isEmpty() );
        if ( ! shouldReload || earlyReload )
            return ci.getCM();
@@ -226,6 +236,7 @@ namespace mongo {
        if ( v == oldVersion ) {
            scoped_lock lk( _lock );
            CollectionInfo& ci = _collections[ns];
+            massert( 15885 , str::stream() << "not sharded after reloading from chunks : " << ns , ci.isSharded() );
            return ci.getCM();
        }
    }
@@ -244,7 +255,7 @@ namespace mongo {
        scoped_lock lk( _lock );
        CollectionInfo& ci = _collections[ns];
-        massert( 14822 , (string)"state changed in the middle: " + ns , ci.isSharded() || ci.wasDropped() );
+        massert( 14822 , (string)"state changed in the middle: " + ns , ci.isSharded() );
        if ( temp->getVersion() > ci.getCM()->getVersion() ) {
            // we only want to reset if we're newer
@@ -252,6 +263,7 @@ namespace mongo {
            ci.resetCM( temp.release() );
        }
+        massert( 15883 , str::stream() << "not sharded after chunk manager reset : " << ns , ci.isSharded() );
        return ci.getCM();
    }
@@ -268,7 +280,7 @@ namespace mongo {
    }
    void DBConfig::unserialize(const BSONObj& from) {
-        log(1) << "DBConfig unserialize: " << _name << " " << from << endl;
+        LOG(1) << "DBConfig unserialize: " << _name << " " << from << endl;
        assert( _name == from["_id"].String() );
        _shardingEnabled = from.getBoolField("partitioned");
@@ -300,13 +312,14 @@ namespace mongo {
        unserialize( o );
        BSONObjBuilder b;
-        b.appendRegex( "_id" , (string)"^" + _name + "." );
+        b.appendRegex( "_id" , (string)"^" + _name + "\\." );
        auto_ptr<DBClientCursor> cursor = conn->query( ShardNS::collection ,b.obj() );
        assert( cursor.get() );
        while ( cursor->more() ) {
            BSONObj o = cursor->next();
-            _collections[o["_id"].String()] = CollectionInfo( o );
+            if( o["dropped"].trueValue() ) _collections.erase( o["_id"].String() );
+            else _collections[o["_id"].String()] = CollectionInfo( o );
        }
        conn.done();
@@ -369,7 +382,7 @@ namespace mongo {
        // 1
        if ( ! configServer.allUp( errmsg ) ) {
-            log(1) << "\t DBConfig::dropDatabase not all up" << endl;
+            LOG(1) << "\t DBConfig::dropDatabase not all up" << endl;
            return 0;
        }
@@ -392,7 +405,7 @@ namespace mongo {
            log() << "error removing from config server even after checking!" << endl;
            return 0;
        }
-        log(1) << "\t removed entry from config server for: " << _name << endl;
+        LOG(1) << "\t removed entry from config server for: " << _name << endl;
        set<Shard> allServers;
@@ -428,7 +441,7 @@ namespace mongo {
            conn.done();
        }
-        log(1) << "\t dropped primary db for: " << _name << endl;
+        LOG(1) << "\t dropped primary db for: " << _name << endl;
        configServer.logChange( "dropDatabase" , _name , BSONObj() );
        return true;
@@ -440,6 +453,7 @@ namespace mongo {
        while ( true ) {
            Collections::iterator i = _collections.begin();
            for ( ; i != _collections.end(); ++i ) {
+                // log() << "coll : " << i->first << " and " << i->second.isSharded() << endl;
                if ( i->second.isSharded() )
                    break;
            }
@@ -453,7 +467,7 @@ namespace mongo {
            }
            seen.insert( i->first );
-            log(1) << "\t dropping sharded collection: " << i->first << endl;
+            LOG(1) << "\t dropping sharded collection: " << i->first << endl;
            i->second.getCM()->getAllShards( allServers );
            i->second.getCM()->drop( i->second.getCM() );
@@ -461,7 +475,7 @@ namespace mongo {
            num++;
            uassert( 10184 , "_dropShardedCollections too many collections - bailing" , num < 100000 );
-            log(2) << "\t\t dropped " << num << " so far" << endl;
+            LOG(2) << "\t\t dropped " << num << " so far" << endl;
        }
        return true;
@@ -528,7 +542,7 @@ namespace mongo {
        string fullString;
        joinStringDelim( configHosts, &fullString, ',' );
        _primary.setAddress( ConnectionString( fullString , ConnectionString::SYNC ) );
-        log(1) << " config string : " << fullString << endl;
+        LOG(1) << " config string : " << fullString << endl;
        return true;
    }
@@ -609,7 +623,7 @@ namespace mongo {
        if ( checkConsistency ) {
            string errmsg;
            if ( ! checkConfigServersConsistent( errmsg ) ) {
-                log( LL_ERROR ) << "config servers not in sync! " << errmsg << endl;
+                log( LL_ERROR ) << "config servers not in sync! " << errmsg << warnings;
                return false;
            }
        }
@@ -672,7 +686,7 @@ namespace mongo {
            string name = o["_id"].valuestrsafe();
            got.insert( name );
            if ( name == "chunksize" ) {
-                log(1) << "MaxChunkSize: " << o["value"] << endl;
+                LOG(1) << "MaxChunkSize: " << o["value"] << endl;
                Chunk::MaxChunkSize = o["value"].numberInt() * 1024 * 1024;
            }
            else if ( name == "balancer" ) {
@@ -746,7 +760,7 @@ namespace mongo {
            conn->createCollection( "config.changelog" , 1024 * 1024 * 10 , true );
        }
        catch ( UserException& e ) {
-            log(1) << "couldn't create changelog (like race condition): " << e << endl;
+            LOG(1) << "couldn't create changelog (like race condition): " << e << endl;
            // don't care
        }
        createdCapped = true;
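The config.cpp change above is the pivot for several call-site edits in this commit: getChunkManagerIfExists() converts the "not sharded" massert inside getChunkManager() into a warning plus an empty ChunkManagerPtr, so a command racing against a drop can recover instead of aborting. The callers converted elsewhere in the diff (getShardVersion, count, the insert retry paths) all take the same shape; a hedged sketch of that caller-side pattern, with conf and fullns as in those call sites, not a new API:

    ChunkManagerPtr cm = conf->getChunkManagerIfExists( fullns );
    if ( ! cm ) {
        // collection was dropped or is no longer sharded;
        // fall back to the unsharded/primary-shard path instead of throwing
    }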
diff --git a/s/config.h b/s/config.h
index 6c8f8934aed..90c06cb0223 100644
--- a/s/config.h
+++ b/s/config.h
@@ -143,6 +143,7 @@ namespace mongo {
        bool isSharded( const string& ns );
        ChunkManagerPtr getChunkManager( const string& ns , bool reload = false );
+        ChunkManagerPtr getChunkManagerIfExists( const string& ns , bool reload = false );
        /**
         * @return the correct for shard for the ns
diff --git a/s/cursors.cpp b/s/cursors.cpp
index c65cdb9f97b..e8aeffb1cb4 100644
--- a/s/cursors.cpp
+++ b/s/cursors.cpp
@@ -112,7 +112,7 @@ namespace mongo {
        }
        bool hasMore = sendMore && _cursor->more();
-        log(6) << "\t hasMore:" << hasMore << " wouldSendMoreIfHad: " << sendMore << " id:" << getId() << " totalSent: " << _totalSent << endl;
+        LOG(6) << "\t hasMore:" << hasMore << " wouldSendMoreIfHad: " << sendMore << " id:" << getId() << " totalSent: " << _totalSent << endl;
        replyToQuery( 0 , r.p() , r.m() , b.buf() , b.len() , num , _totalSent , hasMore ? getId() : 0 );
        _totalSent += num;
@@ -131,13 +131,15 @@ namespace mongo {
    CursorCache::~CursorCache() {
        // TODO: delete old cursors?
-        int logLevel = 1;
+        bool print = logLevel > 0;
        if ( _cursors.size() || _refs.size() )
-            logLevel = 0;
-        log( logLevel ) << " CursorCache at shutdown - "
-                        << " sharded: " << _cursors.size()
-                        << " passthrough: " << _refs.size()
-                        << endl;
+            print = true;
+
+        if ( print )
+            cout << " CursorCache at shutdown - "
+                 << " sharded: " << _cursors.size()
+                 << " passthrough: " << _refs.size()
+                 << endl;
    }
    ShardedClientCursorPtr CursorCache::get( long long id ) const {
@@ -300,7 +302,7 @@ namespace mongo {
            help << " example: { cursorInfo : 1 }";
        }
        virtual LockType locktype() const { return NONE; }
-        bool run(const string&, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+        bool run(const string&, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
            cursorCache.appendInfo( result );
            if ( jsobj["setTimeout"].isNumber() )
                CursorCache::TIMEOUT = jsobj["setTimeout"].numberLong();
diff --git a/s/d_logic.cpp b/s/d_logic.cpp
index 5216b2e52ca..9d4fd74dd62 100644
--- a/s/d_logic.cpp
+++ b/s/d_logic.cpp
@@ -60,7 +60,7 @@ namespace mongo {
            return false;
        }
-        log(1) << "connection meta data too old - will retry ns:(" << ns << ") op:(" << opToString(op) << ") " << errmsg << endl;
+        LOG(1) << "connection meta data too old - will retry ns:(" << ns << ") op:(" << opToString(op) << ") " << errmsg << endl;
        if ( doesOpGetAResponse( op ) ) {
            assert( dbresponse );
@@ -97,8 +97,8 @@ namespace mongo {
        const OID& clientID = ShardedConnectionInfo::get(false)->getID();
        massert( 10422 , "write with bad shard config and no server id!" , clientID.isSet() );
-        log(1) << "got write with an old config - writing back ns: " << ns << endl;
-        if ( logLevel ) log(1) << m.toString() << endl;
+        LOG(1) << "got write with an old config - writing back ns: " << ns << endl;
+        if ( logLevel ) LOG(1) << m.toString() << endl;
        BSONObjBuilder b;
        b.appendBool( "writeBack" , true );
@@ -109,7 +109,7 @@ namespace mongo {
        b.appendTimestamp( "version" , shardingState.getVersion( ns ) );
        b.appendTimestamp( "yourVersion" , ShardedConnectionInfo::get( true )->getVersion( ns ) );
        b.appendBinData( "msg" , m.header()->len , bdtCustom , (char*)(m.singleData()) );
-        log(2) << "writing back msg with len: " << m.header()->len << " op: " << m.operation() << endl;
+        LOG(2) << "writing back msg with len: " << m.header()->len << " op: " << m.operation() << endl;
        writeBackManager.queueWriteBack( clientID.str() , b.obj() );
        return true;
diff --git a/s/d_migrate.cpp b/s/d_migrate.cpp
index 740a3148771..e24a02d3538 100644
--- a/s/d_migrate.cpp
+++ b/s/d_migrate.cpp
@@ -156,13 +156,28 @@ namespace mongo {
        string toString() const { return str::stream() << ns << " from " << min << " -> " << max; }
-
+
        void doRemove() {
            ShardForceVersionOkModeBlock sf;
-            writelock lk(ns);
-            RemoveSaver rs("moveChunk",ns,"post-cleanup");
-            long long num = Helpers::removeRange( ns , min , max , true , false , cmdLine.moveParanoia ? &rs : 0 );
-            log() << "moveChunk deleted: " << num << migrateLog;
+            {
+                writelock lk(ns);
+                RemoveSaver rs("moveChunk",ns,"post-cleanup");
+                long long numDeleted = Helpers::removeRange( ns , min , max , true , false , cmdLine.moveParanoia ? &rs : 0 );
+                log() << "moveChunk deleted: " << numDeleted << migrateLog;
+            }
+
+            ReplTime lastOpApplied = cc().getLastOp();
+
+            Timer t;
+            for ( int i=0; i<3600; i++ ) {
+                if ( opReplicatedEnough( lastOpApplied , ( getSlaveCount() / 2 ) + 1 ) ) {
+                    LOG(t.seconds() < 30 ? 1 : 0) << "moveChunk repl sync took " << t.seconds() << " seconds" << migrateLog;
+                    return;
+                }
+                sleepsecs(1);
+            }
+
+            warning() << "moveChunk repl sync timed out after " << t.seconds() << " seconds" << migrateLog;
        }
    };
@@ -646,7 +661,7 @@ namespace mongo {
    public:
        TransferModsCommand() : ChunkCommandHelper( "_transferMods" ) {}
-        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            return migrateFromStatus.transferMods( errmsg, result );
        }
    } transferModsCommand;
@@ -656,7 +671,7 @@ namespace mongo {
    public:
        InitialCloneCommand() : ChunkCommandHelper( "_migrateClone" ) {}
-        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            return migrateFromStatus.clone( errmsg, result );
        }
    } initialCloneCommand;
@@ -680,7 +695,7 @@ namespace mongo {
        virtual LockType locktype() const { return NONE; }
-        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            // 1. parse options
            // 2. make sure my view is complete and lock
            // 3. start migrate
@@ -1064,7 +1079,7 @@ namespace mongo {
            preCond.done();
            BSONObj cmd = cmdBuilder.obj();
-            log(7) << "moveChunk update: " << cmd << migrateLog;
+            LOG(7) << "moveChunk update: " << cmd << migrateLog;
            bool ok = false;
            BSONObj cmdResult;
@@ -1177,7 +1192,7 @@ namespace mongo {
    class MigrateStatus {
    public:
-
+
        MigrateStatus() : m_active("MigrateStatus") { active = false; }
        void prepare() {
@@ -1345,9 +1360,19 @@ namespace mongo {
                timing.done(4);
            }
+            {
+                // pause to wait for replication
+                // this will prevent us from going into critical section until we're ready
+                Timer t;
+                while ( t.minutes() < 600 ) {
+                    if ( flushPendingWrites( lastOpApplied ) )
+                        break;
+                    sleepsecs(1);
+                }
+            }
+
            {
                // 5. wait for commit
-                Timer timeWaitingForCommit;
                state = STEADY;
                while ( state == STEADY || state == COMMIT_START ) {
@@ -1371,17 +1396,16 @@ namespace mongo {
                    if ( state == COMMIT_START ) {
                        if ( flushPendingWrites( lastOpApplied ) )
                            break;
-
-                        if ( timeWaitingForCommit.seconds() > 86400 ) {
-                            state = FAIL;
-                            errmsg = "timed out waiting for commit";
-                            return;
-                        }
                    }
                    sleepmillis( 10 );
                }
+                if ( state == FAIL ) {
+                    errmsg = "timed out waiting for commit";
+                    return;
+                }
+
                timing.done(5);
            }
@@ -1516,12 +1540,14 @@ namespace mongo {
                return false;
            state = COMMIT_START;
-            // we wait 5 minutes for the commit to succeed before giving up
-            for ( int i=0; i<5*60*1000; i++ ) {
+            Timer t;
+            // we wait for the commit to succeed before giving up
+            while ( t.minutes() <= 5 ) {
                sleepmillis(1);
                if ( state == DONE )
                    return true;
            }
+            state = FAIL;
            log() << "startCommit never finished!" << migrateLog;
            return false;
        }
@@ -1571,7 +1597,7 @@ namespace mongo {
        virtual LockType locktype() const { return WRITE; } // this is so don't have to do locking internally
-        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            if ( migrateStatus.getActive() ) {
                errmsg = "migrate already in progress";
@@ -1608,7 +1634,7 @@ namespace mongo {
    public:
        RecvChunkStatusCommand() : ChunkCommandHelper( "_recvChunkStatus" ) {}
-        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            migrateStatus.status( result );
            return 1;
        }
@@ -1619,7 +1645,7 @@ namespace mongo {
    public:
        RecvChunkCommitCommand() : ChunkCommandHelper( "_recvChunkCommit" ) {}
-        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            bool ok = migrateStatus.startCommit();
            migrateStatus.status( result );
            return ok;
@@ -1631,7 +1657,7 @@ namespace mongo {
    public:
        RecvChunkAbortCommand() : ChunkCommandHelper( "_recvChunkAbort" ) {}
-        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            migrateStatus.abort();
            migrateStatus.status( result );
            return true;
@@ -1653,7 +1679,7 @@ namespace mongo {
            assert( ! isInRange( BSON( "x" << 5 ) , min , max ) );
            assert( ! isInRange( BSON( "x" << 6 ) , min , max ) );
-            log(1) << "isInRangeTest passed" << migrateLog;
+            LOG(1) << "isInRangeTest passed" << migrateLog;
        }
    } isInRangeTest;
}
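The d_migrate.cpp hunks above introduce two replication waits in chunk migration: the donor's doRemove() now blocks until the post-migration range deletion has been applied by a majority of slaves (bounded at roughly an hour), and the recipient pauses before entering the critical section until its pending writes have flushed. Note also the LOG(t.seconds() < 30 ? 1 : 0) idiom, which logs at debug level 1 when the sync was quick but promotes the message to the default level when it was slow. The donor-side loop, distilled with explanatory comments and using the same helpers the diff itself uses (cc().getLastOp(), opReplicatedEnough(), getSlaveCount()); this is a restatement, not new behavior:

    ReplTime lastOpApplied = cc().getLastOp();  // last op written on this connection
    Timer t;
    for ( int i = 0; i < 3600; i++ ) {          // bound the wait to ~1 hour
        // "enough" means a majority of slaves, i.e. floor(n/2) + 1, applied it
        if ( opReplicatedEnough( lastOpApplied , ( getSlaveCount() / 2 ) + 1 ) )
            break;
        sleepsecs( 1 );
    }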
diff --git a/s/d_split.cpp b/s/d_split.cpp
index 64fc4cb42e4..cef6188a2bb 100644
--- a/s/d_split.cpp
+++ b/s/d_split.cpp
@@ -57,7 +57,7 @@ namespace mongo {
                "example: { medianKey:\"blog.posts\", keyPattern:{x:1}, min:{x:10}, max:{x:55} }\n"
                "NOTE: This command may take a while to run";
        }
-        bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+        bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
            const char *ns = jsobj.getStringField( "medianKey" );
            BSONObj min = jsobj.getObjectField( "min" );
            BSONObj max = jsobj.getObjectField( "max" );
@@ -136,7 +136,7 @@ namespace mongo {
            help << "Internal command.\n";
        }
-        bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+        bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
            const char* ns = jsobj.getStringField( "checkShardingIndex" );
            BSONObj keyPattern = jsobj.getObjectField( "keyPattern" );
@@ -177,6 +177,11 @@ namespace mongo {
                return false;
            }
+            if( d->isMultikey( d->idxNo( *idx ) ) ) {
+                errmsg = "index is multikey, cannot use for sharding";
+                return false;
+            }
+
            BtreeCursor * bc = BtreeCursor::make( d , d->idxNo(*idx) , *idx , min , max , false , 1 );
            shared_ptr<Cursor> c( bc );
            auto_ptr<ClientCursor> cc( new ClientCursor( QueryOption_NoCursorTimeout , c , ns ) );
@@ -248,7 +253,7 @@ namespace mongo {
                "NOTE: This command may take a while to run";
        }
-        bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+        bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
            //
            // 1.a We'll parse the parameters in two steps. First, make sure that we can use the split index to get
@@ -524,7 +529,7 @@ namespace mongo {
        virtual bool adminOnly() const { return true; }
        virtual LockType locktype() const { return NONE; }
-        bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+        bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
            //
            // 1. check whether parameters passed to splitChunk are sound
@@ -686,7 +691,7 @@ namespace mongo {
            BSONObjBuilder logDetail;
            origChunk.appendShortVersion( "before" , logDetail );
-            log(1) << "before split on " << origChunk << endl;
+            LOG(1) << "before split on " << origChunk << endl;
            vector<ChunkInfo> newChunks;
            ShardChunkVersion myVersion = maxVersion;
diff --git a/s/d_state.cpp b/s/d_state.cpp
index 409820047b0..f43865b222e 100644
--- a/s/d_state.cpp
+++ b/s/d_state.cpp
@@ -288,7 +288,7 @@ namespace mongo {
    ShardedConnectionInfo* ShardedConnectionInfo::get( bool create ) {
        ShardedConnectionInfo* info = _tl.get();
        if ( ! info && create ) {
-            log(1) << "entering shard mode for connection" << endl;
+            LOG(1) << "entering shard mode for connection" << endl;
            info = new ShardedConnectionInfo();
            _tl.reset( info );
        }
@@ -316,7 +316,7 @@ namespace mongo {
    void ShardedConnectionInfo::addHook() {
        static bool done = false;
        if (!done) {
-            log(1) << "adding sharding hook" << endl;
+            LOG(1) << "adding sharding hook" << endl;
            pool.addHook(new ShardingConnectionHook(false));
            done = true;
        }
@@ -380,7 +380,7 @@ namespace mongo {
        virtual bool slaveOk() const { return true; }
-        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            ShardedConnectionInfo::reset();
            return true;
        }
@@ -452,7 +452,7 @@ namespace mongo {
            return true;
        }
-        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            // Steps
            // 1. check basic config
@@ -613,7 +613,7 @@ namespace mongo {
        virtual LockType locktype() const { return NONE; }
-        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            string ns = cmdObj["getShardVersion"].valuestrsafe();
            if ( ns.size() == 0 ) {
                errmsg = "need to specify full namespace";
@@ -642,7 +642,7 @@ namespace mongo {
        virtual LockType locktype() const { return WRITE; } // TODO: figure out how to make this not need to lock
-        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            shardingState.appendInfo( result );
            return true;
        }
diff --git a/s/d_writeback.cpp b/s/d_writeback.cpp
index 6839fc4b1ca..01c0c14ac0a 100644
--- a/s/d_writeback.cpp
+++ b/s/d_writeback.cpp
@@ -129,7 +129,7 @@ namespace mongo {
        void help(stringstream& h) const { h<<"internal"; }
-        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            BSONElement e = cmdObj.firstElement();
            if ( e.type() != jstOID ) {
@@ -144,7 +144,7 @@ namespace mongo {
            // we want to do return at least at every 5 minutes so sockets don't timeout
            BSONObj z;
            if ( writeBackManager.getWritebackQueue(id.str())->queue.blockingPop( z, 5 * 60 /* 5 minutes */ ) ) {
-                log(1) << "WriteBackCommand got : " << z << endl;
+                LOG(1) << "WriteBackCommand got : " << z << endl;
                result.append( "data" , z );
            }
            else {
@@ -168,7 +168,7 @@ namespace mongo {
                << "This is an internal command";
        }
-        bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+        bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
            writeBackManager.appendStats( result );
            return true;
        }
diff --git a/s/grid.cpp b/s/grid.cpp
index 6141e061be6..3756e131a6a 100644
--- a/s/grid.cpp
+++ b/s/grid.cpp
@@ -125,6 +125,8 @@ namespace mongo {
            name = &nameInternal;
        }
+        ReplicaSetMonitorPtr rsMonitor;
+
        // Check whether the host (or set) exists and run several sanity checks on this request.
        // There are two set of sanity checks: making sure adding this particular shard is consistent
        // with the replica set state (if it exists) and making sure this shards databases can be
@@ -140,7 +142,7 @@ namespace mongo {
                errMsg = "can't use sync cluster as a shard. for replica set, have to use <setname>/<server1>,<server2>,...";
                return false;
            }
-
+
            BSONObj resIsMongos;
            bool ok = newShardConn->runCommand( "admin" , BSON( "isdbgrid" << 1 ) , resIsMongos );
@@ -264,6 +266,9 @@ namespace mongo {
                }
            }
+            if ( newShardConn->type() == ConnectionString::SET )
+                rsMonitor = ReplicaSetMonitor::get( setName );
+
            newShardConn.done();
        }
        catch ( DBException& e ) {
@@ -295,7 +300,7 @@ namespace mongo {
        // build the ConfigDB shard document
        BSONObjBuilder b;
        b.append( "_id" , *name );
-        b.append( "host" , servers.toString() );
+        b.append( "host" , rsMonitor ? rsMonitor->getServerAddress() : servers.toString() );
        if ( maxSize > 0 ) {
            b.append( ShardFields::maxSize.name() , maxSize );
        }
@@ -508,7 +513,7 @@ namespace mongo {
            assert( Grid::_inBalancingWindow( w8 , now ) );
            assert( Grid::_inBalancingWindow( w9 , now ) );
-            log(1) << "BalancingWindowObjTest passed" << endl;
+            LOG(1) << "BalancingWindowObjTest passed" << endl;
        }
    } BalancingWindowObjTest;
diff --git a/s/request.cpp b/s/request.cpp
index cda75f63a17..36488cb5617 100644
--- a/s/request.cpp
+++ b/s/request.cpp
@@ -43,7 +43,12 @@ namespace mongo {
        _clientInfo = ClientInfo::get();
        _clientInfo->newRequest( p );
+    }
+
+    void Request::checkAuth() const {
+        char cl[256];
+        nsToDatabase(getns(), cl);
+        uassert(15845, "unauthorized", _clientInfo->getAuthenticationInfo()->isAuthorized(cl));
    }
    void Request::init() {
@@ -60,17 +65,21 @@ namespace mongo {
        uassert( 13644 , "can't use 'local' database through mongos" , ! str::startsWith( getns() , "local." ) );
-        _config = grid.getDBConfig( getns() );
+        const string nsStr (getns()); // use in functions taking string rather than char*
+
+        _config = grid.getDBConfig( nsStr );
        if ( reload ) {
-            if ( _config->isSharded( getns() ) )
-                _config->getChunkManager( getns() , true );
+            if ( _config->isSharded( nsStr ) )
+                _config->getChunkManager( nsStr , true );
            else
                _config->reload();
        }
-        if ( _config->isSharded( getns() ) ) {
-            _chunkManager = _config->getChunkManager( getns() , reload );
-            uassert( 10193 , (string)"no shard info for: " + getns() , _chunkManager );
+        if ( _config->isSharded( nsStr ) ) {
+            _chunkManager = _config->getChunkManager( nsStr , reload );
+            // TODO: All of these uasserts are no longer necessary, getChunkManager() throws when
+            // not returning the right value.
+            uassert( 10193 , (string)"no shard info for: " + nsStr , _chunkManager );
        }
        else {
            _chunkManager.reset();
@@ -104,7 +113,7 @@ namespace mongo {
        }
-        log(3) << "Request::process ns: " << getns() << " msg id:" << (int)(_m.header()->id) << " attempt: " << attempt << endl;
+        LOG(3) << "Request::process ns: " << getns() << " msg id:" << (int)(_m.header()->id) << " attempt: " << attempt << endl;
        Strategy * s = SINGLE;
        _counter = &opsNonSharded;
@@ -138,10 +147,7 @@ namespace mongo {
            s->getMore( *this );
        }
        else {
-            char cl[256];
-            nsToDatabase(getns(), cl);
-            uassert(15845, "unauthorized", _clientInfo->getAuthenticationInfo()->isAuthorized(cl));
-
+            checkAuth();
            s->writeOp( op, *this );
        }
diff --git a/s/request.h b/s/request.h
index 6645ed9a092..86a484e378b 100644
--- a/s/request.h
+++ b/s/request.h
@@ -70,6 +70,8 @@ namespace mongo {
            return _clientInfo;
        }
+        void checkAuth() const;
+
        // ---- remote location info -----
diff --git a/s/s_only.cpp b/s/s_only.cpp
index 4afa9008f71..6449b34ad81 100644
--- a/s/s_only.cpp
+++ b/s/s_only.cpp
@@ -91,7 +91,7 @@ namespace mongo {
        }
        string errmsg;
-        int ok = c->run( dbname , cmdObj , errmsg , result , fromRepl );
+        int ok = c->run( dbname , cmdObj , queryOptions, errmsg , result , fromRepl );
        if ( ! ok )
            result.append( "errmsg" , errmsg );
        return ok;
diff --git a/s/security.cpp b/s/security.cpp
index e27e68f4dcf..6cb9da624be 100644
--- a/s/security.cpp
+++ b/s/security.cpp
@@ -94,7 +94,7 @@ namespace mongo {
        return false;
    }
-    bool CmdLogout::run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+    bool CmdLogout::run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
        AuthenticationInfo *ai = ClientInfo::get()->getAuthenticationInfo();
        ai->logout(dbname);
        return true;
diff --git a/s/server.cpp b/s/server.cpp
index 1ca2e4c9d7e..a6ffab96f29 100644
--- a/s/server.cpp
+++ b/s/server.cpp
@@ -26,6 +26,7 @@
 #include "../util/ramlog.h"
 #include "../util/signal_handlers.h"
 #include "../util/admin_access.h"
+#include "../util/concurrency/task.h"
 #include "../db/dbwebserver.h"
 #include "../scripting/engine.h"
@@ -94,7 +95,7 @@ namespace mongo {
                r.process();
            }
            catch ( AssertionException & e ) {
-                log( e.isUserAssertion() ? 1 : 0 ) << "AssertionException in process: " << e.what() << endl;
+                log( e.isUserAssertion() ? 1 : 0 ) << "AssertionException while processing op type : " << m.operation() << " to : " << r.getns() << causedBy(e) << endl;
                le->raiseError( e.getCode() , e.what() );
@@ -158,9 +159,6 @@ namespace mongo {
        cursorCache.startTimeoutThread();
        PeriodicTask::theRunner->go();
-        log() << "waiting for connections on port " << cmdLine.port << endl;
-        //DbGridListener l(port);
-        //l.listen();
        ShardedMessageHandler handler;
        MessageServer * server = createServer( opts , &handler );
        server->setAsTimeTracker();
@@ -321,6 +319,16 @@ int _main(int argc, char* argv[]) {
        return 8;
    }
+    {
+        class CheckConfigServers : public task::Task {
+            virtual string name() const { return "CheckConfigServers"; }
+            virtual void doWork() { configServer.ok(true); }
+        };
+        static CheckConfigServers checkConfigServers;
+
+        task::repeat(&checkConfigServers, 60*1000);
+    }
+
    int configError = configServer.checkConfigVersion( params.count( "upgrade" ) );
    if ( configError ) {
        if ( configError > 0 ) {
diff --git a/s/shard.cpp b/s/shard.cpp
index dfd707857da..75326e047fc 100644
--- a/s/shard.cpp
+++ b/s/shard.cpp
@@ -235,7 +235,7 @@ namespace mongo {
        virtual bool slaveOk() const { return true; }
        virtual bool adminOnly() const { return true; }
-        virtual bool run(const string&, mongo::BSONObj&, std::string& errmsg , mongo::BSONObjBuilder& result, bool) {
+        virtual bool run(const string&, mongo::BSONObj&, int, std::string& errmsg , mongo::BSONObjBuilder& result, bool) {
            return staticShardInfo.getShardMap( result , errmsg );
        }
    } cmdGetShardMap;
@@ -346,7 +346,7 @@ namespace mongo {
                best = t;
        }
-        log(1) << "best shard for new allocation is " << best << endl;
+        LOG(1) << "best shard for new allocation is " << best << endl;
        return best.shard();
    }
@@ -360,7 +360,7 @@ namespace mongo {
    void ShardingConnectionHook::onCreate( DBClientBase * conn ) {
        if( !noauth ) {
            string err;
-            log(2) << "calling onCreate auth for " << conn->toString() << endl;
+            LOG(2) << "calling onCreate auth for " << conn->toString() << endl;
            uassert( 15847, "can't authenticate to shard server", conn->auth("local", internalSecurity.user, internalSecurity.pwd, err, false));
        }
diff --git a/s/shard_version.cpp b/s/shard_version.cpp
index 01447749ac9..4f84b0ae61e 100644
--- a/s/shard_version.cpp
+++ b/s/shard_version.cpp
@@ -96,7 +96,7 @@ namespace mongo {
        ChunkManagerPtr manager;
        const bool isSharded = conf->isSharded( ns );
        if ( isSharded ) {
-            manager = conf->getChunkManager( ns , authoritative );
+            manager = conf->getChunkManagerIfExists( ns , authoritative );
            // It's possible the chunk manager was reset since we checked whether sharded was true,
            // so must check this here.
            if( manager ) officialSequenceNumber = manager->getSequenceNumber();
@@ -139,8 +139,14 @@ namespace mongo {
        }
        if ( result["reloadConfig"].trueValue() ) {
-            // reload config
-            conf->getChunkManager( ns , true );
+            if( result["version"].timestampTime() == 0 ){
+                // reload db
+                conf->reload();
+            }
+            else {
+                // reload config
+                conf->getChunkManager( ns , true );
+            }
        }
        const int maxNumTries = 7;
diff --git a/s/shardkey.cpp b/s/shardkey.cpp
index 9602b8566e5..d6c8eda1ae1 100644
--- a/s/shardkey.cpp
+++ b/s/shardkey.cpp
@@ -55,7 +55,8 @@ namespace mongo {
         */
        for(set<string>::const_iterator it = patternfields.begin(); it != patternfields.end(); ++it) {
-            if(obj.getFieldDotted(it->c_str()).eoo())
+            BSONElement e = obj.getFieldDotted(it->c_str());
+            if(e.eoo() || e.type() == Array)
                return false;
        }
        return true;
@@ -83,7 +84,7 @@ namespace mongo {
        vector<const char*> keysToMove;
        keysToMove.push_back("_id");
        BSONForEach(e, pattern) {
-            if (strchr(e.fieldName(), '.') == NULL)
+            if (strchr(e.fieldName(), '.') == NULL && strcmp(e.fieldName(), "_id") != 0)
                keysToMove.push_back(e.fieldName());
        }
@@ -185,8 +186,8 @@ namespace mongo {
            ShardKeyPattern k( fromjson("{a:1,'sub.b':-1,'sub.c':1}") );
            BSONObj x = fromjson("{a:1,'sub.b':2,'sub.c':3}");
-            assert( k.extractKey( fromjson("{a:1,sub:{b:2,c:3}}") ).shallowEqual(x) );
-            assert( k.extractKey( fromjson("{sub:{b:2,c:3},a:1}") ).shallowEqual(x) );
+            assert( k.extractKey( fromjson("{a:1,sub:{b:2,c:3}}") ).binaryEqual(x) );
+            assert( k.extractKey( fromjson("{sub:{b:2,c:3},a:1}") ).binaryEqual(x) );
        }
        void moveToFrontTest() {
            ShardKeyPattern sk (BSON("a" << 1 << "b" << 1));
            BSONObj ret;
            ret = sk.moveToFront(BSON("z" << 1 << "_id" << 1 << "y" << 1 << "a" << 1 << "x" << 1 << "b" << 1 << "w" << 1));
-            assert(ret.shallowEqual(BSON("_id" << 1 << "a" << 1 << "b" << 1 << "z" << 1 << "y" << 1 << "x" << 1 << "w" << 1)));
+            assert(ret.binaryEqual(BSON("_id" << 1 << "a" << 1 << "b" << 1 << "z" << 1 << "y" << 1 << "x" << 1 << "w" << 1)));
            ret = sk.moveToFront(BSON("_id" << 1 << "a" << 1 << "b" << 1 << "z" << 1 << "y" << 1 << "x" << 1 << "w" << 1));
-            assert(ret.shallowEqual(BSON("_id" << 1 << "a" << 1 << "b" << 1 << "z" << 1 << "y" << 1 << "x" << 1 << "w" << 1)));
+            assert(ret.binaryEqual(BSON("_id" << 1 << "a" << 1 << "b" << 1 << "z" << 1 << "y" << 1 << "x" << 1 << "w" << 1)));
            ret = sk.moveToFront(BSON("z" << 1 << "y" << 1 << "a" << 1 << "b" << 1 << "Z" << 1 << "Y" << 1));
-            assert(ret.shallowEqual(BSON("a" << 1 << "b" << 1 << "z" << 1 << "y" << 1 << "Z" << 1 << "Y" << 1)));
+            assert(ret.binaryEqual(BSON("a" << 1 << "b" << 1 << "z" << 1 << "y" << 1 << "Z" << 1 << "Y" << 1)));
        }
@@ -263,7 +264,7 @@ namespace mongo {
                moveToFrontBenchmark(100);
            }
-            log(1) << "shardKeyTest passed" << endl;
+            LOG(1) << "shardKeyTest passed" << endl;
        }
    } shardKeyTest;
diff --git a/s/shardkey.h b/s/shardkey.h
index 96301ffe093..976cff09591 100644
--- a/s/shardkey.h
+++ b/s/shardkey.h
@@ -102,7 +102,21 @@ namespace mongo {
    };
    inline BSONObj ShardKeyPattern::extractKey(const BSONObj& from) const {
-        BSONObj k = from.extractFields(pattern);
+        BSONObj k = from;
+        bool needExtraction = false;
+
+        BSONObjIterator a(from);
+        BSONObjIterator b(pattern);
+        while (a.more() && b.more()){
+            if (strcmp(a.next().fieldName(), b.next().fieldName()) != 0){
+                needExtraction = true;
+                break;
+            }
+        }
+
+        if (needExtraction || a.more() != b.more())
+            k = from.extractFields(pattern);
+
        uassert(13334, "Shard Key must be less than 512 bytes", k.objsize() < 512);
        return k;
    }
cursor ) { - log(6) << "\t invalid cursor :(" << endl; + LOG(6) << "\t invalid cursor :(" << endl; replyToQuery( ResultFlag_CursorNotFound , r.p() , r.m() , 0 , 0 , 0 ); return; } @@ -121,7 +123,7 @@ namespace mongo { void _insert( Request& r , DbMessage& d, ChunkManagerPtr manager ) { const int flags = d.reservedField(); - bool keepGoing = flags & InsertOption_KeepGoing; // modified before assertion if should abort + bool keepGoing = flags & InsertOption_ContinueOnError; // modified before assertion if should abort while ( d.moreJSObjs() ) { try { @@ -139,8 +141,8 @@ namespace mongo { } if ( bad ) { - log() << "tried to insert object without shard key: " << r.getns() << " " << o << endl; - uasserted( 8011 , "tried to insert object without shard key" ); + log() << "tried to insert object with no valid shard key: " << r.getns() << " " << o << endl; + uasserted( 8011 , "tried to insert object with no valid shard key" ); } } @@ -154,7 +156,7 @@ namespace mongo { for ( int i=0; i<maxTries; i++ ) { try { ChunkPtr c = manager->findChunk( o ); - log(4) << " server:" << c->getShard().toString() << " " << o << endl; + LOG(4) << " server:" << c->getShard().toString() << " " << o << endl; insert( c->getShard() , r.getns() , o , flags); r.gotInsert(); @@ -167,20 +169,20 @@ namespace mongo { int logLevel = i < ( maxTries / 2 ); LOG( logLevel ) << "retrying insert because of StaleConfigException: " << e << " object: " << o << endl; r.reset(); - - unsigned long long old = manager->getSequenceNumber(); - manager = r.getChunkManager(); - - LOG( logLevel ) << " sequence number - old: " << old << " new: " << manager->getSequenceNumber() << endl; - if (!manager) { + manager = r.getChunkManager(); + if( ! manager ) { keepGoing = false; uasserted(14804, "collection no longer sharded"); } + + unsigned long long old = manager->getSequenceNumber(); + + LOG( logLevel ) << " sequence number - old: " << old << " new: " << manager->getSequenceNumber() << endl; } sleepmillis( i * 20 ); } - + assert( inShutdown() || gotThrough ); // not caught below } catch (const UserException&){ if (!keepGoing || !d.moreJSObjs()){ @@ -208,8 +210,8 @@ namespace mongo { } if ( bad ) { - log() << "tried to insert object without shard key: " << nsChunkLookup << " " << o << endl; - uasserted( 14842 , "tried to insert object without shard key" ); + log() << "tried to insert object with no valid shard key: " << nsChunkLookup << " " << o << endl; + uasserted( 14842 , "tried to insert object with no valid shard key" ); } } @@ -222,7 +224,7 @@ namespace mongo { for ( int i=0; i<maxTries; i++ ) { try { ChunkPtr c = manager->findChunk( o ); - log(4) << " server:" << c->getShard().toString() << " " << o << endl; + LOG(4) << " server:" << c->getShard().toString() << " " << o << endl; insert( c->getShard() , ns , o , flags, safe); break; } @@ -231,7 +233,7 @@ namespace mongo { int logLevel = i < ( maxTries / 2 ); LOG( logLevel ) << "retrying insert because of StaleConfigException: " << e << " object: " << o << endl; unsigned long long old = manager->getSequenceNumber(); - manager = conf->getChunkManager(ns); + manager = conf->getChunkManagerIfExists(ns); LOG( logLevel ) << " sequence number - old: " << old << " new: " << manager->getSequenceNumber() << endl; @@ -256,7 +258,7 @@ namespace mongo { bool multi = flags & UpdateOption_Multi; if (upsert) { - uassert(8012, "can't upsert something without shard key", + uassert(8012, "can't upsert something without valid shard key", (manager->hasShardKey(toupdate) || (toupdate.firstElementFieldName()[0] == '$' && manager->hasShardKey(query))));
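The _insert path above retries a bounded number of times when mongos's routing information goes stale: it reloads the chunk manager (getChunkManagerIfExists returns null once the collection is no longer sharded) and sleeps with a linearly growing delay between attempts. Below is a minimal, self-contained shell-JavaScript sketch of that retry shape, for illustration only; sendInsert and refreshRouting are hypothetical stand-ins, not mongos APIs.

var failuresLeft = 2;                                  // simulate two stale-config responses
function sendInsert( doc ){ if ( failuresLeft-- > 0 ) throw "stale config"; }
function refreshRouting(){ return { sequenceNumber : failuresLeft + 1 }; } // stand-in for getChunkManagerIfExists

function insertWithRetry( doc ){
    var maxTries = 7;                                  // same bound as the code above
    for ( var i = 0; i < maxTries; i++ ){
        try {
            sendInsert( doc );                         // throws while the routing info is stale
            return true;
        }
        catch ( e ){
            var manager = refreshRouting();            // reload routing info before retrying
            if ( ! manager )
                return false;                          // collection no longer sharded
        }
        sleep( i * 20 );                               // linear backoff, like sleepmillis( i * 20 )
    }
    return false;                                      // gave up after maxTries attempts
}

assert( insertWithRetry( { _id : 1 } ) );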
@@ -271,7 +273,8 @@ namespace mongo { if ( multi ) { } else if ( strcmp( query.firstElementFieldName() , "_id" ) || query.nFields() != 1 ) { - throw UserException( 8013 , "can't do non-multi update with query that doesn't have the shard key" ); + log() << "Query " << query << endl; + throw UserException( 8013 , "can't do non-multi update with query that doesn't have a valid shard key" ); } else { save = true; @@ -304,7 +307,7 @@ namespace mongo { } else { uasserted(12376, - str::stream() << "shard key must be in update object for collection: " << manager->getns() ); + str::stream() << "valid shard key must be in update object for collection: " << manager->getns() ); } } @@ -349,7 +352,7 @@ namespace mongo { bool multi = flags & UpdateOption_Multi; if (upsert) { - uassert(14854, "can't upsert something without shard key", + uassert(14854, "can't upsert something without valid shard key", (manager->hasShardKey(toupdate) || (toupdate.firstElementFieldName()[0] == '$' && manager->hasShardKey(query)))); @@ -364,7 +367,7 @@ namespace mongo { if ( multi ) { } else if ( strcmp( query.firstElementFieldName() , "_id" ) || query.nFields() != 1 ) { - throw UserException( 14850 , "can't do non-multi update with query that doesn't have the shard key" ); + throw UserException( 14850 , "can't do non-multi update with query that doesn't have a valid shard key" ); } else { save = true; @@ -397,7 +400,7 @@ namespace mongo { } else { uasserted(14857, - str::stream() << "shard key must be in update object for collection: " << manager->getns() ); + str::stream() << "valid shard key must be in update object for collection: " << manager->getns() ); } } @@ -447,7 +450,7 @@ namespace mongo { while ( true ) { try { manager->getShardsForQuery( shards , pattern ); - log(2) << "delete : " << pattern << " \t " << shards.size() << " justOne: " << justOne << endl; + LOG(2) << "delete : " << pattern << " \t " << shards.size() << " justOne: " << justOne << endl; if ( shards.size() == 1 ) { doWrite( dbDelete , r , *shards.begin() ); return; @@ -479,7 +482,7 @@ namespace mongo { virtual void writeOp( int op , Request& r ) { const char *ns = r.getns(); - log(3) << "write: " << ns << endl; + LOG(3) << "write: " << ns << endl; DbMessage& d = r.d(); ChunkManagerPtr info = r.getChunkManager(); diff --git a/s/strategy_single.cpp b/s/strategy_single.cpp index b3eef9dafa4..012be5fb3dd 100644 --- a/s/strategy_single.cpp +++ b/s/strategy_single.cpp @@ -36,7 +36,7 @@ namespace mongo { virtual void queryOp( Request& r ) { QueryMessage q( r.d() ); - log(3) << "single query: " << q.ns << " " << q.query << " ntoreturn: " << q.ntoreturn << endl; + LOG(3) << "single query: " << q.ns << " " << q.query << " ntoreturn: " << q.ntoreturn << " options : " << q.queryOptions << endl; if ( r.isCommand() ) { @@ -55,7 +55,7 @@ namespace mongo { : str::equals("query", e.fieldName()))) cmdObj = e.embeddedObject(); } - bool ok = Command::runAgainstRegistered(q.ns, cmdObj, builder); + bool ok = Command::runAgainstRegistered(q.ns, cmdObj, builder, q.queryOptions); if ( ok ) { BSONObj x = builder.done(); replyToQuery(0, r.p(), r.m(), x); @@ -161,12 +161,12 @@ namespace mongo { if ( r.isShardingEnabled() && strstr( ns , ".system.indexes" ) == strchr( ns , '.' ) && strchr( ns , '.' 
) ) { - log(1) << " .system.indexes write for: " << ns << endl; + LOG(1) << " .system.indexes write for: " << ns << endl; handleIndexWrite( op , r ); return; } - log(3) << "single write: " << ns << endl; + LOG(3) << "single write: " << ns << endl; doWrite( op , r , r.primaryShard() ); r.gotInsert(); // Won't handle multi-insert correctly. Not worth parsing the request. } diff --git a/s/writeback_listener.cpp b/s/writeback_listener.cpp index 81f75988a6e..5f320d3921f 100644 --- a/s/writeback_listener.cpp +++ b/s/writeback_listener.cpp @@ -117,7 +117,7 @@ namespace mongo { while ( ! inShutdown() ) { if ( ! Shard::isAShardNode( _addr ) ) { - log(1) << _addr << " is not a shard node" << endl; + LOG(1) << _addr << " is not a shard node" << endl; sleepsecs( 60 ); continue; } @@ -216,7 +216,10 @@ namespace mongo { if ( gle["code"].numberInt() == 9517 ) { log() << "writeback failed because of stale config, retrying attempts: " << attempts << endl; - db->getChunkManager( ns , true ); + if( ! db->getChunkManagerIfExists( ns , true ) ){ + uassert( 15884, str::stream() << "Could not reload chunk manager after " << attempts << " attempts.", attempts <= 4 ); + sleepsecs( attempts - 1 ); + } continue; } diff --git a/scripting/bench.cpp b/scripting/bench.cpp index 1ac7f04a55e..9ada7d6495c 100644 --- a/scripting/bench.cpp +++ b/scripting/bench.cpp @@ -142,7 +142,7 @@ namespace mongo { conn->remove( ns , fixQuery( e["query"].Obj() ) ); } else if ( op == "update" ) { - conn->update( ns , fixQuery( e["query"].Obj() ) , e["update"].Obj() ); + conn->update( ns , fixQuery( e["query"].Obj() ) , e["update"].Obj() , e["upsert"].trueValue() ); } else { log() << "don't understand op: " << op << endl; diff --git a/server.h b/server.h @@ -1,25 +1,25 @@ -/** @file server.h
-
- This file contains includes commonly needed in the server files (mongod, mongos, test). It is NOT included in the C++ client.
-
- Over time we should move more here, and more out of pch.h. And get rid of pch.h at some point.
-*/
-
-// todo is there a boost thing for this already?
-
-#pragma once
-
-#include "bson/inline_decls.h"
-
-/* Note: do not clutter code with these -- ONLY use in hot spots / significant loops. */
-
-// branch prediction. indicate we expect to enter the if statement body
-#define IF MONGOIF
-
-// branch prediction. indicate we expect to not enter the if statement body
-#define _IF MONGO_IF
-
-// prefetch data from memory
-#define PREFETCH MONGOPREFETCH
-
-using namespace bson;
+/** @file server.h + + This file contains includes commonly needed in the server files (mongod, mongos, test). It is NOT included in the C++ client. + + Over time we should move more here, and more out of pch.h. And get rid of pch.h at some point. +*/ + +// todo is there a boost thing for this already? + +#pragma once + +#include "bson/inline_decls.h" + +/* Note: do not clutter code with these -- ONLY use in hot spots / significant loops. */ + +// branch prediction. indicate we expect to be true +#define likely MONGO_likely + +// branch prediction. indicate we expect to be false +#define unlikely MONGO_unlikely + +// prefetch data from memory +#define PREFETCH MONGOPREFETCH + +using namespace bson; diff --git a/shell/collection.js b/shell/collection.js index cf8f5ce19c1..862a0a11440 100644 --- a/shell/collection.js +++ b/shell/collection.js @@ -120,7 +120,7 @@ DBCollection.prototype._validateObject = function( o ){ throw "can't save a DBQuery object"; } -DBCollection._allowedFields = { $id : 1 , $ref : 1 , $db : 1 }; +DBCollection._allowedFields = { $id : 1 , $ref : 1 , $db : 1 , $MinKey : 1, $MaxKey : 1 }; DBCollection.prototype._validateForStorage = function( o ){ this._validateObject( o ); diff --git a/shell/dbshell.cpp b/shell/dbshell.cpp index 8db622732dc..f3122c797d5 100644 --- a/shell/dbshell.cpp +++ b/shell/dbshell.cpp @@ -403,6 +403,8 @@ string finishCode( string code ) { return ""; if ( ! line ) return ""; + if ( code.find("\n\n") != string::npos ) // cancel multiline if two blank lines are entered + return ";"; while (startsWith(line, "... ")) line += 4; @@ -504,6 +506,9 @@ int _main(int argc, char* argv[]) { ("version", "show version information") ("verbose", "increase verbosity") ("ipv6", "enable IPv6 support (disabled by default)") +#ifdef MONGO_SSL + ("ssl", "use ssl for all connections") +#endif ; hidden_options.add_options() @@ -572,6 +577,11 @@ int _main(int argc, char* argv[]) { if (params.count("quiet")) { mongo::cmdLine.quiet = true; } +#ifdef MONGO_SSL + if (params.count("ssl")) { + mongo::cmdLine.sslOnNormalPorts = true; + } +#endif if (params.count("nokillop")) { mongo::shellUtils::_nokillop = true; } @@ -579,6 +589,8 @@ int _main(int argc, char* argv[]) { autoKillOp = true; } + + /* This is a bit confusing, here are the rules: * * if nodb is set then all positional parameters are files diff --git a/shell/mongo.js b/shell/mongo.js index e129784bf66..25357691c51 100644 --- a/shell/mongo.js +++ b/shell/mongo.js @@ -24,8 +24,9 @@ if ( typeof mongoInject == "function" ){ mongoInject( Mongo.prototype ); } -Mongo.prototype.setSlaveOk = function() { - this.slaveOk = true; +Mongo.prototype.setSlaveOk = function( value ) { + if( value == undefined ) value = true + this.slaveOk = value } Mongo.prototype.getDB = function( name ){ @@ -43,6 +44,10 @@ Mongo.prototype.adminCommand = function( cmd ){ return this.getDB( "admin" ).runCommand( cmd ); } +Mongo.prototype.setLogLevel = function( logLevel ){ + return this.adminCommand({ setParameter : 1, logLevel : logLevel }) +} + Mongo.prototype.getDBNames = function(){ return this.getDBs().databases.map( function(z){ diff --git a/shell/mongo_vstudio.cpp b/shell/mongo_vstudio.cpp index ea0b2cd4b20..2fbb6d908b5 100644 --- a/shell/mongo_vstudio.cpp +++ b/shell/mongo_vstudio.cpp @@ -89,6 +89,26 @@ const StringData _jscode_raw_utils = "doassert( \"[\" + a + \"] != [\" + b + \"] are equal : \" + msg );\n" "}\n" "\n" +"assert.contains = function( o, arr, msg ){\n" +"var wasIn = false\n" +"\n" +"if( ! 
arr.length ){\n" +"for( i in arr ){\n" +"wasIn = arr[i] == o || ( ( arr[i] != null && o != null ) && friendlyEqual( arr[i] , o ) )\n" +"\n" +"if( wasIn ) break\n" +"}\n" +"}\n" +"else {\n" +"for( var i = 0; i < arr.length; i++ ){\n" +"wasIn = arr[i] == o || ( ( arr[i] != null && o != null ) && friendlyEqual( arr[i] , o ) )\n" +"if( wasIn ) break\n" +"}\n" +"}\n" +"\n" +"if( ! wasIn ) doassert( tojson( o ) + \" was not in \" + tojson( arr ) + \" : \" + msg )\n" +"}\n" +"\n" "assert.repeat = function( f, msg, timeout, interval ) {\n" "if ( assert._debug && msg ) print( \"in assert for: \" + msg );\n" "\n" @@ -216,6 +236,18 @@ const StringData _jscode_raw_utils = "doassert( a + \" is not greater than or eq \" + b + \" : \" + msg );\n" "}\n" "\n" +"assert.between = function( a, b, c, msg, inclusive ){\n" +"if ( assert._debug && msg ) print( \"in assert for: \" + msg );\n" +"\n" +"if( ( inclusive == undefined || inclusive == true ) &&\n" +"a <= b && b <= c ) return;\n" +"else if( a < b && b < c ) return;\n" +"\n" +"doassert( b + \" is not between \" + a + \" and \" + c + \" : \" + msg );\n" +"}\n" +"\n" +"assert.betweenIn = function( a, b, c, msg ){ assert.between( a, b, c, msg, true ) }\n" +"assert.betweenEx = function( a, b, c, msg ){ assert.between( a, b, c, msg, false ) }\n" "\n" "assert.close = function( a , b , msg , places ){\n" "if (places === undefined) {\n" @@ -243,6 +275,11 @@ const StringData _jscode_raw_utils = "return dst;\n" "}\n" "\n" +"Object.merge = function( dst, src, deep ){\n" +"var clone = Object.extend( {}, dst, deep )\n" +"return Object.extend( clone, src, deep )\n" +"}\n" +"\n" "argumentsToArray = function( a ){\n" "var arr = [];\n" "for ( var i=0; i<a.length; i++ )\n" @@ -943,6 +980,35 @@ const StringData _jscode_raw_utils = "print( tojsononeline( x ) );\n" "}\n" "\n" +"if ( typeof TestData == \"undefined\" ){\n" +"TestData = undefined\n" +"}\n" +"\n" +"jsTestName = function(){\n" +"if( TestData ) return TestData.testName\n" +"return \"__unknown_name__\"\n" +"}\n" +"\n" +"jsTestFile = function(){\n" +"if( TestData ) return TestData.testFile\n" +"return \"__unknown_file__\"\n" +"}\n" +"\n" +"jsTestPath = function(){\n" +"if( TestData ) return TestData.testPath\n" +"return \"__unknown_path__\"\n" +"}\n" +"\n" +"jsTestOptions = function(){\n" +"if( TestData ) return { noJournal : TestData.noJournal,\n" +"noJournalPrealloc : TestData.noJournalPrealloc }\n" +"return {}\n" +"}\n" +"\n" +"testLog = function(x){\n" +"print( jsTestFile() + \" - \" + x )\n" +"}\n" +"\n" "shellPrintHelper = function (x) {\n" "\n" "if (typeof (x) == \"undefined\") {\n" @@ -1481,6 +1547,41 @@ const StringData _jscode_raw_utils = "return \"error: couldn't find \"+hn+\" in \"+tojson(c.members);\n" "};\n" "\n" +"rs.debug = {};\n" +"\n" +"rs.debug.nullLastOpWritten = function(primary, secondary) {\n" +"var p = connect(primary+\"/local\");\n" +"var s = connect(secondary+\"/local\");\n" +"s.getMongo().setSlaveOk();\n" +"\n" +"var secondToLast = s.oplog.rs.find().sort({$natural : -1}).limit(1).next();\n" +"var last = p.runCommand({findAndModify : \"oplog.rs\",\n" +"query : {ts : {$gt : secondToLast.ts}},\n" +"sort : {$natural : 1},\n" +"update : {$set : {op : \"n\"}}});\n" +"\n" +"if (!last.value.o || !last.value.o._id) {\n" +"print(\"couldn't find an _id?\");\n" +"}\n" +"else {\n" +"last.value.o = {_id : last.value.o._id};\n" +"}\n" +"\n" +"print(\"nulling out this op:\");\n" +"printjson(last);\n" +"};\n" +"\n" +"rs.debug.getLastOpWritten = function(server) {\n" +"var s = 
db.getSisterDB(\"local\");\n" +"if (server) {\n" +"s = connect(server+\"/local\");\n" +"}\n" +"s.getMongo().setSlaveOk();\n" +"\n" +"return s.oplog.rs.find().sort({$natural : -1}).limit(1).next();\n" +"};\n" +"\n" +"\n" "help = shellHelper.help = function (x) {\n" "if (x == \"mr\") {\n" "print(\"\\nSee also http://www.mongodb.org/display/DOCS/MapReduce\");\n" @@ -1634,7 +1735,8 @@ const StringData _jscode_raw_utils_sh = "print( \"\\tsh.moveChunk(fullName,find,to) move the chunk where 'find' is to 'to' (name of shard)\");\n" "\n" "print( \"\\tsh.setBalancerState( <bool on or not> ) turns the balancer on or off true=on, false=off\" );\n" -"print( \"\\tsh.getBalancerState() return true if on, off if not\" );\n" +"print( \"\\tsh.getBalancerState() return true if on, off if not\" );\n" +"print( \"\\tsh.isBalancerRunning() return true if the balancer is running on any mongos\" );\n" "\n" "print( \"\\tsh.status() prints a general overview of the cluster\" )\n" "}\n" @@ -1691,6 +1793,11 @@ const StringData _jscode_raw_utils_sh = "return true;\n" "return ! x.stopped;\n" "}\n" +"\n" +"sh.isBalancerRunning = function() {\n" +"var x = db.getSisterDB( \"config\" ).locks.findOne( { _id : \"balancer\" } );\n" +"return x.state > 0;\n" +"}\n" ; extern const JSFile utils_sh; const JSFile utils_sh = { "shell/utils_sh.js" , _jscode_raw_utils_sh }; @@ -2552,8 +2659,9 @@ const StringData _jscode_raw_mongo = "mongoInject( Mongo.prototype );\n" "}\n" "\n" -"Mongo.prototype.setSlaveOk = function() {\n" -"this.slaveOk = true;\n" +"Mongo.prototype.setSlaveOk = function( value ) {\n" +"if( value == undefined ) value = true\n" +"this.slaveOk = value\n" "}\n" "\n" "Mongo.prototype.getDB = function( name ){\n" @@ -2571,6 +2679,10 @@ const StringData _jscode_raw_mongo = "return this.getDB( \"admin\" ).runCommand( cmd );\n" "}\n" "\n" +"Mongo.prototype.setLogLevel = function( logLevel ){\n" +"return this.adminCommand({ setParameter : 1, logLevel : logLevel })\n" +"}\n" +"\n" "Mongo.prototype.getDBNames = function(){\n" "return this.getDBs().databases.map(\n" "function(z){\n" @@ -3162,7 +3274,7 @@ const StringData _jscode_raw_collection = "throw \"can't save a DBQuery object\";\n" "}\n" "\n" -"DBCollection._allowedFields = { $id : 1 , $ref : 1 , $db : 1 };\n" +"DBCollection._allowedFields = { $id : 1 , $ref : 1 , $db : 1 , $MinKey : 1, $MaxKey : 1 };\n" "\n" "DBCollection.prototype._validateForStorage = function( o ){\n" "this._validateObject( o );\n" diff --git a/shell/servers.js b/shell/servers.js index a4e568a0f0f..e551559a79c 100755 --- a/shell/servers.js +++ b/shell/servers.js @@ -21,6 +21,25 @@ _parsePort = function() { return port; } +connectionURLTheSame = function( a , b ){ + if ( a == b ) + return true; + + if ( ! a || ! b ) + return false; + + a = a.split( "/" )[0] + b = b.split( "/" )[0] + + return a == b; +} + +assert( connectionURLTheSame( "foo" , "foo" ) ) +assert( ! connectionURLTheSame( "foo" , "bar" ) ) + +assert( connectionURLTheSame( "foo/a,b" , "foo/b,a" ) ) +assert( ! 
connectionURLTheSame( "foo/a,b" , "bar/a,b" ) ) + createMongoArgs = function( binaryName , args ){ var fullArgs = [ binaryName ]; @@ -79,6 +98,9 @@ startMongodTest = function (port, dirname, restart, extraOptions ) { oplogSize: "40", nohttpinterface: "" }; + + if( jsTestOptions().noJournal ) options["nojournal"] = "" + if( jsTestOptions().noJournalPrealloc ) options["nopreallocj"] = "" if ( extraOptions ) Object.extend( options , extraOptions ); @@ -158,6 +180,17 @@ myPort = function() { * * useHostname to use the hostname (instead of localhost) */ ShardingTest = function( testName , numShards , verboseLevel , numMongos , otherParams ){ + + // Check if testName is an object, if so, pull params from there + if( testName && ! testName.charAt ){ + var params = testName + testName = params.name || "test" + numShards = params.shards || 2 + verboseLevel = params.verbose || 0 + numMongos = params.mongos || 1 + otherParams = params.other || {} + } + this._testName = testName; if ( ! otherParams ) @@ -170,8 +203,7 @@ ShardingTest = function( testName , numShards , verboseLevel , numMongos , other var localhost = otherParams.useHostname ? getHostName() : "localhost"; this._alldbpaths = [] - - + if ( otherParams.rs ){ localhost = getHostName(); // start replica sets @@ -179,15 +211,18 @@ ShardingTest = function( testName , numShards , verboseLevel , numMongos , other for ( var i=0; i<numShards; i++){ var setName = testName + "-rs" + i; - var rsDefaults = { oplogSize : 40 } + var rsDefaults = { oplogSize : 40, nodes : 3 } var rsParams = otherParams["rs" + i] for( var param in rsParams ){ rsDefaults[param] = rsParams[param] } + + var numReplicas = rsDefaults.nodes || otherParams.numReplicas || 3 + delete rsDefaults.nodes - var rs = new ReplSetTest( { name : setName , nodes : 3 , startPort : 31100 + ( i * 100 ) } ); - this._rs[i] = { setName : setName , test : rs , nodes : rs.startSet( rsParams ) , url : rs.getURL() }; + var rs = new ReplSetTest( { name : setName , nodes : numReplicas , startPort : 31100 + ( i * 100 ) } ); + this._rs[i] = { setName : setName , test : rs , nodes : rs.startSet( rsDefaults ) , url : rs.getURL() }; rs.initiate(); } @@ -322,10 +357,9 @@ ShardingTest.prototype.getServer = function( dbname ){ for ( var i=0; i<this._connections.length; i++ ){ var c = this._connections[i]; - if ( name == c.name ) + if ( connectionURLTheSame( name , c.name ) || + connectionURLTheSame( rsName , c.name ) ) return c; - if ( rsName && c.name.startsWith( rsName ) ) - return c; } throw "can't find server for: " + dbname + " name:" + name; @@ -683,7 +717,7 @@ ShardingTest.prototype.getShards = function( coll, query ){ for( var i = 0; i < shards.length; i++ ){ for( var j = 0; j < this._connections.length; j++ ){ - if( this._connections[j].name == shards[i] ){ + if ( connectionURLTheSame( this._connections[j].name , shards[i] ) ){ shards[i] = this._connections[j] break; } @@ -730,22 +764,36 @@ ShardingTest.prototype.shardGo = function( collName , key , split , move , dbNam if( collName.getDB ) c = "" + collName + var isEmpty = this.s.getCollection( c ).count() == 0 + if( ! this.isSharded( dbName ) ) this.s.adminCommand( { enableSharding : dbName } ) - this.s.adminCommand( { shardcollection : c , key : key } ); - this.s.adminCommand( { split : c , middle : split } ); + var result = this.s.adminCommand( { shardcollection : c , key : key } ) + if( ! result.ok ){ + printjson( result ) + assert( false ) + } + result = this.s.adminCommand( { split : c , middle : split } ); + if( ! 
result.ok ){ + printjson( result ) + assert( false ) + } + var result = null for( var i = 0; i < 5; i++ ){ result = this.s.adminCommand( { movechunk : c , find : move , to : this.getOther( this.getServer( dbName ) ).name } ); if( result.ok ) break; sleep( 5 * 1000 ); } + printjson( result ) assert( result.ok ) }; +ShardingTest.prototype.shardColl = ShardingTest.prototype.shardGo + ShardingTest.prototype.setBalancer = function( balancer ){ if( balancer || balancer == undefined ){ this.config.settings.update( { _id: "balancer" }, { $set : { stopped: false } } , true ) @@ -902,6 +950,8 @@ ReplTest.prototype.getOptions = function( master , extra , putBinaryFirst, norep a.push( "--dbpath" ); a.push( this.getPath( master ) ); + if( jsTestOptions().noJournal ) a.push( "--nojournal" ) + if( jsTestOptions().noJournalPrealloc ) a.push( "--nopreallocj" ) if ( !norepl ) { if ( master ){ @@ -1210,6 +1260,9 @@ ReplSetTest.prototype.getOptions = function( n , extra , putBinaryFirst ){ a.push( "--dbpath" ); a.push( this.getPath( ( n.host ? this.getNodeId( n ) : n ) ) ); + if( jsTestOptions().noJournal ) a.push( "--nojournal" ) + if( jsTestOptions().noJournalPrealloc ) a.push( "--nopreallocj" ) + for ( var k in extra ){ var v = extra[k]; a.push( "--" + k ); @@ -1271,6 +1324,50 @@ ReplSetTest.prototype.callIsMaster = function() { return master || false; } +ReplSetTest.awaitRSClientHosts = function( conn, host, hostOk, rs ) { + + if( host.length ){ + for( var i = 0; i < host.length; i++ ) ReplSetTest.awaitRSClientHosts( conn, host[i], hostOk, rs ) + return + } + + if( hostOk == undefined ) hostOk = { ok : true } + if( host.host ) host = host.host + if( rs && rs.getMaster ) rs = rs.name + + print( "Awaiting " + host + " to be " + tojson( hostOk ) + " for " + conn + " (rs: " + rs + ")" ) + + var tests = 0 + assert.soon( function() { + var rsClientHosts = conn.getDB( "admin" ).runCommand( "connPoolStats" )[ "replicaSets" ] + if( tests++ % 10 == 0 ) + printjson( rsClientHosts ) + + for ( rsName in rsClientHosts ){ + if( rs && rs != rsName ) continue + for ( var i = 0; i < rsClientHosts[rsName].hosts.length; i++ ){ + var clientHost = rsClientHosts[rsName].hosts[ i ]; + if( clientHost.addr != host ) continue + + // Check that *all* host properties are set correctly + var propOk = true + for( var prop in hostOk ){ + if( clientHost[prop] != hostOk[prop] ){ + propOk = false + break + } + } + + if( propOk ) return true; + + } + } + return false; + }, "timed out waiting for replica set client to recognize hosts", + 3 * 20 * 1000 /* ReplicaSetMonitorWatcher updates every 20s */ ) + +} + ReplSetTest.prototype.awaitSecondaryNodes = function( timeout ) { var master = this.getMaster(); var slaves = this.liveNodes.slaves; @@ -1296,6 +1393,7 @@ ReplSetTest.prototype.getMaster = function( timeout ) { return master; } +ReplSetTest.prototype.getPrimary = ReplSetTest.prototype.getMaster ReplSetTest.prototype.getSecondaries = function( timeout ){ var master = this.getMaster( timeout ) @@ -1308,6 +1406,16 @@ ReplSetTest.prototype.getSecondaries = function( timeout ){ return secs } +ReplSetTest.prototype.getSecondary = function( timeout ){ + return this.getSecondaries( timeout )[0]; +} + +ReplSetTest.prototype.status = function( timeout ){ + var master = this.callIsMaster() + if( ! 
master ) master = this.liveNodes.slaves[0] + return master.getDB("admin").runCommand({replSetGetStatus: 1}) +} + // Add a node to the test set ReplSetTest.prototype.add = function( config ) { if(this.ports.length == 0) { @@ -1379,62 +1487,76 @@ ReplSetTest.prototype.reInitiate = function() { this.initiate( config , 'replSetReconfig' ); } +ReplSetTest.prototype.getLastOpTimeWritten = function() { + this.getMaster(); + this.attempt({context : this, desc : "awaiting oplog query"}, + function() { + try { + this.latest = this.liveNodes.master.getDB("local")['oplog.rs'].find({}).sort({'$natural': -1}).limit(1).next()['ts']; + } + catch(e) { + print("ReplSetTest caught exception " + e); + return false; + } + return true; + }); +}; + ReplSetTest.prototype.awaitReplication = function(timeout) { - this.getMaster(); - timeout = timeout || 30000; + timeout = timeout || 30000; - this.attempt({context : this, desc : "awaiting oplog query"}, - function() { - try { - latest = this.liveNodes.master.getDB("local")['oplog.rs'].find({}).sort({'$natural': -1}).limit(1).next()['ts']; - } - catch(e) { - print("ReplSetTest caught exception " + e); - return false; - } - return true; - }); - - print("ReplSetTest " + latest); - - this.attempt({context: this, timeout: timeout, desc: "awaiting replication"}, - function() { - var synced = true; - for(var i=0; i<this.liveNodes.slaves.length; i++) { - var slave = this.liveNodes.slaves[i]; - - // Continue if we're connected to an arbiter - if(res = slave.getDB("admin").runCommand({replSetGetStatus: 1})) { - if(res.myState == 7) { - continue; - } - } - - slave.getDB("admin").getMongo().setSlaveOk(); - var log = slave.getDB("local")['oplog.rs']; - if(log.find({}).sort({'$natural': -1}).limit(1).hasNext()) { - var entry = log.find({}).sort({'$natural': -1}).limit(1).next(); - printjson( entry ); - var ts = entry['ts']; - print("ReplSetTest await TS for " + slave + " is " + ts.t+":"+ts.i + " and latest is " + latest.t+":"+latest.i); - - if (latest.t < ts.t || (latest.t == ts.t && latest.i < ts.i)) { - latest = this.liveNodes.master.getDB("local")['oplog.rs'].find({}).sort({'$natural': -1}).limit(1).next()['ts']; - } - - print("ReplSetTest await oplog size for " + slave + " is " + log.count()); - synced = (synced && friendlyEqual(latest,ts)) - } - else { - synced = false; - } - } - - if(synced) { - print("ReplSetTest await synced=" + synced); - } - return synced; - }); + this.getLastOpTimeWritten(); + + print("ReplSetTest " + this.latest); + + this.attempt({context: this, timeout: timeout, desc: "awaiting replication"}, + function() { + try { + var synced = true; + for(var i=0; i<this.liveNodes.slaves.length; i++) { + var slave = this.liveNodes.slaves[i]; + + // Continue if we're connected to an arbiter + if(res = slave.getDB("admin").runCommand({replSetGetStatus: 1})) { + if(res.myState == 7) { + continue; + } + } + + slave.getDB("admin").getMongo().setSlaveOk(); + var log = slave.getDB("local")['oplog.rs']; + if(log.find({}).sort({'$natural': -1}).limit(1).hasNext()) { + var entry = log.find({}).sort({'$natural': -1}).limit(1).next(); + printjson( entry ); + var ts = entry['ts']; + print("ReplSetTest await TS for " + slave + " is " + ts.t+":"+ts.i + " and latest is " + this.latest.t+":"+this.latest.i); + + if (this.latest.t < ts.t || (this.latest.t == ts.t && this.latest.i < ts.i)) { + this.latest = this.liveNodes.master.getDB("local")['oplog.rs'].find({}).sort({'$natural': -1}).limit(1).next()['ts']; + } + + print("ReplSetTest await oplog size for " + slave + " is " + 
log.count()); + synced = (synced && friendlyEqual(this.latest,ts)) + } + else { + synced = false; + } + } + + if(synced) { + print("ReplSetTest await synced=" + synced); + } + return synced; + } + catch (e) { + print("ReplSetTest.awaitReplication: caught exception "+e); + + // we might have a new master now + this.getLastOpTimeWritten(); + + return false; + } + }); } ReplSetTest.prototype.getHashes = function( db ){ @@ -1704,10 +1826,10 @@ ReplSetTest.prototype.waitForIndicator = function( node, states, ind, timeout ){ var lastTime = null var currTime = new Date().getTime() var status = undefined - + this.attempt({context: this, timeout: timeout, desc: "waiting for state indicator " + ind + " for " + timeout + "ms" }, function() { - status = this.getMaster().getDB("admin").runCommand({ replSetGetStatus : 1 }) + status = this.status() if( lastTime == null || ( currTime = new Date().getTime() ) - (1000 * 5) > lastTime ){ if( lastTime == null ) print( "ReplSetTest waitForIndicator Initial status ( timeout : " + timeout + " ) :" ) diff --git a/shell/utils.js b/shell/utils.js index 9a239dbc4a9..a903691fbd4 100644 --- a/shell/utils.js +++ b/shell/utils.js @@ -84,6 +84,26 @@ assert.neq = function( a , b , msg ){ doassert( "[" + a + "] != [" + b + "] are equal : " + msg ); } +assert.contains = function( o, arr, msg ){ + var wasIn = false + + if( ! arr.length ){ + for( i in arr ){ + wasIn = arr[i] == o || ( ( arr[i] != null && o != null ) && friendlyEqual( arr[i] , o ) ) + + if( wasIn ) break + } + } + else { + for( var i = 0; i < arr.length; i++ ){ + wasIn = arr[i] == o || ( ( arr[i] != null && o != null ) && friendlyEqual( arr[i] , o ) ) + if( wasIn ) break + } + } + + if( ! wasIn ) doassert( tojson( o ) + " was not in " + tojson( arr ) + " : " + msg ) +} + assert.repeat = function( f, msg, timeout, interval ) { if ( assert._debug && msg ) print( "in assert for: " + msg ); @@ -211,6 +231,18 @@ assert.gte = function( a , b , msg ){ doassert( a + " is not greater than or eq " + b + " : " + msg ); } +assert.between = function( a, b, c, msg, inclusive ){ + if ( assert._debug && msg ) print( "in assert for: " + msg ); + + if( ( inclusive == undefined || inclusive == true ) && + a <= b && b <= c ) return; + else if( a < b && b < c ) return; + + doassert( b + " is not between " + a + " and " + c + " : " + msg ); +} + +assert.betweenIn = function( a, b, c, msg ){ assert.between( a, b, c, msg, true ) } +assert.betweenEx = function( a, b, c, msg ){ assert.between( a, b, c, msg, false ) } assert.close = function( a , b , msg , places ){ if (places === undefined) { @@ -238,6 +270,11 @@ Object.extend = function( dst , src , deep ){ return dst; } +Object.merge = function( dst, src, deep ){ + var clone = Object.extend( {}, dst, deep ) + return Object.extend( clone, src, deep ) +} + argumentsToArray = function( a ){ var arr = []; for ( var i=0; i<a.length; i++ ) @@ -938,6 +975,35 @@ printjsononeline = function(x){ print( tojsononeline( x ) ); } +if ( typeof TestData == "undefined" ){ + TestData = undefined +} + +jsTestName = function(){ + if( TestData ) return TestData.testName + return "__unknown_name__" +} + +jsTestFile = function(){ + if( TestData ) return TestData.testFile + return "__unknown_file__" +} + +jsTestPath = function(){ + if( TestData ) return TestData.testPath + return "__unknown_path__" +} + +jsTestOptions = function(){ + if( TestData ) return { noJournal : TestData.noJournal, + noJournalPrealloc : TestData.noJournalPrealloc } + return {} +} + +testLog = function(x){ + print( 
jsTestFile() + " - " + x ) +} + shellPrintHelper = function (x) { if (typeof (x) == "undefined") { @@ -1476,6 +1542,41 @@ rs.remove = function (hn) { return "error: couldn't find "+hn+" in "+tojson(c.members); }; +rs.debug = {}; + +rs.debug.nullLastOpWritten = function(primary, secondary) { + var p = connect(primary+"/local"); + var s = connect(secondary+"/local"); + s.getMongo().setSlaveOk(); + + var secondToLast = s.oplog.rs.find().sort({$natural : -1}).limit(1).next(); + var last = p.runCommand({findAndModify : "oplog.rs", + query : {ts : {$gt : secondToLast.ts}}, + sort : {$natural : 1}, + update : {$set : {op : "n"}}}); + + if (!last.value.o || !last.value.o._id) { + print("couldn't find an _id?"); + } + else { + last.value.o = {_id : last.value.o._id}; + } + + print("nulling out this op:"); + printjson(last); +}; + +rs.debug.getLastOpWritten = function(server) { + var s = db.getSisterDB("local"); + if (server) { + s = connect(server+"/local"); + } + s.getMongo().setSlaveOk(); + + return s.oplog.rs.find().sort({$natural : -1}).limit(1).next(); +}; + + help = shellHelper.help = function (x) { if (x == "mr") { print("\nSee also http://www.mongodb.org/display/DOCS/MapReduce"); diff --git a/shell/utils_sh.js b/shell/utils_sh.js index 2f4a5a3f85b..5bd449bc61d 100644 --- a/shell/utils_sh.js +++ b/shell/utils_sh.js @@ -33,7 +33,8 @@ sh.help = function() { print( "\tsh.moveChunk(fullName,find,to) move the chunk where 'find' is to 'to' (name of shard)"); print( "\tsh.setBalancerState( <bool on or not> ) turns the balancer on or off true=on, false=off" ); - print( "\tsh.getBalancerState() return true if on, off if not" ); + print( "\tsh.getBalancerState() return true if on, off if not" ); + print( "\tsh.isBalancerRunning() return true if the balancer is running on any mongos" ); print( "\tsh.status() prints a general overview of the cluster" ) } @@ -90,3 +91,8 @@ sh.getBalancerState = function() { return true; return ! x.stopped; } + +sh.isBalancerRunning = function() { + var x = db.getSisterDB( "config" ).locks.findOne( { _id : "balancer" } ); + return x.state > 0; +} diff --git a/speed.js b/speed.js new file mode 100755 index 00000000000..c5aa3a36964 --- /dev/null +++ b/speed.js @@ -0,0 +1,13 @@ +t = db.fooo;
+t.drop();
+x = { str:'aaaabbbbcc' }
+s = new Date();
+for( var i = 0; i < 100000; i++ ) {
+ x.i = i;
+ t.insert(x);
+}
+print( (new Date())-s );
+t.ensureIndex({i:1});
+t.ensureIndex({str:1});
+print( (new Date())-s );
+
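speed.js above times fire-and-forget inserts, so the first number it prints mostly measures client-side work; the server may still be applying writes when the time is taken. The following variant is a sketch, not part of this commit: it uses the shell's standard getLastError helper to wait for server acknowledgement before timing.

t = db.speed_ack;
t.drop();
s = new Date();
for ( var i = 0; i < 100000; i++ ) {
    t.insert( { str : 'aaaabbbbcc' , i : i } );
}
db.getLastError();             // block until the server has applied all of the inserts above
print( (new Date()) - s );     // acknowledged insert time, comparable to speed.js's first print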
diff --git a/third_party/linenoise/linenoise.cpp b/third_party/linenoise/linenoise.cpp index 81f76194512..dca8dbb5a4f 100644 --- a/third_party/linenoise/linenoise.cpp +++ b/third_party/linenoise/linenoise.cpp @@ -549,7 +549,10 @@ static int linenoisePrompt(int fd, char *buf, size_t buflen, const char *prompt) /* Only autocomplete when the callback is set. It returns < 0 when * there was an error reading from fd. Otherwise it will return the * character that should be handled next. */ - if (c == 9 && completionCallback != NULL) { + if (c == 9 && completionCallback != NULL) { /* tab */ + /* ignore tabs used for indentation */ + if (pos == 0) continue; + c = completeLine(fd,prompt,buf,buflen,&len,&pos,cols); /* Return on errors */ if (c < 0) return len; diff --git a/third_party/snappy/COPYING b/third_party/snappy/COPYING new file mode 100755 index 00000000000..8d6bd9fed4e --- /dev/null +++ b/third_party/snappy/COPYING @@ -0,0 +1,28 @@ +Copyright 2011, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/third_party/snappy/README b/third_party/snappy/README new file mode 100755 index 00000000000..df8f0e178e2 --- /dev/null +++ b/third_party/snappy/README @@ -0,0 +1,135 @@ +Snappy, a fast compressor/decompressor. + + +Introduction +============ + +Snappy is a compression/decompression library. It does not aim for maximum +compression, or compatibility with any other compression library; instead, +it aims for very high speeds and reasonable compression. For instance, +compared to the fastest mode of zlib, Snappy is an order of magnitude faster +for most inputs, but the resulting compressed files are anywhere from 20% to +100% bigger. (For more information, see "Performance", below.) + +Snappy has the following properties: + + * Fast: Compression speeds at 250 MB/sec and beyond, with no assembler code. + See "Performance" below. + * Stable: Over the last few years, Snappy has compressed and decompressed + petabytes of data in Google's production environment. The Snappy bitstream + format is stable and will not change between versions. 
+ * Robust: The Snappy decompressor is designed not to crash in the face of + corrupted or malicious input. + * Free and open source software: Snappy is licensed under a BSD-type license. + For more information, see the included COPYING file. + +Snappy has previously been called "Zippy" in some Google presentations +and the like. + + +Performance +=========== + +Snappy is intended to be fast. On a single core of a Core i7 processor +in 64-bit mode, it compresses at about 250 MB/sec or more and decompresses at +about 500 MB/sec or more. (These numbers are for the slowest inputs in our +benchmark suite; others are much faster.) In our tests, Snappy usually +is faster than algorithms in the same class (e.g. LZO, LZF, FastLZ, QuickLZ, +etc.) while achieving comparable compression ratios. + +Typical compression ratios (based on the benchmark suite) are about 1.5-1.7x +for plain text, about 2-4x for HTML, and of course 1.0x for JPEGs, PNGs and +other already-compressed data. Similar numbers for zlib in its fastest mode +are 2.6-2.8x, 3-7x and 1.0x, respectively. More sophisticated algorithms are +capable of achieving yet higher compression rates, although usually at the +expense of speed. Of course, compression ratio will vary significantly with +the input. + +Although Snappy should be fairly portable, it is primarily optimized +for 64-bit x86-compatible processors, and may run slower in other environments. +In particular: + + - Snappy uses 64-bit operations in several places to process more data at + once than would otherwise be possible. + - Snappy assumes unaligned 32- and 64-bit loads and stores are cheap. + On some platforms, these must be emulated with single-byte loads + and stores, which is much slower. + - Snappy assumes little-endian throughout, and needs to byte-swap data in + several places if running on a big-endian platform. + +Experience has shown that even heavily tuned code can be improved. +Performance optimizations, whether for 64-bit x86 or other platforms, +are of course most welcome; see "Contact", below. + + +Usage +===== + +Note that Snappy, both the implementation and the main interface, +is written in C++. However, several third-party bindings to other languages +are available; see the Google Code page at http://code.google.com/p/snappy/ +for more information. Also, if you want to use Snappy from C code, you can +use the included C bindings in snappy-c.h. + +To use Snappy from your own C++ program, include the file "snappy.h" from +your calling file, and link against the compiled library. + +There are many ways to call Snappy, but the simplest possible is + + snappy::Compress(input, &output); + +and similarly + + snappy::Uncompress(input, &output); + +where "input" and "output" are both instances of std::string. + +There are other interfaces that are more flexible in various ways, including +support for custom (non-array) input sources. See the header file for more +information. + + +Tests and benchmarks +==================== + +When you compile Snappy, snappy_unittest is compiled in addition to the +library itself. You do not need it to use the compressor from your own library, +but it contains several useful components for Snappy development. + +First of all, it contains unit tests, verifying correctness on your machine in +various scenarios. If you want to change or optimize Snappy, please run the +tests to verify you have not broken anything. 
Note that if you have the +Google Test library installed, unit test behavior (especially failures) will be +significantly more user-friendly. You can find Google Test at + + http://code.google.com/p/googletest/ + +You probably also want the gflags library for handling of command-line flags; +you can find it at + + http://code.google.com/p/google-gflags/ + +In addition to the unit tests, snappy contains microbenchmarks used to +tune compression and decompression performance. These are automatically run +before the unit tests, but you can disable them using the flag +--run_microbenchmarks=false if you have gflags installed (otherwise you will +need to edit the source). + +Finally, snappy can benchmark Snappy against a few other compression libraries +(zlib, LZO, LZF, FastLZ and QuickLZ), if they were detected at configure time. +To benchmark using a given file, give the compression algorithm you want to test +Snappy against (e.g. --zlib) and then a list of one or more file names on the +command line. The testdata/ directory contains the files used by the +microbenchmark, which should provide a reasonably balanced starting point for +benchmarking. (Note that baddata[1-3].snappy are not intended as benchmarks; they +are used to verify correctness in the presence of corrupted data in the unit +test.) + + +Contact +======= + +Snappy is distributed through Google Code. For the latest version, a bug tracker, +and other information, see + + http://code.google.com/p/snappy/ diff --git a/third_party/snappy/config.h b/third_party/snappy/config.h new file mode 100755 index 00000000000..bfc3b30087f --- /dev/null +++ b/third_party/snappy/config.h @@ -0,0 +1,124 @@ +/* config.h.in. Generated from configure.ac by autoheader. */ + +/* Define if building universal (internal helper macro) */ +//#undef AC_APPLE_UNIVERSAL_BUILD + +#if defined(_WIN32) +// signed/unsigned mismatch +#pragma warning( disable : 4018 ) +#endif + +/* Define to 1 if the compiler supports __builtin_ctz and friends. */ +#if defined(__GNUC__) +#define HAVE_BUILTIN_CTZ 1 +#endif + +/* Define to 1 if the compiler supports __builtin_expect. */ +#if defined(__GNUC__) +#define HAVE_BUILTIN_EXPECT 1 +#endif + +/* Define to 1 if you have the <dlfcn.h> header file. */ +#if !defined(_WIN32) +#define HAVE_DLFCN_H 1 +#endif + +/* Use the gflags package for command-line parsing. */ +#undef HAVE_GFLAGS + +/* Defined when Google Test is available. */ +#undef HAVE_GTEST + +/* Define to 1 if you have the <inttypes.h> header file. */ +#define HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the `fastlz' library (-lfastlz). */ +#undef HAVE_LIBFASTLZ + +/* Define to 1 if you have the `lzf' library (-llzf). */ +#undef HAVE_LIBLZF + +/* Define to 1 if you have the `lzo2' library (-llzo2). */ +#undef HAVE_LIBLZO2 + +/* Define to 1 if you have the `quicklz' library (-lquicklz). */ +#undef HAVE_LIBQUICKLZ + +/* Define to 1 if you have the `z' library (-lz). */ +#undef HAVE_LIBZ + +/* Define to 1 if you have the <memory.h> header file. */ +#define HAVE_MEMORY_H 1 + +/* Define to 1 if you have the <stddef.h> header file. */ +#define HAVE_STDDEF_H 1 + +/* Define to 1 if you have the <stdint.h> header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the <stdlib.h> header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the <strings.h> header file. */ +#define HAVE_STRINGS_H 1 + +/* Define to 1 if you have the <string.h> header file. */ +#define HAVE_STRING_H 1 + +/* Define to 1 if you have the <sys/mman.h> header file. 
*/ +#if !defined(_WIN32) +#define HAVE_SYS_MMAN_H 1 +#endif + +/* Define to 1 if you have the <sys/resource.h> header file. */ +#define HAVE_SYS_RESOURCE_H 1 + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the <sys/types.h> header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the <unistd.h> header file. */ +#define HAVE_UNISTD_H 1 + +/* Define to 1 if you have the <windows.h> header file. */ +#if defined(_WIN32) +#define HAVE_WINDOWS_H 1 +#endif + +/* Define to the sub-directory in which libtool stores uninstalled libraries. + */ +#define LT_OBJDIR "libs/" + +/* Name of package */ +#define PACKAGE "snappy" + +#define PACKAGE_BUGREPORT "" + +/* Define to the full name of this package. */ +#define PACKAGE_NAME "snappy" + +/* Define to the full name and version of this package. */ +#define PACKAGE_STRING "snappy 1.0.3" + +/* Define to the one symbol short name of this package. */ +#define PACKAGE_TARNAME "snappy" + +/* Define to the home page for this package. */ +#define PACKAGE_URL "" + +/* Define to the version of this package. */ +#define PACKAGE_VERSION "1.0.3" + +/* Define to 1 if you have the ANSI C header files. */ +#define STDC_HEADERS 1 + +/* Version number of package */ +#define VERSION "1.0.3" + +/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most + significant byte first (like Motorola and SPARC, unlike Intel). */ +#if defined(__BIG_ENDIAN__) +#define WORDS_BIGENDIAN 1 +#endif diff --git a/third_party/snappy/snappy-internal.h b/third_party/snappy/snappy-internal.h new file mode 100755 index 00000000000..a32eda59fb2 --- /dev/null +++ b/third_party/snappy/snappy-internal.h @@ -0,0 +1,150 @@ +// Copyright 2008 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Internals shared between the Snappy implementation and its unittest. 
+ +#ifndef UTIL_SNAPPY_SNAPPY_INTERNAL_H_ +#define UTIL_SNAPPY_SNAPPY_INTERNAL_H_ + +#include "snappy-stubs-internal.h" + +namespace snappy { +namespace internal { + +class WorkingMemory { + public: + WorkingMemory() : large_table_(NULL) { } + ~WorkingMemory() { delete[] large_table_; } + + // Allocates and clears a hash table using memory in "*this", + // stores the number of buckets in "*table_size" and returns a pointer to + // the base of the hash table. + uint16* GetHashTable(size_t input_size, int* table_size); + + private: + uint16 small_table_[1<<10]; // 2KB + uint16* large_table_; // Allocated only when needed + + DISALLOW_COPY_AND_ASSIGN(WorkingMemory); +}; + +// Flat array compression that does not emit the "uncompressed length" +// prefix. Compresses "input" string to the "*op" buffer. +// +// REQUIRES: "input_length <= kBlockSize" +// REQUIRES: "op" points to an array of memory that is at least +// "MaxCompressedLength(input_length)" in size. +// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero. +// REQUIRES: "table_size" is a power of two +// +// Returns an "end" pointer into "op" buffer. +// "end - op" is the compressed size of "input". +char* CompressFragment(const char* input, + size_t input_length, + char* op, + uint16* table, + const int table_size); + +// Return the largest n such that +// +// s1[0,n-1] == s2[0,n-1] +// and n <= (s2_limit - s2). +// +// Does not read *s2_limit or beyond. +// Does not read *(s1 + (s2_limit - s2)) or beyond. +// Requires that s2_limit >= s2. +// +// Separate implementation for x86_64, for speed. Uses the fact that +// x86_64 is little endian. +#if defined(ARCH_K8) +static inline int FindMatchLength(const char* s1, + const char* s2, + const char* s2_limit) { + DCHECK_GE(s2_limit, s2); + int matched = 0; + + // Find out how long the match is. We loop over the data 64 bits at a + // time until we find a 64-bit block that doesn't match; then we find + // the first non-matching bit and use that to calculate the total + // length of the match. + while (PREDICT_TRUE(s2 <= s2_limit - 8)) { + if (PREDICT_FALSE(UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched))) { + s2 += 8; + matched += 8; + } else { + // On current (mid-2008) Opteron models there is a 3% more + // efficient code sequence to find the first non-matching byte. + // However, what follows is ~10% better on Intel Core 2 and newer, + // and we expect AMD's bsf instruction to improve. + uint64 x = UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1 + matched); + int matching_bits = Bits::FindLSBSetNonZero64(x); + matched += matching_bits >> 3; + return matched; + } + } + while (PREDICT_TRUE(s2 < s2_limit)) { + if (PREDICT_TRUE(s1[matched] == *s2)) { + ++s2; + ++matched; + } else { + return matched; + } + } + return matched; +} +#else +static inline int FindMatchLength(const char* s1, + const char* s2, + const char* s2_limit) { + // Implementation based on the x86-64 version, above. 
+ DCHECK_GE(s2_limit, s2); + int matched = 0; + + while (s2 <= s2_limit - 4 && + UNALIGNED_LOAD32(s2) == UNALIGNED_LOAD32(s1 + matched)) { + s2 += 4; + matched += 4; + } + if (LittleEndian::IsLittleEndian() && s2 <= s2_limit - 4) { + uint32 x = UNALIGNED_LOAD32(s2) ^ UNALIGNED_LOAD32(s1 + matched); + int matching_bits = Bits::FindLSBSetNonZero(x); + matched += matching_bits >> 3; + } else { + while ((s2 < s2_limit) && (s1[matched] == *s2)) { + ++s2; + ++matched; + } + } + return matched; +} +#endif + +} // end namespace internal +} // end namespace snappy + +#endif // UTIL_SNAPPY_SNAPPY_INTERNAL_H_ diff --git a/third_party/snappy/snappy-sinksource.cc b/third_party/snappy/snappy-sinksource.cc new file mode 100755 index 00000000000..1017895f962 --- /dev/null +++ b/third_party/snappy/snappy-sinksource.cc @@ -0,0 +1,72 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include <string.h> + +#include "snappy-sinksource.h" + +namespace snappy { + +Source::~Source() { } + +Sink::~Sink() { } + +char* Sink::GetAppendBuffer(size_t length, char* scratch) { + return scratch; +} + +ByteArraySource::~ByteArraySource() { } + +size_t ByteArraySource::Available() const { return left_; } + +const char* ByteArraySource::Peek(size_t* len) { + *len = left_; + return ptr_; +} + +void ByteArraySource::Skip(size_t n) { + left_ -= n; + ptr_ += n; +} + +UncheckedByteArraySink::~UncheckedByteArraySink() { } + +void UncheckedByteArraySink::Append(const char* data, size_t n) { + // Do no copying if the caller filled in the result of GetAppendBuffer() + if (data != dest_) { + memcpy(dest_, data, n); + } + dest_ += n; +} + +char* UncheckedByteArraySink::GetAppendBuffer(size_t len, char* scratch) { + return dest_; +} + + +} diff --git a/third_party/snappy/snappy-sinksource.h b/third_party/snappy/snappy-sinksource.h new file mode 100755 index 00000000000..430baeabb0e --- /dev/null +++ b/third_party/snappy/snappy-sinksource.h @@ -0,0 +1,136 @@ +// Copyright 2011 Google Inc. All Rights Reserved. 
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UTIL_SNAPPY_SNAPPY_SINKSOURCE_H_
+#define UTIL_SNAPPY_SNAPPY_SINKSOURCE_H_
+
+#include <stddef.h>
+
+
+namespace snappy {
+
+// A Sink is an interface that consumes a sequence of bytes.
+class Sink {
+ public:
+  Sink() { }
+  virtual ~Sink();
+
+  // Append "bytes[0,n-1]" to this.
+  virtual void Append(const char* bytes, size_t n) = 0;
+
+  // Returns a writable buffer of the specified length for appending.
+  // May return a pointer to the caller-owned scratch buffer which
+  // must have at least the indicated length.  The returned buffer is
+  // only valid until the next operation on this Sink.
+  //
+  // After writing at most "length" bytes, call Append() with the
+  // pointer returned from this function and the number of bytes
+  // written.  Many Append() implementations will avoid copying
+  // bytes if this function returned an internal buffer.
+  //
+  // If a non-scratch buffer is returned, the caller may only pass a
+  // prefix of it to Append().  That is, it is not correct to pass an
+  // interior pointer of the returned array to Append().
+  //
+  // The default implementation always returns the scratch buffer.
+  virtual char* GetAppendBuffer(size_t length, char* scratch);
+
+ private:
+  // No copying
+  Sink(const Sink&);
+  void operator=(const Sink&);
+};
+
+// A Source is an interface that yields a sequence of bytes.
+class Source {
+ public:
+  Source() { }
+  virtual ~Source();
+
+  // Return the number of bytes left to read from the source.
+  virtual size_t Available() const = 0;
+
+  // Peek at the next flat region of the source.  Does not reposition
+  // the source.  The returned region is empty iff Available()==0.
+  //
+  // Returns a pointer to the beginning of the region and stores its
+  // length in *len.
+  //
+  // The returned region is valid until the next call to Skip() or
+  // until this object is destroyed, whichever occurs first.
+  //
+  // The returned region may be larger than Available() (for example
+  // if this ByteSource is a view on a substring of a larger source).
+ // The caller is responsible for ensuring that it only reads the + // Available() bytes. + virtual const char* Peek(size_t* len) = 0; + + // Skip the next n bytes. Invalidates any buffer returned by + // a previous call to Peek(). + // REQUIRES: Available() >= n + virtual void Skip(size_t n) = 0; + + private: + // No copying + Source(const Source&); + void operator=(const Source&); +}; + +// A Source implementation that yields the contents of a flat array +class ByteArraySource : public Source { + public: + ByteArraySource(const char* p, size_t n) : ptr_(p), left_(n) { } + virtual ~ByteArraySource(); + virtual size_t Available() const; + virtual const char* Peek(size_t* len); + virtual void Skip(size_t n); + private: + const char* ptr_; + size_t left_; +}; + +// A Sink implementation that writes to a flat array without any bound checks. +class UncheckedByteArraySink : public Sink { + public: + explicit UncheckedByteArraySink(char* dest) : dest_(dest) { } + virtual ~UncheckedByteArraySink(); + virtual void Append(const char* data, size_t n); + virtual char* GetAppendBuffer(size_t len, char* scratch); + + // Return the current output pointer so that a caller can see how + // many bytes were produced. + // Note: this is not a Sink method. + char* CurrentDestination() const { return dest_; } + private: + char* dest_; +}; + + +} + +#endif // UTIL_SNAPPY_SNAPPY_SINKSOURCE_H_ diff --git a/third_party/snappy/snappy-stubs-internal.cc b/third_party/snappy/snappy-stubs-internal.cc new file mode 100755 index 00000000000..6ed334371f1 --- /dev/null +++ b/third_party/snappy/snappy-stubs-internal.cc @@ -0,0 +1,42 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
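+//
+// This file provides the out-of-line pieces of the stubs; at present that
+// is just Varint::Append32(), defined below.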
+ +#include <algorithm> +#include <string> + +#include "snappy-stubs-internal.h" + +namespace snappy { + +void Varint::Append32(string* s, uint32 value) { + char buf[Varint::kMax32]; + const char* p = Varint::Encode32(buf, value); + s->append(buf, p - buf); +} + +} // namespace snappy diff --git a/third_party/snappy/snappy-stubs-internal.h b/third_party/snappy/snappy-stubs-internal.h new file mode 100755 index 00000000000..355a06bc568 --- /dev/null +++ b/third_party/snappy/snappy-stubs-internal.h @@ -0,0 +1,478 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Various stubs for the open-source version of Snappy. + +#ifndef UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_ +#define UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <iostream> +#include <string> + +#include <assert.h> +#include <stdlib.h> +#include <string.h> + +#ifdef HAVE_SYS_MMAN +#include <sys/mman.h> +#endif + +#include "snappy-stubs-public.h" + +#if defined(__x86_64__) + +// Enable 64-bit optimized versions of some routines. +#define ARCH_K8 1 + +#endif + +// Needed by OS X, among others. +#ifndef MAP_ANONYMOUS +#define MAP_ANONYMOUS MAP_ANON +#endif + +// Pull in std::min, std::ostream, and the likes. This is safe because this +// header file is never used from any public header files. +using namespace std; + +// The size of an array, if known at compile-time. +// Will give unexpected results if used on a pointer. +// We undefine it first, since some compilers already have a definition. +#ifdef ARRAYSIZE +#undef ARRAYSIZE +#endif +#define ARRAYSIZE(a) (sizeof(a) / sizeof(*(a))) + +// Static prediction hints. 
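+// (Illustrative note: these wrap gcc's __builtin_expect.  Typical usage is
+// "if (PREDICT_FALSE(error)) { ... }" to move the error path off the hot
+// path; without __builtin_expect they reduce to the bare expression.)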
+#ifdef HAVE_BUILTIN_EXPECT +#define PREDICT_FALSE(x) (__builtin_expect(x, 0)) +#define PREDICT_TRUE(x) (__builtin_expect(!!(x), 1)) +#else +#define PREDICT_FALSE(x) x +#define PREDICT_TRUE(x) x +#endif + +// This is only used for recomputing the tag byte table used during +// decompression; for simplicity we just remove it from the open-source +// version (anyone who wants to regenerate it can just do the call +// themselves within main()). +#define DEFINE_bool(flag_name, default_value, description) \ + bool FLAGS_ ## flag_name = default_value; +#define DECLARE_bool(flag_name) \ + extern bool FLAGS_ ## flag_name; +#define REGISTER_MODULE_INITIALIZER(name, code) + +namespace snappy { + +static const uint32 kuint32max = static_cast<uint32>(0xFFFFFFFF); +static const int64 kint64max = static_cast<int64>(0x7FFFFFFFFFFFFFFFLL); + +// Logging. + +#define LOG(level) LogMessage() +#define VLOG(level) true ? (void)0 : \ + snappy::LogMessageVoidify() & snappy::LogMessage() + +class LogMessage { + public: + LogMessage() { } + ~LogMessage() { + cerr << endl; + } + + LogMessage& operator<<(const std::string& msg) { + cerr << msg; + return *this; + } + LogMessage& operator<<(int x) { + cerr << x; + return *this; + } +}; + +// Asserts, both versions activated in debug mode only, +// and ones that are always active. + +#define CRASH_UNLESS(condition) \ + PREDICT_TRUE(condition) ? (void)0 : \ + snappy::LogMessageVoidify() & snappy::LogMessageCrash() + +class LogMessageCrash : public LogMessage { + public: + LogMessageCrash() { } + ~LogMessageCrash() { + cerr << endl; + abort(); + } +}; + +// This class is used to explicitly ignore values in the conditional +// logging macros. This avoids compiler warnings like "value computed +// is not used" and "statement has no effect". + +class LogMessageVoidify { + public: + LogMessageVoidify() { } + // This has to be an operator with a precedence lower than << but + // higher than ?: + void operator&(const LogMessage&) { } +}; + +#define CHECK(cond) CRASH_UNLESS(cond) +#define CHECK_LE(a, b) CRASH_UNLESS((a) <= (b)) +#define CHECK_GE(a, b) CRASH_UNLESS((a) >= (b)) +#define CHECK_EQ(a, b) CRASH_UNLESS((a) == (b)) +#define CHECK_NE(a, b) CRASH_UNLESS((a) != (b)) +#define CHECK_LT(a, b) CRASH_UNLESS((a) < (b)) +#define CHECK_GT(a, b) CRASH_UNLESS((a) > (b)) + +#ifdef NDEBUG + +#define DCHECK(cond) CRASH_UNLESS(true) +#define DCHECK_LE(a, b) CRASH_UNLESS(true) +#define DCHECK_GE(a, b) CRASH_UNLESS(true) +#define DCHECK_EQ(a, b) CRASH_UNLESS(true) +#define DCHECK_NE(a, b) CRASH_UNLESS(true) +#define DCHECK_LT(a, b) CRASH_UNLESS(true) +#define DCHECK_GT(a, b) CRASH_UNLESS(true) + +#else + +#define DCHECK(cond) CHECK(cond) +#define DCHECK_LE(a, b) CHECK_LE(a, b) +#define DCHECK_GE(a, b) CHECK_GE(a, b) +#define DCHECK_EQ(a, b) CHECK_EQ(a, b) +#define DCHECK_NE(a, b) CHECK_NE(a, b) +#define DCHECK_LT(a, b) CHECK_LT(a, b) +#define DCHECK_GT(a, b) CHECK_GT(a, b) + +#endif + +// Potentially unaligned loads and stores. 
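+// (Illustrative note: the cast-based macros in the first branch assume the
+// CPU tolerates unaligned access, as x86 does; the memcpy-based versions in
+// the second branch are the strictly portable fallback, and most compilers
+// turn the fixed-size memcpy into a single load or store anyway.)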
+ +#if 1 +//#if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || defined(_WIN32) + +#define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p)) +#define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p)) +#define UNALIGNED_LOAD64(_p) (*reinterpret_cast<const uint64 *>(_p)) + +#define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast<uint16 *>(_p) = (_val)) +#define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast<uint32 *>(_p) = (_val)) +#define UNALIGNED_STORE64(_p, _val) (*reinterpret_cast<uint64 *>(_p) = (_val)) + +#else + +// These functions are provided for architectures that don't support +// unaligned loads and stores. + +inline uint16 UNALIGNED_LOAD16(const void *p) { + uint16 t; + memcpy(&t, p, sizeof t); + return t; +} + +inline uint32 UNALIGNED_LOAD32(const void *p) { + uint32 t; + memcpy(&t, p, sizeof t); + return t; +} + +inline uint64 UNALIGNED_LOAD64(const void *p) { + uint64 t; + memcpy(&t, p, sizeof t); + return t; +} + +inline void UNALIGNED_STORE16(void *p, uint16 v) { + memcpy(p, &v, sizeof v); +} + +inline void UNALIGNED_STORE32(void *p, uint32 v) { + memcpy(p, &v, sizeof v); +} + +inline void UNALIGNED_STORE64(void *p, uint64 v) { + memcpy(p, &v, sizeof v); +} + +#endif + +// The following guarantees declaration of the byte swap functions. +#ifdef WORDS_BIGENDIAN + +#ifdef _MSC_VER +#include <stdlib.h> +#define bswap_16(x) _byteswap_ushort(x) +#define bswap_32(x) _byteswap_ulong(x) +#define bswap_64(x) _byteswap_uint64(x) + +#elif defined(__APPLE__) +// Mac OS X / Darwin features +#include <libkern/OSByteOrder.h> +#define bswap_16(x) OSSwapInt16(x) +#define bswap_32(x) OSSwapInt32(x) +#define bswap_64(x) OSSwapInt64(x) + +#else +#include <byteswap.h> +#endif + +#endif // WORDS_BIGENDIAN + +// Convert to little-endian storage, opposite of network format. +// Convert x from host to little endian: x = LittleEndian.FromHost(x); +// convert x from little endian to host: x = LittleEndian.ToHost(x); +// +// Store values into unaligned memory converting to little endian order: +// LittleEndian.Store16(p, x); +// +// Load unaligned values stored in little endian converting to host order: +// x = LittleEndian.Load16(p); +class LittleEndian { + public: + // Conversion functions. +#ifdef WORDS_BIGENDIAN + + static uint16 FromHost16(uint16 x) { return bswap_16(x); } + static uint16 ToHost16(uint16 x) { return bswap_16(x); } + + static uint32 FromHost32(uint32 x) { return bswap_32(x); } + static uint32 ToHost32(uint32 x) { return bswap_32(x); } + + static bool IsLittleEndian() { return false; } + +#else // !defined(WORDS_BIGENDIAN) + + static uint16 FromHost16(uint16 x) { return x; } + static uint16 ToHost16(uint16 x) { return x; } + + static uint32 FromHost32(uint32 x) { return x; } + static uint32 ToHost32(uint32 x) { return x; } + + static bool IsLittleEndian() { return true; } + +#endif // !defined(WORDS_BIGENDIAN) + + // Functions to do unaligned loads and stores in little-endian order. + static uint16 Load16(const void *p) { + return ToHost16(UNALIGNED_LOAD16(p)); + } + + static void Store16(void *p, uint16 v) { + UNALIGNED_STORE16(p, FromHost16(v)); + } + + static uint32 Load32(const void *p) { + return ToHost32(UNALIGNED_LOAD32(p)); + } + + static void Store32(void *p, uint32 v) { + UNALIGNED_STORE32(p, FromHost32(v)); + } +}; + +// Some bit-manipulation functions. +class Bits { + public: + // Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0. 
+  static int Log2Floor(uint32 n);
+
+  // Return the index of the least-significant set bit, 0-indexed.  Returns an
+  // undefined value if n == 0.  FindLSBSetNonZero() is similar to ffs() except
+  // that it's 0-indexed.
+  static int FindLSBSetNonZero(uint32 n);
+  static int FindLSBSetNonZero64(uint64 n);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(Bits);
+};
+
+#ifdef HAVE_BUILTIN_CTZ
+
+inline int Bits::Log2Floor(uint32 n) {
+  return n == 0 ? -1 : 31 ^ __builtin_clz(n);
+}
+
+inline int Bits::FindLSBSetNonZero(uint32 n) {
+  return __builtin_ctz(n);
+}
+
+inline int Bits::FindLSBSetNonZero64(uint64 n) {
+  return __builtin_ctzll(n);
+}
+
+#else  // Portable versions.
+
+inline int Bits::Log2Floor(uint32 n) {
+  if (n == 0)
+    return -1;
+  int log = 0;
+  uint32 value = n;
+  for (int i = 4; i >= 0; --i) {
+    int shift = (1 << i);
+    uint32 x = value >> shift;
+    if (x != 0) {
+      value = x;
+      log += shift;
+    }
+  }
+  assert(value == 1);
+  return log;
+}
+
+inline int Bits::FindLSBSetNonZero(uint32 n) {
+  int rc = 31;
+  for (int i = 4, shift = 1 << 4; i >= 0; --i) {
+    const uint32 x = n << shift;
+    if (x != 0) {
+      n = x;
+      rc -= shift;
+    }
+    shift >>= 1;
+  }
+  return rc;
+}
+
+// FindLSBSetNonZero64() is defined in terms of FindLSBSetNonZero().
+inline int Bits::FindLSBSetNonZero64(uint64 n) {
+  const uint32 bottombits = static_cast<uint32>(n);
+  if (bottombits == 0) {
+    // Bottom bits are zero, so scan in top bits
+    return 32 + FindLSBSetNonZero(static_cast<uint32>(n >> 32));
+  } else {
+    return FindLSBSetNonZero(bottombits);
+  }
+}
+
+#endif  // End portable versions.
+
+// Variable-length integer encoding.
+class Varint {
+ public:
+  // Maximum length of varint encoding of uint32.
+  static const int kMax32 = 5;
+
+  // Attempts to parse a varint32 from a prefix of the bytes in [ptr,limit-1].
+  // Never reads a character at or beyond limit.  If a valid/terminated
+  // varint32 was found in the range, stores it in *OUTPUT and returns a
+  // pointer just past the last byte of the varint32.  Else returns NULL.
+  // On success, "result <= limit".
+  static const char* Parse32WithLimit(const char* ptr, const char* limit,
+                                      uint32* OUTPUT);
+
+  // REQUIRES   "ptr" points to a buffer of length sufficient to hold "v".
+  // EFFECTS    Encodes "v" into "ptr" and returns a pointer to the
+  //            byte just past the last encoded byte.
+  static char* Encode32(char* ptr, uint32 v);
+
+  // EFFECTS    Appends the varint representation of "value" to "*s".
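+  //            For example (illustrative): Append32(&s, 300) appends the
+  //            two bytes 0xAC 0x02, since 300 = 0b100101100 encodes as the
+  //            low seven bits with the continuation bit set (0xAC) followed
+  //            by the remaining bits (0x02).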
+ static void Append32(string* s, uint32 value); +}; + +inline const char* Varint::Parse32WithLimit(const char* p, + const char* l, + uint32* OUTPUT) { + const unsigned char* ptr = reinterpret_cast<const unsigned char*>(p); + const unsigned char* limit = reinterpret_cast<const unsigned char*>(l); + uint32 b, result; + if (ptr >= limit) return NULL; + b = *(ptr++); result = b & 127; if (b < 128) goto done; + if (ptr >= limit) return NULL; + b = *(ptr++); result |= (b & 127) << 7; if (b < 128) goto done; + if (ptr >= limit) return NULL; + b = *(ptr++); result |= (b & 127) << 14; if (b < 128) goto done; + if (ptr >= limit) return NULL; + b = *(ptr++); result |= (b & 127) << 21; if (b < 128) goto done; + if (ptr >= limit) return NULL; + b = *(ptr++); result |= (b & 127) << 28; if (b < 16) goto done; + return NULL; // Value is too long to be a varint32 + done: + *OUTPUT = result; + return reinterpret_cast<const char*>(ptr); +} + +inline char* Varint::Encode32(char* sptr, uint32 v) { + // Operate on characters as unsigneds + unsigned char* ptr = reinterpret_cast<unsigned char*>(sptr); + static const int B = 128; + if (v < (1<<7)) { + *(ptr++) = v; + } else if (v < (1<<14)) { + *(ptr++) = v | B; + *(ptr++) = v>>7; + } else if (v < (1<<21)) { + *(ptr++) = v | B; + *(ptr++) = (v>>7) | B; + *(ptr++) = v>>14; + } else if (v < (1<<28)) { + *(ptr++) = v | B; + *(ptr++) = (v>>7) | B; + *(ptr++) = (v>>14) | B; + *(ptr++) = v>>21; + } else { + *(ptr++) = v | B; + *(ptr++) = (v>>7) | B; + *(ptr++) = (v>>14) | B; + *(ptr++) = (v>>21) | B; + *(ptr++) = v>>28; + } + return reinterpret_cast<char*>(ptr); +} + +// If you know the internal layout of the std::string in use, you can +// replace this function with one that resizes the string without +// filling the new space with zeros (if applicable) -- +// it will be non-portable but faster. +inline void STLStringResizeUninitialized(string* s, size_t new_size) { + s->resize(new_size); +} + +// Return a mutable char* pointing to a string's internal buffer, +// which may not be null-terminated. Writing through this pointer will +// modify the string. +// +// string_as_array(&str)[i] is valid for 0 <= i < str.size() until the +// next call to a string method that invalidates iterators. +// +// As of 2006-04, there is no standard-blessed way of getting a +// mutable reference to a string's internal buffer. However, issue 530 +// (http://www.open-std.org/JTC1/SC22/WG21/docs/lwg-defects.html#530) +// proposes this as the method. It will officially be part of the standard +// for C++0x. This should already work on all current implementations. +inline char* string_as_array(string* str) { + return str->empty() ? NULL : &*str->begin(); +} + +} // namespace snappy + +#endif // UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_ diff --git a/third_party/snappy/snappy-stubs-public.h b/third_party/snappy/snappy-stubs-public.h new file mode 100755 index 00000000000..074d4638866 --- /dev/null +++ b/third_party/snappy/snappy-stubs-public.h @@ -0,0 +1,85 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// Author: sesse@google.com (Steinar H. Gunderson) +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Various type stubs for the open-source version of Snappy. +// +// This file cannot include config.h, as it is included from snappy.h, +// which is a public header. Instead, snappy-stubs-public.h is generated by +// from snappy-stubs-public.h.in at configure time. + +#ifndef UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_ +#define UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_ + +#if !defined(_WIN32) +#include <stdint.h> +#endif + +#if 1 +#include <stddef.h> +#endif + +#define SNAPPY_MAJOR 1 +#define SNAPPY_MINOR 0 +#define SNAPPY_PATCHLEVEL 3 +#define SNAPPY_VERSION \ + ((SNAPPY_MAJOR << 16) | (SNAPPY_MINOR << 8) | SNAPPY_PATCHLEVEL) + +#include <string> + +namespace snappy { + +#if !defined(_WIN32) +typedef int8_t int8; +typedef uint8_t uint8; +typedef int16_t int16; +typedef uint16_t uint16; +typedef int32_t int32; +typedef uint32_t uint32; +typedef int64_t int64; +typedef uint64_t uint64; +#else +typedef signed char int8; +typedef unsigned char uint8; +typedef short int16; +typedef unsigned short uint16; +typedef int int32; +typedef unsigned int uint32; +typedef long long int64; +typedef unsigned long long uint64; +#endif + +typedef std::string string; + +#define DISALLOW_COPY_AND_ASSIGN(TypeName) \ + TypeName(const TypeName&); \ + void operator=(const TypeName&) + +} // namespace snappy + +#endif // UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_ diff --git a/third_party/snappy/snappy.cc b/third_party/snappy/snappy.cc new file mode 100755 index 00000000000..fdc67e886c6 --- /dev/null +++ b/third_party/snappy/snappy.cc @@ -0,0 +1,1026 @@ +// Copyright 2005 Google Inc. All Rights Reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "snappy.h" +#include "snappy-internal.h" +#include "snappy-sinksource.h" + +#include <stdio.h> + +#include <algorithm> +#include <string> +#include <vector> + + +namespace snappy { + +// Any hash function will produce a valid compressed bitstream, but a good +// hash function reduces the number of collisions and thus yields better +// compression for compressible input, and more speed for incompressible +// input. Of course, it doesn't hurt if the hash function is reasonably fast +// either, as it gets called a lot. +static inline uint32 HashBytes(uint32 bytes, int shift) { + uint32 kMul = 0x1e35a7bd; + return (bytes * kMul) >> shift; +} +static inline uint32 Hash(const char* p, int shift) { + return HashBytes(UNALIGNED_LOAD32(p), shift); +} + +size_t MaxCompressedLength(size_t source_len) { + // Compressed data can be defined as: + // compressed := item* literal* + // item := literal* copy + // + // The trailing literal sequence has a space blowup of at most 62/60 + // since a literal of length 60 needs one tag byte + one extra byte + // for length information. + // + // Item blowup is trickier to measure. Suppose the "copy" op copies + // 4 bytes of data. Because of a special check in the encoding code, + // we produce a 4-byte copy only if the offset is < 65536. Therefore + // the copy op takes 3 bytes to encode, and this type of item leads + // to at most the 62/60 blowup for representing literals. + // + // Suppose the "copy" op copies 5 bytes of data. If the offset is big + // enough, it will take 5 bytes to encode the copy op. Therefore the + // worst case here is a one-byte literal followed by a five-byte copy. + // I.e., 6 bytes of input turn into 7 bytes of "compressed" data. + // + // This last factor dominates the blowup, so the final estimate is: + return 32 + source_len + source_len/6; +} + +enum { + LITERAL = 0, + COPY_1_BYTE_OFFSET = 1, // 3 bit length + 3 bits of offset in opcode + COPY_2_BYTE_OFFSET = 2, + COPY_4_BYTE_OFFSET = 3 +}; + +// Copy "len" bytes from "src" to "op", one byte at a time. Used for +// handling COPY operations where the input and output regions may +// overlap. For example, suppose: +// src == "ab" +// op == src + 2 +// len == 20 +// After IncrementalCopy(src, op, len), the result will have +// eleven copies of "ab" +// ababababababababababab +// Note that this does not match the semantics of either memcpy() +// or memmove(). +static inline void IncrementalCopy(const char* src, char* op, int len) { + DCHECK_GT(len, 0); + do { + *op++ = *src++; + } while (--len > 0); +} + +// Equivalent to IncrementalCopy except that it can write up to ten extra +// bytes after the end of the copy, and that it is faster. 
+// +// The main part of this loop is a simple copy of eight bytes at a time until +// we've copied (at least) the requested amount of bytes. However, if op and +// src are less than eight bytes apart (indicating a repeating pattern of +// length < 8), we first need to expand the pattern in order to get the correct +// results. For instance, if the buffer looks like this, with the eight-byte +// <src> and <op> patterns marked as intervals: +// +// abxxxxxxxxxxxx +// [------] src +// [------] op +// +// a single eight-byte copy from <src> to <op> will repeat the pattern once, +// after which we can move <op> two bytes without moving <src>: +// +// ababxxxxxxxxxx +// [------] src +// [------] op +// +// and repeat the exercise until the two no longer overlap. +// +// This allows us to do very well in the special case of one single byte +// repeated many times, without taking a big hit for more general cases. +// +// The worst case of extra writing past the end of the match occurs when +// op - src == 1 and len == 1; the last copy will read from byte positions +// [0..7] and write to [4..11], whereas it was only supposed to write to +// position 1. Thus, ten excess bytes. + +namespace { + +const int kMaxIncrementCopyOverflow = 10; + +} // namespace + +static inline void IncrementalCopyFastPath(const char* src, char* op, int len) { + while (op - src < 8) { + UNALIGNED_STORE64(op, UNALIGNED_LOAD64(src)); + len -= op - src; + op += op - src; + } + while (len > 0) { + UNALIGNED_STORE64(op, UNALIGNED_LOAD64(src)); + src += 8; + op += 8; + len -= 8; + } +} + +static inline char* EmitLiteral(char* op, + const char* literal, + int len, + bool allow_fast_path) { + int n = len - 1; // Zero-length literals are disallowed + if (n < 60) { + // Fits in tag byte + *op++ = LITERAL | (n << 2); + + // The vast majority of copies are below 16 bytes, for which a + // call to memcpy is overkill. This fast path can sometimes + // copy up to 15 bytes too much, but that is okay in the + // main loop, since we have a bit to go on for both sides: + // + // - The input will always have kInputMarginBytes = 15 extra + // available bytes, as long as we're in the main loop, and + // if not, allow_fast_path = false. + // - The output will always have 32 spare bytes (see + // MaxCompressedLength). 
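+    // For example (illustrative): a 5-byte literal has n == 4, so the tag
+    // byte written above is LITERAL | (4 << 2) == 0x10, and the five literal
+    // bytes follow it in the output.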
+    if (allow_fast_path && len <= 16) {
+      UNALIGNED_STORE64(op, UNALIGNED_LOAD64(literal));
+      UNALIGNED_STORE64(op + 8, UNALIGNED_LOAD64(literal + 8));
+      return op + len;
+    }
+  } else {
+    // Encode in upcoming bytes
+    char* base = op;
+    int count = 0;
+    op++;
+    while (n > 0) {
+      *op++ = n & 0xff;
+      n >>= 8;
+      count++;
+    }
+    assert(count >= 1);
+    assert(count <= 4);
+    *base = LITERAL | ((59+count) << 2);
+  }
+  memcpy(op, literal, len);
+  return op + len;
+}
+
+static inline char* EmitCopyLessThan64(char* op, int offset, int len) {
+  DCHECK_LE(len, 64);
+  DCHECK_GE(len, 4);
+  DCHECK_LT(offset, 65536);
+
+  if ((len < 12) && (offset < 2048)) {
+    int len_minus_4 = len - 4;
+    assert(len_minus_4 < 8);        // Must fit in 3 bits
+    *op++ = COPY_1_BYTE_OFFSET | ((len_minus_4) << 2) | ((offset >> 8) << 5);
+    *op++ = offset & 0xff;
+  } else {
+    *op++ = COPY_2_BYTE_OFFSET | ((len-1) << 2);
+    LittleEndian::Store16(op, offset);
+    op += 2;
+  }
+  return op;
+}
+
+static inline char* EmitCopy(char* op, int offset, int len) {
+  // Emit 64 byte copies but make sure to keep at least four bytes reserved
+  while (len >= 68) {
+    op = EmitCopyLessThan64(op, offset, 64);
+    len -= 64;
+  }
+
+  // Emit an extra 60 byte copy if we have too much data to fit in one copy
+  if (len > 64) {
+    op = EmitCopyLessThan64(op, offset, 60);
+    len -= 60;
+  }
+
+  // Emit remainder
+  op = EmitCopyLessThan64(op, offset, len);
+  return op;
+}
+
+
+bool GetUncompressedLength(const char* start, size_t n, size_t* result) {
+  uint32 v = 0;
+  const char* limit = start + n;
+  if (Varint::Parse32WithLimit(start, limit, &v) != NULL) {
+    *result = v;
+    return true;
+  } else {
+    return false;
+  }
+}
+
+namespace internal {
+uint16* WorkingMemory::GetHashTable(size_t input_size, int* table_size) {
+  // Use smaller hash table when input.size() is smaller, since we
+  // fill the table, incurring O(hash table size) overhead for
+  // compression, and if the input is short, we won't need that
+  // many hash table entries anyway.
+  assert(kMaxHashTableSize >= 256);
+  int htsize = 256;
+  while (htsize < kMaxHashTableSize && htsize < input_size) {
+    htsize <<= 1;
+  }
+  CHECK_EQ(0, htsize & (htsize - 1)) << ": must be power of two";
+  CHECK_LE(htsize, kMaxHashTableSize) << ": hash table too large";
+
+  uint16* table;
+  if (htsize <= ARRAYSIZE(small_table_)) {
+    table = small_table_;
+  } else {
+    if (large_table_ == NULL) {
+      large_table_ = new uint16[kMaxHashTableSize];
+    }
+    table = large_table_;
+  }
+
+  *table_size = htsize;
+  memset(table, 0, htsize * sizeof(*table));
+  return table;
+}
+}  // end namespace internal
+
+#if defined(_WIN32)
+// signed/unsigned mismatch
+# pragma warning( disable : 4244 )
+#endif
+
+// For 0 <= offset <= 4, GetUint32AtOffset(UNALIGNED_LOAD64(p), offset) will
+// equal UNALIGNED_LOAD32(p + offset).  Motivation: On x86-64 hardware we have
+// empirically found that overlapping loads such as
+//  UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2)
+// are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32.
+static inline uint32 GetUint32AtOffset(uint64 v, int offset) {
+  DCHECK(0 <= offset && offset <= 4) << offset;
+  return v >> (LittleEndian::IsLittleEndian() ? 8 * offset : 32 - 8 * offset);
+}
+
+// Flat array compression that does not emit the "uncompressed length"
+// prefix. Compresses "input" string to the "*op" buffer.
+//
+// REQUIRES: "input" is at most "kBlockSize" bytes long.
+// REQUIRES: "op" points to an array of memory that is at least
+// "MaxCompressedLength(input.size())" in size.
+// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero. +// REQUIRES: "table_size" is a power of two +// +// Returns an "end" pointer into "op" buffer. +// "end - op" is the compressed size of "input". +namespace internal { +char* CompressFragment(const char* const input, + const size_t input_size, + char* op, + uint16* table, + const int table_size) { + // "ip" is the input pointer, and "op" is the output pointer. + const char* ip = input; + CHECK_LE(input_size, kBlockSize); + CHECK_EQ(table_size & (table_size - 1), 0) << ": table must be power of two"; + const int shift = 32 - Bits::Log2Floor(table_size); + DCHECK_EQ(kuint32max >> shift, table_size - 1); + const char* ip_end = input + input_size; + const char* base_ip = ip; + // Bytes in [next_emit, ip) will be emitted as literal bytes. Or + // [next_emit, ip_end) after the main loop. + const char* next_emit = ip; + + const int kInputMarginBytes = 15; + if (PREDICT_TRUE(input_size >= kInputMarginBytes)) { + const char* ip_limit = input + input_size - kInputMarginBytes; + + for (uint32 next_hash = Hash(++ip, shift); ; ) { + DCHECK_LT(next_emit, ip); + // The body of this loop calls EmitLiteral once and then EmitCopy one or + // more times. (The exception is that when we're close to exhausting + // the input we goto emit_remainder.) + // + // In the first iteration of this loop we're just starting, so + // there's nothing to copy, so calling EmitLiteral once is + // necessary. And we only start a new iteration when the + // current iteration has determined that a call to EmitLiteral will + // precede the next call to EmitCopy (if any). + // + // Step 1: Scan forward in the input looking for a 4-byte-long match. + // If we get close to exhausting the input then goto emit_remainder. + // + // Heuristic match skipping: If 32 bytes are scanned with no matches + // found, start looking only at every other byte. If 32 more bytes are + // scanned, look at every third byte, etc.. When a match is found, + // immediately go back to looking at every byte. This is a small loss + // (~5% performance, ~0.1% density) for compressible data due to more + // bookkeeping, but for non-compressible data (such as JPEG) it's a huge + // win since the compressor quickly "realizes" the data is incompressible + // and doesn't bother looking for matches everywhere. + // + // The "skip" variable keeps track of how many bytes there are since the + // last match; dividing it by 32 (ie. right-shifting by five) gives the + // number of bytes to move ahead for each iteration. + uint32 skip = 32; + + const char* next_ip = ip; + const char* candidate; + do { + ip = next_ip; + uint32 hash = next_hash; + DCHECK_EQ(hash, Hash(ip, shift)); + uint32 bytes_between_hash_lookups = skip++ >> 5; + next_ip = ip + bytes_between_hash_lookups; + if (PREDICT_FALSE(next_ip > ip_limit)) { + goto emit_remainder; + } + next_hash = Hash(next_ip, shift); + candidate = base_ip + table[hash]; + DCHECK_GE(candidate, base_ip); + DCHECK_LT(candidate, ip); + + table[hash] = ip - base_ip; + } while (PREDICT_TRUE(UNALIGNED_LOAD32(ip) != + UNALIGNED_LOAD32(candidate))); + + // Step 2: A 4-byte match has been found. We'll later see if more + // than 4 bytes match. But, prior to the match, input + // bytes [next_emit, ip) are unmatched. Emit them as "literal bytes." + DCHECK_LE(next_emit + 16, ip_end); + op = EmitLiteral(op, next_emit, ip - next_emit, true); + + // Step 3: Call EmitCopy, and then see if another EmitCopy could + // be our next move. 
Repeat until we find no match for the + // input immediately after what was consumed by the last EmitCopy call. + // + // If we exit this loop normally then we need to call EmitLiteral next, + // though we don't yet know how big the literal will be. We handle that + // by proceeding to the next iteration of the main loop. We also can exit + // this loop via goto if we get close to exhausting the input. + uint64 input_bytes = 0; + uint32 candidate_bytes = 0; + + do { + // We have a 4-byte match at ip, and no need to emit any + // "literal bytes" prior to ip. + const char* base = ip; + int matched = 4 + FindMatchLength(candidate + 4, ip + 4, ip_end); + ip += matched; + int offset = base - candidate; + DCHECK_EQ(0, memcmp(base, candidate, matched)); + op = EmitCopy(op, offset, matched); + // We could immediately start working at ip now, but to improve + // compression we first update table[Hash(ip - 1, ...)]. + const char* insert_tail = ip - 1; + next_emit = ip; + if (PREDICT_FALSE(ip >= ip_limit)) { + goto emit_remainder; + } + input_bytes = UNALIGNED_LOAD64(insert_tail); + uint32 prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift); + table[prev_hash] = ip - base_ip - 1; + uint32 cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift); + candidate = base_ip + table[cur_hash]; + candidate_bytes = UNALIGNED_LOAD32(candidate); + table[cur_hash] = ip - base_ip; + } while (GetUint32AtOffset(input_bytes, 1) == candidate_bytes); + + next_hash = HashBytes(GetUint32AtOffset(input_bytes, 2), shift); + ++ip; + } + } + + emit_remainder: + // Emit the remaining bytes as a literal + if (next_emit < ip_end) { + op = EmitLiteral(op, next_emit, ip_end - next_emit, false); + } + + return op; +} +} // end namespace internal + +// Signature of output types needed by decompression code. +// The decompression code is templatized on a type that obeys this +// signature so that we do not pay virtual function call overhead in +// the middle of a tight decompression loop. +// +// class DecompressionWriter { +// public: +// // Called before decompression +// void SetExpectedLength(size_t length); +// +// // Called after decompression +// bool CheckLength() const; +// +// // Called repeatedly during decompression +// bool Append(const char* ip, uint32 length, bool allow_fast_path); +// bool AppendFromSelf(uint32 offset, uint32 length); +// }; +// +// "allow_fast_path" is a parameter that says if there is at least 16 +// readable bytes in "ip". It is currently only used by SnappyArrayWriter. + +// ----------------------------------------------------------------------- +// Lookup table for decompression code. Generated by ComputeTable() below. 
+// ----------------------------------------------------------------------- + +// Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits +static const uint32 wordmask[] = { + 0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu +}; + +// Data stored per entry in lookup table: +// Range Bits-used Description +// ------------------------------------ +// 1..64 0..7 Literal/copy length encoded in opcode byte +// 0..7 8..10 Copy offset encoded in opcode byte / 256 +// 0..4 11..13 Extra bytes after opcode +// +// We use eight bits for the length even though 7 would have sufficed +// because of efficiency reasons: +// (1) Extracting a byte is faster than a bit-field +// (2) It properly aligns copy offset so we do not need a <<8 +static const uint16 char_table[256] = { + 0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002, + 0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004, + 0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006, + 0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008, + 0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a, + 0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c, + 0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e, + 0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010, + 0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012, + 0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014, + 0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016, + 0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018, + 0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a, + 0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c, + 0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e, + 0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020, + 0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022, + 0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024, + 0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026, + 0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028, + 0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a, + 0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c, + 0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e, + 0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030, + 0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032, + 0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034, + 0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036, + 0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038, + 0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a, + 0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c, + 0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e, + 0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040 +}; + +// In debug mode, allow optional computation of the table at startup. +// Also, check that the decompression table is correct. 
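+// For example (illustrative): tag byte 0x01 encodes COPY_1_BYTE_OFFSET with
+// length 4 and zero high offset bits, so char_table[0x01] == 0x0804 ==
+// MakeEntry(1, 4, 0): one extra byte after the opcode, length 4, and
+// copy_offset/256 == 0.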
+#ifndef NDEBUG
+DEFINE_bool(snappy_dump_decompression_table, false,
+            "If true, we print the decompression table at startup.");
+
+static uint16 MakeEntry(unsigned int extra,
+                        unsigned int len,
+                        unsigned int copy_offset) {
+  // Check that all of the fields fit within the allocated space
+  DCHECK_EQ(extra,       extra & 0x7);          // At most 3 bits
+  DCHECK_EQ(copy_offset, copy_offset & 0x7);    // At most 3 bits
+  DCHECK_EQ(len,         len & 0x7f);           // At most 7 bits
+  return len | (copy_offset << 8) | (extra << 11);
+}
+
+static void ComputeTable() {
+  uint16 dst[256];
+
+  // Place invalid entries in all places to detect missing initialization
+  int assigned = 0;
+  for (int i = 0; i < 256; i++) {
+    dst[i] = 0xffff;
+  }
+
+  // Small LITERAL entries.  We store (len-1) in the top 6 bits.
+  for (unsigned int len = 1; len <= 60; len++) {
+    dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0);
+    assigned++;
+  }
+
+  // Large LITERAL entries.  We use 60..63 in the high 6 bits to
+  // encode the number of bytes of length info that follow the opcode.
+  for (unsigned int extra_bytes = 1; extra_bytes <= 4; extra_bytes++) {
+    // We set the length field in the lookup table to 1 because extra
+    // bytes encode len-1.
+    dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0);
+    assigned++;
+  }
+
+  // COPY_1_BYTE_OFFSET.
+  //
+  // The tag byte in the compressed data stores len-4 in 3 bits, and
+  // offset/256 in 5 bits.  offset%256 is stored in the next byte.
+  //
+  // This format is used for length in range [4..11] and offset in
+  // range [0..2047]
+  for (unsigned int len = 4; len < 12; len++) {
+    for (unsigned int offset = 0; offset < 2048; offset += 256) {
+      dst[COPY_1_BYTE_OFFSET | ((len-4)<<2) | ((offset>>8)<<5)] =
+        MakeEntry(1, len, offset>>8);
+      assigned++;
+    }
+  }
+
+  // COPY_2_BYTE_OFFSET.
+  // Tag contains len-1 in top 6 bits, and offset in next two bytes.
+  for (unsigned int len = 1; len <= 64; len++) {
+    dst[COPY_2_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(2, len, 0);
+    assigned++;
+  }
+
+  // COPY_4_BYTE_OFFSET.
+  // Tag contains len-1 in top 6 bits, and offset in next four bytes.
+  for (unsigned int len = 1; len <= 64; len++) {
+    dst[COPY_4_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(4, len, 0);
+    assigned++;
+  }
+
+  // Check that each entry was initialized exactly once.
+  CHECK_EQ(assigned, 256);
+  for (int i = 0; i < 256; i++) {
+    CHECK_NE(dst[i], 0xffff);
+  }
+
+  if (FLAGS_snappy_dump_decompression_table) {
+    printf("static const uint16 char_table[256] = {\n  ");
+    for (int i = 0; i < 256; i++) {
+      printf("0x%04x%s",
+             dst[i],
+             ((i == 255) ? "\n" : (((i%8) == 7) ? ",\n  " : ", ")));
+    }
+    printf("};\n");
+  }
+
+  // Check that computed table matched recorded table
+  for (int i = 0; i < 256; i++) {
+    CHECK_EQ(dst[i], char_table[i]);
+  }
+}
+REGISTER_MODULE_INITIALIZER(snappy, ComputeTable());
+#endif /* !NDEBUG */
+
+// Helper class for decompression
+class SnappyDecompressor {
+ private:
+  Source*       reader_;        // Underlying source of bytes to decompress
+  const char*   ip_;            // Points to next buffered byte
+  const char*   ip_limit_;      // Points just past buffered bytes
+  uint32        peeked_;        // Bytes peeked from reader (need to skip)
+  bool          eof_;           // Hit end of input without an error?
+  char          scratch_[5];    // Temporary buffer for RefillTag() boundaries
+
+  // Ensure that all of the tag metadata for the next tag is available
+  // in [ip_..ip_limit_-1].  Also ensures that [ip,ip+4] is readable even
+  // if (ip_limit_ - ip_ < 5).
+  //
+  // Returns true on success, false on error or end of input.
+  bool RefillTag();
+
+ public:
+  explicit SnappyDecompressor(Source* reader)
+      : reader_(reader),
+        ip_(NULL),
+        ip_limit_(NULL),
+        peeked_(0),
+        eof_(false) {
+  }
+
+  ~SnappyDecompressor() {
+    // Advance past any bytes we peeked at from the reader
+    reader_->Skip(peeked_);
+  }
+
+  // Returns true iff we have hit the end of the input without an error.
+  bool eof() const {
+    return eof_;
+  }
+
+  // Read the uncompressed length stored at the start of the compressed data.
+  // On success, stores the length in *result and returns true.
+  // On failure, returns false.
+  bool ReadUncompressedLength(uint32* result) {
+    DCHECK(ip_ == NULL);       // Must not have read anything yet
+    // Length is encoded in 1..5 bytes
+    *result = 0;
+    uint32 shift = 0;
+    while (true) {
+      if (shift >= 32) return false;
+      size_t n;
+      const char* ip = reader_->Peek(&n);
+      if (n == 0) return false;
+      const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
+      reader_->Skip(1);
+      *result |= static_cast<uint32>(c & 0x7f) << shift;
+      if (c < 128) {
+        break;
+      }
+      shift += 7;
+    }
+    return true;
+  }
+
+  // Process all items found in the input; stops at the end of the input
+  // or on an error reported by the Writer.
+  template <class Writer>
+  void DecompressAllTags(Writer* writer) {
+    const char* ip = ip_;
+    for ( ;; ) {
+      if (ip_limit_ - ip < 5) {
+        ip_ = ip;
+        if (!RefillTag()) return;
+        ip = ip_;
+      }
+
+      const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip++));
+      const uint32 entry = char_table[c];
+      const uint32 trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11];
+      ip += entry >> 11;
+      const uint32 length = entry & 0xff;
+
+      if ((c & 0x3) == LITERAL) {
+        uint32 literal_length = length + trailer;
+        uint32 avail = ip_limit_ - ip;
+        while (avail < literal_length) {
+          bool allow_fast_path = (avail >= 16);
+          if (!writer->Append(ip, avail, allow_fast_path)) return;
+          literal_length -= avail;
+          reader_->Skip(peeked_);
+          size_t n;
+          ip = reader_->Peek(&n);
+          avail = n;
+          peeked_ = avail;
+          if (avail == 0) return;  // Premature end of input
+          ip_limit_ = ip + avail;
+        }
+        bool allow_fast_path = (avail >= 16);
+        if (!writer->Append(ip, literal_length, allow_fast_path)) {
+          return;
+        }
+        ip += literal_length;
+      } else {
+        // copy_offset/256 is encoded in bits 8..10.  By just fetching
+        // those bits, we get copy_offset (since the bit-field starts at
+        // bit 8).
+        const uint32 copy_offset = entry & 0x700;
+        if (!writer->AppendFromSelf(copy_offset + trailer, length)) {
+          return;
+        }
+      }
+    }
+  }
+};
+
+bool SnappyDecompressor::RefillTag() {
+  const char* ip = ip_;
+  if (ip == ip_limit_) {
+    // Fetch a new fragment from the reader
+    reader_->Skip(peeked_);   // All peeked bytes are used up
+    size_t n;
+    ip = reader_->Peek(&n);
+    peeked_ = n;
+    if (n == 0) {
+      eof_ = true;
+      return false;
+    }
+    ip_limit_ = ip + n;
+  }
+
+  // Read the tag character
+  DCHECK_LT(ip, ip_limit_);
+  const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
+  const uint32 entry = char_table[c];
+  const uint32 needed = (entry >> 11) + 1;  // +1 byte for 'c'
+  DCHECK_LE(needed, sizeof(scratch_));
+
+  // Read more bytes from reader if needed
+  uint32 nbuf = ip_limit_ - ip;
+  if (nbuf < needed) {
+    // Stitch together bytes from ip and reader to form the word
+    // contents.  We store the needed bytes in "scratch_".  They
+    // will be consumed immediately by the caller since we do not
+    // read more than we need.
+ memmove(scratch_, ip, nbuf); + reader_->Skip(peeked_); // All peeked bytes are used up + peeked_ = 0; + while (nbuf < needed) { + size_t length; + const char* src = reader_->Peek(&length); + if (length == 0) return false; + uint32 to_add = min<uint32>(needed - nbuf, length); + memcpy(scratch_ + nbuf, src, to_add); + nbuf += to_add; + reader_->Skip(to_add); + } + DCHECK_EQ(nbuf, needed); + ip_ = scratch_; + ip_limit_ = scratch_ + needed; + } else if (nbuf < 5) { + // Have enough bytes, but move into scratch_ so that we do not + // read past end of input + memmove(scratch_, ip, nbuf); + reader_->Skip(peeked_); // All peeked bytes are used up + peeked_ = 0; + ip_ = scratch_; + ip_limit_ = scratch_ + nbuf; + } else { + // Pass pointer to buffer returned by reader_. + ip_ = ip; + } + return true; +} + +template <typename Writer> +static bool InternalUncompress(Source* r, + Writer* writer, + uint32 max_len) { + // Read the uncompressed length from the front of the compressed input + SnappyDecompressor decompressor(r); + uint32 uncompressed_len = 0; + if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false; + // Protect against possible DoS attack + if (static_cast<uint64>(uncompressed_len) > max_len) { + return false; + } + + writer->SetExpectedLength(uncompressed_len); + + // Process the entire input + decompressor.DecompressAllTags(writer); + return (decompressor.eof() && writer->CheckLength()); +} + +bool GetUncompressedLength(Source* source, uint32* result) { + SnappyDecompressor decompressor(source); + return decompressor.ReadUncompressedLength(result); +} + +size_t Compress(Source* reader, Sink* writer) { + size_t written = 0; + int N = reader->Available(); + char ulength[Varint::kMax32]; + char* p = Varint::Encode32(ulength, N); + writer->Append(ulength, p-ulength); + written += (p - ulength); + + internal::WorkingMemory wmem; + char* scratch = NULL; + char* scratch_output = NULL; + + while (N > 0) { + // Get next block to compress (without copying if possible) + size_t fragment_size; + const char* fragment = reader->Peek(&fragment_size); + DCHECK_NE(fragment_size, 0) << ": premature end of input"; + const int num_to_read = min(N, kBlockSize); + size_t bytes_read = fragment_size; + + int pending_advance = 0; + if (bytes_read >= num_to_read) { + // Buffer returned by reader is large enough + pending_advance = num_to_read; + fragment_size = num_to_read; + } else { + // Read into scratch buffer + if (scratch == NULL) { + // If this is the last iteration, we want to allocate N bytes + // of space, otherwise the max possible kBlockSize space. + // num_to_read contains exactly the correct value + scratch = new char[num_to_read]; + } + memcpy(scratch, fragment, bytes_read); + reader->Skip(bytes_read); + + while (bytes_read < num_to_read) { + fragment = reader->Peek(&fragment_size); + size_t n = min<size_t>(fragment_size, num_to_read - bytes_read); + memcpy(scratch + bytes_read, fragment, n); + bytes_read += n; + reader->Skip(n); + } + DCHECK_EQ(bytes_read, num_to_read); + fragment = scratch; + fragment_size = num_to_read; + } + DCHECK_EQ(fragment_size, num_to_read); + + // Get encoding table for compression + int table_size; + uint16* table = wmem.GetHashTable(num_to_read, &table_size); + + // Compress input_fragment and append to dest + const int max_output = MaxCompressedLength(num_to_read); + + // Need a scratch buffer for the output, in case the byte sink doesn't + // have room for us directly. 
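+    // (Illustrative note: UncheckedByteArraySink::GetAppendBuffer() returns
+    // its destination buffer directly, so in the RawCompress() path below the
+    // fragment is compressed straight into the output and Append() makes no
+    // copy.)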
+ if (scratch_output == NULL) { + scratch_output = new char[max_output]; + } else { + // Since we encode kBlockSize regions followed by a region + // which is <= kBlockSize in length, a previously allocated + // scratch_output[] region is big enough for this iteration. + } + char* dest = writer->GetAppendBuffer(max_output, scratch_output); + char* end = internal::CompressFragment(fragment, fragment_size, + dest, table, table_size); + writer->Append(dest, end - dest); + written += (end - dest); + + N -= num_to_read; + reader->Skip(pending_advance); + } + + delete[] scratch; + delete[] scratch_output; + + return written; +} + +// ----------------------------------------------------------------------- +// Flat array interfaces +// ----------------------------------------------------------------------- + +// A type that writes to a flat array. +// Note that this is not a "ByteSink", but a type that matches the +// Writer template argument to SnappyDecompressor::DecompressAllTags(). +class SnappyArrayWriter { + private: + char* base_; + char* op_; + char* op_limit_; + + public: + inline explicit SnappyArrayWriter(char* dst) + : base_(dst), + op_(dst) { + } + + inline void SetExpectedLength(size_t len) { + op_limit_ = op_ + len; + } + + inline bool CheckLength() const { + return op_ == op_limit_; + } + + inline bool Append(const char* ip, uint32 len, bool allow_fast_path) { + char* op = op_; + const int space_left = op_limit_ - op; + if (allow_fast_path && len <= 16 && space_left >= 16) { + // Fast path, used for the majority (about 90%) of dynamic invocations. + UNALIGNED_STORE64(op, UNALIGNED_LOAD64(ip)); + UNALIGNED_STORE64(op + 8, UNALIGNED_LOAD64(ip + 8)); + } else { + if (space_left < len) { + return false; + } + memcpy(op, ip, len); + } + op_ = op + len; + return true; + } + + inline bool AppendFromSelf(uint32 offset, uint32 len) { + char* op = op_; + const int space_left = op_limit_ - op; + + if (op - base_ <= offset - 1u) { // -1u catches offset==0 + return false; + } + if (len <= 16 && offset >= 8 && space_left >= 16) { + // Fast path, used for the majority (70-80%) of dynamic invocations. 
+ UNALIGNED_STORE64(op, UNALIGNED_LOAD64(op - offset)); + UNALIGNED_STORE64(op + 8, UNALIGNED_LOAD64(op - offset + 8)); + } else { + if (space_left >= len + kMaxIncrementCopyOverflow) { + IncrementalCopyFastPath(op - offset, op, len); + } else { + if (space_left < len) { + return false; + } + IncrementalCopy(op - offset, op, len); + } + } + + op_ = op + len; + return true; + } +}; + +bool RawUncompress(const char* compressed, size_t n, char* uncompressed) { + ByteArraySource reader(compressed, n); + return RawUncompress(&reader, uncompressed); +} + +bool RawUncompress(Source* compressed, char* uncompressed) { + SnappyArrayWriter output(uncompressed); + return InternalUncompress(compressed, &output, kuint32max); +} + +bool Uncompress(const char* compressed, size_t n, string* uncompressed) { + size_t ulength; + if (!GetUncompressedLength(compressed, n, &ulength)) { + return false; + } + // Protect against possible DoS attack + if ((static_cast<uint64>(ulength) + uncompressed->size()) > + uncompressed->max_size()) { + return false; + } + STLStringResizeUninitialized(uncompressed, ulength); + return RawUncompress(compressed, n, string_as_array(uncompressed)); +} + + +// A Writer that drops everything on the floor and just does validation +class SnappyDecompressionValidator { + private: + size_t expected_; + size_t produced_; + + public: + inline SnappyDecompressionValidator() : produced_(0) { } + inline void SetExpectedLength(size_t len) { + expected_ = len; + } + inline bool CheckLength() const { + return expected_ == produced_; + } + inline bool Append(const char* ip, uint32 len, bool allow_fast_path) { + produced_ += len; + return produced_ <= expected_; + } + inline bool AppendFromSelf(uint32 offset, uint32 len) { + if (produced_ <= offset - 1u) return false; // -1u catches offset==0 + produced_ += len; + return produced_ <= expected_; + } +}; + +bool IsValidCompressedBuffer(const char* compressed, size_t n) { + ByteArraySource reader(compressed, n); + SnappyDecompressionValidator writer; + return InternalUncompress(&reader, &writer, kuint32max); +} + +void RawCompress(const char* input, + size_t input_length, + char* compressed, + size_t* compressed_length) { + ByteArraySource reader(input, input_length); + UncheckedByteArraySink writer(compressed); + Compress(&reader, &writer); + + // Compute how many bytes were added + *compressed_length = (writer.CurrentDestination() - compressed); +} + +size_t Compress(const char* input, size_t input_length, string* compressed) { + // Pre-grow the buffer to the max length of the compressed output + compressed->resize(MaxCompressedLength(input_length)); + + size_t compressed_length; + RawCompress(input, input_length, string_as_array(compressed), + &compressed_length); + compressed->resize(compressed_length); + return compressed_length; +} + + +} // end namespace snappy + diff --git a/third_party/snappy/snappy.h b/third_party/snappy/snappy.h new file mode 100755 index 00000000000..8d6ef2294f5 --- /dev/null +++ b/third_party/snappy/snappy.h @@ -0,0 +1,155 @@ +// Copyright 2005 and onwards Google Inc. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// A light-weight compression algorithm. It is designed for speed of +// compression and decompression, rather than for the utmost in space +// savings. +// +// For getting better compression ratios when you are compressing data +// with long repeated sequences or compressing data that is similar to +// other data, while still compressing fast, you might look at first +// using BMDiff and then compressing the output of BMDiff with +// Snappy. + +#ifndef UTIL_SNAPPY_SNAPPY_H__ +#define UTIL_SNAPPY_SNAPPY_H__ + +#include <stddef.h> +#include <string> + +#include "snappy-stubs-public.h" + +namespace snappy { + class Source; + class Sink; + + // ------------------------------------------------------------------------ + // Generic compression/decompression routines. + // ------------------------------------------------------------------------ + + // Compress the bytes read from "*source" and append to "*sink". Return the + // number of bytes written. + size_t Compress(Source* source, Sink* sink); + + bool GetUncompressedLength(Source* source, uint32* result); + + // ------------------------------------------------------------------------ + // Higher-level string based routines (should be sufficient for most users) + // ------------------------------------------------------------------------ + + // Sets "*output" to the compressed version of "input[0,input_length-1]". + // Original contents of *output are lost. + // + // REQUIRES: "input[]" is not an alias of "*output". + size_t Compress(const char* input, size_t input_length, string* output); + + // Decompresses "compressed[0,compressed_length-1]" to "*uncompressed". + // Original contents of "*uncompressed" are lost. + // + // REQUIRES: "compressed[]" is not an alias of "*uncompressed". + // + // returns false if the message is corrupted and could not be decompressed + bool Uncompress(const char* compressed, size_t compressed_length, + string* uncompressed); + + + // ------------------------------------------------------------------------ + // Lower-level character array based routines. May be useful for + // efficiency reasons in certain circumstances. 
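  // Example round trip through the string-based routines declared above
  // (a sketch; a real caller should check the boolean result):
  //
  //     std::string data = loadRecord();   // hypothetical input
  //     std::string packed, restored;
  //     snappy::Compress(data.data(), data.size(), &packed);
  //     bool ok = snappy::Uncompress(packed.data(), packed.size(), &restored);
  //     // on success, ok is true and restored == data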
+  // ------------------------------------------------------------------------
+
+  // REQUIRES: "compressed" must point to an area of memory that is at
+  // least "MaxCompressedLength(input_length)" bytes in length.
+  //
+  // Takes the data stored in "input[0..input_length-1]" and stores
+  // it in the array pointed to by "compressed".
+  //
+  // "*compressed_length" is set to the length of the compressed output.
+  //
+  // Example:
+  //    char* output = new char[snappy::MaxCompressedLength(input_length)];
+  //    size_t output_length;
+  //    RawCompress(input, input_length, output, &output_length);
+  //    ... Process(output, output_length) ...
+  //    delete [] output;
+  void RawCompress(const char* input,
+                   size_t input_length,
+                   char* compressed,
+                   size_t* compressed_length);
+
+  // Given data in "compressed[0..compressed_length-1]" generated by
+  // calling the snappy::Compress routine, this routine
+  // stores the uncompressed data to
+  //   uncompressed[0..GetUncompressedLength(compressed)-1]
+  // returns false if the message is corrupted and could not be decompressed
+  bool RawUncompress(const char* compressed, size_t compressed_length,
+                     char* uncompressed);
+
+  // Given data from the byte source 'compressed' generated by calling
+  // the snappy::Compress routine, this routine stores the uncompressed
+  // data to
+  //   uncompressed[0..GetUncompressedLength(compressed,compressed_length)-1]
+  // returns false if the message is corrupted and could not be decompressed
+  bool RawUncompress(Source* compressed, char* uncompressed);
+
+  // Returns the maximal size of the compressed representation of
+  // input data that is "source_bytes" bytes in length.
+  size_t MaxCompressedLength(size_t source_bytes);
+
+  // REQUIRES: "compressed[]" was produced by RawCompress() or Compress()
+  // Returns true and stores the length of the uncompressed data in
+  // *result normally. Returns false on parsing error.
+  // This operation takes O(1) time.
+  bool GetUncompressedLength(const char* compressed, size_t compressed_length,
+                             size_t* result);
+
+  // Returns true iff the contents of "compressed[]" can be uncompressed
+  // successfully. Does not return the uncompressed data. Takes
+  // time proportional to compressed_length, but is usually at least
+  // a factor of four faster than actual decompression.
+  bool IsValidCompressedBuffer(const char* compressed,
+                               size_t compressed_length);
+
+  // *** DO NOT CHANGE THE VALUE OF kBlockSize ***
+  //
+  // The compression code chops up the input into blocks of at most
+  // the following size. This ensures that back-references in the
+  // output never cross kBlockSize block boundaries. This can be
+  // helpful in implementing blocked decompression. However the
+  // decompression code should not rely on this guarantee since older
+  // compression code may not obey it.
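  // (Sketch.) Given the constants below, a blocked decompressor can size its
  // per-block state up front:
  //
  //     size_t blocks = (input_len + kBlockSize - 1) / kBlockSize;       // ceil
  //     size_t worstCasePerBlock = snappy::MaxCompressedLength(kBlockSize);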
+  static const int kBlockLog = 15;
+  static const int kBlockSize = 1 << kBlockLog;
+
+  static const int kMaxHashTableBits = 14;
+  static const int kMaxHashTableSize = 1 << kMaxHashTableBits;
+
+} // end namespace snappy
+
+
+#endif // UTIL_SNAPPY_SNAPPY_H__
diff --git a/tools/bridge.cpp b/tools/bridge.cpp
index f7518a17ad4..341a1dae687 100644
--- a/tools/bridge.cpp
+++ b/tools/bridge.cpp
@@ -88,7 +88,7 @@ set<MessagingPort*> ports;
 class MyListener : public Listener {
 public:
-    MyListener( int port ) : Listener( "", port ) {}
+    MyListener( int port ) : Listener( "bridge" , "", port ) {}
     virtual void accepted(MessagingPort *mp) {
         ports.insert( mp );
         Forwarder f( *mp );
diff --git a/tools/export.cpp b/tools/export.cpp
index fb32a9e58ff..c3a5420438d 100644
--- a/tools/export.cpp
+++ b/tools/export.cpp
@@ -45,6 +45,73 @@ public:
         _usesstdout = false;
     }
 
+    // Turn every double quote character into two double quote characters.
+    // If hasSurroundingQuotes is true, the first and last characters of the
+    // string are left unescaped; if it is false, a double quote character is
+    // added around the whole string.
+    string csvEscape(string str, bool hasSurroundingQuotes = false) {
+        size_t index = hasSurroundingQuotes ? 1 : 0;
+        while (((index = str.find('"', index)) != string::npos)
+               && (index < (hasSurroundingQuotes ? str.size() - 1 : str.size()))) {
+            str.replace(index, 1, "\"\"");
+            index += 2;
+        }
+        return hasSurroundingQuotes ? str : "\"" + str + "\"";
+    }
+
+    // Gets the string representation of a BSON object that can be correctly written to a CSV file
+    string csvString (const BSONElement& object) {
+        const char* binData; // Only used with BinData type
+
+        switch (object.type()) {
+        case MinKey:
+            return "$MinKey";
+        case MaxKey:
+            return "$MaxKey";
+        case NumberInt:
+        case NumberDouble:
+        case NumberLong:
+        case Bool:
+            return object.toString(false);
+        case String:
+        case Symbol:
+            return csvEscape(object.toString(false), true);
+        case Object:
+            return csvEscape(object.jsonString(Strict, false));
+        case Array:
+            return csvEscape(object.jsonString(Strict, false));
+        case BinData:
+            int len;
+            binData = object.binDataClean(len);
+            return toHex(binData, len);
+        case jstOID:
+            return "ObjectID(" + object.OID().toString() + ")"; // OID strings are always 24 hex characters
+        case Date:
+            return timeToISOString(object.Date() / 1000);
+        case Timestamp:
+            return csvEscape(object.jsonString(Strict, false));
+        case RegEx:
+            return csvEscape("/" + string(object.regex()) + "/" + string(object.regexFlags()));
+        case Code:
+            return csvEscape(object.toString(false));
+        case CodeWScope:
+            if (string(object.codeWScopeScopeData()) == "") {
+                return csvEscape(object.toString(false));
+            } else {
+                return csvEscape(object.jsonString(Strict, false));
+            }
+        case EOO:
+        case Undefined:
+        case DBRef:
+        case jstNULL:
+            cerr << "Invalid BSON object type for CSV output: " << object.type() << endl;
+            return "";
+        }
+        // Can never get here
+        assert(false);
+        return "";
+    }
+
     int run() {
         string ns;
         const bool csv = hasParam( "csv" );
@@ -137,7 +204,7 @@ public:
                     out << ",";
                 const BSONElement & e = obj.getFieldDotted(i->c_str());
                 if ( !
e.eoo() ) { - out << e.jsonString( Strict , false ); + out << csvString(e); } } out << endl; diff --git a/tools/import.cpp b/tools/import.cpp index c7a18b940ec..16980b05fbb 100644 --- a/tools/import.cpp +++ b/tools/import.cpp @@ -27,6 +27,7 @@ #include <iostream> #include <boost/program_options.hpp> +#include <boost/algorithm/string.hpp> using namespace mongo; @@ -44,100 +45,215 @@ class Import : public Tool { bool _doimport; bool _jsonArray; vector<string> _upsertFields; + static const int BUF_SIZE = 1024 * 1024 * 4; + + string trimWhitespace(const string& str) { + if (str.size() == 0) { + return str; + } + size_t begin = 0; + size_t end = str.size() - 1; + while (begin < str.size() && isspace(str[begin])) { ++begin; } // Finds index of first non-whitespace character + while (end > 0 && isspace(str[end])) { --end; } // Finds index of last non-whitespace character + return str.substr(begin, end - begin + 1); + } + + void csvTokenizeRow(const string& row, vector<string>& tokens) { + bool inQuotes = false; + bool prevWasQuote = false; + bool tokenQuoted = false; + string curtoken = ""; + for (string::const_iterator it = row.begin(); it != row.end(); ++it) { + char element = *it; + if (element == '"') { + if (!inQuotes) { + inQuotes = true; + tokenQuoted = true; + curtoken = ""; + } else { + if (prevWasQuote) { + curtoken += "\""; + prevWasQuote = false; + } else { + prevWasQuote = true; + } + } + } else { + if (inQuotes && prevWasQuote) { + inQuotes = false; + prevWasQuote = false; + tokens.push_back(curtoken); + } + + if (element == ',' && !inQuotes) { + if (!tokenQuoted) { // If token was quoted, it's already been added + tokens.push_back(trimWhitespace(curtoken)); + } + curtoken = ""; + tokenQuoted = false; + } else { + curtoken += element; + } + } + } + if (!tokenQuoted || (inQuotes && prevWasQuote)) { + tokens.push_back(trimWhitespace(curtoken)); + } + } void _append( BSONObjBuilder& b , const string& fieldName , const string& data ) { - if ( b.appendAsNumber( fieldName , data ) ) + if ( _ignoreBlanks && data.size() == 0 ) return; - if ( _ignoreBlanks && data.size() == 0 ) + if ( b.appendAsNumber( fieldName , data ) ) return; // TODO: other types? - b.append( fieldName , data ); + b.append ( fieldName , data ); + } + + /* + * Reads one line from in into buf. + * Returns the number of bytes that should be skipped - the caller should + * increment buf by this amount. + */ + int getLine(istream* in, char* buf) { + if (_jsonArray) { + in->read(buf, BUF_SIZE); + uassert(13295, "JSONArray file too large", (in->rdstate() & ios_base::eofbit)); + buf[ in->gcount() ] = '\0'; + } + else { + in->getline( buf , BUF_SIZE ); + log(1) << "got line:" << buf << endl; + } + uassert( 10263 , "unknown error reading file" , + (!(in->rdstate() & ios_base::badbit)) && + (!(in->rdstate() & ios_base::failbit) || (in->rdstate() & ios_base::eofbit)) ); + + int numBytesSkipped = 0; + if (strncmp("\xEF\xBB\xBF", buf, 3) == 0) { // UTF-8 BOM (notepad is stupid) + buf += 3; + numBytesSkipped += 3; + } + + uassert(13289, "Invalid UTF8 character detected", isValidUTF8(buf)); + return numBytesSkipped; } - BSONObj parseLine( char * line ) { - uassert(13289, "Invalid UTF8 character detected", isValidUTF8(line)); + /* + * Parses a BSON object out of a JSON array. + * Returns number of bytes processed on success and -1 on failure. 
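     * Example (a sketch): for buf = "[ {\"a\" : 1} , {\"b\" : 2} ]" the first
     * call skips ahead to the first '{', hands it to fromjson(), and returns
     * the total bytes consumed so the caller can advance to the next object.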
+ */ + int parseJSONArray(char* buf, BSONObj& o) { + int len = 0; + while (buf[0] != '{' && buf[0] != '\0') { + len++; + buf++; + } + if (buf[0] == '\0') + return -1; + + int jslen; + o = fromjson(buf, &jslen); + len += jslen; - if ( _type == JSON ) { + return len; + } + + /* + * Parses one object from the input file. This usually corresponds to one line in the input + * file, unless the file is a CSV and contains a newline within a quoted string entry. + * Returns a true if a BSONObj was successfully created and false if not. + */ + bool parseRow(istream* in, BSONObj& o, int& numBytesRead) { + boost::scoped_array<char> buffer(new char[BUF_SIZE+2]); + char* line = buffer.get(); + + numBytesRead = getLine(in, line); + line += numBytesRead; + + if (line[0] == '\0') { + return false; + } + numBytesRead += strlen( line ); + + if (_type == JSON) { + // Strip out trailing whitespace char * end = ( line + strlen( line ) ) - 1; - while ( isspace(*end) ) { + while ( end >= line && isspace(*end) ) { *end = 0; end--; } - return fromjson( line ); + o = fromjson( line ); + return true; } - BSONObjBuilder b; + vector<string> tokens; + if (_type == CSV) { + string row; + bool inside_quotes = false; + size_t last_quote = 0; + while (true) { + string lineStr(line); + // Deal with line breaks in quoted strings + last_quote = lineStr.find_first_of('"'); + while (last_quote != string::npos) { + inside_quotes = !inside_quotes; + last_quote = lineStr.find_first_of('"', last_quote+1); + } - unsigned int pos=0; - while ( line[0] ) { - string name; - if ( pos < _fields.size() ) { - name = _fields[pos]; + row.append(lineStr); + + if (inside_quotes) { + row.append("\n"); + int num = getLine(in, line); + line += num; + numBytesRead += num; + + uassert (15854, "CSV file ends while inside quoted field", line[0] != '\0'); + numBytesRead += strlen( line ); + } else { + break; + } } - else { - stringstream ss; - ss << "field" << pos; - name = ss.str(); + // now 'row' is string corresponding to one row of the CSV file + // (which may span multiple lines) and represents one BSONObj + csvTokenizeRow(row, tokens); + } + else { // _type == TSV + while (line[0] != '\t' && isspace(line[0])) { // Strip leading whitespace, but not tabs + line++; } - pos++; - - bool done = false; - string data; - char * end; - if ( _type == CSV && line[0] == '"' ) { - line++; //skip first '"' - - while (true) { - end = strchr( line , '"' ); - if (!end) { - data += line; - done = true; - break; - } - else if (end[1] == '"') { - // two '"'s get appended as one - data.append(line, end-line+1); //include '"' - line = end+2; //skip both '"'s - } - else if (end[-1] == '\\') { - // "\\\"" gets appended as '"' - data.append(line, end-line-1); //exclude '\\' - data.append("\""); - line = end+1; //skip the '"' - } - else { - data.append(line, end-line); - line = end+2; //skip '"' and ',' - break; - } - } + + boost::split(tokens, line, boost::is_any_of(_sep)); + } + + // Now that the row is tokenized, create a BSONObj out of it. + BSONObjBuilder b; + unsigned int pos=0; + for (vector<string>::iterator it = tokens.begin(); it != tokens.end(); ++it) { + string token = *it; + if ( _headerLine ) { + _fields.push_back(token); } else { - end = strstr( line , _sep ); - if ( ! 
end ) { - done = true; - data = string( line ); + string name; + if ( pos < _fields.size() ) { + name = _fields[pos]; } else { - data = string( line , end - line ); - line = end+1; + stringstream ss; + ss << "field" << pos; + name = ss.str(); } - } + pos++; - if ( _headerLine ) { - while ( isspace( data[0] ) ) - data = data.substr( 1 ); - _fields.push_back( data ); + _append( b , name , token ); } - else - _append( b , name , data ); - - if ( done ) - break; } - return b.obj(); + o = b.obj(); + return true; } public: @@ -255,68 +371,37 @@ public: _jsonArray = true; } - int errors = 0; - - int num = 0; - time_t start = time(0); - log(1) << "filesize: " << fileSize << endl; ProgressMeter pm( fileSize ); - const int BUF_SIZE = 1024 * 1024 * 4; - boost::scoped_array<char> line(new char[BUF_SIZE+2]); - char * buf = line.get(); - while ( _jsonArray || in->rdstate() == 0 ) { - if (_jsonArray) { - if (buf == line.get()) { //first pass - in->read(buf, BUF_SIZE); - uassert(13295, "JSONArray file too large", (in->rdstate() & ios_base::eofbit)); - buf[ in->gcount() ] = '\0'; - } - } - else { - buf = line.get(); - in->getline( buf , BUF_SIZE ); - log(1) << "got line:" << buf << endl; - } - uassert( 10263 , "unknown error reading file" , - (!(in->rdstate() & ios_base::badbit)) && - (!(in->rdstate() & ios_base::failbit) || (in->rdstate() & ios_base::eofbit)) ); - - int len = 0; - if (strncmp("\xEF\xBB\xBF", buf, 3) == 0) { // UTF-8 BOM (notepad is stupid) - buf += 3; - len += 3; - } - - if (_jsonArray) { - while (buf[0] != '{' && buf[0] != '\0') { - len++; - buf++; - } - if (buf[0] == '\0') - break; - } - else { - while ((_type != TSV || buf[0] != '\t') && isspace( buf[0] )) { - len++; - buf++; - } - if (buf[0] == '\0') - continue; - len += strlen( buf ); - } + int num = 0; + int errors = 0; + int len = 0; + // buffer and line are only used when parsing a jsonArray + boost::scoped_array<char> buffer(new char[BUF_SIZE+2]); + char* line = buffer.get(); + while ( _jsonArray || in->rdstate() == 0 ) { try { BSONObj o; if (_jsonArray) { - int jslen; - o = fromjson(buf, &jslen); - len += jslen; - buf += jslen; + int bytesProcessed = 0; + if (line == buffer.get()) { // Only read on first pass - the whole array must be on one line. 
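// (Illustrative.) i.e. the file is expected to be a single JSON array such as
//     [{"a" : 1}, {"a" : 2}, {"a" : 3}]
// read in one gulp of at most BUF_SIZE bytes; parseJSONArray() then walks it
// one '{'-delimited object at a time.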
+ bytesProcessed = getLine(in, line); + line += bytesProcessed; + len += bytesProcessed; + } + if ((bytesProcessed = parseJSONArray(line, o)) < 0) { + len += bytesProcessed; + break; + } + len += bytesProcessed; + line += len; } else { - o = parseLine( buf ); + if (!parseRow(in, o, len)) { + continue; + } } if ( _headerLine ) { @@ -348,7 +433,7 @@ public: } catch ( std::exception& e ) { cout << "exception:" << e.what() << endl; - cout << buf << endl; + cout << line << endl; errors++; if (hasParam("stopOnError") || _jsonArray) diff --git a/tools/restore.cpp b/tools/restore.cpp index 3ff6a742d99..9adf90bd209 100644 --- a/tools/restore.cpp +++ b/tools/restore.cpp @@ -25,6 +25,7 @@ #include <boost/program_options.hpp> #include <fcntl.h> +#include <set> using namespace mongo; @@ -40,6 +41,7 @@ public: bool _drop; string _curns; string _curdb; + set<string> _users; // For restoring users with --drop Restore() : BSONTool( "restore" ) , _drop(false) { add_options() @@ -208,13 +210,31 @@ public: out() << "\t going into namespace [" << ns << "]" << endl; if ( _drop ) { - out() << "\t dropping" << endl; - conn().dropCollection( ns ); + if (root.leaf() != "system.users.bson" ) { + out() << "\t dropping" << endl; + conn().dropCollection( ns ); + } else { + // Create map of the users currently in the DB + BSONObj fields = BSON("user" << 1); + scoped_ptr<DBClientCursor> cursor(conn().query(ns, Query(), 0, 0, &fields)); + while (cursor->more()) { + BSONObj user = cursor->next(); + _users.insert(user["user"].String()); + } + } } _curns = ns.c_str(); _curdb = NamespaceString(_curns).db; processFile( root ); + if (_drop && root.leaf() == "system.users.bson") { + // Delete any users that used to exist but weren't in the dump file + for (set<string>::iterator it = _users.begin(); it != _users.end(); ++it) { + BSONObj userMatch = BSON("user" << *it); + conn().remove(ns, Query(userMatch)); + } + _users.clear(); + } } virtual void gotObject( const BSONObj& obj ) { @@ -260,7 +280,13 @@ public: ::abort(); } } - else { + else if (_drop && endsWith(_curns.c_str(), ".system.users") && _users.count(obj["user"].String())) { + // Since system collections can't be dropped, we have to manually + // replace the contents of the system.users collection + BSONObj userMatch = BSON("user" << obj["user"].String()); + conn().update(_curns, Query(userMatch), obj); + _users.erase(obj["user"].String()); + } else { conn().insert( _curns , obj ); } } diff --git a/tools/tool.cpp b/tools/tool.cpp index 98e18a9226a..d938e752041 100644 --- a/tools/tool.cpp +++ b/tools/tool.cpp @@ -380,8 +380,15 @@ namespace mongo { if ( ! dbname.size() ) dbname = _db; - if ( ! ( _username.size() || _password.size() ) ) + if ( ! ( _username.size() || _password.size() ) ) { + // Make sure that we don't need authentication to connect to this db + // findOne throws an AssertionException if it's not authenticated. 
+ if (_coll.size() > 0) { + // BSONTools don't have a collection + conn().findOne(getNS(), Query("{}")); + } return; + } string errmsg; if ( _conn->auth( dbname , _username , _password , errmsg ) ) @@ -396,7 +403,7 @@ namespace mongo { } BSONTool::BSONTool( const char * name, DBAccess access , bool objcheck ) - : Tool( name , access , "" , "" ) , _objcheck( objcheck ) { + : Tool( name , access , "" , "" , false ) , _objcheck( objcheck ) { add_options() ("objcheck" , "validate object before inserting" ) @@ -489,9 +496,9 @@ namespace mongo { fclose( file ); uassert( 10265 , "counts don't match" , m.done() == fileLength ); - out() << "\t " << m.hits() << " objects found" << endl; + (_usesstdout ? cout : cerr ) << m.hits() << " objects found" << endl; if ( _matcher.get() ) - out() << "\t " << processed << " objects processed" << endl; + (_usesstdout ? cout : cerr ) << processed << " objects processed" << endl; return processed; } diff --git a/util/alignedbuilder.cpp b/util/alignedbuilder.cpp index 732ef99c764..b2e0461b733 100644 --- a/util/alignedbuilder.cpp +++ b/util/alignedbuilder.cpp @@ -32,9 +32,30 @@ namespace mongo { /** reset for a re-use. shrinks if > 128MB */ void AlignedBuilder::reset() { _len = 0; - const unsigned sizeCap = 128*1024*1024; - if (_p._size > sizeCap) - _realloc(sizeCap, _len); + RARELY { + const unsigned sizeCap = 128*1024*1024; + if (_p._size > sizeCap) + _realloc(sizeCap, _len); + } + } + + /** reset with a hint as to the upcoming needed size specified */ + void AlignedBuilder::reset(unsigned sz) { + _len = 0; + unsigned Q = 32 * 1024 * 1024 - 1; + unsigned want = (sz+Q) & (~Q); + if( _p._size == want ) { + return; + } + if( _p._size > want ) { + if( _p._size <= 64 * 1024 * 1024 ) + return; + bool downsize = false; + RARELY { downsize = true; } + if( !downsize ) + return; + } + _realloc(want, _len); } void AlignedBuilder::mallocSelfAligned(unsigned sz) { @@ -52,10 +73,16 @@ namespace mongo { /* "slow"/infrequent portion of 'grow()' */ void NOINLINE_DECL AlignedBuilder::growReallocate(unsigned oldLen) { + dassert( _len > _p._size ); unsigned a = _p._size; assert( a ); while( 1 ) { - a *= 2; + if( a < 128 * 1024 * 1024 ) + a *= 2; + else if( sizeof(int*) == 4 ) + a += 32 * 1024 * 1024; + else + a += 64 * 1024 * 1024; DEV if( a > 256*1024*1024 ) { log() << "dur AlignedBuilder too big, aborting in _DEBUG build" << endl; abort(); diff --git a/util/alignedbuilder.h b/util/alignedbuilder.h index 8760bfb9a44..1d246a9d78e 100644 --- a/util/alignedbuilder.h +++ b/util/alignedbuilder.h @@ -28,6 +28,9 @@ namespace mongo { AlignedBuilder(unsigned init_size); ~AlignedBuilder() { kill(); } + /** reset with a hint as to the upcoming needed size specified */ + void reset(unsigned sz); + /** reset for a re-use. 
shrinks if > 128MB */ void reset(); @@ -43,8 +46,12 @@ namespace mongo { return l; } + /** if buffer grows pointer no longer valid */ char* atOfs(unsigned ofs) { return _p._data + ofs; } + /** if buffer grows pointer no longer valid */ + char* cur() { return _p._data + _len; } + void appendChar(char j) { *((char*)grow(sizeof(char))) = j; } @@ -94,7 +101,7 @@ namespace mongo { inline char* grow(unsigned by) { unsigned oldlen = _len; _len += by; - if ( _len > _p._size ) { + if (MONGO_unlikely( _len > _p._size )) { growReallocate(oldlen); } return _p._data + oldlen; diff --git a/util/array.h b/util/array.h index bf705a4d988..12822252fd7 100644 --- a/util/array.h +++ b/util/array.h @@ -18,6 +18,12 @@ namespace mongo { + /* + * simple array class that does no allocations + * same api as vector + * fixed buffer, so once capacity is exceeded, will assert + * meant to be-reused with clear() + */ template<typename T> class FastArray { public: @@ -44,6 +50,7 @@ namespace mongo { } void push_back( const T& t ) { + assert( _size < _capacity ); _data[_size++] = t; } diff --git a/util/assert_util.cpp b/util/assert_util.cpp index 52947bc02b8..da039c09a58 100644 --- a/util/assert_util.cpp +++ b/util/assert_util.cpp @@ -66,11 +66,23 @@ namespace mongo { /* "warning" assert -- safe to continue, so we don't throw exception. */ NOINLINE_DECL void wasserted(const char *msg, const char *file, unsigned line) { - problem() << "warning Assertion failure " << msg << ' ' << file << ' ' << dec << line << endl; + static bool rateLimited; + static time_t lastWhen; + static unsigned lastLine; + if( lastLine == line && time(0)-lastWhen < 5 ) { + if( rateLimited++ == 0 ) { + log() << "rate limiting wassert" << endl; + } + return; + } + lastWhen = time(0); + lastLine = line; + + problem() << "warning assertion failure " << msg << ' ' << file << ' ' << dec << line << endl; sayDbContext(); raiseError(0,msg && *msg ? msg : "wassertion failure"); assertionCount.condrollover( ++assertionCount.warning ); -#if defined(_DEBUG) || defined(_DURABLEDEFAULTON) +#if defined(_DEBUG) || defined(_DURABLEDEFAULTON) || defined(_DURABLEDEFAULTOFF) // this is so we notice in buildbot log() << "\n\n***aborting after wassert() failure in a debug/test build\n\n" << endl; abort(); @@ -86,7 +98,7 @@ namespace mongo { temp << "assertion " << file << ":" << line; AssertionException e(temp.str(),0); breakpoint(); -#if defined(_DEBUG) || defined(_DURABLEDEFAULTON) +#if defined(_DEBUG) || defined(_DURABLEDEFAULTON) || defined(_DURABLEDEFAULTOFF) // this is so we notice in buildbot log() << "\n\n***aborting after assert() failure in a debug/test build\n\n" << endl; abort(); @@ -103,7 +115,7 @@ namespace mongo { temp << msgid; AssertionException e(temp.str(),0); breakpoint(); -#if defined(_DEBUG) || defined(_DURABLEDEFAULTON) +#if defined(_DEBUG) || defined(_DURABLEDEFAULTON) || defined(_DURABLEDEFAULTOFF) // this is so we notice in buildbot log() << "\n\n***aborting after verify() failure in a debug/test build\n\n" << endl; abort(); diff --git a/util/assert_util.h b/util/assert_util.h index 244fb2287e1..b4c68b7de34 100644 --- a/util/assert_util.h +++ b/util/assert_util.h @@ -175,15 +175,15 @@ namespace mongo { #undef assert #endif -#define MONGO_assert(_Expression) (void)( (!!(_Expression)) || (mongo::asserted(#_Expression, __FILE__, __LINE__), 0) ) +#define MONGO_assert(_Expression) (void)( MONGO_likely(!!(_Expression)) || (mongo::asserted(#_Expression, __FILE__, __LINE__), 0) ) #define assert MONGO_assert /* "user assert". 
if asserts, user did something wrong, not our code */ -#define MONGO_uassert(msgid, msg, expr) (void)( (!!(expr)) || (mongo::uasserted(msgid, msg), 0) ) +#define MONGO_uassert(msgid, msg, expr) (void)( MONGO_likely(!!(expr)) || (mongo::uasserted(msgid, msg), 0) ) #define uassert MONGO_uassert /* warning only - keeps going */ -#define MONGO_wassert(_Expression) (void)( (!!(_Expression)) || (mongo::wasserted(#_Expression, __FILE__, __LINE__), 0) ) +#define MONGO_wassert(_Expression) (void)( MONGO_likely(!!(_Expression)) || (mongo::wasserted(#_Expression, __FILE__, __LINE__), 0) ) #define wassert MONGO_wassert /* display a message, no context, and throw assertionexception @@ -191,7 +191,7 @@ namespace mongo { easy way to throw an exception and log something without our stack trace display happening. */ -#define MONGO_massert(msgid, msg, expr) (void)( (!!(expr)) || (mongo::msgasserted(msgid, msg), 0) ) +#define MONGO_massert(msgid, msg, expr) (void)( MONGO_likely(!!(expr)) || (mongo::msgasserted(msgid, msg), 0) ) #define massert MONGO_massert /* dassert is 'debug assert' -- might want to turn off for production as these diff --git a/util/bufreader.h b/util/bufreader.h index a0dcefa8d83..53f0ba744e2 100644 --- a/util/bufreader.h +++ b/util/bufreader.h @@ -28,6 +28,7 @@ namespace mongo { public: class eof : public std::exception { public: + eof() { } virtual const char * what() { return "BufReader eof"; } }; @@ -88,6 +89,7 @@ namespace mongo { } const void* pos() { return _pos; } + const void* start() { return _start; } private: const void *_start; diff --git a/util/compress.cpp b/util/compress.cpp new file mode 100644 index 00000000000..bcde488b88b --- /dev/null +++ b/util/compress.cpp @@ -0,0 +1,31 @@ +// @file compress.cpp + +#include "../third_party/snappy/snappy.h" +#include "compress.h" +#include <string> +#include <string.h> +#include <assert.h> + +namespace mongo { + + void rawCompress(const char* input, + size_t input_length, + char* compressed, + size_t* compressed_length) + { + snappy::RawCompress(input, input_length, compressed, compressed_length); + } + + size_t maxCompressedLength(size_t source_len) { + return snappy::MaxCompressedLength(source_len); + } + + size_t compress(const char* input, size_t input_length, std::string* output) { + return snappy::Compress(input, input_length, output); + } + + bool uncompress(const char* compressed, size_t compressed_length, std::string* uncompressed) { + return snappy::Uncompress(compressed, compressed_length, uncompressed); + } + +} diff --git a/util/compress.h b/util/compress.h new file mode 100644 index 00000000000..5bc5a3392bb --- /dev/null +++ b/util/compress.h @@ -0,0 +1,21 @@ +// @file compress.h + +#pragma once + +#include <string> + +namespace mongo { + + size_t compress(const char* input, size_t input_length, std::string* output); + + bool uncompress(const char* compressed, size_t compressed_length, std::string* uncompressed); + + size_t maxCompressedLength(size_t source_len); + void rawCompress(const char* input, + size_t input_length, + char* compressed, + size_t* compressed_length); + +} + + diff --git a/util/concurrency/mutex.h b/util/concurrency/mutex.h index 44c2ebee0ea..6ca76570cbf 100644 --- a/util/concurrency/mutex.h +++ b/util/concurrency/mutex.h @@ -24,6 +24,8 @@ namespace mongo { + void printStackTrace( ostream &o ); + class mutex; inline boost::xtime incxtimemillis( long long s ) { @@ -86,6 +88,16 @@ namespace mongo { class scoped_lock : boost::noncopyable { public: #if defined(_DEBUG) + struct PostStaticCheck { + 
PostStaticCheck() { + if ( StaticObserver::_destroyingStatics ) { + cout << "trying to lock a mongo::mutex during static shutdown" << endl; + printStackTrace( cout ); + } + } + }; + + PostStaticCheck _check; mongo::mutex * const _mut; #endif scoped_lock( mongo::mutex &m ) : diff --git a/util/concurrency/race.h b/util/concurrency/race.h index 924d6d2fc5a..6be13363a6f 100644 --- a/util/concurrency/race.h +++ b/util/concurrency/race.h @@ -7,6 +7,12 @@ namespace mongo { namespace race { +#ifdef _WIN32 + typedef unsigned threadId_t; +#else + typedef pthread_t threadId_t; +#endif + #if defined(_DEBUG) class Block { diff --git a/util/concurrency/rwlock.h b/util/concurrency/rwlock.h index d14774b4ece..c281e54ecf0 100644 --- a/util/concurrency/rwlock.h +++ b/util/concurrency/rwlock.h @@ -38,20 +38,22 @@ namespace mongo { DEV mutexDebugger.leaving(_name); RWLockBase::unlock(); } + + void lock_shared() { RWLockBase::lock_shared(); } + void unlock_shared() { RWLockBase::unlock_shared(); } + void lockAsUpgradable() { RWLockBase::lockAsUpgradable(); } void unlockFromUpgradable() { // upgradable -> unlocked RWLockBase::unlockFromUpgradable(); } void upgrade() { // upgradable -> exclusive lock RWLockBase::upgrade(); - DEV mutexDebugger.entering(_name); } - void lock_shared() { RWLockBase::lock_shared(); } - void unlock_shared() { RWLockBase::unlock_shared(); } + bool lock_shared_try( int millis ) { return RWLockBase::lock_shared_try(millis); } + bool lock_try( int millis = 0 ) { if( RWLockBase::lock_try(millis) ) { - DEV mutexDebugger.entering(_name); return true; } return false; diff --git a/util/concurrency/synchronization.cpp b/util/concurrency/synchronization.cpp index 0ddc417eff1..ce2547c25eb 100644 --- a/util/concurrency/synchronization.cpp +++ b/util/concurrency/synchronization.cpp @@ -43,6 +43,7 @@ namespace mongo { NotifyAll::NotifyAll() : _mutex("NotifyAll") { _lastDone = 0; _lastReturned = 0; + _nWaiting = 0; } NotifyAll::When NotifyAll::now() { @@ -52,6 +53,7 @@ namespace mongo { void NotifyAll::waitFor(When e) { scoped_lock lock( _mutex ); + ++_nWaiting; while( _lastDone < e ) { _condition.wait( lock.boost() ); } @@ -59,6 +61,7 @@ namespace mongo { void NotifyAll::awaitBeyondNow() { scoped_lock lock( _mutex ); + ++_nWaiting; When e = ++_lastReturned; while( _lastDone <= e ) { _condition.wait( lock.boost() ); @@ -68,6 +71,7 @@ namespace mongo { void NotifyAll::notifyAll(When e) { scoped_lock lock( _mutex ); _lastDone = e; + _nWaiting = 0; _condition.notify_all(); } diff --git a/util/concurrency/synchronization.h b/util/concurrency/synchronization.h index 2467292616f..a0e89f7246b 100644 --- a/util/concurrency/synchronization.h +++ b/util/concurrency/synchronization.h @@ -65,16 +65,21 @@ namespace mongo { */ void waitFor(When); + /** a bit faster than waitFor( now() ) */ void awaitBeyondNow(); /** may be called multiple times. notifies all waiters */ void notifyAll(When); + /** indicates how many threads are waiting for a notify. */ + unsigned nWaiting() const { return _nWaiting; } + private: mongo::mutex _mutex; boost::condition _condition; When _lastDone; When _lastReturned; + unsigned _nWaiting; }; } // namespace mongo diff --git a/util/file.h b/util/file.h index 826a905b90e..368e6927b43 100644 --- a/util/file.h +++ b/util/file.h @@ -47,6 +47,9 @@ namespace mongo { fileofs len() { return 0; } void fsync() { assert(false); } + // shrink file to size bytes. No-op if file already smaller. 
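// e.g. (a sketch; the path and sizes are hypothetical) trimming a
// preallocated file back to its used length:
//     File f;
//     f.open("journal/j._0");
//     if (f.is_open() && f.len() > usedBytes)
//         f.truncate(usedBytes);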
+ void truncate(fileofs size); + /** @return -1 if error or unavailable */ static boost::intmax_t freeSpace(const string &path) { assert(false); return -1; } }; @@ -57,10 +60,11 @@ namespace mongo { class File : public FileInterface { HANDLE fd; bool _bad; + string _name; void err(BOOL b=false) { /* false = error happened */ if( !b && !_bad ) { _bad = true; - log() << "File I/O error " << GetLastError() << '\n'; + log() << "File " << _name << "I/O error " << GetLastError() << '\n'; } } public: @@ -73,6 +77,7 @@ namespace mongo { fd = INVALID_HANDLE_VALUE; } void open(const char *filename, bool readOnly=false , bool direct=false) { + _name = filename; fd = CreateFile( toNativeString(filename).c_str(), ( readOnly ? 0 : GENERIC_WRITE ) | GENERIC_READ, FILE_SHARE_WRITE|FILE_SHARE_READ, @@ -123,6 +128,20 @@ namespace mongo { return li.QuadPart; } void fsync() { FlushFileBuffers(fd); } + + void truncate(fileofs size) { + if (len() <= size) + return; + + LARGE_INTEGER li; + li.QuadPart = size; + if (SetFilePointerEx(fd, li, NULL, FILE_BEGIN) == 0){ + err(false); + return; //couldn't seek + } + + err(SetEndOfFile(fd)); + } }; #else @@ -194,6 +213,13 @@ namespace mongo { assert( !statvfs( path.c_str() , &info ) ); return boost::intmax_t( info.f_bavail ) * info.f_frsize; } + + void truncate(fileofs size) { + if (len() <= size) + return; + + err(ftruncate(fd, size) == 0); + } }; diff --git a/util/file_allocator.cpp b/util/file_allocator.cpp index bf01d90865f..b0572f971bd 100644 --- a/util/file_allocator.cpp +++ b/util/file_allocator.cpp @@ -287,8 +287,8 @@ namespace mongo { if ( fd > 0 ) close( fd ); log() << "error failed to allocate new file: " << name - << " size: " << size << ' ' << errnoWithDescription() << endl; - log() << " will try again in 10 seconds" << endl; + << " size: " << size << ' ' << errnoWithDescription() << warnings; + log() << " will try again in 10 seconds" << endl; // not going to warning logs try { if ( tmp.size() ) BOOST_CHECK_EXCEPTION( boost::filesystem::remove( tmp ) ); diff --git a/util/goodies.h b/util/goodies.h index 51a80f6783c..65bfbaba982 100644 --- a/util/goodies.h +++ b/util/goodies.h @@ -109,6 +109,8 @@ namespace mongo { // PRINTFL; prints file:line #define MONGO_PRINTFL cout << __FILE__ ":" << __LINE__ << endl #define PRINTFL MONGO_PRINTFL +#define MONGO_FLOG log() << __FILE__ ":" << __LINE__ << endl +#define FLOG MONGO_FLOG #undef assert #define assert MONGO_assert diff --git a/util/log.h b/util/log.h index b49d960c41d..d5c7e55aae0 100644 --- a/util/log.h +++ b/util/log.h @@ -298,6 +298,9 @@ namespace mongo { } public: static Logstream& get() { + if ( StaticObserver::_destroyingStatics ) { + cout << "Logstream::get called in uninitialized state" << endl; + } Logstream *p = tsp.get(); if( p == 0 ) tsp.reset( p = new Logstream() ); @@ -342,7 +345,7 @@ namespace mongo { return Logstream::get().prolog(); } -#define MONGO_LOG(level) MONGO_IF ( logLevel >= (level) ) log( level ) +#define MONGO_LOG(level) if ( MONGO_unlikely(logLevel >= (level)) ) log( level ) #define LOG MONGO_LOG inline Nullstream& log( LogLevel l ) { @@ -517,4 +520,6 @@ namespace mongo { } }; + extern Tee* const warnings; // Things put here go in serverStatus + } // namespace mongo diff --git a/util/logfile.cpp b/util/logfile.cpp index 37e14b47678..609edb8fe2d 100644 --- a/util/logfile.cpp +++ b/util/logfile.cpp @@ -77,9 +77,18 @@ namespace mongo { CloseHandle(_fd); } + void LogFile::truncate() { + verify(15870, _fd != INVALID_HANDLE_VALUE); + + if (!SetEndOfFile(_fd)){ + msgasserted(15871, 
"Couldn't truncate file: " + errnoWithDescription()); + } + } + void LogFile::synchronousAppend(const void *_buf, size_t _len) { const size_t BlockSize = 8 * 1024 * 1024; assert(_fd); + assert(_len % 4096 == 0); const char *buf = (const char *) _buf; size_t left = _len; while( left ) { @@ -88,7 +97,7 @@ namespace mongo { if( !WriteFile(_fd, buf, toWrite, &written, NULL) ) { DWORD e = GetLastError(); if( e == 87 ) - msgasserted(13519, "error 87 appending to file - misaligned direct write?"); + msgasserted(13519, "error 87 appending to file - invalid parameter"); else uasserted(13517, str::stream() << "error appending to file " << _name << ' ' << _len << ' ' << toWrite << ' ' << errnoWithDescription(e)); } @@ -150,8 +159,20 @@ namespace mongo { _fd = -1; } + void LogFile::truncate() { + verify(15872, _fd >= 0); + + BOOST_STATIC_ASSERT(sizeof(off_t) == 8); // we don't want overflow here + const off_t pos = lseek(_fd, 0, SEEK_CUR); // doesn't actually seek + if (ftruncate(_fd, pos) != 0){ + msgasserted(15873, "Couldn't truncate file: " + errnoWithDescription()); + } + } + void LogFile::synchronousAppend(const void *b, size_t len) { - off_t pos = lseek(_fd, 0, SEEK_CUR); // doesn't actually seek +#ifdef POSIX_FADV_DONTNEED + const off_t pos = lseek(_fd, 0, SEEK_CUR); // doesn't actually seek +#endif const char *buf = (char *) b; assert(_fd); diff --git a/util/logfile.h b/util/logfile.h index e4bbc467cb5..f6d1c94bf22 100644 --- a/util/logfile.h +++ b/util/logfile.h @@ -38,6 +38,8 @@ namespace mongo { const string _name; + void truncate(); // Removes extra data after current position + private: #if defined(_WIN32) typedef HANDLE fd_type; diff --git a/util/net/httpclient.cpp b/util/net/httpclient.cpp index de45023c2aa..16eaa0ae80a 100644 --- a/util/net/httpclient.cpp +++ b/util/net/httpclient.cpp @@ -38,8 +38,15 @@ namespace mongo { } int HttpClient::_go( const char * command , string url , const char * body , Result * result ) { - uassert( 10271 , "invalid url" , url.find( "http://" ) == 0 ); - url = url.substr( 7 ); + bool ssl = false; + if ( url.find( "https://" ) == 0 ) { + ssl = true; + url = url.substr( 8 ); + } + else { + uassert( 10271 , "invalid url" , url.find( "http://" ) == 0 ); + url = url.substr( 7 ); + } string host , path; if ( url.find( "/" ) == string::npos ) { @@ -56,7 +63,7 @@ namespace mongo { HD( "path [" << path << "]" ); string server = host; - int port = 80; + int port = ssl ? 443 : 80; string::size_type idx = host.find( ":" ); if ( idx != string::npos ) { @@ -92,6 +99,15 @@ namespace mongo { Socket sock; if ( ! 
sock.connect( addr ) ) return -1; + + if ( ssl ) { +#ifdef MONGO_SSL + _checkSSLManager(); + sock.secure( _sslManager.get() ); +#else + uasserted( 15862 , "no ssl support" ); +#endif + } { const char * out = req.c_str(); @@ -152,5 +168,10 @@ namespace mongo { _body = entire; } +#ifdef MONGO_SSL + void HttpClient::_checkSSLManager() { + _sslManager.reset( new SSLManager( true ) ); + } +#endif } diff --git a/util/net/httpclient.h b/util/net/httpclient.h index dadcc72f226..c3f8c824adc 100644 --- a/util/net/httpclient.h +++ b/util/net/httpclient.h @@ -18,10 +18,11 @@ #pragma once #include "../../pch.h" +#include "sock.h" namespace mongo { - class HttpClient { + class HttpClient : boost::noncopyable { public: typedef map<string,string> Headers; @@ -68,6 +69,11 @@ namespace mongo { private: int _go( const char * command , string url , const char * body , Result * result ); +#ifdef MONGO_SSL + void _checkSSLManager(); + + scoped_ptr<SSLManager> _sslManager; +#endif }; } diff --git a/util/net/listen.cpp b/util/net/listen.cpp index 53139ccc385..16ddde880b1 100644 --- a/util/net/listen.cpp +++ b/util/net/listen.cpp @@ -95,20 +95,42 @@ namespace mongo { return out; } + + Listener::Listener(const string& name, const string &ip, int port, bool logConnect ) + : _port(port), _name(name), _ip(ip), _logConnect(logConnect), _elapsedTime(0) { +#ifdef MONGO_SSL + _ssl = 0; + _sslPort = 0; + + if ( cmdLine.sslOnNormalPorts && cmdLine.sslServerManager ) { + secure( cmdLine.sslServerManager ); + } +#endif + } + + Listener::~Listener() { + if ( _timeTracker == this ) + _timeTracker = 0; + } - void Listener::initAndListen() { - checkTicketNumbers(); - vector<SockAddr> mine = ipToAddrs(_ip.c_str(), _port, (!cmdLine.noUnixSocket && useUnixSockets())); - vector<int> socks; - SOCKET maxfd = 0; // needed for select() +#ifdef MONGO_SSL + void Listener::secure( SSLManager* manager ) { + _ssl = manager; + } - for (vector<SockAddr>::iterator it=mine.begin(), end=mine.end(); it != end; ++it) { - SockAddr& me = *it; + void Listener::addSecurePort( SSLManager* manager , int additionalPort ) { + _ssl = manager; + _sslPort = additionalPort; + } + +#endif + + bool Listener::_setupSockets( const vector<SockAddr>& mine , vector<int>& socks ) { + for (vector<SockAddr>::const_iterator it=mine.begin(), end=mine.end(); it != end; ++it) { + const SockAddr& me = *it; SOCKET sock = ::socket(me.getType(), SOCK_STREAM, 0); - if ( sock == INVALID_SOCKET ) { - log() << "ERROR: listen(): invalid socket? " << errnoWithDescription() << endl; - } + massert( 15863 , str::stream() << "listen(): invalid socket? 
" << errnoWithDescription() , sock >= 0 ); if (me.getType() == AF_UNIX) { #if !defined(_WIN32) @@ -138,42 +160,90 @@ namespace mongo { if ( ::bind(sock, me.raw(), me.addressSize) != 0 ) { int x = errno; - log() << "listen(): bind() failed " << errnoWithDescription(x) << " for socket: " << me.toString() << endl; + error() << "listen(): bind() failed " << errnoWithDescription(x) << " for socket: " << me.toString() << endl; if ( x == EADDRINUSE ) - log() << " addr already in use" << endl; + error() << " addr already in use" << endl; closesocket(sock); - return; + return false; } #if !defined(_WIN32) if (me.getType() == AF_UNIX) { if (chmod(me.getAddr().c_str(), 0777) == -1) { - log() << "couldn't chmod socket file " << me << errnoWithDescription() << endl; + error() << "couldn't chmod socket file " << me << errnoWithDescription() << endl; } - ListeningSockets::get()->addPath( me.getAddr() ); } #endif - + if ( ::listen(sock, 128) != 0 ) { - log() << "listen(): listen() failed " << errnoWithDescription() << endl; + error() << "listen(): listen() failed " << errnoWithDescription() << endl; closesocket(sock); - return; + return false; } ListeningSockets::get()->add( sock ); socks.push_back(sock); - if (sock > maxfd) - maxfd = sock; } + + return true; + } + + void Listener::initAndListen() { + checkTicketNumbers(); + vector<int> socks; + set<int> sslSocks; + + { // normal sockets + vector<SockAddr> mine = ipToAddrs(_ip.c_str(), _port, (!cmdLine.noUnixSocket && useUnixSockets())); + if ( ! _setupSockets( mine , socks ) ) + return; + } + +#ifdef MONGO_SSL + if ( _ssl && _sslPort > 0 ) { + unsigned prev = socks.size(); + + vector<SockAddr> mine = ipToAddrs(_ip.c_str(), _sslPort, false ); + if ( ! _setupSockets( mine , socks ) ) + return; + + for ( unsigned i=prev; i<socks.size(); i++ ) { + sslSocks.insert( socks[i] ); + } + + } +#endif + + SOCKET maxfd = 0; // needed for select() + for ( unsigned i=0; i<socks.size(); i++ ) { + if ( socks[i] > maxfd ) + maxfd = socks[i]; + } + +#ifdef MONGO_SSL + if ( _ssl == 0 ) { + _logListen( _port , false ); + } + else if ( _sslPort == 0 ) { + _logListen( _port , true ); + } + else { + // both + _logListen( _port , false ); + _logListen( _sslPort , true ); + } +#else + _logListen( _port , false ); +#endif static long connNumber = 0; struct timeval maxSelectTime; while ( ! inShutdown() ) { fd_set fds[1]; FD_ZERO(fds); - + for (vector<int>::iterator it=socks.begin(), end=socks.end(); it != end; ++it) { FD_SET(*it, fds); } @@ -233,13 +303,25 @@ namespace mongo { disableNagle(s); if ( _logConnect && ! cmdLine.quiet ) log() << "connection accepted from " << from.toString() << " #" << ++connNumber << endl; - accepted(s, from); + + Socket newSock = Socket(s, from); +#ifdef MONGO_SSL + if ( _ssl && ( _sslPort == 0 || sslSocks.count(*it) ) ) { + newSock.secureAccepted( _ssl ); + } +#endif + accepted( newSock ); } } } - void Listener::accepted(int sock, const SockAddr& from) { - accepted( new MessagingPort(sock, from) ); + void Listener::_logListen( int port , bool ssl ) { + log() << _name << ( _name.size() ? " " : "" ) << "waiting for connections on port " << port << ( ssl ? 
" ssl" : "" ) << endl; + } + + + void Listener::accepted(Socket socket) { + accepted( new MessagingPort(socket) ); } void Listener::accepted(MessagingPort *mp) { diff --git a/util/net/listen.h b/util/net/listen.h index e8b4189c0f5..415db1e3fb6 100644 --- a/util/net/listen.h +++ b/util/net/listen.h @@ -25,15 +25,25 @@ namespace mongo { class Listener : boost::noncopyable { public: - Listener(const string &ip, int p, bool logConnect=true ) : _port(p), _ip(ip), _logConnect(logConnect), _elapsedTime(0) { } - virtual ~Listener() { - if ( _timeTracker == this ) - _timeTracker = 0; - } + + Listener(const string& name, const string &ip, int port, bool logConnect=true ); + + virtual ~Listener(); + +#ifdef MONGO_SSL + /** + * make this an ssl socket + * ownership of SSLManager remains with the caller + */ + void secure( SSLManager* manager ); + + void addSecurePort( SSLManager* manager , int additionalPort ); +#endif + void initAndListen(); // never returns unless error (start a thread) /* spawn a thread, etc., then return */ - virtual void accepted(int sock, const SockAddr& from); + virtual void accepted(Socket socket); virtual void accepted(MessagingPort *mp); const int _port; @@ -60,12 +70,25 @@ namespace mongo { } private: + string _name; string _ip; bool _logConnect; long long _elapsedTime; + +#ifdef MONGO_SSL + SSLManager* _ssl; + int _sslPort; +#endif + + /** + * @return true iff everything went ok + */ + bool _setupSockets( const vector<SockAddr>& mine , vector<int>& socks ); + + void _logListen( int port , bool ssl ); static const Listener* _timeTracker; - + virtual bool useUnixSockets() const { return false; } }; diff --git a/util/net/message_port.cpp b/util/net/message_port.cpp index 8c50c8d52a3..9abfaf7c975 100644 --- a/util/net/message_port.cpp +++ b/util/net/message_port.cpp @@ -137,6 +137,10 @@ namespace mongo { piggyBackData = 0; } + MessagingPort::MessagingPort( Socket& sock ) + : Socket( sock ) , piggyBackData( 0 ) { + } + void MessagingPort::shutdown() { close(); } diff --git a/util/net/message_port.h b/util/net/message_port.h index 6bbcc46f71c..22ecafecfbc 100644 --- a/util/net/message_port.h +++ b/util/net/message_port.h @@ -56,6 +56,8 @@ namespace mongo { // no data sent, then we detect that the other side is down MessagingPort(double so_timeout = 0, int logLevel = 0 ); + MessagingPort(Socket& socket); + virtual ~MessagingPort(); void shutdown(); diff --git a/util/net/message_server_port.cpp b/util/net/message_server_port.cpp index e9712d490ba..ca0b13dae07 100644 --- a/util/net/message_server_port.cpp +++ b/util/net/message_server_port.cpp @@ -28,6 +28,10 @@ #include "../../db/lasterror.h" #include "../../db/stats/counters.h" +#ifdef __linux__ // TODO: consider making this ifndef _WIN32 +# include <sys/resource.h> +#endif + namespace mongo { namespace pms { @@ -43,6 +47,8 @@ namespace mongo { inPort->setLogLevel(1); scoped_ptr<MessagingPort> p( inPort ); + p->postFork(); + string otherSide; Message m; @@ -98,7 +104,7 @@ namespace mongo { class PortMessageServer : public MessageServer , public Listener { public: PortMessageServer( const MessageServer::Options& opts, MessageHandler * handler ) : - Listener( opts.ipList, opts.port ) { + Listener( "" , opts.ipList, opts.port ) { uassert( 10275 , "multiple PortMessageServer not supported" , ! 
pms::handler ); pms::handler = handler; @@ -125,10 +131,18 @@ namespace mongo { pthread_attr_init(&attrs); pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED); - static const size_t STACK_SIZE = 1024*1024; - pthread_attr_setstacksize(&attrs, (DEBUG_BUILD - ? (STACK_SIZE / 2) - : STACK_SIZE)); + static const size_t STACK_SIZE = 1024*1024; // if we change this we need to update the warning + + struct rlimit limits; + verify(15887, getrlimit(RLIMIT_STACK, &limits) == 0); + if (limits.rlim_cur > STACK_SIZE) { + pthread_attr_setstacksize(&attrs, (DEBUG_BUILD + ? (STACK_SIZE / 2) + : STACK_SIZE)); + } else if (limits.rlim_cur < 1024*1024) { + warning() << "Stack size set to " << (limits.rlim_cur/1024) << "KB. We suggest 1MB" << endl; + } + pthread_t thread; int failed = pthread_create(&thread, &attrs, (void*(*)(void*)) &pms::threadRun, p); diff --git a/util/net/miniwebserver.cpp b/util/net/miniwebserver.cpp index 269a60bc85c..01a3418a909 100644 --- a/util/net/miniwebserver.cpp +++ b/util/net/miniwebserver.cpp @@ -23,8 +23,8 @@ namespace mongo { - MiniWebServer::MiniWebServer(const string &ip, int port) - : Listener(ip, port, false) + MiniWebServer::MiniWebServer(const string& name, const string &ip, int port) + : Listener(name, ip, port, false) {} string MiniWebServer::parseURL( const char * buf ) { @@ -108,17 +108,18 @@ namespace mongo { return false; } - void MiniWebServer::accepted(int s, const SockAddr &from) { - setSockTimeouts(s, 8); + void MiniWebServer::accepted(Socket sock) { + sock.postFork(); + sock.setTimeout(8); char buf[4096]; int len = 0; while ( 1 ) { int left = sizeof(buf) - 1 - len; if( left == 0 ) break; - int x = ::recv(s, buf + len, left, 0); + int x = sock.unsafe_recv( buf + len , left ); if ( x <= 0 ) { - closesocket(s); + sock.close(); return; } len += x; @@ -134,7 +135,7 @@ namespace mongo { vector<string> headers; try { - doRequest(buf, parseURL( buf ), responseMsg, responseCode, headers, from); + doRequest(buf, parseURL( buf ), responseMsg, responseCode, headers, sock.remoteAddr() ); } catch ( std::exception& e ) { responseCode = 500; @@ -165,8 +166,8 @@ namespace mongo { ss << responseMsg; string response = ss.str(); - ::send(s, response.c_str(), response.size(), 0); - closesocket(s); + sock.send( response.c_str(), response.size() , "http response" ); + sock.close(); } string MiniWebServer::getHeader( const char * req , string wanted ) { diff --git a/util/net/miniwebserver.h b/util/net/miniwebserver.h index 01c810b551e..1fb6b3f2e65 100644 --- a/util/net/miniwebserver.h +++ b/util/net/miniwebserver.h @@ -27,7 +27,7 @@ namespace mongo { class MiniWebServer : public Listener { public: - MiniWebServer(const string &ip, int _port); + MiniWebServer(const string& name, const string &ip, int _port); virtual ~MiniWebServer() {} virtual void doRequest( @@ -53,7 +53,7 @@ namespace mongo { static string urlDecode(string s) {return urlDecode(s.c_str());} private: - void accepted(int s, const SockAddr &from); + void accepted(Socket socket); static bool fullReceive( const char *buf ); }; diff --git a/util/net/sock.cpp b/util/net/sock.cpp index f9e4a85d832..69c42f2729d 100644 --- a/util/net/sock.cpp +++ b/util/net/sock.cpp @@ -34,21 +34,37 @@ # endif #endif +#ifdef MONGO_SSL +#include <openssl/err.h> +#include <openssl/ssl.h> +#endif + + namespace mongo { static bool ipv6 = false; void enableIPv6(bool state) { ipv6 = state; } bool IPv6Enabled() { return ipv6; } - // --- some global helpers ----- + void setSockTimeouts(int sock, double secs) { + struct timeval tv; + tv.tv_sec 
= (int)secs; + tv.tv_usec = (int)((long long)(secs*1000*1000) % (1000*1000)); + bool report = logLevel > 3; // solaris doesn't provide these + DEV report = true; + bool ok = setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, (char *) &tv, sizeof(tv) ) == 0; + if( report && !ok ) log() << "unabled to set SO_RCVTIMEO" << endl; + ok = setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, (char *) &tv, sizeof(tv) ) == 0; + DEV if( report && !ok ) log() << "unabled to set SO_RCVTIMEO" << endl; + } #if defined(_WIN32) void disableNagle(int sock) { int x = 1; if ( setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, (char *) &x, sizeof(x)) ) - out() << "ERROR: disableNagle failed" << endl; + error() << "disableNagle failed" << endl; if ( setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, (char *) &x, sizeof(x)) ) - out() << "ERROR: SO_KEEPALIVE failed" << endl; + error() << "SO_KEEPALIVE failed" << endl; } #else @@ -62,11 +78,35 @@ namespace mongo { #endif if ( setsockopt(sock, level, TCP_NODELAY, (char *) &x, sizeof(x)) ) - log() << "ERROR: disableNagle failed: " << errnoWithDescription() << endl; + error() << "disableNagle failed: " << errnoWithDescription() << endl; #ifdef SO_KEEPALIVE if ( setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, (char *) &x, sizeof(x)) ) - log() << "ERROR: SO_KEEPALIVE failed: " << errnoWithDescription() << endl; + error() << "SO_KEEPALIVE failed: " << errnoWithDescription() << endl; + +# ifdef __linux__ + socklen_t len = sizeof(x); + if ( getsockopt(sock, level, TCP_KEEPIDLE, (char *) &x, &len) ) + error() << "can't get TCP_KEEPIDLE: " << errnoWithDescription() << endl; + + if (x > 300) { + x = 300; + if ( setsockopt(sock, level, TCP_KEEPIDLE, (char *) &x, sizeof(x)) ) { + error() << "can't set TCP_KEEPIDLE: " << errnoWithDescription() << endl; + } + } + + len = sizeof(x); // just in case it changed + if ( getsockopt(sock, level, TCP_KEEPINTVL, (char *) &x, &len) ) + error() << "can't get TCP_KEEPINTVL: " << errnoWithDescription() << endl; + + if (x > 300) { + x = 300; + if ( setsockopt(sock, level, TCP_KEEPINTVL, (char *) &x, sizeof(x)) ) { + error() << "can't set TCP_KEEPINTVL: " << errnoWithDescription() << endl; + } + } +# endif #endif } @@ -299,29 +339,119 @@ namespace mongo { } + // ------------ SSLManager ----------------- + +#ifdef MONGO_SSL + SSLManager::SSLManager( bool client ) { + _client = client; + SSL_library_init(); + SSL_load_error_strings(); + ERR_load_crypto_strings(); + + _context = SSL_CTX_new( client ? 
+        massert( 15864 , mongoutils::str::stream() << "can't create SSL Context: " << ERR_error_string(ERR_get_error(), NULL) , _context );
+
+        SSL_CTX_set_options( _context, SSL_OP_ALL);
+    }
+
+    void SSLManager::setupPubPriv( const string& privateKeyFile , const string& publicKeyFile ) {
+        massert( 15865 ,
+                 mongoutils::str::stream() << "Can't read SSL certificate from file "
+                 << publicKeyFile << ":" << ERR_error_string(ERR_get_error(), NULL) ,
+                 SSL_CTX_use_certificate_file(_context, publicKeyFile.c_str(), SSL_FILETYPE_PEM) );
+
+        massert( 15866 ,
+                 mongoutils::str::stream() << "Can't read SSL private key from file "
+                 << privateKeyFile << " : " << ERR_error_string(ERR_get_error(), NULL) ,
+                 SSL_CTX_use_PrivateKey_file(_context, privateKeyFile.c_str(), SSL_FILETYPE_PEM) );
+    }
+
+    int SSLManager::password_cb(char *buf,int num, int rwflag,void *userdata){
+        SSLManager* sm = (SSLManager*)userdata;
+        string pass = sm->_password;
+        int n = (int)pass.size();
+        if ( n > num )
+            n = num; // never write past OpenSSL's num-byte buffer
+        memcpy(buf,pass.c_str(),n);
+        return n;
+    }
+
+    void SSLManager::setupPEM( const string& keyFile , const string& password ) {
+        _password = password;
+
+        massert( 15867 , "Can't read certificate file" , SSL_CTX_use_certificate_chain_file( _context , keyFile.c_str() ) );
+
+        SSL_CTX_set_default_passwd_cb_userdata( _context , this );
+        SSL_CTX_set_default_passwd_cb( _context, &SSLManager::password_cb );
+
+        massert( 15868 , "Can't read key file" , SSL_CTX_use_PrivateKey_file( _context , keyFile.c_str() , SSL_FILETYPE_PEM ) );
+    }
+
+    SSL * SSLManager::secure( int fd ) {
+        SSL * ssl = SSL_new( _context );
+        massert( 15861 , "can't create SSL" , ssl );
+        SSL_set_fd( ssl , fd );
+        return ssl;
+    }
+
+#endif

    // ------------ Socket -----------------

    Socket::Socket(int fd , const SockAddr& remote) :
        _fd(fd), _remote(remote), _timeout(0) {
        _logLevel = 0;
-        _bytesOut = 0;
-        _bytesIn = 0;
+        _init();
    }

    Socket::Socket( double timeout, int ll ) {
        _logLevel = ll;
        _fd = -1;
        _timeout = timeout;
+        _init();
+    }
+
+    void Socket::_init() {
        _bytesOut = 0;
        _bytesIn = 0;
+#ifdef MONGO_SSL
+        _sslAccepted = 0;
+#endif
    }

    void Socket::close() {
+#ifdef MONGO_SSL
+        _ssl.reset();
+#endif
        if ( _fd >= 0 ) {
            closesocket( _fd );
            _fd = -1;
        }
    }
+
+#ifdef MONGO_SSL
+    void Socket::secure( SSLManager * ssl ) {
+        assert( ssl );
+        assert( _fd >= 0 );
+        _ssl.reset( ssl->secure( _fd ) );
+        SSL_connect( _ssl.get() );
+    }
+
+    void Socket::secureAccepted( SSLManager * ssl ) {
+        _sslAccepted = ssl;
+    }
+#endif
+
+    void Socket::postFork() {
+#ifdef MONGO_SSL
+        if ( _sslAccepted ) {
+            assert( _fd >= 0 );
+            _ssl.reset( _sslAccepted->secure( _fd ) );
+            SSL_accept( _ssl.get() );
+            _sslAccepted = 0;
+        }
+#endif
+    }

    class ConnectBG : public BackgroundJob {
    public:
@@ -347,7 +477,7 @@ namespace mongo {
        }

        if ( _timeout > 0 ) {
-            setSockTimeouts( _fd, _timeout );
+            setTimeout( _timeout );
        }

        ConnectBG bg(_fd, remote);
@@ -377,12 +507,29 @@ namespace mongo {
        return true;
    }

+    int Socket::_send( const char * data , int len ) {
+#ifdef MONGO_SSL
+        if ( _ssl ) {
+            return SSL_write( _ssl.get() , data , len );
+        }
+#endif
+        return ::send( _fd , data , len , portSendFlags );
+    }

    // sends all data or throws an exception
    void Socket::send( const char * data , int len, const char *context ) {
        while( len > 0 ) {
-            int ret = ::send( _fd , data , len , portSendFlags );
+            int ret = _send( data , len );
            if ( ret == -1 ) {
+
+#ifdef MONGO_SSL
+                if ( _ssl ) {
+                    log() << "SSL Error ret: " << ret << " err: " << SSL_get_error( _ssl.get() , ret )
+                          << " " << ERR_error_string(ERR_get_error(), NULL)
+                          << endl;
+                }
+#endif
+
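/* SSL_get_error() (logged above) classifies the failed SSL_write() into an
   SSL_ERROR_* code - e.g. SSL_ERROR_WANT_READ/SSL_ERROR_WANT_WRITE for
   retryable conditions, or SSL_ERROR_SYSCALL for transport-level failures,
   in which case errno applies - while ERR_error_string() renders OpenSSL's
   queued error message, if any. */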
#if defined(_WIN32)
                if ( WSAGetLastError() == WSAETIMEDOUT && _timeout != 0 ) {
#else
@@ -408,15 +555,27 @@ namespace mongo {
        }
    }

-    // sends all data or throws an exception
-    void Socket::send( const vector< pair< char *, int > > &data, const char *context ) {
-#if defined(_WIN32)
-        // TODO use scatter/gather api
+    void Socket::_send( const vector< pair< char *, int > > &data, const char *context ) {
        for( vector< pair< char *, int > >::const_iterator i = data.begin(); i != data.end(); ++i ) {
            char * data = i->first;
            int len = i->second;
            send( data, len, context );
        }
+    }
+
+    // sends all data or throws an exception
+    void Socket::send( const vector< pair< char *, int > > &data, const char *context ) {
+
+#ifdef MONGO_SSL
+        if ( _ssl ) {
+            _send( data , context );
+            return;
+        }
+#endif
+
+#if defined(_WIN32)
+        // TODO use scatter/gather api
+        _send( data , context );
#else
        vector< struct iovec > d( data.size() );
        int i = 0;
@@ -479,23 +638,26 @@ namespace mongo {
            log(3) << "Socket recv() conn closed? " << remoteString() << endl;
            throw SocketException( SocketException::CLOSED , remoteString() );
        }
-        else { /* ret < 0 */
+        else { /* ret < 0 */
+#if defined(_WIN32)
+            int e = WSAGetLastError();
+#else
            int e = errno;
-
-#if defined(EINTR) && !defined(_WIN32)
+# if defined(EINTR)
            if( e == EINTR ) {
                if( ++retries == 1 ) {
                    log() << "EINTR retry" << endl;
                    continue;
                }
            }
+# endif
#endif
            if ( ( e == EAGAIN
#if defined(_WIN32)
                   || e == WSAETIMEDOUT
#endif
-                 ) && _timeout > 0 ) {
+                 ) && _timeout > 0 )
+            {
                // this is a timeout
                log(_logLevel) << "Socket recv() timeout " << remoteString() <<endl;
                throw SocketException( SocketException::RECV_TIMEOUT, remoteString() );
@@ -508,11 +670,33 @@ namespace mongo {
    }

    int Socket::unsafe_recv( char *buf, int max ) {
-        int x = ::recv( _fd , buf , max , portRecvFlags );
+        int x = _recv( buf , max );
        _bytesIn += x;
        return x;
    }
+
+    int Socket::_recv( char *buf, int max ) {
+#ifdef MONGO_SSL
+        if ( _ssl ){
+            return SSL_read( _ssl.get() , buf , max );
+        }
+#endif
+        return ::recv( _fd , buf , max , portRecvFlags );
+    }
+
+    void Socket::setTimeout( double secs ) {
+        struct timeval tv;
+        tv.tv_sec = (int)secs;
+        tv.tv_usec = (int)((long long)(secs*1000*1000) % (1000*1000));
+        bool report = logLevel > 3; // solaris doesn't provide these
+        DEV report = true;
+        bool ok = setsockopt(_fd, SOL_SOCKET, SO_RCVTIMEO, (char *) &tv, sizeof(tv) ) == 0;
+        if( report && !ok ) log() << "unable to set SO_RCVTIMEO" << endl;
+        ok = setsockopt(_fd, SOL_SOCKET, SO_SNDTIMEO, (char *) &tv, sizeof(tv) ) == 0;
+        DEV if( report && !ok ) log() << "unable to set SO_SNDTIMEO" << endl;
+    }
+
#if defined(_WIN32)
    struct WinsockInit {
        WinsockInit() {
diff --git a/util/net/sock.h b/util/net/sock.h
index 9c6f0251ad6..1cd51333525 100644
--- a/util/net/sock.h
+++ b/util/net/sock.h
@@ -39,6 +39,10 @@
#endif // _WIN32

+#ifdef MONGO_SSL
+#include <openssl/ssl.h>
+#endif
+
namespace mongo {

    const int SOCK_FAMILY_UNKNOWN_ERROR=13078;
@@ -68,24 +72,13 @@ namespace mongo {
        return mongoutils::str::stream() << cmdLine.socket << "/mongodb-" << port << ".sock";
    }

-    inline void setSockTimeouts(int sock, double secs) {
-        struct timeval tv;
-        tv.tv_sec = (int)secs;
-        tv.tv_usec = (int)((long long)(secs*1000*1000) % (1000*1000));
-        bool report = logLevel > 3; // solaris doesn't provide these
-        DEV report = true;
-        bool ok = setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, (char *) &tv, sizeof(tv) ) == 0;
-        if( report && !ok ) log() << "unabled to set SO_RCVTIMEO" << endl;
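/* Background for the timeout plumbing above: once SO_RCVTIMEO/SO_SNDTIMEO
   expire, recv()/send() fail with EAGAIN/EWOULDBLOCK on POSIX systems
   (WSAETIMEDOUT on Winsock), which is why Socket::recv() treats EAGAIN with
   _timeout > 0 as a RECV_TIMEOUT rather than as a fatal error. */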
-        ok = setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, (char *) &tv, sizeof(tv) ) == 0;
-        DEV if( report && !ok ) log() << "unabled to set SO_RCVTIMEO" << endl;
-    }
-
    // If an ip address is passed in, just return that. If a hostname is passed
    // in, look up its ip and return that. Returns "" on failure.
    string hostbyname(const char *hostname);

    void enableIPv6(bool state=true);
    bool IPv6Enabled();
+    void setSockTimeouts(int sock, double secs);

    /**
     * wrapped around os representation of network address
@@ -157,6 +150,29 @@ namespace mongo {
        string _extra;
    };

+#ifdef MONGO_SSL
+    class SSLManager : boost::noncopyable {
+    public:
+        SSLManager( bool client );
+
+        void setupPEM( const string& keyFile , const string& password );
+        void setupPubPriv( const string& privateKeyFile , const string& publicKeyFile );
+
+        /**
+         * creates a new SSL object (from this manager's context) bound to the given
+         * file descriptor; the caller owns the returned SSL and must free it
+         */
+        SSL * secure( int fd );
+
+        static int password_cb( char *buf,int num, int rwflag,void *userdata );
+
+    private:
+        bool _client;
+        SSL_CTX* _context;
+        string _password;
+    };
+#endif
+
    /**
     * thin wrapped around file descriptor and system calls
     * todo: ssl
@@ -165,9 +181,12 @@ namespace mongo {
    public:
        Socket(int sock, const SockAddr& farEnd);

-        // in some cases the timeout will actually be 2x this value - eg we do a partial send,
-        // then the timeout fires, then we try to send again, then the timeout fires again with
-        // no data sent, then we detect that the other side is down
+        /** In some cases the timeout will actually be 2x this value - eg we do a partial send,
+            then the timeout fires, then we try to send again, then the timeout fires again with
+            no data sent, then we detect that the other side is down.
+
+            Generally you don't want a timeout; if you set one, be prepared to handle errors.
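+            Concretely, with so_timeout = 10 a partial send can block for ten seconds and
+            return a short count, then the retried send can block for another ten, so
+            roughly twenty seconds may pass before the failure surfaces.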
+ */ Socket(double so_timeout = 0, int logLevel = 0 ); bool connect(SockAddr& farEnd); @@ -190,8 +209,32 @@ namespace mongo { void clearCounters() { _bytesIn = 0; _bytesOut = 0; } long long getBytesIn() const { return _bytesIn; } long long getBytesOut() const { return _bytesOut; } + + void setTimeout( double secs ); + +#ifdef MONGO_SSL + /** secures inline */ + void secure( SSLManager * ssl ); + void secureAccepted( SSLManager * ssl ); +#endif + + /** + * call this after a fork for server sockets + */ + void postFork(); + private: + void _init(); + /** raw send, same semantics as ::send */ + int _send( const char * data , int len ); + + /** sends dumbly, just each buffer at a time */ + void _send( const vector< pair< char *, int > > &data, const char *context ); + + /** raw recv, same semantics as ::recv */ + int _recv( char * buf , int max ); + int _fd; SockAddr _remote; double _timeout; @@ -199,6 +242,11 @@ namespace mongo { long long _bytesIn; long long _bytesOut; +#ifdef MONGO_SSL + shared_ptr<SSL> _ssl; + SSLManager * _sslAccepted; +#endif + protected: int _logLevel; // passed to log() when logging errors diff --git a/util/paths.h b/util/paths.h index 4ae591fb49b..2297a9a2f90 100644 --- a/util/paths.h +++ b/util/paths.h @@ -23,9 +23,9 @@ #include <sys/stat.h> #include <fcntl.h> -using namespace mongoutils; - namespace mongo { + + using namespace mongoutils; extern string dbpath; diff --git a/util/processinfo_darwin.cpp b/util/processinfo_darwin.cpp index c1190aec438..9f73cbffd4f 100644 --- a/util/processinfo_darwin.cpp +++ b/util/processinfo_darwin.cpp @@ -19,15 +19,14 @@ #include "processinfo.h" #include "log.h" - +#include <mach/vm_statistics.h> #include <mach/task_info.h> - #include <mach/mach_init.h> #include <mach/mach_host.h> #include <mach/mach_traps.h> #include <mach/task.h> #include <mach/vm_map.h> -#include <mach/shared_memory_server.h> +#include <mach/shared_region.h> #include <iostream> #include <sys/types.h> diff --git a/util/ramlog.cpp b/util/ramlog.cpp index f8cfa0a7052..69ffc175ee9 100644 --- a/util/ramlog.cpp +++ b/util/ramlog.cpp @@ -25,7 +25,7 @@ namespace mongo { using namespace mongoutils; - RamLog::RamLog( string name ) : _name(name) { + RamLog::RamLog( string name ) : _name(name), _lastWrite(0) { h = 0; n = 0; for( int i = 0; i < N; i++ ) lines[i][C-1] = 0; @@ -48,6 +48,8 @@ namespace mongo { } void RamLog::write(LogLevel ll, const string& str) { + _lastWrite = time(0); + char *p = lines[(h+n)%N]; unsigned sz = str.size(); @@ -183,4 +185,6 @@ namespace mongo { mongo::mutex* RamLog::_namedLock; RamLog::RM* RamLog::_named = 0; + + Tee* const warnings = new RamLog("warnings"); // Things put here go in serverStatus } diff --git a/util/ramlog.h b/util/ramlog.h index 8539a436388..d3d5c8fbb4e 100644 --- a/util/ramlog.h +++ b/util/ramlog.h @@ -34,6 +34,8 @@ namespace mongo { static RamLog* get( string name ); static void getNames( vector<string>& names ); + time_t lastWrite() { return _lastWrite; } // 0 if no writes + protected: static int repeats(const vector<const char *>& v, int i); static string clean(const vector<const char *>& v, int i, string line=""); @@ -57,6 +59,7 @@ namespace mongo { typedef map<string,RamLog*> RM; static mongo::mutex* _namedLock; static RM* _named; + time_t _lastWrite; }; } diff --git a/util/stringutils.h b/util/stringutils.h index bab9f608f7e..93598aa520b 100644 --- a/util/stringutils.h +++ b/util/stringutils.h @@ -40,7 +40,11 @@ namespace mongo { return string(copy); } - // for convenience, '{' is greater than anything and stops number 
parsing + /** + * Non numeric characters are compared lexicographically; numeric substrings + * are compared numerically; dots separate ordered comparable subunits. + * For convenience, character 255 is greater than anything else. + */ inline int lexNumCmp( const char *s1, const char *s2 ) { //cout << "START : " << s1 << "\t" << s2 << endl; @@ -48,6 +52,18 @@ namespace mongo { while( *s1 && *s2 ) { + bool d1 = ( *s1 == '.' ); + bool d2 = ( *s2 == '.' ); + if ( d1 && !d2 ) + return -1; + if ( d2 && !d1 ) + return 1; + if ( d1 && d2 ) { + ++s1; ++s2; + startWord = true; + continue; + } + bool p1 = ( *s1 == (char)255 ); bool p2 = ( *s2 == (char)255 ); //cout << "\t\t " << p1 << "\t" << p2 << endl; @@ -64,7 +80,6 @@ namespace mongo { if ( startWord ) { while ( *s1 == '0' ) s1++; while ( *s2 == '0' ) s2++; - startWord = false; } char * e1 = (char*)s1; @@ -94,6 +109,7 @@ namespace mongo { // otherwise, the numbers are equal s1 = e1; s2 = e2; + startWord = false; continue; } @@ -109,11 +125,8 @@ namespace mongo { if ( *s2 > *s1 ) return -1; - if ( *s1 == '.' ) - startWord = true; - else - startWord = false; s1++; s2++; + startWord = false; } if ( *s1 ) diff --git a/util/time_support.h b/util/time_support.h index ce2cdbc0e15..ca17807ec96 100644 --- a/util/time_support.h +++ b/util/time_support.h @@ -52,6 +52,16 @@ namespace mongo { return buf; } + inline string timeToISOString(time_t time) { + struct tm t; + time_t_to_Struct( time, &t ); + + const char* fmt = "%Y-%m-%dT%H:%M:%SZ"; + char buf[32]; + assert(strftime(buf, sizeof(buf), fmt, &t) == 20); + return buf; + } + inline boost::gregorian::date currentDate() { boost::posix_time::ptime now = boost::posix_time::second_clock::local_time(); return now.date(); diff --git a/util/timer.h b/util/timer.h index 9db907185dd..cbfe859ef5c 100644 --- a/util/timer.h +++ b/util/timer.h @@ -30,6 +30,8 @@ namespace mongo { Timer( unsigned long long startMicros ) { old = startMicros; } int seconds() const { return (int)(micros() / 1000000); } int millis() const { return (int)(micros() / 1000); } + int minutes() const { return seconds() / 60; } + /** gets time interval and resets at the same time. this way we can call curTimeMicros once instead of twice if one wanted millis() and then reset(). diff --git a/util/version.cpp b/util/version.cpp index f9c1471c88e..809f4cde3eb 100644 --- a/util/version.cpp +++ b/util/version.cpp @@ -26,6 +26,8 @@ #include "stringutils.h" #include "../db/jsobj.h" #include "file.h" +#include "ramlog.h" +#include "../db/cmdline.h" namespace mongo { @@ -36,7 +38,7 @@ namespace mongo { * 1.2.3-rc4-pre- * If you really need to do something else you'll need to fix _versionArray() */ - const char versionString[] = "1.9.1-pre-"; + const char versionString[] = "2.0.0-rc0-pre-"; // See unit test for example outputs static BSONArray _versionArray(const char* version){ @@ -114,35 +116,39 @@ namespace mongo { log() << "build info: " << sysInfo() << endl; } + + static Tee * startupWarningsLog = new RamLog("startupWarnings"); //intentionally leaked + // - // 32 bit systems warning + // system warnings // void show_warnings() { - // each message adds a leading but not a trailing newline + // each message adds a leading and a trailing newline bool warned = false; { const char * foo = strchr( versionString , '.' ) + 1; int bar = atoi( foo ); if ( ( 2 * ( bar / 2 ) ) != bar ) { - cout << "\n** NOTE: This is a development version (" << versionString << ") of MongoDB."; - cout << "\n** Not recommended for production." 
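/* The log() << ... << startupWarningsLog pattern used in this hunk tees each
   warning into the "startupWarnings" RamLog as well as the normal log, so the
   messages can still be read back from memory later (via
   RamLog::get("startupWarnings")) long after the startup output has scrolled by. */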
<< endl; + log() << startupWarningsLog; + log() << "** NOTE: This is a development version (" << versionString << ") of MongoDB." << startupWarningsLog; + log() << "** Not recommended for production." << startupWarningsLog; warned = true; } } if ( sizeof(int*) == 4 ) { - cout << endl; - cout << "** NOTE: when using MongoDB 32 bit, you are limited to about 2 gigabytes of data" << endl; - cout << "** see http://blog.mongodb.org/post/137788967/32-bit-limitations" << endl; - cout << "** with --journal, the limit is lower" << endl; + log() << startupWarningsLog; + log() << "** NOTE: when using MongoDB 32 bit, you are limited to about 2 gigabytes of data" << startupWarningsLog; + log() << "** see http://blog.mongodb.org/post/137788967/32-bit-limitations" << startupWarningsLog; + log() << "** with --journal, the limit is lower" << startupWarningsLog; warned = true; } #ifdef __linux__ if (boost::filesystem::exists("/proc/vz") && !boost::filesystem::exists("/proc/bc")) { - cout << endl; - cout << "** WARNING: You are running in OpenVZ. This is known to be broken!!!" << endl; + log() << startupWarningsLog; + log() << "** WARNING: You are running in OpenVZ. This is known to be broken!!!" << startupWarningsLog; warned = true; } @@ -172,22 +178,49 @@ namespace mongo { const char* space = strchr(line, ' '); if ( ! space ) { - cout << "** WARNING: cannot parse numa_maps" << endl; + log() << startupWarningsLog; + log() << "** WARNING: cannot parse numa_maps" << startupWarningsLog; warned = true; } else if ( ! startsWith(space+1, "interleave") ) { - cout << endl; - cout << "** WARNING: You are running on a NUMA machine." << endl; - cout << "** We suggest launching mongod like this to avoid performance problems:" << endl; - cout << "** numactl --interleave=all mongod [other options]" << endl; + log() << startupWarningsLog; + log() << "** WARNING: You are running on a NUMA machine." << startupWarningsLog; + log() << "** We suggest launching mongod like this to avoid performance problems:" << startupWarningsLog; + log() << "** numactl --interleave=all mongod [other options]" << startupWarningsLog; warned = true; } } } + + if (cmdLine.dur){ + fstream f ("/proc/sys/vm/overcommit_memory", ios_base::in); + unsigned val; + f >> val; + + if (val == 2) { + log() << startupWarningsLog; + log() << "** WARNING: /proc/sys/vm/overcommit_memory is " << val << startupWarningsLog; + log() << "** Journaling works best with it set to 0 or 1" << startupWarningsLog; + } + } + + if (boost::filesystem::exists("/proc/sys/vm/zone_reclaim_mode")){ + fstream f ("/proc/sys/vm/zone_reclaim_mode", ios_base::in); + unsigned val; + f >> val; + + if (val != 0) { + log() << startupWarningsLog; + log() << "** WARNING: /proc/sys/vm/zone_reclaim_mode is " << val << startupWarningsLog; + log() << "** We suggest setting it to 0" << startupWarningsLog; + log() << "** http://www.kernel.org/doc/Documentation/sysctl/vm.txt" << startupWarningsLog; + } + } #endif - if (warned) - cout << endl; + if (warned) { + log() << startupWarningsLog; + } } int versionCmp(StringData rhs, StringData lhs) { |
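/* For reference, the /proc checks above reduce to a small helper along these
   lines (readProcUnsigned is a hypothetical name, not something this change
   introduces):

       static unsigned readProcUnsigned(const char* path) { // hypothetical helper
           unsigned val = 0;                 // stays 0 if the read fails
           fstream f(path, ios_base::in);
           f >> val;
           return val;
       }

       // readProcUnsigned("/proc/sys/vm/overcommit_memory") : 0 or 1 preferred when journaling
       // readProcUnsigned("/proc/sys/vm/zone_reclaim_mode") : 0 preferred
*/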