author     Dwight <dwight@10gen.com>  2011-08-17 16:55:13 -0400
committer  Dwight <dwight@10gen.com>  2011-08-17 16:55:13 -0400
commit     48977d2abc8ecffaa4c547d427603c7ff24895d3 (patch)
tree       d910534e1d623fdc72dbd063ebf4878b8efedd7c
parent     9ce68d36823c22f641f705928f1c1b22f6206d76 (diff)
parent     e4a084bdab0e2a61e81476068ed494e346715d41 (diff)
download   mongo-48977d2abc8ecffaa4c547d427603c7ff24895d3.tar.gz

merge
Diffstat:
-rw-r--r--  .gitignore | 1
-rw-r--r--  SConstruct | 35
-rw-r--r--  bson/bson-inl.h | 2
-rw-r--r--  bson/bsonobj.h | 2
-rw-r--r--  bson/bsonobjbuilder.h | 4
-rw-r--r--  bson/bsonobjiterator.h | 2
-rw-r--r--  bson/inline_decls.h | 32
-rw-r--r--  bson/stringdata.h | 2
-rwxr-xr-x  buildscripts/errorcodes.py | 11
-rwxr-xr-x  buildscripts/smoke.py | 32
-rw-r--r--  client/connpool.cpp | 67
-rw-r--r--  client/dbclient.cpp | 22
-rw-r--r--  client/dbclient.h | 12
-rw-r--r--  client/dbclient_rs.cpp | 188
-rw-r--r--  client/dbclient_rs.h | 61
-rw-r--r--  client/dbclientcursor.cpp | 1
-rw-r--r--  client/distlock.cpp | 4
-rw-r--r--  client/distlock_test.cpp | 6
-rw-r--r--  client/examples/httpClientTest.cpp | 38
-rw-r--r--  client/examples/rs.cpp | 20
-rw-r--r--  client/parallel.cpp | 21
-rw-r--r--  client/parallel.h | 5
-rw-r--r--  client/syncclusterconnection.h | 1
-rw-r--r--  db/btree.cpp | 2
-rw-r--r--  db/btree.h | 2
-rw-r--r--  db/btreecursor.cpp | 27
-rw-r--r--  db/client.cpp | 18
-rw-r--r--  db/clientcursor.cpp | 21
-rw-r--r--  db/clientcursor.h | 3
-rw-r--r--  db/cloner.cpp | 12
-rw-r--r--  db/cmdline.cpp | 61
-rw-r--r--  db/cmdline.h | 35
-rw-r--r--  db/commands.h | 12
-rw-r--r--  db/commands/distinct.cpp | 2
-rw-r--r--  db/commands/find_and_modify.cpp | 2
-rw-r--r--  db/commands/group.cpp | 19
-rw-r--r--  db/commands/isself.cpp | 2
-rw-r--r--  db/commands/mr.cpp | 28
-rw-r--r--  db/compact.cpp | 3
-rw-r--r--  db/curop.h | 15
-rw-r--r--  db/cursor.h | 2
-rw-r--r--  db/database.cpp | 21
-rw-r--r--  db/db.cpp | 26
-rwxr-xr-x  db/db.vcxproj | 21
-rwxr-xr-x  db/db.vcxproj.filters | 23
-rw-r--r--  db/dbcommands.cpp | 118
-rw-r--r--  db/dbcommands_admin.cpp | 8
-rw-r--r--  db/dbcommands_generic.cpp | 44
-rw-r--r--  db/dbeval.cpp | 2
-rw-r--r--  db/dbmessage.h | 4
-rw-r--r--  db/dbwebserver.cpp | 5
-rw-r--r--  db/driverHelpers.cpp | 2
-rw-r--r--  db/dur.cpp | 93
-rw-r--r--  db/dur_journal.cpp | 103
-rw-r--r--  db/dur_journal.h | 11
-rw-r--r--  db/dur_journalformat.h | 25
-rw-r--r--  db/dur_journalimpl.h | 12
-rw-r--r--  db/dur_preplogbuffer.cpp | 61
-rw-r--r--  db/dur_recover.cpp | 172
-rw-r--r--  db/dur_recover.h | 9
-rw-r--r--  db/dur_stats.h | 1
-rw-r--r--  db/dur_writetodatafiles.cpp | 10
-rw-r--r--  db/durop.h | 2
-rw-r--r--  db/geo/2d.cpp | 1340
-rw-r--r--  db/geo/core.h | 9
-rw-r--r--  db/geo/haystack.cpp | 2
-rw-r--r--  db/index.cpp | 5
-rw-r--r--  db/index.h | 10
-rw-r--r--  db/indexkey.cpp | 379
-rw-r--r--  db/indexkey.h | 13
-rw-r--r--  db/instance.cpp | 2
-rw-r--r--  db/instance.h | 2
-rw-r--r--  db/introspect.cpp | 22
-rw-r--r--  db/jsobj.cpp | 22
-rw-r--r--  db/key.cpp | 20
-rw-r--r--  db/matcher.cpp | 13
-rw-r--r--  db/modules/mms.cpp | 2
-rw-r--r--  db/mongommf.h | 2
-rw-r--r--  db/namespace.cpp | 13
-rw-r--r--  db/namespace.h | 1
-rw-r--r--  db/oplog.cpp | 32
-rw-r--r--  db/oplog.h | 1
-rw-r--r--  db/ops/query.cpp | 16
-rw-r--r--  db/ops/update.cpp | 49
-rw-r--r--  db/pdfile.cpp | 47
-rw-r--r--  db/pdfile.h | 2
-rw-r--r--  db/queryoptimizer.cpp | 107
-rw-r--r--  db/queryoptimizer.h | 16
-rw-r--r--  db/queryutil-inl.h | 19
-rw-r--r--  db/queryutil.cpp | 159
-rw-r--r--  db/queryutil.h | 4
-rw-r--r--  db/record.cpp | 21
-rw-r--r--  db/repl.cpp | 10
-rw-r--r--  db/repl/consensus.cpp | 11
-rw-r--r--  db/repl/heartbeat.cpp | 18
-rw-r--r--  db/repl/replset_commands.cpp | 34
-rw-r--r--  db/repl/rs.cpp | 63
-rw-r--r--  db/repl/rs.h | 62
-rw-r--r--  db/repl/rs_config.cpp | 115
-rw-r--r--  db/repl/rs_config.h | 12
-rw-r--r--  db/repl/rs_initialsync.cpp | 3
-rw-r--r--  db/repl/rs_initiate.cpp | 2
-rw-r--r--  db/repl/rs_member.h | 3
-rw-r--r--  db/repl/rs_rollback.cpp | 2
-rw-r--r--  db/repl/rs_sync.cpp | 36
-rw-r--r--  db/scanandorder.cpp | 93
-rw-r--r--  db/scanandorder.h | 83
-rw-r--r--  db/security.cpp | 10
-rwxr-xr-x [-rw-r--r--]  db/security.h | 0
-rw-r--r--  db/security_commands.cpp | 4
-rw-r--r--  db/security_common.h | 6
-rw-r--r--  db/stats/top.cpp | 2
-rw-r--r--  dbtests/basictests.cpp | 53
-rw-r--r--  dbtests/cursortests.cpp | 26
-rw-r--r--  dbtests/directclienttests.cpp | 2
-rw-r--r--  dbtests/framework.cpp | 5
-rw-r--r--  dbtests/jsobjtests.cpp | 7
-rw-r--r--  dbtests/namespacetests.cpp | 374
-rw-r--r--  dbtests/perftests.cpp | 305
-rw-r--r--  dbtests/queryoptimizertests.cpp | 35
-rw-r--r--  dbtests/querytests.cpp | 39
-rw-r--r--  dbtests/repltests.cpp | 28
-rwxr-xr-x  dbtests/test.sln | 26
-rw-r--r--  dbtests/test.vcxproj | 27
-rwxr-xr-x  dbtests/test.vcxproj.filters | 39
-rw-r--r--  debian/changelog | 12
-rwxr-xr-x  distsrc/client/SConstruct | 2
-rw-r--r--  doxygenConfig | 2
-rw-r--r--  jstests/ageoutjournalfiles.js | 16
-rw-r--r--  jstests/array_match3.js | 2
-rw-r--r--  jstests/arrayfind4.js | 22
-rw-r--r--  jstests/arrayfind5.js | 23
-rw-r--r--  jstests/capped2.js | 4
-rw-r--r--  jstests/capped5.js | 1
-rw-r--r--  jstests/capped6.js | 2
-rw-r--r--  jstests/cappeda.js | 33
-rw-r--r--  jstests/date3.js | 29
-rw-r--r--  jstests/dbhash.js | 10
-rw-r--r--  jstests/disk/quota.js | 47
-rw-r--r--  jstests/disk/quota2.js | 38
-rw-r--r--  jstests/drop2.js | 2
-rw-r--r--  jstests/dur/diskfull.js | 51
-rw-r--r--  jstests/evald.js | 10
-rw-r--r--  jstests/exists9.js | 5
-rw-r--r--  jstests/geo_mapreduce2.js | 36
-rw-r--r--  jstests/group7.js | 43
-rw-r--r--  jstests/in9.js | 4
-rw-r--r--  jstests/ina.js | 15
-rwxr-xr-x  jstests/indexbindata.js | 0
-rw-r--r--  jstests/indexr.js | 4
-rw-r--r--  jstests/indexs.js | 2
-rw-r--r--  jstests/indext.js | 21
-rw-r--r--  jstests/indexu.js | 137
-rw-r--r--  jstests/indexv.js | 18
-rw-r--r--  jstests/indexw.js | 14
-rw-r--r--  jstests/libs/testconfig | 4
-rw-r--r--  jstests/ork.js | 11
-rw-r--r--  jstests/orl.js | 13
-rw-r--r--  jstests/orm.js | 26
-rw-r--r--  jstests/profile1.js | 50
-rw-r--r--  jstests/profile2.js | 19
-rw-r--r--  jstests/profile3.js | 26
-rw-r--r--  jstests/regexa.js | 4
-rw-r--r--  jstests/repl/basic1.js | 2
-rw-r--r--  jstests/repl/drop_dups.js | 63
-rw-r--r--  jstests/repl/repl3.js | 58
-rw-r--r--  jstests/replsets/auth1.js | 4
-rwxr-xr-x  jstests/replsets/downstream.js | 36
-rw-r--r--  jstests/replsets/fastsync.js | 7
-rw-r--r--  jstests/replsets/maintenance.js | 32
-rw-r--r--  jstests/replsets/remove1.js | 8
-rw-r--r--  jstests/replsets/replset5.js | 92
-rw-r--r--  jstests/replsets/replsetarb2.js | 12
-rw-r--r--  jstests/replsets/rollback2.js | 19
-rw-r--r--  jstests/replsets/tags.js | 33
-rw-r--r--  jstests/replsets/tags2.js | 44
-rw-r--r--  jstests/replsets/toostale.js | 14
-rw-r--r--  jstests/sharding/addshard4.js | 8
-rw-r--r--  jstests/sharding/array_shard_key.js | 127
-rw-r--r--  jstests/sharding/auth.js | 22
-rw-r--r--  jstests/sharding/count_slaveok.js | 69
-rw-r--r--  jstests/sharding/drop_sharded_db.js | 62
-rw-r--r--  jstests/sharding/features3.js | 59
-rw-r--r--  jstests/sharding/group_slaveok.js | 68
-rw-r--r--  jstests/sharding/parallel.js | 38
-rw-r--r--  jstests/sharding/shard3.js | 1
-rw-r--r--  jstests/sharding/sync6.js | 9
-rw-r--r--  jstests/slowNightly/command_line_parsing.js | 12
-rw-r--r--  jstests/slowNightly/dur_big_atomic_update.js | 17
-rw-r--r--  jstests/slowNightly/replReads.js | 108
-rw-r--r--  jstests/slowNightly/sharding_migrateBigObject.js | 13
-rw-r--r--  jstests/slowNightly/sharding_passthrough.js | 6
-rw-r--r--  jstests/slowNightly/sharding_rs1.js | 6
-rw-r--r--  jstests/slowNightly/sharding_rs_arb1.js | 40
-rw-r--r--  jstests/slowNightly/sync6_slow.js | 82
-rw-r--r--  jstests/slowWeekly/geo_full.js | 86
-rw-r--r--  jstests/slowWeekly/geo_mnypts_plus_fields.js | 98
-rw-r--r--  jstests/slowWeekly/update_yield1.js | 2
-rw-r--r--  jstests/sorta.js | 11
-rw-r--r--  jstests/tool/csvexport1.js | 45
-rw-r--r--  jstests/tool/csvexport2.js | 31
-rw-r--r--  jstests/tool/csvimport1.js | 40
-rw-r--r--  jstests/tool/data/csvimport1.csv | 8
-rw-r--r--  jstests/tool/dumprestore5.js | 36
-rw-r--r--  jstests/unique2.js | 53
-rw-r--r--  jstests/uniqueness.js | 13
-rw-r--r--  jstests/updatef.js | 24
-rw-r--r--  jstests/updateg.js | 17
-rw-r--r--  pch.h | 19
-rw-r--r--  rpm/mongo.spec | 2
-rw-r--r--  s/balance.cpp | 18
-rw-r--r--  s/balancer_policy.cpp | 8
-rw-r--r--  s/chunk.cpp | 28
-rw-r--r--  s/commands_admin.cpp | 64
-rw-r--r--  s/commands_public.cpp | 107
-rw-r--r--  s/config.cpp | 44
-rw-r--r--  s/config.h | 1
-rw-r--r--  s/cursors.cpp | 18
-rw-r--r--  s/d_logic.cpp | 8
-rw-r--r--  s/d_migrate.cpp | 74
-rw-r--r--  s/d_split.cpp | 15
-rw-r--r--  s/d_state.cpp | 12
-rw-r--r--  s/d_writeback.cpp | 6
-rw-r--r--  s/grid.cpp | 11
-rw-r--r--  s/request.cpp | 28
-rw-r--r--  s/request.h | 2
-rw-r--r--  s/s_only.cpp | 2
-rw-r--r--  s/security.cpp | 2
-rw-r--r--  s/server.cpp | 16
-rw-r--r--  s/shard.cpp | 6
-rw-r--r--  s/shard_version.cpp | 12
-rw-r--r--  s/shardkey.cpp | 17
-rw-r--r--  s/shardkey.h | 16
-rw-r--r--  s/strategy.cpp | 4
-rw-r--r--  s/strategy_shard.cpp | 59
-rw-r--r--  s/strategy_single.cpp | 8
-rw-r--r--  s/writeback_listener.cpp | 7
-rw-r--r--  scripting/bench.cpp | 2
-rw-r--r--  server.h | 50
-rw-r--r--  shell/collection.js | 2
-rw-r--r--  shell/dbshell.cpp | 12
-rw-r--r--  shell/mongo.js | 9
-rw-r--r--  shell/mongo_vstudio.cpp | 120
-rwxr-xr-x  shell/servers.js | 256
-rw-r--r--  shell/utils.js | 101
-rw-r--r--  shell/utils_sh.js | 8
-rwxr-xr-x  speed.js | 13
-rw-r--r--  third_party/linenoise/linenoise.cpp | 5
-rwxr-xr-x  third_party/snappy/COPYING | 28
-rwxr-xr-x  third_party/snappy/README | 135
-rwxr-xr-x  third_party/snappy/config.h | 124
-rwxr-xr-x  third_party/snappy/snappy-internal.h | 150
-rwxr-xr-x  third_party/snappy/snappy-sinksource.cc | 72
-rwxr-xr-x  third_party/snappy/snappy-sinksource.h | 136
-rwxr-xr-x  third_party/snappy/snappy-stubs-internal.cc | 42
-rwxr-xr-x  third_party/snappy/snappy-stubs-internal.h | 478
-rwxr-xr-x  third_party/snappy/snappy-stubs-public.h | 85
-rwxr-xr-x  third_party/snappy/snappy.cc | 1026
-rwxr-xr-x  third_party/snappy/snappy.h | 155
-rw-r--r--  tools/bridge.cpp | 2
-rw-r--r--  tools/export.cpp | 69
-rw-r--r--  tools/import.cpp | 327
-rw-r--r--  tools/restore.cpp | 32
-rw-r--r--  tools/tool.cpp | 15
-rw-r--r--  util/alignedbuilder.cpp | 35
-rw-r--r--  util/alignedbuilder.h | 9
-rw-r--r--  util/array.h | 7
-rw-r--r--  util/assert_util.cpp | 20
-rw-r--r--  util/assert_util.h | 8
-rw-r--r--  util/bufreader.h | 2
-rw-r--r--  util/compress.cpp | 31
-rw-r--r--  util/compress.h | 21
-rw-r--r--  util/concurrency/mutex.h | 12
-rw-r--r--  util/concurrency/race.h | 6
-rw-r--r--  util/concurrency/rwlock.h | 10
-rw-r--r--  util/concurrency/synchronization.cpp | 4
-rw-r--r--  util/concurrency/synchronization.h | 5
-rw-r--r--  util/file.h | 28
-rw-r--r--  util/file_allocator.cpp | 4
-rw-r--r--  util/goodies.h | 2
-rw-r--r--  util/log.h | 7
-rw-r--r--  util/logfile.cpp | 25
-rw-r--r--  util/logfile.h | 2
-rw-r--r--  util/net/httpclient.cpp | 27
-rw-r--r--  util/net/httpclient.h | 8
-rw-r--r--  util/net/listen.cpp | 130
-rw-r--r--  util/net/listen.h | 37
-rw-r--r--  util/net/message_port.cpp | 4
-rw-r--r--  util/net/message_port.h | 2
-rw-r--r--  util/net/message_server_port.cpp | 24
-rw-r--r--  util/net/miniwebserver.cpp | 19
-rw-r--r--  util/net/miniwebserver.h | 4
-rw-r--r--  util/net/sock.cpp | 222
-rw-r--r--  util/net/sock.h | 78
-rw-r--r--  util/paths.h | 4
-rw-r--r--  util/processinfo_darwin.cpp | 5
-rw-r--r--  util/ramlog.cpp | 6
-rw-r--r--  util/ramlog.h | 3
-rw-r--r--  util/stringutils.h | 25
-rw-r--r--  util/time_support.h | 10
-rw-r--r--  util/timer.h | 2
-rw-r--r--  util/version.cpp | 69
302 files changed, 10377 insertions, 2474 deletions
diff --git a/.gitignore b/.gitignore
index 87449576069..8ffc0d3e496 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,7 @@
.sconsign.dblite
.sconf_temp
perf.data
+massif.out.*
*~
*.swp
diff --git a/SConstruct b/SConstruct
index b8e8c3fc889..99e3189c2d0 100644
--- a/SConstruct
+++ b/SConstruct
@@ -126,10 +126,10 @@ add_option( "staticlibpath", "comma separated list of dirs to search for staticl
add_option( "boost-compiler", "compiler used for boost (gcc41)" , 1 , True , "boostCompiler" )
add_option( "boost-version", "boost version for linking(1_38)" , 1 , True , "boostVersion" )
-
# experimental features
add_option( "mm", "use main memory instead of memory mapped files" , 0 , True )
add_option( "asio" , "Use Asynchronous IO (NOT READY YET)" , 0 , True )
+add_option( "ssl" , "Enable SSL" , 0 , True )
# library choices
add_option( "usesm" , "use spider monkey for javascript" , 0 , True )
@@ -138,12 +138,13 @@ add_option( "usev8" , "use v8 for javascript" , 0 , True )
# mongo feature options
add_option( "noshell", "don't build shell" , 0 , True )
add_option( "safeshell", "don't let shell scripts run programs (still, don't run untrusted scripts)" , 0 , True )
-add_option( "osnew", "use newer operating system API features" , 0 , False )
+add_option( "win2008plus", "use newer operating system API features" , 0 , False )
# dev tools
add_option( "d", "debug build no optimization, etc..." , 0 , True , "debugBuild" )
add_option( "dd", "debug build no optimization, additional debug logging, etc..." , 0 , False , "debugBuildAndLogging" )
add_option( "durableDefaultOn" , "have durable default to on" , 0 , True )
+add_option( "durableDefaultOff" , "have durable default to off" , 0 , True )
add_option( "pch" , "use precompiled headers to speed up the build (experimental)" , 0 , True , "usePCH" )
add_option( "distcc" , "use distcc for distributing builds" , 0 , False )
@@ -234,6 +235,9 @@ if has_option( "safeshell" ):
if has_option( "durableDefaultOn" ):
env.Append( CPPDEFINES=[ "_DURABLEDEFAULTON" ] )
+if has_option( "durableDefaultOff" ):
+ env.Append( CPPDEFINES=[ "_DURABLEDEFAULTOFF" ] )
+
boostCompiler = GetOption( "boostCompiler" )
if boostCompiler is None:
boostCompiler = ""
@@ -343,26 +347,28 @@ processInfoFiles = [ "util/processinfo.cpp" ]
if os.path.exists( "util/processinfo_" + os.sys.platform + ".cpp" ):
processInfoFiles += [ "util/processinfo_" + os.sys.platform + ".cpp" ]
+elif os.sys.platform == "linux3":
+ processInfoFiles += [ "util/processinfo_linux2.cpp" ]
else:
processInfoFiles += [ "util/processinfo_none.cpp" ]
coreServerFiles += processInfoFiles
-
-
if has_option( "asio" ):
coreServerFiles += [ "util/net/message_server_asio.cpp" ]
# mongod files - also files used in tools. present in dbtests, but not in mongos and not in client libs.
-serverOnlyFiles = Split( "db/key.cpp db/btreebuilder.cpp util/logfile.cpp util/alignedbuilder.cpp db/mongommf.cpp db/dur.cpp db/durop.cpp db/dur_writetodatafiles.cpp db/dur_preplogbuffer.cpp db/dur_commitjob.cpp db/dur_recover.cpp db/dur_journal.cpp db/introspect.cpp db/btree.cpp db/clientcursor.cpp db/tests.cpp db/repl.cpp db/repl/rs.cpp db/repl/consensus.cpp db/repl/rs_initiate.cpp db/repl/replset_commands.cpp db/repl/manager.cpp db/repl/health.cpp db/repl/heartbeat.cpp db/repl/rs_config.cpp db/repl/rs_rollback.cpp db/repl/rs_sync.cpp db/repl/rs_initialsync.cpp db/oplog.cpp db/repl_block.cpp db/btreecursor.cpp db/cloner.cpp db/namespace.cpp db/cap.cpp db/matcher_covered.cpp db/dbeval.cpp db/restapi.cpp db/dbhelpers.cpp db/instance.cpp db/client.cpp db/database.cpp db/pdfile.cpp db/record.cpp db/cursor.cpp db/security.cpp db/queryoptimizer.cpp db/queryoptimizercursor.cpp db/extsort.cpp db/cmdline.cpp" )
+serverOnlyFiles = Split( "util/compress.cpp db/key.cpp db/btreebuilder.cpp util/logfile.cpp util/alignedbuilder.cpp db/mongommf.cpp db/dur.cpp db/durop.cpp db/dur_writetodatafiles.cpp db/dur_preplogbuffer.cpp db/dur_commitjob.cpp db/dur_recover.cpp db/dur_journal.cpp db/introspect.cpp db/btree.cpp db/clientcursor.cpp db/tests.cpp db/repl.cpp db/repl/rs.cpp db/repl/consensus.cpp db/repl/rs_initiate.cpp db/repl/replset_commands.cpp db/repl/manager.cpp db/repl/health.cpp db/repl/heartbeat.cpp db/repl/rs_config.cpp db/repl/rs_rollback.cpp db/repl/rs_sync.cpp db/repl/rs_initialsync.cpp db/oplog.cpp db/repl_block.cpp db/btreecursor.cpp db/cloner.cpp db/namespace.cpp db/cap.cpp db/matcher_covered.cpp db/dbeval.cpp db/restapi.cpp db/dbhelpers.cpp db/instance.cpp db/client.cpp db/database.cpp db/pdfile.cpp db/record.cpp db/cursor.cpp db/security.cpp db/queryoptimizer.cpp db/queryoptimizercursor.cpp db/extsort.cpp db/cmdline.cpp" )
-serverOnlyFiles += [ "db/index.cpp" ] + Glob( "db/geo/*.cpp" ) + Glob( "db/ops/*.cpp" )
+serverOnlyFiles += [ "db/index.cpp" , "db/scanandorder.cpp" ] + Glob( "db/geo/*.cpp" ) + Glob( "db/ops/*.cpp" )
serverOnlyFiles += [ "db/dbcommands.cpp" , "db/dbcommands_admin.cpp" ]
serverOnlyFiles += Glob( "db/commands/*.cpp" )
coreServerFiles += Glob( "db/stats/*.cpp" )
serverOnlyFiles += [ "db/driverHelpers.cpp" ]
+snappyFiles = ["third_party/snappy/snappy.cc", "third_party/snappy/snappy-sinksource.cc"]
+
scriptingFiles = [ "scripting/engine.cpp" , "scripting/utils.cpp" , "scripting/bench.cpp" ]
if usesm:
@@ -474,7 +480,7 @@ if "darwin" == os.sys.platform:
env.Append( CPPPATH=filterExists(["/sw/include" , "/opt/local/include"]) )
env.Append( LIBPATH=filterExists(["/sw/lib/", "/opt/local/lib"]) )
-elif "linux2" == os.sys.platform:
+elif "linux2" == os.sys.platform or "linux3" == os.sys.platform:
linux = True
platform = "linux"
@@ -519,7 +525,7 @@ elif "win32" == os.sys.platform:
#if force64:
# release = True
- if has_option( "osnew" ):
+ if has_option( "win2008plus" ):
env.Append( CPPDEFINES=[ "MONGO_USE_SRW_ON_WINDOWS" ] )
for pathdir in env['ENV']['PATH'].split(os.pathsep):
@@ -689,6 +695,7 @@ if nix:
if not has_option('clang'):
env.Append( CPPFLAGS=" -fno-builtin-memcmp " ) # glibc's memcmp is faster than gcc's
+ env.Append( CPPDEFINES="_FILE_OFFSET_BITS=64" )
env.Append( CXXFLAGS=" -Wnon-virtual-dtor " )
env.Append( LINKFLAGS=" -fPIC -pthread -rdynamic" )
env.Append( LIBS=[] )
@@ -704,7 +711,7 @@ if nix:
env.Append( CPPFLAGS=" -O0 -fstack-protector " );
env['ENV']['GLIBCXX_FORCE_NEW'] = 1; # play nice with valgrind
else:
- env.Append( CPPFLAGS=" -O3" )
+ env.Append( CPPFLAGS=" -O3 " )
#env.Append( CPPFLAGS=" -fprofile-generate" )
#env.Append( LINKFLAGS=" -fprofile-generate" )
# then:
@@ -752,6 +759,10 @@ if "uname" in dir(os):
if hacks is not None:
hacks.insert( env , { "linux64" : linux64 } )
+if has_option( "ssl" ):
+ env.Append( CPPDEFINES=["MONGO_SSL"] )
+ env.Append( LIBS=["ssl"] )
+
try:
umask = os.umask(022)
except OSError:
@@ -1107,6 +1118,12 @@ def checkErrorCodes():
checkErrorCodes()
+snappyEnv = env.Clone()
+if not windows:
+ snappyEnv.Append(CPPFLAGS=" -Wno-sign-compare -Wno-unused-function ") #snappy doesn't compile cleanly
+serverOnlyFiles += [snappyEnv.Object(f) for f in snappyFiles]
+
+
# main db target
mongodOnlyFiles = [ "db/db.cpp", "db/compact.cpp" ]
if windows:
diff --git a/bson/bson-inl.h b/bson/bson-inl.h
index 54431549852..b86d66784ed 100644
--- a/bson/bson-inl.h
+++ b/bson/bson-inl.h
@@ -172,7 +172,7 @@ dodouble:
}
inline BSONObj BSONElement::embeddedObjectUserCheck() const {
- if ( isABSONObj() )
+ if ( MONGO_likely(isABSONObj()) )
return BSONObj(value());
stringstream ss;
ss << "invalid parameter: expected an object (" << fieldName() << ")";
diff --git a/bson/bsonobj.h b/bson/bsonobj.h
index c65f1268cc4..b3258a2c1d7 100644
--- a/bson/bsonobj.h
+++ b/bson/bsonobj.h
@@ -308,7 +308,7 @@ namespace mongo {
/** This is "shallow equality" -- ints and doubles won't match. for a
deep equality test use woCompare (which is slower).
*/
- bool shallowEqual(const BSONObj& r) const {
+ bool binaryEqual(const BSONObj& r) const {
int os = objsize();
if ( os == r.objsize() ) {
return (os == 0 || memcmp(objdata(),r.objdata(),os)==0);
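
A minimal sketch (not part of the commit) of the behavior the shallowEqual -> binaryEqual rename above preserves: a raw-byte comparison, so a NumberInt and a NumberDouble holding the same value do not match, while woCompare() treats them as equal. The include path is an assumption.

#include <cassert>
#include "bson/bson.h"               // assumed standalone bson header

void binaryEqualExample() {
    using namespace mongo;
    BSONObj a = BSON( "x" << 1 );    // stored as NumberInt
    BSONObj b = BSON( "x" << 1.0 );  // stored as NumberDouble
    assert( ! a.binaryEqual( b ) );  // different BSON types -> different bytes
    assert( a.woCompare( b ) == 0 ); // deep comparison: numerically equal
}
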
diff --git a/bson/bsonobjbuilder.h b/bson/bsonobjbuilder.h
index 7d6965dd7fc..f61d45879f3 100644
--- a/bson/bsonobjbuilder.h
+++ b/bson/bsonobjbuilder.h
@@ -28,10 +28,10 @@
#include "bsonobj.h"
#include "bsonmisc.h"
-using namespace std;
-
namespace mongo {
+ using namespace std;
+
#if defined(_WIN32)
// warning: 'this' : used in base member initializer list
#pragma warning( disable : 4355 )
diff --git a/bson/bsonobjiterator.h b/bson/bsonobjiterator.h
index 0d2344e002e..39ae24d9b86 100644
--- a/bson/bsonobjiterator.h
+++ b/bson/bsonobjiterator.h
@@ -37,7 +37,7 @@ namespace mongo {
*/
BSONObjIterator(const BSONObj& jso) {
int sz = jso.objsize();
- if ( sz == 0 ) {
+ if ( MONGO_unlikely(sz == 0) ) {
_pos = _theend = 0;
return;
}
diff --git a/bson/inline_decls.h b/bson/inline_decls.h
index 433a67010cb..30da9b4560d 100644
--- a/bson/inline_decls.h
+++ b/bson/inline_decls.h
@@ -31,24 +31,38 @@
#endif
+namespace mongo {
/* Note: do not clutter code with these -- ONLY use in hot spots / significant loops. */
#if !defined(__GNUC__)
-// branch prediction. indicate we expect to enter the if statement body
-# define MONGOIF(x) if( (x) )
+// branch prediction. indicate we expect to be true
+# define MONGO_likely(x) ((bool)(x))
-// branch prediction. indicate we expect to not enter the if statement body
-# define MONGO_IF(x) if( (x) )
+// branch prediction. indicate we expect to be false
+# define MONGO_unlikely(x) ((bool)(x))
-// prefetch data from memory
-# define MONGOPREFETCH(x) { /*just check we compile:*/ assert(sizeof(*x)); }
+# if defined(_WIN32)
+ // prefetch data from memory
+ inline void prefetch(const void *p) {
+#if defined(_MM_HINT_T0)
+ _mm_prefetch((char *) p, _MM_HINT_T0);
+#endif
+ }
+#else
+ inline void prefetch(void *p) { }
+#endif
#else
-# define MONGOIF(x) if( __builtin_expect((x), 1) )
-# define MONGO_IF(x) if( __builtin_expect((x), 0) )
-# define MONGOPREFETCH(x) { /*just check we compile:*/ assert(sizeof(*x)); }
+# define MONGO_likely(x) ( __builtin_expect((bool)(x), 1) )
+# define MONGO_unlikely(x) ( __builtin_expect((bool)(x), 0) )
+
+ inline void prefetch(void *p) {
+ __builtin_prefetch(p);
+ }
#endif
+
+}
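
A minimal usage sketch (not part of the commit) of the renamed branch-prediction helpers and the new prefetch() defined above; the loop itself is hypothetical.

#include "bson/inline_decls.h"              // assumed include

int sumNonNegative( int* vals , int n ) {
    int total = 0;
    for ( int i = 0; i < n; i++ ) {
        mongo::prefetch( vals + i + 16 );           // hint: this data is needed soon
        if ( MONGO_likely( vals[i] >= 0 ) )         // branch we expect to take
            total += vals[i];
        else if ( MONGO_unlikely( vals[i] == -1 ) )
            break;                                  // rare early exit
    }
    return total;
}
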
diff --git a/bson/stringdata.h b/bson/stringdata.h
index c4919e82d97..352dc51813f 100644
--- a/bson/stringdata.h
+++ b/bson/stringdata.h
@@ -60,7 +60,7 @@ namespace mongo {
: _data(&val[0]), _size(N-1) {}
// accessors
- const char* const data() const { return _data; }
+ const char* data() const { return _data; }
const unsigned size() const { return _size; }
private:
diff --git a/buildscripts/errorcodes.py b/buildscripts/errorcodes.py
index ce1b3e465d0..dec1030ddad 100755
--- a/buildscripts/errorcodes.py
+++ b/buildscripts/errorcodes.py
@@ -32,9 +32,9 @@ def assignErrorCodes():
codes = []
def readErrorCodes( callback, replaceZero = False ):
- ps = [ re.compile( "(([umsg]asser(t|ted))) *\( *(\d+)" ) ,
- re.compile( "((User|Msg|MsgAssertion)Exceptio(n))\( *(\d+)" ) ,
- re.compile( "(((verify))) *\( *(\d+)" )
+ ps = [ re.compile( "(([umsg]asser(t|ted))) *\(( *)(\d+)" ) ,
+ re.compile( "((User|Msg|MsgAssertion)Exceptio(n))\(( *)(\d+)" ) ,
+ re.compile( "(((verify))) *\(( *)(\d+)" )
]
for x in utils.getAllSourceFiles():
@@ -52,7 +52,8 @@ def readErrorCodes( callback, replaceZero = False ):
m = m.groups()
start = m[0]
- code = m[3]
+ spaces = m[3]
+ code = m[4]
if code == '0' and replaceZero :
code = getNextCode( lastCodes )
lastCodes.append( code )
@@ -65,7 +66,7 @@ def readErrorCodes( callback, replaceZero = False ):
codes.append( ( x , lineNum , line , code ) )
callback( x , lineNum , line , code )
- return start + "( " + code
+ return start + "(" + spaces + code
line = re.sub( p, repl, line )
diff --git a/buildscripts/smoke.py b/buildscripts/smoke.py
index 128c01cc287..c46b5d1879d 100755
--- a/buildscripts/smoke.py
+++ b/buildscripts/smoke.py
@@ -110,7 +110,7 @@ class mongod(object):
sock.connect(("localhost", int(port)))
sock.close()
- def did_mongod_start(self, port=mongod_port, timeout=90):
+ def did_mongod_start(self, port=mongod_port, timeout=300):
while timeout > 0:
time.sleep(1)
try:
@@ -119,6 +119,7 @@ class mongod(object):
except Exception,e:
print >> sys.stderr, e
timeout = timeout - 1
+ print >> sys.stderr, "timeout starting mongod"
return False
def start(self):
@@ -148,6 +149,10 @@ class mongod(object):
argv += ["--master", "--oplogSize", "256"]
if self.slave:
argv += ['--slave', '--source', 'localhost:' + str(srcport)]
+ if self.kwargs.get('no_journal'):
+ argv += ['--nojournal']
+ if self.kwargs.get('no_preallocj'):
+ argv += ['--nopreallocj']
print "running " + " ".join(argv)
self.proc = Popen(argv)
if not self.did_mongod_start(self.port):
@@ -280,6 +285,19 @@ def runTest(test):
t1 = time.time()
# FIXME: we don't handle the case where the subprocess
# hangs... that's bad.
+ if argv[0].endswith( 'mongo' ) and not '--eval' in argv :
+ argv = argv + [ '--eval', 'TestData = new Object();' +
+ 'TestData.testPath = "' + path + '";' +
+ 'TestData.testFile = "' + os.path.basename( path ) + '";' +
+ 'TestData.testName = "' + re.sub( ".js$", "", os.path.basename( path ) ) + '";' +
+ 'TestData.noJournal = ' + ( 'true' if no_journal else 'false' ) + ";" +
+ 'TestData.noJournalPrealloc = ' + ( 'true' if no_preallocj else 'false' ) + ";" ]
+
+ if argv[0].endswith( 'test' ) and no_preallocj :
+ argv = argv + [ '--nopreallocj' ]
+
+
+ print argv
r = call(argv, cwd=test_path)
t2 = time.time()
print " " + str((t2 - t1) * 1000) + "ms"
@@ -301,7 +319,7 @@ def run_tests(tests):
# The reason we use with is so that we get __exit__ semantics
- with mongod(small_oplog=small_oplog) as master:
+ with mongod(small_oplog=small_oplog,no_journal=no_journal,no_preallocj=no_preallocj) as master:
with mongod(slave=True) if small_oplog else Nothing() as slave:
if small_oplog:
master.wait_for_repl()
@@ -421,7 +439,7 @@ def add_exe(e):
return e
def main():
- global mongod_executable, mongod_port, shell_executable, continue_on_failure, small_oplog, smoke_db_prefix, test_path
+ global mongod_executable, mongod_port, shell_executable, continue_on_failure, small_oplog, no_journal, no_preallocj, smoke_db_prefix, test_path
parser = OptionParser(usage="usage: smoke.py [OPTIONS] ARGS*")
parser.add_option('--mode', dest='mode', default='suite',
help='If "files", ARGS are filenames; if "suite", ARGS are sets of tests (%default)')
@@ -447,6 +465,12 @@ def main():
parser.add_option('--small-oplog', dest='small_oplog', default=False,
action="store_true",
help='Run tests with master/slave replication & use a small oplog')
+ parser.add_option('--nojournal', dest='no_journal', default=False,
+ action="store_true",
+ help='Do not turn on journaling in tests')
+ parser.add_option('--nopreallocj', dest='no_preallocj', default=False,
+ action="store_true",
+ help='Do not preallocate journal files in tests')
global tests
(options, tests) = parser.parse_args()
@@ -467,6 +491,8 @@ def main():
continue_on_failure = options.continue_on_failure
smoke_db_prefix = options.smoke_db_prefix
small_oplog = options.small_oplog
+ no_journal = options.no_journal
+ no_preallocj = options.no_preallocj
if options.File:
if options.File == '-':
diff --git a/client/connpool.cpp b/client/connpool.cpp
index e94a78d1c45..2d7c37bfbda 100644
--- a/client/connpool.cpp
+++ b/client/connpool.cpp
@@ -238,13 +238,16 @@ namespace mongo {
}
void DBConnectionPool::appendInfo( BSONObjBuilder& b ) {
- BSONObjBuilder bb( b.subobjStart( "hosts" ) );
+
int avail = 0;
long long created = 0;
map<ConnectionString::ConnectionType,long long> createdByType;
+ set<string> replicaSets;
+
+ BSONObjBuilder bb( b.subobjStart( "hosts" ) );
{
scoped_lock lk( _mutex );
for ( PoolMap::iterator i=_pools.begin(); i!=_pools.end(); ++i ) {
@@ -263,9 +266,33 @@ namespace mongo {
long long& x = createdByType[i->second.type()];
x += i->second.numCreated();
+
+ {
+ string setName = i->first.ident;
+ if ( setName.find( "/" ) != string::npos ) {
+ setName = setName.substr( 0 , setName.find( "/" ) );
+ replicaSets.insert( setName );
+ }
+ }
}
}
bb.done();
+
+
+ BSONObjBuilder setBuilder( b.subobjStart( "replicaSets" ) );
+ for ( set<string>::iterator i=replicaSets.begin(); i!=replicaSets.end(); ++i ) {
+ string rs = *i;
+ ReplicaSetMonitorPtr m = ReplicaSetMonitor::get( rs );
+ if ( ! m ) {
+ warning() << "no monitor for set: " << rs << endl;
+ continue;
+ }
+
+ BSONObjBuilder temp( setBuilder.subobjStart( rs ) );
+ m->appendInfo( temp );
+ temp.done();
+ }
+ setBuilder.done();
{
BSONObjBuilder temp( bb.subobjStart( "createdByType" ) );
@@ -280,20 +307,36 @@ namespace mongo {
}
bool DBConnectionPool::serverNameCompare::operator()( const string& a , const string& b ) const{
- string ap = str::before( a , "/" );
- string bp = str::before( b , "/" );
-
- return ap < bp;
+ const char* ap = a.c_str();
+ const char* bp = b.c_str();
+
+ while (true){
+ if (*ap == '\0' || *ap == '/'){
+ if (*bp == '\0' || *bp == '/')
+ return false; // equal strings
+ else
+ return true; // a is shorter
+ }
+
+ if (*bp == '\0' || *bp == '/')
+ return false; // b is shorter
+
+ if ( *ap < *bp)
+ return true;
+ else if (*ap > *bp)
+ return false;
+
+ ++ap;
+ ++bp;
+ }
+ assert(false);
}
bool DBConnectionPool::poolKeyCompare::operator()( const PoolKey& a , const PoolKey& b ) const {
- string ap = str::before( a.ident , "/" );
- string bp = str::before( b.ident , "/" );
-
- if ( ap < bp )
+ if (DBConnectionPool::serverNameCompare()( a.ident , b.ident ))
return true;
- if ( ap > bp )
+ if (DBConnectionPool::serverNameCompare()( b.ident , a.ident ))
return false;
return a.timeout < b.timeout;
@@ -366,7 +409,7 @@ namespace mongo {
PoolFlushCmd() : Command( "connPoolSync" , false , "connpoolsync" ) {}
virtual void help( stringstream &help ) const { help<<"internal"; }
virtual LockType locktype() const { return NONE; }
- virtual bool run(const string&, mongo::BSONObj&, std::string&, mongo::BSONObjBuilder& result, bool) {
+ virtual bool run(const string&, mongo::BSONObj&, int, std::string&, mongo::BSONObjBuilder& result, bool) {
pool.flush();
return true;
}
@@ -381,7 +424,7 @@ namespace mongo {
PoolStats() : Command( "connPoolStats" ) {}
virtual void help( stringstream &help ) const { help<<"stats about connection pool"; }
virtual LockType locktype() const { return NONE; }
- virtual bool run(const string&, mongo::BSONObj&, std::string&, mongo::BSONObjBuilder& result, bool) {
+ virtual bool run(const string&, mongo::BSONObj&, int, std::string&, mongo::BSONObjBuilder& result, bool) {
pool.appendInfo( result );
result.append( "numDBClientConnection" , DBClientConnection::getNumConnections() );
result.append( "numAScopedConnection" , AScopedConnection::getNumConnections() );
diff --git a/client/dbclient.cpp b/client/dbclient.cpp
index 9c907b01a00..dadf7e4f38a 100644
--- a/client/dbclient.cpp
+++ b/client/dbclient.cpp
@@ -80,7 +80,7 @@ namespace mongo {
case PAIR:
case SET: {
- DBClientReplicaSet * set = new DBClientReplicaSet( _setName , _servers );
+ DBClientReplicaSet * set = new DBClientReplicaSet( _setName , _servers , socketTimeout );
if( ! set->connect() ) {
delete set;
errmsg = "connect failed to set ";
@@ -589,6 +589,13 @@ namespace mongo {
_failed = true;
return false;
}
+
+#ifdef MONGO_SSL
+ if ( cmdLine.sslOnNormalPorts ) {
+ p->secure( sslManager() );
+ }
+#endif
+
return true;
}
@@ -997,6 +1004,19 @@ namespace mongo {
say(m);
}
+#ifdef MONGO_SSL
+ SSLManager* DBClientConnection::sslManager() {
+ if ( _sslManager )
+ return _sslManager;
+
+ SSLManager* s = new SSLManager(true);
+ _sslManager = s;
+ return s;
+ }
+
+ SSLManager* DBClientConnection::_sslManager = 0;
+#endif
+
AtomicUInt DBClientConnection::_numConnections;
bool DBClientConnection::_lazyKillCursor = true;
diff --git a/client/dbclient.h b/client/dbclient.h
index f48f279e9f5..2b4bb857e2d 100644
--- a/client/dbclient.h
+++ b/client/dbclient.h
@@ -110,7 +110,7 @@ namespace mongo {
*/
enum InsertOptions {
/** With muli-insert keep processing inserts if one fails */
- InsertOption_KeepGoing = 1 << 0
+ InsertOption_ContinueOnError = 1 << 0
};
class DBClientBase;
@@ -353,6 +353,7 @@ namespace mongo {
virtual void checkResponse( const char* data, int nReturned, bool* retry = NULL, string* targetHost = NULL ) {
if( retry ) *retry = false; if( targetHost ) *targetHost = "";
}
+ virtual bool lazySupported() const = 0;
};
/**
@@ -921,13 +922,15 @@ namespace mongo {
void setSoTimeout(double to) { _so_timeout = to; }
double getSoTimeout() const { return _so_timeout; }
+ virtual bool lazySupported() const { return true; }
+
static int getNumConnections() {
return _numConnections;
}
static void setLazyKillCursor( bool lazy ) { _lazyKillCursor = lazy; }
static bool getLazyKillCursor() { return _lazyKillCursor; }
-
+
protected:
friend class SyncClusterConnection;
virtual void sayPiggyBack( Message &toSend );
@@ -951,6 +954,11 @@ namespace mongo {
static AtomicUInt _numConnections;
static bool _lazyKillCursor; // lazy means we piggy back kill cursors on next op
+
+#ifdef MONGO_SSL
+ static SSLManager* sslManager();
+ static SSLManager* _sslManager;
+#endif
};
/** pings server to check if it's up
diff --git a/client/dbclient_rs.cpp b/client/dbclient_rs.cpp
index bd108d75ba4..2cab1f7b0d5 100644
--- a/client/dbclient_rs.cpp
+++ b/client/dbclient_rs.cpp
@@ -54,9 +54,9 @@ namespace mongo {
void run() {
log() << "starting" << endl;
while ( ! inShutdown() ) {
- sleepsecs( 20 );
+ sleepsecs( 10 );
try {
- ReplicaSetMonitor::checkAll();
+ ReplicaSetMonitor::checkAll( true );
}
catch ( std::exception& e ) {
error() << "check failed: " << e.what() << endl;
@@ -99,17 +99,14 @@ namespace mongo {
}
_nodes.push_back( Node( servers[i] , conn.release() ) );
-
+
+ int myLoc = _nodes.size() - 1;
string maybePrimary;
- if (_checkConnection( _nodes[_nodes.size()-1].conn , maybePrimary, false)) {
- break;
- }
+ _checkConnection( _nodes[myLoc].conn.get() , maybePrimary, false, myLoc );
}
}
ReplicaSetMonitor::~ReplicaSetMonitor() {
- for ( unsigned i=0; i<_nodes.size(); i++ )
- delete _nodes[i].conn;
_nodes.clear();
_master = -1;
}
@@ -125,7 +122,16 @@ namespace mongo {
return m;
}
- void ReplicaSetMonitor::checkAll() {
+ ReplicaSetMonitorPtr ReplicaSetMonitor::get( const string& name ) {
+ scoped_lock lk( _setsLock );
+ map<string,ReplicaSetMonitorPtr>::const_iterator i = _sets.find( name );
+ if ( i == _sets.end() )
+ return ReplicaSetMonitorPtr();
+ return i->second;
+ }
+
+
+ void ReplicaSetMonitor::checkAll( bool checkAllSecondaries ) {
set<string> seen;
while ( true ) {
@@ -146,7 +152,7 @@ namespace mongo {
if ( ! m )
break;
- m->check();
+ m->check( checkAllSecondaries );
}
@@ -202,7 +208,7 @@ namespace mongo {
return _nodes[_master].addr;
}
- _check();
+ _check( false );
scoped_lock lk( _lock );
uassert( 10009 , str::stream() << "ReplicaSetMonitor no master found for set: " << _name , _master >= 0 );
@@ -210,34 +216,70 @@ namespace mongo {
}
HostAndPort ReplicaSetMonitor::getSlave( const HostAndPort& prev ) {
- // make sure its valid
- if ( prev.port() > 0 ) {
+ // make sure its valid
+
+ bool wasFound = false;
+
+ // This is always true, since checked in port()
+ assert( prev.port() >= 0 );
+ if( prev.host().size() ){
scoped_lock lk( _lock );
for ( unsigned i=0; i<_nodes.size(); i++ ) {
if ( prev != _nodes[i].addr )
continue;
- if ( _nodes[i].ok )
+ wasFound = true;
+
+ if ( _nodes[i].okForSecondaryQueries() )
return prev;
+
break;
}
}
+ if( prev.host().size() ){
+ if( wasFound ){ LOG(1) << "slave '" << prev << "' is no longer ok to use" << endl; }
+ else{ LOG(1) << "slave '" << prev << "' was not found in the replica set" << endl; }
+ }
+ else LOG(1) << "slave '" << prev << "' is not initialized or invalid" << endl;
+
return getSlave();
}
HostAndPort ReplicaSetMonitor::getSlave() {
- scoped_lock lk( _lock );
- for ( unsigned i=0; i<_nodes.size(); i++ ) {
- _nextSlave = ( _nextSlave + 1 ) % _nodes.size();
- if ( _nextSlave == _master )
- continue;
- if ( _nodes[ _nextSlave ].ok )
- return _nodes[ _nextSlave ].addr;
+ LOG(2) << "selecting new slave from replica set " << getServerAddress() << endl;
+
+ // Logic is to retry three times for any secondary node, if we can't find any secondary, we'll take
+ // any "ok" node
+ // TODO: Could this query hidden nodes?
+ const int MAX = 3;
+ for ( int xxx=0; xxx<MAX; xxx++ ) {
+
+ {
+ scoped_lock lk( _lock );
+
+ unsigned i = 0;
+ for ( ; i<_nodes.size(); i++ ) {
+ _nextSlave = ( _nextSlave + 1 ) % _nodes.size();
+ if ( _nextSlave == _master ){
+ LOG(2) << "not selecting " << _nodes[_nextSlave] << " as it is the current master" << endl;
+ continue;
+ }
+ if ( _nodes[ _nextSlave ].okForSecondaryQueries() || ( _nodes[ _nextSlave ].ok && ( xxx + 1 ) >= MAX ) )
+ return _nodes[ _nextSlave ].addr;
+
+ LOG(2) << "not selecting " << _nodes[_nextSlave] << " as it is not ok to use" << endl;
+ }
+
+ }
+
+ check(false);
}
+
+ LOG(2) << "no suitable slave nodes found, returning default node " << _nodes[ 0 ] << endl;
- return _nodes[ 0 ].addr;
+ return _nodes[0].addr;
}
/**
@@ -266,7 +308,7 @@ namespace mongo {
string host = member["name"].String();
int m = -1;
- if ((m = _find(host)) <= 0) {
+ if ((m = _find(host)) < 0) {
continue;
}
@@ -309,16 +351,34 @@ namespace mongo {
- bool ReplicaSetMonitor::_checkConnection( DBClientConnection * c , string& maybePrimary , bool verbose ) {
+ bool ReplicaSetMonitor::_checkConnection( DBClientConnection * c , string& maybePrimary , bool verbose , int nodesOffset ) {
scoped_lock lk( _checkConnectionLock );
bool isMaster = false;
bool changed = false;
try {
+ Timer t;
BSONObj o;
c->isMaster(isMaster, &o);
+
+ if ( o["setName"].type() != String || o["setName"].String() != _name ) {
+ warning() << "node: " << c->getServerAddress() << " isn't a part of set: " << _name
+ << " ismaster: " << o << endl;
+ if ( nodesOffset >= 0 )
+ _nodes[nodesOffset].ok = false;
+ return false;
+ }
- log( ! verbose ) << "ReplicaSetMonitor::_checkConnection: " << c->toString() << ' ' << o << endl;
+ if ( nodesOffset >= 0 ) {
+ _nodes[nodesOffset].pingTimeMillis = t.millis();
+ _nodes[nodesOffset].hidden = o["hidden"].trueValue();
+ _nodes[nodesOffset].secondary = o["secondary"].trueValue();
+ _nodes[nodesOffset].ismaster = o["ismaster"].trueValue();
+
+ _nodes[nodesOffset].lastIsMaster = o.copy();
+ }
+ log( ! verbose ) << "ReplicaSetMonitor::_checkConnection: " << c->toString() << ' ' << o << endl;
+
// add other nodes
if ( o["hosts"].type() == Array ) {
if ( o["primary"].type() == String )
@@ -329,11 +389,14 @@ namespace mongo {
if (o.hasField("passives") && o["passives"].type() == Array) {
_checkHosts(o["passives"].Obj(), changed);
}
-
+
_checkStatus(c);
+
+
}
catch ( std::exception& e ) {
log( ! verbose ) << "ReplicaSetMonitor::_checkConnection: caught exception " << c->toString() << ' ' << e.what() << endl;
+ _nodes[nodesOffset].ok = false;
}
if ( changed && _hook )
@@ -342,24 +405,28 @@ namespace mongo {
return isMaster;
}
- void ReplicaSetMonitor::_check() {
+ void ReplicaSetMonitor::_check( bool checkAllSecondaries ) {
bool triedQuickCheck = false;
LOG(1) << "_check : " << getServerAddress() << endl;
+ int newMaster = -1;
+
for ( int retry = 0; retry < 2; retry++ ) {
for ( unsigned i=0; i<_nodes.size(); i++ ) {
- DBClientConnection * c;
+ shared_ptr<DBClientConnection> c;
{
scoped_lock lk( _lock );
c = _nodes[i].conn;
}
string maybePrimary;
- if ( _checkConnection( c , maybePrimary , retry ) ) {
+ if ( _checkConnection( c.get() , maybePrimary , retry , i ) ) {
_master = i;
- return;
+ newMaster = i;
+ if ( ! checkAllSecondaries )
+ return;
}
if ( ! triedQuickCheck && maybePrimary.size() ) {
@@ -367,36 +434,44 @@ namespace mongo {
if ( x >= 0 ) {
triedQuickCheck = true;
string dummy;
- DBClientConnection * testConn;
+ shared_ptr<DBClientConnection> testConn;
{
scoped_lock lk( _lock );
testConn = _nodes[x].conn;
}
- if ( _checkConnection( testConn , dummy , false ) ) {
+ if ( _checkConnection( testConn.get() , dummy , false , x ) ) {
_master = x;
- return;
+ newMaster = x;
+ if ( ! checkAllSecondaries )
+ return;
}
}
}
}
+
+ if ( newMaster >= 0 )
+ return;
+
sleepsecs(1);
}
}
- void ReplicaSetMonitor::check() {
+ void ReplicaSetMonitor::check( bool checkAllSecondaries ) {
// first see if the current master is fine
if ( _master >= 0 ) {
string temp;
- if ( _checkConnection( _nodes[_master].conn , temp , false ) ) {
- // current master is fine, so we're done
- return;
+ if ( _checkConnection( _nodes[_master].conn.get() , temp , false , _master ) ) {
+ if ( ! checkAllSecondaries ) {
+ // current master is fine, so we're done
+ return;
+ }
}
}
// we either have no master, or the current is dead
- _check();
+ _check( checkAllSecondaries );
}
int ReplicaSetMonitor::_find( const string& server ) const {
@@ -419,7 +494,26 @@ namespace mongo {
return i;
return -1;
}
-
+
+ void ReplicaSetMonitor::appendInfo( BSONObjBuilder& b ) const {
+ scoped_lock lk( _lock );
+ BSONArrayBuilder hosts( b.subarrayStart( "hosts" ) );
+ for ( unsigned i=0; i<_nodes.size(); i++ ) {
+ hosts.append( BSON( "addr" << _nodes[i].addr <<
+ // "lastIsMaster" << _nodes[i].lastIsMaster << // this is a potential race, so only used when debugging
+ "ok" << _nodes[i].ok <<
+ "ismaster" << _nodes[i].ismaster <<
+ "hidden" << _nodes[i].hidden <<
+ "secondary" << _nodes[i].secondary <<
+ "pingTimeMillis" << _nodes[i].pingTimeMillis ) );
+
+ }
+ hosts.done();
+
+ b.append( "master" , _master );
+ b.append( "nextSlave" , _nextSlave );
+ }
+
mongo::mutex ReplicaSetMonitor::_setsLock( "ReplicaSetMonitor" );
map<string,ReplicaSetMonitorPtr> ReplicaSetMonitor::_sets;
@@ -428,8 +522,9 @@ namespace mongo {
// ----- DBClientReplicaSet ---------
// --------------------------------
- DBClientReplicaSet::DBClientReplicaSet( const string& name , const vector<HostAndPort>& servers )
- : _monitor( ReplicaSetMonitor::get( name , servers ) ) {
+ DBClientReplicaSet::DBClientReplicaSet( const string& name , const vector<HostAndPort>& servers, double so_timeout )
+ : _monitor( ReplicaSetMonitor::get( name , servers ) ),
+ _so_timeout( so_timeout ) {
}
DBClientReplicaSet::~DBClientReplicaSet() {
@@ -446,7 +541,7 @@ namespace mongo {
}
_masterHost = _monitor->getMaster();
- _master.reset( new DBClientConnection( true , this ) );
+ _master.reset( new DBClientConnection( true , this , _so_timeout ) );
string errmsg;
if ( ! _master->connect( _masterHost , errmsg ) ) {
_monitor->notifyFailure( _masterHost );
@@ -463,10 +558,13 @@ namespace mongo {
if ( ! _slave->isFailed() )
return _slave.get();
_monitor->notifySlaveFailure( _slaveHost );
+ _slaveHost = _monitor->getSlave();
+ }
+ else {
+ _slaveHost = h;
}
-
- _slaveHost = _monitor->getSlave();
- _slave.reset( new DBClientConnection( true , this ) );
+
+ _slave.reset( new DBClientConnection( true , this , _so_timeout ) );
_slave->connect( _slaveHost );
_auth( _slave.get() );
return _slave.get();
diff --git a/client/dbclient_rs.h b/client/dbclient_rs.h
index 4a0a832d9ca..b6948a05b80 100644
--- a/client/dbclient_rs.h
+++ b/client/dbclient_rs.h
@@ -43,10 +43,16 @@ namespace mongo {
static ReplicaSetMonitorPtr get( const string& name , const vector<HostAndPort>& servers );
/**
+ * gets a cached Monitor per name or will return none if it doesn't exist
+ */
+ static ReplicaSetMonitorPtr get( const string& name );
+
+
+ /**
* checks all sets for current master and new secondaries
* usually only called from a BackgroundJob
*/
- static void checkAll();
+ static void checkAll( bool checkAllSecondaries );
/**
* this is called whenever the config of any repclia set changes
@@ -81,13 +87,15 @@ namespace mongo {
/**
* checks for current master and new secondaries
*/
- void check();
+ void check( bool checkAllSecondaries );
string getName() const { return _name; }
string getServerAddress() const;
bool contains( const string& server ) const;
+
+ void appendInfo( BSONObjBuilder& b ) const;
private:
/**
@@ -98,7 +106,7 @@ namespace mongo {
*/
ReplicaSetMonitor( const string& name , const vector<HostAndPort>& servers );
- void _check();
+ void _check( bool checkAllSecondaries );
/**
* Use replSetGetStatus command to make sure hosts in host list are up
@@ -119,9 +127,10 @@ namespace mongo {
* @param c the connection to check
* @param maybePrimary OUT
* @param verbose
+ * @param nodesOffset - offset into _nodes array, -1 for not in it
* @return if the connection is good
*/
- bool _checkConnection( DBClientConnection * c , string& maybePrimary , bool verbose );
+ bool _checkConnection( DBClientConnection * c , string& maybePrimary , bool verbose , int nodesOffset );
int _find( const string& server ) const ;
int _find_inlock( const string& server ) const ;
@@ -132,14 +141,44 @@ namespace mongo {
string _name;
struct Node {
- Node( const HostAndPort& a , DBClientConnection* c ) : addr( a ) , conn(c) , ok(true) {}
+ Node( const HostAndPort& a , DBClientConnection* c )
+ : addr( a ) , conn(c) , ok(true) ,
+ ismaster(false), secondary( false ) , hidden( false ) , pingTimeMillis(0) {
+ }
+
+ bool okForSecondaryQueries() const {
+ return ok && secondary && ! hidden;
+ }
+
+ BSONObj toBSON() const {
+ return BSON( "addr" << addr.toString() <<
+ "isMaster" << ismaster <<
+ "secondary" << secondary <<
+ "hidden" << hidden <<
+ "ok" << ok );
+ }
+
+ string toString() const {
+ return toBSON().toString();
+ }
+
HostAndPort addr;
- DBClientConnection* conn;
+ shared_ptr<DBClientConnection> conn;
// if this node is in a failure state
// used for slave routing
// this is too simple, should make it better
bool ok;
+
+ // as reported by ismaster
+ BSONObj lastIsMaster;
+
+ bool ismaster;
+ bool secondary;
+ bool hidden;
+
+ int pingTimeMillis;
+
};
/**
@@ -168,7 +207,7 @@ namespace mongo {
public:
/** Call connect() after constructing. autoReconnect is always on for DBClientReplicaSet connections. */
- DBClientReplicaSet( const string& name , const vector<HostAndPort>& servers );
+ DBClientReplicaSet( const string& name , const vector<HostAndPort>& servers, double so_timeout=0 );
virtual ~DBClientReplicaSet();
/** Returns false if nomember of the set were reachable, or neither is
@@ -228,16 +267,14 @@ namespace mongo {
// ----- informational ----
- /**
- * timeout not supported in DBClientReplicaSet yet
- */
- double getSoTimeout() const { return 0; }
+ double getSoTimeout() const { return _so_timeout; }
string toString() { return getServerAddress(); }
string getServerAddress() const { return _monitor->getServerAddress(); }
virtual ConnectionString::ConnectionType type() const { return ConnectionString::SET; }
+ virtual bool lazySupported() const { return true; }
// ---- low level ------
@@ -265,6 +302,8 @@ namespace mongo {
HostAndPort _slaveHost;
scoped_ptr<DBClientConnection> _slave;
+
+ double _so_timeout;
/**
* for storing authentication info
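
A trimmed-down sketch (not MongoDB code) of the secondary-selection rule added above: a node must be ok, a secondary, and not hidden; nodes that are merely ok are accepted only on the final pass, mirroring the retry/fallback loop in getSlave() (the _nextSlave rotation is omitted here).

#include <vector>

struct Node {                                  // stand-in for ReplicaSetMonitor::Node
    bool ok , secondary , hidden;
    bool okForSecondaryQueries() const { return ok && secondary && ! hidden; }
};

// Returns an index usable for secondary reads, or -1 if none qualifies.
int pickSecondary( const std::vector<Node>& nodes , int master ) {
    const int MAX = 3;
    for ( int pass = 0; pass < MAX; pass++ ) {
        for ( size_t i = 0; i < nodes.size(); i++ ) {
            if ( (int)i == master )
                continue;
            if ( nodes[i].okForSecondaryQueries() || ( nodes[i].ok && pass + 1 >= MAX ) )
                return (int)i;
        }
    }
    return -1;
}
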
diff --git a/client/dbclientcursor.cpp b/client/dbclientcursor.cpp
index f1685637311..5db360ef2c7 100644
--- a/client/dbclientcursor.cpp
+++ b/client/dbclientcursor.cpp
@@ -70,6 +70,7 @@ namespace mongo {
}
void DBClientCursor::initLazy( bool isRetry ) {
+ verify( 15875 , _client->lazySupported() );
Message toSend;
_assembleInit( toSend );
_client->say( toSend, isRetry );
diff --git a/client/distlock.cpp b/client/distlock.cpp
index cd516494cf9..cb711590524 100644
--- a/client/distlock.cpp
+++ b/client/distlock.cpp
@@ -634,7 +634,9 @@ namespace mongo {
// TODO: Clean up all the extra code to exit this method, probably with a refactor
if ( ! errMsg.empty() || ! err["n"].type() || err["n"].numberInt() < 1 ) {
( errMsg.empty() ? log( logLvl - 1 ) : warning() ) << "Could not re-enter lock '" << lockName << "' "
- << ( !errMsg.empty() ? causedBy(errMsg) : string("(not sure lock is held)") ) << endl;
+ << ( !errMsg.empty() ? causedBy(errMsg) : string("(not sure lock is held)") )
+ << " gle: " << err
+ << endl;
*other = o; other->getOwned(); conn.done();
return false;
}
diff --git a/client/distlock_test.cpp b/client/distlock_test.cpp
index ab5183c7069..42a1c48cedb 100644
--- a/client/distlock_test.cpp
+++ b/client/distlock_test.cpp
@@ -86,7 +86,7 @@ namespace mongo {
}
}
- bool run(const string&, BSONObj& cmdObj, string& errmsg,
+ bool run(const string&, BSONObj& cmdObj, int, string& errmsg,
BSONObjBuilder& result, bool) {
Timer t;
DistributedLock lk(ConnectionString(cmdObj["host"].String(),
@@ -288,7 +288,7 @@ namespace mongo {
return;
}
- bool run(const string&, BSONObj& cmdObj, string& errmsg,
+ bool run(const string&, BSONObj& cmdObj, int, string& errmsg,
BSONObjBuilder& result, bool) {
Timer t;
@@ -417,7 +417,7 @@ namespace mongo {
return NONE;
}
- bool run(const string&, BSONObj& cmdObj, string& errmsg,
+ bool run(const string&, BSONObj& cmdObj, int, string& errmsg,
BSONObjBuilder& result, bool) {
long long skew = (long long) number_field(cmdObj, "skew", 0);
diff --git a/client/examples/httpClientTest.cpp b/client/examples/httpClientTest.cpp
index fab3251ec49..4055d4492d5 100644
--- a/client/examples/httpClientTest.cpp
+++ b/client/examples/httpClientTest.cpp
@@ -22,20 +22,7 @@
using namespace mongo;
-int main( int argc, const char **argv ) {
-
- int port = 27017;
- if ( argc != 1 ) {
- if ( argc != 3 )
- throw -12;
- port = atoi( argv[ 2 ] );
- }
- port += 1000;
-
- stringstream ss;
- ss << "http://localhost:" << port << "/";
- string url = ss.str();
-
+void play( string url ) {
cout << "[" << url << "]" << endl;
HttpClient c;
@@ -45,8 +32,27 @@ int main( int argc, const char **argv ) {
HttpClient::Headers h = r.getHeaders();
MONGO_assert( h["Content-Type"].find( "text/html" ) == 0 );
- cout << "Headers" << endl;
+ cout << "\tHeaders" << endl;
for ( HttpClient::Headers::iterator i = h.begin() ; i != h.end(); ++i ) {
- cout << i->first << "\t" << i->second << endl;
+ cout << "\t\t" << i->first << "\t" << i->second << endl;
}
+
+}
+
+int main( int argc, const char **argv ) {
+
+ int port = 27017;
+ if ( argc != 1 ) {
+ if ( argc != 3 )
+ throw -12;
+ port = atoi( argv[ 2 ] );
+ }
+ port += 1000;
+
+ play( str::stream() << "http://localhost:" << port << "/" );
+
+#ifdef MONGO_SSL
+ play( "https://www.10gen.com/" );
+#endif
+
}
diff --git a/client/examples/rs.cpp b/client/examples/rs.cpp
index 65fff8d2948..3307d87b56b 100644
--- a/client/examples/rs.cpp
+++ b/client/examples/rs.cpp
@@ -57,14 +57,19 @@ int main( int argc , const char ** argv ) {
unsigned nThreads = 1;
bool print = false;
+ bool testTimeout = false;
for ( int i=1; i<argc; i++ ) {
if ( mongoutils::str::equals( "--threads" , argv[i] ) ) {
nThreads = atoi( argv[++i] );
}
- else if ( mongoutils::str::equals( "--print" , argv[1] ) ) {
+ else if ( mongoutils::str::equals( "--print" , argv[i] ) ) {
print = true;
}
+ // Run a special mode to demonstrate the DBClientReplicaSet so_timeout option.
+ else if ( mongoutils::str::equals( "--testTimeout" , argv[i] ) ) {
+ testTimeout = true;
+ }
else {
cerr << "unknown option: " << argv[i] << endl;
return 1;
@@ -79,7 +84,7 @@ int main( int argc , const char ** argv ) {
return 1;
}
- DBClientReplicaSet * conn = (DBClientReplicaSet*)cs.connect( errmsg );
+ DBClientReplicaSet * conn = dynamic_cast<DBClientReplicaSet*>(cs.connect( errmsg, testTimeout ? 10 : 0 ));
if ( ! conn ) {
cout << "error connecting: " << errmsg << endl;
return 2;
@@ -88,6 +93,17 @@ int main( int argc , const char ** argv ) {
string collName = "test.rs1";
conn->dropCollection( collName );
+
+ if ( testTimeout ) {
+ conn->insert( collName, BSONObj() );
+ try {
+ conn->count( collName, BSON( "$where" << "sleep(40000)" ) );
+ } catch( DBException& ) {
+ return 0;
+ }
+ cout << "expected socket exception" << endl;
+ return 1;
+ }
vector<boost::shared_ptr<boost::thread> > threads;
for ( unsigned i=0; i<nThreads; i++ ) {
diff --git a/client/parallel.cpp b/client/parallel.cpp
index f157927703f..76b0168be22 100644
--- a/client/parallel.cpp
+++ b/client/parallel.cpp
@@ -410,6 +410,7 @@ namespace mongo {
}
}
+ // TODO: Merge with futures API? We do a lot of error checking here that would be useful elsewhere.
void ParallelSortClusteredCursor::_init() {
// log() << "Starting parallel search..." << endl;
@@ -720,17 +721,23 @@ namespace mongo {
// ---- Future -----
// -----------------
- Future::CommandResult::CommandResult( const string& server , const string& db , const BSONObj& cmd , DBClientBase * conn )
- :_server(server) ,_db(db) ,_cmd(cmd) ,_conn(conn) ,_done(false)
+ Future::CommandResult::CommandResult( const string& server , const string& db , const BSONObj& cmd , int options , DBClientBase * conn )
+ :_server(server) ,_db(db) , _options(options), _cmd(cmd) ,_conn(conn) ,_done(false)
{
try {
if ( ! _conn ){
_connHolder.reset( new ScopedDbConnection( _server ) );
_conn = _connHolder->get();
}
-
- _cursor.reset( new DBClientCursor(_conn, _db + ".$cmd", _cmd, -1/*limit*/, 0, NULL, 0, 0));
- _cursor->initLazy();
+
+ if ( _conn->lazySupported() ) {
+ _cursor.reset( new DBClientCursor(_conn, _db + ".$cmd", _cmd, -1/*limit*/, 0, NULL, _options, 0));
+ _cursor->initLazy();
+ }
+ else {
+ _done = true; // we set _done first because even if there is an error we're done
+ _ok = _conn->runCommand( db , cmd , _res , options );
+ }
}
catch ( std::exception& e ) {
error() << "Future::spawnComand (part 1) exception: " << e.what() << endl;
@@ -768,8 +775,8 @@ namespace mongo {
return _ok;
}
- shared_ptr<Future::CommandResult> Future::spawnCommand( const string& server , const string& db , const BSONObj& cmd , DBClientBase * conn ) {
- shared_ptr<Future::CommandResult> res (new Future::CommandResult( server , db , cmd , conn ));
+ shared_ptr<Future::CommandResult> Future::spawnCommand( const string& server , const string& db , const BSONObj& cmd , int options , DBClientBase * conn ) {
+ shared_ptr<Future::CommandResult> res (new Future::CommandResult( server , db , cmd , options , conn ));
return res;
}
diff --git a/client/parallel.h b/client/parallel.h
index 332840edea1..869bff95a4a 100644
--- a/client/parallel.h
+++ b/client/parallel.h
@@ -280,10 +280,11 @@ namespace mongo {
private:
- CommandResult( const string& server , const string& db , const BSONObj& cmd , DBClientBase * conn );
+ CommandResult( const string& server , const string& db , const BSONObj& cmd , int options , DBClientBase * conn );
string _server;
string _db;
+ int _options;
BSONObj _cmd;
DBClientBase * _conn;
scoped_ptr<ScopedDbConnection> _connHolder; // used if not provided a connection
@@ -304,7 +305,7 @@ namespace mongo {
* @param cmd cmd to exec
* @param conn optional connection to use. will use standard pooled if non-specified
*/
- static shared_ptr<CommandResult> spawnCommand( const string& server , const string& db , const BSONObj& cmd , DBClientBase * conn = 0 );
+ static shared_ptr<CommandResult> spawnCommand( const string& server , const string& db , const BSONObj& cmd , int options , DBClientBase * conn = 0 );
};
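
A hedged usage sketch of the new options argument to Future::spawnCommand; the join()/ok()/result() accessor names and the host string are assumptions, not taken from this diff.

#include <iostream>
#include "client/parallel.h"                     // assumed include

void futureExample() {
    using namespace mongo;
    shared_ptr<Future::CommandResult> res =
        Future::spawnCommand( "shard0.example.net:27017" ,   // hypothetical host
                              "admin" ,
                              BSON( "serverStatus" << 1 ) ,
                              0 /* query options, e.g. QueryOption_SlaveOk */ );
    res->join();                                             // assumed accessor names
    if ( res->ok() )
        std::cout << res->result() << std::endl;
}
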
diff --git a/client/syncclusterconnection.h b/client/syncclusterconnection.h
index edd458fe683..68dd338a408 100644
--- a/client/syncclusterconnection.h
+++ b/client/syncclusterconnection.h
@@ -96,6 +96,7 @@ namespace mongo {
virtual bool auth(const string &dbname, const string &username, const string &password_text, string& errmsg, bool digestPassword);
+ virtual bool lazySupported() const { return false; }
private:
SyncClusterConnection( SyncClusterConnection& prev, double socketTimeout = 0 );
string _toString() const;
diff --git a/db/btree.cpp b/db/btree.cpp
index 232ac615470..e4753bef696 100644
--- a/db/btree.cpp
+++ b/db/btree.cpp
@@ -44,7 +44,7 @@ namespace mongo {
}
/** data check. like assert, but gives a reasonable error message to the user. */
-#define check(expr) _IF(!(expr)) { checkFailed(__LINE__); }
+#define check(expr) if(!(expr) ) { checkFailed(__LINE__); }
#define VERIFYTHISLOC dassert( thisLoc.btree<V>() == this );
diff --git a/db/btree.h b/db/btree.h
index 2e47d69a221..9ffa54cddc0 100644
--- a/db/btree.h
+++ b/db/btree.h
@@ -1071,7 +1071,7 @@ namespace mongo {
* Our btrees may (rarely) have "unused" keys when items are deleted.
* Skip past them.
*/
- virtual bool skipUnusedKeys( bool mayJump ) = 0;
+ virtual bool skipUnusedKeys() = 0;
bool skipOutOfRangeKeysAndCheckEnd();
void skipAndCheck();
diff --git a/db/btreecursor.cpp b/db/btreecursor.cpp
index cd145ef861f..f39d5bb0535 100644
--- a/db/btreecursor.cpp
+++ b/db/btreecursor.cpp
@@ -68,7 +68,7 @@ namespace mongo {
return !currKeyNode().prevChildBucket.isNull();
}
- bool skipUnusedKeys( bool mayJump ) {
+ bool skipUnusedKeys() {
int u = 0;
while ( 1 ) {
if ( !ok() )
@@ -80,9 +80,6 @@ namespace mongo {
u++;
//don't include unused keys in nscanned
//++_nscanned;
- if ( mayJump && ( u % 10 == 0 ) ) {
- skipOutOfRangeKeysAndCheckEnd();
- }
}
if ( u > 10 )
OCCASIONALLY log() << "btree unused skipped:" << u << '\n';
@@ -114,13 +111,13 @@ namespace mongo {
while( 1 ) {
// if ( b->keyAt(keyOfs).woEqual(keyAtKeyOfs) &&
// b->k(keyOfs).recordLoc == locAtKeyOfs ) {
- if ( keyAt(keyOfs).shallowEqual(keyAtKeyOfs) ) {
+ if ( keyAt(keyOfs).binaryEqual(keyAtKeyOfs) ) {
const _KeyNode& kn = keyNode(keyOfs);
if( kn.recordLoc == locAtKeyOfs ) {
if ( !kn.isUsed() ) {
// we were deleted but still exist as an unused
// marker key. advance.
- skipUnusedKeys( false );
+ skipUnusedKeys();
}
return;
}
@@ -149,7 +146,7 @@ namespace mongo {
bucket = _locate(keyAtKeyOfs, locAtKeyOfs);
RARELY log() << "key seems to have moved in the index, refinding. " << bucket.toString() << endl;
if ( ! bucket.isNull() )
- skipUnusedKeys( false );
+ skipUnusedKeys();
}
@@ -329,18 +326,24 @@ namespace mongo {
if ( ok() ) {
_nscanned = 1;
}
- skipUnusedKeys( false );
+ skipUnusedKeys();
checkEnd();
}
void BtreeCursor::skipAndCheck() {
- skipUnusedKeys( true );
+ int startNscanned = _nscanned;
+ skipUnusedKeys();
while( 1 ) {
if ( !skipOutOfRangeKeysAndCheckEnd() ) {
break;
}
- while( skipOutOfRangeKeysAndCheckEnd() );
- if ( !skipUnusedKeys( true ) ) {
+ do {
+ if ( _nscanned > startNscanned + 20 ) {
+ skipUnusedKeys();
+ return;
+ }
+ } while( skipOutOfRangeKeysAndCheckEnd() );
+ if ( !skipUnusedKeys() ) {
break;
}
}
@@ -395,7 +398,7 @@ namespace mongo {
bucket = _advance(bucket, keyOfs, _direction, "BtreeCursor::advance");
if ( !_independentFieldRanges ) {
- skipUnusedKeys( false );
+ skipUnusedKeys();
checkEnd();
if ( ok() ) {
++_nscanned;
diff --git a/db/client.cpp b/db/client.cpp
index be5dba9ae56..bf3aead75a6 100644
--- a/db/client.cpp
+++ b/db/client.cpp
@@ -122,10 +122,13 @@ namespace mongo {
error() << "Client::shutdown not called: " << _desc << endl;
}
- scoped_lock bl(clientsMutex);
- if ( ! _shutdown )
- clients.erase(this);
- delete _curOp;
+ if ( ! inShutdown() ) {
+ // we can't clean up safely once we're in shutdown
+ scoped_lock bl(clientsMutex);
+ if ( ! _shutdown )
+ clients.erase(this);
+ delete _curOp;
+ }
}
bool Client::shutdown() {
@@ -469,7 +472,7 @@ namespace mongo {
virtual LockType locktype() const { return NONE; }
virtual bool slaveOk() const { return true; }
virtual bool adminOnly() const { return false; }
- virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
Client& c = cc();
c.gotHandshake( cmdObj );
return 1;
@@ -688,11 +691,14 @@ namespace mongo {
#define OPDEBUG_APPEND_NUMBER(x) if( x ) b.append( #x , (x) )
#define OPDEBUG_APPEND_BOOL(x) if( x ) b.appendBool( #x , (x) )
- void OpDebug::append( BSONObjBuilder& b ) const {
+ void OpDebug::append( const CurOp& curop, BSONObjBuilder& b ) const {
b.append( "op" , iscommand ? "command" : opToString( op ) );
b.append( "ns" , ns.toString() );
if ( ! query.isEmpty() )
b.append( iscommand ? "command" : "query" , query );
+ else if ( ! iscommand && curop.haveQuery() )
+ curop.appendQuery( b , "query" );
+
if ( ! updateobj.isEmpty() )
b.append( "updateobj" , updateobj );
diff --git a/db/clientcursor.cpp b/db/clientcursor.cpp
index 615616e7a7c..e803afd459c 100644
--- a/db/clientcursor.cpp
+++ b/db/clientcursor.cpp
@@ -447,16 +447,29 @@ namespace mongo {
return rec;
}
- bool ClientCursor::yieldSometimes( RecordNeeds need ) {
+ bool ClientCursor::yieldSometimes( RecordNeeds need, bool *yielded ) {
+ if ( yielded ) {
+ *yielded = false;
+ }
if ( ! _yieldSometimesTracker.ping() ) {
Record* rec = _recordForYield( need );
- if ( rec )
+ if ( rec ) {
+ if ( yielded ) {
+ *yielded = true;
+ }
return yield( yieldSuggest() , rec );
+ }
return true;
}
int micros = yieldSuggest();
- return ( micros > 0 ) ? yield( micros , _recordForYield( need ) ) : true;
+ if ( micros > 0 ) {
+ if ( yielded ) {
+ *yielded = true;
+ }
+ return yield( micros , _recordForYield( need ) );
+ }
+ return true;
}
void ClientCursor::staticYield( int micros , const StringData& ns , Record * rec ) {
@@ -616,7 +629,7 @@ namespace mongo {
help << " example: { cursorInfo : 1 }";
}
virtual LockType locktype() const { return NONE; }
- bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
ClientCursor::appendStats( result );
return true;
}
diff --git a/db/clientcursor.h b/db/clientcursor.h
index b3bd996c768..75c7da85cc6 100644
--- a/db/clientcursor.h
+++ b/db/clientcursor.h
@@ -186,9 +186,10 @@ namespace mongo {
/**
* @param needRecord whether or not the next record has to be read from disk for sure
* if this is true, will yield of next record isn't in memory
+ * @param yielded set to true if a yield was attempted (may be set even if no yield actually occurred)
* @return same as yield()
*/
- bool yieldSometimes( RecordNeeds need );
+ bool yieldSometimes( RecordNeeds need, bool *yielded = 0 );
static int yieldSuggest();
static void staticYield( int micros , const StringData& ns , Record * rec );
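The clientcursor changes above thread an optional out-parameter through yieldSometimes() so callers can learn whether the cursor gave up its position and needs re-validation. Below is a minimal standalone sketch of that out-parameter pattern; doWorkMaybeYield and its caller are hypothetical stand-ins, not mongod code.

    // Hypothetical stand-in for ClientCursor::yieldSometimes(): the caller may pass
    // a bool* to learn whether a yield was attempted and must re-validate its state
    // if so. Returning false would mean "invalidated while yielded"; this stripped-
    // down sketch always stays valid.
    #include <iostream>

    bool doWorkMaybeYield(bool timeToYield, bool* yielded = 0) {
        if (yielded)
            *yielded = false;      // default: report "no yield"
        if (!timeToYield)
            return true;           // fast path: nothing released, state untouched
        if (yielded)
            *yielded = true;       // report the yield before releasing anything
        // ... release lock, sleep briefly, reacquire lock ...
        return true;               // still valid after the yield
    }

    int main() {
        bool yielded = false;
        if (!doWorkMaybeYield(true, &yielded))
            return 1;              // abandoned: state was invalidated during the yield
        if (yielded)
            std::cout << "re-check cursor position after yield" << std::endl;
        return 0;
    }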
diff --git a/db/cloner.cpp b/db/cloner.cpp
index 2a46ea22cb4..8956133daa3 100644
--- a/db/cloner.cpp
+++ b/db/cloner.cpp
@@ -460,7 +460,7 @@ namespace mongo {
help << "{ clone : \"host13\" }";
}
CmdClone() : Command("clone") { }
- virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
string from = cmdObj.getStringField("clone");
if ( from.empty() )
return false;
@@ -486,7 +486,7 @@ namespace mongo {
"Warning: the local copy of 'ns' is emptied before the copying begins. Any existing data will be lost there."
;
}
- virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
string fromhost = cmdObj.getStringField("from");
if ( fromhost.empty() ) {
errmsg = "missing 'from' parameter";
@@ -538,7 +538,7 @@ namespace mongo {
help << "get a nonce for subsequent copy db request from secure server\n";
help << "usage: {copydbgetnonce: 1, fromhost: <hostname>}";
}
- virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
string fromhost = cmdObj.getStringField("fromhost");
if ( fromhost.empty() ) {
/* copy from self */
@@ -579,7 +579,7 @@ namespace mongo {
help << "copy a database from another host to this host\n";
help << "usage: {copydb: 1, fromhost: <hostname>, fromdb: <db>, todb: <db>[, slaveOk: <bool>, username: <username>, nonce: <nonce>, key: <key>]}";
}
- virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
bool slaveOk = cmdObj["slaveOk"].trueValue();
string fromhost = cmdObj.getStringField("fromhost");
if ( fromhost.empty() ) {
@@ -633,7 +633,7 @@ namespace mongo {
virtual void help( stringstream &help ) const {
help << " example: { renameCollection: foo.a, to: bar.b }";
}
- virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
string source = cmdObj.getStringField( name.c_str() );
string target = cmdObj.getStringField( "to" );
if ( source.empty() || target.empty() ) {
@@ -671,6 +671,8 @@ namespace mongo {
nsToDatabase( target.c_str(), to );
if ( strcmp( from, to ) == 0 ) {
renameNamespace( source.c_str(), target.c_str() );
+ // make sure we drop counters etc
+ Top::global.collectionDropped( source );
return true;
}
}
diff --git a/db/cmdline.cpp b/db/cmdline.cpp
index d0b80455ff2..06880c98829 100644
--- a/db/cmdline.cpp
+++ b/db/cmdline.cpp
@@ -19,6 +19,7 @@
#include "pch.h"
#include "cmdline.h"
#include "commands.h"
+#include "../util/password.h"
#include "../util/processinfo.h"
#include "../util/net/listen.h"
#include "security_common.h"
@@ -27,6 +28,8 @@
#include <direct.h>
#endif
+#define MAX_LINE_LENGTH 256
+
namespace po = boost::program_options;
namespace fs = boost::filesystem;
@@ -64,6 +67,14 @@ namespace mongo {
("fork" , "fork server process" )
#endif
;
+
+ hidden.add_options()
+#ifdef MONGO_SSL
+ ("sslOnNormalPorts" , "use ssl on configured ports" )
+ ("sslPEMKeyFile" , po::value<string>(&cmdLine.sslPEMKeyFile), "PEM file for ssl" )
+ ("sslPEMKeyPassword" , new PasswordValue(&cmdLine.sslPEMKeyPassword) , "PEM file password" )
+#endif
+ ;
}
@@ -85,6 +96,32 @@ namespace mongo {
}
#endif
+ void CmdLine::parseConfigFile( istream &f, stringstream &ss ) {
+ string s;
+ char line[MAX_LINE_LENGTH];
+
+ while ( f ) {
+ f.getline(line, MAX_LINE_LENGTH);
+ s = line;
+ std::remove(s.begin(), s.end(), ' ');
+ std::remove(s.begin(), s.end(), '\t');
+ boost::to_upper(s);
+
+ if ( s.find( "FASTSYNC" ) != string::npos )
+ cout << "warning \"fastsync\" should not be put in your configuration file" << endl;
+
+ if ( s.c_str()[0] == '#' ) {
+ // skipping commented line
+ } else if ( s.find( "=FALSE" ) == string::npos ) {
+ ss << line << endl;
+ } else {
+ cout << "warning: remove or comment out this line by starting it with \'#\', skipping now : " << line << endl;
+ }
+ }
+ return;
+ }
+
+
bool CmdLine::store( int argc , char ** argv ,
boost::program_options::options_description& visible,
@@ -141,7 +178,9 @@ namespace mongo {
return false;
}
- po::store( po::parse_config_file( f , all ) , params );
+ stringstream ss;
+ CmdLine::parseConfigFile( f, ss );
+ po::store( po::parse_config_file( ss , all ) , params );
f.close();
}
@@ -287,7 +326,25 @@ namespace mongo {
noauth = false;
}
+#ifdef MONGO_SSL
+ if (params.count("sslOnNormalPorts") ) {
+ cmdLine.sslOnNormalPorts = true;
+ if ( cmdLine.sslPEMKeyPassword.size() == 0 ) {
+ log() << "need sslPEMKeyPassword" << endl;
+ dbexit(EXIT_BADOPTIONS);
+ }
+
+ if ( cmdLine.sslPEMKeyFile.size() == 0 ) {
+ log() << "need sslPEMKeyFile" << endl;
+ dbexit(EXIT_BADOPTIONS);
+ }
+
+ cmdLine.sslServerManager = new SSLManager( false );
+ cmdLine.sslServerManager->setupPEM( cmdLine.sslPEMKeyFile , cmdLine.sslPEMKeyPassword );
+ }
+#endif
+
{
BSONObjBuilder b;
for (po::variables_map::const_iterator it(params.begin()), end(params.end()); it != end; it++){
@@ -354,7 +411,7 @@ namespace mongo {
virtual bool adminOnly() const { return true; }
virtual bool slaveOk() const { return true; }
- virtual bool run(const string&, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string&, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
result.append("argv", argvArray);
result.append("parsed", parsedOpts);
return true;
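For context on the new CmdLine::parseConfigFile() above: it pre-filters the config file before handing it to boost::program_options, skipping '#' comments and dropping "name = false" lines with a warning. Below is a rough standalone sketch of that filtering idea using plain std streams (no boost, no fixed-size line buffer, fastsync warning omitted); the names and exact behavior here are simplified assumptions, not the mongod implementation.

    #include <algorithm>
    #include <cctype>
    #include <iostream>
    #include <sstream>
    #include <string>

    // Forward every line of 'in' to 'out' except commented lines and "name = false"
    // style lines, which only earn a warning. The copy we inspect has whitespace
    // stripped (erase-remove idiom) and is uppercased so the match is case-insensitive.
    void filterConfig(std::istream& in, std::ostream& out) {
        std::string line;
        while (std::getline(in, line)) {
            std::string s = line;
            s.erase(std::remove(s.begin(), s.end(), ' '), s.end());
            s.erase(std::remove(s.begin(), s.end(), '\t'), s.end());
            for (std::string::size_type i = 0; i < s.size(); ++i)
                s[i] = static_cast<char>(std::toupper(static_cast<unsigned char>(s[i])));

            if (!s.empty() && s[0] == '#') {
                // commented line: skip it
            }
            else if (s.find("=FALSE") == std::string::npos) {
                out << line << '\n';   // forward the original, untouched line
            }
            else {
                std::cerr << "warning: skipping \"" << line
                          << "\", comment it out with '#' instead" << std::endl;
            }
        }
    }

    int main() {
        std::istringstream cfg("port = 27017\n# journal = true\nnojournal = false\n");
        std::ostringstream filtered;
        filterConfig(cfg, filtered);
        std::cout << filtered.str();   // only "port = 27017" survives the filter
        return 0;
    }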
diff --git a/db/cmdline.h b/db/cmdline.h
index 7b6d12a2e04..60eb668a735 100644
--- a/db/cmdline.h
+++ b/db/cmdline.h
@@ -21,6 +21,12 @@
namespace mongo {
+#ifdef MONGO_SSL
+ class SSLManager;
+#endif
+
+
+
/* command line options
*/
/* concurrency: OK/READ */
@@ -63,6 +69,7 @@ namespace mongo {
bool quiet; // --quiet
bool noTableScan; // --notablescan no table scans allowed
bool prealloc; // --noprealloc no preallocation of data files
+ bool preallocj; // --nopreallocj no preallocation of journal files
bool smallfiles; // --smallfiles allocate smaller data files
bool configsvr; // --configsvr
@@ -71,7 +78,8 @@ namespace mongo {
int quotaFiles; // --quotaFiles
bool cpu; // --cpu show cpu time periodically
- bool dur; // --dur durability (now --journal)
+ bool dur; // --dur durability (now --journal)
+ unsigned journalCommitInterval; // group/batch commit interval ms
/** --durOptions 7 dump journal and terminate without doing anything further
--durOptions 4 recover and terminate without listening
@@ -99,6 +107,14 @@ namespace mongo {
bool noUnixSocket; // --nounixsocket
string socket; // UNIX domain socket directory
+#ifdef MONGO_SSL
+ bool sslOnNormalPorts; // --sslOnNormalPorts
+ string sslPEMKeyFile; // --sslPEMKeyFile
+ string sslPEMKeyPassword; // --sslPEMKeyPassword
+
+ SSLManager* sslServerManager; // currently leaks on close
+#endif
+
static void addGlobalOptions( boost::program_options::options_description& general ,
boost::program_options::options_description& hidden );
@@ -106,6 +122,7 @@ namespace mongo {
boost::program_options::options_description& hidden );
+ static void parseConfigFile( istream &f, stringstream &ss);
/**
* @return true if should run program, false if should exit
*/
@@ -116,18 +133,28 @@ namespace mongo {
boost::program_options::variables_map &output );
};
+ // todo move to cmdline.cpp?
inline CmdLine::CmdLine() :
- port(DefaultDBPort), rest(false), jsonp(false), quiet(false), noTableScan(false), prealloc(true), smallfiles(sizeof(int*) == 4),
+ port(DefaultDBPort), rest(false), jsonp(false), quiet(false), noTableScan(false), prealloc(true), preallocj(true), smallfiles(sizeof(int*) == 4),
configsvr(false),
quota(false), quotaFiles(8), cpu(false), durOptions(0), objcheck(false), oplogSize(0), defaultProfile(0), slowMS(100), pretouch(0), moveParanoia( true ),
syncdelay(60), noUnixSocket(false), socket("/tmp")
{
- // default may change for this later.
+ journalCommitInterval = 0; // 0 means use default
+ dur = false;
#if defined(_DURABLEDEFAULTON)
dur = true;
-#else
+#endif
+ if( sizeof(void*) == 8 )
+ dur = true;
+#if defined(_DURABLEDEFAULTOFF)
dur = false;
#endif
+
+#ifdef MONGO_SSL
+ sslOnNormalPorts = false;
+ sslServerManager = 0;
+#endif
}
extern CmdLine cmdLine;
diff --git a/db/commands.h b/db/commands.h
index 454e2277e06..c18621828f2 100644
--- a/db/commands.h
+++ b/db/commands.h
@@ -20,6 +20,7 @@
#include "../pch.h"
#include "jsobj.h"
#include "../util/timer.h"
+#include "../client/dbclient.h"
namespace mongo {
@@ -45,7 +46,7 @@ namespace mongo {
return value is true if succeeded. if false, set errmsg text.
*/
- virtual bool run(const string& db, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) = 0;
+ virtual bool run(const string& db, BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result, bool fromRepl = false ) = 0;
/*
note: logTheTop() MUST be false if READ
@@ -94,6 +95,11 @@ namespace mongo {
*/
virtual bool requiresAuth() { return true; }
+ /* Return true if a replica set secondary should go into "recovering"
+ (unreadable) state while running this command.
+ */
+ virtual bool maintenanceMode() const { return false; }
+
/** @param webUI expose the command in the web ui as localhost:28017/<name>
@param oldName an optional old, deprecated name for the command
*/
@@ -120,7 +126,7 @@ namespace mongo {
static const map<string,Command*>* commandsByBestName() { return _commandsByBestName; }
static const map<string,Command*>* webCommands() { return _webCommands; }
/** @return if command was found and executed */
- static bool runAgainstRegistered(const char *ns, BSONObj& jsobj, BSONObjBuilder& anObjBuilder);
+ static bool runAgainstRegistered(const char *ns, BSONObj& jsobj, BSONObjBuilder& anObjBuilder, int queryOptions = 0);
static LockType locktype( const string& name );
static Command * findCommand( const string& name );
};
@@ -139,7 +145,7 @@ namespace mongo {
virtual LockType locktype() const { return NONE; }
virtual void help( stringstream& help ) const;
CmdShutdown() : Command("shutdown") {}
- bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl);
+ bool run(const string& dbname, BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result, bool fromRepl);
private:
bool shutdownHelper();
};
diff --git a/db/commands/distinct.cpp b/db/commands/distinct.cpp
index 9a10e69d5a8..48f44050e49 100644
--- a/db/commands/distinct.cpp
+++ b/db/commands/distinct.cpp
@@ -32,7 +32,7 @@ namespace mongo {
help << "{ distinct : 'collection name' , key : 'a.b' , query : {} }";
}
- bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
Timer t;
string ns = dbname + '.' + cmdObj.firstElement().valuestr();
diff --git a/db/commands/find_and_modify.cpp b/db/commands/find_and_modify.cpp
index 2856ab3d3f1..0cf766fcf87 100644
--- a/db/commands/find_and_modify.cpp
+++ b/db/commands/find_and_modify.cpp
@@ -37,7 +37,7 @@ namespace mongo {
virtual bool logTheOp() { return false; } // the modifications will be logged directly
virtual bool slaveOk() const { return false; }
virtual LockType locktype() const { return WRITE; }
- virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
static DBDirectClient db;
string ns = dbname + '.' + cmdObj.firstElement().valuestr();
diff --git a/db/commands/group.cpp b/db/commands/group.cpp
index 9d7acbdf7d4..d3e5839748c 100644
--- a/db/commands/group.cpp
+++ b/db/commands/group.cpp
@@ -20,6 +20,7 @@
#include "../instance.h"
#include "../queryoptimizer.h"
#include "../../scripting/engine.h"
+#include "../clientcursor.h"
namespace mongo {
@@ -44,7 +45,7 @@ namespace mongo {
uassert( 10042 , "return of $key has to be an object" , type == Object );
return s->getObject( "return" );
}
- return obj.extractFields( keyPattern , true );
+ return obj.extractFields( keyPattern , true ).getOwned();
}
bool group( string realdbname , const string& ns , const BSONObj& query ,
@@ -88,14 +89,27 @@ namespace mongo {
list<BSONObj> blah;
shared_ptr<Cursor> cursor = NamespaceDetailsTransient::getCursor(ns.c_str() , query);
+ ClientCursor::CleanupPointer ccPointer;
+ ccPointer.reset( new ClientCursor( QueryOption_NoCursorTimeout, cursor, ns ) );
while ( cursor->ok() ) {
+
+ if ( !ccPointer->yieldSometimes( ClientCursor::MaybeCovered ) ||
+ !cursor->ok() ) {
+ break;
+ }
+
if ( ( cursor->matcher() && !cursor->matcher()->matchesCurrent( cursor.get() ) ) ||
cursor->getsetdup( cursor->currLoc() ) ) {
cursor->advance();
continue;
}
+ if ( !ccPointer->yieldSometimes( ClientCursor::WillNeed ) ||
+ !cursor->ok() ) {
+ break;
+ }
+
BSONObj obj = cursor->current();
cursor->advance();
@@ -117,6 +131,7 @@ namespace mongo {
throw UserException( 9010 , (string)"reduce invoke failed: " + s->getError() );
}
}
+ ccPointer.reset();
if (!finalize.empty()) {
s->exec( "$finalize = " + finalize , "finalize define" , false , true , true , 100 );
@@ -140,7 +155,7 @@ namespace mongo {
return true;
}
- bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
if ( !globalScriptEngine ) {
errmsg = "server-side JavaScript execution is disabled";
diff --git a/db/commands/isself.cpp b/db/commands/isself.cpp
index cac8380dc20..5a868de919f 100644
--- a/db/commands/isself.cpp
+++ b/db/commands/isself.cpp
@@ -130,7 +130,7 @@ namespace mongo {
help << "{ _isSelf : 1 } INTERNAL ONLY";
}
- bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
init();
result.append( "id" , _id );
return true;
diff --git a/db/commands/mr.cpp b/db/commands/mr.cpp
index 75f5615b9f6..56e9770dff2 100644
--- a/db/commands/mr.cpp
+++ b/db/commands/mr.cpp
@@ -879,8 +879,6 @@ namespace mongo {
}
}
-// boost::thread_specific_ptr<State*> _tl;
-
/**
* emit that will be called by js function
*/
@@ -932,7 +930,7 @@ namespace mongo {
help << "http://www.mongodb.org/display/DOCS/MapReduce";
}
virtual LockType locktype() const { return NONE; }
- bool run(const string& dbname , BSONObj& cmd, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname , BSONObj& cmd, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
Timer t;
Client::GodScope cg;
Client& client = cc();
@@ -968,12 +966,6 @@ namespace mongo {
state.init();
state.prepTempCollection();
- {
- State** s = new State*();
- s[0] = &state;
-// _tl.reset( s );
- }
-
wassert( config.limit < 0x4000000 ); // see case on next line to 32 bit unsigned
ProgressMeterHolder pm( op->setMessage( "m/r: (1/3) emit phase" , state.incomingDocuments() ) );
long long mapTime = 0;
@@ -988,7 +980,9 @@ namespace mongo {
// obtain cursor on data to apply mr to, sorted
shared_ptr<Cursor> temp = NamespaceDetailsTransient::getCursor( config.ns.c_str(), config.filter, config.sort );
+ uassert( 15876, str::stream() << "could not create cursor over " << config.ns << " for query : " << config.filter << " sort : " << config.sort, temp.get() );
auto_ptr<ClientCursor> cursor( new ClientCursor( QueryOption_NoCursorTimeout , temp , config.ns.c_str() ) );
+ uassert( 15877, str::stream() << "could not create client cursor over " << config.ns << " for query : " << config.filter << " sort : " << config.sort, cursor.get() );
Timer mt;
// go through each doc
@@ -1065,11 +1059,19 @@ namespace mongo {
countsBuilder.appendNumber( "reduce" , state.numReduces() );
timingBuilder.append( "reduceTime" , inReduce / 1000 );
timingBuilder.append( "mode" , state.jsMode() ? "js" : "mixed" );
-
-// _tl.reset();
+ }
+ // TODO: The error handling code for queries is v. fragile,
+ // *requires* rethrowing AssertionExceptions - should probably fix.
+ catch ( AssertionException& e ){
+ log() << "mr failed, removing collection" << causedBy(e) << endl;
+ throw e;
+ }
+ catch ( std::exception& e ){
+ log() << "mr failed, removing collection" << causedBy(e) << endl;
+ throw e;
}
catch ( ... ) {
- log() << "mr failed, removing collection" << endl;
+ log() << "mr failed for unknown reason, removing collection" << endl;
throw;
}
@@ -1116,7 +1118,7 @@ namespace mongo {
virtual bool slaveOverrideOk() { return true; }
virtual LockType locktype() const { return NONE; }
- bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
string shardedOutputCollection = cmdObj["shardedOutputCollection"].valuestrsafe();
string postProcessCollection = cmdObj["postProcessCollection"].valuestrsafe();
bool postProcessOnly = !(postProcessCollection.empty());
diff --git a/db/compact.cpp b/db/compact.cpp
index a1197460f4f..c6e5f77ee0e 100644
--- a/db/compact.cpp
+++ b/db/compact.cpp
@@ -263,6 +263,7 @@ namespace mongo {
virtual LockType locktype() const { return NONE; }
virtual bool adminOnly() const { return false; }
virtual bool slaveOk() const { return true; }
+ virtual bool maintenanceMode() const { return true; }
virtual bool logTheOp() { return false; }
virtual void help( stringstream& help ) const {
help << "compact collection\n"
@@ -274,7 +275,7 @@ namespace mongo {
virtual bool requiresAuth() { return true; }
CompactCmd() : Command("compact") { }
- virtual bool run(const string& db, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& db, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
string coll = cmdObj.firstElement().valuestr();
if( coll.empty() || db.empty() ) {
errmsg = "no collection name specified";
diff --git a/db/curop.h b/db/curop.h
index f261812d383..2717d78cc62 100644
--- a/db/curop.h
+++ b/db/curop.h
@@ -28,6 +28,8 @@
namespace mongo {
+ class CurOp;
+
/* lifespan is different than CurOp because of recursives with DBDirectClient */
class OpDebug {
public:
@@ -36,7 +38,7 @@ namespace mongo {
void reset();
string toString() const;
- void append( BSONObjBuilder& b ) const;
+ void append( const CurOp& curop, BSONObjBuilder& b ) const;
// -------------------
@@ -119,7 +121,7 @@ namespace mongo {
int size() const { return *_size; }
bool have() const { return size() > 0; }
- BSONObj get() {
+ BSONObj get() const {
_lock.lock();
BSONObj o;
try {
@@ -133,7 +135,7 @@ namespace mongo {
return o;
}
- void append( BSONObjBuilder& b , const StringData& name ) {
+ void append( BSONObjBuilder& b , const StringData& name ) const {
scoped_spinlock lk(_lock);
BSONObj temp = _get();
b.append( name , temp );
@@ -141,7 +143,7 @@ namespace mongo {
private:
/** you have to be locked when you call this */
- BSONObj _get() {
+ BSONObj _get() const {
int sz = size();
if ( sz == 0 )
return BSONObj();
@@ -153,7 +155,7 @@ namespace mongo {
/** you have to be locked when you call this */
void _reset( int sz ) { _size[0] = sz; }
- SpinLock _lock;
+ mutable SpinLock _lock;
int * _size;
char _buf[512];
};
@@ -168,7 +170,8 @@ namespace mongo {
bool haveQuery() const { return _query.have(); }
BSONObj query() { return _query.get(); }
-
+ void appendQuery( BSONObjBuilder& b , const StringData& name ) const { _query.append( b , name ); }
+
void ensureStarted() {
if ( _start == 0 )
_start = _checkpoint = curTimeMicros64();
diff --git a/db/cursor.h b/db/cursor.h
index ff9c9821ada..9639b2677b1 100644
--- a/db/cursor.h
+++ b/db/cursor.h
@@ -132,6 +132,8 @@ namespace mongo {
virtual void setMatcher( shared_ptr< CoveredIndexMatcher > matcher ) {
massert( 13285, "manual matcher config not allowed", false );
}
+
+ virtual void explainDetails( BSONObjBuilder& b ) { return; }
};
// strategy object implementing direction of traversal.
diff --git a/db/database.cpp b/db/database.cpp
index 7906e9b435a..97b3fa011cb 100644
--- a/db/database.cpp
+++ b/db/database.cpp
@@ -192,22 +192,31 @@ namespace mongo {
return ret;
}
+ bool fileIndexExceedsQuota( const char *ns, int fileIndex, bool enforceQuota ) {
+ return
+ cmdLine.quota &&
+ enforceQuota &&
+ fileIndex >= cmdLine.quotaFiles &&
+ // we don't enforce the quota on "special" namespaces as that could lead to problems -- e.g.
+ // rejecting an index insert after inserting the main record.
+ !NamespaceString::special( ns ) &&
+ NamespaceString( ns ).db != "local";
+ }
+
MongoDataFile* Database::suitableFile( const char *ns, int sizeNeeded, bool preallocate, bool enforceQuota ) {
// check existing files
for ( int i=numFiles()-1; i>=0; i-- ) {
MongoDataFile* f = getFile( i );
if ( f->getHeader()->unusedLength >= sizeNeeded ) {
- // we don't enforce the quota on "special" namespaces as that could lead to problems -- e.g.
- // rejecting an index insert after inserting the main record.
- if( cmdLine.quota && enforceQuota && i > cmdLine.quotaFiles && !NamespaceString::special(ns) )
+ if ( fileIndexExceedsQuota( ns, i-1, enforceQuota ) ) // NOTE i-1 is the value used historically for this check.
;
else
return f;
}
}
- if( cmdLine.quota && enforceQuota && numFiles() >= cmdLine.quotaFiles && !NamespaceString::special(ns) )
+ if ( fileIndexExceedsQuota( ns, numFiles(), enforceQuota ) )
uasserted(12501, "quota exceeded");
// allocate files until we either get one big enough or hit maxSize
@@ -261,8 +270,8 @@ namespace mongo {
log() << "creating profile collection: " << profileName << endl;
BSONObjBuilder spec;
spec.appendBool( "capped", true );
- spec.append( "size", 131072.0 );
- if ( ! userCreateNS( profileName.c_str(), spec.done(), errmsg , true ) ) {
+ spec.append( "size", 1024*1024 );
+ if ( ! userCreateNS( profileName.c_str(), spec.done(), errmsg , false /* we don't replicate profile messages */ ) ) {
return false;
}
}
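The database.cpp hunk above folds the quota test into a single fileIndexExceedsQuota() helper. A small self-contained sketch of that predicate follows; QuotaConfig, isSpecialNs() and dbOf() are rough stand-ins for cmdLine and the NamespaceString helpers, so treat the details as assumptions.

    #include <cassert>
    #include <string>

    struct QuotaConfig {
        bool quota;      // --quota enabled
        int  quotaFiles; // --quotaFiles N
    };

    // Very rough approximation of NamespaceString::special(): system and '$' namespaces.
    bool isSpecialNs(const std::string& ns) {
        return ns.find(".system.") != std::string::npos ||
               ns.find('$') != std::string::npos;
    }

    // Database portion of a "db.collection" namespace string.
    std::string dbOf(const std::string& ns) {
        return ns.substr(0, ns.find('.'));
    }

    // Quota trips only when it is enabled, the caller asked for enforcement, the file
    // index is at or beyond the limit, and the namespace is neither special nor local.
    bool fileIndexExceedsQuota(const QuotaConfig& cfg, const std::string& ns,
                               int fileIndex, bool enforceQuota) {
        return cfg.quota && enforceQuota &&
               fileIndex >= cfg.quotaFiles &&
               !isSpecialNs(ns) &&
               dbOf(ns) != "local";
    }

    int main() {
        QuotaConfig cfg = { true, 8 };
        assert(!fileIndexExceedsQuota(cfg, "test.foo", 7, true));       // 8th file: within quota
        assert( fileIndexExceedsQuota(cfg, "test.foo", 8, true));       // 9th file: over quota
        assert(!fileIndexExceedsQuota(cfg, "local.oplog.rs", 8, true)); // local db is exempt
        return 0;
    }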
diff --git a/db/db.cpp b/db/db.cpp
index 9f90b9ddd02..2d4970af044 100644
--- a/db/db.cpp
+++ b/db/db.cpp
@@ -70,7 +70,6 @@ namespace mongo {
extern string repairpath;
void setupSignals( bool inFork );
- void startReplSets(ReplSetCmdline*);
void startReplication();
void exitCleanly( ExitCode code );
@@ -216,8 +215,6 @@ namespace mongo {
void listen(int port) {
//testTheDb();
- log() << "waiting for connections on port " << port << endl;
-
MessageServer::Options options;
options.port = port;
options.ipList = cmdLine.bind_ip;
@@ -483,12 +480,6 @@ namespace mongo {
clientCursorMonitor.go();
PeriodicTask::theRunner->go();
- if( !cmdLine._replSet.empty() ) {
- replSet = true;
- ReplSetCmdline *replSetCmdline = new ReplSetCmdline(cmdLine._replSet);
- boost::thread t( boost::bind( &startReplSets, replSetCmdline) );
- }
-
listen(listenPort);
// listen() will return when exit code closes its socket.
@@ -575,10 +566,12 @@ int main(int argc, char* argv[]) {
("directoryperdb", "each database will be stored in a separate directory")
("journal", "enable journaling")
("journalOptions", po::value<int>(), "journal diagnostic options")
+ ("journalCommitInterval", po::value<unsigned>(), "how often to group/batch commit (ms)")
("ipv6", "enable IPv6 support (disabled by default)")
("jsonp","allow JSONP access via http (has security implications)")
("noauth", "run without security")
("nohttpinterface", "disable http interface")
+ ("nojournal", "disable journaling (journaling is on by default for 64 bit)")
("noprealloc", "disable data file preallocation - will often hurt performance")
("noscripting", "disable scripting engine")
("notablescan", "do not allow table scans")
@@ -631,12 +624,11 @@ int main(int argc, char* argv[]) {
("pretouch", po::value<int>(), "n pretouch threads for applying replicationed operations")
("command", po::value< vector<string> >(), "command")
("cacheSize", po::value<long>(), "cache size (in MB) for rec store")
- // these move to unhidden later:
("nodur", "disable journaling (currently the default)")
- ("nojournal", "disable journaling (currently the default)")
// things we don't want people to use
("nocursors", "diagnostic/debugging option that turns off cursors DO NOT USE IN PRODUCTION")
("nohints", "ignore query hints")
+ ("nopreallocj", "don't preallocate journal files")
("dur", "enable journaling") // deprecated version
("durOptions", po::value<int>(), "durability diagnostic options") // deprecated version
// deprecated pairing command line options
@@ -745,6 +737,15 @@ int main(int argc, char* argv[]) {
if (params.count("durOptions")) {
cmdLine.durOptions = params["durOptions"].as<int>();
}
+ if( params.count("journalCommitInterval") ) {
+ // don't check if dur is false here as many will just use the default, and will default to off on win32.
+ // ie no point making life a little more complex by giving an error on a dev environment.
+ cmdLine.journalCommitInterval = params["journalCommitInterval"].as<unsigned>();
+ if( cmdLine.journalCommitInterval <= 1 || cmdLine.journalCommitInterval > 300 ) {
+ out() << "--journalCommitInterval out of allowed range (0-300ms)" << endl;
+ dbexit( EXIT_BADOPTIONS );
+ }
+ }
if (params.count("journalOptions")) {
cmdLine.durOptions = params["journalOptions"].as<int>();
}
@@ -761,6 +762,9 @@ int main(int argc, char* argv[]) {
if (params.count("nohints")) {
useHints = false;
}
+ if (params.count("nopreallocj")) {
+ cmdLine.preallocj = false;
+ }
if (params.count("nohttpinterface")) {
noHttpInterface = true;
}
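The --journalCommitInterval handling added above only accepts values between 2 and 300 milliseconds and treats anything else as a fatal startup error. A tiny standalone sketch of that range check follows (plain C++ with no boost::program_options; EXIT_FAILURE stands in for dbexit(EXIT_BADOPTIONS)).

    #include <cstdlib>
    #include <iostream>

    // Accept only group-commit intervals in [2, 300] milliseconds.
    bool validJournalCommitInterval(unsigned ms) {
        return ms > 1 && ms <= 300;
    }

    int main(int argc, char** argv) {
        unsigned interval = 100;   // placeholder default for the sketch
        if (argc > 1)
            interval = static_cast<unsigned>(std::strtoul(argv[1], 0, 10));
        if (!validJournalCommitInterval(interval)) {
            std::cerr << "--journalCommitInterval out of allowed range (2-300ms)" << std::endl;
            return EXIT_FAILURE;   // stands in for dbexit(EXIT_BADOPTIONS)
        }
        std::cout << "group commit every " << interval << "ms" << std::endl;
        return 0;
    }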
diff --git a/db/db.vcxproj b/db/db.vcxproj
index 685015ed7f6..8f831cb8559 100755
--- a/db/db.vcxproj
+++ b/db/db.vcxproj
@@ -459,9 +459,27 @@
<ClCompile Include="..\s\shard.cpp" />
<ClCompile Include="..\s\shardconnection.cpp" />
<ClCompile Include="..\s\shardkey.cpp" />
+ <ClCompile Include="..\third_party\snappy\snappy-sinksource.cc">
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">NotUsing</PrecompiledHeader>
+ </ClCompile>
+ <ClCompile Include="..\third_party\snappy\snappy.cc">
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">NotUsing</PrecompiledHeader>
+ </ClCompile>
<ClCompile Include="..\util\alignedbuilder.cpp">
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">NotUsing</PrecompiledHeader>
</ClCompile>
+ <ClCompile Include="..\util\compress.cpp">
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">NotUsing</PrecompiledHeader>
+ </ClCompile>
<ClCompile Include="..\util\concurrency\spin_lock.cpp" />
<ClCompile Include="..\util\concurrency\synchronization.cpp" />
<ClCompile Include="..\util\concurrency\task.cpp" />
@@ -561,6 +579,7 @@
<ClCompile Include="..\client\parallel.cpp" />
<ClCompile Include="pdfile.cpp" />
<ClCompile Include="queryoptimizer.cpp" />
+ <ClCompile Include="scanandorder.cpp" />
<ClCompile Include="security.cpp" />
<ClCompile Include="security_commands.cpp" />
<ClCompile Include="security_common.cpp" />
@@ -652,6 +671,8 @@
<ClInclude Include="..\targetver.h" />
<ClInclude Include="..\pcre-7.4\config.h" />
<ClInclude Include="..\pcre-7.4\pcre.h" />
+ <ClInclude Include="..\third_party\snappy\config.h" />
+ <ClInclude Include="..\third_party\snappy\snappy.h" />
<ClInclude Include="..\util\alignedbuilder.h" />
<ClInclude Include="..\util\concurrency\mutexdebugger.h" />
<ClInclude Include="..\util\concurrency\race.h" />
diff --git a/db/db.vcxproj.filters b/db/db.vcxproj.filters
index d9e9def86f8..36b0df1ddc2 100755
--- a/db/db.vcxproj.filters
+++ b/db/db.vcxproj.filters
@@ -1,4 +1,4 @@
-<?xml version="1.0" encoding="utf-8"?>
+<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup>
<ClCompile Include="..\bson\oid.cpp" />
@@ -166,6 +166,16 @@
<ClCompile Include="..\util\net\message_port.cpp" />
<ClCompile Include="dbmessage.cpp" />
<ClCompile Include="commands\find_and_modify.cpp" />
+ <ClCompile Include="..\util\compress.cpp">
+ <Filter>snappy</Filter>
+ </ClCompile>
+ <ClCompile Include="..\third_party\snappy\snappy-sinksource.cc">
+ <Filter>snappy</Filter>
+ </ClCompile>
+ <ClCompile Include="..\third_party\snappy\snappy.cc">
+ <Filter>snappy</Filter>
+ </ClCompile>
+ <ClCompile Include="scanandorder.cpp" />
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\client\dbclientcursor.h" />
@@ -315,6 +325,12 @@
<ClInclude Include="..\util\net\sock.h" />
<ClInclude Include="..\util\concurrency\rwlockimpl.h" />
<ClInclude Include="..\util\concurrency\mutexdebugger.h" />
+ <ClInclude Include="..\third_party\snappy\config.h">
+ <Filter>snappy</Filter>
+ </ClInclude>
+ <ClInclude Include="..\third_party\snappy\snappy.h">
+ <Filter>snappy</Filter>
+ </ClInclude>
</ItemGroup>
<ItemGroup>
<ResourceCompile Include="db.rc" />
@@ -349,4 +365,9 @@
<Library Include="..\..\js\js64d.lib" />
<Library Include="..\..\js\js64r.lib" />
</ItemGroup>
+ <ItemGroup>
+ <Filter Include="snappy">
+ <UniqueIdentifier>{bb99c086-7926-4f50-838d-f5f0c18397c0}</UniqueIdentifier>
+ </Filter>
+ </ItemGroup>
</Project> \ No newline at end of file
diff --git a/db/dbcommands.cpp b/db/dbcommands.cpp
index 73c1004d4f2..2edd7684ff8 100644
--- a/db/dbcommands.cpp
+++ b/db/dbcommands.cpp
@@ -31,6 +31,7 @@
#include "../util/lruishmap.h"
#include "../util/md5.hpp"
#include "../util/processinfo.h"
+#include "../util/ramlog.h"
#include "json.h"
#include "repl.h"
#include "repl_block.h"
@@ -53,14 +54,16 @@ namespace mongo {
namespace dur {
void setAgeOutJournalFiles(bool rotate);
}
+ /** @return true if fields found */
bool setParmsMongodSpecific(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
BSONElement e = cmdObj["ageOutJournalFiles"];
if( !e.eoo() ) {
bool r = e.trueValue();
log() << "ageOutJournalFiles " << r << endl;
dur::setAgeOutJournalFiles(r);
+ return true;
}
- return true;
+ return false;
}
void flushDiagLog();
@@ -85,7 +88,7 @@ namespace mongo {
help << "reset error state (used with getpreverror)";
}
CmdResetError() : Command("resetError", false, "reseterror") {}
- bool run(const string& db, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool run(const string& db, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
LastError *le = lastError.get();
assert( le );
le->reset();
@@ -116,7 +119,7 @@ namespace mongo {
<< " { w:n } - await replication to n servers (including self) before returning\n"
<< " { wtimeout:m} - timeout for w in m milliseconds";
}
- bool run(const string& dbname, BSONObj& _cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool run(const string& dbname, BSONObj& _cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
LastError *le = lastError.disableForCommand();
bool err = false;
@@ -246,7 +249,7 @@ namespace mongo {
return true;
}
CmdGetPrevError() : Command("getPrevError", false, "getpreverror") {}
- bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
LastError *le = lastError.disableForCommand();
le->appendSelf( result );
if ( le->valid )
@@ -268,14 +271,14 @@ namespace mongo {
<< "N to wait N seconds for other members to catch up.";
}
- bool CmdShutdown::run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool CmdShutdown::run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
bool force = cmdObj.hasField("force") && cmdObj["force"].trueValue();
if (!force && theReplSet && theReplSet->isPrimary()) {
- int timeout, now, start;
+ long long timeout, now, start;
timeout = now = start = curTimeMicros64()/1000000;
if (cmdObj.hasField("timeoutSecs")) {
- timeout += cmdObj["timeoutSecs"].numberInt();
+ timeout += cmdObj["timeoutSecs"].numberLong();
}
OpTime lastOp = theReplSet->lastOpTimeWritten;
@@ -329,7 +332,7 @@ namespace mongo {
}
virtual LockType locktype() const { return WRITE; }
CmdDropDatabase() : Command("dropDatabase") {}
- bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
BSONElement e = cmdObj.firstElement();
log() << "dropDatabase " << dbname << endl;
int p = (int) e.number();
@@ -349,12 +352,13 @@ namespace mongo {
virtual bool slaveOk() const {
return true;
}
+ virtual bool maintenanceMode() const { return true; }
virtual void help( stringstream& help ) const {
help << "repair database. also compacts. note: slow.";
}
virtual LockType locktype() const { return WRITE; }
CmdRepairDatabase() : Command("repairDatabase") {}
- bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
BSONElement e = cmdObj.firstElement();
log() << "repairDatabase " << dbname << endl;
int p = (int) e.number();
@@ -388,7 +392,7 @@ namespace mongo {
}
virtual LockType locktype() const { return WRITE; }
CmdProfile() : Command("profile") {}
- bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
BSONElement e = cmdObj.firstElement();
result.append("was", cc().database()->profile);
result.append("slowms", cmdLine.slowMS );
@@ -425,7 +429,7 @@ namespace mongo {
help << "returns lots of administrative server statistics";
}
- bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
long long start = Listener::getElapsedTimeMillis();
BSONObjBuilder timeBuilder(128);
@@ -596,6 +600,21 @@ namespace mongo {
timeBuilder.appendNumber( "after dur" , Listener::getElapsedTimeMillis() - start );
+ {
+ RamLog* rl = RamLog::get( "warnings" );
+ verify(15880, rl);
+
+ if (rl->lastWrite() >= time(0)-(10*60)){ // only show warnings from last 10 minutes
+ vector<const char*> lines;
+ rl->get( lines );
+
+ BSONArrayBuilder arr( result.subarrayStart( "warnings" ) );
+ for ( unsigned i=std::max(0,(int)lines.size()-10); i<lines.size(); i++ )
+ arr.append( lines[i] );
+ arr.done();
+ }
+ }
+
if ( ! authed )
result.append( "note" , "run against admin for more info" );
@@ -619,7 +638,7 @@ namespace mongo {
virtual void help( stringstream& help ) const { help << "internal"; }
virtual LockType locktype() const { return NONE; }
CmdGetOpTime() : Command("getoptime") { }
- bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
writelock l( "" );
result.appendDate("optime", OpTime::now().asDate());
return true;
@@ -648,7 +667,7 @@ namespace mongo {
}
void help(stringstream& h) const { h << "http://www.mongodb.org/display/DOCS/Monitoring+and+Diagnostics#MonitoringandDiagnostics-DatabaseRecord%2FReplay"; }
virtual LockType locktype() const { return WRITE; }
- bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
int was = _diaglog.setLevel( cmdObj.firstElement().numberInt() );
flushDiagLog();
if ( !cmdLine.quiet )
@@ -771,7 +790,7 @@ namespace mongo {
}
virtual void help( stringstream& help ) const { help << "drop a collection\n{drop : <collectionName>}"; }
virtual LockType locktype() const { return WRITE; }
- virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ virtual bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
string nsToDrop = dbname + '.' + cmdObj.firstElement().valuestr();
NamespaceDetails *d = nsdetails(nsToDrop.c_str());
if ( !cmdLine.quiet )
@@ -805,7 +824,7 @@ namespace mongo {
return false;
}
virtual void help( stringstream& help ) const { help << "count objects in collection"; }
- virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
string ns = dbname + '.' + cmdObj.firstElement().valuestr();
string err;
long long n = runCount(ns.c_str(), cmdObj, err);
@@ -844,7 +863,8 @@ namespace mongo {
help << "create a collection explicitly\n"
"{ create: <ns>[, capped: <bool>, size: <collSizeInBytes>, max: <nDocs>] }";
}
- virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ virtual bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ uassert(15888, "must pass name of collection to create", cmdObj.firstElement().valuestrsafe()[0] != '\0');
string ns = dbname + '.' + cmdObj.firstElement().valuestr();
string err;
uassert(14832, "specify size:<n> when capped is true", !cmdObj["capped"].trueValue() || cmdObj["size"].isNumber() || cmdObj.hasField("$nExtents"));
@@ -869,7 +889,7 @@ namespace mongo {
help << "drop indexes for a collection";
}
CmdDropIndexes() : Command("dropIndexes", false, "deleteIndexes") { }
- bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& anObjBuilder, bool /*fromRepl*/) {
+ bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& anObjBuilder, bool /*fromRepl*/) {
BSONElement e = jsobj.firstElement();
string toDeleteNs = dbname + '.' + e.valuestr();
NamespaceDetails *d = nsdetails(toDeleteNs.c_str());
@@ -914,7 +934,7 @@ namespace mongo {
help << "re-index a collection";
}
CmdReIndex() : Command("reIndex") { }
- bool run(const string& dbname , BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) {
+ bool run(const string& dbname , BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) {
static DBDirectClient db;
BSONElement e = jsobj.firstElement();
@@ -969,7 +989,7 @@ namespace mongo {
virtual LockType locktype() const { return NONE; }
virtual void help( stringstream& help ) const { help << "list databases on this server"; }
CmdListDatabases() : Command("listDatabases" , true ) {}
- bool run(const string& dbname , BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) {
+ bool run(const string& dbname , BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) {
vector< string > dbNames;
getDatabaseNames( dbNames );
vector< BSONObj > dbInfos;
@@ -1038,7 +1058,7 @@ namespace mongo {
virtual LockType locktype() const { return WRITE; }
CmdCloseAllDatabases() : Command( "closeAllDatabases" ) {}
- bool run(const string& dbname , BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) {
+ bool run(const string& dbname , BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) {
bool ok;
try {
ok = dbHolder.closeAll( dbpath , result, false );
@@ -1065,7 +1085,7 @@ namespace mongo {
help << " example: { filemd5 : ObjectId(aaaaaaa) , root : \"fs\" }";
}
virtual LockType locktype() const { return READ; }
- bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
string ns = dbname;
ns += ".";
{
@@ -1164,7 +1184,7 @@ namespace mongo {
"\nkeyPattern, min, and max parameters are optional."
"\nnote: This command may take a while to run";
}
- bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
Timer timer;
string ns = jsobj.firstElement().String();
@@ -1282,7 +1302,7 @@ namespace mongo {
help << "{ collStats:\"blog.posts\" , scale : 1 } scale divides sizes e.g. for KB use 1024\n"
" avgObjSize - in bytes";
}
- bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
string ns = dbname + "." + jsobj.firstElement().valuestr();
Client::Context cx( ns );
@@ -1351,7 +1371,7 @@ namespace mongo {
"Get stats on a database. Not instantaneous. Slower for databases with large .ns files.\n" <<
"Example: { dbStats:1, scale:1 }";
}
- bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
int scale = 1;
if ( jsobj["scale"].isNumber() ) {
scale = jsobj["scale"].numberInt();
@@ -1426,7 +1446,7 @@ namespace mongo {
virtual void help( stringstream &help ) const {
help << "{ cloneCollectionAsCapped:<fromName>, toCollection:<toName>, size:<sizeInBytes> }";
}
- bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
string from = jsobj.getStringField( "cloneCollectionAsCapped" );
string to = jsobj.getStringField( "toCollection" );
long long size = (long long)jsobj.getField( "size" ).number();
@@ -1488,7 +1508,7 @@ namespace mongo {
virtual void help( stringstream &help ) const {
help << "{ convertToCapped:<fromCollectionName>, size:<sizeInBytes> }";
}
- bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
BackgroundOperation::assertNoBgOpInProgForDb(dbname.c_str());
string from = jsobj.getStringField( "convertToCapped" );
@@ -1544,7 +1564,7 @@ namespace mongo {
virtual void help( stringstream &help ) const {
help << "{whatsmyuri:1}";
}
- virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
BSONObj info = cc().curop()->infoNoauth();
result << "you" << info[ "client" ];
return true;
@@ -1559,7 +1579,7 @@ namespace mongo {
return true;
}
virtual bool slaveOk() const {
- return false;
+ return true;
}
virtual LockType locktype() const { return WRITE; }
virtual bool requiresAuth() {
@@ -1568,7 +1588,7 @@ namespace mongo {
virtual void help( stringstream &help ) const {
help << "internal. for testing only.";
}
- virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
string coll = cmdObj[ "godinsert" ].valuestrsafe();
uassert( 13049, "godinsert must specify a collection", !coll.empty() );
string ns = dbname + "." + coll;
@@ -1583,7 +1603,7 @@ namespace mongo {
DBHashCmd() : Command( "dbHash", false, "dbhash" ) {}
virtual bool slaveOk() const { return true; }
virtual LockType locktype() const { return READ; }
- virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ virtual bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
list<string> colls;
Database* db = cc().database();
if ( db )
@@ -1629,9 +1649,8 @@ namespace mongo {
cursor = findTableScan( c.c_str() , BSONObj() );
}
else {
- bb.done();
- errmsg = (string)"can't find _id index for: " + c;
- return 0;
+ log() << "can't find _id index for: " << c << endl;
+ continue;
}
md5_state_t st;
@@ -1677,7 +1696,7 @@ namespace mongo {
help << "w:true write lock. secs:<seconds>";
}
CmdSleep() : Command("sleep") { }
- bool run(const string& ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool run(const string& ns, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
int secs = 100;
if ( cmdObj["secs"].isNumber() )
secs = cmdObj["secs"].numberInt();
@@ -1700,7 +1719,7 @@ namespace mongo {
virtual bool slaveOk() const { return false; }
virtual LockType locktype() const { return WRITE; }
virtual bool requiresAuth() { return true; }
- virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ virtual bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
string coll = cmdObj[ "captrunc" ].valuestrsafe();
uassert( 13416, "captrunc must specify a collection", !coll.empty() );
string ns = dbname + "." + coll;
@@ -1727,7 +1746,7 @@ namespace mongo {
virtual bool slaveOk() const { return false; }
virtual LockType locktype() const { return WRITE; }
virtual bool requiresAuth() { return true; }
- virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ virtual bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
string coll = cmdObj[ "emptycapped" ].valuestrsafe();
uassert( 13428, "emptycapped must specify a collection", !coll.empty() );
string ns = dbname + "." + coll;
@@ -1792,13 +1811,22 @@ namespace mongo {
if ( c->adminOnly() )
log( 2 ) << "command: " << cmdObj << endl;
+ if (c->maintenanceMode() && theReplSet && theReplSet->isSecondary()) {
+ theReplSet->setMaintenanceMode(true);
+ }
+
if ( c->locktype() == Command::NONE ) {
// we also trust that this won't crash
client.curop()->ensureStarted();
string errmsg;
- int ok = c->run( dbname , cmdObj , errmsg , result , fromRepl );
+ int ok = c->run( dbname , cmdObj , queryOptions, errmsg , result , fromRepl );
if ( ! ok )
result.append( "errmsg" , errmsg );
+
+ if (c->maintenanceMode() && theReplSet) {
+ theReplSet->setMaintenanceMode(false);
+ }
+
return ok;
}
@@ -1812,11 +1840,13 @@ namespace mongo {
client.curop()->ensureStarted();
Client::Context ctx( dbname , dbpath , &lk , c->requiresAuth() );
+ bool retval = true;
+
try {
string errmsg;
- if ( ! c->run(dbname, cmdObj, errmsg, result, fromRepl ) ) {
+ if ( ! c->run(dbname, cmdObj, queryOptions, errmsg, result, fromRepl ) ) {
result.append( "errmsg" , errmsg );
- return false;
+ retval = false;
}
}
catch ( DBException& e ) {
@@ -1824,14 +1854,18 @@ namespace mongo {
ss << "exception: " << e.what();
result.append( "errmsg" , ss.str() );
result.append( "code" , e.getCode() );
- return false;
+ retval = false;
}
- if ( c->logTheOp() && ! fromRepl ) {
+ if ( retval && c->logTheOp() && ! fromRepl ) {
logOp("c", cmdns, cmdObj);
}
- return true;
+ if (c->maintenanceMode() && theReplSet) {
+ theReplSet->setMaintenanceMode(false);
+ }
+
+ return retval;
}
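The command-dispatch changes above bracket maintenanceMode() commands with setMaintenanceMode(true/false) so a secondary shows as recovering while e.g. compact or repairDatabase runs; the diff clears the flag explicitly on each exit path. The sketch below illustrates the same bracketing with a scope guard instead; ReplState, MaintenanceGuard and runCompactLikeCommand are invented names for illustration only, not the mongod implementation.

    #include <iostream>

    // Toy replica-set state: just tracks whether we are advertising maintenance mode.
    struct ReplState {
        bool maintenance;
        ReplState() : maintenance(false) {}
        void setMaintenanceMode(bool on) {
            maintenance = on;
            std::cout << (on ? "entering" : "leaving") << " maintenance mode" << std::endl;
        }
    };

    // RAII guard: enters maintenance mode on construction, leaves it on destruction,
    // so early returns and exceptions cannot leave the node stuck in recovering.
    class MaintenanceGuard {
        ReplState* _rs;
    public:
        explicit MaintenanceGuard(ReplState* rs) : _rs(rs) {
            if (_rs) _rs->setMaintenanceMode(true);
        }
        ~MaintenanceGuard() {
            if (_rs) _rs->setMaintenanceMode(false);
        }
    };

    bool runCompactLikeCommand(ReplState* rs, bool maintenanceMode) {
        MaintenanceGuard guard(maintenanceMode ? rs : 0);  // no-op for ordinary commands
        // ... long, blocking work happens here ...
        return true;
    }

    int main() {
        ReplState rs;
        runCompactLikeCommand(&rs, true);
        return 0;
    }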
diff --git a/db/dbcommands_admin.cpp b/db/dbcommands_admin.cpp
index 47f6c691ab4..566027fc594 100644
--- a/db/dbcommands_admin.cpp
+++ b/db/dbcommands_admin.cpp
@@ -47,7 +47,7 @@ namespace mongo {
virtual void help(stringstream& h) const { h << "internal"; }
- bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
string dropns = dbname + "." + cmdObj.firstElement().valuestrsafe();
if ( !cmdLine.quiet )
@@ -82,7 +82,7 @@ namespace mongo {
virtual bool adminOnly() const { return true; }
virtual void help(stringstream& h) const { h << "test how long to write and fsync to a test file in the journal/ directory"; }
- bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
filesystem::path p = dur::getJournalDir();
p /= "journalLatencyTest";
@@ -157,7 +157,7 @@ namespace mongo {
virtual LockType locktype() const { return READ; }
//{ validate: "collectionnamewithoutthedbpart" [, scandata: <bool>] [, full: <bool> } */
- bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
string ns = dbname + "." + cmdObj.firstElement().valuestrsafe();
NamespaceDetails * d = nsdetails( ns.c_str() );
if ( !cmdLine.quiet )
@@ -473,7 +473,7 @@ namespace mongo {
return !x.empty();
}*/
virtual void help(stringstream& h) const { h << url(); }
- virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
bool sync = !cmdObj["async"].trueValue(); // async means do an fsync, but return immediately
bool lock = cmdObj["lock"].trueValue();
log() << "CMD fsync: sync:" << sync << " lock:" << lock << endl;
diff --git a/db/dbcommands_generic.cpp b/db/dbcommands_generic.cpp
index 2e025b500ea..a9e13eab741 100644
--- a/db/dbcommands_generic.cpp
+++ b/db/dbcommands_generic.cpp
@@ -79,7 +79,7 @@ namespace mongo {
virtual void help( stringstream &help ) const {
help << "internal command facilitating running in certain cloud computing environments";
}
- bool run(const string& dbname, BSONObj& obj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname, BSONObj& obj, int options, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
if( !obj.hasElement("servers") ) {
vector<string> ips;
obj["servers"].Obj().Vals(ips);
@@ -106,7 +106,7 @@ namespace mongo {
help << "get version #, etc.\n";
help << "{ buildinfo:1 }";
}
- bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
result << "version" << versionString << "gitVersion" << gitVersion() << "sysInfo" << sysInfo();
result << "versionArray" << versionArray;
result << "bits" << ( sizeof( int* ) == 4 ? 32 : 64 );
@@ -137,7 +137,7 @@ namespace mongo {
help << " syncdelay\n";
help << "{ getParameter:'*' } to get everything\n";
}
- bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
bool all = *cmdObj.firstElement().valuestrsafe() == '*';
int before = result.len();
@@ -166,11 +166,6 @@ namespace mongo {
}
} cmdGet;
- // dev - experimental. so only in set command for now. may go away or change
- namespace dur {
- int groupCommitIntervalMs = 100;
- }
-
// tempish
bool setParmsMongodSpecific(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl );
@@ -184,23 +179,24 @@ namespace mongo {
help << "set administrative option(s)\n";
help << "{ setParameter:1, <param>:<value> }\n";
help << "supported so far:\n";
- help << " notablescan\n";
+ help << " journalCommitInterval\n";
help << " logLevel\n";
+ help << " notablescan\n";
help << " quiet\n";
help << " syncdelay\n";
}
- bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
int s = 0;
- setParmsMongodSpecific(dbname, cmdObj, errmsg, result, fromRepl);
- if( cmdObj.hasElement("groupCommitIntervalMs") ) {
+ bool found = setParmsMongodSpecific(dbname, cmdObj, errmsg, result, fromRepl);
+ if( cmdObj.hasElement("journalCommitInterval") ) {
if( !cmdLine.dur ) {
errmsg = "journaling is off";
return false;
}
- int x = (int) cmdObj["groupCommitIntervalMs"].Number();
- assert( x > 0 && x < 500 );
- dur::groupCommitIntervalMs = x;
- log() << "groupCommitIntervalMs " << x << endl;
+ int x = (int) cmdObj["journalCommitInterval"].Number();
+ assert( x > 1 && x < 500 );
+ cmdLine.journalCommitInterval = x;
+ log() << "setParameter journalCommitInterval=" << x << endl;
s++;
}
if( cmdObj.hasElement("notablescan") ) {
@@ -241,7 +237,7 @@ namespace mongo {
s++;
}
- if( s == 0 ) {
+ if( s == 0 && !found ) {
errmsg = "no option found to set, use help:true to see options ";
return false;
}
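
The hunks above replace the experimental groupCommitIntervalMs knob with a documented journalCommitInterval setParameter, validated to the 2-499 ms range and stored on cmdLine. A minimal sketch of driving the new parameter from the C++ client follows; the connection details are hypothetical, but runCommand and the BSON macro are the client APIs used elsewhere in this tree.

    #include <iostream>
    #include <string>
    #include "client/dbclient.h"

    // Hedged sketch: adjust the journal group-commit interval at runtime.
    // Assumes the mongo C++ client headers from this tree; error handling is minimal.
    int main() {
        mongo::DBClientConnection c;
        std::string err;
        if ( !c.connect("localhost:27017", err) ) {
            std::cout << "connect failed: " << err << std::endl;
            return 1;
        }
        mongo::BSONObj info;
        // accepted range per the patch is 2..499 milliseconds
        c.runCommand("admin", BSON("setParameter" << 1 << "journalCommitInterval" << 50), info);
        std::cout << info.jsonString() << std::endl;
        return 0;
    }
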
@@ -257,7 +253,7 @@ namespace mongo {
virtual void help( stringstream &help ) const { help << "a way to check that the server is alive. responds immediately even if server is in a db lock."; }
virtual LockType locktype() const { return NONE; }
virtual bool requiresAuth() { return false; }
- virtual bool run(const string& badns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ virtual bool run(const string& badns, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
// IMPORTANT: Don't put anything in here that might lock db - including authentication
return true;
}
@@ -270,7 +266,7 @@ namespace mongo {
virtual bool slaveOk() const { return true; }
virtual bool readOnly() { return true; }
virtual LockType locktype() const { return NONE; }
- virtual bool run(const string& ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& ns, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
if ( globalScriptEngine ) {
BSONObjBuilder bb( result.subobjStart( "js" ) );
result.append( "utf8" , globalScriptEngine->utf8Ok() );
@@ -292,7 +288,7 @@ namespace mongo {
virtual LockType locktype() const { return NONE; }
virtual bool slaveOk() const { return true; }
virtual bool adminOnly() const { return true; }
- virtual bool run(const string& ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& ns, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
rotateLogs();
return 1;
}
@@ -306,7 +302,7 @@ namespace mongo {
virtual LockType locktype() const { return NONE; }
virtual bool slaveOk() const { return true; }
virtual bool adminOnly() const { return false; }
- virtual bool run(const string& ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& ns, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
BSONObjBuilder b( result.subobjStart( "commands" ) );
for ( map<string,Command*>::iterator i=_commands->begin(); i!=_commands->end(); ++i ) {
Command * c = i->second;
@@ -361,7 +357,7 @@ namespace mongo {
}
virtual LockType locktype() const { return NONE; }
CmdForceError() : Command("forceerror") {}
- bool run(const string& dbnamne, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool run(const string& dbnamne, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
uassert( 10038 , "forced error", false);
return true;
}
@@ -373,7 +369,7 @@ namespace mongo {
virtual bool slaveOk() const { return true; }
virtual LockType locktype() const { return NONE; }
virtual bool requiresAuth() { return false; }
- virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ virtual bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
result << "options" << QueryOption_AllSupported;
return true;
}
@@ -393,7 +389,7 @@ namespace mongo {
help << "{ getLog : '*' } OR { getLog : 'global' }";
}
- virtual bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ virtual bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
string p = cmdObj.firstElement().String();
if ( p == "*" ) {
vector<string> names;
diff --git a/db/dbeval.cpp b/db/dbeval.cpp
index 3a53200a49f..5fe137fc3a3 100644
--- a/db/dbeval.cpp
+++ b/db/dbeval.cpp
@@ -121,7 +121,7 @@ namespace mongo {
}
virtual LockType locktype() const { return NONE; }
CmdEval() : Command("eval", false, "$eval") { }
- bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
AuthenticationInfo *ai = cc().getAuthenticationInfo();
uassert( 12598 , "$eval reads unauthorized", ai->isAuthorizedReads(dbname.c_str()) );
diff --git a/db/dbmessage.h b/db/dbmessage.h
index a14d4cf5142..a789bff849c 100644
--- a/db/dbmessage.h
+++ b/db/dbmessage.h
@@ -122,7 +122,7 @@ namespace mongo {
/** the 32 bit field before the ns
* track all bit usage here as its cross op
- * 0: InsertOption_KeepGoing
+ * 0: InsertOption_ContinueOnError
* 1: fromWriteback
*/
int& reservedField() { return *reserved; }
@@ -233,7 +233,7 @@ namespace mongo {
public:
enum ReservedOptions {
- Reserved_InsertOption_KeepGoing = 1 << 0 ,
+ Reserved_InsertOption_ContinueOnError = 1 << 0 ,
Reserved_FromWriteback = 1 << 1
};
};
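
For reference, a standalone sketch of the reserved-field bit layout after the rename; the enum values mirror the patch, the surrounding test harness is purely illustrative.

    #include <cassert>

    // bit 0: the insert option renamed from KeepGoing to ContinueOnError
    // bit 1: marks a message as coming from writeback
    enum ReservedOptions {
        Reserved_InsertOption_ContinueOnError = 1 << 0,
        Reserved_FromWriteback                = 1 << 1
    };

    int main() {
        int reserved = 0;
        reserved |= Reserved_InsertOption_ContinueOnError;           // set bit 0
        assert(   reserved & Reserved_InsertOption_ContinueOnError );
        assert( !(reserved & Reserved_FromWriteback) );               // bit 1 untouched
        return 0;
    }
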
diff --git a/db/dbwebserver.cpp b/db/dbwebserver.cpp
index 40950a8ccb3..50a59fa1267 100644
--- a/db/dbwebserver.cpp
+++ b/db/dbwebserver.cpp
@@ -61,7 +61,7 @@ namespace mongo {
class DbWebServer : public MiniWebServer {
public:
DbWebServer(const string& ip, int port, const AdminAccess* webUsers)
- : MiniWebServer(ip, port), _webUsers(webUsers) {
+ : MiniWebServer("admin web console", ip, port), _webUsers(webUsers) {
WebStatusPlugin::initAll();
}
@@ -424,7 +424,7 @@ namespace mongo {
string errmsg;
BSONObjBuilder sub;
- if ( ! c->run( "admin.$cmd" , co , errmsg , sub , false ) )
+ if ( ! c->run( "admin.$cmd" , co , 0, errmsg , sub , false ) )
buf.append( cmd , errmsg );
else
buf.append( cmd , sub.obj() );
@@ -531,7 +531,6 @@ namespace mongo {
Client::initThread("websvr");
const int p = cmdLine.port + 1000;
DbWebServer mini(cmdLine.bind_ip, p, adminAccessPtr.get());
- log() << "web admin interface listening on port " << p << endl;
mini.initAndListen();
cc().shutdown();
}
diff --git a/db/driverHelpers.cpp b/db/driverHelpers.cpp
index d98a33b25c5..12aa01886c4 100644
--- a/db/driverHelpers.cpp
+++ b/db/driverHelpers.cpp
@@ -46,7 +46,7 @@ namespace mongo {
class ObjectIdTest : public BasicDriverHelper {
public:
ObjectIdTest() : BasicDriverHelper( "driverOIDTest" ) {}
- virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
if ( cmdObj.firstElement().type() != jstOID ) {
errmsg = "not oid";
return false;
diff --git a/db/dur.cpp b/db/dur.cpp
index 6cb69ac5ac2..dfa36f95224 100644
--- a/db/dur.cpp
+++ b/db/dur.cpp
@@ -62,11 +62,11 @@
#include "dur_journal.h"
#include "dur_commitjob.h"
#include "dur_recover.h"
+#include "dur_stats.h"
#include "../util/concurrency/race.h"
#include "../util/mongoutils/hash.h"
#include "../util/mongoutils/str.h"
#include "../util/timer.h"
-#include "dur_stats.h"
using namespace mongoutils;
@@ -74,8 +74,9 @@ namespace mongo {
namespace dur {
- void WRITETODATAFILES();
- void PREPLOGBUFFER();
+ void PREPLOGBUFFER(JSectHeader& outParm);
+ void WRITETOJOURNAL(JSectHeader h, AlignedBuilder& uncompressed);
+ void WRITETODATAFILES(const JSectHeader& h, AlignedBuilder& uncompressed);
/** declared later in this file
only used in this file -- use DurableInterface::commitNow() outside
@@ -129,6 +130,7 @@ namespace mongo {
"commits" << _commits <<
"journaledMB" << _journaledBytes / 1000000.0 <<
"writeToDataFilesMB" << _writeToDataFilesBytes / 1000000.0 <<
+ "compression" << _journaledBytes / (_uncompressedBytes+1.0) <<
"commitsInWriteLock" << _commitsInWriteLock <<
"earlyCommits" << _earlyCommits <<
"timeMs" <<
@@ -143,6 +145,8 @@ namespace mongo {
b << "ageOutJournalFiles" << "mutex timeout";
if( r == 0 )
b << "ageOutJournalFiles" << false;
+ if( cmdLine.journalCommitInterval != 0 )
+ b << "journalCommitIntervalMs" << cmdLine.journalCommitInterval;
return b.obj();
}
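
The new "compression" field in the durability stats is simply journaled bytes divided by uncompressed bytes, with +1.0 in the denominator so an idle server reports 0 rather than dividing by zero. A standalone illustration with made-up byte counts:

    #include <cassert>

    double compressionRatio(unsigned long long journaledBytes,
                            unsigned long long uncompressedBytes) {
        // +1.0 guards against division by zero before any commit has happened
        return journaledBytes / (uncompressedBytes + 1.0);
    }

    int main() {
        assert( compressionRatio(0, 0) == 0.0 );            // idle server
        double r = compressionRatio(40000000, 100000000);   // hypothetical numbers
        assert( r > 0.39 && r < 0.41 );                      // ~0.4 => the compressor saved ~60%
        return 0;
    }
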
@@ -269,6 +273,9 @@ namespace mongo {
}
bool DurableImpl::commitIfNeeded() {
+ if ( ! dbMutex.isWriteLocked() ) // we implicitly commit if needed when releasing write lock
+ return false;
+
DEV commitJob._nSinceCommitIfNeededCall = 0;
if (commitJob.bytes() > UncommittedBytesLimit) { // should this also fire if CmdLine::DurAlwaysCommit?
stats.curr->_earlyCommits++;
@@ -325,15 +332,6 @@ namespace mongo {
}
#endif
- /** write the buffer we have built to the journal and fsync it.
- outside of lock as that could be slow.
- */
- static void WRITETOJOURNAL(AlignedBuilder& ab) {
- Timer t;
- journal(ab);
- stats.curr->_writeToJournalMicros += t.micros();
- }
-
// Functor to be called over all MongoFiles
class validateSingleMapMatches {
@@ -486,6 +484,7 @@ namespace mongo {
stats.curr->_remapPrivateViewMicros += t.micros();
}
+ // lock order: dbMutex first, then this
mutex groupCommitMutex("groupCommit");
bool _groupCommitWithLimitedLocks() {
@@ -502,8 +501,8 @@ namespace mongo {
commitJob.notifyCommitted();
return true;
}
-
- PREPLOGBUFFER();
+ JSectHeader h;
+ PREPLOGBUFFER(h);
RWLockRecursive::Shared lk3(MongoFile::mmmutex);
@@ -515,16 +514,15 @@ namespace mongo {
lk1.reset();
// ****** now other threads can do writes ******
-
- WRITETOJOURNAL(commitJob._ab);
+ WRITETOJOURNAL(h, commitJob._ab);
assert( abLen == commitJob._ab.len() ); // a check that no one touched the builder while we were doing work. if so, our locking is wrong.
// data is now in the journal, which is sufficient for acknowledging getLastError.
// (ok to crash after that)
commitJob.notifyCommitted();
- WRITETODATAFILES();
- assert( abLen == commitJob._ab.len() ); // WRITETODATAFILES uses _ab also
+ WRITETODATAFILES(h, commitJob._ab);
+ assert( abLen == commitJob._ab.len() ); // check again wasn't modded
commitJob._ab.reset();
// can't : dbMutex._remapPrivateViewRequested = true;
@@ -570,18 +568,19 @@ namespace mongo {
// (and we are only read locked in the dbMutex, so it could happen)
scoped_lock lk(groupCommitMutex);
- PREPLOGBUFFER();
+ JSectHeader h;
+ PREPLOGBUFFER(h);
// todo : write to the journal outside locks, as this write can be slow.
// however, be careful then about remapprivateview as that cannot be done
// if new writes are then pending in the private maps.
- WRITETOJOURNAL(commitJob._ab);
+ WRITETOJOURNAL(h, commitJob._ab);
// data is now in the journal, which is sufficient for acknowledging getLastError.
// (ok to crash after that)
commitJob.notifyCommitted();
- WRITETODATAFILES();
+ WRITETODATAFILES(h, commitJob._ab);
debugValidateAllMapsMatch();
commitJob.reset();
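
Both commit paths now follow the same shape: PREPLOGBUFFER fills in a section header, the header plus the write buffer go to the journal (the durable point for getLastError j:true), and only then are the writes applied to the data files. A toy sketch of that flow, with invented stand-ins for JSectHeader and AlignedBuilder:

    #include <vector>

    // Invented stand-ins; the real types live in dur_journalformat.h and util/alignedbuilder.h.
    struct SectHeader { unsigned long long seqNumber; unsigned sectionLen; };
    typedef std::vector<char> Buffer;

    SectHeader prepLogBuffer(Buffer& ab) {            // PREPLOGBUFFER(h)
        ab.assign(4096, 0);                           // gather the write intents
        SectHeader h = { 42ULL, 0xffffffffu };        // length backfilled later
        return h;
    }
    void writeToJournal(const SectHeader&, const Buffer&)   { /* compress, append, fsync */ }
    void notifyCommitted()                                   { /* ack getLastError j:true */ }
    void writeToDataFiles(const SectHeader&, const Buffer&) { /* apply to mmapped files */ }

    void groupCommit() {
        Buffer ab;
        SectHeader h = prepLogBuffer(ab);
        writeToJournal(h, ab);      // data is durable once this returns
        notifyCommitted();          // safe to acknowledge waiting clients
        writeToDataFiles(h, ab);    // a crash during or before this step replays from the journal
    }

    int main() { groupCommit(); return 0; }
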
@@ -613,6 +612,7 @@ namespace mongo {
}
/** locking: in read lock when called
+ or, for early commits (commitIfNeeded), in write lock
@see MongoMMF::close()
*/
static void groupCommit() {
@@ -686,29 +686,41 @@ namespace mongo {
}
extern int groupCommitIntervalMs;
+ filesystem::path getJournalDir();
void durThread() {
Client::initThread("journal");
+
+ bool samePartition = true;
+ try {
+ const string dbpathDir = boost::filesystem::path(dbpath).native_directory_string();
+ samePartition = onSamePartition(getJournalDir().string(), dbpathDir);
+ }
+ catch(...) {
+ }
+
while( !inShutdown() ) {
RACECHECK
+
+ unsigned ms = cmdLine.journalCommitInterval;
+ if( ms == 0 ) {
+ // use default
+ ms = samePartition ? 100 : 30;
+ }
+
+ unsigned oneThird = (ms / 3) + 1; // +1 so never zero
+
try {
- int millis = groupCommitIntervalMs;
- {
- stats.rotate();
- {
- Timer t;
- journalRotate(); // note we do this part outside of mongomutex
- millis -= t.millis();
- wassert( millis <= groupCommitIntervalMs ); // race if groupCommitIntervalMs was changing by another thread so wassert
- if( millis < 2 )
- millis = 2;
- }
+ stats.rotate();
- // we do this in a couple blocks, which makes it a tiny bit faster (only a little) on throughput,
- // but is likely also less spiky on our cpu usage, which is good:
- sleepmillis(millis/2);
- commitJob.wi()._deferred.invoke();
- sleepmillis(millis/2);
+ // we do this in a couple blocks (the invoke()), which makes it a tiny bit faster (only a little) on throughput,
+ // but is likely also less spiky on our cpu usage, which is good.
+
+ // commit sooner if one or more getLastError j:true is pending
+ for( unsigned i = 1; i <= 2; i++ ) {
+ sleepmillis(oneThird);
+ if( commitJob._notify.nWaiting() )
+ break;
commitJob.wi()._deferred.invoke();
}
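
The commit thread's pacing is now derived from journalCommitInterval (default 100 ms, or 30 ms when the journal and data files live on different partitions) and split into thirds so a pending getLastError j:true can trigger an earlier commit. A minimal standalone sketch of the arithmetic, with an invented waiter check standing in for commitJob._notify.nWaiting():

    #include <cassert>
    #include <chrono>
    #include <thread>

    unsigned effectiveIntervalMs(unsigned configured, bool samePartition) {
        if (configured != 0)
            return configured;              // an explicit setting wins
        return samePartition ? 100 : 30;    // defaults from the patch
    }

    // Sleep in up to two slices of interval/3, waking early if a waiter appears.
    template <class WaiterCheck>
    void paceOneInterval(unsigned ms, WaiterCheck someoneWaiting) {
        unsigned oneThird = (ms / 3) + 1;   // +1 so the slice is never zero
        for (unsigned i = 1; i <= 2; i++) {
            std::this_thread::sleep_for(std::chrono::milliseconds(oneThird));
            if (someoneWaiting())
                break;                      // commit sooner for a pending j:true
        }
    }

    int main() {
        assert( effectiveIntervalMs(0, true)  == 100 );
        assert( effectiveIntervalMs(0, false) == 30 );
        assert( effectiveIntervalMs(7, false) == 7 );
        paceOneInterval(effectiveIntervalMs(0, true), []{ return false; });
        return 0;
    }
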
@@ -772,6 +784,13 @@ namespace mongo {
void DurableImpl::syncDataAndTruncateJournal() {
dbMutex.assertWriteLocked();
+ // a commit from the commit thread won't begin while we are in the write lock,
+ // but it may already be in progress and the end of that work is done outside
+ // (dbMutex) locks. This line waits for that to complete if already underway.
+ {
+ scoped_lock lk(groupCommitMutex);
+ }
+
groupCommit();
MongoFile::flushAll(true);
journalCleanup();
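
The empty scoped_lock added above is a small but important idiom: acquiring and immediately releasing groupCommitMutex acts as a barrier that waits out any commit already in flight on the journal thread. A standalone sketch of the idiom, using std::mutex in place of the tree's mutex wrapper:

    #include <mutex>

    std::mutex groupCommitMutex;   // stand-in for dur's groupCommitMutex

    void waitForInFlightCommit() {
        // Nothing happens in the critical section; taking the lock is the point.
        // If a commit currently holds the mutex, we block here until it finishes.
        std::lock_guard<std::mutex> lk(groupCommitMutex);
    }

    int main() { waitForInFlightCommit(); return 0; }
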
diff --git a/db/dur_journal.cpp b/db/dur_journal.cpp
index f85dda32b51..0a1bc5ebbad 100644
--- a/db/dur_journal.cpp
+++ b/db/dur_journal.cpp
@@ -34,6 +34,7 @@
#include "../util/file.h"
#include "../util/checksum.h"
#include "../util/concurrency/race.h"
+#include "../util/compress.h"
using namespace mongoutils;
@@ -92,6 +93,11 @@ namespace mongo {
assert(false);
}
+ JSectFooter::JSectFooter() {
+ memset(this, 0, sizeof(*this));
+ sentinel = JEntry::OpCode_Footer;
+ }
+
JSectFooter::JSectFooter(const void* begin, int len) { // needs buffer to compute hash
sentinel = JEntry::OpCode_Footer;
reserved = 0;
@@ -103,6 +109,10 @@ namespace mongo {
}
bool JSectFooter::checkHash(const void* begin, int len) const {
+ if( !magicOk() ) {
+ log() << "journal footer not valid" << endl;
+ return false;
+ }
Checksum c;
c.gen(begin, len);
DEV log() << "checkHash len:" << len << " hash:" << toHex(hash, 16) << " current:" << toHex(c.bytes, 16) << endl;
@@ -317,13 +327,13 @@ namespace mongo {
void preallocateFiles() {
if( exists(getJournalDir()/"prealloc.0") || // if enabled previously, keep using
- exists(getJournalDir()/"prealloc.1") ||
- preallocateIsFaster() ) {
+ exists(getJournalDir()/"prealloc.1") ||
+ ( cmdLine.preallocj && preallocateIsFaster() ) ) {
usingPreallocate = true;
try {
_preallocateFiles();
}
- catch(...) {
+ catch(...) {
log() << "warning caught exception in preallocateFiles, continuing" << endl;
}
}
@@ -343,10 +353,12 @@ namespace mongo {
{
// zero the header
File f;
- f.open(temppath.string().c_str(), false, true);
+ f.open(temppath.string().c_str(), false, false);
char buf[8192];
memset(buf, 0, 8192);
f.write(0, buf, 8192);
+ f.truncate(DataLimitPerJournalFile);
+ f.fsync();
}
boost::filesystem::rename(temppath, filepath);
return;
@@ -471,12 +483,6 @@ namespace mongo {
/** called during recovery (the error message text below assumes that)
*/
unsigned long long journalReadLSN() {
- if( !debug ) {
- // in nondebug build, for now, be conservative until more tests written, and apply the whole journal.
- // however we will still write the lsn file to exercise that code, and use in _DEBUG build.
- return 0;
- }
-
if( !MemoryMappedFile::exists(lsnPath()) ) {
log() << "info no lsn file in journal/ directory" << endl;
return 0;
@@ -595,15 +601,7 @@ namespace mongo {
j._ageOut = a;
}
- /** check if time to rotate files. assure a file is open.
- done separately from the journal() call as we can do this part
- outside of lock.
- thread: durThread()
- */
- void journalRotate() {
- j.rotate();
- }
- void Journal::rotate() {
+ void Journal::_rotate() {
assert( !dbMutex.atLeastReadLocked() );
RACECHECK
@@ -618,6 +616,7 @@ namespace mongo {
return;
if( _curLogFile ) {
+ _curLogFile->truncate();
closeCurrentJournalFile();
removeUnneededJournalFiles();
}
@@ -636,24 +635,74 @@ namespace mongo {
}
}
- /** write to journal
+ /** write (append) the buffer we have built to the journal and fsync it.
+ outside of dbMutex lock as this could be slow.
+ @param uncompressed - a buffer that will be written to the journal after compression
+ will not return until on disk
*/
- void journal(const AlignedBuilder& b) {
- j.journal(b);
+ void WRITETOJOURNAL(JSectHeader h, AlignedBuilder& uncompressed) {
+ Timer t;
+ j.journal(h, uncompressed);
+ stats.curr->_writeToJournalMicros += t.micros();
}
- void Journal::journal(const AlignedBuilder& b) {
+ void Journal::journal(const JSectHeader& h, const AlignedBuilder& uncompressed) {
+ RACECHECK
+ static AlignedBuilder b(32*1024*1024);
+ /* buffer to journal will be
+ JSectHeader
+ compressed operations
+ JSectFooter
+ */
+ const unsigned headTailSize = sizeof(JSectHeader) + sizeof(JSectFooter);
+ const unsigned max = maxCompressedLength(uncompressed.len()) + headTailSize;
+ b.reset(max);
+
+ {
+ dassert( h.sectionLen() == (unsigned) 0xffffffff ); // we will backfill later
+ b.appendStruct(h);
+ }
+
+ size_t compressedLength = 0;
+ rawCompress(uncompressed.buf(), uncompressed.len(), b.cur(), &compressedLength);
+ assert( compressedLength < 0xffffffff );
+ assert( compressedLength < max );
+ b.skip(compressedLength);
+
+ // footer
+ unsigned L = 0xffffffff;
+ {
+ // pad to alignment, and set the total section length in the JSectHeader
+ assert( 0xffffe000 == (~(Alignment-1)) );
+ unsigned lenUnpadded = b.len() + sizeof(JSectFooter);
+ L = (lenUnpadded + Alignment-1) & (~(Alignment-1));
+ dassert( L >= lenUnpadded );
+
+ ((JSectHeader*)b.atOfs(0))->setSectionLen(lenUnpadded);
+
+ JSectFooter f(b.buf(), b.len()); // computes checksum
+ b.appendStruct(f);
+ dassert( b.len() == lenUnpadded );
+
+ b.skip(L - lenUnpadded);
+ dassert( b.len() % Alignment == 0 );
+ }
+
try {
mutex::scoped_lock lk(_curLogFileMutex);
// must already be open -- so that _curFileId is correct for previous buffer building
assert( _curLogFile );
- stats.curr->_journaledBytes += b.len();
- _written += b.len();
- _curLogFile->synchronousAppend((void *) b.buf(), b.len());
+ stats.curr->_uncompressedBytes += b.len();
+ unsigned w = b.len();
+ _written += w;
+ assert( w <= L );
+ stats.curr->_journaledBytes += L;
+ _curLogFile->synchronousAppend((const void *) b.buf(), L);
+ _rotate();
}
catch(std::exception& e) {
- log() << "warning exception in dur::journal " << e.what() << endl;
+ log() << "error exception in dur::journal " << e.what() << endl;
throw;
}
}
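
Journal::journal() now builds the on-disk section itself: header, compressed payload, footer, padded up to the 8 KB alignment, with the unpadded length backfilled into the header. The rounding is the part most worth seeing in isolation; a standalone check of that math, using the Alignment constant this patch moves into dur_journalformat.h:

    #include <cassert>

    const unsigned Alignment = 8192;   // dur_journalformat.h, this patch

    unsigned padToAlignment(unsigned lenUnpadded) {
        assert( 0xffffe000 == (~(Alignment - 1)) );            // same sanity check as the patch
        unsigned L = (lenUnpadded + Alignment - 1) & (~(Alignment - 1));
        assert( L >= lenUnpadded && L % Alignment == 0 );
        return L;
    }

    int main() {
        assert( padToAlignment(1)    == 8192 );
        assert( padToAlignment(8192) == 8192 );
        assert( padToAlignment(8193) == 16384 );
        return 0;
    }
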
diff --git a/db/dur_journal.h b/db/dur_journal.h
index e8e3dfd1465..664f63942e0 100644
--- a/db/dur_journal.h
+++ b/db/dur_journal.h
@@ -28,7 +28,8 @@ namespace mongo {
extern bool okToCleanUp;
/** at termination after db files closed & fsynced
- also after covery
+ also after recovery
+ closes and removes journal files
@param log report in log that we are cleaning up if we actually do any work
*/
void journalCleanup(bool log = false);
@@ -43,12 +44,6 @@ namespace mongo {
*/
void journalRotate();
- /** write/append to journal file *
- @param buf - a buffer that will be written to the journal.
- will not return until on disk
- */
- void journal(const AlignedBuilder& buf);
-
/** flag that something has gone wrong during writing to the journal
(not for recovery mode)
*/
@@ -67,5 +62,7 @@ namespace mongo {
// in case disk controller buffers writes
const long long ExtraKeepTimeMs = 10000;
+ const unsigned JournalCommitIntervalDefault = 100;
+
}
}
diff --git a/db/dur_journalformat.h b/db/dur_journalformat.h
index 72587ccd7b6..10ed8487b71 100644
--- a/db/dur_journalformat.h
+++ b/db/dur_journalformat.h
@@ -22,6 +22,8 @@ namespace mongo {
namespace dur {
+ const unsigned Alignment = 8192;
+
#pragma pack(1)
/** beginning header for a journal/j._<n> file
there is nothing important int this header at this time. except perhaps version #.
@@ -34,7 +36,11 @@ namespace mongo {
// x4142 is asci--readable if you look at the file with head/less -- thus the starting values were near
// that. simply incrementing the version # is safe on a fwd basis.
+#if defined(_NOCOMPRESS)
enum { CurrentVersion = 0x4148 };
+#else
+ enum { CurrentVersion = 0x4149 };
+#endif
unsigned short _version;
// these are just for diagnostic ease (make header more useful as plain text)
@@ -55,11 +61,25 @@ namespace mongo {
/** "Section" header. A section corresponds to a group commit.
len is length of the entire section including header and footer.
+ header and footer are not compressed, just the stuff in between.
*/
struct JSectHeader {
- unsigned len; // length in bytes of the whole section
+ private:
+ unsigned _sectionLen; // unpadded length in bytes of the whole section
+ public:
unsigned long long seqNumber; // sequence number that can be used on recovery to not do too much work
unsigned long long fileId; // matches JHeader::fileId
+ unsigned sectionLen() const { return _sectionLen; }
+
+ // we store the unpadded length so we can use that when we uncompress. to
+ // get the true total size this must be rounded up to the Alignment.
+ void setSectionLen(unsigned lenUnpadded) { _sectionLen = lenUnpadded; }
+
+ unsigned sectionLenWithPadding() const {
+ unsigned x = (sectionLen() + (Alignment-1)) & (~(Alignment-1));
+ dassert( x % Alignment == 0 );
+ return x;
+ }
};
/** an individual write operation within a group commit section. Either the entire section should
@@ -111,6 +131,7 @@ namespace mongo {
/** group commit section footer. md5 is a key field. */
struct JSectFooter {
+ JSectFooter();
JSectFooter(const void* begin, int len); // needs buffer to compute hash
unsigned sentinel;
unsigned char hash[16];
@@ -123,6 +144,8 @@ namespace mongo {
@return true if buffer looks valid
*/
bool checkHash(const void* begin, int len) const;
+
+ bool magicOk() const { return *((unsigned*)magic) == 0x0a0a0a0a; }
};
/** declares "the next entry(s) are for this database / file path prefix" */
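
magicOk() gives recovery a cheap first test of a section footer before the checksum: the magic field is compared against 0x0a0a0a0a (four newline bytes), so a zeroed or torn footer is rejected immediately. A standalone sketch of the same test, using memcpy instead of the header's type-punned read:

    #include <cassert>
    #include <cstring>

    // A properly written footer carries 0x0a0a0a0a, i.e. four '\n' bytes.
    bool magicOk(const char magic[4]) {
        unsigned v;
        std::memcpy(&v, magic, sizeof(v));   // avoid the aliasing cast used in the header
        return v == 0x0a0a0a0a;
    }

    int main() {
        const char good[4]   = { '\n', '\n', '\n', '\n' };
        const char zeroed[4] = { 0, 0, 0, 0 };
        assert(  magicOk(good) );
        assert( !magicOk(zeroed) );
        return 0;
    }
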
diff --git a/db/dur_journalimpl.h b/db/dur_journalimpl.h
index e436eae45f1..bf771c5d768 100644
--- a/db/dur_journalimpl.h
+++ b/db/dur_journalimpl.h
@@ -18,6 +18,7 @@
#pragma once
+#include "dur_journalformat.h"
#include "../util/logfile.h"
namespace mongo {
@@ -40,14 +41,14 @@ namespace mongo {
*/
void rotate();
- /** write to journal
+ /** append to the journal file
*/
- void journal(const AlignedBuilder& b);
+ void journal(const JSectHeader& h, const AlignedBuilder& b);
boost::filesystem::path getFilePathFor(int filenumber) const;
unsigned long long lastFlushTime() const { return _lastFlushTime; }
- void cleanup(bool log);
+ void cleanup(bool log); // closes and removes journal files
unsigned long long curFileId() const { return _curFileId; }
@@ -61,6 +62,11 @@ namespace mongo {
void open();
private:
+ /** check if time to rotate files. assure a file is open.
+ * internally called with every commit
+ */
+ void _rotate();
+
void _open();
void closeCurrentJournalFile();
void removeUnneededJournalFiles();
diff --git a/db/dur_preplogbuffer.cpp b/db/dur_preplogbuffer.cpp
index 5851e415408..0d8ef3688db 100644
--- a/db/dur_preplogbuffer.cpp
+++ b/db/dur_preplogbuffer.cpp
@@ -60,7 +60,7 @@ namespace mongo {
size_t ofs = 1;
MongoMMF *mmf = findMMF_inlock(i->start(), /*out*/ofs);
- _IF( !mmf->willNeedRemap() ) {
+ if( unlikely(!mmf->willNeedRemap()) ) {
// tag this mmf as needed a remap of its private view later.
// usually it will already be dirty/already set, so we do the if above first
// to avoid possibility of cpu cache line contention
@@ -97,7 +97,7 @@ namespace mongo {
#endif
bb.appendBuf(i->start(), e.len);
- _IF (e.len != (unsigned)i->length()) {
+ if (unlikely(e.len != (unsigned)i->length())) {
log() << "journal info splitting prepBasicWrite at boundary" << endl;
// This only happens if we write to the last byte in a file and
@@ -120,40 +120,25 @@ namespace mongo {
// each time events switch to a different database we journal a JDbContext
RelativePath lastDbPath;
- set<WriteIntent>::iterator i = commitJob.writes().begin();
-
- const WriteIntent *w = &(*i);
- while(1) {
- i++;
- const WriteIntent *next = 0;
- IF( i != commitJob.writes().end() ) {
- next = &(*i);
- PREFETCH(next);
- }
- prepBasicWrite_inlock(bb, w, lastDbPath);
- _IF( next == 0 )
- break;
- w = next;
- };
+ for( set<WriteIntent>::iterator i = commitJob.writes().begin(); i != commitJob.writes().end(); i++ ) {
+ prepBasicWrite_inlock(bb, &(*i), lastDbPath);
+ }
}
- void resetLogBuffer(AlignedBuilder& bb) {
+ void resetLogBuffer(/*out*/JSectHeader& h, AlignedBuilder& bb) {
bb.reset();
- // JSectHeader
- JSectHeader h;
- h.len = (unsigned) 0xffffffff; // total length, will fill in later
+ h.setSectionLen(0xffffffff); // total length, will fill in later
h.seqNumber = getLastDataFileFlushTime();
h.fileId = j.curFileId();
-
- bb.appendStruct(h);
}
/** we will build an output buffer ourself and then use O_DIRECT
we could be in read lock for this
caller handles locking
+ @return partially populated sectheader and _ab set
*/
- void _PREPLOGBUFFER() {
+ void _PREPLOGBUFFER(JSectHeader& h) {
assert( cmdLine.dur );
{
@@ -165,7 +150,7 @@ namespace mongo {
}
AlignedBuilder& bb = commitJob._ab;
- resetLogBuffer(bb);
+ resetLogBuffer(h, bb); // adds JSectHeader
// ops other than basic writes (DurOp's)
{
@@ -174,34 +159,14 @@ namespace mongo {
}
}
- {
- prepBasicWrites(bb);
- }
-
- // pad to alignment, and set the total section length in the JSectHeader
- assert( 0xffffe000 == (~(Alignment-1)) );
- unsigned lenWillBe = bb.len() + sizeof(JSectFooter);
- unsigned L = (lenWillBe + Alignment-1) & (~(Alignment-1));
- dassert( L >= lenWillBe );
- *((unsigned*)bb.atOfs(0)) = L;
-
- {
- JSectFooter f(bb.buf(), bb.len());
- bb.appendStruct(f);
- }
-
- {
- unsigned padding = L - bb.len();
- bb.skip(padding);
- dassert( bb.len() % Alignment == 0 );
- }
+ prepBasicWrites(bb);
return;
}
- void PREPLOGBUFFER() {
+ void PREPLOGBUFFER(/*out*/ JSectHeader& h) {
Timer t;
j.assureLogFileOpen(); // so fileId is set
- _PREPLOGBUFFER();
+ _PREPLOGBUFFER(h);
stats.curr->_prepLogBufferMicros += t.micros();
}
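
The _IF/IF macros give way to an explicit unlikely() hint around the rare branches (remap tagging, writes split at a file boundary). A standalone sketch of such a hint macro follows; the real definition lives in this tree's bson/inline_decls.h, which this patch also touches, so treat the exact spelling below as illustrative only.

    #include <cstdio>

    #if defined(__GNUC__)
    # define unlikely(x) __builtin_expect(!!(x), 0)     // hint: branch rarely taken
    #else
    # define unlikely(x) (x)                            // degrade to a plain test
    #endif

    int appendWrite(unsigned len, unsigned expectedLen) {
        if ( unlikely(len != expectedLen) ) {
            // rare path: a write intent straddled the end of a data file
            std::printf("journal info splitting prepBasicWrite at boundary\n");
            return 1;
        }
        return 0;
    }

    int main() { return appendWrite(8, 8); }
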
diff --git a/db/dur_recover.cpp b/db/dur_recover.cpp
index 2e1516914f1..1e719c0070d 100644
--- a/db/dur_recover.cpp
+++ b/db/dur_recover.cpp
@@ -27,6 +27,7 @@
#include "namespace.h"
#include "../util/mongoutils/str.h"
#include "../util/bufreader.h"
+#include "../util/concurrency/race.h"
#include "pdfile.h"
#include "database.h"
#include "db.h"
@@ -35,6 +36,7 @@
#include "cmdline.h"
#include "curop.h"
#include "mongommf.h"
+#include "../util/compress.h"
#include <sys/stat.h>
#include <fcntl.h>
@@ -92,59 +94,73 @@ namespace mongo {
throws
*/
class JournalSectionIterator : boost::noncopyable {
+ auto_ptr<BufReader> _entries;
+ const JSectHeader _h;
+ const char *_lastDbName; // pointer into mmaped journal file
+ const bool _doDurOps;
+ string _uncompressed;
public:
- JournalSectionIterator(const void *p, unsigned len, bool doDurOps)
- : _br(p, len)
- , _sectHead(static_cast<const JSectHeader*>(_br.skip(sizeof(JSectHeader))))
- , _lastDbName(NULL)
- , _doDurOps(doDurOps)
- {}
+ JournalSectionIterator(const JSectHeader& h, const void *compressed, unsigned compressedLen, bool doDurOpsRecovering) :
+ _h(h),
+ _lastDbName(0)
+ , _doDurOps(doDurOpsRecovering)
+ {
+ assert( doDurOpsRecovering );
+ bool ok = uncompress((const char *)compressed, compressedLen, &_uncompressed);
+ if( !ok ) {
+ // it should always be ok (i think?) as there is a previous check to see that the JSectFooter is ok
+ log() << "couldn't uncompress journal section" << endl;
+ msgasserted(15874, "couldn't uncompress journal section");
+ }
+ const char *p = _uncompressed.c_str();
+ assert( compressedLen == _h.sectionLen() - sizeof(JSectFooter) - sizeof(JSectHeader) );
+ _entries = auto_ptr<BufReader>( new BufReader(p, _uncompressed.size()) );
+ }
+
+ // we work with the uncompressed buffer when doing a WRITETODATAFILES (for speed)
+ JournalSectionIterator(const JSectHeader &h, const void *p, unsigned len) :
+ _entries( new BufReader((const char *) p, len) ),
+ _h(h),
+ _lastDbName(0)
+ , _doDurOps(false)
- bool atEof() const { return _br.atEof(); }
+ { }
- unsigned long long seqNumber() const { return _sectHead->seqNumber; }
+ bool atEof() const { return _entries->atEof(); }
+
+ unsigned long long seqNumber() const { return _h.seqNumber; }
/** get the next entry from the log. this function parses and combines JDbContext and JEntry's.
- * @return true if got an entry. false at successful end of section (and no entry returned).
* throws on premature end of section.
*/
- bool next(ParsedJournalEntry& e) {
+ void next(ParsedJournalEntry& e) {
unsigned lenOrOpCode;
- _br.read(lenOrOpCode);
+ _entries->read(lenOrOpCode);
if (lenOrOpCode > JEntry::OpCode_Min) {
switch( lenOrOpCode ) {
case JEntry::OpCode_Footer: {
- if (_doDurOps) {
- const char* pos = (const char*) _br.pos();
- pos -= sizeof(lenOrOpCode); // rewind to include OpCode
- const JSectFooter& footer = *(const JSectFooter*)pos;
- int len = pos - (char*)_sectHead;
- if (!footer.checkHash(_sectHead, len)) {
- massert(13594, "journal checksum doesn't match", false);
- }
- }
- return false; // false return value denotes end of section
+ assert( false );
}
case JEntry::OpCode_FileCreated:
case JEntry::OpCode_DropDb: {
e.dbName = 0;
- boost::shared_ptr<DurOp> op = DurOp::read(lenOrOpCode, _br);
+ boost::shared_ptr<DurOp> op = DurOp::read(lenOrOpCode, *_entries);
if (_doDurOps) {
e.op = op;
}
- return true;
+ return;
}
case JEntry::OpCode_DbContext: {
- _lastDbName = (const char*) _br.pos();
- const unsigned limit = std::min((unsigned)Namespace::MaxNsLen, _br.remaining());
+ _lastDbName = (const char*) _entries->pos();
+ const unsigned limit = std::min((unsigned)Namespace::MaxNsLen, _entries->remaining());
const unsigned len = strnlen(_lastDbName, limit);
massert(13533, "problem processing journal file during recovery", _lastDbName[len] == '\0');
- _br.skip(len+1); // skip '\0' too
- _br.read(lenOrOpCode);
+ _entries->skip(len+1); // skip '\0' too
+ _entries->read(lenOrOpCode); // read this for the fall through
}
// fall through as a basic operation always follows jdbcontext, and we don't have anything to return yet
@@ -156,18 +172,13 @@ namespace mongo {
// JEntry - a basic write
assert( lenOrOpCode && lenOrOpCode < JEntry::OpCode_Min );
- _br.rewind(4);
- e.e = (JEntry *) _br.skip(sizeof(JEntry));
+ _entries->rewind(4);
+ e.e = (JEntry *) _entries->skip(sizeof(JEntry));
e.dbName = e.e->isLocalDbContext() ? "local" : _lastDbName;
assert( e.e->len == lenOrOpCode );
- _br.skip(e.e->len);
- return true;
+ _entries->skip(e.e->len);
}
- private:
- BufReader _br;
- const JSectHeader* _sectHead;
- const char *_lastDbName; // pointer into mmaped journal file
- const bool _doDurOps;
+
};
static string fileName(const char* dbName, int fileNo) {
@@ -289,27 +300,64 @@ namespace mongo {
log() << "END section" << endl;
}
- void RecoveryJob::processSection(const void *p, unsigned len) {
+ void RecoveryJob::processSection(const JSectHeader *h, const void *p, unsigned len, const JSectFooter *f) {
scoped_lock lk(_mx);
+ RACECHECK
+
+ /** todo: we should really verify the checksum to see that seqNumber is ok?
+ that is expensive maybe there is some sort of checksum of just the header
+ within the header itself
+ */
+ if( _recovering && _lastDataSyncedFromLastRun > h->seqNumber + ExtraKeepTimeMs ) {
+ if( h->seqNumber != _lastSeqMentionedInConsoleLog ) {
+ static int n;
+ if( ++n < 10 ) {
+ log() << "recover skipping application of section seq:" << h->seqNumber << " < lsn:" << _lastDataSyncedFromLastRun << endl;
+ }
+ else if( n == 10 ) {
+ log() << "recover skipping application of section more..." << endl;
+ }
+ _lastSeqMentionedInConsoleLog = h->seqNumber;
+ }
+ return;
+ }
- vector<ParsedJournalEntry> entries;
- JournalSectionIterator i(p, len, _recovering);
+ auto_ptr<JournalSectionIterator> i;
+ if( _recovering ) {
+ i = auto_ptr<JournalSectionIterator>(new JournalSectionIterator(*h, p, len, _recovering));
+ }
+ else {
+ i = auto_ptr<JournalSectionIterator>(new JournalSectionIterator(*h, /*after header*/p, /*w/out header*/len));
+ }
- //DEV log() << "recovery processSection seq:" << i.seqNumber() << endl;
- if( _recovering && _lastDataSyncedFromLastRun > i.seqNumber() + ExtraKeepTimeMs ) {
- if( i.seqNumber() != _lastSeqMentionedInConsoleLog ) {
- log() << "recover skipping application of section seq:" << i.seqNumber() << " < lsn:" << _lastDataSyncedFromLastRun << endl;
- _lastSeqMentionedInConsoleLog = i.seqNumber();
+ // we use a static so that we don't have to reallocate every time through. occasionally we
+ // go back to a small allocation so that if there were a spiky growth it won't stick forever.
+ static vector<ParsedJournalEntry> entries;
+ entries.clear();
+/** TEMP uncomment
+ RARELY OCCASIONALLY {
+ if( entries.capacity() > 2048 ) {
+ entries.shrink_to_fit();
+ entries.reserve(2048);
}
- return;
}
+*/
// first read all entries to make sure this section is valid
ParsedJournalEntry e;
- while( i.next(e) ) {
+ while( !i->atEof() ) {
+ i->next(e);
entries.push_back(e);
}
+ // after the entries check the footer checksum
+ if( _recovering ) {
+ assert( ((const char *)h) + sizeof(JSectHeader) == p );
+ if( !f->checkHash(h, len + sizeof(JSectHeader)) ) {
+ msgasserted(13594, "journal checksum doesn't match");
+ }
+ }
+
// got all the entries for one group commit. apply them:
applyEntries(entries);
}
@@ -345,11 +393,16 @@ namespace mongo {
if( h.fileId != fileId ) {
if( debug || (cmdLine.durOptions & CmdLine::DurDumpJournal) ) {
log() << "Ending processFileBuffer at differing fileId want:" << fileId << " got:" << h.fileId << endl;
- log() << " sect len:" << h.len << " seqnum:" << h.seqNumber << endl;
+ log() << " sect len:" << h.sectionLen() << " seqnum:" << h.seqNumber << endl;
}
return true;
}
- processSection(br.skip(h.len), h.len);
+ unsigned slen = h.sectionLen();
+ unsigned dataLen = slen - sizeof(JSectHeader) - sizeof(JSectFooter);
+ const char *hdr = (const char *) br.skip(h.sectionLenWithPadding());
+ const char *data = hdr + sizeof(JSectHeader);
+ const char *footer = data + dataLen;
+ processSection((const JSectHeader*) hdr, data, dataLen, (const JSectFooter*) footer);
// ctrl c check
killCurrentOp.checkForInterrupt(false);
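
On the read side the section is carved up by pointer arithmetic: the reader skips sectionLenWithPadding() bytes in one go, then derives the compressed-data and footer positions from the unpadded length. A small standalone model of that layout; the header and footer sizes are invented for illustration, the real structs are packed.

    #include <cassert>

    const unsigned Alignment = 8192;
    const unsigned kHeaderSize = 20;   // hypothetical sizeof(JSectHeader)
    const unsigned kFooterSize = 24;   // hypothetical sizeof(JSectFooter)

    struct SectionLayout {
        unsigned sectionLen;                             // unpadded, from the header
        unsigned dataLen()      const { return sectionLen - kHeaderSize - kFooterSize; }
        unsigned footerOffset() const { return kHeaderSize + dataLen(); }
        unsigned paddedLen()    const {                  // how far the reader advances
            return (sectionLen + Alignment - 1) & ~(Alignment - 1);
        }
    };

    int main() {
        SectionLayout s = { 5000 };
        assert( s.dataLen() == 5000 - 20 - 24 );
        assert( s.footerOffset() == 20 + s.dataLen() );
        assert( s.paddedLen() == 8192 );
        return 0;
    }
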
@@ -367,6 +420,17 @@ namespace mongo {
/** apply a specific journal file */
bool RecoveryJob::processFile(path journalfile) {
log() << "recover " << journalfile.string() << endl;
+
+ try {
+ if( boost::filesystem::file_size( journalfile.string() ) == 0 ) {
+ log() << "recover info " << journalfile.string() << " has zero length" << endl;
+ return true;
+ }
+ } catch(...) {
+ // if something weird like a permissions problem keep going so the massert down below can happen (presumably)
+ log() << "recover exception checking filesize" << endl;
+ }
+
MemoryMappedFile f;
void *p = f.mapWithOptions(journalfile.string().c_str(), MongoFile::READONLY | MongoFile::SEQUENTIAL);
massert(13544, str::stream() << "recover error couldn't open " << journalfile.string(), p);
@@ -382,13 +446,19 @@ namespace mongo {
_lastDataSyncedFromLastRun = journalReadLSN();
log() << "recover lsn: " << _lastDataSyncedFromLastRun << endl;
+ // todo: we could truncate the journal file at rotation time to the right length, then this abruptEnd
+ // check can be turned back on. this is relevant when prealloc is being used.
for( unsigned i = 0; i != files.size(); ++i ) {
- /*bool abruptEnd = */processFile(files[i]);
- /*if( abruptEnd && i+1 < files.size() ) {
+ bool abruptEnd = processFile(files[i]);
+ if( abruptEnd && i+1 < files.size() ) {
+#if 1 // Leaving this as a warning for now. TODO: make this an error post 2.0
+ log() << "recover warning: abrupt end to file " << files[i].string() << ", yet it isn't the last journal file" << endl;
+#else
log() << "recover error: abrupt end to file " << files[i].string() << ", yet it isn't the last journal file" << endl;
close();
uasserted(13535, "recover abrupt journal file end");
- }*/
+#endif
+ }
}
close();
diff --git a/db/dur_recover.h b/db/dur_recover.h
index b5a922b498a..955e730ea05 100644
--- a/db/dur_recover.h
+++ b/db/dur_recover.h
@@ -2,6 +2,7 @@
#pragma once
+#include "dur_journalformat.h"
#include "../util/concurrency/mutex.h"
#include "../util/file.h"
@@ -15,10 +16,14 @@ namespace mongo {
*/
class RecoveryJob : boost::noncopyable {
public:
- RecoveryJob() :_lastDataSyncedFromLastRun(0), _mx("recovery"), _recovering(false) { _lastSeqMentionedInConsoleLog = 1; }
+ RecoveryJob() : _lastDataSyncedFromLastRun(0),
+ _mx("recovery"), _recovering(false) { _lastSeqMentionedInConsoleLog = 1; }
void go(vector<path>& files);
~RecoveryJob();
- void processSection(const void *, unsigned len);
+
+ /** @param data data between header and footer. compressed if recovering. */
+ void processSection(const JSectHeader *h, const void *data, unsigned len, const JSectFooter *f);
+
void close(); // locks and calls _close()
static RecoveryJob & get() { return _instance; }
diff --git a/db/dur_stats.h b/db/dur_stats.h
index d4943c01cb3..50a26d1f215 100644
--- a/db/dur_stats.h
+++ b/db/dur_stats.h
@@ -20,6 +20,7 @@ namespace mongo {
unsigned _commits;
unsigned _earlyCommits; // count of early commits from commitIfNeeded() or from getDur().commitNow()
unsigned long long _journaledBytes;
+ unsigned long long _uncompressedBytes;
unsigned long long _writeToDataFilesBytes;
unsigned long long _prepLogBufferMicros;
diff --git a/db/dur_writetodatafiles.cpp b/db/dur_writetodatafiles.cpp
index cdccb018d83..6724f0731aa 100644
--- a/db/dur_writetodatafiles.cpp
+++ b/db/dur_writetodatafiles.cpp
@@ -47,9 +47,9 @@ namespace mongo {
@see https://docs.google.com/drawings/edit?id=1TklsmZzm7ohIZkwgeK6rMvsdaR13KjtJYMsfLr175Zc&hl=en
*/
- void WRITETODATAFILES_Impl1() {
+ void WRITETODATAFILES_Impl1(const JSectHeader& h, AlignedBuilder& uncompressed) {
RWLockRecursive::Shared lk(MongoFile::mmmutex);
- RecoveryJob::get().processSection(commitJob._ab.buf(), commitJob._ab.len());
+ RecoveryJob::get().processSection(&h, uncompressed.buf(), uncompressed.len(), 0);
}
#if 0
@@ -81,16 +81,14 @@ namespace mongo {
#endif
// concurrency: in mmmutex, not necessarily in dbMutex
- void WRITETODATAFILES() {
+ void WRITETODATAFILES(const JSectHeader& h, AlignedBuilder& uncompressed) {
Timer t;
#if defined(_EXPERIMENTAL)
WRITETODATAFILES_Impl3();
#else
- WRITETODATAFILES_Impl1();
+ WRITETODATAFILES_Impl1(h, uncompressed);
#endif
stats.curr->_writeToDataFilesMicros += t.micros();
-
-
}
}
diff --git a/db/durop.h b/db/durop.h
index c4574c2e3cb..9ab1bfcbede 100644
--- a/db/durop.h
+++ b/db/durop.h
@@ -28,8 +28,6 @@ namespace mongo {
namespace dur {
- const unsigned Alignment = 8192;
-
/** DurOp - Operations we journal that aren't just basic writes.
*
* Basic writes are logged as JEntry's, and indicated in ram temporarily as struct dur::WriteIntent.
diff --git a/db/geo/2d.cpp b/db/geo/2d.cpp
index 21b0eaa6601..9b762b260de 100644
--- a/db/geo/2d.cpp
+++ b/db/geo/2d.cpp
@@ -138,7 +138,11 @@ namespace mongo {
GeoHash b = a;
b.move(1, 1);
- _error = distance(a, b);
+ // Epsilon is 1/100th of a bucket size
+ // TODO: Can we actually find error bounds for the sqrt function?
+ double epsilon = 0.001 / _scaling;
+ _error = distance(a, b) + epsilon;
+
// Error in radians
_errorSphere = deg2rad( _error );
}
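
The per-box distance error now includes a small epsilon proportional to the grid cell width (1/_scaling), so a point that lands exactly on a cell edge is not dropped by floating-point noise. A standalone sketch of the widened bound, with a made-up scaling value:

    #include <cassert>

    // cellDistance is the distance across one grid cell; scaling maps coordinates
    // to grid units, so 1/scaling is the cell width.
    double errorBound(double cellDistance, double scaling) {
        double epsilon = 0.001 / scaling;     // same constant the patch adds
        return cellDistance + epsilon;
    }

    int main() {
        double scaling = 1e5;                 // hypothetical value for illustration
        double cellDiagonal = 1.4142 / scaling;
        assert( errorBound(cellDiagonal, scaling) > cellDiagonal );
        return 0;
    }
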
@@ -293,6 +297,14 @@ namespace mongo {
}
+ BSONObj _fromBSONHash( const BSONElement& e ) const {
+ return _unhash( _tohash( e ) );
+ }
+
+ BSONObj _fromBSONHash( const BSONObj& o ) const {
+ return _unhash( _tohash( o.firstElement() ) );
+ }
+
GeoHash _tohash( const BSONElement& e ) const {
if ( e.isABSONObj() )
return _hash( e.embeddedObject() );
@@ -368,6 +380,10 @@ namespace mongo {
}
double sizeEdge( const GeoHash& a ) const {
+
+ if( ! a.constrains() )
+ return _max - _min;
+
double ax,ay,bx,by;
GeoHash b = a;
b.move( 1 , 1 );
@@ -443,6 +459,10 @@ namespace mongo {
Box() {}
+ BSONArray toBSON() const {
+ return BSON_ARRAY( BSON_ARRAY( _min._x << _min._y ) << BSON_ARRAY( _max._x << _max._y ) );
+ }
+
string toString() const {
StringBuilder buf(64);
buf << _min.toString() << " -->> " << _max.toString();
@@ -630,8 +650,8 @@ namespace mongo {
}
else if( fudge == 0 ){
- if( p._y == p1._y && p._x == p1._x ) return true;
- else if( p._y == p2._y && p._x == p2._x ) return true;
+ if( p._y == p1._y && p._x == p1._x ) return true;
+ else if( p._y == p2._y && p._x == p2._x ) return true;
}
// Normal intersection test.
@@ -742,293 +762,96 @@ namespace mongo {
geo2dplugin.getName();
}
- struct GeoUnitTest : public UnitTest {
-
- int round( double d ) {
- return (int)(.5+(d*1000));
- }
-
-#define GEOHEQ(a,b) if ( a.toString() != b ){ cout << "[" << a.toString() << "] != [" << b << "]" << endl; assert( a == GeoHash(b) ); }
-
- void run() {
- assert( ! GeoHash::isBitSet( 0 , 0 ) );
- assert( ! GeoHash::isBitSet( 0 , 31 ) );
- assert( GeoHash::isBitSet( 1 , 31 ) );
-
- IndexSpec i( BSON( "loc" << "2d" ) );
- Geo2dType g( &geo2dplugin , &i );
- {
- double x = 73.01212;
- double y = 41.352964;
- BSONObj in = BSON( "x" << x << "y" << y );
- GeoHash h = g._hash( in );
- BSONObj out = g._unhash( h );
- assert( round(x) == round( out["x"].number() ) );
- assert( round(y) == round( out["y"].number() ) );
- assert( round( in["x"].number() ) == round( out["x"].number() ) );
- assert( round( in["y"].number() ) == round( out["y"].number() ) );
- }
-
- {
- double x = -73.01212;
- double y = 41.352964;
- BSONObj in = BSON( "x" << x << "y" << y );
- GeoHash h = g._hash( in );
- BSONObj out = g._unhash( h );
- assert( round(x) == round( out["x"].number() ) );
- assert( round(y) == round( out["y"].number() ) );
- assert( round( in["x"].number() ) == round( out["x"].number() ) );
- assert( round( in["y"].number() ) == round( out["y"].number() ) );
- }
-
- {
- GeoHash h( "0000" );
- h.move( 0 , 1 );
- GEOHEQ( h , "0001" );
- h.move( 0 , -1 );
- GEOHEQ( h , "0000" );
-
- h.init( "0001" );
- h.move( 0 , 1 );
- GEOHEQ( h , "0100" );
- h.move( 0 , -1 );
- GEOHEQ( h , "0001" );
-
-
- h.init( "0000" );
- h.move( 1 , 0 );
- GEOHEQ( h , "0010" );
- }
-
- {
- Box b( 5 , 5 , 2 );
- assert( "(5,5) -->> (7,7)" == b.toString() );
- }
-
- {
- GeoHash a = g.hash( 1 , 1 );
- GeoHash b = g.hash( 4 , 5 );
- assert( 5 == (int)(g.distance( a , b ) ) );
- a = g.hash( 50 , 50 );
- b = g.hash( 42 , 44 );
- assert( round(10) == round(g.distance( a , b )) );
- }
-
- {
- GeoHash x("0000");
- assert( 0 == x.getHash() );
- x.init( 0 , 1 , 32 );
- GEOHEQ( x , "0000000000000000000000000000000000000000000000000000000000000001" )
-
- assert( GeoHash( "1100").hasPrefix( GeoHash( "11" ) ) );
- assert( ! GeoHash( "1000").hasPrefix( GeoHash( "11" ) ) );
- }
-
- {
- GeoHash x("1010");
- GEOHEQ( x , "1010" );
- GeoHash y = x + "01";
- GEOHEQ( y , "101001" );
- }
-
- {
-
- GeoHash a = g.hash( 5 , 5 );
- GeoHash b = g.hash( 5 , 7 );
- GeoHash c = g.hash( 100 , 100 );
- /*
- cout << "a: " << a << endl;
- cout << "b: " << b << endl;
- cout << "c: " << c << endl;
-
- cout << "a: " << a.toStringHex1() << endl;
- cout << "b: " << b.toStringHex1() << endl;
- cout << "c: " << c.toStringHex1() << endl;
- */
- BSONObj oa = a.wrap();
- BSONObj ob = b.wrap();
- BSONObj oc = c.wrap();
- /*
- cout << "a: " << oa.hexDump() << endl;
- cout << "b: " << ob.hexDump() << endl;
- cout << "c: " << oc.hexDump() << endl;
- */
- assert( oa.woCompare( ob ) < 0 );
- assert( oa.woCompare( oc ) < 0 );
-
- }
-
- {
- GeoHash x( "000000" );
- x.move( -1 , 0 );
- GEOHEQ( x , "101010" );
- x.move( 1 , -1 );
- GEOHEQ( x , "010101" );
- x.move( 0 , 1 );
- GEOHEQ( x , "000000" );
- }
- {
- GeoHash prefix( "110011000000" );
- GeoHash entry( "1100110000011100000111000001110000011100000111000001000000000000" );
- assert( ! entry.hasPrefix( prefix ) );
- entry = GeoHash("1100110000001100000111000001110000011100000111000001000000000000");
- assert( entry.toString().find( prefix.toString() ) == 0 );
- assert( entry.hasPrefix( GeoHash( "1100" ) ) );
- assert( entry.hasPrefix( prefix ) );
- }
-
- {
- GeoHash a = g.hash( 50 , 50 );
- GeoHash b = g.hash( 48 , 54 );
- assert( round( 4.47214 ) == round( g.distance( a , b ) ) );
- }
-
-
- {
- Box b( Point( 29.762283 , -95.364271 ) , Point( 29.764283000000002 , -95.36227099999999 ) );
- assert( b.inside( 29.763 , -95.363 ) );
- assert( ! b.inside( 32.9570255 , -96.1082497 ) );
- assert( ! b.inside( 32.9570255 , -96.1082497 , .01 ) );
- }
-
- {
- GeoHash a( "11001111" );
- assert( GeoHash( "11" ) == a.commonPrefix( GeoHash("11") ) );
- assert( GeoHash( "11" ) == a.commonPrefix( GeoHash("11110000") ) );
- }
-
- {
- int N = 10000;
- {
- Timer t;
- for ( int i=0; i<N; i++ ) {
- unsigned x = (unsigned)rand();
- unsigned y = (unsigned)rand();
- GeoHash h( x , y );
- unsigned a,b;
- h.unhash_slow( a,b );
- assert( a == x );
- assert( b == y );
- }
- //cout << "slow: " << t.millis() << endl;
- }
-
- {
- Timer t;
- for ( int i=0; i<N; i++ ) {
- unsigned x = (unsigned)rand();
- unsigned y = (unsigned)rand();
- GeoHash h( x , y );
- unsigned a,b;
- h.unhash_fast( a,b );
- assert( a == x );
- assert( b == y );
- }
- //cout << "fast: " << t.millis() << endl;
- }
-
- }
-
- {
- // see http://en.wikipedia.org/wiki/Great-circle_distance#Worked_example
-
- {
- Point BNA (-86.67, 36.12);
- Point LAX (-118.40, 33.94);
+ class GeoHopper;
- double dist1 = spheredist_deg(BNA, LAX);
- double dist2 = spheredist_deg(LAX, BNA);
+ class GeoPoint {
+ public:
- // target is 0.45306
- assert( 0.45305 <= dist1 && dist1 <= 0.45307 );
- assert( 0.45305 <= dist2 && dist2 <= 0.45307 );
- }
- {
- Point BNA (-1.5127, 0.6304);
- Point LAX (-2.0665, 0.5924);
+ GeoPoint() : _distance( -1 ), _exact( false )
+ {}
- double dist1 = spheredist_rad(BNA, LAX);
- double dist2 = spheredist_rad(LAX, BNA);
+ //// Distance not used ////
- // target is 0.45306
- assert( 0.45305 <= dist1 && dist1 <= 0.45307 );
- assert( 0.45305 <= dist2 && dist2 <= 0.45307 );
- }
- {
- Point JFK (-73.77694444, 40.63861111 );
- Point LAX (-118.40, 33.94);
+ GeoPoint( const GeoKeyNode& node )
+ : _key( node._key ) , _loc( node.recordLoc ) , _o( node.recordLoc.obj() ), _distance( -1 ) , _exact( false ) {
+ }
- double dist = spheredist_deg(JFK, LAX) * EARTH_RADIUS_MILES;
- assert( dist > 2469 && dist < 2470 );
- }
+ //// Immediate initialization of distance ////
- {
- Point BNA (-86.67, 36.12);
- Point LAX (-118.40, 33.94);
- Point JFK (-73.77694444, 40.63861111 );
- assert( spheredist_deg(BNA, BNA) < 1e-6);
- assert( spheredist_deg(LAX, LAX) < 1e-6);
- assert( spheredist_deg(JFK, JFK) < 1e-6);
+ GeoPoint( const GeoKeyNode& node, double distance, bool exact )
+ : _key( node._key ) , _loc( node.recordLoc ) , _o( node.recordLoc.obj() ), _distance( distance ), _exact( exact ) {
+ }
- Point zero (0, 0);
- Point antizero (0,-180);
+ GeoPoint( const GeoPoint& pt, double distance, bool exact )
+ : _key( pt.key() ) , _loc( pt.loc() ) , _o( pt.obj() ), _distance( distance ), _exact( exact ) {
+ }
- // these were known to cause NaN
- assert( spheredist_deg(zero, zero) < 1e-6);
- assert( fabs(M_PI-spheredist_deg(zero, antizero)) < 1e-6);
- assert( fabs(M_PI-spheredist_deg(antizero, zero)) < 1e-6);
- }
- }
+ bool operator<( const GeoPoint& other ) const {
+ if( _distance != other._distance ) return _distance < other._distance;
+ if( _exact != other._exact ) return _exact < other._exact;
+ return _loc < other._loc;
}
- } geoUnitTest;
- class GeoHopper;
+ double distance() const {
+ return _distance;
+ }
- class GeoPoint {
- public:
- GeoPoint() { }
+ bool isExact() const {
+ return _exact;
+ }
- //// Distance not used ////
+ BSONObj key() const {
+ return _key;
+ }
- GeoPoint( const GeoKeyNode& node )
- : _key( node._key ) , _loc( node.recordLoc ) , _o( node.recordLoc.obj() ) , _exactDistance( -1 ), _exactWithin( false ) {
+ DiskLoc loc() const {
+ return _loc;
}
-
- //// Immediate initialization of exact distance ////
- GeoPoint( const GeoKeyNode& node , double exactDistance, bool exactWithin )
- : _key( node._key ) , _loc( node.recordLoc ) , _o( node.recordLoc.obj() ), _exactDistance( exactDistance ), _exactWithin( exactWithin ) {
+ BSONObj obj() const {
+ return _o;
}
- bool operator<( const GeoPoint& other ) const {
- return _exactDistance < other._exactDistance;
+ BSONObj pt() const {
+ return _pt;
}
- bool isEmpty() const {
+ bool isEmpty() {
return _o.isEmpty();
}
string toString() const {
- return str::stream() << "Point from " << _o.toString() << " dist : " << _exactDistance << " within ? " << _exactWithin;
+ return str::stream() << "Point from " << _o << " dist : " << _distance << ( _exact ? " (ex)" : " (app)" );
}
BSONObj _key;
DiskLoc _loc;
BSONObj _o;
+ BSONObj _pt;
- double _exactDistance;
- bool _exactWithin;
+ double _distance;
+ bool _exact;
};
// GeoBrowse subclasses this
class GeoAccumulator {
public:
- GeoAccumulator( const Geo2dType * g , const BSONObj& filter )
- : _g(g) , _lookedAt(0) , _objectsLoaded(0) , _found(0) {
+ GeoAccumulator( const Geo2dType * g , const BSONObj& filter, bool uniqueDocs, bool needDistance )
+ : _g(g) ,
+ _keysChecked(0) ,
+ _lookedAt(0) ,
+ _matchesPerfd(0) ,
+ _objectsLoaded(0) ,
+ _pointsLoaded(0) ,
+ _found(0) ,
+ _uniqueDocs( uniqueDocs ) ,
+ _needDistance( needDistance )
+ {
if ( ! filter.isEmpty() ) {
_matcher.reset( new CoveredIndexMatcher( filter , g->keyPattern() ) );
+ GEODEBUG( "Matcher is now " << _matcher->docMatcher().toString() );
}
}
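
GeoPoint's operator< now orders by distance, then exactness, then record location, giving the result multiset a total, deterministic order even when two documents sit at exactly the same distance. A stripped-down sketch of that comparison, with a plain integer standing in for DiskLoc:

    #include <cassert>
    #include <set>

    struct PointKey {                      // stand-in for GeoPoint's ordering fields
        double distance;
        bool   exact;
        long   loc;                        // stand-in for DiskLoc
        bool operator<(const PointKey& o) const {
            if (distance != o.distance) return distance < o.distance;
            if (exact != o.exact)       return exact < o.exact;
            return loc < o.loc;
        }
    };

    int main() {
        std::multiset<PointKey> pts;
        PointKey a = { 1.0, true, 11 };
        PointKey b = { 1.0, true, 10 };
        pts.insert(a);
        pts.insert(b);
        assert( pts.begin()->loc == 10 );  // ties on distance break deterministically
        return 0;
    }
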
@@ -1042,6 +865,9 @@ namespace mongo {
set< pair<DiskLoc,int> > _seen;
public:
bool seen(DiskLoc bucket, int pos) {
+
+ _keysChecked++;
+
pair< set<pair<DiskLoc,int> >::iterator, bool > seenBefore = _seen.insert( make_pair(bucket,pos) );
if ( ! seenBefore.second ) {
GEODEBUG( "\t\t\t\t already seen : " << bucket.toString() << ' ' << pos ); // node.key.toString() << " @ " << Point( _g, GeoHash( node.key.firstElement() ) ).toString() << " with " << node.recordLoc.obj()["_id"] );
@@ -1050,29 +876,43 @@ namespace mongo {
return false;
}
- void add( const GeoKeyNode& node ) {
+ enum KeyResult { BAD, BORDER, GOOD };
+
+ virtual void add( const GeoKeyNode& node ) {
- GEODEBUG( "\t\t\t\t checking key " << node.key.toString() )
+ GEODEBUG( "\t\t\t\t checking key " << node._key.toString() )
_lookedAt++;
- // distance check
- double d = 0;
- if ( ! checkDistance( node , d ) ) {
- GEODEBUG( "\t\t\t\t bad distance : " << node.recordLoc.obj() << "\t" << d );
+ ////
+ // Approximate distance check using key data
+ ////
+ double keyD = 0;
+ Point keyP( _g, GeoHash( node._key.firstElement(), _g->_bits ) );
+ KeyResult keyOk = approxKeyCheck( keyP, keyD );
+ if ( keyOk == BAD ) {
+ GEODEBUG( "\t\t\t\t bad distance : " << node.recordLoc.obj() << "\t" << keyD );
return;
}
- GEODEBUG( "\t\t\t\t good distance : " << node.recordLoc.obj() << "\t" << d );
+ GEODEBUG( "\t\t\t\t good distance : " << node.recordLoc.obj() << "\t" << keyD );
+ ////
+ // Check for match using other key (and potentially doc) criteria
+ ////
// Remember match results for each object
map<DiskLoc, bool>::iterator match = _matched.find( node.recordLoc );
bool newDoc = match == _matched.end();
if( newDoc ) {
+ GEODEBUG( "\t\t\t\t matching new doc with " << (_matcher ? _matcher->docMatcher().toString() : "(empty)" ) );
+
// matcher
MatchDetails details;
if ( _matcher.get() ) {
bool good = _matcher->matchesWithSingleKeyIndex( node._key , node.recordLoc , &details );
+
+ _matchesPerfd++;
+
if ( details._loadedObject )
_objectsLoaded++;
@@ -1094,12 +934,50 @@ namespace mongo {
return;
}
- addSpecific( node , d, newDoc );
- _found++;
+ ////
+ // Exact check with particular data fields
+ ////
+ // Can add multiple points
+ int diff = addSpecific( node , keyP, keyOk == BORDER, keyD, newDoc );
+ if( diff > 0 ) _found += diff;
+ else _found -= -diff;
+
+ }
+
+ virtual void getPointsFor( const BSONObj& key, const BSONObj& obj, vector< BSONObj >& locsForNode, bool allPoints = false ){
+
+ // Find all the location objects from the keys
+ vector< BSONObj > locs;
+ _g->getKeys( obj, allPoints ? locsForNode : locs );
+ _pointsLoaded++;
+
+ if( allPoints ) return;
+ if( locs.size() == 1 ){
+ locsForNode.push_back( locs[0] );
+ return;
+ }
+
+ // Find the particular location we want
+ GeoHash keyHash( key.firstElement(), _g->_bits );
+
+ // log() << "Hash: " << node.key << " and " << keyHash.getHash() << " unique " << _uniqueDocs << endl;
+ for( vector< BSONObj >::iterator i = locs.begin(); i != locs.end(); ++i ) {
+
+ // Ignore all locations not hashed to the key's hash, since we may see
+ // those later
+ if( _g->_hash( *i ) != keyHash ) continue;
+
+ locsForNode.push_back( *i );
+
+ }
+
}
- virtual void addSpecific( const GeoKeyNode& node , double d, bool newDoc ) = 0;
- virtual bool checkDistance( const GeoKeyNode& node , double& d ) = 0;
+ virtual int addSpecific( const GeoKeyNode& node, const Point& p , bool inBounds, double d, bool newDoc ) = 0;
+ virtual KeyResult approxKeyCheck( const Point& p , double& keyD ) = 0;
+ virtual bool exactDocCheck( const Point& p , double& d ) = 0;
+ virtual bool expensiveExactCheck(){ return false; }
+
long long found() const {
return _found;
@@ -1109,9 +987,16 @@ namespace mongo {
map<DiskLoc, bool> _matched;
shared_ptr<CoveredIndexMatcher> _matcher;
+ long long _keysChecked;
long long _lookedAt;
+ long long _matchesPerfd;
long long _objectsLoaded;
+ long long _pointsLoaded;
long long _found;
+
+ bool _uniqueDocs;
+ bool _needDistance;
+
};
struct BtreeLocation {
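
The old boolean checkDistance becomes a three-way approxKeyCheck: keys clearly outside the search region are rejected, keys clearly inside skip the exact document check entirely, and only border cases pay for exactDocCheck. A standalone sketch of that classification against a maximum distance with a known error bound (the thresholds here are illustrative, not the exact expressions used by the cursor):

    #include <cassert>

    enum KeyResult { BAD, BORDER, GOOD };   // mirrors the enum added to GeoAccumulator

    KeyResult approxCheck(double approxDist, double maxDist, double error) {
        if (approxDist > maxDist + error) return BAD;     // outside even with the error margin
        if (approxDist < maxDist - error) return GOOD;    // inside even with the error margin
        return BORDER;                                    // needs the exact per-document check
    }

    int main() {
        assert( approxCheck( 5.0,  10.0, 0.1) == GOOD   );
        assert( approxCheck(15.0,  10.0, 0.1) == BAD    );
        assert( approxCheck(10.05, 10.0, 0.1) == BORDER );
        return 0;
    }
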
@@ -1264,8 +1149,8 @@ namespace mongo {
DONE
} _state;
- GeoBrowse( const Geo2dType * g , string type , BSONObj filter = BSONObj() )
- : GeoCursorBase( g ), GeoAccumulator( g , filter ) ,
+ GeoBrowse( const Geo2dType * g , string type , BSONObj filter = BSONObj(), bool uniqueDocs = true, bool needDistance = false )
+ : GeoCursorBase( g ), GeoAccumulator( g , filter, uniqueDocs, needDistance ) ,
_type( type ) , _filter( filter ) , _firstCall(true), _nscanned(), _centerPrefix(0, 0, 0) {
// Set up the initial expand state
@@ -1350,11 +1235,9 @@ namespace mongo {
virtual void fillStack( int maxToCheck, int maxToAdd = -1, bool onlyExpand = false ) {
#ifdef GEODEBUGGING
-
- int s = _state;
log() << "Filling stack with maximum of " << maxToCheck << ", state : " << (int) _state << endl;
-
#endif
+
if( maxToAdd < 0 ) maxToAdd = maxToCheck;
int maxFound = _foundInExp + maxToCheck;
assert( maxToCheck > 0 );
@@ -1395,7 +1278,6 @@ namespace mongo {
while ( true ) {
GEODEBUG( "box prefix [" << _prefix << "]" );
-
#ifdef GEODEBUGGING
if( _prefix.constrains() ) {
log() << "current expand box : " << Box( _g, _prefix ).toString() << endl;
@@ -1407,6 +1289,9 @@ namespace mongo {
GEODEBUG( "expanding box points... ");
+ // Record the prefix we're actively exploring...
+ _expPrefix.reset( new GeoHash( _prefix ) );
+
// Find points inside this prefix
while ( _min.hasPrefix( _prefix ) && _min.advance( -1 , _foundInExp , this ) && _foundInExp < maxFound && _found < maxAdded );
while ( _max.hasPrefix( _prefix ) && _max.advance( 1 , _foundInExp , this ) && _foundInExp < maxFound && _found < maxAdded );
@@ -1419,7 +1304,7 @@ namespace mongo {
#endif
- GEODEBUG( "finished expand, found : " << ( maxToCheck - ( maxFound - _found ) ) );
+ GEODEBUG( "finished expand, found : " << ( maxToAdd - ( maxAdded - _found ) ) );
if( _foundInExp >= maxFound || _found >= maxAdded ) return;
// We've searched this prefix fully, remember
@@ -1429,6 +1314,7 @@ namespace mongo {
if ( ! _prefix.constrains() ) {
GEODEBUG( "box exhausted" );
_state = DONE;
+ notePrefix();
return;
}
@@ -1453,8 +1339,9 @@ namespace mongo {
break;
}
- }
+ notePrefix();
+ }
        // If we're only expanding, don't check neighbors
if( onlyExpand ) return;
@@ -1495,7 +1382,7 @@ namespace mongo {
GeoHash _neighborPrefix = _centerPrefix;
_neighborPrefix.move( i, j );
- GEODEBUG( "moving to " << i << " , " << j );
+ GEODEBUG( "moving to " << i << " , " << j << " fringe : " << _fringe.size() );
PREFIXDEBUG( _centerPrefix, _g );
PREFIXDEBUG( _neighborPrefix , _g );
while( _fringe.size() > 0 ) {
@@ -1542,7 +1429,7 @@ namespace mongo {
// be entirely done. Max recurse depth is < 8 * 16.
// If we're maxed out on points, return
- if( _foundInExp >= maxFound ) {
+ if( _foundInExp >= maxFound || _found >= maxAdded ) {
// Make sure we'll come back to add more points
assert( _state == DOING_EXPAND );
return;
@@ -1571,14 +1458,63 @@ namespace mongo {
// The amount the current box overlaps our search area
virtual double intersectsBox( Box& cur ) = 0;
- virtual void addSpecific( const GeoKeyNode& node , double d, bool newDoc ) {
+ virtual int addSpecific( const GeoKeyNode& node , const Point& keyP , bool onBounds , double keyD , bool newDoc ) {
- if( ! newDoc ) return;
+ int found = 0;
- if ( _cur.isEmpty() )
- _cur = GeoPoint( node );
- else
- _stack.push_back( GeoPoint( node ) );
+ // We need to handle every possible point in this method, even those not in the key value, to
+ // avoid us tracking which hashes we've already seen.
+ if( ! newDoc ){
+ // log() << "Already handled doc!" << endl;
+ return 0;
+ }
+
+ if( _uniqueDocs && ! onBounds ) {
+ // log() << "Added ind to " << _type << endl;
+ _stack.push_front( GeoPoint( node ) );
+ found++;
+ }
+ else {
+ // We now handle every possible point in the document, even those not in the key value,
+ // since we're iterating through them anyway - prevents us from having to save the hashes
+ // we've seen per-doc
+
+ // If we're filtering by hash, get the original
+ bool expensiveExact = expensiveExactCheck();
+
+ vector< BSONObj > locs;
+ getPointsFor( node._key, node.recordLoc.obj(), locs, true );
+ for( vector< BSONObj >::iterator i = locs.begin(); i != locs.end(); ++i ){
+
+ double d = -1;
+ Point p( *i );
+
+ // We can avoid exact document checks by redoing approx checks,
+ // if the exact checks are more expensive.
+ bool needExact = true;
+ if( expensiveExact ){
+ assert( false );
+ KeyResult result = approxKeyCheck( p, d );
+ if( result == BAD ) continue;
+ else if( result == GOOD ) needExact = false;
+ }
+
+ if( ! needExact || exactDocCheck( p, d ) ){
+ // log() << "Added mult to " << _type << endl;
+ _stack.push_front( GeoPoint( node ) );
+ found++;
+ // If returning unique, just exit after first point is added
+ if( _uniqueDocs ) break;
+ }
+ }
+ }
+
+ if ( _cur.isEmpty() && _stack.size() > 0 ){
+ _cur = _stack.front();
+ _stack.pop_front();
+ }
+
+ return found;
}
virtual long long nscanned() {
@@ -1588,6 +1524,35 @@ namespace mongo {
return _nscanned;
}
+ virtual void explainDetails( BSONObjBuilder& b ){
+ b << "keysChecked" << _keysChecked;
+ b << "lookedAt" << _lookedAt;
+ b << "matchesPerfd" << _matchesPerfd;
+ b << "objectsLoaded" << _objectsLoaded;
+ b << "pointsLoaded" << _pointsLoaded;
+ }
+
+ virtual BSONObj prettyIndexBounds() const {
+
+ vector<GeoHash>::const_iterator i = _expPrefixes.end();
+ if( _expPrefixes.size() > 0 && *(--i) != *( _expPrefix.get() ) )
+ _expPrefixes.push_back( *( _expPrefix.get() ) );
+
+ BSONObjBuilder bob;
+ BSONArrayBuilder bab;
+ for( i = _expPrefixes.begin(); i != _expPrefixes.end(); ++i ){
+ bab << Box( _g, *i ).toBSON();
+ }
+ bob << _g->_geo << bab.arr();
+
+ return bob.obj();
+
+ }
+
+ void notePrefix() {
+ _expPrefixes.push_back( _prefix );
+ }
+
string _type;
BSONObj _filter;
list<GeoPoint> _stack;
@@ -1616,6 +1581,9 @@ namespace mongo {
BtreeLocation _min;
BtreeLocation _max;
+ shared_ptr<GeoHash> _expPrefix;
+ mutable vector<GeoHash> _expPrefixes;
+
};
@@ -1623,133 +1591,148 @@ namespace mongo {
public:
typedef multiset<GeoPoint> Holder;
- GeoHopper( const Geo2dType * g , unsigned max , const Point& n , const BSONObj& filter = BSONObj() , double maxDistance = numeric_limits<double>::max() , GeoDistType type=GEO_PLAIN )
- : GeoBrowse( g, "search", filter ), _max( max ) , _near( n ), _maxDistance( maxDistance ), _type( type ), _distError( type == GEO_PLAIN ? g->_error : g->_errorSphere ), _farthest(0)
+ GeoHopper( const Geo2dType * g , unsigned max , const Point& n , const BSONObj& filter = BSONObj() , double maxDistance = numeric_limits<double>::max() , GeoDistType type=GEO_PLAIN, bool uniqueDocs = false, bool needDistance = true )
+ : GeoBrowse( g, "search", filter, uniqueDocs, needDistance ), _max( max ) , _near( n ), _maxDistance( maxDistance ), _type( type ), _distError( type == GEO_PLAIN ? g->_error : g->_errorSphere ), _farthest(0)
{}
- virtual bool checkDistance( const GeoKeyNode& node, double& d ) {
+ virtual KeyResult approxKeyCheck( const Point& p, double& d ) {
// Always check approximate distance, since it lets us avoid doing
// checks of the rest of the object if it succeeds
- // TODO: Refactor so that we can check exact distance and within if we are going to
- // anyway.
- d = approxDistance( node );
- assert( d >= 0 );
- // Out of the error range, see how close we are to the furthest points
- bool good = d <= _maxDistance + 2 * _distError /* In error range */
- && ( _points.size() < _max /* need more points */
- || d <= farthest() + 2 * _distError /* could be closer than previous points */ );
+ switch (_type) {
+ case GEO_PLAIN:
+ d = _near.distance( p );
+ break;
+ case GEO_SPHERE:
+ checkEarthBounds( p );
+ d = spheredist_deg( _near, p );
+ break;
+ default: assert( false );
+ }
+ assert( d >= 0 );
GEODEBUG( "\t\t\t\t\t\t\t checkDistance " << _near.toString()
- << "\t" << GeoHash( node.key.firstElement() ) << "\t" << d
- << " ok: " << good << " farthest: " << farthest() );
+ << "\t" << p.toString() << "\t" << d
+ << " farthest: " << farthest() );
- return good;
- }
+ // If we need more points
+ double borderDist = ( _points.size() < _max ? _maxDistance : farthest() );
+
+ if( d >= borderDist - 2 * _distError && d <= borderDist + 2 * _distError ) return BORDER;
+ else return d < borderDist ? GOOD : BAD;
- double approxDistance( const GeoKeyNode& node ) {
- return approxDistance( GeoHash( node._key.firstElement() ) );
}
- double approxDistance( const GeoHash& h ) {
+ virtual bool exactDocCheck( const Point& p, double& d ){
- double approxDistance = -1;
- Point p( _g, h );
- switch (_type) {
+ bool within = false;
+
+ // Get the appropriate distance for the type
+ switch ( _type ) {
case GEO_PLAIN:
- approxDistance = _near.distance( p );
+ d = _near.distance( p );
+ within = _near.distanceWithin( p, _maxDistance );
break;
case GEO_SPHERE:
checkEarthBounds( p );
- approxDistance = spheredist_deg( _near, p );
+ d = spheredist_deg( _near, p );
+ within = ( d <= _maxDistance );
break;
default: assert( false );
}
- return approxDistance;
+ return within;
}
- double exactDistances( const GeoKeyNode& node ) {
-
- GEODEBUG( "Finding exact distance for " << node.key.toString() << " and " << node.recordLoc.obj().toString() );
-
- // Find all the location objects from the keys
- vector< BSONObj > locs;
- _g->getKeys( node.recordLoc.obj(), locs );
+ // Always in distance units, whether radians or normal
+ double farthest() const {
+ return _farthest;
+ }
- double maxDistance = -1;
+ virtual int addSpecific( const GeoKeyNode& node, const Point& keyP, bool onBounds, double keyD, bool newDoc ) {
- // Find the particular location we want
- BSONObj loc;
- GeoHash keyHash( node._key.firstElement(), _g->_bits );
- for( vector< BSONObj >::iterator i = locs.begin(); i != locs.end(); ++i ) {
+ // Unique documents
- loc = *i;
+ GeoPoint newPoint( node, keyD, false );
- // Ignore all locations not hashed to the key's hash, since we may see
- // those later
- if( _g->_hash( loc ) != keyHash ) continue;
+ int prevSize = _points.size();
- double exactDistance = -1;
- bool exactWithin = false;
+ // STEP 1 : Remove old duplicate points from the set if needed
+ if( _uniqueDocs ){
- Point p( loc );
+ // Lookup old point with same doc
+ map< DiskLoc , Holder::iterator >::iterator oldPointIt = _seenPts.find( newPoint.loc() );
- // Get the appropriate distance for the type
- switch ( _type ) {
- case GEO_PLAIN:
- exactDistance = _near.distance( p );
- exactWithin = _near.distanceWithin( p, _maxDistance );
- break;
- case GEO_SPHERE:
- checkEarthBounds( p );
- exactDistance = spheredist_deg( _near, p );
- exactWithin = ( exactDistance <= _maxDistance );
- break;
- default: assert( false );
+ if( oldPointIt != _seenPts.end() ){
+ const GeoPoint& oldPoint = *(oldPointIt->second);
+ // If the old point is as good or better (or we've already gone to disk
+ // once for it), keep it and skip the new one
+ if( oldPoint < newPoint ){
+ GEODEBUG( "\t\tOld point closer than new point" );
+ return 0;
+ }
+ GEODEBUG( "\t\tErasing old point " << oldPointIt->first.obj() );
+ _points.erase( oldPointIt->second );
}
+ }
- assert( exactDistance >= 0 );
- if( !exactWithin ) continue;
+ Holder::iterator newIt = _points.insert( newPoint );
+ if( _uniqueDocs ) _seenPts[ newPoint.loc() ] = newIt;
- GEODEBUG( "Inserting exact point: " << GeoPoint( node , exactDistance, exactWithin ).toString() );
+ GEODEBUG( "\t\tInserted new point " << newPoint.toString() << " approx : " << keyD );
- // Add a point for this location
- _points.insert( GeoPoint( node , exactDistance, exactWithin ) );
+ assert( _max > 0 );
- if( exactDistance > maxDistance ) maxDistance = exactDistance;
- }
+ Holder::iterator lastPtIt = _points.end();
+ lastPtIt--;
+ _farthest = lastPtIt->distance() + 2 * _distError;
- return maxDistance;
+ return _points.size() - prevSize;
}
- // Always in distance units, whether radians or normal
- double farthest() const {
- return _farthest;
- }
+ // Removes extra points from end of _points set.
+ // Check can be a bit costly if we have lots of exact points near borders,
+ // so we'll do this every once in a while.
+ void processExtraPoints(){
- bool inErrorBounds( double approxD ) const {
- return approxD >= _maxDistance - _distError && approxD <= _maxDistance + _distError;
- }
+ if( _points.size() == 0 ) return;
- virtual void addSpecific( const GeoKeyNode& node , double d, bool newDoc ) {
+ int prevSize = _points.size();
- GEODEBUG( "\t\t" << GeoHash( node.key.firstElement() ) << "\t" << node.recordLoc.obj() << "\t" << d );
+ // Erase all points from the set with a position >= _max *and*
+ // whose distance isn't close to the _max - 1 position distance
- double maxDistance = exactDistances( node );
- if( maxDistance >= 0 ){
+ int numToErase = _points.size() - _max;
+ if( numToErase < 0 ) numToErase = 0;
- // Recalculate the current furthest point.
- int numToErase = _points.size() - _max;
- while( numToErase-- > 0 ){
- _points.erase( --_points.end() );
- }
+ // Get the first point definitely in the _points array
+ Holder::iterator startErase = _points.end();
+ for( int i = 0; i < numToErase + 1; i++ ) startErase--;
+ _farthest = startErase->distance() + 2 * _distError;
- _farthest = boost::next( _points.end(), -1 )->_exactDistance;
+ GEODEBUG( "\t\tPotentially erasing " << numToErase << " points, " << " size : " << _points.size() << " max : " << _max << " dist : " << startErase->distance() << " farthest dist : " << _farthest << " from error : " << _distError );
+ startErase++;
+ while( numToErase > 0 && startErase->distance() <= _farthest ){
+ GEODEBUG( "\t\tNot erasing point " << startErase->toString() );
+ numToErase--;
+ startErase++;
+ assert( startErase != _points.end() || numToErase == 0 );
}
+
+ if( _uniqueDocs ){
+ for( Holder::iterator i = startErase; i != _points.end(); ++i )
+ _seenPts.erase( i->loc() );
+ }
+
+ _points.erase( startErase, _points.end() );
+
+ int diff = _points.size() - prevSize;
+ if( diff > 0 ) _found += diff;
+ else _found -= -diff;
+
}
unsigned _max;
@@ -1760,17 +1743,20 @@ namespace mongo {
double _distError;
double _farthest;
+ map< DiskLoc , Holder::iterator > _seenPts;
+
};
class GeoSearch : public GeoHopper {
public:
- GeoSearch( const Geo2dType * g , const Point& startPt , int numWanted=100 , BSONObj filter=BSONObj() , double maxDistance = numeric_limits<double>::max() , GeoDistType type=GEO_PLAIN )
- : GeoHopper( g , numWanted , startPt , filter , maxDistance, type ),
+ GeoSearch( const Geo2dType * g , const Point& startPt , int numWanted=100 , BSONObj filter=BSONObj() , double maxDistance = numeric_limits<double>::max() , GeoDistType type=GEO_PLAIN, bool uniqueDocs = false, bool needDistance = false )
+ : GeoHopper( g , numWanted , startPt , filter , maxDistance, type, uniqueDocs, needDistance ),
_start( g->hash( startPt._x, startPt._y ) ),
- _numWanted( numWanted ),
- _type(type)
+ // TODO: Remove numWanted...
+ _numWanted( numWanted ),
+ _type(type)
{
assert( g->getDetails() );
@@ -1795,6 +1781,8 @@ namespace mongo {
void exec() {
+ if( _numWanted == 0 ) return;
+
/*
* Search algorithm
* 1) use geohash prefix to find X items
@@ -1805,7 +1793,7 @@ namespace mongo {
#ifdef GEODEBUGGING
- log() << "start near search for points near " << _near << " (max dist " << _maxDistance << ")" << endl;
+ log() << "start near search for " << _numWanted << " points near " << _near << " (max dist " << _maxDistance << ")" << endl;
#endif
@@ -1815,13 +1803,16 @@ namespace mongo {
long long f = found();
assert( f <= 0x7fffffff );
fillStack( maxPointsHeuristic, _numWanted - static_cast<int>(f) , true );
+ processExtraPoints();
} while( _state != DONE && _state != DONE_NEIGHBOR &&
found() < _numWanted &&
(! _prefix.constrains() || _g->sizeEdge( _prefix ) <= _scanDistance ) );
// If we couldn't scan or scanned everything, we're done
- if( _state == DONE ) return;
-
+ if( _state == DONE ){
+ expandEndPoints();
+ return;
+ }
}
#ifdef GEODEBUGGING
@@ -1856,6 +1847,8 @@ namespace mongo {
_want = Box( _near._x - farDist , _near._y - farDist , farDist * 2 );
GEODEBUGPRINT( _want.toString() );
+ // log() << "Found : " << found() << " wanted : " << _numWanted << " Far distance : " << farDist << " box : " << _want << endl;
+
// Remember the far distance for further scans
_scanDistance = farDist;
@@ -1874,15 +1867,195 @@ namespace mongo {
// Do regular search in the full region
do {
fillStack( maxPointsHeuristic );
+ processExtraPoints();
}
while( _state != DONE );
}
- GEODEBUG( "done near search" )
+ GEODEBUG( "done near search with " << _points.size() << " points " );
+
+ expandEndPoints();
}
+ void addExactPoints( const GeoPoint& pt, Holder& points, bool force ){
+ int before, after;
+ addExactPoints( pt, points, before, after, force );
+ }
+
+ void addExactPoints( const GeoPoint& pt, Holder& points, int& before, int& after, bool force ){
+
+ before = 0;
+ after = 0;
+
+ GEODEBUG( "Adding exact points for " << pt.toString() );
+
+ if( pt.isExact() ){
+ if( force ) points.insert( pt );
+ return;
+ }
+
+ vector<BSONObj> locs;
+ getPointsFor( pt.key(), pt.obj(), locs, _uniqueDocs );
+
+ GeoPoint nearestPt( pt, -1, true );
+
+ for( vector<BSONObj>::iterator i = locs.begin(); i != locs.end(); i++ ){
+
+ Point loc( *i );
+
+ double d;
+ if( ! exactDocCheck( loc, d ) ) continue;
+
+ if( _uniqueDocs && ( nearestPt.distance() < 0 || d < nearestPt.distance() ) ){
+ nearestPt._distance = d;
+ nearestPt._pt = *i;
+ continue;
+ }
+ else if( ! _uniqueDocs ){
+ GeoPoint exactPt( pt, d, true );
+ exactPt._pt = *i;
+ GEODEBUG( "Inserting exact pt " << exactPt.toString() << " for " << pt.toString() << " exact : " << d << " is less? " << ( exactPt < pt ) << " bits : " << _g->_bits );
+ points.insert( exactPt );
+ exactPt < pt ? before++ : after++;
+ }
+
+ }
+
+ if( _uniqueDocs && nearestPt.distance() >= 0 ){
+ GEODEBUG( "Inserting unique exact pt " << nearestPt.toString() << " for " << pt.toString() << " exact : " << nearestPt.distance() << " is less? " << ( nearestPt < pt ) << " bits : " << _g->_bits );
+ points.insert( nearestPt );
+ if( nearestPt < pt ) before++;
+ else after++;
+ }
+
+ }
+
+ // TODO: Refactor this back into holder class, allow to run periodically when we are seeing a lot of pts
+ void expandEndPoints( bool finish = true ){
+
+ processExtraPoints();
+
+ // All points in array *could* be in maxDistance
+
+ // Step 1 : Trim points to max size
+ // TODO: This check will do little for now, but is skeleton for future work in incremental $near
+ // searches
+ if( _max > 0 ){
+
+ int numToErase = _points.size() - _max;
+
+ if( numToErase > 0 ){
+
+ Holder tested;
+
+ // Work backward through all points we're not sure belong in the set
+ Holder::iterator maybePointIt = _points.end();
+ maybePointIt--;
+ double approxMin = maybePointIt->distance() - 2 * _distError;
+
+ GEODEBUG( "\t\tNeed to erase " << numToErase << " max : " << _max << " min dist " << approxMin << " error : " << _distError << " starting from : " << (*maybePointIt).toString() );
+
+ // Insert all
+ int erased = 0;
+ while( _points.size() > 0 && ( maybePointIt->distance() >= approxMin || erased < numToErase ) ){
+
+ Holder::iterator current = maybePointIt--;
+
+ addExactPoints( *current, tested, true );
+ _points.erase( current );
+ erased++;
+
+ if( tested.size() )
+ approxMin = tested.begin()->distance() - 2 * _distError;
+
+ }
+
+ GEODEBUG( "\t\tEnding search at point " << ( _points.size() == 0 ? "(beginning)" : maybePointIt->toString() ) );
+
+ int numToAddBack = erased - numToErase;
+ assert( numToAddBack >= 0 );
+
+ GEODEBUG( "\t\tNum tested valid : " << tested.size() << " erased : " << erased << " added back : " << numToAddBack );
+
+#ifdef GEODEBUGGING
+ for( Holder::iterator it = tested.begin(); it != tested.end(); it++ ){
+ log() << "Tested Point: " << *it << endl;
+ }
+#endif
+ Holder::iterator testedIt = tested.begin();
+ for( int i = 0; i < numToAddBack && testedIt != tested.end(); i++ ){
+ _points.insert( *testedIt );
+ testedIt++;
+ }
+ }
+ }
+
+#ifdef GEODEBUGGING
+ for( Holder::iterator it = _points.begin(); it != _points.end(); it++ ){
+ log() << "Point: " << *it << endl;
+ }
+#endif
+ // We've now trimmed first set of unneeded points
+
+ GEODEBUG( "\t\t Start expanding, num points : " << _points.size() << " max : " << _max );
+
+ // Step 2: iterate through all points and add as needed
+
+ unsigned expandedPoints = 0;
+ Holder::iterator it = _points.begin();
+ double expandWindowEnd = -1;
+ while( it != _points.end() ){
+ const GeoPoint& currPt = *it;
+
+ // TODO: If one point is exact, maybe not 2 * _distError
+
+ // See if we're in an expand window
+ bool inWindow = currPt.distance() <= expandWindowEnd;
+ // If we're not, and we're done with points, break
+ if( ! inWindow && expandedPoints >= _max ) break;
+
+ bool expandApprox = ! currPt.isExact() && ( ! _uniqueDocs || ( finish && _needDistance ) || inWindow );
+
+ if( expandApprox ){
+
+ // Add new point(s)
+ // These will only be added in a radius of 2 * _distError around the current point,
+ // so should not affect previously valid points.
+ int before, after;
+ addExactPoints( currPt, _points, before, after, false );
+ expandedPoints += before;
+
+ if( _max > 0 && expandedPoints < _max )
+ expandWindowEnd = currPt.distance() + 2 * _distError;
+
+ // Iterate to the next point
+ Holder::iterator current = it++;
+ // Erase the current point
+ _points.erase( current );
+
+ }
+ else{
+ expandedPoints++;
+ it++;
+ }
+ }
+
+ GEODEBUG( "\t\tFinished expanding, num points : " << _points.size() << " max : " << _max );
+
+ // Finish
+ // TODO: Don't really need to trim?
+ for( ; expandedPoints > _max; expandedPoints-- ) it--;
+ _points.erase( it, _points.end() );
+
+#ifdef GEODEBUGGING
+ for( Holder::iterator it = _points.begin(); it != _points.end(); it++ ){
+ log() << "Point: " << *it << endl;
+ }
+#endif
+ }
+
virtual GeoHash expandStartHash(){
return _start;
}
@@ -1915,7 +2088,7 @@ namespace mongo {
: GeoCursorBase( s->_spec ) ,
_s( s ) , _cur( s->_points.begin() ) , _end( s->_points.end() ), _nscanned() {
if ( _cur != _end ) {
- ++_nscanned;
+ ++_nscanned;
}
}
@@ -1975,8 +2148,8 @@ namespace mongo {
class GeoCircleBrowse : public GeoBrowse {
public:
- GeoCircleBrowse( const Geo2dType * g , const BSONObj& circle , BSONObj filter = BSONObj() , const string& type="$center")
- : GeoBrowse( g , "circle" , filter ) {
+ GeoCircleBrowse( const Geo2dType * g , const BSONObj& circle , BSONObj filter = BSONObj() , const string& type="$center", bool uniqueDocs = true )
+ : GeoBrowse( g , "circle" , filter, uniqueDocs ) {
uassert( 13060 , "$center needs 2 fields (middle,max distance)" , circle.nFields() == 2 );
@@ -2040,19 +2213,16 @@ namespace mongo {
return cur.intersects( _bBox );
}
- virtual bool checkDistance( const GeoKeyNode& node, double& d ) {
-
- GeoHash h( node._key.firstElement(), _g->_bits );
+ virtual KeyResult approxKeyCheck( const Point& p, double& d ) {
// Inexact hash distance checks.
double error = 0;
switch (_type) {
case GEO_PLAIN:
- d = _g->distance( _start , h );
+ d = _startPt.distance( p );
error = _g->_error;
break;
case GEO_SPHERE: {
- Point p( _g, h );
checkEarthBounds( p );
d = spheredist_deg( _startPt, p );
error = _g->_errorSphere;
@@ -2062,40 +2232,25 @@ namespace mongo {
}
// If our distance is in the error bounds...
- if( d >= _maxDistance - error && d <= _maxDistance + error ) {
-
- // Do exact check
- vector< BSONObj > locs;
- _g->getKeys( node.recordLoc.obj(), locs );
-
- for( vector< BSONObj >::iterator i = locs.begin(); i != locs.end(); ++i ) {
-
- GEODEBUG( "Inexact distance : " << d << " vs " << _maxDistance << " from " << ( *i ).toString() << " due to error " << error );
-
- Point p( *i );
- // Exact distance checks.
- switch (_type) {
- case GEO_PLAIN: {
- if( _startPt.distanceWithin( p, _maxDistance ) ) return true;
- break;
- }
- case GEO_SPHERE:
- // Ignore all locations not hashed to the key's hash, since spherical calcs are
- // more expensive.
- if( _g->_hash( *i ) != h ) break;
- checkEarthBounds( p );
- if( spheredist_deg( _startPt , p ) <= _maxDistance ) return true;
- break;
- default: assert( false );
- }
+ if( d >= _maxDistance - error && d <= _maxDistance + error ) return BORDER;
+ return d > _maxDistance ? BAD : GOOD;
+ }
- }
+ virtual bool exactDocCheck( const Point& p, double& d ){
- return false;
+ switch (_type) {
+ case GEO_PLAIN: {
+ if( _startPt.distanceWithin( p, _maxDistance ) ) return true;
+ break;
+ }
+ case GEO_SPHERE:
+ checkEarthBounds( p );
+ if( spheredist_deg( _startPt , p ) <= _maxDistance ) return true;
+ break;
+ default: assert( false );
}
- GEODEBUG( "\t " << h << "\t" << d );
- return d <= _maxDistance;
+ return false;
}
GeoDistType _type;
@@ -2111,8 +2266,8 @@ namespace mongo {
class GeoBoxBrowse : public GeoBrowse {
public:
- GeoBoxBrowse( const Geo2dType * g , const BSONObj& box , BSONObj filter = BSONObj() )
- : GeoBrowse( g , "box" , filter ) {
+ GeoBoxBrowse( const Geo2dType * g , const BSONObj& box , BSONObj filter = BSONObj(), bool uniqueDocs = true )
+ : GeoBrowse( g , "box" , filter, uniqueDocs ) {
uassert( 13063 , "$box needs 2 fields (bottomLeft,topRight)" , box.nFields() == 2 );
@@ -2133,7 +2288,7 @@ namespace mongo {
_fudge = _g->_error;
_wantLen = _fudge +
std::max( ( _want._max._x - _want._min._x ) ,
- ( _want._max._y - _want._min._y ) );
+ ( _want._max._y - _want._min._y ) ) / 2;
ok();
}
@@ -2171,39 +2326,14 @@ namespace mongo {
return cur.intersects( _want );
}
- virtual bool checkDistance( const GeoKeyNode& node, double& d ) {
-
- GeoHash h( node._key.firstElement() );
- Point approxPt( _g, h );
-
- bool approxInside = _want.inside( approxPt, _fudge );
+ virtual KeyResult approxKeyCheck( const Point& p, double& d ) {
+ if( _want.onBoundary( p, _fudge ) ) return BORDER;
+ else return _want.inside( p, _fudge ) ? GOOD : BAD;
- if( approxInside && _want.onBoundary( approxPt, _fudge ) ) {
-
- // Do exact check
- vector< BSONObj > locs;
- _g->getKeys( node.recordLoc.obj(), locs );
-
- for( vector< BSONObj >::iterator i = locs.begin(); i != locs.end(); ++i ) {
- if( _want.inside( Point( *i ) ) ) {
-
- GEODEBUG( "found exact point : " << _want.toString()
- << " exact point : " << Point( *i ).toString()
- << " approx point : " << approxPt.toString()
- << " because of error: " << _fudge );
-
- return true;
- }
- }
-
- return false;
- }
-
- GEODEBUG( "checking point : " << _want.toString()
- << " point: " << approxPt.toString()
- << " in : " << _want.inside( approxPt, _fudge ) );
+ }
- return approxInside;
+ virtual bool exactDocCheck( const Point& p, double& d ){
+ return _want.inside( p );
}
Box _want;
@@ -2218,7 +2348,7 @@ namespace mongo {
public:
GeoPolygonBrowse( const Geo2dType* g , const BSONObj& polyPoints ,
- BSONObj filter = BSONObj() ) : GeoBrowse( g , "polygon" , filter ) {
+ BSONObj filter = BSONObj(), bool uniqueDocs = true ) : GeoBrowse( g , "polygon" , filter, uniqueDocs ) {
GEODEBUG( "In Polygon" )
@@ -2233,7 +2363,7 @@ namespace mongo {
uassert( 14030, "polygon must be defined by three points or more", _poly.size() >= 3 );
_bounds = _poly.bounds();
- _maxDim = _bounds.maxDim();
+ _maxDim = _g->_error + _bounds.maxDim() / 2;
ok();
}
@@ -2253,51 +2383,17 @@ namespace mongo {
return cur.intersects( _bounds );
}
- virtual bool checkDistance( const GeoKeyNode& node, double& d ) {
-
- GeoHash h( node._key.firstElement(), _g->_bits );
- Point p( _g, h );
+ virtual KeyResult approxKeyCheck( const Point& p, double& d ) {
int in = _poly.contains( p, _g->_error );
- if( in != 0 ) {
-
- if ( in > 0 ) {
- GEODEBUG( "Point: [" << p._x << ", " << p._y << "] approx in polygon" );
- }
- else {
- GEODEBUG( "Point: [" << p._x << ", " << p._y << "] approx not in polygon" );
- }
-
- if( in != 0 ) return in > 0;
- }
-
- // Do exact check, since to approximate check was inconclusive
- vector< BSONObj > locs;
- _g->getKeys( node.recordLoc.obj(), locs );
-
- for( vector< BSONObj >::iterator i = locs.begin(); i != locs.end(); ++i ) {
-
- Point p( *i );
- // Ignore all points not hashed to the current value
- // This implicitly assumes hashing is less costly than the polygon check, which
- // may or may not be true.
- if( _g->hash( p ) != h ) continue;
+ if( in == 0 ) return BORDER;
+ else return in > 0 ? GOOD : BAD;
- // Use the point in polygon algorithm to see if the point
- // is contained in the polygon.
- bool in = _poly.contains( p );
- if ( in ) {
- GEODEBUG( "Point: [" << p._x << ", " << p._y << "] exactly in polygon" );
- }
- else {
- GEODEBUG( "Point: [" << p._x << ", " << p._y << "] exactly not in polygon" );
- }
- if( in ) return in;
-
- }
+ }
- return false;
+ virtual bool exactDocCheck( const Point& p, double& d ){
+ return _poly.contains( p );
}
private:
@@ -2324,7 +2420,7 @@ namespace mongo {
if ( e.type() == Array ) {
// If we get an array query, assume it is a location, and do a $within { $center : [[x, y], 0] } search
- shared_ptr<Cursor> c( new GeoCircleBrowse( this , BSON( "0" << e.embeddedObjectUserCheck() << "1" << 0 ), query.filterFieldsUndotted( BSON( _geo << "" ), false ) ) );
+ shared_ptr<Cursor> c( new GeoCircleBrowse( this , BSON( "0" << e.embeddedObjectUserCheck() << "1" << 0 ), query.filterFieldsUndotted( BSON( _geo << "" ), false ), "$center", true ) );
return c;
}
else if ( e.type() == Object ) {
@@ -2364,33 +2460,44 @@ namespace mongo {
if ( e.isNumber() )
maxDistance = e.numberDouble();
}
- shared_ptr<GeoSearch> s( new GeoSearch( this , Point( e ) , numWanted , query , maxDistance, type ) );
+
+ bool uniqueDocs = false;
+ if( ! n["$uniqueDocs"].eoo() ) uniqueDocs = n["$uniqueDocs"].trueValue();
+
+ shared_ptr<GeoSearch> s( new GeoSearch( this , Point( e ) , numWanted , query , maxDistance, type, uniqueDocs ) );
s->exec();
shared_ptr<Cursor> c;
c.reset( new GeoSearchCursor( s ) );
return c;
}
case BSONObj::opWITHIN: {
+
e = e.embeddedObject().firstElement();
uassert( 13057 , "$within has to take an object or array" , e.isABSONObj() );
+
+ BSONObj context = e.embeddedObject();
e = e.embeddedObject().firstElement();
string type = e.fieldName();
+
+ bool uniqueDocs = true;
+ if( ! context["$uniqueDocs"].eoo() ) uniqueDocs = context["$uniqueDocs"].trueValue();
+
if ( startsWith(type, "$center") ) {
uassert( 13059 , "$center has to take an object or array" , e.isABSONObj() );
- shared_ptr<Cursor> c( new GeoCircleBrowse( this , e.embeddedObjectUserCheck() , query , type) );
+ shared_ptr<Cursor> c( new GeoCircleBrowse( this , e.embeddedObjectUserCheck() , query , type, uniqueDocs ) );
return c;
}
else if ( type == "$box" ) {
uassert( 13065 , "$box has to take an object or array" , e.isABSONObj() );
- shared_ptr<Cursor> c( new GeoBoxBrowse( this , e.embeddedObjectUserCheck() , query ) );
+ shared_ptr<Cursor> c( new GeoBoxBrowse( this , e.embeddedObjectUserCheck() , query, uniqueDocs ) );
return c;
}
else if ( startsWith( type, "$poly" ) ) {
uassert( 14029 , "$polygon has to take an object or array" , e.isABSONObj() );
- shared_ptr<Cursor> c( new GeoPolygonBrowse( this , e.embeddedObjectUserCheck() , query ) );
+ shared_ptr<Cursor> c( new GeoPolygonBrowse( this , e.embeddedObjectUserCheck() , query, uniqueDocs ) );
return c;
}
- throw UserException( 13058 , (string)"unknown $with type: " + type );
+ throw UserException( 13058 , (string)"unknown $within type: " + type );
}
default:
// Otherwise... assume the object defines a point, and we want to do a zero-radius $within $center
@@ -2414,7 +2521,7 @@ namespace mongo {
bool slaveOk() const { return true; }
void help(stringstream& h) const { h << "http://www.mongodb.org/display/DOCS/Geospatial+Indexing#GeospatialIndexing-geoNearCommand"; }
bool slaveOverrideOk() { return true; }
- bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
string ns = dbname + "." + cmdObj.firstElement().valuestr();
NamespaceDetails * d = nsdetails( ns.c_str() );
@@ -2450,6 +2557,12 @@ namespace mongo {
assert( numWanted >= 0 );
}
+ bool uniqueDocs = false;
+ if( ! cmdObj["uniqueDocs"].eoo() ) uniqueDocs = cmdObj["uniqueDocs"].trueValue();
+
+ bool includeLocs = false;
+ if( ! cmdObj["includeLocs"].eoo() ) includeLocs = cmdObj["includeLocs"].trueValue();
+
uassert(13046, "'near' param missing/invalid", !cmdObj["near"].eoo());
const Point n( cmdObj["near"] );
result.append( "near" , g->_tohash( cmdObj["near"] ).toString() );
@@ -2466,7 +2579,7 @@ namespace mongo {
if ( cmdObj["spherical"].trueValue() )
type = GEO_SPHERE;
- GeoSearch gs( g , n , numWanted , filter , maxDistance , type );
+ GeoSearch gs( g , n , numWanted , filter , maxDistance , type, uniqueDocs, true );
if ( cmdObj["start"].type() == String) {
GeoHash start ((string) cmdObj["start"].valuestr());
@@ -2486,11 +2599,12 @@ namespace mongo {
for ( GeoHopper::Holder::iterator i=gs._points.begin(); i!=gs._points.end(); i++ ) {
const GeoPoint& p = *i;
- double dis = distanceMultiplier * p._exactDistance;
+ double dis = distanceMultiplier * p.distance();
totalDistance += dis;
BSONObjBuilder bb( arr.subobjStart( BSONObjBuilder::numStr( x++ ) ) );
bb.append( "dis" , dis );
+ if( includeLocs ) bb.append( "loc" , p._pt );
bb.append( "obj" , p._o );
bb.done();
}
@@ -2516,7 +2630,7 @@ namespace mongo {
virtual LockType locktype() const { return READ; }
bool slaveOk() const { return true; }
bool slaveOverrideOk() { return true; }
- bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
string ns = dbname + "." + cmdObj.firstElement().valuestr();
NamespaceDetails * d = nsdetails( ns.c_str() );
@@ -2571,4 +2685,248 @@ namespace mongo {
} geoWalkCmd;
+ struct GeoUnitTest : public UnitTest {
+
+ int round( double d ) {
+ return (int)(.5+(d*1000));
+ }
+
+#define GEOHEQ(a,b) if ( a.toString() != b ){ cout << "[" << a.toString() << "] != [" << b << "]" << endl; assert( a == GeoHash(b) ); }
+
+ void run() {
+ assert( ! GeoHash::isBitSet( 0 , 0 ) );
+ assert( ! GeoHash::isBitSet( 0 , 31 ) );
+ assert( GeoHash::isBitSet( 1 , 31 ) );
+
+ IndexSpec i( BSON( "loc" << "2d" ) );
+ Geo2dType g( &geo2dplugin , &i );
+ {
+ double x = 73.01212;
+ double y = 41.352964;
+ BSONObj in = BSON( "x" << x << "y" << y );
+ GeoHash h = g._hash( in );
+ BSONObj out = g._unhash( h );
+ assert( round(x) == round( out["x"].number() ) );
+ assert( round(y) == round( out["y"].number() ) );
+ assert( round( in["x"].number() ) == round( out["x"].number() ) );
+ assert( round( in["y"].number() ) == round( out["y"].number() ) );
+ }
+
+ {
+ double x = -73.01212;
+ double y = 41.352964;
+ BSONObj in = BSON( "x" << x << "y" << y );
+ GeoHash h = g._hash( in );
+ BSONObj out = g._unhash( h );
+ assert( round(x) == round( out["x"].number() ) );
+ assert( round(y) == round( out["y"].number() ) );
+ assert( round( in["x"].number() ) == round( out["x"].number() ) );
+ assert( round( in["y"].number() ) == round( out["y"].number() ) );
+ }
+
+ {
+ GeoHash h( "0000" );
+ h.move( 0 , 1 );
+ GEOHEQ( h , "0001" );
+ h.move( 0 , -1 );
+ GEOHEQ( h , "0000" );
+
+ h.init( "0001" );
+ h.move( 0 , 1 );
+ GEOHEQ( h , "0100" );
+ h.move( 0 , -1 );
+ GEOHEQ( h , "0001" );
+
+
+ h.init( "0000" );
+ h.move( 1 , 0 );
+ GEOHEQ( h , "0010" );
+ }
+
+ {
+ Box b( 5 , 5 , 2 );
+ assert( "(5,5) -->> (7,7)" == b.toString() );
+ }
+
+ {
+ GeoHash a = g.hash( 1 , 1 );
+ GeoHash b = g.hash( 4 , 5 );
+ assert( 5 == (int)(g.distance( a , b ) ) );
+ a = g.hash( 50 , 50 );
+ b = g.hash( 42 , 44 );
+ assert( round(10) == round(g.distance( a , b )) );
+ }
+
+ {
+ GeoHash x("0000");
+ assert( 0 == x.getHash() );
+ x.init( 0 , 1 , 32 );
+ GEOHEQ( x , "0000000000000000000000000000000000000000000000000000000000000001" )
+
+ assert( GeoHash( "1100").hasPrefix( GeoHash( "11" ) ) );
+ assert( ! GeoHash( "1000").hasPrefix( GeoHash( "11" ) ) );
+ }
+
+ {
+ GeoHash x("1010");
+ GEOHEQ( x , "1010" );
+ GeoHash y = x + "01";
+ GEOHEQ( y , "101001" );
+ }
+
+ {
+
+ GeoHash a = g.hash( 5 , 5 );
+ GeoHash b = g.hash( 5 , 7 );
+ GeoHash c = g.hash( 100 , 100 );
+ /*
+ cout << "a: " << a << endl;
+ cout << "b: " << b << endl;
+ cout << "c: " << c << endl;
+
+ cout << "a: " << a.toStringHex1() << endl;
+ cout << "b: " << b.toStringHex1() << endl;
+ cout << "c: " << c.toStringHex1() << endl;
+ */
+ BSONObj oa = a.wrap();
+ BSONObj ob = b.wrap();
+ BSONObj oc = c.wrap();
+ /*
+ cout << "a: " << oa.hexDump() << endl;
+ cout << "b: " << ob.hexDump() << endl;
+ cout << "c: " << oc.hexDump() << endl;
+ */
+ assert( oa.woCompare( ob ) < 0 );
+ assert( oa.woCompare( oc ) < 0 );
+
+ }
+
+ {
+ GeoHash x( "000000" );
+ x.move( -1 , 0 );
+ GEOHEQ( x , "101010" );
+ x.move( 1 , -1 );
+ GEOHEQ( x , "010101" );
+ x.move( 0 , 1 );
+ GEOHEQ( x , "000000" );
+ }
+
+ {
+ GeoHash prefix( "110011000000" );
+ GeoHash entry( "1100110000011100000111000001110000011100000111000001000000000000" );
+ assert( ! entry.hasPrefix( prefix ) );
+
+ entry = GeoHash("1100110000001100000111000001110000011100000111000001000000000000");
+ assert( entry.toString().find( prefix.toString() ) == 0 );
+ assert( entry.hasPrefix( GeoHash( "1100" ) ) );
+ assert( entry.hasPrefix( prefix ) );
+ }
+
+ {
+ GeoHash a = g.hash( 50 , 50 );
+ GeoHash b = g.hash( 48 , 54 );
+ assert( round( 4.47214 ) == round( g.distance( a , b ) ) );
+ }
+
+
+ {
+ Box b( Point( 29.762283 , -95.364271 ) , Point( 29.764283000000002 , -95.36227099999999 ) );
+ assert( b.inside( 29.763 , -95.363 ) );
+ assert( ! b.inside( 32.9570255 , -96.1082497 ) );
+ assert( ! b.inside( 32.9570255 , -96.1082497 , .01 ) );
+ }
+
+ {
+ GeoHash a( "11001111" );
+ assert( GeoHash( "11" ) == a.commonPrefix( GeoHash("11") ) );
+ assert( GeoHash( "11" ) == a.commonPrefix( GeoHash("11110000") ) );
+ }
+
+ {
+ int N = 10000;
+ {
+ Timer t;
+ for ( int i=0; i<N; i++ ) {
+ unsigned x = (unsigned)rand();
+ unsigned y = (unsigned)rand();
+ GeoHash h( x , y );
+ unsigned a,b;
+ h.unhash_slow( a,b );
+ assert( a == x );
+ assert( b == y );
+ }
+ //cout << "slow: " << t.millis() << endl;
+ }
+
+ {
+ Timer t;
+ for ( int i=0; i<N; i++ ) {
+ unsigned x = (unsigned)rand();
+ unsigned y = (unsigned)rand();
+ GeoHash h( x , y );
+ unsigned a,b;
+ h.unhash_fast( a,b );
+ assert( a == x );
+ assert( b == y );
+ }
+ //cout << "fast: " << t.millis() << endl;
+ }
+
+ }
+
+ {
+ // see http://en.wikipedia.org/wiki/Great-circle_distance#Worked_example
+
+ {
+ Point BNA (-86.67, 36.12);
+ Point LAX (-118.40, 33.94);
+
+ double dist1 = spheredist_deg(BNA, LAX);
+ double dist2 = spheredist_deg(LAX, BNA);
+
+ // target is 0.45306
+ assert( 0.45305 <= dist1 && dist1 <= 0.45307 );
+ assert( 0.45305 <= dist2 && dist2 <= 0.45307 );
+ }
+ {
+ Point BNA (-1.5127, 0.6304);
+ Point LAX (-2.0665, 0.5924);
+
+ double dist1 = spheredist_rad(BNA, LAX);
+ double dist2 = spheredist_rad(LAX, BNA);
+
+ // target is 0.45306
+ assert( 0.45305 <= dist1 && dist1 <= 0.45307 );
+ assert( 0.45305 <= dist2 && dist2 <= 0.45307 );
+ }
+ {
+ Point JFK (-73.77694444, 40.63861111 );
+ Point LAX (-118.40, 33.94);
+
+ double dist = spheredist_deg(JFK, LAX) * EARTH_RADIUS_MILES;
+ assert( dist > 2469 && dist < 2470 );
+ }
+
+ {
+ Point BNA (-86.67, 36.12);
+ Point LAX (-118.40, 33.94);
+ Point JFK (-73.77694444, 40.63861111 );
+ assert( spheredist_deg(BNA, BNA) < 1e-6);
+ assert( spheredist_deg(LAX, LAX) < 1e-6);
+ assert( spheredist_deg(JFK, JFK) < 1e-6);
+
+ Point zero (0, 0);
+ Point antizero (0,-180);
+
+ // these were known to cause NaN
+ assert( spheredist_deg(zero, zero) < 1e-6);
+ assert( fabs(M_PI-spheredist_deg(zero, antizero)) < 1e-6);
+ assert( fabs(M_PI-spheredist_deg(antizero, zero)) < 1e-6);
+ }
+ }
+ }
+ } geoUnitTest;
+
+
}
+
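
Editorial aside, not part of the patch: a minimal standalone sketch of the approximate-then-exact pattern the new approxKeyCheck()/exactDocCheck() interface above relies on. Pt, approxCheck, exactCheck and the error margin are illustrative stand-ins, not MongoDB's types; only points whose approximate distance falls inside the error band around the radius trigger the more expensive exact document check.

// Standalone sketch (illustrative names, simplified 2D types)
#include <cmath>
#include <cstdio>

enum KeyResult { BAD, BORDER, GOOD };

struct Pt { double x, y; };

static double dist(const Pt& a, const Pt& b) {
    return std::sqrt((a.x - b.x) * (a.x - b.x) + (a.y - b.y) * (a.y - b.y));
}

// Approximate check against the (possibly imprecise) hash-derived point.
static KeyResult approxCheck(const Pt& center, double radius, double err,
                             const Pt& approx, double& d) {
    d = dist(center, approx);
    if (d >= radius - err && d <= radius + err) return BORDER; // inconclusive
    return d > radius ? BAD : GOOD;
}

// Exact check against the stored document location, only run for BORDER hits.
static bool exactCheck(const Pt& center, double radius, const Pt& exact) {
    return dist(center, exact) <= radius;
}

int main() {
    Pt center = {0, 0};
    double radius = 10, err = 0.5;
    Pt approx = {9.8, 0}, exact = {10.2, 0}; // hash-derived vs. stored location
    double d;
    KeyResult r = approxCheck(center, radius, err, approx, d);
    bool match = (r == GOOD) || (r == BORDER && exactCheck(center, radius, exact));
    std::printf("approx d=%.2f result=%d match=%d\n", d, (int)r, (int)match);
    return 0;
}

The same tri-state idea (BAD / BORDER / GOOD) is what lets the circle, box, and polygon browsers above skip exact checks for keys that are clearly in or clearly out.
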
diff --git a/db/geo/core.h b/db/geo/core.h
index 74f4b6e8269..b77997844f2 100644
--- a/db/geo/core.h
+++ b/db/geo/core.h
@@ -278,14 +278,19 @@ namespace mongo {
return *this;
}
- bool operator==(const GeoHash& h ) {
+ bool operator==(const GeoHash& h ) const {
return _hash == h._hash && _bits == h._bits;
}
- bool operator!=(const GeoHash& h ) {
+ bool operator!=(const GeoHash& h ) const {
return !( *this == h );
}
+ bool operator<(const GeoHash& h ) const {
+ if( _hash != h._hash ) return _hash < h._hash;
+ return _bits < h._bits;
+ }
+
GeoHash& operator+=( const char * s ) {
unsigned pos = _bits * 2;
_bits += strlen(s) / 2;
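
The operator< added here orders hashes first by value and then by precision bits. A small sketch, using a made-up MiniHash type rather than GeoHash itself, shows how that strict weak ordering lets such hashes serve as keys in ordered standard containers:

#include <cstdint>
#include <set>
#include <cstdio>

struct MiniHash {
    std::uint64_t hash;
    unsigned bits; // precision
    bool operator<(const MiniHash& h) const {
        if (hash != h.hash) return hash < h.hash;
        return bits < h.bits;
    }
};

int main() {
    std::set<MiniHash> seen;
    seen.insert({0xF0, 8});
    seen.insert({0xF0, 4}); // same value, coarser precision: distinct key
    seen.insert({0xF0, 8}); // duplicate, ignored
    std::printf("unique hashes: %zu\n", seen.size()); // prints 2
    return 0;
}
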
diff --git a/db/geo/haystack.cpp b/db/geo/haystack.cpp
index fd6b2392d6a..a5dd478f625 100644
--- a/db/geo/haystack.cpp
+++ b/db/geo/haystack.cpp
@@ -264,7 +264,7 @@ namespace mongo {
virtual LockType locktype() const { return READ; }
bool slaveOk() const { return true; }
bool slaveOverrideOk() const { return true; }
- bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
string ns = dbname + "." + cmdObj.firstElement().valuestr();
diff --git a/db/index.cpp b/db/index.cpp
index 8aebef45e8e..67a0d44e444 100644
--- a/db/index.cpp
+++ b/db/index.cpp
@@ -27,11 +27,6 @@
namespace mongo {
- /** old (<= v1.8) : 0
- 1 is new version
- */
- const int DefaultIndexVersionNumber = 1;
-
template< class V >
class IndexInterfaceImpl : public IndexInterface {
public:
diff --git a/db/index.h b/db/index.h
index debe2aa9c26..54b06394435 100644
--- a/db/index.h
+++ b/db/index.h
@@ -150,14 +150,18 @@ namespace mongo {
return io.getStringField("ns");
}
- int version() const {
- BSONElement e = info.obj()["v"];
+ static int versionForIndexObj( const BSONObj &obj ) {
+ BSONElement e = obj["v"];
if( e.type() == NumberInt )
return e._numberInt();
// should normally be an int. this is for backward compatibility
int v = e.numberInt();
uassert(14802, "index v field should be Integer type", v == 0);
- return v;
+ return v;
+ }
+
+ int version() const {
+ return versionForIndexObj( info.obj() );
}
/** @return true if index has unique constraint */
diff --git a/db/indexkey.cpp b/db/indexkey.cpp
index cc2cd43daf5..6d6fcc58cae 100644
--- a/db/indexkey.cpp
+++ b/db/indexkey.cpp
@@ -22,9 +22,15 @@
#include "btree.h"
#include "ops/query.h"
#include "background.h"
+#include "../util/text.h"
namespace mongo {
+ /** old (<= v1.8) : 0
+ 1 is new version
+ */
+ const int DefaultIndexVersionNumber = 1;
+
map<string,IndexPlugin*> * IndexPlugin::_plugins;
IndexType::IndexType( const IndexPlugin * plugin , const IndexSpec * spec )
@@ -100,6 +106,14 @@ namespace mongo {
}
{
+ // _undefinedElt
+ BSONObjBuilder b;
+ b.appendUndefined( "" );
+ _undefinedObj = b.obj();
+ _undefinedElt = _undefinedObj.firstElement();
+ }
+
+ {
// handle plugins
string pluginName = IndexPlugin::findPluginName( keyPattern );
if ( pluginName.size() ) {
@@ -116,131 +130,289 @@ namespace mongo {
_finishedInit = true;
}
-
- void IndexSpec::getKeys( const BSONObj &obj, BSONObjSet &keys ) const {
- if ( _indexType.get() ) { //plugin (eg geo)
- _indexType->getKeys( obj , keys );
- return;
- }
- vector<const char*> fieldNames( _fieldNames );
- vector<BSONElement> fixed( _fixed );
- _getKeys( fieldNames , fixed , obj, keys );
- if ( keys.empty() && ! _sparse )
- keys.insert( _nullKey );
+ void assertParallelArrays( const char *first, const char *second ) {
+ stringstream ss;
+ ss << "cannot index parallel arrays [" << first << "] [" << second << "]";
+ uasserted( 10088 , ss.str() );
}
-
- void IndexSpec::_getKeys( vector<const char*> fieldNames , vector<BSONElement> fixed , const BSONObj &obj, BSONObjSet &keys ) const {
- BSONElement arrElt;
- unsigned arrIdx = ~0;
- int numNotFound = 0;
-
- for( unsigned i = 0; i < fieldNames.size(); ++i ) {
- if ( *fieldNames[ i ] == '\0' )
- continue;
-
- BSONElement e = obj.getFieldDottedOrArray( fieldNames[ i ] );
-
- if ( e.eoo() ) {
- e = _nullElt; // no matching field
- numNotFound++;
+
+ class KeyGeneratorV0 {
+ public:
+ KeyGeneratorV0( const IndexSpec &spec ) : _spec( spec ) {}
+
+ void getKeys( const BSONObj &obj, BSONObjSet &keys ) const {
+ if ( _spec._indexType.get() ) { //plugin (eg geo)
+ _spec._indexType->getKeys( obj , keys );
+ return;
}
-
- if ( e.type() != Array )
- fieldNames[ i ] = ""; // no matching field or non-array match
-
- if ( *fieldNames[ i ] == '\0' )
- fixed[ i ] = e; // no need for further object expansion (though array expansion still possible)
-
- if ( e.type() == Array && arrElt.eoo() ) { // we only expand arrays on a single path -- track the path here
- arrIdx = i;
- arrElt = e;
+ vector<const char*> fieldNames( _spec._fieldNames );
+ vector<BSONElement> fixed( _spec._fixed );
+ _getKeys( fieldNames , fixed , obj, keys );
+ if ( keys.empty() && ! _spec._sparse )
+ keys.insert( _spec._nullKey );
+ }
+
+ private:
+ void _getKeys( vector<const char*> fieldNames , vector<BSONElement> fixed , const BSONObj &obj, BSONObjSet &keys ) const {
+ BSONElement arrElt;
+ unsigned arrIdx = ~0;
+ int numNotFound = 0;
+
+ for( unsigned i = 0; i < fieldNames.size(); ++i ) {
+ if ( *fieldNames[ i ] == '\0' )
+ continue;
+
+ BSONElement e = obj.getFieldDottedOrArray( fieldNames[ i ] );
+
+ if ( e.eoo() ) {
+ e = _spec._nullElt; // no matching field
+ numNotFound++;
+ }
+
+ if ( e.type() != Array )
+ fieldNames[ i ] = ""; // no matching field or non-array match
+
+ if ( *fieldNames[ i ] == '\0' )
+ fixed[ i ] = e; // no need for further object expansion (though array expansion still possible)
+
+ if ( e.type() == Array && arrElt.eoo() ) { // we only expand arrays on a single path -- track the path here
+ arrIdx = i;
+ arrElt = e;
+ }
+
+ // enforce single array path here
+ if ( e.type() == Array && e.rawdata() != arrElt.rawdata() ) {
+ assertParallelArrays( e.fieldName(), arrElt.fieldName() );
+ }
}
-
- // enforce single array path here
- if ( e.type() == Array && e.rawdata() != arrElt.rawdata() ) {
- stringstream ss;
- ss << "cannot index parallel arrays [" << e.fieldName() << "] [" << arrElt.fieldName() << "]";
- uasserted( 10088 , ss.str() );
+
+ bool allFound = true; // have we found elements for all field names in the key spec?
+ for( vector<const char*>::const_iterator i = fieldNames.begin(); i != fieldNames.end(); ++i ) {
+ if ( **i != '\0' ) {
+ allFound = false;
+ break;
+ }
}
- }
-
- bool allFound = true; // have we found elements for all field names in the key spec?
- for( vector<const char*>::const_iterator i = fieldNames.begin(); i != fieldNames.end(); ++i ) {
- if ( **i != '\0' ) {
- allFound = false;
- break;
+
+ if ( _spec._sparse && numNotFound == _spec._nFields ) {
+ // we didn't find any fields
+ // so we're not going to index this document
+ return;
}
- }
-
- if ( _sparse && numNotFound == _nFields ) {
- // we didn't find any fields
- // so we're not going to index this document
- return;
- }
-
- bool insertArrayNull = false;
-
- if ( allFound ) {
- if ( arrElt.eoo() ) {
- // no terminal array element to expand
- BSONObjBuilder b(_sizeTracker);
- for( vector< BSONElement >::iterator i = fixed.begin(); i != fixed.end(); ++i )
- b.appendAs( *i, "" );
- keys.insert( b.obj() );
+
+ bool insertArrayNull = false;
+
+ if ( allFound ) {
+ if ( arrElt.eoo() ) {
+ // no terminal array element to expand
+ BSONObjBuilder b(_spec._sizeTracker);
+ for( vector< BSONElement >::iterator i = fixed.begin(); i != fixed.end(); ++i )
+ b.appendAs( *i, "" );
+ keys.insert( b.obj() );
+ }
+ else {
+ // terminal array element to expand, so generate all keys
+ BSONObjIterator i( arrElt.embeddedObject() );
+ if ( i.more() ) {
+ while( i.more() ) {
+ BSONObjBuilder b(_spec._sizeTracker);
+ for( unsigned j = 0; j < fixed.size(); ++j ) {
+ if ( j == arrIdx )
+ b.appendAs( i.next(), "" );
+ else
+ b.appendAs( fixed[ j ], "" );
+ }
+ keys.insert( b.obj() );
+ }
+ }
+ else if ( fixed.size() > 1 ) {
+ insertArrayNull = true;
+ }
+ }
}
else {
- // terminal array element to expand, so generate all keys
+ // nonterminal array element to expand, so recurse
+ assert( !arrElt.eoo() );
BSONObjIterator i( arrElt.embeddedObject() );
if ( i.more() ) {
while( i.more() ) {
- BSONObjBuilder b(_sizeTracker);
- for( unsigned j = 0; j < fixed.size(); ++j ) {
- if ( j == arrIdx )
- b.appendAs( i.next(), "" );
- else
- b.appendAs( fixed[ j ], "" );
+ BSONElement e = i.next();
+ if ( e.type() == Object ) {
+ _getKeys( fieldNames, fixed, e.embeddedObject(), keys );
}
- keys.insert( b.obj() );
}
}
- else if ( fixed.size() > 1 ) {
+ else {
insertArrayNull = true;
}
}
- }
- else {
- // nonterminal array element to expand, so recurse
- assert( !arrElt.eoo() );
- BSONObjIterator i( arrElt.embeddedObject() );
- if ( i.more() ) {
- while( i.more() ) {
- BSONElement e = i.next();
- if ( e.type() == Object ) {
- _getKeys( fieldNames, fixed, e.embeddedObject(), keys );
+
+ if ( insertArrayNull ) {
+ // x : [] - need to insert undefined
+ BSONObjBuilder b(_spec._sizeTracker);
+ for( unsigned j = 0; j < fixed.size(); ++j ) {
+ if ( j == arrIdx ) {
+ b.appendUndefined( "" );
+ }
+ else {
+ BSONElement e = fixed[j];
+ if ( e.eoo() )
+ b.appendNull( "" );
+ else
+ b.appendAs( e , "" );
}
}
+ keys.insert( b.obj() );
}
- else {
- insertArrayNull = true;
+ }
+
+ const IndexSpec &_spec;
+ };
+
+ class KeyGeneratorV1 {
+ public:
+ KeyGeneratorV1( const IndexSpec &spec ) : _spec( spec ) {}
+
+ void getKeys( const BSONObj &obj, BSONObjSet &keys ) const {
+ if ( _spec._indexType.get() ) { //plugin (eg geo)
+ _spec._indexType->getKeys( obj , keys );
+ return;
+ }
+ vector<const char*> fieldNames( _spec._fieldNames );
+ vector<BSONElement> fixed( _spec._fixed );
+ _getKeys( fieldNames , fixed , obj, keys );
+ if ( keys.empty() && ! _spec._sparse )
+ keys.insert( _spec._nullKey );
+ }
+
+ private:
+ /**
+ * @param arrayNestedArray - set if the returned element is an array nested directly within arr.
+ */
+ BSONElement extractNextElement( const BSONObj &obj, const BSONObj &arr, const char *&field, bool &arrayNestedArray ) const {
+ string firstField = mongoutils::str::before( field, '.' );
+ bool haveObjField = !obj.getField( firstField ).eoo();
+ BSONElement arrField = arr.getField( firstField );
+ bool haveArrField = !arrField.eoo();
+
+ // An index component field name cannot exist in both a document array and one of that array's children.
+ uassert( 15855 , "Parallel references while expanding indexed field in array", !haveObjField || !haveArrField );
+
+ arrayNestedArray = false;
+ if ( haveObjField ) {
+ return obj.getFieldDottedOrArray( field );
+ }
+ else if ( haveArrField ) {
+ if ( arrField.type() == Array ) {
+ arrayNestedArray = true;
+ }
+ return arr.getFieldDottedOrArray( field );
}
+ return BSONElement();
}
-
- if ( insertArrayNull ) {
- // x : [] - need to insert undefined
- BSONObjBuilder b(_sizeTracker);
- for( unsigned j = 0; j < fixed.size(); ++j ) {
- if ( j == arrIdx ) {
- b.appendUndefined( "" );
+
+ void _getKeysArrEltFixed( vector<const char*> &fieldNames , vector<BSONElement> &fixed , const BSONElement &arrEntry, BSONObjSet &keys, int numNotFound, const BSONElement &arrObjElt, const set< unsigned > &arrIdxs, bool mayExpandArrayUnembedded ) const {
+ // set up any terminal array values
+ for( set<unsigned>::const_iterator j = arrIdxs.begin(); j != arrIdxs.end(); ++j ) {
+ if ( *fieldNames[ *j ] == '\0' ) {
+ fixed[ *j ] = mayExpandArrayUnembedded ? arrEntry : arrObjElt;
+ }
+ }
+ // recurse
+ _getKeys( fieldNames, fixed, ( arrEntry.type() == Object ) ? arrEntry.embeddedObject() : BSONObj(), keys, numNotFound, arrObjElt.embeddedObject() );
+ }
+
+ /**
+ * @param fieldNames - fields to index, may be postfixes in recursive calls
+ * @param fixed - values that have already been identified for their index fields
+ * @param obj - object from which keys should be extracted, based on names in fieldNames
+ * @param keys - set where index keys are written
+ * @param numNotFound - number of index fields that have already been identified as missing
+ * @param array - array from which keys should be extracted, based on names in fieldNames
+ * If obj and array are both nonempty, obj will be one of the elements of array.
+ */
+ void _getKeys( vector<const char*> fieldNames , vector<BSONElement> fixed , const BSONObj &obj, BSONObjSet &keys, int numNotFound = 0, const BSONObj &array = BSONObj() ) const {
+ BSONElement arrElt;
+ set<unsigned> arrIdxs;
+ bool mayExpandArrayUnembedded = true;
+ for( unsigned i = 0; i < fieldNames.size(); ++i ) {
+ if ( *fieldNames[ i ] == '\0' ) {
+ continue;
+ }
+
+ bool arrayNestedArray;
+ // Extract element matching fieldName[ i ] from object xor array.
+ BSONElement e = extractNextElement( obj, array, fieldNames[ i ], arrayNestedArray );
+
+ if ( e.eoo() ) {
+ // if field not present, set to null
+ fixed[ i ] = _spec._nullElt;
+ // done expanding this field name
+ fieldNames[ i ] = "";
+ numNotFound++;
+ }
+ else if ( e.type() == Array ) {
+ arrIdxs.insert( i );
+ if ( arrElt.eoo() ) {
+ // we only expand arrays on a single path -- track the path here
+ arrElt = e;
+ }
+ else if ( e.rawdata() != arrElt.rawdata() ) {
+ // enforce single array path here
+ assertParallelArrays( e.fieldName(), arrElt.fieldName() );
+ }
+ if ( arrayNestedArray ) {
+ mayExpandArrayUnembedded = false;
+ }
}
else {
- BSONElement e = fixed[j];
- if ( e.eoo() )
- b.appendNull( "" );
- else
- b.appendAs( e , "" );
+ // not an array - no need for further expansion
+ fixed[ i ] = e;
+ }
+ }
+
+ if ( arrElt.eoo() ) {
+ // No array, so generate a single key.
+ if ( _spec._sparse && numNotFound == _spec._nFields ) {
+ return;
+ }
+ BSONObjBuilder b(_spec._sizeTracker);
+ for( vector< BSONElement >::iterator i = fixed.begin(); i != fixed.end(); ++i ) {
+ b.appendAs( *i, "" );
+ }
+ keys.insert( b.obj() );
+ }
+ else if ( arrElt.embeddedObject().firstElement().eoo() ) {
+ // Empty array, so set matching fields to undefined.
+ _getKeysArrEltFixed( fieldNames, fixed, _spec._undefinedElt, keys, numNotFound, arrElt, arrIdxs, true );
+ }
+ else {
+ // Non empty array that can be expanded, so generate a key for each member.
+ BSONObj arrObj = arrElt.embeddedObject();
+ BSONObjIterator i( arrObj );
+ while( i.more() ) {
+ _getKeysArrEltFixed( fieldNames, fixed, i.next(), keys, numNotFound, arrElt, arrIdxs, mayExpandArrayUnembedded );
}
}
- keys.insert( b.obj() );
+ }
+
+ const IndexSpec &_spec;
+ };
+
+ void IndexSpec::getKeys( const BSONObj &obj, BSONObjSet &keys ) const {
+ switch( indexVersion() ) {
+ case 0: {
+ KeyGeneratorV0 g( *this );
+ g.getKeys( obj, keys );
+ break;
+ }
+ case 1: {
+ KeyGeneratorV1 g( *this );
+ g.getKeys( obj, keys );
+ break;
+ }
+ default:
+ massert( 15869, "Invalid index version for key generation.", false );
}
}
@@ -275,6 +447,13 @@ namespace mongo {
IndexSuitability IndexType::suitability( const BSONObj& query , const BSONObj& order ) const {
return _spec->_suitability( query , order );
}
+
+ int IndexSpec::indexVersion() const {
+ if ( !info.hasField( "v" ) ) {
+ return DefaultIndexVersionNumber;
+ }
+ return IndexDetails::versionForIndexObj( info );
+ }
bool IndexType::scanAndOrderRequired( const BSONObj& query , const BSONObj& order ) const {
return ! order.isEmpty();
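
As a rough illustration of the expansion the V0/V1 generators above perform for a compound index over { a, b }: one key per element of the single expanded array, and (as in the V1 code path) an undefined placeholder when that array is empty. Doc and makeKeys below are made-up stand-ins working on plain ints instead of BSON elements.

#include <cstdio>
#include <string>
#include <vector>

struct Doc {
    std::vector<int> a; // array-valued indexed field
    int b;              // scalar indexed field
};

// One index key per element of the (single) expanded array; an empty array
// produces a single key with an "undefined" placeholder for that component.
static std::vector<std::string> makeKeys(const Doc& d) {
    std::vector<std::string> keys;
    if (d.a.empty()) {
        keys.push_back("{ '' : undefined, '' : " + std::to_string(d.b) + " }");
        return keys;
    }
    for (int v : d.a)
        keys.push_back("{ '' : " + std::to_string(v) +
                       ", '' : " + std::to_string(d.b) + " }");
    return keys;
}

int main() {
    Doc d1 = {{1, 2}, 3};
    Doc d2 = {{}, 3};
    for (const std::string& k : makeKeys(d1)) std::printf("%s\n", k.c_str());
    for (const std::string& k : makeKeys(d2)) std::printf("%s\n", k.c_str());
    // Two distinct array-valued indexed fields in one document would instead
    // be rejected: "cannot index parallel arrays" (uasserted 10088 above).
    return 0;
}
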
diff --git a/db/indexkey.h b/db/indexkey.h
index 4a755f8a4e8..c04cd6396f6 100644
--- a/db/indexkey.h
+++ b/db/indexkey.h
@@ -25,6 +25,8 @@
namespace mongo {
+ extern const int DefaultIndexVersionNumber;
+
class Cursor;
class IndexSpec;
class IndexType; // TODO: this name sucks
@@ -161,16 +163,21 @@ namespace mongo {
protected:
+ int indexVersion() const;
+
IndexSuitability _suitability( const BSONObj& query , const BSONObj& order ) const ;
- void _getKeys( vector<const char*> fieldNames , vector<BSONElement> fixed , const BSONObj &obj, BSONObjSet &keys ) const;
-
BSONSizeTracker _sizeTracker;
vector<const char*> _fieldNames;
vector<BSONElement> _fixed;
+
BSONObj _nullKey; // a full key with all fields null
BSONObj _nullObj; // only used for _nullElt
BSONElement _nullElt; // jstNull
+
+ BSONObj _undefinedObj; // only used for _undefinedElt
+ BSONElement _undefinedElt; // undefined
+
int _nFields; // number of fields in the index
bool _sparse; // if the index is sparse
shared_ptr<IndexType> _indexType;
@@ -179,6 +186,8 @@ namespace mongo {
void _init();
friend class IndexType;
+ friend class KeyGeneratorV0;
+ friend class KeyGeneratorV1;
public:
bool _finishedInit;
};
diff --git a/db/instance.cpp b/db/instance.cpp
index ede433d652b..971cd2e7b38 100644
--- a/db/instance.cpp
+++ b/db/instance.cpp
@@ -587,7 +587,7 @@ namespace mongo {
}
NOINLINE_DECL void insertMulti(DbMessage& d, const char *ns, const BSONObj& _js) {
- const bool keepGoing = d.reservedField() & InsertOption_KeepGoing;
+ const bool keepGoing = d.reservedField() & InsertOption_ContinueOnError;
int n = 0;
BSONObj js(_js);
while( 1 ) {
diff --git a/db/instance.h b/db/instance.h
index 2b86eb44fce..422c77d5ffa 100644
--- a/db/instance.h
+++ b/db/instance.h
@@ -147,6 +147,8 @@ namespace mongo {
virtual ConnectionString::ConnectionType type() const { return ConnectionString::MASTER; }
double getSoTimeout() const { return 0; }
+
+ virtual bool lazySupported() const { return true; }
private:
static HostAndPort _clientHost;
};
diff --git a/db/introspect.cpp b/db/introspect.cpp
index ca65710b3fc..7e1d19ce2f3 100644
--- a/db/introspect.cpp
+++ b/db/introspect.cpp
@@ -40,7 +40,7 @@ namespace mongo {
profileBufBuilder.reset();
BSONObjBuilder b(profileBufBuilder);
b.appendDate("ts", jsTime());
- currentOp.debug().append( b );
+ currentOp.debug().append( currentOp , b );
b.append("client", c.clientAddress() );
@@ -49,6 +49,26 @@ namespace mongo {
BSONObj p = b.done();
+ if (p.objsize() > 100*1024){
+ string small = p.toString(/*isArray*/false, /*full*/false);
+
+ warning() << "can't add full line to system.profile: " << small;
+
+ // rebuild with limited info
+ BSONObjBuilder b(profileBufBuilder);
+ b.appendDate("ts", jsTime());
+ b.append("client", c.clientAddress() );
+ if ( c.getAuthenticationInfo() )
+ b.append( "user" , c.getAuthenticationInfo()->getUser( nsToDatabase( ns ) ) );
+
+ b.append("err", "profile line too large (max is 100KB)");
+ if (small.size() < 100*1024){ // should be much smaller but if not don't break anything
+ b.append("abbreviated", small);
+ }
+
+ p = b.done();
+ }
+
// write: not replicated
NamespaceDetails *d = db->namespaceIndex.details(ns);
if( d ) {
diff --git a/db/jsobj.cpp b/db/jsobj.cpp
index 53c2329bd35..dcb77447873 100644
--- a/db/jsobj.cpp
+++ b/db/jsobj.cpp
@@ -45,7 +45,7 @@ BOOST_STATIC_ASSERT( sizeof(mongo::OID) == 12 );
namespace mongo {
- BSONElement nullElement;
+ BSONElement eooElement;
GENOIDLabeler GENOID;
@@ -508,6 +508,12 @@ namespace mongo {
}
BSONObj staticNull = fromjson( "{'':null}" );
+ BSONObj makeUndefined() {
+ BSONObjBuilder b;
+ b.appendUndefined( "" );
+ return b.obj();
+ }
+ BSONObj staticUndefined = makeUndefined();
/* well ordered compare */
int BSONObj::woSortOrder(const BSONObj& other, const BSONObj& sortKey , bool useDotted ) const {
@@ -613,13 +619,13 @@ namespace mongo {
}
if ( sub.eoo() )
- return nullElement;
- else if ( sub.type() == Array || name[0] == '\0')
+ return eooElement;
+ else if ( sub.type() == Array || name[0] == '\0' )
return sub;
else if ( sub.type() == Object )
return sub.embeddedObject().getFieldDottedOrArray( name );
else
- return nullElement;
+ return eooElement;
}
/**
@@ -919,7 +925,7 @@ namespace mongo {
c.appendRegex("x", "goo");
BSONObj p = c.done();
- assert( !o.shallowEqual( p ) );
+ assert( !o.binaryEqual( p ) );
assert( o.woCompare( p ) < 0 );
}
@@ -1024,7 +1030,7 @@ namespace mongo {
BSONObj a = A.done();
BSONObj b = B.done();
BSONObj c = C.done();
- assert( !a.shallowEqual( b ) ); // comments on operator==
+ assert( !a.binaryEqual( b ) ); // comments on operator==
int cmp = a.woCompare(b);
assert( cmp == 0 );
cmp = a.woCompare(c);
@@ -1167,13 +1173,9 @@ namespace mongo {
while (l.more() && r.more()){
if (strcmp(l.next().fieldName(), r.next().fieldName())) {
- PRINTFL;
return false;
}
}
- PRINT(l.more());
- PRINT(r.more());
- PRINT(l.more() || r.more());
return !(l.more() || r.more()); // false if lhs and rhs have diff nFields()
}
diff --git a/db/key.cpp b/db/key.cpp
index ddc2d593350..648502ebf17 100644
--- a/db/key.cpp
+++ b/db/key.cpp
@@ -264,15 +264,17 @@ namespace mongo {
if( (t & 0x78) == 0 && t != ByteArrayDeprecated ) {
int len;
const char * d = e.binData(len);
- int code = BinDataLengthToCode[len];
- if( code >= 0 ) {
- if( t >= 128 )
- t = (t-128) | 0x08;
- dassert( (code&t) == 0 );
- b.appendUChar( cbindata|bits );
- b.appendUChar( code | t );
- b.appendBuf(d, len);
- break;
+ if( len <= BinDataLenMax ) {
+ int code = BinDataLengthToCode[len];
+ if( code >= 0 ) {
+ if( t >= 128 )
+ t = (t-128) | 0x08;
+ dassert( (code&t) == 0 );
+ b.appendUChar( cbindata|bits );
+ b.appendUChar( code | t );
+ b.appendBuf(d, len);
+ break;
+ }
}
}
traditional(obj);
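
The key.cpp hunk above adds a length check before indexing into BinDataLengthToCode, so oversized BinData now takes the traditional key format instead of reading past the table. A rough standalone sketch of that guarded-lookup-with-fallback shape, with made-up table contents and limit:

#include <cstdio>

static const int kMaxCompactLen = 32;   // stand-in for BinDataLenMax
static int lengthToCode(int len) {      // stand-in for BinDataLengthToCode[]
    switch (len) {
    case 2: return 0x10;
    case 4: return 0x20;
    case 8: return 0x30;
    default: return -1;                 // not encodable compactly
    }
}

static void appendBinData(int len) {
    if (len <= kMaxCompactLen) {
        int code = lengthToCode(len);
        if (code >= 0) {
            std::printf("compact key, code=0x%x len=%d\n", code, len);
            return;
        }
    }
    std::printf("traditional key, len=%d\n", len); // fallback path
}

int main() {
    appendBinData(4);    // compact
    appendBinData(5);    // traditional (no compact code for this length)
    appendBinData(4096); // traditional (exceeds the limit, no table lookup)
    return 0;
}
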
diff --git a/db/matcher.cpp b/db/matcher.cpp
index 23d5a7057bf..2b92d5797c3 100644
--- a/db/matcher.cpp
+++ b/db/matcher.cpp
@@ -64,8 +64,14 @@ namespace mongo {
}
~Where() {
- if ( scope.get() )
- scope->execSetup( "_mongo.readOnly = false;" , "make not read only" );
+ if ( scope.get() ){
+ try {
+ scope->execSetup( "_mongo.readOnly = false;" , "make not read only" );
+ }
+ catch( DBException& e ){
+ warning() << "javascript scope cleanup interrupted" << causedBy( e ) << endl;
+ }
+ }
if ( jsScope ) {
delete jsScope;
@@ -148,6 +154,9 @@ namespace mongo {
rm._prefix = prefix;
}
else {
+ uassert( 15882, "$elemMatch not allowed within $in",
+ ie.type() != Object ||
+ ie.embeddedObject().firstElement().getGtLtOp() != BSONObj::opELEM_MATCH );
_myset->insert(ie);
}
}
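The Where destructor now swallows exceptions from the JavaScript scope cleanup so a failed cleanup cannot propagate out of the destructor during stack unwinding. A generic sketch of that idiom, under the assumption that the cleanup call may throw:

    #include <iostream>
    #include <stdexcept>

    struct ScopeCleanup {
        ~ScopeCleanup() {
            try {
                resetReadOnlyFlag();   // may throw, e.g. if the operation was interrupted
            }
            catch (const std::exception& e) {
                std::cerr << "cleanup interrupted: " << e.what() << "\n";
            }
        }
        static void resetReadOnlyFlag() { throw std::runtime_error("interrupted"); }
    };

    int main() {
        ScopeCleanup c;   // destructor logs instead of letting the exception escape
    }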
diff --git a/db/modules/mms.cpp b/db/modules/mms.cpp
index 28fc225477f..40abb391dfb 100644
--- a/db/modules/mms.cpp
+++ b/db/modules/mms.cpp
@@ -142,7 +142,7 @@ namespace mongo {
string errmsg;
BSONObjBuilder sub;
- if ( ! c->run( "admin.$cmd" , co , errmsg , sub , false ) )
+ if ( ! c->run( "admin.$cmd" , co , 0 , errmsg , sub , false ) )
postData.append( cmd , errmsg );
else
postData.append( cmd , sub.obj() );
diff --git a/db/mongommf.h b/db/mongommf.h
index b347e4ff259..0c4e8e4a19d 100644
--- a/db/mongommf.h
+++ b/db/mongommf.h
@@ -75,7 +75,7 @@ namespace mongo {
fileSuffixNo() is 3
if the suffix is "ns", fileSuffixNo -1
*/
- RelativePath relativePath() const {
+ const RelativePath& relativePath() const {
DEV assert( !_p._p.empty() );
return _p;
}
diff --git a/db/namespace.cpp b/db/namespace.cpp
index 927f56b6e7b..2bc7409e56c 100644
--- a/db/namespace.cpp
+++ b/db/namespace.cpp
@@ -604,6 +604,17 @@ namespace mongo {
}
}
+ void NamespaceDetailsTransient::eraseForPrefix(const char *prefix) {
+ assertInWriteLock();
+ vector< string > found;
+ for( ouriter i = _map.begin(); i != _map.end(); ++i )
+ if ( strncmp( i->first.c_str(), prefix, strlen( prefix ) ) == 0 )
+ found.push_back( i->first );
+ for( vector< string >::iterator i = found.begin(); i != found.end(); ++i ) {
+ _map.erase(*i);
+ }
+ }
+
void NamespaceDetailsTransient::computeIndexKeys() {
_keysComputed = true;
_indexKeys.clear();
@@ -657,7 +668,7 @@ namespace mongo {
// index details across commands are in cursors and nsd
// transient (including query cache) so clear these.
ClientCursor::invalidate( from );
- NamespaceDetailsTransient::clearForPrefix( from );
+ NamespaceDetailsTransient::eraseForPrefix( from );
NamespaceDetails *details = ni->details( from );
ni->add_ns( to, *details );
diff --git a/db/namespace.h b/db/namespace.h
index a1b7c2274bc..3dfb3f33767 100644
--- a/db/namespace.h
+++ b/db/namespace.h
@@ -454,6 +454,7 @@ namespace mongo {
Can be useful as index namespaces share the same start as the regular collection.
SLOW - sequential scan of all NamespaceDetailsTransient objects */
static void clearForPrefix(const char *prefix);
+ static void eraseForPrefix(const char *prefix);
/**
* @return a cursor interface to the query optimizer. The implementation may
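eraseForPrefix() removes every cached entry whose key starts with the given prefix; matching keys are collected first and erased in a second pass, mirroring the implementation above. A standalone sketch of that collect-then-erase approach:

    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>

    void eraseForPrefix(std::map<std::string, int>& m, const std::string& prefix) {
        std::vector<std::string> found;
        for (std::map<std::string, int>::iterator i = m.begin(); i != m.end(); ++i)
            if (i->first.compare(0, prefix.size(), prefix) == 0)
                found.push_back(i->first);
        for (size_t i = 0; i < found.size(); ++i)
            m.erase(found[i]);
    }

    int main() {
        std::map<std::string, int> m;
        m["test.foo"] = 1; m["test.foo.$_id_"] = 2; m["test.bar"] = 3;
        eraseForPrefix(m, "test.foo");
        std::cout << m.size() << "\n";   // 1: only "test.bar" remains
    }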
diff --git a/db/oplog.cpp b/db/oplog.cpp
index 7286fd9053c..dc9db76d9d5 100644
--- a/db/oplog.cpp
+++ b/db/oplog.cpp
@@ -473,9 +473,9 @@ namespace mongo {
return _qp.nsd()->capFirstNewRecord;
}
- void assertExtentNonempty( const Extent *e ) {
+ void wassertExtentNonempty( const Extent *e ) {
// TODO ensure this requirement is clearly enforced, or fix.
- massert( 14834, "empty extent found during finding start scan", !e->firstRecord.isNull() );
+ wassert( !e->firstRecord.isNull() );
}
DiskLoc FindingStartCursor::prevExtentFirstLoc( const DiskLoc &rec ) {
@@ -488,14 +488,14 @@ namespace mongo {
e = e->xprev.ext();
}
if ( e->myLoc != _qp.nsd()->capExtent ) {
- assertExtentNonempty( e );
+ wassertExtentNonempty( e );
return e->firstRecord;
}
}
else {
if ( !e->xprev.isNull() ) {
e = e->xprev.ext();
- assertExtentNonempty( e );
+ wassertExtentNonempty( e );
return e->firstRecord;
}
}
@@ -506,20 +506,30 @@ namespace mongo {
shared_ptr<Cursor> c = _qp.newCursor( startLoc );
_findingStartCursor.reset( new ClientCursor(QueryOption_NoCursorTimeout, c, _qp.ns()) );
}
+
+ bool FindingStartCursor::firstDocMatchesOrEmpty() const {
+ shared_ptr<Cursor> c = _qp.newCursor();
+ return !c->ok() || _matcher->matchesCurrent( c.get() );
+ }
void FindingStartCursor::init() {
- // Use a ClientCursor here so we can release db mutex while scanning
- // oplog (can take quite a while with large oplogs).
- shared_ptr<Cursor> c = _qp.newReverseCursor();
- _findingStartCursor.reset( new ClientCursor(QueryOption_NoCursorTimeout, c, _qp.ns(), BSONObj()) );
- _findingStartTimer.reset();
- _findingStartMode = Initial;
BSONElement tsElt = _qp.originalQuery()[ "ts" ];
massert( 13044, "no ts field in query", !tsElt.eoo() );
BSONObjBuilder b;
b.append( tsElt );
BSONObj tsQuery = b.obj();
_matcher.reset(new CoveredIndexMatcher(tsQuery, _qp.indexKey()));
+ if ( firstDocMatchesOrEmpty() ) {
+ _c = _qp.newCursor();
+ _findingStart = false;
+ return;
+ }
+ // Use a ClientCursor here so we can release db mutex while scanning
+ // oplog (can take quite a while with large oplogs).
+ shared_ptr<Cursor> c = _qp.newReverseCursor();
+ _findingStartCursor.reset( new ClientCursor(QueryOption_NoCursorTimeout, c, _qp.ns(), BSONObj()) );
+ _findingStartTimer.reset();
+ _findingStartMode = Initial;
}
// -------------------------------------
@@ -704,7 +714,7 @@ namespace mongo {
virtual void help( stringstream &help ) const {
help << "internal (sharding)\n{ applyOps : [ ] , preCondition : [ { ns : ... , q : ... , res : ... } ] }";
}
- virtual bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
if ( cmdObj.firstElement().type() != Array ) {
errmsg = "ops has to be an array";
diff --git a/db/oplog.h b/db/oplog.h
index f87a1c85e04..79fb01b0a4d 100644
--- a/db/oplog.h
+++ b/db/oplog.h
@@ -118,6 +118,7 @@ namespace mongo {
_findingStartCursor.reset( 0 );
}
void init();
+ bool firstDocMatchesOrEmpty() const;
};
void pretouchOperation(const BSONObj& op);
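The FindingStartCursor change adds a fast path: if the collection is empty or its first document already satisfies the ts predicate, the expensive reverse scan for a start position is skipped. A toy sketch of that shortcut over a sorted sequence of timestamps (not the oplog machinery itself):

    #include <iostream>
    #include <vector>

    int findStart(const std::vector<int>& oplog, int minTs) {
        if (oplog.empty() || oplog.front() >= minTs)
            return 0;                        // fast path: start at the beginning
        size_t i = oplog.size();             // otherwise scan backwards for the start point
        while (i > 0 && oplog[i - 1] >= minTs)
            --i;
        return (int)i;
    }

    int main() {
        std::vector<int> oplog;
        oplog.push_back(5); oplog.push_back(7); oplog.push_back(9);
        std::cout << findStart(oplog, 3) << " " << findStart(oplog, 8) << "\n";   // 0 2
    }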
diff --git a/db/ops/query.cpp b/db/ops/query.cpp
index 120382fa7d8..f13b6e5ea4b 100644
--- a/db/ops/query.cpp
+++ b/db/ops/query.cpp
@@ -36,6 +36,7 @@
#include "../lasterror.h"
#include "../../s/d_logic.h"
#include "../repl_block.h"
+#include "../../server.h"
namespace mongo {
@@ -92,21 +93,15 @@ namespace mongo {
ClientCursor::Pointer p(cursorid);
ClientCursor *cc = p.c();
- int bufSize = 512;
- if ( cc ) {
- bufSize += sizeof( QueryResult );
- bufSize += MaxBytesToReturnToClientAtOnce;
- }
+ int bufSize = 512 + sizeof( QueryResult ) + MaxBytesToReturnToClientAtOnce;
BufBuilder b( bufSize );
-
b.skip(sizeof(QueryResult));
-
int resultFlags = ResultFlag_AwaitCapable;
int start = 0;
int n = 0;
- if ( !cc ) {
+ if ( unlikely(!cc) ) {
log() << "getMore: cursorid not found " << ns << " " << cursorid << endl;
cursorid = 0;
resultFlags = ResultFlag_CursorNotFound;
@@ -420,6 +415,8 @@ namespace mongo {
*_b << "indexBounds" << c->prettyIndexBounds();
+ c->explainDetails( *_b );
+
if ( !hint ) {
*_b << "allPlans" << _a->arr();
}
@@ -899,9 +896,6 @@ namespace mongo {
if ( ! (explain || pq.showDiskLoc()) && isSimpleIdQuery( query ) && !pq.hasOption( QueryOption_CursorTailable ) ) {
- //NamespaceDetails* d = nsdetails(ns);
- //uassert(14820, "capped collections have no _id index by default, can only query by _id if one added", d == NULL || d->haveIdIndex() );
-
bool nsFound = false;
bool indexFound = false;
diff --git a/db/ops/update.cpp b/db/ops/update.cpp
index 3221fe0f277..d70048d2cc2 100644
--- a/db/ops/update.cpp
+++ b/db/ops/update.cpp
@@ -1060,11 +1060,10 @@ namespace mongo {
debug.updateobj = updateobj;
- /* idea with these here it to make them loop invariant for multi updates, and thus be a bit faster for that case */
- /* NOTE: when yield() is added herein, these must be refreshed after each call to yield! */
+ // idea with these here is to make them loop invariant for multi updates, and thus be a bit faster for that case
+ // The pointers may be left invalid on a failed or terminal yield recovery.
NamespaceDetails *d = nsdetails(ns); // can be null if an upsert...
NamespaceDetailsTransient *nsdt = &NamespaceDetailsTransient::get_w(ns);
- /* end note */
auto_ptr<ModSet> mods;
bool isOperatorUpdate = updateobj.firstElementFieldName()[0] == '$';
@@ -1105,6 +1104,9 @@ namespace mongo {
shared_ptr< MultiCursor::CursorOp > opPtr( new UpdateOp( mods.get() && mods->hasDynamicArray() ) );
shared_ptr< MultiCursor > c( new MultiCursor( ns, patternOrig, BSONObj(), opPtr, true ) );
+ d = nsdetails(ns);
+ nsdt = &NamespaceDetailsTransient::get_w(ns);
+
if( c->ok() ) {
set<DiskLoc> seenObjects;
MatchDetails details;
@@ -1114,20 +1116,28 @@ namespace mongo {
bool atomic = c->matcher()->docMatcher().atomic();
- // *****************
- if ( cc.get() == 0 ) {
- shared_ptr< Cursor > cPtr = c;
- cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , cPtr , ns ) );
- }
-
- if ( ! cc->yieldSometimes( ClientCursor::WillNeed ) ) {
- cc.release();
- break;
- }
- if ( !c->ok() ) {
- break;
+ if ( !atomic ) {
+ // *****************
+ if ( cc.get() == 0 ) {
+ shared_ptr< Cursor > cPtr = c;
+ cc.reset( new ClientCursor( QueryOption_NoCursorTimeout , cPtr , ns ) );
+ }
+
+ bool didYield;
+ if ( ! cc->yieldSometimes( ClientCursor::WillNeed, &didYield ) ) {
+ cc.release();
+ break;
+ }
+ if ( !c->ok() ) {
+ break;
+ }
+
+ if ( didYield ) {
+ d = nsdetails(ns);
+ nsdt = &NamespaceDetailsTransient::get_w(ns);
+ }
+ // *****************
}
- // *****************
// May have already matched in UpdateOp, but do again to get details set correctly
if ( ! c->matcher()->matchesCurrent( c.get(), &details ) ) {
@@ -1146,6 +1156,8 @@ namespace mongo {
if ( !c->ok() ) {
break;
}
+ d = nsdetails(ns);
+ nsdt = &NamespaceDetailsTransient::get_w(ns);
}
continue;
}
@@ -1276,10 +1288,11 @@ namespace mongo {
if ( !c->ok() ) {
break;
}
+ d = nsdetails(ns);
+ nsdt = &NamespaceDetailsTransient::get_w(ns);
}
- if (atomic)
- getDur().commitIfNeeded();
+ getDur().commitIfNeeded();
continue;
}
diff --git a/db/pdfile.cpp b/db/pdfile.cpp
index 0b7a5b0830d..0569ba6868e 100644
--- a/db/pdfile.cpp
+++ b/db/pdfile.cpp
@@ -869,6 +869,7 @@ namespace mongo {
result.append("ns", name.c_str());
ClientCursor::invalidate(name.c_str());
Top::global.collectionDropped( name );
+ NamespaceDetailsTransient::eraseForPrefix( name.c_str() );
dropNS(name);
}
@@ -967,7 +968,7 @@ namespace mongo {
}
}
- void DataFileMgr::deleteRecord(const char *ns, Record *todelete, const DiskLoc& dl, bool cappedOK, bool noWarn) {
+ void DataFileMgr::deleteRecord(const char *ns, Record *todelete, const DiskLoc& dl, bool cappedOK, bool noWarn, bool doLog ) {
dassert( todelete == dl.rec() );
NamespaceDetails* d = nsdetails(ns);
@@ -976,6 +977,14 @@ namespace mongo {
uassert( 10089 , "can't remove from a capped collection" , 0 );
return;
}
+
+ BSONObj toDelete;
+ if ( doLog ) {
+ BSONElement e = dl.obj()["_id"];
+ if ( e.type() ) {
+ toDelete = e.wrap();
+ }
+ }
/* check if any cursors point to us. if so, advance them. */
ClientCursor::aboutToDelete(dl);
@@ -984,6 +993,10 @@ namespace mongo {
_deleteRecord(d, ns, todelete, dl);
NamespaceDetailsTransient::get_w( ns ).notifyOfWriteOp();
+
+ if ( ! toDelete.isEmpty() ) {
+ logOp( "d" , ns , toDelete );
+ }
}
@@ -1181,7 +1194,13 @@ namespace mongo {
BSONObjExternalSorter::Data d = i->next();
try {
- btBuilder.addKey(d.first, d.second);
+ if ( !dupsAllowed && dropDups ) {
+ LastError::Disabled led( lastError.get() );
+ btBuilder.addKey(d.first, d.second);
+ }
+ else {
+ btBuilder.addKey(d.first, d.second);
+ }
}
catch( AssertionException& e ) {
if ( dupsAllowed ) {
@@ -1189,8 +1208,9 @@ namespace mongo {
throw;
}
- if( e.interrupted() )
- throw;
+ if( e.interrupted() ) {
+ killCurrentOp.checkForInterrupt();
+ }
if ( ! dropDups )
throw;
@@ -1276,7 +1296,7 @@ namespace mongo {
log(1) << "\t fastBuildIndex dupsToDrop:" << dupsToDrop.size() << endl;
for( list<DiskLoc>::iterator i = dupsToDrop.begin(); i != dupsToDrop.end(); i++ ){
- theDataFileMgr.deleteRecord( ns, i->rec(), *i, false, true );
+ theDataFileMgr.deleteRecord( ns, i->rec(), *i, false, true , true );
getDur().commitIfNeeded();
}
@@ -1302,18 +1322,27 @@ namespace mongo {
while ( cc->ok() ) {
BSONObj js = cc->current();
try {
- _indexRecord(d, idxNo, js, cc->currLoc(), dupsAllowed);
+ {
+ if ( !dupsAllowed && dropDups ) {
+ LastError::Disabled led( lastError.get() );
+ _indexRecord(d, idxNo, js, cc->currLoc(), dupsAllowed);
+ }
+ else {
+ _indexRecord(d, idxNo, js, cc->currLoc(), dupsAllowed);
+ }
+ }
cc->advance();
}
catch( AssertionException& e ) {
- if( e.interrupted() )
- throw;
+ if( e.interrupted() ) {
+ killCurrentOp.checkForInterrupt();
+ }
if ( dropDups ) {
DiskLoc toDelete = cc->currLoc();
bool ok = cc->advance();
cc->updateLocation();
- theDataFileMgr.deleteRecord( ns, toDelete.rec(), toDelete, false, true );
+ theDataFileMgr.deleteRecord( ns, toDelete.rec(), toDelete, false, true , true );
if( ClientCursor::find(id, false) == 0 ) {
cc.release();
if( !ok ) {
diff --git a/db/pdfile.h b/db/pdfile.h
index 0f45e6d337e..64dba68ca41 100644
--- a/db/pdfile.h
+++ b/db/pdfile.h
@@ -142,7 +142,7 @@ namespace mongo {
static Record* getRecord(const DiskLoc& dl);
static DeletedRecord* makeDeletedRecord(const DiskLoc& dl, int len);
- void deleteRecord(const char *ns, Record *todelete, const DiskLoc& dl, bool cappedOK = false, bool noWarn = false);
+ void deleteRecord(const char *ns, Record *todelete, const DiskLoc& dl, bool cappedOK = false, bool noWarn = false, bool logOp=false);
/* does not clean up indexes, etc. : just deletes the record in the pdfile. use deleteRecord() to unindex */
void _deleteRecord(NamespaceDetails *d, const char *ns, Record *todelete, const DiskLoc& dl);
diff --git a/db/queryoptimizer.cpp b/db/queryoptimizer.cpp
index 4173eaaa2cd..e49e9b11ecb 100644
--- a/db/queryoptimizer.cpp
+++ b/db/queryoptimizer.cpp
@@ -52,7 +52,7 @@ namespace mongo {
QueryPlan::QueryPlan(
NamespaceDetails *d, int idxNo,
- const FieldRangeSetPair &frsp, const FieldRangeSetPair &originalFrsp, const BSONObj &originalQuery, const BSONObj &order, const BSONObj &startKey, const BSONObj &endKey , string special ) :
+ const FieldRangeSetPair &frsp, const FieldRangeSetPair *originalFrsp, const BSONObj &originalQuery, const BSONObj &order, const BSONObj &startKey, const BSONObj &endKey , string special ) :
_d(d), _idxNo(idxNo),
_frs( frsp.frsForIndex( _d, _idxNo ) ),
_frsMulti( frsp.frsForIndex( _d, -1 ) ),
@@ -166,12 +166,17 @@ doneCheckOrder:
_optimal = true;
if ( exactIndexedQueryCount == _frs.nNontrivialRanges() &&
orderFieldsUnindexed.size() == 0 &&
- exactIndexedQueryCount == _index->keyPattern().nFields() &&
+ exactIndexedQueryCount == idxKey.nFields() &&
exactIndexedQueryCount == _originalQuery.nFields() ) {
_exactKeyMatch = true;
}
_frv.reset( new FieldRangeVector( _frs, idxSpec, _direction ) );
- _originalFrv.reset( new FieldRangeVector( originalFrsp.frsForIndex( _d, _idxNo ), idxSpec, _direction ) );
+ if ( originalFrsp ) {
+ _originalFrv.reset( new FieldRangeVector( originalFrsp->frsForIndex( _d, _idxNo ), idxSpec, _direction ) );
+ }
+ else {
+ _originalFrv = _frv;
+ }
if ( _startOrEndSpec ) {
BSONObj newStart, newEnd;
if ( !startKey.isEmpty() )
@@ -206,8 +211,25 @@ doneCheckOrder:
}
if ( willScanTable() ) {
- if ( _frs.nNontrivialRanges() )
+ if ( _frs.nNontrivialRanges() ) {
checkTableScanAllowed( _frs.ns() );
+
+ // if we are doing a table scan on _id
+ // and it's a capped collection
+ // we warn, as it's a common user error
+ // .system. and local collections are exempt
+ if ( _d && _d->capped && _frs.range( "_id" ).nontrivial() ) {
+ if ( cc().isSyncThread() ||
+ str::contains( _frs.ns() , ".system." ) ||
+ str::startsWith( _frs.ns() , "local." ) ) {
+ // ok
+ }
+ else {
+ warning() << "_id query on capped collection without an _id index, performance will be poor; collection: " << _frs.ns() << endl;
+ //uassert( 14820, str::stream() << "doing _id query on a capped collection without an index is not allowed: " << _frs.ns() ,
+ }
+ }
+ }
return findTableScan( _frs.ns(), _order, startLoc );
}
@@ -328,7 +350,7 @@ doneCheckOrder:
massert( 10365 , errmsg, indexDetailsForRange( _frsp->ns(), errmsg, _min, _max, keyPattern ) );
}
NamespaceDetails *d = nsdetails(_ns);
- _plans.push_back( QueryPlanPtr( new QueryPlan( d, d->idxNo(id), *_frsp, *_originalFrsp, _originalQuery, _order, _min, _max ) ) );
+ _plans.push_back( QueryPlanPtr( new QueryPlan( d, d->idxNo(id), *_frsp, _originalFrsp.get(), _originalQuery, _order, _min, _max ) ) );
}
// returns an IndexDetails * for a hint, 0 if hint is $natural.
@@ -374,7 +396,7 @@ doneCheckOrder:
NamespaceDetails *d = nsdetails( ns );
if ( !d || !_frsp->matchPossible() ) {
// Table scan plan, when no matches are possible
- _plans.push_back( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, *_originalFrsp, _originalQuery, _order ) ) );
+ _plans.push_back( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, _originalFrsp.get(), _originalQuery, _order ) ) );
return;
}
@@ -388,7 +410,7 @@ doneCheckOrder:
else {
massert( 10366 , "natural order cannot be specified with $min/$max", _min.isEmpty() && _max.isEmpty() );
// Table scan plan
- _plans.push_back( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, *_originalFrsp, _originalQuery, _order ) ) );
+ _plans.push_back( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, _originalFrsp.get(), _originalQuery, _order ) ) );
}
return;
}
@@ -398,7 +420,7 @@ doneCheckOrder:
BSONObj keyPattern;
IndexDetails *idx = indexDetailsForRange( ns, errmsg, _min, _max, keyPattern );
massert( 10367 , errmsg, idx );
- _plans.push_back( QueryPlanPtr( new QueryPlan( d, d->idxNo(*idx), *_frsp, *_originalFrsp, _originalQuery, _order, _min, _max ) ) );
+ _plans.push_back( QueryPlanPtr( new QueryPlan( d, d->idxNo(*idx), *_frsp, _originalFrsp.get(), _originalQuery, _order, _min, _max ) ) );
return;
}
@@ -407,13 +429,13 @@ doneCheckOrder:
if ( idx >= 0 ) {
_usingPrerecordedPlan = true;
_mayRecordPlan = false;
- _plans.push_back( QueryPlanPtr( new QueryPlan( d , idx , *_frsp , *_originalFrsp , _originalQuery, _order ) ) );
+ _plans.push_back( QueryPlanPtr( new QueryPlan( d , idx , *_frsp , _originalFrsp.get() , _originalQuery, _order ) ) );
return;
}
}
if ( _originalQuery.isEmpty() && _order.isEmpty() ) {
- _plans.push_back( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, *_originalFrsp, _originalQuery, _order ) ) );
+ _plans.push_back( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, _originalFrsp.get(), _originalQuery, _order ) ) );
return;
}
@@ -428,7 +450,7 @@ doneCheckOrder:
if ( spec.getTypeName() == _special && spec.suitability( _originalQuery , _order ) ) {
_usingPrerecordedPlan = true;
_mayRecordPlan = false;
- _plans.push_back( QueryPlanPtr( new QueryPlan( d , j , *_frsp , *_originalFrsp , _originalQuery, _order ,
+ _plans.push_back( QueryPlanPtr( new QueryPlan( d , j , *_frsp , _originalFrsp.get() , _originalQuery, _order ,
BSONObj() , BSONObj() , _special ) ) );
return;
}
@@ -445,7 +467,7 @@ doneCheckOrder:
_oldNScanned = oldNScanned;
if ( !strcmp( bestIndex.firstElementFieldName(), "$natural" ) ) {
// Table scan plan
- p.reset( new QueryPlan( d, -1, *_frsp, *_originalFrsp, _originalQuery, _order ) );
+ p.reset( new QueryPlan( d, -1, *_frsp, _originalFrsp.get(), _originalQuery, _order ) );
}
NamespaceDetails::IndexIterator i = d->ii();
@@ -453,7 +475,7 @@ doneCheckOrder:
int j = i.pos();
IndexDetails& ii = i.next();
if( ii.keyPattern().woCompare(bestIndex) == 0 ) {
- p.reset( new QueryPlan( d, j, *_frsp, *_originalFrsp, _originalQuery, _order ) );
+ p.reset( new QueryPlan( d, j, *_frsp, _originalFrsp.get(), _originalQuery, _order ) );
}
}
@@ -480,7 +502,7 @@ doneCheckOrder:
if ( !_frsp->matchPossible() || ( _frsp->noNontrivialRanges() && _order.isEmpty() ) ||
( !_order.isEmpty() && !strcmp( _order.firstElementFieldName(), "$natural" ) ) ) {
// Table scan plan
- addPlan( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, *_originalFrsp, _originalQuery, _order ) ), checkFirst );
+ addPlan( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, _originalFrsp.get(), _originalQuery, _order ) ), checkFirst );
return;
}
@@ -490,10 +512,11 @@ doneCheckOrder:
QueryPlanPtr optimalPlan;
for( int i = 0; i < d->nIndexes; ++i ) {
if ( normalQuery ) {
- if ( !_frsp->matchPossibleForIndex( d, i, d->idx( i ).keyPattern() ) ) {
+ BSONObj keyPattern = d->idx( i ).keyPattern();
+ if ( !_frsp->matchPossibleForIndex( d, i, keyPattern ) ) {
+ // If no match is possible, only generate a trivial plan that won't
// scan any documents.
- QueryPlanPtr p( new QueryPlan( d, i, *_frsp, *_originalFrsp, _originalQuery, _order ) );
+ QueryPlanPtr p( new QueryPlan( d, i, *_frsp, _originalFrsp.get(), _originalQuery, _order ) );
addPlan( p, checkFirst );
return;
}
@@ -502,7 +525,7 @@ doneCheckOrder:
}
}
- QueryPlanPtr p( new QueryPlan( d, i, *_frsp, *_originalFrsp, _originalQuery, _order ) );
+ QueryPlanPtr p( new QueryPlan( d, i, *_frsp, _originalFrsp.get(), _originalQuery, _order ) );
if ( p->optimal() ) {
if ( !optimalPlan.get() ) {
optimalPlan = p;
@@ -520,7 +543,7 @@ doneCheckOrder:
addPlan( *i, checkFirst );
// Table scan plan
- addPlan( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, *_originalFrsp, _originalQuery, _order ) ), checkFirst );
+ addPlan( QueryPlanPtr( new QueryPlan( d, -1, *_frsp, _originalFrsp.get(), _originalQuery, _order ) ), checkFirst );
}
shared_ptr<QueryOp> QueryPlanSet::runOp( QueryOp &op ) {
@@ -538,7 +561,7 @@ doneCheckOrder:
return r.runUntilFirstCompletes();
}
- shared_ptr<QueryOp> QueryPlanSet::nextOp( QueryOp &originalOp ) {
+ shared_ptr<QueryOp> QueryPlanSet::nextOp( QueryOp &originalOp, bool retried ) {
if ( !_runner ) {
_runner.reset( new Runner( *this, originalOp ) );
shared_ptr<QueryOp> op = _runner->init();
@@ -553,10 +576,14 @@ doneCheckOrder:
if ( !_usingPrerecordedPlan || _bestGuessOnly || _plans.size() > 1 ) {
return op;
}
+
+ // Avoid an infinite loop here
+ uassert( 15878, str::stream() << "query plans not successful even with no constraints, potentially due to additional sort", ! retried );
+
// Retry with all candidate plans.
QueryUtilIndexed::clearIndexesForPatterns( *_frsp, _order );
init();
- return nextOp( originalOp );
+ return nextOp( originalOp, true );
}
bool QueryPlanSet::prepareToYield() {
@@ -815,24 +842,29 @@ doneCheckOrder:
_ns( ns ),
_or( !query.getField( "$or" ).eoo() ),
_query( query.getOwned() ),
- _org( ns, _query ),
_i(),
_honorRecordedPlan( honorRecordedPlan ),
_bestGuessOnly( bestGuessOnly ),
_hint( ( hint && !hint->eoo() ) ? hint->wrap() : BSONObj() ),
_mayYield( mayYield ),
_tableScanned() {
- if ( !order.isEmpty() || !min.isEmpty() || !max.isEmpty() || !_org.getSpecial().empty() ) {
+ if ( !order.isEmpty() || !min.isEmpty() || !max.isEmpty() ) {
_or = false;
}
- if ( _or && uselessOr( _hint.firstElement() ) ) {
- _or = false;
+ if ( _or ) {
+ // Only construct an OrRangeGenerator if we may handle $or clauses.
+ _org.reset( new OrRangeGenerator( ns, _query ) );
+ if ( !_org->getSpecial().empty() ) {
+ _or = false;
+ }
+ else if ( uselessOr( _hint.firstElement() ) ) {
+ _or = false;
+ }
}
// if _or == false, don't use or clauses for index selection
if ( !_or ) {
auto_ptr<FieldRangeSetPair> frsp( new FieldRangeSetPair( ns, _query, true ) );
- auto_ptr<FieldRangeSetPair> oldFrsp( new FieldRangeSetPair( *frsp ) );
- _currentQps.reset( new QueryPlanSet( ns, frsp, oldFrsp, _query, order, hint, honorRecordedPlan, min, max, _bestGuessOnly, _mayYield ) );
+ _currentQps.reset( new QueryPlanSet( ns, frsp, auto_ptr<FieldRangeSetPair>(), _query, order, hint, honorRecordedPlan, min, max, _bestGuessOnly, _mayYield ) );
}
else {
BSONElement e = _query.getField( "$or" );
@@ -847,8 +879,8 @@ doneCheckOrder:
return _currentQps->runOp( op );
}
++_i;
- auto_ptr<FieldRangeSetPair> frsp( _org.topFrsp() );
- auto_ptr<FieldRangeSetPair> originalFrsp( _org.topFrspOriginal() );
+ auto_ptr<FieldRangeSetPair> frsp( _org->topFrsp() );
+ auto_ptr<FieldRangeSetPair> originalFrsp( _org->topFrspOriginal() );
BSONElement hintElt = _hint.firstElement();
_currentQps.reset( new QueryPlanSet( _ns, frsp, originalFrsp, _query, BSONObj(), &hintElt, _honorRecordedPlan, BSONObj(), BSONObj(), _bestGuessOnly, _mayYield ) );
shared_ptr<QueryOp> ret( _currentQps->runOp( op ) );
@@ -856,7 +888,7 @@ doneCheckOrder:
_tableScanned = true;
} else {
// If the full table was scanned, don't bother popping the last or clause.
- _org.popOrClause( ret->qp().nsd(), ret->qp().idxNo(), ret->qp().indexed() ? ret->qp().indexKey() : BSONObj() );
+ _org->popOrClause( ret->qp().nsd(), ret->qp().idxNo(), ret->qp().indexed() ? ret->qp().indexKey() : BSONObj() );
}
return ret;
}
@@ -877,7 +909,7 @@ doneCheckOrder:
if ( op->qp().willScanTable() ) {
_tableScanned = true;
} else {
- _org.popOrClause( op->qp().nsd(), op->qp().idxNo(), op->qp().indexed() ? op->qp().indexKey() : BSONObj() );
+ _org->popOrClause( op->qp().nsd(), op->qp().idxNo(), op->qp().indexed() ? op->qp().indexKey() : BSONObj() );
}
return op;
}
@@ -887,8 +919,8 @@ doneCheckOrder:
shared_ptr<QueryOp> op;
while( mayRunMore() ) {
++_i;
- auto_ptr<FieldRangeSetPair> frsp( _org.topFrsp() );
- auto_ptr<FieldRangeSetPair> originalFrsp( _org.topFrspOriginal() );
+ auto_ptr<FieldRangeSetPair> frsp( _org->topFrsp() );
+ auto_ptr<FieldRangeSetPair> originalFrsp( _org->topFrspOriginal() );
BSONElement hintElt = _hint.firstElement();
_currentQps.reset( new QueryPlanSet( _ns, frsp, originalFrsp, _query, BSONObj(), &hintElt, _honorRecordedPlan, BSONObj(), BSONObj(), _bestGuessOnly, _mayYield ) );
op = nextOpHandleEndOfClause();
@@ -954,9 +986,9 @@ doneCheckOrder:
if ( !id ) {
return true;
}
- return QueryUtilIndexed::uselessOr( _org, nsd, nsd->idxNo( *id ) );
+ return QueryUtilIndexed::uselessOr( *_org, nsd, nsd->idxNo( *id ) );
}
- return QueryUtilIndexed::uselessOr( _org, nsd, -1 );
+ return QueryUtilIndexed::uselessOr( *_org, nsd, -1 );
}
MultiCursor::MultiCursor( const char *ns, const BSONObj &pattern, const BSONObj &order, shared_ptr<CursorOp> op, bool mayYield )
@@ -1199,12 +1231,13 @@ doneCheckOrder:
}
bool QueryUtilIndexed::indexUseful( const FieldRangeSetPair &frsp, NamespaceDetails *d, int idxNo, const BSONObj &order ) {
- frsp.assertValidIndex( d, idxNo );
- if ( !frsp.matchPossibleForIndex( d, idxNo, d->idx( idxNo ).keyPattern() ) ) {
+ DEV frsp.assertValidIndex( d, idxNo );
+ BSONObj keyPattern = d->idx( idxNo ).keyPattern();
+ if ( !frsp.matchPossibleForIndex( d, idxNo, keyPattern ) ) {
// No matches are possible in the index so the index may be useful.
return true;
}
- return d->idx( idxNo ).getSpec().suitability( frsp.simplifiedQueryForIndex( d, idxNo, d->idx( idxNo ).keyPattern() ), order ) != USELESS;
+ return d->idx( idxNo ).getSpec().suitability( frsp.simplifiedQueryForIndex( d, idxNo, keyPattern ), order ) != USELESS;
}
void QueryUtilIndexed::clearIndexesForPatterns( const FieldRangeSetPair &frsp, const BSONObj &order ) {
diff --git a/db/queryoptimizer.h b/db/queryoptimizer.h
index e55e791e1ca..ad6b985ab1f 100644
--- a/db/queryoptimizer.h
+++ b/db/queryoptimizer.h
@@ -35,10 +35,13 @@ namespace mongo {
class QueryPlan : boost::noncopyable {
public:
+ /**
+ * @param originalFrsp - original constraints for this query clause. If null, frsp will be used instead.
+ */
QueryPlan(NamespaceDetails *d,
int idxNo, // -1 = no index
const FieldRangeSetPair &frsp,
- const FieldRangeSetPair &originalFrsp,
+ const FieldRangeSetPair *originalFrsp,
const BSONObj &originalQuery,
const BSONObj &order,
const BSONObj &startKey = BSONObj(),
@@ -245,6 +248,9 @@ namespace mongo {
typedef boost::shared_ptr<QueryPlan> QueryPlanPtr;
typedef vector<QueryPlanPtr> PlanSet;
+ /**
+ * @param originalFrsp - original constraints for this query clause; if null, frsp will be used.
+ */
QueryPlanSet( const char *ns,
auto_ptr<FieldRangeSetPair> frsp,
auto_ptr<FieldRangeSetPair> originalFrsp,
@@ -272,7 +278,7 @@ namespace mongo {
}
/** Initialize or iterate a runner generated from @param originalOp. */
- shared_ptr<QueryOp> nextOp( QueryOp &originalOp );
+ shared_ptr<QueryOp> nextOp( QueryOp &originalOp, bool retried = false );
/** Yield the runner member. */
@@ -290,7 +296,7 @@ namespace mongo {
//for testing
const FieldRangeSetPair &frsp() const { return *_frsp; }
- const FieldRangeSetPair &originalFrsp() const { return *_originalFrsp; }
+ const FieldRangeSetPair *originalFrsp() const { return _originalFrsp.get(); }
bool modifiedKeys() const;
bool hasMultiKey() const;
@@ -420,7 +426,7 @@ namespace mongo {
shared_ptr<Cursor> singleCursor() const;
/** @return true iff more $or clauses need to be scanned. */
- bool mayRunMore() const { return _or ? ( !_tableScanned && !_org.orFinished() ) : _i == 0; }
+ bool mayRunMore() const { return _or ? ( !_tableScanned && !_org->orFinished() ) : _i == 0; }
/** @return non-$or version of explain output. */
BSONObj oldExplain() const { assertNotOr(); return _currentQps->explain(); }
/** @return true iff this is not a $or query and a plan is selected based on previous success of this plan. */
@@ -445,7 +451,7 @@ namespace mongo {
const char * _ns;
bool _or;
BSONObj _query;
- OrRangeGenerator _org;
+ shared_ptr<OrRangeGenerator> _org; // May be null in certain non $or query cases.
auto_ptr<QueryPlanSet> _currentQps;
int _i;
bool _honorRecordedPlan;
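QueryPlan now takes originalFrsp as a pointer and falls back to frsp when it is null, which lets callers skip building the duplicate constraint set. A minimal sketch of that null-fallback convention using placeholder types:

    #include <iostream>
    #include <string>

    struct Constraints { std::string desc; };

    struct Plan {
        const Constraints* effective;
        Plan(const Constraints& frsp, const Constraints* originalFrsp)
            : effective(originalFrsp ? originalFrsp : &frsp) {}   // null means "reuse frsp"
    };

    int main() {
        Constraints current  = { "current" };
        Constraints original = { "original" };
        Plan a(current, &original);
        Plan b(current, 0);                       // no original supplied: fall back
        std::cout << a.effective->desc << " " << b.effective->desc << "\n";
    }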
diff --git a/db/queryutil-inl.h b/db/queryutil-inl.h
index 2c3a757b385..d0fc212cef9 100644
--- a/db/queryutil-inl.h
+++ b/db/queryutil-inl.h
@@ -130,5 +130,24 @@ namespace mongo {
}
return ret;
}
+
+ inline bool FieldRangeSetPair::matchPossibleForIndex( NamespaceDetails *d, int idxNo, const BSONObj &keyPattern ) const {
+ assertValidIndexOrNoIndex( d, idxNo );
+ if ( !matchPossible() ) {
+ return false;
+ }
+ if ( idxNo < 0 ) {
+ // multi key matchPossible() is true, so return true.
+ return true;
+ }
+ return frsForIndex( d, idxNo ).matchPossibleForIndex( keyPattern );
+ }
+ inline void FieldRangeSetPair::assertValidIndexOrNoIndex( const NamespaceDetails *d, int idxNo ) const {
+ massert( 14049, "FieldRangeSetPair invalid index specified", idxNo >= -1 );
+ if ( idxNo >= 0 ) {
+ assertValidIndex( d, idxNo );
+ }
+ }
+
} // namespace mongo
diff --git a/db/queryutil.cpp b/db/queryutil.cpp
index ec9ee693511..717eac816b8 100644
--- a/db/queryutil.cpp
+++ b/db/queryutil.cpp
@@ -28,6 +28,7 @@
namespace mongo {
extern BSONObj staticNull;
+ extern BSONObj staticUndefined;
/** returns a string that when used as a matcher, would match a super set of regex()
returns "" for complex regular expressions
@@ -79,6 +80,10 @@ namespace mongo {
r = r.substr( 0 , r.size() - 1 );
return r; //breaking here fails with /^a?/
}
+ else if (c == '|') {
+ // whole match so far is optional. Nothing we can do here.
+ return string();
+ }
else if (c == '\\') {
c = *(regex++);
if (c == 'Q'){
@@ -107,7 +112,7 @@ namespace mongo {
ss << c;
}
}
- else if (strchr("^$.[|()+{", c)) {
+ else if (strchr("^$.[()+{", c)) {
// list of "metacharacters" from man pcrepattern
r = ss.str();
break;
@@ -153,25 +158,33 @@ namespace mongo {
FieldRange::FieldRange( const BSONElement &e, bool singleKey, bool isNot, bool optimize )
: _singleKey( singleKey ) {
+ int op = e.getGtLtOp();
+
// NOTE with $not, we could potentially form a complementary set of intervals.
- if ( !isNot && !e.eoo() && e.type() != RegEx && e.getGtLtOp() == BSONObj::opIN ) {
+ if ( !isNot && !e.eoo() && e.type() != RegEx && op == BSONObj::opIN ) {
set<BSONElement,element_lt> vals;
vector<FieldRange> regexes;
uassert( 12580 , "invalid query" , e.isABSONObj() );
BSONObjIterator i( e.embeddedObject() );
while( i.more() ) {
BSONElement ie = i.next();
+ uassert( 15881, "$elemMatch not allowed within $in",
+ ie.type() != Object ||
+ ie.embeddedObject().firstElement().getGtLtOp() != BSONObj::opELEM_MATCH );
if ( ie.type() == RegEx ) {
regexes.push_back( FieldRange( ie, singleKey, false, optimize ) );
}
else {
- // A document array may be indexed by its first element, or
- // as a full array if it is embedded within another array.
+ // A document array may be indexed by its first element, by undefined
+ // if it is empty, or as a full array if it is embedded within another
+ // array.
vals.insert( ie );
if ( ie.type() == Array ) {
- if ( !ie.embeddedObject().firstElement().eoo() ) {
- vals.insert( ie.embeddedObject().firstElement() );
- }
+ BSONElement temp = ie.embeddedObject().firstElement();
+ if ( temp.eoo() ) {
+ temp = staticUndefined.firstElement();
+ }
+ vals.insert( temp );
}
}
}
@@ -185,17 +198,21 @@ namespace mongo {
return;
}
- // A document array may be indexed by its first element, or
- // as a full array if it is embedded within another array.
- if ( e.type() == Array && e.getGtLtOp() == BSONObj::Equality ) {
+ // A document array may be indexed by its first element, by undefined
+ // if it is empty, or as a full array if it is embedded within another
+ // array.
+ if ( e.type() == Array && op == BSONObj::Equality ) {
_intervals.push_back( FieldInterval(e) );
- const BSONElement& temp = e.embeddedObject().firstElement();
- if ( ! temp.eoo() ) {
- if ( temp < e )
- _intervals.insert( _intervals.begin() , temp );
- else
- _intervals.push_back( FieldInterval(temp) );
+ BSONElement temp = e.embeddedObject().firstElement();
+ if ( temp.eoo() ) {
+ temp = staticUndefined.firstElement();
+ }
+ if ( temp < e ) {
+ _intervals.insert( _intervals.begin() , temp );
+ }
+ else {
+ _intervals.push_back( FieldInterval(temp) );
}
return;
@@ -215,8 +232,6 @@ namespace mongo {
if ( e.eoo() )
return;
- int op = e.getGtLtOp();
-
bool existsSpec = false;
if ( op == BSONObj::opEXISTS ) {
existsSpec = e.trueValue();
@@ -622,6 +637,27 @@ namespace mongo {
return o;
}
+ string FieldInterval::toString() const {
+ StringBuilder buf;
+ buf << ( _lower._inclusive ? "[" : "(" );
+ buf << _lower._bound;
+ buf << " , ";
+ buf << _upper._bound;
+ buf << ( _upper._inclusive ? "]" : ")" );
+ return buf.str();
+ }
+
+ string FieldRange::toString() const {
+ StringBuilder buf;
+ buf << "(FieldRange special: " << _special << " singleKey: " << _special << " intervals: ";
+ for( vector<FieldInterval>::const_iterator i = _intervals.begin(); i != _intervals.end(); ++i ) {
+ buf << i->toString();
+ }
+
+ buf << ")";
+ return buf.str();
+ }
+
string FieldRangeSet::getSpecial() const {
string s = "";
for ( map<string,FieldRange>::const_iterator i=_ranges.begin(); i!=_ranges.end(); i++ ) {
@@ -773,30 +809,32 @@ namespace mongo {
}
void FieldRangeSet::processQueryField( const BSONElement &e, bool optimize ) {
- if ( strcmp( e.fieldName(), "$and" ) == 0 ) {
- uassert( 14816 , "$and expression must be a nonempty array" , e.type() == Array && e.embeddedObject().nFields() > 0 );
- BSONObjIterator i( e.embeddedObject() );
- while( i.more() ) {
- BSONElement e = i.next();
- uassert( 14817 , "$and elements must be objects" , e.type() == Object );
- BSONObjIterator j( e.embeddedObject() );
- while( j.more() ) {
- processQueryField( j.next(), optimize );
- }
- }
- }
+ if ( e.fieldName()[ 0 ] == '$' ) {
+ if ( strcmp( e.fieldName(), "$and" ) == 0 ) {
+ uassert( 14816 , "$and expression must be a nonempty array" , e.type() == Array && e.embeddedObject().nFields() > 0 );
+ BSONObjIterator i( e.embeddedObject() );
+ while( i.more() ) {
+ BSONElement e = i.next();
+ uassert( 14817 , "$and elements must be objects" , e.type() == Object );
+ BSONObjIterator j( e.embeddedObject() );
+ while( j.more() ) {
+ processQueryField( j.next(), optimize );
+ }
+ }
+ }
- if ( strcmp( e.fieldName(), "$where" ) == 0 ) {
- return;
- }
+ if ( strcmp( e.fieldName(), "$where" ) == 0 ) {
+ return;
+ }
- if ( strcmp( e.fieldName(), "$or" ) == 0 ) {
- return;
- }
+ if ( strcmp( e.fieldName(), "$or" ) == 0 ) {
+ return;
+ }
- if ( strcmp( e.fieldName(), "$nor" ) == 0 ) {
- return;
- }
+ if ( strcmp( e.fieldName(), "$nor" ) == 0 ) {
+ return;
+ }
+ }
bool equality = ( getGtLtOp( e ) == BSONObj::Equality );
if ( equality && e.type() == Object ) {
@@ -1055,32 +1093,11 @@ namespace mongo {
return ret;
}
- const FieldRangeSet &FieldRangeSetPair::frsForIndex( const NamespaceDetails* nsd, int idxNo ) const {
- assertValidIndexOrNoIndex( nsd, idxNo );
- if ( idxNo < 0 ) {
- // An unindexed cursor cannot have a "single key" constraint.
- return _multiKey;
- }
- return nsd->isMultikey( idxNo ) ? _multiKey : _singleKey;
- }
-
bool FieldRangeSetPair::noNontrivialRanges() const {
return _singleKey.matchPossible() && _singleKey.nNontrivialRanges() == 0 &&
_multiKey.matchPossible() && _multiKey.nNontrivialRanges() == 0;
}
- bool FieldRangeSetPair::matchPossibleForIndex( NamespaceDetails *d, int idxNo, const BSONObj &keyPattern ) const {
- assertValidIndexOrNoIndex( d, idxNo );
- if ( !matchPossible() ) {
- return false;
- }
- if ( idxNo < 0 ) {
- // multi key matchPossible() is true, so return true.
- return true;
- }
- return frsForIndex( d, idxNo ).matchPossibleForIndex( keyPattern );
- }
-
FieldRangeSetPair &FieldRangeSetPair::operator&=( const FieldRangeSetPair &other ) {
_singleKey &= other._singleKey;
_multiKey &= other._multiKey;
@@ -1093,21 +1110,23 @@ namespace mongo {
return *this;
}
+ BSONObj FieldRangeSetPair::simplifiedQueryForIndex( NamespaceDetails *d, int idxNo, const BSONObj &keyPattern ) const {
+ return frsForIndex( d, idxNo ).simplifiedQuery( keyPattern );
+ }
+
void FieldRangeSetPair::assertValidIndex( const NamespaceDetails *d, int idxNo ) const {
massert( 14048, "FieldRangeSetPair invalid index specified", idxNo >= 0 && idxNo < d->nIndexes );
}
-
- void FieldRangeSetPair::assertValidIndexOrNoIndex( const NamespaceDetails *d, int idxNo ) const {
- massert( 14049, "FieldRangeSetPair invalid index specified", idxNo >= -1 );
- if ( idxNo >= 0 ) {
- assertValidIndex( d, idxNo );
+
+ const FieldRangeSet &FieldRangeSetPair::frsForIndex( const NamespaceDetails* nsd, int idxNo ) const {
+ assertValidIndexOrNoIndex( nsd, idxNo );
+ if ( idxNo < 0 ) {
+ // An unindexed cursor cannot have a "single key" constraint.
+ return _multiKey;
}
+ return nsd->isMultikey( idxNo ) ? _multiKey : _singleKey;
}
-
- BSONObj FieldRangeSetPair::simplifiedQueryForIndex( NamespaceDetails *d, int idxNo, const BSONObj &keyPattern ) const {
- return frsForIndex( d, idxNo ).simplifiedQuery( keyPattern );
- }
-
+
bool FieldRangeVector::matchesElement( const BSONElement &e, int i, bool forward ) const {
bool eq;
int l = matchingLowElement( e, i, forward, eq );
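The regex-prefix extraction above now bails out on '|', since alternation makes everything matched so far optional and no index prefix can be trusted. A rough standalone sketch of that rule (deliberately simplified relative to the real parser):

    #include <iostream>
    #include <string>

    std::string indexablePrefix(const std::string& regex) {
        if (regex.empty() || regex[0] != '^') return "";    // must be anchored
        std::string prefix;
        for (size_t i = 1; i < regex.size(); ++i) {
            char c = regex[i];
            if (c == '|') return "";                         // alternation: give up entirely
            if (std::string("^$.[()+{*?\\").find(c) != std::string::npos)
                break;                                       // stop at other metacharacters
            prefix += c;
        }
        return prefix;
    }

    int main() {
        std::cout << indexablePrefix("^abc.*") << "\n";                              // abc
        std::cout << (indexablePrefix("^abc|def").empty() ? "(none)" : "?") << "\n"; // (none)
    }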
diff --git a/db/queryutil.h b/db/queryutil.h
index 00d2d264961..104cde28e4a 100644
--- a/db/queryutil.h
+++ b/db/queryutil.h
@@ -53,6 +53,8 @@ namespace mongo {
/** @return true iff the interval is an equality constraint. */
bool equality() const;
mutable int _cachedEquality;
+
+ string toString() const;
};
/**
@@ -103,6 +105,8 @@ namespace mongo {
* NOTE the resulting intervals might not be strictValid().
*/
void reverse( FieldRange &ret ) const;
+
+ string toString() const;
private:
BSONObj addObj( const BSONObj &o );
void finishOperation( const vector<FieldInterval> &newIntervals, const FieldRange &other );
diff --git a/db/record.cpp b/db/record.cpp
index f5fa972227a..18be9c75fe2 100644
--- a/db/record.cpp
+++ b/db/record.cpp
@@ -120,14 +120,16 @@ namespace mongo {
/**
* after this call, we assume the page is in ram
+ * @param doHalf if this is a known good access, we want to record it in the first half of the slices
* @return whether we know the page is in ram
*/
- bool access( size_t region , short offset ) {
+ bool access( size_t region , short offset , bool doHalf ) {
int regionHash = hash(region);
scoped_spinlock lk( _lock );
-
- RARELY {
+
+ static int rarely_count = 0;
+ if ( rarely_count++ % 2048 == 0 ) {
long long now = Listener::getElapsedTimeMillis();
RARELY if ( now == 0 ) {
tlog() << "warning Listener::getElapsedTimeMillis returning 0ms" << endl;
@@ -137,8 +139,8 @@ namespace mongo {
_rotate();
}
}
-
- for ( int i=0; i<NumSlices; i++ ) {
+
+ for ( int i=0; i<NumSlices / ( doHalf ? 2 : 1 ); i++ ) {
int pos = (_curSlice+i)%NumSlices;
State s = _slices[pos].get( regionHash , region , offset );
@@ -205,7 +207,7 @@ namespace mongo {
const size_t region = page >> 6;
const size_t offset = page & 0x3f;
- if ( ps::rolling.access( region , offset ) )
+ if ( ps::rolling.access( region , offset , false ) )
return true;
if ( ! blockSupported )
@@ -214,14 +216,11 @@ namespace mongo {
}
Record* Record::accessed() {
- if ( ! MemoryTrackingEnabled )
- return this;
-
const size_t page = (size_t)data >> 12;
const size_t region = page >> 6;
const size_t offset = page & 0x3f;
-
- ps::rolling.access( region , offset );
+
+ ps::rolling.access( region , offset , true );
return this;
}
diff --git a/db/repl.cpp b/db/repl.cpp
index a4ab6e4f0ea..3d08f2324c0 100644
--- a/db/repl.cpp
+++ b/db/repl.cpp
@@ -95,7 +95,7 @@ namespace mongo {
virtual LockType locktype() const { return WRITE; }
void help(stringstream&h) const { h << "resync (from scratch) an out of date replica slave.\nhttp://www.mongodb.org/display/DOCS/Master+Slave"; }
CmdResync() : Command("resync") { }
- virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
if( cmdLine.usingReplSets() ) {
errmsg = "resync command not currently supported with replica sets. See RS102 info in the mongodb documentations";
result.append("info", "http://www.mongodb.org/display/DOCS/Resyncing+a+Very+Stale+Replica+Set+Member");
@@ -232,7 +232,7 @@ namespace mongo {
}
virtual LockType locktype() const { return NONE; }
CmdIsMaster() : Command("isMaster", true, "ismaster") { }
- virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) {
+ virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) {
/* currently request to arbiter is (somewhat arbitrarily) an ismaster request that is not
authenticated.
we allow unauthenticated ismaster but we aren't as verbose informationally if
@@ -1407,6 +1407,7 @@ namespace mongo {
void newRepl();
void oldRepl();
+ void startReplSets(ReplSetCmdline*);
void startReplication() {
/* if we are going to be a replica set, we aren't doing other forms of replication. */
if( !cmdLine._replSet.empty() ) {
@@ -1416,6 +1417,11 @@ namespace mongo {
log() << "***" << endl;
}
newRepl();
+
+ replSet = true;
+ ReplSetCmdline *replSetCmdline = new ReplSetCmdline(cmdLine._replSet);
+ boost::thread t( boost::bind( &startReplSets, replSetCmdline) );
+
return;
}
diff --git a/db/repl/consensus.cpp b/db/repl/consensus.cpp
index 3a4dd9b5b3d..07ee2fa80a3 100644
--- a/db/repl/consensus.cpp
+++ b/db/repl/consensus.cpp
@@ -25,6 +25,7 @@ namespace mongo {
public:
CmdReplSetFresh() : ReplSetCommand("replSetFresh") { }
private:
+
bool shouldVeto(const BSONObj& cmdObj, string& errmsg) {
unsigned id = cmdObj["id"].Int();
const Member* primary = theReplSet->box.getPrimary();
@@ -66,7 +67,7 @@ namespace mongo {
return false;
}
- virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
if( !check(errmsg, result) )
return false;
@@ -101,7 +102,7 @@ namespace mongo {
public:
CmdReplSetElect() : ReplSetCommand("replSetElect") { }
private:
- virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
if( !check(errmsg, result) )
return false;
theReplSet->elect.electCmdReceived(cmdObj, &result);
@@ -152,7 +153,7 @@ namespace mongo {
LastYea &L = this->ly.ref(lk);
time_t now = time(0);
if( L.when + LeaseTime >= now && L.who != memberId ) {
- log(1) << "replSet not voting yea for " << memberId <<
+ LOG(1) << "replSet not voting yea for " << memberId <<
" voted for " << L.who << ' ' << now-L.when << " secs ago" << rsLog;
throw VoteException();
}
@@ -176,7 +177,7 @@ namespace mongo {
void Consensus::electCmdReceived(BSONObj cmd, BSONObjBuilder* _b) {
BSONObjBuilder& b = *_b;
DEV log() << "replSet received elect msg " << cmd.toString() << rsLog;
- else log(2) << "replSet received elect msg " << cmd.toString() << rsLog;
+ else LOG(2) << "replSet received elect msg " << cmd.toString() << rsLog;
string set = cmd["set"].String();
unsigned whoid = cmd["whoid"].Int();
int cfgver = cmd["cfgver"].Int();
@@ -309,7 +310,7 @@ namespace mongo {
allUp = false;
}
}
- log(1) << "replSet dev we are freshest of up nodes, nok:" << nok << " nTies:" << nTies << rsLog;
+ LOG(1) << "replSet dev we are freshest of up nodes, nok:" << nok << " nTies:" << nTies << rsLog;
assert( ord <= theReplSet->lastOpTimeWritten ); // <= as this may change while we are working...
return true;
}
diff --git a/db/repl/heartbeat.cpp b/db/repl/heartbeat.cpp
index 6247b4b1d13..7d3f78c73b5 100644
--- a/db/repl/heartbeat.cpp
+++ b/db/repl/heartbeat.cpp
@@ -39,6 +39,8 @@ namespace mongo {
extern bool replSetBlind;
extern ReplSettings replSettings;
+ unsigned int HeartbeatInfo::numPings;
+
long long HeartbeatInfo::timeDown() const {
if( up() ) return 0;
if( downSince == 0 )
@@ -51,7 +53,7 @@ namespace mongo {
public:
virtual bool adminOnly() const { return false; }
CmdReplSetHeartbeat() : ReplSetCommand("replSetHeartbeat") { }
- virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
if( replSetBlind )
return false;
@@ -62,6 +64,10 @@ namespace mongo {
return false;
}
+ if (!checkAuth(errmsg, result)) {
+ return false;
+ }
+
/* we want to keep heartbeat connections open when relinquishing primary. tag them here. */
{
AbstractMessagingPort *mp = cc().port();
@@ -147,7 +153,7 @@ namespace mongo {
string name() const { return "rsHealthPoll"; }
void doWork() {
if ( !theReplSet ) {
- log(2) << "replSet not initialized yet, skipping health poll this round" << rsLog;
+ LOG(2) << "replSet not initialized yet, skipping health poll this round" << rsLog;
return;
}
@@ -169,7 +175,10 @@ namespace mongo {
time_t after = mem.lastHeartbeat = before + (mem.ping / 1000);
// weight new ping with old pings
- mem.ping = (unsigned int)((old.ping * .8) + (mem.ping * .2));
+ // on the first ping, just use the ping value
+ if (old.ping != 0) {
+ mem.ping = (unsigned int)((old.ping * .8) + (mem.ping * .2));
+ }
if ( info["time"].isNumber() ) {
long long t = info["time"].numberLong();
@@ -191,6 +200,8 @@ namespace mongo {
mem.hbstate = MemberState(state.Int());
}
if( ok ) {
+ HeartbeatInfo::numPings++;
+
if( mem.upSince == 0 ) {
log() << "replSet info member " << h.toString() << " is up" << rsLog;
mem.upSince = mem.lastHeartbeat;
@@ -262,6 +273,7 @@ namespace mongo {
private:
void down(HeartbeatInfo& mem, string msg) {
mem.health = 0.0;
+ mem.ping = 0;
if( mem.upSince || mem.downSince == 0 ) {
mem.upSince = 0;
mem.downSince = jsTime();
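The heartbeat change smooths ping times with a weighted average of 0.8 old and 0.2 new, except that the very first sample (ping == 0) is used as-is; down() resets the ping so the next sample starts fresh. A tiny sketch with the same weights:

    #include <iostream>

    unsigned smoothPing(unsigned oldPing, unsigned newPing) {
        if (oldPing == 0)                 // first ping (or node was just marked down)
            return newPing;
        return (unsigned)((oldPing * .8) + (newPing * .2));
    }

    int main() {
        unsigned p = 0;
        p = smoothPing(p, 50);    // 50: first sample used directly
        p = smoothPing(p, 100);   // 60: weighted toward the history
        std::cout << p << "\n";
    }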
diff --git a/db/repl/replset_commands.cpp b/db/repl/replset_commands.cpp
index 79639acd567..68dab7eb3c1 100644
--- a/db/repl/replset_commands.cpp
+++ b/db/repl/replset_commands.cpp
@@ -45,14 +45,18 @@ namespace mongo {
help << "Just for regression tests.\n";
}
CmdReplSetTest() : ReplSetCommand("replSetTest") { }
- virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
log() << "replSet replSetTest command received: " << cmdObj.toString() << rsLog;
+
+ if (!checkAuth(errmsg, result)) {
+ return false;
+ }
+
if( cmdObj.hasElement("forceInitialSyncFailure") ) {
replSetForceInitialSyncFailure = (unsigned) cmdObj["forceInitialSyncFailure"].Number();
return true;
}
- // may not need this, but if removed check all tests still work:
if( !check(errmsg, result) )
return false;
@@ -76,11 +80,11 @@ namespace mongo {
help << "internal";
}
CmdReplSetGetRBID() : ReplSetCommand("replSetGetRBID") {
- // this is ok but micros or combo with some rand() and/or 64 bits might be better --
+ // this is ok but micros or combo with some rand() and/or 64 bits might be better --
// imagine a restart and a clock correction simultaneously (very unlikely but possible...)
rbid = (int) curTimeMillis64();
}
- virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
if( !check(errmsg, result) )
return false;
result.append("rbid",rbid);
@@ -108,7 +112,7 @@ namespace mongo {
help << "\nhttp://www.mongodb.org/display/DOCS/Replica+Set+Commands";
}
CmdReplSetGetStatus() : ReplSetCommand("replSetGetStatus", true) { }
- virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
if ( cmdObj["forShell"].trueValue() )
lastError.disableForCommand();
@@ -128,17 +132,21 @@ namespace mongo {
help << "\nhttp://www.mongodb.org/display/DOCS/Replica+Set+Commands";
}
CmdReplSetReconfig() : ReplSetCommand("replSetReconfig"), mutex("rsreconfig") { }
- virtual bool run(const string& a, BSONObj& b, string& errmsg, BSONObjBuilder& c, bool d) {
+ virtual bool run(const string& a, BSONObj& b, int e, string& errmsg, BSONObjBuilder& c, bool d) {
try {
rwlock_try_write lk(mutex);
- return _run(a,b,errmsg,c,d);
+ return _run(a,b,e,errmsg,c,d);
}
catch(rwlock_try_write::exception&) { }
errmsg = "a replSetReconfig is already in progress";
return false;
}
private:
- bool _run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool _run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ if ( !checkAuth(errmsg, result) ) {
+ return false;
+ }
+
if( cmdObj["replSetReconfig"].type() != Object ) {
errmsg = "no configuration specified";
return false;
@@ -209,7 +217,7 @@ namespace mongo {
}
CmdReplSetFreeze() : ReplSetCommand("replSetFreeze") { }
- virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
if( !check(errmsg, result) )
return false;
int secs = (int) cmdObj.firstElement().numberInt();
@@ -233,7 +241,7 @@ namespace mongo {
}
CmdReplSetStepDown() : ReplSetCommand("replSetStepDown") { }
- virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
if( !check(errmsg, result) )
return false;
if( !theReplSet->box.getState().primary() ) {
@@ -252,19 +260,19 @@ namespace mongo {
long long int diff = lastOp - closest;
result.append("closest", closest);
result.append("difference", diff);
-
+
if (diff < 0) {
// not our problem, but we'll wait until thing settle down
errmsg = "someone is ahead of the primary?";
return false;
}
-
+
if (diff > 10) {
errmsg = "no secondaries within 10 seconds of my optime";
return false;
}
}
-
+
int secs = (int) cmdObj.firstElement().numberInt();
if( secs == 0 )
secs = 60;
diff --git a/db/repl/rs.cpp b/db/repl/rs.cpp
index 84b92fe9297..243e087eff1 100644
--- a/db/repl/rs.cpp
+++ b/db/repl/rs.cpp
@@ -24,9 +24,12 @@
#include "rs.h"
#include "connections.h"
#include "../repl.h"
+#include "../instance.h"
-namespace mongo {
+using namespace std;
+namespace mongo {
+
using namespace bson;
bool replSet = false;
@@ -60,18 +63,43 @@ namespace mongo {
}
void ReplSetImpl::assumePrimary() {
- log(2) << "assuming primary" << endl;
+ LOG(2) << "replSet assuming primary" << endl;
assert( iAmPotentiallyHot() );
writelock lk("admin."); // so we are synchronized with _logOp()
+
+ // Make sure that new OpTimes are higher than existing ones even with clock skew
+ DBDirectClient c;
+ BSONObj lastOp = c.findOne( "local.oplog.rs", Query().sort(reverseNaturalObj), NULL, QueryOption_SlaveOk );
+ if ( !lastOp.isEmpty() ) {
+ OpTime::setLast( lastOp[ "ts" ].date() );
+ }
+
changeState(MemberState::RS_PRIMARY);
}
void ReplSetImpl::changeState(MemberState s) { box.change(s, _self); }
+ void ReplSetImpl::setMaintenanceMode(const bool inc) {
+ lock lk(this);
+
+ if (inc) {
+ log() << "replSet going into maintenance mode (" << _maintenanceMode << " other tasks)" << rsLog;
+
+ _maintenanceMode++;
+ changeState(MemberState::RS_RECOVERING);
+ }
+ else {
+ _maintenanceMode--;
+ // no need to change state, syncTail will try to go live as a secondary soon
+
+ log() << "leaving maintenance mode (" << _maintenanceMode << " other tasks)" << rsLog;
+ }
+ }
+
Member* ReplSetImpl::getMostElectable() {
lock lk(this);
-
- Member *max = 0;
+
+ Member *max = 0;
for (set<unsigned>::iterator it = _electableSet.begin(); it != _electableSet.end(); it++) {
const Member *temp = findById(*it);
@@ -91,7 +119,7 @@ namespace mongo {
const bool closeOnRelinquish = true;
void ReplSetImpl::relinquish() {
- log(2) << "attempting to relinquish" << endl;
+ LOG(2) << "replSet attempting to relinquish" << endl;
if( box.getState().primary() ) {
{
writelock lk("admin."); // so we are synchronized with _logOp()
@@ -239,7 +267,7 @@ namespace mongo {
if( myConfig().arbiterOnly )
b.append("arbiterOnly", true);
- if( myConfig().priority == 0 )
+ if( myConfig().priority == 0 && !myConfig().arbiterOnly)
b.append("passive", true);
if( myConfig().slaveDelay )
b.append("slaveDelay", myConfig().slaveDelay);
@@ -296,8 +324,10 @@ namespace mongo {
_currentSyncTarget(0),
_hbmsgTime(0),
_self(0),
+ _maintenanceMode(0),
mgr( new Manager(this) ),
ghost( new GhostSync(this) ) {
+
_cfg = 0;
memset(_hbmsg, 0, sizeof(_hbmsg));
strcpy( _hbmsg , "initial startup" );
@@ -306,7 +336,7 @@ namespace mongo {
_seeds = &replSetCmdline.seeds;
- log(1) << "replSet beginning startup..." << rsLog;
+ LOG(1) << "replSet beginning startup..." << rsLog;
loadConfig();
@@ -317,7 +347,7 @@ namespace mongo {
for( set<HostAndPort>::iterator i = replSetCmdline.seedSet.begin(); i != replSetCmdline.seedSet.end(); i++ ) {
if( i->isSelf() ) {
if( sss == 1 )
- log(1) << "replSet warning self is listed in the seed list and there are no other seeds listed did you intend that?" << rsLog;
+ LOG(1) << "replSet warning self is listed in the seed list and there are no other seeds listed did you intend that?" << rsLog;
}
else
log() << "replSet warning command line seed " << i->toString() << " is not present in the current repl set config" << rsLog;
@@ -382,7 +412,7 @@ namespace mongo {
getLastErrorDefault = new BSONObj( c.getLastErrorDefaults );
}
- list<const ReplSetConfig::MemberCfg*> newOnes;
+ list<ReplSetConfig::MemberCfg*> newOnes;
// additive short-cuts the new config setup. If we are just adding a
// node/nodes and nothing else is changing, this is additive. If it's
// not a reconfig, we're not adding anything
@@ -391,8 +421,8 @@ namespace mongo {
unsigned nfound = 0;
int me = 0;
for( vector<ReplSetConfig::MemberCfg>::iterator i = c.members.begin(); i != c.members.end(); i++ ) {
- const ReplSetConfig::MemberCfg& m = *i;
+ ReplSetConfig::MemberCfg& m = *i;
if( m.h.isSelf() ) {
me++;
}
@@ -443,8 +473,8 @@ namespace mongo {
// this is a shortcut for simple changes
if( additive ) {
log() << "replSet info : additive change to configuration" << rsLog;
- for( list<const ReplSetConfig::MemberCfg*>::const_iterator i = newOnes.begin(); i != newOnes.end(); i++ ) {
- const ReplSetConfig::MemberCfg* m = *i;
+ for( list<ReplSetConfig::MemberCfg*>::const_iterator i = newOnes.begin(); i != newOnes.end(); i++ ) {
+ ReplSetConfig::MemberCfg *m = *i;
Member *mi = new Member(m->h, m->_id, m, false);
/** we will indicate that new members are up() initially so that we don't relinquish our
@@ -456,6 +486,11 @@ namespace mongo {
_members.push(mi);
startHealthTaskFor(mi);
}
+
+ // if we aren't creating new members, we may have to update the
+ // groups for the current ones
+ _cfg->updateMembers(_members);
+
return true;
}
@@ -479,7 +514,7 @@ namespace mongo {
string members = "";
for( vector<ReplSetConfig::MemberCfg>::iterator i = _cfg->members.begin(); i != _cfg->members.end(); i++ ) {
- const ReplSetConfig::MemberCfg& m = *i;
+ ReplSetConfig::MemberCfg& m = *i;
Member *mi;
members += ( members == "" ? "" : ", " ) + m.h.toString();
if( m.h.isSelf() ) {
@@ -594,7 +629,7 @@ namespace mongo {
if( ++once == 1 )
log() << "replSet info you may need to run replSetInitiate -- rs.initiate() in the shell -- if that is not already done" << rsLog;
if( _seeds->size() == 0 )
- log(1) << "replSet info no seed hosts were specified on the --replSet command line" << rsLog;
+ LOG(1) << "replSet info no seed hosts were specified on the --replSet command line" << rsLog;
}
else {
startupStatus = EMPTYUNREACHABLE;
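
The log(n) -> LOG(n) conversions above (and throughout this patch) switch to a macro that skips evaluating the stream operands entirely when the verbosity is below n. A minimal self-contained sketch of that pattern follows; the macro and logLevel variable here are illustrative stand-ins, not the real mongo logging header:

#include <iostream>

static int logLevel = 0;   // verbosity, normally set from the command line

// When `level` is above the current verbosity, the whole stream expression
// lands in the empty branch and its operands are never evaluated.
#define LOG(level) if ((level) > logLevel) {} else std::cerr

int main() {
    logLevel = 1;
    LOG(1) << "replSet beginning startup..." << '\n';   // emitted
    LOG(3) << "expensive debug detail" << '\n';         // skipped entirely
    return 0;
}
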
diff --git a/db/repl/rs.h b/db/repl/rs.h
index 7654597a930..14c630d27a2 100644
--- a/db/repl/rs.h
+++ b/db/repl/rs.h
@@ -58,10 +58,11 @@ namespace mongo {
~Member(); // intentionally unimplemented as should never be called -- see List1<>::Base.
Member(const Member&);
public:
- Member(HostAndPort h, unsigned ord, const ReplSetConfig::MemberCfg *c, bool self);
+ Member(HostAndPort h, unsigned ord, ReplSetConfig::MemberCfg *c, bool self);
string fullName() const { return h().toString(); }
const ReplSetConfig::MemberCfg& config() const { return _config; }
+ ReplSetConfig::MemberCfg& configw() { return _config; }
const HeartbeatInfo& hbinfo() const { return _hbinfo; }
HeartbeatInfo& get_hbinfo() { return _hbinfo; }
string lhb() const { return _hbinfo.lastHeartbeatMsg; }
@@ -74,7 +75,7 @@ namespace mongo {
private:
friend class ReplSetImpl;
- const ReplSetConfig::MemberCfg _config;
+ ReplSetConfig::MemberCfg _config;
const HostAndPort _h;
HeartbeatInfo _hbinfo;
};
@@ -242,13 +243,19 @@ namespace mongo {
const Member *primary;
};
const SP get() {
- scoped_lock lk(m);
+ rwlock lk(m, false);
return sp;
}
- MemberState getState() const { return sp.state; }
- const Member* getPrimary() const { return sp.primary; }
+ MemberState getState() const {
+ rwlock lk(m, false);
+ return sp.state;
+ }
+ const Member* getPrimary() const {
+ rwlock lk(m, false);
+ return sp.primary;
+ }
void change(MemberState s, const Member *self) {
- scoped_lock lk(m);
+ rwlock lk(m, true);
if( sp.state != s ) {
log() << "replSet " << s.toString() << rsLog;
}
@@ -262,24 +269,25 @@ namespace mongo {
}
}
void set(MemberState s, const Member *p) {
- scoped_lock lk(m);
- sp.state = s; sp.primary = p;
+ rwlock lk(m, true);
+ sp.state = s;
+ sp.primary = p;
}
void setSelfPrimary(const Member *self) { change(MemberState::RS_PRIMARY, self); }
void setOtherPrimary(const Member *mem) {
- scoped_lock lk(m);
+ rwlock lk(m, true);
assert( !sp.state.primary() );
sp.primary = mem;
}
void noteRemoteIsPrimary(const Member *remote) {
- scoped_lock lk(m);
+ rwlock lk(m, true);
if( !sp.state.secondary() && !sp.state.fatal() )
sp.state = MemberState::RS_RECOVERING;
sp.primary = remote;
}
StateBox() : m("StateBox") { }
private:
- mongo::mutex m;
+ RWLock m;
SP sp;
};
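
The StateBox change above replaces its plain mutex with a reader/writer lock so that getState() and getPrimary() readers no longer serialize against each other. A minimal sketch of the same pattern using std::shared_mutex; the types below are simplified stand-ins for the real replica-set classes:

#include <mutex>
#include <shared_mutex>

enum class MemberState { RS_SECONDARY, RS_PRIMARY, RS_RECOVERING };
struct Member {};

class StateBoxSketch {
    mutable std::shared_mutex _m;
    MemberState _state = MemberState::RS_SECONDARY;
    const Member* _primary = nullptr;
public:
    // Readers share the lock, so concurrent getState()/getPrimary() calls
    // no longer queue behind one another.
    MemberState getState() const {
        std::shared_lock<std::shared_mutex> lk(_m);
        return _state;
    }
    const Member* getPrimary() const {
        std::shared_lock<std::shared_mutex> lk(_m);
        return _primary;
    }
    // Writers still take it exclusively.
    void set(MemberState s, const Member* p) {
        std::unique_lock<std::shared_mutex> lk(_m);
        _state = s;
        _primary = p;
    }
};
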
@@ -446,11 +454,20 @@ namespace mongo {
List1<Member> _members; // all members of the set EXCEPT _self.
ReplSetConfig::MemberCfg _config; // config of _self
unsigned _id; // _id of _self
+
+ int _maintenanceMode; // if we should stay in recovering state
public:
// this is called from within a writelock in logOpRS
unsigned selfId() const { return _id; }
Manager *mgr;
GhostSync *ghost;
+ /**
+ * This forces a secondary to go into recovering state and stay there
+ * until this is called again, passing in "false". Multiple threads can
+ * call this and it will leave maintenance mode once all of the callers
+ * have called it again, passing in false.
+ */
+ void setMaintenanceMode(const bool inc);
private:
Member* head() const { return _members.head(); }
public:
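
setMaintenanceMode() is reference counted: each caller passing true bumps a counter, and the node only leaves the forced RECOVERING state once every caller has passed false again. A small sketch of that counting pattern with simplified types; the real implementation sits behind ReplSetImpl's locking:

#include <cassert>
#include <mutex>

class MaintenanceCounterSketch {
    std::mutex _m;
    int _maintenanceMode = 0;   // > 0 means "stay in recovering state"
public:
    void setMaintenanceMode(bool inc) {
        std::lock_guard<std::mutex> lk(_m);
        if (inc) {
            ++_maintenanceMode;
        }
        else {
            assert(_maintenanceMode > 0);
            --_maintenanceMode;
        }
    }
    // tryToGoLiveAsASecondary() bails out while any caller still holds this.
    bool inMaintenanceMode() {
        std::lock_guard<std::mutex> lk(_m);
        return _maintenanceMode > 0;
    }
};
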
@@ -553,11 +570,29 @@ namespace mongo {
virtual bool logTheOp() { return false; }
virtual LockType locktype() const { return NONE; }
virtual void help( stringstream &help ) const { help << "internal"; }
+
+ /**
+ * Some replica set commands call this and then call check(). This is
+ * intentional, as they might do things before theReplSet is initialized
+ * that still need to be checked for auth.
+ */
+ bool checkAuth(string& errmsg, BSONObjBuilder& result) {
+ if( !noauth && adminOnly() ) {
+ AuthenticationInfo *ai = cc().getAuthenticationInfo();
+ if (!ai->isAuthorizedForLock("admin", locktype())) {
+ errmsg = "replSet command unauthorized";
+ return false;
+ }
+ }
+ return true;
+ }
+
bool check(string& errmsg, BSONObjBuilder& result) {
if( !replSet ) {
errmsg = "not running with --replSet";
return false;
}
+
if( theReplSet == 0 ) {
result.append("startupStatus", ReplSet::startupStatus);
string s;
@@ -566,7 +601,8 @@ namespace mongo {
result.append("info", "run rs.initiate(...) if not yet done for the set");
return false;
}
- return true;
+
+ return checkAuth(errmsg, result);
}
};
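
check() now ends by calling checkAuth(), and some replica set commands can call checkAuth() on its own before check() when they do work before theReplSet exists. A hedged sketch of that two-step guard with the auth state reduced to plain booleans; the real code consults AuthenticationInfo and locktype():

#include <iostream>
#include <string>

static bool noauth = false;            // --noauth disables the check
static bool replSetConfigured = true;  // --replSet was given
static bool authorizedForAdmin = true; // what AuthenticationInfo would answer

bool checkAuth(std::string& errmsg) {
    if (!noauth && !authorizedForAdmin) {
        errmsg = "replSet command unauthorized";
        return false;
    }
    return true;
}

bool check(std::string& errmsg) {
    if (!replSetConfigured) {
        errmsg = "not running with --replSet";
        return false;
    }
    // auth is verified last; commands that act before the set is initialized
    // can call checkAuth() directly first.
    return checkAuth(errmsg);
}

int main() {
    std::string errmsg;
    std::cout << (check(errmsg) ? "ok" : errmsg) << '\n';
    return 0;
}
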
@@ -578,7 +614,7 @@ namespace mongo {
/** inlines ----------------- */
- inline Member::Member(HostAndPort h, unsigned ord, const ReplSetConfig::MemberCfg *c, bool self) :
+ inline Member::Member(HostAndPort h, unsigned ord, ReplSetConfig::MemberCfg *c, bool self) :
_config(*c), _h(h), _hbinfo(ord) {
assert(c);
if( self )
diff --git a/db/repl/rs_config.cpp b/db/repl/rs_config.cpp
index 4d6c7b59bba..745d60b537c 100644
--- a/db/repl/rs_config.cpp
+++ b/db/repl/rs_config.cpp
@@ -83,14 +83,24 @@ namespace mongo {
if( hidden ) b << "hidden" << hidden;
if( !buildIndexes ) b << "buildIndexes" << buildIndexes;
if( !tags.empty() ) {
- BSONArrayBuilder a;
- for( set<string>::const_iterator i = tags.begin(); i != tags.end(); i++ )
- a.append(*i);
- b.appendArray("tags", a.done());
+ BSONObjBuilder a;
+ for( map<string,string>::const_iterator i = tags.begin(); i != tags.end(); i++ )
+ a.append((*i).first, (*i).second);
+ b.append("tags", a.done());
}
return b.obj();
}
+ void ReplSetConfig::updateMembers(List1<Member> &dest) {
+ for (vector<MemberCfg>::iterator source = members.begin(); source < members.end(); source++) {
+ for( Member *d = dest.head(); d; d = d->next() ) {
+ if (d->fullName() == (*source).h.toString()) {
+ d->configw().groupsw() = (*source).groups();
+ }
+ }
+ }
+ }
+
bo ReplSetConfig::asBson() const {
bob b;
b.append("_id", _id).append("version", version);
@@ -307,85 +317,39 @@ namespace mongo {
}
void ReplSetConfig::_populateTagMap(map<string,TagClause> &tagMap) {
- // stage 1: create subgroups for each server corresponding to each of
- // its tags. If a server has three tags, we want it to end up in three
- // subgroups, e.g.: A is tagged with ["A", "dc.ny", "m"]. At the end of
- // this step, tagMap will contain:
- // "A" => {"A.A" : A}
- // "dc.ny" => {"dc.ny.A" : A}
- // "m" => {"m.A" : A}
- // If we have more than one server with the same tag, we end up with
- // something like "x.y.z" => [{"x.y.z.A" : A},{"x.y.z.B" : B}] (if A
- // and B were tagged with "x.y.z").
+ // create subgroups for each server corresponding to each of
+ // its tags. E.g.:
+ //
+ // A is tagged with {"server" : "A", "dc" : "ny"}
+ // B is tagged with {"server" : "B", "dc" : "ny"}
+ //
+ // At the end of this step, tagMap will contain:
+ //
+ // "server" => {"A" : [A], "B" : [B]}
+ // "dc" => {"ny" : [A,B]}
+
for (unsigned i=0; i<members.size(); i++) {
MemberCfg member = members[i];
- for (set<string>::iterator tag = member.tags.begin(); tag != member.tags.end(); tag++) {
- TagClause& clause = tagMap[*tag];
- clause.name = *tag;
+ for (map<string,string>::iterator tag = member.tags.begin(); tag != member.tags.end(); tag++) {
+ string label = (*tag).first;
+ string value = (*tag).second;
- // we also populate the map, to be used by step 2... I think
- // this is correct, as step 2 condenses the groups anyway
- string perServerName = *tag+"."+members[i].h.toString();
+ TagClause& clause = tagMap[label];
+ clause.name = label;
TagSubgroup* subgroup;
- if (clause.subgroups.find(perServerName) == clause.subgroups.end()) {
- clause.subgroups[perServerName] = subgroup = new TagSubgroup(perServerName);
+ // search for "ny" in "dc"'s clause
+ if (clause.subgroups.find(value) == clause.subgroups.end()) {
+ clause.subgroups[value] = subgroup = new TagSubgroup(value);
}
else {
- subgroup = clause.subgroups[perServerName];
+ subgroup = clause.subgroups[value];
}
subgroup->m.insert(&members[i]);
}
}
-
- // stage 2: generate all parent tags. If we have "x.y.z", this
- // generates "x.y" and "x" and creates a map for each clause, e.g.,
- // "x"'s clause might have a map that looks like:
- // "x.y" => {A, B} {C}
- // "x.w" => {D} {E, F}
- for (map<string,TagClause>::iterator baseClause = tagMap.begin(); baseClause != tagMap.end(); baseClause++) {
- string prevPrefix = (*baseClause).first;
- const char *dot = strrchr(prevPrefix.c_str(), '.');
-
- while (dot) {
- // get x.y
- string xyTag = string(prevPrefix.c_str(), dot - prevPrefix.c_str());
- log(1) << "generating tag " << xyTag << rsLog;
- TagClause& xyClause = tagMap[xyTag];
- xyClause.name = xyTag;
-
- // get all of x.y.z's subgroups, add them as a single subgroup of x.y
- TagSubgroup* condensedSubgroup;;
- if (xyClause.subgroups.find(prevPrefix) == xyClause.subgroups.end()) {
- // label this subgroup one higher than the current, e.g.,
- // "x.y.z" if we're creating the "x.y" clause
- condensedSubgroup = new TagSubgroup(prevPrefix);
- xyClause.subgroups[prevPrefix] = condensedSubgroup;
- }
- else {
- condensedSubgroup = xyClause.subgroups[prevPrefix];
- assert(condensedSubgroup->name == prevPrefix);
- }
-
- TagClause& xyzClause = tagMap[prevPrefix];
-
- for (map<string,TagSubgroup*>::iterator xyzSubgroup = xyzClause.subgroups.begin();
- xyzSubgroup != xyzClause.subgroups.end(); xyzSubgroup++) {
- for (set<MemberCfg*>::const_iterator xyzMember = (*xyzSubgroup).second->m.begin();
- xyzMember != (*xyzSubgroup).second->m.end(); xyzMember++) {
- condensedSubgroup->m.insert(*xyzMember);
- // we'll link the member back with the group later, to
- // avoid creating extra link-backs
- }
- }
-
- // advance: if we were handling "x.y", now do "x"
- prevPrefix = xyTag;
- dot = strrchr(prevPrefix.c_str(), '.');
- }
- }
}
void ReplSetConfig::parseRules(const BSONObj& modes) {
@@ -442,7 +406,7 @@ namespace mongo {
for (set<MemberCfg *>::iterator cfg = (*sgs).second->m.begin();
!foundMe && cfg != (*sgs).second->m.end(); cfg++) {
- (*cfg)->groupsw(this).insert((*sgs).second);
+ (*cfg)->groupsw().insert((*sgs).second);
}
}
@@ -463,7 +427,7 @@ namespace mongo {
}
// if we got here, this is a valid rule
- log(1) << "new rule " << rule.fieldName() << ": " << r->toString() << rsLog;
+ LOG(1) << "replSet new rule " << rule.fieldName() << ": " << r->toString() << rsLog;
rules[rule.fieldName()] = r;
}
}
@@ -532,9 +496,10 @@ namespace mongo {
if( mobj.hasElement("votes") )
m.votes = (unsigned) mobj["votes"].Number();
if( mobj.hasElement("tags") ) {
- vector<BSONElement> v = mobj["tags"].Array();
- for( unsigned i = 0; i < v.size(); i++ )
- m.tags.insert( v[i].String() );
+ const BSONObj &t = mobj["tags"].Obj();
+ for (BSONObj::iterator c = t.begin(); c.more(); c.next()) {
+ m.tags[(*c).fieldName()] = (*c).String();
+ }
}
m.check();
}
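
Member tags change from a flat set of dotted strings to key/value pairs, and _populateTagMap() now groups members by tag value, so two members tagged {"dc":"ny"} land in the same "dc" -> "ny" subgroup. A self-contained sketch of that grouping with standard containers; MemberCfg here is a stand-in for the real config type:

#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

struct MemberCfg {                                   // stand-in for the real config type
    std::string host;
    std::map<std::string, std::string> tags;         // e.g. {"dc":"ny", "server":"A"}
};

int main() {
    std::vector<MemberCfg> members = {
        { "A:27017", { { "server", "A" }, { "dc", "ny" } } },
        { "B:27017", { { "server", "B" }, { "dc", "ny" } } },
    };

    // label -> value -> members carrying that value,
    // mirroring tagMap's clause/subgroup shape.
    std::map<std::string, std::map<std::string, std::set<const MemberCfg*> > > tagMap;
    for (const MemberCfg& m : members)
        for (const auto& tag : m.tags)
            tagMap[tag.first][tag.second].insert(&m);

    std::cout << tagMap["dc"]["ny"].size() << " members tagged dc:ny\n";   // prints 2
    return 0;
}
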
diff --git a/db/repl/rs_config.h b/db/repl/rs_config.h
index d9c9d97ed4d..4e0d1e862c0 100644
--- a/db/repl/rs_config.h
+++ b/db/repl/rs_config.h
@@ -25,7 +25,7 @@
#include "health.h"
namespace mongo {
-
+ class Member;
const string rsConfigNs = "local.system.replset";
class ReplSetConfig {
@@ -61,15 +61,14 @@ namespace mongo {
int slaveDelay; /* seconds. int rather than unsigned for convenient to/from bson conversion. */
bool hidden; /* if set, don't advertise to drivers in isMaster. for non-primaries (priority 0) */
bool buildIndexes; /* if false, do not create any non-_id indexes */
- set<string> tags; /* tagging for data center, rack, etc. */
+ map<string,string> tags; /* tagging for data center, rack, etc. */
private:
set<TagSubgroup*> _groups; // the subgroups this member belongs to
public:
const set<TagSubgroup*>& groups() const {
return _groups;
}
- set<TagSubgroup*>& groupsw(ReplSetConfig *c) {
- assert(!c->_constructed);
+ set<TagSubgroup*>& groupsw() {
return _groups;
}
void check() const; /* check validity, assert if not. */
@@ -114,6 +113,11 @@ namespace mongo {
void saveConfigLocally(BSONObj comment); // to local db
string saveConfigEverywhere(); // returns textual info on what happened
+ /**
+ * Update members' groups when the config changes but members stay the same.
+ */
+ void updateMembers(List1<Member> &dest);
+
BSONObj asBson() const;
bool _constructed;
diff --git a/db/repl/rs_initialsync.cpp b/db/repl/rs_initialsync.cpp
index 814bb1d0bf8..142878ab478 100644
--- a/db/repl/rs_initialsync.cpp
+++ b/db/repl/rs_initialsync.cpp
@@ -75,7 +75,7 @@ namespace mongo {
if( d && d->stats.nrecords == 0 )
return; // already empty, ok.
- log(1) << "replSet empty oplog" << rsLog;
+ LOG(1) << "replSet empty oplog" << rsLog;
d->emptyCappedCollection(rsoplog);
}
@@ -85,6 +85,7 @@ namespace mongo {
// find the member with the lowest ping time that has more data than me
for (Member *m = _members.head(); m; m = m->next()) {
if (m->hbinfo().up() &&
+ HeartbeatInfo::numPings > config().members.size()*2 &&
(m->state() == MemberState::RS_PRIMARY ||
(m->state() == MemberState::RS_SECONDARY && m->hbinfo().opTime > lastOpTimeWritten)) &&
(!closest || m->hbinfo().ping < closest->hbinfo().ping)) {
diff --git a/db/repl/rs_initiate.cpp b/db/repl/rs_initiate.cpp
index 5dd0ab23d24..0a796e1e445 100644
--- a/db/repl/rs_initiate.cpp
+++ b/db/repl/rs_initiate.cpp
@@ -150,7 +150,7 @@ namespace mongo {
h << "Initiate/christen a replica set.";
h << "\nhttp://www.mongodb.org/display/DOCS/Replica+Set+Commands";
}
- virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
log() << "replSet replSetInitiate admin command received from client" << rsLog;
if( !replSet ) {
diff --git a/db/repl/rs_member.h b/db/repl/rs_member.h
index 8e5a8ad9da3..d60bb5261e9 100644
--- a/db/repl/rs_member.h
+++ b/db/repl/rs_member.h
@@ -80,7 +80,8 @@ namespace mongo {
DiagStr lastHeartbeatMsg;
OpTime opTime;
int skew;
- unsigned int ping; // microseconds
+ unsigned int ping; // milliseconds
+ static unsigned int numPings;
bool up() const { return health > 0; }
diff --git a/db/repl/rs_rollback.cpp b/db/repl/rs_rollback.cpp
index 67d6cc26f07..cce5c091074 100644
--- a/db/repl/rs_rollback.cpp
+++ b/db/repl/rs_rollback.cpp
@@ -574,7 +574,7 @@ namespace mongo {
sethbmsg("rollback 6");
// clean up oplog
- log(2) << "replSet rollback truncate oplog after " << h.commonPoint.toStringPretty() << rsLog;
+ LOG(2) << "replSet rollback truncate oplog after " << h.commonPoint.toStringPretty() << rsLog;
// todo: fatal error if this throws?
oplogDetails->cappedTruncateAfter(rsoplog, h.commonPointOurDiskloc, false);
diff --git a/db/repl/rs_sync.cpp b/db/repl/rs_sync.cpp
index 95bbe2040a6..5fe3075c0f7 100644
--- a/db/repl/rs_sync.cpp
+++ b/db/repl/rs_sync.cpp
@@ -188,6 +188,16 @@ namespace mongo {
*/
bool ReplSetImpl::tryToGoLiveAsASecondary(OpTime& /*out*/ minvalid) {
bool golive = false;
+
+ {
+ lock lk( this );
+
+ if (_maintenanceMode > 0) {
+ // we're not actually going live
+ return true;
+ }
+ }
+
{
readlock lk("local.replset.minvalid");
BSONObj mv;
@@ -211,7 +221,7 @@ namespace mongo {
BSONObj remoteOldestOp = r.findOne(rsoplog, Query());
OpTime ts = remoteOldestOp["ts"]._opTime();
DEV log() << "replSet remoteOldestOp: " << ts.toStringLong() << rsLog;
- else log(3) << "replSet remoteOldestOp: " << ts.toStringLong() << rsLog;
+ else LOG(3) << "replSet remoteOldestOp: " << ts.toStringLong() << rsLog;
DEV {
log() << "replSet lastOpTimeWritten: " << lastOpTimeWritten.toStringLong() << rsLog;
log() << "replSet our state: " << state().toString() << rsLog;
@@ -251,7 +261,7 @@ namespace mongo {
assert(r.conn() == 0);
if( !r.connect(hn) ) {
- log(2) << "replSet can't connect to " << hn << " to read operations" << rsLog;
+ LOG(2) << "replSet can't connect to " << hn << " to read operations" << rsLog;
r.resetConnection();
return false;
}
@@ -407,7 +417,7 @@ namespace mongo {
}
- {
+ try {
writelock lk("");
/* if we have become primary, we don't want to apply things from elsewhere
@@ -421,11 +431,16 @@ namespace mongo {
syncApply(o);
_logOpObjRS(o); // with repl sets we write the ops to our oplog too
}
+ catch (DBException& e) {
+ sethbmsg(str::stream() << "syncTail: " << e.toString() << ", syncing: " << o);
+ sleepsecs(30);
+ return;
+ }
}
}
r.tailCheck();
if( !r.haveCursor() ) {
- log(1) << "replSet end syncTail pass with " << hn << rsLog;
+ LOG(1) << "replSet end syncTail pass with " << hn << rsLog;
// TODO : reuse our connection to the primary.
return;
}
@@ -475,9 +490,7 @@ namespace mongo {
_syncThread();
}
catch(DBException& e) {
- sethbmsg(str::stream() << "syncThread: " << e.toString() <<
- ", try 'use local; db.oplog.rs.findOne({ts : {$gt : new Timestamp(" <<
- lastOpTimeWritten.getSecs() << "000," << lastOpTimeWritten.getInc() << ")}});' on the primary");
+ sethbmsg(str::stream() << "syncThread: " << e.toString());
sleepsecs(10);
}
catch(...) {
@@ -580,7 +593,7 @@ namespace mongo {
// the target might end up with a new Member, but s.slave never
// changes so we'll compare the names
|| target == slave->slave || target->fullName() == slave->slave->fullName()) {
- log(1) << "replica set ghost target no good" << endl;
+ LOG(1) << "replica set ghost target no good" << endl;
return;
}
@@ -593,8 +606,7 @@ namespace mongo {
slave->reader.ghostQueryGTE(rsoplog, last);
}
- log(1) << "last: " << slave->last.toString() << " to " << last.toString() << rsLog;
-
+ LOG(1) << "replSet last: " << slave->last.toString() << " to " << last.toString() << rsLog;
if (slave->last > last) {
return;
}
@@ -608,11 +620,11 @@ namespace mongo {
BSONObj o = slave->reader.nextSafe();
slave->last = o["ts"]._opTime();
}
- log(2) << "now last is " << slave->last.toString() << rsLog;
+ LOG(2) << "now last is " << slave->last.toString() << rsLog;
}
catch (DBException& e) {
// we'll be back
- log(2) << "replSet ghost sync error: " << e.what() << " for "
+ LOG(2) << "replSet ghost sync error: " << e.what() << " for "
<< slave->slave->fullName() << rsLog;
slave->reader.resetConnection();
}
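
syncTail now wraps the oplog apply in a try/catch so a failing op is reported through the heartbeat message and the sync pass backs off instead of letting the exception escape the thread. A hedged sketch of that back-off shape; applyOp() and report() stand in for syncApply()/_logOpObjRS() and sethbmsg():

#include <chrono>
#include <iostream>
#include <stdexcept>
#include <string>
#include <thread>

void report(const std::string& msg) {          // stand-in for sethbmsg()
    std::cerr << msg << '\n';
}

void applyOp(const std::string& op) {          // stand-in for syncApply() + _logOpObjRS()
    if (op.empty())
        throw std::runtime_error("bad op");    // simulate a failing apply
}

void syncPass(const std::string& op) {
    try {
        applyOp(op);
    }
    catch (const std::exception& e) {
        // Surface the failure through the heartbeat message and back off
        // before the next pass, rather than crashing the sync thread.
        report(std::string("syncTail: ") + e.what() + ", syncing: " + op);
        std::this_thread::sleep_for(std::chrono::seconds(30));
        return;
    }
}

int main() {
    syncPass("{ts:1, op:'i'}");   // applies cleanly, no back-off
    return 0;
}
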
diff --git a/db/scanandorder.cpp b/db/scanandorder.cpp
new file mode 100644
index 00000000000..efa9c8d7f13
--- /dev/null
+++ b/db/scanandorder.cpp
@@ -0,0 +1,93 @@
+/* scanandorder.cpp
+ Order results (that aren't already indexes and in order.)
+*/
+
+/**
+ * Copyright (C) 2008 10gen Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License, version 3,
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "pch.h"
+#include "scanandorder.h"
+
+namespace mongo {
+
+ const unsigned ScanAndOrder::MaxScanAndOrderBytes = 32 * 1024 * 1024;
+
+ void ScanAndOrder::_add(BSONObj& k, BSONObj o, DiskLoc* loc) {
+ if (!loc) {
+ _best.insert(make_pair(k.getOwned(),o.getOwned()));
+ }
+ else {
+ BSONObjBuilder b;
+ b.appendElements(o);
+ b.append("$diskLoc", loc->toBSONObj());
+ _best.insert(make_pair(k.getOwned(), b.obj().getOwned()));
+ }
+ }
+
+ void ScanAndOrder::_addIfBetter(BSONObj& k, BSONObj o, BestMap::iterator i, DiskLoc* loc) {
+ /* todo : we don't correct _approxSize here. */
+ const BSONObj& worstBestKey = i->first;
+ int c = worstBestKey.woCompare(k, _order._spec.keyPattern);
+ if ( c > 0 ) {
+ // k is better, 'upgrade'
+ _best.erase(i);
+ _add(k, o, loc);
+ }
+ }
+
+
+ void ScanAndOrder::add(BSONObj o, DiskLoc* loc) {
+ assert( o.isValid() );
+ BSONObj k = _order.getKeyFromObject(o);
+ if ( k.isEmpty() ) {
+ return;
+ }
+ if ( (int) _best.size() < _limit ) {
+ _approxSize += k.objsize();
+ _approxSize += o.objsize();
+
+ /* note : adjust when bson return limit adjusts. note this limit should be a bit higher. */
+ uassert( 10128 , "too much data for sort() with no index. add an index or specify a smaller limit", _approxSize < MaxScanAndOrderBytes );
+
+ _add(k, o, loc);
+ return;
+ }
+ BestMap::iterator i;
+ assert( _best.end() != _best.begin() );
+ i = _best.end();
+ i--;
+ _addIfBetter(k, o, i, loc);
+ }
+
+
+ void ScanAndOrder::fill(BufBuilder& b, Projection *filter, int& nout ) const {
+ int n = 0;
+ int nFilled = 0;
+ for ( BestMap::const_iterator i = _best.begin(); i != _best.end(); i++ ) {
+ n++;
+ if ( n <= _startFrom )
+ continue;
+ const BSONObj& o = i->second;
+ fillQueryResultFromObj(b, filter, o);
+ nFilled++;
+ if ( nFilled >= _limit )
+ break;
+ uassert( 10129 , "too much data for sort() with no index", b.len() < (int)MaxScanAndOrderBytes ); // appserver limit
+ }
+ nout = nFilled;
+ }
+
+} // namespace mongo
diff --git a/db/scanandorder.h b/db/scanandorder.h
index 2957ae60245..33e76f61f67 100644
--- a/db/scanandorder.h
+++ b/db/scanandorder.h
@@ -22,6 +22,7 @@
#include "indexkey.h"
#include "queryutil.h"
+#include "projection.h"
namespace mongo {
@@ -76,30 +77,9 @@ namespace mongo {
typedef multimap<BSONObj,BSONObj,BSONObjCmp> BestMap;
class ScanAndOrder {
- void _add(BSONObj& k, BSONObj o, DiskLoc* loc) {
- if (!loc) {
- _best.insert(make_pair(k.getOwned(),o.getOwned()));
- }
- else {
- BSONObjBuilder b;
- b.appendElements(o);
- b.append("$diskLoc", loc->toBSONObj());
- _best.insert(make_pair(k.getOwned(), b.obj().getOwned()));
- }
- }
-
- void _addIfBetter(BSONObj& k, BSONObj o, BestMap::iterator i, DiskLoc* loc) {
- /* todo : we don't correct _approxSize here. */
- const BSONObj& worstBestKey = i->first;
- int c = worstBestKey.woCompare(k, _order._spec.keyPattern);
- if ( c > 0 ) {
- // k is better, 'upgrade'
- _best.erase(i);
- _add(k, o, loc);
- }
- }
-
public:
+ static const unsigned MaxScanAndOrderBytes;
+
ScanAndOrder(int startFrom, int limit, BSONObj order, const FieldRangeSet &frs) :
_best( BSONObjCmp( order ) ),
_startFrom(startFrom), _order(order, frs) {
@@ -107,60 +87,25 @@ namespace mongo {
_approxSize = 0;
}
- int size() const {
- return _best.size();
- }
-
- void add(BSONObj o, DiskLoc* loc) {
- assert( o.isValid() );
- BSONObj k = _order.getKeyFromObject(o);
- if ( k.isEmpty() ) {
- return;
- }
- if ( (int) _best.size() < _limit ) {
- _approxSize += k.objsize();
- _approxSize += o.objsize();
-
- /* note : adjust when bson return limit adjusts. note this limit should be a bit higher. */
- uassert( 10128 , "too much data for sort() with no index. add an index or specify a smaller limit", _approxSize < 32 * 1024 * 1024 );
-
- _add(k, o, loc);
- return;
- }
- BestMap::iterator i;
- assert( _best.end() != _best.begin() );
- i = _best.end();
- i--;
- _addIfBetter(k, o, i, loc);
- }
+ int size() const { return _best.size(); }
- void _fill(BufBuilder& b, Projection *filter, int& nout, BestMap::iterator begin, BestMap::iterator end) {
- int n = 0;
- int nFilled = 0;
- for ( BestMap::iterator i = begin; i != end; i++ ) {
- n++;
- if ( n <= _startFrom )
- continue;
- BSONObj& o = i->second;
- fillQueryResultFromObj(b, filter, o);
- nFilled++;
- if ( nFilled >= _limit )
- break;
- uassert( 10129 , "too much data for sort() with no index", b.len() < 4000000 ); // appserver limit
- }
- nout = nFilled;
- }
+ void add(BSONObj o, DiskLoc* loc);
/* scanning complete. stick the query result in b for n objects. */
- void fill(BufBuilder& b, Projection *filter, int& nout) {
- _fill(b, filter, nout, _best.begin(), _best.end());
- }
-
+ void fill(BufBuilder& b, Projection *filter, int& nout ) const;
+
+ private:
+
+ void _add(BSONObj& k, BSONObj o, DiskLoc* loc);
+
+ void _addIfBetter(BSONObj& k, BSONObj o, BestMap::iterator i, DiskLoc* loc);
+
BestMap _best; // key -> full object
int _startFrom;
int _limit; // max to send back.
KeyType _order;
unsigned _approxSize;
+
};
} // namespace mongo
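
ScanAndOrder's add()/fill() bodies move out of the header into the new db/scanandorder.cpp, and the 32MB buffer cap becomes the named MaxScanAndOrderBytes constant. The underlying idea — keep only the best `limit` documents by sort key while scanning, refusing to buffer more than the cap — can be sketched with a plain multimap; this illustrates the algorithm, not the mongo API:

#include <cstddef>
#include <iostream>
#include <iterator>
#include <map>
#include <stdexcept>
#include <string>

class TopKByKey {
    std::multimap<int, std::string> _best;             // sort key -> document
    std::size_t _limit;
    std::size_t _approxSize = 0;
    static constexpr std::size_t MaxBytes = 32u * 1024 * 1024;
public:
    explicit TopKByKey(std::size_t limit) : _limit(limit) {}

    void add(int key, const std::string& doc) {
        if (_best.size() < _limit) {
            _approxSize += doc.size();
            if (_approxSize >= MaxBytes)
                throw std::runtime_error("too much data for sort() with no index");
            _best.insert(std::make_pair(key, doc));
            return;
        }
        auto worst = std::prev(_best.end());
        if (key < worst->first) {                      // better than the current worst: upgrade
            _best.erase(worst);
            _best.insert(std::make_pair(key, doc));
        }
    }

    void fill(std::ostream& out) const {               // emit survivors in key order
        for (const auto& kv : _best)
            out << kv.second << '\n';
    }
};

int main() {
    TopKByKey top(2);
    top.add(5, "{a:5}");
    top.add(1, "{a:1}");
    top.add(3, "{a:3}");    // evicts the key-5 entry
    top.fill(std::cout);    // prints {a:1} then {a:3}
    return 0;
}
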
diff --git a/db/security.cpp b/db/security.cpp
index 4a6f32600aa..b57326a8233 100644
--- a/db/security.cpp
+++ b/db/security.cpp
@@ -30,7 +30,7 @@
namespace mongo {
bool AuthenticationInfo::_warned = false;
-
+ /*
void AuthenticationInfo::print() const {
cout << "AuthenticationInfo: " << this << '\n';
for ( MA::const_iterator i=_dbs.begin(); i!=_dbs.end(); i++ ) {
@@ -38,7 +38,7 @@ namespace mongo {
}
cout << "END" << endl;
}
-
+ */
string AuthenticationInfo::getUser( const string& dbname ) const {
scoped_spinlock lk(_lock);
@@ -78,9 +78,9 @@ namespace mongo {
pwd = internalSecurity.pwd;
}
else {
- static BSONObj userPattern = fromjson("{\"user\":1}");
+ // static BSONObj userPattern = fromjson("{\"user\":1}");
string systemUsers = dbname + ".system.users";
- OCCASIONALLY Helpers::ensureIndex(systemUsers.c_str(), userPattern, false, "user_1");
+ // OCCASIONALLY Helpers::ensureIndex(systemUsers.c_str(), userPattern, false, "user_1");
{
BSONObjBuilder b;
b << "user" << user;
@@ -107,7 +107,7 @@ namespace mongo {
}
}
- bool CmdLogout::run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool CmdLogout::run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
AuthenticationInfo *ai = cc().getAuthenticationInfo();
ai->logout(dbname);
return true;
diff --git a/db/security.h b/db/security.h
index 2937ef29f80..2937ef29f80 100644..100755
--- a/db/security.h
+++ b/db/security.h
diff --git a/db/security_commands.cpp b/db/security_commands.cpp
index 16face7fc32..2db96802404 100644
--- a/db/security_commands.cpp
+++ b/db/security_commands.cpp
@@ -56,7 +56,7 @@ namespace mongo {
void help(stringstream& h) const { h << "internal"; }
virtual LockType locktype() const { return NONE; }
CmdGetNonce() : Command("getnonce") {}
- bool run(const string&, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool run(const string&, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
nonce64 *n = new nonce64(Security::getNonce());
stringstream ss;
ss << hex << *n;
@@ -68,7 +68,7 @@ namespace mongo {
CmdLogout cmdLogout;
- bool CmdAuthenticate::run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool CmdAuthenticate::run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
log() << " authenticate: " << cmdObj << endl;
string user = cmdObj.getStringField("user");
diff --git a/db/security_common.h b/db/security_common.h
index 3af70cc7b97..2f2565f3ce0 100644
--- a/db/security_common.h
+++ b/db/security_common.h
@@ -57,10 +57,10 @@ namespace mongo {
virtual bool slaveOk() const {
return true;
}
- virtual LockType locktype() const { return WRITE; }
+ virtual LockType locktype() const { return READ; }
virtual void help(stringstream& ss) const { ss << "internal"; }
CmdAuthenticate() : Command("authenticate") {}
- bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl);
+ bool run(const string& dbname , BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result, bool fromRepl);
private:
bool getUserObj(const string& dbname, const string& user, BSONObj& userObj, string& pwd);
void authenticate(const string& dbname, const string& user, const bool readOnly);
@@ -77,7 +77,7 @@ namespace mongo {
void help(stringstream& h) const { h << "de-authenticate"; }
virtual LockType locktype() const { return NONE; }
CmdLogout() : Command("logout") {}
- bool run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl);
+ bool run(const string& dbname , BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result, bool fromRepl);
};
} // namespace mongo
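
Command::run() gains an int options parameter in this patch, so each override (CmdAuthenticate, CmdLogout, getnonce, replSetInitiate, top) adds the extra argument. A sketch of what an override looks like after the change, with the Command base reduced to a simplified stand-in rather than the real db/commands.h:

#include <string>

// Simplified stand-ins; the real declarations live in db/commands.h and bson/.
struct BSONObj {};
struct BSONObjBuilder {};

class Command {
public:
    virtual ~Command() {}
    virtual bool run(const std::string& dbname, BSONObj& cmdObj, int options,
                     std::string& errmsg, BSONObjBuilder& result, bool fromRepl) = 0;
};

class CmdExample : public Command {
public:
    // The added int carries query/execution options through to the command.
    bool run(const std::string&, BSONObj&, int /*options*/,
             std::string&, BSONObjBuilder&, bool) override {
        return true;
    }
};
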
diff --git a/db/stats/top.cpp b/db/stats/top.cpp
index 51a270c8c8c..f5b6ee42f1c 100644
--- a/db/stats/top.cpp
+++ b/db/stats/top.cpp
@@ -156,7 +156,7 @@ namespace mongo {
virtual LockType locktype() const { return READ; }
virtual void help( stringstream& help ) const { help << "usage by collection, in micros "; }
- virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
{
BSONObjBuilder b( result.subobjStart( "totals" ) );
b.append( "note" , "all times in microseconds" );
diff --git a/dbtests/basictests.cpp b/dbtests/basictests.cpp
index 299dc4352ad..80bd7d70892 100644
--- a/dbtests/basictests.cpp
+++ b/dbtests/basictests.cpp
@@ -26,6 +26,7 @@
#include "../util/queue.h"
#include "../util/paths.h"
#include "../util/stringutils.h"
+#include "../util/compress.h"
#include "../db/db.h"
namespace BasicTests {
@@ -411,6 +412,21 @@ namespace BasicTests {
ASSERT_EQUALS( -1 , lexNumCmp( "a.b.c.d0" , "a.b.c.d00" ) );
ASSERT_EQUALS( 1 , lexNumCmp( "a.b.c.0.y" , "a.b.c.00.x" ) );
+ ASSERT_EQUALS( -1, lexNumCmp( "a", "a-" ) );
+ ASSERT_EQUALS( 1, lexNumCmp( "a-", "a" ) );
+ ASSERT_EQUALS( 0, lexNumCmp( "a-", "a-" ) );
+
+ ASSERT_EQUALS( -1, lexNumCmp( "a", "a-c" ) );
+ ASSERT_EQUALS( 1, lexNumCmp( "a-c", "a" ) );
+ ASSERT_EQUALS( 0, lexNumCmp( "a-c", "a-c" ) );
+
+ ASSERT_EQUALS( 1, lexNumCmp( "a-c.t", "a.t" ) );
+ ASSERT_EQUALS( -1, lexNumCmp( "a.t", "a-c.t" ) );
+ ASSERT_EQUALS( 0, lexNumCmp( "a-c.t", "a-c.t" ) );
+
+ ASSERT_EQUALS( 1, lexNumCmp( "ac.t", "a.t" ) );
+ ASSERT_EQUALS( -1, lexNumCmp( "a.t", "ac.t" ) );
+ ASSERT_EQUALS( 0, lexNumCmp( "ac.t", "ac.t" ) );
}
};
@@ -596,6 +612,40 @@ namespace BasicTests {
}
};
+ class CmdLineParseConfigTest {
+ public:
+ void run() {
+ stringstream ss1;
+ istringstream iss1("");
+ CmdLine::parseConfigFile( iss1, ss1 );
+ stringstream ss2;
+ istringstream iss2("password=\'foo bar baz\'");
+ CmdLine::parseConfigFile( iss2, ss2 );
+ stringstream ss3;
+ istringstream iss3("\t this = false \n#that = true\n #another = whocares\n\n other = monkeys ");
+ CmdLine::parseConfigFile( iss3, ss3 );
+
+ ASSERT( ss1.str().compare("\n") == 0 );
+ ASSERT( ss2.str().compare("password=\'foo bar baz\'\n\n") == 0 );
+ ASSERT( ss3.str().compare("\n other = monkeys \n\n") == 0 );
+ }
+ };
+
+ struct CompressionTest1 {
+ void run() {
+ const char * c = "this is a test";
+ std::string s;
+ size_t len = compress(c, strlen(c)+1, &s);
+ assert( len > 0 );
+
+ std::string out;
+ bool ok = uncompress(s.c_str(), s.size(), &out);
+ assert(ok);
+ assert( strcmp(out.c_str(), c) == 0 );
+ }
+ } ctest1;
+
+
class All : public Suite {
public:
All() : Suite( "basic" ) {
@@ -632,6 +682,9 @@ namespace BasicTests {
add< HostAndPortTests >();
add< RelativePathTest >();
+ add< CmdLineParseConfigTest >();
+
+ add< CompressionTest1 >();
}
} myall;
diff --git a/dbtests/cursortests.cpp b/dbtests/cursortests.cpp
index 4d2de164165..cf661864b95 100644
--- a/dbtests/cursortests.cpp
+++ b/dbtests/cursortests.cpp
@@ -33,6 +33,7 @@ namespace CursorTests {
class Base {
protected:
+ static const char *ns() { return "unittests.cursortests.Base"; }
FieldRangeVector *vec( int *vals, int len, int direction = 1 ) {
FieldRangeSet s( "", BSON( "a" << 1 ), true );
for( int i = 0; i < len; i += 2 ) {
@@ -49,6 +50,7 @@ namespace CursorTests {
IndexSpec *idxSpec = new IndexSpec( BSON( "a" << 1 ) );
return new FieldRangeVector( s, *idxSpec, direction );
}
+ DBDirectClient _c;
private:
vector< BSONObj > _objs;
};
@@ -258,6 +260,29 @@ namespace CursorTests {
}
virtual BSONObj idx() const { return BSON( "a" << 1 << "b" << 1 ); }
};
+
+ class AbortImplicitScan : public Base {
+ public:
+ void run() {
+ dblock lk;
+ IndexSpec idx( BSON( "a" << 1 << "b" << 1 ) );
+ _c.ensureIndex( ns(), idx.keyPattern );
+ for( int i = 0; i < 300; ++i ) {
+ _c.insert( ns(), BSON( "a" << i << "b" << 5 ) );
+ }
+ FieldRangeSet frs( ns(), BSON( "b" << 3 ), true );
+ boost::shared_ptr<FieldRangeVector> frv( new FieldRangeVector( frs, idx, 1 ) );
+ Client::Context ctx( ns() );
+ scoped_ptr<BtreeCursor> c( BtreeCursor::make( nsdetails( ns() ), 1, nsdetails( ns() )->idx(1), frv, 1 ) );
+ int initialNscanned = c->nscanned();
+ ASSERT( initialNscanned < 200 );
+ ASSERT( c->ok() );
+ c->advance();
+ ASSERT( c->nscanned() > initialNscanned );
+ ASSERT( c->nscanned() < 200 );
+ ASSERT( c->ok() );
+ }
+ };
} // namespace BtreeCursorTests
@@ -274,6 +299,7 @@ namespace CursorTests {
add< BtreeCursorTests::EqIn >();
add< BtreeCursorTests::RangeEq >();
add< BtreeCursorTests::RangeIn >();
+ add< BtreeCursorTests::AbortImplicitScan >();
}
} myall;
} // namespace CursorTests
diff --git a/dbtests/directclienttests.cpp b/dbtests/directclienttests.cpp
index 5b3bde70889..860eb7e7e5c 100644
--- a/dbtests/directclienttests.cpp
+++ b/dbtests/directclienttests.cpp
@@ -84,7 +84,7 @@ namespace DirectClientTests {
ASSERT_EQUALS((int)client().count(ns), 1);
client().dropCollection(ns);
- client().insert(ns, objs, InsertOption_KeepGoing);
+ client().insert(ns, objs, InsertOption_ContinueOnError);
ASSERT_EQUALS(client().getLastErrorDetailed()["code"].numberInt(), 11000);
ASSERT_EQUALS((int)client().count(ns), 2);
}
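
The test now uses the renamed InsertOption_ContinueOnError flag: a batch insert keeps going past a duplicate-key error (code 11000) and the later documents still land, which is why the count ends up at 2. A sketch of the same call against the in-tree C++ client, assuming a connection `c` is already set up and with an illustrative namespace name:

// Assumes the in-tree C++ client (client/dbclient.h), as the dbtests use it.
#include <vector>
#include "client/dbclient.h"

using namespace mongo;

void continueOnErrorDemo(DBClientConnection& c) {
    const char* ns = "test.continueOnError";
    std::vector<BSONObj> objs;
    objs.push_back(BSON("_id" << 1));
    objs.push_back(BSON("_id" << 1));   // duplicate key in the middle of the batch
    objs.push_back(BSON("_id" << 2));

    c.insert(ns, objs, InsertOption_ContinueOnError);
    // The duplicate is reported as code 11000 via getLastError,
    // but the documents after it are still inserted.
    BSONObj err = c.getLastErrorDetailed();
    (void)err;
}
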
diff --git a/dbtests/framework.cpp b/dbtests/framework.cpp
index 99fcad51d97..95ed8b33668 100644
--- a/dbtests/framework.cpp
+++ b/dbtests/framework.cpp
@@ -209,6 +209,7 @@ namespace mongo {
hidden_options.add_options()
("suites", po::value< vector<string> >(), "test suites to run")
+ ("nopreallocj", "disable journal prealloc")
;
positional_options.add("suites", -1);
@@ -247,6 +248,10 @@ namespace mongo {
cmdLine.dur = true;
}
+ if( params.count("nopreallocj") ) {
+ cmdLine.preallocj = false;
+ }
+
if (params.count("debug") || params.count("verbose") ) {
logLevel = 1;
}
diff --git a/dbtests/jsobjtests.cpp b/dbtests/jsobjtests.cpp
index 9f00d4cabce..034bb97c620 100644
--- a/dbtests/jsobjtests.cpp
+++ b/dbtests/jsobjtests.cpp
@@ -569,6 +569,13 @@ namespace JsobjTests {
}
{
+ BSONObjBuilder b;
+ b.appendBinData("f", 33, (BinDataType) 1, "123456789012345678901234567890123");
+ BSONObj o = b.obj();
+ keyTest( o, false );
+ }
+
+ {
for( int i = 1; i <= 3; i++ ) {
for( int j = 1; j <= 3; j++ ) {
BSONObjBuilder b;
diff --git a/dbtests/namespacetests.cpp b/dbtests/namespacetests.cpp
index 392917dd6d3..bbb8f5e596e 100644
--- a/dbtests/namespacetests.cpp
+++ b/dbtests/namespacetests.cpp
@@ -44,12 +44,13 @@ namespace NamespaceTests {
ASSERT( theDataFileMgr.findAll( ns() )->eof() );
}
protected:
- void create() {
+ void create( bool sparse = false ) {
NamespaceDetailsTransient::get_w( ns() ).deletedIndex();
BSONObjBuilder builder;
builder.append( "ns", ns() );
builder.append( "name", "testIndex" );
builder.append( "key", key() );
+ builder.append( "sparse", sparse );
BSONObj bobj = builder.done();
id_.info = theDataFileMgr.insert( ns(), bobj.objdata(), bobj.objsize() );
// head not needed for current tests
@@ -339,12 +340,13 @@ namespace NamespaceTests {
elts.push_back( simpleBC( i ) );
BSONObjBuilder b;
b.append( "a", elts );
-
+ BSONObj obj = b.obj();
+
BSONObjSet keys;
- id().getKeysFromObject( b.done(), keys );
+ id().getKeysFromObject( obj, keys );
checkSize( 4, keys );
BSONObjSet::iterator i = keys.begin();
- assertEquals( nullObj(), *i++ );
+ assertEquals( nullObj(), *i++ ); // see SERVER-3377
for ( int j = 1; j < 4; ++i, ++j ) {
BSONObjBuilder b;
b.append( "", j );
@@ -532,9 +534,49 @@ namespace NamespaceTests {
id().getKeysFromObject( fromjson( "{a:[]}" ), keys );
checkSize(1, keys );
+ ASSERT_EQUALS( Undefined, keys.begin()->firstElement().type() );
keys.clear();
}
};
+
+ class DoubleArray : Base {
+ public:
+ void run() {
+ create();
+
+ BSONObjSet keys;
+ id().getKeysFromObject( fromjson( "{a:[1,2]}" ), keys );
+ checkSize(2, keys );
+ BSONObjSet::const_iterator i = keys.begin();
+ ASSERT_EQUALS( BSON( "" << 1 << "" << 1 ), *i );
+ ++i;
+ ASSERT_EQUALS( BSON( "" << 2 << "" << 2 ), *i );
+ keys.clear();
+ }
+
+ protected:
+ BSONObj key() const {
+ return BSON( "a" << 1 << "a" << 1 );
+ }
+ };
+
+ class DoubleEmptyArray : Base {
+ public:
+ void run() {
+ create();
+
+ BSONObjSet keys;
+ id().getKeysFromObject( fromjson( "{a:[]}" ), keys );
+ checkSize(1, keys );
+ ASSERT_EQUALS( fromjson( "{'':undefined,'':undefined}" ), *keys.begin() );
+ keys.clear();
+ }
+
+ protected:
+ BSONObj key() const {
+ return BSON( "a" << 1 << "a" << 1 );
+ }
+ };
class MultiEmptyArray : Base {
public:
@@ -558,7 +600,9 @@ namespace NamespaceTests {
id().getKeysFromObject( fromjson( "{a:1,b:[]}" ), keys );
checkSize(1, keys );
//cout << "YO : " << *(keys.begin()) << endl;
- ASSERT_EQUALS( NumberInt , keys.begin()->firstElement().type() );
+ BSONObjIterator i( *keys.begin() );
+ ASSERT_EQUALS( NumberInt , i.next().type() );
+ ASSERT_EQUALS( Undefined , i.next().type() );
keys.clear();
}
@@ -567,8 +611,313 @@ namespace NamespaceTests {
return aAndB();
}
};
+
+ class NestedEmptyArray : Base {
+ public:
+ void run() {
+ create();
+
+ BSONObjSet keys;
+ id().getKeysFromObject( fromjson( "{a:[]}" ), keys );
+ checkSize( 1, keys );
+ ASSERT_EQUALS( fromjson( "{'':null}" ), *keys.begin() );
+ keys.clear();
+ }
+ protected:
+ BSONObj key() const { return BSON( "a.b" << 1 ); }
+ };
+
+ class MultiNestedEmptyArray : Base {
+ public:
+ void run() {
+ create();
+
+ BSONObjSet keys;
+ id().getKeysFromObject( fromjson( "{a:[]}" ), keys );
+ checkSize( 1, keys );
+ ASSERT_EQUALS( fromjson( "{'':null,'':null}" ), *keys.begin() );
+ keys.clear();
+ }
+ protected:
+ BSONObj key() const { return BSON( "a.b" << 1 << "a.c" << 1 ); }
+ };
+
+ class UnevenNestedEmptyArray : public Base {
+ public:
+ void run() {
+ create();
+
+ BSONObjSet keys;
+ id().getKeysFromObject( fromjson( "{a:[]}" ), keys );
+ checkSize( 1, keys );
+ ASSERT_EQUALS( fromjson( "{'':undefined,'':null}" ), *keys.begin() );
+ keys.clear();
+
+ id().getKeysFromObject( fromjson( "{a:[{b:1}]}" ), keys );
+ checkSize( 1, keys );
+ ASSERT_EQUALS( fromjson( "{'':{b:1},'':1}" ), *keys.begin() );
+ keys.clear();
+
+ id().getKeysFromObject( fromjson( "{a:[{b:[]}]}" ), keys );
+ checkSize( 1, keys );
+ ASSERT_EQUALS( fromjson( "{'':{b:[]},'':undefined}" ), *keys.begin() );
+ keys.clear();
+ }
+ protected:
+ BSONObj key() const { return BSON( "a" << 1 << "a.b" << 1 ); }
+ };
+
+ class ReverseUnevenNestedEmptyArray : public Base {
+ public:
+ void run() {
+ create();
+
+ BSONObjSet keys;
+ id().getKeysFromObject( fromjson( "{a:[]}" ), keys );
+ checkSize( 1, keys );
+ ASSERT_EQUALS( fromjson( "{'':null,'':undefined}" ), *keys.begin() );
+ keys.clear();
+ }
+ protected:
+ BSONObj key() const { return BSON( "a.b" << 1 << "a" << 1 ); }
+ };
+
+ class SparseReverseUnevenNestedEmptyArray : public Base {
+ public:
+ void run() {
+ create( true );
+
+ BSONObjSet keys;
+ id().getKeysFromObject( fromjson( "{a:[]}" ), keys );
+ checkSize( 1, keys );
+ ASSERT_EQUALS( fromjson( "{'':null,'':undefined}" ), *keys.begin() );
+ keys.clear();
+ }
+ protected:
+ BSONObj key() const { return BSON( "a.b" << 1 << "a" << 1 ); }
+ };
+
+ class SparseEmptyArray : public Base {
+ public:
+ void run() {
+ create( true );
+
+ BSONObjSet keys;
+ id().getKeysFromObject( fromjson( "{a:1}" ), keys );
+ checkSize( 0, keys );
+ keys.clear();
+ id().getKeysFromObject( fromjson( "{a:[]}" ), keys );
+ checkSize( 0, keys );
+ keys.clear();
+
+ id().getKeysFromObject( fromjson( "{a:[{c:1}]}" ), keys );
+ checkSize( 0, keys );
+ keys.clear();
+ }
+ protected:
+ BSONObj key() const { return BSON( "a.b" << 1 ); }
+ };
+ class SparseEmptyArraySecond : public Base {
+ public:
+ void run() {
+ create( true );
+
+ BSONObjSet keys;
+ id().getKeysFromObject( fromjson( "{a:1}" ), keys );
+ checkSize( 0, keys );
+ keys.clear();
+
+ id().getKeysFromObject( fromjson( "{a:[]}" ), keys );
+ checkSize( 0, keys );
+ keys.clear();
+
+ id().getKeysFromObject( fromjson( "{a:[{c:1}]}" ), keys );
+ checkSize( 0, keys );
+ keys.clear();
+ }
+ protected:
+ BSONObj key() const { return BSON( "z" << 1 << "a.b" << 1 ); }
+ };
+
+ class NonObjectMissingNestedField : public Base {
+ public:
+ void run() {
+ create();
+
+ BSONObjSet keys;
+ id().getKeysFromObject( fromjson( "{a:[]}" ), keys );
+ checkSize( 1, keys );
+ ASSERT_EQUALS( fromjson( "{'':null}" ), *keys.begin() );
+ keys.clear();
+
+ id().getKeysFromObject( fromjson( "{a:[1]}" ), keys );
+ checkSize( 1, keys );
+ ASSERT_EQUALS( fromjson( "{'':null}" ), *keys.begin() );
+ keys.clear();
+
+ id().getKeysFromObject( fromjson( "{a:[1,{b:1}]}" ), keys );
+ checkSize( 2, keys );
+ BSONObjSet::const_iterator c = keys.begin();
+ ASSERT_EQUALS( fromjson( "{'':null}" ), *c );
+ ++c;
+ ASSERT_EQUALS( fromjson( "{'':1}" ), *c );
+ keys.clear();
+ }
+ protected:
+ BSONObj key() const { return BSON( "a.b" << 1 ); }
+ };
+
+ class SparseNonObjectMissingNestedField : public Base {
+ public:
+ void run() {
+ create( true );
+
+ BSONObjSet keys;
+ id().getKeysFromObject( fromjson( "{a:[]}" ), keys );
+ checkSize( 0, keys );
+ keys.clear();
+
+ id().getKeysFromObject( fromjson( "{a:[1]}" ), keys );
+ checkSize( 0, keys );
+ keys.clear();
+
+ id().getKeysFromObject( fromjson( "{a:[1,{b:1}]}" ), keys );
+ checkSize( 1, keys );
+ ASSERT_EQUALS( fromjson( "{'':1}" ), *keys.begin() );
+ keys.clear();
+ }
+ protected:
+ BSONObj key() const { return BSON( "a.b" << 1 ); }
+ };
+
+ class IndexedArrayIndex : public Base {
+ public:
+ void run() {
+ create();
+
+ BSONObjSet keys;
+ id().getKeysFromObject( fromjson( "{a:[1]}" ), keys );
+ checkSize( 1, keys );
+ ASSERT_EQUALS( BSON( "" << 1 ), *keys.begin() );
+ keys.clear();
+
+ id().getKeysFromObject( fromjson( "{a:[[1]]}" ), keys );
+ checkSize( 1, keys );
+ ASSERT_EQUALS( fromjson( "{'':[1]}" ), *keys.begin() );
+ keys.clear();
+
+ id().getKeysFromObject( fromjson( "{a:[[]]}" ), keys );
+ checkSize( 1, keys );
+ ASSERT_EQUALS( fromjson( "{'':undefined}" ), *keys.begin() );
+ keys.clear();
+
+ id().getKeysFromObject( fromjson( "{a:{'0':1}}" ), keys );
+ checkSize( 1, keys );
+ ASSERT_EQUALS( BSON( "" << 1 ), *keys.begin() );
+ keys.clear();
+
+ ASSERT_EXCEPTION( id().getKeysFromObject( fromjson( "{a:[{'0':1}]}" ), keys ), UserException );
+
+ ASSERT_EXCEPTION( id().getKeysFromObject( fromjson( "{a:[1,{'0':2}]}" ), keys ), UserException );
+ }
+ protected:
+ BSONObj key() const { return BSON( "a.0" << 1 ); }
+ };
+
+ class DoubleIndexedArrayIndex : public Base {
+ public:
+ void run() {
+ create();
+
+ BSONObjSet keys;
+ id().getKeysFromObject( fromjson( "{a:[[1]]}" ), keys );
+ checkSize( 1, keys );
+ ASSERT_EQUALS( fromjson( "{'':1}" ), *keys.begin() );
+ keys.clear();
+
+ id().getKeysFromObject( fromjson( "{a:[[]]}" ), keys );
+ checkSize( 1, keys );
+ ASSERT_EQUALS( fromjson( "{'':null}" ), *keys.begin() );
+ keys.clear();
+
+ id().getKeysFromObject( fromjson( "{a:[]}" ), keys );
+ checkSize( 1, keys );
+ ASSERT_EQUALS( fromjson( "{'':null}" ), *keys.begin() );
+ keys.clear();
+
+ id().getKeysFromObject( fromjson( "{a:[[[]]]}" ), keys );
+ checkSize( 1, keys );
+ ASSERT_EQUALS( fromjson( "{'':undefined}" ), *keys.begin() );
+ keys.clear();
+ }
+ protected:
+ BSONObj key() const { return BSON( "a.0.0" << 1 ); }
+ };
+
+ class ObjectWithinArray : public Base {
+ public:
+ void run() {
+ create();
+
+ BSONObjSet keys;
+ id().getKeysFromObject( fromjson( "{a:[{b:1}]}" ), keys );
+ checkSize( 1, keys );
+ ASSERT_EQUALS( fromjson( "{'':1}" ), *keys.begin() );
+ keys.clear();
+
+ id().getKeysFromObject( fromjson( "{a:[{b:[1]}]}" ), keys );
+ checkSize( 1, keys );
+ ASSERT_EQUALS( fromjson( "{'':1}" ), *keys.begin() );
+ keys.clear();
+
+ id().getKeysFromObject( fromjson( "{a:[{b:[[1]]}]}" ), keys );
+ checkSize( 1, keys );
+ ASSERT_EQUALS( fromjson( "{'':[1]}" ), *keys.begin() );
+ keys.clear();
+
+ id().getKeysFromObject( fromjson( "{a:[[{b:1}]]}" ), keys );
+ checkSize( 1, keys );
+ ASSERT_EQUALS( fromjson( "{'':1}" ), *keys.begin() );
+ keys.clear();
+
+ id().getKeysFromObject( fromjson( "{a:[[{b:[1]}]]}" ), keys );
+ checkSize( 1, keys );
+ ASSERT_EQUALS( fromjson( "{'':1}" ), *keys.begin() );
+ keys.clear();
+
+ id().getKeysFromObject( fromjson( "{a:[[{b:[[1]]}]]}" ), keys );
+ checkSize( 1, keys );
+ ASSERT_EQUALS( fromjson( "{'':[1]}" ), *keys.begin() );
+ keys.clear();
+
+ id().getKeysFromObject( fromjson( "{a:[[{b:[]}]]}" ), keys );
+ checkSize( 1, keys );
+ ASSERT_EQUALS( fromjson( "{'':undefined}" ), *keys.begin() );
+ keys.clear();
+ }
+ protected:
+ BSONObj key() const { return BSON( "a.0.b" << 1 ); }
+ };
+
+ class ArrayWithinObjectWithinArray : public Base {
+ public:
+ void run() {
+ create();
+
+ BSONObjSet keys;
+ id().getKeysFromObject( fromjson( "{a:[{b:[1]}]}" ), keys );
+ checkSize( 1, keys );
+ ASSERT_EQUALS( fromjson( "{'':1}" ), *keys.begin() );
+ keys.clear();
+ }
+ protected:
+ BSONObj key() const { return BSON( "a.0.b.0" << 1 ); }
+ };
+
+ // also test numeric string field names
+
} // namespace IndexDetailsTests
namespace NamespaceDetailsTests {
@@ -862,7 +1211,22 @@ namespace NamespaceTests {
add< IndexDetailsTests::AlternateMissing >();
add< IndexDetailsTests::MultiComplex >();
add< IndexDetailsTests::EmptyArray >();
+ add< IndexDetailsTests::DoubleArray >();
+ add< IndexDetailsTests::DoubleEmptyArray >();
add< IndexDetailsTests::MultiEmptyArray >();
+ add< IndexDetailsTests::NestedEmptyArray >();
+ add< IndexDetailsTests::MultiNestedEmptyArray >();
+ add< IndexDetailsTests::UnevenNestedEmptyArray >();
+ add< IndexDetailsTests::ReverseUnevenNestedEmptyArray >();
+ add< IndexDetailsTests::SparseReverseUnevenNestedEmptyArray >();
+ add< IndexDetailsTests::SparseEmptyArray >();
+ add< IndexDetailsTests::SparseEmptyArraySecond >();
+ add< IndexDetailsTests::NonObjectMissingNestedField >();
+ add< IndexDetailsTests::SparseNonObjectMissingNestedField >();
+ add< IndexDetailsTests::IndexedArrayIndex >();
+ add< IndexDetailsTests::DoubleIndexedArrayIndex >();
+ add< IndexDetailsTests::ObjectWithinArray >();
+ add< IndexDetailsTests::ArrayWithinObjectWithinArray >();
add< IndexDetailsTests::MissingField >();
add< IndexDetailsTests::SubobjectMissing >();
add< IndexDetailsTests::CompoundMissing >();
diff --git a/dbtests/perftests.cpp b/dbtests/perftests.cpp
index 11fda45c819..6766797a950 100644
--- a/dbtests/perftests.cpp
+++ b/dbtests/perftests.cpp
@@ -36,6 +36,7 @@
#include "../util/checksum.h"
#include "../util/version.h"
#include "../db/key.h"
+#include "../util/compress.h"
using namespace bson;
@@ -118,7 +119,7 @@ namespace PerfTests {
// optional 2nd test phase to be timed separately
// return name of it
- virtual const char * timed2() { return 0; }
+ virtual string timed2() { return ""; }
virtual void post() { }
@@ -133,8 +134,68 @@ namespace PerfTests {
virtual bool showDurStats() { return true; }
static DBClientConnection *conn;
+ static unsigned once;
public:
+ /* if you want recording of the timings, place the password for the perf database
+ in ./../settings.py:
+ pstatspassword="<pwd>"
+ */
+ void connect() {
+ if( once )
+ return;
+ ++once;
+
+ // no writing to perf db if _DEBUG
+ DEV return;
+
+ const char *fn = "../../settings.py";
+ if( !exists(fn) ) {
+ if( exists("settings.py") )
+ fn = "settings.py";
+ else {
+ cout << "no ../../settings.py or ./settings.py file found. will not write perf stats to pstats db." << endl;
+ cout << "it is recommended this be enabled even on dev boxes" << endl;
+ return;
+ }
+ }
+
+ try {
+ if( conn == 0 ) {
+ MemoryMappedFile f;
+ const char *p = (const char *) f.mapWithOptions(fn, MongoFile::READONLY);
+ string pwd;
+
+ {
+ const char *q = str::after(p, "pstatspassword=\"");
+ if( *q == 0 ) {
+ cout << "info perftests.cpp: no pstatspassword= in settings.py" << endl;
+ return;
+ }
+ else {
+ pwd = str::before(q, '\"');
+ }
+ }
+
+ DBClientConnection *c = new DBClientConnection(false, 0, 10);
+ string err;
+ if( c->connect("perfdb.10gen.cc", err) ) {
+ if( !c->auth("perf", "perf", pwd, err) ) {
+ cout << "info: authentication with stats db failed: " << err << endl;
+ assert(false);
+ }
+ conn = c;
+ }
+ else {
+ cout << err << " (to log perfstats)" << endl;
+ }
+ }
+ }
+ catch(...) { }
+ }
+
+ virtual unsigned batchSize() { return 50; }
+
void say(unsigned long long n, int ms, string s) {
unsigned long long rps = n*1000/ms;
cout << "stats " << setw(33) << left << s << ' ' << right << setw(9) << rps << ' ' << right << setw(5) << ms << "ms ";
@@ -142,124 +203,70 @@ namespace PerfTests {
cout << dur::stats.curr->_asCSV();
cout << endl;
- /* if you want recording of the timings, place the password for the perf database
- in ./../settings.py:
- pstatspassword="<pwd>"
- */
- const char *fn = "../../settings.py";
- static bool ok = true;
- if( ok ) {
- DEV {
- // no writing to perf db if dev
- }
- else if( !exists(fn) ) {
- static int once;
- if( exists("settings.py") )
- fn = "settings.py";
- else if( once++ == 0 ) {
- cout << "no ../../settings.py or ./settings.py file found. will not write perf stats to pstats db." << endl;
- cout << "it is recommended this be enabled even on dev boxes" << endl;
- }
- }
- else {
- try {
- if( conn == 0 ) {
- MemoryMappedFile f;
- const char *p = (const char *) f.mapWithOptions(fn, MongoFile::READONLY);
- string pwd;
-
- {
- const char *q = str::after(p, "pstatspassword=\"");
- if( *q == 0 ) {
- cout << "info perftests.cpp: no pstatspassword= in settings.py" << endl;
- ok = false;
- }
- else {
- pwd = str::before(q, '\"');
- }
- }
+ connect();
- if( ok ) {
- conn = new DBClientConnection(false, 0, 10);
- string err;
- if( conn->connect("mongo05.10gen.cust.cbici.net", err) ) {
- if( !conn->auth("perf", "perf", pwd, err) ) {
- cout << "info: authentication with stats db failed: " << err << endl;
- assert(false);
- }
- }
- else {
- cout << err << " (to log perfstats)" << endl;
- ok = false;
+ if( conn && !conn->isFailed() ) {
+ const char *ns = "perf.pstats";
+ if( perfHist ) {
+ static bool needver = true;
+ try {
+ // try to report rps from last time */
+ Query q;
+ {
+ BSONObjBuilder b;
+ b.append("host",getHostName()).append("test",s).append("dur",cmdLine.dur);
+ DEV { b.append("info.DEBUG",true); }
+ else b.appendNull("info.DEBUG");
+ if( sizeof(int*) == 4 )
+ b.append("info.bits", 32);
+ else
+ b.appendNull("info.bits");
+ q = Query(b.obj()).sort("when",-1);
+ }
+ BSONObj fields = BSON( "rps" << 1 << "info" << 1 );
+ vector<BSONObj> v;
+ conn->findN(v, ns, q, perfHist, 0, &fields);
+ for( vector<BSONObj>::iterator i = v.begin(); i != v.end(); i++ ) {
+ BSONObj o = *i;
+ double lastrps = o["rps"].Number();
+ if( lastrps ) {
+ cout << "stats " << setw(33) << right << "new/old:" << ' ' << setw(9);
+ cout << fixed << setprecision(2) << rps / lastrps;
+ if( needver ) {
+ cout << " " << o.getFieldDotted("info.git").toString();
}
+ cout << '\n';
}
}
- if( conn && !conn->isFailed() ) {
- const char *ns = "perf.pstats";
- if( perfHist ) {
- static bool needver = true;
- try {
- // try to report rps from last time */
- Query q;
- {
- BSONObjBuilder b;
- b.append("host",getHostName()).append("test",s).append("dur",cmdLine.dur);
- DEV b.append("info.DEBUG",true);
- else b.appendNull("info.DEBUG");
- if( sizeof(int*) == 4 ) b.append("info.bits", 32);
- else b.appendNull("info.bits");
- q = Query(b.obj()).sort("when",-1);
- }
- //cout << q.toString() << endl;
- BSONObj fields = BSON( "rps" << 1 << "info" << 1 );
- vector<BSONObj> v;
- conn->findN(v, ns, q, perfHist, 0, &fields);
- for( vector<BSONObj>::iterator i = v.begin(); i != v.end(); i++ ) {
- BSONObj o = *i;
- double lastrps = o["rps"].Number();
- if( lastrps ) {
- cout << "stats " << setw(33) << right << "new/old:" << ' ' << setw(9);
- cout << fixed << setprecision(2) << rps / lastrps;
- if( needver ) {
- cout << " " << o.getFieldDotted("info.git").toString();
- }
- cout << '\n';
- }
- }
- } catch(...) { }
- cout.flush();
- needver = false;
- }
- {
- bob b;
- b.append("host", getHostName());
- b.appendTimeT("when", time(0));
- b.append("test", s);
- b.append("rps", (int) rps);
- b.append("millis", ms);
- b.appendBool("dur", cmdLine.dur);
- if( showDurStats() && cmdLine.dur )
- b.append("durStats", dur::stats.curr->_asObj());
- {
- bob inf;
- inf.append("version", versionString);
- if( sizeof(int*) == 4 ) inf.append("bits", 32);
- DEV inf.append("DEBUG", true);
+ } catch(...) { }
+ cout.flush();
+ needver = false;
+ }
+ {
+ bob b;
+ b.append("host", getHostName());
+ b.appendTimeT("when", time(0));
+ b.append("test", s);
+ b.append("rps", (int) rps);
+ b.append("millis", ms);
+ b.appendBool("dur", cmdLine.dur);
+ if( showDurStats() && cmdLine.dur )
+ b.append("durStats", dur::stats.curr->_asObj());
+ {
+ bob inf;
+ inf.append("version", versionString);
+ if( sizeof(int*) == 4 ) inf.append("bits", 32);
+ DEV inf.append("DEBUG", true);
#if defined(_WIN32)
- inf.append("os", "win");
+ inf.append("os", "win");
#endif
- inf.append("git", gitVersion());
- inf.append("boost", BOOST_VERSION);
- b.append("info", inf.obj());
- }
- BSONObj o = b.obj();
- //cout << "inserting " << o.toString() << endl;
- conn->insert(ns, o);
- }
- }
- }
- catch(...) {
+ inf.append("git", gitVersion());
+ inf.append("boost", BOOST_VERSION);
+ b.append("info", inf.obj());
}
+ BSONObj o = b.obj();
+ //cout << "inserting " << o.toString() << endl;
+ conn->insert(ns, o);
}
}
}
@@ -277,9 +284,9 @@ namespace PerfTests {
dur::stats._intervalMicros = 0; // no auto rotate
dur::stats.curr->reset();
- Timer t;
+ mongo::Timer t;
unsigned long long n = 0;
- const unsigned Batch = 50;
+ const unsigned Batch = batchSize();
if( hlm == 0 ) {
// means just do once
@@ -314,10 +321,10 @@ namespace PerfTests {
post();
{
- const char *test2name = timed2();
- if( test2name ) {
+ string test2name = timed2();
+ if( test2name.size() != 0 ) {
dur::stats.curr->reset();
- Timer t;
+ mongo::Timer t;
unsigned long long n = 0;
while( 1 ) {
unsigned i;
@@ -335,6 +342,7 @@ namespace PerfTests {
};
DBClientConnection *B::conn;
+ unsigned B::once;
unsigned dontOptimizeOutHopefully;
@@ -598,6 +606,48 @@ namespace PerfTests {
virtual bool showDurStats() { return false; }
};
+ class Compress : public B {
+ public:
+ const unsigned sz;
+ void *p;
+ Compress() : sz(1024*1024*100+3) { }
+ virtual unsigned batchSize() { return 1; }
+ string name() { return "compress"; }
+ virtual bool showDurStats() { return false; }
+ virtual int howLongMillis() { return 4000; }
+ unsigned long long expectation() { return 1000000; }
+ void prep() {
+ p = malloc(sz);
+ // this isn't a fair test as it is mostly rands but we just want a rough perf check
+ static int last;
+ for (unsigned i = 0; i<sz; i++) {
+ int r = rand();
+ if( (r & 0x300) == 0x300 )
+ r = last;
+ ((char*)p)[i] = r;
+ last = r;
+ }
+ }
+ size_t last;
+ string res;
+ void timed() {
+ mongo::Timer t;
+ string out;
+ size_t len = compress((const char *) p, sz, &out);
+ bool ok = uncompress(out.c_str(), out.size(), &res);
+ ASSERT(ok);
+ static unsigned once;
+ if( once++ == 0 )
+ cout << "compress round trip " << sz/(1024.0*1024) / (t.millis()/1000.0) << "MB/sec\n";
+ //cout << len / (1024.0/1024) << " compressed" << endl;
+ (void)len; //fix unused error while above line is commented out
+ }
+ void post() {
+ ASSERT( memcmp(res.c_str(), p, sz) == 0 );
+ free(p);
+ }
+ };
+
// test speed of checksum method
class ChecksumTest : public B {
public:
@@ -607,6 +657,7 @@ namespace PerfTests {
virtual int howLongMillis() { return 2000; }
int expectationTimeMillis() { return 5000; }
virtual bool showDurStats() { return false; }
+ virtual unsigned batchSize() { return 1; }
void *p;
@@ -684,7 +735,7 @@ namespace PerfTests {
void timed() {
client().insert( ns(), x );
}
- const char * timed2() {
+ string timed2() {
client().findOne(ns(), query);
return "findOne_by_id";
}
@@ -753,7 +804,7 @@ namespace PerfTests {
client().update(ns(), q, y, /*upsert*/true);
}
- const char * timed2() {
+ virtual string timed2() {
static BSONObj I = BSON( "$inc" << BSON( "y" << 1 ) );
// test some $inc's
@@ -762,8 +813,7 @@ namespace PerfTests {
BSONObj q = BSON("x" << x);
client().update(ns(), q, I);
- static string s = name()+"-inc";
- return s.c_str();
+ return name()+"-inc";
}
unsigned long long expectation() { return 1000; }
@@ -778,6 +828,16 @@ namespace PerfTests {
this->client().ensureIndex(this->ns(), BSON("y"<<1));
this->client().ensureIndex(this->ns(), BSON("z"<<1));
}
+
+ /*
+ virtual string timed2() {
+ string x = T::timed2();
+ if ( x.size() == 0 )
+ return x;
+
+ return x + "-with-more-indexes";
+ }
+ */
};
void t() {
@@ -822,6 +882,8 @@ namespace PerfTests {
}
else {
add< Dummy >();
+ add< ChecksumTest >();
+ add< Compress >();
add< TLS >();
add< Malloc >();
add< Timer >();
@@ -838,7 +900,6 @@ namespace PerfTests {
add< BSONIter >();
add< BSONGetFields1 >();
add< BSONGetFields2 >();
- add< ChecksumTest >();
add< TaskQueueTest >();
add< InsertDup >();
add< Insert1 >();
diff --git a/dbtests/queryoptimizertests.cpp b/dbtests/queryoptimizertests.cpp
index bd597572d52..83a2d267c57 100644
--- a/dbtests/queryoptimizertests.cpp
+++ b/dbtests/queryoptimizertests.cpp
@@ -104,7 +104,7 @@ namespace QueryOptimizerTests {
auto_ptr< FieldRangeSetPair > FieldRangeSetPair_GLOBAL;
#define FRSP(x) ( FieldRangeSetPair_GLOBAL.reset( new FieldRangeSetPair( ns(), x ) ), *FieldRangeSetPair_GLOBAL )
auto_ptr< FieldRangeSetPair > FieldRangeSetPair_GLOBAL2;
-#define FRSP2(x) ( FieldRangeSetPair_GLOBAL2.reset( new FieldRangeSetPair( ns(), x ) ), *FieldRangeSetPair_GLOBAL2 )
+#define FRSP2(x) ( FieldRangeSetPair_GLOBAL2.reset( new FieldRangeSetPair( ns(), x ) ), FieldRangeSetPair_GLOBAL2.get() )
class NoIndex : public Base {
public:
@@ -886,7 +886,7 @@ namespace QueryOptimizerTests {
}
BSONObj hint = fromjson( "{$hint:{a:1,b:1}}" );
auto_ptr< FieldRangeSetPair > frsp( new FieldRangeSetPair( ns(), fromjson( "{a:5,b:{$in:[2,3,6,9,11]}}" ) ) );
- QueryPlan qp( nsd(), 1, *frsp, *frsp, fromjson( "{a:5,b:{$in:[2,3,6,9,11]}}" ), BSONObj() );
+ QueryPlan qp( nsd(), 1, *frsp, frsp.get(), fromjson( "{a:5,b:{$in:[2,3,6,9,11]}}" ), BSONObj() );
boost::shared_ptr<Cursor> c = qp.newCursor();
double expected[] = { 2, 3, 6, 9 };
ASSERT( c->ok() );
@@ -908,7 +908,7 @@ namespace QueryOptimizerTests {
}
BSONObj hint = fromjson( "{$hint:{a:1,b:1}}" );
auto_ptr< FieldRangeSetPair > frsp( new FieldRangeSetPair( ns(), fromjson( "{a:{$gte:5},b:{$in:[2,3,6,9,11]}}" ) ) );
- QueryPlan qp( nsd(), 1, *frsp, *frsp, fromjson( "{a:{$gte:5},b:{$in:[2,3,6,9,11]}}" ), BSONObj() );
+ QueryPlan qp( nsd(), 1, *frsp, frsp.get(), fromjson( "{a:{$gte:5},b:{$in:[2,3,6,9,11]}}" ), BSONObj() );
boost::shared_ptr<Cursor> c = qp.newCursor();
int matches[] = { 2, 3, 6, 9 };
for( int i = 0; i < 4; ++i, c->advance() ) {
@@ -1900,18 +1900,19 @@ namespace QueryOptimizerTests {
public:
void run() {
_cli.createCollection( ns(), 1000, true );
- _cli.insert( ns(), BSON( "_id" << 1 ) );
+ _cli.insert( ns(), BSON( "x" << 1 ) );
{
dblock lk;
Client::Context ctx( ns() );
- setQueryOptimizerCursor( BSON( "_id" << GT << 0 ) );
- ASSERT_EQUALS( 1, current().getIntField( "_id" ) );
+ setQueryOptimizerCursor( BSON( "x" << GT << 0 ) );
+ ASSERT_EQUALS( 1, current().getIntField( "x" ) );
ASSERT( prepareToYield() );
}
-
- while( _cli.count( ns(), BSON( "_id" << 1 ) ) > 0 ) {
- _cli.insert( ns(), BSONObj() );
+
+ int x = 2;
+ while( _cli.count( ns(), BSON( "x" << 1 ) ) > 0 ) {
+ _cli.insert( ns(), BSON( "x" << x++ ) );
}
{
@@ -2088,26 +2089,26 @@ namespace QueryOptimizerTests {
public:
void run() {
_cli.createCollection( ns(), 1000, true );
- _cli.insert( ns(), BSON( "_id" << 1 << "a" << 1 ) );
- _cli.ensureIndex( ns(), BSON( "_id" << 1 ) );
+ _cli.insert( ns(), BSON( "a" << 1 << "b" << 1 ) );
+ _cli.ensureIndex( ns(), BSON( "a" << 1 ) );
shared_ptr<Cursor> c;
{
dblock lk;
Client::Context ctx( ns() );
- c = newQueryOptimizerCursor( ns(), BSON( "_id" << GT << 0 << "a" << GT << 0 ) );
- ASSERT_EQUALS( 1, c->current().getIntField( "_id" ) );
+ c = newQueryOptimizerCursor( ns(), BSON( "a" << GT << 0 << "b" << GT << 0 ) );
+ ASSERT_EQUALS( 1, c->current().getIntField( "a" ) );
ASSERT( !c->getsetdup( c->currLoc() ) );
c->advance();
- ASSERT_EQUALS( 1, c->current().getIntField( "_id" ) );
+ ASSERT_EQUALS( 1, c->current().getIntField( "a" ) );
ASSERT( c->getsetdup( c->currLoc() ) );
ASSERT( c->prepareToYield() );
}
int i = 1;
- while( _cli.count( ns(), BSON( "_id" << 1 ) ) > 0 ) {
+ while( _cli.count( ns(), BSON( "a" << 1 ) ) > 0 ) {
++i;
- _cli.insert( ns(), BSON( "_id" << i << "a" << i ) );
+ _cli.insert( ns(), BSON( "a" << i << "b" << i ) );
}
{
@@ -2116,7 +2117,7 @@ namespace QueryOptimizerTests {
c->recoverFromYield();
ASSERT( c->ok() );
// {$natural:1} plan does not recover, {_id:1} plan does.
- ASSERT( 1 < c->current().getIntField( "_id" ) );
+ ASSERT( 1 < c->current().getIntField( "a" ) );
}
}
};
diff --git a/dbtests/querytests.cpp b/dbtests/querytests.cpp
index a50eadfcd31..694053b10a8 100644
--- a/dbtests/querytests.cpp
+++ b/dbtests/querytests.cpp
@@ -361,6 +361,7 @@ namespace QueryTests {
void insertA(const char* ns, int a) {
BSONObjBuilder b;
b.appendOID("_id", 0, true);
+ b.appendOID("value", 0, true);
b.append("a", a);
insert(ns, b.obj());
}
@@ -374,7 +375,7 @@ namespace QueryTests {
auto_ptr< DBClientCursor > c1 = client().query( ns, QUERY( "a" << GT << -1 ), 0, 0, 0, QueryOption_CursorTailable );
OID id;
id.init("000000000000000000000000");
- auto_ptr< DBClientCursor > c2 = client().query( ns, QUERY( "_id" << GT << id ), 0, 0, 0, QueryOption_CursorTailable );
+ auto_ptr< DBClientCursor > c2 = client().query( ns, QUERY( "value" << GT << id ), 0, 0, 0, QueryOption_CursorTailable );
c1->next();
c1->next();
ASSERT( !c1->more() );
@@ -399,7 +400,6 @@ namespace QueryTests {
}
void run() {
const char *ns = "unittests.querytests.OplogReplayMode";
- insert( ns, BSON( "ts" << 3 ) );
insert( ns, BSON( "ts" << 0 ) );
insert( ns, BSON( "ts" << 1 ) );
insert( ns, BSON( "ts" << 2 ) );
@@ -407,6 +407,12 @@ namespace QueryTests {
ASSERT( c->more() );
ASSERT_EQUALS( 2, c->next().getIntField( "ts" ) );
ASSERT( !c->more() );
+
+ insert( ns, BSON( "ts" << 3 ) );
+ c = client().query( ns, QUERY( "ts" << GT << 1 ).hint( BSON( "$natural" << 1 ) ), 0, 0, 0, QueryOption_OplogReplay );
+ ASSERT( c->more() );
+ ASSERT_EQUALS( 2, c->next().getIntField( "ts" ) );
+ ASSERT( c->more() );
}
};
@@ -1146,7 +1152,35 @@ namespace QueryTests {
private:
int _old;
};
+
+ /**
+ * Check OplogReplay mode where query timestamp is earlier than the earliest
+ * entry in the collection.
+ */
+ class FindingStartStale : public CollectionBase {
+ public:
+ FindingStartStale() : CollectionBase( "findingstart" ) {}
+ void run() {
+ unsigned startNumCursors = ClientCursor::numCursors();
+
+ BSONObj info;
+ ASSERT( client().runCommand( "unittests", BSON( "create" << "querytests.findingstart" << "capped" << true << "$nExtents" << 5 << "autoIndexId" << false ), info ) );
+
+ // Check OplogReplay mode with empty collection.
+ auto_ptr< DBClientCursor > c = client().query( ns(), QUERY( "ts" << GTE << 50 ), 0, 0, 0, QueryOption_OplogReplay );
+ ASSERT( !c->more() );
+
+ // Check with some docs in the collection.
+ for( int i = 100; i < 150; client().insert( ns(), BSON( "ts" << i++ ) ) );
+ c = client().query( ns(), QUERY( "ts" << GTE << 50 ), 0, 0, 0, QueryOption_OplogReplay );
+ ASSERT( c->more() );
+ ASSERT_EQUALS( 100, c->next()[ "ts" ].numberInt() );
+
+ // Check that no persistent cursors outlast our queries above.
+ ASSERT_EQUALS( startNumCursors, ClientCursor::numCursors() );
+ }
+ };
class WhatsMyUri : public CollectionBase {
public:
@@ -1362,6 +1396,7 @@ namespace QueryTests {
add< HelperTest >();
add< HelperByIdTest >();
add< FindingStartPartiallyFull >();
+ add< FindingStartStale >();
add< WhatsMyUri >();
add< parsedtests::basic1 >();
diff --git a/dbtests/repltests.cpp b/dbtests/repltests.cpp
index ecaacf74874..2bf522555ab 100644
--- a/dbtests/repltests.cpp
+++ b/dbtests/repltests.cpp
@@ -25,6 +25,8 @@
#include "../db/json.h"
#include "dbtests.h"
+#include "../db/oplog.h"
+#include "../db/queryoptimizer.h"
namespace mongo {
void createOplog();
@@ -1049,6 +1051,31 @@ namespace ReplTests {
}
};
+ /**
+ * Check against oldest document in the oplog before scanning backward
+ * from the newest document.
+ */
+ class FindingStartCursorStale : public Base {
+ public:
+ void run() {
+ for( int i = 0; i < 10; ++i ) {
+ client()->insert( ns(), BSON( "_id" << i ) );
+ }
+ dblock lk;
+ Client::Context ctx( cllNS() );
+ NamespaceDetails *nsd = nsdetails( cllNS() );
+ BSONObjBuilder b;
+ b.appendTimestamp( "$gte" );
+ BSONObj query = BSON( "ts" << b.obj() );
+ FieldRangeSetPair frsp( cllNS(), query );
+ BSONObj order = BSON( "$natural" << 1 );
+ QueryPlan qp( nsd, -1, frsp, &frsp, query, order );
+ FindingStartCursor fsc( qp );
+ ASSERT( fsc.done() );
+ ASSERT_EQUALS( 0, fsc.cursor()->current()[ "o" ].Obj()[ "_id" ].Int() );
+ }
+ };
+
class All : public Suite {
public:
All() : Suite( "repl" ) {
@@ -1103,6 +1130,7 @@ namespace ReplTests {
add< DeleteOpIsIdBased >();
add< DatabaseIgnorerBasic >();
add< DatabaseIgnorerUpdate >();
+ add< FindingStartCursorStale >();
}
} myall;
diff --git a/dbtests/test.sln b/dbtests/test.sln
new file mode 100755
index 00000000000..3a1b741c716
--- /dev/null
+++ b/dbtests/test.sln
@@ -0,0 +1,26 @@
+
+Microsoft Visual Studio Solution File, Format Version 11.00
+# Visual Studio 2010
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "test", "test.vcxproj", "{215B2D68-0A70-4D10-8E75-B33010C62A91}"
+EndProject
+Global
+ GlobalSection(SolutionConfigurationPlatforms) = preSolution
+ Debug|Win32 = Debug|Win32
+ Debug|x64 = Debug|x64
+ Release|Win32 = Release|Win32
+ Release|x64 = Release|x64
+ EndGlobalSection
+ GlobalSection(ProjectConfigurationPlatforms) = postSolution
+ {215B2D68-0A70-4D10-8E75-B33010C62A91}.Debug|Win32.ActiveCfg = Debug|Win32
+ {215B2D68-0A70-4D10-8E75-B33010C62A91}.Debug|Win32.Build.0 = Debug|Win32
+ {215B2D68-0A70-4D10-8E75-B33010C62A91}.Debug|x64.ActiveCfg = Debug|x64
+ {215B2D68-0A70-4D10-8E75-B33010C62A91}.Debug|x64.Build.0 = Debug|x64
+ {215B2D68-0A70-4D10-8E75-B33010C62A91}.Release|Win32.ActiveCfg = Release|Win32
+ {215B2D68-0A70-4D10-8E75-B33010C62A91}.Release|Win32.Build.0 = Release|Win32
+ {215B2D68-0A70-4D10-8E75-B33010C62A91}.Release|x64.ActiveCfg = Release|x64
+ {215B2D68-0A70-4D10-8E75-B33010C62A91}.Release|x64.Build.0 = Release|x64
+ EndGlobalSection
+ GlobalSection(SolutionProperties) = preSolution
+ HideSolutionNode = FALSE
+ EndGlobalSection
+EndGlobal
diff --git a/dbtests/test.vcxproj b/dbtests/test.vcxproj
index 1460e9d45d1..fde77d2d20b 100644
--- a/dbtests/test.vcxproj
+++ b/dbtests/test.vcxproj
@@ -259,8 +259,16 @@
<ClInclude Include="..\db\resource.h" />
<ClInclude Include="..\db\scanandorder.h" />
<ClInclude Include="..\db\security.h" />
+ <ClInclude Include="..\third_party\snappy\config.h" />
+ <ClInclude Include="..\third_party\snappy\snappy-c.h" />
+ <ClInclude Include="..\third_party\snappy\snappy-internal.h" />
+ <ClInclude Include="..\third_party\snappy\snappy-sinksource.h" />
+ <ClInclude Include="..\third_party\snappy\snappy-stubs-internal.h" />
+ <ClInclude Include="..\third_party\snappy\snappy-stubs-public.h" />
+ <ClInclude Include="..\third_party\snappy\snappy.h" />
<ClInclude Include="..\util\builder.h" />
<ClInclude Include="..\util\checksum.h" />
+ <ClInclude Include="..\util\compress.h" />
<ClInclude Include="..\util\concurrency\list.h" />
<ClInclude Include="..\util\concurrency\task.h" />
<ClInclude Include="..\util\concurrency\value.h" />
@@ -325,6 +333,7 @@
<ClCompile Include="..\db\repl\rs_rollback.cpp" />
<ClCompile Include="..\db\repl\rs_sync.cpp" />
<ClCompile Include="..\db\restapi.cpp" />
+ <ClCompile Include="..\db\scanandorder.cpp" />
<ClCompile Include="..\db\security_common.cpp" />
<ClCompile Include="..\pcre-7.4\pcrecpp.cc">
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
@@ -609,9 +618,27 @@
<ClCompile Include="..\s\shard.cpp" />
<ClCompile Include="..\s\shardconnection.cpp" />
<ClCompile Include="..\s\shardkey.cpp" />
+ <ClCompile Include="..\third_party\snappy\snappy-sinksource.cc">
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">NotUsing</PrecompiledHeader>
+ </ClCompile>
+ <ClCompile Include="..\third_party\snappy\snappy.cc">
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">NotUsing</PrecompiledHeader>
+ </ClCompile>
<ClCompile Include="..\util\alignedbuilder.cpp">
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">NotUsing</PrecompiledHeader>
</ClCompile>
+ <ClCompile Include="..\util\compress.cpp">
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">NotUsing</PrecompiledHeader>
+ <PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Release|x64'">NotUsing</PrecompiledHeader>
+ </ClCompile>
<ClCompile Include="..\util\concurrency\spin_lock.cpp">
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">NotUsing</PrecompiledHeader>
</ClCompile>
diff --git a/dbtests/test.vcxproj.filters b/dbtests/test.vcxproj.filters
index 1c832cd17ba..35e85fba416 100755
--- a/dbtests/test.vcxproj.filters
+++ b/dbtests/test.vcxproj.filters
@@ -56,6 +56,9 @@
<Filter Include="bson">
<UniqueIdentifier>{e6652333-c77f-420c-af8e-72d55bc095fe}</UniqueIdentifier>
</Filter>
+ <Filter Include="misc and third party\snappy">
+ <UniqueIdentifier>{fbc4416f-ca67-4e63-a1ea-49027de7e080}</UniqueIdentifier>
+ </Filter>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\..\boostw\boost_1_34_1\boost\config\auto_link.hpp">
@@ -304,6 +307,30 @@
<ClInclude Include="..\server.h">
<Filter>db\h</Filter>
</ClInclude>
+ <ClInclude Include="..\third_party\snappy\config.h">
+ <Filter>misc and third party\snappy</Filter>
+ </ClInclude>
+ <ClInclude Include="..\third_party\snappy\snappy.h">
+ <Filter>misc and third party\snappy</Filter>
+ </ClInclude>
+ <ClInclude Include="..\third_party\snappy\snappy-c.h">
+ <Filter>misc and third party\snappy</Filter>
+ </ClInclude>
+ <ClInclude Include="..\third_party\snappy\snappy-internal.h">
+ <Filter>misc and third party\snappy</Filter>
+ </ClInclude>
+ <ClInclude Include="..\third_party\snappy\snappy-sinksource.h">
+ <Filter>misc and third party\snappy</Filter>
+ </ClInclude>
+ <ClInclude Include="..\third_party\snappy\snappy-stubs-internal.h">
+ <Filter>misc and third party\snappy</Filter>
+ </ClInclude>
+ <ClInclude Include="..\third_party\snappy\snappy-stubs-public.h">
+ <Filter>misc and third party\snappy</Filter>
+ </ClInclude>
+ <ClInclude Include="..\util\compress.h">
+ <Filter>misc and third party</Filter>
+ </ClInclude>
</ItemGroup>
<ItemGroup>
<Library Include="..\..\js\js64r.lib">
@@ -857,6 +884,18 @@
<ClCompile Include="..\util\concurrency\spin_lock.cpp">
<Filter>util\concurrency</Filter>
</ClCompile>
+ <ClCompile Include="..\third_party\snappy\snappy.cc">
+ <Filter>misc and third party\snappy</Filter>
+ </ClCompile>
+ <ClCompile Include="..\util\compress.cpp">
+ <Filter>misc and third party</Filter>
+ </ClCompile>
+ <ClCompile Include="..\third_party\snappy\snappy-sinksource.cc">
+ <Filter>misc and third party\snappy</Filter>
+ </ClCompile>
+ <ClCompile Include="..\db\scanandorder.cpp">
+ <Filter>db\cpp</Filter>
+ </ClCompile>
</ItemGroup>
<ItemGroup>
<None Include="..\SConstruct">
diff --git a/debian/changelog b/debian/changelog
index abc4a2bce28..d1e37c93b1d 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,15 @@
+mongodb (1.9.2) unstable; urgency=low
+
+ * see http://jira.mongodb.org/browse/SERVER/fixforversion/10261
+
+ -- Richard Kreuter <richard@10gen.com> Thu, 11 Aug 2011 16:56:28 -0500
+
+mongodb (1.9.1) unstable; urgency=low
+
+ * see http://jira.mongodb.org/browse/SERVER/fixforversion/10261
+
+ -- Richard Kreuter <richard@10gen.com> Tue, 26 Jul 2011 16:56:28 -0500
+
mongodb (1.9.0) unstable; urgency=low
* see http://jira.mongodb.org/browse/SERVER/fixforversion/10232
diff --git a/distsrc/client/SConstruct b/distsrc/client/SConstruct
index c2d309a4e5a..54fc9437d3c 100755
--- a/distsrc/client/SConstruct
+++ b/distsrc/client/SConstruct
@@ -41,7 +41,7 @@ linux = False
if "darwin" == os.sys.platform:
addExtraLibs( "/opt/local/" )
nix = True
-elif "linux2" == os.sys.platform:
+elif "linux2" == os.sys.platform or "linux3" == os.sys.platform:
nix = True
linux = True
diff --git a/doxygenConfig b/doxygenConfig
index 577ce0119c5..3d873903fe8 100644
--- a/doxygenConfig
+++ b/doxygenConfig
@@ -3,7 +3,7 @@
#---------------------------------------------------------------------------
DOXYFILE_ENCODING = UTF-8
PROJECT_NAME = MongoDB
-PROJECT_NUMBER = 1.9.1-pre-
+PROJECT_NUMBER = 2.0.0-rc0-pre-
OUTPUT_DIRECTORY = docs/doxygen
CREATE_SUBDIRS = NO
OUTPUT_LANGUAGE = English
diff --git a/jstests/ageoutjournalfiles.js b/jstests/ageoutjournalfiles.js
new file mode 100644
index 00000000000..f7fe2275480
--- /dev/null
+++ b/jstests/ageoutjournalfiles.js
@@ -0,0 +1,16 @@
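+// Check that the ageOutJournalFiles server parameter can be toggled with setParameter
+// and is reflected in serverStatus().dur (only meaningful when journaling is enabled).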
+if (db.serverStatus().dur) {
+
+ assert(db.serverStatus().dur.ageOutJournalFiles != false);
+
+ db.adminCommand({ setParameter: 1, ageOutJournalFiles: false });
+
+ assert(db.serverStatus().dur.ageOutJournalFiles == false);
+
+ db.adminCommand({ setParameter: 1, ageOutJournalFiles: true });
+
+ assert(db.serverStatus().dur.ageOutJournalFiles != false);
+
+}
+else {
+// print("dur is off");
+} \ No newline at end of file
diff --git a/jstests/array_match3.js b/jstests/array_match3.js
index 06ee926a6a6..c8653430770 100644
--- a/jstests/array_match3.js
+++ b/jstests/array_match3.js
@@ -10,6 +10,4 @@ assert.eq( 2, t.count( {'a.0':5} ) );
// Test with index.
t.ensureIndex( {'a.0':1} );
-if ( 0 ) { // SERVER-2902
assert.eq( 2, t.count( {'a.0':5} ) );
-}
diff --git a/jstests/arrayfind4.js b/jstests/arrayfind4.js
new file mode 100644
index 00000000000..b141425f2e9
--- /dev/null
+++ b/jstests/arrayfind4.js
@@ -0,0 +1,22 @@
+// Test querying for an empty array value. SERVER-2258
+
+t = db.jstests_arrayfind4;
+t.drop();
+
+t.save( {a:[]} );
+t.ensureIndex( {a:1} );
+
+assert.eq( 1, t.find( {a:[]} ).hint( {$natural:1} ).itcount() );
+assert.eq( 1, t.find( {a:[]} ).hint( {a:1} ).itcount() );
+
+assert.eq( 1, t.find( {a:{$in:[[]]}} ).hint( {$natural:1} ).itcount() );
+assert.eq( 1, t.find( {a:{$in:[[]]}} ).hint( {a:1} ).itcount() );
+
+t.remove();
+t.save( {a:[[]]} );
+
+assert.eq( 1, t.find( {a:[]} ).hint( {$natural:1} ).itcount() );
+assert.eq( 1, t.find( {a:[]} ).hint( {a:1} ).itcount() );
+
+assert.eq( 1, t.find( {a:{$in:[[]]}} ).hint( {$natural:1} ).itcount() );
+assert.eq( 1, t.find( {a:{$in:[[]]}} ).hint( {a:1} ).itcount() );
diff --git a/jstests/arrayfind5.js b/jstests/arrayfind5.js
new file mode 100644
index 00000000000..083dc0622c8
--- /dev/null
+++ b/jstests/arrayfind5.js
@@ -0,0 +1,23 @@
+// Test indexed elemmatch of missing field.
+
+t = db.jstests_arrayfind5;
+t.drop();
+
+function check( nullElemMatch ) {
+ assert.eq( 1, t.find( {'a.b':1} ).itcount() );
+ assert.eq( 1, t.find( {a:{$elemMatch:{b:1}}} ).itcount() );
+ assert.eq( 0, t.find( {'a.b':null} ).itcount() );
+ assert.eq( nullElemMatch ? 1 : 0, t.find( {a:{$elemMatch:{b:null}}} ).itcount() ); // see SERVER-3377
+}
+
+t.save( {a:[{},{b:1}]} );
+check( true );
+t.ensureIndex( {'a.b':1} );
+check( true );
+
+t.drop();
+
+t.save( {a:[5,{b:1}]} );
+check( false );
+t.ensureIndex( {'a.b':1} );
+check( false );
diff --git a/jstests/capped2.js b/jstests/capped2.js
index 1f8bf1d01c6..65bb82f4c07 100644
--- a/jstests/capped2.js
+++ b/jstests/capped2.js
@@ -47,7 +47,7 @@ function checkDecreasing( i ) {
for( i = 0 ;; ++i ) {
debug( "capped 2: " + i );
- tzz.save( val[ i ] );
+ tzz.insert( val[ i ] );
if ( tzz.count() == 0 ) {
assert( i > 100, "K" );
break;
@@ -57,6 +57,6 @@ for( i = 0 ;; ++i ) {
for( i = 600 ; i >= 0 ; --i ) {
debug( "capped 2: " + i );
- tzz.save( val[ i ] );
+ tzz.insert( val[ i ] );
checkDecreasing( i );
}
diff --git a/jstests/capped5.js b/jstests/capped5.js
index f56d2278a7e..be6c27d7256 100644
--- a/jstests/capped5.js
+++ b/jstests/capped5.js
@@ -9,7 +9,6 @@ db.createCollection( tn , {capped: true, size: 1024 * 1024 * 1 } );
t.insert( { _id : 5 , x : 11 , z : 52 } );
assert.eq( 0 , t.getIndexKeys().length , "A0" )
assert.eq( 52 , t.findOne( { x : 11 } ).z , "A1" );
-assert.eq( 52 , t.findOne( { _id : 5, x : 11 } ).z , "A2" );
t.ensureIndex( { _id : 1 } )
t.ensureIndex( { x : 1 } )
diff --git a/jstests/capped6.js b/jstests/capped6.js
index 65798075208..098f667732f 100644
--- a/jstests/capped6.js
+++ b/jstests/capped6.js
@@ -52,7 +52,7 @@ var max = 0;
*/
function doTest() {
for( var i = max; i < oldMax; ++i ) {
- tzz.save( val[ i ] );
+ tzz.insert( val[ i ] );
}
max = oldMax;
count = tzz.count();
diff --git a/jstests/cappeda.js b/jstests/cappeda.js
new file mode 100644
index 00000000000..4a4b14a64e5
--- /dev/null
+++ b/jstests/cappeda.js
@@ -0,0 +1,33 @@
+
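+// Test _id reads and updates against a capped collection before and after an _id index
+// is built (see the SERVER-3064 note below).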
+t = db.scan_capped_id;
+t.drop()
+
+x = t.runCommand( "create" , { capped : true , size : 10000 } )
+assert( x.ok )
+
+for ( i=0; i<100; i++ )
+ t.insert( { _id : i , x : 1 } )
+
+function q() {
+ return t.findOne( { _id : 5 } )
+}
+
+function u() {
+ t.update( { _id : 5 } , { $set : { x : 2 } } );
+ var gle = db.getLastError();
+ if ( gle )
+ throw gle;
+}
+
+
+// SERVER-3064
+//assert.throws( q , [] , "A1" );
+//assert.throws( u , [] , "B1" );
+
+t.ensureIndex( { _id : 1 } )
+
+assert.eq( 1 , q().x )
+q()
+u()
+
+assert.eq( 2 , q().x )
diff --git a/jstests/date3.js b/jstests/date3.js
new file mode 100644
index 00000000000..81b385a8616
--- /dev/null
+++ b/jstests/date3.js
@@ -0,0 +1,29 @@
+// Check dates before Unix epoch - SERVER-405
+
+t = db.date3;
+t.drop()
+
+d1 = new Date(-1000)
+dz = new Date(0)
+d2 = new Date(1000)
+
+t.save( {x: 2, d: d2} )
+t.save( {x: 1, d: d1} )
+
+function test () {
+ var list = t.find( {d: {$lt: dz}} )
+ assert.eq ( 1, list.size() )
+ assert.eq ( 1, list[0].x )
+ assert.eq ( d1, list[0].d )
+ var list = t.find( {d: {$gt: dz}} )
+ assert.eq ( 1, list.size() )
+ assert.eq ( 2, list[0].x )
+ var list = t.find().sort( {d:1} )
+ assert.eq ( 2, list.size() )
+ assert.eq ( 1, list[0].x )
+ assert.eq ( 2, list[1].x )
+}
+
+test()
+t.ensureIndex( {d: 1} )
+test()
diff --git a/jstests/dbhash.js b/jstests/dbhash.js
index e9cbc944b5f..7fea4b4d50c 100644
--- a/jstests/dbhash.js
+++ b/jstests/dbhash.js
@@ -14,16 +14,22 @@ db.getCollectionNames().forEach( function( x ) {
}
} );
+function dbhash( mydb ) {
+ var ret = mydb.runCommand( "dbhash" );
+ assert.commandWorked( ret, "dbhash failure" );
+ return ret;
+}
+
function gh( coll , mydb ){
if ( ! mydb ) mydb = db;
- var x = mydb.runCommand( "dbhash" ).collections[coll.getName()];
+ var x = dbhash( mydb ).collections[coll.getName()];
if ( ! x )
return "";
return x;
}
function dbh( mydb ){
- return mydb.runCommand( "dbhash" ).md5;
+ return dbhash( mydb ).md5;
}
assert.eq( gh( a ) , gh( b ) , "A1" );
diff --git a/jstests/disk/quota.js b/jstests/disk/quota.js
new file mode 100644
index 00000000000..d93e5eaafc0
--- /dev/null
+++ b/jstests/disk/quota.js
@@ -0,0 +1,47 @@
+// Check functioning of --quotaFiles parameter, including with respect to SERVER-3293 ('local' database).
+
+port = allocatePorts( 1 )[ 0 ];
+
+baseName = "jstests_disk_quota";
+dbpath = "/data/db/" + baseName;
+
+m = startMongod( "--port", port, "--dbpath", "/data/db/" + baseName, "--quotaFiles", "1", "--smallfiles" );
+db = m.getDB( baseName );
+
+big = new Array( 10000 ).toString();
+
+// Insert documents until quota is exhausted.
+while( !db.getLastError() ) {
+ db[ baseName ].save( {b:big} );
+}
+printjson( db.getLastError() );
+
+dotTwoDataFile = dbpath + "/" + baseName + ".2";
+files = listFiles( dbpath );
+for( i in files ) {
+ // Since only one data file is allowed, a .0 file is expected and a .1 file may be preallocated (SERVER-3410) but no .2 file is expected.
+ assert.neq( dotTwoDataFile, files[ i ].name );
+}
+
+dotTwoDataFile = dbpath + "/" + "local" + ".2";
+// Check that quota does not apply to local db, and a .2 file can be created.
+l = m.getDB( "local" )[ baseName ];
+for( i = 0; i < 10000; ++i ) {
+ l.save( {b:big} );
+ assert( !db.getLastError() );
+ dotTwoFound = false;
+ if ( i % 100 != 0 ) {
+ continue;
+ }
+ files = listFiles( dbpath );
+ for( f in files ) {
+ if ( files[ f ].name == dotTwoDataFile ) {
+ dotTwoFound = true;
+ }
+ }
+ if ( dotTwoFound ) {
+ break;
+ }
+}
+
+assert( dotTwoFound );
diff --git a/jstests/disk/quota2.js b/jstests/disk/quota2.js
new file mode 100644
index 00000000000..c0d30dfecbf
--- /dev/null
+++ b/jstests/disk/quota2.js
@@ -0,0 +1,38 @@
+// Test for quotaFiles off by one file limit issue - SERVER-3420.
+
+if ( 0 ) { // SERVER-3420
+
+port = allocatePorts( 1 )[ 0 ];
+
+baseName = "jstests_disk_quota2";
+dbpath = "/data/db/" + baseName;
+
+m = startMongod( "--port", port, "--dbpath", "/data/db/" + baseName, "--quotaFiles", "1", "--smallfiles" );
+db = m.getDB( baseName );
+
+big = new Array( 10000 ).toString();
+
+// Insert documents until quota is exhausted.
+while( !db.getLastError() ) {
+ db[ baseName ].save( {b:big} );
+}
+
+db.resetError();
+
+// Trigger allocation of an additional file for a 'special' namespace.
+for( n = 0; !db.getLastError(); ++n ) {
+ db.createCollection( '' + n );
+}
+
+print( n );
+
+// Check that new docs are saved in the .0 file.
+for( i = 0; i < n; ++i ) {
+ c = db[ ''+i ];
+ c.save( {b:big} );
+ if( !db.getLastError() ) {
+ assert.eq( 0, c.find()._addSpecial( "$showDiskLoc", true )[ 0 ].$diskLoc.file );
+ }
+}
+
+} \ No newline at end of file
diff --git a/jstests/drop2.js b/jstests/drop2.js
index a1d619df1b3..87e646e1ee9 100644
--- a/jstests/drop2.js
+++ b/jstests/drop2.js
@@ -26,7 +26,7 @@ function op( drop ) {
return null;
}
-s1 = startParallelShell( "db.jstests_drop2.count( { $where: function() { while( 1 ) { ; } } } )" );
+s1 = startParallelShell( "db.jstests_drop2.count( { $where: function() { while( 1 ) { sleep( 1 ); } } } )" );
countOp = null;
assert.soon( function() { countOp = op( false ); return countOp; } );
diff --git a/jstests/dur/diskfull.js b/jstests/dur/diskfull.js
index da45c20afd4..c123ea1541e 100644
--- a/jstests/dur/diskfull.js
+++ b/jstests/dur/diskfull.js
@@ -14,23 +14,23 @@ for ( i in files ) {
if ( !doIt ) {
print( "path " + startPath + " missing, skipping diskfull test" );
doIt = false;
-}
-
-function checkNoJournalFiles(path, pass) {
- var files = listFiles(path);
- if (files.some(function (f) { return f.name.indexOf("prealloc") < 0; })) {
- if (pass == null) {
- // wait a bit longer for mongod to potentially finish if it is still running.
- sleep(10000);
- return checkNoJournalFiles(path, 1);
- }
- print("\n\n\n");
- print("FAIL path:" + path);
- print("unexpected files:");
- printjson(files);
- assert(false, "FAIL a journal/lsn file is present which is unexpected");
- }
-}
+}
+
+function checkNoJournalFiles(path, pass) {
+ var files = listFiles(path);
+ if (files.some(function (f) { return f.name.indexOf("prealloc") < 0; })) {
+ if (pass == null) {
+ // wait a bit longer for mongod to potentially finish if it is still running.
+ sleep(10000);
+ return checkNoJournalFiles(path, 1);
+ }
+ print("\n\n\n");
+ print("FAIL path:" + path);
+ print("unexpected files:");
+ printjson(files);
+ assert(false, "FAIL a journal/lsn file is present which is unexpected");
+ }
+}
/** Clear dbpath without removing and recreating diskfulltest directory, as resetDbpath does */
function clear() {
@@ -56,7 +56,9 @@ function work() {
d.foo.insert( { _id:i, b:big } );
}
- d.getLastError();
+ gle = d.getLastError();
+ if ( gle )
+ throw gle;
} catch ( e ) {
print( e );
raise( e );
@@ -86,9 +88,8 @@ function runFirstMongodAndFillDisk() {
conn = startMongodNoReset("--port", 30001, "--dbpath", startPath, "--dur", "--smallfiles", "--durOptions", 8, "--noprealloc");
assert.throws( work, null, "no exception thrown when exceeding disk capacity" );
- waitMongoProgramOnPort( 30001 );
-
- // the above wait doesn't work on windows
+ stopMongod( 30001 );
+
sleep(5000);
}
@@ -104,9 +105,9 @@ function runSecondMongdAndRecover() {
// stopMongod seems to be asynchronous (hmmm) so we sleep here.
sleep(5000);
- // at this point, after clean shutdown, there should be no journal files
- log("check no journal files");
- checkNoJournalFiles(startPath + "/journal/");
+ // at this point, after clean shutdown, there should be no journal files
+ log("check no journal files");
+ checkNoJournalFiles(startPath + "/journal/");
log();
}
@@ -133,4 +134,4 @@ if ( doIt ) {
print(testname + " SUCCESS");
-} \ No newline at end of file
+}
diff --git a/jstests/evald.js b/jstests/evald.js
index 78cabb68045..7b18f3cc893 100644
--- a/jstests/evald.js
+++ b/jstests/evald.js
@@ -53,10 +53,10 @@ function doIt( ev, wait, where ) {
}
-doIt( "db.jstests_evald.count( { $where: function() { while( 1 ) { ; } } } )", true, true );
-doIt( "db.jstests_evald.count( { $where: function() { while( 1 ) { ; } } } )", false, true );
-doIt( "while( true ) {;}", false );
-doIt( "while( true ) {;}", true );
+doIt( "db.jstests_evald.count( { $where: function() { while( 1 ) { sleep(1); } } } )", true, true );
+doIt( "db.jstests_evald.count( { $where: function() { while( 1 ) { sleep(1); } } } )", false, true );
+doIt( "while( true ) { sleep(1);}", false );
+doIt( "while( true ) { sleep(1);}", true );
// the for loops are currently required, as a spawned op masks the parent op - see SERVER-1931
doIt( "while( 1 ) { for( var i = 0; i < 10000; ++i ) {;} db.jstests_evald.count( {i:10} ); }", true );
@@ -65,4 +65,4 @@ doIt( "while( 1 ) { for( var i = 0; i < 10000; ++i ) {;} db.jstests_evald.count(
doIt( "while( 1 ) { for( var i = 0; i < 10000; ++i ) {;} db.jstests_evald.count(); }", false );
doIt( "while( 1 ) { for( var i = 0; i < 10000; ++i ) {;} try { db.jstests_evald.count( {i:10} ); } catch ( e ) { } }", true );
-doIt( "while( 1 ) { try { while( 1 ) { ; } } catch ( e ) { } }", true );
+doIt( "while( 1 ) { try { while( 1 ) { sleep(1); } } catch ( e ) { } }", true );
diff --git a/jstests/exists9.js b/jstests/exists9.js
index 09695ac4203..66378d1b424 100644
--- a/jstests/exists9.js
+++ b/jstests/exists9.js
@@ -25,8 +25,7 @@ assert.eq( 1, t.count( {a:{$exists:false}} ) );
t.ensureIndex( {a:1} );
assert.eq( 1, t.find( {a:{$exists:true}} ).hint( {a:1} ).itcount() );
assert.eq( 1, t.find( {a:{$exists:false}} ).hint( {a:1} ).itcount() );
-// The empty array will be scanned, but not returned.
-assert.eq( 2, t.find( {a:{$exists:false}} ).hint( {a:1} ).explain().nscanned );
+assert.eq( 1, t.find( {a:{$exists:false}} ).hint( {a:1} ).explain().nscanned );
t.drop();
@@ -39,6 +38,4 @@ assert.eq( 1, t.count( {'a.0':{$exists:false}} ) );
// With index.
t.ensureIndex( {'a.0':1} );
assert.eq( 1, t.find( {'a.0':{$exists:true}} ).hint( {'a.0':1} ).itcount() );
-if ( 0 ) { // SERVER-2902
assert.eq( 1, t.find( {'a.0':{$exists:false}} ).hint( {'a.0':1} ).itcount() );
-}
diff --git a/jstests/geo_mapreduce2.js b/jstests/geo_mapreduce2.js
new file mode 100644
index 00000000000..9c393457c7b
--- /dev/null
+++ b/jstests/geo_mapreduce2.js
@@ -0,0 +1,36 @@
+// Map-reduce over a 2d $centerSphere query with a sort spec - from SERVER-3478
+
+var coll = db.geoMR2
+coll.drop()
+
+for( var i = 0; i < 300; i++ )
+ coll.insert({ i : i, location : [ 10, 20 ] })
+
+coll.ensureIndex({ location : "2d" })
+
+// map function
+m = function() {
+ emit( null, { count : this.i } )
+}
+
+// reduce function
+r = function( key, values ) {
+
+ var total = 0
+ for ( var i = 0; i < values.length; i++ ) {
+ total += values[i].count
+ }
+
+ return { count : total }
+};
+
+try{ coll.mapReduce( m, r,
+ { out : coll.getName() + "_mr",
+ sort : { _id : 1 },
+ query : { 'location' : { $within : { $centerSphere : [[ 10, 20 ], 0.01 ] } } } })
+
+}
+catch( e ){
+ // This exception is expected, since we can't do an in-memory sort for mapReduce here
+ printjson( e )
+}
diff --git a/jstests/group7.js b/jstests/group7.js
new file mode 100644
index 00000000000..5bf9232577c
--- /dev/null
+++ b/jstests/group7.js
@@ -0,0 +1,43 @@
+// Test yielding group command SERVER-1395
+
+t = db.jstests_group7;
+t.drop();
+
+function checkForYield( docs, updates ) {
+ t.drop();
+ a = 0;
+ for( var i = 0; i < docs; ++i ) {
+ t.save( {a:a} );
+ }
+ db.getLastError();
+
+ // Iteratively update all a values atomically.
+ p = startParallelShell( 'for( a = 0; a < ' + updates + '; ++a ) { db.jstests_group7.update( {$atomic:true}, {$set:{a:a}}, false, true ); db.getLastError(); }' );
+
+ for( var i = 0; i < updates; ++i ) {
+ ret = t.group({key:{a:1},reduce:function(){},initial:{}});
+ // Check if group sees more than one a value, indicating that it yielded.
+ if ( ret.length > 1 ) {
+ p();
+ return true;
+ }
+ printjson( ret );
+ }
+
+ p();
+ return false;
+}
+
+var yielded = false;
+var docs = 1500;
+var updates = 50;
+for( var j = 1; j <= 6; ++j ) {
+ if ( checkForYield( docs, updates ) ) {
+ yielded = true;
+ break;
+ }
+ // Increase docs and updates to encourage yielding.
+ docs *= 2;
+ updates *= 2;
+}
+assert( yielded ); \ No newline at end of file
diff --git a/jstests/in9.js b/jstests/in9.js
index b0d70b6a4fc..34cefb8278a 100644
--- a/jstests/in9.js
+++ b/jstests/in9.js
@@ -31,5 +31,5 @@ function doTest() {
doTest();
// SERVER-1943 not fixed yet
-//t.ensureIndex( {key:1} );
-//doTest();
+t.ensureIndex( {key:1} );
+doTest();
diff --git a/jstests/ina.js b/jstests/ina.js
new file mode 100644
index 00000000000..cf614ab994d
--- /dev/null
+++ b/jstests/ina.js
@@ -0,0 +1,15 @@
+// Uassert when $elemMatch is attempted within $in SERVER-3545
+
+t = db.jstests_ina;
+t.drop();
+t.save( {} );
+
+assert.throws( function() { t.find( {a:{$in:[{$elemMatch:{b:1}}]}} ).itcount(); } );
+assert.throws( function() { t.find( {a:{$not:{$in:[{$elemMatch:{b:1}}]}}} ).itcount(); } );
+
+assert.throws( function() { t.find( {a:{$nin:[{$elemMatch:{b:1}}]}} ).itcount(); } );
+assert.throws( function() { t.find( {a:{$not:{$nin:[{$elemMatch:{b:1}}]}}} ).itcount(); } );
+
+// NOTE Above we don't check cases like {b:2,$elemMatch:{b:3,4}} - generally
+// we assume that the first key is $elemMatch if any key is, and validating
+// every key is expensive in some cases. \ No newline at end of file
diff --git a/jstests/indexbindata.js b/jstests/indexbindata.js
new file mode 100755
index 00000000000..e69de29bb2d
--- /dev/null
+++ b/jstests/indexbindata.js
diff --git a/jstests/indexr.js b/jstests/indexr.js
index b900e8ccbd5..60ecfb13ed2 100644
--- a/jstests/indexr.js
+++ b/jstests/indexr.js
@@ -28,17 +28,13 @@ t.remove();
t.save( { a: [ { b: 3, c: 6 }, { b: 1, c: 1 } ] } );
assert.eq( 1, t.count( { 'a.b':{ $gt:2 }, 'a.c': { $lt:4 } } ) );
-if ( 0 ) { // SERVER-3005
assert.eq( 1, t.count( { a:{ b:3, c:6 }, 'a.c': { $lt:4 } } ) );
-}
assert.eq( [[{$minElement:1},{$maxElement:1}]], t.find( { 'a.b':{ $gt:2 }, 'a.c': { $lt:4 } } ).explain().indexBounds['a.c'] );
assert.eq( [[{$minElement:1},{$maxElement:1}]], t.find( { a:{ b:3, c:6 }, 'a.c': { $lt:4 } } ).explain().indexBounds['a.c'] );
// Check reverse direction.
assert.eq( 1, t.find( { 'a.b':{ $gt:2 }, 'a.c': { $lt:4 } } ).sort( {'a.b':-1} ).itcount() );
-if ( 0 ) { // SERVER-3005
assert.eq( 1, t.find( { a:{ b:3, c:6 }, 'a.c': { $lt:4 } } ).sort( {a:-1} ).itcount() );
-}
assert.eq( [[{$maxElement:1},{$minElement:1}]], t.find( { 'a.b':{ $gt:2 }, 'a.c': { $lt:4 } } ).sort( {'a.b':-1} ).explain().indexBounds['a.c'] );
assert.eq( [[{$maxElement:1},{$minElement:1}]], t.find( { a:{ b:3, c:6 }, 'a.c': { $lt:4 } } ).sort( {a:-1} ).explain().indexBounds['a.c'] );
diff --git a/jstests/indexs.js b/jstests/indexs.js
index 3a52584bfd3..609f912affe 100644
--- a/jstests/indexs.js
+++ b/jstests/indexs.js
@@ -17,7 +17,5 @@ t.drop();
t.ensureIndex( {a:1,'a.b':1} );
t.save( { a: [ { b: 3 } ] } );
assert.eq( ib, t.find( { a:{ b:3 } } ).explain().indexBounds );
-if ( 0 ) { // SERVER-3005
assert.eq( 1, t.find( { a:{ b:3 } } ).explain().nscanned );
assert.eq( 1, t.count( { a:{ b:3 } } ) );
-} \ No newline at end of file
diff --git a/jstests/indext.js b/jstests/indext.js
new file mode 100644
index 00000000000..e418dc2e959
--- /dev/null
+++ b/jstests/indext.js
@@ -0,0 +1,21 @@
+// Sparse indexes with arrays SERVER-3216
+
+t = db.jstests_indext;
+t.drop();
+
+t.ensureIndex( {'a.b':1}, {sparse:true} );
+t.save( {a:[]} );
+t.save( {a:1} );
+assert.eq( 0, t.find().hint( {'a.b':1} ).itcount() );
+assert.eq( 0, t.find().hint( {'a.b':1} ).explain().nscanned );
+
+t.ensureIndex( {'a.b':1,'a.c':1}, {sparse:true} );
+t.save( {a:[]} );
+t.save( {a:1} );
+assert.eq( 0, t.find().hint( {'a.b':1,'a.c':1} ).itcount() );
+assert.eq( 0, t.find().hint( {'a.b':1,'a.c':1} ).explain().nscanned );
+
+t.save( {a:[{b:1}]} );
+t.save( {a:1} );
+assert.eq( 1, t.find().hint( {'a.b':1,'a.c':1} ).itcount() );
+assert.eq( 1, t.find().hint( {'a.b':1,'a.c':1} ).explain().nscanned );
diff --git a/jstests/indexu.js b/jstests/indexu.js
new file mode 100644
index 00000000000..c7fa8ed3365
--- /dev/null
+++ b/jstests/indexu.js
@@ -0,0 +1,137 @@
+// Test index key generation with duplicate values addressed by array index and
+// object field. SERVER-2902
+
+t = db.jstests_indexu;
+t.drop();
+
+var dupDoc = {a:[{'0':1}]}; // There are two 'a.0' fields in this doc.
+var dupDoc2 = {a:[{'1':1},'c']};
+var noDupDoc = {a:[{'1':1}]};
+
+// Test that we can't index dupDoc.
+t.save( dupDoc );
+assert( !db.getLastError() );
+t.ensureIndex( {'a.0':1} );
+assert( db.getLastError() );
+
+t.remove();
+t.ensureIndex( {'a.0':1} );
+assert( !db.getLastError() );
+t.save( dupDoc );
+assert( db.getLastError() );
+
+// Test that we can't index dupDoc2.
+t.drop();
+t.save( dupDoc2 );
+assert( !db.getLastError() );
+t.ensureIndex( {'a.1':1} );
+assert( db.getLastError() );
+
+t.remove();
+t.ensureIndex( {'a.1':1} );
+assert( !db.getLastError() );
+t.save( dupDoc2 );
+assert( db.getLastError() );
+
+// Test that we can index dupDoc with a different index.
+t.drop();
+t.ensureIndex( {'a.b':1} );
+t.save( dupDoc );
+assert( !db.getLastError() );
+
+// Test number field starting with hyphen.
+t.drop();
+t.ensureIndex( {'a.-1':1} );
+t.save( {a:[{'-1':1}]} );
+assert( !db.getLastError() );
+
+// Test number field starting with zero.
+t.drop();
+t.ensureIndex( {'a.00':1} );
+t.save( {a:[{'00':1}]} );
+assert( !db.getLastError() );
+
+// Test multiple array indexes
+t.drop();
+t.ensureIndex( {'a.0':1,'a.1':1} );
+t.save( {a:[{'1':1}]} );
+assert( !db.getLastError() );
+t.save( {a:[{'1':1},4]} );
+assert( db.getLastError() );
+
+// Test that we can index noDupDoc.
+t.drop();
+t.save( noDupDoc );
+t.ensureIndex( {'a.0':1} );
+assert( !db.getLastError() );
+t.ensureIndex( {'a.1':1} );
+assert( !db.getLastError() );
+
+t.drop();
+t.ensureIndex( {'a.0':1} );
+t.ensureIndex( {'a.1':1} );
+t.save( noDupDoc );
+assert( !db.getLastError() );
+
+// Test that we can query noDupDoc.
+assert.eq( 1, t.find( {'a.1':1} ).hint( {'a.1':1} ).itcount() );
+assert.eq( 1, t.find( {'a.1':1} ).hint( {$natural:1} ).itcount() );
+assert.eq( 1, t.find( {'a.0':{'1':1}} ).hint( {'a.0':1} ).itcount() );
+assert.eq( 1, t.find( {'a.0':{'1':1}} ).hint( {$natural:1} ).itcount() );
+
+// Check multiple nested array fields.
+t.drop();
+t.save( {a:[[1]]} );
+t.ensureIndex( {'a.0.0':1} );
+assert( !db.getLastError() );
+assert.eq( 1, t.find( {'a.0.0':1} ).hint( {$natural:1} ).itcount() );
+assert.eq( 1, t.find( {'a.0.0':1} ).hint( {'a.0.0':1} ).itcount() );
+
+// Check where there is a duplicate for a partially addressed field but not for a fully addressed field.
+t.drop();
+t.save( {a:[[1],{'0':1}]} );
+t.ensureIndex( {'a.0.0':1} );
+assert( db.getLastError() );
+
+// Check where there is a duplicate for a fully addressed field.
+t.drop();
+t.save( {a:[[1],{'0':[1]}]} );
+assert( !db.getLastError() );
+t.ensureIndex( {'a.0.0':1} );
+assert( db.getLastError() );
+
+// Two ways of addressing parse to an array.
+t.drop();
+t.save( {a:[{'0':1}]} );
+t.ensureIndex( {'a.0.0':1} );
+assert( db.getLastError() );
+
+// Test several key depths - with same arrays being found.
+t.drop();
+t.save( {a:[{'0':[{'0':1}]}]} );
+t.ensureIndex( {'a.0.0.0.0.0.0':1} );
+assert( db.getLastError() );
+t.ensureIndex( {'a.0.0.0.0.0':1} );
+assert( db.getLastError() );
+t.ensureIndex( {'a.0.0.0.0':1} );
+assert( db.getLastError() );
+t.ensureIndex( {'a.0.0.0':1} );
+assert( db.getLastError() );
+t.ensureIndex( {'a.0.0':1} );
+assert( db.getLastError() );
+t.ensureIndex( {'a.0':1} );
+assert( db.getLastError() );
+t.ensureIndex( {'a':1} );
+assert( !db.getLastError() );
+
+// Two prefixes extract docs, but one terminates extraction before array.
+t.drop();
+t.save( {a:[{'0':{'c':[]}}]} );
+t.ensureIndex( {'a.0.c':1} );
+assert( db.getLastError() );
+
+t.drop();
+t.save( {a:[[{'b':1}]]} );
+assert.eq( 1, t.find( {'a.0.b':1} ).itcount() );
+t.ensureIndex( {'a.0.b':1} );
+assert.eq( 1, t.find( {'a.0.b':1} ).itcount() );
diff --git a/jstests/indexv.js b/jstests/indexv.js
new file mode 100644
index 00000000000..a69ff2a4664
--- /dev/null
+++ b/jstests/indexv.js
@@ -0,0 +1,18 @@
+// Check null index key generation when a nested field is missing from some array elements.
+
+t = db.jstests_indexv;
+t.drop();
+
+t.ensureIndex( {'a.b':1} );
+
+t.save( {a:[{},{b:1}]} );
+var e = t.find( {'a.b':null} ).explain();
+assert.eq( 0, e.n );
+assert.eq( 1, e.nscanned );
+
+t.drop();
+t.ensureIndex( {'a.b.c':1} );
+t.save( {a:[{b:[]},{b:{c:1}}]} );
+var e = t.find( {'a.b.c':null} ).explain();
+assert.eq( 0, e.n );
+assert.eq( 1, e.nscanned );
diff --git a/jstests/indexw.js b/jstests/indexw.js
new file mode 100644
index 00000000000..326443400d1
--- /dev/null
+++ b/jstests/indexw.js
@@ -0,0 +1,14 @@
+// Check that v0 keys are generated for v0 indexes SERVER-3375
+
+t = db.jstests_indexw;
+t.drop();
+
+t.save( {a:[]} );
+assert.eq( 1, t.count( {a:[]} ) );
+t.ensureIndex( {a:1} );
+assert.eq( 1, t.count( {a:[]} ) );
+t.dropIndexes();
+
+// The count result is incorrect - just checking here that v0 key generation is used.
+t.ensureIndex( {a:1}, {v:0} );
+assert.eq( 0, t.count( {a:[]} ) );
diff --git a/jstests/libs/testconfig b/jstests/libs/testconfig
new file mode 100644
index 00000000000..0c1fc871d61
--- /dev/null
+++ b/jstests/libs/testconfig
@@ -0,0 +1,4 @@
+fastsync = true
+#comment line
+#commentedflagwithan = false
+version = false
diff --git a/jstests/ork.js b/jstests/ork.js
new file mode 100644
index 00000000000..d6d40161e69
--- /dev/null
+++ b/jstests/ork.js
@@ -0,0 +1,11 @@
+// SERVER-2585 Test $or clauses within indexed top level $or clauses.
+
+t = db.jstests_ork;
+t.drop();
+
+t.ensureIndex( {a:1} );
+t.save( {a:[1,2],b:5} );
+t.save( {a:[2,4],b:5} );
+
+assert.eq( 2, t.find( {$or:[{a:1,$and:[{$or:[{a:2},{a:3}]},{$or:[{b:5}]}]},{a:2,$or:[{a:3},{a:4}]}]} ).itcount() );
+assert.eq( 1, t.find( {$or:[{a:1,$and:[{$or:[{a:2},{a:3}]},{$or:[{b:6}]}]},{a:2,$or:[{a:3},{a:4}]}]} ).itcount() );
diff --git a/jstests/orl.js b/jstests/orl.js
new file mode 100644
index 00000000000..2726975d5aa
--- /dev/null
+++ b/jstests/orl.js
@@ -0,0 +1,13 @@
+// SERVER-3445 Test using coarse multikey bounds for or range elimination.
+
+t = db.jstests_orl;
+t.drop();
+
+t.ensureIndex( {'a.b':1,'a.c':1} );
+// make the index multikey
+t.save( {a:{b:[1,2]}} );
+
+// SERVER-3445
+if ( 0 ) {
+assert( !t.find( {$or:[{'a.b':2,'a.c':3},{'a.b':2,'a.c':4}]} ).explain().clauses );
+} \ No newline at end of file
diff --git a/jstests/orm.js b/jstests/orm.js
new file mode 100644
index 00000000000..83183f05a59
--- /dev/null
+++ b/jstests/orm.js
@@ -0,0 +1,26 @@
+// Test dropping the collection during a $or query yield SERVER-3555
+
+if ( 0 ) { // SERVER-3555
+
+t = db.jstests_orm;
+t.drop();
+
+clauses = [];
+for( i = 0; i < 10; ++i ) {
+ clauses.push( {a:{$lte:(i+1)*5000/10},i:49999} );
+ clauses.push( {b:{$lte:(i+1)*5000/10},i:49999} );
+}
+
+p = startParallelShell( 'for( i = 0; i < 30; ++i ) { sleep( 1000 ); db.jstests_orm.drop() }' );
+for( j = 0; j < 10; ++j ) {
+ for( i = 0; i < 5000; ++i ) {
+ t.save( {a:i,i:i} );
+ t.save( {b:i,i:i} );
+ }
+ t.ensureIndex( {a:1} );
+ t.ensureIndex( {b:1} );
+ t.find( {$or:clauses} ).itcount();
+}
+p();
+
+} \ No newline at end of file
diff --git a/jstests/profile1.js b/jstests/profile1.js
index eed64f60ae2..9654357127f 100644
--- a/jstests/profile1.js
+++ b/jstests/profile1.js
@@ -1,3 +1,4 @@
+print("profile1.js BEGIN");
try {
@@ -61,21 +62,50 @@ try {
after = db.system.profile.count()
assert.eq( before + 3 , after , "X1" )
+ /* sleep() could be inaccurate on certain platforms. let's check */
+ print("\nsleep 2 time actual:");
+ for (var i = 0; i < 4; i++) {
+ print(db.eval("var x = new Date(); sleep(2); return new Date() - x;"));
+ }
+ print();
+ print("\nsleep 20 times actual:");
+ for (var i = 0; i < 4; i++) {
+ print(db.eval("var x = new Date(); sleep(20); return new Date() - x;"));
+ }
+ print();
+ print("\nsleep 120 times actual:");
+ for (var i = 0; i < 4; i++) {
+ print(db.eval("var x = new Date(); sleep(120); return new Date() - x;"));
+ }
+ print();
+
+ function evalSleepMoreThan(millis,max){
+ var start = new Date();
+ db.eval("sleep("+millis+")");
+ var end = new Date();
+ var actual = end.getTime() - start.getTime();
+ if ( actual > ( millis + 5 ) ) {
+ print( "warning wanted to sleep for: " + millis + " but took: " + actual );
+ }
+ return actual >= max ? 1 : 0;
+ }
+
db.setProfilingLevel(1,100);
before = db.system.profile.count();
- db.eval( "sleep(25)" )
- db.eval( "sleep(120)" )
+ var delta = 0;
+ delta += evalSleepMoreThan( 15 , 100 );
+ delta += evalSleepMoreThan( 120 , 100 );
after = db.system.profile.count()
- assert.eq( before + 1 , after , "X2 : " + getProfileAString() )
+ assert.eq( before + delta , after , "X2 : " + getProfileAString() )
db.setProfilingLevel(1,20);
before = db.system.profile.count();
- db.eval( "sleep(25)" )
- db.eval( "sleep(120)" )
+ delta = 0;
+ delta += evalSleepMoreThan( 5 , 20 );
+ delta += evalSleepMoreThan( 120 , 20 );
after = db.system.profile.count()
- assert.eq( before + 2 , after , "X3 : " + getProfileAString() )
-
-
+ assert.eq( before + delta , after , "X3 : " + getProfileAString() )
+
db.profile.drop();
db.setProfilingLevel(2)
var q = { _id : 5 };
@@ -85,7 +115,9 @@ try {
assert.eq( q , r.query , "Y1" );
assert.eq( u , r.updateobj , "Y2" );
assert.eq( "update" , r.op , "Y3" );
- assert.eq( "test.profile1" , r.ns , "Y4" );
+ assert.eq("test.profile1", r.ns, "Y4");
+
+ print("profile1.js SUCCESS OK");
} finally {
// disable profiling for subsequent tests
diff --git a/jstests/profile2.js b/jstests/profile2.js
new file mode 100644
index 00000000000..929b463ca3d
--- /dev/null
+++ b/jstests/profile2.js
@@ -0,0 +1,19 @@
+print("profile2.js BEGIN");
+
+try {
+
+ assert.commandWorked( db.runCommand( {profile:2} ) );
+
+ huge = 'huge';
+ while (huge.length < 2*1024*1024){
+ huge += huge;
+ }
+
+ db.profile2.count({huge:huge}) // would make a huge entry in db.system.profile
+
+ print("profile2.js SUCCESS OK");
+
+} finally {
+ // disable profiling for subsequent tests
+ assert.commandWorked( db.runCommand( {profile:0} ) );
+}
diff --git a/jstests/profile3.js b/jstests/profile3.js
new file mode 100644
index 00000000000..a6574b76f8a
--- /dev/null
+++ b/jstests/profile3.js
@@ -0,0 +1,26 @@
+
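+// Exercise profiling level 2: run insert/findOne/count and dump the resulting
+// system.profile entries (no assertions on their contents here).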
+t = db.profile3;
+t.drop();
+
+try {
+ db.setProfilingLevel(0);
+
+ db.system.profile.drop();
+ assert.eq( 0 , db.system.profile.count() )
+
+ db.setProfilingLevel(2);
+
+ t.insert( { x : 1 } );
+ t.findOne( { x : 1 } );
+ t.find( { x : 1 } ).count();
+
+ db.system.profile.find().forEach( printjson )
+
+ db.setProfilingLevel(0);
+ db.system.profile.drop();
+
+}
+finally {
+ db.setProfilingLevel(0);
+}
+
diff --git a/jstests/regexa.js b/jstests/regexa.js
index e9644627548..b0d47190e77 100644
--- a/jstests/regexa.js
+++ b/jstests/regexa.js
@@ -14,6 +14,6 @@ t.save( {a:'a'} );
check();
t.ensureIndex( {a:1} );
-if ( 0 ) { // SERVER-3298
+if ( 1 ) { // SERVER-3298
check();
-} \ No newline at end of file
+}
diff --git a/jstests/repl/basic1.js b/jstests/repl/basic1.js
index aaa07dc6cc1..4a6091d9755 100644
--- a/jstests/repl/basic1.js
+++ b/jstests/repl/basic1.js
@@ -160,6 +160,8 @@ assert.eq( 0 , as.system.profile.count() , "P2" )
assert.eq( 1 , as.foo.findOne().x , "P3" );
assert.eq( 0 , as.system.profile.count() , "P4" )
+assert( as.getCollectionNames().indexOf( "system.profile" ) < 0 , "P4.5" )
+
as.setProfilingLevel(2)
as.foo.findOne();
assert.eq( 1 , as.system.profile.count() , "P5" )
diff --git a/jstests/repl/drop_dups.js b/jstests/repl/drop_dups.js
new file mode 100644
index 00000000000..1fa9984ea06
--- /dev/null
+++ b/jstests/repl/drop_dups.js
@@ -0,0 +1,63 @@
+
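+// Check that a unique index build with dropDups:true removes the same duplicate
+// documents on master and slave, whether built in the foreground or background.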
+var rt = new ReplTest( "drop_dups" );
+
+m = rt.start( true );
+s = rt.start( false );
+
+function block(){
+ am.runCommand( { getlasterror : 1 , w : 2 , wtimeout : 3000 } )
+}
+
+am = m.getDB( "foo" );
+as = s.getDB( "foo" );
+
+function run( createInBackground ) {
+
+ collName = "foo" + ( createInBackground ? "B" : "F" );
+
+ am[collName].drop();
+ am.blah.insert( { x : 1 } )
+ block();
+
+ for ( i=0; i<10; i++ ) {
+ am[collName].insert( { _id : i , x : Math.floor( i / 2 ) } )
+ }
+
+ block();
+
+ am.runCommand( { "godinsert" : collName , obj : { _id : 100 , x : 20 } } );
+ am.runCommand( { "godinsert" : collName , obj : { _id : 101 , x : 20 } } );
+
+ as.runCommand( { "godinsert" : collName , obj : { _id : 101 , x : 20 } } );
+ as.runCommand( { "godinsert" : collName , obj : { _id : 100 , x : 20 } } );
+
+ assert.eq( as[collName].count() , am[collName].count() );
+
+ function mymap(z) {
+ return z._id + ":" + z.x + ",";
+ }
+
+
+ if ( am.serverStatus().mem.bits == 64 ) {
+ assert.neq( tojson(am[collName].find().map(mymap)) ,
+ tojson(as[collName].find().map(mymap)) , "order is not supposed to be the same on master and slave but it is" );
+ }
+
+
+ am[collName].ensureIndex( { x : 1 } , { unique : true , dropDups : true , background : createInBackground } );
+ am.blah.insert( { x : 1 } )
+ block();
+
+ assert.eq( 2 , am[collName].getIndexKeys().length , "A1 : " + createInBackground )
+ assert.eq( 2 , as[collName].getIndexKeys().length , "A2 : " + createInBackground )
+
+ assert.eq( am[collName].find().sort( { _id : 1 } ).map(mymap) ,
+ as[collName].find().sort( { _id : 1 } ).map(mymap) , "different things dropped on master and slave" );
+
+
+}
+
+run( false )
+run( true )
+
+rt.stop()
diff --git a/jstests/repl/repl3.js b/jstests/repl/repl3.js
index d3c38486b19..5ace9b69d2f 100644
--- a/jstests/repl/repl3.js
+++ b/jstests/repl/repl3.js
@@ -10,38 +10,42 @@ soonCount = function( count ) {
} );
}
-doTest = function( signal ) {
-
- rt = new ReplTest( "repl3tests" );
-
- m = rt.start( true );
- s = rt.start( false );
-
- am = m.getDB( baseName ).a
-
- am.save( { _id: new ObjectId() } );
- soonCount( 1 );
- rt.stop( false, signal );
-
- big = new Array( 2000 ).toString();
- for( i = 0; i < 1000; ++i )
- am.save( { _id: new ObjectId(), i: i, b: big } );
-
- s = rt.start( false, { autoresync: null }, true );
-
+doTest = function (signal) {
+
+ print("repl3.js doTest(" + signal + ")")
+
+ rt = new ReplTest("repl3tests");
+
+ m = rt.start(true);
+ s = rt.start(false);
+
+ am = m.getDB(baseName).a
+
+ am.save({ _id: new ObjectId() });
+ soonCount(1);
+ rt.stop(false, signal);
+
+ big = new Array(2000).toString();
+ for (i = 0; i < 1000; ++i)
+ am.save({ _id: new ObjectId(), i: i, b: big });
+
+ s = rt.start(false, { autoresync: null }, true);
+
// after SyncException, mongod waits 10 secs.
- sleep( 15000 );
-
+ sleep(15000);
+
// Need the 2 additional seconds timeout, since commands don't work on an 'allDead' node.
- soonCount( 1001 );
- as = s.getDB( baseName ).a
- assert.eq( 1, as.find( { i: 0 } ).count() );
- assert.eq( 1, as.find( { i: 999 } ).count() );
-
- assert.commandFailed( s.getDB( "admin" ).runCommand( { "resync" : 1 } ) );
+ soonCount(1001);
+ as = s.getDB(baseName).a
+ assert.eq(1, as.find({ i: 0 }).count());
+ assert.eq(1, as.find({ i: 999 }).count());
+
+ assert.commandFailed(s.getDB("admin").runCommand({ "resync": 1 }));
rt.stop();
}
doTest( 15 ); // SIGTERM
doTest( 9 ); // SIGKILL
+
+print("repl3.js OK")
diff --git a/jstests/replsets/auth1.js b/jstests/replsets/auth1.js
index e9765c08153..edc162cca16 100644
--- a/jstests/replsets/auth1.js
+++ b/jstests/replsets/auth1.js
@@ -81,6 +81,10 @@ function doQueryOn(p) {
doQueryOn(slave);
master.adminCommand({logout:1});
+
+print("unauthorized:");
+printjson(master.adminCommand({replSetGetStatus : 1}));
+
doQueryOn(master);
diff --git a/jstests/replsets/downstream.js b/jstests/replsets/downstream.js
new file mode 100755
index 00000000000..795e6671d46
--- /dev/null
+++ b/jstests/replsets/downstream.js
@@ -0,0 +1,36 @@
+// BUG: [SERVER-1768] replica set getlasterror {w: 2} after 2000
+// inserts hangs while secondary servers log "replSet error RS102 too stale to catch up" every once in a while
+
+function newReplicaSet (name, numServers) {
+ var rs = new ReplSetTest({name: name, nodes: numServers})
+ rs.startSet()
+ rs.initiate()
+ rs.awaitReplication()
+ return rs
+}
+
+function go() {
+var N = 2000
+
+// ~1KB string
+var Text = ''
+for (var i = 0; i < 40; i++)
+ Text += 'abcdefghijklmnopqrstuvwxyz'
+
+// Create replica set of 3 servers
+var repset = newReplicaSet('repset', 3)
+var conn = repset.getMaster()
+var db = conn.getDB('test')
+
+// Add data to it
+for (var i = 0; i < N; i++)
+ db['foo'].insert({x: i, text: Text})
+
+// wait to be copied to at least one secondary (BUG hangs here)
+db.getLastError(2)
+
+print('getlasterror_w2.js SUCCESS')
+}
+
+// turn off until fixed
+//go();
diff --git a/jstests/replsets/fastsync.js b/jstests/replsets/fastsync.js
index 5ba978481cd..1c9c2152ebb 100644
--- a/jstests/replsets/fastsync.js
+++ b/jstests/replsets/fastsync.js
@@ -48,7 +48,7 @@ var admin = p.getDB("admin");
var foo = p.getDB("foo");
var local = p.getDB("local");
-var config = {_id : basename, members : [{_id : 0, host : hostname+":"+ports[0]}]};
+var config = {_id : basename, members : [{_id : 0, host : hostname+":"+ports[0], priority:2}]};
printjson(config);
var result = admin.runCommand({replSetInitiate : config});
print("result:");
@@ -98,6 +98,7 @@ var startSlave = function(n) {
config.members.push({_id:n, host:hostname+":"+ports[n]});
result = admin.runCommand({replSetReconfig : config});
+ printjson(result);
assert(result.ok, "reconfig worked");
reconnect(p);
@@ -125,6 +126,10 @@ var startSlave = function(n) {
assert.eq(status.members[n].state, 2);
+ assert.soon(function() {
+ return admin.runCommand({isMaster : 1}).ismaster;
+ });
+
admin.foo.insert({x:1});
assert.soon(function() {
var last = local.oplog.rs.find().sort({$natural:-1}).limit(1).next();
diff --git a/jstests/replsets/maintenance.js b/jstests/replsets/maintenance.js
new file mode 100644
index 00000000000..5b068cd3d8e
--- /dev/null
+++ b/jstests/replsets/maintenance.js
@@ -0,0 +1,32 @@
+
+
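+// Check that a secondary reports itself as non-secondary (RECOVERING) while a compact
+// command runs against it, and becomes a secondary again once compact finishes.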
+var replTest = new ReplSetTest( {name: 'unicomplex', nodes: 3} );
+var conns = replTest.startSet();
+replTest.initiate();
+
+// Make sure we have a master
+var master = replTest.getMaster();
+
+for (i=0;i<10000; i++) { master.getDB("bar").foo.insert({x:1,y:i,abc:123,str:"foo bar baz"}); }
+for (i=0;i<1000; i++) { master.getDB("bar").foo.update({y:i},{$push :{foo : "barrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrrr"}}); }
+
+replTest.awaitReplication();
+
+assert.soon(function() { return conns[2].getDB("admin").isMaster().secondary; });
+
+join = startParallelShell( "db.getSisterDB('bar').runCommand({compact : 'foo'});", replTest.ports[2] );
+
+print("check secondary goes to recovering");
+assert.soon(function() { return !conns[2].getDB("admin").isMaster().secondary; });
+
+print("joining");
+join();
+
+print("check secondary becomes a secondary again");
+var x = 0;
+assert.soon(function() {
+ var im = conns[2].getDB("admin").isMaster();
+ if (x++ % 5 == 0) printjson(im);
+ return im.secondary;
+});
+
diff --git a/jstests/replsets/remove1.js b/jstests/replsets/remove1.js
index 6b9cf5d3c30..f93fe9eb071 100644
--- a/jstests/replsets/remove1.js
+++ b/jstests/replsets/remove1.js
@@ -92,8 +92,12 @@ print("reconfig with minority");
replTest.stop(1);
assert.soon(function() {
- reconnect(master);
- return master.getDB("admin").runCommand({isMaster : 1}).secondary;
+ try {
+ return master.getDB("admin").runCommand({isMaster : 1}).secondary;
+ }
+ catch(e) {
+ print("trying to get master: "+e);
+ }
});
config.version = 4;
diff --git a/jstests/replsets/replset5.js b/jstests/replsets/replset5.js
index a861bd6ff04..67ce2d78bcd 100644
--- a/jstests/replsets/replset5.js
+++ b/jstests/replsets/replset5.js
@@ -23,53 +23,63 @@ doTest = function (signal) {
master.getDB("barDB").bar.save({ a: 1 });
replTest.awaitReplication();
- // These writes should be replicated immediately
- var docNum = 5000;
- for(var n=0; n<docNum; n++) {
- master.getDB(testDB).foo.insert({ n: n });
- }
-
- // If you want to test failure, just add values for w and wtimeout
- // to the following command. This will override the default set above and
- // prevent replication from happening in time for the count tests below.
- var result = master.getDB("admin").runCommand({getlasterror: 1});
- printjson(result);
-
+ // These writes should be replicated immediately
+ var docNum = 5000;
+ for (var n = 0; n < docNum; n++) {
+ master.getDB(testDB).foo.insert({ n: n });
+ }
+
+ // This should use the configured getLastError defaults from above; that's what we're testing.
+ //
+ // If you want to test failure, just add values for w and wtimeout (e.g. w=1)
+ // to the following command. This will override the default set above and
+ // prevent replication from happening in time for the count tests below.
+ //
+ var result = master.getDB("admin").runCommand({ getlasterror: 1 });
+ print("replset5.js getlasterror result:");
+ printjson(result);
+
+ if (result.err == "timeout") {
+ print("\WARNING getLastError timed out and should not have.\nThis machine seems extremely slow. Stopping test without failing it\n")
+ replTest.stopSet(signal);
+ print("\WARNING getLastError timed out and should not have.\nThis machine seems extremely slow. Stopping test without failing it\n")
+ return;
+ }
var slaves = replTest.liveNodes.slaves;
slaves[0].setSlaveOk();
slaves[1].setSlaveOk();
- print("Testing slave counts");
+ print("replset5.js Testing slave counts");
+
+ var slave0count = slaves[0].getDB(testDB).foo.count();
+ assert(slave0count == docNum, "Slave 0 has " + slave0count + " of " + docNum + " documents!");
+
+ var slave1count = slaves[1].getDB(testDB).foo.count();
+ assert(slave1count == docNum, "Slave 1 has " + slave1count + " of " + docNum + " documents!");
+
+ var master1count = master.getDB(testDB).foo.count();
+ assert(master1count == docNum, "Master has " + master1count + " of " + docNum + " documents!");
+
+ print("replset5.js reconfigure with hidden=1");
+ config = master.getDB("local").system.replset.findOne();
+ config.version++;
+ config.members[2].priority = 0;
+ config.members[2].hidden = 1;
+
+ try {
+ master.adminCommand({ replSetReconfig: config });
+ }
+ catch (e) {
+ print(e);
+ }
+
+ config = master.getDB("local").system.replset.findOne();
+ printjson(config);
+ assert.eq(config.members[2].hidden, true);
- var slave0count = slaves[0].getDB(testDB).foo.count();
- assert( slave0count == docNum, "Slave 0 has " + slave0count + " of " + docNum + " documents!");
-
- var slave1count = slaves[1].getDB(testDB).foo.count();
- assert( slave1count == docNum, "Slave 1 has " + slave1count + " of " + docNum + " documents!");
-
- var master1count = master.getDB(testDB).foo.count();
- assert( master1count == docNum, "Master has " + master1count + " of " + docNum + " documents!");
-
- print("reconfigure with hidden=1");
- config = master.getDB("local").system.replset.findOne();
- config.version++;
- config.members[2].priority = 0;
- config.members[2].hidden = 1;
-
- try {
- master.adminCommand({replSetReconfig : config});
- }
- catch(e) {
- print(e);
- }
-
- config = master.getDB("local").system.replset.findOne();
- printjson(config);
- assert.eq(config.members[2].hidden, true);
-
replTest.stopSet(signal);
}
-doTest( 15 );
-print("replset5.js success");
+doTest( 15 );
+print("replset5.js success");
diff --git a/jstests/replsets/replsetarb2.js b/jstests/replsets/replsetarb2.js
index a20c41b79c0..6f712cbc257 100644
--- a/jstests/replsets/replsetarb2.js
+++ b/jstests/replsets/replsetarb2.js
@@ -8,11 +8,11 @@ doTest = function( signal ) {
print(tojson(nodes));
var conns = replTest.startSet();
- var r = replTest.initiate({"_id" : "unicomplex",
+ var r = replTest.initiate({"_id" : "unicomplex",
"members" : [
- {"_id" : 0, "host" : nodes[0] },
- {"_id" : 1, "host" : nodes[1], "arbiterOnly" : true, "votes": 1},
- {"_id" : 2, "host" : nodes[2] }]});
+ {"_id" : 0, "host" : nodes[0] },
+ {"_id" : 1, "host" : nodes[1], "arbiterOnly" : true, "votes": 1, "priority" : 0},
+ {"_id" : 2, "host" : nodes[2] }]});
// Make sure we have a master
var master = replTest.getMaster();
@@ -24,6 +24,10 @@ doTest = function( signal ) {
return res.myState == 7;
}, "Aribiter failed to initialize.");
+ var result = conns[1].getDB("admin").runCommand({isMaster : 1});
+ assert(result.arbiterOnly);
+ assert(!result.passive);
+
// Wait for initial replication
master.getDB("foo").foo.insert({a: "foo"});
replTest.awaitReplication();
diff --git a/jstests/replsets/rollback2.js b/jstests/replsets/rollback2.js
index 46fb548ccdf..7ab3c6bf4ee 100644
--- a/jstests/replsets/rollback2.js
+++ b/jstests/replsets/rollback2.js
@@ -202,9 +202,24 @@ doTest = function (signal) {
wait(function () { return B.isMaster().ismaster || B.isMaster().secondary; });
// everyone is up here...
- assert(A.isMaster().ismaster || A.isMaster().secondary, "A up");
- assert(B.isMaster().ismaster || B.isMaster().secondary, "B up");
replTest.awaitReplication();
+
+ // theoretically, a read could slip in between StateBox::change() printing
+ // replSet SECONDARY
+ // and the replset actually becoming secondary
+ // so we're trying to wait for that here
+ print("waiting for secondary");
+ assert.soon(function() {
+ try {
+ var aim = A.isMaster();
+ var bim = B.isMaster();
+ return (aim.ismaster || aim.secondary) &&
+ (bim.ismaster || bim.secondary);
+ }
+ catch(e) {
+ print("checking A and B: "+e);
+ }
+ });
verify(a);
diff --git a/jstests/replsets/tags.js b/jstests/replsets/tags.js
index 8ef8a0a12cc..4e738862afe 100644
--- a/jstests/replsets/tags.js
+++ b/jstests/replsets/tags.js
@@ -8,16 +8,16 @@ var nodes = replTest.startSet();
var port = replTest.ports;
replTest.initiate({_id : name, members :
[
- {_id:0, host : host+":"+port[0], tags : ["0", "dc.ny.rk1", "machine"]},
- {_id:1, host : host+":"+port[1], tags : ["1", "dc.ny.rk1", "machine"]},
- {_id:2, host : host+":"+port[2], tags : ["2", "dc.ny.rk2", "machine"]},
- {_id:3, host : host+":"+port[3], tags : ["3", "dc.sf.rk1", "machine"]},
- {_id:4, host : host+":"+port[4], tags : ["4", "dc.sf.rk2", "machine"]},
+ {_id:0, host : host+":"+port[0], tags : {"server" : "0", "dc" : "ny", "ny" : "1", "rack" : "ny.rk1"}},
+ {_id:1, host : host+":"+port[1], tags : {"server" : "1", "dc" : "ny", "ny" : "2", "rack" : "ny.rk1"}},
+ {_id:2, host : host+":"+port[2], tags : {"server" : "2", "dc" : "ny", "ny" : "3", "rack" : "ny.rk2", "2" : "this"}},
+ {_id:3, host : host+":"+port[3], tags : {"server" : "3", "dc" : "sf", "sf" : "1", "rack" : "sf.rk1"}},
+ {_id:4, host : host+":"+port[4], tags : {"server" : "4", "dc" : "sf", "sf" : "2", "rack" : "sf.rk2"}},
],
settings : {
getLastErrorModes : {
- "important" : {"dc" : 2, "machine" : 3},
- "a machine" : {"machine" : 1}
+ "important" : {"dc" : 2, "server" : 3},
+ "a machine" : {"server" : 1}
}
}});
@@ -29,14 +29,14 @@ printjson(config);
var modes = config.settings.getLastErrorModes;
assert.eq(typeof modes, "object");
assert.eq(modes.important.dc, 2);
-assert.eq(modes.important.machine, 3);
-assert.eq(modes["a machine"]["machine"], 1);
+assert.eq(modes.important.server, 3);
+assert.eq(modes["a machine"]["server"], 1);
config.version++;
config.members[1].priority = 1.5;
config.members[2].priority = 2;
-modes.rack = {"dc.sf" : 1};
-modes.niceRack = {"dc.sf" : 2};
+modes.rack = {"sf" : 1};
+modes.niceRack = {"sf" : 2};
modes["a machine"]["2"] = 1;
modes.on2 = {"2" : 1}
@@ -57,10 +57,10 @@ printjson(config);
modes = config.settings.getLastErrorModes;
assert.eq(typeof modes, "object");
assert.eq(modes.important.dc, 2);
-assert.eq(modes.important.machine, 3);
-assert.eq(modes["a machine"]["machine"], 1);
-assert.eq(modes.rack["dc.sf"], 1);
-assert.eq(modes.niceRack["dc.sf"], 2);
+assert.eq(modes.important.server, 3);
+assert.eq(modes["a machine"]["server"], 1);
+assert.eq(modes.rack["sf"], 1);
+assert.eq(modes.niceRack["sf"], 2);
print("bridging");
replTest.bridge();
@@ -75,8 +75,11 @@ replTest.partition(3, 4);
print("done bridging");
print("test1");
+print("2 should be primary");
master = replTest.getMaster();
+printjson(master.getDB("admin").runCommand({replSetGetStatus:1}));
+
var timeout = 20000;
master.getDB("foo").bar.insert({x:1});
diff --git a/jstests/replsets/tags2.js b/jstests/replsets/tags2.js
new file mode 100644
index 00000000000..16dfcdf4983
--- /dev/null
+++ b/jstests/replsets/tags2.js
@@ -0,0 +1,44 @@
+// Change a getLastErrorMode from 2 to 3 servers
+
+var host = getHostName();
+var replTest = new ReplSetTest( {name: "rstag", nodes: 3, startPort: 31000} );
+var nodes = replTest.startSet();
+var ports = replTest.ports;
+var conf = {_id : "rstag", version: 1, members : [
+ {_id : 0, host : host+":"+ports[0], tags : {"backup" : "A"}},
+ {_id : 1, host : host+":"+ports[1], tags : {"backup" : "B"}},
+ {_id : 2, host : host+":"+ports[2], tags : {"backup" : "C"}} ],
+ settings : {getLastErrorModes : {
+ backedUp : {backup : 2} }} };
+replTest.initiate( conf );
+replTest.awaitReplication();
+
+master = replTest.getMaster();
+var db = master.getDB("test");
+db.foo.insert( {x:1} );
+var result = db.runCommand( {getLastError:1, w:"backedUp", wtimeout:20000} );
+assert.eq (result.err, null);
+
+conf.version = 2;
+conf.settings.getLastErrorModes.backedUp.backup = 3;
+master.getDB("admin").runCommand( {replSetReconfig: conf} );
+replTest.awaitReplication();
+
+master = replTest.getMaster();
+var db = master.getDB("test");
+db.foo.insert( {x:2} );
+var result = db.runCommand( {getLastError:1, w:"backedUp", wtimeout:20000} );
+assert.eq (result.err, null);
+
+conf.version = 3;
+conf.members[0].priority = 3;
+conf.members[2].priority = 0;
+master.getDB("admin").runCommand( {replSetReconfig: conf} );
+
+master = replTest.getMaster();
+var db = master.getDB("test");
+db.foo.insert( {x:3} );
+var result = db.runCommand( {getLastError:1, w:"backedUp", wtimeout:20000} );
+assert.eq (result.err, null);
+
+replTest.stopSet();
diff --git a/jstests/replsets/toostale.js b/jstests/replsets/toostale.js
index a1217a6a547..08b1a9c2c6f 100644
--- a/jstests/replsets/toostale.js
+++ b/jstests/replsets/toostale.js
@@ -111,16 +111,12 @@ replTest.restart(2);
print("8: check s2.state == 3");
-status = master.getDB("admin").runCommand({replSetGetStatus:1});
-while (status.state == 0) {
- print("state is 0: ");
- printjson(status);
- sleep(1000);
- status = master.getDB("admin").runCommand({replSetGetStatus:1});
-}
+assert.soon(function() {
+ var status = master.getDB("admin").runCommand({replSetGetStatus:1});
+ printjson(status);
+ return status.members && status.members[2].state == 3;
+});
-printjson(status);
-assert.eq(status.members[2].state, 3, 'recovering');
print("make sure s2 doesn't become primary");
replTest.stop(0);
diff --git a/jstests/sharding/addshard4.js b/jstests/sharding/addshard4.js
index 81cc1f89e73..4a44b5537b2 100644
--- a/jstests/sharding/addshard4.js
+++ b/jstests/sharding/addshard4.js
@@ -2,14 +2,14 @@
s = new ShardingTest( "addshard4", 2 , 0 , 1 , {useHostname : true});
-r = new ReplSetTest({name : "addshard4", nodes : 3, startPort : 34000});
+r = new ReplSetTest({name : "addshard4", nodes : 3, startPort : 31100});
r.startSet();
var config = r.getReplSetConfig();
config.members[2].priority = 0;
r.initiate(config);
-//Wait for replica set to be fully initialized - could take some time
+//Wait for replica set to be fully initialized - could take some time
//to pre-allocate files on slow systems
r.awaitReplication();
@@ -25,14 +25,14 @@ var result = s.adminCommand({"addshard" : shardName});
printjson(result);
assert.eq(result, true);
-r = new ReplSetTest({name : "addshard42", nodes : 3, startPort : 36000});
+r = new ReplSetTest({name : "addshard42", nodes : 3, startPort : 31200});
r.startSet();
config = r.getReplSetConfig();
config.members[2].arbiterOnly = true;
r.initiate(config);
-// Wait for replica set to be fully initialized - could take some time
+// Wait for replica set to be fully initialized - could take some time
// to pre-allocate files on slow systems
r.awaitReplication();
diff --git a/jstests/sharding/array_shard_key.js b/jstests/sharding/array_shard_key.js
new file mode 100644
index 00000000000..1ea61e8d3a8
--- /dev/null
+++ b/jstests/sharding/array_shard_key.js
@@ -0,0 +1,127 @@
+// Ensure you can't shard on an array key
+
+var st = new ShardingTest({ name : jsTestName(), shards : 3 })
+
+var mongos = st.s0
+
+var coll = mongos.getCollection( jsTestName() + ".foo" )
+
+st.shardColl( coll, { _id : 1, i : 1 }, { _id : ObjectId(), i : 1 } )
+
+printjson( mongos.getDB("config").chunks.find().toArray() )
+
+st.printShardingStatus()
+
+print( "1: insert some invalid data" )
+
+var value = null
+
+var checkError = function( shouldError ){
+ var error = coll.getDB().getLastError()
+
+ if( error != null ) printjson( error )
+
+ if( error == null && ! shouldError ) return
+ if( error != null && shouldError ) return
+
+ if( error == null ) print( "No error detected!" )
+ else print( "Unexpected error!" )
+
+ assert( false )
+}
+
+// Insert an object with invalid array key
+coll.insert({ i : [ 1, 2 ] })
+checkError( true )
+
+// Insert an object with valid array key
+coll.insert({ i : 1 })
+checkError( false )
+
+// Update the value with valid other field
+value = coll.findOne({ i : 1 })
+coll.update( value, { $set : { j : 2 } } )
+checkError( false )
+
+// Update the value with invalid other fields
+value = coll.findOne({ i : 1 })
+coll.update( value, Object.merge( value, { i : [ 3 ] } ) )
+checkError( true )
+
+// Multi-update the value with invalid other fields
+value = coll.findOne({ i : 1 })
+coll.update( value, Object.merge( value, { i : [ 3, 4 ] } ), false, true)
+checkError( true )
+
+// Single update the value with valid other fields
+value = coll.findOne({ i : 1 })
+coll.update( Object.merge( value, { i : [ 3, 4 ] } ), value )
+checkError( true )
+
+// Multi-update the value with other fields (won't work, but no error)
+value = coll.findOne({ i : 1 })
+coll.update( Object.merge( value, { i : [ 1, 1 ] } ), { $set : { k : 4 } }, false, true)
+checkError( false )
+
+// Query the value with other fields (won't work, but no error)
+value = coll.findOne({ i : 1 })
+coll.find( Object.merge( value, { i : [ 1, 1 ] } ) ).toArray()
+checkError( false )
+
+// Can't remove using multikey, but shouldn't error
+value = coll.findOne({ i : 1 })
+coll.remove( Object.extend( value, { i : [ 1, 2, 3, 4 ] } ) )
+checkError( false )
+
+// Can't remove using multikey, but shouldn't error
+value = coll.findOne({ i : 1 })
+coll.remove( Object.extend( value, { i : [ 1, 2, 3, 4, 5 ] } ) )
+error = coll.getDB().getLastError()
+assert.eq( error, null )
+assert.eq( coll.find().itcount(), 1 )
+
+value = coll.findOne({ i : 1 })
+coll.remove( Object.extend( value, { i : 1 } ) )
+error = coll.getDB().getLastError()
+assert.eq( error, null )
+assert.eq( coll.find().itcount(), 0 )
+
+printjson( "Sharding-then-inserting-multikey tested, now trying inserting-then-sharding-multikey" )
+
+// Insert a bunch of data then shard over key which is an array
+var coll = mongos.getCollection( "" + coll + "2" )
+for( var i = 0; i < 10; i++ ){
+ // TODO : does not check weird cases like [ i, i ]
+ coll.insert({ i : [ i, i + 1 ] })
+ checkError( false )
+}
+
+coll.ensureIndex({ _id : 1, i : 1 })
+
+try {
+ st.shardColl( coll, { _id : 1, i : 1 }, { _id : ObjectId(), i : 1 } )
+}
+catch( e ){
+ print( "Correctly threw error on sharding with multikey index." )
+}
+
+st.printShardingStatus()
+
+// Insert a bunch of data then shard over key which is not an array
+var coll = mongos.getCollection( "" + coll + "3" )
+for( var i = 0; i < 10; i++ ){
+ // TODO : does not check weird cases like [ i, i ]
+ coll.insert({ i : i })
+ checkError( false )
+}
+
+coll.ensureIndex({ _id : 1, i : 1 })
+
+st.shardColl( coll, { _id : 1, i : 1 }, { _id : ObjectId(), i : 1 } )
+
+st.printShardingStatus()
+
+
+
+// Finish
+st.stop()
diff --git a/jstests/sharding/auth.js b/jstests/sharding/auth.js
index 559ec2c1fac..8d8d7d79dab 100644
--- a/jstests/sharding/auth.js
+++ b/jstests/sharding/auth.js
@@ -56,7 +56,7 @@ s.s = s._mongos[0] = s["s0"] = conn;
login(adminUser);
-d1 = new ReplSetTest({name : "d1", nodes : 3, startPort : 34000});
+d1 = new ReplSetTest({name : "d1", nodes : 3, startPort : 31100});
d1.startSet({keyFile : "jstests/libs/key2"});
d1.initiate();
@@ -102,6 +102,18 @@ s.getDB(testUser.db).addUser(testUser.username, testUser.password);
logout(adminUser);
+print("query try");
+var e = assert.throws(function() {
+ conn.getDB("foo").bar.findOne();
+});
+printjson(e);
+
+print("cmd try");
+e = assert.throws(function() {
+ conn.getDB("foo").runCommand({listdbs:1});
+});
+printjson(e);
+
print("insert try 1");
s.getDB("test").foo.insert({x:1});
result = s.getDB("test").runCommand({getLastError : 1});
@@ -118,7 +130,7 @@ assert.eq(result.err, null);
logout(testUser);
-d2 = new ReplSetTest({name : "d2", nodes : 3, startPort : 36000});
+d2 = new ReplSetTest({name : "d2", nodes : 3, startPort : 31200});
d2.startSet({keyFile : "jstests/libs/key1"});
d2.initiate();
@@ -156,4 +168,10 @@ while (cursor.hasNext()) {
assert.eq(count, 501);
+// check that dump doesn't get stuck with auth
+var x = runMongoProgram( "mongodump", "--host", "127.0.0.1:31000", "-d", testUser.db, "-u", testUser.username, "-p", testUser.password);
+
+print("result: "+x);
+
+
s.stop();
diff --git a/jstests/sharding/count_slaveok.js b/jstests/sharding/count_slaveok.js
new file mode 100644
index 00000000000..075ab41c2ad
--- /dev/null
+++ b/jstests/sharding/count_slaveok.js
@@ -0,0 +1,69 @@
+// Tests count and distinct using slaveOk
+
+var st = new ShardingTest( testName = "countSlaveOk",
+ numShards = 1,
+ verboseLevel = 0,
+ numMongos = 1,
+ { rs : true,
+ rs0 : { nodes : 2 }
+ })
+
+var rst = st._rs[0].test
+
+// Insert data into replica set
+var conn = new Mongo( st.s.host )
+conn.setLogLevel( 3 )
+
+var coll = conn.getCollection( "test.countSlaveOk" )
+coll.drop()
+
+for( var i = 0; i < 300; i++ ){
+ coll.insert( { i : i % 10 } )
+}
+
+var connA = conn
+var connB = new Mongo( st.s.host )
+var connC = new Mongo( st.s.host )
+
+// Make sure the writes get through, otherwise we can continue to error these one-at-a-time
+coll.getDB().getLastError()
+
+st.printShardingStatus()
+
+// Wait for client to update itself and replication to finish
+rst.awaitReplication()
+
+var primary = rst.getPrimary()
+var sec = rst.getSecondary()
+
+// Data now inserted... stop the master, since only two in set, other will still be secondary
+rst.stop( rst.getMaster(), undefined, true )
+printjson( rst.status() )
+
+// Wait for the mongos to recognize the slave
+ReplSetTest.awaitRSClientHosts( conn, sec, { ok : true, secondary : true } )
+
+// Need to check slaveOk=true first, since slaveOk=false will destroy conn in pool when
+// master is down
+conn.setSlaveOk()
+
+// Should not throw exception, since slaveOk'd
+assert.eq( 30, coll.find({ i : 0 }).count() )
+assert.eq( 10, coll.distinct("i").length )
+
+try {
+
+ conn.setSlaveOk( false )
+ coll.find({ i : 0 }).count()
+
+ print( "Should not reach here!" )
+ printjson( coll.getDB().getLastError() )
+ assert( false )
+
+}
+catch( e ){
+ print( "Non-slaveOk'd connection failed." )
+}
+
+// Finish
+st.stop()
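count_slaveok.js depends on where mongos routes the reads: with slaveOk set, count and distinct can be served by the surviving secondary; without it, they must go to the primary that was just stopped. The expected numbers follow from the insert loop (300 documents of { i : i % 10 } give 30 per value and 10 distinct values). A compressed sketch of the two paths, assuming conn is a mongos connection whose backing primary is down (illustrative only):

    conn.setSlaveOk();                  // reads may be served by secondaries
    assert.eq(30, conn.getCollection("test.countSlaveOk").find({ i : 0 }).count());

    conn.setSlaveOk(false);             // reads must hit the (down) primary
    assert.throws(function() {
        conn.getCollection("test.countSlaveOk").find({ i : 0 }).count();
    });
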
diff --git a/jstests/sharding/drop_sharded_db.js b/jstests/sharding/drop_sharded_db.js
new file mode 100644
index 00000000000..aedde8f5032
--- /dev/null
+++ b/jstests/sharding/drop_sharded_db.js
@@ -0,0 +1,62 @@
+// Tests the dropping of a sharded database SERVER-3471 SERVER-1726
+
+var st = new ShardingTest({ name : jsTestName() })
+
+var mongos = st.s0
+var config = mongos.getDB( "config" )
+
+var dbName = "buy"
+var dbA = mongos.getDB( dbName )
+var dbB = mongos.getDB( dbName + "_201107" )
+var dbC = mongos.getDB( dbName + "_201108" )
+
+print( "1: insert some data and colls into all dbs" )
+
+var numDocs = 3000;
+var numColls = 10;
+for( var i = 0; i < numDocs; i++ ){
+ dbA.getCollection( "data" + (i % numColls) ).insert({ _id : i })
+ dbB.getCollection( "data" + (i % numColls) ).insert({ _id : i })
+ dbC.getCollection( "data" + (i % numColls) ).insert({ _id : i })
+}
+
+print( "2: shard the colls ")
+
+for( var i = 0; i < numColls; i++ ){
+
+ var key = { _id : 1 }
+ st.shardColl( dbA.getCollection( "data" + i ), key )
+ st.shardColl( dbB.getCollection( "data" + i ), key )
+ st.shardColl( dbC.getCollection( "data" + i ), key )
+
+}
+
+print( "3: drop the non-suffixed db ")
+
+dbA.dropDatabase()
+
+
+print( "3: ensure only the non-suffixed db was dropped ")
+
+var dbs = mongos.getDBNames()
+for( var i = 0; i < dbs.length; i++ ){
+ assert.neq( dbs, "" + dbA )
+}
+
+assert.eq( 0, config.databases.find({ _id : "" + dbA }).toArray().length )
+assert.eq( 1, config.databases.find({ _id : "" + dbB }).toArray().length )
+assert.eq( 1, config.databases.find({ _id : "" + dbC }).toArray().length )
+
+assert.eq( numColls, config.collections.find({ _id : RegExp( "^" + dbA + "\\..*" ), dropped : true }).toArray().length )
+assert.eq( numColls, config.collections.find({ _id : RegExp( "^" + dbB + "\\..*" ), dropped : false }).toArray().length )
+assert.eq( numColls, config.collections.find({ _id : RegExp( "^" + dbC + "\\..*" ), dropped : false }).toArray().length )
+
+for( var i = 0; i < numColls; i++ ){
+
+ assert.eq( numDocs / numColls, dbB.getCollection( "data" + (i % numColls) ).find().itcount() )
+ assert.eq( numDocs / numColls, dbC.getCollection( "data" + (i % numColls) ).find().itcount() )
+
+}
+
+// Finish
+st.stop()
diff --git a/jstests/sharding/features3.js b/jstests/sharding/features3.js
index 6870bb70208..5277d22ac56 100644
--- a/jstests/sharding/features3.js
+++ b/jstests/sharding/features3.js
@@ -17,54 +17,79 @@ for ( i=0; i<N; i++ )
db.foo.insert( { _id : i } )
db.getLastError();
x = db.foo.stats();
+assert.eq( "test.foo" , x.ns , "basic1" )
+assert( x.sharded , "basic2" )
assert.eq( N , x.count , "total count" )
assert.eq( N / 2 , x.shards.shard0000.count , "count on shard0000" )
assert.eq( N / 2 , x.shards.shard0001.count , "count on shard0001" )
assert( x.totalIndexSize > 0 )
assert( x.numExtents > 0 )
+db.bar.insert( { x : 1 } )
+x = db.bar.stats();
+assert.eq( 1 , x.count , "XXX1" )
+assert.eq( "test.bar" , x.ns , "XXX2" )
+assert( ! x.sharded , "XXX3: " + tojson(x) )
+
+// Fork shell and start pulling back data
start = new Date()
print( "about to fork shell: " + Date() )
-join = startParallelShell( "db.foo.find( function(){ x = ''; for ( i=0; i<10000; i++ ){ x+=i; } return true; } ).itcount()" )
+
+// TODO: Still potential problem when our sampling of current ops misses when $where is active -
+// solution is to increase sleep time
+parallelCommand = "try { while(true){" +
+ " db.foo.find( function(){ x = ''; for ( i=0; i<10000; i++ ){ x+=i; } sleep( 1000 ); return true; } ).itcount() " +
+ "}} catch(e){ print('PShell execution ended:'); printjson( e ) }"
+
+join = startParallelShell( parallelCommand )
print( "after forking shell: " + Date() )
+// Get all current $where operations
function getMine( printInprog ){
+
var inprog = db.currentOp().inprog;
+
if ( printInprog )
printjson( inprog )
+
+ // Find all the where queries
var mine = []
for ( var x=0; x<inprog.length; x++ ){
if ( inprog[x].query && inprog[x].query.$where ){
mine.push( inprog[x] )
}
}
+
return mine;
}
-state = 0; // 0 = not found, 1 = killed,
-killTime = null;
+var state = 0; // 0 = not found, 1 = killed,
+var killTime = null;
+var i = 0;
-for ( i=0; i<( 100* 1000 ); i++ ){
+assert.soon( function(){
+
+ // Get all the current operations
mine = getMine( state == 0 && i > 20 );
- if ( state == 0 ){
- if ( mine.length == 0 ){
- sleep(1);
- continue;
- }
+ i++;
+
+ // Wait for the queries to start
+ if ( state == 0 && mine.length > 0 ){
+ // Queries started
state = 1;
+ // Kill all $where
mine.forEach( function(z){ printjson( db.getSisterDB( "admin" ).killOp( z.opid ) ); } )
killTime = new Date()
}
- else if ( state == 1 ){
- if ( mine.length == 0 ){
- state = 2;
- break;
- }
- sleep(1)
- continue;
+ // Wait for killed queries to end
+ else if ( state == 1 && mine.length == 0 ){
+ // Queries ended
+ state = 2;
+ return true;
}
-}
+
+}, "Couldn't kill the $where operations.", 2 * 60 * 1000 )
print( "after loop: " + Date() );
assert( killTime , "timed out waiting to kill last mine:" + tojson(mine) )
diff --git a/jstests/sharding/group_slaveok.js b/jstests/sharding/group_slaveok.js
new file mode 100644
index 00000000000..3b7cec4910f
--- /dev/null
+++ b/jstests/sharding/group_slaveok.js
@@ -0,0 +1,68 @@
+// Tests group using slaveOk
+
+var st = new ShardingTest( testName = "groupSlaveOk",
+ numShards = 1,
+ verboseLevel = 0,
+ numMongos = 1,
+ { rs : true,
+ rs0 : { nodes : 2 }
+ })
+
+var rst = st._rs[0].test
+
+// Insert data into replica set
+var conn = new Mongo( st.s.host )
+conn.setLogLevel( 3 )
+
+var coll = conn.getCollection( "test.groupSlaveOk" )
+coll.drop()
+
+for( var i = 0; i < 300; i++ ){
+ coll.insert( { i : i % 10 } )
+}
+
+// Make sure the writes get through, otherwise we can continue to error these one-at-a-time
+coll.getDB().getLastError()
+
+st.printShardingStatus()
+
+// Wait for client to update itself and replication to finish
+rst.awaitReplication()
+
+var primary = rst.getPrimary()
+var sec = rst.getSecondary()
+
+// Data now inserted... stop the master, since only two in set, other will still be secondary
+rst.stop( rst.getMaster(), undefined, true )
+printjson( rst.status() )
+
+// Wait for the mongos to recognize the slave
+ReplSetTest.awaitRSClientHosts( conn, sec, { ok : true, secondary : true } )
+
+// Need to check slaveOk=true first, since slaveOk=false will destroy conn in pool when
+// master is down
+conn.setSlaveOk()
+
+// Should not throw exception, since slaveOk'd
+assert.eq( 10, coll.group({ key : { i : true } ,
+ reduce : function( obj, ctx ){ ctx.count += 1 } ,
+ initial : { count : 0 } }).length )
+
+try {
+
+ conn.setSlaveOk( false )
+ coll.group({ key : { i : true } ,
+ reduce : function( obj, ctx ){ ctx.count += 1 } ,
+ initial : { count : 0 } })
+
+ print( "Should not reach here!" )
+ printjson( coll.getDB().getLastError() )
+ assert( false )
+
+}
+catch( e ){
+ print( "Non-slaveOk'd connection failed." )
+}
+
+// Finish
+st.stop()
diff --git a/jstests/sharding/parallel.js b/jstests/sharding/parallel.js
new file mode 100644
index 00000000000..d35459c3730
--- /dev/null
+++ b/jstests/sharding/parallel.js
@@ -0,0 +1,38 @@
+numShards = 3
+s = new ShardingTest( "parallel" , numShards , 2 , 2 , { sync : true } );
+
+s.adminCommand( { enablesharding : "test" } );
+s.adminCommand( { shardcollection : "test.foo" , key : { _id : 1 } } );
+
+db = s.getDB( "test" );
+
+N = 10000;
+
+for ( i=0; i<N; i+=(N/12) ) {
+ s.adminCommand( { split : "test.foo" , middle : { _id : i } } )
+ sh.moveChunk( "test.foo", { _id : i } , "shard000" + Math.floor( Math.random() * numShards ) )
+}
+
+
+for ( i=0; i<N; i++ )
+ db.foo.insert( { _id : i } )
+db.getLastError();
+
+
+doCommand = function( dbname , cmd ) {
+ x = benchRun( { ops : [ { op : "findOne" , ns : dbname + ".$cmd" , query : cmd } ] ,
+ host : db.getMongo().host , parallel : 2 , seconds : 2 } )
+ printjson(x)
+ x = benchRun( { ops : [ { op : "findOne" , ns : dbname + ".$cmd" , query : cmd } ] ,
+ host : s._mongos[1].host , parallel : 2 , seconds : 2 } )
+ printjson(x)
+}
+
+doCommand( "test" , { dbstats : 1 } )
+doCommand( "config" , { dbstats : 1 } )
+
+x = s.getDB( "config" ).stats()
+assert( x.ok , tojson(x) )
+printjson(x)
+
+s.stop()
diff --git a/jstests/sharding/shard3.js b/jstests/sharding/shard3.js
index 5f2c0b5148f..e27316e17b6 100644
--- a/jstests/sharding/shard3.js
+++ b/jstests/sharding/shard3.js
@@ -62,6 +62,7 @@ function doCounts( name , total , onlyItCounts ){
var total = doCounts( "before wrong save" )
secondary.save( { num : -3 } );
+printjson( secondary.getDB().getLastError() )
doCounts( "after wrong save" , total , true )
e = a.find().explain();
assert.eq( 3 , e.n , "ex1" )
diff --git a/jstests/sharding/sync6.js b/jstests/sharding/sync6.js
index 0543837a822..233534bf1aa 100644
--- a/jstests/sharding/sync6.js
+++ b/jstests/sharding/sync6.js
@@ -17,10 +17,13 @@ commandConn.getDB( "admin" ).runCommand( { setParameter : 1, logLevel : 1 } )
// Have lots of threads, so use larger i
// Can't test too many, we get socket exceptions... possibly due to the
// javascript console.
-for ( var i = 8; i < 12; i++ ) {
+for ( var i = 8; i < 9; i++ ) {
- // Our force time is 2 seconds
- var takeoverMS = 2000;
+ // Our force time is 4 seconds
+ // Slower machines can't keep up the LockPinger rate, which can lead to lock failures
+ // since our locks are only valid if the LockPinger pings faster than the force time.
+ // Actual lock timeout is 15 minutes, so a few seconds is extremely aggressive
+ var takeoverMS = 4000;
// Generate valid sleep and skew for this timeout
var threadSleepWithLock = takeoverMS / 2;
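The longer force time above encodes an invariant of the distributed lock: a held lock is only safe from forcing while its LockPinger pings faster than the takeover interval, so the per-thread sleep while holding the lock must stay at or below half of takeoverMS. A tiny sketch of that constraint (the same check is made explicitly in sync6_slow.js further down):

    var takeoverMS = 4000;                     // force time used above
    var threadSleepWithLock = takeoverMS / 2;  // maximum sleep while holding the lock
    assert(threadSleepWithLock <= takeoverMS / 2, "lock holder would out-sleep the pinger");
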
diff --git a/jstests/slowNightly/command_line_parsing.js b/jstests/slowNightly/command_line_parsing.js
index 38c7324ddb9..ba7b1369627 100644
--- a/jstests/slowNightly/command_line_parsing.js
+++ b/jstests/slowNightly/command_line_parsing.js
@@ -7,3 +7,15 @@ var baseName = "jstests_slowNightly_command_line_parsing";
var m = startMongod( "--port", port, "--dbpath", "/data/db/" + baseName, "--notablescan" );
m.getDB( baseName ).getCollection( baseName ).save( {a:1} );
assert.throws( function() { m.getDB( baseName ).getCollection( baseName ).find( {a:1} ).toArray() } );
+
+// test config file
+var m2 = startMongod( "--port", port+2, "--dbpath", "/data/db/" + baseName +"2", "--config", "jstests/libs/testconfig");
+var m2result = {
+ "parsed" : {
+ "config" : "jstests/libs/testconfig",
+ "dbpath" : "/data/db/jstests_slowNightly_command_line_parsing2",
+ "fastsync" : "true",
+ "port" : 31002
+ }
+};
+assert( friendlyEqual(m2result.parsed, m2.getDB("admin").runCommand( "getCmdLineOpts" ).parsed) );
diff --git a/jstests/slowNightly/dur_big_atomic_update.js b/jstests/slowNightly/dur_big_atomic_update.js
index ffb0d838cc2..800b4b831fb 100644
--- a/jstests/slowNightly/dur_big_atomic_update.js
+++ b/jstests/slowNightly/dur_big_atomic_update.js
@@ -23,6 +23,23 @@ err = d.getLastErrorObj();
assert(err.err == null);
assert(err.n == 1024);
+d.dropDatabase();
+
+for (var i=0; i<1024; i++){
+ d.foo.insert({_id:i});
+}
+
+// Do it again but in a db.eval
+d.eval(
+ function(host, big_string) {
+ new Mongo(host).getDB("test").foo.update({}, {$set: {big_string: big_string}}, false, /*multi*/true)
+ }, conn.host, big_string); // Can't pass in connection or DB objects
+
+err = d.getLastErrorObj();
+
+assert(err.err == null);
+assert(err.n == 1024);
+
// free up space
d.dropDatabase();
diff --git a/jstests/slowNightly/replReads.js b/jstests/slowNightly/replReads.js
new file mode 100644
index 00000000000..4fe91305738
--- /dev/null
+++ b/jstests/slowNightly/replReads.js
@@ -0,0 +1,108 @@
+// Test that doing slaveOk reads from secondaries hits all the secondaries evenly
+
+function testReadLoadBalancing(numReplicas) {
+
+ s = new ShardingTest( "replReads" , 1 /* numShards */, 0 /* verboseLevel */, 1 /* numMongos */, { rs : true , numReplicas : numReplicas, chunksize : 1 } )
+
+ s.adminCommand({enablesharding : "test"})
+ s.config.settings.find().forEach(printjson)
+
+ s.adminCommand({shardcollection : "test.foo", key : {_id : 1}})
+
+ s.getDB("test").foo.insert({a : 123})
+
+ primary = s._rs[0].test.liveNodes.master
+ secondaries = s._rs[0].test.liveNodes.slaves
+
+ function rsStats() {
+ return s.getDB( "admin" ).runCommand( "connPoolStats" )["replicaSets"]["replReads-rs0"];
+ }
+
+ assert.eq( numReplicas , rsStats().hosts.length );
+
+ function isMasterOrSecondary( info ){
+ if ( ! info.ok )
+ return false;
+ if ( info.ismaster )
+ return true;
+ return info.secondary && ! info.hidden;
+ }
+
+ assert.soon(
+ function() {
+ var x = rsStats().hosts;
+ printjson(x)
+ for ( var i=0; i<x.length; i++ )
+ if ( ! isMasterOrSecondary( x[i] ) )
+ return false;
+ return true;
+ }
+ );
+
+ for (var i = 0; i < secondaries.length; i++) {
+ assert.soon( function(){ return secondaries[i].getDB("test").foo.count() > 0; } )
+ secondaries[i].getDB('test').setProfilingLevel(2)
+ }
+
+ for (var i = 0; i < secondaries.length * 10; i++) {
+ conn = new Mongo(s._mongos[0].host)
+ conn.setSlaveOk()
+ conn.getDB('test').foo.findOne()
+ }
+
+ for (var i = 0; i < secondaries.length; i++) {
+ var profileCollection = secondaries[i].getDB('test').system.profile;
+ assert.eq(10, profileCollection.find().count(), "Wrong number of read queries sent to secondary " + i + " " + tojson( profileCollection.find().toArray() ))
+ }
+
+ db = primary.getDB( "test" );
+
+ printjson(rs.status());
+ c = rs.conf();
+ print( "config before: " + tojson(c) );
+ for ( i=0; i<c.members.length; i++ ) {
+ if ( c.members[i].host == db.runCommand( "ismaster" ).primary )
+ continue;
+ c.members[i].hidden = true;
+ c.members[i].priority = 0;
+ break;
+ }
+ rs.reconfig( c );
+ print( "config after: " + tojson( rs.conf() ) );
+
+ assert.soon(
+ function() {
+ var x = rsStats();
+ printjson(x);
+ var numOk = 0;
+ for ( var i=0; i<x.hosts.length; i++ )
+ if ( x.hosts[i].hidden )
+ return true;
+ return false;
+ } , "one slave not ok" , 180000 , 5000
+ );
+
+ for (var i = 0; i < secondaries.length * 10; i++) {
+ conn = new Mongo(s._mongos[0].host)
+ conn.setSlaveOk()
+ conn.getDB('test').foo.findOne()
+ }
+
+ var counts = []
+ for (var i = 0; i < secondaries.length; i++) {
+ var profileCollection = secondaries[i].getDB('test').system.profile;
+ counts.push( profileCollection.find().count() );
+ }
+
+ counts = counts.sort();
+ assert.eq( 20 , counts[1] - counts[0] , "counts wrong: " + tojson( counts ) );
+
+ s.stop()
+}
+
+//for (var i = 1; i < 10; i++) {
+// testReadLoadBalancing(i)
+//}
+
+// Is there a way that this can be run multiple times with different values?
+testReadLoadBalancing(3)
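The assertion counts in replReads.js follow from the query batches: secondaries.length * 10 slaveOk reads spread evenly give 10 profiled queries per secondary, and once one secondary is hidden the next batch of 20 lands entirely on the visible one, so the sorted per-secondary counts differ by 20 in the 2-secondary case. A small worked check of that arithmetic (standalone sketch):

    var numSecondaries = 2;                     // replReads-rs0 with numReplicas = 3
    var batch = numSecondaries * 10;            // 20 slaveOk reads per batch
    var firstBatchEach = batch / numSecondaries;        // 10 per secondary
    var visibleAfterHide = firstBatchEach + batch;      // hidden member gets no new reads
    assert.eq(20, visibleAfterHide - firstBatchEach);   // matches counts[1] - counts[0]
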
diff --git a/jstests/slowNightly/sharding_migrateBigObject.js b/jstests/slowNightly/sharding_migrateBigObject.js
index d8ff740d81d..5ad9ed12a18 100644
--- a/jstests/slowNightly/sharding_migrateBigObject.js
+++ b/jstests/slowNightly/sharding_migrateBigObject.js
@@ -10,10 +10,8 @@ var admin = mongos.getDB("admin")
admin.runCommand({ addshard : "localhost:30001" })
admin.runCommand({ addshard : "localhost:30002" })
-
-
-var coll = mongos.getDB("test").getCollection("stuff")
-coll.drop()
+db = mongos.getDB("test");
+var coll = db.getCollection("stuff")
var data = "x"
var nsq = 16
@@ -28,6 +26,9 @@ for( var i = 0; i < 40; i++ ) {
if(i != 0 && i % 10 == 0) printjson( coll.stats() )
coll.save({ data : dataObj })
}
+db.getLastError();
+
+assert.eq( 40 , coll.count() , "prep1" );
printjson( coll.stats() )
@@ -37,6 +38,8 @@ admin.printShardingStatus()
admin.runCommand({ shardcollection : "" + coll, key : { _id : 1 } })
+assert.lt( 5 , mongos.getDB( "config" ).chunks.find( { ns : "test.stuff" } ).count() , "not enough chunks" );
+
assert.soon(
function(){
res = mongos.getDB( "config" ).chunks.group( { cond : { ns : "test.stuff" } ,
@@ -45,7 +48,7 @@ assert.soon(
initial : { nChunks : 0 } } );
printjson( res );
- return res.length > 1 && Math.abs( res[0].nChunks - res[1].nChunks ) <= 1;
+ return res.length > 1 && Math.abs( res[0].nChunks - res[1].nChunks ) <= 3;
} ,
"never migrated" , 180000 , 1000 );
diff --git a/jstests/slowNightly/sharding_passthrough.js b/jstests/slowNightly/sharding_passthrough.js
index 65d22059381..d81df685bc5 100644
--- a/jstests/slowNightly/sharding_passthrough.js
+++ b/jstests/slowNightly/sharding_passthrough.js
@@ -62,17 +62,17 @@ files.forEach(
* clean (apitest_dbcollection)
* logout and getnonce
*/
- if (/[\/\\](error3|capped.*|splitvector|apitest_db|cursor6|copydb-auth|profile1|dbhash|median|apitest_dbcollection|evalb|evald|eval_nolock|auth1|auth2|dropdb_race|unix_socket\d*)\.js$/.test(x.name)) {
+ if (/[\/\\](error3|capped.*|splitvector|apitest_db|cursor6|copydb-auth|profile\d*|dbhash|median|apitest_dbcollection|evalb|evald|eval_nolock|auth1|auth2|dropdb_race|unix_socket\d*)\.js$/.test(x.name)) {
print(" !!!!!!!!!!!!!!! skipping test that has failed under sharding but might not anymore " + x.name)
return;
}
// These are bugs (some might be fixed now):
- if (/[\/\\](apply_ops1|count5|cursor8|or4|shellkillop|update4|profile1)\.js$/.test(x.name)) {
+ if (/[\/\\](apply_ops1|count5|cursor8|or4|shellkillop|update4|profile\d*)\.js$/.test(x.name)) {
print(" !!!!!!!!!!!!!!! skipping test that has failed under sharding but might not anymore " + x.name)
return;
}
// These aren't supposed to get run under sharding:
- if (/[\/\\](dbadmin|error1|fsync|fsync2|geo.*|indexh|remove5|update4|notablescan|compact.*|check_shard_index|mr_replaceIntoDB)\.js$/.test(x.name)) {
+ if (/[\/\\](dbadmin|error1|fsync|fsync2|geo.*|indexh|remove5|update4|notablescan|compact.*|check_shard_index|bench_test.*|mr_replaceIntoDB)\.js$/.test(x.name)) {
print(" >>>>>>>>>>>>>>> skipping test that would fail under sharding " + x.name)
return;
}
diff --git a/jstests/slowNightly/sharding_rs1.js b/jstests/slowNightly/sharding_rs1.js
index 01358e207de..f73e690d42e 100644
--- a/jstests/slowNightly/sharding_rs1.js
+++ b/jstests/slowNightly/sharding_rs1.js
@@ -59,6 +59,12 @@ assert.soon( function(){
s.config.settings.update( { _id: "balancer" }, { $set : { stopped: true } } , true );
+sleep( 1000 );
+
+while ( sh.isBalancerRunning() ){
+ sleep( 1000 );
+}
+
for ( i=0; i<s._rs.length; i++ ){
r = s._rs[i];
r.test.awaitReplication();
diff --git a/jstests/slowNightly/sharding_rs_arb1.js b/jstests/slowNightly/sharding_rs_arb1.js
new file mode 100644
index 00000000000..be4c4dcd136
--- /dev/null
+++ b/jstests/slowNightly/sharding_rs_arb1.js
@@ -0,0 +1,40 @@
+x = 5
+name = "sharding_rs_arb1"
+replTest = new ReplSetTest( { name : name , nodes : 3 , startPort : 31000 } );
+nodes = replTest.startSet();
+var port = replTest.ports;
+replTest.initiate({_id : name, members :
+ [
+ {_id:0, host : getHostName()+":"+port[0]},
+ {_id:1, host : getHostName()+":"+port[1]},
+ {_id:2, host : getHostName()+":"+port[2], arbiterOnly : true},
+ ],
+ });
+
+replTest.awaitReplication();
+
+master = replTest.getMaster();
+db = master.getDB( "test" );
+printjson( rs.status() );
+
+var config = startMongodEmpty("--configsvr", "--port", 29999, "--dbpath", "/data/db/" + name + "_config" );
+
+var mongos = startMongos("--port", 30000, "--configdb", getHostName() + ":29999")
+var admin = mongos.getDB("admin")
+var url = name + "/";
+for ( i=0; i<port.length; i++ ) {
+ if ( i > 0 )
+ url += ",";
+ url += getHostName() + ":" + port[i];
+}
+print( url )
+res = admin.runCommand( { addshard : url } )
+printjson( res )
+assert( res.ok , tojson(res) )
+
+
+
+stopMongod( 30000 )
+stopMongod( 29999 )
+replTest.stopSet();
+
diff --git a/jstests/slowNightly/sync6_slow.js b/jstests/slowNightly/sync6_slow.js
new file mode 100644
index 00000000000..63d6123833c
--- /dev/null
+++ b/jstests/slowNightly/sync6_slow.js
@@ -0,0 +1,82 @@
+// More complete version of sharding/sync6.js
+// Test that distributed lock forcing does not result in inconsistencies, using a
+// fast timeout.
+
+// Note that this test will always have random factors, since we can't control the
+// thread scheduling.
+
+test = new SyncCCTest( "sync6", { logpath : "/dev/null" } )
+
+// Startup another process to handle our commands to the cluster, mostly so it's
+// easier to read.
+var commandConn = startMongodTest( 30000 + 4, "syncCommander", false, {})//{ logpath : "/dev/null" } )//{verbose : ""} )
+// { logpath : "/data/db/syncCommander/mongod.log" } );
+
+// Up the log level for this test
+commandConn.getDB( "admin" ).runCommand( { setParameter : 1, logLevel : 0 } )
+
+// Have lots of threads, so use larger i
+// Can't test too many, we get socket exceptions... possibly due to the
+// javascript console.
+// TODO: Figure out our max bounds here - use fewer threads now to avoid pinger starvation issues.
+for ( var t = 0; t < 4; t++ ) {
+for ( var i = 4; i < 5; i++ ) {
+
+ // Our force time is 6 seconds - slightly diff from sync6 to ensure exact time not important
+ var takeoverMS = 6000;
+
+ // Generate valid sleep and skew for this timeout
+ var threadSleepWithLock = takeoverMS / 2;
+ var configServerTimeSkew = [ 0, 0, 0 ]
+ for ( var h = 0; h < 3; h++ ) {
+ // Skew by 1/30th the takeover time either way, at max
+ configServerTimeSkew[h] = ( i + h ) % Math.floor( takeoverMS / 60 )
+ // Make skew pos or neg
+ configServerTimeSkew[h] *= ( ( i + h ) % 2 ) ? -1 : 1;
+ }
+
+ // Build command
+ command = { _testDistLockWithSkew : 1 }
+
+ // Basic test parameters
+ command["lockName"] = "TimeSkewFailNewTest_lock_" + i;
+ command["host"] = test.url
+ command["seed"] = i
+ command["numThreads"] = ( i % 50 ) + 1
+
+ // Critical values so we're sure of correct operation
+ command["takeoverMS"] = takeoverMS
+ command["wait"] = 4 * takeoverMS // so we must force the lock
+ command["skewHosts"] = configServerTimeSkew
+ command["threadWait"] = threadSleepWithLock
+
+ // Less critical test params
+
+ // 1/3 of threads will not release the lock
+ command["hangThreads"] = 3
+ // Amount of time to wait before trying lock again
+ command["threadSleep"] = 1;// ( ( i + 1 ) * 100 ) % (takeoverMS / 4)
+ // Amount of total clock skew possible between locking threads (processes)
+ // This can be large now.
+ command["skewRange"] = ( command["takeoverMS"] * 3 ) * 60 * 1000
+
+ // Double-check our sleep, host skew, and takeoverMS values again
+
+ // At maximum, our threads must sleep only half the lock timeout time.
+ assert( command["threadWait"] <= command["takeoverMS"] / 2 )
+ for ( var h = 0; h < command["skewHosts"].length; h++ ) {
+ // At maximum, our config server time skew needs to be less than 1/30th
+ // the total time skew (1/60th either way).
+ assert( Math.abs( command["skewHosts"][h] ) <= ( command["takeoverMS"] / 60 ) )
+ }
+
+ result = commandConn.getDB( "admin" ).runCommand( command )
+ printjson( result )
+ printjson( command )
+ assert( result.ok, "Skewed threads did not increment correctly." );
+
+}
+}
+
+stopMongoProgram( 30004 )
+test.stop();
diff --git a/jstests/slowWeekly/geo_full.js b/jstests/slowWeekly/geo_full.js
index ab8715be6a6..9eb1b7a54bf 100644
--- a/jstests/slowWeekly/geo_full.js
+++ b/jstests/slowWeekly/geo_full.js
@@ -25,17 +25,25 @@ var randEnvironment = function(){
return { max : 180,
min : -180,
bits : Math.floor( Random.rand() * 32 ) + 1,
- earth : true }
+ earth : true,
+ bucketSize : 360 / ( 4 * 1024 * 1024 * 1024 ) }
}
var scales = [ 0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000, 10000, 100000 ]
var scale = scales[ Math.floor( Random.rand() * scales.length ) ]
var offset = Random.rand() * scale
- return { max : Random.rand() * scale + offset,
- min : - Random.rand() * scale + offset,
- bits : Math.floor( Random.rand() * 32 ) + 1,
- earth : false }
+ var max = Random.rand() * scale + offset
+ var min = - Random.rand() * scale + offset
+ var bits = Math.floor( Random.rand() * 32 ) + 1
+ var range = max - min
+ var bucketSize = range / ( 4 * 1024 * 1024 * 1024 )
+
+ return { max : max,
+ min : min,
+ bits : bits,
+ earth : false,
+ bucketSize : bucketSize }
}
@@ -271,6 +279,7 @@ var randYesQuery = function(){
var locArray = function( loc ){
if( loc.x ) return [ loc.x, loc.y ]
+ if( ! loc.length ) return [ loc[0], loc[1] ]
return loc
}
@@ -287,32 +296,54 @@ var locsArray = function( locs ){
}
}
-var numTests = 30
+var minBoxSize = function( env, box ){
+ return env.bucketSize * Math.pow( 2, minBucketScale( env, box ) )
+}
+
+var minBucketScale = function( env, box ){
+
+ if( box.length && box[0].length )
+ box = [ box[0][0] - box[1][0], box[0][1] - box[1][1] ]
+
+ if( box.length )
+ box = Math.max( box[0], box[1] )
+
+ print( box )
+ print( env.bucketSize )
+
+ return Math.ceil( Math.log( box / env.bucketSize ) / Math.log( 2 ) )
+
+}
+
+// TODO: Add spherical $uniqueDocs tests
+var numTests = 100
// Our seed will change every time this is run, but
// each individual test will be reproducible given
// that seed and test number
-var seed = Math.floor( Random.rand() * ( 10 ^ 30) )
+var seed = new Date().getTime()
for ( var test = 0; test < numTests; test++ ) {
Random.srand( seed + test );
-
+ //Random.srand( 42240 )
+ //Random.srand( 7344 )
var t = db.testAllGeo
t.drop()
print( "Generating test environment #" + test )
var env = randEnvironment()
+ //env.bits = 11
var query = randQuery( env )
var data = randDataType()
-
+ //data.numDocs = 100; data.maxLocs = 3;
var results = {}
var totalPoints = 0
print( "Calculating target results for " + data.numDocs + " docs with max " + data.maxLocs + " locs " )
// Index after a random number of docs added
var indexIt = Math.floor( Random.rand() * data.numDocs )
-
+
for ( var i = 0; i < data.numDocs; i++ ) {
if( indexIt == i ){
@@ -346,7 +377,7 @@ for ( var test = 0; test < numTests; test++ ) {
randQueryAdditions( doc, indResults )
//printjson( doc )
-
+ doc._id = i
t.insert( doc )
}
@@ -362,27 +393,33 @@ for ( var test = 0; test < numTests; test++ ) {
// exact
print( "Exact query..." )
assert.eq( results.exact.docsIn, t.find( { "locs.loc" : randLocType( query.exact ), "exact.docIn" : randYesQuery() } ).count() )
-
+
// $center
print( "Center query..." )
- assert.eq( results.center.docsIn, t.find( { "locs.loc" : { $within : { $center : [ query.center, query.radius ] } }, "center.docIn" : randYesQuery() } ).count() )
+ print( "Min box : " + minBoxSize( env, query.radius ) )
+ assert.eq( results.center.docsIn, t.find( { "locs.loc" : { $within : { $center : [ query.center, query.radius ], $uniqueDocs : 1 } }, "center.docIn" : randYesQuery() } ).count() )
+ assert.eq( results.center.locsIn, t.find( { "locs.loc" : { $within : { $center : [ query.center, query.radius ], $uniqueDocs : false } }, "center.docIn" : randYesQuery() } ).count() )
if( query.sphereRadius >= 0 ){
print( "Center sphere query...")
// $centerSphere
assert.eq( results.sphere.docsIn, t.find( { "locs.loc" : { $within : { $centerSphere : [ query.sphereCenter, query.sphereRadius ] } }, "sphere.docIn" : randYesQuery() } ).count() )
+ assert.eq( results.sphere.locsIn, t.find( { "locs.loc" : { $within : { $centerSphere : [ query.sphereCenter, query.sphereRadius ], $uniqueDocs : 0.0 } }, "sphere.docIn" : randYesQuery() } ).count() )
}
// $box
print( "Box query..." )
- assert.eq( results.box.docsIn, t.find( { "locs.loc" : { $within : { $box : query.box } }, "box.docIn" : randYesQuery() } ).count() )
+ assert.eq( results.box.docsIn, t.find( { "locs.loc" : { $within : { $box : query.box, $uniqueDocs : true } }, "box.docIn" : randYesQuery() } ).count() )
+ assert.eq( results.box.locsIn, t.find( { "locs.loc" : { $within : { $box : query.box, $uniqueDocs : false } }, "box.docIn" : randYesQuery() } ).count() )
// $polygon
print( "Polygon query..." )
assert.eq( results.poly.docsIn, t.find( { "locs.loc" : { $within : { $polygon : query.boxPoly } }, "poly.docIn" : randYesQuery() } ).count() )
+ assert.eq( results.poly.locsIn, t.find( { "locs.loc" : { $within : { $polygon : query.boxPoly, $uniqueDocs : 0 } }, "poly.docIn" : randYesQuery() } ).count() )
// $near
print( "Near query..." )
assert.eq( results.center.locsIn > 100 ? 100 : results.center.locsIn, t.find( { "locs.loc" : { $near : query.center, $maxDistance : query.radius } } ).count( true ) )
+
if( query.sphereRadius >= 0 ){
print( "Near sphere query...")
// $centerSphere
@@ -391,27 +428,39 @@ for ( var test = 0; test < numTests; test++ ) {
// geoNear
+ // results limited by size of objects
if( data.maxLocs < 100 ){
-
+
+ // GeoNear query
+ print( "GeoNear query..." )
+ assert.eq( results.center.locsIn > 100 ? 100 : results.center.locsIn, t.getDB().runCommand({ geoNear : "testAllGeo", near : query.center, maxDistance : query.radius }).results.length )
+ // GeoNear query
+ assert.eq( results.center.docsIn > 100 ? 100 : results.center.docsIn, t.getDB().runCommand({ geoNear : "testAllGeo", near : query.center, maxDistance : query.radius, uniqueDocs : true }).results.length )
+
+
var num = 2 * results.center.locsIn;
if( num > 200 ) num = 200;
var output = db.runCommand( {
geoNear : "testAllGeo",
near : query.center,
- maxDistance : query.radius ,
+ maxDistance : query.radius ,
+ includeLocs : true,
num : num } ).results
-
+
assert.eq( Math.min( 200, results.center.locsIn ), output.length )
var distance = 0;
for ( var i = 0; i < output.length; i++ ) {
var retDistance = output[i].dis
-
+ var retLoc = locArray( output[i].loc )
+
// print( "Dist from : " + results[i].loc + " to " + startPoint + " is "
// + retDistance + " vs " + radius )
var arrLocs = locsArray( output[i].obj.locs )
+
+ assert.contains( retLoc, arrLocs )
// printjson( arrLocs )
@@ -422,6 +471,7 @@ for ( var test = 0; test < numTests; test++ ) {
}
assert( distInObj )
+ assert.between( retDistance - 0.0001 , Geo.distance( locArray( query.center ), retLoc ), retDistance + 0.0001 )
assert.lte( retDistance, query.radius )
assert.gte( retDistance, distance )
distance = retDistance
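The new bucketSize field reflects how finely the 2d index can quantize a coordinate range: with at most 32 bits per dimension there are 2^32 buckets, so the smallest resolvable cell is range / 2^32, which is what 360 / ( 4 * 1024 * 1024 * 1024 ) expresses for the earth case. A short sketch of that derivation (assumes the 32-bit-per-dimension quantization; illustrative only):

    // Smallest grid cell the 2d index can resolve over [min, max].
    function geoBucketSize(min, max) {
        var buckets = 4 * 1024 * 1024 * 1024;   // 2^32 buckets per dimension at bits = 32
        return (max - min) / buckets;
    }
    assert.eq(360 / 4294967296, geoBucketSize(-180, 180));   // earth-like environment above
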
diff --git a/jstests/slowWeekly/geo_mnypts_plus_fields.js b/jstests/slowWeekly/geo_mnypts_plus_fields.js
new file mode 100644
index 00000000000..f67e49ba930
--- /dev/null
+++ b/jstests/slowWeekly/geo_mnypts_plus_fields.js
@@ -0,0 +1,98 @@
+// Test sanity of geo queries with a lot of points
+
+var maxFields = 2;
+
+for( var fields = 1; fields < maxFields; fields++ ){
+
+ var coll = db.testMnyPts
+ coll.drop()
+
+ var totalPts = 500 * 1000
+
+ // Add points in a 100x100 grid
+ for( var i = 0; i < totalPts; i++ ){
+ var ii = i % 10000
+
+ var doc = { loc : [ ii % 100, Math.floor( ii / 100 ) ] }
+
+ // Add fields with different kinds of data
+ for( var j = 0; j < fields; j++ ){
+
+ var field = null
+
+ if( j % 3 == 0 ){
+ // Make half the points not searchable
+ field = "abcdefg" + ( i % 2 == 0 ? "h" : "" )
+ }
+ else if( j % 3 == 1 ){
+ field = new Date()
+ }
+ else{
+ field = true
+ }
+
+ doc[ "field" + j ] = field
+ }
+
+ coll.insert( doc )
+ }
+
+ // Create the query for the additional fields
+ queryFields = {}
+ for( var j = 0; j < fields; j++ ){
+
+ var field = null
+
+ if( j % 3 == 0 ){
+ field = "abcdefg"
+ }
+ else if( j % 3 == 1 ){
+ field = { $lte : new Date() }
+ }
+ else{
+ field = true
+ }
+
+ queryFields[ "field" + j ] = field
+ }
+
+ coll.ensureIndex({ loc : "2d" })
+
+ // Check that quarter of points in each quadrant
+ for( var i = 0; i < 4; i++ ){
+ var x = i % 2
+ var y = Math.floor( i / 2 )
+
+ var box = [[0, 0], [49, 49]]
+ box[0][0] += ( x == 1 ? 50 : 0 )
+ box[1][0] += ( x == 1 ? 50 : 0 )
+ box[0][1] += ( y == 1 ? 50 : 0 )
+ box[1][1] += ( y == 1 ? 50 : 0 )
+
+ // Now only half of each result comes back
+ assert.eq( totalPts / ( 4 * 2 ), coll.find(Object.extend( { loc : { $within : { $box : box } } }, queryFields ) ).count() )
+ assert.eq( totalPts / ( 4 * 2 ), coll.find(Object.extend( { loc : { $within : { $box : box } } }, queryFields ) ).itcount() )
+
+ }
+
+ // Check that half of points in each half
+ for( var i = 0; i < 2; i++ ){
+
+ var box = [[0, 0], [49, 99]]
+ box[0][0] += ( i == 1 ? 50 : 0 )
+ box[1][0] += ( i == 1 ? 50 : 0 )
+
+ assert.eq( totalPts / ( 2 * 2 ), coll.find(Object.extend( { loc : { $within : { $box : box } } }, queryFields ) ).count() )
+ assert.eq( totalPts / ( 2 * 2 ), coll.find(Object.extend( { loc : { $within : { $box : box } } }, queryFields ) ).itcount() )
+
+ }
+
+ // Check that all but corner set of points in radius
+ var circle = [[0, 0], (100 - 1) * Math.sqrt( 2 ) - 0.25 ]
+
+ // All [99,x] pts are field0 : "abcdefg"
+ assert.eq( totalPts / 2 - totalPts / ( 100 * 100 ), coll.find(Object.extend( { loc : { $within : { $center : circle } } }, queryFields ) ).count() )
+ assert.eq( totalPts / 2 - totalPts / ( 100 * 100 ), coll.find(Object.extend( { loc : { $within : { $center : circle } } }, queryFields ) ).itcount() )
+
+}
+
diff --git a/jstests/slowWeekly/update_yield1.js b/jstests/slowWeekly/update_yield1.js
index 7e95855adb1..5f7183064f3 100644
--- a/jstests/slowWeekly/update_yield1.js
+++ b/jstests/slowWeekly/update_yield1.js
@@ -54,7 +54,7 @@ while ( ( (new Date()).getTime() - start ) < ( time * 2 ) ){
assert.eq( 1 , x.inprog.length , "nothing in prog" );
}
- assert.gt( 2000 , me );
+ assert.gt( time / 3 , me );
}
join();
diff --git a/jstests/sorta.js b/jstests/sorta.js
index f5942d4bddd..7c82778a186 100644
--- a/jstests/sorta.js
+++ b/jstests/sorta.js
@@ -5,16 +5,17 @@ t.drop();
// Enable _allow_dot to try and bypass v8 field name checking.
t.insert( {_id:0,a:MinKey}, true );
-t.save( {_id:1,a:null} );
-t.save( {_id:2,a:[]} );
+t.save( {_id:3,a:null} );
+t.save( {_id:1,a:[]} );
t.save( {_id:7,a:[2]} );
-t.save( {_id:3} );
-t.save( {_id:4,a:null} );
-t.save( {_id:5,a:[]} );
+t.save( {_id:4} );
+t.save( {_id:5,a:null} );
+t.save( {_id:2,a:[]} );
t.save( {_id:6,a:1} );
t.insert( {_id:8,a:MaxKey}, true );
function sorted( arr ) {
+ assert.eq( 9, arr.length );
for( i = 1; i < arr.length; ++i ) {
assert.lte( arr[ i-1 ]._id, arr[ i ]._id );
}
diff --git a/jstests/tool/csvexport1.js b/jstests/tool/csvexport1.js
new file mode 100644
index 00000000000..eb4e6e38431
--- /dev/null
+++ b/jstests/tool/csvexport1.js
@@ -0,0 +1,45 @@
+// csvexport1.js
+
+t = new ToolTest( "csvexport1" )
+
+c = t.startDB( "foo" );
+
+assert.eq( 0 , c.count() , "setup1" );
+
+objId = ObjectId()
+
+c.insert({ a : new NumberInt(1) , b : objId , c: [1, 2, 3], d : {a : "hello", b : "world"} , e: '-'})
+c.insert({ a : -2.0, c : MinKey, d : "Then he said, \"Hello World!\"", e : new NumberLong(3)})
+c.insert({ a : new BinData(0, "1234"), b : ISODate("2009-08-27"), c : new Timestamp(1234, 9876), d : /foo*\"bar\"/i, e : function foo() { print("Hello World!"); }})
+
+assert.eq( 3 , c.count() , "setup2" );
+
+t.runTool( "export" , "--out" , t.extFile , "-d" , t.baseName , "-c" , "foo" , "--csv", "-f", "a,b,c,d,e")
+
+
+c.drop()
+
+assert.eq( 0 , c.count() , "after drop" )
+
+t.runTool("import", "--file", t.extFile, "-d", t.baseName, "-c", "foo", "--type", "csv", "--headerline");
+
+assert.soon ( 3 + " == c.count()", "after import");
+
+// Note: Exporting and Importing to/from CSV is not designed to be round-trippable
+expected = []
+expected.push({ a : 1, b : "ObjectID(" + objId.toString() + ")", c : "[ 1, 2, 3 ]", d : "{ \"a\" : \"hello\", \"b\" : \"world\" }", e : "-"})
+expected.push({ a : -2.0, b : "", c : "$MinKey", d : "Then he said, \"Hello World!\"", e : 3})
+expected.push({ a : "D76DF8", b : "2009-08-27T00:00:00Z", c : "{ \"t\" : 1000 , \"i\" : 9876 }", d : "/foo*\\\"bar\\\"/i", e : tojson(function foo() { print("Hello World!"); })})
+
+actual = []
+actual.push(c.find({a : 1}).toArray()[0]);
+actual.push(c.find({a : -2.0}).toArray()[0]);
+actual.push(c.find({a : "D76DF8"}).toArray()[0]);
+
+for (i = 0; i < expected.length; i++) {
+ delete actual[i]._id
+ assert.eq( expected[i], actual[i], "CSV export " + i);
+}
+
+
+t.stop() \ No newline at end of file
diff --git a/jstests/tool/csvexport2.js b/jstests/tool/csvexport2.js
new file mode 100644
index 00000000000..3e0dd2c6829
--- /dev/null
+++ b/jstests/tool/csvexport2.js
@@ -0,0 +1,31 @@
+// csvexport2.js
+
+t = new ToolTest( "csvexport2" )
+
+c = t.startDB( "foo" );
+
+// This test is designed to test exporting of a CodeWithScope object.
+// However, due to SERVER-3391, it is not possible to create a CodeWithScope object in the mongo shell,
+// therefore this test does not work. Once SERVER-3391 is resolved, this test should be un-commented out
+
+//assert.eq( 0 , c.count() , "setup1" );
+
+//c.insert({ a : 1 , b : Code("print(\"Hello \" + x);", {"x" : "World!"})})
+//assert.eq( 1 , c.count() , "setup2" );
+//t.runTool( "export" , "--out" , t.extFile , "-d" , t.baseName , "-c" , "foo" , "--csv", "-f", "a,b")
+
+
+//c.drop()
+
+//assert.eq( 0 , c.count() , "after drop" )
+//t.runTool("import", "--file", t.extFile, "-d", t.baseName, "-c", "foo", "--type", "csv", "--headerline");
+//assert.soon ( 1 + " == c.count()", "after import");
+
+//expected = { a : 1, b : "\"{ \"$code\" : print(\"Hello \" + x); , \"$scope\" : { \"x\" : \"World!\" } }"};
+//actual = c.findOne()
+
+//delete actual._id;
+//assert.eq( expected, actual );
+
+
+t.stop() \ No newline at end of file
diff --git a/jstests/tool/csvimport1.js b/jstests/tool/csvimport1.js
new file mode 100644
index 00000000000..3bff1110cbe
--- /dev/null
+++ b/jstests/tool/csvimport1.js
@@ -0,0 +1,40 @@
+// csvimport1.js
+
+t = new ToolTest( "csvimport1" )
+
+c = t.startDB( "foo" );
+
+base = []
+base.push({ a : 1, b : "this is some text.\nThis text spans multiple lines, and just for fun\ncontains a comma", "c" : "This has leading and trailing whitespace!" })
+base.push({a : 2, b : "When someone says something you \"put it in quotes\"", "c" : "I like embedded quotes/slashes\\backslashes" })
+base.push({a : 3, b : " This line contains the empty string and has leading and trailing whitespace inside the quotes! ", "c" : "" })
+base.push({a : 4, b : "", "c" : "How are empty entries handled?" })
+base.push({a : 5, b : "\"\"", c : "\"This string is in quotes and contains empty quotes (\"\")\""})
+base.push({ a : "a" , b : "b" , c : "c"})
+
+assert.eq( 0 , c.count() , "setup" );
+
+t.runTool( "import" , "--file" , "jstests/tool/data/csvimport1.csv" , "-d" , t.baseName , "-c" , "foo" , "--type" , "csv" , "-f" , "a,b,c" );
+assert.soon( base.length + " == c.count()" , "after import 1 " );
+
+a = c.find().sort( { a : 1 } ).toArray();
+for (i = 0; i < base.length; i++ ) {
+ delete a[i]._id
+ assert.eq( tojson(base[i]), tojson(a[i]), "csv parse " + i)
+}
+
+c.drop()
+assert.eq( 0 , c.count() , "after drop" )
+
+t.runTool( "import" , "--file" , "jstests/tool/data/csvimport1.csv" , "-d" , t.baseName , "-c" , "foo" , "--type" , "csv" , "--headerline" )
+assert.soon( "c.findOne()" , "no data after sleep" );
+assert.eq( base.length - 1 , c.count() , "after import 2" );
+
+x = c.find().sort( { a : 1 } ).toArray();
+for (i = 0; i < base.length - 1; i++ ) {
+ delete x[i]._id
+ assert.eq( tojson(base[i]), tojson(x[i]), "csv parse with headerline " + i)
+}
+
+
+t.stop()
diff --git a/jstests/tool/data/csvimport1.csv b/jstests/tool/data/csvimport1.csv
new file mode 100644
index 00000000000..256d40a9184
--- /dev/null
+++ b/jstests/tool/data/csvimport1.csv
@@ -0,0 +1,8 @@
+a,b,c
+1,"this is some text.
+This text spans multiple lines, and just for fun
+contains a comma", "This has leading and trailing whitespace!"
+2, "When someone says something you ""put it in quotes""", I like embedded quotes/slashes\backslashes
+ 3 , " This line contains the empty string and has leading and trailing whitespace inside the quotes! ", ""
+ "4" ,, How are empty entries handled?
+"5","""""", """This string is in quotes and contains empty quotes ("""")"""
diff --git a/jstests/tool/dumprestore5.js b/jstests/tool/dumprestore5.js
new file mode 100644
index 00000000000..ce28fea2027
--- /dev/null
+++ b/jstests/tool/dumprestore5.js
@@ -0,0 +1,36 @@
+// dumprestore5.js
+
+t = new ToolTest( "dumprestore5" );
+
+t.startDB( "foo" );
+
+db = t.db
+
+db.addUser('user','password')
+
+assert.eq(1, db.system.users.count(), "setup")
+assert.eq(1, db.system.indexes.count(), "setup2")
+
+t.runTool( "dump" , "--out" , t.ext );
+
+db.dropDatabase()
+
+assert.eq(0, db.system.users.count(), "didn't drop users")
+assert.eq(0, db.system.indexes.count(), "didn't drop indexes")
+
+t.runTool("restore", "--dir", t.ext)
+
+assert.soon("db.system.users.findOne()", "no data after restore");
+assert.eq(1, db.system.users.find({user:'user'}).count(), "didn't restore users")
+assert.eq(1, db.system.indexes.count(), "didn't restore indexes")
+
+db.removeUser('user')
+db.addUser('user2', 'password2')
+
+t.runTool("restore", "--dir", t.ext, "--drop")
+
+assert.soon("1 == db.system.users.find({user:'user'}).count()", "didn't restore users 2")
+assert.eq(0, db.system.users.find({user:'user2'}).count(), "didn't drop users")
+assert.eq(1, db.system.indexes.count(), "didn't maintain indexes")
+
+t.stop();
diff --git a/jstests/unique2.js b/jstests/unique2.js
index 42cf9fbd0ac..1c2828830f4 100644
--- a/jstests/unique2.js
+++ b/jstests/unique2.js
@@ -1,3 +1,11 @@
+// Test unique and dropDups index options.
+
+function checkNprev( np ) {
+ // getPrevError() is not available when sharded.
+ if ( typeof( myShardingTest ) == 'undefined' ) {
+ assert.eq( np, db.getPrevError().nPrev );
+ }
+}
t = db.jstests_unique2;
@@ -21,7 +29,9 @@ t.ensureIndex({k:1}, {unique:true});
t.insert({k:3});
t.insert({k:[2,3]});
+assert( db.getLastError() );
t.insert({k:[4,3]});
+assert( db.getLastError() );
assert( t.count() == 1 ) ;
assert( t.find().sort({k:1}).toArray().length == 1 ) ;
@@ -33,9 +43,52 @@ t.insert({k:[2,3]});
t.insert({k:[4,3]});
assert( t.count() == 3 ) ;
+// Trigger an error, so we can test n of getPrevError() later.
+assert.throws( function() { t.find( {$where:'aaa'} ).itcount(); } );
+assert( db.getLastError() );
+checkNprev( 1 );
+
t.ensureIndex({k:1}, {unique:true, dropDups:true});
+// Check that the error flag was not set (SERVER-2054).
+assert( !db.getLastError() );
+// Check that the offset of the previous error is correct.
+checkNprev( 2 );
+
+// Check the dups were dropped.
+assert( t.count() == 1 ) ;
+assert( t.find().sort({k:1}).toArray().length == 1 ) ;
+assert( t.find().sort({k:1}).count() == 1 ) ;
+
+// Check that a new conflicting insert will cause an error.
+t.insert({k:[2,3]});
+assert( db.getLastError() );
+
+t.drop();
+t.insert({k:3});
+t.insert({k:[2,3]});
+t.insert({k:[4,3]});
+assert( t.count() == 3 ) ;
+
+
+// Now try with a background index op.
+
+// Trigger an error, so we can test n of getPrevError() later.
+assert.throws( function() { t.find( {$where:'aaa'} ).itcount(); } );
+assert( db.getLastError() );
+checkNprev( 1 );
+
+t.ensureIndex({k:1}, {background:true, unique:true, dropDups:true});
+// Check that the error flag was not set (SERVER-2054).
+assert( !db.getLastError() );
+// Check that the offset of the previous error is correct.
+checkNprev( 2 );
+
+// Check the dups were dropped.
assert( t.count() == 1 ) ;
assert( t.find().sort({k:1}).toArray().length == 1 ) ;
assert( t.find().sort({k:1}).count() == 1 ) ;
+// Check that a new conflicting insert will cause an error.
+t.insert({k:[2,3]});
+assert( db.getLastError() );
diff --git a/jstests/uniqueness.js b/jstests/uniqueness.js
index f1651b31c65..ce19ad08d82 100644
--- a/jstests/uniqueness.js
+++ b/jstests/uniqueness.js
@@ -26,8 +26,21 @@ db.jstests_uniqueness2.drop();
db.jstests_uniqueness2.insert({a:3});
db.jstests_uniqueness2.insert({a:3});
assert( db.jstests_uniqueness2.count() == 2 , 6) ;
+db.resetError();
db.jstests_uniqueness2.ensureIndex({a:1}, true);
assert( db.getLastError() , 7);
+assert( db.getLastError().match( /E11000/ ) );
+
+// Check for an error message when we index in the background and there are dups
+db.jstests_uniqueness2.drop();
+db.jstests_uniqueness2.insert({a:3});
+db.jstests_uniqueness2.insert({a:3});
+assert( db.jstests_uniqueness2.count() == 2 , 6) ;
+assert( !db.getLastError() );
+db.resetError();
+db.jstests_uniqueness2.ensureIndex({a:1}, {unique:true,background:true});
+assert( db.getLastError() , 7);
+assert( db.getLastError().match( /E11000/ ) );
/* Check that if we update and remove _id, it gets added back by the DB */
diff --git a/jstests/updatef.js b/jstests/updatef.js
new file mode 100644
index 00000000000..69425932f19
--- /dev/null
+++ b/jstests/updatef.js
@@ -0,0 +1,24 @@
+// Test for unsafe management of nsdt during update command yield (SERVER-3208).
+
+prefixNS = db.jstests_updatef;
+prefixNS.save( {} );
+
+t = db.jstests_updatef_actual;
+t.drop();
+
+t.save( {a:0,b:[]} );
+for( i = 0; i < 1000; ++i ) {
+ t.save( {a:100} );
+}
+t.save( {a:0,b:[]} );
+
+db.getLastError();
+// Repeatedly rename jstests_updatef to jstests_updatef_ and back. This will
+// invalidate the jstests_updatef_actual NamespaceDetailsTransient object.
+s = startParallelShell( "for( i=0; i < 100; ++i ) { db.jstests_updatef.renameCollection( 'jstests_updatef_' ); db.jstests_updatef_.renameCollection( 'jstests_updatef' ); }" );
+
+for( i=0; i < 20; ++i ) {
+ t.update( {a:0}, {$push:{b:i}}, false, true );
+}
+
+s();
diff --git a/jstests/updateg.js b/jstests/updateg.js
new file mode 100644
index 00000000000..f8d452f71b2
--- /dev/null
+++ b/jstests/updateg.js
@@ -0,0 +1,17 @@
+// SERVER-3370: check modifiers with field names containing characters that compare less than the '.' character.
+
+t = db.jstests_updateg;
+
+t.drop();
+t.update({}, { '$inc' : { 'all.t' : 1, 'all-copy.t' : 1 }}, true);
+assert.eq( 1, t.count( {all:{t:1},'all-copy':{t:1}} ) );
+
+t.drop();
+t.save({ 'all' : {}, 'all-copy' : {}});
+t.update({}, { '$inc' : { 'all.t' : 1, 'all-copy.t' : 1 }});
+assert.eq( 1, t.count( {all:{t:1},'all-copy':{t:1}} ) );
+
+t.drop();
+t.save({ 'all11' : {}, 'all2' : {}});
+t.update({}, { '$inc' : { 'all11.t' : 1, 'all2.t' : 1 }});
+assert.eq( 1, t.count( {all11:{t:1},'all2':{t:1}} ) );
diff --git a/pch.h b/pch.h
index 19620578a73..1e9684d16d6 100644
--- a/pch.h
+++ b/pch.h
@@ -44,7 +44,20 @@
# include <windows.h>
#endif
+#if defined(__linux__) && defined(MONGO_EXPOSE_MACROS)
+// glibc's optimized versions are better than g++ builtins
+# define __builtin_strcmp strcmp
+# define __builtin_strlen strlen
+# define __builtin_memchr memchr
+# define __builtin_memcmp memcmp
+# define __builtin_memcpy memcpy
+# define __builtin_memset memset
+# define __builtin_memmove memmove
+#endif
+
+
#include <ctime>
+#include <cstring>
#include <sstream>
#include <string>
#include <memory>
@@ -138,7 +151,11 @@ namespace mongo {
void asserted(const char *msg, const char *file, unsigned line);
}
-#define MONGO_assert(_Expression) (void)( (!!(_Expression)) || (mongo::asserted(#_Expression, __FILE__, __LINE__), 0) )
+
+
+// TODO: Rework the headers so we don't need this craziness
+#include "bson/inline_decls.h"
+#define MONGO_assert(_Expression) (void)( MONGO_likely(!!(_Expression)) || (mongo::asserted(#_Expression, __FILE__, __LINE__), 0) )
#include "util/debug_util.h"
#include "util/goodies.h"
diff --git a/rpm/mongo.spec b/rpm/mongo.spec
index 332c6d29c96..5287ec4a39a 100644
--- a/rpm/mongo.spec
+++ b/rpm/mongo.spec
@@ -1,5 +1,5 @@
Name: mongo
-Version: 1.9.0
+Version: 1.9.2
Release: mongodb_1%{?dist}
Summary: mongo client shell and tools
License: AGPL 3.0
diff --git a/s/balance.cpp b/s/balance.cpp
index da25f3362c2..0cb39ad038d 100644
--- a/s/balance.cpp
+++ b/s/balance.cpp
@@ -155,7 +155,7 @@ namespace mongo {
cursor.reset();
if ( collections.empty() ) {
- log(1) << "no collections to balance" << endl;
+ LOG(1) << "no collections to balance" << endl;
return;
}
@@ -170,7 +170,7 @@ namespace mongo {
vector<Shard> allShards;
Shard::getAllShards( allShards );
if ( allShards.size() < 2) {
- log(1) << "can't balance without more active shards" << endl;
+ LOG(1) << "can't balance without more active shards" << endl;
return;
}
@@ -205,7 +205,7 @@ namespace mongo {
cursor.reset();
if (shardToChunksMap.empty()) {
- log(1) << "skipping empty collection (" << ns << ")";
+ LOG(1) << "skipping empty collection (" << ns << ")";
continue;
}
@@ -282,7 +282,7 @@ namespace mongo {
// now make sure we should even be running
if ( ! grid.shouldBalance() ) {
- log(1) << "skipping balancing round because balancing is disabled" << endl;
+ LOG(1) << "skipping balancing round because balancing is disabled" << endl;
conn.done();
sleepsecs( 30 );
@@ -297,25 +297,25 @@ namespace mongo {
{
dist_lock_try lk( &balanceLock , "doing balance round" );
if ( ! lk.got() ) {
- log(1) << "skipping balancing round because another balancer is active" << endl;
+ LOG(1) << "skipping balancing round because another balancer is active" << endl;
conn.done();
sleepsecs( 30 ); // no need to wake up soon
continue;
}
- log(1) << "*** start balancing round" << endl;
+ LOG(1) << "*** start balancing round" << endl;
vector<CandidateChunkPtr> candidateChunks;
_doBalanceRound( conn.conn() , &candidateChunks );
if ( candidateChunks.size() == 0 ) {
- log(1) << "no need to move any chunk" << endl;
+ LOG(1) << "no need to move any chunk" << endl;
}
else {
_balancedLastTime = _moveChunks( &candidateChunks );
}
- log(1) << "*** end of balancing round" << endl;
+ LOG(1) << "*** end of balancing round" << endl;
}
conn.done();
@@ -326,7 +326,7 @@ namespace mongo {
log() << "caught exception while doing balance: " << e.what() << endl;
// Just to match the opening statement if in log level 1
- log(1) << "*** End of balancing round" << endl;
+ LOG(1) << "*** End of balancing round" << endl;
sleepsecs( 30 ); // sleep a fair amount b/c of error
continue;
diff --git a/s/balancer_policy.cpp b/s/balancer_policy.cpp
index efb0fb924af..f1b4bf14db1 100644
--- a/s/balancer_policy.cpp
+++ b/s/balancer_policy.cpp
@@ -96,13 +96,13 @@ namespace mongo {
return NULL;
}
- log(1) << "collection : " << ns << endl;
- log(1) << "donor : " << max.second << " chunks on " << max.first << endl;
- log(1) << "receiver : " << min.second << " chunks on " << min.first << endl;
+ LOG(1) << "collection : " << ns << endl;
+ LOG(1) << "donor : " << max.second << " chunks on " << max.first << endl;
+ LOG(1) << "receiver : " << min.second << " chunks on " << min.first << endl;
if ( ! drainingShards.empty() ) {
string drainingStr;
joinStringDelim( drainingShards, &drainingStr, ',' );
- log(1) << "draining : " << ! drainingShards.empty() << "(" << drainingShards.size() << ")" << endl;
+ LOG(1) << "draining : " << ! drainingShards.empty() << "(" << drainingShards.size() << ")" << endl;
}
// Solving imbalances takes a higher priority than draining shards. Many shards can
diff --git a/s/chunk.cpp b/s/chunk.cpp
index b1984179864..09dc994d961 100644
--- a/s/chunk.cpp
+++ b/s/chunk.cpp
@@ -208,7 +208,7 @@ namespace mongo {
// no split points means there isn't enough data to split on
// 1 split point means we have between half the chunk size to full chunk size
// so we shouldn't split
- log(1) << "chunk not full enough to trigger auto-split" << endl;
+ LOG(1) << "chunk not full enough to trigger auto-split" << endl;
return BSONObj();
}
@@ -350,7 +350,7 @@ namespace mongo {
// this was implicit before since we did a splitVector on the same socket
ShardConnection::sync();
- log(1) << "about to initiate autosplit: " << *this << " dataWritten: " << _dataWritten << " splitThreshold: " << splitThreshold << endl;
+ LOG(1) << "about to initiate autosplit: " << *this << " dataWritten: " << _dataWritten << " splitThreshold: " << splitThreshold << endl;
_dataWritten = 0; // reset so we check often enough
@@ -378,7 +378,7 @@ namespace mongo {
Shard newLocation = Shard::pick( getShard() );
if ( getShard() == newLocation ) {
// if this is the best shard, then we shouldn't do anything (Shard::pick already logged our shard).
- log(1) << "recently split chunk: " << range << " already in the best shard: " << getShard() << endl;
+ LOG(1) << "recently split chunk: " << range << " already in the best shard: " << getShard() << endl;
return true; // we did split even if we didn't migrate
}
@@ -386,7 +386,7 @@ namespace mongo {
ChunkPtr toMove = cm->findChunk(min);
if ( ! (toMove->getMin() == min && toMove->getMax() == max) ){
- log(1) << "recently split chunk: " << range << " modified before we could migrate " << toMove << endl;
+ LOG(1) << "recently split chunk: " << range << " modified before we could migrate " << toMove << endl;
return true;
}
@@ -666,8 +666,10 @@ namespace mongo {
}
if ( c ) {
- if ( c->contains( obj ) )
+ if ( c->contains( key ) ){
+ dassert(c->contains(key)); // doesn't use fast-path in extractKey
return c;
+ }
PRINT(foo);
PRINT(*c);
@@ -791,7 +793,7 @@ namespace mongo {
set<Shard> seen;
- log(1) << "ChunkManager::drop : " << _ns << endl;
+ LOG(1) << "ChunkManager::drop : " << _ns << endl;
// lock all shards so no one can do a split/migrate
for ( ChunkMap::const_iterator i=_chunkMap.begin(); i!=_chunkMap.end(); ++i ) {
@@ -799,7 +801,7 @@ namespace mongo {
seen.insert( c->getShard() );
}
- log(1) << "ChunkManager::drop : " << _ns << "\t all locked" << endl;
+ LOG(1) << "ChunkManager::drop : " << _ns << "\t all locked" << endl;
// delete data from mongod
for ( set<Shard>::iterator i=seen.begin(); i!=seen.end(); i++ ) {
@@ -808,13 +810,13 @@ namespace mongo {
conn.done();
}
- log(1) << "ChunkManager::drop : " << _ns << "\t removed shard data" << endl;
+ LOG(1) << "ChunkManager::drop : " << _ns << "\t removed shard data" << endl;
// remove chunk data
ScopedDbConnection conn( configServer.modelServer() );
conn->remove( Chunk::chunkMetadataNS , BSON( "ns" << _ns ) );
conn.done();
- log(1) << "ChunkManager::drop : " << _ns << "\t removed chunk data" << endl;
+ LOG(1) << "ChunkManager::drop : " << _ns << "\t removed chunk data" << endl;
for ( set<Shard>::iterator i=seen.begin(); i!=seen.end(); i++ ) {
ScopedDbConnection conn( *i );
@@ -830,7 +832,7 @@ namespace mongo {
conn.done();
}
- log(1) << "ChunkManager::drop : " << _ns << "\t DONE" << endl;
+ LOG(1) << "ChunkManager::drop : " << _ns << "\t DONE" << endl;
configServer.logChange( "dropCollection" , _ns , BSONObj() );
}
@@ -841,7 +843,7 @@ namespace mongo {
vector<BSONObj> splitPoints;
soleChunk->pickSplitVector( splitPoints , Chunk::MaxChunkSize );
if ( splitPoints.empty() ) {
- log(1) << "not enough data to warrant chunking " << getns() << endl;
+ LOG(1) << "not enough data to warrant chunking " << getns() << endl;
return;
}
@@ -983,7 +985,7 @@ namespace mongo {
void run() {
runShardChunkVersion();
- log(1) << "shardObjTest passed" << endl;
+ LOG(1) << "shardObjTest passed" << endl;
}
} shardObjTest;
@@ -1008,7 +1010,7 @@ namespace mongo {
cmdBuilder.append( "shardHost" , s.getConnString() );
BSONObj cmd = cmdBuilder.obj();
- log(1) << " setShardVersion " << s.getName() << " " << conn.getServerAddress() << " " << ns << " " << cmd << " " << &conn << endl;
+ LOG(1) << " setShardVersion " << s.getName() << " " << conn.getServerAddress() << " " << ns << " " << cmd << " " << &conn << endl;
return conn.runCommand( "admin" , cmd , result );
}
diff --git a/s/commands_admin.cpp b/s/commands_admin.cpp
index 4cb30f99a3b..4568c4d3897 100644
--- a/s/commands_admin.cpp
+++ b/s/commands_admin.cpp
@@ -45,6 +45,7 @@
#include "stats.h"
#include "writeback_listener.h"
#include "client.h"
+#include "../util/ramlog.h"
namespace mongo {
@@ -82,7 +83,7 @@ namespace mongo {
virtual void help( stringstream& help ) const {
help << " shows status/reachability of servers in the cluster";
}
- bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
result.append("configserver", configServer.getPrimary().getConnString() );
result.append("isdbgrid", 1);
return true;
@@ -95,7 +96,7 @@ namespace mongo {
virtual void help( stringstream& help ) const {
help << "flush all router config";
}
- bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
grid.flushConfig();
result.appendBool( "flushed" , true );
return true;
@@ -112,7 +113,7 @@ namespace mongo {
virtual bool slaveOk() const { return true; }
virtual LockType locktype() const { return NONE; }
- bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
result.append( "host" , prettyHostName() );
result.append("version", versionString);
result.append("process","mongos");
@@ -177,6 +178,20 @@ namespace mongo {
bb.done();
}
+ {
+ RamLog* rl = RamLog::get( "warnings" );
+ verify(15879, rl);
+
+ if (rl->lastWrite() >= time(0)-(10*60)){ // only show warnings from last 10 minutes
+ vector<const char*> lines;
+ rl->get( lines );
+
+ BSONArrayBuilder arr( result.subarrayStart( "warnings" ) );
+ for ( unsigned i=std::max(0,(int)lines.size()-10); i<lines.size(); i++ )
+ arr.append( lines[i] );
+ arr.done();
+ }
+ }
return 1;
}
@@ -187,7 +202,7 @@ namespace mongo {
class FsyncCommand : public GridAdminCmd {
public:
FsyncCommand() : GridAdminCmd( "fsync" ) {}
- bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
if ( cmdObj["lock"].trueValue() ) {
errmsg = "can't do lock through mongos";
return false;
@@ -228,7 +243,7 @@ namespace mongo {
virtual void help( stringstream& help ) const {
help << " example: { moveprimary : 'foo' , to : 'localhost:9999' }";
}
- bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
string dbname = cmdObj.firstElement().valuestrsafe();
if ( dbname.size() == 0 ) {
@@ -323,7 +338,7 @@ namespace mongo {
<< "Enable sharding for a db. (Use 'shardcollection' command afterwards.)\n"
<< " { enablesharding : \"<dbname>\" }\n";
}
- bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
string dbname = cmdObj.firstElement().valuestrsafe();
if ( dbname.size() == 0 ) {
errmsg = "no db";
@@ -368,7 +383,7 @@ namespace mongo {
<< " { enablesharding : \"<dbname>\" }\n";
}
- bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
string ns = cmdObj.firstElement().valuestrsafe();
if ( ns.size() == 0 ) {
errmsg = "no ns";
@@ -517,7 +532,7 @@ namespace mongo {
help << " example: { getShardVersion : 'alleyinsider.foo' } ";
}
- bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
string ns = cmdObj.firstElement().valuestrsafe();
if ( ns.size() == 0 ) {
errmsg = "need to specify fully namespace";
@@ -530,7 +545,7 @@ namespace mongo {
return false;
}
- ChunkManagerPtr cm = config->getChunkManager( ns );
+ ChunkManagerPtr cm = config->getChunkManagerIfExists( ns );
if ( ! cm ) {
errmsg = "no chunk manager?";
return false;
@@ -555,7 +570,7 @@ namespace mongo {
;
}
- bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
if ( ! okForConfigChanges( errmsg ) )
return false;
@@ -633,7 +648,7 @@ namespace mongo {
virtual void help( stringstream& help ) const {
help << "{ movechunk : 'test.foo' , find : { num : 1 } , to : 'localhost:30001' }";
}
- bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
if ( ! okForConfigChanges( errmsg ) )
return false;
@@ -710,7 +725,7 @@ namespace mongo {
virtual void help( stringstream& help ) const {
help << "list all shards of the system";
}
- bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
ScopedDbConnection conn( configServer.getPrimary() );
vector<BSONObj> all;
@@ -734,7 +749,7 @@ namespace mongo {
virtual void help( stringstream& help ) const {
help << "add a new shard to the system";
}
- bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
errmsg.clear();
// get replica set component hosts
@@ -795,7 +810,7 @@ namespace mongo {
virtual void help( stringstream& help ) const {
help << "remove a shard to the system.";
}
- bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
string target = cmdObj.firstElement().valuestrsafe();
Shard s = Shard::make( target );
if ( ! grid.knowAboutShard( s.getConnString() ) ) {
@@ -878,11 +893,12 @@ namespace mongo {
class IsDbGridCmd : public Command {
public:
virtual LockType locktype() const { return NONE; }
+ virtual bool requiresAuth() { return false; }
virtual bool slaveOk() const {
return true;
}
IsDbGridCmd() : Command("isdbgrid") { }
- bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
result.append("isdbgrid", 1);
result.append("hostname", getHostNameCached());
return true;
@@ -900,7 +916,7 @@ namespace mongo {
help << "test if this is master half of a replica pair";
}
CmdIsMaster() : Command("isMaster" , false , "ismaster") { }
- virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
result.appendBool("ismaster", true );
result.append("msg", "isdbgrid");
result.appendNumber("maxBsonObjectSize", BSONObjMaxUserSize);
@@ -924,7 +940,7 @@ namespace mongo {
virtual void help( stringstream &help ) const {
help << "{whatsmyuri:1}";
}
- virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
result << "you" << ClientInfo::get()->getRemote();
return true;
}
@@ -942,7 +958,7 @@ namespace mongo {
help << "get previous error (since last reseterror command)";
}
CmdShardingGetPrevError() : Command( "getPrevError" , false , "getpreverror") { }
- virtual bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ virtual bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
errmsg += "getpreverror not supported for sharded environments";
return false;
}
@@ -960,7 +976,7 @@ namespace mongo {
}
CmdShardingGetLastError() : Command("getLastError" , false , "getlasterror") { }
- virtual bool run(const string& dbName, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ virtual bool run(const string& dbName, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
LastError *le = lastError.disableForCommand();
{
assert( le );
@@ -987,7 +1003,7 @@ namespace mongo {
return true;
}
- bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) {
+ bool run(const string& dbName , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) {
LastError *le = lastError.get();
if ( le )
le->reset();
@@ -1018,7 +1034,7 @@ namespace mongo {
virtual LockType locktype() const { return NONE; }
virtual void help( stringstream& help ) const { help << "list databases on cluster"; }
- bool run(const string& , BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) {
+ bool run(const string& , BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) {
vector<Shard> shards;
Shard::getAllShards( shards );
@@ -1115,7 +1131,7 @@ namespace mongo {
virtual LockType locktype() const { return NONE; }
virtual void help( stringstream& help ) const { help << "Not supported sharded"; }
- bool run(const string& , BSONObj& jsobj, string& errmsg, BSONObjBuilder& /*result*/, bool /*fromRepl*/) {
+ bool run(const string& , BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& /*result*/, bool /*fromRepl*/) {
errmsg = "closeAllDatabases isn't supported through mongos";
return false;
}
@@ -1131,7 +1147,7 @@ namespace mongo {
virtual LockType locktype() const { return NONE; }
virtual void help( stringstream& help ) const { help << "Not supported through mongos"; }
- bool run(const string& , BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) {
+ bool run(const string& , BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool /*fromRepl*/) {
if ( jsobj["forShell"].trueValue() )
lastError.disableForCommand();
@@ -1148,7 +1164,7 @@ namespace mongo {
<< "either (1) ran from localhost or (2) authenticated.";
}
- bool CmdShutdown::run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool CmdShutdown::run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
return shutdownHelper();
}
diff --git a/s/commands_public.cpp b/s/commands_public.cpp
index 713b9489fc2..ef7110c7646 100644
--- a/s/commands_public.cpp
+++ b/s/commands_public.cpp
@@ -53,22 +53,34 @@ namespace mongo {
return false;
}
+ // Override if passthrough should also send query options
+ // Safer to keep off by default; we can enable it gradually as we add more tests
+ virtual bool passOptions() const { return false; }
+
// all grid commands are designed not to lock
virtual LockType locktype() const { return NONE; }
protected:
+
bool passthrough( DBConfigPtr conf, const BSONObj& cmdObj , BSONObjBuilder& result ) {
- return _passthrough(conf->getName(), conf, cmdObj, result);
+ return _passthrough(conf->getName(), conf, cmdObj, 0, result);
}
bool adminPassthrough( DBConfigPtr conf, const BSONObj& cmdObj , BSONObjBuilder& result ) {
- return _passthrough("admin", conf, cmdObj, result);
+ return _passthrough("admin", conf, cmdObj, 0, result);
+ }
+
+ bool passthrough( DBConfigPtr conf, const BSONObj& cmdObj , int options, BSONObjBuilder& result ) {
+ return _passthrough(conf->getName(), conf, cmdObj, options, result);
+ }
+ bool adminPassthrough( DBConfigPtr conf, const BSONObj& cmdObj , int options, BSONObjBuilder& result ) {
+ return _passthrough("admin", conf, cmdObj, options, result);
}
private:
- bool _passthrough(const string& db, DBConfigPtr conf, const BSONObj& cmdObj , BSONObjBuilder& result ) {
+ bool _passthrough(const string& db, DBConfigPtr conf, const BSONObj& cmdObj , int options , BSONObjBuilder& result ) {
ShardConnection conn( conf->getPrimary() , "" );
BSONObj res;
- bool ok = conn->runCommand( db , cmdObj , res );
+ bool ok = conn->runCommand( db , cmdObj , res , passOptions() ? options : 0 );
if ( ! ok && res["code"].numberInt() == StaleConfigInContextCode ) {
conn.done();
throw StaleConfigException("foo","command failed because of stale config");
@@ -99,13 +111,14 @@ namespace mongo {
virtual void aggregateResults(const vector<BSONObj>& results, BSONObjBuilder& output) {}
// don't override
- virtual bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& output, bool) {
+ virtual bool run(const string& dbName , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& output, bool) {
+ LOG(1) << "RunOnAllShardsCommand db: " << dbName << " cmd:" << cmdObj << endl;
set<Shard> shards;
getShards(dbName, cmdObj, shards);
list< shared_ptr<Future::CommandResult> > futures;
for ( set<Shard>::const_iterator i=shards.begin(), end=shards.end() ; i != end ; i++ ) {
- futures.push_back( Future::spawnCommand( i->getConnString() , dbName , cmdObj ) );
+ futures.push_back( Future::spawnCommand( i->getConnString() , dbName , cmdObj, 0 ) );
}
vector<BSONObj> results;
@@ -159,13 +172,13 @@ namespace mongo {
virtual string getFullNS( const string& dbName , const BSONObj& cmdObj ) = 0;
- virtual bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ virtual bool run(const string& dbName , BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result, bool) {
string fullns = getFullNS( dbName , cmdObj );
DBConfigPtr conf = grid.getDBConfig( dbName , false );
if ( ! conf || ! conf->isShardingEnabled() || ! conf->isSharded( fullns ) ) {
- return passthrough( conf , cmdObj , result );
+ return passthrough( conf , cmdObj , options, result );
}
errmsg = "can't do command: " + name + " on sharded collection";
return false;
@@ -184,6 +197,16 @@ namespace mongo {
ReIndexCmd() : AllShardsCollectionCommand("reIndex") {}
} reIndexCmd;
+ class ProfileCmd : public PublicGridCommand {
+ public:
+ ProfileCmd() : PublicGridCommand("profile") {}
+ virtual bool run(const string& dbName , BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result, bool) {
+ errmsg = "profile currently not supported via mongos";
+ return false;
+ }
+ } profileCmd;
+
+
class ValidateCmd : public AllShardsCollectionCommand {
public:
ValidateCmd() : AllShardsCollectionCommand("validate") {}
@@ -255,7 +278,7 @@ namespace mongo {
class DropCmd : public PublicGridCommand {
public:
DropCmd() : PublicGridCommand( "drop" ) {}
- bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& dbName , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
string collection = cmdObj.firstElement().valuestrsafe();
string fullns = dbName + "." + collection;
@@ -280,7 +303,7 @@ namespace mongo {
class DropDBCmd : public PublicGridCommand {
public:
DropDBCmd() : PublicGridCommand( "dropDatabase" ) {}
- bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& dbName , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
BSONElement e = cmdObj.firstElement();
@@ -309,7 +332,7 @@ namespace mongo {
class RenameCollectionCmd : public PublicGridCommand {
public:
RenameCollectionCmd() : PublicGridCommand( "renameCollection" ) {}
- bool run(const string& dbName, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& dbName, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
string fullnsFrom = cmdObj.firstElement().valuestrsafe();
string dbNameFrom = nsToDatabase( fullnsFrom.c_str() );
DBConfigPtr confFrom = grid.getDBConfig( dbNameFrom , false );
@@ -334,7 +357,7 @@ namespace mongo {
class CopyDBCmd : public PublicGridCommand {
public:
CopyDBCmd() : PublicGridCommand( "copydb" ) {}
- bool run(const string& dbName, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& dbName, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
string todb = cmdObj.getStringField("todb");
uassert(13402, "need a todb argument", !todb.empty());
@@ -370,7 +393,8 @@ namespace mongo {
class CountCmd : public PublicGridCommand {
public:
CountCmd() : PublicGridCommand("count") { }
- bool run(const string& dbName, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool l) {
+ virtual bool passOptions() const { return true; }
+ bool run(const string& dbName, BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result, bool) {
string collection = cmdObj.firstElement().valuestrsafe();
string fullns = dbName + "." + collection;
@@ -379,12 +403,11 @@ namespace mongo {
filter = cmdObj["query"].Obj();
DBConfigPtr conf = grid.getDBConfig( dbName , false );
-
if ( ! conf || ! conf->isShardingEnabled() || ! conf->isSharded( fullns ) ) {
ShardConnection conn( conf->getPrimary() , fullns );
BSONObj temp;
- bool ok = conn->runCommand( dbName , cmdObj , temp );
+ bool ok = conn->runCommand( dbName , cmdObj , temp, options );
conn.done();
if ( ok ) {
@@ -399,7 +422,7 @@ namespace mongo {
}
// this collection got sharded
- ChunkManagerPtr cm = conf->getChunkManager( fullns , true );
+ ChunkManagerPtr cm = conf->getChunkManagerIfExists( fullns , true );
if ( ! cm ) {
errmsg = "should be sharded now";
result.append( "root" , temp );
@@ -410,11 +433,11 @@ namespace mongo {
long long total = 0;
map<string,long long> shardCounts;
- ChunkManagerPtr cm = conf->getChunkManager( fullns );
+ ChunkManagerPtr cm = conf->getChunkManagerIfExists( fullns );
while ( true ) {
if ( ! cm ) {
// probably unsharded now
- return run( dbName , cmdObj , errmsg , result , l );
+ return run( dbName , cmdObj , options , errmsg , result, false );
}
set<Shard> shards;
@@ -428,14 +451,14 @@ namespace mongo {
if ( conn.setVersion() ) {
total = 0;
shardCounts.clear();
- cm = conf->getChunkManager( fullns );
+ cm = conf->getChunkManagerIfExists( fullns );
conn.done();
hadToBreak = true;
break;
}
BSONObj temp;
- bool ok = conn->runCommand( dbName , BSON( "count" << collection << "query" << filter ) , temp );
+ bool ok = conn->runCommand( dbName , BSON( "count" << collection << "query" << filter ) , temp, options );
conn.done();
if ( ok ) {
@@ -449,7 +472,7 @@ namespace mongo {
// my version is old
total = 0;
shardCounts.clear();
- cm = conf->getChunkManager( fullns , true );
+ cm = conf->getChunkManagerIfExists( fullns , true );
hadToBreak = true;
break;
}
@@ -476,14 +499,13 @@ namespace mongo {
class CollectionStats : public PublicGridCommand {
public:
CollectionStats() : PublicGridCommand("collStats", "collstats") { }
- bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& dbName , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
string collection = cmdObj.firstElement().valuestrsafe();
string fullns = dbName + "." + collection;
DBConfigPtr conf = grid.getDBConfig( dbName , false );
if ( ! conf || ! conf->isShardingEnabled() || ! conf->isSharded( fullns ) ) {
- result.append( "ns" , fullns );
result.appendBool("sharded", false);
result.append( "primary" , conf->getPrimary().getName() );
return passthrough( conf , cmdObj , result);
@@ -602,7 +624,7 @@ namespace mongo {
class FindAndModifyCmd : public PublicGridCommand {
public:
FindAndModifyCmd() : PublicGridCommand("findAndModify", "findandmodify") { }
- bool run(const string& dbName, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& dbName, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
string collection = cmdObj.firstElement().valuestrsafe();
string fullns = dbName + "." + collection;
@@ -639,7 +661,7 @@ namespace mongo {
class DataSizeCmd : public PublicGridCommand {
public:
DataSizeCmd() : PublicGridCommand("dataSize", "datasize") { }
- bool run(const string& dbName, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& dbName, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
string fullns = cmdObj.firstElement().String();
DBConfigPtr conf = grid.getDBConfig( dbName , false );
@@ -703,7 +725,7 @@ namespace mongo {
class GroupCmd : public NotAllowedOnShardedCollectionCmd {
public:
GroupCmd() : NotAllowedOnShardedCollectionCmd("group") {}
-
+ virtual bool passOptions() const { return true; }
virtual string getFullNS( const string& dbName , const BSONObj& cmdObj ) {
return dbName + "." + cmdObj.firstElement().embeddedObjectUserCheck()["ns"].valuestrsafe();
}
@@ -716,14 +738,15 @@ namespace mongo {
virtual void help( stringstream &help ) const {
help << "{ distinct : 'collection name' , key : 'a.b' , query : {} }";
}
- bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ virtual bool passOptions() const { return true; }
+ bool run(const string& dbName , BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result, bool) {
string collection = cmdObj.firstElement().valuestrsafe();
string fullns = dbName + "." + collection;
DBConfigPtr conf = grid.getDBConfig( dbName , false );
if ( ! conf || ! conf->isShardingEnabled() || ! conf->isSharded( fullns ) ) {
- return passthrough( conf , cmdObj , result );
+ return passthrough( conf , cmdObj , options, result );
}
ChunkManagerPtr cm = conf->getChunkManager( fullns );
@@ -739,7 +762,7 @@ namespace mongo {
for ( set<Shard>::iterator i=shards.begin(), end=shards.end() ; i != end; ++i ) {
ShardConnection conn( *i , fullns );
BSONObj res;
- bool ok = conn->runCommand( conf->getName() , cmdObj , res );
+ bool ok = conn->runCommand( conf->getName() , cmdObj , res, options );
conn.done();
if ( ! ok ) {
@@ -774,7 +797,7 @@ namespace mongo {
virtual void help( stringstream &help ) const {
help << " example: { filemd5 : ObjectId(aaaaaaa) , root : \"fs\" }";
}
- bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& dbName , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
string fullns = dbName;
fullns += ".";
{
@@ -811,15 +834,15 @@ namespace mongo {
public:
Geo2dFindNearCmd() : PublicGridCommand( "geoNear" ) {}
void help(stringstream& h) const { h << "http://www.mongodb.org/display/DOCS/Geospatial+Indexing#GeospatialIndexing-geoNearCommand"; }
-
- bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ virtual bool passOptions() const { return true; }
+ bool run(const string& dbName , BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result, bool) {
string collection = cmdObj.firstElement().valuestrsafe();
string fullns = dbName + "." + collection;
DBConfigPtr conf = grid.getDBConfig( dbName , false );
if ( ! conf || ! conf->isShardingEnabled() || ! conf->isSharded( fullns ) ) {
- return passthrough( conf , cmdObj , result );
+ return passthrough( conf , cmdObj , options, result );
}
ChunkManagerPtr cm = conf->getChunkManager( fullns );
@@ -836,7 +859,7 @@ namespace mongo {
list< shared_ptr<Future::CommandResult> > futures;
BSONArrayBuilder shardArray;
for ( set<Shard>::const_iterator i=shards.begin(), end=shards.end() ; i != end ; i++ ) {
- futures.push_back( Future::spawnCommand( i->getConnString() , dbName , cmdObj ) );
+ futures.push_back( Future::spawnCommand( i->getConnString() , dbName , cmdObj, options ) );
shardArray.append(i->getName());
}
@@ -946,7 +969,7 @@ namespace mongo {
return b.obj();
}
- bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& dbName , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
Timer t;
string collection = cmdObj.firstElement().valuestrsafe();
@@ -1009,7 +1032,7 @@ namespace mongo {
for ( set<Shard>::iterator i=shards.begin(), end=shards.end() ; i != end ; i++ ) {
shared_ptr<ShardConnection> temp( new ShardConnection( i->getConnString() , fullns ) );
assert( temp->get() );
- futures.push_back( Future::spawnCommand( i->getConnString() , dbName , shardedCommand , temp->get() ) );
+ futures.push_back( Future::spawnCommand( i->getConnString() , dbName , shardedCommand , 0 , temp->get() ) );
shardConns.push_back( temp );
}
@@ -1096,7 +1119,7 @@ namespace mongo {
mr_shard::Config config( dbName , cmdObj );
mr_shard::State state(config);
- log(1) << "mr sharded output ns: " << config.ns << endl;
+ LOG(1) << "mr sharded output ns: " << config.ns << endl;
if (config.outType == mr_shard::Config::INMEMORY) {
errmsg = "This Map Reduce mode is not supported with sharded output";
@@ -1200,7 +1223,7 @@ namespace mongo {
BSONObj finalCmdObj = finalCmd.obj();
for ( set<Shard>::iterator i=shards.begin(), end=shards.end() ; i != end ; i++ ) {
shared_ptr<ShardConnection> temp( new ShardConnection( i->getConnString() , outns ) );
- futures.push_back( Future::spawnCommand( i->getConnString() , dbName , finalCmdObj , temp->get() ) );
+ futures.push_back( Future::spawnCommand( i->getConnString() , dbName , finalCmdObj , 0 , temp->get() ) );
shardConns.push_back( temp );
}
@@ -1268,7 +1291,7 @@ namespace mongo {
class ApplyOpsCmd : public PublicGridCommand {
public:
ApplyOpsCmd() : PublicGridCommand( "applyOps" ) {}
- virtual bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ virtual bool run(const string& dbName , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
errmsg = "applyOps not allowed through mongos";
return false;
}
@@ -1277,7 +1300,7 @@ namespace mongo {
class CompactCmd : public PublicGridCommand {
public:
CompactCmd() : PublicGridCommand( "compact" ) {}
- virtual bool run(const string& dbName , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ virtual bool run(const string& dbName , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
errmsg = "compact not allowed through mongos";
return false;
}
@@ -1285,7 +1308,7 @@ namespace mongo {
}
- bool Command::runAgainstRegistered(const char *ns, BSONObj& jsobj, BSONObjBuilder& anObjBuilder) {
+ bool Command::runAgainstRegistered(const char *ns, BSONObj& jsobj, BSONObjBuilder& anObjBuilder, int queryOptions) {
const char *p = strchr(ns, '.');
if ( !p ) return false;
if ( strcmp(p, ".$cmd") != 0 ) return false;
@@ -1326,7 +1349,7 @@ namespace mongo {
anObjBuilder.append( "help" , help.str() );
}
else {
- ok = c->run( nsToDatabase( ns ) , jsobj, errmsg, anObjBuilder, false);
+ ok = c->run( nsToDatabase( ns ) , jsobj, queryOptions, errmsg, anObjBuilder, false );
}
BSONObj tmp = anObjBuilder.asTempObj();
diff --git a/s/config.cpp b/s/config.cpp
index b65443cb0bd..f9e548235d3 100644
--- a/s/config.cpp
+++ b/s/config.cpp
@@ -185,6 +185,16 @@ namespace mongo {
return true;
}
+ ChunkManagerPtr DBConfig::getChunkManagerIfExists( const string& ns, bool shouldReload ){
+ try{
+ return getChunkManager( ns, shouldReload );
+ }
+ catch( AssertionException& e ){
+ warning() << "chunk manager not found for " << ns << causedBy( e ) << endl;
+ return ChunkManagerPtr();
+ }
+ }
+
ChunkManagerPtr DBConfig::getChunkManager( const string& ns , bool shouldReload ) {
BSONObj key;
bool unique;
@@ -201,8 +211,8 @@ namespace mongo {
_reload();
ci = _collections[ns];
}
- massert( 10181 , (string)"not sharded:" + ns , ci.isSharded() || ci.wasDropped() );
- assert( ci.wasDropped() || ! ci.key().isEmpty() );
+ massert( 10181 , (string)"not sharded:" + ns , ci.isSharded() );
+ assert( ! ci.key().isEmpty() );
if ( ! shouldReload || earlyReload )
return ci.getCM();
@@ -226,6 +236,7 @@ namespace mongo {
if ( v == oldVersion ) {
scoped_lock lk( _lock );
CollectionInfo& ci = _collections[ns];
+ massert( 15885 , str::stream() << "not sharded after reloading from chunks : " << ns , ci.isSharded() );
return ci.getCM();
}
}
@@ -244,7 +255,7 @@ namespace mongo {
scoped_lock lk( _lock );
CollectionInfo& ci = _collections[ns];
- massert( 14822 , (string)"state changed in the middle: " + ns , ci.isSharded() || ci.wasDropped() );
+ massert( 14822 , (string)"state changed in the middle: " + ns , ci.isSharded() );
if ( temp->getVersion() > ci.getCM()->getVersion() ) {
// we only want to reset if we're newer
@@ -252,6 +263,7 @@ namespace mongo {
ci.resetCM( temp.release() );
}
+ massert( 15883 , str::stream() << "not sharded after chunk manager reset : " << ns , ci.isSharded() );
return ci.getCM();
}
@@ -268,7 +280,7 @@ namespace mongo {
}
void DBConfig::unserialize(const BSONObj& from) {
- log(1) << "DBConfig unserialize: " << _name << " " << from << endl;
+ LOG(1) << "DBConfig unserialize: " << _name << " " << from << endl;
assert( _name == from["_id"].String() );
_shardingEnabled = from.getBoolField("partitioned");
@@ -300,13 +312,14 @@ namespace mongo {
unserialize( o );
BSONObjBuilder b;
- b.appendRegex( "_id" , (string)"^" + _name + "." );
+ b.appendRegex( "_id" , (string)"^" + _name + "\\." );
auto_ptr<DBClientCursor> cursor = conn->query( ShardNS::collection ,b.obj() );
assert( cursor.get() );
while ( cursor->more() ) {
BSONObj o = cursor->next();
- _collections[o["_id"].String()] = CollectionInfo( o );
+ if( o["dropped"].trueValue() ) _collections.erase( o["_id"].String() );
+ else _collections[o["_id"].String()] = CollectionInfo( o );
}
conn.done();
@@ -369,7 +382,7 @@ namespace mongo {
// 1
if ( ! configServer.allUp( errmsg ) ) {
- log(1) << "\t DBConfig::dropDatabase not all up" << endl;
+ LOG(1) << "\t DBConfig::dropDatabase not all up" << endl;
return 0;
}
@@ -392,7 +405,7 @@ namespace mongo {
log() << "error removing from config server even after checking!" << endl;
return 0;
}
- log(1) << "\t removed entry from config server for: " << _name << endl;
+ LOG(1) << "\t removed entry from config server for: " << _name << endl;
set<Shard> allServers;
@@ -428,7 +441,7 @@ namespace mongo {
conn.done();
}
- log(1) << "\t dropped primary db for: " << _name << endl;
+ LOG(1) << "\t dropped primary db for: " << _name << endl;
configServer.logChange( "dropDatabase" , _name , BSONObj() );
return true;
@@ -440,6 +453,7 @@ namespace mongo {
while ( true ) {
Collections::iterator i = _collections.begin();
for ( ; i != _collections.end(); ++i ) {
+ // log() << "coll : " << i->first << " and " << i->second.isSharded() << endl;
if ( i->second.isSharded() )
break;
}
@@ -453,7 +467,7 @@ namespace mongo {
}
seen.insert( i->first );
- log(1) << "\t dropping sharded collection: " << i->first << endl;
+ LOG(1) << "\t dropping sharded collection: " << i->first << endl;
i->second.getCM()->getAllShards( allServers );
i->second.getCM()->drop( i->second.getCM() );
@@ -461,7 +475,7 @@ namespace mongo {
num++;
uassert( 10184 , "_dropShardedCollections too many collections - bailing" , num < 100000 );
- log(2) << "\t\t dropped " << num << " so far" << endl;
+ LOG(2) << "\t\t dropped " << num << " so far" << endl;
}
return true;
@@ -528,7 +542,7 @@ namespace mongo {
string fullString;
joinStringDelim( configHosts, &fullString, ',' );
_primary.setAddress( ConnectionString( fullString , ConnectionString::SYNC ) );
- log(1) << " config string : " << fullString << endl;
+ LOG(1) << " config string : " << fullString << endl;
return true;
}
@@ -609,7 +623,7 @@ namespace mongo {
if ( checkConsistency ) {
string errmsg;
if ( ! checkConfigServersConsistent( errmsg ) ) {
- log( LL_ERROR ) << "config servers not in sync! " << errmsg << endl;
+ log( LL_ERROR ) << "config servers not in sync! " << errmsg << warnings;
return false;
}
}
@@ -672,7 +686,7 @@ namespace mongo {
string name = o["_id"].valuestrsafe();
got.insert( name );
if ( name == "chunksize" ) {
- log(1) << "MaxChunkSize: " << o["value"] << endl;
+ LOG(1) << "MaxChunkSize: " << o["value"] << endl;
Chunk::MaxChunkSize = o["value"].numberInt() * 1024 * 1024;
}
else if ( name == "balancer" ) {
@@ -746,7 +760,7 @@ namespace mongo {
conn->createCollection( "config.changelog" , 1024 * 1024 * 10 , true );
}
catch ( UserException& e ) {
- log(1) << "couldn't create changelog (like race condition): " << e << endl;
+ LOG(1) << "couldn't create changelog (like race condition): " << e << endl;
// don't care
}
createdCapped = true;
diff --git a/s/config.h b/s/config.h
index 6c8f8934aed..90c06cb0223 100644
--- a/s/config.h
+++ b/s/config.h
@@ -143,6 +143,7 @@ namespace mongo {
bool isSharded( const string& ns );
ChunkManagerPtr getChunkManager( const string& ns , bool reload = false );
+ ChunkManagerPtr getChunkManagerIfExists( const string& ns , bool reload = false );
/**
* @return the correct for shard for the ns
diff --git a/s/cursors.cpp b/s/cursors.cpp
index c65cdb9f97b..e8aeffb1cb4 100644
--- a/s/cursors.cpp
+++ b/s/cursors.cpp
@@ -112,7 +112,7 @@ namespace mongo {
}
bool hasMore = sendMore && _cursor->more();
- log(6) << "\t hasMore:" << hasMore << " wouldSendMoreIfHad: " << sendMore << " id:" << getId() << " totalSent: " << _totalSent << endl;
+ LOG(6) << "\t hasMore:" << hasMore << " wouldSendMoreIfHad: " << sendMore << " id:" << getId() << " totalSent: " << _totalSent << endl;
replyToQuery( 0 , r.p() , r.m() , b.buf() , b.len() , num , _totalSent , hasMore ? getId() : 0 );
_totalSent += num;
@@ -131,13 +131,15 @@ namespace mongo {
CursorCache::~CursorCache() {
// TODO: delete old cursors?
- int logLevel = 1;
+ bool print = logLevel > 0;
if ( _cursors.size() || _refs.size() )
- logLevel = 0;
- log( logLevel ) << " CursorCache at shutdown - "
- << " sharded: " << _cursors.size()
- << " passthrough: " << _refs.size()
- << endl;
+ print = true;
+
+ if ( print )
+ cout << " CursorCache at shutdown - "
+ << " sharded: " << _cursors.size()
+ << " passthrough: " << _refs.size()
+ << endl;
}
ShardedClientCursorPtr CursorCache::get( long long id ) const {
@@ -300,7 +302,7 @@ namespace mongo {
help << " example: { cursorInfo : 1 }";
}
virtual LockType locktype() const { return NONE; }
- bool run(const string&, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string&, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
cursorCache.appendInfo( result );
if ( jsobj["setTimeout"].isNumber() )
CursorCache::TIMEOUT = jsobj["setTimeout"].numberLong();
diff --git a/s/d_logic.cpp b/s/d_logic.cpp
index 5216b2e52ca..9d4fd74dd62 100644
--- a/s/d_logic.cpp
+++ b/s/d_logic.cpp
@@ -60,7 +60,7 @@ namespace mongo {
return false;
}
- log(1) << "connection meta data too old - will retry ns:(" << ns << ") op:(" << opToString(op) << ") " << errmsg << endl;
+ LOG(1) << "connection meta data too old - will retry ns:(" << ns << ") op:(" << opToString(op) << ") " << errmsg << endl;
if ( doesOpGetAResponse( op ) ) {
assert( dbresponse );
@@ -97,8 +97,8 @@ namespace mongo {
const OID& clientID = ShardedConnectionInfo::get(false)->getID();
massert( 10422 , "write with bad shard config and no server id!" , clientID.isSet() );
- log(1) << "got write with an old config - writing back ns: " << ns << endl;
- if ( logLevel ) log(1) << m.toString() << endl;
+ LOG(1) << "got write with an old config - writing back ns: " << ns << endl;
+ if ( logLevel ) LOG(1) << m.toString() << endl;
BSONObjBuilder b;
b.appendBool( "writeBack" , true );
@@ -109,7 +109,7 @@ namespace mongo {
b.appendTimestamp( "version" , shardingState.getVersion( ns ) );
b.appendTimestamp( "yourVersion" , ShardedConnectionInfo::get( true )->getVersion( ns ) );
b.appendBinData( "msg" , m.header()->len , bdtCustom , (char*)(m.singleData()) );
- log(2) << "writing back msg with len: " << m.header()->len << " op: " << m.operation() << endl;
+ LOG(2) << "writing back msg with len: " << m.header()->len << " op: " << m.operation() << endl;
writeBackManager.queueWriteBack( clientID.str() , b.obj() );
return true;
diff --git a/s/d_migrate.cpp b/s/d_migrate.cpp
index 740a3148771..e24a02d3538 100644
--- a/s/d_migrate.cpp
+++ b/s/d_migrate.cpp
@@ -156,13 +156,28 @@ namespace mongo {
string toString() const {
return str::stream() << ns << " from " << min << " -> " << max;
}
-
+
void doRemove() {
ShardForceVersionOkModeBlock sf;
- writelock lk(ns);
- RemoveSaver rs("moveChunk",ns,"post-cleanup");
- long long num = Helpers::removeRange( ns , min , max , true , false , cmdLine.moveParanoia ? &rs : 0 );
- log() << "moveChunk deleted: " << num << migrateLog;
+ {
+ writelock lk(ns);
+ RemoveSaver rs("moveChunk",ns,"post-cleanup");
+ long long numDeleted = Helpers::removeRange( ns , min , max , true , false , cmdLine.moveParanoia ? &rs : 0 );
+ log() << "moveChunk deleted: " << numDeleted << migrateLog;
+ }
+
+ ReplTime lastOpApplied = cc().getLastOp();
+
+ Timer t;
+ for ( int i=0; i<3600; i++ ) {
+ if ( opReplicatedEnough( lastOpApplied , ( getSlaveCount() / 2 ) + 1 ) ) {
+ LOG(t.seconds() < 30 ? 1 : 0) << "moveChunk repl sync took " << t.seconds() << " seconds" << migrateLog;
+ return;
+ }
+ sleepsecs(1);
+ }
+
+ warning() << "moveChunk repl sync timed out after " << t.seconds() << " seconds" << migrateLog;
}
};
@@ -646,7 +661,7 @@ namespace mongo {
public:
TransferModsCommand() : ChunkCommandHelper( "_transferMods" ) {}
- bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
return migrateFromStatus.transferMods( errmsg, result );
}
} transferModsCommand;
@@ -656,7 +671,7 @@ namespace mongo {
public:
InitialCloneCommand() : ChunkCommandHelper( "_migrateClone" ) {}
- bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
return migrateFromStatus.clone( errmsg, result );
}
} initialCloneCommand;
@@ -680,7 +695,7 @@ namespace mongo {
virtual LockType locktype() const { return NONE; }
- bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
// 1. parse options
// 2. make sure my view is complete and lock
// 3. start migrate
@@ -1064,7 +1079,7 @@ namespace mongo {
preCond.done();
BSONObj cmd = cmdBuilder.obj();
- log(7) << "moveChunk update: " << cmd << migrateLog;
+ LOG(7) << "moveChunk update: " << cmd << migrateLog;
bool ok = false;
BSONObj cmdResult;
@@ -1177,7 +1192,7 @@ namespace mongo {
class MigrateStatus {
public:
-
+
MigrateStatus() : m_active("MigrateStatus") { active = false; }
void prepare() {
@@ -1345,9 +1360,19 @@ namespace mongo {
timing.done(4);
}
+ {
+ // pause to wait for replication
+ // this will prevent us from entering the critical section until we're ready
+ Timer t;
+ while ( t.minutes() < 600 ) {
+ if ( flushPendingWrites( lastOpApplied ) )
+ break;
+ sleepsecs(1);
+ }
+ }
+
{
// 5. wait for commit
- Timer timeWaitingForCommit;
state = STEADY;
while ( state == STEADY || state == COMMIT_START ) {
@@ -1371,17 +1396,16 @@ namespace mongo {
if ( state == COMMIT_START ) {
if ( flushPendingWrites( lastOpApplied ) )
break;
-
- if ( timeWaitingForCommit.seconds() > 86400 ) {
- state = FAIL;
- errmsg = "timed out waiting for commit";
- return;
- }
}
sleepmillis( 10 );
}
+ if ( state == FAIL ) {
+ errmsg = "imted out waiting for commit";
+ return;
+ }
+
timing.done(5);
}
@@ -1516,12 +1540,14 @@ namespace mongo {
return false;
state = COMMIT_START;
- // we wait 5 minutes for the commit to succeed before giving up
- for ( int i=0; i<5*60*1000; i++ ) {
+ Timer t;
+ // we wait up to 5 minutes for the commit to succeed before giving up
+ while ( t.minutes() <= 5 ) {
sleepmillis(1);
if ( state == DONE )
return true;
}
+ state = FAIL;
log() << "startCommit never finished!" << migrateLog;
return false;
}
@@ -1571,7 +1597,7 @@ namespace mongo {
virtual LockType locktype() const { return WRITE; } // this is so don't have to do locking internally
- bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
if ( migrateStatus.getActive() ) {
errmsg = "migrate already in progress";
@@ -1608,7 +1634,7 @@ namespace mongo {
public:
RecvChunkStatusCommand() : ChunkCommandHelper( "_recvChunkStatus" ) {}
- bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
migrateStatus.status( result );
return 1;
}
@@ -1619,7 +1645,7 @@ namespace mongo {
public:
RecvChunkCommitCommand() : ChunkCommandHelper( "_recvChunkCommit" ) {}
- bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
bool ok = migrateStatus.startCommit();
migrateStatus.status( result );
return ok;
@@ -1631,7 +1657,7 @@ namespace mongo {
public:
RecvChunkAbortCommand() : ChunkCommandHelper( "_recvChunkAbort" ) {}
- bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
migrateStatus.abort();
migrateStatus.status( result );
return true;
@@ -1653,7 +1679,7 @@ namespace mongo {
assert( ! isInRange( BSON( "x" << 5 ) , min , max ) );
assert( ! isInRange( BSON( "x" << 6 ) , min , max ) );
- log(1) << "isInRangeTest passed" << migrateLog;
+ LOG(1) << "isInRangeTest passed" << migrateLog;
}
} isInRangeTest;
}
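The commands touched above (_recvChunkStart, _recvChunkStatus, _recvChunkCommit, _recvChunkAbort and the moveChunk bookkeeping) are internal: in normal operation they are driven by a moveChunk admin command sent to a mongos. A minimal shell sketch of that outer command, with an illustrative shard name and key value:

    // run against a mongos; the donor and recipient shards then exchange the
    // _recvChunk* commands modified in s/d_migrate.cpp above
    db.adminCommand( { moveChunk : "test.foo", find : { _id : 50 }, to : "shard0001" } )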
diff --git a/s/d_split.cpp b/s/d_split.cpp
index 64fc4cb42e4..cef6188a2bb 100644
--- a/s/d_split.cpp
+++ b/s/d_split.cpp
@@ -57,7 +57,7 @@ namespace mongo {
"example: { medianKey:\"blog.posts\", keyPattern:{x:1}, min:{x:10}, max:{x:55} }\n"
"NOTE: This command may take a while to run";
}
- bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
const char *ns = jsobj.getStringField( "medianKey" );
BSONObj min = jsobj.getObjectField( "min" );
BSONObj max = jsobj.getObjectField( "max" );
@@ -136,7 +136,7 @@ namespace mongo {
help << "Internal command.\n";
}
- bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
const char* ns = jsobj.getStringField( "checkShardingIndex" );
BSONObj keyPattern = jsobj.getObjectField( "keyPattern" );
@@ -177,6 +177,11 @@ namespace mongo {
return false;
}
+ if( d->isMultikey( d->idxNo( *idx ) ) ) {
+ errmsg = "index is multikey, cannot use for sharding";
+ return false;
+ }
+
BtreeCursor * bc = BtreeCursor::make( d , d->idxNo(*idx) , *idx , min , max , false , 1 );
shared_ptr<Cursor> c( bc );
auto_ptr<ClientCursor> cc( new ClientCursor( QueryOption_NoCursorTimeout , c , ns ) );
@@ -248,7 +253,7 @@ namespace mongo {
"NOTE: This command may take a while to run";
}
- bool run(const string& dbname, BSONObj& jsobj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname, BSONObj& jsobj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
//
// 1.a We'll parse the parameters in two steps. First, make sure the we can use the split index to get
@@ -524,7 +529,7 @@ namespace mongo {
virtual bool adminOnly() const { return true; }
virtual LockType locktype() const { return NONE; }
- bool run(const string& dbname, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
+ bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl ) {
//
// 1. check whether parameters passed to splitChunk are sound
@@ -686,7 +691,7 @@ namespace mongo {
BSONObjBuilder logDetail;
origChunk.appendShortVersion( "before" , logDetail );
- log(1) << "before split on " << origChunk << endl;
+ LOG(1) << "before split on " << origChunk << endl;
vector<ChunkInfo> newChunks;
ShardChunkVersion myVersion = maxVersion;
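checkShardingIndex now refuses multikey indexes as shard-key indexes. A hedged shell illustration of the case this guards against; collection, field and database names are made up, and sharding is assumed to already be enabled for the database:

    // an array value makes { tags : 1 } a multikey index, which can no longer back a shard key
    db.foo.insert( { _id : 1, tags : [ "a", "b" ] } )
    db.foo.ensureIndex( { tags : 1 } )
    db.adminCommand( { shardCollection : "test.foo", key : { tags : 1 } } )
    // expected to fail with "index is multikey, cannot use for sharding"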
diff --git a/s/d_state.cpp b/s/d_state.cpp
index 409820047b0..f43865b222e 100644
--- a/s/d_state.cpp
+++ b/s/d_state.cpp
@@ -288,7 +288,7 @@ namespace mongo {
ShardedConnectionInfo* ShardedConnectionInfo::get( bool create ) {
ShardedConnectionInfo* info = _tl.get();
if ( ! info && create ) {
- log(1) << "entering shard mode for connection" << endl;
+ LOG(1) << "entering shard mode for connection" << endl;
info = new ShardedConnectionInfo();
_tl.reset( info );
}
@@ -316,7 +316,7 @@ namespace mongo {
void ShardedConnectionInfo::addHook() {
static bool done = false;
if (!done) {
- log(1) << "adding sharding hook" << endl;
+ LOG(1) << "adding sharding hook" << endl;
pool.addHook(new ShardingConnectionHook(false));
done = true;
}
@@ -380,7 +380,7 @@ namespace mongo {
virtual bool slaveOk() const { return true; }
- bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
ShardedConnectionInfo::reset();
return true;
}
@@ -452,7 +452,7 @@ namespace mongo {
return true;
}
- bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
// Steps
// 1. check basic config
@@ -613,7 +613,7 @@ namespace mongo {
virtual LockType locktype() const { return NONE; }
- bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
string ns = cmdObj["getShardVersion"].valuestrsafe();
if ( ns.size() == 0 ) {
errmsg = "need to specify full namespace";
@@ -642,7 +642,7 @@ namespace mongo {
virtual LockType locktype() const { return WRITE; } // TODO: figure out how to make this not need to lock
- bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
shardingState.appendInfo( result );
return true;
}
diff --git a/s/d_writeback.cpp b/s/d_writeback.cpp
index 6839fc4b1ca..01c0c14ac0a 100644
--- a/s/d_writeback.cpp
+++ b/s/d_writeback.cpp
@@ -129,7 +129,7 @@ namespace mongo {
void help(stringstream& h) const { h<<"internal"; }
- bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
BSONElement e = cmdObj.firstElement();
if ( e.type() != jstOID ) {
@@ -144,7 +144,7 @@ namespace mongo {
// we want to do return at least at every 5 minutes so sockets don't timeout
BSONObj z;
if ( writeBackManager.getWritebackQueue(id.str())->queue.blockingPop( z, 5 * 60 /* 5 minutes */ ) ) {
- log(1) << "WriteBackCommand got : " << z << endl;
+ LOG(1) << "WriteBackCommand got : " << z << endl;
result.append( "data" , z );
}
else {
@@ -168,7 +168,7 @@ namespace mongo {
<< "This is an internal command";
}
- bool run(const string& , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool) {
+ bool run(const string& , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool) {
writeBackManager.appendStats( result );
return true;
}
diff --git a/s/grid.cpp b/s/grid.cpp
index 6141e061be6..3756e131a6a 100644
--- a/s/grid.cpp
+++ b/s/grid.cpp
@@ -125,6 +125,8 @@ namespace mongo {
name = &nameInternal;
}
+ ReplicaSetMonitorPtr rsMonitor;
+
// Check whether the host (or set) exists and run several sanity checks on this request.
// There are two set of sanity checks: making sure adding this particular shard is consistent
// with the replica set state (if it exists) and making sure this shards databases can be
@@ -140,7 +142,7 @@ namespace mongo {
errMsg = "can't use sync cluster as a shard. for replica set, have to use <setname>/<server1>,<server2>,...";
return false;
}
-
+
BSONObj resIsMongos;
bool ok = newShardConn->runCommand( "admin" , BSON( "isdbgrid" << 1 ) , resIsMongos );
@@ -264,6 +266,9 @@ namespace mongo {
}
}
+ if ( newShardConn->type() == ConnectionString::SET )
+ rsMonitor = ReplicaSetMonitor::get( setName );
+
newShardConn.done();
}
catch ( DBException& e ) {
@@ -295,7 +300,7 @@ namespace mongo {
// build the ConfigDB shard document
BSONObjBuilder b;
b.append( "_id" , *name );
- b.append( "host" , servers.toString() );
+ b.append( "host" , rsMonitor ? rsMonitor->getServerAddress() : servers.toString() );
if ( maxSize > 0 ) {
b.append( ShardFields::maxSize.name() , maxSize );
}
@@ -508,7 +513,7 @@ namespace mongo {
assert( Grid::_inBalancingWindow( w8 , now ) );
assert( Grid::_inBalancingWindow( w9 , now ) );
- log(1) << "BalancingWidowObjTest passed" << endl;
+        LOG(1) << "BalancingWindowObjTest passed" << endl;
}
} BalancingWindowObjTest;
diff --git a/s/request.cpp b/s/request.cpp
index cda75f63a17..36488cb5617 100644
--- a/s/request.cpp
+++ b/s/request.cpp
@@ -43,7 +43,12 @@ namespace mongo {
_clientInfo = ClientInfo::get();
_clientInfo->newRequest( p );
+ }
+ void Request::checkAuth() const {
+ char cl[256];
+ nsToDatabase(getns(), cl);
+ uassert(15845, "unauthorized", _clientInfo->getAuthenticationInfo()->isAuthorized(cl));
}
void Request::init() {
@@ -60,17 +65,21 @@ namespace mongo {
uassert( 13644 , "can't use 'local' database through mongos" , ! str::startsWith( getns() , "local." ) );
- _config = grid.getDBConfig( getns() );
+ const string nsStr (getns()); // use in functions taking string rather than char*
+
+ _config = grid.getDBConfig( nsStr );
if ( reload ) {
- if ( _config->isSharded( getns() ) )
- _config->getChunkManager( getns() , true );
+ if ( _config->isSharded( nsStr ) )
+ _config->getChunkManager( nsStr , true );
else
_config->reload();
}
- if ( _config->isSharded( getns() ) ) {
- _chunkManager = _config->getChunkManager( getns() , reload );
- uassert( 10193 , (string)"no shard info for: " + getns() , _chunkManager );
+ if ( _config->isSharded( nsStr ) ) {
+ _chunkManager = _config->getChunkManager( nsStr , reload );
+ // TODO: All of these uasserts are no longer necessary, getChunkManager() throws when
+ // not returning the right value.
+ uassert( 10193 , (string)"no shard info for: " + nsStr , _chunkManager );
}
else {
_chunkManager.reset();
@@ -104,7 +113,7 @@ namespace mongo {
}
- log(3) << "Request::process ns: " << getns() << " msg id:" << (int)(_m.header()->id) << " attempt: " << attempt << endl;
+ LOG(3) << "Request::process ns: " << getns() << " msg id:" << (int)(_m.header()->id) << " attempt: " << attempt << endl;
Strategy * s = SINGLE;
_counter = &opsNonSharded;
@@ -138,10 +147,7 @@ namespace mongo {
s->getMore( *this );
}
else {
- char cl[256];
- nsToDatabase(getns(), cl);
- uassert(15845, "unauthorized", _clientInfo->getAuthenticationInfo()->isAuthorized(cl));
-
+ checkAuth();
s->writeOp( op, *this );
}
diff --git a/s/request.h b/s/request.h
index 6645ed9a092..86a484e378b 100644
--- a/s/request.h
+++ b/s/request.h
@@ -70,6 +70,8 @@ namespace mongo {
return _clientInfo;
}
+ void checkAuth() const;
+
// ---- remote location info -----
diff --git a/s/s_only.cpp b/s/s_only.cpp
index 4afa9008f71..6449b34ad81 100644
--- a/s/s_only.cpp
+++ b/s/s_only.cpp
@@ -91,7 +91,7 @@ namespace mongo {
}
string errmsg;
- int ok = c->run( dbname , cmdObj , errmsg , result , fromRepl );
+ int ok = c->run( dbname , cmdObj , queryOptions, errmsg , result , fromRepl );
if ( ! ok )
result.append( "errmsg" , errmsg );
return ok;
diff --git a/s/security.cpp b/s/security.cpp
index e27e68f4dcf..6cb9da624be 100644
--- a/s/security.cpp
+++ b/s/security.cpp
@@ -94,7 +94,7 @@ namespace mongo {
return false;
}
- bool CmdLogout::run(const string& dbname , BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
+ bool CmdLogout::run(const string& dbname , BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
AuthenticationInfo *ai = ClientInfo::get()->getAuthenticationInfo();
ai->logout(dbname);
return true;
diff --git a/s/server.cpp b/s/server.cpp
index 1ca2e4c9d7e..a6ffab96f29 100644
--- a/s/server.cpp
+++ b/s/server.cpp
@@ -26,6 +26,7 @@
#include "../util/ramlog.h"
#include "../util/signal_handlers.h"
#include "../util/admin_access.h"
+#include "../util/concurrency/task.h"
#include "../db/dbwebserver.h"
#include "../scripting/engine.h"
@@ -94,7 +95,7 @@ namespace mongo {
r.process();
}
catch ( AssertionException & e ) {
- log( e.isUserAssertion() ? 1 : 0 ) << "AssertionException in process: " << e.what() << endl;
+ log( e.isUserAssertion() ? 1 : 0 ) << "AssertionException while processing op type : " << m.operation() << " to : " << r.getns() << causedBy(e) << endl;
le->raiseError( e.getCode() , e.what() );
@@ -158,9 +159,6 @@ namespace mongo {
cursorCache.startTimeoutThread();
PeriodicTask::theRunner->go();
- log() << "waiting for connections on port " << cmdLine.port << endl;
- //DbGridListener l(port);
- //l.listen();
ShardedMessageHandler handler;
MessageServer * server = createServer( opts , &handler );
server->setAsTimeTracker();
@@ -321,6 +319,16 @@ int _main(int argc, char* argv[]) {
return 8;
}
+ {
+ class CheckConfigServers : public task::Task {
+ virtual string name() const { return "CheckConfigServers"; }
+ virtual void doWork() { configServer.ok(true); }
+ };
+ static CheckConfigServers checkConfigServers;
+
+ task::repeat(&checkConfigServers, 60*1000);
+ }
+
int configError = configServer.checkConfigVersion( params.count( "upgrade" ) );
if ( configError ) {
if ( configError > 0 ) {
diff --git a/s/shard.cpp b/s/shard.cpp
index dfd707857da..75326e047fc 100644
--- a/s/shard.cpp
+++ b/s/shard.cpp
@@ -235,7 +235,7 @@ namespace mongo {
virtual bool slaveOk() const { return true; }
virtual bool adminOnly() const { return true; }
- virtual bool run(const string&, mongo::BSONObj&, std::string& errmsg , mongo::BSONObjBuilder& result, bool) {
+ virtual bool run(const string&, mongo::BSONObj&, int, std::string& errmsg , mongo::BSONObjBuilder& result, bool) {
return staticShardInfo.getShardMap( result , errmsg );
}
} cmdGetShardMap;
@@ -346,7 +346,7 @@ namespace mongo {
best = t;
}
- log(1) << "best shard for new allocation is " << best << endl;
+ LOG(1) << "best shard for new allocation is " << best << endl;
return best.shard();
}
@@ -360,7 +360,7 @@ namespace mongo {
void ShardingConnectionHook::onCreate( DBClientBase * conn ) {
if( !noauth ) {
string err;
- log(2) << "calling onCreate auth for " << conn->toString() << endl;
+ LOG(2) << "calling onCreate auth for " << conn->toString() << endl;
uassert( 15847, "can't authenticate to shard server",
conn->auth("local", internalSecurity.user, internalSecurity.pwd, err, false));
}
diff --git a/s/shard_version.cpp b/s/shard_version.cpp
index 01447749ac9..4f84b0ae61e 100644
--- a/s/shard_version.cpp
+++ b/s/shard_version.cpp
@@ -96,7 +96,7 @@ namespace mongo {
ChunkManagerPtr manager;
const bool isSharded = conf->isSharded( ns );
if ( isSharded ) {
- manager = conf->getChunkManager( ns , authoritative );
+ manager = conf->getChunkManagerIfExists( ns , authoritative );
// It's possible the chunk manager was reset since we checked whether sharded was true,
// so must check this here.
if( manager ) officialSequenceNumber = manager->getSequenceNumber();
@@ -139,8 +139,14 @@ namespace mongo {
}
if ( result["reloadConfig"].trueValue() ) {
- // reload config
- conf->getChunkManager( ns , true );
+ if( result["version"].timestampTime() == 0 ){
+ // reload db
+ conf->reload();
+ }
+ else {
+ // reload config
+ conf->getChunkManager( ns , true );
+ }
}
const int maxNumTries = 7;
diff --git a/s/shardkey.cpp b/s/shardkey.cpp
index 9602b8566e5..d6c8eda1ae1 100644
--- a/s/shardkey.cpp
+++ b/s/shardkey.cpp
@@ -55,7 +55,8 @@ namespace mongo {
*/
for(set<string>::const_iterator it = patternfields.begin(); it != patternfields.end(); ++it) {
- if(obj.getFieldDotted(it->c_str()).eoo())
+ BSONElement e = obj.getFieldDotted(it->c_str());
+ if(e.eoo() || e.type() == Array)
return false;
}
return true;
@@ -83,7 +84,7 @@ namespace mongo {
vector<const char*> keysToMove;
keysToMove.push_back("_id");
BSONForEach(e, pattern) {
- if (strchr(e.fieldName(), '.') == NULL)
+ if (strchr(e.fieldName(), '.') == NULL && strcmp(e.fieldName(), "_id") != 0)
keysToMove.push_back(e.fieldName());
}
@@ -185,8 +186,8 @@ namespace mongo {
ShardKeyPattern k( fromjson("{a:1,'sub.b':-1,'sub.c':1}") );
BSONObj x = fromjson("{a:1,'sub.b':2,'sub.c':3}");
- assert( k.extractKey( fromjson("{a:1,sub:{b:2,c:3}}") ).shallowEqual(x) );
- assert( k.extractKey( fromjson("{sub:{b:2,c:3},a:1}") ).shallowEqual(x) );
+ assert( k.extractKey( fromjson("{a:1,sub:{b:2,c:3}}") ).binaryEqual(x) );
+ assert( k.extractKey( fromjson("{sub:{b:2,c:3},a:1}") ).binaryEqual(x) );
}
void moveToFrontTest() {
ShardKeyPattern sk (BSON("a" << 1 << "b" << 1));
@@ -194,13 +195,13 @@ namespace mongo {
BSONObj ret;
ret = sk.moveToFront(BSON("z" << 1 << "_id" << 1 << "y" << 1 << "a" << 1 << "x" << 1 << "b" << 1 << "w" << 1));
- assert(ret.shallowEqual(BSON("_id" << 1 << "a" << 1 << "b" << 1 << "z" << 1 << "y" << 1 << "x" << 1 << "w" << 1)));
+ assert(ret.binaryEqual(BSON("_id" << 1 << "a" << 1 << "b" << 1 << "z" << 1 << "y" << 1 << "x" << 1 << "w" << 1)));
ret = sk.moveToFront(BSON("_id" << 1 << "a" << 1 << "b" << 1 << "z" << 1 << "y" << 1 << "x" << 1 << "w" << 1));
- assert(ret.shallowEqual(BSON("_id" << 1 << "a" << 1 << "b" << 1 << "z" << 1 << "y" << 1 << "x" << 1 << "w" << 1)));
+ assert(ret.binaryEqual(BSON("_id" << 1 << "a" << 1 << "b" << 1 << "z" << 1 << "y" << 1 << "x" << 1 << "w" << 1)));
ret = sk.moveToFront(BSON("z" << 1 << "y" << 1 << "a" << 1 << "b" << 1 << "Z" << 1 << "Y" << 1));
- assert(ret.shallowEqual(BSON("a" << 1 << "b" << 1 << "z" << 1 << "y" << 1 << "Z" << 1 << "Y" << 1)));
+ assert(ret.binaryEqual(BSON("a" << 1 << "b" << 1 << "z" << 1 << "y" << 1 << "Z" << 1 << "Y" << 1)));
}
@@ -263,7 +264,7 @@ namespace mongo {
moveToFrontBenchmark(100);
}
- log(1) << "shardKeyTest passed" << endl;
+ LOG(1) << "shardKeyTest passed" << endl;
}
} shardKeyTest;
diff --git a/s/shardkey.h b/s/shardkey.h
index 96301ffe093..976cff09591 100644
--- a/s/shardkey.h
+++ b/s/shardkey.h
@@ -102,7 +102,21 @@ namespace mongo {
};
inline BSONObj ShardKeyPattern::extractKey(const BSONObj& from) const {
- BSONObj k = from.extractFields(pattern);
+ BSONObj k = from;
+ bool needExtraction = false;
+
+ BSONObjIterator a(from);
+ BSONObjIterator b(pattern);
+ while (a.more() && b.more()){
+ if (strcmp(a.next().fieldName(), b.next().fieldName()) != 0){
+ needExtraction = true;
+ break;
+ }
+ }
+
+ if (needExtraction || a.more() != b.more())
+ k = from.extractFields(pattern);
+
uassert(13334, "Shard Key must be less than 512 bytes", k.objsize() < 512);
return k;
}
diff --git a/s/strategy.cpp b/s/strategy.cpp
index b48a718b49d..4230b7fac92 100644
--- a/s/strategy.cpp
+++ b/s/strategy.cpp
@@ -38,7 +38,7 @@ namespace mongo {
conn.donotCheckVersion();
else if ( conn.setVersion() ) {
conn.done();
- throw StaleConfigException( r.getns() , "doWRite" , true );
+ throw StaleConfigException( r.getns() , "doWrite" , true );
}
conn->say( r.m() );
conn.done();
@@ -46,6 +46,8 @@ namespace mongo {
void Strategy::doQuery( Request& r , const Shard& shard ) {
+ r.checkAuth();
+
ShardConnection dbcon( shard , r.getns() );
DBClientBase &c = dbcon.conn();
diff --git a/s/strategy_shard.cpp b/s/strategy_shard.cpp
index 12d2049cdbf..c6b30e7965f 100644
--- a/s/strategy_shard.cpp
+++ b/s/strategy_shard.cpp
@@ -35,7 +35,9 @@ namespace mongo {
virtual void queryOp( Request& r ) {
QueryMessage q( r.d() );
- log(3) << "shard query: " << q.ns << " " << q.query << endl;
+ r.checkAuth();
+
+ LOG(3) << "shard query: " << q.ns << " " << q.query << endl;
if ( q.ntoreturn == 1 && strstr(q.ns, ".$cmd") )
throw UserException( 8010 , "something is wrong, shouldn't see a command here" );
@@ -73,7 +75,7 @@ namespace mongo {
try {
cursor->init();
- log(5) << " cursor type: " << cursor->type() << endl;
+ LOG(5) << " cursor type: " << cursor->type() << endl;
shardedCursorTypes.hit( cursor->type() );
if ( query.isExplain() ) {
@@ -92,7 +94,7 @@ namespace mongo {
if ( ! cc->sendNextBatch( r ) ) {
return;
}
- log(6) << "storing cursor : " << cc->getId() << endl;
+ LOG(6) << "storing cursor : " << cc->getId() << endl;
cursorCache.store( cc );
}
@@ -100,11 +102,11 @@ namespace mongo {
int ntoreturn = r.d().pullInt();
long long id = r.d().pullInt64();
- log(6) << "want cursor : " << id << endl;
+ LOG(6) << "want cursor : " << id << endl;
ShardedClientCursorPtr cursor = cursorCache.get( id );
if ( ! cursor ) {
- log(6) << "\t invalid cursor :(" << endl;
+ LOG(6) << "\t invalid cursor :(" << endl;
replyToQuery( ResultFlag_CursorNotFound , r.p() , r.m() , 0 , 0 , 0 );
return;
}
@@ -121,7 +123,7 @@ namespace mongo {
void _insert( Request& r , DbMessage& d, ChunkManagerPtr manager ) {
const int flags = d.reservedField();
- bool keepGoing = flags & InsertOption_KeepGoing; // modified before assertion if should abort
+ bool keepGoing = flags & InsertOption_ContinueOnError; // modified before assertion if should abort
while ( d.moreJSObjs() ) {
try {
@@ -139,8 +141,8 @@ namespace mongo {
}
if ( bad ) {
- log() << "tried to insert object without shard key: " << r.getns() << " " << o << endl;
- uasserted( 8011 , "tried to insert object without shard key" );
+ log() << "tried to insert object with no valid shard key: " << r.getns() << " " << o << endl;
+ uasserted( 8011 , "tried to insert object with no valid shard key" );
}
}
@@ -154,7 +156,7 @@ namespace mongo {
for ( int i=0; i<maxTries; i++ ) {
try {
ChunkPtr c = manager->findChunk( o );
- log(4) << " server:" << c->getShard().toString() << " " << o << endl;
+ LOG(4) << " server:" << c->getShard().toString() << " " << o << endl;
insert( c->getShard() , r.getns() , o , flags);
r.gotInsert();
@@ -167,20 +169,20 @@ namespace mongo {
int logLevel = i < ( maxTries / 2 );
LOG( logLevel ) << "retrying insert because of StaleConfigException: " << e << " object: " << o << endl;
r.reset();
-
- unsigned long long old = manager->getSequenceNumber();
- manager = r.getChunkManager();
-
- LOG( logLevel ) << " sequence number - old: " << old << " new: " << manager->getSequenceNumber() << endl;
- if (!manager) {
+ manager = r.getChunkManager();
+ if( ! manager ) {
keepGoing = false;
uasserted(14804, "collection no longer sharded");
}
+
+ unsigned long long old = manager->getSequenceNumber();
+
+ LOG( logLevel ) << " sequence number - old: " << old << " new: " << manager->getSequenceNumber() << endl;
}
sleepmillis( i * 20 );
}
-
+
assert( inShutdown() || gotThrough ); // not caught below
} catch (const UserException&){
if (!keepGoing || !d.moreJSObjs()){
@@ -208,8 +210,8 @@ namespace mongo {
}
if ( bad ) {
- log() << "tried to insert object without shard key: " << nsChunkLookup << " " << o << endl;
- uasserted( 14842 , "tried to insert object without shard key" );
+ log() << "tried to insert object with no valid shard key: " << nsChunkLookup << " " << o << endl;
+ uasserted( 14842 , "tried to insert object with no valid shard key" );
}
}
@@ -222,7 +224,7 @@ namespace mongo {
for ( int i=0; i<maxTries; i++ ) {
try {
ChunkPtr c = manager->findChunk( o );
- log(4) << " server:" << c->getShard().toString() << " " << o << endl;
+ LOG(4) << " server:" << c->getShard().toString() << " " << o << endl;
insert( c->getShard() , ns , o , flags, safe);
break;
}
@@ -231,7 +233,7 @@ namespace mongo {
LOG( logLevel ) << "retrying insert because of StaleConfigException: " << e << " object: " << o << endl;
unsigned long long old = manager->getSequenceNumber();
- manager = conf->getChunkManager(ns);
+ manager = conf->getChunkManagerIfExists(ns);
LOG( logLevel ) << " sequenece number - old: " << old << " new: " << manager->getSequenceNumber() << endl;
@@ -256,7 +258,7 @@ namespace mongo {
bool multi = flags & UpdateOption_Multi;
if (upsert) {
- uassert(8012, "can't upsert something without shard key",
+ uassert(8012, "can't upsert something without valid shard key",
(manager->hasShardKey(toupdate) ||
(toupdate.firstElementFieldName()[0] == '$' && manager->hasShardKey(query))));
@@ -271,7 +273,8 @@ namespace mongo {
if ( multi ) {
}
else if ( strcmp( query.firstElementFieldName() , "_id" ) || query.nFields() != 1 ) {
- throw UserException( 8013 , "can't do non-multi update with query that doesn't have the shard key" );
+ log() << "Query " << query << endl;
+ throw UserException( 8013 , "can't do non-multi update with query that doesn't have a valid shard key" );
}
else {
save = true;
@@ -304,7 +307,7 @@ namespace mongo {
}
else {
uasserted(12376,
- str::stream() << "shard key must be in update object for collection: " << manager->getns() );
+ str::stream() << "valid shard key must be in update object for collection: " << manager->getns() );
}
}
@@ -349,7 +352,7 @@ namespace mongo {
bool multi = flags & UpdateOption_Multi;
if (upsert) {
- uassert(14854, "can't upsert something without shard key",
+ uassert(14854, "can't upsert something without valid shard key",
(manager->hasShardKey(toupdate) ||
(toupdate.firstElementFieldName()[0] == '$' && manager->hasShardKey(query))));
@@ -364,7 +367,7 @@ namespace mongo {
if ( multi ) {
}
else if ( strcmp( query.firstElementFieldName() , "_id" ) || query.nFields() != 1 ) {
- throw UserException( 14850 , "can't do non-multi update with query that doesn't have the shard key" );
+ throw UserException( 14850 , "can't do non-multi update with query that doesn't have a valid shard key" );
}
else {
save = true;
@@ -397,7 +400,7 @@ namespace mongo {
}
else {
uasserted(14857,
- str::stream() << "shard key must be in update object for collection: " << manager->getns() );
+ str::stream() << "valid shard key must be in update object for collection: " << manager->getns() );
}
}
@@ -447,7 +450,7 @@ namespace mongo {
while ( true ) {
try {
manager->getShardsForQuery( shards , pattern );
- log(2) << "delete : " << pattern << " \t " << shards.size() << " justOne: " << justOne << endl;
+ LOG(2) << "delete : " << pattern << " \t " << shards.size() << " justOne: " << justOne << endl;
if ( shards.size() == 1 ) {
doWrite( dbDelete , r , *shards.begin() );
return;
@@ -479,7 +482,7 @@ namespace mongo {
virtual void writeOp( int op , Request& r ) {
const char *ns = r.getns();
- log(3) << "write: " << ns << endl;
+ LOG(3) << "write: " << ns << endl;
DbMessage& d = r.d();
ChunkManagerPtr info = r.getChunkManager();
diff --git a/s/strategy_single.cpp b/s/strategy_single.cpp
index b3eef9dafa4..012be5fb3dd 100644
--- a/s/strategy_single.cpp
+++ b/s/strategy_single.cpp
@@ -36,7 +36,7 @@ namespace mongo {
virtual void queryOp( Request& r ) {
QueryMessage q( r.d() );
- log(3) << "single query: " << q.ns << " " << q.query << " ntoreturn: " << q.ntoreturn << endl;
+ LOG(3) << "single query: " << q.ns << " " << q.query << " ntoreturn: " << q.ntoreturn << " options : " << q.queryOptions << endl;
if ( r.isCommand() ) {
@@ -55,7 +55,7 @@ namespace mongo {
: str::equals("query", e.fieldName())))
cmdObj = e.embeddedObject();
}
- bool ok = Command::runAgainstRegistered(q.ns, cmdObj, builder);
+ bool ok = Command::runAgainstRegistered(q.ns, cmdObj, builder, q.queryOptions);
if ( ok ) {
BSONObj x = builder.done();
replyToQuery(0, r.p(), r.m(), x);
@@ -161,12 +161,12 @@ namespace mongo {
if ( r.isShardingEnabled() &&
strstr( ns , ".system.indexes" ) == strchr( ns , '.' ) &&
strchr( ns , '.' ) ) {
- log(1) << " .system.indexes write for: " << ns << endl;
+ LOG(1) << " .system.indexes write for: " << ns << endl;
handleIndexWrite( op , r );
return;
}
- log(3) << "single write: " << ns << endl;
+ LOG(3) << "single write: " << ns << endl;
doWrite( op , r , r.primaryShard() );
r.gotInsert(); // Won't handle mulit-insert correctly. Not worth parsing the request.
}
diff --git a/s/writeback_listener.cpp b/s/writeback_listener.cpp
index 81f75988a6e..5f320d3921f 100644
--- a/s/writeback_listener.cpp
+++ b/s/writeback_listener.cpp
@@ -117,7 +117,7 @@ namespace mongo {
while ( ! inShutdown() ) {
if ( ! Shard::isAShardNode( _addr ) ) {
- log(1) << _addr << " is not a shard node" << endl;
+ LOG(1) << _addr << " is not a shard node" << endl;
sleepsecs( 60 );
continue;
}
@@ -216,7 +216,10 @@ namespace mongo {
if ( gle["code"].numberInt() == 9517 ) {
log() << "writeback failed because of stale config, retrying attempts: " << attempts << endl;
- db->getChunkManager( ns , true );
+ if( ! db->getChunkManagerIfExists( ns , true ) ){
+ uassert( 15884, str::stream() << "Could not reload chunk manager after " << attempts << " attempts.", attempts <= 4 );
+ sleepsecs( attempts - 1 );
+ }
continue;
}
diff --git a/scripting/bench.cpp b/scripting/bench.cpp
index 1ac7f04a55e..9ada7d6495c 100644
--- a/scripting/bench.cpp
+++ b/scripting/bench.cpp
@@ -142,7 +142,7 @@ namespace mongo {
conn->remove( ns , fixQuery( e["query"].Obj() ) );
}
else if ( op == "update" ) {
- conn->update( ns , fixQuery( e["query"].Obj() ) , e["update"].Obj() );
+ conn->update( ns , fixQuery( e["query"].Obj() ) , e["update"].Obj() , e["upsert"].trueValue() );
}
else {
log() << "don't understand op: " << op << endl;
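With the change above, a benchRun update op honors an optional upsert flag taken from the op document. A sketch of such an op; the op fields mirror the handling in bench.cpp, while the surrounding benchRun options are assumptions about the shell API rather than part of this patch:

    var ops = [ { op : "update", ns : "test.foo",
                  query : { _id : 1 }, update : { $inc : { x : 1 } }, upsert : true } ]
    benchRun( { ops : ops, parallel : 2, seconds : 5, host : db.getMongo().host } )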
diff --git a/server.h b/server.h
index 370c63d0c47..781e4ccc24a 100644
--- a/server.h
+++ b/server.h
@@ -1,25 +1,25 @@
-/** @file server.h
-
- This file contains includes commonly needed in the server files (mongod, mongos, test). It is NOT included in the C++ client.
-
- Over time we should move more here, and more out of pch.h. And get rid of pch.h at some point.
-*/
-
-// todo is there a boost thign for this already?
-
-#pragma once
-
-#include "bson/inline_decls.h"
-
-/* Note: do not clutter code with these -- ONLY use in hot spots / significant loops. */
-
-// branch prediction. indicate we expect to enter the if statement body
-#define IF MONGOIF
-
-// branch prediction. indicate we expect to not enter the if statement body
-#define _IF MONGO_IF
-
-// prefetch data from memory
-#define PREFETCH MONGOPREFETCH
-
-using namespace bson;
+/** @file server.h
+
+ This file contains includes commonly needed in the server files (mongod, mongos, test). It is NOT included in the C++ client.
+
+ Over time we should move more here, and more out of pch.h. And get rid of pch.h at some point.
+*/
+
+// todo is there a boost thing for this already?
+
+#pragma once
+
+#include "bson/inline_decls.h"
+
+/* Note: do not clutter code with these -- ONLY use in hot spots / significant loops. */
+
+// branch prediction. indicate we expect the expression to be true
+#define likely MONGO_likely
+
+// branch prediction. indicate we expect the expression to be false
+#define unlikely MONGO_unlikely
+
+// prefetch data from memory
+#define PREFETCH MONGOPREFETCH
+
+using namespace bson;
diff --git a/shell/collection.js b/shell/collection.js
index cf8f5ce19c1..862a0a11440 100644
--- a/shell/collection.js
+++ b/shell/collection.js
@@ -120,7 +120,7 @@ DBCollection.prototype._validateObject = function( o ){
throw "can't save a DBQuery object";
}
-DBCollection._allowedFields = { $id : 1 , $ref : 1 , $db : 1 };
+DBCollection._allowedFields = { $id : 1 , $ref : 1 , $db : 1 , $MinKey : 1, $MaxKey : 1 };
DBCollection.prototype._validateForStorage = function( o ){
this._validateObject( o );
diff --git a/shell/dbshell.cpp b/shell/dbshell.cpp
index 8db622732dc..f3122c797d5 100644
--- a/shell/dbshell.cpp
+++ b/shell/dbshell.cpp
@@ -403,6 +403,8 @@ string finishCode( string code ) {
return "";
if ( ! line )
return "";
+ if ( code.find("\n\n") != string::npos ) // cancel multiline if two blank lines are entered
+ return ";";
while (startsWith(line, "... "))
line += 4;
@@ -504,6 +506,9 @@ int _main(int argc, char* argv[]) {
("version", "show version information")
("verbose", "increase verbosity")
("ipv6", "enable IPv6 support (disabled by default)")
+#ifdef MONGO_SSL
+        ("ssl", "use ssl for all connections")
+#endif
;
hidden_options.add_options()
@@ -572,6 +577,11 @@ int _main(int argc, char* argv[]) {
if (params.count("quiet")) {
mongo::cmdLine.quiet = true;
}
+#ifdef MONGO_SSL
+ if (params.count("ssl")) {
+ mongo::cmdLine.sslOnNormalPorts = true;
+ }
+#endif
if (params.count("nokillop")) {
mongo::shellUtils::_nokillop = true;
}
@@ -579,6 +589,8 @@ int _main(int argc, char* argv[]) {
autoKillOp = true;
}
+
+
/* This is a bit confusing, here are the rules:
*
* if nodb is set then all positional parameters are files
diff --git a/shell/mongo.js b/shell/mongo.js
index e129784bf66..25357691c51 100644
--- a/shell/mongo.js
+++ b/shell/mongo.js
@@ -24,8 +24,9 @@ if ( typeof mongoInject == "function" ){
mongoInject( Mongo.prototype );
}
-Mongo.prototype.setSlaveOk = function() {
- this.slaveOk = true;
+Mongo.prototype.setSlaveOk = function( value ) {
+ if( value == undefined ) value = true
+ this.slaveOk = value
}
Mongo.prototype.getDB = function( name ){
@@ -43,6 +44,10 @@ Mongo.prototype.adminCommand = function( cmd ){
return this.getDB( "admin" ).runCommand( cmd );
}
+Mongo.prototype.setLogLevel = function( logLevel ){
+ return this.adminCommand({ setParameter : 1, logLevel : logLevel })
+}
+
Mongo.prototype.getDBNames = function(){
return this.getDBs().databases.map(
function(z){
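setSlaveOk now takes an optional boolean and setLogLevel is new. A brief shell usage sketch:

    db.getMongo().setSlaveOk()         // unchanged behaviour: slaveOk = true
    db.getMongo().setSlaveOk( false )  // new: slaveOk can be switched back off
    db.getMongo().setLogLevel( 2 )     // wraps { setParameter : 1, logLevel : 2 }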
diff --git a/shell/mongo_vstudio.cpp b/shell/mongo_vstudio.cpp
index ea0b2cd4b20..2fbb6d908b5 100644
--- a/shell/mongo_vstudio.cpp
+++ b/shell/mongo_vstudio.cpp
@@ -89,6 +89,26 @@ const StringData _jscode_raw_utils =
"doassert( \"[\" + a + \"] != [\" + b + \"] are equal : \" + msg );\n"
"}\n"
"\n"
+"assert.contains = function( o, arr, msg ){\n"
+"var wasIn = false\n"
+"\n"
+"if( ! arr.length ){\n"
+"for( i in arr ){\n"
+"wasIn = arr[i] == o || ( ( arr[i] != null && o != null ) && friendlyEqual( arr[i] , o ) )\n"
+"return;\n"
+"if( wasIn ) break\n"
+"}\n"
+"}\n"
+"else {\n"
+"for( var i = 0; i < arr.length; i++ ){\n"
+"wasIn = arr[i] == o || ( ( arr[i] != null && o != null ) && friendlyEqual( arr[i] , o ) )\n"
+"if( wasIn ) break\n"
+"}\n"
+"}\n"
+"\n"
+"if( ! wasIn ) doassert( tojson( o ) + \" was not in \" + tojson( arr ) + \" : \" + msg )\n"
+"}\n"
+"\n"
"assert.repeat = function( f, msg, timeout, interval ) {\n"
"if ( assert._debug && msg ) print( \"in assert for: \" + msg );\n"
"\n"
@@ -216,6 +236,18 @@ const StringData _jscode_raw_utils =
"doassert( a + \" is not greater than or eq \" + b + \" : \" + msg );\n"
"}\n"
"\n"
+"assert.between = function( a, b, c, msg, inclusive ){\n"
+"if ( assert._debug && msg ) print( \"in assert for: \" + msg );\n"
+"\n"
+"if( ( inclusive == undefined || inclusive == true ) &&\n"
+"a <= b && b <= c ) return;\n"
+"else if( a < b && b < c ) return;\n"
+"\n"
+"doassert( b + \" is not between \" + a + \" and \" + c + \" : \" + msg );\n"
+"}\n"
+"\n"
+"assert.betweenIn = function( a, b, c, msg ){ assert.between( a, b, c, msg, true ) }\n"
+"assert.betweenEx = function( a, b, c, msg ){ assert.between( a, b, c, msg, false ) }\n"
"\n"
"assert.close = function( a , b , msg , places ){\n"
"if (places === undefined) {\n"
@@ -243,6 +275,11 @@ const StringData _jscode_raw_utils =
"return dst;\n"
"}\n"
"\n"
+"Object.merge = function( dst, src, deep ){\n"
+"var clone = Object.extend( {}, dst, deep )\n"
+"return Object.extend( clone, src, deep )\n"
+"}\n"
+"\n"
"argumentsToArray = function( a ){\n"
"var arr = [];\n"
"for ( var i=0; i<a.length; i++ )\n"
@@ -943,6 +980,35 @@ const StringData _jscode_raw_utils =
"print( tojsononeline( x ) );\n"
"}\n"
"\n"
+"if ( typeof TestData == \"undefined\" ){\n"
+"TestData = undefined\n"
+"}\n"
+"\n"
+"jsTestName = function(){\n"
+"if( TestData ) return TestData.testName\n"
+"return \"__unknown_name__\"\n"
+"}\n"
+"\n"
+"jsTestFile = function(){\n"
+"if( TestData ) return TestData.testFile\n"
+"return \"__unknown_file__\"\n"
+"}\n"
+"\n"
+"jsTestPath = function(){\n"
+"if( TestData ) return TestData.testPath\n"
+"return \"__unknown_path__\"\n"
+"}\n"
+"\n"
+"jsTestOptions = function(){\n"
+"if( TestData ) return { noJournal : TestData.noJournal,\n"
+"noJournalPrealloc : TestData.noJournalPrealloc }\n"
+"return {}\n"
+"}\n"
+"\n"
+"testLog = function(x){\n"
+"print( jsTestFile() + \" - \" + x )\n"
+"}\n"
+"\n"
"shellPrintHelper = function (x) {\n"
"\n"
"if (typeof (x) == \"undefined\") {\n"
@@ -1481,6 +1547,41 @@ const StringData _jscode_raw_utils =
"return \"error: couldn't find \"+hn+\" in \"+tojson(c.members);\n"
"};\n"
"\n"
+"rs.debug = {};\n"
+"\n"
+"rs.debug.nullLastOpWritten = function(primary, secondary) {\n"
+"var p = connect(primary+\"/local\");\n"
+"var s = connect(secondary+\"/local\");\n"
+"s.getMongo().setSlaveOk();\n"
+"\n"
+"var secondToLast = s.oplog.rs.find().sort({$natural : -1}).limit(1).next();\n"
+"var last = p.runCommand({findAndModify : \"oplog.rs\",\n"
+"query : {ts : {$gt : secondToLast.ts}},\n"
+"sort : {$natural : 1},\n"
+"update : {$set : {op : \"n\"}}});\n"
+"\n"
+"if (!last.value.o || !last.value.o._id) {\n"
+"print(\"couldn't find an _id?\");\n"
+"}\n"
+"else {\n"
+"last.value.o = {_id : last.value.o._id};\n"
+"}\n"
+"\n"
+"print(\"nulling out this op:\");\n"
+"printjson(last);\n"
+"};\n"
+"\n"
+"rs.debug.getLastOpWritten = function(server) {\n"
+"var s = db.getSisterDB(\"local\");\n"
+"if (server) {\n"
+"s = connect(server+\"/local\");\n"
+"}\n"
+"s.getMongo().setSlaveOk();\n"
+"\n"
+"return s.oplog.rs.find().sort({$natural : -1}).limit(1).next();\n"
+"};\n"
+"\n"
+"\n"
"help = shellHelper.help = function (x) {\n"
"if (x == \"mr\") {\n"
"print(\"\\nSee also http://www.mongodb.org/display/DOCS/MapReduce\");\n"
@@ -1634,7 +1735,8 @@ const StringData _jscode_raw_utils_sh =
"print( \"\\tsh.moveChunk(fullName,find,to) move the chunk where 'find' is to 'to' (name of shard)\");\n"
"\n"
"print( \"\\tsh.setBalancerState( <bool on or not> ) turns the balancer on or off true=on, false=off\" );\n"
-"print( \"\\tsh.getBalancerState() return true if on, off if not\" );\n"
+"print( \"\\tsh.getBalancerState() return true if on, off if not\" );\n"
+"print( \"\\tsh.isBalancerRunning() return true if the balancer is running on any mongos\" );\n"
"\n"
"print( \"\\tsh.status() prints a general overview of the cluster\" )\n"
"}\n"
@@ -1691,6 +1793,11 @@ const StringData _jscode_raw_utils_sh =
"return true;\n"
"return ! x.stopped;\n"
"}\n"
+"\n"
+"sh.isBalancerRunning = function() {\n"
+"var x = db.getSisterDB( \"config\" ).locks.findOne( { _id : \"balancer\" } );\n"
+"return x.state > 0;\n"
+"}\n"
;
extern const JSFile utils_sh;
const JSFile utils_sh = { "shell/utils_sh.js" , _jscode_raw_utils_sh };
@@ -2552,8 +2659,9 @@ const StringData _jscode_raw_mongo =
"mongoInject( Mongo.prototype );\n"
"}\n"
"\n"
-"Mongo.prototype.setSlaveOk = function() {\n"
-"this.slaveOk = true;\n"
+"Mongo.prototype.setSlaveOk = function( value ) {\n"
+"if( value == undefined ) value = true\n"
+"this.slaveOk = value\n"
"}\n"
"\n"
"Mongo.prototype.getDB = function( name ){\n"
@@ -2571,6 +2679,10 @@ const StringData _jscode_raw_mongo =
"return this.getDB( \"admin\" ).runCommand( cmd );\n"
"}\n"
"\n"
+"Mongo.prototype.setLogLevel = function( logLevel ){\n"
+"return this.adminCommand({ setParameter : 1, logLevel : logLevel })\n"
+"}\n"
+"\n"
"Mongo.prototype.getDBNames = function(){\n"
"return this.getDBs().databases.map(\n"
"function(z){\n"
@@ -3162,7 +3274,7 @@ const StringData _jscode_raw_collection =
"throw \"can't save a DBQuery object\";\n"
"}\n"
"\n"
-"DBCollection._allowedFields = { $id : 1 , $ref : 1 , $db : 1 };\n"
+"DBCollection._allowedFields = { $id : 1 , $ref : 1 , $db : 1 , $MinKey : 1, $MaxKey : 1 };\n"
"\n"
"DBCollection.prototype._validateForStorage = function( o ){\n"
"this._validateObject( o );\n"
diff --git a/shell/servers.js b/shell/servers.js
index a4e568a0f0f..e551559a79c 100755
--- a/shell/servers.js
+++ b/shell/servers.js
@@ -21,6 +21,25 @@ _parsePort = function() {
return port;
}
+connectionURLTheSame = function( a , b ){
+ if ( a == b )
+ return true;
+
+ if ( ! a || ! b )
+ return false;
+
+ a = a.split( "/" )[0]
+ b = b.split( "/" )[0]
+
+ return a == b;
+}
+
+assert( connectionURLTheSame( "foo" , "foo" ) )
+assert( ! connectionURLTheSame( "foo" , "bar" ) )
+
+assert( connectionURLTheSame( "foo/a,b" , "foo/b,a" ) )
+assert( ! connectionURLTheSame( "foo/a,b" , "bar/a,b" ) )
+
createMongoArgs = function( binaryName , args ){
var fullArgs = [ binaryName ];
@@ -79,6 +98,9 @@ startMongodTest = function (port, dirname, restart, extraOptions ) {
oplogSize: "40",
nohttpinterface: ""
};
+
+ if( jsTestOptions().noJournal ) options["nojournal"] = ""
+ if( jsTestOptions().noJournalPrealloc ) options["nopreallocj"] = ""
if ( extraOptions )
Object.extend( options , extraOptions );
@@ -158,6 +180,17 @@ myPort = function() {
* * useHostname to use the hostname (instead of localhost)
*/
ShardingTest = function( testName , numShards , verboseLevel , numMongos , otherParams ){
+
+ // Check if testName is an object, if so, pull params from there
+ if( testName && ! testName.charAt ){
+ var params = testName
+ testName = params.name || "test"
+ numShards = params.shards || 2
+ verboseLevel = params.verbose || 0
+ numMongos = params.mongos || 1
+ otherParams = params.other || {}
+ }
+
this._testName = testName;
if ( ! otherParams )
@@ -170,8 +203,7 @@ ShardingTest = function( testName , numShards , verboseLevel , numMongos , other
var localhost = otherParams.useHostname ? getHostName() : "localhost";
this._alldbpaths = []
-
-
+
if ( otherParams.rs ){
localhost = getHostName();
// start replica sets
@@ -179,15 +211,18 @@ ShardingTest = function( testName , numShards , verboseLevel , numMongos , other
for ( var i=0; i<numShards; i++){
var setName = testName + "-rs" + i;
- var rsDefaults = { oplogSize : 40 }
+ var rsDefaults = { oplogSize : 40, nodes : 3 }
var rsParams = otherParams["rs" + i]
for( var param in rsParams ){
rsDefaults[param] = rsParams[param]
}
+
+ var numReplicas = rsDefaults.nodes || otherParams.numReplicas || 3
+ delete rsDefaults.nodes
- var rs = new ReplSetTest( { name : setName , nodes : 3 , startPort : 31100 + ( i * 100 ) } );
- this._rs[i] = { setName : setName , test : rs , nodes : rs.startSet( rsParams ) , url : rs.getURL() };
+ var rs = new ReplSetTest( { name : setName , nodes : numReplicas , startPort : 31100 + ( i * 100 ) } );
+ this._rs[i] = { setName : setName , test : rs , nodes : rs.startSet( rsDefaults ) , url : rs.getURL() };
rs.initiate();
}
@@ -322,10 +357,9 @@ ShardingTest.prototype.getServer = function( dbname ){
for ( var i=0; i<this._connections.length; i++ ){
var c = this._connections[i];
- if ( name == c.name )
+ if ( connectionURLTheSame( name , c.name ) ||
+ connectionURLTheSame( rsName , c.name ) )
return c;
- if ( rsName && c.name.startsWith( rsName ) )
- return c;
}
throw "can't find server for: " + dbname + " name:" + name;
@@ -683,7 +717,7 @@ ShardingTest.prototype.getShards = function( coll, query ){
for( var i = 0; i < shards.length; i++ ){
for( var j = 0; j < this._connections.length; j++ ){
- if( this._connections[j].name == shards[i] ){
+ if ( connectionURLTheSame( this._connections[j].name , shards[i] ) ){
shards[i] = this._connections[j]
break;
}
@@ -730,22 +764,36 @@ ShardingTest.prototype.shardGo = function( collName , key , split , move , dbNam
if( collName.getDB )
c = "" + collName
+ var isEmpty = this.s.getCollection( c ).count() == 0
+
if( ! this.isSharded( dbName ) )
this.s.adminCommand( { enableSharding : dbName } )
- this.s.adminCommand( { shardcollection : c , key : key } );
- this.s.adminCommand( { split : c , middle : split } );
+ var result = this.s.adminCommand( { shardcollection : c , key : key } )
+ if( ! result.ok ){
+ printjson( result )
+ assert( false )
+ }
+ result = this.s.adminCommand( { split : c , middle : split } );
+ if( ! result.ok ){
+ printjson( result )
+ assert( false )
+ }
+
var result = null
for( var i = 0; i < 5; i++ ){
result = this.s.adminCommand( { movechunk : c , find : move , to : this.getOther( this.getServer( dbName ) ).name } );
if( result.ok ) break;
sleep( 5 * 1000 );
}
+ printjson( result )
assert( result.ok )
};
+ShardingTest.prototype.shardColl = ShardingTest.prototype.shardGo
+
ShardingTest.prototype.setBalancer = function( balancer ){
if( balancer || balancer == undefined ){
this.config.settings.update( { _id: "balancer" }, { $set : { stopped: false } } , true )
@@ -902,6 +950,8 @@ ReplTest.prototype.getOptions = function( master , extra , putBinaryFirst, norep
a.push( "--dbpath" );
a.push( this.getPath( master ) );
+ if( jsTestOptions().noJournal ) a.push( "--nojournal" )
+ if( jsTestOptions().noJournalPrealloc ) a.push( "--nopreallocj" )
if ( !norepl ) {
if ( master ){
@@ -1210,6 +1260,9 @@ ReplSetTest.prototype.getOptions = function( n , extra , putBinaryFirst ){
a.push( "--dbpath" );
a.push( this.getPath( ( n.host ? this.getNodeId( n ) : n ) ) );
+ if( jsTestOptions().noJournal ) a.push( "--nojournal" )
+ if( jsTestOptions().noJournalPrealloc ) a.push( "--nopreallocj" )
+
for ( var k in extra ){
var v = extra[k];
a.push( "--" + k );
@@ -1271,6 +1324,50 @@ ReplSetTest.prototype.callIsMaster = function() {
return master || false;
}
+ReplSetTest.awaitRSClientHosts = function( conn, host, hostOk, rs ) {
+
+ if( host.length ){
+ for( var i = 0; i < host.length; i++ ) this.awaitOk( conn, host[i] )
+ return
+ }
+
+ if( hostOk == undefined ) hostOk = { ok : true }
+ if( host.host ) host = host.host
+ if( rs && rs.getMaster ) rs = rs.name
+
+ print( "Awaiting " + host + " to be " + tojson( hostOk ) + " for " + conn + " (rs: " + rs + ")" )
+
+ var tests = 0
+ assert.soon( function() {
+ var rsClientHosts = conn.getDB( "admin" ).runCommand( "connPoolStats" )[ "replicaSets" ]
+ if( tests++ % 10 == 0 )
+ printjson( rsClientHosts )
+
+ for ( rsName in rsClientHosts ){
+ if( rs && rs != rsName ) continue
+ for ( var i = 0; i < rsClientHosts[rsName].hosts.length; i++ ){
+ var clientHost = rsClientHosts[rsName].hosts[ i ];
+ if( clientHost.addr != host ) continue
+
+ // Check that *all* host properties are set correctly
+ var propOk = true
+ for( var prop in hostOk ){
+ if( clientHost[prop] != hostOk[prop] ){
+ propOk = false
+ break
+ }
+ }
+
+ if( propOk ) return true;
+
+ }
+ }
+ return false;
+ }, "timed out waiting for replica set client to recognize hosts",
+ 3 * 20 * 1000 /* ReplicaSetMonitorWatcher updates every 20s */ )
+
+}
+
ReplSetTest.prototype.awaitSecondaryNodes = function( timeout ) {
var master = this.getMaster();
var slaves = this.liveNodes.slaves;
@@ -1296,6 +1393,7 @@ ReplSetTest.prototype.getMaster = function( timeout ) {
return master;
}
+ReplSetTest.prototype.getPrimary = ReplSetTest.prototype.getMaster
ReplSetTest.prototype.getSecondaries = function( timeout ){
var master = this.getMaster( timeout )
@@ -1308,6 +1406,16 @@ ReplSetTest.prototype.getSecondaries = function( timeout ){
return secs
}
+ReplSetTest.prototype.getSecondary = function( timeout ){
+ return this.getSecondaries( timeout )[0];
+}
+
+ReplSetTest.prototype.status = function( timeout ){
+ var master = this.callIsMaster()
+ if( ! master ) master = this.liveNodes.slaves[0]
+ return master.getDB("admin").runCommand({replSetGetStatus: 1})
+}
+
// Add a node to the test set
ReplSetTest.prototype.add = function( config ) {
if(this.ports.length == 0) {
@@ -1379,62 +1487,76 @@ ReplSetTest.prototype.reInitiate = function() {
this.initiate( config , 'replSetReconfig' );
}
+ReplSetTest.prototype.getLastOpTimeWritten = function() {
+ this.getMaster();
+ this.attempt({context : this, desc : "awaiting oplog query"},
+ function() {
+ try {
+ this.latest = this.liveNodes.master.getDB("local")['oplog.rs'].find({}).sort({'$natural': -1}).limit(1).next()['ts'];
+ }
+ catch(e) {
+ print("ReplSetTest caught exception " + e);
+ return false;
+ }
+ return true;
+ });
+};
+
ReplSetTest.prototype.awaitReplication = function(timeout) {
- this.getMaster();
- timeout = timeout || 30000;
+ timeout = timeout || 30000;
- this.attempt({context : this, desc : "awaiting oplog query"},
- function() {
- try {
- latest = this.liveNodes.master.getDB("local")['oplog.rs'].find({}).sort({'$natural': -1}).limit(1).next()['ts'];
- }
- catch(e) {
- print("ReplSetTest caught exception " + e);
- return false;
- }
- return true;
- });
-
- print("ReplSetTest " + latest);
-
- this.attempt({context: this, timeout: timeout, desc: "awaiting replication"},
- function() {
- var synced = true;
- for(var i=0; i<this.liveNodes.slaves.length; i++) {
- var slave = this.liveNodes.slaves[i];
-
- // Continue if we're connected to an arbiter
- if(res = slave.getDB("admin").runCommand({replSetGetStatus: 1})) {
- if(res.myState == 7) {
- continue;
- }
- }
-
- slave.getDB("admin").getMongo().setSlaveOk();
- var log = slave.getDB("local")['oplog.rs'];
- if(log.find({}).sort({'$natural': -1}).limit(1).hasNext()) {
- var entry = log.find({}).sort({'$natural': -1}).limit(1).next();
- printjson( entry );
- var ts = entry['ts'];
- print("ReplSetTest await TS for " + slave + " is " + ts.t+":"+ts.i + " and latest is " + latest.t+":"+latest.i);
-
- if (latest.t < ts.t || (latest.t == ts.t && latest.i < ts.i)) {
- latest = this.liveNodes.master.getDB("local")['oplog.rs'].find({}).sort({'$natural': -1}).limit(1).next()['ts'];
- }
-
- print("ReplSetTest await oplog size for " + slave + " is " + log.count());
- synced = (synced && friendlyEqual(latest,ts))
- }
- else {
- synced = false;
- }
- }
-
- if(synced) {
- print("ReplSetTest await synced=" + synced);
- }
- return synced;
- });
+ this.getLastOpTimeWritten();
+
+ print("ReplSetTest " + this.latest);
+
+ this.attempt({context: this, timeout: timeout, desc: "awaiting replication"},
+ function() {
+ try {
+ var synced = true;
+ for(var i=0; i<this.liveNodes.slaves.length; i++) {
+ var slave = this.liveNodes.slaves[i];
+
+ // Continue if we're connected to an arbiter
+ if(res = slave.getDB("admin").runCommand({replSetGetStatus: 1})) {
+ if(res.myState == 7) {
+ continue;
+ }
+ }
+
+ slave.getDB("admin").getMongo().setSlaveOk();
+ var log = slave.getDB("local")['oplog.rs'];
+ if(log.find({}).sort({'$natural': -1}).limit(1).hasNext()) {
+ var entry = log.find({}).sort({'$natural': -1}).limit(1).next();
+ printjson( entry );
+ var ts = entry['ts'];
+ print("ReplSetTest await TS for " + slave + " is " + ts.t+":"+ts.i + " and latest is " + this.latest.t+":"+this.latest.i);
+
+ if (this.latest.t < ts.t || (this.latest.t == ts.t && this.latest.i < ts.i)) {
+ this.latest = this.liveNodes.master.getDB("local")['oplog.rs'].find({}).sort({'$natural': -1}).limit(1).next()['ts'];
+ }
+
+ print("ReplSetTest await oplog size for " + slave + " is " + log.count());
+ synced = (synced && friendlyEqual(this.latest,ts))
+ }
+ else {
+ synced = false;
+ }
+ }
+
+ if(synced) {
+ print("ReplSetTest await synced=" + synced);
+ }
+ return synced;
+ }
+ catch (e) {
+ print("ReplSetTest.awaitReplication: caught exception "+e);
+
+ // we might have a new master now
+ this.getLastOpTimeWritten();
+
+ return false;
+ }
+ });
}
ReplSetTest.prototype.getHashes = function( db ){
@@ -1704,10 +1826,10 @@ ReplSetTest.prototype.waitForIndicator = function( node, states, ind, timeout ){
var lastTime = null
var currTime = new Date().getTime()
var status = undefined
-
+
this.attempt({context: this, timeout: timeout, desc: "waiting for state indicator " + ind + " for " + timeout + "ms" }, function() {
- status = this.getMaster().getDB("admin").runCommand({ replSetGetStatus : 1 })
+ status = this.status()
if( lastTime == null || ( currTime = new Date().getTime() ) - (1000 * 5) > lastTime ){
if( lastTime == null ) print( "ReplSetTest waitForIndicator Initial status ( timeout : " + timeout + " ) :" )
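servers.js gains an object-style ShardingTest constructor, TestData-driven journaling flags, and several ReplSetTest helpers (getPrimary, getSecondary, status, getLastOpTimeWritten, awaitRSClientHosts). A hedged sketch of the new constructor form and the aliases; names and node counts here are illustrative only:

    var st = new ShardingTest( { name : "mytest", shards : 2, mongos : 1,
                                 other : { rs : true, rs0 : { nodes : 2 } } } )

    var rst = st._rs[0].test           // the ReplSetTest behind shard 0 when rs : true
    var primary   = rst.getPrimary()   // new alias for getMaster()
    var secondary = rst.getSecondary()
    printjson( rst.status() )          // replSetGetStatus from whichever node answers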
diff --git a/shell/utils.js b/shell/utils.js
index 9a239dbc4a9..a903691fbd4 100644
--- a/shell/utils.js
+++ b/shell/utils.js
@@ -84,6 +84,26 @@ assert.neq = function( a , b , msg ){
doassert( "[" + a + "] != [" + b + "] are equal : " + msg );
}
+assert.contains = function( o, arr, msg ){
+ var wasIn = false
+
+ if( ! arr.length ){
+ for( i in arr ){
+ wasIn = arr[i] == o || ( ( arr[i] != null && o != null ) && friendlyEqual( arr[i] , o ) )
+ if( wasIn ) break
+ }
+ }
+ else {
+ for( var i = 0; i < arr.length; i++ ){
+ wasIn = arr[i] == o || ( ( arr[i] != null && o != null ) && friendlyEqual( arr[i] , o ) )
+ if( wasIn ) break
+ }
+ }
+
+ if( ! wasIn ) doassert( tojson( o ) + " was not in " + tojson( arr ) + " : " + msg )
+}
+
assert.repeat = function( f, msg, timeout, interval ) {
if ( assert._debug && msg ) print( "in assert for: " + msg );
@@ -211,6 +231,18 @@ assert.gte = function( a , b , msg ){
doassert( a + " is not greater than or eq " + b + " : " + msg );
}
+assert.between = function( a, b, c, msg, inclusive ){
+ if ( assert._debug && msg ) print( "in assert for: " + msg );
+
+ if( ( inclusive == undefined || inclusive == true ) &&
+ a <= b && b <= c ) return;
+ else if( a < b && b < c ) return;
+
+ doassert( b + " is not between " + a + " and " + c + " : " + msg );
+}
+
+assert.betweenIn = function( a, b, c, msg ){ assert.between( a, b, c, msg, true ) }
+assert.betweenEx = function( a, b, c, msg ){ assert.between( a, b, c, msg, false ) }
assert.close = function( a , b , msg , places ){
if (places === undefined) {
@@ -238,6 +270,11 @@ Object.extend = function( dst , src , deep ){
return dst;
}
+Object.merge = function( dst, src, deep ){
+ var clone = Object.extend( {}, dst, deep )
+ return Object.extend( clone, src, deep )
+}
+
argumentsToArray = function( a ){
var arr = [];
for ( var i=0; i<a.length; i++ )
@@ -938,6 +975,35 @@ printjsononeline = function(x){
print( tojsononeline( x ) );
}
+if ( typeof TestData == "undefined" ){
+ TestData = undefined
+}
+
+jsTestName = function(){
+ if( TestData ) return TestData.testName
+ return "__unknown_name__"
+}
+
+jsTestFile = function(){
+ if( TestData ) return TestData.testFile
+ return "__unknown_file__"
+}
+
+jsTestPath = function(){
+ if( TestData ) return TestData.testPath
+ return "__unknown_path__"
+}
+
+jsTestOptions = function(){
+ if( TestData ) return { noJournal : TestData.noJournal,
+ noJournalPrealloc : TestData.noJournalPrealloc }
+ return {}
+}
+
+testLog = function(x){
+ print( jsTestFile() + " - " + x )
+}
+
shellPrintHelper = function (x) {
if (typeof (x) == "undefined") {
@@ -1476,6 +1542,41 @@ rs.remove = function (hn) {
return "error: couldn't find "+hn+" in "+tojson(c.members);
};
+rs.debug = {};
+
+rs.debug.nullLastOpWritten = function(primary, secondary) {
+ var p = connect(primary+"/local");
+ var s = connect(secondary+"/local");
+ s.getMongo().setSlaveOk();
+
+ var secondToLast = s.oplog.rs.find().sort({$natural : -1}).limit(1).next();
+ var last = p.runCommand({findAndModify : "oplog.rs",
+ query : {ts : {$gt : secondToLast.ts}},
+ sort : {$natural : 1},
+ update : {$set : {op : "n"}}});
+
+ if (!last.value.o || !last.value.o._id) {
+ print("couldn't find an _id?");
+ }
+ else {
+ last.value.o = {_id : last.value.o._id};
+ }
+
+ print("nulling out this op:");
+ printjson(last);
+};
+
+rs.debug.getLastOpWritten = function(server) {
+ var s = db.getSisterDB("local");
+ if (server) {
+ s = connect(server+"/local");
+ }
+ s.getMongo().setSlaveOk();
+
+ return s.oplog.rs.find().sort({$natural : -1}).limit(1).next();
+};
+
+
help = shellHelper.help = function (x) {
if (x == "mr") {
print("\nSee also http://www.mongodb.org/display/DOCS/MapReduce");
diff --git a/shell/utils_sh.js b/shell/utils_sh.js
index 2f4a5a3f85b..5bd449bc61d 100644
--- a/shell/utils_sh.js
+++ b/shell/utils_sh.js
@@ -33,7 +33,8 @@ sh.help = function() {
print( "\tsh.moveChunk(fullName,find,to) move the chunk where 'find' is to 'to' (name of shard)");
print( "\tsh.setBalancerState( <bool on or not> ) turns the balancer on or off true=on, false=off" );
- print( "\tsh.getBalancerState() return true if on, off if not" );
+ print( "\tsh.getBalancerState() return true if on, off if not" );
+ print( "\tsh.isBalancerRunning() return true if the balancer is running on any mongos" );
print( "\tsh.status() prints a general overview of the cluster" )
}
@@ -90,3 +91,8 @@ sh.getBalancerState = function() {
return true;
return ! x.stopped;
}
+
+sh.isBalancerRunning = function() {
+ var x = db.getSisterDB( "config" ).locks.findOne( { _id : "balancer" } );
+ return x.state > 0;
+}
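sh.isBalancerRunning checks the config.locks document for an active balancer lock, whereas sh.getBalancerState only reports the configured on/off setting, so the two can briefly disagree right after toggling. A short shell sketch:

    sh.setBalancerState( false )   // request that balancing stop
    sh.getBalancerState()          // false immediately: the setting is off
    sh.isBalancerRunning()         // may stay true until the current round releases the lock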
diff --git a/speed.js b/speed.js
new file mode 100755
index 00000000000..c5aa3a36964
--- /dev/null
+++ b/speed.js
@@ -0,0 +1,13 @@
+t = db.fooo;
+t.drop();
+x = { str:'aaaabbbbcc' }
+s = new Date();
+for( var i = 0; i < 100000; i++ ) {
+ x.i = i;
+ t.insert(x);
+}
+print( (new Date())-s );
+t.ensureIndex({x:1});
+t.ensureIndex({str:1});
+print( (new Date())-s );
+
diff --git a/third_party/linenoise/linenoise.cpp b/third_party/linenoise/linenoise.cpp
index 81f76194512..dca8dbb5a4f 100644
--- a/third_party/linenoise/linenoise.cpp
+++ b/third_party/linenoise/linenoise.cpp
@@ -549,7 +549,10 @@ static int linenoisePrompt(int fd, char *buf, size_t buflen, const char *prompt)
/* Only autocomplete when the callback is set. It returns < 0 when
* there was an error reading from fd. Otherwise it will return the
* character that should be handled next. */
- if (c == 9 && completionCallback != NULL) {
+ if (c == 9 && completionCallback != NULL) { /* tab */
+ /* ignore tabs used for indentation */
+ if (pos == 0) continue;
+
c = completeLine(fd,prompt,buf,buflen,&len,&pos,cols);
/* Return on errors */
if (c < 0) return len;
diff --git a/third_party/snappy/COPYING b/third_party/snappy/COPYING
new file mode 100755
index 00000000000..8d6bd9fed4e
--- /dev/null
+++ b/third_party/snappy/COPYING
@@ -0,0 +1,28 @@
+Copyright 2011, Google Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+ * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/third_party/snappy/README b/third_party/snappy/README
new file mode 100755
index 00000000000..df8f0e178e2
--- /dev/null
+++ b/third_party/snappy/README
@@ -0,0 +1,135 @@
+Snappy, a fast compressor/decompressor.
+
+
+Introduction
+============
+
+Snappy is a compression/decompression library. It does not aim for maximum
+compression, or compatibility with any other compression library; instead,
+it aims for very high speeds and reasonable compression. For instance,
+compared to the fastest mode of zlib, Snappy is an order of magnitude faster
+for most inputs, but the resulting compressed files are anywhere from 20% to
+100% bigger. (For more information, see "Performance", below.)
+
+Snappy has the following properties:
+
+ * Fast: Compression speeds at 250 MB/sec and beyond, with no assembler code.
+ See "Performance" below.
+ * Stable: Over the last few years, Snappy has compressed and decompressed
+ petabytes of data in Google's production environment. The Snappy bitstream
+ format is stable and will not change between versions.
+ * Robust: The Snappy decompressor is designed not to crash in the face of
+ corrupted or malicious input.
+ * Free and open source software: Snappy is licensed under a BSD-type license.
+ For more information, see the included COPYING file.
+
+Snappy has previously been called "Zippy" in some Google presentations
+and the like.
+
+
+Performance
+===========
+
+Snappy is intended to be fast. On a single core of a Core i7 processor
+in 64-bit mode, it compresses at about 250 MB/sec or more and decompresses at
+about 500 MB/sec or more. (These numbers are for the slowest inputs in our
+benchmark suite; others are much faster.) In our tests, Snappy usually
+is faster than algorithms in the same class (e.g. LZO, LZF, FastLZ, QuickLZ,
+etc.) while achieving comparable compression ratios.
+
+Typical compression ratios (based on the benchmark suite) are about 1.5-1.7x
+for plain text, about 2-4x for HTML, and of course 1.0x for JPEGs, PNGs and
+other already-compressed data. Similar numbers for zlib in its fastest mode
+are 2.6-2.8x, 3-7x and 1.0x, respectively. More sophisticated algorithms are
+capable of achieving yet higher compression rates, although usually at the
+expense of speed. Of course, compression ratio will vary significantly with
+the input.
+
+Although Snappy should be fairly portable, it is primarily optimized
+for 64-bit x86-compatible processors, and may run slower in other environments.
+In particular:
+
+ - Snappy uses 64-bit operations in several places to process more data at
+ once than would otherwise be possible.
+ - Snappy assumes unaligned 32- and 64-bit loads and stores are cheap.
+ On some platforms, these must be emulated with single-byte loads
+ and stores, which is much slower.
+ - Snappy assumes little-endian throughout, and needs to byte-swap data in
+ several places if running on a big-endian platform.
+
+Experience has shown that even heavily tuned code can be improved.
+Performance optimizations, whether for 64-bit x86 or other platforms,
+are of course most welcome; see "Contact", below.
+
+
+Usage
+=====
+
+Note that Snappy, both the implementation and the main interface,
+is written in C++. However, several third-party bindings to other languages
+are available; see the Google Code page at http://code.google.com/p/snappy/
+for more information. Also, if you want to use Snappy from C code, you can
+use the included C bindings in snappy-c.h.
+
+To use Snappy from your own C++ program, include the file "snappy.h" from
+your calling file, and link against the compiled library.
+
+There are many ways to call Snappy, but the simplest possible is
+
+ snappy::Compress(input, &output);
+
+and similarly
+
+ snappy::Uncompress(input, &output);
+
+where "input" and "output" are both instances of std::string.
+
+There are other interfaces that are more flexible in various ways, including
+support for custom (non-array) input sources. See the header file for more
+information.
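[Editor's illustration -- not part of the vendored README or of this commit.]
The Usage text above shows the string-based convenience calls; the sketch below
uses the raw-pointer overloads from snappy.h for a full compress/decompress
round trip. The file name is made up and error handling is omitted.

    // roundtrip_example.cc (hypothetical file, shown for illustration only)
    #include <cassert>
    #include <iostream>
    #include <string>
    #include "snappy.h"

    int main() {
        std::string original(100000, 'a');   // highly compressible input
        std::string compressed, restored;

        // Compress, sanity-check the result, then decompress it again.
        snappy::Compress(original.data(), original.size(), &compressed);
        assert(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
        snappy::Uncompress(compressed.data(), compressed.size(), &restored);

        assert(restored == original);
        std::cout << original.size() << " -> " << compressed.size() << " bytes\n";
        return 0;
    }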
+
+
+Tests and benchmarks
+====================
+
+When you compile Snappy, snappy_unittest is compiled in addition to the
+library itself. You do not need it to use the compressor from your own library,
+but it contains several useful components for Snappy development.
+
+First of all, it contains unit tests, verifying correctness on your machine in
+various scenarios. If you want to change or optimize Snappy, please run the
+tests to verify you have not broken anything. Note that if you have the
+Google Test library installed, unit test behavior (especially failures) will be
+significantly more user-friendly. You can find Google Test at
+
+ http://code.google.com/p/googletest/
+
+You probably also want the gflags library for handling of command-line flags;
+you can find it at
+
+ http://code.google.com/p/google-gflags/
+
+In addition to the unit tests, snappy contains microbenchmarks used to
+tune compression and decompression performance. These are automatically run
+before the unit tests, but you can disable them using the flag
+--run_microbenchmarks=false if you have gflags installed (otherwise you will
+need to edit the source).
+
+Finally, snappy can benchmark Snappy against a few other compression libraries
+(zlib, LZO, LZF, FastLZ and QuickLZ), if they were detected at configure time.
+To benchmark using a given file, give the compression algorithm you want to test
+Snappy against (e.g. --zlib) and then a list of one or more file names on the
+command line. The testdata/ directory contains the files used by the
+microbenchmark, which should provide a reasonably balanced starting point for
+benchmarking. (Note that baddata[1-3].snappy are not intended as benchmarks; they
+are used to verify correctness in the presence of corrupted data in the unit
+test.)
+
+
+Contact
+=======
+
+Snappy is distributed through Google Code. For the latest version, a bug tracker,
+and other information, see
+
+ http://code.google.com/p/snappy/
diff --git a/third_party/snappy/config.h b/third_party/snappy/config.h
new file mode 100755
index 00000000000..bfc3b30087f
--- /dev/null
+++ b/third_party/snappy/config.h
@@ -0,0 +1,124 @@
+/* config.h.in. Generated from configure.ac by autoheader. */
+
+/* Define if building universal (internal helper macro) */
+//#undef AC_APPLE_UNIVERSAL_BUILD
+
+#if defined(_WIN32)
+// signed/unsigned mismatch
+#pragma warning( disable : 4018 )
+#endif
+
+/* Define to 1 if the compiler supports __builtin_ctz and friends. */
+#if defined(__GNUC__)
+#define HAVE_BUILTIN_CTZ 1
+#endif
+
+/* Define to 1 if the compiler supports __builtin_expect. */
+#if defined(__GNUC__)
+#define HAVE_BUILTIN_EXPECT 1
+#endif
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#if !defined(_WIN32)
+#define HAVE_DLFCN_H 1
+#endif
+
+/* Use the gflags package for command-line parsing. */
+#undef HAVE_GFLAGS
+
+/* Defined when Google Test is available. */
+#undef HAVE_GTEST
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#define HAVE_INTTYPES_H 1
+
+/* Define to 1 if you have the `fastlz' library (-lfastlz). */
+#undef HAVE_LIBFASTLZ
+
+/* Define to 1 if you have the `lzf' library (-llzf). */
+#undef HAVE_LIBLZF
+
+/* Define to 1 if you have the `lzo2' library (-llzo2). */
+#undef HAVE_LIBLZO2
+
+/* Define to 1 if you have the `quicklz' library (-lquicklz). */
+#undef HAVE_LIBQUICKLZ
+
+/* Define to 1 if you have the `z' library (-lz). */
+#undef HAVE_LIBZ
+
+/* Define to 1 if you have the <memory.h> header file. */
+#define HAVE_MEMORY_H 1
+
+/* Define to 1 if you have the <stddef.h> header file. */
+#define HAVE_STDDEF_H 1
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#define HAVE_STDINT_H 1
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#define HAVE_STDLIB_H 1
+
+/* Define to 1 if you have the <strings.h> header file. */
+#define HAVE_STRINGS_H 1
+
+/* Define to 1 if you have the <string.h> header file. */
+#define HAVE_STRING_H 1
+
+/* Define to 1 if you have the <sys/mman.h> header file. */
+#if !defined(_WIN32)
+#define HAVE_SYS_MMAN_H 1
+#endif
+
+/* Define to 1 if you have the <sys/resource.h> header file. */
+#define HAVE_SYS_RESOURCE_H 1
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#define HAVE_SYS_STAT_H 1
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#define HAVE_SYS_TYPES_H 1
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#define HAVE_UNISTD_H 1
+
+/* Define to 1 if you have the <windows.h> header file. */
+#if defined(_WIN32)
+#define HAVE_WINDOWS_H 1
+#endif
+
+/* Define to the sub-directory in which libtool stores uninstalled libraries.
+ */
+#define LT_OBJDIR "libs/"
+
+/* Name of package */
+#define PACKAGE "snappy"
+
+#define PACKAGE_BUGREPORT ""
+
+/* Define to the full name of this package. */
+#define PACKAGE_NAME "snappy"
+
+/* Define to the full name and version of this package. */
+#define PACKAGE_STRING "snappy 1.0.3"
+
+/* Define to the one symbol short name of this package. */
+#define PACKAGE_TARNAME "snappy"
+
+/* Define to the home page for this package. */
+#define PACKAGE_URL ""
+
+/* Define to the version of this package. */
+#define PACKAGE_VERSION "1.0.3"
+
+/* Define to 1 if you have the ANSI C header files. */
+#define STDC_HEADERS 1
+
+/* Version number of package */
+#define VERSION "1.0.3"
+
+/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most
+ significant byte first (like Motorola and SPARC, unlike Intel). */
+#if defined(__BIG_ENDIAN__)
+#define WORDS_BIGENDIAN 1
+#endif
diff --git a/third_party/snappy/snappy-internal.h b/third_party/snappy/snappy-internal.h
new file mode 100755
index 00000000000..a32eda59fb2
--- /dev/null
+++ b/third_party/snappy/snappy-internal.h
@@ -0,0 +1,150 @@
+// Copyright 2008 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Internals shared between the Snappy implementation and its unittest.
+
+#ifndef UTIL_SNAPPY_SNAPPY_INTERNAL_H_
+#define UTIL_SNAPPY_SNAPPY_INTERNAL_H_
+
+#include "snappy-stubs-internal.h"
+
+namespace snappy {
+namespace internal {
+
+class WorkingMemory {
+ public:
+ WorkingMemory() : large_table_(NULL) { }
+ ~WorkingMemory() { delete[] large_table_; }
+
+ // Allocates and clears a hash table using memory in "*this",
+ // stores the number of buckets in "*table_size" and returns a pointer to
+ // the base of the hash table.
+ uint16* GetHashTable(size_t input_size, int* table_size);
+
+ private:
+ uint16 small_table_[1<<10]; // 2KB
+ uint16* large_table_; // Allocated only when needed
+
+ DISALLOW_COPY_AND_ASSIGN(WorkingMemory);
+};
+
+// Flat array compression that does not emit the "uncompressed length"
+// prefix. Compresses "input" string to the "*op" buffer.
+//
+// REQUIRES: "input_length <= kBlockSize"
+// REQUIRES: "op" points to an array of memory that is at least
+// "MaxCompressedLength(input_length)" in size.
+// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
+// REQUIRES: "table_size" is a power of two
+//
+// Returns an "end" pointer into "op" buffer.
+// "end - op" is the compressed size of "input".
+char* CompressFragment(const char* input,
+ size_t input_length,
+ char* op,
+ uint16* table,
+ const int table_size);
+
+// Return the largest n such that
+//
+// s1[0,n-1] == s2[0,n-1]
+// and n <= (s2_limit - s2).
+//
+// Does not read *s2_limit or beyond.
+// Does not read *(s1 + (s2_limit - s2)) or beyond.
+// Requires that s2_limit >= s2.
+//
+// Separate implementation for x86_64, for speed. Uses the fact that
+// x86_64 is little endian.
+#if defined(ARCH_K8)
+static inline int FindMatchLength(const char* s1,
+ const char* s2,
+ const char* s2_limit) {
+ DCHECK_GE(s2_limit, s2);
+ int matched = 0;
+
+ // Find out how long the match is. We loop over the data 64 bits at a
+ // time until we find a 64-bit block that doesn't match; then we find
+ // the first non-matching bit and use that to calculate the total
+ // length of the match.
+ while (PREDICT_TRUE(s2 <= s2_limit - 8)) {
+ if (PREDICT_FALSE(UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched))) {
+ s2 += 8;
+ matched += 8;
+ } else {
+ // On current (mid-2008) Opteron models there is a 3% more
+ // efficient code sequence to find the first non-matching byte.
+ // However, what follows is ~10% better on Intel Core 2 and newer,
+ // and we expect AMD's bsf instruction to improve.
+ uint64 x = UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1 + matched);
+ int matching_bits = Bits::FindLSBSetNonZero64(x);
+ matched += matching_bits >> 3;
+ return matched;
+ }
+ }
+ while (PREDICT_TRUE(s2 < s2_limit)) {
+ if (PREDICT_TRUE(s1[matched] == *s2)) {
+ ++s2;
+ ++matched;
+ } else {
+ return matched;
+ }
+ }
+ return matched;
+}
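// [Editorial worked example, not part of the original header or commit.]
// Suppose the two eight-byte blocks loaded above agree in bytes 0..2 and
// first differ in byte 3. On little-endian x86_64, byte i of the load sits
// in bits 8*i .. 8*i+7 of the integer, so the lowest set bit of
// x = UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1 + matched) lies in bits
// 24..31. FindLSBSetNonZero64(x) therefore returns a value in [24, 31],
// matching_bits >> 3 == 3, and exactly the three matching bytes are added
// to "matched" before the function returns.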
+#else
+static inline int FindMatchLength(const char* s1,
+ const char* s2,
+ const char* s2_limit) {
+ // Implementation based on the x86-64 version, above.
+ DCHECK_GE(s2_limit, s2);
+ int matched = 0;
+
+ while (s2 <= s2_limit - 4 &&
+ UNALIGNED_LOAD32(s2) == UNALIGNED_LOAD32(s1 + matched)) {
+ s2 += 4;
+ matched += 4;
+ }
+ if (LittleEndian::IsLittleEndian() && s2 <= s2_limit - 4) {
+ uint32 x = UNALIGNED_LOAD32(s2) ^ UNALIGNED_LOAD32(s1 + matched);
+ int matching_bits = Bits::FindLSBSetNonZero(x);
+ matched += matching_bits >> 3;
+ } else {
+ while ((s2 < s2_limit) && (s1[matched] == *s2)) {
+ ++s2;
+ ++matched;
+ }
+ }
+ return matched;
+}
+#endif
+
+} // end namespace internal
+} // end namespace snappy
+
+#endif // UTIL_SNAPPY_SNAPPY_INTERNAL_H_
diff --git a/third_party/snappy/snappy-sinksource.cc b/third_party/snappy/snappy-sinksource.cc
new file mode 100755
index 00000000000..1017895f962
--- /dev/null
+++ b/third_party/snappy/snappy-sinksource.cc
@@ -0,0 +1,72 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <string.h>
+
+#include "snappy-sinksource.h"
+
+namespace snappy {
+
+Source::~Source() { }
+
+Sink::~Sink() { }
+
+char* Sink::GetAppendBuffer(size_t length, char* scratch) {
+ return scratch;
+}
+
+ByteArraySource::~ByteArraySource() { }
+
+size_t ByteArraySource::Available() const { return left_; }
+
+const char* ByteArraySource::Peek(size_t* len) {
+ *len = left_;
+ return ptr_;
+}
+
+void ByteArraySource::Skip(size_t n) {
+ left_ -= n;
+ ptr_ += n;
+}
+
+UncheckedByteArraySink::~UncheckedByteArraySink() { }
+
+void UncheckedByteArraySink::Append(const char* data, size_t n) {
+ // Do no copying if the caller filled in the result of GetAppendBuffer()
+ if (data != dest_) {
+ memcpy(dest_, data, n);
+ }
+ dest_ += n;
+}
+
+char* UncheckedByteArraySink::GetAppendBuffer(size_t len, char* scratch) {
+ return dest_;
+}
+
+
+}
diff --git a/third_party/snappy/snappy-sinksource.h b/third_party/snappy/snappy-sinksource.h
new file mode 100755
index 00000000000..430baeabb0e
--- /dev/null
+++ b/third_party/snappy/snappy-sinksource.h
@@ -0,0 +1,136 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UTIL_SNAPPY_SNAPPY_SINKSOURCE_H_
+#define UTIL_SNAPPY_SNAPPY_SINKSOURCE_H_
+
+#include <stddef.h>
+
+
+namespace snappy {
+
+// A Sink is an interface that consumes a sequence of bytes.
+class Sink {
+ public:
+ Sink() { }
+ virtual ~Sink();
+
+ // Append "bytes[0,n-1]" to this.
+ virtual void Append(const char* bytes, size_t n) = 0;
+
+ // Returns a writable buffer of the specified length for appending.
+ // May return a pointer to the caller-owned scratch buffer which
+ // must have at least the indicated length. The returned buffer is
+ // only valid until the next operation on this Sink.
+ //
+ // After writing at most "length" bytes, call Append() with the
+ // pointer returned from this function and the number of bytes
+ // written. Many Append() implementations will avoid copying
+ // bytes if this function returned an internal buffer.
+ //
+ // If a non-scratch buffer is returned, the caller may only pass a
+ // prefix of it to Append(). That is, it is not correct to pass an
+ // interior pointer of the returned array to Append().
+ //
+ // The default implementation always returns the scratch buffer.
+ virtual char* GetAppendBuffer(size_t length, char* scratch);
+
+ private:
+ // No copying
+ Sink(const Sink&);
+ void operator=(const Sink&);
+};
+
+// A Source is an interface that yields a sequence of bytes
+class Source {
+ public:
+ Source() { }
+ virtual ~Source();
+
+ // Return the number of bytes left to read from the source
+ virtual size_t Available() const = 0;
+
+ // Peek at the next flat region of the source. Does not reposition
+ // the source. The returned region is empty iff Available()==0.
+ //
+  // Returns a pointer to the beginning of the region and stores its
+ // length in *len.
+ //
+ // The returned region is valid until the next call to Skip() or
+ // until this object is destroyed, whichever occurs first.
+ //
+ // The returned region may be larger than Available() (for example
+ // if this ByteSource is a view on a substring of a larger source).
+ // The caller is responsible for ensuring that it only reads the
+ // Available() bytes.
+ virtual const char* Peek(size_t* len) = 0;
+
+ // Skip the next n bytes. Invalidates any buffer returned by
+ // a previous call to Peek().
+ // REQUIRES: Available() >= n
+ virtual void Skip(size_t n) = 0;
+
+ private:
+ // No copying
+ Source(const Source&);
+ void operator=(const Source&);
+};
+
+// A Source implementation that yields the contents of a flat array
+class ByteArraySource : public Source {
+ public:
+ ByteArraySource(const char* p, size_t n) : ptr_(p), left_(n) { }
+ virtual ~ByteArraySource();
+ virtual size_t Available() const;
+ virtual const char* Peek(size_t* len);
+ virtual void Skip(size_t n);
+ private:
+ const char* ptr_;
+ size_t left_;
+};
+
+// A Sink implementation that writes to a flat array without any bound checks.
+class UncheckedByteArraySink : public Sink {
+ public:
+ explicit UncheckedByteArraySink(char* dest) : dest_(dest) { }
+ virtual ~UncheckedByteArraySink();
+ virtual void Append(const char* data, size_t n);
+ virtual char* GetAppendBuffer(size_t len, char* scratch);
+
+ // Return the current output pointer so that a caller can see how
+ // many bytes were produced.
+ // Note: this is not a Sink method.
+ char* CurrentDestination() const { return dest_; }
+ private:
+ char* dest_;
+};
+
+
+}
+
+#endif // UTIL_SNAPPY_SNAPPY_SINKSOURCE_H_
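[Editor's note -- illustrative sketch, not part of the vendored sources or of
this commit.] The Sink contract documented above is small; the hypothetical
class below shows one way to satisfy it by appending into a std::string. It
keeps the default GetAppendBuffer(), which returns the caller's scratch
buffer, so Append() must always copy. A sink like this could be handed to the
Source/Sink entry points declared in snappy.h, such as Compress(Source*, Sink*).

    #include <string>
    #include "snappy-sinksource.h"

    // Hypothetical Sink that accumulates all produced bytes in a std::string.
    class StringAppendSink : public snappy::Sink {
     public:
      explicit StringAppendSink(std::string* out) : out_(out) { }
      virtual ~StringAppendSink() { }

      // The bytes may live in the scratch buffer returned by the default
      // GetAppendBuffer() implementation, so copy them out here.
      virtual void Append(const char* bytes, size_t n) {
        out_->append(bytes, n);
      }

     private:
      std::string* out_;   // not owned
    };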
diff --git a/third_party/snappy/snappy-stubs-internal.cc b/third_party/snappy/snappy-stubs-internal.cc
new file mode 100755
index 00000000000..6ed334371f1
--- /dev/null
+++ b/third_party/snappy/snappy-stubs-internal.cc
@@ -0,0 +1,42 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <algorithm>
+#include <string>
+
+#include "snappy-stubs-internal.h"
+
+namespace snappy {
+
+void Varint::Append32(string* s, uint32 value) {
+ char buf[Varint::kMax32];
+ const char* p = Varint::Encode32(buf, value);
+ s->append(buf, p - buf);
+}
+
+} // namespace snappy
diff --git a/third_party/snappy/snappy-stubs-internal.h b/third_party/snappy/snappy-stubs-internal.h
new file mode 100755
index 00000000000..355a06bc568
--- /dev/null
+++ b/third_party/snappy/snappy-stubs-internal.h
@@ -0,0 +1,478 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Various stubs for the open-source version of Snappy.
+
+#ifndef UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_
+#define UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <iostream>
+#include <string>
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+
+#include "snappy-stubs-public.h"
+
+#if defined(__x86_64__)
+
+// Enable 64-bit optimized versions of some routines.
+#define ARCH_K8 1
+
+#endif
+
+// Needed by OS X, among others.
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
+// Pull in std::min, std::ostream, and the likes. This is safe because this
+// header file is never used from any public header files.
+using namespace std;
+
+// The size of an array, if known at compile-time.
+// Will give unexpected results if used on a pointer.
+// We undefine it first, since some compilers already have a definition.
+#ifdef ARRAYSIZE
+#undef ARRAYSIZE
+#endif
+#define ARRAYSIZE(a) (sizeof(a) / sizeof(*(a)))
+
+// Static prediction hints.
+#ifdef HAVE_BUILTIN_EXPECT
+#define PREDICT_FALSE(x) (__builtin_expect(x, 0))
+#define PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
+#else
+#define PREDICT_FALSE(x) x
+#define PREDICT_TRUE(x) x
+#endif
+
+// This is only used for recomputing the tag byte table used during
+// decompression; for simplicity we just remove it from the open-source
+// version (anyone who wants to regenerate it can just do the call
+// themselves within main()).
+#define DEFINE_bool(flag_name, default_value, description) \
+ bool FLAGS_ ## flag_name = default_value;
+#define DECLARE_bool(flag_name) \
+ extern bool FLAGS_ ## flag_name;
+#define REGISTER_MODULE_INITIALIZER(name, code)
+
+namespace snappy {
+
+static const uint32 kuint32max = static_cast<uint32>(0xFFFFFFFF);
+static const int64 kint64max = static_cast<int64>(0x7FFFFFFFFFFFFFFFLL);
+
+// Logging.
+
+#define LOG(level) LogMessage()
+#define VLOG(level) true ? (void)0 : \
+ snappy::LogMessageVoidify() & snappy::LogMessage()
+
+class LogMessage {
+ public:
+ LogMessage() { }
+ ~LogMessage() {
+ cerr << endl;
+ }
+
+ LogMessage& operator<<(const std::string& msg) {
+ cerr << msg;
+ return *this;
+ }
+ LogMessage& operator<<(int x) {
+ cerr << x;
+ return *this;
+ }
+};
+
+// Asserts, both versions activated in debug mode only,
+// and ones that are always active.
+
+#define CRASH_UNLESS(condition) \
+ PREDICT_TRUE(condition) ? (void)0 : \
+ snappy::LogMessageVoidify() & snappy::LogMessageCrash()
+
+class LogMessageCrash : public LogMessage {
+ public:
+ LogMessageCrash() { }
+ ~LogMessageCrash() {
+ cerr << endl;
+ abort();
+ }
+};
+
+// This class is used to explicitly ignore values in the conditional
+// logging macros. This avoids compiler warnings like "value computed
+// is not used" and "statement has no effect".
+
+class LogMessageVoidify {
+ public:
+ LogMessageVoidify() { }
+ // This has to be an operator with a precedence lower than << but
+ // higher than ?:
+ void operator&(const LogMessage&) { }
+};
+
+#define CHECK(cond) CRASH_UNLESS(cond)
+#define CHECK_LE(a, b) CRASH_UNLESS((a) <= (b))
+#define CHECK_GE(a, b) CRASH_UNLESS((a) >= (b))
+#define CHECK_EQ(a, b) CRASH_UNLESS((a) == (b))
+#define CHECK_NE(a, b) CRASH_UNLESS((a) != (b))
+#define CHECK_LT(a, b) CRASH_UNLESS((a) < (b))
+#define CHECK_GT(a, b) CRASH_UNLESS((a) > (b))
+
+#ifdef NDEBUG
+
+#define DCHECK(cond) CRASH_UNLESS(true)
+#define DCHECK_LE(a, b) CRASH_UNLESS(true)
+#define DCHECK_GE(a, b) CRASH_UNLESS(true)
+#define DCHECK_EQ(a, b) CRASH_UNLESS(true)
+#define DCHECK_NE(a, b) CRASH_UNLESS(true)
+#define DCHECK_LT(a, b) CRASH_UNLESS(true)
+#define DCHECK_GT(a, b) CRASH_UNLESS(true)
+
+#else
+
+#define DCHECK(cond) CHECK(cond)
+#define DCHECK_LE(a, b) CHECK_LE(a, b)
+#define DCHECK_GE(a, b) CHECK_GE(a, b)
+#define DCHECK_EQ(a, b) CHECK_EQ(a, b)
+#define DCHECK_NE(a, b) CHECK_NE(a, b)
+#define DCHECK_LT(a, b) CHECK_LT(a, b)
+#define DCHECK_GT(a, b) CHECK_GT(a, b)
+
+#endif
+
+// Potentially unaligned loads and stores.
+
+#if 1
+//#if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__) || defined(_WIN32)
+
+#define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p))
+#define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p))
+#define UNALIGNED_LOAD64(_p) (*reinterpret_cast<const uint64 *>(_p))
+
+#define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast<uint16 *>(_p) = (_val))
+#define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast<uint32 *>(_p) = (_val))
+#define UNALIGNED_STORE64(_p, _val) (*reinterpret_cast<uint64 *>(_p) = (_val))
+
+#else
+
+// These functions are provided for architectures that don't support
+// unaligned loads and stores.
+
+inline uint16 UNALIGNED_LOAD16(const void *p) {
+ uint16 t;
+ memcpy(&t, p, sizeof t);
+ return t;
+}
+
+inline uint32 UNALIGNED_LOAD32(const void *p) {
+ uint32 t;
+ memcpy(&t, p, sizeof t);
+ return t;
+}
+
+inline uint64 UNALIGNED_LOAD64(const void *p) {
+ uint64 t;
+ memcpy(&t, p, sizeof t);
+ return t;
+}
+
+inline void UNALIGNED_STORE16(void *p, uint16 v) {
+ memcpy(p, &v, sizeof v);
+}
+
+inline void UNALIGNED_STORE32(void *p, uint32 v) {
+ memcpy(p, &v, sizeof v);
+}
+
+inline void UNALIGNED_STORE64(void *p, uint64 v) {
+ memcpy(p, &v, sizeof v);
+}
+
+#endif
+
+// The following guarantees declaration of the byte swap functions.
+#ifdef WORDS_BIGENDIAN
+
+#ifdef _MSC_VER
+#include <stdlib.h>
+#define bswap_16(x) _byteswap_ushort(x)
+#define bswap_32(x) _byteswap_ulong(x)
+#define bswap_64(x) _byteswap_uint64(x)
+
+#elif defined(__APPLE__)
+// Mac OS X / Darwin features
+#include <libkern/OSByteOrder.h>
+#define bswap_16(x) OSSwapInt16(x)
+#define bswap_32(x) OSSwapInt32(x)
+#define bswap_64(x) OSSwapInt64(x)
+
+#else
+#include <byteswap.h>
+#endif
+
+#endif // WORDS_BIGENDIAN
+
+// Convert to little-endian storage, opposite of network format.
+// Convert x from host to little endian: x = LittleEndian.FromHost(x);
+// convert x from little endian to host: x = LittleEndian.ToHost(x);
+//
+// Store values into unaligned memory converting to little endian order:
+// LittleEndian.Store16(p, x);
+//
+// Load unaligned values stored in little endian converting to host order:
+// x = LittleEndian.Load16(p);
+class LittleEndian {
+ public:
+ // Conversion functions.
+#ifdef WORDS_BIGENDIAN
+
+ static uint16 FromHost16(uint16 x) { return bswap_16(x); }
+ static uint16 ToHost16(uint16 x) { return bswap_16(x); }
+
+ static uint32 FromHost32(uint32 x) { return bswap_32(x); }
+ static uint32 ToHost32(uint32 x) { return bswap_32(x); }
+
+ static bool IsLittleEndian() { return false; }
+
+#else // !defined(WORDS_BIGENDIAN)
+
+ static uint16 FromHost16(uint16 x) { return x; }
+ static uint16 ToHost16(uint16 x) { return x; }
+
+ static uint32 FromHost32(uint32 x) { return x; }
+ static uint32 ToHost32(uint32 x) { return x; }
+
+ static bool IsLittleEndian() { return true; }
+
+#endif // !defined(WORDS_BIGENDIAN)
+
+ // Functions to do unaligned loads and stores in little-endian order.
+ static uint16 Load16(const void *p) {
+ return ToHost16(UNALIGNED_LOAD16(p));
+ }
+
+ static void Store16(void *p, uint16 v) {
+ UNALIGNED_STORE16(p, FromHost16(v));
+ }
+
+ static uint32 Load32(const void *p) {
+ return ToHost32(UNALIGNED_LOAD32(p));
+ }
+
+ static void Store32(void *p, uint32 v) {
+ UNALIGNED_STORE32(p, FromHost32(v));
+ }
+};
+
+// Some bit-manipulation functions.
+class Bits {
+ public:
+ // Return floor(log2(n)) for positive integer n. Returns -1 iff n == 0.
+ static int Log2Floor(uint32 n);
+
+ // Return the first set least / most significant bit, 0-indexed. Returns an
+ // undefined value if n == 0. FindLSBSetNonZero() is similar to ffs() except
+ // that it's 0-indexed.
+ static int FindLSBSetNonZero(uint32 n);
+ static int FindLSBSetNonZero64(uint64 n);
+
+ private:
+ DISALLOW_COPY_AND_ASSIGN(Bits);
+};
+
+#ifdef HAVE_BUILTIN_CTZ
+
+inline int Bits::Log2Floor(uint32 n) {
+ return n == 0 ? -1 : 31 ^ __builtin_clz(n);
+}
+
+inline int Bits::FindLSBSetNonZero(uint32 n) {
+ return __builtin_ctz(n);
+}
+
+inline int Bits::FindLSBSetNonZero64(uint64 n) {
+ return __builtin_ctzll(n);
+}
+
+#else // Portable versions.
+
+inline int Bits::Log2Floor(uint32 n) {
+ if (n == 0)
+ return -1;
+ int log = 0;
+ uint32 value = n;
+ for (int i = 4; i >= 0; --i) {
+ int shift = (1 << i);
+ uint32 x = value >> shift;
+ if (x != 0) {
+ value = x;
+ log += shift;
+ }
+ }
+ assert(value == 1);
+ return log;
+}
+
+inline int Bits::FindLSBSetNonZero(uint32 n) {
+ int rc = 31;
+ for (int i = 4, shift = 1 << 4; i >= 0; --i) {
+ const uint32 x = n << shift;
+ if (x != 0) {
+ n = x;
+ rc -= shift;
+ }
+ shift >>= 1;
+ }
+ return rc;
+}
+
+// FindLSBSetNonZero64() is defined in terms of FindLSBSetNonZero().
+inline int Bits::FindLSBSetNonZero64(uint64 n) {
+ const uint32 bottombits = static_cast<uint32>(n);
+ if (bottombits == 0) {
+ // Bottom bits are zero, so scan in top bits
+ return 32 + FindLSBSetNonZero(static_cast<uint32>(n >> 32));
+ } else {
+ return FindLSBSetNonZero(bottombits);
+ }
+}
+
+#endif // End portable versions.
+
+// Variable-length integer encoding.
+class Varint {
+ public:
+ // Maximum lengths of varint encoding of uint32.
+ static const int kMax32 = 5;
+
+ // Attempts to parse a varint32 from a prefix of the bytes in [ptr,limit-1].
+ // Never reads a character at or beyond limit. If a valid/terminated varint32
+ // was found in the range, stores it in *OUTPUT and returns a pointer just
+ // past the last byte of the varint32. Else returns NULL. On success,
+ // "result <= limit".
+ static const char* Parse32WithLimit(const char* ptr, const char* limit,
+ uint32* OUTPUT);
+
+ // REQUIRES "ptr" points to a buffer of length sufficient to hold "v".
+ // EFFECTS Encodes "v" into "ptr" and returns a pointer to the
+ // byte just past the last encoded byte.
+ static char* Encode32(char* ptr, uint32 v);
+
+ // EFFECTS Appends the varint representation of "value" to "*s".
+ static void Append32(string* s, uint32 value);
+};
+
+inline const char* Varint::Parse32WithLimit(const char* p,
+ const char* l,
+ uint32* OUTPUT) {
+ const unsigned char* ptr = reinterpret_cast<const unsigned char*>(p);
+ const unsigned char* limit = reinterpret_cast<const unsigned char*>(l);
+ uint32 b, result;
+ if (ptr >= limit) return NULL;
+ b = *(ptr++); result = b & 127; if (b < 128) goto done;
+ if (ptr >= limit) return NULL;
+ b = *(ptr++); result |= (b & 127) << 7; if (b < 128) goto done;
+ if (ptr >= limit) return NULL;
+ b = *(ptr++); result |= (b & 127) << 14; if (b < 128) goto done;
+ if (ptr >= limit) return NULL;
+ b = *(ptr++); result |= (b & 127) << 21; if (b < 128) goto done;
+ if (ptr >= limit) return NULL;
+ b = *(ptr++); result |= (b & 127) << 28; if (b < 16) goto done;
+ return NULL; // Value is too long to be a varint32
+ done:
+ *OUTPUT = result;
+ return reinterpret_cast<const char*>(ptr);
+}
+
+inline char* Varint::Encode32(char* sptr, uint32 v) {
+ // Operate on characters as unsigneds
+ unsigned char* ptr = reinterpret_cast<unsigned char*>(sptr);
+ static const int B = 128;
+ if (v < (1<<7)) {
+ *(ptr++) = v;
+ } else if (v < (1<<14)) {
+ *(ptr++) = v | B;
+ *(ptr++) = v>>7;
+ } else if (v < (1<<21)) {
+ *(ptr++) = v | B;
+ *(ptr++) = (v>>7) | B;
+ *(ptr++) = v>>14;
+ } else if (v < (1<<28)) {
+ *(ptr++) = v | B;
+ *(ptr++) = (v>>7) | B;
+ *(ptr++) = (v>>14) | B;
+ *(ptr++) = v>>21;
+ } else {
+ *(ptr++) = v | B;
+ *(ptr++) = (v>>7) | B;
+ *(ptr++) = (v>>14) | B;
+ *(ptr++) = (v>>21) | B;
+ *(ptr++) = v>>28;
+ }
+ return reinterpret_cast<char*>(ptr);
+}
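// [Editorial worked example, not part of the original header or commit.]
// Encode32(ptr, 300): 300 == 0x12C. The low 7 bits are 44, so the first
// byte written is 44 | 128 == 0xAC (high bit set means "more bytes follow");
// the remaining bits are 300 >> 7 == 2, so the second byte is 0x02.
// Parse32WithLimit() reverses this: (0xAC & 127) | (0x02 << 7) == 44 + 256
// == 300, consuming exactly two bytes.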
+
+// If you know the internal layout of the std::string in use, you can
+// replace this function with one that resizes the string without
+// filling the new space with zeros (if applicable) --
+// it will be non-portable but faster.
+inline void STLStringResizeUninitialized(string* s, size_t new_size) {
+ s->resize(new_size);
+}
+
+// Return a mutable char* pointing to a string's internal buffer,
+// which may not be null-terminated. Writing through this pointer will
+// modify the string.
+//
+// string_as_array(&str)[i] is valid for 0 <= i < str.size() until the
+// next call to a string method that invalidates iterators.
+//
+// As of 2006-04, there is no standard-blessed way of getting a
+// mutable reference to a string's internal buffer. However, issue 530
+// (http://www.open-std.org/JTC1/SC22/WG21/docs/lwg-defects.html#530)
+// proposes this as the method. It will officially be part of the standard
+// for C++0x. This should already work on all current implementations.
+inline char* string_as_array(string* str) {
+ return str->empty() ? NULL : &*str->begin();
+}
+
+} // namespace snappy
+
+#endif // UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_
diff --git a/third_party/snappy/snappy-stubs-public.h b/third_party/snappy/snappy-stubs-public.h
new file mode 100755
index 00000000000..074d4638866
--- /dev/null
+++ b/third_party/snappy/snappy-stubs-public.h
@@ -0,0 +1,85 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+// Author: sesse@google.com (Steinar H. Gunderson)
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Various type stubs for the open-source version of Snappy.
+//
+// This file cannot include config.h, as it is included from snappy.h,
+// which is a public header. Instead, snappy-stubs-public.h is generated
+// from snappy-stubs-public.h.in at configure time.
+
+#ifndef UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
+#define UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
+
+#if !defined(_WIN32)
+#include <stdint.h>
+#endif
+
+#if 1
+#include <stddef.h>
+#endif
+
+#define SNAPPY_MAJOR 1
+#define SNAPPY_MINOR 0
+#define SNAPPY_PATCHLEVEL 3
+#define SNAPPY_VERSION \
+ ((SNAPPY_MAJOR << 16) | (SNAPPY_MINOR << 8) | SNAPPY_PATCHLEVEL)
+
+#include <string>
+
+namespace snappy {
+
+#if !defined(_WIN32)
+typedef int8_t int8;
+typedef uint8_t uint8;
+typedef int16_t int16;
+typedef uint16_t uint16;
+typedef int32_t int32;
+typedef uint32_t uint32;
+typedef int64_t int64;
+typedef uint64_t uint64;
+#else
+typedef signed char int8;
+typedef unsigned char uint8;
+typedef short int16;
+typedef unsigned short uint16;
+typedef int int32;
+typedef unsigned int uint32;
+typedef long long int64;
+typedef unsigned long long uint64;
+#endif
+
+typedef std::string string;
+
+#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
+ TypeName(const TypeName&); \
+ void operator=(const TypeName&)
+
+} // namespace snappy
+
+#endif // UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
diff --git a/third_party/snappy/snappy.cc b/third_party/snappy/snappy.cc
new file mode 100755
index 00000000000..fdc67e886c6
--- /dev/null
+++ b/third_party/snappy/snappy.cc
@@ -0,0 +1,1026 @@
+// Copyright 2005 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "snappy.h"
+#include "snappy-internal.h"
+#include "snappy-sinksource.h"
+
+#include <stdio.h>
+
+#include <algorithm>
+#include <string>
+#include <vector>
+
+
+namespace snappy {
+
+// Any hash function will produce a valid compressed bitstream, but a good
+// hash function reduces the number of collisions and thus yields better
+// compression for compressible input, and more speed for incompressible
+// input. Of course, it doesn't hurt if the hash function is reasonably fast
+// either, as it gets called a lot.
+static inline uint32 HashBytes(uint32 bytes, int shift) {
+ uint32 kMul = 0x1e35a7bd;
+ return (bytes * kMul) >> shift;
+}
+static inline uint32 Hash(const char* p, int shift) {
+ return HashBytes(UNALIGNED_LOAD32(p), shift);
+}
+
+size_t MaxCompressedLength(size_t source_len) {
+ // Compressed data can be defined as:
+ // compressed := item* literal*
+ // item := literal* copy
+ //
+ // The trailing literal sequence has a space blowup of at most 62/60
+ // since a literal of length 60 needs one tag byte + one extra byte
+ // for length information.
+ //
+ // Item blowup is trickier to measure. Suppose the "copy" op copies
+ // 4 bytes of data. Because of a special check in the encoding code,
+ // we produce a 4-byte copy only if the offset is < 65536. Therefore
+ // the copy op takes 3 bytes to encode, and this type of item leads
+ // to at most the 62/60 blowup for representing literals.
+ //
+ // Suppose the "copy" op copies 5 bytes of data. If the offset is big
+ // enough, it will take 5 bytes to encode the copy op. Therefore the
+ // worst case here is a one-byte literal followed by a five-byte copy.
+ // I.e., 6 bytes of input turn into 7 bytes of "compressed" data.
+ //
+ // This last factor dominates the blowup, so the final estimate is:
+ return 32 + source_len + source_len/6;
+}
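// [Editorial note, not part of the original source or commit.] A concrete
// instance of the bound above: for a 60,000-byte input this returns
// 32 + 60000 + 60000/6 = 70032. A caller using the RawCompress()-style
// interface declared in snappy.h can therefore allocate 70032 bytes up
// front, and the compressor is guaranteed not to overrun that buffer.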
+
+enum {
+ LITERAL = 0,
+ COPY_1_BYTE_OFFSET = 1, // 3 bit length + 3 bits of offset in opcode
+ COPY_2_BYTE_OFFSET = 2,
+ COPY_4_BYTE_OFFSET = 3
+};
+
+// Copy "len" bytes from "src" to "op", one byte at a time. Used for
+// handling COPY operations where the input and output regions may
+// overlap. For example, suppose:
+// src == "ab"
+// op == src + 2
+// len == 20
+// After IncrementalCopy(src, op, len), the result will have
+// eleven copies of "ab"
+// ababababababababababab
+// Note that this does not match the semantics of either memcpy()
+// or memmove().
+static inline void IncrementalCopy(const char* src, char* op, int len) {
+ DCHECK_GT(len, 0);
+ do {
+ *op++ = *src++;
+ } while (--len > 0);
+}
+
+// Equivalent to IncrementalCopy except that it can write up to ten extra
+// bytes after the end of the copy, and that it is faster.
+//
+// The main part of this loop is a simple copy of eight bytes at a time until
+// we've copied (at least) the requested amount of bytes. However, if op and
+// src are less than eight bytes apart (indicating a repeating pattern of
+// length < 8), we first need to expand the pattern in order to get the correct
+// results. For instance, if the buffer looks like this, with the eight-byte
+// <src> and <op> patterns marked as intervals:
+//
+// abxxxxxxxxxxxx
+// [------] src
+// [------] op
+//
+// a single eight-byte copy from <src> to <op> will repeat the pattern once,
+// after which we can move <op> two bytes without moving <src>:
+//
+// ababxxxxxxxxxx
+// [------] src
+// [------] op
+//
+// and repeat the exercise until the two no longer overlap.
+//
+// This allows us to do very well in the special case of one single byte
+// repeated many times, without taking a big hit for more general cases.
+//
+// The worst case of extra writing past the end of the match occurs when
+// op - src == 1 and len == 1; the last copy will read from byte positions
+// [0..7] and write to [4..11], whereas it was only supposed to write to
+// position 1. Thus, ten excess bytes.
+
+namespace {
+
+const int kMaxIncrementCopyOverflow = 10;
+
+} // namespace
+
+static inline void IncrementalCopyFastPath(const char* src, char* op, int len) {
+ while (op - src < 8) {
+ UNALIGNED_STORE64(op, UNALIGNED_LOAD64(src));
+ len -= op - src;
+ op += op - src;
+ }
+ while (len > 0) {
+ UNALIGNED_STORE64(op, UNALIGNED_LOAD64(src));
+ src += 8;
+ op += 8;
+ len -= 8;
+ }
+}
+
+static inline char* EmitLiteral(char* op,
+ const char* literal,
+ int len,
+ bool allow_fast_path) {
+ int n = len - 1; // Zero-length literals are disallowed
+ if (n < 60) {
+ // Fits in tag byte
+ *op++ = LITERAL | (n << 2);
+
+ // The vast majority of copies are below 16 bytes, for which a
+ // call to memcpy is overkill. This fast path can sometimes
+ // copy up to 15 bytes too much, but that is okay in the
+ // main loop, since we have a bit to go on for both sides:
+ //
+ // - The input will always have kInputMarginBytes = 15 extra
+ // available bytes, as long as we're in the main loop, and
+ // if not, allow_fast_path = false.
+ // - The output will always have 32 spare bytes (see
+ // MaxCompressedLength).
+ if (allow_fast_path && len <= 16) {
+ UNALIGNED_STORE64(op, UNALIGNED_LOAD64(literal));
+ UNALIGNED_STORE64(op + 8, UNALIGNED_LOAD64(literal + 8));
+ return op + len;
+ }
+ } else {
+ // Encode in upcoming bytes
+ char* base = op;
+ int count = 0;
+ op++;
+ while (n > 0) {
+ *op++ = n & 0xff;
+ n >>= 8;
+ count++;
+ }
+ assert(count >= 1);
+ assert(count <= 4);
+ *base = LITERAL | ((59+count) << 2);
+ }
+ memcpy(op, literal, len);
+ return op + len;
+}
+
+static inline char* EmitCopyLessThan64(char* op, int offset, int len) {
+ DCHECK_LE(len, 64);
+ DCHECK_GE(len, 4);
+ DCHECK_LT(offset, 65536);
+
+ if ((len < 12) && (offset < 2048)) {
+ int len_minus_4 = len - 4;
+ assert(len_minus_4 < 8); // Must fit in 3 bits
+ *op++ = COPY_1_BYTE_OFFSET | ((len_minus_4) << 2) | ((offset >> 8) << 5);
+ *op++ = offset & 0xff;
+ } else {
+ *op++ = COPY_2_BYTE_OFFSET | ((len-1) << 2);
+ LittleEndian::Store16(op, offset);
+ op += 2;
+ }
+ return op;
+}
+
+static inline char* EmitCopy(char* op, int offset, int len) {
+ // Emit 64 byte copies but make sure to keep at least four bytes reserved
+ while (len >= 68) {
+ op = EmitCopyLessThan64(op, offset, 64);
+ len -= 64;
+ }
+
+  // Emit an extra 60 byte copy if we have too much data to fit in one copy
+ if (len > 64) {
+ op = EmitCopyLessThan64(op, offset, 60);
+ len -= 60;
+ }
+
+ // Emit remainder
+ op = EmitCopyLessThan64(op, offset, len);
+ return op;
+}
+
+
+bool GetUncompressedLength(const char* start, size_t n, size_t* result) {
+ uint32 v = 0;
+ const char* limit = start + n;
+ if (Varint::Parse32WithLimit(start, limit, &v) != NULL) {
+ *result = v;
+ return true;
+ } else {
+ return false;
+ }
+}
+
+namespace internal {
+uint16* WorkingMemory::GetHashTable(size_t input_size, int* table_size) {
+ // Use smaller hash table when input.size() is smaller, since we
+ // fill the table, incurring O(hash table size) overhead for
+ // compression, and if the input is short, we won't need that
+ // many hash table entries anyway.
+ assert(kMaxHashTableSize >= 256);
+ int htsize = 256;
+ while (htsize < kMaxHashTableSize && htsize < input_size) {
+ htsize <<= 1;
+ }
+ CHECK_EQ(0, htsize & (htsize - 1)) << ": must be power of two";
+ CHECK_LE(htsize, kMaxHashTableSize) << ": hash table too large";
+
+ uint16* table;
+ if (htsize <= ARRAYSIZE(small_table_)) {
+ table = small_table_;
+ } else {
+ if (large_table_ == NULL) {
+ large_table_ = new uint16[kMaxHashTableSize];
+ }
+ table = large_table_;
+ }
+
+ *table_size = htsize;
+ memset(table, 0, htsize * sizeof(*table));
+ return table;
+}
+} // end namespace internal
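// [Editorial worked example, not part of the original source or commit.]
// For input_size == 1000, htsize starts at 256 and doubles to 512 and then
// 1024, at which point it is no longer smaller than the input and the loop
// stops. 1024 entries fit in small_table_ (which holds 1 << 10 entries), so
// no heap allocation is needed; only genuinely large inputs fall back to
// lazily allocating large_table_.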
+
+#if defined(_WIN32)
+// signed/unsigned mismatch
+# pragma warning( disable : 4244 )
+#endif
+
+// For 0 <= offset <= 4, GetUint32AtOffset(UNALIGNED_LOAD64(p), offset) will
+// equal UNALIGNED_LOAD32(p + offset). Motivation: On x86-64 hardware we have
+// empirically found that overlapping loads such as
+// UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2)
+// are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32.
+static inline uint32 GetUint32AtOffset(uint64 v, int offset) {
+ DCHECK(0 <= offset && offset <= 4) << offset;
+ return v >> (LittleEndian::IsLittleEndian() ? 8 * offset : 32 - 8 * offset);
+}
+
+// Flat array compression that does not emit the "uncompressed length"
+// prefix. Compresses "input" string to the "*op" buffer.
+//
+// REQUIRES: "input" is at most "kBlockSize" bytes long.
+// REQUIRES: "op" points to an array of memory that is at least
+// "MaxCompressedLength(input.size())" in size.
+// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
+// REQUIRES: "table_size" is a power of two
+//
+// Returns an "end" pointer into "op" buffer.
+// "end - op" is the compressed size of "input".
+namespace internal {
+char* CompressFragment(const char* const input,
+ const size_t input_size,
+ char* op,
+ uint16* table,
+ const int table_size) {
+ // "ip" is the input pointer, and "op" is the output pointer.
+ const char* ip = input;
+ CHECK_LE(input_size, kBlockSize);
+ CHECK_EQ(table_size & (table_size - 1), 0) << ": table must be power of two";
+ const int shift = 32 - Bits::Log2Floor(table_size);
+ DCHECK_EQ(kuint32max >> shift, table_size - 1);
+ const char* ip_end = input + input_size;
+ const char* base_ip = ip;
+ // Bytes in [next_emit, ip) will be emitted as literal bytes. Or
+ // [next_emit, ip_end) after the main loop.
+ const char* next_emit = ip;
+
+ const int kInputMarginBytes = 15;
+ if (PREDICT_TRUE(input_size >= kInputMarginBytes)) {
+ const char* ip_limit = input + input_size - kInputMarginBytes;
+
+ for (uint32 next_hash = Hash(++ip, shift); ; ) {
+ DCHECK_LT(next_emit, ip);
+ // The body of this loop calls EmitLiteral once and then EmitCopy one or
+ // more times. (The exception is that when we're close to exhausting
+ // the input we goto emit_remainder.)
+ //
+ // In the first iteration of this loop we're just starting, so
+ // there's nothing to copy, so calling EmitLiteral once is
+ // necessary. And we only start a new iteration when the
+ // current iteration has determined that a call to EmitLiteral will
+ // precede the next call to EmitCopy (if any).
+ //
+ // Step 1: Scan forward in the input looking for a 4-byte-long match.
+ // If we get close to exhausting the input then goto emit_remainder.
+ //
+ // Heuristic match skipping: If 32 bytes are scanned with no matches
+ // found, start looking only at every other byte. If 32 more bytes are
+ // scanned, look at every third byte, etc.. When a match is found,
+ // immediately go back to looking at every byte. This is a small loss
+ // (~5% performance, ~0.1% density) for compressible data due to more
+ // bookkeeping, but for non-compressible data (such as JPEG) it's a huge
+ // win since the compressor quickly "realizes" the data is incompressible
+ // and doesn't bother looking for matches everywhere.
+ //
+ // The "skip" variable keeps track of how many bytes there are since the
+ // last match; dividing it by 32 (ie. right-shifting by five) gives the
+ // number of bytes to move ahead for each iteration.
+ uint32 skip = 32;
+
+ const char* next_ip = ip;
+ const char* candidate;
+ do {
+ ip = next_ip;
+ uint32 hash = next_hash;
+ DCHECK_EQ(hash, Hash(ip, shift));
+ uint32 bytes_between_hash_lookups = skip++ >> 5;
+ next_ip = ip + bytes_between_hash_lookups;
+ if (PREDICT_FALSE(next_ip > ip_limit)) {
+ goto emit_remainder;
+ }
+ next_hash = Hash(next_ip, shift);
+ candidate = base_ip + table[hash];
+ DCHECK_GE(candidate, base_ip);
+ DCHECK_LT(candidate, ip);
+
+ table[hash] = ip - base_ip;
+ } while (PREDICT_TRUE(UNALIGNED_LOAD32(ip) !=
+ UNALIGNED_LOAD32(candidate)));
+
+ // Step 2: A 4-byte match has been found. We'll later see if more
+ // than 4 bytes match. But, prior to the match, input
+ // bytes [next_emit, ip) are unmatched. Emit them as "literal bytes."
+ DCHECK_LE(next_emit + 16, ip_end);
+ op = EmitLiteral(op, next_emit, ip - next_emit, true);
+
+ // Step 3: Call EmitCopy, and then see if another EmitCopy could
+ // be our next move. Repeat until we find no match for the
+ // input immediately after what was consumed by the last EmitCopy call.
+ //
+ // If we exit this loop normally then we need to call EmitLiteral next,
+ // though we don't yet know how big the literal will be. We handle that
+ // by proceeding to the next iteration of the main loop. We also can exit
+ // this loop via goto if we get close to exhausting the input.
+ uint64 input_bytes = 0;
+ uint32 candidate_bytes = 0;
+
+ do {
+ // We have a 4-byte match at ip, and no need to emit any
+ // "literal bytes" prior to ip.
+ const char* base = ip;
+ int matched = 4 + FindMatchLength(candidate + 4, ip + 4, ip_end);
+ ip += matched;
+ int offset = base - candidate;
+ DCHECK_EQ(0, memcmp(base, candidate, matched));
+ op = EmitCopy(op, offset, matched);
+ // We could immediately start working at ip now, but to improve
+ // compression we first update table[Hash(ip - 1, ...)].
+ const char* insert_tail = ip - 1;
+ next_emit = ip;
+ if (PREDICT_FALSE(ip >= ip_limit)) {
+ goto emit_remainder;
+ }
+ input_bytes = UNALIGNED_LOAD64(insert_tail);
+ uint32 prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift);
+ table[prev_hash] = ip - base_ip - 1;
+ uint32 cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift);
+ candidate = base_ip + table[cur_hash];
+ candidate_bytes = UNALIGNED_LOAD32(candidate);
+ table[cur_hash] = ip - base_ip;
+ } while (GetUint32AtOffset(input_bytes, 1) == candidate_bytes);
+
+ next_hash = HashBytes(GetUint32AtOffset(input_bytes, 2), shift);
+ ++ip;
+ }
+ }
+
+ emit_remainder:
+ // Emit the remaining bytes as a literal
+ if (next_emit < ip_end) {
+ op = EmitLiteral(op, next_emit, ip_end - next_emit, false);
+ }
+
+ return op;
+}
+} // end namespace internal
+
+// Signature of output types needed by decompression code.
+// The decompression code is templatized on a type that obeys this
+// signature so that we do not pay virtual function call overhead in
+// the middle of a tight decompression loop.
+//
+// class DecompressionWriter {
+// public:
+// // Called before decompression
+// void SetExpectedLength(size_t length);
+//
+// // Called after decompression
+// bool CheckLength() const;
+//
+// // Called repeatedly during decompression
+// bool Append(const char* ip, uint32 length, bool allow_fast_path);
+// bool AppendFromSelf(uint32 offset, uint32 length);
+// };
+//
+// "allow_fast_path" is a parameter that says if there is at least 16
+// readable bytes in "ip". It is currently only used by SnappyArrayWriter.
+
+// -----------------------------------------------------------------------
+// Lookup table for decompression code. Generated by ComputeTable() below.
+// -----------------------------------------------------------------------
+
+// Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits
+static const uint32 wordmask[] = {
+ 0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu
+};
+
+// Data stored per entry in lookup table:
+// Range Bits-used Description
+// ------------------------------------
+// 1..64 0..7 Literal/copy length encoded in opcode byte
+// 0..7 8..10 Copy offset encoded in opcode byte / 256
+// 0..4 11..13 Extra bytes after opcode
+//
+// We use eight bits for the length even though 7 would have sufficed
+// because of efficiency reasons:
+// (1) Extracting a byte is faster than a bit-field
+// (2) It properly aligns copy offset so we do not need a <<8
+static const uint16 char_table[256] = {
+ 0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002,
+ 0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004,
+ 0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006,
+ 0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008,
+ 0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a,
+ 0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c,
+ 0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e,
+ 0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010,
+ 0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012,
+ 0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014,
+ 0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016,
+ 0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018,
+ 0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a,
+ 0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c,
+ 0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e,
+ 0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020,
+ 0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022,
+ 0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024,
+ 0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026,
+ 0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028,
+ 0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a,
+ 0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c,
+ 0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e,
+ 0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030,
+ 0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032,
+ 0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034,
+ 0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036,
+ 0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038,
+ 0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a,
+ 0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c,
+ 0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e,
+ 0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040
+};
+
+// In debug mode, allow optional computation of the table at startup.
+// Also, check that the decompression table is correct.
+#ifndef NDEBUG
+DEFINE_bool(snappy_dump_decompression_table, false,
+ "If true, we print the decompression table at startup.");
+
+static uint16 MakeEntry(unsigned int extra,
+ unsigned int len,
+ unsigned int copy_offset) {
+ // Check that all of the fields fit within the allocated space
+ DCHECK_EQ(extra, extra & 0x7); // At most 3 bits
+ DCHECK_EQ(copy_offset, copy_offset & 0x7); // At most 3 bits
+ DCHECK_EQ(len, len & 0x7f); // At most 7 bits
+ return len | (copy_offset << 8) | (extra << 11);
+}
+
+static void ComputeTable() {
+ uint16 dst[256];
+
+ // Place invalid entries in all places to detect missing initialization
+ int assigned = 0;
+ for (int i = 0; i < 256; i++) {
+ dst[i] = 0xffff;
+ }
+
+ // Small LITERAL entries. We store (len-1) in the top 6 bits.
+ for (unsigned int len = 1; len <= 60; len++) {
+ dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0);
+ assigned++;
+ }
+
+ // Large LITERAL entries. We use 60..63 in the high 6 bits to
+ // encode the number of bytes of length info that follow the opcode.
+ for (unsigned int extra_bytes = 1; extra_bytes <= 4; extra_bytes++) {
+ // We set the length field in the lookup table to 1 because extra
+ // bytes encode len-1.
+ dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0);
+ assigned++;
+ }
+
+ // COPY_1_BYTE_OFFSET.
+ //
+ // The tag byte in the compressed data stores len-4 in 3 bits, and
+ // offset/256 in 5 bits. offset%256 is stored in the next byte.
+ //
+ // This format is used for length in range [4..11] and offset in
+ // range [0..2047]
+ for (unsigned int len = 4; len < 12; len++) {
+ for (unsigned int offset = 0; offset < 2048; offset += 256) {
+ dst[COPY_1_BYTE_OFFSET | ((len-4)<<2) | ((offset>>8)<<5)] =
+ MakeEntry(1, len, offset>>8);
+ assigned++;
+ }
+ }
+
+ // COPY_2_BYTE_OFFSET.
+ // Tag contains len-1 in top 6 bits, and offset in next two bytes.
+ for (unsigned int len = 1; len <= 64; len++) {
+ dst[COPY_2_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(2, len, 0);
+ assigned++;
+ }
+
+ // COPY_4_BYTE_OFFSET.
+ // Tag contains len-1 in top 6 bits, and offset in next four bytes.
+ for (unsigned int len = 1; len <= 64; len++) {
+ dst[COPY_4_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(4, len, 0);
+ assigned++;
+ }
+
+ // Check that each entry was initialized exactly once.
+ CHECK_EQ(assigned, 256);
+ for (int i = 0; i < 256; i++) {
+ CHECK_NE(dst[i], 0xffff);
+ }
+
+ if (FLAGS_snappy_dump_decompression_table) {
+ printf("static const uint16 char_table[256] = {\n ");
+ for (int i = 0; i < 256; i++) {
+ printf("0x%04x%s",
+ dst[i],
+ ((i == 255) ? "\n" : (((i%8) == 7) ? ",\n " : ", ")));
+ }
+ printf("};\n");
+ }
+
+ // Check that computed table matched recorded table
+ for (int i = 0; i < 256; i++) {
+ CHECK_EQ(dst[i], char_table[i]);
+ }
+}
+REGISTER_MODULE_INITIALIZER(snappy, ComputeTable());
+#endif /* !NDEBUG */
+
+// Helper class for decompression
+class SnappyDecompressor {
+ private:
+ Source* reader_; // Underlying source of bytes to decompress
+ const char* ip_; // Points to next buffered byte
+ const char* ip_limit_; // Points just past buffered bytes
+ uint32 peeked_; // Bytes peeked from reader (need to skip)
+ bool eof_; // Hit end of input without an error?
+ char scratch_[5]; // Temporary buffer for PeekFast() boundaries
+
+ // Ensure that all of the tag metadata for the next tag is available
+ // in [ip_..ip_limit_-1]. Also ensures that [ip,ip+4] is readable even
+ // if (ip_limit_ - ip_ < 5).
+ //
+ // Returns true on success, false on error or end of input.
+ bool RefillTag();
+
+ public:
+ explicit SnappyDecompressor(Source* reader)
+ : reader_(reader),
+ ip_(NULL),
+ ip_limit_(NULL),
+ peeked_(0),
+ eof_(false) {
+ }
+
+ ~SnappyDecompressor() {
+ // Advance past any bytes we peeked at from the reader
+ reader_->Skip(peeked_);
+ }
+
+ // Returns true iff we have hit the end of the input without an error.
+ bool eof() const {
+ return eof_;
+ }
+
+ // Read the uncompressed length stored at the start of the compressed data.
+ // On success, stores the length in *result and returns true.
+ // On failure, returns false.
+ bool ReadUncompressedLength(uint32* result) {
+ DCHECK(ip_ == NULL); // Must not have read anything yet
+ // Length is encoded in 1..5 bytes
+ *result = 0;
+ uint32 shift = 0;
+ while (true) {
+ if (shift >= 32) return false;
+ size_t n;
+ const char* ip = reader_->Peek(&n);
+ if (n == 0) return false;
+ const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
+ reader_->Skip(1);
+ *result |= static_cast<uint32>(c & 0x7f) << shift;
+ if (c < 128) {
+ break;
+ }
+ shift += 7;
+ }
+ return true;
+ }
+
+ // Process the items found in the input.
+ // Returns when the input is exhausted or the writer reports an error.
+ template <class Writer>
+ void DecompressAllTags(Writer* writer) {
+ const char* ip = ip_;
+ for ( ;; ) {
+ if (ip_limit_ - ip < 5) {
+ ip_ = ip;
+ if (!RefillTag()) return;
+ ip = ip_;
+ }
+
+ const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip++));
+ const uint32 entry = char_table[c];
+ const uint32 trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11];
+ ip += entry >> 11;
+ const uint32 length = entry & 0xff;
+
+ if ((c & 0x3) == LITERAL) {
+ uint32 literal_length = length + trailer;
+ uint32 avail = ip_limit_ - ip;
+ while (avail < literal_length) {
+ bool allow_fast_path = (avail >= 16);
+ if (!writer->Append(ip, avail, allow_fast_path)) return;
+ literal_length -= avail;
+ reader_->Skip(peeked_);
+ size_t n;
+ ip = reader_->Peek(&n);
+ avail = n;
+ peeked_ = avail;
+ if (avail == 0) return; // Premature end of input
+ ip_limit_ = ip + avail;
+ }
+ bool allow_fast_path = (avail >= 16);
+ if (!writer->Append(ip, literal_length, allow_fast_path)) {
+ return;
+ }
+ ip += literal_length;
+ } else {
+ // copy_offset/256 is encoded in bits 8..10. By just fetching
+ // those bits, we get copy_offset (since the bit-field starts at
+ // bit 8).
+ const uint32 copy_offset = entry & 0x700;
+ if (!writer->AppendFromSelf(copy_offset + trailer, length)) {
+ return;
+ }
+ }
+ }
+ }
+};
+
+bool SnappyDecompressor::RefillTag() {
+ const char* ip = ip_;
+ if (ip == ip_limit_) {
+ // Fetch a new fragment from the reader
+ reader_->Skip(peeked_); // All peeked bytes are used up
+ size_t n;
+ ip = reader_->Peek(&n);
+ peeked_ = n;
+ if (n == 0) {
+ eof_ = true;
+ return false;
+ }
+ ip_limit_ = ip + n;
+ }
+
+ // Read the tag character
+ DCHECK_LT(ip, ip_limit_);
+ const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
+ const uint32 entry = char_table[c];
+ const uint32 needed = (entry >> 11) + 1; // +1 byte for 'c'
+ DCHECK_LE(needed, sizeof(scratch_));
+
+ // Read more bytes from reader if needed
+ uint32 nbuf = ip_limit_ - ip;
+ if (nbuf < needed) {
+ // Stitch together bytes from ip and reader to form the word
+ // contents. We store the needed bytes in "scratch_". They
+ // will be consumed immediately by the caller since we do not
+ // read more than we need.
+ memmove(scratch_, ip, nbuf);
+ reader_->Skip(peeked_); // All peeked bytes are used up
+ peeked_ = 0;
+ while (nbuf < needed) {
+ size_t length;
+ const char* src = reader_->Peek(&length);
+ if (length == 0) return false;
+ uint32 to_add = min<uint32>(needed - nbuf, length);
+ memcpy(scratch_ + nbuf, src, to_add);
+ nbuf += to_add;
+ reader_->Skip(to_add);
+ }
+ DCHECK_EQ(nbuf, needed);
+ ip_ = scratch_;
+ ip_limit_ = scratch_ + needed;
+ } else if (nbuf < 5) {
+ // Have enough bytes, but move into scratch_ so that we do not
+ // read past end of input
+ memmove(scratch_, ip, nbuf);
+ reader_->Skip(peeked_); // All peeked bytes are used up
+ peeked_ = 0;
+ ip_ = scratch_;
+ ip_limit_ = scratch_ + nbuf;
+ } else {
+ // Pass pointer to buffer returned by reader_.
+ ip_ = ip;
+ }
+ return true;
+}
+
+template <typename Writer>
+static bool InternalUncompress(Source* r,
+ Writer* writer,
+ uint32 max_len) {
+ // Read the uncompressed length from the front of the compressed input
+ SnappyDecompressor decompressor(r);
+ uint32 uncompressed_len = 0;
+ if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false;
+ // Protect against possible DoS attack
+ if (static_cast<uint64>(uncompressed_len) > max_len) {
+ return false;
+ }
+
+ writer->SetExpectedLength(uncompressed_len);
+
+ // Process the entire input
+ decompressor.DecompressAllTags(writer);
+ return (decompressor.eof() && writer->CheckLength());
+}
+
+bool GetUncompressedLength(Source* source, uint32* result) {
+ SnappyDecompressor decompressor(source);
+ return decompressor.ReadUncompressedLength(result);
+}
+
+size_t Compress(Source* reader, Sink* writer) {
+ size_t written = 0;
+ int N = reader->Available();
+ char ulength[Varint::kMax32];
+ char* p = Varint::Encode32(ulength, N);
+ writer->Append(ulength, p-ulength);
+ written += (p - ulength);
+
+ internal::WorkingMemory wmem;
+ char* scratch = NULL;
+ char* scratch_output = NULL;
+
+ while (N > 0) {
+ // Get next block to compress (without copying if possible)
+ size_t fragment_size;
+ const char* fragment = reader->Peek(&fragment_size);
+ DCHECK_NE(fragment_size, 0) << ": premature end of input";
+ const int num_to_read = min(N, kBlockSize);
+ size_t bytes_read = fragment_size;
+
+ int pending_advance = 0;
+ if (bytes_read >= num_to_read) {
+ // Buffer returned by reader is large enough
+ pending_advance = num_to_read;
+ fragment_size = num_to_read;
+ } else {
+ // Read into scratch buffer
+ if (scratch == NULL) {
+ // If this is the last iteration, we want to allocate N bytes
+ // of space, otherwise the max possible kBlockSize space.
+ // num_to_read contains exactly the correct value
+ scratch = new char[num_to_read];
+ }
+ memcpy(scratch, fragment, bytes_read);
+ reader->Skip(bytes_read);
+
+ while (bytes_read < num_to_read) {
+ fragment = reader->Peek(&fragment_size);
+ size_t n = min<size_t>(fragment_size, num_to_read - bytes_read);
+ memcpy(scratch + bytes_read, fragment, n);
+ bytes_read += n;
+ reader->Skip(n);
+ }
+ DCHECK_EQ(bytes_read, num_to_read);
+ fragment = scratch;
+ fragment_size = num_to_read;
+ }
+ DCHECK_EQ(fragment_size, num_to_read);
+
+ // Get encoding table for compression
+ int table_size;
+ uint16* table = wmem.GetHashTable(num_to_read, &table_size);
+
+ // Compress input_fragment and append to dest
+ const int max_output = MaxCompressedLength(num_to_read);
+
+ // Need a scratch buffer for the output, in case the byte sink doesn't
+ // have room for us directly.
+ if (scratch_output == NULL) {
+ scratch_output = new char[max_output];
+ } else {
+ // Since we encode kBlockSize regions followed by a region
+ // which is <= kBlockSize in length, a previously allocated
+ // scratch_output[] region is big enough for this iteration.
+ }
+ char* dest = writer->GetAppendBuffer(max_output, scratch_output);
+ char* end = internal::CompressFragment(fragment, fragment_size,
+ dest, table, table_size);
+ writer->Append(dest, end - dest);
+ written += (end - dest);
+
+ N -= num_to_read;
+ reader->Skip(pending_advance);
+ }
+
+ delete[] scratch;
+ delete[] scratch_output;
+
+ return written;
+}
+
+// -----------------------------------------------------------------------
+// Flat array interfaces
+// -----------------------------------------------------------------------
+
+// A type that writes to a flat array.
+// Note that this is not a "ByteSink", but a type that matches the
+// Writer template argument to SnappyDecompressor::DecompressAllTags().
+class SnappyArrayWriter {
+ private:
+ char* base_;
+ char* op_;
+ char* op_limit_;
+
+ public:
+ inline explicit SnappyArrayWriter(char* dst)
+ : base_(dst),
+ op_(dst) {
+ }
+
+ inline void SetExpectedLength(size_t len) {
+ op_limit_ = op_ + len;
+ }
+
+ inline bool CheckLength() const {
+ return op_ == op_limit_;
+ }
+
+ inline bool Append(const char* ip, uint32 len, bool allow_fast_path) {
+ char* op = op_;
+ const int space_left = op_limit_ - op;
+ if (allow_fast_path && len <= 16 && space_left >= 16) {
+ // Fast path, used for the majority (about 90%) of dynamic invocations.
+ UNALIGNED_STORE64(op, UNALIGNED_LOAD64(ip));
+ UNALIGNED_STORE64(op + 8, UNALIGNED_LOAD64(ip + 8));
+ } else {
+ if (space_left < len) {
+ return false;
+ }
+ memcpy(op, ip, len);
+ }
+ op_ = op + len;
+ return true;
+ }
+
+ inline bool AppendFromSelf(uint32 offset, uint32 len) {
+ char* op = op_;
+ const int space_left = op_limit_ - op;
+
+ if (op - base_ <= offset - 1u) { // -1u catches offset==0
+ return false;
+ }
+ if (len <= 16 && offset >= 8 && space_left >= 16) {
+ // Fast path, used for the majority (70-80%) of dynamic invocations.
+ UNALIGNED_STORE64(op, UNALIGNED_LOAD64(op - offset));
+ UNALIGNED_STORE64(op + 8, UNALIGNED_LOAD64(op - offset + 8));
+ } else {
+ if (space_left >= len + kMaxIncrementCopyOverflow) {
+ IncrementalCopyFastPath(op - offset, op, len);
+ } else {
+ if (space_left < len) {
+ return false;
+ }
+ IncrementalCopy(op - offset, op, len);
+ }
+ }
+
+ op_ = op + len;
+ return true;
+ }
+};
+
+bool RawUncompress(const char* compressed, size_t n, char* uncompressed) {
+ ByteArraySource reader(compressed, n);
+ return RawUncompress(&reader, uncompressed);
+}
+
+bool RawUncompress(Source* compressed, char* uncompressed) {
+ SnappyArrayWriter output(uncompressed);
+ return InternalUncompress(compressed, &output, kuint32max);
+}
+
+bool Uncompress(const char* compressed, size_t n, string* uncompressed) {
+ size_t ulength;
+ if (!GetUncompressedLength(compressed, n, &ulength)) {
+ return false;
+ }
+ // Protect against possible DoS attack
+ if ((static_cast<uint64>(ulength) + uncompressed->size()) >
+ uncompressed->max_size()) {
+ return false;
+ }
+ STLStringResizeUninitialized(uncompressed, ulength);
+ return RawUncompress(compressed, n, string_as_array(uncompressed));
+}
+
+
+// A Writer that drops everything on the floor and just does validation
+class SnappyDecompressionValidator {
+ private:
+ size_t expected_;
+ size_t produced_;
+
+ public:
+ inline SnappyDecompressionValidator() : produced_(0) { }
+ inline void SetExpectedLength(size_t len) {
+ expected_ = len;
+ }
+ inline bool CheckLength() const {
+ return expected_ == produced_;
+ }
+ inline bool Append(const char* ip, uint32 len, bool allow_fast_path) {
+ produced_ += len;
+ return produced_ <= expected_;
+ }
+ inline bool AppendFromSelf(uint32 offset, uint32 len) {
+ if (produced_ <= offset - 1u) return false; // -1u catches offset==0
+ produced_ += len;
+ return produced_ <= expected_;
+ }
+};
+
+bool IsValidCompressedBuffer(const char* compressed, size_t n) {
+ ByteArraySource reader(compressed, n);
+ SnappyDecompressionValidator writer;
+ return InternalUncompress(&reader, &writer, kuint32max);
+}
+
+void RawCompress(const char* input,
+ size_t input_length,
+ char* compressed,
+ size_t* compressed_length) {
+ ByteArraySource reader(input, input_length);
+ UncheckedByteArraySink writer(compressed);
+ Compress(&reader, &writer);
+
+ // Compute how many bytes were added
+ *compressed_length = (writer.CurrentDestination() - compressed);
+}
+
+size_t Compress(const char* input, size_t input_length, string* compressed) {
+ // Pre-grow the buffer to the max length of the compressed output
+ compressed->resize(MaxCompressedLength(input_length));
+
+ size_t compressed_length;
+ RawCompress(input, input_length, string_as_array(compressed),
+ &compressed_length);
+ compressed->resize(compressed_length);
+ return compressed_length;
+}
+
+
+} // end namespace snappy
+
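The entry layout documented above (length in the low 8 bits, copy-offset high bits in bits 8..10, count of extra tag bytes in bits 11..13) can be checked against the recorded table with a small standalone sketch; the constant below is copied from char_table and everything else follows from MakeEntry():

    #include <cassert>
    #include <cstdio>

    int main() {
        // Tag byte 0x01 is COPY_1_BYTE_OFFSET with length 4 and a small offset,
        // so char_table[0x01] is 0x0804 in the table above.
        const unsigned short entry = 0x0804;

        const unsigned length      = entry & 0xff;        // low 8 bits   -> 4
        const unsigned offset_high = (entry >> 8) & 0x7;  // bits 8..10   -> 0 (times 256)
        const unsigned extra_bytes = entry >> 11;         // bits 11..13  -> 1 trailing byte

        assert(length == 4 && offset_high == 0 && extra_bytes == 1);
        std::printf("len=%u offset_high=%u extra=%u\n", length, offset_high, extra_bytes);
        return 0;
    }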
diff --git a/third_party/snappy/snappy.h b/third_party/snappy/snappy.h
new file mode 100755
index 00000000000..8d6ef2294f5
--- /dev/null
+++ b/third_party/snappy/snappy.h
@@ -0,0 +1,155 @@
+// Copyright 2005 and onwards Google Inc.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// A light-weight compression algorithm. It is designed for speed of
+// compression and decompression, rather than for the utmost in space
+// savings.
+//
+// For getting better compression ratios when you are compressing data
+// with long repeated sequences or compressing data that is similar to
+// other data, while still compressing fast, you might look at first
+// using BMDiff and then compressing the output of BMDiff with
+// Snappy.
+
+#ifndef UTIL_SNAPPY_SNAPPY_H__
+#define UTIL_SNAPPY_SNAPPY_H__
+
+#include <stddef.h>
+#include <string>
+
+#include "snappy-stubs-public.h"
+
+namespace snappy {
+ class Source;
+ class Sink;
+
+ // ------------------------------------------------------------------------
+ // Generic compression/decompression routines.
+ // ------------------------------------------------------------------------
+
+ // Compress the bytes read from "*source" and append to "*sink". Return the
+ // number of bytes written.
+ size_t Compress(Source* source, Sink* sink);
+
+ bool GetUncompressedLength(Source* source, uint32* result);
+
+ // ------------------------------------------------------------------------
+ // Higher-level string based routines (should be sufficient for most users)
+ // ------------------------------------------------------------------------
+
+ // Sets "*output" to the compressed version of "input[0,input_length-1]".
+ // Original contents of *output are lost.
+ //
+ // REQUIRES: "input[]" is not an alias of "*output".
+ size_t Compress(const char* input, size_t input_length, string* output);
+
+ // Decompresses "compressed[0,compressed_length-1]" to "*uncompressed".
+ // Original contents of "*uncompressed" are lost.
+ //
+ // REQUIRES: "compressed[]" is not an alias of "*uncompressed".
+ //
+ // returns false if the message is corrupted and could not be decompressed
+ bool Uncompress(const char* compressed, size_t compressed_length,
+ string* uncompressed);
+
+
+ // ------------------------------------------------------------------------
+ // Lower-level character array based routines. May be useful for
+ // efficiency reasons in certain circumstances.
+ // ------------------------------------------------------------------------
+
+ // REQUIRES: "compressed" must point to an area of memory that is at
+ // least "MaxCompressedLength(input_length)" bytes in length.
+ //
+ // Takes the data stored in "input[0..input_length-1]" and stores
+ // it in the array pointed to by "compressed".
+ //
+ // "*compressed_length" is set to the length of the compressed output.
+ //
+ // Example:
+ // char* output = new char[snappy::MaxCompressedLength(input_length)];
+ // size_t output_length;
+ // RawCompress(input, input_length, output, &output_length);
+ // ... Process(output, output_length) ...
+ // delete [] output;
+ void RawCompress(const char* input,
+ size_t input_length,
+ char* compressed,
+ size_t* compressed_length);
+
+ // Given data in "compressed[0..compressed_length-1]" generated by
+ // calling the Snappy::Compress routine, this routine
+ // stores the uncompressed data to
+ // uncompressed[0..GetUncompressedLength(compressed)-1]
+ // returns false if the message is corrupted and could not be decompressed
+ bool RawUncompress(const char* compressed, size_t compressed_length,
+ char* uncompressed);
+
+ // Given data from the byte source 'compressed' generated by calling
+ // the Snappy::Compress routine, this routine stores the uncompressed
+ // data to
+ // uncompressed[0..GetUncompressedLength(compressed,compressed_length)-1]
+ // returns false if the message is corrupted and could not be decompressed
+ bool RawUncompress(Source* compressed, char* uncompressed);
+
+ // Returns the maximal size of the compressed representation of
+ // input data that is "source_bytes" bytes in length.
+ size_t MaxCompressedLength(size_t source_bytes);
+
+ // REQUIRES: "compressed[]" was produced by RawCompress() or Compress()
+ // Returns true and stores the length of the uncompressed data in
+ // *result normally. Returns false on parsing error.
+ // This operation takes O(1) time.
+ bool GetUncompressedLength(const char* compressed, size_t compressed_length,
+ size_t* result);
+
+ // Returns true iff the contents of "compressed[]" can be uncompressed
+ // successfully. Does not return the uncompressed data. Takes
+ // time proportional to compressed_length, but is usually at least
+ // a factor of four faster than actual decompression.
+ bool IsValidCompressedBuffer(const char* compressed,
+ size_t compressed_length);
+
+ // *** DO NOT CHANGE THE VALUE OF kBlockSize ***
+ //
+ // New Compression code chops up the input into blocks of at most
+ // the following size. This ensures that back-references in the
+ // output never cross kBlockSize block boundaries. This can be
+ // helpful in implementing blocked decompression. However the
+ // decompression code should not rely on this guarantee since older
+ // compression code may not obey it.
+ static const int kBlockLog = 15;
+ static const int kBlockSize = 1 << kBlockLog;
+
+ static const int kMaxHashTableBits = 14;
+ static const int kMaxHashTableSize = 1 << kMaxHashTableBits;
+
+} // end namespace snappy
+
+
+#endif // UTIL_SNAPPY_SNAPPY_H__
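A minimal round-trip sketch against the string-based interface declared in this header; the sample data is made up, and only snappy::Compress and snappy::Uncompress as declared above are assumed:

    #include <cassert>
    #include <iostream>
    #include <string>
    #include "snappy.h"

    int main() {
        const std::string original(100000, 'x');   // highly compressible sample data

        std::string compressed;
        snappy::Compress(original.data(), original.size(), &compressed);

        std::string restored;
        bool ok = snappy::Uncompress(compressed.data(), compressed.size(), &restored);

        assert(ok && restored == original);
        std::cout << original.size() << " -> " << compressed.size() << " bytes" << std::endl;
        return 0;
    }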
diff --git a/tools/bridge.cpp b/tools/bridge.cpp
index f7518a17ad4..341a1dae687 100644
--- a/tools/bridge.cpp
+++ b/tools/bridge.cpp
@@ -88,7 +88,7 @@ set<MessagingPort*> ports;
class MyListener : public Listener {
public:
- MyListener( int port ) : Listener( "", port ) {}
+ MyListener( int port ) : Listener( "bridge" , "", port ) {}
virtual void accepted(MessagingPort *mp) {
ports.insert( mp );
Forwarder f( *mp );
diff --git a/tools/export.cpp b/tools/export.cpp
index fb32a9e58ff..c3a5420438d 100644
--- a/tools/export.cpp
+++ b/tools/export.cpp
@@ -45,6 +45,73 @@ public:
_usesstdout = false;
}
+ // Turn every double quote character into two double quote characters.
+ // If hasSurroundingQuotes is true, the first and last characters of the
+ // string are left unescaped; if it is false, the whole string is wrapped
+ // in double quote characters.
+ string csvEscape(string str, bool hasSurroundingQuotes = false) {
+ size_t index = hasSurroundingQuotes ? 1 : 0;
+ while (((index = str.find('"', index)) != string::npos)
+ && (index < (hasSurroundingQuotes ? str.size() - 1 : str.size()))) {
+ str.replace(index, 1, "\"\"");
+ index += 2;
+ }
+ return hasSurroundingQuotes ? str : "\"" + str + "\"";
+ }
+
+ // Gets the string representation of a BSON element that can be correctly written to a CSV file
+ string csvString (const BSONElement& object) {
+ const char* binData; // Only used with BinData type
+
+ switch (object.type()) {
+ case MinKey:
+ return "$MinKey";
+ case MaxKey:
+ return "$MaxKey";
+ case NumberInt:
+ case NumberDouble:
+ case NumberLong:
+ case Bool:
+ return object.toString(false);
+ case String:
+ case Symbol:
+ return csvEscape(object.toString(false), true);
+ case Object:
+ return csvEscape(object.jsonString(Strict, false));
+ case Array:
+ return csvEscape(object.jsonString(Strict, false));
+ case BinData:
+ int len;
+ binData = object.binDataClean(len);
+ return toHex(binData, len);
+ case jstOID:
+ return "ObjectID(" + object.OID().toString() + ")"; // OIDs are always 24 bytes
+ case Date:
+ return timeToISOString(object.Date() / 1000);
+ case Timestamp:
+ return csvEscape(object.jsonString(Strict, false));
+ case RegEx:
+ return csvEscape("/" + string(object.regex()) + "/" + string(object.regexFlags()));
+ case Code:
+ return csvEscape(object.toString(false));
+ case CodeWScope:
+ if (string(object.codeWScopeScopeData()) == "") {
+ return csvEscape(object.toString(false));
+ } else {
+ return csvEscape(object.jsonString(Strict, false));
+ }
+ case EOO:
+ case Undefined:
+ case DBRef:
+ case jstNULL:
+ cerr << "Invalid BSON object type for CSV output: " << object.type() << endl;
+ return "";
+ }
+ // Can never get here
+ assert(false);
+ return "";
+ }
+
int run() {
string ns;
const bool csv = hasParam( "csv" );
@@ -137,7 +204,7 @@ public:
out << ",";
const BSONElement & e = obj.getFieldDotted(i->c_str());
if ( ! e.eoo() ) {
- out << e.jsonString( Strict , false );
+ out << csvString(e);
}
}
out << endl;
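The escaping rule implemented by csvEscape() above, doubling embedded quotes and wrapping the value in quotes, can be restated in a small standalone sketch (illustration only, not the tool's code):

    #include <iostream>
    #include <string>

    // Simplified restatement of the rule: every '"' becomes '""' and the
    // whole value is wrapped in surrounding quotes.
    static std::string csvEscapeSketch(const std::string& in) {
        std::string out = "\"";
        for (std::string::size_type i = 0; i < in.size(); ++i) {
            if (in[i] == '"') out += "\"\"";
            else              out += in[i];
        }
        out += "\"";
        return out;
    }

    int main() {
        // prints: "say ""hi"""
        std::cout << csvEscapeSketch("say \"hi\"") << std::endl;
        return 0;
    }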
diff --git a/tools/import.cpp b/tools/import.cpp
index c7a18b940ec..16980b05fbb 100644
--- a/tools/import.cpp
+++ b/tools/import.cpp
@@ -27,6 +27,7 @@
#include <iostream>
#include <boost/program_options.hpp>
+#include <boost/algorithm/string.hpp>
using namespace mongo;
@@ -44,100 +45,215 @@ class Import : public Tool {
bool _doimport;
bool _jsonArray;
vector<string> _upsertFields;
+ static const int BUF_SIZE = 1024 * 1024 * 4;
+
+ string trimWhitespace(const string& str) {
+ if (str.size() == 0) {
+ return str;
+ }
+ size_t begin = 0;
+ size_t end = str.size() - 1;
+ while (begin < str.size() && isspace(str[begin])) { ++begin; } // Finds index of first non-whitespace character
+ while (end > 0 && isspace(str[end])) { --end; } // Finds index of last non-whitespace character
+ return str.substr(begin, end - begin + 1);
+ }
+
+ void csvTokenizeRow(const string& row, vector<string>& tokens) {
+ bool inQuotes = false;
+ bool prevWasQuote = false;
+ bool tokenQuoted = false;
+ string curtoken = "";
+ for (string::const_iterator it = row.begin(); it != row.end(); ++it) {
+ char element = *it;
+ if (element == '"') {
+ if (!inQuotes) {
+ inQuotes = true;
+ tokenQuoted = true;
+ curtoken = "";
+ } else {
+ if (prevWasQuote) {
+ curtoken += "\"";
+ prevWasQuote = false;
+ } else {
+ prevWasQuote = true;
+ }
+ }
+ } else {
+ if (inQuotes && prevWasQuote) {
+ inQuotes = false;
+ prevWasQuote = false;
+ tokens.push_back(curtoken);
+ }
+
+ if (element == ',' && !inQuotes) {
+ if (!tokenQuoted) { // If token was quoted, it's already been added
+ tokens.push_back(trimWhitespace(curtoken));
+ }
+ curtoken = "";
+ tokenQuoted = false;
+ } else {
+ curtoken += element;
+ }
+ }
+ }
+ if (!tokenQuoted || (inQuotes && prevWasQuote)) {
+ tokens.push_back(trimWhitespace(curtoken));
+ }
+ }
void _append( BSONObjBuilder& b , const string& fieldName , const string& data ) {
- if ( b.appendAsNumber( fieldName , data ) )
+ if ( _ignoreBlanks && data.size() == 0 )
return;
- if ( _ignoreBlanks && data.size() == 0 )
+ if ( b.appendAsNumber( fieldName , data ) )
return;
// TODO: other types?
- b.append( fieldName , data );
+ b.append ( fieldName , data );
+ }
+
+ /*
+ * Reads one line from in into buf.
+ * Returns the number of bytes that should be skipped - the caller should
+ * increment buf by this amount.
+ */
+ int getLine(istream* in, char* buf) {
+ if (_jsonArray) {
+ in->read(buf, BUF_SIZE);
+ uassert(13295, "JSONArray file too large", (in->rdstate() & ios_base::eofbit));
+ buf[ in->gcount() ] = '\0';
+ }
+ else {
+ in->getline( buf , BUF_SIZE );
+ log(1) << "got line:" << buf << endl;
+ }
+ uassert( 10263 , "unknown error reading file" ,
+ (!(in->rdstate() & ios_base::badbit)) &&
+ (!(in->rdstate() & ios_base::failbit) || (in->rdstate() & ios_base::eofbit)) );
+
+ int numBytesSkipped = 0;
+ if (strncmp("\xEF\xBB\xBF", buf, 3) == 0) { // UTF-8 BOM (notepad is stupid)
+ buf += 3;
+ numBytesSkipped += 3;
+ }
+
+ uassert(13289, "Invalid UTF8 character detected", isValidUTF8(buf));
+ return numBytesSkipped;
}
- BSONObj parseLine( char * line ) {
- uassert(13289, "Invalid UTF8 character detected", isValidUTF8(line));
+ /*
+ * Parses a BSON object out of a JSON array.
+ * Returns number of bytes processed on success and -1 on failure.
+ */
+ int parseJSONArray(char* buf, BSONObj& o) {
+ int len = 0;
+ while (buf[0] != '{' && buf[0] != '\0') {
+ len++;
+ buf++;
+ }
+ if (buf[0] == '\0')
+ return -1;
+
+ int jslen;
+ o = fromjson(buf, &jslen);
+ len += jslen;
- if ( _type == JSON ) {
+ return len;
+ }
+
+ /*
+ * Parses one object from the input file. This usually corresponds to one line in the input
+ * file, unless the file is a CSV and contains a newline within a quoted string entry.
+ * Returns true if a BSONObj was successfully created and false if not.
+ */
+ bool parseRow(istream* in, BSONObj& o, int& numBytesRead) {
+ boost::scoped_array<char> buffer(new char[BUF_SIZE+2]);
+ char* line = buffer.get();
+
+ numBytesRead = getLine(in, line);
+ line += numBytesRead;
+
+ if (line[0] == '\0') {
+ return false;
+ }
+ numBytesRead += strlen( line );
+
+ if (_type == JSON) {
+ // Strip out trailing whitespace
char * end = ( line + strlen( line ) ) - 1;
- while ( isspace(*end) ) {
+ while ( end >= line && isspace(*end) ) {
*end = 0;
end--;
}
- return fromjson( line );
+ o = fromjson( line );
+ return true;
}
- BSONObjBuilder b;
+ vector<string> tokens;
+ if (_type == CSV) {
+ string row;
+ bool inside_quotes = false;
+ size_t last_quote = 0;
+ while (true) {
+ string lineStr(line);
+ // Deal with line breaks in quoted strings
+ last_quote = lineStr.find_first_of('"');
+ while (last_quote != string::npos) {
+ inside_quotes = !inside_quotes;
+ last_quote = lineStr.find_first_of('"', last_quote+1);
+ }
- unsigned int pos=0;
- while ( line[0] ) {
- string name;
- if ( pos < _fields.size() ) {
- name = _fields[pos];
+ row.append(lineStr);
+
+ if (inside_quotes) {
+ row.append("\n");
+ int num = getLine(in, line);
+ line += num;
+ numBytesRead += num;
+
+ uassert (15854, "CSV file ends while inside quoted field", line[0] != '\0');
+ numBytesRead += strlen( line );
+ } else {
+ break;
+ }
}
- else {
- stringstream ss;
- ss << "field" << pos;
- name = ss.str();
+ // now 'row' is a string corresponding to one row of the CSV file
+ // (which may span multiple lines) and represents one BSONObj
+ csvTokenizeRow(row, tokens);
+ }
+ else { // _type == TSV
+ while (line[0] != '\t' && isspace(line[0])) { // Strip leading whitespace, but not tabs
+ line++;
}
- pos++;
-
- bool done = false;
- string data;
- char * end;
- if ( _type == CSV && line[0] == '"' ) {
- line++; //skip first '"'
-
- while (true) {
- end = strchr( line , '"' );
- if (!end) {
- data += line;
- done = true;
- break;
- }
- else if (end[1] == '"') {
- // two '"'s get appended as one
- data.append(line, end-line+1); //include '"'
- line = end+2; //skip both '"'s
- }
- else if (end[-1] == '\\') {
- // "\\\"" gets appended as '"'
- data.append(line, end-line-1); //exclude '\\'
- data.append("\"");
- line = end+1; //skip the '"'
- }
- else {
- data.append(line, end-line);
- line = end+2; //skip '"' and ','
- break;
- }
- }
+
+ boost::split(tokens, line, boost::is_any_of(_sep));
+ }
+
+ // Now that the row is tokenized, create a BSONObj out of it.
+ BSONObjBuilder b;
+ unsigned int pos=0;
+ for (vector<string>::iterator it = tokens.begin(); it != tokens.end(); ++it) {
+ string token = *it;
+ if ( _headerLine ) {
+ _fields.push_back(token);
}
else {
- end = strstr( line , _sep );
- if ( ! end ) {
- done = true;
- data = string( line );
+ string name;
+ if ( pos < _fields.size() ) {
+ name = _fields[pos];
}
else {
- data = string( line , end - line );
- line = end+1;
+ stringstream ss;
+ ss << "field" << pos;
+ name = ss.str();
}
- }
+ pos++;
- if ( _headerLine ) {
- while ( isspace( data[0] ) )
- data = data.substr( 1 );
- _fields.push_back( data );
+ _append( b , name , token );
}
- else
- _append( b , name , data );
-
- if ( done )
- break;
}
- return b.obj();
+ o = b.obj();
+ return true;
}
public:
@@ -255,68 +371,37 @@ public:
_jsonArray = true;
}
- int errors = 0;
-
- int num = 0;
-
time_t start = time(0);
-
log(1) << "filesize: " << fileSize << endl;
ProgressMeter pm( fileSize );
- const int BUF_SIZE = 1024 * 1024 * 4;
- boost::scoped_array<char> line(new char[BUF_SIZE+2]);
- char * buf = line.get();
- while ( _jsonArray || in->rdstate() == 0 ) {
- if (_jsonArray) {
- if (buf == line.get()) { //first pass
- in->read(buf, BUF_SIZE);
- uassert(13295, "JSONArray file too large", (in->rdstate() & ios_base::eofbit));
- buf[ in->gcount() ] = '\0';
- }
- }
- else {
- buf = line.get();
- in->getline( buf , BUF_SIZE );
- log(1) << "got line:" << buf << endl;
- }
- uassert( 10263 , "unknown error reading file" ,
- (!(in->rdstate() & ios_base::badbit)) &&
- (!(in->rdstate() & ios_base::failbit) || (in->rdstate() & ios_base::eofbit)) );
-
- int len = 0;
- if (strncmp("\xEF\xBB\xBF", buf, 3) == 0) { // UTF-8 BOM (notepad is stupid)
- buf += 3;
- len += 3;
- }
-
- if (_jsonArray) {
- while (buf[0] != '{' && buf[0] != '\0') {
- len++;
- buf++;
- }
- if (buf[0] == '\0')
- break;
- }
- else {
- while ((_type != TSV || buf[0] != '\t') && isspace( buf[0] )) {
- len++;
- buf++;
- }
- if (buf[0] == '\0')
- continue;
- len += strlen( buf );
- }
+ int num = 0;
+ int errors = 0;
+ int len = 0;
+ // buffer and line are only used when parsing a jsonArray
+ boost::scoped_array<char> buffer(new char[BUF_SIZE+2]);
+ char* line = buffer.get();
+ while ( _jsonArray || in->rdstate() == 0 ) {
try {
BSONObj o;
if (_jsonArray) {
- int jslen;
- o = fromjson(buf, &jslen);
- len += jslen;
- buf += jslen;
+ int bytesProcessed = 0;
+ if (line == buffer.get()) { // Only read on first pass - the whole array must be on one line.
+ bytesProcessed = getLine(in, line);
+ line += bytesProcessed;
+ len += bytesProcessed;
+ }
+ if ((bytesProcessed = parseJSONArray(line, o)) < 0) {
+ len += bytesProcessed;
+ break;
+ }
+ len += bytesProcessed;
+ line += len;
}
else {
- o = parseLine( buf );
+ if (!parseRow(in, o, len)) {
+ continue;
+ }
}
if ( _headerLine ) {
@@ -348,7 +433,7 @@ public:
}
catch ( std::exception& e ) {
cout << "exception:" << e.what() << endl;
- cout << buf << endl;
+ cout << line << endl;
errors++;
if (hasParam("stopOnError") || _jsonArray)
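The quoting rules handled by csvTokenizeRow() above can be illustrated with a simplified standalone sketch of how one row is expected to split; this is an illustration only, since the real tokenizer also trims whitespace around unquoted tokens:

    #include <iostream>
    #include <string>
    #include <vector>

    // Simplified CSV split: fields are comma separated, a quoted field may
    // contain commas, and a doubled quote inside a quoted field is a literal quote.
    static std::vector<std::string> tokenizeSketch(const std::string& row) {
        std::vector<std::string> tokens;
        std::string cur;
        bool inQuotes = false;
        for (std::string::size_type i = 0; i < row.size(); ++i) {
            char c = row[i];
            if (inQuotes) {
                if (c == '"') {
                    if (i + 1 < row.size() && row[i + 1] == '"') { cur += '"'; ++i; }
                    else inQuotes = false;
                }
                else cur += c;
            }
            else if (c == '"') inQuotes = true;
            else if (c == ',') { tokens.push_back(cur); cur.clear(); }
            else cur += c;
        }
        tokens.push_back(cur);
        return tokens;
    }

    int main() {
        // 1,"hello, ""world""",3  splits into:  [1] [hello, "world"] [3]
        std::vector<std::string> t = tokenizeSketch("1,\"hello, \"\"world\"\"\",3");
        for (std::vector<std::string>::size_type i = 0; i < t.size(); ++i)
            std::cout << "[" << t[i] << "] ";
        std::cout << std::endl;
        return 0;
    }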
diff --git a/tools/restore.cpp b/tools/restore.cpp
index 3ff6a742d99..9adf90bd209 100644
--- a/tools/restore.cpp
+++ b/tools/restore.cpp
@@ -25,6 +25,7 @@
#include <boost/program_options.hpp>
#include <fcntl.h>
+#include <set>
using namespace mongo;
@@ -40,6 +41,7 @@ public:
bool _drop;
string _curns;
string _curdb;
+ set<string> _users; // For restoring users with --drop
Restore() : BSONTool( "restore" ) , _drop(false) {
add_options()
@@ -208,13 +210,31 @@ public:
out() << "\t going into namespace [" << ns << "]" << endl;
if ( _drop ) {
- out() << "\t dropping" << endl;
- conn().dropCollection( ns );
+ if (root.leaf() != "system.users.bson" ) {
+ out() << "\t dropping" << endl;
+ conn().dropCollection( ns );
+ } else {
+ // Create map of the users currently in the DB
+ BSONObj fields = BSON("user" << 1);
+ scoped_ptr<DBClientCursor> cursor(conn().query(ns, Query(), 0, 0, &fields));
+ while (cursor->more()) {
+ BSONObj user = cursor->next();
+ _users.insert(user["user"].String());
+ }
+ }
}
_curns = ns.c_str();
_curdb = NamespaceString(_curns).db;
processFile( root );
+ if (_drop && root.leaf() == "system.users.bson") {
+ // Delete any users that used to exist but weren't in the dump file
+ for (set<string>::iterator it = _users.begin(); it != _users.end(); ++it) {
+ BSONObj userMatch = BSON("user" << *it);
+ conn().remove(ns, Query(userMatch));
+ }
+ _users.clear();
+ }
}
virtual void gotObject( const BSONObj& obj ) {
@@ -260,7 +280,13 @@ public:
::abort();
}
}
- else {
+ else if (_drop && endsWith(_curns.c_str(), ".system.users") && _users.count(obj["user"].String())) {
+ // Since system collections can't be dropped, we have to manually
+ // replace the contents of the system.users collection
+ BSONObj userMatch = BSON("user" << obj["user"].String());
+ conn().update(_curns, Query(userMatch), obj);
+ _users.erase(obj["user"].String());
+ } else {
conn().insert( _curns , obj );
}
}
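The --drop handling for system.users above amounts to a replace-then-prune pass over the set of user names that existed before the restore; a minimal standalone sketch of that bookkeeping with plain std::set (the driver calls appear only in comments):

    #include <iostream>
    #include <set>
    #include <string>
    #include <vector>

    int main() {
        // Users found in the target database before the restore.
        std::set<std::string> existing;
        existing.insert("alice");
        existing.insert("bob");

        // Users present in the system.users dump being restored.
        std::vector<std::string> dumped;
        dumped.push_back("alice");
        dumped.push_back("carol");

        // Each dumped user is written back (conn().update(...) in the tool);
        // if it already existed it is crossed off the set.
        for (std::vector<std::string>::size_type i = 0; i < dumped.size(); ++i)
            existing.erase(dumped[i]);

        // Whatever remains existed before but is absent from the dump, so it
        // is removed (conn().remove(...) in the tool).
        for (std::set<std::string>::iterator it = existing.begin(); it != existing.end(); ++it)
            std::cout << "would remove user: " << *it << std::endl;
        return 0;
    }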
diff --git a/tools/tool.cpp b/tools/tool.cpp
index 98e18a9226a..d938e752041 100644
--- a/tools/tool.cpp
+++ b/tools/tool.cpp
@@ -380,8 +380,15 @@ namespace mongo {
if ( ! dbname.size() )
dbname = _db;
- if ( ! ( _username.size() || _password.size() ) )
+ if ( ! ( _username.size() || _password.size() ) ) {
+ // Make sure that we don't need authentication to connect to this db
+ // findOne throws an AssertionException if it's not authenticated.
+ if (_coll.size() > 0) {
+ // BSONTools don't have a collection
+ conn().findOne(getNS(), Query("{}"));
+ }
return;
+ }
string errmsg;
if ( _conn->auth( dbname , _username , _password , errmsg ) )
@@ -396,7 +403,7 @@ namespace mongo {
}
BSONTool::BSONTool( const char * name, DBAccess access , bool objcheck )
- : Tool( name , access , "" , "" ) , _objcheck( objcheck ) {
+ : Tool( name , access , "" , "" , false ) , _objcheck( objcheck ) {
add_options()
("objcheck" , "validate object before inserting" )
@@ -489,9 +496,9 @@ namespace mongo {
fclose( file );
uassert( 10265 , "counts don't match" , m.done() == fileLength );
- out() << "\t " << m.hits() << " objects found" << endl;
+ (_usesstdout ? cout : cerr ) << m.hits() << " objects found" << endl;
if ( _matcher.get() )
- out() << "\t " << processed << " objects processed" << endl;
+ (_usesstdout ? cout : cerr ) << processed << " objects processed" << endl;
return processed;
}
diff --git a/util/alignedbuilder.cpp b/util/alignedbuilder.cpp
index 732ef99c764..b2e0461b733 100644
--- a/util/alignedbuilder.cpp
+++ b/util/alignedbuilder.cpp
@@ -32,9 +32,30 @@ namespace mongo {
/** reset for a re-use. shrinks if > 128MB */
void AlignedBuilder::reset() {
_len = 0;
- const unsigned sizeCap = 128*1024*1024;
- if (_p._size > sizeCap)
- _realloc(sizeCap, _len);
+ RARELY {
+ const unsigned sizeCap = 128*1024*1024;
+ if (_p._size > sizeCap)
+ _realloc(sizeCap, _len);
+ }
+ }
+
+ /** reset with a hint as to the upcoming needed size specified */
+ void AlignedBuilder::reset(unsigned sz) {
+ _len = 0;
+ unsigned Q = 32 * 1024 * 1024 - 1;
+ unsigned want = (sz+Q) & (~Q);
+ if( _p._size == want ) {
+ return;
+ }
+ if( _p._size > want ) {
+ if( _p._size <= 64 * 1024 * 1024 )
+ return;
+ bool downsize = false;
+ RARELY { downsize = true; }
+ if( !downsize )
+ return;
+ }
+ _realloc(want, _len);
}
void AlignedBuilder::mallocSelfAligned(unsigned sz) {
@@ -52,10 +73,16 @@ namespace mongo {
/* "slow"/infrequent portion of 'grow()' */
void NOINLINE_DECL AlignedBuilder::growReallocate(unsigned oldLen) {
+ dassert( _len > _p._size );
unsigned a = _p._size;
assert( a );
while( 1 ) {
- a *= 2;
+ if( a < 128 * 1024 * 1024 )
+ a *= 2;
+ else if( sizeof(int*) == 4 )
+ a += 32 * 1024 * 1024;
+ else
+ a += 64 * 1024 * 1024;
DEV if( a > 256*1024*1024 ) {
log() << "dur AlignedBuilder too big, aborting in _DEBUG build" << endl;
abort();
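reset(sz) above rounds the size hint up to the next 32 MB boundary with (sz + Q) & ~Q, and growReallocate() doubles the buffer until 128 MB and then grows by a fixed 32 MB (32-bit) or 64 MB (64-bit) step; a small standalone sketch of that arithmetic:

    #include <cassert>
    #include <cstdio>

    int main() {
        // Round a size hint up to a multiple of 32 MB, as reset(sz) does.
        const unsigned Q = 32 * 1024 * 1024 - 1;
        unsigned sz   = 5 * 1024 * 1024;            // 5 MB hint
        unsigned want = (sz + Q) & (~Q);            // -> 32 MB
        assert(want == 32u * 1024 * 1024);

        sz   = 40 * 1024 * 1024;                    // 40 MB hint
        want = (sz + Q) & (~Q);                     // -> 64 MB
        assert(want == 64u * 1024 * 1024);

        // Growth policy from growReallocate(): double below 128 MB, then add
        // a fixed step (64 MB shown here, as on a 64-bit build).
        unsigned a = 64 * 1024 * 1024;
        a = (a < 128 * 1024 * 1024) ? a * 2 : a + 64 * 1024 * 1024;   // -> 128 MB
        a = (a < 128 * 1024 * 1024) ? a * 2 : a + 64 * 1024 * 1024;   // -> 192 MB
        std::printf("grown to %u MB\n", a / (1024 * 1024));
        return 0;
    }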
diff --git a/util/alignedbuilder.h b/util/alignedbuilder.h
index 8760bfb9a44..1d246a9d78e 100644
--- a/util/alignedbuilder.h
+++ b/util/alignedbuilder.h
@@ -28,6 +28,9 @@ namespace mongo {
AlignedBuilder(unsigned init_size);
~AlignedBuilder() { kill(); }
+ /** reset with a hint as to the upcoming needed size specified */
+ void reset(unsigned sz);
+
/** reset for a re-use. shrinks if > 128MB */
void reset();
@@ -43,8 +46,12 @@ namespace mongo {
return l;
}
+ /** if buffer grows pointer no longer valid */
char* atOfs(unsigned ofs) { return _p._data + ofs; }
+ /** if buffer grows pointer no longer valid */
+ char* cur() { return _p._data + _len; }
+
void appendChar(char j) {
*((char*)grow(sizeof(char))) = j;
}
@@ -94,7 +101,7 @@ namespace mongo {
inline char* grow(unsigned by) {
unsigned oldlen = _len;
_len += by;
- if ( _len > _p._size ) {
+ if (MONGO_unlikely( _len > _p._size )) {
growReallocate(oldlen);
}
return _p._data + oldlen;
diff --git a/util/array.h b/util/array.h
index bf705a4d988..12822252fd7 100644
--- a/util/array.h
+++ b/util/array.h
@@ -18,6 +18,12 @@
namespace mongo {
+ /*
+ * simple array class that does no allocations
+ * same api as vector
+ * fixed buffer, so once capacity is exceeded, will assert
+ * meant to be re-used with clear()
+ */
template<typename T>
class FastArray {
public:
@@ -44,6 +50,7 @@ namespace mongo {
}
void push_back( const T& t ) {
+ assert( _size < _capacity );
_data[_size++] = t;
}
diff --git a/util/assert_util.cpp b/util/assert_util.cpp
index 52947bc02b8..da039c09a58 100644
--- a/util/assert_util.cpp
+++ b/util/assert_util.cpp
@@ -66,11 +66,23 @@ namespace mongo {
/* "warning" assert -- safe to continue, so we don't throw exception. */
NOINLINE_DECL void wasserted(const char *msg, const char *file, unsigned line) {
- problem() << "warning Assertion failure " << msg << ' ' << file << ' ' << dec << line << endl;
+ static bool rateLimited;
+ static time_t lastWhen;
+ static unsigned lastLine;
+ if( lastLine == line && time(0)-lastWhen < 5 ) {
+ if( rateLimited++ == 0 ) {
+ log() << "rate limiting wassert" << endl;
+ }
+ return;
+ }
+ lastWhen = time(0);
+ lastLine = line;
+
+ problem() << "warning assertion failure " << msg << ' ' << file << ' ' << dec << line << endl;
sayDbContext();
raiseError(0,msg && *msg ? msg : "wassertion failure");
assertionCount.condrollover( ++assertionCount.warning );
-#if defined(_DEBUG) || defined(_DURABLEDEFAULTON)
+#if defined(_DEBUG) || defined(_DURABLEDEFAULTON) || defined(_DURABLEDEFAULTOFF)
// this is so we notice in buildbot
log() << "\n\n***aborting after wassert() failure in a debug/test build\n\n" << endl;
abort();
@@ -86,7 +98,7 @@ namespace mongo {
temp << "assertion " << file << ":" << line;
AssertionException e(temp.str(),0);
breakpoint();
-#if defined(_DEBUG) || defined(_DURABLEDEFAULTON)
+#if defined(_DEBUG) || defined(_DURABLEDEFAULTON) || defined(_DURABLEDEFAULTOFF)
// this is so we notice in buildbot
log() << "\n\n***aborting after assert() failure in a debug/test build\n\n" << endl;
abort();
@@ -103,7 +115,7 @@ namespace mongo {
temp << msgid;
AssertionException e(temp.str(),0);
breakpoint();
-#if defined(_DEBUG) || defined(_DURABLEDEFAULTON)
+#if defined(_DEBUG) || defined(_DURABLEDEFAULTON) || defined(_DURABLEDEFAULTOFF)
// this is so we notice in buildbot
log() << "\n\n***aborting after verify() failure in a debug/test build\n\n" << endl;
abort();
diff --git a/util/assert_util.h b/util/assert_util.h
index 244fb2287e1..b4c68b7de34 100644
--- a/util/assert_util.h
+++ b/util/assert_util.h
@@ -175,15 +175,15 @@ namespace mongo {
#undef assert
#endif
-#define MONGO_assert(_Expression) (void)( (!!(_Expression)) || (mongo::asserted(#_Expression, __FILE__, __LINE__), 0) )
+#define MONGO_assert(_Expression) (void)( MONGO_likely(!!(_Expression)) || (mongo::asserted(#_Expression, __FILE__, __LINE__), 0) )
#define assert MONGO_assert
/* "user assert". if asserts, user did something wrong, not our code */
-#define MONGO_uassert(msgid, msg, expr) (void)( (!!(expr)) || (mongo::uasserted(msgid, msg), 0) )
+#define MONGO_uassert(msgid, msg, expr) (void)( MONGO_likely(!!(expr)) || (mongo::uasserted(msgid, msg), 0) )
#define uassert MONGO_uassert
/* warning only - keeps going */
-#define MONGO_wassert(_Expression) (void)( (!!(_Expression)) || (mongo::wasserted(#_Expression, __FILE__, __LINE__), 0) )
+#define MONGO_wassert(_Expression) (void)( MONGO_likely(!!(_Expression)) || (mongo::wasserted(#_Expression, __FILE__, __LINE__), 0) )
#define wassert MONGO_wassert
/* display a message, no context, and throw assertionexception
@@ -191,7 +191,7 @@ namespace mongo {
easy way to throw an exception and log something without our stack trace
display happening.
*/
-#define MONGO_massert(msgid, msg, expr) (void)( (!!(expr)) || (mongo::msgasserted(msgid, msg), 0) )
+#define MONGO_massert(msgid, msg, expr) (void)( MONGO_likely(!!(expr)) || (mongo::msgasserted(msgid, msg), 0) )
#define massert MONGO_massert
/* dassert is 'debug assert' -- might want to turn off for production as these
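The change above only wraps the assert conditions in MONGO_likely(); the macro itself is defined elsewhere in the tree. As an illustration (an assumption, using hypothetical sketch names rather than the real macros), such branch-prediction hints are typically built on __builtin_expect:

    #include <cstdio>

    // Sketch only: the real MONGO_likely/MONGO_unlikely definitions live in
    // another header; this shows the usual shape of such hint macros.
    #if defined(__GNUC__)
    # define LIKELY_SKETCH(x)   __builtin_expect(!!(x), 1)
    # define UNLIKELY_SKETCH(x) __builtin_expect(!!(x), 0)
    #else
    # define LIKELY_SKETCH(x)   (!!(x))
    # define UNLIKELY_SKETCH(x) (!!(x))
    #endif

    int main(int argc, char**) {
        // The hint does not change behaviour, only how the compiler lays out
        // the branches; the assert macros above use it the same way.
        if (UNLIKELY_SKETCH(argc > 100))
            std::printf("unexpected path\n");
        else
            std::printf("expected path\n");
        return 0;
    }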
diff --git a/util/bufreader.h b/util/bufreader.h
index a0dcefa8d83..53f0ba744e2 100644
--- a/util/bufreader.h
+++ b/util/bufreader.h
@@ -28,6 +28,7 @@ namespace mongo {
public:
class eof : public std::exception {
public:
+ eof() { }
virtual const char * what() { return "BufReader eof"; }
};
@@ -88,6 +89,7 @@ namespace mongo {
}
const void* pos() { return _pos; }
+ const void* start() { return _start; }
private:
const void *_start;
diff --git a/util/compress.cpp b/util/compress.cpp
new file mode 100644
index 00000000000..bcde488b88b
--- /dev/null
+++ b/util/compress.cpp
@@ -0,0 +1,31 @@
+// @file compress.cpp
+
+#include "../third_party/snappy/snappy.h"
+#include "compress.h"
+#include <string>
+#include <string.h>
+#include <assert.h>
+
+namespace mongo {
+
+ void rawCompress(const char* input,
+ size_t input_length,
+ char* compressed,
+ size_t* compressed_length)
+ {
+ snappy::RawCompress(input, input_length, compressed, compressed_length);
+ }
+
+ size_t maxCompressedLength(size_t source_len) {
+ return snappy::MaxCompressedLength(source_len);
+ }
+
+ size_t compress(const char* input, size_t input_length, std::string* output) {
+ return snappy::Compress(input, input_length, output);
+ }
+
+ bool uncompress(const char* compressed, size_t compressed_length, std::string* uncompressed) {
+ return snappy::Uncompress(compressed, compressed_length, uncompressed);
+ }
+
+}
diff --git a/util/compress.h b/util/compress.h
new file mode 100644
index 00000000000..5bc5a3392bb
--- /dev/null
+++ b/util/compress.h
@@ -0,0 +1,21 @@
+// @file compress.h
+
+#pragma once
+
+#include <string>
+
+namespace mongo {
+
+ size_t compress(const char* input, size_t input_length, std::string* output);
+
+ bool uncompress(const char* compressed, size_t compressed_length, std::string* uncompressed);
+
+ size_t maxCompressedLength(size_t source_len);
+ void rawCompress(const char* input,
+ size_t input_length,
+ char* compressed,
+ size_t* compressed_length);
+
+}
+
+
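A minimal sketch exercising the wrappers declared in util/compress.h above; the include path and the sample buffer are assumptions for illustration:

    #include <cassert>
    #include <cstddef>
    #include <string>
    #include <vector>
    #include "util/compress.h"   // path assumed relative to the source root

    int main() {
        const std::string data(8192, 'a');                // compressible sample

        // Raw interface: the caller supplies a buffer of maxCompressedLength() bytes.
        std::vector<char> buf(mongo::maxCompressedLength(data.size()));
        size_t clen = 0;
        mongo::rawCompress(data.data(), data.size(), &buf[0], &clen);

        // Round-trip through the string-based uncompress wrapper.
        std::string restored;
        bool ok = mongo::uncompress(&buf[0], clen, &restored);
        assert(ok && restored == data);
        return 0;
    }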
diff --git a/util/concurrency/mutex.h b/util/concurrency/mutex.h
index 44c2ebee0ea..6ca76570cbf 100644
--- a/util/concurrency/mutex.h
+++ b/util/concurrency/mutex.h
@@ -24,6 +24,8 @@
namespace mongo {
+ void printStackTrace( ostream &o );
+
class mutex;
inline boost::xtime incxtimemillis( long long s ) {
@@ -86,6 +88,16 @@ namespace mongo {
class scoped_lock : boost::noncopyable {
public:
#if defined(_DEBUG)
+ struct PostStaticCheck {
+ PostStaticCheck() {
+ if ( StaticObserver::_destroyingStatics ) {
+ cout << "trying to lock a mongo::mutex during static shutdown" << endl;
+ printStackTrace( cout );
+ }
+ }
+ };
+
+ PostStaticCheck _check;
mongo::mutex * const _mut;
#endif
scoped_lock( mongo::mutex &m ) :
diff --git a/util/concurrency/race.h b/util/concurrency/race.h
index 924d6d2fc5a..6be13363a6f 100644
--- a/util/concurrency/race.h
+++ b/util/concurrency/race.h
@@ -7,6 +7,12 @@ namespace mongo {
namespace race {
+#ifdef _WIN32
+ typedef unsigned threadId_t;
+#else
+ typedef pthread_t threadId_t;
+#endif
+
#if defined(_DEBUG)
class Block {
diff --git a/util/concurrency/rwlock.h b/util/concurrency/rwlock.h
index d14774b4ece..c281e54ecf0 100644
--- a/util/concurrency/rwlock.h
+++ b/util/concurrency/rwlock.h
@@ -38,20 +38,22 @@ namespace mongo {
DEV mutexDebugger.leaving(_name);
RWLockBase::unlock();
}
+
+ void lock_shared() { RWLockBase::lock_shared(); }
+ void unlock_shared() { RWLockBase::unlock_shared(); }
+
void lockAsUpgradable() { RWLockBase::lockAsUpgradable(); }
void unlockFromUpgradable() { // upgradable -> unlocked
RWLockBase::unlockFromUpgradable();
}
void upgrade() { // upgradable -> exclusive lock
RWLockBase::upgrade();
- DEV mutexDebugger.entering(_name);
}
- void lock_shared() { RWLockBase::lock_shared(); }
- void unlock_shared() { RWLockBase::unlock_shared(); }
+
bool lock_shared_try( int millis ) { return RWLockBase::lock_shared_try(millis); }
+
bool lock_try( int millis = 0 ) {
if( RWLockBase::lock_try(millis) ) {
- DEV mutexDebugger.entering(_name);
return true;
}
return false;
diff --git a/util/concurrency/synchronization.cpp b/util/concurrency/synchronization.cpp
index 0ddc417eff1..ce2547c25eb 100644
--- a/util/concurrency/synchronization.cpp
+++ b/util/concurrency/synchronization.cpp
@@ -43,6 +43,7 @@ namespace mongo {
NotifyAll::NotifyAll() : _mutex("NotifyAll") {
_lastDone = 0;
_lastReturned = 0;
+ _nWaiting = 0;
}
NotifyAll::When NotifyAll::now() {
@@ -52,6 +53,7 @@ namespace mongo {
void NotifyAll::waitFor(When e) {
scoped_lock lock( _mutex );
+ ++_nWaiting;
while( _lastDone < e ) {
_condition.wait( lock.boost() );
}
@@ -59,6 +61,7 @@ namespace mongo {
void NotifyAll::awaitBeyondNow() {
scoped_lock lock( _mutex );
+ ++_nWaiting;
When e = ++_lastReturned;
while( _lastDone <= e ) {
_condition.wait( lock.boost() );
@@ -68,6 +71,7 @@ namespace mongo {
void NotifyAll::notifyAll(When e) {
scoped_lock lock( _mutex );
_lastDone = e;
+ _nWaiting = 0;
_condition.notify_all();
}
diff --git a/util/concurrency/synchronization.h b/util/concurrency/synchronization.h
index 2467292616f..a0e89f7246b 100644
--- a/util/concurrency/synchronization.h
+++ b/util/concurrency/synchronization.h
@@ -65,16 +65,21 @@ namespace mongo {
*/
void waitFor(When);
+ /** a bit faster than waitFor( now() ) */
void awaitBeyondNow();
/** may be called multiple times. notifies all waiters */
void notifyAll(When);
+ /** indicates how many threads are waiting for a notify. */
+ unsigned nWaiting() const { return _nWaiting; }
+
private:
mongo::mutex _mutex;
boost::condition _condition;
When _lastDone;
When _lastReturned;
+ unsigned _nWaiting;
};
} // namespace mongo
diff --git a/util/file.h b/util/file.h
index 826a905b90e..368e6927b43 100644
--- a/util/file.h
+++ b/util/file.h
@@ -47,6 +47,9 @@ namespace mongo {
fileofs len() { return 0; }
void fsync() { assert(false); }
+ // shrink file to size bytes. No-op if file already smaller.
+ void truncate(fileofs size);
+
/** @return -1 if error or unavailable */
static boost::intmax_t freeSpace(const string &path) { assert(false); return -1; }
};
@@ -57,10 +60,11 @@ namespace mongo {
class File : public FileInterface {
HANDLE fd;
bool _bad;
+ string _name;
void err(BOOL b=false) { /* false = error happened */
if( !b && !_bad ) {
_bad = true;
- log() << "File I/O error " << GetLastError() << '\n';
+ log() << "File " << _name << "I/O error " << GetLastError() << '\n';
}
}
public:
@@ -73,6 +77,7 @@ namespace mongo {
fd = INVALID_HANDLE_VALUE;
}
void open(const char *filename, bool readOnly=false , bool direct=false) {
+ _name = filename;
fd = CreateFile(
toNativeString(filename).c_str(),
( readOnly ? 0 : GENERIC_WRITE ) | GENERIC_READ, FILE_SHARE_WRITE|FILE_SHARE_READ,
@@ -123,6 +128,20 @@ namespace mongo {
return li.QuadPart;
}
void fsync() { FlushFileBuffers(fd); }
+
+ void truncate(fileofs size) {
+ if (len() <= size)
+ return;
+
+ LARGE_INTEGER li;
+ li.QuadPart = size;
+ if (SetFilePointerEx(fd, li, NULL, FILE_BEGIN) == 0){
+ err(false);
+ return; //couldn't seek
+ }
+
+ err(SetEndOfFile(fd));
+ }
};
#else
@@ -194,6 +213,13 @@ namespace mongo {
assert( !statvfs( path.c_str() , &info ) );
return boost::intmax_t( info.f_bavail ) * info.f_frsize;
}
+
+ void truncate(fileofs size) {
+ if (len() <= size)
+ return;
+
+ err(ftruncate(fd, size) == 0);
+ }
};
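A hedged sketch of the new FileInterface::truncate(), using only members visible above (open, len, truncate, fsync); the file name and size are hypothetical.

    #include "util/file.h"   // path assumed

    void shrinkFileSketch() {
        mongo::File f;
        f.open( "journal.tmp" );   // hypothetical file, opened read-write
        if( f.len() > 8192 )
            f.truncate( 8192 );    // no-op when the file is already 8192 bytes or smaller
        f.fsync();
    }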
diff --git a/util/file_allocator.cpp b/util/file_allocator.cpp
index bf01d90865f..b0572f971bd 100644
--- a/util/file_allocator.cpp
+++ b/util/file_allocator.cpp
@@ -287,8 +287,8 @@ namespace mongo {
if ( fd > 0 )
close( fd );
log() << "error failed to allocate new file: " << name
- << " size: " << size << ' ' << errnoWithDescription() << endl;
- log() << " will try again in 10 seconds" << endl;
+ << " size: " << size << ' ' << errnoWithDescription() << warnings;
+ log() << " will try again in 10 seconds" << endl; // not going to warning logs
try {
if ( tmp.size() )
BOOST_CHECK_EXCEPTION( boost::filesystem::remove( tmp ) );
diff --git a/util/goodies.h b/util/goodies.h
index 51a80f6783c..65bfbaba982 100644
--- a/util/goodies.h
+++ b/util/goodies.h
@@ -109,6 +109,8 @@ namespace mongo {
// PRINTFL; prints file:line
#define MONGO_PRINTFL cout << __FILE__ ":" << __LINE__ << endl
#define PRINTFL MONGO_PRINTFL
+#define MONGO_FLOG log() << __FILE__ ":" << __LINE__ << endl
+#define FLOG MONGO_FLOG
#undef assert
#define assert MONGO_assert
diff --git a/util/log.h b/util/log.h
index b49d960c41d..d5c7e55aae0 100644
--- a/util/log.h
+++ b/util/log.h
@@ -298,6 +298,9 @@ namespace mongo {
}
public:
static Logstream& get() {
+ if ( StaticObserver::_destroyingStatics ) {
+ cout << "Logstream::get called in uninitialized state" << endl;
+ }
Logstream *p = tsp.get();
if( p == 0 )
tsp.reset( p = new Logstream() );
@@ -342,7 +345,7 @@ namespace mongo {
return Logstream::get().prolog();
}
-#define MONGO_LOG(level) MONGO_IF ( logLevel >= (level) ) log( level )
+#define MONGO_LOG(level) if ( MONGO_unlikely(logLevel >= (level)) ) log( level )
#define LOG MONGO_LOG
inline Nullstream& log( LogLevel l ) {
@@ -517,4 +520,6 @@ namespace mongo {
}
};
+ extern Tee* const warnings; // Things put here go in serverStatus
+
} // namespace mongo
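A short sketch of the two logging changes above: LOG(level) now short-circuits through MONGO_unlikely, so its stream arguments are only evaluated when logLevel is high enough, and anything streamed to the warnings Tee is additionally recorded for serverStatus. expensiveDump() is a hypothetical stand-in for costly formatting.

    #include "util/log.h"   // path assumed
    using namespace mongo;

    static std::string expensiveDump() { return "..."; }   // placeholder for work we only want when verbose

    void logSketch() {
        LOG(2) << "index details: " << expensiveDump() << endl;   // expensiveDump() not called unless logLevel >= 2
        log() << "low disk space on /data/db" << warnings;        // also captured in the "warnings" RamLog
    }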
diff --git a/util/logfile.cpp b/util/logfile.cpp
index 37e14b47678..609edb8fe2d 100644
--- a/util/logfile.cpp
+++ b/util/logfile.cpp
@@ -77,9 +77,18 @@ namespace mongo {
CloseHandle(_fd);
}
+ void LogFile::truncate() {
+ verify(15870, _fd != INVALID_HANDLE_VALUE);
+
+ if (!SetEndOfFile(_fd)){
+ msgasserted(15871, "Couldn't truncate file: " + errnoWithDescription());
+ }
+ }
+
void LogFile::synchronousAppend(const void *_buf, size_t _len) {
const size_t BlockSize = 8 * 1024 * 1024;
assert(_fd);
+ assert(_len % 4096 == 0);
const char *buf = (const char *) _buf;
size_t left = _len;
while( left ) {
@@ -88,7 +97,7 @@ namespace mongo {
if( !WriteFile(_fd, buf, toWrite, &written, NULL) ) {
DWORD e = GetLastError();
if( e == 87 )
- msgasserted(13519, "error 87 appending to file - misaligned direct write?");
+ msgasserted(13519, "error 87 appending to file - invalid parameter");
else
uasserted(13517, str::stream() << "error appending to file " << _name << ' ' << _len << ' ' << toWrite << ' ' << errnoWithDescription(e));
}
@@ -150,8 +159,20 @@ namespace mongo {
_fd = -1;
}
+ void LogFile::truncate() {
+ verify(15872, _fd >= 0);
+
+ BOOST_STATIC_ASSERT(sizeof(off_t) == 8); // we don't want overflow here
+ const off_t pos = lseek(_fd, 0, SEEK_CUR); // doesn't actually seek
+ if (ftruncate(_fd, pos) != 0){
+ msgasserted(15873, "Couldn't truncate file: " + errnoWithDescription());
+ }
+ }
+
void LogFile::synchronousAppend(const void *b, size_t len) {
- off_t pos = lseek(_fd, 0, SEEK_CUR); // doesn't actually seek
+#ifdef POSIX_FADV_DONTNEED
+ const off_t pos = lseek(_fd, 0, SEEK_CUR); // doesn't actually seek
+#endif
const char *buf = (char *) b;
assert(_fd);
diff --git a/util/logfile.h b/util/logfile.h
index e4bbc467cb5..f6d1c94bf22 100644
--- a/util/logfile.h
+++ b/util/logfile.h
@@ -38,6 +38,8 @@ namespace mongo {
const string _name;
+ void truncate(); // Removes extra data after current position
+
private:
#if defined(_WIN32)
typedef HANDLE fd_type;
diff --git a/util/net/httpclient.cpp b/util/net/httpclient.cpp
index de45023c2aa..16eaa0ae80a 100644
--- a/util/net/httpclient.cpp
+++ b/util/net/httpclient.cpp
@@ -38,8 +38,15 @@ namespace mongo {
}
int HttpClient::_go( const char * command , string url , const char * body , Result * result ) {
- uassert( 10271 , "invalid url" , url.find( "http://" ) == 0 );
- url = url.substr( 7 );
+ bool ssl = false;
+ if ( url.find( "https://" ) == 0 ) {
+ ssl = true;
+ url = url.substr( 8 );
+ }
+ else {
+ uassert( 10271 , "invalid url" , url.find( "http://" ) == 0 );
+ url = url.substr( 7 );
+ }
string host , path;
if ( url.find( "/" ) == string::npos ) {
@@ -56,7 +63,7 @@ namespace mongo {
HD( "path [" << path << "]" );
string server = host;
- int port = 80;
+ int port = ssl ? 443 : 80;
string::size_type idx = host.find( ":" );
if ( idx != string::npos ) {
@@ -92,6 +99,15 @@ namespace mongo {
Socket sock;
if ( ! sock.connect( addr ) )
return -1;
+
+ if ( ssl ) {
+#ifdef MONGO_SSL
+ _checkSSLManager();
+ sock.secure( _sslManager.get() );
+#else
+ uasserted( 15862 , "no ssl support" );
+#endif
+ }
{
const char * out = req.c_str();
@@ -152,5 +168,10 @@ namespace mongo {
_body = entire;
}
+#ifdef MONGO_SSL
+ void HttpClient::_checkSSLManager() {
+ _sslManager.reset( new SSLManager( true ) );
+ }
+#endif
}
diff --git a/util/net/httpclient.h b/util/net/httpclient.h
index dadcc72f226..c3f8c824adc 100644
--- a/util/net/httpclient.h
+++ b/util/net/httpclient.h
@@ -18,10 +18,11 @@
#pragma once
#include "../../pch.h"
+#include "sock.h"
namespace mongo {
- class HttpClient {
+ class HttpClient : boost::noncopyable {
public:
typedef map<string,string> Headers;
@@ -68,6 +69,11 @@ namespace mongo {
private:
int _go( const char * command , string url , const char * body , Result * result );
+#ifdef MONGO_SSL
+ void _checkSSLManager();
+
+ scoped_ptr<SSLManager> _sslManager;
+#endif
};
}
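A hedged client-side sketch of the new https handling. It assumes an HttpClient::get( url , &result ) entry point from the existing header (not shown in this hunk) and a build with MONGO_SSL; the URL is illustrative.

    #include "util/net/httpclient.h"   // path assumed

    int fetchOverSslSketch() {
        mongo::HttpClient c;
        mongo::HttpClient::Result r;
        // an https:// prefix now implies port 443 and an SSL handshake before the request is sent
        return c.get( "https://www.example.com/" , &r );
    }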
diff --git a/util/net/listen.cpp b/util/net/listen.cpp
index 53139ccc385..16ddde880b1 100644
--- a/util/net/listen.cpp
+++ b/util/net/listen.cpp
@@ -95,20 +95,42 @@ namespace mongo {
return out;
}
+
+ Listener::Listener(const string& name, const string &ip, int port, bool logConnect )
+ : _port(port), _name(name), _ip(ip), _logConnect(logConnect), _elapsedTime(0) {
+#ifdef MONGO_SSL
+ _ssl = 0;
+ _sslPort = 0;
+
+ if ( cmdLine.sslOnNormalPorts && cmdLine.sslServerManager ) {
+ secure( cmdLine.sslServerManager );
+ }
+#endif
+ }
+
+ Listener::~Listener() {
+ if ( _timeTracker == this )
+ _timeTracker = 0;
+ }
- void Listener::initAndListen() {
- checkTicketNumbers();
- vector<SockAddr> mine = ipToAddrs(_ip.c_str(), _port, (!cmdLine.noUnixSocket && useUnixSockets()));
- vector<int> socks;
- SOCKET maxfd = 0; // needed for select()
+#ifdef MONGO_SSL
+ void Listener::secure( SSLManager* manager ) {
+ _ssl = manager;
+ }
- for (vector<SockAddr>::iterator it=mine.begin(), end=mine.end(); it != end; ++it) {
- SockAddr& me = *it;
+ void Listener::addSecurePort( SSLManager* manager , int additionalPort ) {
+ _ssl = manager;
+ _sslPort = additionalPort;
+ }
+
+#endif
+
+ bool Listener::_setupSockets( const vector<SockAddr>& mine , vector<int>& socks ) {
+ for (vector<SockAddr>::const_iterator it=mine.begin(), end=mine.end(); it != end; ++it) {
+ const SockAddr& me = *it;
SOCKET sock = ::socket(me.getType(), SOCK_STREAM, 0);
- if ( sock == INVALID_SOCKET ) {
- log() << "ERROR: listen(): invalid socket? " << errnoWithDescription() << endl;
- }
+ massert( 15863 , str::stream() << "listen(): invalid socket? " << errnoWithDescription() , sock >= 0 );
if (me.getType() == AF_UNIX) {
#if !defined(_WIN32)
@@ -138,42 +160,90 @@ namespace mongo {
if ( ::bind(sock, me.raw(), me.addressSize) != 0 ) {
int x = errno;
- log() << "listen(): bind() failed " << errnoWithDescription(x) << " for socket: " << me.toString() << endl;
+ error() << "listen(): bind() failed " << errnoWithDescription(x) << " for socket: " << me.toString() << endl;
if ( x == EADDRINUSE )
- log() << " addr already in use" << endl;
+ error() << " addr already in use" << endl;
closesocket(sock);
- return;
+ return false;
}
#if !defined(_WIN32)
if (me.getType() == AF_UNIX) {
if (chmod(me.getAddr().c_str(), 0777) == -1) {
- log() << "couldn't chmod socket file " << me << errnoWithDescription() << endl;
+ error() << "couldn't chmod socket file " << me << errnoWithDescription() << endl;
}
-
ListeningSockets::get()->addPath( me.getAddr() );
}
#endif
-
+
if ( ::listen(sock, 128) != 0 ) {
- log() << "listen(): listen() failed " << errnoWithDescription() << endl;
+ error() << "listen(): listen() failed " << errnoWithDescription() << endl;
closesocket(sock);
- return;
+ return false;
}
ListeningSockets::get()->add( sock );
socks.push_back(sock);
- if (sock > maxfd)
- maxfd = sock;
}
+
+ return true;
+ }
+
+ void Listener::initAndListen() {
+ checkTicketNumbers();
+ vector<int> socks;
+ set<int> sslSocks;
+
+ { // normal sockets
+ vector<SockAddr> mine = ipToAddrs(_ip.c_str(), _port, (!cmdLine.noUnixSocket && useUnixSockets()));
+ if ( ! _setupSockets( mine , socks ) )
+ return;
+ }
+
+#ifdef MONGO_SSL
+ if ( _ssl && _sslPort > 0 ) {
+ unsigned prev = socks.size();
+
+ vector<SockAddr> mine = ipToAddrs(_ip.c_str(), _sslPort, false );
+ if ( ! _setupSockets( mine , socks ) )
+ return;
+
+ for ( unsigned i=prev; i<socks.size(); i++ ) {
+ sslSocks.insert( socks[i] );
+ }
+
+ }
+#endif
+
+ SOCKET maxfd = 0; // needed for select()
+ for ( unsigned i=0; i<socks.size(); i++ ) {
+ if ( socks[i] > maxfd )
+ maxfd = socks[i];
+ }
+
+#ifdef MONGO_SSL
+ if ( _ssl == 0 ) {
+ _logListen( _port , false );
+ }
+ else if ( _sslPort == 0 ) {
+ _logListen( _port , true );
+ }
+ else {
+ // both
+ _logListen( _port , false );
+ _logListen( _sslPort , true );
+ }
+#else
+ _logListen( _port , false );
+#endif
static long connNumber = 0;
struct timeval maxSelectTime;
while ( ! inShutdown() ) {
fd_set fds[1];
FD_ZERO(fds);
-
+
for (vector<int>::iterator it=socks.begin(), end=socks.end(); it != end; ++it) {
FD_SET(*it, fds);
}
@@ -233,13 +303,25 @@ namespace mongo {
disableNagle(s);
if ( _logConnect && ! cmdLine.quiet )
log() << "connection accepted from " << from.toString() << " #" << ++connNumber << endl;
- accepted(s, from);
+
+ Socket newSock = Socket(s, from);
+#ifdef MONGO_SSL
+ if ( _ssl && ( _sslPort == 0 || sslSocks.count(*it) ) ) {
+ newSock.secureAccepted( _ssl );
+ }
+#endif
+ accepted( newSock );
}
}
}
- void Listener::accepted(int sock, const SockAddr& from) {
- accepted( new MessagingPort(sock, from) );
+ void Listener::_logListen( int port , bool ssl ) {
+ log() << _name << ( _name.size() ? " " : "" ) << "waiting for connections on port " << port << ( ssl ? " ssl" : "" ) << endl;
+ }
+
+
+ void Listener::accepted(Socket socket) {
+ accepted( new MessagingPort(socket) );
}
void Listener::accepted(MessagingPort *mp) {
diff --git a/util/net/listen.h b/util/net/listen.h
index e8b4189c0f5..415db1e3fb6 100644
--- a/util/net/listen.h
+++ b/util/net/listen.h
@@ -25,15 +25,25 @@ namespace mongo {
class Listener : boost::noncopyable {
public:
- Listener(const string &ip, int p, bool logConnect=true ) : _port(p), _ip(ip), _logConnect(logConnect), _elapsedTime(0) { }
- virtual ~Listener() {
- if ( _timeTracker == this )
- _timeTracker = 0;
- }
+
+ Listener(const string& name, const string &ip, int port, bool logConnect=true );
+
+ virtual ~Listener();
+
+#ifdef MONGO_SSL
+ /**
+ * make this an ssl socket
+ * ownership of SSLManager remains with the caller
+ */
+ void secure( SSLManager* manager );
+
+ void addSecurePort( SSLManager* manager , int additionalPort );
+#endif
+
void initAndListen(); // never returns unless error (start a thread)
/* spawn a thread, etc., then return */
- virtual void accepted(int sock, const SockAddr& from);
+ virtual void accepted(Socket socket);
virtual void accepted(MessagingPort *mp);
const int _port;
@@ -60,12 +70,25 @@ namespace mongo {
}
private:
+ string _name;
string _ip;
bool _logConnect;
long long _elapsedTime;
+
+#ifdef MONGO_SSL
+ SSLManager* _ssl;
+ int _sslPort;
+#endif
+
+ /**
+ * @return true iff everything went ok
+ */
+ bool _setupSockets( const vector<SockAddr>& mine , vector<int>& socks );
+
+ void _logListen( int port , bool ssl );
static const Listener* _timeTracker;
-
+
virtual bool useUnixSockets() const { return false; }
};
diff --git a/util/net/message_port.cpp b/util/net/message_port.cpp
index 8c50c8d52a3..9abfaf7c975 100644
--- a/util/net/message_port.cpp
+++ b/util/net/message_port.cpp
@@ -137,6 +137,10 @@ namespace mongo {
piggyBackData = 0;
}
+ MessagingPort::MessagingPort( Socket& sock )
+ : Socket( sock ) , piggyBackData( 0 ) {
+ }
+
void MessagingPort::shutdown() {
close();
}
diff --git a/util/net/message_port.h b/util/net/message_port.h
index 6bbcc46f71c..22ecafecfbc 100644
--- a/util/net/message_port.h
+++ b/util/net/message_port.h
@@ -56,6 +56,8 @@ namespace mongo {
// no data sent, then we detect that the other side is down
MessagingPort(double so_timeout = 0, int logLevel = 0 );
+ MessagingPort(Socket& socket);
+
virtual ~MessagingPort();
void shutdown();
diff --git a/util/net/message_server_port.cpp b/util/net/message_server_port.cpp
index e9712d490ba..ca0b13dae07 100644
--- a/util/net/message_server_port.cpp
+++ b/util/net/message_server_port.cpp
@@ -28,6 +28,10 @@
#include "../../db/lasterror.h"
#include "../../db/stats/counters.h"
+#ifdef __linux__ // TODO: consider making this ifndef _WIN32
+# include <sys/resource.h>
+#endif
+
namespace mongo {
namespace pms {
@@ -43,6 +47,8 @@ namespace mongo {
inPort->setLogLevel(1);
scoped_ptr<MessagingPort> p( inPort );
+ p->postFork();
+
string otherSide;
Message m;
@@ -98,7 +104,7 @@ namespace mongo {
class PortMessageServer : public MessageServer , public Listener {
public:
PortMessageServer( const MessageServer::Options& opts, MessageHandler * handler ) :
- Listener( opts.ipList, opts.port ) {
+ Listener( "" , opts.ipList, opts.port ) {
uassert( 10275 , "multiple PortMessageServer not supported" , ! pms::handler );
pms::handler = handler;
@@ -125,10 +131,18 @@ namespace mongo {
pthread_attr_init(&attrs);
pthread_attr_setdetachstate(&attrs, PTHREAD_CREATE_DETACHED);
- static const size_t STACK_SIZE = 1024*1024;
- pthread_attr_setstacksize(&attrs, (DEBUG_BUILD
- ? (STACK_SIZE / 2)
- : STACK_SIZE));
+ static const size_t STACK_SIZE = 1024*1024; // if we change this we need to update the warning
+
+ struct rlimit limits;
+ verify(15887, getrlimit(RLIMIT_STACK, &limits) == 0);
+ if (limits.rlim_cur > STACK_SIZE) {
+ pthread_attr_setstacksize(&attrs, (DEBUG_BUILD
+ ? (STACK_SIZE / 2)
+ : STACK_SIZE));
+ } else if (limits.rlim_cur < 1024*1024) {
+ warning() << "Stack size set to " << (limits.rlim_cur/1024) << "KB. We suggest 1MB" << endl;
+ }
+
pthread_t thread;
int failed = pthread_create(&thread, &attrs, (void*(*)(void*)) &pms::threadRun, p);
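The stack-size guard above is built on POSIX getrlimit(); a standalone illustration of the same check, independent of the mongo tree:

    #include <sys/resource.h>
    #include <cstdio>

    int main() {
        struct rlimit limits;
        if ( getrlimit( RLIMIT_STACK , &limits ) != 0 )
            return 1;
        // mirrors the logic above: a 1MB thread stack is only forced when the system limit is larger
        std::printf( "current stack rlimit: %lu KB\n" , (unsigned long)( limits.rlim_cur / 1024 ) );
        return 0;
    }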
diff --git a/util/net/miniwebserver.cpp b/util/net/miniwebserver.cpp
index 269a60bc85c..01a3418a909 100644
--- a/util/net/miniwebserver.cpp
+++ b/util/net/miniwebserver.cpp
@@ -23,8 +23,8 @@
namespace mongo {
- MiniWebServer::MiniWebServer(const string &ip, int port)
- : Listener(ip, port, false)
+ MiniWebServer::MiniWebServer(const string& name, const string &ip, int port)
+ : Listener(name, ip, port, false)
{}
string MiniWebServer::parseURL( const char * buf ) {
@@ -108,17 +108,18 @@ namespace mongo {
return false;
}
- void MiniWebServer::accepted(int s, const SockAddr &from) {
- setSockTimeouts(s, 8);
+ void MiniWebServer::accepted(Socket sock) {
+ sock.postFork();
+ sock.setTimeout(8);
char buf[4096];
int len = 0;
while ( 1 ) {
int left = sizeof(buf) - 1 - len;
if( left == 0 )
break;
- int x = ::recv(s, buf + len, left, 0);
+ int x = sock.unsafe_recv( buf + len , left );
if ( x <= 0 ) {
- closesocket(s);
+ sock.close();
return;
}
len += x;
@@ -134,7 +135,7 @@ namespace mongo {
vector<string> headers;
try {
- doRequest(buf, parseURL( buf ), responseMsg, responseCode, headers, from);
+ doRequest(buf, parseURL( buf ), responseMsg, responseCode, headers, sock.remoteAddr() );
}
catch ( std::exception& e ) {
responseCode = 500;
@@ -165,8 +166,8 @@ namespace mongo {
ss << responseMsg;
string response = ss.str();
- ::send(s, response.c_str(), response.size(), 0);
- closesocket(s);
+ sock.send( response.c_str(), response.size() , "http response" );
+ sock.close();
}
string MiniWebServer::getHeader( const char * req , string wanted ) {
diff --git a/util/net/miniwebserver.h b/util/net/miniwebserver.h
index 01c810b551e..1fb6b3f2e65 100644
--- a/util/net/miniwebserver.h
+++ b/util/net/miniwebserver.h
@@ -27,7 +27,7 @@ namespace mongo {
class MiniWebServer : public Listener {
public:
- MiniWebServer(const string &ip, int _port);
+ MiniWebServer(const string& name, const string &ip, int _port);
virtual ~MiniWebServer() {}
virtual void doRequest(
@@ -53,7 +53,7 @@ namespace mongo {
static string urlDecode(string s) {return urlDecode(s.c_str());}
private:
- void accepted(int s, const SockAddr &from);
+ void accepted(Socket socket);
static bool fullReceive( const char *buf );
};
diff --git a/util/net/sock.cpp b/util/net/sock.cpp
index f9e4a85d832..69c42f2729d 100644
--- a/util/net/sock.cpp
+++ b/util/net/sock.cpp
@@ -34,21 +34,37 @@
# endif
#endif
+#ifdef MONGO_SSL
+#include <openssl/err.h>
+#include <openssl/ssl.h>
+#endif
+
+
namespace mongo {
static bool ipv6 = false;
void enableIPv6(bool state) { ipv6 = state; }
bool IPv6Enabled() { return ipv6; }
- // --- some global helpers -----
+ void setSockTimeouts(int sock, double secs) {
+ struct timeval tv;
+ tv.tv_sec = (int)secs;
+ tv.tv_usec = (int)((long long)(secs*1000*1000) % (1000*1000));
+ bool report = logLevel > 3; // solaris doesn't provide these
+ DEV report = true;
+ bool ok = setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, (char *) &tv, sizeof(tv) ) == 0;
+ if( report && !ok ) log() << "unable to set SO_RCVTIMEO" << endl;
+ ok = setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, (char *) &tv, sizeof(tv) ) == 0;
+ DEV if( report && !ok ) log() << "unable to set SO_SNDTIMEO" << endl;
+ }
#if defined(_WIN32)
void disableNagle(int sock) {
int x = 1;
if ( setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, (char *) &x, sizeof(x)) )
- out() << "ERROR: disableNagle failed" << endl;
+ error() << "disableNagle failed" << endl;
if ( setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, (char *) &x, sizeof(x)) )
- out() << "ERROR: SO_KEEPALIVE failed" << endl;
+ error() << "SO_KEEPALIVE failed" << endl;
}
#else
@@ -62,11 +78,35 @@ namespace mongo {
#endif
if ( setsockopt(sock, level, TCP_NODELAY, (char *) &x, sizeof(x)) )
- log() << "ERROR: disableNagle failed: " << errnoWithDescription() << endl;
+ error() << "disableNagle failed: " << errnoWithDescription() << endl;
#ifdef SO_KEEPALIVE
if ( setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, (char *) &x, sizeof(x)) )
- log() << "ERROR: SO_KEEPALIVE failed: " << errnoWithDescription() << endl;
+ error() << "SO_KEEPALIVE failed: " << errnoWithDescription() << endl;
+
+# ifdef __linux__
+ socklen_t len = sizeof(x);
+ if ( getsockopt(sock, level, TCP_KEEPIDLE, (char *) &x, &len) )
+ error() << "can't get TCP_KEEPIDLE: " << errnoWithDescription() << endl;
+
+ if (x > 300) {
+ x = 300;
+ if ( setsockopt(sock, level, TCP_KEEPIDLE, (char *) &x, sizeof(x)) ) {
+ error() << "can't set TCP_KEEPIDLE: " << errnoWithDescription() << endl;
+ }
+ }
+
+ len = sizeof(x); // just in case it changed
+ if ( getsockopt(sock, level, TCP_KEEPINTVL, (char *) &x, &len) )
+ error() << "can't get TCP_KEEPINTVL: " << errnoWithDescription() << endl;
+
+ if (x > 300) {
+ x = 300;
+ if ( setsockopt(sock, level, TCP_KEEPINTVL, (char *) &x, sizeof(x)) ) {
+ error() << "can't set TCP_KEEPINTVL: " << errnoWithDescription() << endl;
+ }
+ }
+# endif
#endif
}
@@ -299,29 +339,119 @@ namespace mongo {
}
+ // ------------ SSLManager -----------------
+
+#ifdef MONGO_SSL
+ SSLManager::SSLManager( bool client ) {
+ _client = client;
+ SSL_library_init();
+ SSL_load_error_strings();
+ ERR_load_crypto_strings();
+
+ _context = SSL_CTX_new( client ? SSLv23_client_method() : SSLv23_server_method() );
+ massert( 15864 , mongoutils::str::stream() << "can't create SSL Context: " << ERR_error_string(ERR_get_error(), NULL) , _context );
+
+ SSL_CTX_set_options( _context, SSL_OP_ALL);
+ }
+
+ void SSLManager::setupPubPriv( const string& privateKeyFile , const string& publicKeyFile ) {
+ massert( 15865 ,
+ mongoutils::str::stream() << "Can't read SSL certificate from file "
+ << publicKeyFile << ":" << ERR_error_string(ERR_get_error(), NULL) ,
+ SSL_CTX_use_certificate_file(_context, publicKeyFile.c_str(), SSL_FILETYPE_PEM) );
+
+
+ massert( 15866 ,
+ mongoutils::str::stream() << "Can't read SSL private key from file "
+ << privateKeyFile << " : " << ERR_error_string(ERR_get_error(), NULL) ,
+ SSL_CTX_use_PrivateKey_file(_context, privateKeyFile.c_str(), SSL_FILETYPE_PEM) );
+ }
+
+
+ int SSLManager::password_cb(char *buf,int num, int rwflag,void *userdata){
+ SSLManager* sm = (SSLManager*)userdata;
+ string pass = sm->_password;
+ strcpy(buf,pass.c_str());
+ return(pass.size());
+ }
+
+ void SSLManager::setupPEM( const string& keyFile , const string& password ) {
+ _password = password;
+
+ massert( 15867 , "Can't read certificate file" , SSL_CTX_use_certificate_chain_file( _context , keyFile.c_str() ) );
+
+ SSL_CTX_set_default_passwd_cb_userdata( _context , this );
+ SSL_CTX_set_default_passwd_cb( _context, &SSLManager::password_cb );
+
+ massert( 15868 , "Can't read key file" , SSL_CTX_use_PrivateKey_file( _context , keyFile.c_str() , SSL_FILETYPE_PEM ) );
+ }
+
+ SSL * SSLManager::secure( int fd ) {
+ SSL * ssl = SSL_new( _context );
+ massert( 15861 , "can't create SSL" , ssl );
+ SSL_set_fd( ssl , fd );
+ return ssl;
+ }
+
+
+#endif
+
// ------------ Socket -----------------
Socket::Socket(int fd , const SockAddr& remote) :
_fd(fd), _remote(remote), _timeout(0) {
_logLevel = 0;
- _bytesOut = 0;
- _bytesIn = 0;
+ _init();
}
Socket::Socket( double timeout, int ll ) {
_logLevel = ll;
_fd = -1;
_timeout = timeout;
+ _init();
+ }
+
+ void Socket::_init() {
_bytesOut = 0;
_bytesIn = 0;
+#ifdef MONGO_SSL
+ _sslAccepted = 0;
+#endif
}
void Socket::close() {
+#ifdef MONGO_SSL
+ _ssl.reset();
+#endif
if ( _fd >= 0 ) {
closesocket( _fd );
_fd = -1;
}
}
+
+#ifdef MONGO_SSL
+ void Socket::secure( SSLManager * ssl ) {
+ assert( ssl );
+ assert( _fd >= 0 );
+ _ssl.reset( ssl->secure( _fd ) );
+ SSL_connect( _ssl.get() );
+ }
+
+ void Socket::secureAccepted( SSLManager * ssl ) {
+ _sslAccepted = ssl;
+ }
+#endif
+
+ void Socket::postFork() {
+#ifdef MONGO_SSL
+ if ( _sslAccepted ) {
+ assert( _fd );
+ _ssl.reset( _sslAccepted->secure( _fd ) );
+ SSL_accept( _ssl.get() );
+ _sslAccepted = 0;
+ }
+#endif
+ }
class ConnectBG : public BackgroundJob {
public:
@@ -347,7 +477,7 @@ namespace mongo {
}
if ( _timeout > 0 ) {
- setSockTimeouts( _fd, _timeout );
+ setTimeout( _timeout );
}
ConnectBG bg(_fd, remote);
@@ -377,12 +507,29 @@ namespace mongo {
return true;
}
+ int Socket::_send( const char * data , int len ) {
+#ifdef MONGO_SSL
+ if ( _ssl ) {
+ return SSL_write( _ssl.get() , data , len );
+ }
+#endif
+ return ::send( _fd , data , len , portSendFlags );
+ }
// sends all data or throws an exception
void Socket::send( const char * data , int len, const char *context ) {
while( len > 0 ) {
- int ret = ::send( _fd , data , len , portSendFlags );
+ int ret = _send( data , len );
if ( ret == -1 ) {
+
+#ifdef MONGO_SSL
+ if ( _ssl ) {
+ log() << "SSL Error ret: " << ret << " err: " << SSL_get_error( _ssl.get() , ret )
+ << " " << ERR_error_string(ERR_get_error(), NULL)
+ << endl;
+ }
+#endif
+
#if defined(_WIN32)
if ( WSAGetLastError() == WSAETIMEDOUT && _timeout != 0 ) {
#else
@@ -408,15 +555,27 @@ namespace mongo {
}
}
- // sends all data or throws an exception
- void Socket::send( const vector< pair< char *, int > > &data, const char *context ) {
-#if defined(_WIN32)
- // TODO use scatter/gather api
+ void Socket::_send( const vector< pair< char *, int > > &data, const char *context ) {
for( vector< pair< char *, int > >::const_iterator i = data.begin(); i != data.end(); ++i ) {
char * data = i->first;
int len = i->second;
send( data, len, context );
}
+ }
+
+ // sends all data or throws an exception
+ void Socket::send( const vector< pair< char *, int > > &data, const char *context ) {
+
+#ifdef MONGO_SSL
+ if ( _ssl ) {
+ _send( data , context );
+ return;
+ }
+#endif
+
+#if defined(_WIN32)
+ // TODO use scatter/gather api
+ _send( data , context );
#else
vector< struct iovec > d( data.size() );
int i = 0;
@@ -479,23 +638,26 @@ namespace mongo {
log(3) << "Socket recv() conn closed? " << remoteString() << endl;
throw SocketException( SocketException::CLOSED , remoteString() );
}
- else { /* ret < 0 */
+ else { /* ret < 0 */
+#if defined(_WIN32)
+ int e = WSAGetLastError();
+#else
int e = errno;
-
-#if defined(EINTR) && !defined(_WIN32)
+# if defined(EINTR)
if( e == EINTR ) {
if( ++retries == 1 ) {
log() << "EINTR retry" << endl;
continue;
}
}
+# endif
#endif
if ( ( e == EAGAIN
#if defined(_WIN32)
-
|| e == WSAETIMEDOUT
#endif
- ) && _timeout > 0 ) {
+ ) && _timeout > 0 )
+ {
// this is a timeout
log(_logLevel) << "Socket recv() timeout " << remoteString() <<endl;
throw SocketException( SocketException::RECV_TIMEOUT, remoteString() );
@@ -508,11 +670,33 @@ namespace mongo {
}
int Socket::unsafe_recv( char *buf, int max ) {
- int x = ::recv( _fd , buf , max , portRecvFlags );
+ int x = _recv( buf , max );
_bytesIn += x;
return x;
}
+
+ int Socket::_recv( char *buf, int max ) {
+#ifdef MONGO_SSL
+ if ( _ssl ){
+ return SSL_read( _ssl.get() , buf , max );
+ }
+#endif
+ return ::recv( _fd , buf , max , portRecvFlags );
+ }
+
+ void Socket::setTimeout( double secs ) {
+ struct timeval tv;
+ tv.tv_sec = (int)secs;
+ tv.tv_usec = (int)((long long)(secs*1000*1000) % (1000*1000));
+ bool report = logLevel > 3; // solaris doesn't provide these
+ DEV report = true;
+ bool ok = setsockopt(_fd, SOL_SOCKET, SO_RCVTIMEO, (char *) &tv, sizeof(tv) ) == 0;
+ if( report && !ok ) log() << "unable to set SO_RCVTIMEO" << endl;
+ ok = setsockopt(_fd, SOL_SOCKET, SO_SNDTIMEO, (char *) &tv, sizeof(tv) ) == 0;
+ DEV if( report && !ok ) log() << "unable to set SO_SNDTIMEO" << endl;
+ }
+
#if defined(_WIN32)
struct WinsockInit {
WinsockInit() {
diff --git a/util/net/sock.h b/util/net/sock.h
index 9c6f0251ad6..1cd51333525 100644
--- a/util/net/sock.h
+++ b/util/net/sock.h
@@ -39,6 +39,10 @@
#endif // _WIN32
+#ifdef MONGO_SSL
+#include <openssl/ssl.h>
+#endif
+
namespace mongo {
const int SOCK_FAMILY_UNKNOWN_ERROR=13078;
@@ -68,24 +72,13 @@ namespace mongo {
return mongoutils::str::stream() << cmdLine.socket << "/mongodb-" << port << ".sock";
}
- inline void setSockTimeouts(int sock, double secs) {
- struct timeval tv;
- tv.tv_sec = (int)secs;
- tv.tv_usec = (int)((long long)(secs*1000*1000) % (1000*1000));
- bool report = logLevel > 3; // solaris doesn't provide these
- DEV report = true;
- bool ok = setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, (char *) &tv, sizeof(tv) ) == 0;
- if( report && !ok ) log() << "unabled to set SO_RCVTIMEO" << endl;
- ok = setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO, (char *) &tv, sizeof(tv) ) == 0;
- DEV if( report && !ok ) log() << "unabled to set SO_RCVTIMEO" << endl;
- }
-
// If an ip address is passed in, just return that. If a hostname is passed
// in, look up its ip and return that. Returns "" on failure.
string hostbyname(const char *hostname);
void enableIPv6(bool state=true);
bool IPv6Enabled();
+ void setSockTimeouts(int sock, double secs);
/**
* wrapped around os representation of network address
@@ -157,6 +150,29 @@ namespace mongo {
string _extra;
};
+#ifdef MONGO_SSL
+ class SSLManager : boost::noncopyable {
+ public:
+ SSLManager( bool client );
+
+ void setupPEM( const string& keyFile , const string& password );
+ void setupPubPriv( const string& privateKeyFile , const string& publicKeyFile );
+
+ /**
+ * creates an SSL context to be used for this file descriptor
+ * caller should delete
+ */
+ SSL * secure( int fd );
+
+ static int password_cb( char *buf,int num, int rwflag,void *userdata );
+
+ private:
+ bool _client;
+ SSL_CTX* _context;
+ string _password;
+ };
+#endif
+
/**
* thin wrapped around file descriptor and system calls
* todo: ssl
@@ -165,9 +181,12 @@ namespace mongo {
public:
Socket(int sock, const SockAddr& farEnd);
- // in some cases the timeout will actually be 2x this value - eg we do a partial send,
- // then the timeout fires, then we try to send again, then the timeout fires again with
- // no data sent, then we detect that the other side is down
+ /** In some cases the timeout will actually be 2x this value - eg we do a partial send,
+ then the timeout fires, then we try to send again, then the timeout fires again with
+ no data sent, then we detect that the other side is down.
+
+ Generally you don't want a timeout, you should be very prepared for errors if you set one.
+ */
Socket(double so_timeout = 0, int logLevel = 0 );
bool connect(SockAddr& farEnd);
@@ -190,8 +209,32 @@ namespace mongo {
void clearCounters() { _bytesIn = 0; _bytesOut = 0; }
long long getBytesIn() const { return _bytesIn; }
long long getBytesOut() const { return _bytesOut; }
+
+ void setTimeout( double secs );
+
+#ifdef MONGO_SSL
+ /** secures inline */
+ void secure( SSLManager * ssl );
+ void secureAccepted( SSLManager * ssl );
+#endif
+
+ /**
+ * call this after a fork for server sockets
+ */
+ void postFork();
+
private:
+ void _init();
+ /** raw send, same semantics as ::send */
+ int _send( const char * data , int len );
+
+ /** sends dumbly, just each buffer at a time */
+ void _send( const vector< pair< char *, int > > &data, const char *context );
+
+ /** raw recv, same semantics as ::recv */
+ int _recv( char * buf , int max );
+
int _fd;
SockAddr _remote;
double _timeout;
@@ -199,6 +242,11 @@ namespace mongo {
long long _bytesIn;
long long _bytesOut;
+#ifdef MONGO_SSL
+ shared_ptr<SSL> _ssl;
+ SSLManager * _sslAccepted;
+#endif
+
protected:
int _logLevel; // passed to log() when logging errors
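A hedged server-side sketch tying the new SSLManager to the Listener changes earlier in this diff; the key path, password, and ports are hypothetical, and a MONGO_SSL build is assumed.

    #include "util/net/sock.h"     // paths assumed
    #include "util/net/listen.h"

    #ifdef MONGO_SSL
    void listenWithSslSketch() {
        using namespace mongo;
        SSLManager* mgr = new SSLManager( false /*server*/ );      // kept for the life of the process
        mgr->setupPEM( "/etc/ssl/mongod.pem" , "pemPassphrase" );  // hypothetical key file and password
        Listener l( "sketch" , "" , 27017 );
        l.addSecurePort( mgr , 27018 );   // plain connections on 27017, SSL handshake on 27018
        l.initAndListen();                // never returns unless there is an error
    }
    #endif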
diff --git a/util/paths.h b/util/paths.h
index 4ae591fb49b..2297a9a2f90 100644
--- a/util/paths.h
+++ b/util/paths.h
@@ -23,9 +23,9 @@
#include <sys/stat.h>
#include <fcntl.h>
-using namespace mongoutils;
-
namespace mongo {
+
+ using namespace mongoutils;
extern string dbpath;
diff --git a/util/processinfo_darwin.cpp b/util/processinfo_darwin.cpp
index c1190aec438..9f73cbffd4f 100644
--- a/util/processinfo_darwin.cpp
+++ b/util/processinfo_darwin.cpp
@@ -19,15 +19,14 @@
#include "processinfo.h"
#include "log.h"
-
+#include <mach/vm_statistics.h>
#include <mach/task_info.h>
-
#include <mach/mach_init.h>
#include <mach/mach_host.h>
#include <mach/mach_traps.h>
#include <mach/task.h>
#include <mach/vm_map.h>
-#include <mach/shared_memory_server.h>
+#include <mach/shared_region.h>
#include <iostream>
#include <sys/types.h>
diff --git a/util/ramlog.cpp b/util/ramlog.cpp
index f8cfa0a7052..69ffc175ee9 100644
--- a/util/ramlog.cpp
+++ b/util/ramlog.cpp
@@ -25,7 +25,7 @@ namespace mongo {
using namespace mongoutils;
- RamLog::RamLog( string name ) : _name(name) {
+ RamLog::RamLog( string name ) : _name(name), _lastWrite(0) {
h = 0; n = 0;
for( int i = 0; i < N; i++ )
lines[i][C-1] = 0;
@@ -48,6 +48,8 @@ namespace mongo {
}
void RamLog::write(LogLevel ll, const string& str) {
+ _lastWrite = time(0);
+
char *p = lines[(h+n)%N];
unsigned sz = str.size();
@@ -183,4 +185,6 @@ namespace mongo {
mongo::mutex* RamLog::_namedLock;
RamLog::RM* RamLog::_named = 0;
+
+ Tee* const warnings = new RamLog("warnings"); // Things put here go in serverStatus
}
diff --git a/util/ramlog.h b/util/ramlog.h
index 8539a436388..d3d5c8fbb4e 100644
--- a/util/ramlog.h
+++ b/util/ramlog.h
@@ -34,6 +34,8 @@ namespace mongo {
static RamLog* get( string name );
static void getNames( vector<string>& names );
+ time_t lastWrite() { return _lastWrite; } // 0 if no writes
+
protected:
static int repeats(const vector<const char *>& v, int i);
static string clean(const vector<const char *>& v, int i, string line="");
@@ -57,6 +59,7 @@ namespace mongo {
typedef map<string,RamLog*> RM;
static mongo::mutex* _namedLock;
static RM* _named;
+ time_t _lastWrite;
};
}
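A small sketch combining the new lastWrite() accessor with RamLog::get(); it relies only on members shown above, and the "warnings" name matches the Tee defined in ramlog.cpp.

    #include "util/ramlog.h"   // path assumed
    #include <ctime>

    bool warnedInLastDaySketch() {
        mongo::RamLog* rl = mongo::RamLog::get( "warnings" );
        return rl && rl->lastWrite() != 0 && ( time(0) - rl->lastWrite() ) < 60 * 60 * 24;
    }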
diff --git a/util/stringutils.h b/util/stringutils.h
index bab9f608f7e..93598aa520b 100644
--- a/util/stringutils.h
+++ b/util/stringutils.h
@@ -40,7 +40,11 @@ namespace mongo {
return string(copy);
}
- // for convenience, '{' is greater than anything and stops number parsing
+ /**
+ * Non numeric characters are compared lexicographically; numeric substrings
+ * are compared numerically; dots separate ordered comparable subunits.
+ * For convenience, character 255 is greater than anything else.
+ */
inline int lexNumCmp( const char *s1, const char *s2 ) {
//cout << "START : " << s1 << "\t" << s2 << endl;
@@ -48,6 +52,18 @@ namespace mongo {
while( *s1 && *s2 ) {
+ bool d1 = ( *s1 == '.' );
+ bool d2 = ( *s2 == '.' );
+ if ( d1 && !d2 )
+ return -1;
+ if ( d2 && !d1 )
+ return 1;
+ if ( d1 && d2 ) {
+ ++s1; ++s2;
+ startWord = true;
+ continue;
+ }
+
bool p1 = ( *s1 == (char)255 );
bool p2 = ( *s2 == (char)255 );
//cout << "\t\t " << p1 << "\t" << p2 << endl;
@@ -64,7 +80,6 @@ namespace mongo {
if ( startWord ) {
while ( *s1 == '0' ) s1++;
while ( *s2 == '0' ) s2++;
- startWord = false;
}
char * e1 = (char*)s1;
@@ -94,6 +109,7 @@ namespace mongo {
// otherwise, the numbers are equal
s1 = e1;
s2 = e2;
+ startWord = false;
continue;
}
@@ -109,11 +125,8 @@ namespace mongo {
if ( *s2 > *s1 )
return -1;
- if ( *s1 == '.' )
- startWord = true;
- else
- startWord = false;
s1++; s2++;
+ startWord = false;
}
if ( *s1 )
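A few illustrative comparisons for the revised lexNumCmp ordering (numeric runs compare numerically, '.' delimits subunits); sketch only, grounded in the code above.

    #include "util/stringutils.h"   // path assumed
    #include <cassert>

    void lexNumCmpSketch() {
        using mongo::lexNumCmp;
        assert( lexNumCmp( "a2"  , "a10" ) < 0 );   // 2 < 10 numerically, even though "a2" > "a10" as plain strings
        assert( lexNumCmp( "a.b" , "a-b" ) < 0 );   // '.' ends a subunit, so it sorts before any other character here
        assert( lexNumCmp( "f12" , "f12" ) == 0 );
    }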
diff --git a/util/time_support.h b/util/time_support.h
index ce2cdbc0e15..ca17807ec96 100644
--- a/util/time_support.h
+++ b/util/time_support.h
@@ -52,6 +52,16 @@ namespace mongo {
return buf;
}
+ inline string timeToISOString(time_t time) {
+ struct tm t;
+ time_t_to_Struct( time, &t );
+
+ const char* fmt = "%Y-%m-%dT%H:%M:%SZ";
+ char buf[32];
+ assert(strftime(buf, sizeof(buf), fmt, &t) == 20);
+ return buf;
+ }
+
inline boost::gregorian::date currentDate() {
boost::posix_time::ptime now = boost::posix_time::second_clock::local_time();
return now.date();
diff --git a/util/timer.h b/util/timer.h
index 9db907185dd..cbfe859ef5c 100644
--- a/util/timer.h
+++ b/util/timer.h
@@ -30,6 +30,8 @@ namespace mongo {
Timer( unsigned long long startMicros ) { old = startMicros; }
int seconds() const { return (int)(micros() / 1000000); }
int millis() const { return (int)(micros() / 1000); }
+ int minutes() const { return seconds() / 60; }
+
/** gets time interval and resets at the same time. this way we can call curTimeMicros
once instead of twice if one wanted millis() and then reset().
diff --git a/util/version.cpp b/util/version.cpp
index f9c1471c88e..809f4cde3eb 100644
--- a/util/version.cpp
+++ b/util/version.cpp
@@ -26,6 +26,8 @@
#include "stringutils.h"
#include "../db/jsobj.h"
#include "file.h"
+#include "ramlog.h"
+#include "../db/cmdline.h"
namespace mongo {
@@ -36,7 +38,7 @@ namespace mongo {
* 1.2.3-rc4-pre-
* If you really need to do something else you'll need to fix _versionArray()
*/
- const char versionString[] = "1.9.1-pre-";
+ const char versionString[] = "2.0.0-rc0-pre-";
// See unit test for example outputs
static BSONArray _versionArray(const char* version){
@@ -114,35 +116,39 @@ namespace mongo {
log() << "build info: " << sysInfo() << endl;
}
+
+ static Tee * startupWarningsLog = new RamLog("startupWarnings"); //intentionally leaked
+
//
- // 32 bit systems warning
+ // system warnings
//
void show_warnings() {
- // each message adds a leading but not a trailing newline
+ // each message adds a leading and a trailing newline
bool warned = false;
{
const char * foo = strchr( versionString , '.' ) + 1;
int bar = atoi( foo );
if ( ( 2 * ( bar / 2 ) ) != bar ) {
- cout << "\n** NOTE: This is a development version (" << versionString << ") of MongoDB.";
- cout << "\n** Not recommended for production." << endl;
+ log() << startupWarningsLog;
+ log() << "** NOTE: This is a development version (" << versionString << ") of MongoDB." << startupWarningsLog;
+ log() << "** Not recommended for production." << startupWarningsLog;
warned = true;
}
}
if ( sizeof(int*) == 4 ) {
- cout << endl;
- cout << "** NOTE: when using MongoDB 32 bit, you are limited to about 2 gigabytes of data" << endl;
- cout << "** see http://blog.mongodb.org/post/137788967/32-bit-limitations" << endl;
- cout << "** with --journal, the limit is lower" << endl;
+ log() << startupWarningsLog;
+ log() << "** NOTE: when using MongoDB 32 bit, you are limited to about 2 gigabytes of data" << startupWarningsLog;
+ log() << "** see http://blog.mongodb.org/post/137788967/32-bit-limitations" << startupWarningsLog;
+ log() << "** with --journal, the limit is lower" << startupWarningsLog;
warned = true;
}
#ifdef __linux__
if (boost::filesystem::exists("/proc/vz") && !boost::filesystem::exists("/proc/bc")) {
- cout << endl;
- cout << "** WARNING: You are running in OpenVZ. This is known to be broken!!!" << endl;
+ log() << startupWarningsLog;
+ log() << "** WARNING: You are running in OpenVZ. This is known to be broken!!!" << startupWarningsLog;
warned = true;
}
@@ -172,22 +178,49 @@ namespace mongo {
const char* space = strchr(line, ' ');
if ( ! space ) {
- cout << "** WARNING: cannot parse numa_maps" << endl;
+ log() << startupWarningsLog;
+ log() << "** WARNING: cannot parse numa_maps" << startupWarningsLog;
warned = true;
}
else if ( ! startsWith(space+1, "interleave") ) {
- cout << endl;
- cout << "** WARNING: You are running on a NUMA machine." << endl;
- cout << "** We suggest launching mongod like this to avoid performance problems:" << endl;
- cout << "** numactl --interleave=all mongod [other options]" << endl;
+ log() << startupWarningsLog;
+ log() << "** WARNING: You are running on a NUMA machine." << startupWarningsLog;
+ log() << "** We suggest launching mongod like this to avoid performance problems:" << startupWarningsLog;
+ log() << "** numactl --interleave=all mongod [other options]" << startupWarningsLog;
warned = true;
}
}
}
+
+ if (cmdLine.dur){
+ fstream f ("/proc/sys/vm/overcommit_memory", ios_base::in);
+ unsigned val;
+ f >> val;
+
+ if (val == 2) {
+ log() << startupWarningsLog;
+ log() << "** WARNING: /proc/sys/vm/overcommit_memory is " << val << startupWarningsLog;
+ log() << "** Journaling works best with it set to 0 or 1" << startupWarningsLog;
+ }
+ }
+
+ if (boost::filesystem::exists("/proc/sys/vm/zone_reclaim_mode")){
+ fstream f ("/proc/sys/vm/zone_reclaim_mode", ios_base::in);
+ unsigned val;
+ f >> val;
+
+ if (val != 0) {
+ log() << startupWarningsLog;
+ log() << "** WARNING: /proc/sys/vm/zone_reclaim_mode is " << val << startupWarningsLog;
+ log() << "** We suggest setting it to 0" << startupWarningsLog;
+ log() << "** http://www.kernel.org/doc/Documentation/sysctl/vm.txt" << startupWarningsLog;
+ }
+ }
#endif
- if (warned)
- cout << endl;
+ if (warned) {
+ log() << startupWarningsLog;
+ }
}
int versionCmp(StringData rhs, StringData lhs) {