diff options
43 files changed, 3328 insertions, 2920 deletions
diff --git a/SConscript.buildinfo b/SConscript.buildinfo new file mode 100644 index 00000000000..3853adf2ba5 --- /dev/null +++ b/SConscript.buildinfo @@ -0,0 +1,45 @@ +# -*- mode: python; -*- + +# This SConscript describes construction of buildinfo.cpp, which is independent of the +# build variant's target. + +import os +import sys + +import buildscripts.utils + +Import('env windows') + +def getSysInfo(): + if windows: + return "windows " + str( sys.getwindowsversion() ) + else: + return " ".join( os.uname() ) + +buildinfo_filename = '#build/buildinfo.cpp' + +buildinfo_template = ''' +#include <string> +#include <boost/version.hpp> + +#include "mongo/util/version.h" + +namespace mongo { + const char * gitVersion() { return "%(git_version)s"; } + std::string sysInfo() { return "%(sys_info)s BOOST_LIB_VERSION=" BOOST_LIB_VERSION ; } +} // namespace mongo +''' + +def generate_buildinfo(env, target, source, **kw): + contents = str(source[0]) % dict(git_version=buildscripts.utils.getGitVersion(), + sys_info=getSysInfo()) + out = open(str(target[0]), 'wb') + try: + out.write(contents) + finally: + out.close() + +env.Command(buildinfo_filename, Value(buildinfo_template), generate_buildinfo) +env.AlwaysBuild(buildinfo_filename) +env.Install('$BUILD_DIR/mongo', buildinfo_filename) +env.Install('$BUILD_DIR/client_build/mongo', buildinfo_filename) diff --git a/SConstruct b/SConstruct index 58dacd1c897..aa874789f8a 100644 --- a/SConstruct +++ b/SConstruct @@ -15,21 +15,23 @@ EnsureSConsVersion( 1, 1, 0 ) -import os -import sys +import buildscripts +import buildscripts.bb +import datetime import imp -import types +import os import re import shutil +import stat +import sys +import types import urllib import urllib2 -import buildscripts -import buildscripts.bb -import stat from buildscripts import utils import libdeps +DEFAULT_INSTALL_DIR = "/usr/local" def _rpartition(string, sep): """A replacement for str.rpartition which is missing in Python < 2.5 @@ -64,8 +66,8 @@ options = 
{} options_topass = {} -def add_option( name, help , nargs , contributesToVariantDir , dest=None, - type="string", choices=None ): +def add_option( name, help, nargs, contributesToVariantDir, + dest=None, default = None, type="string", choices=None ): if dest is None: dest = name @@ -76,6 +78,7 @@ def add_option( name, help , nargs , contributesToVariantDir , dest=None, nargs=nargs, action="store", choices=choices, + default=default, help=help ) options[name] = { "help" : help , @@ -137,7 +140,7 @@ def get_variant_dir(): return s # installation/packaging -add_option( "prefix" , "installation prefix" , 1 , False ) +add_option( "prefix" , "installation prefix" , 1 , False, default=DEFAULT_INSTALL_DIR ) add_option( "distname" , "dist name (0.8.0)" , 1 , False ) add_option( "distmod", "additional piece for full dist name" , 1 , False ) add_option( "nostrip", "do not strip installed binaries" , 0 , False ) @@ -220,6 +223,9 @@ add_option( "use-cpu-profiler", add_option("mongod-concurrency-level", "Concurrency level, \"global\" or \"db\"", 1, True, type="choice", choices=["global", "db"]) +add_option('client-dist-basename', "Name of the client source archive.", 1, False, + default='mongo-cxx-driver') + # don't run configure if user calls --help if GetOption('help'): Return() @@ -278,9 +284,16 @@ usePCH = has_option( "usePCH" ) justClientLib = (COMMAND_LINE_TARGETS == ['mongoclient']) env = Environment( BUILD_DIR=variantDir, + CLIENT_ARCHIVE='${CLIENT_DIST_BASENAME}${DIST_ARCHIVE_SUFFIX}', + CLIENT_DIST_BASENAME=get_option('client-dist-basename'), + CLIENT_LICENSE='#distsrc/client/LICENSE.txt', + CLIENT_SCONSTRUCT='#distsrc/client/SConstruct', + DIST_ARCHIVE_SUFFIX='.tgz', MSVS_ARCH=msarch , + PYTHON=utils.find_python(), + SERVER_ARCHIVE='${SERVER_DIST_BASENAME}${DIST_ARCHIVE_SUFFIX}', TARGET_ARCH=msarch , - tools=["default", "gch", "jsheader", "mergelib" ], + tools=["default", "gch", "jsheader", "mergelib"], PYSYSPLATFORM=os.sys.platform, PCRE_VERSION='8.30', @@ -367,8 
+380,6 @@ if ( not ( usesm or usev8 or justClientLib) ): usesm = True options_topass["usesm"] = True -distBuild = len( COMMAND_LINE_TARGETS ) == 1 and ( str( COMMAND_LINE_TARGETS[0] ) == "s3dist" or str( COMMAND_LINE_TARGETS[0] ) == "dist" ) - extraLibPlaces = [] env['EXTRACPPPATH'] = [] @@ -394,38 +405,18 @@ if has_option( "extralib" ): class InstallSetup: binaries = False - clientSrc = False + libraries = False headers = False - bannerFiles = tuple() - headerRoot = "include" def __init__(self): self.default() - + def default(self): self.binaries = True self.libraries = False - self.clientSrc = False self.headers = False - self.bannerFiles = tuple() - self.headerRoot = "include" - self.clientTestsDir = None - - def justClient(self): - self.binaries = False - self.libraries = False - self.clientSrc = True - self.headers = True - self.bannerFiles = [ "#distsrc/client/LICENSE.txt", - "#distsrc/client/SConstruct" ] - self.headerRoot = "mongo/" - self.clientTestsDir = "#src/mongo/client/examples/" installSetup = InstallSetup() -if distBuild: - installSetup.bannerFiles = [ "#distsrc/GNU-AGPL-3.0", - "#distsrc/README", - "#distsrc/THIRD-PARTY-NOTICES", ] if has_option( "full" ): installSetup.headers = True @@ -446,24 +437,14 @@ if force64: env['PROCESSOR_ARCHITECTURE'] = processor -DEFAULT_INSTALL_DIR = "/usr/local" installDir = DEFAULT_INSTALL_DIR nixLibPrefix = "lib" -distName = GetOption( "distname" ) dontReplacePackage = False - -if distBuild: - release = True - -def isDriverBuild(): - return GetOption( "prefix" ) and GetOption( "prefix" ).find( "mongo-cxx-driver" ) >= 0 +isBuildingLatest = False if has_option( "prefix" ): installDir = GetOption( "prefix" ) - if isDriverBuild(): - installDir = '#' + installDir - installSetup.justClient() def findVersion( root , choices ): if not isinstance(root, list): @@ -474,12 +455,6 @@ def findVersion( root , choices ): return r + c raise RuntimeError("can't find a version of [" + repr(root) + "] choices: " + repr(choices)) -def 
choosePathExist( choices , default=None): - for c in choices: - if c != None and os.path.exists( c ): - return c - return default - def filterExists(paths): return filter(os.path.exists, paths) @@ -497,7 +472,7 @@ if "darwin" == os.sys.platform: if force64: env.Append( EXTRACPPPATH=["/usr/64/include"] ) env.Append( EXTRALIBPATH=["/usr/64/lib"] ) - if installDir == DEFAULT_INSTALL_DIR and not distBuild: + if installDir == DEFAULT_INSTALL_DIR: installDir = "/usr/64/" else: env.Append( EXTRACPPPATH=filterExists(["/sw/include" , "/opt/local/include"]) ) @@ -547,8 +522,8 @@ elif os.sys.platform.startswith( "openbsd" ): elif "win32" == os.sys.platform: windows = True - #if force64: - # release = True + + env['DIST_ARCHIVE_SUFFIX'] = '.zip' if has_option( "win2008plus" ): env.Append( CPPDEFINES=[ "MONGO_USE_SRW_ON_WINDOWS" ] ) @@ -690,9 +665,9 @@ if nix: if not has_option('clang'): env.Append( CPPFLAGS=" -fno-builtin-memcmp " ) # glibc's memcmp is faster than gcc's - env.Append( CPPDEFINES="_FILE_OFFSET_BITS=64" ) - env.Append( CXXFLAGS=" -Wnon-virtual-dtor -Woverloaded-virtual" ) - env.Append( LINKFLAGS=" -fPIC -pthread -rdynamic" ) + env.Append( CPPDEFINES=["_FILE_OFFSET_BITS=64"] ) + env.Append( CXXFLAGS=["-Wnon-virtual-dtor", "-Woverloaded-virtual"] ) + env.Append( LINKFLAGS=["-fPIC", "-pthread", "-rdynamic"] ) env.Append( LIBS=[] ) #make scons colorgcc friendly @@ -811,12 +786,6 @@ env['MONGO_MODULE_FILES'] = moduleFiles # --- check system --- -def getSysInfo(): - if windows: - return "windows " + str( sys.getwindowsversion() ) - else: - return " ".join( os.uname() ) - def doConfigure( myenv , shell=False ): conf = Configure(myenv) myenv["LINKFLAGS_CLEAN"] = list( myenv["LINKFLAGS"] ) @@ -1067,37 +1036,23 @@ def getCodeVersion(): return None return allMatches[0] -if getCodeVersion() == None: +mongoCodeVersion = getCodeVersion() +if mongoCodeVersion == None: Exit(-1) -def getDistName( sofar ): - global distName - global dontReplacePackage - - if distName is not None: 
- return distName - - if str( COMMAND_LINE_TARGETS[0] ) == "s3dist": - version = getCodeVersion() - if not version.endswith( "+" ) and not version.endswith("-"): - print( "got real code version, doing release build for: " + version ) - dontReplacePackage = True - distName = version - return version - +if has_option('distname'): + distName = GetOption( "distname" ) +elif mongoCodeVersion[-1] not in ("+", "-"): + dontReplacePackage = True + distName = mongoCodeVersion +else: + isBuildingLatest = True + distName = utils.getGitBranchString("" , "-") + datetime.date.today().strftime("%Y-%m-%d") - return utils.getGitBranchString( "" , "-" ) + today.strftime( "%Y-%m-%d" ) +env['SERVER_DIST_BASENAME'] = 'mongodb-%s-%s' % (getSystemInstallName(), distName) -if distBuild: - if isDriverBuild(): - installDir = GetOption( "prefix" ) - else: - from datetime import date - today = date.today() - installDir = "#mongodb-" + getSystemInstallName() + "-" - installDir += getDistName( installDir ) - print "going to make dist: " + installDir[1:] +distFile = "${SERVER_ARCHIVE}" env['NIX_LIB_DIR'] = nixLibPrefix env['INSTALL_DIR'] = installDir @@ -1132,12 +1087,12 @@ env.AlwaysBuild( "push" ) # ---- deploying --- -def s3push( localName , remoteName=None , remotePrefix=None , fixName=True , platformDir=True ): - +def s3push( localName , remoteName=None , remotePrefix=None , fixName=True , platformDir=True, + isDriverBuild=False ): localName = str( localName ) if remotePrefix is None: - if distName is None: + if isBuildingLatest: remotePrefix = utils.getGitBranchString( "-" ) + "-latest" else: remotePrefix = "-" + distName @@ -1161,8 +1116,8 @@ def s3push( localName , remoteName=None , remotePrefix=None , fixName=True , pla name = name.lower() else: name = remoteName - - if isDriverBuild(): + + if isDriverBuild: name = "cxx-driver/" + name elif platformDir: name = platform + "/" + name @@ -1182,19 +1137,15 @@ env.Alias( "s3shell" , [ "mongo" ] , [ s3shellpush ] ) env.AlwaysBuild( "s3shell" ) 
def s3dist( env , target , source ): - s3push( distFile , "mongodb" ) + s3push( str(source[0]) , "mongodb" ) -env.Append( TARFLAGS=" -z " ) - -if installDir[-1] != "/": - if windows: - distFile = env.Zip( installDir + ".zip", installDir )[0] - else: - distFile = env.Tar( installDir + '.tgz', installDir )[0] +def s3distclient(env, target, source): + s3push(str(source[0]), "cxx-driver/mongodb") - env.Alias( "dist" , distFile ) - env.Alias( "s3dist" , [ distFile ] , [ s3dist ] ) - env.AlwaysBuild( "s3dist" ) +env.Alias( "dist" , '$SERVER_ARCHIVE' ) +env.Alias( "distclient", "$CLIENT_ARCHIVE") +env.AlwaysBuild(env.Alias( "s3dist" , [ '$SERVER_ARCHIVE' ] , [ s3dist ] )) +env.AlwaysBuild(env.Alias( "s3distclient" , [ '$CLIENT_ARCHIVE' ] , [ s3distclient ] )) # --- an uninstall target --- if len(COMMAND_LINE_TARGETS) > 0 and 'uninstall' in COMMAND_LINE_TARGETS: @@ -1204,6 +1155,15 @@ if len(COMMAND_LINE_TARGETS) > 0 and 'uninstall' in COMMAND_LINE_TARGETS: BUILD_TARGETS.remove("uninstall") BUILD_TARGETS.append("install") +clientEnv = env.Clone() +clientEnv['CPPDEFINES'].remove('MONGO_EXPOSE_MACROS') + +if not has_option('use-system-all') and not has_option('use-system-boost'): + clientEnv.Append(LIBS=['boost_thread', 'boost_filesystem', 'boost_system']) + clientEnv.Prepend(LIBPATH=['$BUILD_DIR/third_party/boost/']) + +clientEnv.Prepend(LIBS=['mongoclient'], LIBPATH=['.']) + # The following symbols are exported for use in subordinate SConscript files. # Ideally, the SConscript files would be purely declarative. They would only # import build environment objects, and would contain few or no conditional @@ -1213,15 +1173,17 @@ if len(COMMAND_LINE_TARGETS) > 0 and 'uninstall' in COMMAND_LINE_TARGETS: # conditional decision making that hasn't been moved up to this SConstruct file, # and they are exported here, as well. 
Export("env") +Export("clientEnv") Export("shellEnv") Export("testEnv") Export("has_option") -Export("installSetup getSysInfo") +Export("installSetup") Export("usesm usev8") Export("darwin windows solaris linux nix") -env.SConscript( 'src/SConscript', variant_dir=variantDir, duplicate=False ) -env.SConscript( 'SConscript.smoke' ) +env.SConscript( 'src/SConscript', variant_dir='$BUILD_DIR', duplicate=False ) +env.SConscript( 'src/SConscript.client', variant_dir='$BUILD_DIR/client_build', duplicate=False ) +env.SConscript( ['SConscript.buildinfo', 'SConscript.smoke'] ) def clean_old_dist_builds(env, target, source): prefix = "mongodb-%s-%s" % (platform, processor) @@ -1237,3 +1199,5 @@ def clean_old_dist_builds(env, target, source): env.Alias("dist_clean", [], [clean_old_dist_builds]) env.AlwaysBuild("dist_clean") + +env.Alias('all', ['core', 'tools', 'clientTests']) diff --git a/buildscripts/build_and_test_client.py b/buildscripts/build_and_test_client.py new file mode 100755 index 00000000000..e95dbb42917 --- /dev/null +++ b/buildscripts/build_and_test_client.py @@ -0,0 +1,59 @@ +#!/usr/bin/python + +'''Script to attempt an isolated build of the C++ driver and its examples. + +Working directory must be the repository root. + +Usage: + +./buildscripts/build_and_test_client.py <mongo client archive file> [optional scons arguments] + +The client is built in a temporary directory, and the sample programs are run against a mongod +instance found in the current working directory. The temporary directory and its contents are +destroyed at the end of execution. 
+'''
+
+import os
+import shutil
+import subprocess
+import sys
+import tempfile
+import tarfile
+
+import utils
+
+def main(args):
+    archive_file = args[1]
+    scons_args = args[2:]
+    build_and_test(archive_file, scons_args)
+
+def build_and_test(archive, scons_args):
+    work_dir = tempfile.mkdtemp()
+    try:
+        extracted_root = extract_archive(work_dir, archive)
+        run_scons(extracted_root, scons_args)
+        smoke_client(extracted_root)
+    finally:
+        shutil.rmtree(work_dir)
+
+def extract_archive(work_dir, archive):
+    tf = tarfile.open(archive, 'r')
+    tf.extractall(path=work_dir)
+    return os.path.join(
+        work_dir,
+        os.path.dirname([n for n in tf.getnames() if n.endswith('SConstruct')][0])
+        )
+
+def run_scons(extracted_root, scons_args):
+    rc = subprocess.call(['scons', '-C', extracted_root, ] + scons_args + ['clientTests'])
+    if rc != 0:
+        sys.exit(rc)
+
+def smoke_client(extracted_root):
+    rc = subprocess.call(utils.smoke_command("--test-path", extracted_root, "client"))
+    if rc != 0:
+        sys.exit(rc)
+
+if __name__ == '__main__':
+    main(sys.argv)
+    sys.exit(0)
diff --git a/buildscripts/make_archive.py b/buildscripts/make_archive.py
new file mode 100755
index 00000000000..c4bd0100491
--- /dev/null
+++ b/buildscripts/make_archive.py
@@ -0,0 +1,116 @@
+#!/usr/bin/python
+
+'''Helper script for constructing an archive (zip or tar) from a list of files.
+
+The output format (tar, tgz, zip) is determined from the file name, unless the user specifies
+--format on the command line.
+
+This script simplifies the specification of filename transformations, so that, e.g.,
+src/mongo/foo.cpp and build/linux2/normal/buildinfo.cpp can get put into the same
+directory in the archive, perhaps mongodb-2.0.2/src/mongo.
+
+Usage:
+
+make_archive.py -o <output-file> [--format (tar|tgz|zip)] \
+    [--transform match1=replacement1 [--transform match2=replacement2 [...]]] \
+    <input file 1> [...]
+ +If the input file names start with "@", the file is expected to contain a list of +whitespace-separated file names to include in the archive. This helps get around the Windows +command line length limit. + +Transformations are processed in command-line order and are short-circuiting. So, if a file matches +match1, it is never compared against match2 or later. Matches are just python startswith() +comparisons. + +For a detailed usage example, see src/SConscript.client or src/mongo/SConscript. +''' + +import optparse +import os +import sys + +def main(argv): + opts = parse_options(argv[1:]) + archive = open_archive_for_write(opts.output_filename, opts.archive_format) + try: + for input_filename in opts.input_filenames: + archive.add(input_filename, arcname=get_preferred_filename(input_filename, + opts.transformations)) + finally: + archive.close() + +def parse_options(args): + parser = optparse.OptionParser() + parser.add_option('-o', dest='output_filename', default=None, + help='Name of the archive to output.', metavar='FILE') + parser.add_option('--format', dest='archive_format', default=None, + choices=('zip', 'tar', 'tgz'), + help='Format of archive to create. 
'
+                      'If omitted, use the suffix of the output filename to decide.')
+    parser.add_option('--transform', action='append', dest='transformations', default=[])
+
+    (opts, input_filenames) = parser.parse_args(args)
+    opts.input_filenames = []
+
+    for input_filename in input_filenames:
+        if input_filename.startswith('@'):
+            opts.input_filenames.extend(line.strip() for line in open(input_filename[1:], 'r'))
+        else:
+            opts.input_filenames.append(input_filename)
+
+    if opts.output_filename is None:
+        parser.error('-o switch is required')
+
+    if opts.archive_format is None:
+        if opts.output_filename.endswith('.zip'):
+            opts.archive_format = 'zip'
+        elif opts.output_filename.endswith('tar.gz') or opts.output_filename.endswith('.tgz'):
+            opts.archive_format = 'tgz'
+        elif opts.output_filename.endswith('.tar'):
+            opts.archive_format = 'tar'
+        else:
+            parser.error('Could not deduce archive format from output filename "%s"' %
+                         opts.output_filename)
+
+    try:
+        opts.transformations = [
+            xform.replace(os.path.altsep or os.path.sep, os.path.sep).split('=', 1)
+            for xform in opts.transformations]
+    except Exception, e:
+        parser.error(e)
+
+    return opts
+
+def open_archive_for_write(filename, archive_format):
+    '''Open a tar or zip archive for write, with the given format, and return it.
+
+    The type of archive is determined by the "archive_format" parameter, which should be
+    "tar", "tgz" (for gzipped tar) or "zip".
+    '''
+
+    if archive_format in ('tar', 'tgz'):
+        import tarfile
+        mode = 'w'
+        if archive_format == 'tgz':
+            mode += '|gz'
+        return tarfile.open(filename, mode)
+    if archive_format == 'zip':
+        import zipfile
+        # Infuriatingly, Zipfile calls the "add" method "write", but they're otherwise identical,
+        # for our purposes.  WrappedZipFile is a minimal adapter class.
+ class WrappedZipFile(zipfile.ZipFile): + def add(self, filename, arcname): + return self.write(filename, arcname) + return WrappedZipFile(filename, 'w', zipfile.ZIP_DEFLATED) + raise ValueError('Unsupported archive format "%s"' % archive_format) + +def get_preferred_filename(input_filename, transformations): + for match, replace in transformations: + if input_filename.startswith(match): + return replace + input_filename[len(match):] + return input_filename + +if __name__ == '__main__': + main(sys.argv) + sys.exit(0) diff --git a/distsrc/client/SConstruct b/distsrc/client/SConstruct index 503b71ccbc7..5b40f1a00aa 100755 --- a/distsrc/client/SConstruct +++ b/distsrc/client/SConstruct @@ -1,102 +1,85 @@ +# -*- mode: python -*- + # scons file for MongoDB c++ client library and examples import os - -# options -AddOption( "--extrapath", - dest="extrapath", - type="string", - nargs=1, - action="store", - help="comma separated list of add'l paths (--extrapath /opt/foo/,/foo) static linking" ) - -AddOption( "--prefix", - dest="prefix", - type="string", - nargs=1, - action="store", - default="/usr/local", - help="installation root" ) - - -env = Environment( MSVS_ARCH=None ) - -def addExtraLibs( s ): +import sys + +# options +AddOption("--extrapath", + dest="extrapath", + type="string", + nargs=1, + action="store", + help="comma separated list of add'l paths (--extrapath /opt/foo/,/foo) static linking") + +AddOption("--prefix", + dest="prefix", + type="string", + nargs=1, + action="store", + default="/usr/local", + help="installation root") + + +env = Environment(BUILD_DIR='#build', + CLIENT_ARCHIVE='${CLIENT_DIST_BASENAME}${DIST_ARCHIVE_SUFFIX}', + CLIENT_DIST_BASENAME='mongo-cxx-driver', + CLIENT_LICENSE='#LICENSE.txt', + CLIENT_SCONSTRUCT='#SConstruct', + MSVS_ARCH=None, + PYTHON=sys.executable) + +def addExtraLibs(s): for x in s.split(","): - if os.path.exists( x ): - env.Append( CPPPATH=[ x + "/include" ] ) - env.Append( LIBPATH=[ x + "/lib" ] ) - env.Append( LIBPATH=[ 
x + "/lib64" ] )
+        if os.path.exists(x):
+            env.Append(CPPPATH=[x + "/include", x],
+                       LIBPATH=[x + "/lib", x + "/lib64"])
 
 if GetOption( "extrapath" ) is not None:
     addExtraLibs( GetOption( "extrapath" ) )
 
-env.Append( CPPPATH=[ "mongo/" ] )
-
-env.Append( CPPDEFINES=[ "_SCONS" , "MONGO_EXPOSE_MACROS" ] )
+env.Prepend(CPPPATH=["$BUILD_DIR", "$BUILD_DIR/mongo"])
+env.Append(CPPDEFINES=[ "_SCONS", "MONGO_EXPOSE_MACROS" ])
 
 nix = False
 linux = False
-if "darwin" == os.sys.platform:
+
+if "darwin" == sys.platform:
     addExtraLibs( "/opt/local/" )
     nix = True
-elif "linux2" == os.sys.platform or "linux3" == os.sys.platform:
+elif sys.platform in ("linux2", "linux3"):
     nix = True
     linux = True
 
+if sys.platform == 'win32':
+    env['DIST_ARCHIVE_SUFFIX'] = '.zip'
+else:
+    env['DIST_ARCHIVE_SUFFIX'] = '.tgz'
+
 if nix:
-    env.Append( CPPFLAGS=" -O3" )
-    env.Append( LIBS=["pthread"] )
+    env.Append(CCFLAGS=["-O3", "-pthread"])
     if linux:
-        env.Append( LINKFLAGS=" -Wl,--as-needed -Wl,-zdefs " )
+        env.Append(LINKFLAGS=["-Wl,--as-needed", "-Wl,-zdefs"])
 
-boostLibs = [ "thread" , "filesystem" , "system", "thread" ]
+boostLibs = ["thread", "filesystem", "system"]
 
 conf = Configure(env)
 for lib in boostLibs:
-    if not conf.CheckLib("boost_%s-mt" % lib):
-        conf.CheckLib("boost_%s" % lib)
+    if not conf.CheckLib(["boost_%s-mt" % lib, "boost_%s" % lib],
+                         language="C++"):
+        Exit(1)
+conf.Finish()
 
-dirs = [ "" , "bson/" , "bson/util/" ,
-         "client/" , "s/" , "shell/" ,
-         "db/" ,
-         "scripting/" ,
-         "util/" , "util/concurrency/" , "util/mongoutils/" , "util/net/" ]
+clientEnv = env.Clone()
+clientEnv['CPPDEFINES'].remove('MONGO_EXPOSE_MACROS')
+clientEnv.Prepend(LIBS=['mongoclient'], LIBPATH=['.'])
 
-allClientFiles = []
-for x in dirs:
-    allClientFiles += Glob( "mongo/" + x + "*.cpp" )
-allClientFiles += Glob( "mongo/util/*.c" )
+Export("env clientEnv")
+env.SConscript('src/SConscript.client', variant_dir='$BUILD_DIR', duplicate=False)
 
-libs = env.Library( "mongoclient" , allClientFiles ) 
+env.Default('${LIBPREFIX}mongoclient${LIBSUFFIX}') -# install - -prefix = GetOption( "prefix" ) - -for x in libs: - env.Install( prefix + "/lib/" , str(x) ) - -for x in dirs: - x = "mongo/" + x - env.Install( prefix + "/include/" + x , Glob( x + "*.h" ) ) - -env.Alias( "install" , prefix ) - -# example setup - -clientTests = [] -clientEnv = env.Clone(); -clientEnv.Prepend( LIBS=["mongoclient"] ) -clientEnv.Prepend( LIBPATH=["."] ) - -# examples - -clientTests += [ clientEnv.Program( "firstExample" , [ "client/examples/first.cpp" ] ) ] -clientTests += [ clientEnv.Program( "secondExample" , [ "client/examples/second.cpp" ] ) ] -clientTests += [ clientEnv.Program( "whereExample" , [ "client/examples/whereExample.cpp" ] ) ] -clientTests += [ clientEnv.Program( "authTest" , [ "client/examples/authTest.cpp" ] ) ] -clientTests += [ clientEnv.Program( "httpClientTest" , [ "client/examples/httpClientTest.cpp" ] ) ] -clientTests += [ clientEnv.Program( "clientTest" , [ "client/examples/clientTest.cpp" ] ) ] -clientEnv.Alias("clientTests", clientTests, []) +# install +env.Alias("install", GetOption('prefix')) diff --git a/src/SConscript.client b/src/SConscript.client new file mode 100644 index 00000000000..cb62622808a --- /dev/null +++ b/src/SConscript.client @@ -0,0 +1,133 @@ +# -*- mode: python -*- + +# This SConscript describes build and install rules for the Mongo C++ driver and associated exmaple +# programs. 
+ +Import('env clientEnv') + +clientSource = [ + 'mongo/bson/oid.cpp', + 'mongo/buildinfo.cpp', + 'mongo/client/clientAndShell.cpp', + 'mongo/client/clientOnly.cpp', + 'mongo/client/connpool.cpp', + 'mongo/client/dbclient.cpp', + 'mongo/client/dbclient_rs.cpp', + 'mongo/client/dbclientcursor.cpp', + 'mongo/client/distlock.cpp', + 'mongo/client/gridfs.cpp', + 'mongo/client/model.cpp', + 'mongo/client/syncclusterconnection.cpp', + 'mongo/db/jsobj.cpp', + 'mongo/db/json.cpp', + 'mongo/db/lasterror.cpp', + 'mongo/db/namespace.cpp', + 'mongo/db/nonce.cpp', + 'mongo/pch.cpp', + 'mongo/util/assert_util.cpp', + 'mongo/util/background.cpp', + 'mongo/util/base64.cpp', + 'mongo/util/concurrency/rwlockimpl.cpp', + 'mongo/util/concurrency/spin_lock.cpp', + 'mongo/util/concurrency/synchronization.cpp', + 'mongo/util/concurrency/task.cpp', + 'mongo/util/concurrency/thread_pool.cpp', + 'mongo/util/concurrency/vars.cpp', + 'mongo/util/debug_util.cpp', + 'mongo/util/file_allocator.cpp', + 'mongo/util/histogram.cpp', + 'mongo/util/intrusive_counter.cpp', + 'mongo/util/log.cpp', + 'mongo/util/md5.cpp', + 'mongo/util/md5main.cpp', + 'mongo/util/net/httpclient.cpp', + 'mongo/util/net/listen.cpp', + 'mongo/util/net/message.cpp', + 'mongo/util/net/message_port.cpp', + 'mongo/util/net/sock.cpp', + 'mongo/util/password.cpp', + 'mongo/util/ramlog.cpp', + 'mongo/util/signal_handlers.cpp', + 'mongo/util/stringutils.cpp', + 'mongo/util/text.cpp', + 'mongo/util/trace.cpp', + 'mongo/util/util.cpp', + ] + +exampleSourceMap = [ + ('firstExample', 'mongo/client/examples/first.cpp'), + ('rsExample', 'mongo/client/examples/rs.cpp'), + ('secondExample', 'mongo/client/examples/second.cpp'), + ('whereExample', 'mongo/client/examples/whereExample.cpp'), + ('authTest', 'mongo/client/examples/authTest.cpp'), + ('httpClientTest', 'mongo/client/examples/httpClientTest.cpp'), + ('bsondemo', 'mongo/bson/bsondemo/bsondemo.cpp'), + ('clientTest', 'mongo/client/examples/clientTest.cpp'), + ] + +clientHeaders = [] 
+for id in ["", + "util/", + "util/net/", + "util/mongoutils/", + "util/concurrency/", + "db/", + "db/stats/", + "db/repl/", + "db/ops/", + "client/", + "bson/", + "bson/util/", + "s/", + "scripting/"]: + clientHeaders.extend(Glob('mongo/%s/*.h' % id)) + clientHeaders.extend(Glob('mongo/%s/*.hpp' % id)) + +env.Install('#/', [ + env.Library('mongoclient', clientSource), + #env.SharedLibrary('mongoclient', clientSource), + ]) + +clientTests = clientEnv.Install('#/', [ + clientEnv.Program(target, [source]) for (target, source) in exampleSourceMap]) + +clientEnv.Alias('clientTests', clientTests, []) + +env.Install( + '#/', + env.Command('$CLIENT_ARCHIVE', + ['#buildscripts/make_archive.py', + '$CLIENT_SCONSTRUCT', + '$CLIENT_LICENSE', + 'SConscript.client', + '#buildscripts/make_archive.py', + clientSource, + clientHeaders, + [source for (target, source) in exampleSourceMap]], + '${PYTHON} ${SOURCES[0]} -o $TARGET ' + '--transform ${str(Dir(BUILD_DIR))}/client_build=$CLIENT_DIST_BASENAME/src ' + '--transform ${str(Dir(BUILD_DIR))}=$CLIENT_DIST_BASENAME/src ' + '--transform distsrc/client=$CLIENT_DIST_BASENAME ' + '--transform =$CLIENT_DIST_BASENAME/ ' + '${TEMPFILE(SOURCES[1:])}')) + +# install +prefix = GetOption("prefix") + +env.Install(prefix + "/lib", '${LIBPREFIX}mongoclient${LIBSUFFIX}') + +for x in ["", + "bson/", + "bson/util/", + "client/", + "s/", + "shell/", + "db/", + "scripting/", + "util/", + "util/concurrency/", + "util/mongoutils/", + "util/net/" ]: + env.Install(prefix + "/include/mongo/" + x, + [Glob('mongo/%s*.h' % x), Glob('mongo/%s*.hpp' % x)]) + diff --git a/src/mongo/SConscript b/src/mongo/SConscript index 6fc891d48b8..ae484fbf077 100644 --- a/src/mongo/SConscript +++ b/src/mongo/SConscript @@ -10,42 +10,21 @@ Import("shellEnv") Import("testEnv") Import("has_option") Import("usesm usev8") -Import("installSetup getSysInfo") +Import("installSetup") Import("darwin windows solaris linux nix") def add_exe( v ): return "${PROGPREFIX}%s${PROGSUFFIX}" % 
v -def setupBuildInfoFile( env, target, source, **kw ): - version = utils.getGitVersion() - sysInfo = getSysInfo() - contents = '\n'.join([ - '#include "pch.h"', - '#include <iostream>', - '#include <boost/version.hpp>', - 'namespace mongo { const char * gitVersion(){ return "' + version + '"; } }', - 'namespace mongo { string sysInfo(){ return "' + sysInfo + ' BOOST_LIB_VERSION=" BOOST_LIB_VERSION ; } }', - ]) - - contents += '\n\n'; - - out = open( str( target[0] ) , 'wb' ) - try: - out.write( contents ) - finally: - out.close() - -env.AlwaysBuild( env.Command( 'buildinfo.cpp', [], setupBuildInfoFile ) ) - # ------ SOURCE FILE SETUP ----------- commonFiles = [ "pch.cpp", "buildinfo.cpp", - "db/indexkey.cpp", "db/jsobj.cpp", "bson/oid.cpp", "db/json.cpp", "db/lasterror.cpp", + "db/namespace.cpp", "db/nonce.cpp", "db/queryutil.cpp", "db/querypattern.cpp", @@ -111,6 +90,7 @@ coreServerFiles = [ "util/version.cpp", "db/commands/cloud.cpp", "db/dbmessage.cpp", "db/commands/pipeline.cpp", + "db/indexkey.cpp", "db/pipeline/accumulator.cpp", "db/pipeline/accumulator_add_to_set.cpp", "db/pipeline/accumulator_avg.cpp", @@ -156,14 +136,6 @@ coreServerFiles = [ "util/version.cpp", if "win32" == os.sys.platform: coreServerFiles.append( "util/ntservice.cpp" ) -clientFiles = commonFiles + [ - 'client/clientAndShell.cpp', - 'client/clientOnly.cpp', - 'client/gridfs.cpp', - 'db/commands.cpp', -] - - if usesm: coreServerFiles.append( "scripting/engine_spidermonkey.cpp" ) elif usev8: @@ -243,7 +215,7 @@ serverOnlyFiles = [ "db/curop.cpp", "db/repl_block.cpp", "db/btreecursor.cpp", "db/cloner.cpp", - "db/namespace.cpp", + "db/namespace_details.cpp", "db/cap.cpp", "db/matcher_covered.cpp", "db/dbeval.cpp", @@ -375,31 +347,10 @@ env.Install( '#/', mongos ) env.Library("clientandshell", "client/clientAndShell.cpp", LIBDEPS=["mongocommon", "coredb", "defaultversion", "gridfs"]) env.Library("allclient", "client/clientOnly.cpp", LIBDEPS=["clientandshell"]) -# c++ library -clientLib = 
env.MergeLibrary( "mongoclient", ["allclient"] ) -env.Install( '#/', clientLib ) -clientLibName = str( clientLib[0] ) + if has_option( "sharedclient" ): sharedClientLibName = str( env.SharedLibrary( "mongoclient", [], LIBDEPS=["allclient"], _LIBDEPS='$_LIBDEPS_OBJS' )[0] ) -clientEnv = env.Clone(); -clientEnv.Append( CPPPATH=["../"] ) -clientEnv.Prepend( LIBS=[ clientLib ] ) -clientEnv.Prepend( LIBPATH=["."] ) -clientEnv["CPPDEFINES"].remove( "MONGO_EXPOSE_MACROS" ) -l = clientEnv[ "LIBS" ] - -# examples -clientTests = [ - clientEnv.Program( "firstExample", [ "client/examples/first.cpp" ] ), - clientEnv.Program( "rsExample", [ "client/examples/rs.cpp" ] ), - clientEnv.Program( "secondExample", [ "client/examples/second.cpp" ] ), - clientEnv.Program( "whereExample", [ "client/examples/whereExample.cpp" ] ), - clientEnv.Program( "authTest", [ "client/examples/authTest.cpp" ] ), - clientEnv.Program( "httpClientTest", [ "client/examples/httpClientTest.cpp" ] ), - clientEnv.Program( "bsondemo", [ "bson/bsondemo/bsondemo.cpp" ] ), - ] - # dbtests test binary env.StaticLibrary('testframework', ['dbtests/framework.cpp']) @@ -412,13 +363,10 @@ if len(testEnv.subst('$PROGSUFFIX')): testEnv.Alias( "test", "#/${PROGPREFIX}test${PROGSUFFIX}" ) env.Install( '#/', testEnv.Program( "perftest", [ "dbtests/perf/perftest.cpp" ], LIBDEPS=["serveronly", "coreserver", "coredb", "testframework" ] ) ) -clientTests += [ clientEnv.Program( "clientTest", [ "client/examples/clientTest.cpp" ] ) ] - -env.Install( '#/', clientTests ) # --- sniffer --- mongosniff_built = False -if darwin or clientEnv["_HAVEPCAP"]: +if darwin or env["_HAVEPCAP"]: mongosniff_built = True sniffEnv = env.Clone() sniffEnv.Append( CPPDEFINES="MONGO_EXPOSE_MACROS" ) @@ -474,36 +422,36 @@ def checkGlibc(target,source,env): print( "************* " + str( target[0] ) + " has GLIBC_2.4 dependencies!" 
) Exit(-3) -allBinaries = [] +distBinaries = [] def installBinary( e, name ): - if not installSetup.binaries: - return - - global allBinaries + global distBinaries name = add_exe( name ) - inst = e.Install( "$INSTALL_DIR/bin", name ) + if (not has_option( "no-glibc-check" ) and linux and "s3dist" in COMMAND_LINE_TARGETS): + e.AddPostAction( name, checkGlibc ) - allBinaries += [ name ] if (solaris or linux) and (not has_option("nostrip")): - e.AddPostAction( inst, 'strip $TARGET' ) + name = e.Command('stripped/%s' % name, name, 'strip -o $TARGET $SOURCE')[0] - if not has_option( "no-glibc-check" ) and linux and len( COMMAND_LINE_TARGETS ) == 1 and str( COMMAND_LINE_TARGETS[0] ) == "s3dist": - e.AddPostAction( inst, checkGlibc ) + distBinaries.append(name) + + if not installSetup.binaries: + return + + inst = e.Install( "$INSTALL_DIR/bin", name ) if nix: e.AddPostAction( inst, 'chmod 755 $TARGET' ) -for x in normalTools: - installBinary( env, "mongo" + x ) -installBinary( env, "mongofiles" ) -installBinary( env, "bsondump" ) -installBinary( env, "mongoperf" ) +for t in ["mongo" + x for x in normalTools] + ["mongofiles", "bsondump", "mongoperf"]: + installBinary( env, t ) + env.Alias("tools", '#/' + add_exe(t)) if mongosniff_built: installBinary(env, "mongosniff") + env.Alias("tools", '#/' + add_exe("mongosniff")) installBinary( env, "mongod" ) installBinary( env, "mongos" ) @@ -511,61 +459,39 @@ installBinary( env, "mongos" ) if shellEnv is not None: installBinary( env, "mongo" ) -env.Alias( "all", ['#/%s' % b for b in allBinaries ] ) env.Alias( "core", [ '#/%s' % b for b in [ add_exe( "mongo" ), add_exe( "mongod" ), add_exe( "mongos" ) ] ] ) #headers if installSetup.headers: for id in [ "", "util/", "util/net/", "util/mongoutils/", "util/concurrency/", "db/", "db/stats/", "db/repl/", "db/ops/", "client/", "bson/", "bson/util/", "s/", "scripting/" ]: - env.Install( "$INSTALL_DIR/" + installSetup.headerRoot + "/" + id, Glob( id + "*.h" ) ) - env.Install( 
"$INSTALL_DIR/" + installSetup.headerRoot + "/" + id, Glob( id + "*.hpp" ) ) - -if installSetup.clientSrc: - for x in clientFiles: - if isinstance(x, basestring): - x = [x] - for path in x: - path = str(path) - dirname, filename = os.path.split(path) - env.Install( "$INSTALL_DIR/mongo/" + dirname , path ) + env.Install( "$INSTALL_DIR/include/" + id, Glob( id + "*.h" ) ) + env.Install( "$INSTALL_DIR/include/" + id, Glob( id + "*.hpp" ) ) #lib if installSetup.libraries: - env.Install( "$INSTALL_DIR/$NIX_LIB_DIR", clientLibName ) + env.Install('$INSTALL_DIR/$NIX_LIB_DIR', '#${LIBPREFIX}mongoclient${LIBSUFFIX}') if has_option( "sharedclient" ): - env.Install( "$INSTALL_DIR/$NIX_LIB_DIR", sharedClientLibName ) - -#textfiles -env.Install( "$INSTALL_DIR", installSetup.bannerFiles ) - -if installSetup.clientTestsDir: - env.Install( '$INSTALL_DIR/client/', installSetup.clientTestsDir ) + env.Install( "$INSTALL_DIR/$NIX_LIB_DIR", '#${SHLIBPREFIX}mongoclient${SHLIBSUFFIX}') + +env.Command( + '#/${SERVER_ARCHIVE}', + ['#buildscripts/make_archive.py', + '#distsrc/GNU-AGPL-3.0', + '#distsrc/README', + '#distsrc/THIRD-PARTY-NOTICES', + distBinaries], + '$PYTHON ${SOURCES[0]} -o $TARGET ' + '--transform distsrc=$SERVER_DIST_BASENAME ' + '--transform ${str(Dir(BUILD_DIR))}/mongo/stripped=$SERVER_DIST_BASENAME/bin ' + '--transform ${str(Dir(BUILD_DIR))}/mongo=$SERVER_DIST_BASENAME/bin ' + '${TEMPFILE(SOURCES[1:])}') #final alias env.Alias( "install", "$INSTALL_DIR" ) -# aliases -env.Alias( "mongoclient", '#/%s' % ( has_option( "sharedclient" ) and sharedClientLibName or clientLibName ) ) - -# client dist -def build_and_test_client(env, target, source): - import subprocess - - installDir = env.subst('$INSTALL_DIR', target=target, source=source) - installDir = env.GetBuildPath(installDir) - if GetOption("extrapath") is not None: - scons_command = ["scons", "--extrapath=" + GetOption("extrapath")] - else: - scons_command = ["scons"] - - exit_code = subprocess.call(scons_command + 
["clientTests"], cwd=installDir) - if exit_code: - return exit_code - - smoke_cmd = utils.smoke_command("--test-path", installDir, "client") - exit_code = subprocess.call(smoke_cmd) - if exit_code: - return exit_code - -env.Alias("clientBuild", [mongod, '$INSTALL_DIR'], [build_and_test_client]) +env.Alias("clientBuild", ['#buildscripts/build_and_test_client.py', + '#/${PROGPREFIX}mongod${PROGSUFFIX}', + '#$CLIENT_ARCHIVE'], + '$PYTHON ${SOURCES[0]} ${SOURCES[2]}' + ) env.AlwaysBuild("clientBuild") diff --git a/src/mongo/client/dbclient.cpp b/src/mongo/client/dbclient.cpp index 14fc7aaddd9..48e0edbcd0a 100644 --- a/src/mongo/client/dbclient.cpp +++ b/src/mongo/client/dbclient.cpp @@ -15,18 +15,26 @@ * limitations under the License. */ -#include "pch.h" -#include "dbclient.h" -#include "../bson/util/builder.h" -#include "../db/jsobj.h" -#include "../db/json.h" -#include "../db/instance.h" -#include "../util/md5.hpp" -#include "../db/dbmessage.h" -#include "../db/cmdline.h" -#include "connpool.h" -#include "../s/util.h" -#include "syncclusterconnection.h" +#include "mongo/pch.h" + +#include "mongo/client/dbclient.h" + +#include "mongo/bson/util/builder.h" +#include "mongo/client/constants.h" +#include "mongo/client/dbclient_rs.h" +#include "mongo/client/dbclientcursor.h" +#include "mongo/client/syncclusterconnection.h" +#include "mongo/db/jsobj.h" +#include "mongo/db/json.h" +#include "mongo/db/namespace-inl.h" +#include "mongo/db/namespacestring.h" +#include "mongo/s/util.h" +#include "mongo/util/md5.hpp" + +#ifdef MONGO_SSL +// TODO: Remove references to cmdline from the client. 
+#include "mongo/db/cmdline.h" +#endif // defined MONGO_SSL namespace mongo { @@ -148,6 +156,12 @@ namespace mongo { } + Query::Query( const string &json ) : obj( fromjson( json ) ) {} + + Query::Query( const char *json ) : obj( fromjson( json ) ) {} + + Query& Query::hint(const string &jsonKeyPatt) { return hint( fromjson( jsonKeyPatt ) ); } + Query& Query::where(const string &jscode, BSONObj scope) { /* use where() before sort() and hint() and explain(), else this will assert. */ assert( ! isComplex() ); diff --git a/src/mongo/client/dbclient.h b/src/mongo/client/dbclient.h index d2ad1ad0580..698f091874b 100644 --- a/src/mongo/client/dbclient.h +++ b/src/mongo/client/dbclient.h @@ -1,6 +1,7 @@ /** @file dbclient.h - Core MongoDB C++ driver interfaces are defined here. + Include this file when writing client C++ applications, to get access to the + mongod C++ driver. */ /* Copyright 2009 10gen Inc. @@ -20,1044 +21,10 @@ #pragma once -#include "../pch.h" -#include "../util/net/message.h" -#include "../util/net/message_port.h" -#include "../db/jsobj.h" -#include "../db/json.h" -#include "../db/security.h" -#include <stack> +#include "mongo/client/redef_macros.h" -namespace mongo { +#include "mongo/client/dbclient_rs.h" +#include "mongo/client/dbclientcursor.h" +#include "mongo/client/dbclientinterface.h" - /** the query field 'options' can have these bits set: */ - enum QueryOptions { - /** Tailable means cursor is not closed when the last data is retrieved. rather, the cursor marks - the final object's position. you can resume using the cursor later, from where it was located, - if more data were received. Set on dbQuery and dbGetMore. - - like any "latent cursor", the cursor may become invalid at some point -- for example if that - final object it references were deleted. Thus, you should be prepared to requery if you get back - ResultFlag_CursorNotFound. - */ - QueryOption_CursorTailable = 1 << 1, - - /** allow query of replica slave. 
normally these return an error except for namespace "local". - */ - QueryOption_SlaveOk = 1 << 2, - - // findingStart mode is used to find the first operation of interest when - // we are scanning through a repl log. For efficiency in the common case, - // where the first operation of interest is closer to the tail than the head, - // we start from the tail of the log and work backwards until we find the - // first operation of interest. Then we scan forward from that first operation, - // actually returning results to the client. During the findingStart phase, - // we release the db mutex occasionally to avoid blocking the db process for - // an extended period of time. - QueryOption_OplogReplay = 1 << 3, - - /** The server normally times out idle cursors after an inactivy period to prevent excess memory uses - Set this option to prevent that. - */ - QueryOption_NoCursorTimeout = 1 << 4, - - /** Use with QueryOption_CursorTailable. If we are at the end of the data, block for a while rather - than returning no data. After a timeout period, we do return as normal. - */ - QueryOption_AwaitData = 1 << 5, - - /** Stream the data down full blast in multiple "more" packages, on the assumption that the client - will fully read all data queried. Faster when you are pulling a lot of data and know you want to - pull it all down. Note: it is not allowed to not read all the data unless you close the connection. - - Use the query( boost::function<void(const BSONObj&)> f, ... ) version of the connection's query() - method, and it will take care of all the details for you. 
- */ - QueryOption_Exhaust = 1 << 6, - - /** When sharded, this means its ok to return partial results - Usually we will fail a query if all required shards aren't up - If this is set, it'll be a partial result set - */ - QueryOption_PartialResults = 1 << 7 , - - QueryOption_AllSupported = QueryOption_CursorTailable | QueryOption_SlaveOk | QueryOption_OplogReplay | QueryOption_NoCursorTimeout | QueryOption_AwaitData | QueryOption_Exhaust | QueryOption_PartialResults - - }; - - enum UpdateOptions { - /** Upsert - that is, insert the item if no matching item is found. */ - UpdateOption_Upsert = 1 << 0, - - /** Update multiple documents (if multiple documents match query expression). - (Default is update a single document and stop.) */ - UpdateOption_Multi = 1 << 1, - - /** flag from mongo saying this update went everywhere */ - UpdateOption_Broadcast = 1 << 2 - }; - - enum RemoveOptions { - /** only delete one option */ - RemoveOption_JustOne = 1 << 0, - - /** flag from mongo saying this update went everywhere */ - RemoveOption_Broadcast = 1 << 1 - }; - - - /** - * need to put in DbMesssage::ReservedOptions as well - */ - enum InsertOptions { - /** With muli-insert keep processing inserts if one fails */ - InsertOption_ContinueOnError = 1 << 0 - }; - - class DBClientBase; - - /** - * ConnectionString handles parsing different ways to connect to mongo and determining method - * samples: - * server - * server:port - * foo/server:port,server:port SET - * server,server,server SYNC - * - * tyipcal use - * string errmsg, - * ConnectionString cs = ConnectionString::parse( url , errmsg ); - * if ( ! 
cs.isValid() ) throw "bad: " + errmsg; - * DBClientBase * conn = cs.connect( errmsg ); - */ - class ConnectionString { - public: - enum ConnectionType { INVALID , MASTER , PAIR , SET , SYNC }; - - ConnectionString() { - _type = INVALID; - } - - ConnectionString( const HostAndPort& server ) { - _type = MASTER; - _servers.push_back( server ); - _finishInit(); - } - - ConnectionString( ConnectionType type , const string& s , const string& setName = "" ) { - _type = type; - _setName = setName; - _fillServers( s ); - - switch ( _type ) { - case MASTER: - assert( _servers.size() == 1 ); - break; - case SET: - assert( _setName.size() ); - assert( _servers.size() >= 1 ); // 1 is ok since we can derive - break; - case PAIR: - assert( _servers.size() == 2 ); - break; - default: - assert( _servers.size() > 0 ); - } - - _finishInit(); - } - - ConnectionString( const string& s , ConnectionType favoredMultipleType ) { - _type = INVALID; - - _fillServers( s ); - if ( _type != INVALID ) { - // set already - } - else if ( _servers.size() == 1 ) { - _type = MASTER; - } - else { - _type = favoredMultipleType; - assert( _type == SET || _type == SYNC ); - } - _finishInit(); - } - - bool isValid() const { return _type != INVALID; } - - string toString() const { return _string; } - - DBClientBase* connect( string& errmsg, double socketTimeout = 0 ) const; - - string getSetName() const { return _setName; } - - vector<HostAndPort> getServers() const { return _servers; } - - ConnectionType type() const { return _type; } - - static ConnectionString parse( const string& url , string& errmsg ); - - static string typeToString( ConnectionType type ); - - private: - - void _fillServers( string s ); - void _finishInit(); - - ConnectionType _type; - vector<HostAndPort> _servers; - string _string; - string _setName; - }; - - /** - * controls how much a clients cares about writes - * default is NORMAL - */ - enum WriteConcern { - W_NONE = 0 , // TODO: not every connection type fully supports this - 
W_NORMAL = 1 - // TODO SAFE = 2 - }; - - class BSONObj; - class ScopedDbConnection; - class DBClientCursor; - class DBClientCursorBatchIterator; - - /** Represents a Mongo query expression. Typically one uses the QUERY(...) macro to construct a Query object. - Examples: - QUERY( "age" << 33 << "school" << "UCLA" ).sort("name") - QUERY( "age" << GT << 30 << LT << 50 ) - */ - class Query { - public: - BSONObj obj; - Query() : obj(BSONObj()) { } - Query(const BSONObj& b) : obj(b) { } - Query(const string &json) : - obj(fromjson(json)) { } - Query(const char * json) : - obj(fromjson(json)) { } - - /** Add a sort (ORDER BY) criteria to the query expression. - @param sortPattern the sort order template. For example to order by name ascending, time descending: - { name : 1, ts : -1 } - i.e. - BSON( "name" << 1 << "ts" << -1 ) - or - fromjson(" name : 1, ts : -1 ") - */ - Query& sort(const BSONObj& sortPattern); - - /** Add a sort (ORDER BY) criteria to the query expression. - This version of sort() assumes you want to sort on a single field. - @param asc = 1 for ascending order - asc = -1 for descending order - */ - Query& sort(const string &field, int asc = 1) { sort( BSON( field << asc ) ); return *this; } - - /** Provide a hint to the query. - @param keyPattern Key pattern for the index to use. - Example: - hint("{ts:1}") - */ - Query& hint(BSONObj keyPattern); - Query& hint(const string &jsonKeyPatt) { return hint(fromjson(jsonKeyPatt)); } - - /** Provide min and/or max index limits for the query. - min <= x < max - */ - Query& minKey(const BSONObj &val); - /** - max is exclusive - */ - Query& maxKey(const BSONObj &val); - - /** Return explain information about execution of this query instead of the actual query results. - Normally it is easier to use the mongo shell to run db.find(...).explain(). - */ - Query& explain(); - - /** Use snapshot mode for the query. 
Snapshot mode assures no duplicates are returned, or objects missed, which were - present at both the start and end of the query's execution (if an object is new during the query, or deleted during - the query, it may or may not be returned, even with snapshot mode). - - Note that short query responses (less than 1MB) are always effectively snapshotted. - - Currently, snapshot mode may not be used with sorting or explicit hints. - */ - Query& snapshot(); - - /** Queries to the Mongo database support a $where parameter option which contains - a javascript function that is evaluated to see whether objects being queried match - its criteria. Use this helper to append such a function to a query object. - Your query may also contain other traditional Mongo query terms. - - @param jscode The javascript function to evaluate against each potential object - match. The function must return true for matched objects. Use the this - variable to inspect the current object. - @param scope SavedContext for the javascript object. List in a BSON object any - variables you would like defined when the jscode executes. One can think - of these as "bind variables". 
- - Examples: - conn.findOne("test.coll", Query("{a:3}").where("this.b == 2 || this.c == 3")); - Query badBalance = Query().where("this.debits - this.credits < 0"); - */ - Query& where(const string &jscode, BSONObj scope); - Query& where(const string &jscode) { return where(jscode, BSONObj()); } - - /** - * @return true if this query has an orderby, hint, or some other field - */ - bool isComplex( bool * hasDollar = 0 ) const; - - BSONObj getFilter() const; - BSONObj getSort() const; - BSONObj getHint() const; - bool isExplain() const; - - string toString() const; - operator string() const { return toString(); } - private: - void makeComplex(); - template< class T > - void appendComplex( const char *fieldName, const T& val ) { - makeComplex(); - BSONObjBuilder b; - b.appendElements(obj); - b.append(fieldName, val); - obj = b.obj(); - } - }; - - /** - * Represents a full query description, including all options required for the query to be passed on - * to other hosts - */ - class QuerySpec { - - string _ns; - int _ntoskip; - int _ntoreturn; - int _options; - BSONObj _query; - BSONObj _fields; - Query _queryObj; - - public: - - QuerySpec( const string& ns, - const BSONObj& query, const BSONObj& fields, - int ntoskip, int ntoreturn, int options ) - : _ns( ns ), _ntoskip( ntoskip ), _ntoreturn( ntoreturn ), _options( options ), - _query( query.getOwned() ), _fields( fields.getOwned() ) , _queryObj( _query ) { - } - - QuerySpec() {} - - bool isEmpty() const { return _ns.size() == 0; } - - bool isExplain() const { return _queryObj.isExplain(); } - BSONObj filter() const { return _queryObj.getFilter(); } - - BSONObj hint() const { return _queryObj.getHint(); } - BSONObj sort() const { return _queryObj.getSort(); } - BSONObj query() const { return _query; } - BSONObj fields() const { return _fields; } - BSONObj* fieldsData() { return &_fields; } - - // don't love this, but needed downstrem - const BSONObj* fieldsPtr() const { return &_fields; } - - string ns() const { 
return _ns; } - int ntoskip() const { return _ntoskip; } - int ntoreturn() const { return _ntoreturn; } - int options() const { return _options; } - - void setFields( BSONObj& o ) { _fields = o.getOwned(); } - - string toString() const { - return str::stream() << "QSpec " << - BSON( "ns" << _ns << "n2skip" << _ntoskip << "n2return" << _ntoreturn << "options" << _options - << "query" << _query << "fields" << _fields ); - } - - }; - - - /** Typically one uses the QUERY(...) macro to construct a Query object. - Example: QUERY( "age" << 33 << "school" << "UCLA" ) - */ -#define QUERY(x) mongo::Query( BSON(x) ) - - // Useful utilities for namespaces - /** @return the database name portion of an ns string */ - string nsGetDB( const string &ns ); - - /** @return the collection name portion of an ns string */ - string nsGetCollection( const string &ns ); - - /** - interface that handles communication with the db - */ - class DBConnector { - public: - virtual ~DBConnector() {} - /** actualServer is set to the actual server where they call went if there was a choice (SlaveOk) */ - virtual bool call( Message &toSend, Message &response, bool assertOk=true , string * actualServer = 0 ) = 0; - virtual void say( Message &toSend, bool isRetry = false , string * actualServer = 0 ) = 0; - virtual void sayPiggyBack( Message &toSend ) = 0; - /* used by QueryOption_Exhaust. To use that your subclass must implement this. 
*/ - virtual bool recv( Message& m ) { assert(false); return false; } - // In general, for lazy queries, we'll need to say, recv, then checkResponse - virtual void checkResponse( const char* data, int nReturned, bool* retry = NULL, string* targetHost = NULL ) { - if( retry ) *retry = false; if( targetHost ) *targetHost = ""; - } - virtual bool lazySupported() const = 0; - }; - - /** - The interface that any db connection should implement - */ - class DBClientInterface : boost::noncopyable { - public: - virtual auto_ptr<DBClientCursor> query(const string &ns, Query query, int nToReturn = 0, int nToSkip = 0, - const BSONObj *fieldsToReturn = 0, int queryOptions = 0 , int batchSize = 0 ) = 0; - - virtual void insert( const string &ns, BSONObj obj , int flags=0) = 0; - - virtual void insert( const string &ns, const vector< BSONObj >& v , int flags=0) = 0; - - virtual void remove( const string &ns , Query query, bool justOne = 0 ) = 0; - - virtual void update( const string &ns , Query query , BSONObj obj , bool upsert = 0 , bool multi = 0 ) = 0; - - virtual ~DBClientInterface() { } - - /** - @return a single object that matches the query. if none do, then the object is empty - @throws AssertionException - */ - virtual BSONObj findOne(const string &ns, const Query& query, const BSONObj *fieldsToReturn = 0, int queryOptions = 0); - - /** query N objects from the database into an array. makes sense mostly when you want a small number of results. if a huge number, use - query() and iterate the cursor. 
- */ - void findN(vector<BSONObj>& out, const string&ns, Query query, int nToReturn, int nToSkip = 0, const BSONObj *fieldsToReturn = 0, int queryOptions = 0); - - virtual string getServerAddress() const = 0; - - /** don't use this - called automatically by DBClientCursor for you */ - virtual auto_ptr<DBClientCursor> getMore( const string &ns, long long cursorId, int nToReturn = 0, int options = 0 ) = 0; - }; - - /** - DB "commands" - Basically just invocations of connection.$cmd.findOne({...}); - */ - class DBClientWithCommands : public DBClientInterface { - set<string> _seenIndexes; - public: - /** controls how chatty the client is about network errors & such. See log.h */ - int _logLevel; - - DBClientWithCommands() : _logLevel(0), _cachedAvailableOptions( (enum QueryOptions)0 ), _haveCachedAvailableOptions(false) { } - - /** helper function. run a simple command where the command expression is simply - { command : 1 } - @param info -- where to put result object. may be null if caller doesn't need that info - @param command -- command name - @return true if the command returned "ok". - */ - bool simpleCommand(const string &dbname, BSONObj *info, const string &command); - - /** Run a database command. Database commands are represented as BSON objects. Common database - commands have prebuilt helper functions -- see below. If a helper is not available you can - directly call runCommand. - - @param dbname database name. Use "admin" for global administrative commands. - @param cmd the command object to execute. For example, { ismaster : 1 } - @param info the result object the database returns. Typically has { ok : ..., errmsg : ... } fields - set. - @param options see enum QueryOptions - normally not needed to run a command - @return true if the command returned "ok". - */ - virtual bool runCommand(const string &dbname, const BSONObj& cmd, BSONObj &info, int options=0); - - /** Authorize access to a particular database. 
- Authentication is separate for each database on the server -- you may authenticate for any - number of databases on a single connection. - The "admin" database is special and once authenticated provides access to all databases on the - server. - @param digestPassword if password is plain text, set this to true. otherwise assumed to be pre-digested - @param[out] authLevel level of authentication for the given user - @return true if successful - */ - virtual bool auth(const string &dbname, const string &username, const string &pwd, string& errmsg, bool digestPassword = true, Auth::Level * level = NULL); - - /** count number of objects in collection ns that match the query criteria specified - throws UserAssertion if database returns an error - */ - virtual unsigned long long count(const string &ns, const BSONObj& query = BSONObj(), int options=0, int limit=0, int skip=0 ); - - string createPasswordDigest( const string &username , const string &clearTextPassword ); - - /** returns true in isMaster parm if this db is the current master - of a replica pair. - - pass in info for more details e.g.: - { "ismaster" : 1.0 , "msg" : "not paired" , "ok" : 1.0 } - - returns true if command invoked successfully. - */ - virtual bool isMaster(bool& isMaster, BSONObj *info=0); - - /** - Create a new collection in the database. Normally, collection creation is automatic. You would - use this function if you wish to specify special options on creation. - - If the collection already exists, no action occurs. - - @param ns fully qualified collection name - @param size desired initial extent size for the collection. - Must be <= 1000000000 for normal collections. - For fixed size (capped) collections, this size is the total/max size of the - collection. - @param capped if true, this is a fixed size collection (where old data rolls out). - @param max maximum number of objects if capped (optional). - - returns true if successful. 
- */ - bool createCollection(const string &ns, long long size = 0, bool capped = false, int max = 0, BSONObj *info = 0); - - /** Get error result from the last write operation (insert/update/delete) on this connection. - @return error message text, or empty string if no error. - */ - string getLastError(bool fsync = false, bool j = false, int w = 0, int wtimeout = 0); - - /** Get error result from the last write operation (insert/update/delete) on this connection. - @return full error object. - - If "w" is -1, wait for propagation to majority of nodes. - If "wtimeout" is 0, the operation will block indefinitely if needed. - */ - virtual BSONObj getLastErrorDetailed(bool fsync = false, bool j = false, int w = 0, int wtimeout = 0); - - /** Can be called with the returned value from getLastErrorDetailed to extract an error string. - If all you need is the string, just call getLastError() instead. - */ - static string getLastErrorString( const BSONObj& res ); - - /** Return the last error which has occurred, even if not the very last operation. - - @return { err : <error message>, nPrev : <how_many_ops_back_occurred>, ok : 1 } - - result.err will be null if no error has occurred. - */ - BSONObj getPrevError(); - - /** Reset the previous error state for this connection (accessed via getLastError and - getPrevError). Useful when performing several operations at once and then checking - for an error after attempting all operations. - */ - bool resetError() { return simpleCommand("admin", 0, "reseterror"); } - - /** Delete the specified collection. */ - virtual bool dropCollection( const string &ns ) { - string db = nsGetDB( ns ); - string coll = nsGetCollection( ns ); - uassert( 10011 , "no collection name", coll.size() ); - - BSONObj info; - - bool res = runCommand( db.c_str() , BSON( "drop" << coll ) , info ); - resetIndexCache(); - return res; - } - - /** Perform a repair and compaction of the specified database. May take a long time to run. 
Disk space - must be available equal to the size of the database while repairing. - */ - bool repairDatabase(const string &dbname, BSONObj *info = 0) { - return simpleCommand(dbname, info, "repairDatabase"); - } - - /** Copy database from one server or name to another server or name. - - Generally, you should dropDatabase() first as otherwise the copied information will MERGE - into whatever data is already present in this database. - - For security reasons this function only works when you are authorized to access the "admin" db. However, - if you have access to said db, you can copy any database from one place to another. - TODO: this needs enhancement to be more flexible in terms of security. - - This method provides a way to "rename" a database by copying it to a new db name and - location. The copy is "repaired" and compacted. - - fromdb database name from which to copy. - todb database name to copy to. - fromhost hostname of the database (and optionally, ":port") from which to - copy the data. copies from self if "". - - returns true if successful - */ - bool copyDatabase(const string &fromdb, const string &todb, const string &fromhost = "", BSONObj *info = 0); - - /** The Mongo database provides built-in performance profiling capabilities. Uset setDbProfilingLevel() - to enable. Profiling information is then written to the system.profile collection, which one can - then query. 
- */ - enum ProfilingLevel { - ProfileOff = 0, - ProfileSlow = 1, // log very slow (>100ms) operations - ProfileAll = 2 - - }; - bool setDbProfilingLevel(const string &dbname, ProfilingLevel level, BSONObj *info = 0); - bool getDbProfilingLevel(const string &dbname, ProfilingLevel& level, BSONObj *info = 0); - - - /** This implicitly converts from char*, string, and BSONObj to be an argument to mapreduce - You shouldn't need to explicitly construct this - */ - struct MROutput { - MROutput(const char* collection) : out(BSON("replace" << collection)) {} - MROutput(const string& collection) : out(BSON("replace" << collection)) {} - MROutput(const BSONObj& obj) : out(obj) {} - - BSONObj out; - }; - static MROutput MRInline; - - /** Run a map/reduce job on the server. - - See http://www.mongodb.org/display/DOCS/MapReduce - - ns namespace (db+collection name) of input data - jsmapf javascript map function code - jsreducef javascript reduce function code. - query optional query filter for the input - output either a string collection name or an object representing output type - if not specified uses inline output type - - returns a result object which contains: - { result : <collection_name>, - numObjects : <number_of_objects_scanned>, - timeMillis : <job_time>, - ok : <1_if_ok>, - [, err : <errmsg_if_error>] - } - - For example one might call: - result.getField("ok").trueValue() - on the result to check if ok. - */ - BSONObj mapreduce(const string &ns, const string &jsmapf, const string &jsreducef, BSONObj query = BSONObj(), MROutput output = MRInline); - - /** Run javascript code on the database server. - dbname database SavedContext in which the code runs. The javascript variable 'db' will be assigned - to this database when the function is invoked. - jscode source code for a javascript function. - info the command object which contains any information on the invocation result including - the return value and other information. 
If an error occurs running the jscode, error - information will be in info. (try "out() << info.toString()") - retValue return value from the jscode function. - args args to pass to the jscode function. when invoked, the 'args' variable will be defined - for use by the jscode. - - returns true if runs ok. - - See testDbEval() in dbclient.cpp for an example of usage. - */ - bool eval(const string &dbname, const string &jscode, BSONObj& info, BSONElement& retValue, BSONObj *args = 0); - - /** validate a collection, checking for errors and reporting back statistics. - this operation is slow and blocking. - */ - bool validate( const string &ns , bool scandata=true ) { - BSONObj cmd = BSON( "validate" << nsGetCollection( ns ) << "scandata" << scandata ); - BSONObj info; - return runCommand( nsGetDB( ns ).c_str() , cmd , info ); - } - - /* The following helpers are simply more convenient forms of eval() for certain common cases */ - - /* invocation with no return value of interest -- with or without one simple parameter */ - bool eval(const string &dbname, const string &jscode); - template< class T > - bool eval(const string &dbname, const string &jscode, T parm1) { - BSONObj info; - BSONElement retValue; - BSONObjBuilder b; - b.append("0", parm1); - BSONObj args = b.done(); - return eval(dbname, jscode, info, retValue, &args); - } - - /** eval invocation with one parm to server and one numeric field (either int or double) returned */ - template< class T, class NumType > - bool eval(const string &dbname, const string &jscode, T parm1, NumType& ret) { - BSONObj info; - BSONElement retValue; - BSONObjBuilder b; - b.append("0", parm1); - BSONObj args = b.done(); - if ( !eval(dbname, jscode, info, retValue, &args) ) - return false; - ret = (NumType) retValue.number(); - return true; - } - - /** - get a list of all the current databases - uses the { listDatabases : 1 } command. 
- throws on error - */ - list<string> getDatabaseNames(); - - /** - get a list of all the current collections in db - */ - list<string> getCollectionNames( const string& db ); - - bool exists( const string& ns ); - - /** Create an index if it does not already exist. - ensureIndex calls are remembered so it is safe/fast to call this function many - times in your code. - @param ns collection to be indexed - @param keys the "key pattern" for the index. e.g., { name : 1 } - @param unique if true, indicates that key uniqueness should be enforced for this index - @param name if not specified, it will be created from the keys automatically (which is recommended) - @param cache if set to false, the index cache for the connection won't remember this call - @param background build index in the background (see mongodb docs/wiki for details) - @param v index version. leave at default value. (unit tests set this parameter.) - @return whether or not sent message to db. - should be true on first call, false on subsequent unless resetIndexCache was called - */ - virtual bool ensureIndex( const string &ns , BSONObj keys , bool unique = false, const string &name = "", - bool cache = true, bool background = false, int v = -1 ); - - /** - clears the index cache, so the subsequent call to ensureIndex for any index will go to the server - */ - virtual void resetIndexCache(); - - virtual auto_ptr<DBClientCursor> getIndexes( const string &ns ); - - virtual void dropIndex( const string& ns , BSONObj keys ); - virtual void dropIndex( const string& ns , const string& indexName ); - - /** - drops all indexes for the collection - */ - virtual void dropIndexes( const string& ns ); - - virtual void reIndex( const string& ns ); - - string genIndexName( const BSONObj& keys ); - - /** Erase / drop an entire database */ - virtual bool dropDatabase(const string &dbname, BSONObj *info = 0) { - bool ret = simpleCommand(dbname, info, "dropDatabase"); - resetIndexCache(); - return ret; - } - - virtual 
string toString() = 0; - - protected: - /** if the result of a command is ok*/ - bool isOk(const BSONObj&); - - /** if the element contains a not master error */ - bool isNotMasterErrorString( const BSONElement& e ); - - BSONObj _countCmd(const string &ns, const BSONObj& query, int options, int limit, int skip ); - - /** - * Look up the options available on this client. Caches the answer from - * _lookupAvailableOptions(), below. - */ - QueryOptions availableOptions(); - - virtual QueryOptions _lookupAvailableOptions(); - - private: - enum QueryOptions _cachedAvailableOptions; - bool _haveCachedAvailableOptions; - }; - - /** - abstract class that implements the core db operations - */ - class DBClientBase : public DBClientWithCommands, public DBConnector { - protected: - WriteConcern _writeConcern; - - public: - DBClientBase() { - _writeConcern = W_NORMAL; - } - - WriteConcern getWriteConcern() const { return _writeConcern; } - void setWriteConcern( WriteConcern w ) { _writeConcern = w; } - - /** send a query to the database. - @param ns namespace to query, format is <dbname>.<collectname>[.<collectname>]* - @param query query to perform on the collection. this is a BSONObj (binary JSON) - You may format as - { query: { ... }, orderby: { ... } } - to specify a sort order. - @param nToReturn n to return (i.e., limit). 0 = unlimited - @param nToSkip start with the nth item - @param fieldsToReturn optional template of which fields to select. if unspecified, returns all fields - @param queryOptions see options enum at top of this file - - @return cursor. 0 if error (connection failure) - @throws AssertionException - */ - virtual auto_ptr<DBClientCursor> query(const string &ns, Query query, int nToReturn = 0, int nToSkip = 0, - const BSONObj *fieldsToReturn = 0, int queryOptions = 0 , int batchSize = 0 ); - - - /** Uses QueryOption_Exhaust, when available. - - Exhaust mode sends back all data queries as fast as possible, with no back-and-forth for - OP_GETMORE. 
If you are certain you will exhaust the query, it could be useful. - - Use the DBClientCursorBatchIterator version, below, if you want to do items in large - blocks, perhaps to avoid granular locking and such. - */ - virtual unsigned long long query( boost::function<void(const BSONObj&)> f, - const string& ns, - Query query, - const BSONObj *fieldsToReturn = 0, - int queryOptions = 0 ); - - virtual unsigned long long query( boost::function<void(DBClientCursorBatchIterator&)> f, - const string& ns, - Query query, - const BSONObj *fieldsToReturn = 0, - int queryOptions = 0 ); - - - /** don't use this - called automatically by DBClientCursor for you - @param cursorId id of cursor to retrieve - @return an handle to a previously allocated cursor - @throws AssertionException - */ - virtual auto_ptr<DBClientCursor> getMore( const string &ns, long long cursorId, int nToReturn = 0, int options = 0 ); - - /** - insert an object into the database - */ - virtual void insert( const string &ns , BSONObj obj , int flags=0); - - /** - insert a vector of objects into the database - */ - virtual void insert( const string &ns, const vector< BSONObj >& v , int flags=0); - - /** - remove matching objects from the database - @param justOne if this true, then once a single match is found will stop - */ - virtual void remove( const string &ns , Query q , bool justOne = 0 ); - - /** - updates objects matching query - */ - virtual void update( const string &ns , Query query , BSONObj obj , bool upsert = false , bool multi = false ); - - virtual bool isFailed() const = 0; - - virtual void killCursor( long long cursorID ) = 0; - - virtual bool callRead( Message& toSend , Message& response ) = 0; - // virtual bool callWrite( Message& toSend , Message& response ) = 0; // TODO: add this if needed - - virtual ConnectionString::ConnectionType type() const = 0; - - virtual double getSoTimeout() const = 0; - - }; // DBClientBase - - class DBClientReplicaSet; - - class ConnectException : public 
UserException { - public: - ConnectException(string msg) : UserException(9000,msg) { } - }; - - /** - A basic connection to the database. - This is the main entry point for talking to a simple Mongo setup - */ - class DBClientConnection : public DBClientBase { - public: - using DBClientBase::query; - - /** - @param _autoReconnect if true, automatically reconnect on a connection failure - @param cp used by DBClientReplicaSet. You do not need to specify this parameter - @param timeout tcp timeout in seconds - this is for read/write, not connect. - Connect timeout is fixed, but short, at 5 seconds. - */ - DBClientConnection(bool _autoReconnect=false, DBClientReplicaSet* cp=0, double so_timeout=0) : - clientSet(cp), _failed(false), autoReconnect(_autoReconnect), lastReconnectTry(0), _so_timeout(so_timeout) { - _numConnections++; - } - - virtual ~DBClientConnection() { - _numConnections--; - } - - /** Connect to a Mongo database server. - - If autoReconnect is true, you can try to use the DBClientConnection even when - false was returned -- it will try to connect again. - - @param serverHostname host to connect to. can include port number ( 127.0.0.1 , 127.0.0.1:5555 ) - If you use IPv6 you must add a port number ( ::1:27017 ) - @param errmsg any relevant error message will appended to the string - @deprecated please use HostAndPort - @return false if fails to connect. - */ - virtual bool connect(const char * hostname, string& errmsg) { - // TODO: remove this method - HostAndPort t( hostname ); - return connect( t , errmsg ); - } - - /** Connect to a Mongo database server. - - If autoReconnect is true, you can try to use the DBClientConnection even when - false was returned -- it will try to connect again. - - @param server server to connect to. - @param errmsg any relevant error message will appended to the string - @return false if fails to connect. - */ - virtual bool connect(const HostAndPort& server, string& errmsg); - - /** Connect to a Mongo database server. 
Exception throwing version. - Throws a UserException if cannot connect. - - If autoReconnect is true, you can try to use the DBClientConnection even when - false was returned -- it will try to connect again. - - @param serverHostname host to connect to. can include port number ( 127.0.0.1 , 127.0.0.1:5555 ) - */ - void connect(const string& serverHostname) { - string errmsg; - if( !connect(HostAndPort(serverHostname), errmsg) ) - throw ConnectException(string("can't connect ") + errmsg); - } - - virtual bool auth(const string &dbname, const string &username, const string &pwd, string& errmsg, bool digestPassword = true, Auth::Level* level=NULL); - - virtual auto_ptr<DBClientCursor> query(const string &ns, Query query=Query(), int nToReturn = 0, int nToSkip = 0, - const BSONObj *fieldsToReturn = 0, int queryOptions = 0 , int batchSize = 0 ) { - checkConnection(); - return DBClientBase::query( ns, query, nToReturn, nToSkip, fieldsToReturn, queryOptions , batchSize ); - } - - virtual unsigned long long query( boost::function<void(DBClientCursorBatchIterator &)> f, - const string& ns, - Query query, - const BSONObj *fieldsToReturn, - int queryOptions ); - - virtual bool runCommand(const string &dbname, const BSONObj& cmd, BSONObj &info, int options=0); - - /** - @return true if this connection is currently in a failed state. When autoreconnect is on, - a connection will transition back to an ok state after reconnecting. 
- */ - bool isFailed() const { return _failed; } - - MessagingPort& port() { assert(p); return *p; } - - string toStringLong() const { - stringstream ss; - ss << _serverString; - if ( _failed ) ss << " failed"; - return ss.str(); - } - - /** Returns the address of the server */ - string toString() { return _serverString; } - - string getServerAddress() const { return _serverString; } - - virtual void killCursor( long long cursorID ); - virtual bool callRead( Message& toSend , Message& response ) { return call( toSend , response ); } - virtual void say( Message &toSend, bool isRetry = false , string * actualServer = 0 ); - virtual bool recv( Message& m ); - virtual void checkResponse( const char *data, int nReturned, bool* retry = NULL, string* host = NULL ); - virtual bool call( Message &toSend, Message &response, bool assertOk = true , string * actualServer = 0 ); - virtual ConnectionString::ConnectionType type() const { return ConnectionString::MASTER; } - void setSoTimeout(double to) { _so_timeout = to; } - double getSoTimeout() const { return _so_timeout; } - - virtual bool lazySupported() const { return true; } - - static int getNumConnections() { - return _numConnections; - } - - static void setLazyKillCursor( bool lazy ) { _lazyKillCursor = lazy; } - static bool getLazyKillCursor() { return _lazyKillCursor; } - - protected: - friend class SyncClusterConnection; - virtual void sayPiggyBack( Message &toSend ); - - DBClientReplicaSet *clientSet; - boost::scoped_ptr<MessagingPort> p; - boost::scoped_ptr<SockAddr> server; - bool _failed; - const bool autoReconnect; - time_t lastReconnectTry; - HostAndPort _server; // remember for reconnects - string _serverString; - void _checkConnection(); - - // throws SocketException if in failed state and not reconnecting or if waiting to reconnect - void checkConnection() { if( _failed ) _checkConnection(); } - - map< string, pair<string,string> > authCache; - double _so_timeout; - bool _connect( string& errmsg ); - - static 
AtomicUInt _numConnections; - static bool _lazyKillCursor; // lazy means we piggy back kill cursors on next op - -#ifdef MONGO_SSL - static SSLManager* sslManager(); - static SSLManager* _sslManager; -#endif - }; - - /** pings server to check if it's up - */ - bool serverAlive( const string &uri ); - - DBClientBase * createDirectClient(); - - BSONElement getErrField( const BSONObj& result ); - bool hasErrField( const BSONObj& result ); - - inline std::ostream& operator<<( std::ostream &s, const Query &q ) { - return s << q.toString(); - } - -} // namespace mongo - -#include "dbclientcursor.h" -#include "dbclient_rs.h" -#include "undef_macros.h" +#include "mongo/client/undef_macros.h" diff --git a/src/mongo/client/dbclient_rs.cpp b/src/mongo/client/dbclient_rs.cpp index 3cfbc82137f..ba69ca337c8 100644 --- a/src/mongo/client/dbclient_rs.cpp +++ b/src/mongo/client/dbclient_rs.cpp @@ -15,18 +15,22 @@ * limitations under the License. */ -#include "pch.h" -#include "dbclient.h" -#include "../bson/util/builder.h" -#include "../db/jsobj.h" -#include "../db/json.h" -#include "../db/dbmessage.h" -#include "connpool.h" -#include "dbclient_rs.h" -#include "../util/background.h" -#include "../util/timer.h" +#include "mongo/pch.h" + +#include "mongo/client/dbclient_rs.h" + #include <fstream> +#include "mongo/bson/util/builder.h" +#include "mongo/client/connpool.h" +#include "mongo/client/dbclient.h" +#include "mongo/client/dbclientcursor.h" +#include "mongo/db/dbmessage.h" +#include "mongo/db/jsobj.h" +#include "mongo/db/json.h" +#include "mongo/util/background.h" +#include "mongo/util/timer.h" + namespace mongo { // -------------------------------- diff --git a/src/mongo/client/dbclient_rs.h b/src/mongo/client/dbclient_rs.h index 8baffe941b2..6d415e5ab27 100644 --- a/src/mongo/client/dbclient_rs.h +++ b/src/mongo/client/dbclient_rs.h @@ -17,8 +17,15 @@ #pragma once -#include "../pch.h" -#include "dbclient.h" +#include "mongo/pch.h" + +#include <boost/function.hpp> +#include 
<boost/shared_ptr.hpp> +#include <set> +#include <utility> + +#include "mongo/client/dbclientinterface.h" +#include "mongo/util/net/hostandport.h" namespace mongo { diff --git a/src/mongo/client/dbclientcursor.cpp b/src/mongo/client/dbclientcursor.cpp index dcf86818dc6..0f7a4e3ae2c 100644 --- a/src/mongo/client/dbclientcursor.cpp +++ b/src/mongo/client/dbclientcursor.cpp @@ -15,13 +15,16 @@ * limitations under the License. */ -#include "pch.h" -#include "dbclient.h" -#include "../db/dbmessage.h" -#include "../db/cmdline.h" -#include "connpool.h" -#include "../s/shard.h" -#include "../s/util.h" +#include "mongo/pch.h" + +#include "mongo/client/dbclientcursor.h" + +#include "mongo/client/connpool.h" +#include "mongo/client/dbclient.h" +#include "mongo/db/cmdline.h" +#include "mongo/db/dbmessage.h" +#include "mongo/s/shard.h" +#include "mongo/s/util.h" namespace mongo { diff --git a/src/mongo/client/dbclientcursor.h b/src/mongo/client/dbclientcursor.h index d7661438516..4f2ff2c7916 100644 --- a/src/mongo/client/dbclientcursor.h +++ b/src/mongo/client/dbclientcursor.h @@ -17,12 +17,16 @@ #pragma once -#include "../pch.h" -#include "../util/net/message.h" -#include "../db/jsobj.h" -#include "../db/json.h" +#include "mongo/pch.h" + #include <stack> +#include "mongo/client/dbclientinterface.h" +#include "mongo/db/jsobj.h" +#include "mongo/db/json.h" +#include "mongo/util/assert_util.h" +#include "mongo/util/net/message.h" + namespace mongo { class AScopedConnection; diff --git a/src/mongo/client/dbclientinterface.h b/src/mongo/client/dbclientinterface.h new file mode 100644 index 00000000000..77d6a39c4e8 --- /dev/null +++ b/src/mongo/client/dbclientinterface.h @@ -0,0 +1,1056 @@ +/** @file dbclientinterface.h + + Core MongoDB C++ driver interfaces are defined here. +*/ + +/* Copyright 2009 10gen Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "mongo/pch.h" + +#include "mongo/db/authlevel.h" +#include "mongo/db/jsobj.h" +#include "mongo/util/net/message.h" +#include "mongo/util/net/message_port.h" + +namespace mongo { + + /** the query field 'options' can have these bits set: */ + enum QueryOptions { + /** Tailable means cursor is not closed when the last data is retrieved. rather, the cursor marks + the final object's position. you can resume using the cursor later, from where it was located, + if more data were received. Set on dbQuery and dbGetMore. + + like any "latent cursor", the cursor may become invalid at some point -- for example if that + final object it references were deleted. Thus, you should be prepared to requery if you get back + ResultFlag_CursorNotFound. + */ + QueryOption_CursorTailable = 1 << 1, + + /** allow query of replica slave. normally these return an error except for namespace "local". + */ + QueryOption_SlaveOk = 1 << 2, + + // findingStart mode is used to find the first operation of interest when + // we are scanning through a repl log. For efficiency in the common case, + // where the first operation of interest is closer to the tail than the head, + // we start from the tail of the log and work backwards until we find the + // first operation of interest. Then we scan forward from that first operation, + // actually returning results to the client. During the findingStart phase, + // we release the db mutex occasionally to avoid blocking the db process for + // an extended period of time. 
+ QueryOption_OplogReplay = 1 << 3, + + /** The server normally times out idle cursors after an inactivy period to prevent excess memory uses + Set this option to prevent that. + */ + QueryOption_NoCursorTimeout = 1 << 4, + + /** Use with QueryOption_CursorTailable. If we are at the end of the data, block for a while rather + than returning no data. After a timeout period, we do return as normal. + */ + QueryOption_AwaitData = 1 << 5, + + /** Stream the data down full blast in multiple "more" packages, on the assumption that the client + will fully read all data queried. Faster when you are pulling a lot of data and know you want to + pull it all down. Note: it is not allowed to not read all the data unless you close the connection. + + Use the query( boost::function<void(const BSONObj&)> f, ... ) version of the connection's query() + method, and it will take care of all the details for you. + */ + QueryOption_Exhaust = 1 << 6, + + /** When sharded, this means its ok to return partial results + Usually we will fail a query if all required shards aren't up + If this is set, it'll be a partial result set + */ + QueryOption_PartialResults = 1 << 7 , + + QueryOption_AllSupported = QueryOption_CursorTailable | QueryOption_SlaveOk | QueryOption_OplogReplay | QueryOption_NoCursorTimeout | QueryOption_AwaitData | QueryOption_Exhaust | QueryOption_PartialResults + + }; + + enum UpdateOptions { + /** Upsert - that is, insert the item if no matching item is found. */ + UpdateOption_Upsert = 1 << 0, + + /** Update multiple documents (if multiple documents match query expression). + (Default is update a single document and stop.) 
*/ + UpdateOption_Multi = 1 << 1, + + /** flag from mongo saying this update went everywhere */ + UpdateOption_Broadcast = 1 << 2 + }; + + enum RemoveOptions { + /** only delete one option */ + RemoveOption_JustOne = 1 << 0, + + /** flag from mongo saying this update went everywhere */ + RemoveOption_Broadcast = 1 << 1 + }; + + + /** + * need to put in DbMesssage::ReservedOptions as well + */ + enum InsertOptions { + /** With muli-insert keep processing inserts if one fails */ + InsertOption_ContinueOnError = 1 << 0 + }; + + class DBClientBase; + + /** + * ConnectionString handles parsing different ways to connect to mongo and determining method + * samples: + * server + * server:port + * foo/server:port,server:port SET + * server,server,server SYNC + * + * tyipcal use + * string errmsg, + * ConnectionString cs = ConnectionString::parse( url , errmsg ); + * if ( ! cs.isValid() ) throw "bad: " + errmsg; + * DBClientBase * conn = cs.connect( errmsg ); + */ + class ConnectionString { + public: + enum ConnectionType { INVALID , MASTER , PAIR , SET , SYNC }; + + ConnectionString() { + _type = INVALID; + } + + ConnectionString( const HostAndPort& server ) { + _type = MASTER; + _servers.push_back( server ); + _finishInit(); + } + + ConnectionString( ConnectionType type , const string& s , const string& setName = "" ) { + _type = type; + _setName = setName; + _fillServers( s ); + + switch ( _type ) { + case MASTER: + assert( _servers.size() == 1 ); + break; + case SET: + assert( _setName.size() ); + assert( _servers.size() >= 1 ); // 1 is ok since we can derive + break; + case PAIR: + assert( _servers.size() == 2 ); + break; + default: + assert( _servers.size() > 0 ); + } + + _finishInit(); + } + + ConnectionString( const string& s , ConnectionType favoredMultipleType ) { + _type = INVALID; + + _fillServers( s ); + if ( _type != INVALID ) { + // set already + } + else if ( _servers.size() == 1 ) { + _type = MASTER; + } + else { + _type = favoredMultipleType; + assert( 
_type == SET || _type == SYNC ); + } + _finishInit(); + } + + bool isValid() const { return _type != INVALID; } + + string toString() const { return _string; } + + DBClientBase* connect( string& errmsg, double socketTimeout = 0 ) const; + + string getSetName() const { return _setName; } + + vector<HostAndPort> getServers() const { return _servers; } + + ConnectionType type() const { return _type; } + + static ConnectionString parse( const string& url , string& errmsg ); + + static string typeToString( ConnectionType type ); + + private: + + void _fillServers( string s ); + void _finishInit(); + + ConnectionType _type; + vector<HostAndPort> _servers; + string _string; + string _setName; + }; + + /** + * controls how much a clients cares about writes + * default is NORMAL + */ + enum WriteConcern { + W_NONE = 0 , // TODO: not every connection type fully supports this + W_NORMAL = 1 + // TODO SAFE = 2 + }; + + class BSONObj; + class ScopedDbConnection; + class DBClientCursor; + class DBClientCursorBatchIterator; + + /** Represents a Mongo query expression. Typically one uses the QUERY(...) macro to construct a Query object. + Examples: + QUERY( "age" << 33 << "school" << "UCLA" ).sort("name") + QUERY( "age" << GT << 30 << LT << 50 ) + */ + class Query { + public: + BSONObj obj; + Query() : obj(BSONObj()) { } + Query(const BSONObj& b) : obj(b) { } + Query(const string &json); + Query(const char * json); + + /** Add a sort (ORDER BY) criteria to the query expression. + @param sortPattern the sort order template. For example to order by name ascending, time descending: + { name : 1, ts : -1 } + i.e. + BSON( "name" << 1 << "ts" << -1 ) + or + fromjson(" name : 1, ts : -1 ") + */ + Query& sort(const BSONObj& sortPattern); + + /** Add a sort (ORDER BY) criteria to the query expression. + This version of sort() assumes you want to sort on a single field. 
+ @param asc = 1 for ascending order + asc = -1 for descending order + */ + Query& sort(const string &field, int asc = 1) { sort( BSON( field << asc ) ); return *this; } + + /** Provide a hint to the query. + @param keyPattern Key pattern for the index to use. + Example: + hint("{ts:1}") + */ + Query& hint(BSONObj keyPattern); + Query& hint(const string &jsonKeyPatt); + + /** Provide min and/or max index limits for the query. + min <= x < max + */ + Query& minKey(const BSONObj &val); + /** + max is exclusive + */ + Query& maxKey(const BSONObj &val); + + /** Return explain information about execution of this query instead of the actual query results. + Normally it is easier to use the mongo shell to run db.find(...).explain(). + */ + Query& explain(); + + /** Use snapshot mode for the query. Snapshot mode assures no duplicates are returned, or objects missed, which were + present at both the start and end of the query's execution (if an object is new during the query, or deleted during + the query, it may or may not be returned, even with snapshot mode). + + Note that short query responses (less than 1MB) are always effectively snapshotted. + + Currently, snapshot mode may not be used with sorting or explicit hints. + */ + Query& snapshot(); + + /** Queries to the Mongo database support a $where parameter option which contains + a javascript function that is evaluated to see whether objects being queried match + its criteria. Use this helper to append such a function to a query object. + Your query may also contain other traditional Mongo query terms. + + @param jscode The javascript function to evaluate against each potential object + match. The function must return true for matched objects. Use the this + variable to inspect the current object. + @param scope SavedContext for the javascript object. List in a BSON object any + variables you would like defined when the jscode executes. One can think + of these as "bind variables". 
+ + Examples: + conn.findOne("test.coll", Query("{a:3}").where("this.b == 2 || this.c == 3")); + Query badBalance = Query().where("this.debits - this.credits < 0"); + */ + Query& where(const string &jscode, BSONObj scope); + Query& where(const string &jscode) { return where(jscode, BSONObj()); } + + /** + * @return true if this query has an orderby, hint, or some other field + */ + bool isComplex( bool * hasDollar = 0 ) const; + + BSONObj getFilter() const; + BSONObj getSort() const; + BSONObj getHint() const; + bool isExplain() const; + + string toString() const; + operator string() const { return toString(); } + private: + void makeComplex(); + template< class T > + void appendComplex( const char *fieldName, const T& val ) { + makeComplex(); + BSONObjBuilder b; + b.appendElements(obj); + b.append(fieldName, val); + obj = b.obj(); + } + }; + + /** + * Represents a full query description, including all options required for the query to be passed on + * to other hosts + */ + class QuerySpec { + + string _ns; + int _ntoskip; + int _ntoreturn; + int _options; + BSONObj _query; + BSONObj _fields; + Query _queryObj; + + public: + + QuerySpec( const string& ns, + const BSONObj& query, const BSONObj& fields, + int ntoskip, int ntoreturn, int options ) + : _ns( ns ), _ntoskip( ntoskip ), _ntoreturn( ntoreturn ), _options( options ), + _query( query.getOwned() ), _fields( fields.getOwned() ) , _queryObj( _query ) { + } + + QuerySpec() {} + + bool isEmpty() const { return _ns.size() == 0; } + + bool isExplain() const { return _queryObj.isExplain(); } + BSONObj filter() const { return _queryObj.getFilter(); } + + BSONObj hint() const { return _queryObj.getHint(); } + BSONObj sort() const { return _queryObj.getSort(); } + BSONObj query() const { return _query; } + BSONObj fields() const { return _fields; } + BSONObj* fieldsData() { return &_fields; } + + // don't love this, but needed downstrem + const BSONObj* fieldsPtr() const { return &_fields; } + + string ns() const { 
return _ns; } + int ntoskip() const { return _ntoskip; } + int ntoreturn() const { return _ntoreturn; } + int options() const { return _options; } + + void setFields( BSONObj& o ) { _fields = o.getOwned(); } + + string toString() const { + return str::stream() << "QSpec " << + BSON( "ns" << _ns << "n2skip" << _ntoskip << "n2return" << _ntoreturn << "options" << _options + << "query" << _query << "fields" << _fields ); + } + + }; + + + /** Typically one uses the QUERY(...) macro to construct a Query object. + Example: QUERY( "age" << 33 << "school" << "UCLA" ) + */ +#define QUERY(x) mongo::Query( BSON(x) ) + + // Useful utilities for namespaces + /** @return the database name portion of an ns string */ + string nsGetDB( const string &ns ); + + /** @return the collection name portion of an ns string */ + string nsGetCollection( const string &ns ); + + /** + interface that handles communication with the db + */ + class DBConnector { + public: + virtual ~DBConnector() {} + /** actualServer is set to the actual server where they call went if there was a choice (SlaveOk) */ + virtual bool call( Message &toSend, Message &response, bool assertOk=true , string * actualServer = 0 ) = 0; + virtual void say( Message &toSend, bool isRetry = false , string * actualServer = 0 ) = 0; + virtual void sayPiggyBack( Message &toSend ) = 0; + /* used by QueryOption_Exhaust. To use that your subclass must implement this. 
*/ + virtual bool recv( Message& m ) { assert(false); return false; } + // In general, for lazy queries, we'll need to say, recv, then checkResponse + virtual void checkResponse( const char* data, int nReturned, bool* retry = NULL, string* targetHost = NULL ) { + if( retry ) *retry = false; if( targetHost ) *targetHost = ""; + } + virtual bool lazySupported() const = 0; + }; + + /** + The interface that any db connection should implement + */ + class DBClientInterface : boost::noncopyable { + public: + virtual auto_ptr<DBClientCursor> query(const string &ns, Query query, int nToReturn = 0, int nToSkip = 0, + const BSONObj *fieldsToReturn = 0, int queryOptions = 0 , int batchSize = 0 ) = 0; + + virtual void insert( const string &ns, BSONObj obj , int flags=0) = 0; + + virtual void insert( const string &ns, const vector< BSONObj >& v , int flags=0) = 0; + + virtual void remove( const string &ns , Query query, bool justOne = 0 ) = 0; + + virtual void update( const string &ns , Query query , BSONObj obj , bool upsert = 0 , bool multi = 0 ) = 0; + + virtual ~DBClientInterface() { } + + /** + @return a single object that matches the query. if none do, then the object is empty + @throws AssertionException + */ + virtual BSONObj findOne(const string &ns, const Query& query, const BSONObj *fieldsToReturn = 0, int queryOptions = 0); + + /** query N objects from the database into an array. makes sense mostly when you want a small number of results. if a huge number, use + query() and iterate the cursor. 
+ */ + void findN(vector<BSONObj>& out, const string&ns, Query query, int nToReturn, int nToSkip = 0, const BSONObj *fieldsToReturn = 0, int queryOptions = 0); + + virtual string getServerAddress() const = 0; + + /** don't use this - called automatically by DBClientCursor for you */ + virtual auto_ptr<DBClientCursor> getMore( const string &ns, long long cursorId, int nToReturn = 0, int options = 0 ) = 0; + }; + + /** + DB "commands" + Basically just invocations of connection.$cmd.findOne({...}); + */ + class DBClientWithCommands : public DBClientInterface { + set<string> _seenIndexes; + public: + /** controls how chatty the client is about network errors & such. See log.h */ + int _logLevel; + + DBClientWithCommands() : _logLevel(0), _cachedAvailableOptions( (enum QueryOptions)0 ), _haveCachedAvailableOptions(false) { } + + /** helper function. run a simple command where the command expression is simply + { command : 1 } + @param info -- where to put result object. may be null if caller doesn't need that info + @param command -- command name + @return true if the command returned "ok". + */ + bool simpleCommand(const string &dbname, BSONObj *info, const string &command); + + /** Run a database command. Database commands are represented as BSON objects. Common database + commands have prebuilt helper functions -- see below. If a helper is not available you can + directly call runCommand. + + @param dbname database name. Use "admin" for global administrative commands. + @param cmd the command object to execute. For example, { ismaster : 1 } + @param info the result object the database returns. Typically has { ok : ..., errmsg : ... } fields + set. + @param options see enum QueryOptions - normally not needed to run a command + @return true if the command returned "ok". + */ + virtual bool runCommand(const string &dbname, const BSONObj& cmd, BSONObj &info, int options=0); + + /** Authorize access to a particular database. 
+ Authentication is separate for each database on the server -- you may authenticate for any + number of databases on a single connection. + The "admin" database is special and once authenticated provides access to all databases on the + server. + @param digestPassword if password is plain text, set this to true. otherwise assumed to be pre-digested + @param[out] authLevel level of authentication for the given user + @return true if successful + */ + virtual bool auth(const string &dbname, const string &username, const string &pwd, string& errmsg, bool digestPassword = true, Auth::Level * level = NULL); + + /** count number of objects in collection ns that match the query criteria specified + throws UserAssertion if database returns an error + */ + virtual unsigned long long count(const string &ns, const BSONObj& query = BSONObj(), int options=0, int limit=0, int skip=0 ); + + string createPasswordDigest( const string &username , const string &clearTextPassword ); + + /** returns true in isMaster parm if this db is the current master + of a replica pair. + + pass in info for more details e.g.: + { "ismaster" : 1.0 , "msg" : "not paired" , "ok" : 1.0 } + + returns true if command invoked successfully. + */ + virtual bool isMaster(bool& isMaster, BSONObj *info=0); + + /** + Create a new collection in the database. Normally, collection creation is automatic. You would + use this function if you wish to specify special options on creation. + + If the collection already exists, no action occurs. + + @param ns fully qualified collection name + @param size desired initial extent size for the collection. + Must be <= 1000000000 for normal collections. + For fixed size (capped) collections, this size is the total/max size of the + collection. + @param capped if true, this is a fixed size collection (where old data rolls out). + @param max maximum number of objects if capped (optional). + + returns true if successful. 
+ */ + bool createCollection(const string &ns, long long size = 0, bool capped = false, int max = 0, BSONObj *info = 0); + + /** Get error result from the last write operation (insert/update/delete) on this connection. + @return error message text, or empty string if no error. + */ + string getLastError(bool fsync = false, bool j = false, int w = 0, int wtimeout = 0); + + /** Get error result from the last write operation (insert/update/delete) on this connection. + @return full error object. + + If "w" is -1, wait for propagation to majority of nodes. + If "wtimeout" is 0, the operation will block indefinitely if needed. + */ + virtual BSONObj getLastErrorDetailed(bool fsync = false, bool j = false, int w = 0, int wtimeout = 0); + + /** Can be called with the returned value from getLastErrorDetailed to extract an error string. + If all you need is the string, just call getLastError() instead. + */ + static string getLastErrorString( const BSONObj& res ); + + /** Return the last error which has occurred, even if not the very last operation. + + @return { err : <error message>, nPrev : <how_many_ops_back_occurred>, ok : 1 } + + result.err will be null if no error has occurred. + */ + BSONObj getPrevError(); + + /** Reset the previous error state for this connection (accessed via getLastError and + getPrevError). Useful when performing several operations at once and then checking + for an error after attempting all operations. + */ + bool resetError() { return simpleCommand("admin", 0, "reseterror"); } + + /** Delete the specified collection. */ + virtual bool dropCollection( const string &ns ) { + string db = nsGetDB( ns ); + string coll = nsGetCollection( ns ); + uassert( 10011 , "no collection name", coll.size() ); + + BSONObj info; + + bool res = runCommand( db.c_str() , BSON( "drop" << coll ) , info ); + resetIndexCache(); + return res; + } + + /** Perform a repair and compaction of the specified database. May take a long time to run. 
Disk space + must be available equal to the size of the database while repairing. + */ + bool repairDatabase(const string &dbname, BSONObj *info = 0) { + return simpleCommand(dbname, info, "repairDatabase"); + } + + /** Copy database from one server or name to another server or name. + + Generally, you should dropDatabase() first as otherwise the copied information will MERGE + into whatever data is already present in this database. + + For security reasons this function only works when you are authorized to access the "admin" db. However, + if you have access to said db, you can copy any database from one place to another. + TODO: this needs enhancement to be more flexible in terms of security. + + This method provides a way to "rename" a database by copying it to a new db name and + location. The copy is "repaired" and compacted. + + fromdb database name from which to copy. + todb database name to copy to. + fromhost hostname of the database (and optionally, ":port") from which to + copy the data. copies from self if "". + + returns true if successful + */ + bool copyDatabase(const string &fromdb, const string &todb, const string &fromhost = "", BSONObj *info = 0); + + /** The Mongo database provides built-in performance profiling capabilities. Uset setDbProfilingLevel() + to enable. Profiling information is then written to the system.profile collection, which one can + then query. 
+ */ + enum ProfilingLevel { + ProfileOff = 0, + ProfileSlow = 1, // log very slow (>100ms) operations + ProfileAll = 2 + + }; + bool setDbProfilingLevel(const string &dbname, ProfilingLevel level, BSONObj *info = 0); + bool getDbProfilingLevel(const string &dbname, ProfilingLevel& level, BSONObj *info = 0); + + + /** This implicitly converts from char*, string, and BSONObj to be an argument to mapreduce + You shouldn't need to explicitly construct this + */ + struct MROutput { + MROutput(const char* collection) : out(BSON("replace" << collection)) {} + MROutput(const string& collection) : out(BSON("replace" << collection)) {} + MROutput(const BSONObj& obj) : out(obj) {} + + BSONObj out; + }; + static MROutput MRInline; + + /** Run a map/reduce job on the server. + + See http://www.mongodb.org/display/DOCS/MapReduce + + ns namespace (db+collection name) of input data + jsmapf javascript map function code + jsreducef javascript reduce function code. + query optional query filter for the input + output either a string collection name or an object representing output type + if not specified uses inline output type + + returns a result object which contains: + { result : <collection_name>, + numObjects : <number_of_objects_scanned>, + timeMillis : <job_time>, + ok : <1_if_ok>, + [, err : <errmsg_if_error>] + } + + For example one might call: + result.getField("ok").trueValue() + on the result to check if ok. + */ + BSONObj mapreduce(const string &ns, const string &jsmapf, const string &jsreducef, BSONObj query = BSONObj(), MROutput output = MRInline); + + /** Run javascript code on the database server. + dbname database SavedContext in which the code runs. The javascript variable 'db' will be assigned + to this database when the function is invoked. + jscode source code for a javascript function. + info the command object which contains any information on the invocation result including + the return value and other information. 
If an error occurs running the jscode, error + information will be in info. (try "out() << info.toString()") + retValue return value from the jscode function. + args args to pass to the jscode function. when invoked, the 'args' variable will be defined + for use by the jscode. + + returns true if runs ok. + + See testDbEval() in dbclient.cpp for an example of usage. + */ + bool eval(const string &dbname, const string &jscode, BSONObj& info, BSONElement& retValue, BSONObj *args = 0); + + /** validate a collection, checking for errors and reporting back statistics. + this operation is slow and blocking. + */ + bool validate( const string &ns , bool scandata=true ) { + BSONObj cmd = BSON( "validate" << nsGetCollection( ns ) << "scandata" << scandata ); + BSONObj info; + return runCommand( nsGetDB( ns ).c_str() , cmd , info ); + } + + /* The following helpers are simply more convenient forms of eval() for certain common cases */ + + /* invocation with no return value of interest -- with or without one simple parameter */ + bool eval(const string &dbname, const string &jscode); + template< class T > + bool eval(const string &dbname, const string &jscode, T parm1) { + BSONObj info; + BSONElement retValue; + BSONObjBuilder b; + b.append("0", parm1); + BSONObj args = b.done(); + return eval(dbname, jscode, info, retValue, &args); + } + + /** eval invocation with one parm to server and one numeric field (either int or double) returned */ + template< class T, class NumType > + bool eval(const string &dbname, const string &jscode, T parm1, NumType& ret) { + BSONObj info; + BSONElement retValue; + BSONObjBuilder b; + b.append("0", parm1); + BSONObj args = b.done(); + if ( !eval(dbname, jscode, info, retValue, &args) ) + return false; + ret = (NumType) retValue.number(); + return true; + } + + /** + get a list of all the current databases + uses the { listDatabases : 1 } command. 
+ throws on error + */ + list<string> getDatabaseNames(); + + /** + get a list of all the current collections in db + */ + list<string> getCollectionNames( const string& db ); + + bool exists( const string& ns ); + + /** Create an index if it does not already exist. + ensureIndex calls are remembered so it is safe/fast to call this function many + times in your code. + @param ns collection to be indexed + @param keys the "key pattern" for the index. e.g., { name : 1 } + @param unique if true, indicates that key uniqueness should be enforced for this index + @param name if not specified, it will be created from the keys automatically (which is recommended) + @param cache if set to false, the index cache for the connection won't remember this call + @param background build index in the background (see mongodb docs/wiki for details) + @param v index version. leave at default value. (unit tests set this parameter.) + @return whether or not sent message to db. + should be true on first call, false on subsequent unless resetIndexCache was called + */ + virtual bool ensureIndex( const string &ns , BSONObj keys , bool unique = false, const string &name = "", + bool cache = true, bool background = false, int v = -1 ); + + /** + clears the index cache, so the subsequent call to ensureIndex for any index will go to the server + */ + virtual void resetIndexCache(); + + virtual auto_ptr<DBClientCursor> getIndexes( const string &ns ); + + virtual void dropIndex( const string& ns , BSONObj keys ); + virtual void dropIndex( const string& ns , const string& indexName ); + + /** + drops all indexes for the collection + */ + virtual void dropIndexes( const string& ns ); + + virtual void reIndex( const string& ns ); + + string genIndexName( const BSONObj& keys ); + + /** Erase / drop an entire database */ + virtual bool dropDatabase(const string &dbname, BSONObj *info = 0) { + bool ret = simpleCommand(dbname, info, "dropDatabase"); + resetIndexCache(); + return ret; + } + + virtual 
string toString() = 0; + + protected: + /** if the result of a command is ok*/ + bool isOk(const BSONObj&); + + /** if the element contains a not master error */ + bool isNotMasterErrorString( const BSONElement& e ); + + BSONObj _countCmd(const string &ns, const BSONObj& query, int options, int limit, int skip ); + + /** + * Look up the options available on this client. Caches the answer from + * _lookupAvailableOptions(), below. + */ + QueryOptions availableOptions(); + + virtual QueryOptions _lookupAvailableOptions(); + + private: + enum QueryOptions _cachedAvailableOptions; + bool _haveCachedAvailableOptions; + }; + + /** + abstract class that implements the core db operations + */ + class DBClientBase : public DBClientWithCommands, public DBConnector { + protected: + WriteConcern _writeConcern; + + public: + DBClientBase() { + _writeConcern = W_NORMAL; + } + + WriteConcern getWriteConcern() const { return _writeConcern; } + void setWriteConcern( WriteConcern w ) { _writeConcern = w; } + + /** send a query to the database. + @param ns namespace to query, format is <dbname>.<collectname>[.<collectname>]* + @param query query to perform on the collection. this is a BSONObj (binary JSON) + You may format as + { query: { ... }, orderby: { ... } } + to specify a sort order. + @param nToReturn n to return (i.e., limit). 0 = unlimited + @param nToSkip start with the nth item + @param fieldsToReturn optional template of which fields to select. if unspecified, returns all fields + @param queryOptions see options enum at top of this file + + @return cursor. 0 if error (connection failure) + @throws AssertionException + */ + virtual auto_ptr<DBClientCursor> query(const string &ns, Query query, int nToReturn = 0, int nToSkip = 0, + const BSONObj *fieldsToReturn = 0, int queryOptions = 0 , int batchSize = 0 ); + + + /** Uses QueryOption_Exhaust, when available. + + Exhaust mode sends back all data queries as fast as possible, with no back-and-forth for + OP_GETMORE. 
If you are certain you will exhaust the query, it could be useful. + + Use the DBClientCursorBatchIterator version, below, if you want to do items in large + blocks, perhaps to avoid granular locking and such. + */ + virtual unsigned long long query( boost::function<void(const BSONObj&)> f, + const string& ns, + Query query, + const BSONObj *fieldsToReturn = 0, + int queryOptions = 0 ); + + virtual unsigned long long query( boost::function<void(DBClientCursorBatchIterator&)> f, + const string& ns, + Query query, + const BSONObj *fieldsToReturn = 0, + int queryOptions = 0 ); + + + /** don't use this - called automatically by DBClientCursor for you + @param cursorId id of cursor to retrieve + @return an handle to a previously allocated cursor + @throws AssertionException + */ + virtual auto_ptr<DBClientCursor> getMore( const string &ns, long long cursorId, int nToReturn = 0, int options = 0 ); + + /** + insert an object into the database + */ + virtual void insert( const string &ns , BSONObj obj , int flags=0); + + /** + insert a vector of objects into the database + */ + virtual void insert( const string &ns, const vector< BSONObj >& v , int flags=0); + + /** + remove matching objects from the database + @param justOne if this true, then once a single match is found will stop + */ + virtual void remove( const string &ns , Query q , bool justOne = 0 ); + + /** + updates objects matching query + */ + virtual void update( const string &ns , Query query , BSONObj obj , bool upsert = false , bool multi = false ); + + virtual bool isFailed() const = 0; + + virtual void killCursor( long long cursorID ) = 0; + + virtual bool callRead( Message& toSend , Message& response ) = 0; + // virtual bool callWrite( Message& toSend , Message& response ) = 0; // TODO: add this if needed + + virtual ConnectionString::ConnectionType type() const = 0; + + virtual double getSoTimeout() const = 0; + + }; // DBClientBase + + class DBClientReplicaSet; + + class ConnectException : public 
UserException { + public: + ConnectException(string msg) : UserException(9000,msg) { } + }; + + /** + A basic connection to the database. + This is the main entry point for talking to a simple Mongo setup + */ + class DBClientConnection : public DBClientBase { + public: + using DBClientBase::query; + + /** + @param _autoReconnect if true, automatically reconnect on a connection failure + @param cp used by DBClientReplicaSet. You do not need to specify this parameter + @param timeout tcp timeout in seconds - this is for read/write, not connect. + Connect timeout is fixed, but short, at 5 seconds. + */ + DBClientConnection(bool _autoReconnect=false, DBClientReplicaSet* cp=0, double so_timeout=0) : + clientSet(cp), _failed(false), autoReconnect(_autoReconnect), lastReconnectTry(0), _so_timeout(so_timeout) { + _numConnections++; + } + + virtual ~DBClientConnection() { + _numConnections--; + } + + /** Connect to a Mongo database server. + + If autoReconnect is true, you can try to use the DBClientConnection even when + false was returned -- it will try to connect again. + + @param serverHostname host to connect to. can include port number ( 127.0.0.1 , 127.0.0.1:5555 ) + If you use IPv6 you must add a port number ( ::1:27017 ) + @param errmsg any relevant error message will appended to the string + @deprecated please use HostAndPort + @return false if fails to connect. + */ + virtual bool connect(const char * hostname, string& errmsg) { + // TODO: remove this method + HostAndPort t( hostname ); + return connect( t , errmsg ); + } + + /** Connect to a Mongo database server. + + If autoReconnect is true, you can try to use the DBClientConnection even when + false was returned -- it will try to connect again. + + @param server server to connect to. + @param errmsg any relevant error message will appended to the string + @return false if fails to connect. + */ + virtual bool connect(const HostAndPort& server, string& errmsg); + + /** Connect to a Mongo database server. 
Exception throwing version. + Throws a UserException if cannot connect. + + If autoReconnect is true, you can try to use the DBClientConnection even when + false was returned -- it will try to connect again. + + @param serverHostname host to connect to. can include port number ( 127.0.0.1 , 127.0.0.1:5555 ) + */ + void connect(const string& serverHostname) { + string errmsg; + if( !connect(HostAndPort(serverHostname), errmsg) ) + throw ConnectException(string("can't connect ") + errmsg); + } + + virtual bool auth(const string &dbname, const string &username, const string &pwd, string& errmsg, bool digestPassword = true, Auth::Level* level=NULL); + + virtual auto_ptr<DBClientCursor> query(const string &ns, Query query=Query(), int nToReturn = 0, int nToSkip = 0, + const BSONObj *fieldsToReturn = 0, int queryOptions = 0 , int batchSize = 0 ) { + checkConnection(); + return DBClientBase::query( ns, query, nToReturn, nToSkip, fieldsToReturn, queryOptions , batchSize ); + } + + virtual unsigned long long query( boost::function<void(DBClientCursorBatchIterator &)> f, + const string& ns, + Query query, + const BSONObj *fieldsToReturn, + int queryOptions ); + + virtual bool runCommand(const string &dbname, const BSONObj& cmd, BSONObj &info, int options=0); + + /** + @return true if this connection is currently in a failed state. When autoreconnect is on, + a connection will transition back to an ok state after reconnecting. 
+ */ + bool isFailed() const { return _failed; } + + MessagingPort& port() { assert(p); return *p; } + + string toStringLong() const { + stringstream ss; + ss << _serverString; + if ( _failed ) ss << " failed"; + return ss.str(); + } + + /** Returns the address of the server */ + string toString() { return _serverString; } + + string getServerAddress() const { return _serverString; } + + virtual void killCursor( long long cursorID ); + virtual bool callRead( Message& toSend , Message& response ) { return call( toSend , response ); } + virtual void say( Message &toSend, bool isRetry = false , string * actualServer = 0 ); + virtual bool recv( Message& m ); + virtual void checkResponse( const char *data, int nReturned, bool* retry = NULL, string* host = NULL ); + virtual bool call( Message &toSend, Message &response, bool assertOk = true , string * actualServer = 0 ); + virtual ConnectionString::ConnectionType type() const { return ConnectionString::MASTER; } + void setSoTimeout(double to) { _so_timeout = to; } + double getSoTimeout() const { return _so_timeout; } + + virtual bool lazySupported() const { return true; } + + static int getNumConnections() { + return _numConnections; + } + + static void setLazyKillCursor( bool lazy ) { _lazyKillCursor = lazy; } + static bool getLazyKillCursor() { return _lazyKillCursor; } + + protected: + friend class SyncClusterConnection; + virtual void sayPiggyBack( Message &toSend ); + + DBClientReplicaSet *clientSet; + boost::scoped_ptr<MessagingPort> p; + boost::scoped_ptr<SockAddr> server; + bool _failed; + const bool autoReconnect; + time_t lastReconnectTry; + HostAndPort _server; // remember for reconnects + string _serverString; + void _checkConnection(); + + // throws SocketException if in failed state and not reconnecting or if waiting to reconnect + void checkConnection() { if( _failed ) _checkConnection(); } + + map< string, pair<string,string> > authCache; + double _so_timeout; + bool _connect( string& errmsg ); + + static 
AtomicUInt _numConnections; + static bool _lazyKillCursor; // lazy means we piggy back kill cursors on next op + +#ifdef MONGO_SSL + static SSLManager* sslManager(); + static SSLManager* _sslManager; +#endif + }; + + /** pings server to check if it's up + */ + bool serverAlive( const string &uri ); + + DBClientBase * createDirectClient(); + + BSONElement getErrField( const BSONObj& result ); + bool hasErrField( const BSONObj& result ); + + inline std::ostream& operator<<( std::ostream &s, const Query &q ) { + return s << q.toString(); + } + +} // namespace mongo diff --git a/src/mongo/client/distlock.cpp b/src/mongo/client/distlock.cpp index 5e30f4ad52c..7316ba063cc 100644 --- a/src/mongo/client/distlock.cpp +++ b/src/mongo/client/distlock.cpp @@ -15,9 +15,13 @@ * limitations under the License. */ -#include "pch.h" -#include "dbclient.h" -#include "distlock.h" +#include "mongo/pch.h" + +#include "mongo/client/distlock.h" + +#include "mongo/client/dbclient.h" +#include "mongo/client/dbclientcursor.h" + namespace mongo { diff --git a/src/mongo/client/examples/first.cpp b/src/mongo/client/examples/first.cpp index 047ff1914b7..01428bc977a 100644 --- a/src/mongo/client/examples/first.cpp +++ b/src/mongo/client/examples/first.cpp @@ -21,7 +21,7 @@ #include <iostream> -#include "client/dbclient.h" +#include "mongo/client/dbclient.h" using namespace std; diff --git a/src/mongo/client/examples/second.cpp b/src/mongo/client/examples/second.cpp index 6cc2111580f..2126e5f8418 100644 --- a/src/mongo/client/examples/second.cpp +++ b/src/mongo/client/examples/second.cpp @@ -18,6 +18,7 @@ #include <iostream> #include "client/dbclient.h" +#include "client/connpool.h" using namespace std; using namespace mongo; @@ -31,26 +32,23 @@ int main( int argc, const char **argv ) { port = argv[ 2 ]; } - DBClientConnection conn; - string errmsg; - if ( ! 
conn.connect( string( "127.0.0.1:" ) + port , errmsg ) ) { - cout << "couldn't connect : " << errmsg << endl; - throw -11; - } + ScopedDbConnection conn( string( "127.0.0.1:" ) + port ); const char * ns = "test.second"; - conn.remove( ns , BSONObj() ); + conn->remove( ns , BSONObj() ); - conn.insert( ns , BSON( "name" << "eliot" << "num" << 17 ) ); - conn.insert( ns , BSON( "name" << "sara" << "num" << 24 ) ); + conn->insert( ns , BSON( "name" << "eliot" << "num" << 17 ) ); + conn->insert( ns , BSON( "name" << "sara" << "num" << 24 ) ); - auto_ptr<DBClientCursor> cursor = conn.query( ns , BSONObj() ); + auto_ptr<DBClientCursor> cursor = conn->query( ns , BSONObj() ); cout << "using cursor" << endl; while ( cursor->more() ) { BSONObj obj = cursor->next(); cout << "\t" << obj.jsonString() << endl; } - conn.ensureIndex( ns , BSON( "name" << 1 << "num" << -1 ) ); + conn->ensureIndex( ns , BSON( "name" << 1 << "num" << -1 ) ); + + conn.done(); } diff --git a/src/mongo/client/gridfs.cpp b/src/mongo/client/gridfs.cpp index 7dcb1d5bcb1..b3f23a07277 100644 --- a/src/mongo/client/gridfs.cpp +++ b/src/mongo/client/gridfs.cpp @@ -15,13 +15,15 @@ * limitations under the License. 
*/ -#include "pch.h" +#include "mongo/pch.h" + +#include <boost/smart_ptr.hpp> #include <fcntl.h> -#include <utility> #include <fstream> +#include <utility> -#include "gridfs.h" -#include <boost/smart_ptr.hpp> +#include "mongo/client/gridfs.h" +#include "mongo/client/dbclientcursor.h" #if defined(_WIN32) #include <io.h> diff --git a/src/mongo/client/parallel.h b/src/mongo/client/parallel.h index a968464910f..f7ad47d9dd1 100644 --- a/src/mongo/client/parallel.h +++ b/src/mongo/client/parallel.h @@ -21,13 +21,17 @@ #pragma once -#include "../pch.h" -#include "dbclient.h" -#include "redef_macros.h" -#include "../db/dbmessage.h" -#include "../db/matcher.h" -#include "../util/concurrency/mvar.h" -#include "../s/util.h" +#include "mongo/pch.h" + +#include "mongo/client/dbclient.h" + +#include "mongo/client/redef_macros.h" + +#include "mongo/db/dbmessage.h" +#include "mongo/db/matcher.h" +#include "mongo/db/namespacestring.h" +#include "mongo/s/util.h" +#include "mongo/util/concurrency/mvar.h" namespace mongo { @@ -462,4 +466,4 @@ namespace mongo { } -#include "undef_macros.h" +#include "mongo/client/undef_macros.h" diff --git a/src/mongo/client/syncclusterconnection.cpp b/src/mongo/client/syncclusterconnection.cpp index fbb93dc8af2..f9262ab6240 100644 --- a/src/mongo/client/syncclusterconnection.cpp +++ b/src/mongo/client/syncclusterconnection.cpp @@ -16,9 +16,12 @@ */ -#include "pch.h" -#include "syncclusterconnection.h" -#include "../db/dbmessage.h" +#include "mongo/pch.h" + +#include "mongo/client/syncclusterconnection.h" + +#include "mongo/client/dbclientcursor.h" +#include "mongo/db/dbmessage.h" // error codes 8000-8009 diff --git a/src/mongo/db/authlevel.h b/src/mongo/db/authlevel.h new file mode 100644 index 00000000000..af0a5665424 --- /dev/null +++ b/src/mongo/db/authlevel.h @@ -0,0 +1,43 @@ +// mongo/db/authlevel.h + +/** + * Copyright (C) 2009 10gen Inc. 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License, version 3, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#pragma once + +#include <string> + +namespace mongo { + + /* + * for a particular db + * levels + * 0 : none + * 1 : read + * 2 : write + */ + struct Auth { + + enum Level { NONE = 0 , + READ = 1 , + WRITE = 2 }; + + Auth() : level( NONE ) {} + + Level level; + string user; + }; +} // namespace mongo diff --git a/src/mongo/db/client.h b/src/mongo/db/client.h index c18642ee96e..a277a97ec89 100644 --- a/src/mongo/db/client.h +++ b/src/mongo/db/client.h @@ -34,6 +34,7 @@ #include "../util/net/message_port.h" #include "../util/concurrency/rwlock.h" #include "d_concurrency.h" +#include "mongo/util/paths.h" namespace mongo { diff --git a/src/mongo/db/d_concurrency.cpp b/src/mongo/db/d_concurrency.cpp index 049b8948682..55ed6f389da 100644 --- a/src/mongo/db/d_concurrency.cpp +++ b/src/mongo/db/d_concurrency.cpp @@ -12,6 +12,7 @@ #include "d_globals.h" #include "mongomutex.h" #include "server.h" +#include "dur.h" // oplog locking // no top level read locks diff --git a/src/mongo/db/database.h b/src/mongo/db/database.h index a7867e20e8c..7cac667a6f5 100644 --- a/src/mongo/db/database.h +++ b/src/mongo/db/database.h @@ -18,8 +18,8 @@ #pragma once -#include "cmdline.h" -#include "namespace.h" +#include "mongo/db/cmdline.h" +#include "mongo/db/namespace_details.h" namespace mongo { diff --git a/src/mongo/db/dbmessage.h b/src/mongo/db/dbmessage.h index 
a789bff849c..ad5aea2cdff 100644 --- a/src/mongo/db/dbmessage.h +++ b/src/mongo/db/dbmessage.h @@ -18,7 +18,6 @@ #pragma once -#include "diskloc.h" #include "jsobj.h" #include "namespace-inl.h" #include "../util/net/message.h" diff --git a/src/mongo/db/dur_journal.cpp b/src/mongo/db/dur_journal.cpp index 43825bf25e8..7f1e4351c46 100644 --- a/src/mongo/db/dur_journal.cpp +++ b/src/mongo/db/dur_journal.cpp @@ -38,6 +38,7 @@ #include "../util/compress.h" #include "../util/progress_meter.h" #include "../server.h" +#include "../util/mmap.h" using namespace mongoutils; diff --git a/src/mongo/db/extsort.h b/src/mongo/db/extsort.h index 8c9c173989c..c292ed12ad6 100644 --- a/src/mongo/db/extsort.h +++ b/src/mongo/db/extsort.h @@ -18,11 +18,14 @@ #pragma once -#include "../pch.h" -#include "jsobj.h" -#include "namespace-inl.h" -#include "curop-inl.h" -#include "../util/array.h" +#include "mongo/pch.h" + +#include "mongo/db/index.h" +#include "mongo/db/jsobj.h" +#include "mongo/db/namespace-inl.h" +#include "mongo/db/curop-inl.h" +#include "mongo/util/array.h" +#include "mongo/util/mmap.h" namespace mongo { diff --git a/src/mongo/db/index.h b/src/mongo/db/index.h index d297f8a4ca1..4418f2ad382 100644 --- a/src/mongo/db/index.h +++ b/src/mongo/db/index.h @@ -23,6 +23,7 @@ #include "jsobj.h" #include "indexkey.h" #include "key.h" +#include "namespace.h" namespace mongo { diff --git a/src/mongo/db/instance.h b/src/mongo/db/instance.h index cf9698a95da..090250ace0a 100644 --- a/src/mongo/db/instance.h +++ b/src/mongo/db/instance.h @@ -20,7 +20,7 @@ #pragma once -#include "../client/dbclient.h" +#include "mongo/client/dbclient.h" #include "curop-inl.h" #include "security.h" #include "cmdline.h" diff --git a/src/mongo/db/namespace-inl.h b/src/mongo/db/namespace-inl.h index a621a229546..c18f681e0b9 100644 --- a/src/mongo/db/namespace-inl.h +++ b/src/mongo/db/namespace-inl.h @@ -18,7 +18,7 @@ #pragma once -#include "namespace.h" +#include "mongo/db/namespace.h" namespace mongo { @@ 
-70,63 +70,4 @@ namespace mongo { return old + "." + local; } - inline IndexDetails& NamespaceDetails::idx(int idxNo, bool missingExpected ) { - if( idxNo < NIndexesBase ) { - IndexDetails& id = _indexes[idxNo]; - return id; - } - Extra *e = extra(); - if ( ! e ) { - if ( missingExpected ) - throw MsgAssertionException( 13283 , "Missing Extra" ); - massert(14045, "missing Extra", e); - } - int i = idxNo - NIndexesBase; - if( i >= NIndexesExtra ) { - e = e->next(this); - if ( ! e ) { - if ( missingExpected ) - throw MsgAssertionException( 14823 , "missing extra" ); - massert(14824, "missing Extra", e); - } - i -= NIndexesExtra; - } - return e->details[i]; - } - - inline int NamespaceDetails::idxNo(IndexDetails& idx) { - IndexIterator i = ii(); - while( i.more() ) { - if( &i.next() == &idx ) - return i.pos()-1; - } - massert( 10349 , "E12000 idxNo fails", false); - return -1; - } - - inline int NamespaceDetails::findIndexByKeyPattern(const BSONObj& keyPattern) { - IndexIterator i = ii(); - while( i.more() ) { - if( i.next().keyPattern() == keyPattern ) - return i.pos()-1; - } - return -1; - } - - // @return offset in indexes[] - inline int NamespaceDetails::findIndexByName(const char *name) { - IndexIterator i = ii(); - while( i.more() ) { - if ( strcmp(i.next().info.obj().getStringField("name"),name) == 0 ) - return i.pos()-1; - } - return -1; - } - - inline NamespaceDetails::IndexIterator::IndexIterator(NamespaceDetails *_d) { - d = _d; - i = 0; - n = d->nIndexes; - } - -} +} // namespace mongo diff --git a/src/mongo/db/namespace.cpp b/src/mongo/db/namespace.cpp index c84afa824e6..77a45fe8da1 100644 --- a/src/mongo/db/namespace.cpp +++ b/src/mongo/db/namespace.cpp @@ -16,788 +16,16 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -#include "pch.h" -#include "pdfile.h" -#include "db.h" -#include "mongommf.h" -#include "../util/hashtab.h" -#include "../scripting/engine.h" -#include "btree.h" -#include <algorithm> -#include <list> -#include "json.h" -#include "ops/delete.h" +#include "mongo/db/namespace.h" -#include <boost/filesystem/operations.hpp> +#include <boost/static_assert.hpp> -namespace mongo { - - BOOST_STATIC_ASSERT( sizeof(Namespace) == 128 ); - - BSONObj idKeyPattern = fromjson("{\"_id\":1}"); - - /* deleted lists -- linked lists of deleted records -- are placed in 'buckets' of various sizes - so you can look for a deleterecord about the right size. - */ - int bucketSizes[] = { - 32, 64, 128, 256, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, - 0x8000, 0x10000, 0x20000, 0x40000, 0x80000, 0x100000, 0x200000, - 0x400000, 0x800000 - }; - - NamespaceDetails::NamespaceDetails( const DiskLoc &loc, bool _capped ) { - /* be sure to initialize new fields here -- doesn't default to zeroes the way we use it */ - firstExtent = lastExtent = capExtent = loc; - stats.datasize = stats.nrecords = 0; - lastExtentSize = 0; - nIndexes = 0; - capped = _capped; - max = 0x7fffffff; - paddingFactor = 1.0; - flags = 0; - capFirstNewRecord = DiskLoc(); - // Signal that we are on first allocation iteration through extents. - capFirstNewRecord.setInvalid(); - // For capped case, signal that we are doing initial extent allocation. 
- if ( capped ) - cappedLastDelRecLastExtent().setInvalid(); - assert( sizeof(dataFileVersion) == 2 ); - dataFileVersion = 0; - indexFileVersion = 0; - multiKeyIndexBits = 0; - reservedA = 0; - extraOffset = 0; - indexBuildInProgress = 0; - reservedB = 0; - capped2.cc2_ptr = 0; - capped2.fileNumber = 0; - memset(reserved, 0, sizeof(reserved)); - } - - bool NamespaceIndex::exists() const { - return !boost::filesystem::exists(path()); - } - - boost::filesystem::path NamespaceIndex::path() const { - boost::filesystem::path ret( dir_ ); - if ( directoryperdb ) - ret /= database_; - ret /= ( database_ + ".ns" ); - return ret; - } - - void NamespaceIndex::maybeMkdir() const { - if ( !directoryperdb ) - return; - boost::filesystem::path dir( dir_ ); - dir /= database_; - if ( !boost::filesystem::exists( dir ) ) - MONGO_BOOST_CHECK_EXCEPTION_WITH_MSG( boost::filesystem::create_directory( dir ), "create dir for db " ); - } - - unsigned lenForNewNsFiles = 16 * 1024 * 1024; - -#if defined(_DEBUG) - void NamespaceDetails::dump(const Namespace& k) { - if( !cmdLine.dur ) - cout << "ns offsets which follow will not display correctly with --journal disabled" << endl; - - size_t ofs = 1; // 1 is sentinel that the find call below failed - privateViews.find(this, /*out*/ofs); - - cout << "ns" << hex << setw(8) << ofs << ' '; - cout << k.toString() << '\n'; - - if( k.isExtra() ) { - cout << "ns\t extra" << endl; - return; - } - - cout << "ns " << firstExtent.toString() << ' ' << lastExtent.toString() << " nidx:" << nIndexes << '\n'; - cout << "ns " << stats.datasize << ' ' << stats.nrecords << ' ' << nIndexes << '\n'; - cout << "ns " << capped << ' ' << paddingFactor << ' ' << flags << ' ' << dataFileVersion << '\n'; - cout << "ns " << multiKeyIndexBits << ' ' << indexBuildInProgress << '\n'; - cout << "ns " << (int) reserved[0] << ' ' << (int) reserved[59]; - cout << endl; - } -#endif - - void NamespaceDetails::onLoad(const Namespace& k) { - - if( k.isExtra() ) { - /* overflow 
storage for indexes - so don't treat as a NamespaceDetails object. */ - return; - } - - if( indexBuildInProgress || capped2.cc2_ptr ) { - assertInWriteLock(); - if( indexBuildInProgress ) { - log() << "indexBuildInProgress was " << indexBuildInProgress << " for " << k << ", indicating an abnormal db shutdown" << endl; - getDur().writingInt( indexBuildInProgress ) = 0; - } - if( capped2.cc2_ptr ) - *getDur().writing(&capped2.cc2_ptr) = 0; - } - } - - static void namespaceOnLoadCallback(const Namespace& k, NamespaceDetails& v) { - v.onLoad(k); - } - - bool checkNsFilesOnLoad = true; - - NOINLINE_DECL void NamespaceIndex::_init() { - assert( !ht ); - - Lock::assertWriteLocked(database_); - - /* if someone manually deleted the datafiles for a database, - we need to be sure to clear any cached info for the database in - local.*. - */ - /* - if ( "local" != database_ ) { - DBInfo i(database_.c_str()); - i.dbDropped(); - } - */ - - unsigned long long len = 0; - boost::filesystem::path nsPath = path(); - string pathString = nsPath.string(); - void *p = 0; - if( boost::filesystem::exists(nsPath) ) { - if( f.open(pathString, true) ) { - len = f.length(); - if ( len % (1024*1024) != 0 ) { - log() << "bad .ns file: " << pathString << endl; - uassert( 10079 , "bad .ns file length, cannot open database", len % (1024*1024) == 0 ); - } - p = f.getView(); - } - } - else { - // use lenForNewNsFiles, we are making a new database - massert( 10343, "bad lenForNewNsFiles", lenForNewNsFiles >= 1024*1024 ); - maybeMkdir(); - unsigned long long l = lenForNewNsFiles; - if( f.create(pathString, l, true) ) { - getDur().createdFile(pathString, l); // always a new file - len = l; - assert( len == lenForNewNsFiles ); - p = f.getView(); - } - } - - if ( p == 0 ) { - /** TODO: this shouldn't terminate? 
*/ - log() << "error couldn't open file " << pathString << " terminating" << endl; - dbexit( EXIT_FS ); - } - - - assert( len <= 0x7fffffff ); - ht = new HashTable<Namespace,NamespaceDetails>(p, (int) len, "namespace index"); - if( checkNsFilesOnLoad ) - ht->iterAll(namespaceOnLoadCallback); - } - - static void namespaceGetNamespacesCallback( const Namespace& k , NamespaceDetails& v , void * extra ) { - list<string> * l = (list<string>*)extra; - if ( ! k.hasDollarSign() ) - l->push_back( (string)k ); - } - void NamespaceIndex::getNamespaces( list<string>& tofill , bool onlyCollections ) const { - assert( onlyCollections ); // TODO: need to implement this - // need boost::bind or something to make this less ugly - - if ( ht ) - ht->iterAll( namespaceGetNamespacesCallback , (void*)&tofill ); - } - - void NamespaceDetails::addDeletedRec(DeletedRecord *d, DiskLoc dloc) { - BOOST_STATIC_ASSERT( sizeof(NamespaceDetails::Extra) <= sizeof(NamespaceDetails) ); - - { - Record *r = (Record *) getDur().writingPtr(d, sizeof(Record)); - d = &r->asDeleted(); - // defensive code: try to make us notice if we reference a deleted record - (unsigned&) (r->data) = 0xeeeeeeee; - } - DEBUGGING log() << "TEMP: add deleted rec " << dloc.toString() << ' ' << hex << d->extentOfs << endl; - if ( capped ) { - if ( !cappedLastDelRecLastExtent().isValid() ) { - // Initial extent allocation. Insert at end. 
- d->nextDeleted = DiskLoc(); - if ( cappedListOfAllDeletedRecords().isNull() ) - getDur().writingDiskLoc( cappedListOfAllDeletedRecords() ) = dloc; - else { - DiskLoc i = cappedListOfAllDeletedRecords(); - for (; !i.drec()->nextDeleted.isNull(); i = i.drec()->nextDeleted ) - ; - i.drec()->nextDeleted.writing() = dloc; - } - } - else { - d->nextDeleted = cappedFirstDeletedInCurExtent(); - getDur().writingDiskLoc( cappedFirstDeletedInCurExtent() ) = dloc; - // always compact() after this so order doesn't matter - } - } - else { - int b = bucket(d->lengthWithHeaders); - DiskLoc& list = deletedList[b]; - DiskLoc oldHead = list; - getDur().writingDiskLoc(list) = dloc; - d->nextDeleted = oldHead; - } - } - - /* predetermine location of the next alloc without actually doing it. - if cannot predetermine returns null (so still call alloc() then) - */ - DiskLoc NamespaceDetails::allocWillBeAt(const char *ns, int lenToAlloc) { - if ( !capped ) { - lenToAlloc = (lenToAlloc + 3) & 0xfffffffc; - return __stdAlloc(lenToAlloc, true); - } - return DiskLoc(); - } - - /** allocate space for a new record from deleted lists. - @param lenToAlloc is WITH header - @param extentLoc OUT returns the extent location - @return null diskloc if no room - allocate a new extent then - */ - DiskLoc NamespaceDetails::alloc(const char *ns, int lenToAlloc, DiskLoc& extentLoc) { - { - // align very slightly. - // note that if doing more coarse-grained quantization (really just if it isn't always - // a constant amount but if it varied by record size) then that quantization should - // NOT be done here but rather in __stdAlloc so that we can grab a deletedrecord that - // is just big enough if we happen to run into one. 
- lenToAlloc = (lenToAlloc + 3) & 0xfffffffc; - } - - DiskLoc loc = _alloc(ns, lenToAlloc); - if ( loc.isNull() ) - return loc; - - const DeletedRecord *r = loc.drec(); - //r = getDur().writing(r); - - /* note we want to grab from the front so our next pointers on disk tend - to go in a forward direction which is important for performance. */ - int regionlen = r->lengthWithHeaders; - extentLoc.set(loc.a(), r->extentOfs); - assert( r->extentOfs < loc.getOfs() ); - - DEBUGGING out() << "TEMP: alloc() returns " << loc.toString() << ' ' << ns << " lentoalloc:" << lenToAlloc << " ext:" << extentLoc.toString() << endl; - - int left = regionlen - lenToAlloc; - if ( capped == 0 ) { - if ( left < 24 || left < (lenToAlloc >> 3) ) { - // you get the whole thing. - return loc; - } - } - - /* split off some for further use. */ - getDur().writingInt(r->lengthWithHeaders) = lenToAlloc; - DiskLoc newDelLoc = loc; - newDelLoc.inc(lenToAlloc); - DeletedRecord *newDel = DataFileMgr::makeDeletedRecord(newDelLoc, left); - DeletedRecord *newDelW = getDur().writing(newDel); - newDelW->extentOfs = r->extentOfs; - newDelW->lengthWithHeaders = left; - newDelW->nextDeleted.Null(); - - addDeletedRec(newDel, newDelLoc); - - return loc; - } - - /* for non-capped collections. - @param peekOnly just look up where and don't reserve - returned item is out of the deleted list upon return - */ - DiskLoc NamespaceDetails::__stdAlloc(int len, bool peekOnly) { - DiskLoc *prev; - DiskLoc *bestprev = 0; - DiskLoc bestmatch; - int bestmatchlen = 0x7fffffff; - int b = bucket(len); - DiskLoc cur = deletedList[b]; - prev = &deletedList[b]; - int extra = 5; // look for a better fit, a little. 
- int chain = 0; - while ( 1 ) { - { - int a = cur.a(); - if ( a < -1 || a >= 100000 ) { - problem() << "~~ Assertion - cur out of range in _alloc() " << cur.toString() << - " a:" << a << " b:" << b << " chain:" << chain << '\n'; - sayDbContext(); - if ( cur == *prev ) - prev->Null(); - cur.Null(); - } - } - if ( cur.isNull() ) { - // move to next bucket. if we were doing "extra", just break - if ( bestmatchlen < 0x7fffffff ) - break; - b++; - if ( b > MaxBucket ) { - // out of space. alloc a new extent. - return DiskLoc(); - } - cur = deletedList[b]; - prev = &deletedList[b]; - continue; - } - DeletedRecord *r = cur.drec(); - if ( r->lengthWithHeaders >= len && - r->lengthWithHeaders < bestmatchlen ) { - bestmatchlen = r->lengthWithHeaders; - bestmatch = cur; - bestprev = prev; - } - if ( bestmatchlen < 0x7fffffff && --extra <= 0 ) - break; - if ( ++chain > 30 && b < MaxBucket ) { - // too slow, force move to next bucket to grab a big chunk - //b++; - chain = 0; - cur.Null(); - } - else { - /*this defensive check only made sense for the mmap storage engine: - if ( r->nextDeleted.getOfs() == 0 ) { - problem() << "~~ Assertion - bad nextDeleted " << r->nextDeleted.toString() << - " b:" << b << " chain:" << chain << ", fixing.\n"; - r->nextDeleted.Null(); - }*/ - cur = r->nextDeleted; - prev = &r->nextDeleted; - } - } - - /* unlink ourself from the deleted list */ - if( !peekOnly ) { - const DeletedRecord *bmr = bestmatch.drec(); - *getDur().writing(bestprev) = bmr->nextDeleted; - bmr->nextDeleted.writing().setInvalid(); // defensive. 
- assert(bmr->extentOfs < bestmatch.getOfs()); - } - - return bestmatch; - } - - void NamespaceDetails::dumpDeleted(set<DiskLoc> *extents) { - for ( int i = 0; i < Buckets; i++ ) { - DiskLoc dl = deletedList[i]; - while ( !dl.isNull() ) { - DeletedRecord *r = dl.drec(); - DiskLoc extLoc(dl.a(), r->extentOfs); - if ( extents == 0 || extents->count(extLoc) <= 0 ) { - out() << " bucket " << i << endl; - out() << " " << dl.toString() << " ext:" << extLoc.toString(); - if ( extents && extents->count(extLoc) <= 0 ) - out() << '?'; - out() << " len:" << r->lengthWithHeaders << endl; - } - dl = r->nextDeleted; - } - } - } - - DiskLoc NamespaceDetails::firstRecord( const DiskLoc &startExtent ) const { - for (DiskLoc i = startExtent.isNull() ? firstExtent : startExtent; - !i.isNull(); i = i.ext()->xnext ) { - if ( !i.ext()->firstRecord.isNull() ) - return i.ext()->firstRecord; - } - return DiskLoc(); - } +#include "mongo/db/namespacestring.h" - DiskLoc NamespaceDetails::lastRecord( const DiskLoc &startExtent ) const { - for (DiskLoc i = startExtent.isNull() ? 
lastExtent : startExtent; - !i.isNull(); i = i.ext()->xprev ) { - if ( !i.ext()->lastRecord.isNull() ) - return i.ext()->lastRecord; - } - return DiskLoc(); - } - - int n_complaints_cap = 0; - void NamespaceDetails::maybeComplain( const char *ns, int len ) const { - if ( ++n_complaints_cap < 8 ) { - out() << "couldn't make room for new record (len: " << len << ") in capped ns " << ns << '\n'; - int i = 0; - for ( DiskLoc e = firstExtent; !e.isNull(); e = e.ext()->xnext, ++i ) { - out() << " Extent " << i; - if ( e == capExtent ) - out() << " (capExtent)"; - out() << '\n'; - out() << " magic: " << hex << e.ext()->magic << dec << " extent->ns: " << e.ext()->nsDiagnostic.toString() << '\n'; - out() << " fr: " << e.ext()->firstRecord.toString() << - " lr: " << e.ext()->lastRecord.toString() << " extent->len: " << e.ext()->length << '\n'; - } - assert( len * 5 > lastExtentSize ); // assume it is unusually large record; if not, something is broken - } - } - - /* alloc with capped table handling. 
*/ - DiskLoc NamespaceDetails::_alloc(const char *ns, int len) { - if ( !capped ) - return __stdAlloc(len, false); - - return cappedAlloc(ns,len); - } - - void NamespaceIndex::kill_ns(const char *ns) { - Lock::assertWriteLocked(ns); - if ( !ht ) - return; - Namespace n(ns); - ht->kill(n); - - for( int i = 0; i<=1; i++ ) { - try { - Namespace extra(n.extraName(i).c_str()); - ht->kill(extra); - } - catch(DBException&) { - dlog(3) << "caught exception in kill_ns" << endl; - } - } - } - - void NamespaceIndex::add_ns(const char *ns, DiskLoc& loc, bool capped) { - NamespaceDetails details( loc, capped ); - add_ns( ns, details ); - } - void NamespaceIndex::add_ns( const char *ns, const NamespaceDetails &details ) { - Lock::assertWriteLocked(ns); - init(); - Namespace n(ns); - uassert( 10081 , "too many namespaces/collections", ht->put(n, details)); - } - - /* extra space for indexes when more than 10 */ - NamespaceDetails::Extra* NamespaceIndex::newExtra(const char *ns, int i, NamespaceDetails *d) { - Lock::assertWriteLocked(ns); - assert( i >= 0 && i <= 1 ); - Namespace n(ns); - Namespace extra(n.extraName(i).c_str()); // throws userexception if ns name too long - - massert( 10350 , "allocExtra: base ns missing?", d ); - massert( 10351 , "allocExtra: extra already exists", ht->get(extra) == 0 ); - - NamespaceDetails::Extra temp; - temp.init(); - uassert( 10082 , "allocExtra: too many namespaces/collections", ht->put(extra, (NamespaceDetails&) temp)); - NamespaceDetails::Extra *e = (NamespaceDetails::Extra *) ht->get(extra); - return e; - } - NamespaceDetails::Extra* NamespaceDetails::allocExtra(const char *ns, int nindexessofar) { - NamespaceIndex *ni = nsindex(ns); - int i = (nindexessofar - NIndexesBase) / NIndexesExtra; - Extra *e = ni->newExtra(ns, i, this); - long ofs = e->ofsFrom(this); - if( i == 0 ) { - assert( extraOffset == 0 ); - *getDur().writing(&extraOffset) = ofs; - assert( extra() == e ); - } - else { - Extra *hd = extra(); - assert( hd->next(this) == 0 
); - hd->setNext(ofs); - } - return e; - } - - /* you MUST call when adding an index. see pdfile.cpp */ - IndexDetails& NamespaceDetails::addIndex(const char *thisns, bool resetTransient) { - IndexDetails *id; - try { - id = &idx(nIndexes,true); - } - catch(DBException&) { - allocExtra(thisns, nIndexes); - id = &idx(nIndexes,false); - } - - (*getDur().writing(&nIndexes))++; - if ( resetTransient ) - NamespaceDetailsTransient::get(thisns).addedIndex(); - return *id; - } - - // must be called when renaming a NS to fix up extra - void NamespaceDetails::copyingFrom(const char *thisns, NamespaceDetails *src) { - extraOffset = 0; // we are a copy -- the old value is wrong. fixing it up below. - Extra *se = src->extra(); - int n = NIndexesBase; - if( se ) { - Extra *e = allocExtra(thisns, n); - while( 1 ) { - n += NIndexesExtra; - e->copy(this, *se); - se = se->next(src); - if( se == 0 ) break; - Extra *nxt = allocExtra(thisns, n); - e->setNext( nxt->ofsFrom(this) ); - e = nxt; - } - assert( extraOffset ); - } - } - - /* returns index of the first index in which the field is present. -1 if not present. 
- (aug08 - this method not currently used) - */ - int NamespaceDetails::fieldIsIndexed(const char *fieldName) { - massert( 10346 , "not implemented", false); - /* - for ( int i = 0; i < nIndexes; i++ ) { - IndexDetails& idx = indexes[i]; - BSONObj idxKey = idx.info.obj().getObjectField("key"); // e.g., { ts : -1 } - if ( !idxKey.getField(fieldName).eoo() ) - return i; - }*/ - return -1; - } - - long long NamespaceDetails::storageSize( int * numExtents , BSONArrayBuilder * extentInfo ) const { - Extent * e = firstExtent.ext(); - assert( e ); - - long long total = 0; - int n = 0; - while ( e ) { - total += e->length; - n++; - - if ( extentInfo ) { - extentInfo->append( BSON( "len" << e->length << "loc: " << e->myLoc.toBSONObj() ) ); - } - - e = e->getNextExtent(); - } - - if ( numExtents ) - *numExtents = n; - - return total; - } - - NamespaceDetails *NamespaceDetails::writingWithExtra() { - vector< pair< long long, unsigned > > writeRanges; - writeRanges.push_back( make_pair( 0, sizeof( NamespaceDetails ) ) ); - for( Extra *e = extra(); e; e = e->next( this ) ) { - writeRanges.push_back( make_pair( (char*)e - (char*)this, sizeof( Extra ) ) ); - } - return reinterpret_cast< NamespaceDetails* >( getDur().writingRangesAtOffsets( this, writeRanges ) ); - } - - /* ------------------------------------------------------------------------- */ - - SimpleMutex NamespaceDetailsTransient::_qcMutex("qc"); - SimpleMutex NamespaceDetailsTransient::_isMutex("is"); - map< string, shared_ptr< NamespaceDetailsTransient > > NamespaceDetailsTransient::_nsdMap; - typedef map< string, shared_ptr< NamespaceDetailsTransient > >::iterator ouriter; - - void NamespaceDetailsTransient::reset() { - Lock::assertWriteLocked(_ns); - clearQueryCache(); - _keysComputed = false; - _indexSpecs.clear(); - } - - /*static*/ NOINLINE_DECL NamespaceDetailsTransient& NamespaceDetailsTransient::make_inlock(const char *ns) { - shared_ptr< NamespaceDetailsTransient > &t = _nsdMap[ ns ]; - assert( t.get() == 0 
); - Database *database = cc().database(); - assert( database ); - if( _nsdMap.size() % 20000 == 10000 ) { - // so we notice if insanely large #s - log() << "opening namespace " << ns << endl; - log() << _nsdMap.size() << " namespaces in nsdMap" << endl; - } - t.reset( new NamespaceDetailsTransient(database, ns) ); - return *t; - } - - // note with repair there could be two databases with the same ns name. - // that is NOT handled here yet! TODO - // repair may not use nsdt though not sure. anyway, requires work. - NamespaceDetailsTransient::NamespaceDetailsTransient(Database *db, const char *ns) : - _ns(ns), _keysComputed(false), _qcWriteCount() - { - dassert(db); - } - - NamespaceDetailsTransient::~NamespaceDetailsTransient() { - } - - void NamespaceDetailsTransient::clearForPrefix(const char *prefix) { - SimpleMutex::scoped_lock lk(_qcMutex); - vector< string > found; - for( ouriter i = _nsdMap.begin(); i != _nsdMap.end(); ++i ) { - if ( strncmp( i->first.c_str(), prefix, strlen( prefix ) ) == 0 ) { - found.push_back( i->first ); - Lock::assertWriteLocked(i->first); - } - } - for( vector< string >::iterator i = found.begin(); i != found.end(); ++i ) { - _nsdMap[ *i ].reset(); - } - } - - void NamespaceDetailsTransient::eraseForPrefix(const char *prefix) { - SimpleMutex::scoped_lock lk(_qcMutex); - vector< string > found; - for( ouriter i = _nsdMap.begin(); i != _nsdMap.end(); ++i ) { - if ( strncmp( i->first.c_str(), prefix, strlen( prefix ) ) == 0 ) { - found.push_back( i->first ); - Lock::assertWriteLocked(i->first); - } - } - for( vector< string >::iterator i = found.begin(); i != found.end(); ++i ) { - _nsdMap.erase(*i); - } - } - - void NamespaceDetailsTransient::computeIndexKeys() { - _keysComputed = true; - _indexKeys.clear(); - NamespaceDetails *d = nsdetails(_ns.c_str()); - if ( ! 
d ) - return; - NamespaceDetails::IndexIterator i = d->ii(); - while( i.more() ) - i.next().keyPattern().getFieldNames(_indexKeys); - } - - - /* ------------------------------------------------------------------------- */ - - /* add a new namespace to the system catalog (<dbname>.system.namespaces). - options: { capped : ..., size : ... } - */ - void addNewNamespaceToCatalog(const char *ns, const BSONObj *options = 0) { - LOG(1) << "New namespace: " << ns << endl; - if ( strstr(ns, "system.namespaces") ) { - // system.namespaces holds all the others, so it is not explicitly listed in the catalog. - // TODO: fix above should not be strstr! - return; - } - - BSONObjBuilder b; - b.append("name", ns); - if ( options ) - b.append("options", *options); - BSONObj j = b.done(); - char database[256]; - nsToDatabase(ns, database); - string s = string(database) + ".system.namespaces"; - theDataFileMgr.insert(s.c_str(), j.objdata(), j.objsize(), true); - } - - void renameNamespace( const char *from, const char *to, bool stayTemp) { - NamespaceIndex *ni = nsindex( from ); - assert( ni ); - assert( ni->details( from ) ); - assert( ! ni->details( to ) ); - - // Our namespace and index details will move to a different - // memory location. The only references to namespace and - // index details across commands are in cursors and nsd - // transient (including query cache) so clear these. 
- ClientCursor::invalidate( from ); - NamespaceDetailsTransient::eraseForPrefix( from ); - - NamespaceDetails *details = ni->details( from ); - ni->add_ns( to, *details ); - NamespaceDetails *todetails = ni->details( to ); - try { - todetails->copyingFrom(to, details); // fixes extraOffset - } - catch( DBException& ) { - // could end up here if .ns is full - if so try to clean up / roll back a little - ni->kill_ns(to); - throw; - } - ni->kill_ns( from ); - details = todetails; - - BSONObj oldSpec; - char database[MaxDatabaseNameLen]; - nsToDatabase(from, database); - string s = database; - s += ".system.namespaces"; - assert( Helpers::findOne( s.c_str(), BSON( "name" << from ), oldSpec ) ); - - BSONObjBuilder newSpecB; - BSONObjIterator i( oldSpec.getObjectField( "options" ) ); - while( i.more() ) { - BSONElement e = i.next(); - if ( strcmp( e.fieldName(), "create" ) != 0 ) { - if (stayTemp || (strcmp(e.fieldName(), "temp") != 0)) - newSpecB.append( e ); - } - else { - newSpecB << "create" << to; - } - } - BSONObj newSpec = newSpecB.done(); - addNewNamespaceToCatalog( to, newSpec.isEmpty() ? 
0 : &newSpec ); - - deleteObjects( s.c_str(), BSON( "name" << from ), false, false, true ); - // oldSpec variable no longer valid memory - - BSONObj oldIndexSpec; - s = database; - s += ".system.indexes"; - while( Helpers::findOne( s.c_str(), BSON( "ns" << from ), oldIndexSpec ) ) { - BSONObjBuilder newIndexSpecB; - BSONObjIterator i( oldIndexSpec ); - while( i.more() ) { - BSONElement e = i.next(); - if ( strcmp( e.fieldName(), "ns" ) != 0 ) - newIndexSpecB.append( e ); - else - newIndexSpecB << "ns" << to; - } - BSONObj newIndexSpec = newIndexSpecB.done(); - DiskLoc newIndexSpecLoc = theDataFileMgr.insert( s.c_str(), newIndexSpec.objdata(), newIndexSpec.objsize(), true, false ); - int indexI = details->findIndexByName( oldIndexSpec.getStringField( "name" ) ); - IndexDetails &indexDetails = details->idx(indexI); - string oldIndexNs = indexDetails.indexNamespace(); - indexDetails.info = newIndexSpecLoc; - string newIndexNs = indexDetails.indexNamespace(); - - renameIndexNamespace( oldIndexNs.c_str(), newIndexNs.c_str() ); - deleteObjects( s.c_str(), oldIndexSpec.getOwned(), true, false, true ); - } - } - - bool legalClientSystemNS( const string& ns , bool write ) { - if( ns == "local.system.replset" ) return true; - - if ( ns.find( ".system.users" ) != string::npos ) - return true; - - if ( ns.find( ".system.js" ) != string::npos ) { - if ( write ) - Scope::storedFuncMod(); - return true; - } - - return false; - } +namespace mongo { +namespace { +BOOST_STATIC_ASSERT( sizeof(Namespace) == 128 ); +BOOST_STATIC_ASSERT( Namespace::MaxNsLen == MaxDatabaseNameLen ); +} // namespace +} // namespace mongo -} // namespace mongo diff --git a/src/mongo/db/namespace.h b/src/mongo/db/namespace.h index 0e2042940c8..b0907a58acc 100644 --- a/src/mongo/db/namespace.h +++ b/src/mongo/db/namespace.h @@ -18,25 +18,14 @@ #pragma once -#include "../pch.h" -#include "namespacestring.h" -#include "jsobj.h" -#include "querypattern.h" -#include "diskloc.h" -#include "../util/hashtab.h" 
-#include "mongommf.h" -#include "d_concurrency.h" -#include "queryoptimizer.h" -#include "queryoptimizercursor.h" +#include "mongo/pch.h" -namespace mongo { +#include <cstring> +#include <string> - class Database; +namespace mongo { #pragma pack(1) - /* This helper class is used to make the HashMap below in NamespaceIndex e.g. see line: - HashTable<Namespace,NamespaceDetails> *ht; - */ class Namespace { public: explicit Namespace(const char *ns) { *this = ns; } @@ -50,8 +39,8 @@ namespace mongo { size_t size() const { return strlen( buf ); } - string toString() const { return (string) buf; } - operator string() const { return (string) buf; } + string toString() const { return buf; } + operator string() const { return buf; } /* NamespaceDetails::Extra was added after fact to allow chaining of data blocks to support more than 10 indexes (more than 10 IndexDetails). It's a bit hacky because of this late addition with backward @@ -70,585 +59,4 @@ namespace mongo { }; #pragma pack() - BOOST_STATIC_ASSERT( Namespace::MaxNsLen == MaxDatabaseNameLen ); - -} // namespace mongo - -#include "index.h" - -namespace mongo { - - /** @return true if a client can modify this namespace even though it is under ".system." - For example <dbname>.system.users is ok for regular clients to update. - @param write used when .system.js - */ - bool legalClientSystemNS( const string& ns , bool write ); - - /* deleted lists -- linked lists of deleted records -- are placed in 'buckets' of various sizes - so you can look for a deleterecord about the right size. - */ - const int Buckets = 19; - const int MaxBucket = 18; - - extern int bucketSizes[]; - -#pragma pack(1) - /* NamespaceDetails : this is the "header" for a collection that has all its details. - It's in the .ns file and this is a memory mapped region (thus the pack pragma above). 
- */ - class NamespaceDetails { - public: - enum { NIndexesMax = 64, NIndexesExtra = 30, NIndexesBase = 10 }; - - /*-------- data fields, as present on disk : */ - DiskLoc firstExtent; - DiskLoc lastExtent; - /* NOTE: capped collections v1 override the meaning of deletedList. - deletedList[0] points to a list of free records (DeletedRecord's) for all extents in - the capped namespace. - deletedList[1] points to the last record in the prev extent. When the "current extent" - changes, this value is updated. !deletedList[1].isValid() when this value is not - yet computed. - */ - DiskLoc deletedList[Buckets]; - // ofs 168 (8 byte aligned) - struct Stats { - // datasize and nrecords MUST Be adjacent code assumes! - long long datasize; // this includes padding, but not record headers - long long nrecords; - } stats; - int lastExtentSize; - int nIndexes; - private: - // ofs 192 - IndexDetails _indexes[NIndexesBase]; - public: - // ofs 352 (16 byte aligned) - int capped; - int max; // max # of objects for a capped table. TODO: should this be 64 bit? - double paddingFactor; // 1.0 = no padding. - // ofs 386 (16) - int flags; - DiskLoc capExtent; - DiskLoc capFirstNewRecord; - unsigned short dataFileVersion; // NamespaceDetails version. So we can do backward compatibility in the future. 
See filever.h - unsigned short indexFileVersion; - unsigned long long multiKeyIndexBits; - private: - // ofs 400 (16) - unsigned long long reservedA; - long long extraOffset; // where the $extra info is located (bytes relative to this) - public: - int indexBuildInProgress; // 1 if in prog - unsigned reservedB; - // ofs 424 (8) - struct Capped2 { - unsigned long long cc2_ptr; // see capped.cpp - unsigned fileNumber; - } capped2; - char reserved[60]; - /*-------- end data 496 bytes */ - - explicit NamespaceDetails( const DiskLoc &loc, bool _capped ); - - class Extra { - long long _next; - public: - IndexDetails details[NIndexesExtra]; - private: - unsigned reserved2; - unsigned reserved3; - Extra(const Extra&) { assert(false); } - Extra& operator=(const Extra& r) { assert(false); return *this; } - public: - Extra() { } - long ofsFrom(NamespaceDetails *d) { - return ((char *) this) - ((char *) d); - } - void init() { memset(this, 0, sizeof(Extra)); } - Extra* next(NamespaceDetails *d) { - if( _next == 0 ) return 0; - return (Extra*) (((char *) d) + _next); - } - void setNext(long ofs) { *getDur().writing(&_next) = ofs; } - void copy(NamespaceDetails *d, const Extra& e) { - memcpy(this, &e, sizeof(Extra)); - _next = 0; - } - }; - Extra* extra() { - if( extraOffset == 0 ) return 0; - return (Extra *) (((char *) this) + extraOffset); - } - /* add extra space for indexes when more than 10 */ - Extra* allocExtra(const char *ns, int nindexessofar); - void copyingFrom(const char *thisns, NamespaceDetails *src); // must be called when renaming a NS to fix up extra - - /* called when loaded from disk */ - void onLoad(const Namespace& k); - - /* dump info on this namespace. for debugging. */ - void dump(const Namespace& k); - - /* dump info on all extents for this namespace. for debugging. 
*/ - void dumpExtents(); - - private: - Extent *theCapExtent() const { return capExtent.ext(); } - void advanceCapExtent( const char *ns ); - DiskLoc __capAlloc(int len); - DiskLoc cappedAlloc(const char *ns, int len); - DiskLoc &cappedFirstDeletedInCurExtent(); - bool nextIsInCapExtent( const DiskLoc &dl ) const; - - public: - DiskLoc& cappedListOfAllDeletedRecords() { return deletedList[0]; } - DiskLoc& cappedLastDelRecLastExtent() { return deletedList[1]; } - void cappedDumpDelInfo(); - bool capLooped() const { return capped && capFirstNewRecord.isValid(); } - bool inCapExtent( const DiskLoc &dl ) const; - void cappedCheckMigrate(); - /** - * Truncate documents newer than the document at 'end' from the capped - * collection. The collection cannot be completely emptied using this - * function. An assertion will be thrown if that is attempted. - * @param inclusive - Truncate 'end' as well iff true - */ - void cappedTruncateAfter(const char *ns, DiskLoc end, bool inclusive); - /** Remove all documents from the capped collection */ - void emptyCappedCollection(const char *ns); - - /* when a background index build is in progress, we don't count the index in nIndexes until - complete, yet need to still use it in _indexRecord() - thus we use this function for that. - */ - int nIndexesBeingBuilt() const { return nIndexes + indexBuildInProgress; } - - /* NOTE: be careful with flags. are we manipulating them in read locks? if so, - this isn't thread safe. TODO - */ - enum NamespaceFlags { - Flag_HaveIdIndex = 1 << 0 // set when we have _id index (ONLY if ensureIdIndex was called -- 0 if that has never been called) - }; - - IndexDetails& idx(int idxNo, bool missingExpected = false ); - - /** get the IndexDetails for the index currently being built in the background. 
(there is at most one) */ - IndexDetails& inProgIdx() { - DEV assert(indexBuildInProgress); - return idx(nIndexes); - } - - class IndexIterator { - public: - int pos() { return i; } // note this is the next one to come - bool more() { return i < n; } - IndexDetails& next() { return d->idx(i++); } - private: - friend class NamespaceDetails; - int i, n; - NamespaceDetails *d; - IndexIterator(NamespaceDetails *_d); - }; - - IndexIterator ii() { return IndexIterator(this); } - - /* hackish - find our index # in the indexes array */ - int idxNo(IndexDetails& idx); - - /* multikey indexes are indexes where there are more than one key in the index - for a single document. see multikey in wiki. - for these, we have to do some dedup work on queries. - */ - bool isMultikey(int i) const { return (multiKeyIndexBits & (((unsigned long long) 1) << i)) != 0; } - void setIndexIsMultikey(int i) { - dassert( i < NIndexesMax ); - unsigned long long x = ((unsigned long long) 1) << i; - if( multiKeyIndexBits & x ) return; - *getDur().writing(&multiKeyIndexBits) |= x; - } - void clearIndexIsMultikey(int i) { - dassert( i < NIndexesMax ); - unsigned long long x = ((unsigned long long) 1) << i; - if( (multiKeyIndexBits & x) == 0 ) return; - *getDur().writing(&multiKeyIndexBits) &= ~x; - } - - /* add a new index. does not add to system.indexes etc. - just to NamespaceDetails. - caller must populate returned object. - */ - IndexDetails& addIndex(const char *thisns, bool resetTransient=true); - - void aboutToDeleteAnIndex() { - *getDur().writing(&flags) = flags & ~Flag_HaveIdIndex; - } - - /* returns index of the first index in which the field is present. -1 if not present. */ - int fieldIsIndexed(const char *fieldName); - - /* called to indicate that an update fit in place. - fits also called on an insert -- idea there is that if you had some mix and then went to - pure inserts it would adapt and PF would trend to 1.0. 
note update calls insert on a move - so there is a double count there that must be adjusted for below. - - todo: greater sophistication could be helpful and added later. for example the absolute - size of documents might be considered -- in some cases smaller ones are more likely - to grow than larger ones in the same collection? (not always) - */ - void paddingFits() { - MONGO_SOMETIMES(sometimes, 4) { // do this on a sampled basis to journal less - double x = paddingFactor - 0.001; - if ( x >= 1.0 ) { - *getDur().writing(&paddingFactor) = x; - } - } - } - void paddingTooSmall() { - MONGO_SOMETIMES(sometimes, 4) { // do this on a sampled basis to journal less - /* the more indexes we have, the higher the cost of a move. so we take that into - account herein. note on a move that insert() calls paddingFits(), thus - here for example with no inserts and nIndexes = 1 we have - .001*4-.001 or a 3:1 ratio to non moves -> 75% nonmoves. insert heavy - can pushes this down considerably. further tweaking will be a good idea but - this should be an adequate starting point. - */ - double N = min(nIndexes,7) + 3; - double x = paddingFactor + (0.001 * N); - if ( x <= 2.0 ) { - *getDur().writing(&paddingFactor) = x; - } - } - } - - // @return offset in indexes[] - int findIndexByName(const char *name); - - // @return offset in indexes[] - int findIndexByKeyPattern(const BSONObj& keyPattern); - - void findIndexByType( const string& name , vector<int>& matches ) { - IndexIterator i = ii(); - while ( i.more() ) { - if ( i.next().getSpec().getTypeName() == name ) - matches.push_back( i.pos() - 1 ); - } - } - - /* @return -1 = not found - generally id is first index, so not that expensive an operation (assuming present). 
- */ - int findIdIndex() { - IndexIterator i = ii(); - while( i.more() ) { - if( i.next().isIdIndex() ) - return i.pos()-1; - } - return -1; - } - - bool haveIdIndex() { - return (flags & NamespaceDetails::Flag_HaveIdIndex) || findIdIndex() >= 0; - } - - /* return which "deleted bucket" for this size object */ - static int bucket(int n) { - for ( int i = 0; i < Buckets; i++ ) - if ( bucketSizes[i] > n ) - return i; - return Buckets-1; - } - - /* predetermine location of the next alloc without actually doing it. - if cannot predetermine returns null (so still call alloc() then) - */ - DiskLoc allocWillBeAt(const char *ns, int lenToAlloc); - - /* allocate a new record. lenToAlloc includes headers. */ - DiskLoc alloc(const char *ns, int lenToAlloc, DiskLoc& extentLoc); - - /* add a given record to the deleted chains for this NS */ - void addDeletedRec(DeletedRecord *d, DiskLoc dloc); - void dumpDeleted(set<DiskLoc> *extents = 0); - // Start from firstExtent by default. - DiskLoc firstRecord( const DiskLoc &startExtent = DiskLoc() ) const; - // Start from lastExtent by default. 
- DiskLoc lastRecord( const DiskLoc &startExtent = DiskLoc() ) const; - long long storageSize( int * numExtents = 0 , BSONArrayBuilder * extentInfo = 0 ) const; - - int averageObjectSize() { - if ( stats.nrecords == 0 ) - return 5; - return (int) (stats.datasize / stats.nrecords); - } - - NamespaceDetails *writingWithoutExtra() { - return ( NamespaceDetails* ) getDur().writingPtr( this, sizeof( NamespaceDetails ) ); - } - /** Make all linked Extra objects writeable as well */ - NamespaceDetails *writingWithExtra(); - - private: - DiskLoc _alloc(const char *ns, int len); - void maybeComplain( const char *ns, int len ) const; - DiskLoc __stdAlloc(int len, bool willBeAt); - void compact(); // combine adjacent deleted records - friend class NamespaceIndex; - struct ExtraOld { - // note we could use this field for more chaining later, so don't waste it: - unsigned long long reserved1; - IndexDetails details[NIndexesExtra]; - unsigned reserved2; - unsigned reserved3; - }; - /** Update cappedLastDelRecLastExtent() after capExtent changed in cappedTruncateAfter() */ - void cappedTruncateLastDelUpdate(); - BOOST_STATIC_ASSERT( NIndexesMax <= NIndexesBase + NIndexesExtra*2 ); - BOOST_STATIC_ASSERT( NIndexesMax <= 64 ); // multiKey bits - BOOST_STATIC_ASSERT( sizeof(NamespaceDetails::ExtraOld) == 496 ); - BOOST_STATIC_ASSERT( sizeof(NamespaceDetails::Extra) == 496 ); - }; // NamespaceDetails -#pragma pack() - - /* NamespaceDetailsTransient - - these are things we know / compute about a namespace that are transient -- things - we don't actually store in the .ns file. so mainly caching of frequently used - information. - - CAUTION: Are you maintaining this properly on a collection drop()? A dropdatabase()? Be careful. - The current field "allIndexKeys" may have too many keys in it on such an occurrence; - as currently used that does not cause anything terrible to happen. 
- - todo: cleanup code, need abstractions and separation - */ - // todo: multiple db's with the same name (repairDatbase) is not handled herein. that may be - // the way to go, if not used by repair, but need some sort of enforcement / asserts. - class NamespaceDetailsTransient : boost::noncopyable { - BOOST_STATIC_ASSERT( sizeof(NamespaceDetails) == 496 ); - - //Database *database; - const string _ns; - void reset(); - static std::map< string, shared_ptr< NamespaceDetailsTransient > > _nsdMap; - - NamespaceDetailsTransient(Database*,const char *ns); - public: - ~NamespaceDetailsTransient(); - void addedIndex() { reset(); } - void deletedIndex() { reset(); } - /* Drop cached information on all namespaces beginning with the specified prefix. - Can be useful as index namespaces share the same start as the regular collection. - SLOW - sequential scan of all NamespaceDetailsTransient objects */ - static void clearForPrefix(const char *prefix); - static void eraseForPrefix(const char *prefix); - - /** - * @return a cursor interface to the query optimizer. The implementation may utilize a - * single query plan or interleave results from multiple query plans before settling on a - * single query plan. Note that the schema of currKey() documents, indexKeyPattern(), the - * matcher(), and the isMultiKey() nature of the cursor may change over the course of - * iteration. - * - * @param query - Query used to select indexes and populate matchers; not copied if unowned - * (see bsonobj.h). - * - * @param order - Required ordering spec for documents produced by this cursor, empty object - * default indicates no order requirement. If no index exists that satisfies the required - * sort order, an empty shared_ptr is returned unless parsedQuery is also provided. This is - * not copied if unowned. 
- * - * @param planPolicy - A policy for selecting query plans - see queryoptimizercursor.h - * - * @param simpleEqualityMatch - Set to true for certain simple queries - see - * queryoptimizer.cpp. - * - * @param parsedQuery - Additional query parameters, as from a client query request. If - * specified, the resulting cursor may return results from out of order plans. See - * queryoptimizercursor.h for information on handling these results. - * - * @param singlePlanSummary - Query plan summary information that may be provided when a - * cursor running a single plan is returned. - * - * The returned cursor may @throw inside of advance() or recoverFromYield() in certain error - * cases, for example if a capped overrun occurred during a yield. This indicates that the - * cursor was unable to perform a complete scan. - * - * This is a work in progress. Partial list of features not yet implemented through this - * interface: - * - * - covered indexes - * - in memory sorting - */ - static shared_ptr<Cursor> getCursor( const char *ns, const BSONObj &query, - const BSONObj &order = BSONObj(), - const QueryPlanSelectionPolicy &planPolicy = - QueryPlanSelectionPolicy::any(), - bool *simpleEqualityMatch = 0, - const ParsedQuery *parsedQuery = 0, - QueryPlan::Summary *singlePlanSummary = 0 ); - - /** - * @return a single cursor that may work well for the given query. A $or style query will - * produce a single cursor, not a MultiCursor. - * It is possible no cursor is returned if the sort is not supported by an index. Clients are responsible - * for checking this if they are not sure an index for a sort exists, and defaulting to a non-sort if - * no suitable indices exist. 
- */ - static shared_ptr<Cursor> bestGuessCursor( const char *ns, const BSONObj &query, const BSONObj &sort ); - - /* indexKeys() cache ---------------------------------------------------- */ - /* assumed to be in write lock for this */ - private: - bool _keysComputed; - set<string> _indexKeys; - void computeIndexKeys(); - public: - /* get set of index keys for this namespace. handy to quickly check if a given - field is indexed (Note it might be a secondary component of a compound index.) - */ - set<string>& indexKeys() { - DEV Lock::assertWriteLocked(_ns); - if ( !_keysComputed ) - computeIndexKeys(); - return _indexKeys; - } - - /* IndexSpec caching */ - private: - map<const IndexDetails*,IndexSpec> _indexSpecs; - static SimpleMutex _isMutex; - public: - const IndexSpec& getIndexSpec( const IndexDetails * details ) { - IndexSpec& spec = _indexSpecs[details]; - if ( ! spec._finishedInit ) { - SimpleMutex::scoped_lock lk(_isMutex); - if ( ! spec._finishedInit ) { - spec.reset( details ); - assert( spec._finishedInit ); - } - } - return spec; - } - - /* query cache (for query optimizer) ------------------------------------- */ - private: - int _qcWriteCount; - map< QueryPattern, pair< BSONObj, long long > > _qcCache; - static NamespaceDetailsTransient& make_inlock(const char *ns); - public: - static SimpleMutex _qcMutex; - - /* you must be in the qcMutex when calling this. - A NamespaceDetailsTransient object will not go out of scope on you if you are - d.dbMutex.atLeastReadLocked(), so you do't have to stay locked. - Creates a NamespaceDetailsTransient before returning if one DNE. - todo: avoid creating too many on erroneous ns queries. 
- */ - static NamespaceDetailsTransient& get_inlock(const char *ns); - - static NamespaceDetailsTransient& get(const char *ns) { - // todo : _qcMutex will create bottlenecks in our parallelism - SimpleMutex::scoped_lock lk(_qcMutex); - return get_inlock(ns); - } - - void clearQueryCache() { // public for unit tests - _qcCache.clear(); - _qcWriteCount = 0; - } - /* you must notify the cache if you are doing writes, as query plan optimality will change */ - void notifyOfWriteOp() { - if ( _qcCache.empty() ) - return; - if ( ++_qcWriteCount >= 100 ) - clearQueryCache(); - } - BSONObj indexForPattern( const QueryPattern &pattern ) { - return _qcCache[ pattern ].first; - } - long long nScannedForPattern( const QueryPattern &pattern ) { - return _qcCache[ pattern ].second; - } - void registerIndexForPattern( const QueryPattern &pattern, const BSONObj &indexKey, long long nScanned ) { - _qcCache[ pattern ] = make_pair( indexKey, nScanned ); - } - - }; /* NamespaceDetailsTransient */ - - inline NamespaceDetailsTransient& NamespaceDetailsTransient::get_inlock(const char *ns) { - std::map< string, shared_ptr< NamespaceDetailsTransient > >::iterator i = _nsdMap.find(ns); - if( i != _nsdMap.end() && - i->second.get() ) { // could be null ptr from clearForPrefix - return *i->second; - } - return make_inlock(ns); - } - - /* NamespaceIndex is the ".ns" file you see in the data directory. It is the "system catalog" - if you will: at least the core parts. (Additional info in system.* collections.) 
- */ - class NamespaceIndex { - public: - NamespaceIndex(const string &dir, const string &database) : - ht( 0 ), dir_( dir ), database_( database ) {} - - /* returns true if new db will be created if we init lazily */ - bool exists() const; - - void init() { - if( !ht ) - _init(); - } - - void add_ns(const char *ns, DiskLoc& loc, bool capped); - void add_ns( const char *ns, const NamespaceDetails &details ); - - NamespaceDetails* details(const char *ns) { - if ( !ht ) - return 0; - Namespace n(ns); - NamespaceDetails *d = ht->get(n); - if ( d && d->capped ) - d->cappedCheckMigrate(); - return d; - } - - void kill_ns(const char *ns); - - bool find(const char *ns, DiskLoc& loc) { - NamespaceDetails *l = details(ns); - if ( l ) { - loc = l->firstExtent; - return true; - } - return false; - } - - bool allocated() const { return ht != 0; } - - void getNamespaces( list<string>& tofill , bool onlyCollections = true ) const; - - NamespaceDetails::Extra* newExtra(const char *ns, int n, NamespaceDetails *d); - - boost::filesystem::path path() const; - - unsigned long long fileLength() const { return f.length(); } - - private: - void _init(); - void maybeMkdir() const; - - MongoMMF f; - HashTable<Namespace,NamespaceDetails> *ht; - string dir_; - string database_; - }; - - extern string dbpath; // --dbpath parm - extern bool directoryperdb; - - // Rename a namespace within current 'client' db. - // (Arguments should include db name) - void renameNamespace( const char *from, const char *to, bool stayTemp); - - } // namespace mongo diff --git a/src/mongo/db/namespace_details-inl.h b/src/mongo/db/namespace_details-inl.h new file mode 100644 index 00000000000..96f85e13a13 --- /dev/null +++ b/src/mongo/db/namespace_details-inl.h @@ -0,0 +1,84 @@ +// @file namespace-inl.h + +/** +* Copyright (C) 2009 10gen Inc. 
+* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#pragma once + +#include "mongo/db/namespace_details.h" + +namespace mongo { + + inline IndexDetails& NamespaceDetails::idx(int idxNo, bool missingExpected ) { + if( idxNo < NIndexesBase ) { + IndexDetails& id = _indexes[idxNo]; + return id; + } + Extra *e = extra(); + if ( ! e ) { + if ( missingExpected ) + throw MsgAssertionException( 13283 , "Missing Extra" ); + massert(14045, "missing Extra", e); + } + int i = idxNo - NIndexesBase; + if( i >= NIndexesExtra ) { + e = e->next(this); + if ( ! 
e ) { + if ( missingExpected ) + throw MsgAssertionException( 14823 , "missing extra" ); + massert(14824, "missing Extra", e); + } + i -= NIndexesExtra; + } + return e->details[i]; + } + + inline int NamespaceDetails::idxNo(IndexDetails& idx) { + IndexIterator i = ii(); + while( i.more() ) { + if( &i.next() == &idx ) + return i.pos()-1; + } + massert( 10349 , "E12000 idxNo fails", false); + return -1; + } + + inline int NamespaceDetails::findIndexByKeyPattern(const BSONObj& keyPattern) { + IndexIterator i = ii(); + while( i.more() ) { + if( i.next().keyPattern() == keyPattern ) + return i.pos()-1; + } + return -1; + } + + // @return offset in indexes[] + inline int NamespaceDetails::findIndexByName(const char *name) { + IndexIterator i = ii(); + while( i.more() ) { + if ( strcmp(i.next().info.obj().getStringField("name"),name) == 0 ) + return i.pos()-1; + } + return -1; + } + + inline NamespaceDetails::IndexIterator::IndexIterator(NamespaceDetails *_d) { + d = _d; + i = 0; + n = d->nIndexes; + } + +} diff --git a/src/mongo/db/namespace_details.cpp b/src/mongo/db/namespace_details.cpp new file mode 100644 index 00000000000..2e5d3757d56 --- /dev/null +++ b/src/mongo/db/namespace_details.cpp @@ -0,0 +1,801 @@ +// namespace.cpp + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. +* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. 
+*/ + +#include "pch.h" +#include "pdfile.h" +#include "db.h" +#include "mongommf.h" +#include "../util/hashtab.h" +#include "../scripting/engine.h" +#include "btree.h" +#include <algorithm> +#include <list> +#include "json.h" +#include "ops/delete.h" + +#include <boost/filesystem/operations.hpp> + +namespace mongo { + + BSONObj idKeyPattern = fromjson("{\"_id\":1}"); + + /* deleted lists -- linked lists of deleted records -- are placed in 'buckets' of various sizes + so you can look for a deleterecord about the right size. + */ + int bucketSizes[] = { + 32, 64, 128, 256, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, + 0x8000, 0x10000, 0x20000, 0x40000, 0x80000, 0x100000, 0x200000, + 0x400000, 0x800000 + }; + + NamespaceDetails::NamespaceDetails( const DiskLoc &loc, bool _capped ) { + /* be sure to initialize new fields here -- doesn't default to zeroes the way we use it */ + firstExtent = lastExtent = capExtent = loc; + stats.datasize = stats.nrecords = 0; + lastExtentSize = 0; + nIndexes = 0; + capped = _capped; + max = 0x7fffffff; + paddingFactor = 1.0; + flags = 0; + capFirstNewRecord = DiskLoc(); + // Signal that we are on first allocation iteration through extents. + capFirstNewRecord.setInvalid(); + // For capped case, signal that we are doing initial extent allocation. 
+ if ( capped ) + cappedLastDelRecLastExtent().setInvalid(); + assert( sizeof(dataFileVersion) == 2 ); + dataFileVersion = 0; + indexFileVersion = 0; + multiKeyIndexBits = 0; + reservedA = 0; + extraOffset = 0; + indexBuildInProgress = 0; + reservedB = 0; + capped2.cc2_ptr = 0; + capped2.fileNumber = 0; + memset(reserved, 0, sizeof(reserved)); + } + + bool NamespaceIndex::exists() const { + return !boost::filesystem::exists(path()); + } + + boost::filesystem::path NamespaceIndex::path() const { + boost::filesystem::path ret( dir_ ); + if ( directoryperdb ) + ret /= database_; + ret /= ( database_ + ".ns" ); + return ret; + } + + void NamespaceIndex::maybeMkdir() const { + if ( !directoryperdb ) + return; + boost::filesystem::path dir( dir_ ); + dir /= database_; + if ( !boost::filesystem::exists( dir ) ) + MONGO_BOOST_CHECK_EXCEPTION_WITH_MSG( boost::filesystem::create_directory( dir ), "create dir for db " ); + } + + unsigned lenForNewNsFiles = 16 * 1024 * 1024; + +#if defined(_DEBUG) + void NamespaceDetails::dump(const Namespace& k) { + if( !cmdLine.dur ) + cout << "ns offsets which follow will not display correctly with --journal disabled" << endl; + + size_t ofs = 1; // 1 is sentinel that the find call below failed + privateViews.find(this, /*out*/ofs); + + cout << "ns" << hex << setw(8) << ofs << ' '; + cout << k.toString() << '\n'; + + if( k.isExtra() ) { + cout << "ns\t extra" << endl; + return; + } + + cout << "ns " << firstExtent.toString() << ' ' << lastExtent.toString() << " nidx:" << nIndexes << '\n'; + cout << "ns " << stats.datasize << ' ' << stats.nrecords << ' ' << nIndexes << '\n'; + cout << "ns " << capped << ' ' << paddingFactor << ' ' << flags << ' ' << dataFileVersion << '\n'; + cout << "ns " << multiKeyIndexBits << ' ' << indexBuildInProgress << '\n'; + cout << "ns " << (int) reserved[0] << ' ' << (int) reserved[59]; + cout << endl; + } +#endif + + void NamespaceDetails::onLoad(const Namespace& k) { + + if( k.isExtra() ) { + /* overflow 
storage for indexes - so don't treat as a NamespaceDetails object. */ + return; + } + + if( indexBuildInProgress || capped2.cc2_ptr ) { + assertInWriteLock(); + if( indexBuildInProgress ) { + log() << "indexBuildInProgress was " << indexBuildInProgress << " for " << k << ", indicating an abnormal db shutdown" << endl; + getDur().writingInt( indexBuildInProgress ) = 0; + } + if( capped2.cc2_ptr ) + *getDur().writing(&capped2.cc2_ptr) = 0; + } + } + + static void namespaceOnLoadCallback(const Namespace& k, NamespaceDetails& v) { + v.onLoad(k); + } + + bool checkNsFilesOnLoad = true; + + NOINLINE_DECL void NamespaceIndex::_init() { + assert( !ht ); + + Lock::assertWriteLocked(database_); + + /* if someone manually deleted the datafiles for a database, + we need to be sure to clear any cached info for the database in + local.*. + */ + /* + if ( "local" != database_ ) { + DBInfo i(database_.c_str()); + i.dbDropped(); + } + */ + + unsigned long long len = 0; + boost::filesystem::path nsPath = path(); + string pathString = nsPath.string(); + void *p = 0; + if( boost::filesystem::exists(nsPath) ) { + if( f.open(pathString, true) ) { + len = f.length(); + if ( len % (1024*1024) != 0 ) { + log() << "bad .ns file: " << pathString << endl; + uassert( 10079 , "bad .ns file length, cannot open database", len % (1024*1024) == 0 ); + } + p = f.getView(); + } + } + else { + // use lenForNewNsFiles, we are making a new database + massert( 10343, "bad lenForNewNsFiles", lenForNewNsFiles >= 1024*1024 ); + maybeMkdir(); + unsigned long long l = lenForNewNsFiles; + if( f.create(pathString, l, true) ) { + getDur().createdFile(pathString, l); // always a new file + len = l; + assert( len == lenForNewNsFiles ); + p = f.getView(); + } + } + + if ( p == 0 ) { + /** TODO: this shouldn't terminate? 
*/ + log() << "error couldn't open file " << pathString << " terminating" << endl; + dbexit( EXIT_FS ); + } + + + assert( len <= 0x7fffffff ); + ht = new HashTable<Namespace,NamespaceDetails>(p, (int) len, "namespace index"); + if( checkNsFilesOnLoad ) + ht->iterAll(namespaceOnLoadCallback); + } + + static void namespaceGetNamespacesCallback( const Namespace& k , NamespaceDetails& v , void * extra ) { + list<string> * l = (list<string>*)extra; + if ( ! k.hasDollarSign() ) + l->push_back( (string)k ); + } + void NamespaceIndex::getNamespaces( list<string>& tofill , bool onlyCollections ) const { + assert( onlyCollections ); // TODO: need to implement this + // need boost::bind or something to make this less ugly + + if ( ht ) + ht->iterAll( namespaceGetNamespacesCallback , (void*)&tofill ); + } + + void NamespaceDetails::addDeletedRec(DeletedRecord *d, DiskLoc dloc) { + BOOST_STATIC_ASSERT( sizeof(NamespaceDetails::Extra) <= sizeof(NamespaceDetails) ); + + { + Record *r = (Record *) getDur().writingPtr(d, sizeof(Record)); + d = &r->asDeleted(); + // defensive code: try to make us notice if we reference a deleted record + (unsigned&) (r->data) = 0xeeeeeeee; + } + DEBUGGING log() << "TEMP: add deleted rec " << dloc.toString() << ' ' << hex << d->extentOfs << endl; + if ( capped ) { + if ( !cappedLastDelRecLastExtent().isValid() ) { + // Initial extent allocation. Insert at end. 
+ d->nextDeleted = DiskLoc(); + if ( cappedListOfAllDeletedRecords().isNull() ) + getDur().writingDiskLoc( cappedListOfAllDeletedRecords() ) = dloc; + else { + DiskLoc i = cappedListOfAllDeletedRecords(); + for (; !i.drec()->nextDeleted.isNull(); i = i.drec()->nextDeleted ) + ; + i.drec()->nextDeleted.writing() = dloc; + } + } + else { + d->nextDeleted = cappedFirstDeletedInCurExtent(); + getDur().writingDiskLoc( cappedFirstDeletedInCurExtent() ) = dloc; + // always compact() after this so order doesn't matter + } + } + else { + int b = bucket(d->lengthWithHeaders); + DiskLoc& list = deletedList[b]; + DiskLoc oldHead = list; + getDur().writingDiskLoc(list) = dloc; + d->nextDeleted = oldHead; + } + } + + /* predetermine location of the next alloc without actually doing it. + if cannot predetermine returns null (so still call alloc() then) + */ + DiskLoc NamespaceDetails::allocWillBeAt(const char *ns, int lenToAlloc) { + if ( !capped ) { + lenToAlloc = (lenToAlloc + 3) & 0xfffffffc; + return __stdAlloc(lenToAlloc, true); + } + return DiskLoc(); + } + + /** allocate space for a new record from deleted lists. + @param lenToAlloc is WITH header + @param extentLoc OUT returns the extent location + @return null diskloc if no room - allocate a new extent then + */ + DiskLoc NamespaceDetails::alloc(const char *ns, int lenToAlloc, DiskLoc& extentLoc) { + { + // align very slightly. + // note that if doing more coarse-grained quantization (really just if it isn't always + // a constant amount but if it varied by record size) then that quantization should + // NOT be done here but rather in __stdAlloc so that we can grab a deletedrecord that + // is just big enough if we happen to run into one. 
+ lenToAlloc = (lenToAlloc + 3) & 0xfffffffc; + } + + DiskLoc loc = _alloc(ns, lenToAlloc); + if ( loc.isNull() ) + return loc; + + const DeletedRecord *r = loc.drec(); + //r = getDur().writing(r); + + /* note we want to grab from the front so our next pointers on disk tend + to go in a forward direction which is important for performance. */ + int regionlen = r->lengthWithHeaders; + extentLoc.set(loc.a(), r->extentOfs); + assert( r->extentOfs < loc.getOfs() ); + + DEBUGGING out() << "TEMP: alloc() returns " << loc.toString() << ' ' << ns << " lentoalloc:" << lenToAlloc << " ext:" << extentLoc.toString() << endl; + + int left = regionlen - lenToAlloc; + if ( capped == 0 ) { + if ( left < 24 || left < (lenToAlloc >> 3) ) { + // you get the whole thing. + return loc; + } + } + + /* split off some for further use. */ + getDur().writingInt(r->lengthWithHeaders) = lenToAlloc; + DiskLoc newDelLoc = loc; + newDelLoc.inc(lenToAlloc); + DeletedRecord *newDel = DataFileMgr::makeDeletedRecord(newDelLoc, left); + DeletedRecord *newDelW = getDur().writing(newDel); + newDelW->extentOfs = r->extentOfs; + newDelW->lengthWithHeaders = left; + newDelW->nextDeleted.Null(); + + addDeletedRec(newDel, newDelLoc); + + return loc; + } + + /* for non-capped collections. + @param peekOnly just look up where and don't reserve + returned item is out of the deleted list upon return + */ + DiskLoc NamespaceDetails::__stdAlloc(int len, bool peekOnly) { + DiskLoc *prev; + DiskLoc *bestprev = 0; + DiskLoc bestmatch; + int bestmatchlen = 0x7fffffff; + int b = bucket(len); + DiskLoc cur = deletedList[b]; + prev = &deletedList[b]; + int extra = 5; // look for a better fit, a little. 
+ int chain = 0; + while ( 1 ) { + { + int a = cur.a(); + if ( a < -1 || a >= 100000 ) { + problem() << "~~ Assertion - cur out of range in _alloc() " << cur.toString() << + " a:" << a << " b:" << b << " chain:" << chain << '\n'; + sayDbContext(); + if ( cur == *prev ) + prev->Null(); + cur.Null(); + } + } + if ( cur.isNull() ) { + // move to next bucket. if we were doing "extra", just break + if ( bestmatchlen < 0x7fffffff ) + break; + b++; + if ( b > MaxBucket ) { + // out of space. alloc a new extent. + return DiskLoc(); + } + cur = deletedList[b]; + prev = &deletedList[b]; + continue; + } + DeletedRecord *r = cur.drec(); + if ( r->lengthWithHeaders >= len && + r->lengthWithHeaders < bestmatchlen ) { + bestmatchlen = r->lengthWithHeaders; + bestmatch = cur; + bestprev = prev; + } + if ( bestmatchlen < 0x7fffffff && --extra <= 0 ) + break; + if ( ++chain > 30 && b < MaxBucket ) { + // too slow, force move to next bucket to grab a big chunk + //b++; + chain = 0; + cur.Null(); + } + else { + /*this defensive check only made sense for the mmap storage engine: + if ( r->nextDeleted.getOfs() == 0 ) { + problem() << "~~ Assertion - bad nextDeleted " << r->nextDeleted.toString() << + " b:" << b << " chain:" << chain << ", fixing.\n"; + r->nextDeleted.Null(); + }*/ + cur = r->nextDeleted; + prev = &r->nextDeleted; + } + } + + /* unlink ourself from the deleted list */ + if( !peekOnly ) { + const DeletedRecord *bmr = bestmatch.drec(); + *getDur().writing(bestprev) = bmr->nextDeleted; + bmr->nextDeleted.writing().setInvalid(); // defensive. 
+ assert(bmr->extentOfs < bestmatch.getOfs()); + } + + return bestmatch; + } + + void NamespaceDetails::dumpDeleted(set<DiskLoc> *extents) { + for ( int i = 0; i < Buckets; i++ ) { + DiskLoc dl = deletedList[i]; + while ( !dl.isNull() ) { + DeletedRecord *r = dl.drec(); + DiskLoc extLoc(dl.a(), r->extentOfs); + if ( extents == 0 || extents->count(extLoc) <= 0 ) { + out() << " bucket " << i << endl; + out() << " " << dl.toString() << " ext:" << extLoc.toString(); + if ( extents && extents->count(extLoc) <= 0 ) + out() << '?'; + out() << " len:" << r->lengthWithHeaders << endl; + } + dl = r->nextDeleted; + } + } + } + + DiskLoc NamespaceDetails::firstRecord( const DiskLoc &startExtent ) const { + for (DiskLoc i = startExtent.isNull() ? firstExtent : startExtent; + !i.isNull(); i = i.ext()->xnext ) { + if ( !i.ext()->firstRecord.isNull() ) + return i.ext()->firstRecord; + } + return DiskLoc(); + } + + DiskLoc NamespaceDetails::lastRecord( const DiskLoc &startExtent ) const { + for (DiskLoc i = startExtent.isNull() ? 
lastExtent : startExtent; + !i.isNull(); i = i.ext()->xprev ) { + if ( !i.ext()->lastRecord.isNull() ) + return i.ext()->lastRecord; + } + return DiskLoc(); + } + + int n_complaints_cap = 0; + void NamespaceDetails::maybeComplain( const char *ns, int len ) const { + if ( ++n_complaints_cap < 8 ) { + out() << "couldn't make room for new record (len: " << len << ") in capped ns " << ns << '\n'; + int i = 0; + for ( DiskLoc e = firstExtent; !e.isNull(); e = e.ext()->xnext, ++i ) { + out() << " Extent " << i; + if ( e == capExtent ) + out() << " (capExtent)"; + out() << '\n'; + out() << " magic: " << hex << e.ext()->magic << dec << " extent->ns: " << e.ext()->nsDiagnostic.toString() << '\n'; + out() << " fr: " << e.ext()->firstRecord.toString() << + " lr: " << e.ext()->lastRecord.toString() << " extent->len: " << e.ext()->length << '\n'; + } + assert( len * 5 > lastExtentSize ); // assume it is unusually large record; if not, something is broken + } + } + + /* alloc with capped table handling. 
*/ + DiskLoc NamespaceDetails::_alloc(const char *ns, int len) { + if ( !capped ) + return __stdAlloc(len, false); + + return cappedAlloc(ns,len); + } + + void NamespaceIndex::kill_ns(const char *ns) { + Lock::assertWriteLocked(ns); + if ( !ht ) + return; + Namespace n(ns); + ht->kill(n); + + for( int i = 0; i<=1; i++ ) { + try { + Namespace extra(n.extraName(i).c_str()); + ht->kill(extra); + } + catch(DBException&) { + dlog(3) << "caught exception in kill_ns" << endl; + } + } + } + + void NamespaceIndex::add_ns(const char *ns, DiskLoc& loc, bool capped) { + NamespaceDetails details( loc, capped ); + add_ns( ns, details ); + } + void NamespaceIndex::add_ns( const char *ns, const NamespaceDetails &details ) { + Lock::assertWriteLocked(ns); + init(); + Namespace n(ns); + uassert( 10081 , "too many namespaces/collections", ht->put(n, details)); + } + + /* extra space for indexes when more than 10 */ + NamespaceDetails::Extra* NamespaceIndex::newExtra(const char *ns, int i, NamespaceDetails *d) { + Lock::assertWriteLocked(ns); + assert( i >= 0 && i <= 1 ); + Namespace n(ns); + Namespace extra(n.extraName(i).c_str()); // throws userexception if ns name too long + + massert( 10350 , "allocExtra: base ns missing?", d ); + massert( 10351 , "allocExtra: extra already exists", ht->get(extra) == 0 ); + + NamespaceDetails::Extra temp; + temp.init(); + uassert( 10082 , "allocExtra: too many namespaces/collections", ht->put(extra, (NamespaceDetails&) temp)); + NamespaceDetails::Extra *e = (NamespaceDetails::Extra *) ht->get(extra); + return e; + } + NamespaceDetails::Extra* NamespaceDetails::allocExtra(const char *ns, int nindexessofar) { + NamespaceIndex *ni = nsindex(ns); + int i = (nindexessofar - NIndexesBase) / NIndexesExtra; + Extra *e = ni->newExtra(ns, i, this); + long ofs = e->ofsFrom(this); + if( i == 0 ) { + assert( extraOffset == 0 ); + *getDur().writing(&extraOffset) = ofs; + assert( extra() == e ); + } + else { + Extra *hd = extra(); + assert( hd->next(this) == 0 
); + hd->setNext(ofs); + } + return e; + } + + /* you MUST call when adding an index. see pdfile.cpp */ + IndexDetails& NamespaceDetails::addIndex(const char *thisns, bool resetTransient) { + IndexDetails *id; + try { + id = &idx(nIndexes,true); + } + catch(DBException&) { + allocExtra(thisns, nIndexes); + id = &idx(nIndexes,false); + } + + (*getDur().writing(&nIndexes))++; + if ( resetTransient ) + NamespaceDetailsTransient::get(thisns).addedIndex(); + return *id; + } + + // must be called when renaming a NS to fix up extra + void NamespaceDetails::copyingFrom(const char *thisns, NamespaceDetails *src) { + extraOffset = 0; // we are a copy -- the old value is wrong. fixing it up below. + Extra *se = src->extra(); + int n = NIndexesBase; + if( se ) { + Extra *e = allocExtra(thisns, n); + while( 1 ) { + n += NIndexesExtra; + e->copy(this, *se); + se = se->next(src); + if( se == 0 ) break; + Extra *nxt = allocExtra(thisns, n); + e->setNext( nxt->ofsFrom(this) ); + e = nxt; + } + assert( extraOffset ); + } + } + + /* returns index of the first index in which the field is present. -1 if not present. 
+ (aug08 - this method not currently used) + */ + int NamespaceDetails::fieldIsIndexed(const char *fieldName) { + massert( 10346 , "not implemented", false); + /* + for ( int i = 0; i < nIndexes; i++ ) { + IndexDetails& idx = indexes[i]; + BSONObj idxKey = idx.info.obj().getObjectField("key"); // e.g., { ts : -1 } + if ( !idxKey.getField(fieldName).eoo() ) + return i; + }*/ + return -1; + } + + long long NamespaceDetails::storageSize( int * numExtents , BSONArrayBuilder * extentInfo ) const { + Extent * e = firstExtent.ext(); + assert( e ); + + long long total = 0; + int n = 0; + while ( e ) { + total += e->length; + n++; + + if ( extentInfo ) { + extentInfo->append( BSON( "len" << e->length << "loc: " << e->myLoc.toBSONObj() ) ); + } + + e = e->getNextExtent(); + } + + if ( numExtents ) + *numExtents = n; + + return total; + } + + NamespaceDetails *NamespaceDetails::writingWithExtra() { + vector< pair< long long, unsigned > > writeRanges; + writeRanges.push_back( make_pair( 0, sizeof( NamespaceDetails ) ) ); + for( Extra *e = extra(); e; e = e->next( this ) ) { + writeRanges.push_back( make_pair( (char*)e - (char*)this, sizeof( Extra ) ) ); + } + return reinterpret_cast< NamespaceDetails* >( getDur().writingRangesAtOffsets( this, writeRanges ) ); + } + + /* ------------------------------------------------------------------------- */ + + SimpleMutex NamespaceDetailsTransient::_qcMutex("qc"); + SimpleMutex NamespaceDetailsTransient::_isMutex("is"); + map< string, shared_ptr< NamespaceDetailsTransient > > NamespaceDetailsTransient::_nsdMap; + typedef map< string, shared_ptr< NamespaceDetailsTransient > >::iterator ouriter; + + void NamespaceDetailsTransient::reset() { + Lock::assertWriteLocked(_ns); + clearQueryCache(); + _keysComputed = false; + _indexSpecs.clear(); + } + + /*static*/ NOINLINE_DECL NamespaceDetailsTransient& NamespaceDetailsTransient::make_inlock(const char *ns) { + shared_ptr< NamespaceDetailsTransient > &t = _nsdMap[ ns ]; + assert( t.get() == 0 
); + Database *database = cc().database(); + assert( database ); + if( _nsdMap.size() % 20000 == 10000 ) { + // so we notice if insanely large #s + log() << "opening namespace " << ns << endl; + log() << _nsdMap.size() << " namespaces in nsdMap" << endl; + } + t.reset( new NamespaceDetailsTransient(database, ns) ); + return *t; + } + + // note with repair there could be two databases with the same ns name. + // that is NOT handled here yet! TODO + // repair may not use nsdt though not sure. anyway, requires work. + NamespaceDetailsTransient::NamespaceDetailsTransient(Database *db, const char *ns) : + _ns(ns), _keysComputed(false), _qcWriteCount() + { + dassert(db); + } + + NamespaceDetailsTransient::~NamespaceDetailsTransient() { + } + + void NamespaceDetailsTransient::clearForPrefix(const char *prefix) { + SimpleMutex::scoped_lock lk(_qcMutex); + vector< string > found; + for( ouriter i = _nsdMap.begin(); i != _nsdMap.end(); ++i ) { + if ( strncmp( i->first.c_str(), prefix, strlen( prefix ) ) == 0 ) { + found.push_back( i->first ); + Lock::assertWriteLocked(i->first); + } + } + for( vector< string >::iterator i = found.begin(); i != found.end(); ++i ) { + _nsdMap[ *i ].reset(); + } + } + + void NamespaceDetailsTransient::eraseForPrefix(const char *prefix) { + SimpleMutex::scoped_lock lk(_qcMutex); + vector< string > found; + for( ouriter i = _nsdMap.begin(); i != _nsdMap.end(); ++i ) { + if ( strncmp( i->first.c_str(), prefix, strlen( prefix ) ) == 0 ) { + found.push_back( i->first ); + Lock::assertWriteLocked(i->first); + } + } + for( vector< string >::iterator i = found.begin(); i != found.end(); ++i ) { + _nsdMap.erase(*i); + } + } + + void NamespaceDetailsTransient::computeIndexKeys() { + _keysComputed = true; + _indexKeys.clear(); + NamespaceDetails *d = nsdetails(_ns.c_str()); + if ( ! 
d ) + return; + NamespaceDetails::IndexIterator i = d->ii(); + while( i.more() ) + i.next().keyPattern().getFieldNames(_indexKeys); + } + + + /* ------------------------------------------------------------------------- */ + + /* add a new namespace to the system catalog (<dbname>.system.namespaces). + options: { capped : ..., size : ... } + */ + void addNewNamespaceToCatalog(const char *ns, const BSONObj *options = 0) { + LOG(1) << "New namespace: " << ns << endl; + if ( strstr(ns, "system.namespaces") ) { + // system.namespaces holds all the others, so it is not explicitly listed in the catalog. + // TODO: fix above should not be strstr! + return; + } + + BSONObjBuilder b; + b.append("name", ns); + if ( options ) + b.append("options", *options); + BSONObj j = b.done(); + char database[256]; + nsToDatabase(ns, database); + string s = string(database) + ".system.namespaces"; + theDataFileMgr.insert(s.c_str(), j.objdata(), j.objsize(), true); + } + + void renameNamespace( const char *from, const char *to, bool stayTemp) { + NamespaceIndex *ni = nsindex( from ); + assert( ni ); + assert( ni->details( from ) ); + assert( ! ni->details( to ) ); + + // Our namespace and index details will move to a different + // memory location. The only references to namespace and + // index details across commands are in cursors and nsd + // transient (including query cache) so clear these. 
+ ClientCursor::invalidate( from ); + NamespaceDetailsTransient::eraseForPrefix( from ); + + NamespaceDetails *details = ni->details( from ); + ni->add_ns( to, *details ); + NamespaceDetails *todetails = ni->details( to ); + try { + todetails->copyingFrom(to, details); // fixes extraOffset + } + catch( DBException& ) { + // could end up here if .ns is full - if so try to clean up / roll back a little + ni->kill_ns(to); + throw; + } + ni->kill_ns( from ); + details = todetails; + + BSONObj oldSpec; + char database[MaxDatabaseNameLen]; + nsToDatabase(from, database); + string s = database; + s += ".system.namespaces"; + assert( Helpers::findOne( s.c_str(), BSON( "name" << from ), oldSpec ) ); + + BSONObjBuilder newSpecB; + BSONObjIterator i( oldSpec.getObjectField( "options" ) ); + while( i.more() ) { + BSONElement e = i.next(); + if ( strcmp( e.fieldName(), "create" ) != 0 ) { + if (stayTemp || (strcmp(e.fieldName(), "temp") != 0)) + newSpecB.append( e ); + } + else { + newSpecB << "create" << to; + } + } + BSONObj newSpec = newSpecB.done(); + addNewNamespaceToCatalog( to, newSpec.isEmpty() ? 
0 : &newSpec ); + + deleteObjects( s.c_str(), BSON( "name" << from ), false, false, true ); + // oldSpec variable no longer valid memory + + BSONObj oldIndexSpec; + s = database; + s += ".system.indexes"; + while( Helpers::findOne( s.c_str(), BSON( "ns" << from ), oldIndexSpec ) ) { + BSONObjBuilder newIndexSpecB; + BSONObjIterator i( oldIndexSpec ); + while( i.more() ) { + BSONElement e = i.next(); + if ( strcmp( e.fieldName(), "ns" ) != 0 ) + newIndexSpecB.append( e ); + else + newIndexSpecB << "ns" << to; + } + BSONObj newIndexSpec = newIndexSpecB.done(); + DiskLoc newIndexSpecLoc = theDataFileMgr.insert( s.c_str(), newIndexSpec.objdata(), newIndexSpec.objsize(), true, false ); + int indexI = details->findIndexByName( oldIndexSpec.getStringField( "name" ) ); + IndexDetails &indexDetails = details->idx(indexI); + string oldIndexNs = indexDetails.indexNamespace(); + indexDetails.info = newIndexSpecLoc; + string newIndexNs = indexDetails.indexNamespace(); + + renameIndexNamespace( oldIndexNs.c_str(), newIndexNs.c_str() ); + deleteObjects( s.c_str(), oldIndexSpec.getOwned(), true, false, true ); + } + } + + bool legalClientSystemNS( const string& ns , bool write ) { + if( ns == "local.system.replset" ) return true; + + if ( ns.find( ".system.users" ) != string::npos ) + return true; + + if ( ns.find( ".system.js" ) != string::npos ) { + if ( write ) + Scope::storedFuncMod(); + return true; + } + + return false; + } + +} // namespace mongo diff --git a/src/mongo/db/namespace_details.h b/src/mongo/db/namespace_details.h new file mode 100644 index 00000000000..83aa8bc5e36 --- /dev/null +++ b/src/mongo/db/namespace_details.h @@ -0,0 +1,610 @@ +// namespace_details.h + +/** +* Copyright (C) 2008 10gen Inc. +* +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU Affero General Public License, version 3, +* as published by the Free Software Foundation. 
+* +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU Affero General Public License for more details. +* +* You should have received a copy of the GNU Affero General Public License +* along with this program. If not, see <http://www.gnu.org/licenses/>. +*/ + +#pragma once + +#include "mongo/pch.h" + +#include "mongo/db/d_concurrency.h" +#include "mongo/db/diskloc.h" +#include "mongo/db/index.h" +#include "mongo/db/jsobj.h" +#include "mongo/db/mongommf.h" +#include "mongo/db/namespace.h" +#include "mongo/db/queryoptimizer.h" +#include "mongo/db/queryoptimizercursor.h" +#include "mongo/db/querypattern.h" +#include "mongo/util/hashtab.h" + +namespace mongo { + class Database; + + /** @return true if a client can modify this namespace even though it is under ".system." + For example <dbname>.system.users is ok for regular clients to update. + @param write used when .system.js + */ + bool legalClientSystemNS( const string& ns , bool write ); + + /* deleted lists -- linked lists of deleted records -- are placed in 'buckets' of various sizes + so you can look for a deleterecord about the right size. + */ + const int Buckets = 19; + const int MaxBucket = 18; + + extern int bucketSizes[]; + +#pragma pack(1) + /* NamespaceDetails : this is the "header" for a collection that has all its details. + It's in the .ns file and this is a memory mapped region (thus the pack pragma above). + */ + class NamespaceDetails { + public: + enum { NIndexesMax = 64, NIndexesExtra = 30, NIndexesBase = 10 }; + + /*-------- data fields, as present on disk : */ + DiskLoc firstExtent; + DiskLoc lastExtent; + /* NOTE: capped collections v1 override the meaning of deletedList. + deletedList[0] points to a list of free records (DeletedRecord's) for all extents in + the capped namespace. + deletedList[1] points to the last record in the prev extent. 
When the "current extent" + changes, this value is updated. !deletedList[1].isValid() when this value is not + yet computed. + */ + DiskLoc deletedList[Buckets]; + // ofs 168 (8 byte aligned) + struct Stats { + // datasize and nrecords MUST Be adjacent code assumes! + long long datasize; // this includes padding, but not record headers + long long nrecords; + } stats; + int lastExtentSize; + int nIndexes; + private: + // ofs 192 + IndexDetails _indexes[NIndexesBase]; + public: + // ofs 352 (16 byte aligned) + int capped; + int max; // max # of objects for a capped table. TODO: should this be 64 bit? + double paddingFactor; // 1.0 = no padding. + // ofs 386 (16) + int flags; + DiskLoc capExtent; + DiskLoc capFirstNewRecord; + unsigned short dataFileVersion; // NamespaceDetails version. So we can do backward compatibility in the future. See filever.h + unsigned short indexFileVersion; + unsigned long long multiKeyIndexBits; + private: + // ofs 400 (16) + unsigned long long reservedA; + long long extraOffset; // where the $extra info is located (bytes relative to this) + public: + int indexBuildInProgress; // 1 if in prog + unsigned reservedB; + // ofs 424 (8) + struct Capped2 { + unsigned long long cc2_ptr; // see capped.cpp + unsigned fileNumber; + } capped2; + char reserved[60]; + /*-------- end data 496 bytes */ + + explicit NamespaceDetails( const DiskLoc &loc, bool _capped ); + + class Extra { + long long _next; + public: + IndexDetails details[NIndexesExtra]; + private: + unsigned reserved2; + unsigned reserved3; + Extra(const Extra&) { assert(false); } + Extra& operator=(const Extra& r) { assert(false); return *this; } + public: + Extra() { } + long ofsFrom(NamespaceDetails *d) { + return ((char *) this) - ((char *) d); + } + void init() { memset(this, 0, sizeof(Extra)); } + Extra* next(NamespaceDetails *d) { + if( _next == 0 ) return 0; + return (Extra*) (((char *) d) + _next); + } + void setNext(long ofs) { *getDur().writing(&_next) = ofs; } + void 
copy(NamespaceDetails *d, const Extra& e) { + memcpy(this, &e, sizeof(Extra)); + _next = 0; + } + }; + Extra* extra() { + if( extraOffset == 0 ) return 0; + return (Extra *) (((char *) this) + extraOffset); + } + /* add extra space for indexes when more than 10 */ + Extra* allocExtra(const char *ns, int nindexessofar); + void copyingFrom(const char *thisns, NamespaceDetails *src); // must be called when renaming a NS to fix up extra + + /* called when loaded from disk */ + void onLoad(const Namespace& k); + + /* dump info on this namespace. for debugging. */ + void dump(const Namespace& k); + + /* dump info on all extents for this namespace. for debugging. */ + void dumpExtents(); + + private: + Extent *theCapExtent() const { return capExtent.ext(); } + void advanceCapExtent( const char *ns ); + DiskLoc __capAlloc(int len); + DiskLoc cappedAlloc(const char *ns, int len); + DiskLoc &cappedFirstDeletedInCurExtent(); + bool nextIsInCapExtent( const DiskLoc &dl ) const; + + public: + DiskLoc& cappedListOfAllDeletedRecords() { return deletedList[0]; } + DiskLoc& cappedLastDelRecLastExtent() { return deletedList[1]; } + void cappedDumpDelInfo(); + bool capLooped() const { return capped && capFirstNewRecord.isValid(); } + bool inCapExtent( const DiskLoc &dl ) const; + void cappedCheckMigrate(); + /** + * Truncate documents newer than the document at 'end' from the capped + * collection. The collection cannot be completely emptied using this + * function. An assertion will be thrown if that is attempted. + * @param inclusive - Truncate 'end' as well iff true + */ + void cappedTruncateAfter(const char *ns, DiskLoc end, bool inclusive); + /** Remove all documents from the capped collection */ + void emptyCappedCollection(const char *ns); + + /* when a background index build is in progress, we don't count the index in nIndexes until + complete, yet need to still use it in _indexRecord() - thus we use this function for that. 
+ */ + int nIndexesBeingBuilt() const { return nIndexes + indexBuildInProgress; } + + /* NOTE: be careful with flags. are we manipulating them in read locks? if so, + this isn't thread safe. TODO + */ + enum NamespaceFlags { + Flag_HaveIdIndex = 1 << 0 // set when we have _id index (ONLY if ensureIdIndex was called -- 0 if that has never been called) + }; + + IndexDetails& idx(int idxNo, bool missingExpected = false ); + + /** get the IndexDetails for the index currently being built in the background. (there is at most one) */ + IndexDetails& inProgIdx() { + DEV assert(indexBuildInProgress); + return idx(nIndexes); + } + + class IndexIterator { + public: + int pos() { return i; } // note this is the next one to come + bool more() { return i < n; } + IndexDetails& next() { return d->idx(i++); } + private: + friend class NamespaceDetails; + int i, n; + NamespaceDetails *d; + IndexIterator(NamespaceDetails *_d); + }; + + IndexIterator ii() { return IndexIterator(this); } + + /* hackish - find our index # in the indexes array */ + int idxNo(IndexDetails& idx); + + /* multikey indexes are indexes where there are more than one key in the index + for a single document. see multikey in wiki. + for these, we have to do some dedup work on queries. + */ + bool isMultikey(int i) const { return (multiKeyIndexBits & (((unsigned long long) 1) << i)) != 0; } + void setIndexIsMultikey(int i) { + dassert( i < NIndexesMax ); + unsigned long long x = ((unsigned long long) 1) << i; + if( multiKeyIndexBits & x ) return; + *getDur().writing(&multiKeyIndexBits) |= x; + } + void clearIndexIsMultikey(int i) { + dassert( i < NIndexesMax ); + unsigned long long x = ((unsigned long long) 1) << i; + if( (multiKeyIndexBits & x) == 0 ) return; + *getDur().writing(&multiKeyIndexBits) &= ~x; + } + + /* add a new index. does not add to system.indexes etc. - just to NamespaceDetails. + caller must populate returned object. 
+ */ + IndexDetails& addIndex(const char *thisns, bool resetTransient=true); + + void aboutToDeleteAnIndex() { + *getDur().writing(&flags) = flags & ~Flag_HaveIdIndex; + } + + /* returns index of the first index in which the field is present. -1 if not present. */ + int fieldIsIndexed(const char *fieldName); + + /* called to indicate that an update fit in place. + fits also called on an insert -- idea there is that if you had some mix and then went to + pure inserts it would adapt and PF would trend to 1.0. note update calls insert on a move + so there is a double count there that must be adjusted for below. + + todo: greater sophistication could be helpful and added later. for example the absolute + size of documents might be considered -- in some cases smaller ones are more likely + to grow than larger ones in the same collection? (not always) + */ + void paddingFits() { + MONGO_SOMETIMES(sometimes, 4) { // do this on a sampled basis to journal less + double x = paddingFactor - 0.001; + if ( x >= 1.0 ) { + *getDur().writing(&paddingFactor) = x; + } + } + } + void paddingTooSmall() { + MONGO_SOMETIMES(sometimes, 4) { // do this on a sampled basis to journal less + /* the more indexes we have, the higher the cost of a move. so we take that into + account herein. note on a move that insert() calls paddingFits(), thus + here for example with no inserts and nIndexes = 1 we have + .001*4-.001 or a 3:1 ratio to non moves -> 75% nonmoves. insert heavy + can pushes this down considerably. further tweaking will be a good idea but + this should be an adequate starting point. 
+ */ + double N = min(nIndexes,7) + 3; + double x = paddingFactor + (0.001 * N); + if ( x <= 2.0 ) { + *getDur().writing(&paddingFactor) = x; + } + } + } + + // @return offset in indexes[] + int findIndexByName(const char *name); + + // @return offset in indexes[] + int findIndexByKeyPattern(const BSONObj& keyPattern); + + void findIndexByType( const string& name , vector<int>& matches ) { + IndexIterator i = ii(); + while ( i.more() ) { + if ( i.next().getSpec().getTypeName() == name ) + matches.push_back( i.pos() - 1 ); + } + } + + /* @return -1 = not found + generally id is first index, so not that expensive an operation (assuming present). + */ + int findIdIndex() { + IndexIterator i = ii(); + while( i.more() ) { + if( i.next().isIdIndex() ) + return i.pos()-1; + } + return -1; + } + + bool haveIdIndex() { + return (flags & NamespaceDetails::Flag_HaveIdIndex) || findIdIndex() >= 0; + } + + /* return which "deleted bucket" for this size object */ + static int bucket(int n) { + for ( int i = 0; i < Buckets; i++ ) + if ( bucketSizes[i] > n ) + return i; + return Buckets-1; + } + + /* predetermine location of the next alloc without actually doing it. + if cannot predetermine returns null (so still call alloc() then) + */ + DiskLoc allocWillBeAt(const char *ns, int lenToAlloc); + + /* allocate a new record. lenToAlloc includes headers. */ + DiskLoc alloc(const char *ns, int lenToAlloc, DiskLoc& extentLoc); + + /* add a given record to the deleted chains for this NS */ + void addDeletedRec(DeletedRecord *d, DiskLoc dloc); + void dumpDeleted(set<DiskLoc> *extents = 0); + // Start from firstExtent by default. + DiskLoc firstRecord( const DiskLoc &startExtent = DiskLoc() ) const; + // Start from lastExtent by default. 
+ DiskLoc lastRecord( const DiskLoc &startExtent = DiskLoc() ) const; + long long storageSize( int * numExtents = 0 , BSONArrayBuilder * extentInfo = 0 ) const; + + int averageObjectSize() { + if ( stats.nrecords == 0 ) + return 5; + return (int) (stats.datasize / stats.nrecords); + } + + NamespaceDetails *writingWithoutExtra() { + return ( NamespaceDetails* ) getDur().writingPtr( this, sizeof( NamespaceDetails ) ); + } + /** Make all linked Extra objects writeable as well */ + NamespaceDetails *writingWithExtra(); + + private: + DiskLoc _alloc(const char *ns, int len); + void maybeComplain( const char *ns, int len ) const; + DiskLoc __stdAlloc(int len, bool willBeAt); + void compact(); // combine adjacent deleted records + friend class NamespaceIndex; + struct ExtraOld { + // note we could use this field for more chaining later, so don't waste it: + unsigned long long reserved1; + IndexDetails details[NIndexesExtra]; + unsigned reserved2; + unsigned reserved3; + }; + /** Update cappedLastDelRecLastExtent() after capExtent changed in cappedTruncateAfter() */ + void cappedTruncateLastDelUpdate(); + BOOST_STATIC_ASSERT( NIndexesMax <= NIndexesBase + NIndexesExtra*2 ); + BOOST_STATIC_ASSERT( NIndexesMax <= 64 ); // multiKey bits + BOOST_STATIC_ASSERT( sizeof(NamespaceDetails::ExtraOld) == 496 ); + BOOST_STATIC_ASSERT( sizeof(NamespaceDetails::Extra) == 496 ); + }; // NamespaceDetails +#pragma pack() + + /* NamespaceDetailsTransient + + these are things we know / compute about a namespace that are transient -- things + we don't actually store in the .ns file. so mainly caching of frequently used + information. + + CAUTION: Are you maintaining this properly on a collection drop()? A dropdatabase()? Be careful. + The current field "allIndexKeys" may have too many keys in it on such an occurrence; + as currently used that does not cause anything terrible to happen. 
+ + todo: cleanup code, need abstractions and separation + */ + // todo: multiple db's with the same name (repairDatbase) is not handled herein. that may be + // the way to go, if not used by repair, but need some sort of enforcement / asserts. + class NamespaceDetailsTransient : boost::noncopyable { + BOOST_STATIC_ASSERT( sizeof(NamespaceDetails) == 496 ); + + //Database *database; + const string _ns; + void reset(); + static std::map< string, shared_ptr< NamespaceDetailsTransient > > _nsdMap; + + NamespaceDetailsTransient(Database*,const char *ns); + public: + ~NamespaceDetailsTransient(); + void addedIndex() { reset(); } + void deletedIndex() { reset(); } + /* Drop cached information on all namespaces beginning with the specified prefix. + Can be useful as index namespaces share the same start as the regular collection. + SLOW - sequential scan of all NamespaceDetailsTransient objects */ + static void clearForPrefix(const char *prefix); + static void eraseForPrefix(const char *prefix); + + /** + * @return a cursor interface to the query optimizer. The implementation may utilize a + * single query plan or interleave results from multiple query plans before settling on a + * single query plan. Note that the schema of currKey() documents, indexKeyPattern(), the + * matcher(), and the isMultiKey() nature of the cursor may change over the course of + * iteration. + * + * @param query - Query used to select indexes and populate matchers; not copied if unowned + * (see bsonobj.h). + * + * @param order - Required ordering spec for documents produced by this cursor, empty object + * default indicates no order requirement. If no index exists that satisfies the required + * sort order, an empty shared_ptr is returned unless parsedQuery is also provided. This is + * not copied if unowned. 
+ * + * @param planPolicy - A policy for selecting query plans - see queryoptimizercursor.h + * + * @param simpleEqualityMatch - Set to true for certain simple queries - see + * queryoptimizer.cpp. + * + * @param parsedQuery - Additional query parameters, as from a client query request. If + * specified, the resulting cursor may return results from out of order plans. See + * queryoptimizercursor.h for information on handling these results. + * + * @param singlePlanSummary - Query plan summary information that may be provided when a + * cursor running a single plan is returned. + * + * The returned cursor may @throw inside of advance() or recoverFromYield() in certain error + * cases, for example if a capped overrun occurred during a yield. This indicates that the + * cursor was unable to perform a complete scan. + * + * This is a work in progress. Partial list of features not yet implemented through this + * interface: + * + * - covered indexes + * - in memory sorting + */ + static shared_ptr<Cursor> getCursor( const char *ns, const BSONObj &query, + const BSONObj &order = BSONObj(), + const QueryPlanSelectionPolicy &planPolicy = + QueryPlanSelectionPolicy::any(), + bool *simpleEqualityMatch = 0, + const ParsedQuery *parsedQuery = 0, + QueryPlan::Summary *singlePlanSummary = 0 ); + + /** + * @return a single cursor that may work well for the given query. A $or style query will + * produce a single cursor, not a MultiCursor. + * It is possible no cursor is returned if the sort is not supported by an index. Clients are responsible + * for checking this if they are not sure an index for a sort exists, and defaulting to a non-sort if + * no suitable indices exist. 
+ */ + static shared_ptr<Cursor> bestGuessCursor( const char *ns, const BSONObj &query, const BSONObj &sort ); + + /* indexKeys() cache ---------------------------------------------------- */ + /* assumed to be in write lock for this */ + private: + bool _keysComputed; + set<string> _indexKeys; + void computeIndexKeys(); + public: + /* get set of index keys for this namespace. handy to quickly check if a given + field is indexed (Note it might be a secondary component of a compound index.) + */ + set<string>& indexKeys() { + DEV Lock::assertWriteLocked(_ns); + if ( !_keysComputed ) + computeIndexKeys(); + return _indexKeys; + } + + /* IndexSpec caching */ + private: + map<const IndexDetails*,IndexSpec> _indexSpecs; + static SimpleMutex _isMutex; + public: + const IndexSpec& getIndexSpec( const IndexDetails * details ) { + IndexSpec& spec = _indexSpecs[details]; + if ( ! spec._finishedInit ) { + SimpleMutex::scoped_lock lk(_isMutex); + if ( ! spec._finishedInit ) { + spec.reset( details ); + assert( spec._finishedInit ); + } + } + return spec; + } + + /* query cache (for query optimizer) ------------------------------------- */ + private: + int _qcWriteCount; + map< QueryPattern, pair< BSONObj, long long > > _qcCache; + static NamespaceDetailsTransient& make_inlock(const char *ns); + public: + static SimpleMutex _qcMutex; + + /* you must be in the qcMutex when calling this. + A NamespaceDetailsTransient object will not go out of scope on you if you are + d.dbMutex.atLeastReadLocked(), so you do't have to stay locked. + Creates a NamespaceDetailsTransient before returning if one DNE. + todo: avoid creating too many on erroneous ns queries. 
+ */ + static NamespaceDetailsTransient& get_inlock(const char *ns); + + static NamespaceDetailsTransient& get(const char *ns) { + // todo : _qcMutex will create bottlenecks in our parallelism + SimpleMutex::scoped_lock lk(_qcMutex); + return get_inlock(ns); + } + + void clearQueryCache() { // public for unit tests + _qcCache.clear(); + _qcWriteCount = 0; + } + /* you must notify the cache if you are doing writes, as query plan optimality will change */ + void notifyOfWriteOp() { + if ( _qcCache.empty() ) + return; + if ( ++_qcWriteCount >= 100 ) + clearQueryCache(); + } + BSONObj indexForPattern( const QueryPattern &pattern ) { + return _qcCache[ pattern ].first; + } + long long nScannedForPattern( const QueryPattern &pattern ) { + return _qcCache[ pattern ].second; + } + void registerIndexForPattern( const QueryPattern &pattern, const BSONObj &indexKey, long long nScanned ) { + _qcCache[ pattern ] = make_pair( indexKey, nScanned ); + } + + }; /* NamespaceDetailsTransient */ + + inline NamespaceDetailsTransient& NamespaceDetailsTransient::get_inlock(const char *ns) { + std::map< string, shared_ptr< NamespaceDetailsTransient > >::iterator i = _nsdMap.find(ns); + if( i != _nsdMap.end() && + i->second.get() ) { // could be null ptr from clearForPrefix + return *i->second; + } + return make_inlock(ns); + } + + /* NamespaceIndex is the ".ns" file you see in the data directory. It is the "system catalog" + if you will: at least the core parts. (Additional info in system.* collections.) 
+ */ + class NamespaceIndex { + public: + NamespaceIndex(const string &dir, const string &database) : + ht( 0 ), dir_( dir ), database_( database ) {} + + /* returns true if new db will be created if we init lazily */ + bool exists() const; + + void init() { + if( !ht ) + _init(); + } + + void add_ns(const char *ns, DiskLoc& loc, bool capped); + void add_ns( const char *ns, const NamespaceDetails &details ); + + NamespaceDetails* details(const char *ns) { + if ( !ht ) + return 0; + Namespace n(ns); + NamespaceDetails *d = ht->get(n); + if ( d && d->capped ) + d->cappedCheckMigrate(); + return d; + } + + void kill_ns(const char *ns); + + bool find(const char *ns, DiskLoc& loc) { + NamespaceDetails *l = details(ns); + if ( l ) { + loc = l->firstExtent; + return true; + } + return false; + } + + bool allocated() const { return ht != 0; } + + void getNamespaces( list<string>& tofill , bool onlyCollections = true ) const; + + NamespaceDetails::Extra* newExtra(const char *ns, int n, NamespaceDetails *d); + + boost::filesystem::path path() const; + + unsigned long long fileLength() const { return f.length(); } + + private: + void _init(); + void maybeMkdir() const; + + MongoMMF f; + HashTable<Namespace,NamespaceDetails> *ht; + string dir_; + string database_; + }; + + extern string dbpath; // --dbpath parm + extern bool directoryperdb; + + // Rename a namespace within current 'client' db. 
+ // (Arguments should include db name) + void renameNamespace( const char *from, const char *to, bool stayTemp); + + +} // namespace mongo diff --git a/src/mongo/db/pdfile.h b/src/mongo/db/pdfile.h index ccab3eb8945..8520fb11af2 100644 --- a/src/mongo/db/pdfile.h +++ b/src/mongo/db/pdfile.h @@ -25,14 +25,16 @@ #pragma once -#include "../pch.h" -#include "../util/mmap.h" -#include "diskloc.h" -#include "jsobjmanipulator.h" -#include "namespace-inl.h" -#include "client.h" -#include "mongommf.h" -#include "memconcept.h" +#include "mongo/db/client.h" +#include "mongo/db/diskloc.h" +#include "mongo/db/jsobjmanipulator.h" +#include "mongo/db/memconcept.h" +#include "mongo/db/mongommf.h" +#include "mongo/db/namespace-inl.h" +#include "mongo/db/namespace_details-inl.h" +#include "mongo/db/namespacestring.h" +#include "mongo/pch.h" +#include "mongo/util/mmap.h" namespace mongo { diff --git a/src/mongo/db/security.h b/src/mongo/db/security.h index f193f305def..625a3e116bc 100755 --- a/src/mongo/db/security.h +++ b/src/mongo/db/security.h @@ -18,31 +18,16 @@ #pragma once -#include "nonce.h" -#include "concurrency.h" -#include "security_common.h" -#include "../util/concurrency/spin_lock.h" +#include "mongo/db/authlevel.h" +#include "mongo/db/concurrency.h" +#include "mongo/db/nonce.h" +#include "mongo/db/security_common.h" +#include "mongo/util/concurrency/spin_lock.h" // this is used by both mongos and mongod namespace mongo { - /* - * for a particular db - * levels - * 0 : none - * 1 : read - * 2 : write - */ - struct Auth { - - enum Level { NONE = 0 , READ = 1 , WRITE = 2 }; - - Auth() { level = NONE; } - Level level; - string user; - }; - class AuthenticationInfo : boost::noncopyable { public: bool isLocalHost; diff --git a/src/mongo/s/shard.h b/src/mongo/s/shard.h index 6b52c58a932..e30cd79841e 100644 --- a/src/mongo/s/shard.h +++ b/src/mongo/s/shard.h @@ -18,8 +18,10 @@ #pragma once -#include "../pch.h" -#include "../client/connpool.h" +#include "mongo/pch.h" + 
+#include "mongo/client/connpool.h" +#include "mongo/client/dbclient_rs.h" namespace mongo { diff --git a/src/mongo/s/strategy_shard.cpp b/src/mongo/s/strategy_shard.cpp index 97d51890615..4d971859d99 100644 --- a/src/mongo/s/strategy_shard.cpp +++ b/src/mongo/s/strategy_shard.cpp @@ -16,16 +16,17 @@ // strategy_sharded.cpp -#include "pch.h" -#include "request.h" -#include "chunk.h" -#include "cursors.h" -#include "stats.h" -#include "client.h" -#include "../bson/util/builder.h" - -#include "../client/connpool.h" -#include "../db/commands.h" +#include "mongo/pch.h" + +#include "mongo/bson/util/builder.h" +#include "mongo/client/connpool.h" +#include "mongo/db/commands.h" +#include "mongo/db/index.h" +#include "mongo/s/client.h" +#include "mongo/s/cursors.h" +#include "mongo/s/request.h" +#include "mongo/s/stats.h" +#include "mongo/s/chunk.h" // error codes 8010-8040 diff --git a/src/mongo/tools/restore.cpp b/src/mongo/tools/restore.cpp index 82cc99f8395..eaa4ea77dcb 100644 --- a/src/mongo/tools/restore.cpp +++ b/src/mongo/tools/restore.cpp @@ -16,19 +16,20 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -#include "../pch.h" -#include "../client/dbclient.h" -#include "../util/mmap.h" -#include "../util/version.h" -#include "tool.h" +#include "mongo/pch.h" -#include <boost/program_options.hpp> -#include <boost/filesystem/operations.hpp> #include <boost/filesystem/convenience.hpp> - +#include <boost/filesystem/operations.hpp> +#include <boost/program_options.hpp> #include <fcntl.h> -#include <set> #include <fstream> +#include <set> + +#include "mongo/client/dbclient.h" +#include "mongo/db/namespacestring.h" +#include "mongo/tools/tool.h" +#include "mongo/util/mmap.h" +#include "mongo/util/version.h" using namespace mongo; diff --git a/src/mongo/tools/tool.cpp b/src/mongo/tools/tool.cpp index 767debbf947..eafae9cdfd0 100644 --- a/src/mongo/tools/tool.cpp +++ b/src/mongo/tools/tool.cpp @@ -16,16 +16,17 @@ // Tool.cpp -#include "tool.h" +#include "mongo/tools/tool.h" #include <fstream> #include <iostream> #include "pcrecpp.h" -#include "util/file_allocator.h" -#include "util/password.h" -#include "util/version.h" +#include "mongo/db/namespace_details.h" +#include "mongo/util/file_allocator.h" +#include "mongo/util/password.h" +#include "mongo/util/version.h" #include <boost/filesystem/operations.hpp> diff --git a/src/mongo/util/mmap.cpp b/src/mongo/util/mmap.cpp index 85af905584c..a3911f83d75 100755 --- a/src/mongo/util/mmap.cpp +++ b/src/mongo/util/mmap.cpp @@ -15,16 +15,19 @@ * limitations under the License. 
*/ -#include "pch.h" -#include "mmap.h" -#include "processinfo.h" -#include "concurrency/rwlock.h" -#include "../db/namespace.h" -#include "../db/cmdline.h" -#include "progress_meter.h" +#include "mongo/pch.h" + +#include "mongo/util/mmap.h" #include <boost/filesystem/operations.hpp> +#include "mongo/db/cmdline.h" +#include "mongo/db/namespace.h" +#include "mongo/util/concurrency/rwlock.h" +#include "mongo/util/mongoutils/str.h" +#include "mongo/util/processinfo.h" +#include "mongo/util/progress_meter.h" + namespace mongo { set<MongoFile*> MongoFile::mmfiles; @@ -58,7 +61,7 @@ namespace mongo { l = boost::filesystem::file_size( filename ); } catch(boost::filesystem::filesystem_error& e) { - uasserted(15922, str::stream() << "couldn't get file length when opening mapping " << filename << ' ' << e.what() ); + uasserted(15922, mongoutils::str::stream() << "couldn't get file length when opening mapping " << filename << ' ' << e.what() ); } return map( filename , l ); } @@ -68,7 +71,7 @@ namespace mongo { l = boost::filesystem::file_size( filename ); } catch(boost::filesystem::filesystem_error& e) { - uasserted(15923, str::stream() << "couldn't get file length when opening mapping " << filename << ' ' << e.what() ); + uasserted(15923, mongoutils::str::stream() << "couldn't get file length when opening mapping " << filename << ' ' << e.what() ); } return map( filename , l, options ); } diff --git a/src/mongo/util/version.h b/src/mongo/util/version.h index 64f8b140fd5..fcccc1494ab 100644 --- a/src/mongo/util/version.h +++ b/src/mongo/util/version.h @@ -3,21 +3,21 @@ #include <string> +#include "mongo/bson/stringdata.h" + namespace mongo { struct BSONArray; - using std::string; - // mongo version extern const char versionString[]; extern const BSONArray versionArray; - string mongodVersion(); + std::string mongodVersion(); int versionCmp(StringData rhs, StringData lhs); // like strcmp const char * gitVersion(); void printGitVersion(); - string sysInfo(); + std::string 
sysInfo(); void printSysInfo(); void show_warnings(); |