summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorDan Pasette <dan@10gen.com>2015-02-04 06:48:51 -0500
committerDan Pasette <dan@mongodb.com>2015-02-04 06:55:52 -0500
commit7d9ec251cf0e70bc0f9bb246aacfb6e62226ad37 (patch)
tree5b436359112bc5610dcf9fd1e1f7331854b388d6 /src
parentfc14926f9c8256edce8bbd15d439ca34667c6ebb (diff)
downloadmongo-7d9ec251cf0e70bc0f9bb246aacfb6e62226ad37.tar.gz
Import wiredtiger-wiredtiger-mongodb-2.8-rc7-105-g92325a3.tar.gz from wiredtiger branch mongodb-2.8
Diffstat (limited to 'src')
-rw-r--r--src/third_party/wiredtiger/.hgtags1
-rw-r--r--src/third_party/wiredtiger/SConstruct195
-rw-r--r--src/third_party/wiredtiger/build_win/wiredtiger.def21
-rw-r--r--src/third_party/wiredtiger/dist/api_data.py6
-rw-r--r--src/third_party/wiredtiger/dist/s_all2
-rw-r--r--src/third_party/wiredtiger/dist/s_define.list3
-rw-r--r--src/third_party/wiredtiger/dist/s_export (renamed from src/third_party/wiredtiger/dist/s_symbols)2
-rw-r--r--src/third_party/wiredtiger/dist/s_export.list (renamed from src/third_party/wiredtiger/dist/s_symbols.list)0
-rwxr-xr-xsrc/third_party/wiredtiger/dist/s_prototypes2
-rw-r--r--src/third_party/wiredtiger/dist/s_string.ok1
-rwxr-xr-x[-rw-r--r--]src/third_party/wiredtiger/dist/s_typedef8
-rwxr-xr-xsrc/third_party/wiredtiger/dist/s_win14
-rw-r--r--src/third_party/wiredtiger/dist/serial.py189
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_cursor.c16
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_debug.c6
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_handle.c84
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_huffman.c27
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_page.c5
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_slvg.c12
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_split.c172
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_stat.c7
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_vrfy.c7
-rw-r--r--src/third_party/wiredtiger/src/btree/col_srch.c1
-rw-r--r--src/third_party/wiredtiger/src/btree/row_srch.c3
-rw-r--r--src/third_party/wiredtiger/src/config/config_def.c11
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_cache_pool.c16
-rw-r--r--src/third_party/wiredtiger/src/conn/conn_sweep.c8
-rw-r--r--src/third_party/wiredtiger/src/evict/evict_file.c42
-rw-r--r--src/third_party/wiredtiger/src/evict/evict_lru.c7
-rw-r--r--src/third_party/wiredtiger/src/evict/evict_page.c25
-rw-r--r--src/third_party/wiredtiger/src/include/btmem.h21
-rw-r--r--src/third_party/wiredtiger/src/include/btree.h23
-rw-r--r--src/third_party/wiredtiger/src/include/btree.i203
-rw-r--r--src/third_party/wiredtiger/src/include/cache.h5
-rw-r--r--src/third_party/wiredtiger/src/include/cache.i2
-rw-r--r--src/third_party/wiredtiger/src/include/connection.h2
-rw-r--r--src/third_party/wiredtiger/src/include/cursor.i5
-rw-r--r--src/third_party/wiredtiger/src/include/extern.h20
-rw-r--r--src/third_party/wiredtiger/src/include/gcc.h8
-rw-r--r--src/third_party/wiredtiger/src/include/lint.h9
-rw-r--r--src/third_party/wiredtiger/src/include/log.h4
-rw-r--r--src/third_party/wiredtiger/src/include/lsm.h4
-rw-r--r--src/third_party/wiredtiger/src/include/msvc.h15
-rw-r--r--src/third_party/wiredtiger/src/include/mutex.h8
-rw-r--r--src/third_party/wiredtiger/src/include/serial.i6
-rw-r--r--src/third_party/wiredtiger/src/include/session.h4
-rw-r--r--src/third_party/wiredtiger/src/include/txn.h4
-rw-r--r--src/third_party/wiredtiger/src/include/wt_internal.h4
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_track.c41
-rw-r--r--src/third_party/wiredtiger/src/reconcile/rec_write.c27
50 files changed, 703 insertions, 605 deletions
diff --git a/src/third_party/wiredtiger/.hgtags b/src/third_party/wiredtiger/.hgtags
index 054227388cf..ad2212f950a 100644
--- a/src/third_party/wiredtiger/.hgtags
+++ b/src/third_party/wiredtiger/.hgtags
@@ -26,3 +26,4 @@ b5c9f28d72fe1f835d24fe427e211a539f8709fe 1.5.2
5c9acd2584f2657dec2a44fd8b54211bf9c21193 mongodb-2.8-rc5
f40795b146bd35a623ef57de5b875a817925b7c9 mongodb-2.8-rc5
c3cac38f1632e0e20d0279639bb59ff11700e692 mongodb-2.8-rc6
+9b61b870d26feafd8f8058cfffc6ac817890ce2b mongodb-2.8-rc7
diff --git a/src/third_party/wiredtiger/SConstruct b/src/third_party/wiredtiger/SConstruct
index b075afe696a..1c109c973c0 100644
--- a/src/third_party/wiredtiger/SConstruct
+++ b/src/third_party/wiredtiger/SConstruct
@@ -1,6 +1,10 @@
# -*- mode: python; -*-
import re
import os
+import shutil
+import subprocess
+import sys
+import tempfile
import textwrap
import distutils.sysconfig
@@ -10,33 +14,51 @@ if not os.sys.platform == "win32":
print ("SConstruct is only supported for Windows, use build_posix for other platforms")
Exit(1)
-AddOption("--with-berkeley-db", dest="bdb", type="string", nargs=1, action="store",
- help="Berkeley DB install path, ie, /usr/local")
+# Command line options
+#
+AddOption("--dynamic-crt", dest="dynamic-crt", action="store_true", default=False,
+ help="Link with the MSVCRT DLL version")
-AddOption("--enable-zlib", dest="zlib", type="string", nargs=1, action="store",
- help="Use zlib compression")
+AddOption("--enable-attach", dest="attach", action="store_true", default=False,
+ help="Configure for debugger attach on failure.")
+
+AddOption("--enable-diagnostic", dest="diagnostic", action="store_true", default=False,
+ help="Configure WiredTiger to perform various run-time diagnostic tests. DO NOT configure this option in production environments.")
+
+AddOption("--enable-python", dest="lang-python", type="string", nargs=1, action="store",
+ help="Build Python extension, specify location of swig.exe binary")
AddOption("--enable-snappy", dest="snappy", type="string", nargs=1, action="store",
help="Use snappy compression")
-AddOption("--enable-swig", dest="swig", type="string", nargs=1, action="store",
- help="Build python extension, specify location of swig.exe binary")
+AddOption("--enable-verbose", dest="verbose", action="store_true", default=False,
+ help="Configure WiredTiger to support the verbose configuration string to wiredtiger_open")
-AddOption("--dynamic-crt", dest="dynamic-crt", action="store_true", default=False,
- help="Link with the MSVCRT DLL version")
+AddOption("--enable-zlib", dest="zlib", type="string", nargs=1, action="store",
+ help="Use zlib compression")
+AddOption("--prefix", dest="prefix", type="string", nargs=1, action="store", default="package",
+ help="Install directory")
+
+AddOption("--with-berkeley-db", dest="bdb", type="string", nargs=1, action="store",
+ help="Berkeley DB install path, ie, /usr/local")
+
+# Get the swig binary from the command line option since SCONS cannot find it automatically
+#
+swig_binary = GetOption("lang-python")
+
+# Initialize environment
+#
env = Environment(
CPPPATH = ["#/src/include/",
"#/build_win",
"#/test/windows",
"#/.",
- distutils.sysconfig.get_python_inc()
],
- #CPPDEFINES = ["HAVE_DIAGNOSTIC", "HAVE_VERBOSE"],
CFLAGS = [
"/Z7", # Generate debugging symbols
"/wd4090", # Ignore warning about mismatched const qualifiers
- "/wd4996",
+ "/wd4996",
"/W3", # Warning level 3
"/we4013", # Error on undefined functions
"/TC", # Compile as C code
@@ -59,17 +81,12 @@ env = Environment(
"/DYNAMICBASE",
"/NXCOMPAT",
],
- LIBPATH=[ distutils.sysconfig.PREFIX + r"\libs"],
tools=["default", "swig", "textfile"],
- SWIGFLAGS=['-python',
- "-threads",
- "-O",
- "-nodefaultctor",
- "-nodefaultdtor"
- ],
- SWIG=GetOption("swig")
+ SWIG=swig_binary
)
+env['STATIC_AND_SHARED_OBJECTS_ARE_THE_SAME'] = 1
+
useZlib = GetOption("zlib")
useSnappy = GetOption("snappy")
useBdb = GetOption("bdb")
@@ -109,7 +126,24 @@ if useBdb:
env = conf.Finish()
+# Configure build environment variables
+#
+if GetOption("attach"):
+ env.Append(CPPDEFINES = ["HAVE_ATTACH"])
+
+if GetOption("diagnostic"):
+ env.Append(CPPDEFINES = ["HAVE_DIAGNOSTIC"])
+
+if GetOption("lang-python"):
+ env.Append(LIBPATH=[distutils.sysconfig.PREFIX + r"\libs"])
+ env.Append(CPPPATH=[distutils.sysconfig.get_python_inc()])
+if GetOption("verbose"):
+ env.Append(CPPDEFINES = ["HAVE_VERBOSE"])
+
+
+# Build WiredTiger.h file
+#
version_file = 'build_posix/aclocal/version-set.m4'
VERSION_MAJOR = None
@@ -147,7 +181,7 @@ replacements = {
'@wiredtiger_includes_decl@': wiredtiger_includes
}
-env.Substfile(
+wtheader = env.Substfile(
target='wiredtiger.h',
source=[
'src/include/wiredtiger.in',
@@ -170,11 +204,27 @@ if useZlib:
if useSnappy:
wtsources.append("ext/compressors/snappy/snappy_compress.c")
-wtlib = env.Library("wiredtiger", wtsources)
+wt_objs = [env.Object(a) for a in wtsources]
+
+# Static Library - libwiredtiger.lib
+#
+wtlib = env.Library(
+ target="libwiredtiger",
+ source=wt_objs, LIBS=wtlibs)
env.Depends(wtlib, [filelistfile, version_file])
-env.Program("wt", [
+# Dynamically Loaded Library - wiredtiger.dll
+#
+wtdll = env.SharedLibrary(
+ target="wiredtiger",
+ source=wt_objs + ['build_win/wiredtiger.def'], LIBS=wtlibs)
+
+env.Depends(wtdll, [filelistfile, version_file])
+
+Default(wtlib, wtdll)
+
+wtbin = env.Program("wt", [
"src/utilities/util_backup.c",
"src/utilities/util_cpyright.c",
"src/utilities/util_compact.c",
@@ -198,27 +248,48 @@ env.Program("wt", [
"src/utilities/util_write.c"],
LIBS=[wtlib] + wtlibs)
-if GetOption("swig"):
- swiglib = env.SharedLibrary('_wiredtiger',
+Default(wtbin)
+
+# Python SWIG wrapper for WiredTiger
+if GetOption("lang-python"):
+ # Check that this version of python is 64-bit
+ #
+ if sys.maxsize < 2**32:
+ print "The Python Interpreter must be 64-bit in order to build the python bindings"
+ Exit(1)
+
+ pythonEnv = env.Clone()
+ pythonEnv.Append(SWIGFLAGS=[
+ "-python",
+ "-threads",
+ "-O",
+ "-nodefaultctor",
+ "-nodefaultdtor",
+ ])
+
+ swiglib = pythonEnv.SharedLibrary('_wiredtiger',
[ 'lang\python\wiredtiger.i'],
SHLIBSUFFIX=".pyd",
- LIBS=[wtlib])
+ LIBS=[wtlib] + wtlibs)
- copySwig = env.Command(
+ copySwig = pythonEnv.Command(
'lang/python/wiredtiger/__init__.py',
'lang/python/wiredtiger.py',
Copy('$TARGET', '$SOURCE'))
- env.Depends(copySwig, swiglib)
+ pythonEnv.Depends(copySwig, swiglib)
- env.Install('lang/python/wiredtiger/', swiglib)
+ swiginstall = pythonEnv.Install('lang/python/wiredtiger/', swiglib)
+
+ Default(swiginstall, copySwig)
# Shim library of functions to emulate POSIX on Windows
shim = env.Library("window_shim",
["test/windows/windows_shim.c"])
-env.Program("t_bloom",
+t = env.Program("t_bloom",
"test/bloom/test_bloom.c",
- LIBS=[wtlib])
+ LIBS=[wtlib] + wtlibs)
+Default(t)
#env.Program("t_checkpoint",
#["test/checkpoint/checkpointer.c",
@@ -226,9 +297,10 @@ env.Program("t_bloom",
#"test/checkpoint/workers.c"],
#LIBS=[wtlib])
-env.Program("t_huge",
+t = env.Program("t_huge",
"test/huge/huge.c",
- LIBS=[wtlib])
+ LIBS=[wtlib] + wtlibs)
+Default(t)
#env.Program("t_fops",
#["test/fops/file.c",
@@ -241,7 +313,7 @@ if useBdb:
benv.Append(CPPDEFINES=['BERKELEY_DB_PATH=\\"' + useBdb.replace("\\", "\\\\") + '\\"'])
- benv.Program("t_format",
+ t = benv.Program("t_format",
["test/format/backup.c",
"test/format/bdb.c",
"test/format/bulk.c",
@@ -252,7 +324,8 @@ if useBdb:
"test/format/t.c",
"test/format/util.c",
"test/format/wts.c"],
- LIBS=[wtlib, shim, "libdb61"])
+ LIBS=[wtlib, shim, "libdb61"] + wtlibs)
+ Default(t)
#env.Program("t_thread",
#["test/thread/file.c",
@@ -265,13 +338,14 @@ if useBdb:
#["test/salvage/salvage.c"],
#LIBS=[wtlib])
-env.Program("wtperf", [
+t = env.Program("wtperf", [
"bench/wtperf/config.c",
"bench/wtperf/misc.c",
"bench/wtperf/track.c",
"bench/wtperf/wtperf.c",
],
- LIBS=[wtlib, shim] )
+ LIBS=[wtlib, shim] + wtlibs)
+Default(t)
examples = [
"ex_access",
@@ -293,9 +367,52 @@ examples = [
"ex_thread",
]
+# WiredTiger Smoke Test support
+# Runs each test in a custom temporary directory
+#
+def run_smoke_test(x):
+ print "Running Smoke Test: " + x
+
+ # Make temp dir
+ temp_dir = tempfile.mkdtemp(prefix="wt_home")
+
+ try:
+ # Set WIREDTIGER_HOME environment variable for test
+ os.environ["WIREDTIGER_HOME"] = temp_dir
+
+ # Run the test
+ ret = subprocess.call(x);
+ if( ret != 0):
+ sys.stderr.write("Bad exit code %d\n" % (ret))
+ raise Exception()
+
+ finally:
+ # Clean directory
+ #
+ shutil.rmtree(temp_dir)
+
+def builder_smoke_test(target, source, env):
+ run_smoke_test(source[0].abspath)
+ return None
+
+env.Append(BUILDERS={'SmokeTest' : Builder(action = builder_smoke_test)})
+
for ex in examples:
- if(ex in ['ex_async', 'ex_thread']):
- env.Program(ex, "examples/c/" + ex + ".c", LIBS=[wtlib, shim])
+ if(ex in ['ex_all', 'ex_async', 'ex_thread']):
+ exp = env.Program(ex, "examples/c/" + ex + ".c", LIBS=[wtlib, shim] + wtlibs)
+ Default(exp)
+ env.Alias("check", env.SmokeTest(exp))
else:
- env.Program(ex, "examples/c/" + ex + ".c", LIBS=[wtlib])
+ exp = env.Program(ex, "examples/c/" + ex + ".c", LIBS=[wtdll[1]] + wtlibs)
+ Default(exp)
+ if not ex == 'ex_log':
+ env.Alias("check", env.SmokeTest(exp))
+# Install Target
+#
+prefix = GetOption("prefix")
+env.Alias("install", env.Install(os.path.join(prefix, "bin"), wtbin))
+env.Alias("install", env.Install(os.path.join(prefix, "bin"), wtdll[0])) # Just the dll
+env.Alias("install", env.Install(os.path.join(prefix, "include"), wtheader))
+env.Alias("install", env.Install(os.path.join(prefix, "lib"), wtdll[1])) # Just the import lib
+env.Alias("install", env.Install(os.path.join(prefix, "lib"), wtlib))
diff --git a/src/third_party/wiredtiger/build_win/wiredtiger.def b/src/third_party/wiredtiger/build_win/wiredtiger.def
new file mode 100644
index 00000000000..02884e4fd65
--- /dev/null
+++ b/src/third_party/wiredtiger/build_win/wiredtiger.def
@@ -0,0 +1,21 @@
+LIBRARY WIREDTIGER
+EXPORTS
+ wiredtiger_config_parser_open
+ wiredtiger_open
+ wiredtiger_pack_close
+ wiredtiger_pack_int
+ wiredtiger_pack_item
+ wiredtiger_pack_start
+ wiredtiger_pack_str
+ wiredtiger_pack_uint
+ wiredtiger_strerror
+ wiredtiger_strerror_r
+ wiredtiger_struct_pack
+ wiredtiger_struct_size
+ wiredtiger_struct_unpack
+ wiredtiger_unpack_int
+ wiredtiger_unpack_item
+ wiredtiger_unpack_start
+ wiredtiger_unpack_str
+ wiredtiger_unpack_uint
+ wiredtiger_version
diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py
index 7754a3a1d13..65af833c4a2 100644
--- a/src/third_party/wiredtiger/dist/api_data.py
+++ b/src/third_party/wiredtiger/dist/api_data.py
@@ -241,6 +241,12 @@ file_config = format_meta + [
minimum gain before prefix compression will be used on row-store
leaf pages''',
min=0),
+ Config('split_deepen_min_child', '0', r'''
+ minimum entries in a page to consider deepening the tree''',
+ type='int', undoc=True),
+ Config('split_deepen_per_child', '0', r'''
+ entries allocated per child when deepening the tree''',
+ type='int', undoc=True),
Config('split_pct', '75', r'''
the Btree page split size as a percentage of the maximum Btree
page size, that is, when a Btree page is split, it will be
diff --git a/src/third_party/wiredtiger/dist/s_all b/src/third_party/wiredtiger/dist/s_all
index b63ed314453..96e69ddff04 100644
--- a/src/third_party/wiredtiger/dist/s_all
+++ b/src/third_party/wiredtiger/dist/s_all
@@ -66,6 +66,7 @@ run "sh ./s_tags" "building tags files"
run "sh ./s_copyright" "checking copyright notices"
run "sh ./s_define" "checking for unused #defines"
+run "sh ./s_export" "checking external symbol names"
run "sh ./s_funcs" "checking for unused functions"
run "sh ./s_getopt" "checking for incorrect getopt usage"
run "sh ./s_lang" "checking for SWIG generated name conflicts"
@@ -74,7 +75,6 @@ run "sh ./s_stat" "checking for unused statistics fields"
run "sh ./s_string" "checking string spelling"
run "python style.py" "checking style (pass 1)"
run "sh ./s_style" "checking style (pass 2)"
-run "sh ./s_symbols" "checking external symbol names"
run "sh ./s_typedef -c" "checking for unused typedefs"
run "sh ./s_whitespace" "checking whitespace"
run "sh ./s_win" "checking windows config"
diff --git a/src/third_party/wiredtiger/dist/s_define.list b/src/third_party/wiredtiger/dist/s_define.list
index 7803b628917..91fbc971afa 100644
--- a/src/third_party/wiredtiger/dist/s_define.list
+++ b/src/third_party/wiredtiger/dist/s_define.list
@@ -34,11 +34,14 @@ WT_ATOMIC_SUB2
WT_BARRIER
WT_BLOCK_DESC_SIZE
WT_CACHE_LINE_ALIGNMENT
+WT_COMPILER_TYPE_ALIGN
WT_CONN_CHECK_PANIC
WT_DEADLOCK
WT_DEBUG_BYTE
WT_HANDLE_CLOSED
WT_HANDLE_NULLABLE
+WT_PACKED_STRUCT_BEGIN
+WT_PACKED_STRUCT_END
WT_READ_BARRIER
WT_REF_SIZE
WT_SPINLOCK_MAX
diff --git a/src/third_party/wiredtiger/dist/s_symbols b/src/third_party/wiredtiger/dist/s_export
index e590ab6f62c..1212b5b2c1f 100644
--- a/src/third_party/wiredtiger/dist/s_symbols
+++ b/src/third_party/wiredtiger/dist/s_export
@@ -23,7 +23,7 @@ esac
check()
{
- (sed -e '/^#/d' s_symbols.list &&
+ (sed -e '/^#/d' s_export.list &&
eval $NM |
sed 's/.* //' |
egrep -v '^__wt') |
diff --git a/src/third_party/wiredtiger/dist/s_symbols.list b/src/third_party/wiredtiger/dist/s_export.list
index 8f469e94433..8f469e94433 100644
--- a/src/third_party/wiredtiger/dist/s_symbols.list
+++ b/src/third_party/wiredtiger/dist/s_export.list
diff --git a/src/third_party/wiredtiger/dist/s_prototypes b/src/third_party/wiredtiger/dist/s_prototypes
index f29b96a1f55..603c0f5633d 100755
--- a/src/third_party/wiredtiger/dist/s_prototypes
+++ b/src/third_party/wiredtiger/dist/s_prototypes
@@ -31,7 +31,7 @@ for i in `sed -e '/^[a-z]/!d' filelist`; do
-e 's/\* /\*/g' \
-e 's/ */ /g' \
-e 's/^/extern /' \
- -e 's/WT_GCC_FUNC_/WT_GCC_/' \
+ -e 's/WT_GCC_FUNC_/WT_GCC_FUNC_DECL_/' \
-e 's/$/;/p' \
< ../$i
done) > $t
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok
index cea96db2848..1658684313c 100644
--- a/src/third_party/wiredtiger/dist/s_string.ok
+++ b/src/third_party/wiredtiger/dist/s_string.ok
@@ -881,6 +881,7 @@ poptable
pos
posint
posix
+pragmas
pre
prealloc
preload
diff --git a/src/third_party/wiredtiger/dist/s_typedef b/src/third_party/wiredtiger/dist/s_typedef
index 6b230223baa..2e206757f48 100644..100755
--- a/src/third_party/wiredtiger/dist/s_typedef
+++ b/src/third_party/wiredtiger/dist/s_typedef
@@ -20,8 +20,12 @@ build() {
l=`ls ../src/include/*.[hi] ../src/include/*.in |
sed -e '/wiredtiger.*/d' -e '/queue.h/d'`
- egrep -h '^[ ]*(struct|union)[ ]*__.*[ ]*{' $l | \
- sed -e 's/^[ ]*//' -e 's/[ ]*{.*//' | sort | \
+ egrep -h \
+ '^[ ]*(((struct|union)[ ].*__wt_.*{)|WT_PACKED_STRUCT_BEGIN)' \
+ $l |
+ sed -e 's/WT_PACKED_STRUCT_BEGIN(\(.*\))/struct \1 {/' \
+ -e 's/WT_COMPILER_TYPE_ALIGN(.*)[ ]*//' \
+ -e 's/^[ ]*//' -e 's/[ ]*{.*//' | sort | \
while read t n; do
upper=`echo $n | sed -e 's/^__//' | tr [a-z] [A-Z]`
echo "$t $n;"
diff --git a/src/third_party/wiredtiger/dist/s_win b/src/third_party/wiredtiger/dist/s_win
index 187de91e498..cdfc71a8a1e 100755
--- a/src/third_party/wiredtiger/dist/s_win
+++ b/src/third_party/wiredtiger/dist/s_win
@@ -26,6 +26,19 @@ win_config()
}
}
+win_export()
+{
+ # Build the Windows list of exported symbols.
+ f='../build_win/wiredtiger.def'
+ (echo 'LIBRARY WIREDTIGER'
+ echo 'EXPORTS'
+ sed -e '/^$/d' \
+ -e '/^#/d' \
+ -e 's/^/ /') < s_export.list > $t
+ cmp $t $f > /dev/null 2>&1 ||
+ (echo "Building $f" && rm -f $f && cp $t $f)
+}
+
win_filelist()
{
f='../build_win/filelist.win'
@@ -65,6 +78,7 @@ win_filelist()
}
win_config
+win_export
win_filelist
exit 0
diff --git a/src/third_party/wiredtiger/dist/serial.py b/src/third_party/wiredtiger/dist/serial.py
deleted file mode 100644
index ddadbbdb6be..00000000000
--- a/src/third_party/wiredtiger/dist/serial.py
+++ /dev/null
@@ -1,189 +0,0 @@
-# Output serialization functions.
-
-import textwrap
-from dist import compare_srcfile
-
-class SerialArg:
- def __init__(self, typestr, name, sized=0):
- self.typestr = typestr
- self.name = name
- self.sized = sized
-
-class Serial:
- def __init__(self, name, args):
- self.name = name
- self.args = args
-
-msgtypes = [
-Serial('col_append', [
- SerialArg('WT_INSERT_HEAD *', 'ins_head'),
- SerialArg('WT_INSERT ***', 'ins_stack'),
- SerialArg('WT_INSERT *', 'new_ins', 1),
- SerialArg('uint64_t *', 'recnop'),
- SerialArg('u_int', 'skipdepth'),
- ]),
-
-Serial('insert', [
- SerialArg('WT_INSERT_HEAD *', 'ins_head'),
- SerialArg('WT_INSERT ***', 'ins_stack'),
- SerialArg('WT_INSERT *', 'new_ins', 1),
- SerialArg('u_int', 'skipdepth'),
- ]),
-
-Serial('update', [
- SerialArg('WT_UPDATE **', 'srch_upd'),
- SerialArg('WT_UPDATE *', 'upd', 1),
- ]),
-]
-
-# decl --
-# Return a declaration for the variable.
-def decl(l):
- o = l.typestr
- if o[-1] != '*':
- o += ' '
- return o + l.name
-
-# decl_p --
-# Return a declaration for a reference to the variable, which requires
-# another level of indirection.
-def decl_p(l):
- o = l.typestr
- if o[-1] != '*':
- o += ' '
- return o + '*' + l.name + 'p'
-
-# output --
-# Create serialized function calls.
-def output(entry, f):
- # Function declaration.
- f.write('static inline int\n__wt_' + entry.name + '_serial(\n')
- o = 'WT_SESSION_IMPL *session, WT_PAGE *page'
- for l in entry.args:
- if l.sized:
- o += ', ' + decl_p(l) + ', size_t ' + l.name + '_size'
- else:
- o += ', ' + decl(l)
- o += ')'
- f.write('\n'.join('\t' + l for l in textwrap.wrap(o, 70)))
- f.write('\n{')
-
- # Local variable declarations.
- for l in entry.args:
- if l.sized:
- f.write('''
-\t''' + decl(l) + ''' = *''' + l.name + '''p;
-\tWT_DECL_RET;
-\tsize_t incr_mem;
-''')
-
- # Clear memory references we now own.
- for l in entry.args:
- if l.sized:
- f.write('''
-\t/* Clear references to memory we now own. */
-\t*''' + l.name + '''p = NULL;
-''')
-
- # Check the page write generation hasn't wrapped.
- f.write('''
-\t/*
-\t * Check to see if the page's write generation is about to wrap (wildly
-\t * unlikely as it implies 4B updates between clean page reconciliations,
-\t * but technically possible), and fail the update.
-\t *
-\t * The check is outside of the serialization mutex because the page's
-\t * write generation is going to be a hot cache line, so technically it's
-\t * possible for the page's write generation to wrap between the test and
-\t * our subsequent modification of it. However, the test is (4B-1M), and
-\t * there cannot be a million threads that have done the test but not yet
-\t * completed their modification.
-\t */
-\t WT_RET(__page_write_gen_wrapped_check(page));
-''')
-
- # Call the worker function.
- if entry.name != "update":
- f.write('''
-\t/* Acquire the page's spinlock, call the worker function. */
-\tWT_PAGE_LOCK(session, page);''')
-
- f.write('''
-\tret = __''' + entry.name + '''_serial_func(
-''')
- o = 'session'
- if entry.name == "update":
- o += ', page'
- for l in entry.args:
- o += ', ' + l.name
- o += ');'
- f.write('\n'.join('\t ' + l for l in textwrap.wrap(o, 70)))
-
- if entry.name != "update":
- f.write('''
-\tWT_PAGE_UNLOCK(session, page);''')
-
- f.write('''
-
-\t/* Free unused memory on error. */
-\tif (ret != 0) {
-''')
- for l in entry.args:
- if not l.sized:
- continue
- f.write(
- '\t\t__wt_free(session, ' + l.name + ');\n')
- f.write('''
-\t\treturn (ret);
-\t}
-''')
-
- f.write('''
-\t/*
-\t * Increment in-memory footprint after releasing the mutex: that's safe
-\t * because the structures we added cannot be discarded while visible to
-\t * any running transaction, and we're a running transaction, which means
-\t * there can be no corresponding delete until we complete.
-\t */
-\tincr_mem = 0;
-''')
- for l in entry.args:
- if not l.sized:
- continue
- f.write('\tWT_ASSERT(session, ' +
- l.name + '_size != 0);\n')
- f.write('\tincr_mem += ' + l.name + '_size;\n')
- f.write('''\tif (incr_mem != 0)
-\t\t__wt_cache_page_inmem_incr(session, page, incr_mem);
-
-\t/* Mark the page dirty after updating the footprint. */
-\t__wt_page_modify_set(session, page);
-
-\treturn (0);
-}
-
-''')
-
-#####################################################################
-# Update serial.i.
-#####################################################################
-tmp_file = '__tmp'
-tfile = open(tmp_file, 'w')
-skip = 0
-for line in open('../src/include/serial.i', 'r'):
- if not skip:
- tfile.write(line)
- if line.count('Serialization function section: END'):
- tfile.write(line)
- skip = 0
- elif line.count('Serialization function section: BEGIN'):
- tfile.write(' */\n\n')
- skip = 1
-
- for entry in msgtypes:
- output(entry, tfile)
-
- tfile.write('/*\n')
-
-tfile.close()
-compare_srcfile(tmp_file, '../src/include/serial.i')
diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c
index 704b258a7dd..1960e4605ef 100644
--- a/src/third_party/wiredtiger/src/btree/bt_cursor.c
+++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c
@@ -782,7 +782,9 @@ __wt_btcur_next_random(WT_CURSOR_BTREE *cbt)
WT_RET(__cursor_func_init(cbt, 1));
- WT_ERR(__wt_row_random(session, cbt));
+ WT_WITH_PAGE_INDEX(session,
+ ret = __wt_row_random(session, cbt));
+ WT_ERR(ret);
if (__cursor_valid(cbt, &upd))
WT_ERR(__wt_kv_return(session, cbt, upd));
else
@@ -948,7 +950,11 @@ __cursor_truncate(WT_SESSION_IMPL *session,
} else {
do {
WT_RET(__wt_btcur_remove(start));
- for (;;) {
+ /*
+ * Reset ret each time through so that we don't loop
+ * forever in the cursor equals case.
+ */
+ for (ret = 0;;) {
if (stop != NULL &&
__cursor_equals(start, stop))
break;
@@ -1009,7 +1015,11 @@ __cursor_truncate_fix(WT_SESSION_IMPL *session,
} else {
do {
WT_RET(__wt_btcur_remove(start));
- for (;;) {
+ /*
+ * Reset ret each time through so that we don't loop
+ * forever in the cursor equals case.
+ */
+ for (ret = 0;;) {
if (stop != NULL &&
__cursor_equals(start, stop))
break;
diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c
index af9f6a669f2..e84a63695f9 100644
--- a/src/third_party/wiredtiger/src/btree/bt_debug.c
+++ b/src/third_party/wiredtiger/src/btree/bt_debug.c
@@ -48,7 +48,7 @@ static void __debug_row_skip(WT_DBG *, WT_INSERT_HEAD *);
static int __debug_tree(WT_SESSION_IMPL *, WT_PAGE *, const char *, uint32_t);
static void __debug_update(WT_DBG *, WT_UPDATE *, int);
static void __dmsg(WT_DBG *, const char *, ...)
- WT_GCC_ATTRIBUTE((format (printf, 2, 3)));
+ WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 2, 3)));
static void __dmsg_wrapup(WT_DBG *);
/*
@@ -548,7 +548,9 @@ __debug_page(WT_DBG *ds, WT_PAGE *page, uint32_t flags)
session = ds->session;
/* Dump the page metadata. */
- WT_RET(__debug_page_metadata(ds, page));
+ WT_WITH_PAGE_INDEX(session,
+ ret = __debug_page_metadata(ds, page));
+ WT_RET(ret);
/* Dump the page. */
switch (page->type) {
diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c
index f0414c4e855..b47c9c897a6 100644
--- a/src/third_party/wiredtiger/src/btree/bt_handle.c
+++ b/src/third_party/wiredtiger/src/btree/bt_handle.c
@@ -12,7 +12,7 @@ static int __btree_conf(WT_SESSION_IMPL *, WT_CKPT *ckpt);
static int __btree_get_last_recno(WT_SESSION_IMPL *);
static int __btree_page_sizes(WT_SESSION_IMPL *);
static int __btree_preload(WT_SESSION_IMPL *);
-static int __btree_tree_open_empty(WT_SESSION_IMPL *, int, int);
+static int __btree_tree_open_empty(WT_SESSION_IMPL *, int);
/*
* __wt_btree_open --
@@ -100,8 +100,7 @@ __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[])
ckpt.raw.data, ckpt.raw.size,
root_addr, &root_addr_size, readonly));
if (creation || root_addr_size == 0)
- WT_ERR(__btree_tree_open_empty(
- session, creation, readonly));
+ WT_ERR(__btree_tree_open_empty(session, creation));
else {
WT_ERR(__wt_btree_tree_open(
session, root_addr, root_addr_size));
@@ -391,16 +390,17 @@ err: __wt_buf_free(session, &dsk);
* Create an empty in-memory tree.
*/
static int
-__btree_tree_open_empty(WT_SESSION_IMPL *session, int creation, int readonly)
+__btree_tree_open_empty(WT_SESSION_IMPL *session, int creation)
{
WT_BTREE *btree;
WT_DECL_RET;
- WT_PAGE *root, *leaf;
+ WT_PAGE *leaf, *root;
WT_PAGE_INDEX *pindex;
WT_REF *ref;
btree = S2BT(session);
root = leaf = NULL;
+ ref = NULL;
/*
* Newly created objects can be used for cursor inserts or for bulk
@@ -414,13 +414,10 @@ __btree_tree_open_empty(WT_SESSION_IMPL *session, int creation, int readonly)
}
/*
- * A note about empty trees: the initial tree is a root page and a leaf
- * page. We need a pair of pages instead of just a single page because
- * we can reconcile the leaf page while the root stays pinned in memory.
- * If the pair is evicted without being modified, that's OK, nothing is
- * ever written.
- *
- * Create the root and leaf pages.
+ * A note about empty trees: the initial tree is a single root page.
+ * It has a single reference to a leaf page, marked deleted. The leaf
+ * page will be created by the first update. If the root is evicted
+ * without being modified, that's OK, nothing is ever written.
*
* !!!
* Be cautious about changing the order of updates in this code: to call
@@ -437,10 +434,9 @@ __btree_tree_open_empty(WT_SESSION_IMPL *session, int creation, int readonly)
pindex = WT_INTL_INDEX_COPY(root);
ref = pindex->index[0];
ref->home = root;
- WT_ERR(__wt_btree_new_leaf_page(session, &leaf));
- ref->page = leaf;
+ ref->page = NULL;
ref->addr = NULL;
- ref->state = WT_REF_MEM;
+ ref->state = WT_REF_DELETED;
ref->key.recno = 1;
break;
case BTREE_ROW:
@@ -451,48 +447,20 @@ __btree_tree_open_empty(WT_SESSION_IMPL *session, int creation, int readonly)
pindex = WT_INTL_INDEX_COPY(root);
ref = pindex->index[0];
ref->home = root;
- WT_ERR(__wt_btree_new_leaf_page(session, &leaf));
- ref->page = leaf;
+ ref->page = NULL;
ref->addr = NULL;
- ref->state = WT_REF_MEM;
+ ref->state = WT_REF_DELETED;
WT_ERR(__wt_row_ikey_incr(
session, root, 0, "", 1, &ref->key.ikey));
break;
WT_ILLEGAL_VALUE_ERR(session);
}
- /*
- * Mark the leaf page dirty: we didn't create an entirely valid root
- * page (specifically, the root page's disk address isn't set, and it's
- * the act of reconciling the leaf page that makes it work, we don't
- * try and use the original disk address of modified pages). We could
- * get around that by leaving the leaf page clean and building a better
- * root page, but then we get into trouble because a checkpoint marks
- * the root page dirty to force a write, and without reconciling the
- * leaf page we won't realize there's no records to write, we'll write
- * a root page, which isn't correct for an empty tree.
- *
- * Earlier versions of this code kept the leaf page clean, but with the
- * "empty" flag set in the leaf page's modification structure; in that
- * case, checkpoints works (forced reconciliation of a root with a
- * single "empty" page wouldn't write any blocks). That version had
- * memory leaks because the eviction code didn't correctly handle pages
- * that were "clean" (and so never reconciled), yet "modified" with an
- * "empty" flag. The goal of this code is to mimic a real tree that
- * simply has no records, for whatever reason, and trust reconciliation
- * to figure out it's empty and not write any blocks.
- *
- * We do not set the tree's modified flag because the checkpoint code
- * skips unmodified files in closing checkpoints (checkpoints that
- * don't require a write unless the file is actually dirty). There's
- * no need to reconcile this file unless the application does a real
- * checkpoint or it's actually modified.
- *
- * Only do this for a live tree, not for checkpoints. If we open an
- * empty checkpoint, the leaf page cannot be dirty or eviction may try
- * to write it, which will fail because checkpoints are read-only.
- */
- if (!readonly) {
+ /* Bulk loads require a leaf page for reconciliation: create it now. */
+ if (F_ISSET(btree, WT_BTREE_BULK)) {
+ WT_ERR(__wt_btree_new_leaf_page(session, &leaf));
+ ref->page = leaf;
+ ref->state = WT_REF_MEM;
WT_ERR(__wt_page_modify_init(session, leaf));
__wt_page_only_modify_set(session, leaf);
}
@@ -676,6 +644,22 @@ __btree_page_sizes(WT_SESSION_IMPL *session)
leaf_split_size = __wt_split_page_size(btree, btree->maxleafpage);
/*
+ * In-memory split configuration.
+ */
+ if (__wt_config_gets(
+ session, cfg, "split_deepen_min_child", &cval) == WT_NOTFOUND ||
+ cval.val == 0)
+ btree->split_deepen_min_child = WT_SPLIT_DEEPEN_MIN_CHILD_DEF;
+ else
+ btree->split_deepen_min_child = (u_int)cval.val;
+ if (__wt_config_gets(
+ session, cfg, "split_deepen_per_child", &cval) == WT_NOTFOUND ||
+ cval.val == 0)
+ btree->split_deepen_per_child = WT_SPLIT_DEEPEN_PER_CHILD_DEF;
+ else
+ btree->split_deepen_per_child = (u_int)cval.val;
+
+ /*
* Get the maximum internal/leaf page key/value sizes.
*
* In historic versions of WiredTiger, the maximum internal/leaf page
diff --git a/src/third_party/wiredtiger/src/btree/bt_huffman.c b/src/third_party/wiredtiger/src/btree/bt_huffman.c
index c1cf3431c3b..c31b3f2fdf1 100644
--- a/src/third_party/wiredtiger/src/btree/bt_huffman.c
+++ b/src/third_party/wiredtiger/src/btree/bt_huffman.c
@@ -128,6 +128,30 @@ static const struct __wt_huffman_table __wt_huffman_nytenglish[] = {
static int __wt_huffman_read(WT_SESSION_IMPL *,
WT_CONFIG_ITEM *, struct __wt_huffman_table **, u_int *, u_int *);
+#define WT_HUFFMAN_CONFIG_VALID(str, len) \
+ (WT_STRING_CASE_MATCH("english", (str), (len)) || \
+ WT_PREFIX_MATCH((str), "utf8") || WT_PREFIX_MATCH((str), "utf16"))
+
+/*
+ * __btree_huffman_config --
+ * Verify the key and value Huffman configuration strings passed in.
+ *
+ * An item whose length is zero is not checked. A non-empty item must
+ * satisfy WT_HUFFMAN_CONFIG_VALID: it passes WT_STRING_CASE_MATCH against
+ * "english", or passes WT_PREFIX_MATCH against "utf8" or "utf16". Note the
+ * two prefix tests are handed only the string pointer, not the item length.
+ *
+ * The key item is checked before the value item. A failing item is
+ * reported through WT_RET_MSG with EINVAL and a message naming which of
+ * the two was illegal; if neither check fails the function returns 0.
+ */
+static int
+__btree_huffman_config(WT_SESSION_IMPL *session,
+ WT_CONFIG_ITEM *key_conf, WT_CONFIG_ITEM *value_conf)
+{
+ if (key_conf->len != 0 &&
+ !WT_HUFFMAN_CONFIG_VALID(key_conf->str, key_conf->len))
+ WT_RET_MSG(
+ session, EINVAL, "illegal Huffman key configuration");
+ if (value_conf->len != 0 &&
+ !WT_HUFFMAN_CONFIG_VALID(value_conf->str, value_conf->len))
+ WT_RET_MSG(
+ session, EINVAL, "illegal Huffman value configuration");
+ return (0);
+
+}
+
/*
* __wt_btree_huffman_open --
* Configure Huffman encoding for the tree.
@@ -150,6 +174,7 @@ __wt_btree_huffman_open(WT_SESSION_IMPL *session)
__wt_config_gets_none(session, cfg, "huffman_value", &value_conf));
if (key_conf.len == 0 && value_conf.len == 0)
return (0);
+ WT_RET(__btree_huffman_config(session, &key_conf, &value_conf));
switch (btree->type) { /* Check file type compatibility. */
case BTREE_COL_FIX:
@@ -311,6 +336,8 @@ __wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip,
tp->frequency = (uint32_t)frequency;
}
+ if (ret == EOF)
+ ret = 0;
*entriesp = lineno - 1;
*tablep = table;
diff --git a/src/third_party/wiredtiger/src/btree/bt_page.c b/src/third_party/wiredtiger/src/btree/bt_page.c
index 561e1c19218..1e539b7caee 100644
--- a/src/third_party/wiredtiger/src/btree/bt_page.c
+++ b/src/third_party/wiredtiger/src/btree/bt_page.c
@@ -131,8 +131,8 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
force_attempts < 10 &&
__evict_force_check(session, page)) {
++force_attempts;
- if ((ret = __wt_page_release_busy(
- session, ref, flags)) == EBUSY) {
+ ret = __wt_page_release_evict(session, ref);
+ if (ret == EBUSY) {
/* If forced eviction fails, stall. */
ret = 0;
wait_cnt += 1000;
@@ -285,6 +285,7 @@ err: if ((pindex = WT_INTL_INDEX_COPY(page)) != NULL) {
/* Increment the cache statistics. */
__wt_cache_page_inmem_incr(session, page, size);
+ (void)WT_ATOMIC_ADD8(cache->bytes_read, size);
(void)WT_ATOMIC_ADD8(cache->pages_inmem, 1);
*pagep = page;
diff --git a/src/third_party/wiredtiger/src/btree/bt_slvg.c b/src/third_party/wiredtiger/src/btree/bt_slvg.c
index fbc3890f23b..1cf616a2f6b 100644
--- a/src/third_party/wiredtiger/src/btree/bt_slvg.c
+++ b/src/third_party/wiredtiger/src/btree/bt_slvg.c
@@ -294,12 +294,16 @@ __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[])
switch (ss->page_type) {
case WT_PAGE_COL_FIX:
case WT_PAGE_COL_VAR:
- WT_ERR(
- __slvg_col_build_internal(session, leaf_cnt, ss));
+ WT_WITH_PAGE_INDEX(session,
+ ret = __slvg_col_build_internal(
+ session, leaf_cnt, ss));
+ WT_ERR(ret);
break;
case WT_PAGE_ROW_LEAF:
- WT_ERR(
- __slvg_row_build_internal(session, leaf_cnt, ss));
+ WT_WITH_PAGE_INDEX(session,
+ ret = __slvg_row_build_internal(
+ session, leaf_cnt, ss));
+ WT_ERR(ret);
break;
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c
index e4fe51ea28f..05af1a2f885 100644
--- a/src/third_party/wiredtiger/src/btree/bt_split.c
+++ b/src/third_party/wiredtiger/src/btree/bt_split.c
@@ -8,17 +8,10 @@
#include "wt_internal.h"
-/*
- * Track allocation increments, matching the cache calculations, which add an
- * estimate of allocation overhead to every object.
- */
-#define WT_MEMSIZE_ADD(total, len) do { \
- total += (len); \
-} while (0)
-#define WT_MEMSIZE_TRANSFER(from_decr, to_incr, len) do { \
+#define WT_MEM_TRANSFER(from_decr, to_incr, len) do { \
size_t __len = (len); \
- WT_MEMSIZE_ADD(from_decr, __len); \
- WT_MEMSIZE_ADD(to_incr, __len); \
+ from_decr += __len; \
+ to_incr += __len; \
} while (0)
/*
@@ -49,7 +42,8 @@ __split_oldest_gen(WT_SESSION_IMPL *session)
* Add a new entry into the session's split stash list.
*/
static int
-__split_stash_add(WT_SESSION_IMPL *session, void *p, size_t len)
+__split_stash_add(
+ WT_SESSION_IMPL *session, uint64_t split_gen, void *p, size_t len)
{
WT_SPLIT_STASH *stash;
@@ -60,7 +54,7 @@ __split_stash_add(WT_SESSION_IMPL *session, void *p, size_t len)
session->split_stash_cnt + 1, &session->split_stash));
stash = session->split_stash + session->split_stash_cnt++;
- stash->split_gen = WT_ATOMIC_ADD8(S2C(session)->split_gen, 1);
+ stash->split_gen = split_gen;
stash->p = p;
stash->len = len;
@@ -150,14 +144,14 @@ __wt_split_stash_discard_all(
* it to be freed otherwise.
*/
static int
-__split_safe_free(WT_SESSION_IMPL *session, int exclusive, void *p, size_t s)
+__split_safe_free(WT_SESSION_IMPL *session,
+ uint64_t split_gen, int exclusive, void *p, size_t s)
{
/*
* We have swapped something in a page: if we don't have exclusive
* access, check whether there are other threads in the same tree.
*/
- if (!exclusive &&
- __split_oldest_gen(session) == S2C(session)->split_gen + 1)
+ if (!exclusive && __split_oldest_gen(session) > split_gen)
exclusive = 1;
if (exclusive) {
@@ -165,17 +159,10 @@ __split_safe_free(WT_SESSION_IMPL *session, int exclusive, void *p, size_t s)
return (0);
}
- return (__split_stash_add(session, p, s));
+ return (__split_stash_add(session, split_gen, p, s));
}
/*
- * Tuning; global variables to allow the binary to be patched, we don't yet have
- * any real understanding of what might be useful to surface to applications.
- */
-static u_int __split_deepen_min_child = 10000;
-static u_int __split_deepen_per_child = 100;
-
-/*
* __split_should_deepen --
* Return if we should deepen the tree.
*/
@@ -183,11 +170,13 @@ static int
__split_should_deepen(
WT_SESSION_IMPL *session, WT_REF *ref, uint32_t *childrenp)
{
- WT_PAGE_INDEX *pindex;
+ WT_BTREE *btree;
WT_PAGE *page;
+ WT_PAGE_INDEX *pindex;
*childrenp = 0;
+ btree = S2BT(session);
page = ref->page;
pindex = WT_INTL_INDEX_COPY(page);
@@ -204,8 +193,8 @@ __split_should_deepen(
* we get a significant payback (in the case of a set of large keys,
* splitting won't help).
*/
- if (pindex->entries > __split_deepen_min_child) {
- *childrenp = pindex->entries / __split_deepen_per_child;
+ if (pindex->entries > btree->split_deepen_min_child) {
+ *childrenp = pindex->entries / btree->split_deepen_per_child;
return (1);
}
@@ -296,10 +285,9 @@ __split_ref_deepen_move(WT_SESSION_IMPL *session,
ref->key.ikey = ikey;
} else {
WT_RET(__split_ovfl_key_cleanup(session, parent, ref));
- WT_MEMSIZE_ADD(*parent_decrp,
- sizeof(WT_IKEY) + ikey->size);
+ *parent_decrp += sizeof(WT_IKEY) + ikey->size;
}
- WT_MEMSIZE_ADD(*child_incrp, sizeof(WT_IKEY) + ikey->size);
+ *child_incrp += sizeof(WT_IKEY) + ikey->size;
}
/*
@@ -323,7 +311,7 @@ __split_ref_deepen_move(WT_SESSION_IMPL *session,
}
/* And finally, the WT_REF itself. */
- WT_MEMSIZE_TRANSFER(*parent_decrp, *child_incrp, sizeof(WT_REF));
+ WT_MEM_TRANSFER(*parent_decrp, *child_incrp, sizeof(WT_REF));
return (0);
}
@@ -393,6 +381,7 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children)
WT_REF **alloc_refp;
WT_REF *child_ref, **child_refp, *parent_ref, **parent_refp, *ref;
size_t child_incr, parent_decr, parent_incr, size;
+ uint64_t split_gen;
uint32_t chunk, i, j, remain, slots;
int panic;
void *p;
@@ -432,7 +421,7 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children)
size = sizeof(WT_PAGE_INDEX) +
(children + SPLIT_CORRECT_2) * sizeof(WT_REF *);
WT_ERR(__wt_calloc(session, 1, size, &alloc_index));
- WT_MEMSIZE_ADD(parent_incr, size);
+ parent_incr += size;
alloc_index->index = (WT_REF **)(alloc_index + 1);
alloc_index->entries = children + SPLIT_CORRECT_2;
alloc_index->index[0] = pindex->index[0];
@@ -441,7 +430,7 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children)
for (alloc_refp = alloc_index->index + SPLIT_CORRECT_1,
i = 0; i < children; ++alloc_refp, ++i) {
WT_ERR(__wt_calloc_one(session, alloc_refp));
- WT_MEMSIZE_ADD(parent_incr, sizeof(WT_REF));
+ parent_incr += sizeof(WT_REF);
}
/* Allocate child pages, and connect them into the new page index. */
@@ -466,7 +455,7 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children)
__wt_ref_key(parent, *parent_refp, &p, &size);
WT_ERR(
__wt_row_ikey(session, 0, p, size, &ref->key.ikey));
- WT_MEMSIZE_ADD(parent_incr, sizeof(WT_IKEY) + size);
+ parent_incr += sizeof(WT_IKEY) + size;
} else
ref->key.recno = (*parent_refp)->key.recno;
ref->state = WT_REF_MEM;
@@ -527,6 +516,7 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children)
* needs to be paid.
*/
WT_INTL_INDEX_SET(parent, alloc_index);
+ split_gen = WT_ATOMIC_ADD8(S2C(session)->split_gen, 1);
panic = 1;
#ifdef HAVE_DIAGNOSTIC
@@ -596,31 +586,14 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent, uint32_t children)
* be using the new index.
*/
size = sizeof(WT_PAGE_INDEX) + pindex->entries * sizeof(WT_REF *);
- WT_ERR(__split_safe_free(session, 0, pindex, size));
- WT_MEMSIZE_ADD(parent_decr, size);
+ WT_ERR(__split_safe_free(session, split_gen, 0, pindex, size));
+ parent_decr += size;
-#if 0
/*
- * Adjust the parent's memory footprint. This may look odd, but we
- * have already taken the allocation overhead into account, and an
- * increment followed by a decrement will cancel out the normal
- * adjustment.
+ * Adjust the parent's memory footprint.
*/
__wt_cache_page_inmem_incr(session, parent, parent_incr);
__wt_cache_page_inmem_decr(session, parent, parent_decr);
-#else
- /*
- * XXX
- * The code to track page sizes is fundamentally flawed in the face of
- * splits: for example, we don't add in an overhead allocation constant
- * when allocating WT_REF structures as pages are created, but the
- * calculations during split assume that correction. For now, ignore
- * our carefully calculated values and force the internal page size to
- * 5% of its current value.
- */
- size = parent->memory_footprint - (parent->memory_footprint / 20);
- __wt_cache_page_inmem_decr(session, parent, size);
-#endif
if (0) {
err: __wt_free_ref_index(session, parent, alloc_index, 1);
@@ -753,11 +726,10 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session,
incr = 0;
/* In some cases, the underlying WT_REF has not yet been allocated. */
- if (*refp == NULL) {
+ if (*refp == NULL)
WT_RET(__wt_calloc_one(session, refp));
- WT_MEMSIZE_ADD(incr, sizeof(WT_REF));
- }
ref = *refp;
+ incr += sizeof(WT_REF);
/*
* Any parent reference must be filled in by our caller; the primary
@@ -790,7 +762,7 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session,
ikey = multi->key.ikey;
WT_RET(__wt_row_ikey(session, 0,
WT_IKEY_DATA(ikey), ikey->size, &ref->key.ikey));
- WT_MEMSIZE_ADD(incr, sizeof(WT_IKEY) + ikey->size);
+ incr += sizeof(WT_IKEY) + ikey->size;
break;
default:
ref->key.recno = multi->key.recno;
@@ -815,7 +787,7 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session,
static int
__split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
uint32_t new_entries, size_t parent_decr, size_t parent_incr,
- int exclusive, int ref_discard)
+ int exclusive, int ref_discard, uint64_t *split_genp)
{
WT_DECL_RET;
WT_IKEY *ikey;
@@ -823,6 +795,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
WT_PAGE_INDEX *alloc_index, *pindex;
WT_REF **alloc_refp, *next_ref, *parent_ref;
size_t size;
+ uint64_t split_gen;
uint32_t children, i, j;
uint32_t deleted_entries, parent_entries, result_entries;
int complete, hazard, locked;
@@ -902,7 +875,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
*/
size = sizeof(WT_PAGE_INDEX) + result_entries * sizeof(WT_REF *);
WT_ERR(__wt_calloc(session, 1, size, &alloc_index));
- WT_MEMSIZE_ADD(parent_incr, size);
+ parent_incr += size;
alloc_index->index = (WT_REF **)(alloc_index + 1);
alloc_index->entries = result_entries;
for (alloc_refp = alloc_index->index, i = 0; i < parent_entries; ++i) {
@@ -929,6 +902,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
* to threads descending the tree.
*/
WT_INTL_INDEX_SET(parent, alloc_index);
+ split_gen = *split_genp = WT_ATOMIC_ADD8(S2C(session)->split_gen, 1);
alloc_index = NULL;
#ifdef HAVE_DIAGNOSTIC
@@ -975,8 +949,8 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
if (ikey != NULL) {
size = sizeof(WT_IKEY) + ikey->size;
WT_TRET(__split_safe_free(
- session, 0, ikey, size));
- WT_MEMSIZE_ADD(parent_decr, size);
+ session, split_gen, 0, ikey, size));
+ parent_decr += size;
}
/*
* The page_del structure can be freed
@@ -993,8 +967,8 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
}
WT_TRET(__split_safe_free(
- session, 0, next_ref, sizeof(WT_REF)));
- WT_MEMSIZE_ADD(parent_decr, sizeof(WT_REF));
+ session, split_gen, 0, next_ref, sizeof(WT_REF)));
+ parent_decr += sizeof(WT_REF);
}
}
@@ -1003,8 +977,8 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
* Add it to the session discard list, to be freed when it's safe.
*/
size = sizeof(WT_PAGE_INDEX) + pindex->entries * sizeof(WT_REF *);
- WT_TRET(__split_safe_free(session, exclusive, pindex, size));
- WT_MEMSIZE_ADD(parent_decr, size);
+ WT_TRET(__split_safe_free(session, split_gen, exclusive, pindex, size));
+ parent_decr += size;
/*
* Row-store trees where the old version of the page is being discarded:
@@ -1020,10 +994,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
WT_TRET(__split_ovfl_key_cleanup(session, parent, ref));
/*
- * Adjust the parent's memory footprint. This may look odd, but we
- * have already taken the allocation overhead into account, and an
- * increment followed by a decrement will cancel out the normal
- * adjustment.
+ * Adjust the parent's memory footprint.
*/
__wt_cache_page_inmem_incr(session, parent, parent_incr);
__wt_cache_page_inmem_decr(session, parent, parent_decr);
@@ -1061,8 +1032,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
*/
uint64_t __a, __b;
__a = parent->memory_footprint;
- WT_WITH_PAGE_INDEX(session,
- ret = __split_deepen(session, parent, children));
+ ret = __split_deepen(session, parent, children);
__b = parent->memory_footprint;
if (__b * 2 >= __a)
F_SET_ATOMIC(parent, WT_PAGE_REFUSE_DEEPEN);
@@ -1110,6 +1080,7 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref, int *splitp)
WT_PAGE *page, *right;
WT_REF *child, *split_ref[2] = { NULL, NULL };
size_t page_decr, parent_decr, parent_incr, right_incr;
+ uint64_t split_gen;
int i;
*splitp = 0;
@@ -1198,9 +1169,9 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref, int *splitp)
} else
WT_ERR(__wt_row_leaf_key(
session, page, &page->pg_row_d[0], key, 1));
-
WT_ERR(__wt_row_ikey(
session, 0, key->data, key->size, &child->key.ikey));
+ parent_incr += sizeof(WT_REF) + sizeof(WT_IKEY) + key->size;
__wt_scr_free(session, &key);
/*
@@ -1209,8 +1180,8 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref, int *splitp)
WT_ERR(__wt_page_alloc(session, WT_PAGE_ROW_LEAF, 0, 0, 0, &right));
WT_ERR(__wt_calloc_one(session, &right->pg_row_ins));
WT_ERR(__wt_calloc_one(session, &right->pg_row_ins[0]));
- WT_MEMSIZE_ADD(right_incr, sizeof(WT_INSERT_HEAD));
- WT_MEMSIZE_ADD(right_incr, sizeof(WT_INSERT_HEAD *));
+ right_incr += sizeof(WT_INSERT_HEAD);
+ right_incr += sizeof(WT_INSERT_HEAD *);
WT_ERR(__wt_calloc_one(session, &split_ref[1]));
child = split_ref[1];
@@ -1219,19 +1190,18 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref, int *splitp)
WT_ERR(__wt_row_ikey(session, 0,
WT_INSERT_KEY(moved_ins), WT_INSERT_KEY_SIZE(moved_ins),
&child->key.ikey));
+ parent_incr +=
+ sizeof(WT_REF) + sizeof(WT_IKEY) + WT_INSERT_KEY_SIZE(moved_ins);
/*
- * We're swapping WT_REFs in the parent, adjust the accounting, and
- * row store pages may have instantiated keys.
+ * After the split, we're going to discard the WT_REF, account for the
+ * change in memory footprint. Row store pages have keys that may be
+ * instantiated, check for that.
*/
- WT_MEMSIZE_ADD(parent_incr, sizeof(WT_REF));
- WT_MEMSIZE_ADD(
- parent_incr, sizeof(WT_IKEY) + WT_INSERT_KEY_SIZE(moved_ins));
- WT_MEMSIZE_ADD(parent_decr, sizeof(WT_REF));
+ parent_decr += sizeof(WT_REF);
if (page->type == WT_PAGE_ROW_LEAF || page->type == WT_PAGE_ROW_INT)
if ((ikey = __wt_ref_key_instantiated(ref)) != NULL)
- WT_MEMSIZE_ADD(
- parent_decr, sizeof(WT_IKEY) + ikey->size);
+ parent_decr += sizeof(WT_IKEY) + ikey->size;
/* The new page is dirty by definition. */
WT_ERR(__wt_page_modify_init(session, right));
@@ -1253,10 +1223,10 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref, int *splitp)
*/
for (i = 0; i < WT_SKIP_MAXDEPTH && ins_head->tail[i] == moved_ins; ++i)
;
- WT_MEMSIZE_TRANSFER(page_decr, right_incr, sizeof(WT_INSERT) +
+ WT_MEM_TRANSFER(page_decr, right_incr, sizeof(WT_INSERT) +
(size_t)i * sizeof(WT_INSERT *) + WT_INSERT_KEY_SIZE(moved_ins));
- WT_MEMSIZE_TRANSFER(page_decr, right_incr,
- __wt_update_list_memsize(moved_ins->upd));
+ WT_MEM_TRANSFER(
+ page_decr, right_incr, __wt_update_list_memsize(moved_ins->upd));
/*
* Allocation operations completed, move the last insert list item from
@@ -1349,7 +1319,12 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref, int *splitp)
*/
page->modify->inmem_split_txn = __wt_txn_new_id(session);
- /* Update the page accounting. */
+ /*
+ * Update the page accounting.
+ *
+ * XXX
+ * If we fail to split the parent, the page's accounting will be wrong.
+ */
__wt_cache_page_inmem_decr(session, page, page_decr);
__wt_cache_page_inmem_incr(session, right, right_incr);
@@ -1358,8 +1333,8 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref, int *splitp)
* longer locked, so we cannot safely look at it.
*/
page = NULL;
- if ((ret = __split_parent(
- session, ref, split_ref, 2, parent_decr, parent_incr, 0, 0)) != 0) {
+ if ((ret = __split_parent(session, ref, split_ref, 2,
+ parent_decr, parent_incr, 0, 0, &split_gen)) != 0) {
/*
* Move the insert list element back to the original page list.
* For simplicity, the previous skip list pointers originally
@@ -1396,8 +1371,8 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref, int *splitp)
*/
if (ikey != NULL)
WT_TRET(__split_safe_free(
- session, 0, ikey, sizeof(WT_IKEY) + ikey->size));
- WT_TRET(__split_safe_free(session, 0, ref, sizeof(WT_REF)));
+ session, split_gen, 0, ikey, sizeof(WT_IKEY) + ikey->size));
+ WT_TRET(__split_safe_free(session, split_gen, 0, ref, sizeof(WT_REF)));
/*
* A note on error handling: if we completed the split, return success,
@@ -1480,6 +1455,7 @@ __wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive)
WT_PAGE_MODIFY *mod;
WT_REF **ref_new;
size_t parent_decr, parent_incr;
+ uint64_t split_gen;
uint32_t i, new_entries;
page = ref->page;
@@ -1503,15 +1479,14 @@ __wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive)
* change in memory footprint. Row store pages have keys that may be
* instantiated, check for that.
*/
- WT_MEMSIZE_ADD(parent_decr, sizeof(WT_REF));
+ parent_decr += sizeof(WT_REF);
if (page->type == WT_PAGE_ROW_LEAF || page->type == WT_PAGE_ROW_INT)
if ((ikey = __wt_ref_key_instantiated(ref)) != NULL)
- WT_MEMSIZE_ADD(
- parent_decr, sizeof(WT_IKEY) + ikey->size);
+ parent_decr += sizeof(WT_IKEY) + ikey->size;
/* Split into the parent. */
- WT_ERR(__split_parent(session,
- ref, ref_new, new_entries, parent_decr, parent_incr, exclusive, 1));
+ WT_ERR(__split_parent(session, ref, ref_new, new_entries,
+ parent_decr, parent_incr, exclusive, 1, &split_gen));
__wt_free(session, ref_new);
@@ -1534,9 +1509,10 @@ __wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive)
* safe.
*/
if (ikey != NULL)
- WT_TRET(__split_safe_free(
- session, exclusive, ikey, sizeof(WT_IKEY) + ikey->size));
- WT_TRET(__split_safe_free(session, exclusive, ref, sizeof(WT_REF)));
+ WT_TRET(__split_safe_free(session, split_gen, exclusive,
+ ikey, sizeof(WT_IKEY) + ikey->size));
+ WT_TRET(__split_safe_free(session, split_gen, exclusive,
+ ref, sizeof(WT_REF)));
/*
* A note on error handling: if we completed the split, return success,
diff --git a/src/third_party/wiredtiger/src/btree/bt_stat.c b/src/third_party/wiredtiger/src/btree/bt_stat.c
index d9ff2a6af1e..b7108b52395 100644
--- a/src/third_party/wiredtiger/src/btree/bt_stat.c
+++ b/src/third_party/wiredtiger/src/btree/bt_stat.c
@@ -45,8 +45,11 @@ __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst)
next_walk = NULL;
while ((ret =
- __wt_tree_walk(session, &next_walk, 0)) == 0 && next_walk != NULL)
- WT_RET(__stat_page(session, next_walk->page, stats));
+ __wt_tree_walk(session, &next_walk, 0)) == 0 && next_walk != NULL) {
+ WT_WITH_PAGE_INDEX(session,
+ ret = __stat_page(session, next_walk->page, stats));
+ WT_RET(ret);
+ }
return (ret == WT_NOTFOUND ? 0 : ret);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_vrfy.c b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
index fafb4b58fc4..2957eda3a49 100644
--- a/src/third_party/wiredtiger/src/btree/bt_vrfy.c
+++ b/src/third_party/wiredtiger/src/btree/bt_vrfy.c
@@ -366,11 +366,16 @@ recno_chk: if (recno != vs->record_total + 1)
goto celltype_err;
break;
case WT_PAGE_COL_VAR:
- case WT_PAGE_ROW_LEAF:
if (unpack->raw != WT_CELL_ADDR_LEAF &&
unpack->raw != WT_CELL_ADDR_LEAF_NO)
goto celltype_err;
break;
+ case WT_PAGE_ROW_LEAF:
+ if (unpack->raw != WT_CELL_ADDR_DEL &&
+ unpack->raw != WT_CELL_ADDR_LEAF &&
+ unpack->raw != WT_CELL_ADDR_LEAF_NO)
+ goto celltype_err;
+ break;
case WT_PAGE_COL_INT:
case WT_PAGE_ROW_INT:
if (unpack->raw != WT_CELL_ADDR_INT)
diff --git a/src/third_party/wiredtiger/src/btree/col_srch.c b/src/third_party/wiredtiger/src/btree/col_srch.c
index 4c418f91de0..db1b565b439 100644
--- a/src/third_party/wiredtiger/src/btree/col_srch.c
+++ b/src/third_party/wiredtiger/src/btree/col_srch.c
@@ -49,6 +49,7 @@ restart: page = current->page;
WT_ASSERT(session, current->key.recno == page->pg_intl_recno);
+ WT_ASSERT(session, session->split_gen != 0);
pindex = WT_INTL_INDEX_COPY(page);
base = pindex->entries;
descent = pindex->index[base - 1];
diff --git a/src/third_party/wiredtiger/src/btree/row_srch.c b/src/third_party/wiredtiger/src/btree/row_srch.c
index 036e11bec6d..9967c5ecb0c 100644
--- a/src/third_party/wiredtiger/src/btree/row_srch.c
+++ b/src/third_party/wiredtiger/src/btree/row_srch.c
@@ -195,6 +195,7 @@ restart: page = current->page;
if (page->type != WT_PAGE_ROW_INT)
break;
+ WT_ASSERT(session, session->split_gen != 0);
pindex = WT_INTL_INDEX_COPY(page);
/*
@@ -487,6 +488,7 @@ restart:
if (page->type != WT_PAGE_ROW_INT)
break;
+ WT_ASSERT(session, session->split_gen != 0);
pindex = WT_INTL_INDEX_COPY(page);
descent = pindex->index[
__wt_random(session->rnd) % pindex->entries];
@@ -521,6 +523,7 @@ restart:
*/
cbt->ref = current;
cbt->compare = 0;
+ WT_ASSERT(session, session->split_gen != 0);
pindex = WT_INTL_INDEX_COPY(btree->root.page);
cbt->slot = pindex->entries < 2 ?
__wt_random(session->rnd) % page->pg_row_entries : 0;
diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c
index 646551cdd38..a7e9419a65c 100644
--- a/src/third_party/wiredtiger/src/config/config_def.c
+++ b/src/third_party/wiredtiger/src/config/config_def.c
@@ -152,6 +152,8 @@ static const WT_CONFIG_CHECK confchk_file_meta[] = {
{ "os_cache_max", "int", "min=0", NULL },
{ "prefix_compression", "boolean", NULL, NULL },
{ "prefix_compression_min", "int", "min=0", NULL },
+ { "split_deepen_min_child", "int", NULL, NULL },
+ { "split_deepen_per_child", "int", NULL, NULL },
{ "split_pct", "int", "min=25,max=100", NULL },
{ "value_format", "format", NULL, NULL },
{ "version", "string", NULL, NULL },
@@ -246,6 +248,8 @@ static const WT_CONFIG_CHECK confchk_session_create[] = {
{ "prefix_compression", "boolean", NULL, NULL },
{ "prefix_compression_min", "int", "min=0", NULL },
{ "source", "string", NULL, NULL },
+ { "split_deepen_min_child", "int", NULL, NULL },
+ { "split_deepen_per_child", "int", NULL, NULL },
{ "split_pct", "int", "min=25,max=100", NULL },
{ "type", "string", NULL, NULL },
{ "value_format", "format", NULL, NULL },
@@ -585,7 +589,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {
",key_format=u,key_gap=10,leaf_item_max=0,leaf_key_max=0,"
"leaf_page_max=32KB,leaf_value_max=0,memory_page_max=5MB,"
"os_cache_dirty_max=0,os_cache_max=0,prefix_compression=0,"
- "prefix_compression_min=4,split_pct=75,value_format=u,"
+ "prefix_compression_min=4,split_deepen_min_child=0,"
+ "split_deepen_per_child=0,split_pct=75,value_format=u,"
"version=(major=0,minor=0)",
confchk_file_meta
},
@@ -626,8 +631,8 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"bloom_hash_count=8,bloom_oldest=0,chunk_max=5GB,chunk_size=10MB,"
"merge_max=15,merge_min=0),memory_page_max=5MB,"
"os_cache_dirty_max=0,os_cache_max=0,prefix_compression=0,"
- "prefix_compression_min=4,source=,split_pct=75,type=file,"
- "value_format=u",
+ "prefix_compression_min=4,source=,split_deepen_min_child=0,"
+ "split_deepen_per_child=0,split_pct=75,type=file,value_format=u",
confchk_session_create
},
{ "session.drop",
diff --git a/src/third_party/wiredtiger/src/conn/conn_cache_pool.c b/src/third_party/wiredtiger/src/conn/conn_cache_pool.c
index 861bafed900..f5b78e33b04 100644
--- a/src/third_party/wiredtiger/src/conn/conn_cache_pool.c
+++ b/src/third_party/wiredtiger/src/conn/conn_cache_pool.c
@@ -448,15 +448,15 @@ __cache_pool_assess(WT_SESSION_IMPL *session, uint64_t *phighest)
continue;
cache = entry->cache;
++entries;
- new = cache->bytes_evict;
+ new = cache->bytes_read;
/* Handle wrapping of eviction requests. */
- if (new >= cache->cp_saved_evict)
- cache->cp_current_evict = new - cache->cp_saved_evict;
+ if (new >= cache->cp_saved_read)
+ cache->cp_current_read = new - cache->cp_saved_read;
else
- cache->cp_current_evict = new;
- cache->cp_saved_evict = new;
- if (cache->cp_current_evict > highest)
- highest = cache->cp_current_evict;
+ cache->cp_current_read = new;
+ cache->cp_saved_read = new;
+ if (cache->cp_current_read > highest)
+ highest = cache->cp_current_read;
}
WT_RET(__wt_verbose(session, WT_VERB_SHARED_CACHE,
"Highest eviction count: %" PRIu64 ", entries: %" PRIu64,
@@ -501,7 +501,7 @@ __cache_pool_adjust(WT_SESSION_IMPL *session,
reserved = cache->cp_reserved;
adjusted = 0;
- read_pressure = cache->cp_current_evict / highest;
+ read_pressure = cache->cp_current_read / highest;
WT_RET(__wt_verbose(session, WT_VERB_SHARED_CACHE,
"\t%" PRIu64 ", %" PRIu64 ", %" PRIu32,
entry->cache_size, read_pressure, cache->cp_skip_count));
diff --git a/src/third_party/wiredtiger/src/conn/conn_sweep.c b/src/third_party/wiredtiger/src/conn/conn_sweep.c
index 01f08aa5f07..a5bd8e1343c 100644
--- a/src/third_party/wiredtiger/src/conn/conn_sweep.c
+++ b/src/third_party/wiredtiger/src/conn/conn_sweep.c
@@ -32,14 +32,14 @@ __sweep(WT_SESSION_IMPL *session)
dhandle_next = SLIST_NEXT(dhandle, l);
if (WT_IS_METADATA(dhandle))
continue;
- if (dhandle->session_inuse == 0 && dhandle->timeofdeath == 0) {
+ if (dhandle->session_inuse != 0 ||
+ now <= dhandle->timeofdeath + WT_DHANDLE_SWEEP_WAIT)
+ continue;
+ if (dhandle->timeofdeath == 0) {
dhandle->timeofdeath = now;
WT_STAT_FAST_CONN_INCR(session, dh_conn_tod);
continue;
}
- if (dhandle->session_inuse != 0 ||
- now <= dhandle->timeofdeath + WT_DHANDLE_SWEEP_WAIT)
- continue;
/*
* We have a candidate for closing; if it's open, acquire an
diff --git a/src/third_party/wiredtiger/src/evict/evict_file.c b/src/third_party/wiredtiger/src/evict/evict_file.c
index 553acc988f0..5aa85872a3b 100644
--- a/src/third_party/wiredtiger/src/evict/evict_file.c
+++ b/src/third_party/wiredtiger/src/evict/evict_file.c
@@ -90,33 +90,43 @@ __wt_evict_file(WT_SESSION_IMPL *session, int syncop)
WT_ERR(__wt_evict(session, ref, 1));
break;
case WT_SYNC_DISCARD:
- case WT_SYNC_DISCARD_FORCE:
/*
- * Discard the page, whether clean or dirty.
- *
- * Clean the page, both to keep statistics correct, and
- * to let the page-discard function assert no dirty page
- * is ever discarded.
+ * Ordinary discard of the page: only clean pages are
+ * discarded. If we see a dirty page in an ordinary
+ * discard (e.g., from sweep), give up with EBUSY: an
+ * update must have happened since the file was selected.
*/
- if (__wt_page_is_modified(page)) {
- page->modify->write_gen = 0;
- __wt_cache_dirty_decr(session, page);
- }
+ if (__wt_page_is_modified(page))
+ WT_ERR(EBUSY);
+
/*
* If the page contains an update that is too recent to
* evict, stop. This should never happen during
- * connection close, and in other paths our caller
+ * connection close, but in other paths our caller
* should be prepared to deal with this case.
*/
- if (syncop == WT_SYNC_DISCARD &&
- page->modify != NULL &&
+ if (page->modify != NULL &&
!__wt_txn_visible_all(session,
page->modify->rec_max_txn))
WT_ERR(EBUSY);
- if (syncop == WT_SYNC_DISCARD_FORCE)
- F_SET(session, WT_SESSION_DISCARD_FORCE);
- __wt_rec_page_clean_update(session, ref);
+ __wt_evict_page_clean_update(session, ref);
+ break;
+ case WT_SYNC_DISCARD_FORCE:
+ /*
+ * Forced discard of the page, whether clean or dirty.
+ * If we see a dirty page in a forced discard, clean
+ * the page, both to keep statistics correct, and to
+ * let the page-discard function assert no dirty page
+ * is ever discarded.
+ */
+ if (__wt_page_is_modified(page)) {
+ page->modify->write_gen = 0;
+ __wt_cache_dirty_decr(session, page);
+ }
+
+ F_SET(session, WT_SESSION_DISCARD_FORCE);
+ __wt_evict_page_clean_update(session, ref);
F_CLR(session, WT_SESSION_DISCARD_FORCE);
break;
WT_ILLEGAL_VALUE_ERR(session);
diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c
index 384ec9be5b3..6e7d3e9c6cd 100644
--- a/src/third_party/wiredtiger/src/evict/evict_lru.c
+++ b/src/third_party/wiredtiger/src/evict/evict_lru.c
@@ -205,11 +205,10 @@ __evict_server(void *arg)
"cache server: exiting with %" PRIu64 " pages in "
"memory and %" PRIu64 " pages evicted",
cache->pages_inmem, cache->pages_evict);
- if (cache->bytes_inmem != cache->bytes_evict)
+ if (cache->bytes_inmem != 0)
__wt_errx(session,
- "cache server: exiting with %" PRIu64 " bytes in "
- "memory and %" PRIu64 " bytes evicted",
- cache->bytes_inmem, cache->bytes_evict);
+ "cache server: exiting with %" PRIu64 " bytes in memory",
+ cache->bytes_inmem);
if (cache->bytes_dirty != 0 || cache->pages_dirty != 0)
__wt_errx(session,
"cache server: exiting with %" PRIu64
diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c
index 5bbf3b891f7..0cff584f2ab 100644
--- a/src/third_party/wiredtiger/src/evict/evict_page.c
+++ b/src/third_party/wiredtiger/src/evict/evict_page.c
@@ -98,7 +98,7 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive)
if (__wt_ref_is_root(ref))
__wt_ref_out(session, ref);
else
- __wt_rec_page_clean_update(session, ref);
+ __wt_evict_page_clean_update(session, ref);
WT_STAT_FAST_CONN_INCR(session, cache_eviction_clean);
WT_STAT_FAST_DATA_INCR(session, cache_eviction_clean);
@@ -139,11 +139,11 @@ done: session->excl_next = 0;
}
/*
- * __wt_rec_page_clean_update --
+ * __wt_evict_page_clean_update --
* Update a clean page's reference on eviction.
*/
void
-__wt_rec_page_clean_update(WT_SESSION_IMPL *session, WT_REF *ref)
+__wt_evict_page_clean_update(WT_SESSION_IMPL *session, WT_REF *ref)
{
/*
* Discard the page and update the reference structure; if the page has
@@ -327,6 +327,7 @@ __evict_review(WT_SESSION_IMPL *session, WT_REF *ref,
uint32_t flags;
btree = S2BT(session);
+ flags = WT_EVICTING;
/*
* Get exclusive access to the page if our caller doesn't have the tree
@@ -472,7 +473,6 @@ __evict_review(WT_SESSION_IMPL *session, WT_REF *ref,
* they are not expected to split).
*/
if (__wt_page_is_modified(page)) {
- flags = WT_EVICTING;
if (exclusive)
LF_SET(WT_SKIP_UPDATE_ERR);
else if (top && !WT_PAGE_IS_INTERNAL(page) &&
@@ -482,17 +482,18 @@ __evict_review(WT_SESSION_IMPL *session, WT_REF *ref,
WT_ASSERT(session,
!__wt_page_is_modified(page) ||
LF_ISSET(WT_SKIP_UPDATE_RESTORE));
- } else {
- /*
- * If the page was ever modified, make sure all of the updates
- * on the page are old enough they can be discarded from cache.
- */
- if (!exclusive && mod != NULL &&
- !__wt_txn_visible_all(session, mod->rec_max_txn))
- return (EBUSY);
}
/*
+ * If the page was ever modified, make sure all of the updates
+ * on the page are old enough they can be discarded from cache.
+ */
+ if (!exclusive && mod != NULL &&
+ !__wt_txn_visible_all(session, mod->rec_max_txn) &&
+ !LF_ISSET(WT_SKIP_UPDATE_RESTORE))
+ return (EBUSY);
+
+ /*
* Repeat the test: fail if any page in the top-level page's subtree
* won't be merged into its parent.
*/
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index aea9ee98742..ef6f9b40414 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -192,7 +192,7 @@ struct __wt_page_modify {
uint64_t inmem_split_txn;
/* Dirty bytes added to the cache. */
- uint64_t bytes_dirty;
+ size_t bytes_dirty;
/*
* When pages are reconciled, the result is one or more replacement
@@ -532,7 +532,7 @@ struct __wt_page {
#define WT_READGEN_STEP 100
uint64_t read_gen;
- uint64_t memory_footprint; /* Memory attached to the page */
+ size_t memory_footprint; /* Memory attached to the page */
#define WT_PAGE_IS_INTERNAL(page) \
((page)->type == WT_PAGE_COL_INT || (page)->type == WT_PAGE_ROW_INT)
@@ -759,11 +759,11 @@ struct __wt_col {
* with RLE counts greater than 1 when reading the page. We can do a binary
* search in this array, then an offset calculation to find the cell.
*/
-struct __wt_col_rle {
+WT_PACKED_STRUCT_BEGIN(__wt_col_rle)
uint64_t recno; /* Record number of first repeat. */
uint64_t rle; /* Repeat count. */
uint32_t indx; /* Slot of entry in col_var.d */
-} WT_GCC_ATTRIBUTE((packed));
+WT_PACKED_STRUCT_END
/*
* WT_COL_PTR, WT_COL_PTR_SET --
@@ -827,7 +827,7 @@ struct __wt_ikey {
* is done for an entry, WT_UPDATE structures are formed into a forward-linked
* list.
*/
-struct __wt_update {
+WT_PACKED_STRUCT_BEGIN(__wt_update)
uint64_t txnid; /* update transaction */
WT_UPDATE *next; /* forward-linked list */
@@ -846,7 +846,7 @@ struct __wt_update {
/* The untyped value immediately follows the WT_UPDATE structure. */
#define WT_UPDATE_DATA(upd) \
((void *)((uint8_t *)(upd) + sizeof(WT_UPDATE)))
-} WT_GCC_ATTRIBUTE((packed));
+WT_PACKED_STRUCT_END
/*
* WT_INSERT --
@@ -1004,11 +1004,18 @@ struct __wt_insert_head {
* already have a split generation, leave it alone. If our caller is examining
* an index, we don't want the oldest split generation to move forward and
* potentially free it.
+ *
+ * Check that we haven't raced with a split_gen update after publishing: we
+ * rely on the published value not being missed when scanning for the oldest
+ * active split_gen.
*/
#define WT_ENTER_PAGE_INDEX(session) do { \
uint64_t __prev_split_gen = (session)->split_gen; \
if (__prev_split_gen == 0) \
- WT_PUBLISH((session)->split_gen, S2C(session)->split_gen)
+ do { \
+ WT_PUBLISH((session)->split_gen, \
+ S2C(session)->split_gen); \
+ } while ((session)->split_gen != S2C(session)->split_gen)
#define WT_LEAVE_PAGE_INDEX(session) \
if (__prev_split_gen == 0) \
diff --git a/src/third_party/wiredtiger/src/include/btree.h b/src/third_party/wiredtiger/src/include/btree.h
index fa01dd5edc2..dd3acf6940d 100644
--- a/src/third_party/wiredtiger/src/include/btree.h
+++ b/src/third_party/wiredtiger/src/include/btree.h
@@ -98,14 +98,21 @@ struct __wt_btree {
CKSUM_UNCOMPRESSED=3 /* Uncompressed blocks only */
} checksum; /* Checksum configuration */
- u_int dictionary; /* Reconcile: dictionary slots */
- int internal_key_truncate; /* Reconcile: internal key truncate */
- int maximum_depth; /* Reconcile: maximum tree depth */
- int prefix_compression; /* Reconcile: prefix compression */
- u_int prefix_compression_min; /* Reconcile: prefix compression min */
- int split_pct; /* Reconcile: split page percent */
- WT_COMPRESSOR *compressor; /* Reconcile: page compressor */
- WT_RWLOCK *ovfl_lock; /* Reconcile: overflow lock */
+ /*
+ * Reconciliation...
+ */
+ u_int dictionary; /* Dictionary slots */
+ int internal_key_truncate; /* Internal key truncate */
+ int maximum_depth; /* Maximum tree depth */
+ int prefix_compression; /* Prefix compression */
+ u_int prefix_compression_min; /* Prefix compression min */
+#define WT_SPLIT_DEEPEN_MIN_CHILD_DEF 10000
+ u_int split_deepen_min_child; /* Minimum entries to deepen tree */
+#define WT_SPLIT_DEEPEN_PER_CHILD_DEF 100
+ u_int split_deepen_per_child; /* Entries per child when deepened */
+ int split_pct; /* Split page percent */
+ WT_COMPRESSOR *compressor; /* Page compressor */
+ WT_RWLOCK *ovfl_lock; /* Overflow lock */
uint64_t last_recno; /* Column-store last record number */
diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i
index 0dffdc798af..a0cbb23f126 100644
--- a/src/third_party/wiredtiger/src/include/btree.i
+++ b/src/third_party/wiredtiger/src/include/btree.i
@@ -75,6 +75,52 @@ __wt_cache_page_inmem_incr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size)
#endif
/*
+ * __wt_cache_page_byte_dirty_decr --
+ * Decrement the page's dirty byte count, guarding from underflow.
+ */
+static inline void
+__wt_cache_page_byte_dirty_decr(
+ WT_SESSION_IMPL *session, WT_PAGE *page, size_t size)
+{
+ WT_CACHE *cache;
+ size_t decr, orig;
+ int i;
+
+ cache = S2C(session)->cache;
+
+ /*
+ * We don't have exclusive access and there are ways of decrementing the
+ * page's dirty byte count by a too-large value. For example:
+ * T1: __wt_cache_page_inmem_incr(page, size)
+ * page is clean, don't increment dirty byte count
+ * T2: mark page dirty
+ * T1: __wt_cache_page_inmem_decr(page, size)
+ * page is dirty, decrement dirty byte count
+ * and, of course, the reverse where the page is dirty at the increment
+ * and clean at the decrement.
+ *
+ * The page's dirty-byte value always reflects bytes represented in the
+ * cache's dirty-byte count, decrement the page/cache as much as we can
+ * without underflow. If we can't decrement the dirty byte counts in a
+ * few tries, give up: the cache's value will be wrong, but consistent,
+ * and we'll fix it the next time this page is marked clean, or evicted.
+ */
+ for (i = 0; i < 5; ++i) {
+ /*
+ * Take care to read the dirty-byte count only once in case
+ * we're racing with updates.
+ */
+ orig = page->modify->bytes_dirty;
+ decr = WT_MIN(size, orig);
+ if (WT_ATOMIC_CAS8(
+ page->modify->bytes_dirty, orig, orig - decr)) {
+ WT_CACHE_DECR(session, cache->bytes_dirty, decr);
+ break;
+ }
+ }
+}
+
+/*
* __wt_cache_page_inmem_decr --
* Decrement a page's memory footprint in the cache.
*/
@@ -87,17 +133,16 @@ __wt_cache_page_inmem_decr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size)
WT_ASSERT(session, size < WT_EXABYTE);
- WT_CACHE_DECR(session, page->memory_footprint, size);
WT_CACHE_DECR(session, cache->bytes_inmem, size);
- if (__wt_page_is_modified(page)) {
- WT_CACHE_DECR(session, cache->bytes_dirty, size);
- WT_CACHE_DECR(session, page->modify->bytes_dirty, size);
- }
+ WT_CACHE_DECR(session, page->memory_footprint, size);
+ if (__wt_page_is_modified(page))
+ __wt_cache_page_byte_dirty_decr(session, page, size);
}
/*
* __wt_cache_dirty_incr --
- * Increment the cache dirty page/byte counts.
+ * Page switch from clean to dirty: increment the cache dirty page/byte
+ * counts.
*/
static inline void
__wt_cache_dirty_incr(WT_SESSION_IMPL *session, WT_PAGE *page)
@@ -119,42 +164,29 @@ __wt_cache_dirty_incr(WT_SESSION_IMPL *session, WT_PAGE *page)
/*
* __wt_cache_dirty_decr --
- * Decrement the cache dirty page/byte counts.
+ * Page switch from dirty to clean: decrement the cache dirty page/byte
+ * counts.
*/
static inline void
__wt_cache_dirty_decr(WT_SESSION_IMPL *session, WT_PAGE *page)
{
WT_CACHE *cache;
- size_t size;
+ WT_PAGE_MODIFY *modify;
cache = S2C(session)->cache;
if (cache->pages_dirty < 1) {
- (void)__wt_errx(session,
- "cache dirty decrement failed: cache dirty page count went "
- "negative");
+ __wt_errx(session,
+ "cache eviction dirty-page decrement failed: dirty page "
+ "count went negative");
cache->pages_dirty = 0;
} else
(void)WT_ATOMIC_SUB8(cache->pages_dirty, 1);
- /*
- * It is possible to decrement the footprint of the page without making
- * the page dirty (for example when freeing an obsolete update list),
- * so the footprint could change between read and decrement, and we
- * might attempt to decrement by a different amount than the bytes held
- * by the page.
- *
- * We catch that by maintaining a per-page dirty size, and fixing the
- * cache stats if that is non-zero when the page is discarded.
- *
- * Also take care that the global size doesn't go negative. This may
- * lead to small accounting errors (particularly on the last page of the
- * last file in a checkpoint), but that will come out in the wash when
- * the page is evicted.
- */
- size = WT_MIN(page->memory_footprint, cache->bytes_dirty);
- (void)WT_ATOMIC_SUB8(cache->bytes_dirty, size);
- (void)WT_ATOMIC_SUB8(page->modify->bytes_dirty, size);
+ modify = page->modify;
+ if (modify != NULL && modify->bytes_dirty != 0)
+ __wt_cache_page_byte_dirty_decr(
+ session, page, modify->bytes_dirty);
}
/*
@@ -165,23 +197,28 @@ static inline void
__wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page)
{
WT_CACHE *cache;
- WT_PAGE_MODIFY *mod;
+ WT_PAGE_MODIFY *modify;
cache = S2C(session)->cache;
- mod = page->modify;
-
- /*
- * In rare cases, we may race tracking a page's dirty footprint.
- * If so, we will get here with a non-zero dirty_size in the page, and
- * we can fix the global stats.
- */
- if (mod != NULL && mod->bytes_dirty != 0)
- (void)WT_ATOMIC_SUB8(cache->bytes_dirty, mod->bytes_dirty);
+ modify = page->modify;
+
+ /* Update the bytes in-memory to reflect the eviction. */
+ WT_CACHE_DECR(session, cache->bytes_inmem, page->memory_footprint);
+
+ /* Update the cache's dirty-byte count. */
+ if (modify != NULL && modify->bytes_dirty != 0) {
+ if (cache->bytes_dirty < modify->bytes_dirty) {
+ __wt_errx(session,
+ "cache eviction dirty-bytes decrement failed: "
+ "dirty byte count went negative");
+ cache->bytes_dirty = 0;
+ } else
+ WT_CACHE_DECR(
+ session, cache->bytes_dirty, modify->bytes_dirty);
+ }
- WT_ASSERT(session, page->memory_footprint != 0);
+ /* Update pages and bytes evicted. */
(void)WT_ATOMIC_ADD8(cache->bytes_evict, page->memory_footprint);
- page->memory_footprint = 0;
-
(void)WT_ATOMIC_ADD8(cache->pages_evict, 1);
}
@@ -221,8 +258,7 @@ __wt_page_refp(WT_SESSION_IMPL *session,
WT_PAGE_INDEX *pindex;
uint32_t i;
- WT_ASSERT(session,
- WT_SESSION_TXN_STATE(session)->snap_min != WT_TXN_NONE);
+ WT_ASSERT(session, session->split_gen != 0);
/*
* Copy the parent page's index value: the page can split at any time,
@@ -894,11 +930,11 @@ __wt_ref_info(WT_SESSION_IMPL *session,
}
/*
- * __wt_page_release_busy --
- * Release a reference to a page, fail if busy during forced eviction.
+ * __wt_page_release_evict --
+ * Attempt to release and immediately evict a page.
*/
static inline int
-__wt_page_release_busy(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
+__wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref)
{
WT_BTREE *btree;
WT_DECL_RET;
@@ -906,37 +942,8 @@ __wt_page_release_busy(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
int locked, too_big;
btree = S2BT(session);
-
- /*
- * Discard our hazard pointer. Ignore pages we don't have and the root
- * page, which sticks in memory, regardless.
- */
- if (ref == NULL || __wt_ref_is_root(ref))
- return (0);
page = ref->page;
-
- too_big = (page->memory_footprint < btree->maxmempage) ? 0 : 1;
-
- /*
- * Attempt to evict pages with the special "oldest" read generation.
- *
- * This is set for pages that grow larger than the configured
- * memory_page_max setting, and when we are attempting to scan without
- * trashing the cache.
- *
- * Skip this if eviction is disabled for this operation or this tree,
- * or if there is no chance of eviction succeeding for dirty pages due
- * to a checkpoint or because we've already tried writing this page and
- * it contains an update that isn't stable. Also skip forced eviction
- * if we just did an in-memory split.
- */
- if (LF_ISSET(WT_READ_NO_EVICT) ||
- page->read_gen != WT_READGEN_OLDEST ||
- F_ISSET(btree, WT_BTREE_NO_EVICTION) ||
- (__wt_page_is_modified(page) && (btree->checkpointing ||
- !__wt_txn_visible_all(session, page->modify->first_dirty_txn) ||
- !__wt_txn_visible_all(session, page->modify->inmem_split_txn))))
- return (__wt_hazard_clear(session, page));
+ too_big = (page->memory_footprint > btree->maxmempage) ? 1 : 0;
/*
* Take some care with order of operations: if we release the hazard
@@ -945,8 +952,10 @@ __wt_page_release_busy(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
*/
locked = WT_ATOMIC_CAS4(ref->state, WT_REF_MEM, WT_REF_LOCKED);
WT_TRET(__wt_hazard_clear(session, page));
- if (!locked)
+ if (!locked) {
+ WT_TRET(EBUSY);
return (ret);
+ }
(void)WT_ATOMIC_ADD4(btree->evict_busy, 1);
if ((ret = __wt_evict_page(session, ref)) == 0) {
@@ -970,12 +979,46 @@ __wt_page_release_busy(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
/*
* __wt_page_release --
* Release a reference to a page.
*/
static inline int
__wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
{
- WT_RET_BUSY_OK(__wt_page_release_busy(session, ref, flags));
+ WT_BTREE *btree;
+ WT_PAGE *page;
+
+ btree = S2BT(session);
+
+ /*
+ * Discard our hazard pointer. Ignore pages we don't have and the root
+ * page, which sticks in memory, regardless.
+ */
+ if (ref == NULL || __wt_ref_is_root(ref))
+ return (0);
+ page = ref->page;
+
+ /*
+ * Attempt to evict pages with the special "oldest" read generation.
+ *
+ * This is set for pages that grow larger than the configured
+ * memory_page_max setting, and when we are attempting to scan without
+ * trashing the cache.
+ *
+ * Skip this if eviction is disabled for this operation or this tree,
+ * or if there is no chance of eviction succeeding for dirty pages due
+ * to a checkpoint or because we've already tried writing this page and
+ * it contains an update that isn't stable. Also skip forced eviction
+ * if we just did an in-memory split.
+ */
+ if (LF_ISSET(WT_READ_NO_EVICT) ||
+ page->read_gen != WT_READGEN_OLDEST ||
+ F_ISSET(btree, WT_BTREE_NO_EVICTION) ||
+ (__wt_page_is_modified(page) && (btree->checkpointing ||
+ !__wt_txn_visible_all(session, page->modify->first_dirty_txn) ||
+ !__wt_txn_visible_all(session, page->modify->inmem_split_txn))))
+ return (__wt_hazard_clear(session, page));
+
+ WT_RET_BUSY_OK(__wt_page_release_evict(session, ref));
return (0);
}
diff --git a/src/third_party/wiredtiger/src/include/cache.h b/src/third_party/wiredtiger/src/include/cache.h
index deccd676e26..9db0729fe3c 100644
--- a/src/third_party/wiredtiger/src/include/cache.h
+++ b/src/third_party/wiredtiger/src/include/cache.h
@@ -60,6 +60,7 @@ struct __wt_cache {
uint64_t pages_evict;
uint64_t bytes_dirty; /* Bytes/pages currently dirty */
uint64_t pages_dirty;
+ uint64_t bytes_read; /* Bytes read into memory */
uint64_t evict_max_page_size; /* Largest page seen at eviction */
@@ -102,8 +103,8 @@ struct __wt_cache {
/*
* Cache pool information.
*/
- uint64_t cp_saved_evict; /* Evict count from last pass */
- uint64_t cp_current_evict; /* Evict count from current pass */
+ uint64_t cp_saved_read; /* Read count from last pass */
+ uint64_t cp_current_read; /* Read count from current pass */
uint32_t cp_skip_count; /* Post change stabilization */
uint64_t cp_reserved; /* Base size for this cache */
WT_SESSION_IMPL *cp_session; /* May be used for cache management */
diff --git a/src/third_party/wiredtiger/src/include/cache.i b/src/third_party/wiredtiger/src/include/cache.i
index b1ace5e6a80..4bceb5c0d6c 100644
--- a/src/third_party/wiredtiger/src/include/cache.i
+++ b/src/third_party/wiredtiger/src/include/cache.i
@@ -62,7 +62,7 @@ __wt_cache_pages_inuse(WT_CACHE *cache)
static inline uint64_t
__wt_cache_bytes_inuse(WT_CACHE *cache)
{
- return (cache->bytes_inmem - cache->bytes_evict);
+ return (cache->bytes_inmem);
}
/*
diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h
index ff34b014ecf..7b94a7ea94b 100644
--- a/src/third_party/wiredtiger/src/include/connection.h
+++ b/src/third_party/wiredtiger/src/include/connection.h
@@ -146,7 +146,7 @@ struct __wt_connection_impl {
WT_FH *lock_fh; /* Lock file handle */
- uint64_t split_gen; /* Generation number for splits */
+ volatile uint64_t split_gen; /* Generation number for splits */
/*
* The connection keeps a cache of data handles. The set of handles
diff --git a/src/third_party/wiredtiger/src/include/cursor.i b/src/third_party/wiredtiger/src/include/cursor.i
index 8fa9790e096..ad42f989bf4 100644
--- a/src/third_party/wiredtiger/src/include/cursor.i
+++ b/src/third_party/wiredtiger/src/include/cursor.i
@@ -164,8 +164,11 @@ __wt_cursor_dhandle_decr_use(WT_SESSION_IMPL *session)
dhandle = session->dhandle;
+ /* If we close a handle with a time of death set, clear it. */
WT_ASSERT(session, dhandle->session_inuse > 0);
- (void)WT_ATOMIC_SUB4(dhandle->session_inuse, 1);
+ if (WT_ATOMIC_SUB4(dhandle->session_inuse, 1) == 0 &&
+ dhandle->timeofdeath != 0)
+ dhandle->timeofdeath = 0;
}
/*
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index ee9c27581c8..e47f4ba09c0 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -302,7 +302,7 @@ extern int __wt_evict_lru_page(WT_SESSION_IMPL *session, int is_server);
extern int __wt_cache_wait(WT_SESSION_IMPL *session, int full);
extern void __wt_cache_dump(WT_SESSION_IMPL *session);
extern int __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive);
-extern void __wt_rec_page_clean_update(WT_SESSION_IMPL *session, WT_REF *ref);
+extern void __wt_evict_page_clean_update(WT_SESSION_IMPL *session, WT_REF *ref);
extern int __wt_log_ckpt(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn);
extern void __wt_log_written_reset(WT_SESSION_IMPL *session);
extern int __wt_log_get_all_files(WT_SESSION_IMPL *session, char ***filesp, u_int *countp, uint32_t *maxid, int active_only);
@@ -423,7 +423,7 @@ extern int __wt_meta_track_handle_lock(WT_SESSION_IMPL *session, int created);
extern int __wt_turtle_init(WT_SESSION_IMPL *session);
extern int __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep);
extern int __wt_turtle_update( WT_SESSION_IMPL *session, const char *key, const char *value);
-extern void __wt_abort(WT_SESSION_IMPL *session) WT_GCC_ATTRIBUTE((noreturn));
+extern void __wt_abort(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
extern int __wt_calloc(WT_SESSION_IMPL *session, size_t number, size_t size, void *retp);
extern int __wt_realloc(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp);
extern int __wt_realloc_aligned(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp);
@@ -567,13 +567,13 @@ extern uint32_t __wt_cksum(const void *chunk, size_t len);
extern void __wt_cksum_init(void);
extern void __wt_event_handler_set(WT_SESSION_IMPL *session, WT_EVENT_HANDLER *handler);
extern int __wt_eventv(WT_SESSION_IMPL *session, int msg_event, int error, const char *file_name, int line_number, const char *fmt, va_list ap);
-extern void __wt_err(WT_SESSION_IMPL *session, int error, const char *fmt, ...) WT_GCC_ATTRIBUTE((format (printf, 3, 4)));
-extern void __wt_errx(WT_SESSION_IMPL *session, const char *fmt, ...) WT_GCC_ATTRIBUTE((format (printf, 2, 3)));
-extern int __wt_ext_err_printf( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *fmt, ...) WT_GCC_ATTRIBUTE((format (printf, 3, 4)));
-extern int __wt_msg(WT_SESSION_IMPL *session, const char *fmt, ...) WT_GCC_ATTRIBUTE((format (printf, 2, 3)));
-extern int __wt_ext_msg_printf( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *fmt, ...) WT_GCC_ATTRIBUTE((format (printf, 3, 4)));
+extern void __wt_err(WT_SESSION_IMPL *session, int error, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4)));
+extern void __wt_errx(WT_SESSION_IMPL *session, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 2, 3)));
+extern int __wt_ext_err_printf( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4)));
+extern int __wt_msg(WT_SESSION_IMPL *session, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 2, 3)));
+extern int __wt_ext_msg_printf( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4)));
extern int __wt_progress(WT_SESSION_IMPL *session, const char *s, uint64_t v);
-extern void __wt_assert(WT_SESSION_IMPL *session, int error, const char *file_name, int line_number, const char *fmt, ...) WT_GCC_ATTRIBUTE((format (printf, 5, 6)));
+extern void __wt_assert(WT_SESSION_IMPL *session, int error, const char *file_name, int line_number, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 5, 6)));
extern int __wt_panic(WT_SESSION_IMPL *session);
extern int __wt_illegal_value(WT_SESSION_IMPL *session, const char *name);
extern int __wt_object_unsupported(WT_SESSION_IMPL *session, const char *uri);
@@ -616,8 +616,8 @@ extern uint32_t __wt_rduppo2(uint32_t n, uint32_t po2);
extern void __wt_random_init(uint32_t *rnd);
extern uint32_t __wt_random(uint32_t *rnd);
extern int __wt_buf_grow_worker(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size);
-extern int __wt_buf_fmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) WT_GCC_ATTRIBUTE((format (printf, 3, 4)));
-extern int __wt_buf_catfmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) WT_GCC_ATTRIBUTE((format (printf, 3, 4)));
+extern int __wt_buf_fmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4)));
+extern int __wt_buf_catfmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4)));
extern int
__wt_scr_alloc_func(WT_SESSION_IMPL *session, size_t size, WT_ITEM **scratchp
#ifdef HAVE_DIAGNOSTIC
diff --git a/src/third_party/wiredtiger/src/include/gcc.h b/src/third_party/wiredtiger/src/include/gcc.h
index 7b606ca80b1..805838eb84b 100644
--- a/src/third_party/wiredtiger/src/include/gcc.h
+++ b/src/third_party/wiredtiger/src/include/gcc.h
@@ -7,7 +7,12 @@
*/
/* Add GCC-specific attributes to types and function declarations. */
-#define WT_GCC_ATTRIBUTE(x) __attribute__(x)
+#define WT_COMPILER_TYPE_ALIGN(x) __attribute__((aligned(x)))
+
+#define WT_PACKED_STRUCT_BEGIN(name) \
+ struct __attribute__ ((__packed__)) name {
+#define WT_PACKED_STRUCT_END \
+ };
/*
* Attribute are only permitted on function declarations, not definitions.
@@ -15,6 +20,7 @@
* dist/s_prototypes to create extern.h.
*/
#define WT_GCC_FUNC_ATTRIBUTE(x)
+#define WT_GCC_FUNC_DECL_ATTRIBUTE(x) __attribute__(x)
/*
* Atomic writes:
diff --git a/src/third_party/wiredtiger/src/include/lint.h b/src/third_party/wiredtiger/src/include/lint.h
index 9c560339e03..5668abc6dab 100644
--- a/src/third_party/wiredtiger/src/include/lint.h
+++ b/src/third_party/wiredtiger/src/include/lint.h
@@ -6,8 +6,15 @@
* See the file LICENSE for redistribution information.
*/
-#define WT_GCC_ATTRIBUTE(x)
+#define WT_COMPILER_TYPE_ALIGN(x)
+
+#define WT_PACKED_STRUCT_BEGIN(name) \
+ struct name {
+#define WT_PACKED_STRUCT_END \
+ };
+
#define WT_GCC_FUNC_ATTRIBUTE(x)
+#define WT_GCC_FUNC_DECL_ATTRIBUTE(x)
#define __WT_ATOMIC_ADD(v, val) \
((v) += (val))
diff --git a/src/third_party/wiredtiger/src/include/log.h b/src/third_party/wiredtiger/src/include/log.h
index f88a5381227..82d90070609 100644
--- a/src/third_party/wiredtiger/src/include/log.h
+++ b/src/third_party/wiredtiger/src/include/log.h
@@ -70,7 +70,7 @@
#define WT_LOG_SLOT_FREE 1
#define WT_LOG_SLOT_PENDING 2
#define WT_LOG_SLOT_READY 3
-typedef struct {
+typedef WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) struct {
int64_t slot_state; /* Slot state */
uint64_t slot_group_size; /* Group size */
int32_t slot_error; /* Error value */
@@ -90,7 +90,7 @@ typedef struct {
#define SLOT_SYNC 0x08 /* Needs sync on release */
#define SLOT_SYNC_DIR 0x10 /* Directory sync on release */
uint32_t flags; /* Flags */
-} WT_LOGSLOT WT_GCC_ATTRIBUTE((aligned(WT_CACHE_LINE_ALIGNMENT)));
+} WT_LOGSLOT;
typedef struct {
WT_LOGSLOT *slot;
diff --git a/src/third_party/wiredtiger/src/include/lsm.h b/src/third_party/wiredtiger/src/include/lsm.h
index 8038e5a34ab..8d50f3ea73b 100644
--- a/src/third_party/wiredtiger/src/include/lsm.h
+++ b/src/third_party/wiredtiger/src/include/lsm.h
@@ -73,7 +73,7 @@ struct __wt_cursor_lsm {
* WT_LSM_CHUNK --
* A single chunk (file) in an LSM tree.
*/
-struct __wt_lsm_chunk {
+struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_lsm_chunk {
const char *uri; /* Data source for this chunk */
const char *bloom_uri; /* URI of Bloom filter, if any */
struct timespec create_ts; /* Creation time (for rate limiting) */
@@ -101,7 +101,7 @@ struct __wt_lsm_chunk {
#define WT_LSM_CHUNK_ONDISK 0x04
#define WT_LSM_CHUNK_STABLE 0x08
uint32_t flags;
-} WT_GCC_ATTRIBUTE((aligned(WT_CACHE_LINE_ALIGNMENT)));
+};
/*
* Different types of work units. Used by LSM worker threads to choose which
diff --git a/src/third_party/wiredtiger/src/include/msvc.h b/src/third_party/wiredtiger/src/include/msvc.h
index f176a40f2bf..3ec74b2d629 100644
--- a/src/third_party/wiredtiger/src/include/msvc.h
+++ b/src/third_party/wiredtiger/src/include/msvc.h
@@ -13,8 +13,21 @@
#define inline __inline
-#define WT_GCC_ATTRIBUTE(x)
+/*
+ * Add MSVC-specific attributes and pragmas to types and function declarations.
+ */
+#define WT_COMPILER_TYPE_ALIGN(x) __declspec(align(x))
+
+#define WT_PACKED_STRUCT_BEGIN(name) \
+ __pragma(pack(push,1)) \
+ struct name {
+
+#define WT_PACKED_STRUCT_END \
+ }; \
+ __pragma(pack(pop))
+
#define WT_GCC_FUNC_ATTRIBUTE(x)
+#define WT_GCC_FUNC_DECL_ATTRIBUTE(x)
#define __WT_ATOMIC_ADD(v, val, n, s, t) \
(WT_STATIC_ASSERT(sizeof(v) == (n)), \
diff --git a/src/third_party/wiredtiger/src/include/mutex.h b/src/third_party/wiredtiger/src/include/mutex.h
index c5b7587303d..07aa740c525 100644
--- a/src/third_party/wiredtiger/src/include/mutex.h
+++ b/src/third_party/wiredtiger/src/include/mutex.h
@@ -68,15 +68,15 @@ struct __wt_rwlock {
#if SPINLOCK_TYPE == SPINLOCK_GCC
-typedef volatile int
- WT_SPINLOCK WT_GCC_ATTRIBUTE((aligned(WT_CACHE_LINE_ALIGNMENT)));
+typedef volatile int WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT)
+ WT_SPINLOCK;
#elif SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX ||\
SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE ||\
SPINLOCK_TYPE == SPINLOCK_MSVC ||\
SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_LOGGING
-typedef struct {
+typedef WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) struct {
wt_mutex_t lock;
uint64_t counter; /* Statistics: counter */
@@ -85,7 +85,7 @@ typedef struct {
int8_t id; /* Statistics: current holder ID */
int8_t initialized; /* Lock initialized, for cleanup */
-} WT_SPINLOCK WT_GCC_ATTRIBUTE((aligned(WT_CACHE_LINE_ALIGNMENT)));
+} WT_SPINLOCK;
#else
diff --git a/src/third_party/wiredtiger/src/include/serial.i b/src/third_party/wiredtiger/src/include/serial.i
index fb610383a75..b42b792f5a7 100644
--- a/src/third_party/wiredtiger/src/include/serial.i
+++ b/src/third_party/wiredtiger/src/include/serial.i
@@ -245,6 +245,9 @@ __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
*/
__wt_cache_page_inmem_incr(session, page, upd_size);
+ /* Mark the page dirty after updating the footprint. */
+ __wt_page_modify_set(session, page);
+
/*
* If there are subsequent WT_UPDATE structures, we're evicting pages
* and the page-scanning mutex isn't held, discard obsolete WT_UPDATE
@@ -264,8 +267,5 @@ __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
__wt_update_obsolete_free(session, page, obsolete);
}
- /* Mark the page dirty after updating the footprint. */
- __wt_page_modify_set(session, page);
-
return (0);
}
diff --git a/src/third_party/wiredtiger/src/include/session.h b/src/third_party/wiredtiger/src/include/session.h
index c2ed3473dfb..909f1daf5a4 100644
--- a/src/third_party/wiredtiger/src/include/session.h
+++ b/src/third_party/wiredtiger/src/include/session.h
@@ -42,7 +42,7 @@ struct __wt_hazard {
* WT_SESSION_IMPL --
* Implementation of WT_SESSION.
*/
-struct __wt_session_impl {
+struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_session_impl {
WT_SESSION iface;
void *lang_private; /* Language specific private storage */
@@ -190,4 +190,4 @@ struct __wt_session_impl {
uint32_t hazard_size; /* Allocated slots in hazard array. */
uint32_t nhazard; /* Count of active hazard pointers */
WT_HAZARD *hazard; /* Hazard pointer array */
-} WT_GCC_ATTRIBUTE((aligned(WT_CACHE_LINE_ALIGNMENT)));
+};
diff --git a/src/third_party/wiredtiger/src/include/txn.h b/src/third_party/wiredtiger/src/include/txn.h
index 8380e55effb..c1c4703316b 100644
--- a/src/third_party/wiredtiger/src/include/txn.h
+++ b/src/third_party/wiredtiger/src/include/txn.h
@@ -25,10 +25,10 @@
#define WT_SESSION_TXN_STATE(s) (&S2C(s)->txn_global.states[(s)->id])
-struct __wt_txn_state {
+struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_txn_state {
volatile uint64_t id;
volatile uint64_t snap_min;
-} WT_GCC_ATTRIBUTE((aligned(WT_CACHE_LINE_ALIGNMENT)));
+};
struct __wt_txn_global {
volatile uint64_t current; /* Current transaction ID. */
diff --git a/src/third_party/wiredtiger/src/include/wt_internal.h b/src/third_party/wiredtiger/src/include/wt_internal.h
index 1b3a9b62626..576827bebcd 100644
--- a/src/third_party/wiredtiger/src/include/wt_internal.h
+++ b/src/third_party/wiredtiger/src/include/wt_internal.h
@@ -36,7 +36,9 @@ extern "C" {
#include <io.h>
#endif
#include <limits.h>
-#ifndef _WIN32
+#ifdef _WIN32
+#include <process.h>
+#else
#include <pthread.h>
#endif
#ifdef HAVE_PTHREAD_NP_H
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_track.c b/src/third_party/wiredtiger/src/reconcile/rec_track.c
index c5c72391248..2533ad9e201 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_track.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_track.c
@@ -335,12 +335,12 @@ __ovfl_reuse_wrapup(WT_SESSION_IMPL *session, WT_PAGE *page)
* fixing up skiplist links.
*/
for (i = WT_SKIP_MAXDEPTH - 1; i > 0; --i)
- for (e = &head[i]; *e != NULL;) {
- if (F_ISSET(*e, WT_OVFL_REUSE_INUSE)) {
- e = &(*e)->next[i];
+ for (e = &head[i]; (reuse = *e) != NULL;) {
+ if (F_ISSET(reuse, WT_OVFL_REUSE_INUSE)) {
+ e = &reuse->next[i];
continue;
}
- *e = (*e)->next[i];
+ *e = reuse->next[i];
}
/*
@@ -359,19 +359,20 @@ __ovfl_reuse_wrapup(WT_SESSION_IMPL *session, WT_PAGE *page)
if (F_ISSET(reuse, WT_OVFL_REUSE_INUSE)) {
F_CLR(reuse,
WT_OVFL_REUSE_INUSE | WT_OVFL_REUSE_JUST_ADDED);
- e = &(*e)->next[0];
+ e = &reuse->next[0];
continue;
}
- *e = (*e)->next[0];
+ *e = reuse->next[0];
WT_ASSERT(session, !F_ISSET(reuse, WT_OVFL_REUSE_JUST_ADDED));
- decr += WT_OVFL_SIZE(reuse, WT_OVFL_REUSE);
if (WT_VERBOSE_ISSET(session, WT_VERB_OVERFLOW))
WT_RET(
__ovfl_reuse_verbose(session, page, reuse, "free"));
+
WT_RET(bm->free(
bm, session, WT_OVFL_REUSE_ADDR(reuse), reuse->addr_size));
+ decr += WT_OVFL_SIZE(reuse, WT_OVFL_REUSE);
__wt_free(session, reuse);
}
@@ -404,12 +405,12 @@ __ovfl_reuse_wrapup_err(WT_SESSION_IMPL *session, WT_PAGE *page)
* fixing up skiplist links.
*/
for (i = WT_SKIP_MAXDEPTH - 1; i > 0; --i)
- for (e = &head[i]; *e != NULL;) {
- if (!F_ISSET(*e, WT_OVFL_REUSE_JUST_ADDED)) {
- e = &(*e)->next[i];
+ for (e = &head[i]; (reuse = *e) != NULL;) {
+ if (!F_ISSET(reuse, WT_OVFL_REUSE_JUST_ADDED)) {
+ e = &reuse->next[i];
continue;
}
- *e = (*e)->next[i];
+ *e = reuse->next[i];
}
/*
@@ -420,17 +421,17 @@ __ovfl_reuse_wrapup_err(WT_SESSION_IMPL *session, WT_PAGE *page)
for (e = &head[0]; (reuse = *e) != NULL;) {
if (!F_ISSET(reuse, WT_OVFL_REUSE_JUST_ADDED)) {
F_CLR(reuse, WT_OVFL_REUSE_INUSE);
- e = &(*e)->next[0];
+ e = &reuse->next[0];
continue;
}
- *e = (*e)->next[0];
+ *e = reuse->next[0];
if (WT_VERBOSE_ISSET(session, WT_VERB_OVERFLOW))
WT_RET(
__ovfl_reuse_verbose(session, page, reuse, "free"));
+
WT_TRET(bm->free(
bm, session, WT_OVFL_REUSE_ADDR(reuse), reuse->addr_size));
-
decr += WT_OVFL_SIZE(reuse, WT_OVFL_REUSE);
__wt_free(session, reuse);
}
@@ -722,26 +723,26 @@ __ovfl_txnc_wrapup(WT_SESSION_IMPL *session, WT_PAGE *page)
for (i = WT_SKIP_MAXDEPTH - 1; i > 0; --i)
for (e = &head[i]; (txnc = *e) != NULL;) {
if (TXNID_LE(oldest_txn, txnc->current)) {
- e = &(*e)->next[i];
+ e = &txnc->next[i];
continue;
}
- *e = (*e)->next[i];
+ *e = txnc->next[i];
}
/* Second, discard any no longer needed transaction-cache records. */
decr = 0;
for (e = &head[0]; (txnc = *e) != NULL;) {
if (TXNID_LE(oldest_txn, txnc->current)) {
- e = &(*e)->next[0];
+ e = &txnc->next[0];
continue;
}
- *e = (*e)->next[0];
-
- decr += WT_OVFL_SIZE(txnc, WT_OVFL_TXNC);
+ *e = txnc->next[0];
if (WT_VERBOSE_ISSET(session, WT_VERB_OVERFLOW))
WT_RET(
__ovfl_txnc_verbose(session, page, txnc, "free"));
+
+ decr += WT_OVFL_SIZE(txnc, WT_OVFL_TXNC);
__wt_free(session, txnc);
}
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index 0300596f90b..be66309c77f 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -440,8 +440,11 @@ __wt_reconcile(WT_SESSION_IMPL *session,
* Root pages are special, splits have to be done, we can't put it off
* as the parent's problem any more.
*/
- if (__wt_ref_is_root(ref))
- return (__rec_root_write(session, page, flags));
+ if (__wt_ref_is_root(ref)) {
+ WT_WITH_PAGE_INDEX(session,
+ ret = __rec_root_write(session, page, flags));
+ return (ret);
+ }
/*
* Otherwise, mark the page's parent dirty.
@@ -504,6 +507,7 @@ __rec_root_write(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags)
WT_ILLEGAL_VALUE(session);
}
+ WT_ASSERT(session, session->split_gen != 0);
pindex = WT_INTL_INDEX_COPY(next);
for (i = 0; i < mod->mod_multi_entries; ++i) {
WT_ERR(__wt_multi_to_ref(session,
@@ -2895,7 +2899,7 @@ __wt_bulk_init(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
WT_RET_MSG(session, EINVAL,
"bulk-load is only possible for newly created trees");
- /* Set a reference to the empty leaf page. */
+ /* Get a reference to the empty leaf page. */
pindex = WT_INTL_INDEX_COPY(btree->root.page);
cbulk->ref = pindex->index[0];
cbulk->leaf = cbulk->ref->page;
@@ -4005,7 +4009,6 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
WT_ERR(__rec_child_modify(session, r, ref, &hazard, &state));
addr = ref->addr;
child = ref->page;
- vtype = 0;
/* Deleted child we don't have to write. */
if (state == WT_CHILD_IGNORE) {
@@ -4023,10 +4026,6 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
continue;
}
- /* Deleted child requiring a proxy cell. */
- if (state == WT_CHILD_PROXY)
- vtype = WT_CELL_ADDR_DEL;
-
/*
* Modified child. Empty pages are merged into the parent and
* discarded.
@@ -4076,22 +4075,22 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
/*
* Build the value cell, the child page's address. Addr points
- * to an on-page cell or an off-page WT_ADDR structure. The
- * cell type has been set in the case of page deletion requiring
+ * to an on-page cell or an off-page WT_ADDR structure. There's
+ * a special cell type in the case of page deletion requiring
* a proxy cell, otherwise use the information from the addr or
* original cell.
*/
if (__wt_off_page(page, addr)) {
p = addr->addr;
size = addr->size;
- if (vtype == 0)
- vtype = __rec_vtype(addr);
+ vtype = state == WT_CHILD_PROXY ?
+ WT_CELL_ADDR_DEL : __rec_vtype(addr);
} else {
__wt_cell_unpack(ref->addr, vpack);
p = vpack->data;
size = vpack->size;
- if (vtype == 0)
- vtype = vpack->raw;
+ vtype = state == WT_CHILD_PROXY ?
+ WT_CELL_ADDR_DEL : (u_int)vpack->raw;
}
__rec_cell_build_addr(r, p, size, vtype, 0);
CHILD_RELEASE_ERR(session, hazard, ref);