summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatt Kangas <matt.kangas@mongodb.com>2014-12-02 09:02:22 -0500
committerMatt Kangas <matt.kangas@mongodb.com>2014-12-02 14:44:03 -0500
commite2a08e2cd6fb0ebd226c22cae6ba73425613e477 (patch)
tree9d935b6d12d9444138676ae444d3e35f516b2e92
parent15a2b7ca39ce472e43d8d37c6127eb4c3958456c (diff)
downloadmongo-e2a08e2cd6fb0ebd226c22cae6ba73425613e477.tar.gz
Import wiredtiger-wiredtiger-2.4.0-494-ge498f55.tar.gz from wiredtiger branch mongodb-2.8
-rw-r--r--src/third_party/wiredtiger/SConstruct10
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/runners/log-append.wtperf8
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/runners/log-nockpt.wtperf12
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/runners/log.wtperf11
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/runners/medium-multi-lsm-noprefix.wtperf2
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/runners/medium-multi-lsm.wtperf2
-rwxr-xr-xsrc/third_party/wiredtiger/bench/wtperf/runners/wtperf_run.sh33
-rw-r--r--src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i2
-rw-r--r--src/third_party/wiredtiger/dist/stat.py13
-rw-r--r--src/third_party/wiredtiger/dist/stat_data.py80
-rw-r--r--src/third_party/wiredtiger/src/block/block_ext.c2
-rw-r--r--src/third_party/wiredtiger/src/block/block_vrfy.c26
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_curnext.c49
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_curprev.c65
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_evict.c5
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_handle.c8
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_slvg.c10
-rw-r--r--src/third_party/wiredtiger/src/btree/bt_sync.c17
-rw-r--r--src/third_party/wiredtiger/src/btree/rec_evict.c84
-rw-r--r--src/third_party/wiredtiger/src/btree/rec_split.c588
-rw-r--r--src/third_party/wiredtiger/src/btree/rec_write.c6
-rw-r--r--src/third_party/wiredtiger/src/cursor/cur_file.c3
-rw-r--r--src/third_party/wiredtiger/src/include/btmem.h6
-rw-r--r--src/third_party/wiredtiger/src/include/btree.i6
-rw-r--r--src/third_party/wiredtiger/src/include/extern.h9
-rw-r--r--src/third_party/wiredtiger/src/include/stat.h3
-rw-r--r--src/third_party/wiredtiger/src/include/wiredtiger.in288
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_manager.c38
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_tree.c17
-rw-r--r--src/third_party/wiredtiger/src/lsm/lsm_work_unit.c43
-rw-r--r--src/third_party/wiredtiger/src/support/stat.c44
-rw-r--r--src/third_party/wiredtiger/src/txn/txn_ckpt.c17
-rw-r--r--src/third_party/wiredtiger/tools/stat_data.py16
-rw-r--r--src/third_party/wiredtiger/tools/wtstats.py123
34 files changed, 1148 insertions, 498 deletions
diff --git a/src/third_party/wiredtiger/SConstruct b/src/third_party/wiredtiger/SConstruct
index 1ec3496d565..248e967872e 100644
--- a/src/third_party/wiredtiger/SConstruct
+++ b/src/third_party/wiredtiger/SConstruct
@@ -199,11 +199,19 @@ env.Program("wt", [
LIBS=[wtlib] + wtlibs)
if GetOption("swig"):
- env.SharedLibrary('_wiredtiger',
+ swiglib = env.SharedLibrary('_wiredtiger',
[ 'lang\python\wiredtiger.i'],
SHLIBSUFFIX=".pyd",
LIBS=[wtlib])
+ copySwig = env.Command(
+ 'lang/python/wiredtiger/__init__.py',
+ 'lang/python/wiredtiger.py',
+ Copy('$TARGET', '$SOURCE'))
+ env.Depends(copySwig, swiglib)
+
+ env.Install('lang/python/wiredtiger/', swiglib)
+
# Shim library of functions to emulate POSIX on Windows
shim = env.Library("window_shim",
["test/windows/windows_shim.c"])
diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/log-append.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/log-append.wtperf
new file mode 100644
index 00000000000..9d0a78e3c61
--- /dev/null
+++ b/src/third_party/wiredtiger/bench/wtperf/runners/log-append.wtperf
@@ -0,0 +1,8 @@
+# wtperf options file: Test a log file with a multi-threaded
+# append workload.
+conn_config="cache_size=1G,log=(enabled=true,file_max=20MB),checkpoint=(log_size=1G)"
+table_config="type=file"
+icount=50000000
+report_interval=5
+run_time=0
+populate_threads=8
diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/log-nockpt.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/log-nockpt.wtperf
new file mode 100644
index 00000000000..a078cead740
--- /dev/null
+++ b/src/third_party/wiredtiger/bench/wtperf/runners/log-nockpt.wtperf
@@ -0,0 +1,12 @@
+# wtperf options file: Test performance with a log file enabled.
+# Set the log file reasonably small to catch log-switch
+# bottlenecks.
+conn_config="cache_size=1G,log=(enabled=true,file_max=20MB)"
+table_config="type=file"
+icount=50000
+report_interval=5
+run_time=40
+populate_threads=1
+random_range=50000000
+threads=((count=8,inserts=1))
+
diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/log.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/log.wtperf
new file mode 100644
index 00000000000..c336c9d8a5f
--- /dev/null
+++ b/src/third_party/wiredtiger/bench/wtperf/runners/log.wtperf
@@ -0,0 +1,11 @@
+# wtperf options file: Test performance with a log file enabled.
+# Set the log file reasonably small to catch log-switch
+# bottlenecks.
+conn_config="cache_size=1G,log=(enabled=true,file_max=20MB),checkpoint=(log_size=1G)"
+table_config="type=file"
+icount=50000
+report_interval=5
+run_time=120
+populate_threads=1
+random_range=50000000
+threads=((count=8,inserts=1))
diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/medium-multi-lsm-noprefix.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/medium-multi-lsm-noprefix.wtperf
index 413c16075d3..091c4e69ad1 100644
--- a/src/third_party/wiredtiger/bench/wtperf/runners/medium-multi-lsm-noprefix.wtperf
+++ b/src/third_party/wiredtiger/bench/wtperf/runners/medium-multi-lsm-noprefix.wtperf
@@ -1,6 +1,6 @@
# wtperf options file: medium lsm configuration, with multiple tables.
conn_config="cache_size=1G,lsm_manager=(worker_thread_max=8)"
-table_config="lsm=(chunk_size=100MB,chunk_max=1TB),type=lsm,prefix_compression=false,os_cache_dirty_max=16MB"
+table_config="lsm=(chunk_max=1TB),type=lsm,prefix_compression=false,os_cache_dirty_max=16MB"
icount=50000000
populate_threads=1
compact=true
diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/medium-multi-lsm.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/medium-multi-lsm.wtperf
index 99b7b49aebd..d8433352311 100644
--- a/src/third_party/wiredtiger/bench/wtperf/runners/medium-multi-lsm.wtperf
+++ b/src/third_party/wiredtiger/bench/wtperf/runners/medium-multi-lsm.wtperf
@@ -1,6 +1,6 @@
# wtperf options file: medium lsm configuration, with multiple tables.
conn_config="cache_size=1G,lsm_manager=(worker_thread_max=8)"
-table_config="lsm=(chunk_size=100MB,chunk_max=1TB),type=lsm,os_cache_dirty_max=16MB"
+table_config="lsm=(chunk_max=1TB),type=lsm,os_cache_dirty_max=16MB"
icount=50000000
populate_threads=1
compact=true
diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/wtperf_run.sh b/src/third_party/wiredtiger/bench/wtperf/runners/wtperf_run.sh
index 3296a4072b5..d5de7c4abdb 100755
--- a/src/third_party/wiredtiger/bench/wtperf/runners/wtperf_run.sh
+++ b/src/third_party/wiredtiger/bench/wtperf/runners/wtperf_run.sh
@@ -10,19 +10,20 @@
# build_posix/bench/wtperf).
#
# This script should be invoked with the pathname of the wtperf test
-# config to run.
+# config to run and the number of runs.
#
-if test "$#" -ne "1"; then
- echo "Must specify wtperf test to run"
+if test "$#" -ne "2"; then
+ echo "Must specify wtperf test to run and number of runs"
exit 1
fi
wttest=$1
+runmax=$2
+
home=./WT_TEST
outfile=./wtperf.out
rm -f $outfile
-runmax=5
-run=1
+# Each of these has an entry for each op in ops below.
avg=(0 0 0)
max=(0 0 0)
min=(0 0 0)
@@ -72,6 +73,7 @@ isstable()
getmin=0
getmax=1
+run=1
while test "$run" -le "$runmax"; do
rm -rf $home
mkdir $home
@@ -144,21 +146,36 @@ while test "$run" -le "$runmax"; do
run=`expr $run + 1`
done
-if test "$run" -le "$runmax"; then
+skipminmax=0
+if test "$runmax" -le "2"; then
+ numruns=$(getval $getmin $run $runmax)
+ skipminmax=1
+elif test "$run" -le "$runmax"; then
numruns=`expr $run - 2`
else
numruns=`expr $runmax - 2`
fi
+if test "$numruns" -eq "0"; then
+ $numruns=1
+fi
#
# The sum contains all runs. Subtract out the min/max values.
# Average the remaining and write it out to the file.
#
for i in ${!min[*]}; do
if test "$i" -eq "$loadindex"; then
- s=`echo "scale=3; ${sum[$i]} - ${min[$i]} - ${max[$i]}" | bc`
+ if test "$skipminmax" -eq "0"; then
+ s=`echo "scale=3; ${sum[$i]} - ${min[$i]} - ${max[$i]}" | bc`
+ else
+ s=${sum[$i]}
+ fi
avg[$i]=`echo "scale=3; $s / $numruns" | bc`
else
- s=`expr ${sum[$i]} - ${min[$i]} - ${max[$i]}`
+ if test "$skipminmax" -eq "0"; then
+ s=`expr ${sum[$i]} - ${min[$i]} - ${max[$i]}`
+ else
+ s=${sum[$i]}
+ fi
avg[$i]=`expr $s / $numruns`
fi
done
diff --git a/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i b/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i
index 3b4ddb6b3ad..3fec9bddac9 100644
--- a/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i
+++ b/src/third_party/wiredtiger/bench/wtperf/wtperf_opt.i
@@ -105,7 +105,7 @@ DEF_OPT_AS_UINT32(database_count, 1,
" threads")
DEF_OPT_AS_UINT32(icount, 5000,
"number of records to initially populate. If multiple tables are "
- "configured, each table has this many items inserted.")
+ "configured the count is spread evenly across all tables.")
DEF_OPT_AS_BOOL(insert_rmw, 0,
"execute a read prior to each insert in workload phase")
DEF_OPT_AS_UINT32(key_sz, 20, "key size")
diff --git a/src/third_party/wiredtiger/dist/stat.py b/src/third_party/wiredtiger/dist/stat.py
index 6a3a1b74db3..5ffcd07e66c 100644
--- a/src/third_party/wiredtiger/dist/stat.py
+++ b/src/third_party/wiredtiger/dist/stat.py
@@ -5,7 +5,7 @@ import re, string, sys, textwrap
from dist import compare_srcfile
# Read the source files.
-from stat_data import dsrc_stats, connection_stats
+from stat_data import groups, dsrc_stats, connection_stats
def print_struct(title, name, base, stats):
'''Print the structures for the stat.h file.'''
@@ -161,23 +161,34 @@ compare_srcfile(tmp_file, '../src/support/stat.c')
# Update the statlog file with the entries we can scale per second.
scale_info = 'no_scale_per_second_list = [\n'
clear_info = 'no_clear_list = [\n'
+prefix_list = []
for l in sorted(connection_stats):
+ prefix_list.append(l.prefix)
if 'no_scale' in l.flags:
scale_info += ' \'' + l.desc + '\',\n'
if 'no_clear' in l.flags:
clear_info += ' \'' + l.desc + '\',\n'
for l in sorted(dsrc_stats):
+ prefix_list.append(l.prefix)
if 'no_scale' in l.flags:
scale_info += ' \'' + l.desc + '\',\n'
if 'no_clear' in l.flags:
clear_info += ' \'' + l.desc + '\',\n'
scale_info += ']\n'
clear_info += ']\n'
+prefix_info = 'prefix_list = [\n'
+# Remove the duplicates and print out the list
+for l in list(set(prefix_list)):
+ prefix_info += ' \'' + l + '\',\n'
+prefix_info += ']\n'
+group_info = 'groups = ' + str(groups)
tmp_file = '__tmp'
f = open(tmp_file, 'w')
f.write('# DO NOT EDIT: automatically built by dist/stat.py. */\n\n')
f.write(scale_info)
f.write(clear_info)
+f.write(prefix_info)
+f.write(group_info)
f.close()
compare_srcfile(tmp_file, '../tools/stat_data.py')
diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py
index df6c919f808..1e4f1b41cb9 100644
--- a/src/third_party/wiredtiger/dist/stat_data.py
+++ b/src/third_party/wiredtiger/dist/stat_data.py
@@ -26,44 +26,69 @@ class Stat:
return cmp(self.desc.lower(), other.desc.lower())
class AsyncStat(Stat):
+ prefix = 'async'
def __init__(self, name, desc, flags=''):
- Stat.__init__(self, name, 'async', desc, flags)
+ Stat.__init__(self, name, AsyncStat.prefix, desc, flags)
class BlockStat(Stat):
+ prefix = 'block-manager'
def __init__(self, name, desc, flags=''):
- Stat.__init__(self, name, 'block-manager', desc, flags)
+ Stat.__init__(self, name, BlockStat.prefix, desc, flags)
class BtreeStat(Stat):
+ prefix = 'btree'
def __init__(self, name, desc, flags=''):
- Stat.__init__(self, name, 'btree', desc, flags)
+ Stat.__init__(self, name, BtreeStat.prefix, desc, flags)
class CacheStat(Stat):
+ prefix = 'cache'
def __init__(self, name, desc, flags=''):
- Stat.__init__(self, name, 'cache', desc, flags)
+ Stat.__init__(self, name, CacheStat.prefix, desc, flags)
class CompressStat(Stat):
+ prefix = 'compression'
def __init__(self, name, desc, flags=''):
- Stat.__init__(self, name, 'compression', desc, flags)
-class CursorStat(Stat):
- def __init__(self, name, desc, flags=''):
- Stat.__init__(self, name, 'cursor', desc, flags)
+ Stat.__init__(self, name, CompressStat.prefix, desc, flags)
class ConnStat(Stat):
+ prefix = 'connection'
+ def __init__(self, name, desc, flags=''):
+ Stat.__init__(self, name, ConnStat.prefix, desc, flags)
+class CursorStat(Stat):
+ prefix = 'cursor'
def __init__(self, name, desc, flags=''):
- Stat.__init__(self, name, 'connection', desc, flags)
+ Stat.__init__(self, name, CursorStat.prefix, desc, flags)
class DhandleStat(Stat):
+ prefix = 'data-handle'
def __init__(self, name, desc, flags=''):
- Stat.__init__(self, name, 'data-handle', desc, flags)
+ Stat.__init__(self, name, DhandleStat.prefix, desc, flags)
class LogStat(Stat):
+ prefix = 'log'
def __init__(self, name, desc, flags=''):
- Stat.__init__(self, name, 'log', desc, flags)
+ Stat.__init__(self, name, LogStat.prefix, desc, flags)
class LSMStat(Stat):
+ prefix = 'LSM'
def __init__(self, name, desc, flags=''):
- Stat.__init__(self, name, 'LSM', desc, flags)
+ Stat.__init__(self, name, LSMStat.prefix, desc, flags)
class RecStat(Stat):
+ prefix = 'reconciliation'
def __init__(self, name, desc, flags=''):
- Stat.__init__(self, name, 'reconciliation', desc, flags)
+ Stat.__init__(self, name, RecStat.prefix, desc, flags)
class SessionStat(Stat):
+ prefix = 'session'
def __init__(self, name, desc, flags=''):
- Stat.__init__(self, name, 'session', desc, flags)
+ Stat.__init__(self, name, SessionStat.prefix, desc, flags)
class TxnStat(Stat):
+ prefix = 'transaction'
def __init__(self, name, desc, flags=''):
- Stat.__init__(self, name, 'transaction', desc, flags)
+ Stat.__init__(self, name, TxnStat.prefix, desc, flags)
+
+##########################################
+# Groupings of useful statistics:
+# A pre-defined dictionary containing the group name as the key and the
+# list of prefix tags that comprise that group.
+##########################################
+groups = {}
+groups['cursor'] = [CursorStat.prefix, SessionStat.prefix]
+groups['evict'] = [CacheStat.prefix, ConnStat.prefix, BlockStat.prefix]
+groups['lsm'] = [LSMStat.prefix, TxnStat.prefix]
+groups['memory'] = [CacheStat.prefix, ConnStat.prefix, RecStat.prefix]
+groups['system'] = [ConnStat.prefix, DhandleStat.prefix, SessionStat.prefix]
##########################################
# CONNECTION statistics
@@ -123,6 +148,7 @@ connection_stats = [
'maximum bytes configured', 'no_clear,no_scale'),
CacheStat('cache_bytes_read', 'bytes read into cache'),
CacheStat('cache_bytes_write', 'bytes written from cache'),
+ CacheStat('cache_eviction_app', 'pages evicted by application threads'),
CacheStat('cache_eviction_clean', 'unmodified pages evicted'),
CacheStat('cache_eviction_deepen',
'page split during eviction deepened the tree'),
@@ -148,6 +174,7 @@ connection_stats = [
'eviction server unable to reach eviction goal'),
CacheStat('cache_eviction_split', 'pages split during eviction'),
CacheStat('cache_eviction_walk', 'pages walked for eviction'),
+ CacheStat('cache_inmem_split', 'in-memory page splits'),
CacheStat('cache_pages_dirty',
'tracked dirty pages in the cache', 'no_scale'),
CacheStat('cache_pages_inuse',
@@ -251,17 +278,17 @@ connection_stats = [
SessionStat('session_open', 'open session count', 'no_clear,no_scale'),
##########################################
- # Total Btree cursor operations
- ##########################################
- BtreeStat('cursor_create', 'cursor create calls'),
- BtreeStat('cursor_insert', 'cursor insert calls'),
- BtreeStat('cursor_next', 'cursor next calls'),
- BtreeStat('cursor_prev', 'cursor prev calls'),
- BtreeStat('cursor_remove', 'cursor remove calls'),
- BtreeStat('cursor_reset', 'cursor reset calls'),
- BtreeStat('cursor_search', 'cursor search calls'),
- BtreeStat('cursor_search_near', 'cursor search near calls'),
- BtreeStat('cursor_update', 'cursor update calls'),
+ # Total cursor operations
+ ##########################################
+ CursorStat('cursor_create', 'cursor create calls'),
+ CursorStat('cursor_insert', 'cursor insert calls'),
+ CursorStat('cursor_next', 'cursor next calls'),
+ CursorStat('cursor_prev', 'cursor prev calls'),
+ CursorStat('cursor_remove', 'cursor remove calls'),
+ CursorStat('cursor_reset', 'cursor reset calls'),
+ CursorStat('cursor_search', 'cursor search calls'),
+ CursorStat('cursor_search_near', 'cursor search near calls'),
+ CursorStat('cursor_update', 'cursor update calls'),
]
connection_stats = sorted(connection_stats, key=attrgetter('name'))
@@ -371,6 +398,7 @@ dsrc_stats = [
'data source pages selected for eviction unable to be evicted'),
CacheStat('cache_eviction_hazard', 'hazard pointer blocked page eviction'),
CacheStat('cache_eviction_internal', 'internal pages evicted'),
+ CacheStat('cache_inmem_split', 'in-memory page splits'),
CacheStat('cache_overflow_value',
'overflow values cached in memory', 'no_scale'),
CacheStat('cache_read', 'pages read into cache'),
diff --git a/src/third_party/wiredtiger/src/block/block_ext.c b/src/third_party/wiredtiger/src/block/block_ext.c
index d500f93817a..28c39c71a08 100644
--- a/src/third_party/wiredtiger/src/block/block_ext.c
+++ b/src/third_party/wiredtiger/src/block/block_ext.c
@@ -1167,7 +1167,7 @@ __wt_block_extlist_read(WT_SESSION_IMPL *session,
* merged). Second, the "available" list is sorted by size as well as
* by offset, and the fast-path append code doesn't support that, it's
* limited to offset. The test of "track size" is short-hand for "are
- * we reading the "available" list.
+ * we reading the available-blocks list".
*/
func = el->track_size == 0 ? __block_append : __block_merge;
for (;;) {
diff --git a/src/third_party/wiredtiger/src/block/block_vrfy.c b/src/third_party/wiredtiger/src/block/block_vrfy.c
index 148b4fa9743..db4e42b79e7 100644
--- a/src/third_party/wiredtiger/src/block/block_vrfy.c
+++ b/src/third_party/wiredtiger/src/block/block_vrfy.c
@@ -17,7 +17,7 @@ static int __verify_last_avail(WT_SESSION_IMPL *, WT_BLOCK *, WT_CKPT *);
static int __verify_last_truncate(WT_SESSION_IMPL *, WT_BLOCK *, WT_CKPT *);
/* The bit list ignores the first block: convert to/from a frag/offset. */
-#define WT_wt_off_tO_FRAG(block, off) \
+#define WT_wt_off_TO_FRAG(block, off) \
((off) / (block)->allocsize - 1)
#define WT_FRAG_TO_OFF(block, frag) \
(((wt_off_t)(frag + 1)) * (block)->allocsize)
@@ -81,7 +81,7 @@ __wt_block_verify_start(
* verify many non-contiguous blocks creating too many entries on the
* list to fit into memory.
*/
- block->frags = (uint64_t)WT_wt_off_tO_FRAG(block, size);
+ block->frags = (uint64_t)WT_wt_off_TO_FRAG(block, size);
WT_RET(__bit_alloc(session, block->frags, &block->fragfile));
/*
@@ -232,6 +232,22 @@ __wt_verify_ckpt_load(
}
/*
+ * We don't need the list of blocks on a checkpoint's avail list, but we
+ * read it to ensure it wasn't corrupted. We could confirm correctness
+ * of intermediate avail lists (that is, if they're logically the result
+ * of the allocations and discards to this point). We don't because the
+ * only avail list ever used is the one for the last checkpoint, which
+ * is separately verified by checking it against all of the blocks found
+ * in the file.
+ */
+ el = &ci->avail;
+ if (el->offset != WT_BLOCK_INVALID_OFFSET) {
+ WT_RET(__wt_block_extlist_read(
+ session, block, el, ci->file_size));
+ __wt_block_extlist_free(session, el);
+ }
+
+ /*
* The root page of the checkpoint appears on the alloc list, but not,
* at least until the checkpoint is deleted, on a discard list. To
* handle this case, remove the root page from the accumulated list of
@@ -252,7 +268,7 @@ __wt_verify_ckpt_load(
WT_RET(__bit_alloc(session, block->frags, &block->fragckpt));
el = &block->verify_alloc;
WT_EXT_FOREACH(ext, el->off) {
- frag = (uint64_t)WT_wt_off_tO_FRAG(block, ext->off);
+ frag = (uint64_t)WT_wt_off_TO_FRAG(block, ext->off);
frags = (uint64_t)(ext->size / block->allocsize);
__bit_nset(block->fragckpt, frag, frag + (frags - 1));
}
@@ -342,7 +358,7 @@ __verify_filefrag_add(WT_SESSION_IMPL *session, WT_BLOCK *block,
"non-existent file blocks",
(uintmax_t)offset, (uintmax_t)(offset + size));
- frag = (uint64_t)WT_wt_off_tO_FRAG(block, offset);
+ frag = (uint64_t)WT_wt_off_TO_FRAG(block, offset);
frags = (uint64_t)(size / block->allocsize);
/* It may be illegal to reference a particular chunk more than once. */
@@ -445,7 +461,7 @@ __verify_ckptfrag_add(
"file blocks outside the checkpoint",
(uintmax_t)offset, (uintmax_t)(offset + size));
- frag = (uint64_t)WT_wt_off_tO_FRAG(block, offset);
+ frag = (uint64_t)WT_wt_off_TO_FRAG(block, offset);
frags = (uint64_t)(size / block->allocsize);
/* It is illegal to reference a particular chunk more than once. */
diff --git a/src/third_party/wiredtiger/src/btree/bt_curnext.c b/src/third_party/wiredtiger/src/btree/bt_curnext.c
index 0cc79776634..6a07969d684 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curnext.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curnext.c
@@ -120,7 +120,7 @@ new_page:
* Return the next variable-length entry on the append list.
*/
static inline int
-__cursor_var_append_next(WT_CURSOR_BTREE *cbt, int newpage)
+__cursor_var_append_next(WT_CURSOR_BTREE *cbt, int newpage, int *skipped)
{
WT_ITEM *val;
WT_SESSION_IMPL *session;
@@ -141,8 +141,10 @@ new_page: if (cbt->ins == NULL)
__cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins));
if ((upd = __wt_txn_read(session, cbt->ins->upd)) == NULL ||
- WT_UPDATE_DELETED_ISSET(upd))
+ WT_UPDATE_DELETED_ISSET(upd)) {
+ *skipped = 1;
continue;
+ }
val->data = WT_UPDATE_DATA(upd);
val->size = upd->size;
break;
@@ -155,7 +157,7 @@ new_page: if (cbt->ins == NULL)
* Move to the next, variable-length column-store item.
*/
static inline int
-__cursor_var_next(WT_CURSOR_BTREE *cbt, int newpage)
+__cursor_var_next(WT_CURSOR_BTREE *cbt, int newpage, int *skipped)
{
WT_CELL *cell;
WT_CELL_UNPACK unpack;
@@ -195,8 +197,10 @@ new_page: /* Find the matching WT_COL slot. */
upd = cbt->ins == NULL ?
NULL : __wt_txn_read(session, cbt->ins->upd);
if (upd != NULL) {
- if (WT_UPDATE_DELETED_ISSET(upd))
+ if (WT_UPDATE_DELETED_ISSET(upd)) {
+ *skipped = 1;
continue;
+ }
val->data = WT_UPDATE_DATA(upd);
val->size = upd->size;
@@ -233,7 +237,7 @@ new_page: /* Find the matching WT_COL slot. */
* Move to the next row-store item.
*/
static inline int
-__cursor_row_next(WT_CURSOR_BTREE *cbt, int newpage)
+__cursor_row_next(WT_CURSOR_BTREE *cbt, int newpage, int *skipped)
{
WT_INSERT *ins;
WT_ITEM *key, *val;
@@ -277,8 +281,10 @@ __cursor_row_next(WT_CURSOR_BTREE *cbt, int newpage)
new_insert: if ((ins = cbt->ins) != NULL) {
if ((upd = __wt_txn_read(session, ins->upd)) == NULL ||
- WT_UPDATE_DELETED_ISSET(upd))
+ WT_UPDATE_DELETED_ISSET(upd)) {
+ *skipped = 1;
continue;
+ }
key->data = WT_INSERT_KEY(ins);
key->size = WT_INSERT_KEY_SIZE(ins);
val->data = WT_UPDATE_DATA(upd);
@@ -307,8 +313,10 @@ new_insert: if ((ins = cbt->ins) != NULL) {
cbt->slot = cbt->row_iteration_slot / 2 - 1;
rip = &page->pg_row_d[cbt->slot];
upd = __wt_txn_read(session, WT_ROW_UPDATE(page, rip));
- if (upd != NULL && WT_UPDATE_DELETED_ISSET(upd))
+ if (upd != NULL && WT_UPDATE_DELETED_ISSET(upd)) {
+ *skipped = 1;
continue;
+ }
return (__cursor_row_slot_return(cbt, rip, upd));
}
@@ -385,7 +393,7 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, int truncating)
WT_PAGE *page;
WT_SESSION_IMPL *session;
uint32_t flags;
- int newpage;
+ int skipped, newpage;
session = (WT_SESSION_IMPL *)cbt->iface.session;
@@ -410,15 +418,18 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, int truncating)
* found. Then, move to the next page, until we reach the end of the
* file.
*/
- page = cbt->ref == NULL ? NULL : cbt->ref->page;
- for (newpage = 0;; newpage = 1) {
+ for (skipped = newpage = 0;; skipped = 0, newpage = 1) {
+ page = cbt->ref == NULL ? NULL : cbt->ref->page;
+ WT_ASSERT(session, page == NULL || !WT_PAGE_IS_INTERNAL(page));
+
if (F_ISSET(cbt, WT_CBT_ITERATE_APPEND)) {
switch (page->type) {
case WT_PAGE_COL_FIX:
ret = __cursor_fix_append_next(cbt, newpage);
break;
case WT_PAGE_COL_VAR:
- ret = __cursor_var_append_next(cbt, newpage);
+ ret = __cursor_var_append_next(
+ cbt, newpage, &skipped);
break;
WT_ILLEGAL_VALUE_ERR(session);
}
@@ -433,10 +444,10 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, int truncating)
ret = __cursor_fix_next(cbt, newpage);
break;
case WT_PAGE_COL_VAR:
- ret = __cursor_var_next(cbt, newpage);
+ ret = __cursor_var_next(cbt, newpage, &skipped);
break;
case WT_PAGE_ROW_LEAF:
- ret = __cursor_row_next(cbt, newpage);
+ ret = __cursor_row_next(cbt, newpage, &skipped);
break;
WT_ILLEGAL_VALUE_ERR(session);
}
@@ -455,11 +466,17 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, int truncating)
}
}
+ /*
+ * If we scanned all the way through a page and only saw
+ * deleted records, try to evict the page as we release it.
+ * Otherwise repeatedly deleting from the beginning of a tree
+ * can have quadratic performance.
+ */
+ if (newpage && skipped)
+ page->read_gen = WT_READGEN_OLDEST;
+
WT_ERR(__wt_tree_walk(session, &cbt->ref, flags));
WT_ERR_TEST(cbt->ref == NULL, WT_NOTFOUND);
-
- page = cbt->ref->page;
- WT_ASSERT(session, !WT_PAGE_IS_INTERNAL(page));
}
err: if (ret != 0)
diff --git a/src/third_party/wiredtiger/src/btree/bt_curprev.c b/src/third_party/wiredtiger/src/btree/bt_curprev.c
index 8de784d1f1d..a6be8271ea5 100644
--- a/src/third_party/wiredtiger/src/btree/bt_curprev.c
+++ b/src/third_party/wiredtiger/src/btree/bt_curprev.c
@@ -257,7 +257,7 @@ new_page:
* Return the previous variable-length entry on the append list.
*/
static inline int
-__cursor_var_append_prev(WT_CURSOR_BTREE *cbt, int newpage)
+__cursor_var_append_prev(WT_CURSOR_BTREE *cbt, int newpage, int *skipped)
{
WT_ITEM *val;
WT_SESSION_IMPL *session;
@@ -278,8 +278,10 @@ new_page: if (cbt->ins == NULL)
__cursor_set_recno(cbt, WT_INSERT_RECNO(cbt->ins));
if ((upd = __wt_txn_read(session, cbt->ins->upd)) == NULL ||
- WT_UPDATE_DELETED_ISSET(upd))
+ WT_UPDATE_DELETED_ISSET(upd)) {
+ *skipped = 1;
continue;
+ }
val->data = WT_UPDATE_DATA(upd);
val->size = upd->size;
break;
@@ -292,7 +294,7 @@ new_page: if (cbt->ins == NULL)
* Move to the previous, variable-length column-store item.
*/
static inline int
-__cursor_var_prev(WT_CURSOR_BTREE *cbt, int newpage)
+__cursor_var_prev(WT_CURSOR_BTREE *cbt, int newpage, int *skipped)
{
WT_CELL *cell;
WT_CELL_UNPACK unpack;
@@ -333,8 +335,10 @@ new_page: if (cbt->recno < page->pg_var_recno)
upd = cbt->ins == NULL ?
NULL : __wt_txn_read(session, cbt->ins->upd);
if (upd != NULL) {
- if (WT_UPDATE_DELETED_ISSET(upd))
+ if (WT_UPDATE_DELETED_ISSET(upd)) {
+ *skipped = 1;
continue;
+ }
val->data = WT_UPDATE_DATA(upd);
val->size = upd->size;
@@ -352,8 +356,10 @@ new_page: if (cbt->recno < page->pg_var_recno)
if ((cell = WT_COL_PTR(page, cip)) == NULL)
continue;
__wt_cell_unpack(cell, &unpack);
- if (unpack.type == WT_CELL_DEL)
+ if (unpack.type == WT_CELL_DEL) {
+ *skipped = 1;
continue;
+ }
WT_RET(__wt_page_cell_data_ref(
session, page, &unpack, &cbt->tmp));
@@ -371,7 +377,7 @@ new_page: if (cbt->recno < page->pg_var_recno)
* Move to the previous row-store item.
*/
static inline int
-__cursor_row_prev(WT_CURSOR_BTREE *cbt, int newpage)
+__cursor_row_prev(WT_CURSOR_BTREE *cbt, int newpage, int *skipped)
{
WT_INSERT *ins;
WT_ITEM *key, *val;
@@ -426,8 +432,10 @@ __cursor_row_prev(WT_CURSOR_BTREE *cbt, int newpage)
new_insert: if ((ins = cbt->ins) != NULL) {
if ((upd = __wt_txn_read(session, ins->upd)) == NULL ||
- WT_UPDATE_DELETED_ISSET(upd))
+ WT_UPDATE_DELETED_ISSET(upd)) {
+ *skipped = 1;
continue;
+ }
key->data = WT_INSERT_KEY(ins);
key->size = WT_INSERT_KEY_SIZE(ins);
val->data = WT_UPDATE_DATA(upd);
@@ -458,8 +466,10 @@ new_insert: if ((ins = cbt->ins) != NULL) {
cbt->slot = cbt->row_iteration_slot / 2 - 1;
rip = &page->pg_row_d[cbt->slot];
upd = __wt_txn_read(session, WT_ROW_UPDATE(page, rip));
- if (upd != NULL && WT_UPDATE_DELETED_ISSET(upd))
+ if (upd != NULL && WT_UPDATE_DELETED_ISSET(upd)) {
+ *skipped = 1;
continue;
+ }
return (__cursor_row_slot_return(cbt, rip, upd));
}
@@ -477,7 +487,7 @@ __wt_btcur_prev(WT_CURSOR_BTREE *cbt, int truncating)
WT_PAGE *page;
WT_SESSION_IMPL *session;
uint32_t flags;
- int newpage;
+ int skipped, newpage;
session = (WT_SESSION_IMPL *)cbt->iface.session;
@@ -502,15 +512,27 @@ __wt_btcur_prev(WT_CURSOR_BTREE *cbt, int truncating)
* found. Then, move to the previous page, until we reach the start
* of the file.
*/
- page = cbt->ref == NULL ? NULL : cbt->ref->page;
- for (newpage = 0;; newpage = 1) {
+ for (skipped = newpage = 0;; skipped = 0, newpage = 1) {
+ page = cbt->ref == NULL ? NULL : cbt->ref->page;
+ WT_ASSERT(session, page == NULL || !WT_PAGE_IS_INTERNAL(page));
+
+ /*
+ * The last page in a column-store has appended entries.
+ * We handle it separately from the usual cursor code:
+ * it's only that one page and it's in a simple format.
+ */
+ if (newpage && page != NULL && page->type != WT_PAGE_ROW_LEAF &&
+ (cbt->ins_head = WT_COL_APPEND(page)) != NULL)
+ F_SET(cbt, WT_CBT_ITERATE_APPEND);
+
if (F_ISSET(cbt, WT_CBT_ITERATE_APPEND)) {
switch (page->type) {
case WT_PAGE_COL_FIX:
ret = __cursor_fix_append_prev(cbt, newpage);
break;
case WT_PAGE_COL_VAR:
- ret = __cursor_var_append_prev(cbt, newpage);
+ ret = __cursor_var_append_prev(
+ cbt, newpage, &skipped);
break;
WT_ILLEGAL_VALUE_ERR(session);
}
@@ -527,10 +549,10 @@ __wt_btcur_prev(WT_CURSOR_BTREE *cbt, int truncating)
ret = __cursor_fix_prev(cbt, newpage);
break;
case WT_PAGE_COL_VAR:
- ret = __cursor_var_prev(cbt, newpage);
+ ret = __cursor_var_prev(cbt, newpage, &skipped);
break;
case WT_PAGE_ROW_LEAF:
- ret = __cursor_row_prev(cbt, newpage);
+ ret = __cursor_row_prev(cbt, newpage, &skipped);
break;
WT_ILLEGAL_VALUE_ERR(session);
}
@@ -538,20 +560,11 @@ __wt_btcur_prev(WT_CURSOR_BTREE *cbt, int truncating)
break;
}
+ if (newpage && skipped)
+ page->read_gen = WT_READGEN_OLDEST;
+
WT_ERR(__wt_tree_walk(session, &cbt->ref, flags));
WT_ERR_TEST(cbt->ref == NULL, WT_NOTFOUND);
-
- page = cbt->ref->page;
- WT_ASSERT(session, !WT_PAGE_IS_INTERNAL(page));
-
- /*
- * The last page in a column-store has appended entries.
- * We handle it separately from the usual cursor code:
- * it's only that one page and it's in a simple format.
- */
- if (page->type != WT_PAGE_ROW_LEAF &&
- (cbt->ins_head = WT_COL_APPEND(page)) != NULL)
- F_SET(cbt, WT_CBT_ITERATE_APPEND);
}
err: if (ret != 0)
diff --git a/src/third_party/wiredtiger/src/btree/bt_evict.c b/src/third_party/wiredtiger/src/btree/bt_evict.c
index 91a7885e63f..2af9f0024db 100644
--- a/src/third_party/wiredtiger/src/btree/bt_evict.c
+++ b/src/third_party/wiredtiger/src/btree/bt_evict.c
@@ -615,7 +615,7 @@ __wt_evict_page(WT_SESSION_IMPL *session, WT_REF *ref)
WT_ASSERT(session,
!F_ISSET(txn, TXN_HAS_ID) || !__wt_txn_visible(session, txn->id));
- ret = __wt_rec_evict(session, ref, 0);
+ ret = __wt_evict(session, ref, 0);
txn->isolation = saved_iso;
return (ret);
@@ -1266,6 +1266,9 @@ __wt_evict_lru_page(WT_SESSION_IMPL *session, int is_app)
WT_PAGE *page;
WT_REF *ref;
+ if (is_app)
+ WT_STAT_FAST_CONN_INCR(session, cache_eviction_app);
+
WT_RET(__evict_get_ref(session, is_app, &btree, &ref));
WT_ASSERT(session, ref->state == WT_REF_LOCKED);
diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c
index c9cda548d43..b5af14ab376 100644
--- a/src/third_party/wiredtiger/src/btree/bt_handle.c
+++ b/src/third_party/wiredtiger/src/btree/bt_handle.c
@@ -511,7 +511,7 @@ err: if (leaf != NULL)
/*
* __wt_btree_new_leaf_page --
- * Create an empty leaf page and link it into a reference in its parent.
+ * Create an empty leaf page.
*/
int
__wt_btree_new_leaf_page(WT_SESSION_IMPL *session, WT_PAGE **pagep)
@@ -523,15 +523,15 @@ __wt_btree_new_leaf_page(WT_SESSION_IMPL *session, WT_PAGE **pagep)
switch (btree->type) {
case BTREE_COL_FIX:
WT_RET(
- __wt_page_alloc(session, WT_PAGE_COL_FIX, 1, 0, 1, pagep));
+ __wt_page_alloc(session, WT_PAGE_COL_FIX, 1, 0, 0, pagep));
break;
case BTREE_COL_VAR:
WT_RET(
- __wt_page_alloc(session, WT_PAGE_COL_VAR, 1, 0, 1, pagep));
+ __wt_page_alloc(session, WT_PAGE_COL_VAR, 1, 0, 0, pagep));
break;
case BTREE_ROW:
WT_RET(
- __wt_page_alloc(session, WT_PAGE_ROW_LEAF, 0, 0, 1, pagep));
+ __wt_page_alloc(session, WT_PAGE_ROW_LEAF, 0, 0, 0, pagep));
break;
WT_ILLEGAL_VALUE(session);
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_slvg.c b/src/third_party/wiredtiger/src/btree/bt_slvg.c
index 10366e91a0e..6e70c9ea2b6 100644
--- a/src/third_party/wiredtiger/src/btree/bt_slvg.c
+++ b/src/third_party/wiredtiger/src/btree/bt_slvg.c
@@ -324,7 +324,7 @@ __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[])
*/
if (ss->root_ref.page != NULL) {
btree->ckpt = ckptbase;
- ret = __wt_rec_evict(session, &ss->root_ref, 1);
+ ret = __wt_evict(session, &ss->root_ref, 1);
ss->root_ref.page = NULL;
btree->ckpt = NULL;
}
@@ -1302,7 +1302,7 @@ __slvg_col_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref)
/* Write the new version of the leaf page to disk. */
WT_ERR(__slvg_modify_init(session, page));
- WT_ERR(__wt_rec_write(session, ref, cookie, WT_SKIP_UPDATE_ERR));
+ WT_ERR(__wt_reconcile(session, ref, cookie, WT_SKIP_UPDATE_ERR));
/* Reset the page. */
page->pg_var_d = save_col_var;
@@ -1310,7 +1310,7 @@ __slvg_col_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref)
ret = __wt_page_release(session, ref, 0);
if (ret == 0)
- ret = __wt_rec_evict(session, ref, 1);
+ ret = __wt_evict(session, ref, 1);
if (0) {
err: WT_TRET(__wt_page_release(session, ref, 0));
@@ -2009,7 +2009,7 @@ __slvg_row_build_leaf(
/* Write the new version of the leaf page to disk. */
WT_ERR(__slvg_modify_init(session, page));
- WT_ERR(__wt_rec_write(session, ref, cookie, WT_SKIP_UPDATE_ERR));
+ WT_ERR(__wt_reconcile(session, ref, cookie, WT_SKIP_UPDATE_ERR));
/* Reset the page. */
page->pg_row_entries += skip_stop;
@@ -2020,7 +2020,7 @@ __slvg_row_build_leaf(
*/
ret = __wt_page_release(session, ref, 0);
if (ret == 0)
- ret = __wt_rec_evict(session, ref, 1);
+ ret = __wt_evict(session, ref, 1);
if (0) {
err: WT_TRET(__wt_page_release(session, ref, 0));
diff --git a/src/third_party/wiredtiger/src/btree/bt_sync.c b/src/third_party/wiredtiger/src/btree/bt_sync.c
index 0c4064dfdd1..cccd2e5bede 100644
--- a/src/third_party/wiredtiger/src/btree/bt_sync.c
+++ b/src/third_party/wiredtiger/src/btree/bt_sync.c
@@ -66,7 +66,7 @@ __sync_file(WT_SESSION_IMPL *session, int syncop)
__wt_txn_refresh(session, 1);
leaf_bytes += page->memory_footprint;
++leaf_pages;
- WT_ERR(__wt_rec_write(session, walk, NULL, 0));
+ WT_ERR(__wt_reconcile(session, walk, NULL, 0));
}
}
break;
@@ -132,7 +132,7 @@ __sync_file(WT_SESSION_IMPL *session, int syncop)
leaf_bytes += page->memory_footprint;
++leaf_pages;
}
- WT_ERR(__wt_rec_write(session, walk, NULL, 0));
+ WT_ERR(__wt_reconcile(session, walk, NULL, 0));
}
}
break;
@@ -244,7 +244,7 @@ __evict_file(WT_SESSION_IMPL *session, int syncop)
* error, retrying later.
*/
if (syncop == WT_SYNC_CLOSE && __wt_page_is_modified(page))
- WT_ERR(__wt_rec_write(session, ref, NULL, WT_EVICTING));
+ WT_ERR(__wt_reconcile(session, ref, NULL, WT_EVICTING));
/*
* We can't evict the page just returned to us (it marks our
@@ -269,7 +269,7 @@ __evict_file(WT_SESSION_IMPL *session, int syncop)
if (__wt_ref_is_root(ref) ||
page->modify == NULL ||
!F_ISSET(page->modify, WT_PM_REC_EMPTY))
- WT_ERR(__wt_rec_evict(session, ref, 1));
+ WT_ERR(__wt_evict(session, ref, 1));
break;
case WT_SYNC_DISCARD:
case WT_SYNC_DISCARD_FORCE:
@@ -295,15 +295,10 @@ __evict_file(WT_SESSION_IMPL *session, int syncop)
!__wt_txn_visible_all(session,
page->modify->rec_max_txn))
WT_ERR(EBUSY);
+
if (syncop == WT_SYNC_DISCARD_FORCE)
F_SET(session, WT_SESSION_DISCARD_FORCE);
- __wt_ref_out(session, ref);
- /*
- * In case we don't discard the whole tree, make sure
- * that future readers know that the page is no longer
- * in cache.
- */
- ref->state = WT_REF_DISK;
+ __wt_rec_page_clean_update(session, ref);
F_CLR(session, WT_SESSION_DISCARD_FORCE);
break;
WT_ILLEGAL_VALUE_ERR(session);
diff --git a/src/third_party/wiredtiger/src/btree/rec_evict.c b/src/third_party/wiredtiger/src/btree/rec_evict.c
index 4696e78059e..f8dd4708ffd 100644
--- a/src/third_party/wiredtiger/src/btree/rec_evict.c
+++ b/src/third_party/wiredtiger/src/btree/rec_evict.c
@@ -10,25 +10,24 @@
static int __hazard_exclusive(WT_SESSION_IMPL *, WT_REF *, int);
static void __rec_discard_tree(WT_SESSION_IMPL *, WT_REF *, int, int);
static void __rec_excl_clear(WT_SESSION_IMPL *);
-static void __rec_page_clean_update(WT_SESSION_IMPL *, WT_REF *);
static int __rec_page_dirty_update(WT_SESSION_IMPL *, WT_REF *, int);
-static int __rec_review(WT_SESSION_IMPL *, WT_REF *, int, int, int *);
+static int __rec_review(WT_SESSION_IMPL *, WT_REF *, int, int, int *, int *);
/*
- * __wt_rec_evict --
- * Reconciliation plus eviction.
+ * __wt_evict --
+ * Eviction.
*/
int
-__wt_rec_evict(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive)
+__wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive)
{
WT_DECL_RET;
WT_PAGE *page;
WT_PAGE_MODIFY *mod;
WT_TXN_STATE *txn_state;
- int istree;
+ int inmem_split, istree;
page = ref->page;
- istree = 0;
+ inmem_split = istree = 0;
WT_RET(__wt_verbose(session, WT_VERB_EVICT,
"page %p (%s)", page, __wt_page_type_string(page->type)));
@@ -51,7 +50,14 @@ __wt_rec_evict(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive)
* unlikely eviction would choose an internal page with children, it's
* not disallowed anywhere.
*/
- WT_ERR(__rec_review(session, ref, exclusive, 1, &istree));
+ WT_ERR(__rec_review(session, ref, exclusive, 1, &inmem_split, &istree));
+
+ /*
+ * If there was an in-memory split, the tree has been left in the state
+ * we want: there is nothing more to do.
+ */
+ if (inmem_split)
+ goto done;
/*
* Update the page's modification reference, reconciliation might have
@@ -77,7 +83,7 @@ __wt_rec_evict(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive)
if (__wt_ref_is_root(ref))
__wt_ref_out(session, ref);
else
- __rec_page_clean_update(session, ref);
+ __wt_rec_page_clean_update(session, ref);
WT_STAT_FAST_CONN_INCR(session, cache_eviction_clean);
WT_STAT_FAST_DATA_INCR(session, cache_eviction_clean);
@@ -103,7 +109,7 @@ err: /*
WT_STAT_FAST_CONN_INCR(session, cache_eviction_fail);
WT_STAT_FAST_DATA_INCR(session, cache_eviction_fail);
}
- session->excl_next = 0;
+done: session->excl_next = 0;
if (txn_state != NULL)
txn_state->snap_min = WT_TXN_NONE;
@@ -112,11 +118,11 @@ err: /*
}
/*
- * __rec_page_clean_update --
+ * __wt_rec_page_clean_update --
* Update a clean page's reference on eviction.
*/
-static void
-__rec_page_clean_update(WT_SESSION_IMPL *session, WT_REF *ref)
+void
+__wt_rec_page_clean_update(WT_SESSION_IMPL *session, WT_REF *ref)
{
/*
* Discard the page and update the reference structure; if the page has
@@ -166,8 +172,26 @@ __rec_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive)
WT_PUBLISH(ref->state, WT_REF_DELETED);
break;
case WT_PM_REC_MULTIBLOCK: /* Multiple blocks */
- /* Split the page in memory. */
- WT_RET(__wt_split_evict(session, ref, exclusive));
+ /*
+ * There are two cases in this code.
+ *
+ * First, an in-memory page that got too large, we forcibly
+ * evicted it, and there wasn't anything to write. (Imagine two
+ * threads updating a small set of keys on a leaf page. The page
+ * is too large so we try to evict it, but after reconciliation
+ * there's only a small amount of data (so it's a single page we
+ * can't split), and because there are two threads, there's some
+ * data we can't write (so we can't evict it). In that case, we
+ * take advantage of the fact we have exclusive access to the
+ * page and rewrite it in memory.)
+ *
+ * Second, a real split where we reconciled a page and it turned
+ * into a lot of pages.
+ */
+ if (mod->mod_multi_entries == 1)
+ WT_RET(__wt_split_rewrite(session, ref));
+ else
+ WT_RET(__wt_split_multi(session, ref, exclusive));
break;
case WT_PM_REC_REPLACE: /* 1-for-1 page swap */
if (ref->addr != NULL && __wt_off_page(parent, ref->addr)) {
@@ -233,8 +257,8 @@ __rec_discard_tree(
* for conditions that would block its eviction.
*/
static int
-__rec_review(
- WT_SESSION_IMPL *session, WT_REF *ref, int exclusive, int top, int *istree)
+__rec_review(WT_SESSION_IMPL *session, WT_REF *ref,
+ int exclusive, int top, int *inmem_splitp, int *istreep)
{
WT_BTREE *btree;
WT_PAGE *page;
@@ -260,6 +284,24 @@ __rec_review(
* valid memory.
*/
__wt_evict_list_clear_page(session, ref);
+
+ /*
+ * Check for an append-only workload needing an in-memory split.
+ *
+ * We can't do this earlier because in-memory splits require
+ * exclusive access. If an in-memory split completes, the page
+ * stays in memory and the tree is left in the desired state:
+ * avoid the usual cleanup.
+ *
+ * Attempt the split before checking whether a checkpoint is
+ * running - that's not a problem here because we aren't
+ * evicting any dirty pages.
+ */
+ if (top) {
+ WT_RET(__wt_split_insert(session, ref, inmem_splitp));
+ if (*inmem_splitp)
+ return (0);
+ }
}
/*
@@ -279,9 +321,9 @@ __rec_review(
* know to do a full walk when discarding the
* page.
*/
- *istree = 1;
- WT_RET(__rec_review(
- session, child, exclusive, 0, istree));
+ *istreep = 1;
+ WT_RET(__rec_review(session, child, exclusive,
+ 0, inmem_splitp, istreep));
break;
case WT_REF_LOCKED: /* Being evicted */
case WT_REF_READING: /* Being read */
@@ -387,7 +429,7 @@ __rec_review(
else if (top && !WT_PAGE_IS_INTERNAL(page) &&
page->memory_footprint > 10 * btree->maxleafpage)
LF_SET(WT_SKIP_UPDATE_RESTORE);
- WT_RET(__wt_rec_write(session, ref, NULL, flags));
+ WT_RET(__wt_reconcile(session, ref, NULL, flags));
WT_ASSERT(session,
!__wt_page_is_modified(page) ||
LF_ISSET(WT_SKIP_UPDATE_RESTORE));
diff --git a/src/third_party/wiredtiger/src/btree/rec_split.c b/src/third_party/wiredtiger/src/btree/rec_split.c
index babec2cc295..dea44503c55 100644
--- a/src/third_party/wiredtiger/src/btree/rec_split.c
+++ b/src/third_party/wiredtiger/src/btree/rec_split.c
@@ -631,12 +631,12 @@ err: __wt_free_ref_index(session, parent, alloc_index, 1);
}
/*
- * __split_inmem_build --
+ * __split_multi_inmem --
* Instantiate a page in a multi-block set, when an update couldn't be
* written.
*/
static int
-__split_inmem_build(
+__split_multi_inmem(
WT_SESSION_IMPL *session, WT_PAGE *orig, WT_REF *ref, WT_MULTI *multi)
{
WT_CURSOR_BTREE cbt;
@@ -722,9 +722,10 @@ __split_inmem_build(
*/
page->modify->first_dirty_txn = S2C(session)->txn_global.oldest_id;
-err: __wt_scr_free(&key);
- /* Free any resources that may have been cached in the cursor. */
+err: /* Free any resources that may have been cached in the cursor. */
WT_TRET(__wt_btcur_close(&cbt));
+
+ __wt_scr_free(&key);
return (ret);
}
@@ -774,10 +775,9 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session,
addr->type = multi->addr.type;
WT_RET(__wt_strndup(session,
multi->addr.addr, addr->size, &addr->addr));
- /* Need a cast to avoid an implicit conversion warning. */
WT_MEMSIZE_ADD(incr, addr->size);
} else
- WT_RET(__split_inmem_build(session, page, ref, multi));
+ WT_RET(__split_multi_inmem(session, page, ref, multi));
switch (page->type) {
case WT_PAGE_ROW_INT:
@@ -804,42 +804,27 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session,
}
/*
- * __split_evict_multi --
+ * __split_parent --
* Resolve a multi-page split, inserting new information into the parent.
*/
static int
-__split_evict_multi(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive)
+__split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
+ uint32_t new_entries, size_t parent_decr, size_t parent_incr,
+ int exclusive, int ref_discard)
{
WT_DECL_RET;
- WT_IKEY *ikey;
- WT_PAGE *parent, *child;
+ WT_PAGE *parent;
WT_PAGE_INDEX *alloc_index, *pindex;
- WT_PAGE_MODIFY *mod;
- WT_REF **alloc_refp, *parent_ref, ref_copy, **ref_tmp;
- size_t parent_decr, parent_incr, size;
- uint32_t i, j, parent_entries, result_entries, split_entries;
+ WT_REF **alloc_refp, *parent_ref;
+ size_t size;
+ uint32_t i, j, parent_entries, result_entries;
int complete, hazard, locked;
parent = NULL; /* -Wconditional-uninitialized */
alloc_index = NULL;
parent_ref = NULL;
- ref_tmp = NULL;
- parent_decr = parent_incr = 0;
complete = hazard = locked = 0;
- child = ref->page;
- mod = child->modify;
-
- /*
- * Convert the split page's multiblock reconciliation information into
- * an array of page reference structures.
- */
- split_entries = mod->mod_multi_entries;
- WT_RET(__wt_calloc_def(session, split_entries, &ref_tmp));
- for (i = 0; i < split_entries; ++i)
- WT_ERR(__wt_multi_to_ref(session,
- child, &mod->mod_multi[i], &ref_tmp[i], &parent_incr));
-
/*
* Get a page-level lock on the parent to single-thread splits into the
* page because we need to single-thread sizing/growing the page index.
@@ -879,7 +864,7 @@ __split_evict_multi(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive)
pindex = WT_INTL_INDEX_COPY(parent);
parent_entries = pindex->entries;
- result_entries = (parent_entries - 1) + split_entries;
+ result_entries = (parent_entries - 1) + new_entries;
/*
* Allocate and initialize a new page index array for the parent, then
@@ -893,20 +878,19 @@ __split_evict_multi(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive)
alloc_index->entries = result_entries;
for (alloc_refp = alloc_index->index, i = 0; i < parent_entries; ++i)
if (pindex->index[i] == ref)
- for (j = 0; j < split_entries; ++j) {
- ref_tmp[j]->home = parent;
- *alloc_refp++ = ref_tmp[j];
+ for (j = 0; j < new_entries; ++j) {
+ ref_new[j]->home = parent;
+ *alloc_refp++ = ref_new[j];
/*
* Clear the split reference as it moves to the
* allocated page index, so it never appears on
* both after an error.
*/
- ref_tmp[j] = NULL;
+ ref_new[j] = NULL;
}
else
*alloc_refp++ = pindex->index[i];
- __wt_free(session, ref_tmp);
/*
* Update the parent page's index: this update makes the split visible
@@ -930,19 +914,26 @@ __split_evict_multi(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive)
/*
* A note on error handling: failures before we swapped the new page
- * index into the parent can be resolved by simply freeing allocated
- * memory because the original page is unchanged, we can continue to
- * use it and we have not yet modified the parent. (See below for an
- * exception, we cannot discard pages referencing unresolved changes.)
- * Failures after we swap the new page index into the parent are also
- * relatively benign because the split is OK and complete and the page
- * is reset so it will be discarded by eviction. For that reason, we
- * mostly ignore further errors unless there's a panic.
+ * index into the parent can be resolved by freeing allocated memory
+ * because the original page is unchanged, we can continue to use it
+ * and we have not yet modified the parent. Failures after we swap
+ * the new page index into the parent are also relatively benign, the
+ * split is OK and complete. For those reasons, we ignore errors past
+ * this point unless there's a panic.
*/
complete = 1;
/*
- * The previous parent page's key for this child page may have been an
+ * We can't free the previous page index, there may be threads using it.
+ * Add it to the session discard list, to be freed when it's safe.
+ */
+ size = sizeof(WT_PAGE_INDEX) + pindex->entries * sizeof(WT_REF *);
+ WT_TRET(__split_safe_free(session, exclusive, pindex, size));
+ WT_MEMSIZE_ADD(parent_decr, size);
+
+ /*
+ * Row-store trees where the old version of the page is being discarded:
+ * the previous parent page's key for this child page may have been an
* on-page overflow key. In that case, if the key hasn't been deleted,
* delete it now, including its backing blocks. We are exchanging the
* WT_REF that referenced it for the split page WT_REFs and their keys,
@@ -950,32 +941,10 @@ __split_evict_multi(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive)
* split (if we failed, we'd leak the underlying blocks, but the parent
* page would be unaffected).
*/
- if (parent->type == WT_PAGE_ROW_INT)
+ if (ref_discard && parent->type == WT_PAGE_ROW_INT)
WT_TRET(__split_ovfl_key_cleanup(session, parent, ref));
/*
- * We can't free the previous page index, or the page's original WT_REF
- * structure and instantiated key, there may be threads using them. Add
- * them to the session discard list, to be freed once we know it's safe.
- */
- size = sizeof(WT_PAGE_INDEX) + pindex->entries * sizeof(WT_REF *);
- WT_TRET(__split_safe_free(session, exclusive, pindex, size));
- WT_MEMSIZE_ADD(parent_decr, size);
- if (parent->type == WT_PAGE_ROW_INT &&
- (ikey = __wt_ref_key_instantiated(ref)) != NULL) {
- size = sizeof(WT_IKEY) + ikey->size;
- WT_TRET(__split_safe_free(session, exclusive, ikey, size));
- WT_MEMSIZE_ADD(parent_decr, size);
- }
- /*
- * Take a copy of the ref in case we can free it immediately: we still
- * need to discard the page.
- */
- ref_copy = *ref;
- WT_TRET(__split_safe_free(session, exclusive, ref, sizeof(WT_REF)));
- WT_MEMSIZE_ADD(parent_decr, sizeof(WT_REF));
-
- /*
* Adjust the parent's memory footprint. This may look odd, but we
* have already taken the allocation overhead into account, and an
* increment followed by a decrement will cancel out the normal
@@ -986,9 +955,9 @@ __split_evict_multi(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive)
WT_STAT_FAST_CONN_INCR(session, cache_eviction_split);
WT_ERR(__wt_verbose(session, WT_VERB_SPLIT,
- "%p: %s split into parent %p %" PRIu32 " -> %" PRIu32
+ "%s split into parent %" PRIu32 " -> %" PRIu32
" (%" PRIu32 ")",
- child, __wt_page_type_string(child->type), parent, parent_entries,
+ __wt_page_type_string(ref->page->type), parent_entries,
result_entries, result_entries - parent_entries));
/*
@@ -1016,55 +985,348 @@ err: if (locked)
if (hazard)
WT_TRET(__wt_hazard_clear(session, parent));
+ __wt_free_ref_index(session, NULL, alloc_index, 0);
+
/*
- * Discard the child; test for split completion instead of errors, there
- * might be a relatively innocuous error, and if we split the parent, we
- * want to discard the child.
+ * A note on error handling: if we completed the split, return success,
+ * nothing really bad can have happened, and our caller has to proceed
+ * with the split.
*/
- if (complete) {
+ if (ret != 0 && ret != WT_PANIC)
+ __wt_err(session, ret,
+ "ignoring not-fatal error during parent page split");
+ return (ret == WT_PANIC || !complete ? ret : 0);
+}
+
+/*
+ * __wt_split_insert --
+ * Check for pages with append-only workloads and split their last insert
+ * list into a separate page.
+ */
+int
+__wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref, int *splitp)
+{
+ WT_BTREE *btree;
+ WT_DECL_RET;
+ WT_IKEY *ikey;
+ WT_DECL_ITEM(key);
+ WT_INSERT *ins, **insp, *moved_ins, *prev_ins;
+ WT_INSERT_HEAD *ins_head;
+ WT_PAGE *page, *right;
+ WT_REF *child, *split_ref[2] = { NULL, NULL };
+ WT_UPDATE *upd;
+ size_t page_decr, parent_incr, right_incr, size;
+ int i;
+
+ *splitp = 0;
+
+ btree = S2BT(session);
+ page = ref->page;
+ right = NULL;
+ page_decr = parent_incr = right_incr = 0;
+
+ /*
+ * Check for pages with append-only workloads. A common application
+ * pattern is to have multiple threads frantically appending to the
+ * tree. We want to reconcile and evict this page, but we'd like to
+ * do it without making the appending threads wait. If we're not
+ * discarding the tree, check and see if it's worth doing a split to
+ * let the threads continue before doing eviction.
+ *
+ * Ignore anything other than row-store leaf pages.
+ * Ignore small pages.
+ *
+ * XXX KEITH
+ * Need a better test for append-only workloads.
+ */
+ if (page->type != WT_PAGE_ROW_LEAF)
+ return (0);
+ if (page->memory_footprint < 10 * btree->maxleafpage)
+ return (0);
+
+ /*
+ * There is no point splitting if the list is small, no deep items is
+ * our heuristic for that. (A 1/4 probability of adding a new skiplist
+ * level means there will be a new 6th level for roughly each 4KB of
+ * entries in the list. If we have at least two 6th level entries, the
+ * list is at least large enough to work with.)
+ *
+ * The following code requires at least two items on the insert list,
+ * this test serves the additional purpose of confirming that.
+ */
+#define WT_MIN_SPLIT_SKIPLIST_DEPTH WT_MIN(6, WT_SKIP_MAXDEPTH - 1)
+ ins_head = page->pg_row_entries == 0 ?
+ WT_ROW_INSERT_SMALLEST(page) :
+ WT_ROW_INSERT_SLOT(page, page->pg_row_entries - 1);
+ if (ins_head == NULL ||
+ ins_head->head[WT_MIN_SPLIT_SKIPLIST_DEPTH] == NULL ||
+ ins_head->head[WT_MIN_SPLIT_SKIPLIST_DEPTH] ==
+ ins_head->tail[WT_MIN_SPLIT_SKIPLIST_DEPTH])
+ return (0);
+
+ /* Find the last item in the insert list. */
+ moved_ins = WT_SKIP_LAST(ins_head);
+
+ /*
+ * Only split a page once, otherwise workloads that update in the middle
+ * of the page could continually split without benefit.
+ */
+ if (F_ISSET_ATOMIC(page, WT_PAGE_SPLIT_INSERT))
+ return (0);
+ F_SET_ATOMIC(page, WT_PAGE_SPLIT_INSERT);
+
+ /*
+ * The first page in the split is the current page, but we still need to
+ * create a replacement WT_REF and make a copy of the key (the original
+ * WT_REF is set to split-status and eventually freed).
+ *
+ * The new reference is visible to readers once the split completes.
+ */
+ WT_ERR(__wt_calloc_def(session, 1, &split_ref[0]));
+ child = split_ref[0];
+ *child = *ref;
+ child->state = WT_REF_MEM;
+
+ /*
+ * Copy the first key from the original page into first ref in the new
+ * parent. Pages created in memory always have a "smallest" insert
+ * list, so look there first. If we don't find one, get the first key
+ * from the disk image.
+ *
+ * We can't just use the key from the original ref: it may have been
+ * suffix-compressed, and after the split the truncated key may not be
+ * valid.
+ */
+ WT_ERR(__wt_scr_alloc(session, 0, &key));
+ if ((ins = WT_SKIP_FIRST(WT_ROW_INSERT_SMALLEST(page))) != NULL) {
+ key->data = WT_INSERT_KEY(ins);
+ key->size = WT_INSERT_KEY_SIZE(ins);
+ } else
+ WT_ERR(__wt_row_leaf_key(
+ session, page, &page->pg_row_d[0], key, 1));
+
+ WT_ERR(__wt_row_ikey(
+ session, 0, key->data, key->size, &child->key.ikey));
+ __wt_scr_free(&key);
+
+ /*
+ * The second page in the split is a new WT_REF/page pair.
+ */
+ WT_ERR(__wt_page_alloc(session, WT_PAGE_ROW_LEAF, 0, 0, 0, &right));
+ WT_ERR(__wt_calloc_def(session, 1, &right->pg_row_ins));
+ WT_ERR(__wt_calloc_def(session, 1, &right->pg_row_ins[0]));
+ WT_MEMSIZE_ADD(right_incr, sizeof(WT_INSERT_HEAD));
+ WT_MEMSIZE_ADD(right_incr, sizeof(WT_INSERT_HEAD *));
+
+ WT_ERR(__wt_calloc_def(session, 1, &split_ref[1]));
+ child = split_ref[1];
+ child->page = right;
+ child->state = WT_REF_MEM;
+ WT_ERR(__wt_row_ikey(session, 0,
+ WT_INSERT_KEY(moved_ins), WT_INSERT_KEY_SIZE(moved_ins),
+ &child->key.ikey));
+ WT_MEMSIZE_ADD(parent_incr, sizeof(WT_REF));
+ WT_MEMSIZE_ADD(parent_incr, sizeof(WT_IKEY));
+ WT_MEMSIZE_ADD(parent_incr, WT_INSERT_KEY_SIZE(moved_ins));
+
+ /* The new page is dirty by definition. */
+ WT_ERR(__wt_page_modify_init(session, right));
+ __wt_page_only_modify_set(session, right);
+
+ /*
+ * We modified the page above, which will have set the first dirty
+ * transaction to the last transaction currently running. However, the
+ * updates we are moving may be older than that: inherit the original
+ * page's transaction ID.
+ */
+ right->modify->first_dirty_txn = page->modify->first_dirty_txn;
+
+ /*
+ * Calculate how much memory we're moving: figure out how deep the skip
+ * list stack is for the element we are moving, and the memory used by
+ * the item's list of updates.
+ */
+ for (i = 0; i < WT_SKIP_MAXDEPTH && ins_head->tail[i] == moved_ins; ++i)
+ ;
+ size = ((size_t)i - 1) * sizeof(WT_INSERT *);
+ size += sizeof(WT_INSERT) + WT_INSERT_KEY_SIZE(moved_ins);
+ for (upd = moved_ins->upd; upd != NULL; upd = upd->next)
+ size += sizeof(WT_UPDATE) + upd->size;
+ WT_MEMSIZE_ADD(right_incr, size);
+ WT_MEMSIZE_ADD(page_decr, size);
+ __wt_cache_page_inmem_decr(session, page, page_decr);
+ __wt_cache_page_inmem_incr(session, right, right_incr);
+
+ /*
+ * Allocation operations completed, move the last insert list item from
+ * the original page to the new page.
+ *
+ * First, update the item to the new child page. (Just append the entry
+ * for simplicity, the previous skip list pointers originally allocated
+ * can be ignored.)
+ */
+ right->pg_row_ins[0]->head[0] =
+ right->pg_row_ins[0]->tail[0] = moved_ins;
+
+ /*
+ * Remove the entry from the orig page (i.e., truncate the skip list).
+ * Following is an example skip list that might help.
+ *
+ * __
+ * |c3|
+ * |
+ * __ __ __
+ * |a2|--------|c2|--|d2|
+ * | | |
+ * __ __ __ __
+ * |a1|--------|c1|--|d1|--------|f1|
+ * | | | |
+ * __ __ __ __ __ __
+ * |a0|--|b0|--|c0|--|d0|--|e0|--|f0|
+ *
+ * From the above picture.
+ * The head array will be: a0, a1, a2, c3, NULL
+ * The tail array will be: f0, f1, d2, c3, NULL
+ * We are looking for: e0, d1, NULL
+ * If there were no f1, we'd be looking for: e0, NULL
+ * If there were an f2, we'd be looking for: e0, d1, d2, NULL
+ *
+ * The algorithm does:
+ * 1) Start at the top of the head list.
+ * 2) Step down until we find a level that contains more than one
+ * element.
+ * 3) Step across until we reach the tail of the level.
+ * 4) If the tail is the item being moved, remove it.
+ * 5) Drop down a level, and go to step 3 until at level 0.
+ */
+ prev_ins = NULL; /* -Wconditional-uninitialized */
+ for (i = WT_SKIP_MAXDEPTH - 1, insp = &ins_head->head[i];
+ i >= 0;
+ i--, insp--) {
+ /* Level empty, or a single element. */
+ if (ins_head->head[i] == NULL ||
+ ins_head->head[i] == ins_head->tail[i]) {
+ /* Remove if it is the element being moved. */
+ if (ins_head->head[i] == moved_ins)
+ ins_head->head[i] = ins_head->tail[i] = NULL;
+ continue;
+ }
+
+ for (ins = *insp; ins != ins_head->tail[i]; ins = ins->next[i])
+ prev_ins = ins;
+
/*
- * Pages with unresolved changes are not marked clean during
- * reconciliation, do it now.
+ * Update the stack head so that we step down as far to the
+ * right as possible. We know that prev_ins is valid since
+ * levels must contain at least two items to be here.
*/
- if (__wt_page_is_modified(child)) {
- mod->write_gen = 0;
- __wt_cache_dirty_decr(session, child);
+ insp = &prev_ins->next[i];
+ if (ins == moved_ins) {
+ /* Remove the item being moved. */
+ WT_ASSERT(session, ins_head->head[i] != moved_ins);
+ WT_ASSERT(session, prev_ins->next[i] == moved_ins);
+ *insp = NULL;
+ ins_head->tail[i] = prev_ins;
}
- __wt_ref_out(session, &ref_copy);
}
+#ifdef HAVE_DIAGNOSTIC
/*
- * A note on error handling: in the case of evicting a page that has
- * unresolved changes, we just instantiated some in-memory pages that
- * reflect those unresolved changes. The problem is those pages
- * reference the same WT_UPDATE chains as the page we're splitting,
- * that is, we simply copied references into the new pages. If the
- * split fails, the original page is fine, but discarding the created
- * page would free those update chains, and that's wrong. There isn't
- * an easy solution, there's a lot of small memory allocations in some
- * common code paths, and unwinding those changes will be difficult.
- * For now, leak the memory by not discarding the instantiated pages.
+ * Verify the moved insert item appears nowhere on the skip list.
*/
- __wt_free_ref_index(session, NULL, alloc_index, 0);
- if (ref_tmp != NULL) {
- for (i = 0; i < split_entries; ++i)
- __wt_free_ref(session, child, ref_tmp[i], 0);
- __wt_free(session, ref_tmp);
+ for (i = WT_SKIP_MAXDEPTH - 1, insp = &ins_head->head[i];
+ i >= 0;
+ i--, insp--)
+ for (ins = *insp; ins != NULL; ins = ins->next[i])
+ WT_ASSERT(session, ins != moved_ins);
+#endif
+
+ /*
+ * Split into the parent.
+ */
+ if ((ret = __split_parent(
+ session, ref, split_ref, 2, 0, parent_incr, 0, 0)) != 0) {
+ /*
+ * Move the insert list element back to the original page list.
+ * For simplicity, the previous skip list pointers originally
+ * allocated can be ignored, just append the entry to the end of
+ * the level 0 list. As before, we depend on the list having
+ * multiple elements and ignore the edge cases small lists have.
+ */
+ right->pg_row_ins[0]->head[0] =
+ right->pg_row_ins[0]->tail[0] = NULL;
+ ins_head->tail[0]->next[0] = moved_ins;
+ ins_head->tail[0] = moved_ins;
+
+ /*
+ * We marked the new page dirty; we're going to discard it, but
+ * first mark it clean and fix up the cache statistics.
+ */
+ right->modify->write_gen = 0;
+ __wt_cache_dirty_decr(session, right);
+
+ WT_ERR(ret);
}
/*
+ * Save the transaction ID when the split happened. Application
+ * threads will not try to forcibly evict the page again until
+ * all concurrent transactions commit.
+ */
+ page->modify->inmem_split_txn = __wt_txn_new_id(session);
+
+ /* Let our caller know that we split. */
+ *splitp = 1;
+
+ WT_STAT_FAST_CONN_INCR(session, cache_inmem_split);
+ WT_STAT_FAST_DATA_INCR(session, cache_inmem_split);
+
+ /*
+ * We may not be able to immediately free the page's original WT_REF
+ * structure and instantiated key, there may be threads using them.
+ * Add them to the session discard list, to be freed once we know it's
+ * safe.
+ *
+ * After the split, we're going to discard the WT_REF, account for the
+ * change in memory footprint. Row store pages have keys that may be
+ * instantiated, check for that.
+ */
+ if ((page->type == WT_PAGE_ROW_LEAF || page->type == WT_PAGE_ROW_INT) &&
+ (ikey = __wt_ref_key_instantiated(ref)) != NULL)
+ WT_TRET(__split_safe_free(
+ session, 0, ikey, sizeof(WT_IKEY) + ikey->size));
+ WT_TRET(__split_safe_free(session, 0, ref, sizeof(WT_REF)));
+
+ /*
* A note on error handling: if we completed the split, return success,
- * nothing really bad can have happened.
+ * nothing really bad can have happened, and our caller has to proceed
+ * with the split.
*/
- return (ret == WT_PANIC || !complete ? ret : 0);
+ if (ret != 0 && ret != WT_PANIC)
+ __wt_err(session, ret,
+ "ignoring not-fatal error during insert page split");
+ return (ret == WT_PANIC ? WT_PANIC : 0);
+
+err: if (split_ref[0] != NULL) {
+ __wt_free(session, split_ref[0]->key.ikey);
+ __wt_free(session, split_ref[0]);
+ }
+ if (split_ref[1] != NULL) {
+ __wt_free(session, split_ref[1]->key.ikey);
+ __wt_free(session, split_ref[1]);
+ }
+ if (right != NULL)
+ __wt_page_out(session, &right);
+ __wt_scr_free(&key);
+ return (ret);
}
/*
- * __split_evict_single --
- * Resolve a single page split, replacing a page with a new version.
+ * __wt_split_rewrite --
+ * Resolve a failed reconciliation by replacing a page with a new version.
*/
-static int
-__split_evict_single(WT_SESSION_IMPL *session, WT_REF *ref)
+int
+__wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref)
{
WT_PAGE *page;
WT_PAGE_MODIFY *mod;
@@ -1073,17 +1335,28 @@ __split_evict_single(WT_SESSION_IMPL *session, WT_REF *ref)
page = ref->page;
mod = page->modify;
- /* Build the new page. */
+ /*
+ * This isn't a split: a reconciliation failed because we couldn't write
+ * something, and in the case of forced eviction, we need to stop this
+ * page from being such a problem. We have exclusive access, rewrite the
+ * page in memory. The code lives here because the split code knows how
+ * to re-create a page in memory after it's been reconciled, and that's
+ * exactly what we want to do.
+ *
+ * Build the new page.
+ */
memset(&new, 0, sizeof(new));
- WT_RET(__split_inmem_build(session, page, &new, &mod->mod_multi[0]));
+ WT_RET(__split_multi_inmem(session, page, &new, &mod->mod_multi[0]));
/*
- * Discard the original page. Pages with unresolved changes are not
- * marked clean during reconciliation, do it now.
+ * Discard the original page.
+ *
+ * Pages with unresolved changes are not marked clean during
+ * reconciliation, do it now.
*/
mod->write_gen = 0;
__wt_cache_dirty_decr(session, page);
- __wt_page_out(session, &page);
+ __wt_ref_out(session, ref);
/* Swap the new page into place. */
ref->page = new.page;
@@ -1093,29 +1366,100 @@ __split_evict_single(WT_SESSION_IMPL *session, WT_REF *ref)
}
/*
- * __wt_split_evict --
+ * __wt_split_multi --
* Resolve a page split.
*/
int
-__wt_split_evict(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive)
+__wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive)
{
- uint32_t split_entries;
+ WT_IKEY *ikey;
+ WT_DECL_RET;
+ WT_PAGE *page;
+ WT_PAGE_MODIFY *mod;
+ WT_REF **ref_new;
+ size_t ikey_size, parent_decr, parent_incr;
+ uint32_t i, new_entries;
+
+ page = ref->page;
+ mod = page->modify;
+ new_entries = mod->mod_multi_entries;
+
+ ikey = NULL;
+ ikey_size = parent_decr = parent_incr = 0;
+
+ /*
+ * Convert the split page's multiblock reconciliation information into
+ * an array of page reference structures.
+ */
+ WT_RET(__wt_calloc_def(session, new_entries, &ref_new));
+ for (i = 0; i < new_entries; ++i)
+ WT_ERR(__wt_multi_to_ref(session,
+ page, &mod->mod_multi[i], &ref_new[i], &parent_incr));
+
+ /*
+ * After the split, we're going to discard the WT_REF, so account for the
+ * change in memory footprint. Row-store pages have keys that may be
+ * instantiated; check for that.
+ */
+ if ((page->type == WT_PAGE_ROW_LEAF || page->type == WT_PAGE_ROW_INT) &&
+ (ikey = __wt_ref_key_instantiated(ref)) != NULL) {
+ ikey_size = sizeof(WT_IKEY) + ikey->size;
+ WT_MEMSIZE_ADD(parent_decr, ikey_size);
+ }
+ WT_MEMSIZE_ADD(parent_decr, sizeof(WT_REF));
+
+ /* Split into the parent. */
+ WT_ERR(__split_parent(session,
+ ref, ref_new, new_entries, parent_decr, parent_incr, exclusive, 1));
+
+ __wt_free(session, ref_new);
/*
- * There are two cases entering this code. First, an in-memory page that
- * got too large, we forcibly evicted it, and there wasn't anything to
- * write. (Imagine two threads updating a small set keys on a leaf page.
- * The page is too large so we try to evict it, but after reconciliation
- * there's only a small amount of data (so it's a single page we can't
- * split), and because there are two threads, there's some data we can't
- * write (so we can't evict it). In that case, we take advantage of the
- * fact we have exclusive access to the page and rewrite it in memory.)
+ * The split succeeded, discard the page.
*
- * Second, a real split where we reconciled a page and it turned into a
- * lot of pages.
+ * Pages with unresolved changes are not marked clean during
+ * reconciliation, do it now.
+ */
+ if (__wt_page_is_modified(page)) {
+ mod->write_gen = 0;
+ __wt_cache_dirty_decr(session, page);
+ }
+ __wt_ref_out(session, ref);
+
+ /*
+ * We may not be able to immediately free the page's original WT_REF
+ * structure and instantiated key because there may be threads using
+ * them. Add them to the session discard list, to be freed once we know
+ * it's safe.
+ */
+ if (ikey != NULL)
+ WT_TRET(__split_safe_free(session, exclusive, ikey, ikey_size));
+ WT_TRET(__split_safe_free(session, exclusive, ref, sizeof(WT_REF)));
+
+ /*
+ * A note on error handling: if we completed the split, return success;
+ * nothing really bad can have happened, and our caller has to proceed
+ * with the split.
*/
- split_entries = ref->page->modify->mod_multi_entries;
- return (split_entries == 1 ?
- __split_evict_single(session, ref) :
- __split_evict_multi(session, ref, exclusive));
+ if (ret != 0 && ret != WT_PANIC)
+ __wt_err(session, ret,
+ "ignoring not-fatal error during multi-page split");
+ return (ret == WT_PANIC ? WT_PANIC : 0);
+
+err: /*
+ * A note on error handling: in the case of evicting a page that has
+ * unresolved changes, we just instantiated some in-memory pages that
+ * reflect those unresolved changes. The problem is those pages
+ * reference the same WT_UPDATE chains as the page we're splitting,
+ * that is, we simply copied references into the new pages. If the
+ * split fails, the original page is fine, but discarding the created
+ * page would free those update chains, and that's wrong. There isn't
+ * an easy solution: there are a lot of small memory allocations in some
+ * common code paths, and unwinding those changes will be difficult.
+ * For now, leak the memory by not discarding the instantiated pages.
+ */
+ for (i = 0; i < new_entries; ++i)
+ __wt_free_ref(session, page, ref_new[i], 0);
+ __wt_free(session, ref_new);
+ return (ret);
}
diff --git a/src/third_party/wiredtiger/src/btree/rec_write.c b/src/third_party/wiredtiger/src/btree/rec_write.c
index 46f6ed92aae..c72447ae841 100644
--- a/src/third_party/wiredtiger/src/btree/rec_write.c
+++ b/src/third_party/wiredtiger/src/btree/rec_write.c
@@ -331,11 +331,11 @@ static int __rec_dictionary_lookup(
static void __rec_dictionary_reset(WT_RECONCILE *);
/*
- * __wt_rec_write --
+ * __wt_reconcile --
* Reconcile an in-memory page into its on-disk format, and write it.
*/
int
-__wt_rec_write(WT_SESSION_IMPL *session,
+__wt_reconcile(WT_SESSION_IMPL *session,
WT_REF *ref, WT_SALVAGE_COOKIE *salvage, uint32_t flags)
{
WT_CONNECTION_IMPL *conn;
@@ -523,7 +523,7 @@ __rec_root_write(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags)
* Fake up a reference structure, and write the next root page.
*/
__wt_root_ref_init(&fake_ref, next, page->type == WT_PAGE_COL_INT);
- return (__wt_rec_write(session, &fake_ref, NULL, flags));
+ return (__wt_reconcile(session, &fake_ref, NULL, flags));
err: __wt_page_out(session, &next);
return (ret);
diff --git a/src/third_party/wiredtiger/src/cursor/cur_file.c b/src/third_party/wiredtiger/src/cursor/cur_file.c
index caa2a938954..e8428b76691 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_file.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_file.c
@@ -465,8 +465,7 @@ __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri,
if (bulk)
__wt_spin_lock(
session, &S2C(session)->checkpoint_lock);
- ret = __wt_session_get_btree_ckpt(
- session, uri, cfg, flags);
+ ret = __wt_session_get_btree_ckpt(session, uri, cfg, flags);
if (bulk)
__wt_spin_unlock(
session, &S2C(session)->checkpoint_lock);
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index 0c4fe876e5e..082bf9fa9d0 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -187,6 +187,9 @@ struct __wt_page_modify {
/* The largest update transaction ID (approximate). */
uint64_t update_txn;
+ /* In-memory split transaction ID. */
+ uint64_t inmem_split_txn;
+
/* Dirty bytes added to the cache. */
uint64_t bytes_dirty;
@@ -549,7 +552,8 @@ struct __wt_page {
#define WT_PAGE_DISK_MAPPED 0x04 /* Disk image in mapped memory */
#define WT_PAGE_EVICT_LRU 0x08 /* Page is on the LRU queue */
#define WT_PAGE_SCANNING 0x10 /* Obsolete updates are being scanned */
-#define WT_PAGE_SPLITTING 0x20 /* An internal page is growing. */
+#define WT_PAGE_SPLITTING 0x20 /* An internal page is growing */
+#define WT_PAGE_SPLIT_INSERT 0x40 /* A leaf page was split for append */
uint8_t flags_atomic; /* Atomic flags, use F_*_ATOMIC */
};
diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i
index b7957e6647f..464b491c480 100644
--- a/src/third_party/wiredtiger/src/include/btree.i
+++ b/src/third_party/wiredtiger/src/include/btree.i
@@ -937,13 +937,15 @@ __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
* Skip this if eviction is disabled for this operation or this tree,
* or if there is no chance of eviction succeeding for dirty pages due
* to a checkpoint or because we've already tried writing this page and
- * it contains an update that isn't stable.
+ * it contains an update that isn't stable. Also skip forced eviction
+ * if we just did an in-memory split.
*/
if (LF_ISSET(WT_READ_NO_EVICT) ||
page->read_gen != WT_READGEN_OLDEST ||
F_ISSET(btree, WT_BTREE_NO_EVICTION) ||
(__wt_page_is_modified(page) && (btree->checkpointing ||
- !__wt_txn_visible_all(session, page->modify->first_dirty_txn))))
+ !__wt_txn_visible_all(session, page->modify->first_dirty_txn) ||
+ !__wt_txn_visible_all(session, page->modify->inmem_split_txn))))
return (__wt_hazard_clear(session, page));
/*
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index d6b952a3841..8ecb81d638a 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -163,11 +163,14 @@ extern int __wt_verify_dsk(WT_SESSION_IMPL *session, const char *addr, WT_ITEM *
extern int __wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags);
extern int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno, WT_ITEM *value, WT_UPDATE *upd, int is_remove);
extern int __wt_col_search(WT_SESSION_IMPL *session, uint64_t recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt);
-extern int __wt_rec_evict(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive);
+extern int __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive);
+extern void __wt_rec_page_clean_update(WT_SESSION_IMPL *session, WT_REF *ref);
extern void __wt_split_stash_discard(WT_SESSION_IMPL *session);
extern void __wt_split_stash_discard_all( WT_SESSION_IMPL *session_safe, WT_SESSION_IMPL *session);
extern int __wt_multi_to_ref(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi, WT_REF **refp, size_t *incrp);
-extern int __wt_split_evict(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive);
+extern int __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref, int *splitp);
+extern int __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref);
+extern int __wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive);
extern int __wt_ovfl_discard_add(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell);
extern void __wt_ovfl_discard_free(WT_SESSION_IMPL *session, WT_PAGE *page);
extern int __wt_ovfl_reuse_search(WT_SESSION_IMPL *session, WT_PAGE *page, uint8_t **addrp, size_t *addr_sizep, const void *value, size_t value_size);
@@ -178,7 +181,7 @@ extern int __wt_ovfl_txnc_add(WT_SESSION_IMPL *session, WT_PAGE *page, const uin
extern void __wt_ovfl_txnc_free(WT_SESSION_IMPL *session, WT_PAGE *page);
extern int __wt_ovfl_track_wrapup(WT_SESSION_IMPL *session, WT_PAGE *page);
extern int __wt_ovfl_track_wrapup_err(WT_SESSION_IMPL *session, WT_PAGE *page);
-extern int __wt_rec_write(WT_SESSION_IMPL *session, WT_REF *ref, WT_SALVAGE_COOKIE *salvage, uint32_t flags);
+extern int __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref, WT_SALVAGE_COOKIE *salvage, uint32_t flags);
extern int __wt_bulk_init(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk);
extern int __wt_bulk_wrapup(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk);
extern int __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk);
diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h
index ee2baa9037b..879394e2cc5 100644
--- a/src/third_party/wiredtiger/src/include/stat.h
+++ b/src/third_party/wiredtiger/src/include/stat.h
@@ -156,6 +156,7 @@ struct __wt_connection_stats {
WT_STATS cache_bytes_max;
WT_STATS cache_bytes_read;
WT_STATS cache_bytes_write;
+ WT_STATS cache_eviction_app;
WT_STATS cache_eviction_checkpoint;
WT_STATS cache_eviction_clean;
WT_STATS cache_eviction_deepen;
@@ -172,6 +173,7 @@ struct __wt_connection_stats {
WT_STATS cache_eviction_slow;
WT_STATS cache_eviction_split;
WT_STATS cache_eviction_walk;
+ WT_STATS cache_inmem_split;
WT_STATS cache_pages_dirty;
WT_STATS cache_pages_inuse;
WT_STATS cache_read;
@@ -290,6 +292,7 @@ struct __wt_dsrc_stats {
WT_STATS cache_eviction_fail;
WT_STATS cache_eviction_hazard;
WT_STATS cache_eviction_internal;
+ WT_STATS cache_inmem_split;
WT_STATS cache_overflow_value;
WT_STATS cache_read;
WT_STATS cache_read_overflow;
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index a911ec1acdb..6edbe55197e 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -3104,180 +3104,184 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_CACHE_BYTES_READ 1023
/*! cache: bytes written from cache */
#define WT_STAT_CONN_CACHE_BYTES_WRITE 1024
+/*! cache: pages evicted by application threads */
+#define WT_STAT_CONN_CACHE_EVICTION_APP 1025
/*! cache: checkpoint blocked page eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_CHECKPOINT 1025
+#define WT_STAT_CONN_CACHE_EVICTION_CHECKPOINT 1026
/*! cache: unmodified pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1026
+#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1027
/*! cache: page split during eviction deepened the tree */
-#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1027
+#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1028
/*! cache: modified pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1028
+#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1029
/*! cache: pages selected for eviction unable to be evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1029
+#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1030
/*! cache: pages evicted because they exceeded the in-memory maximum */
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1030
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1031
/*! cache: failed eviction of pages that exceeded the in-memory maximum */
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL 1031
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL 1032
/*! cache: hazard pointer blocked page eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1032
+#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1033
/*! cache: internal pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1033
+#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1034
/*! cache: eviction server candidate queue empty when topping up */
-#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_EMPTY 1034
+#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_EMPTY 1035
/*! cache: eviction server candidate queue not empty when topping up */
-#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_NOT_EMPTY 1035
+#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_NOT_EMPTY 1036
/*! cache: eviction server evicting pages */
-#define WT_STAT_CONN_CACHE_EVICTION_SERVER_EVICTING 1036
+#define WT_STAT_CONN_CACHE_EVICTION_SERVER_EVICTING 1037
/*! cache: eviction server populating queue, but not evicting pages */
-#define WT_STAT_CONN_CACHE_EVICTION_SERVER_NOT_EVICTING 1037
+#define WT_STAT_CONN_CACHE_EVICTION_SERVER_NOT_EVICTING 1038
/*! cache: eviction server unable to reach eviction goal */
-#define WT_STAT_CONN_CACHE_EVICTION_SLOW 1038
+#define WT_STAT_CONN_CACHE_EVICTION_SLOW 1039
/*! cache: pages split during eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_SPLIT 1039
+#define WT_STAT_CONN_CACHE_EVICTION_SPLIT 1040
/*! cache: pages walked for eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_WALK 1040
+#define WT_STAT_CONN_CACHE_EVICTION_WALK 1041
+/*! cache: in-memory page splits */
+#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1042
/*! cache: tracked dirty pages in the cache */
-#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1041
+#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1043
/*! cache: pages currently held in the cache */
-#define WT_STAT_CONN_CACHE_PAGES_INUSE 1042
+#define WT_STAT_CONN_CACHE_PAGES_INUSE 1044
/*! cache: pages read into cache */
-#define WT_STAT_CONN_CACHE_READ 1043
+#define WT_STAT_CONN_CACHE_READ 1045
/*! cache: pages written from cache */
-#define WT_STAT_CONN_CACHE_WRITE 1044
+#define WT_STAT_CONN_CACHE_WRITE 1046
/*! connection: pthread mutex condition wait calls */
-#define WT_STAT_CONN_COND_WAIT 1045
-/*! btree: cursor create calls */
-#define WT_STAT_CONN_CURSOR_CREATE 1046
-/*! btree: cursor insert calls */
-#define WT_STAT_CONN_CURSOR_INSERT 1047
-/*! btree: cursor next calls */
-#define WT_STAT_CONN_CURSOR_NEXT 1048
-/*! btree: cursor prev calls */
-#define WT_STAT_CONN_CURSOR_PREV 1049
-/*! btree: cursor remove calls */
-#define WT_STAT_CONN_CURSOR_REMOVE 1050
-/*! btree: cursor reset calls */
-#define WT_STAT_CONN_CURSOR_RESET 1051
-/*! btree: cursor search calls */
-#define WT_STAT_CONN_CURSOR_SEARCH 1052
-/*! btree: cursor search near calls */
-#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1053
-/*! btree: cursor update calls */
-#define WT_STAT_CONN_CURSOR_UPDATE 1054
+#define WT_STAT_CONN_COND_WAIT 1047
+/*! cursor: cursor create calls */
+#define WT_STAT_CONN_CURSOR_CREATE 1048
+/*! cursor: cursor insert calls */
+#define WT_STAT_CONN_CURSOR_INSERT 1049
+/*! cursor: cursor next calls */
+#define WT_STAT_CONN_CURSOR_NEXT 1050
+/*! cursor: cursor prev calls */
+#define WT_STAT_CONN_CURSOR_PREV 1051
+/*! cursor: cursor remove calls */
+#define WT_STAT_CONN_CURSOR_REMOVE 1052
+/*! cursor: cursor reset calls */
+#define WT_STAT_CONN_CURSOR_RESET 1053
+/*! cursor: cursor search calls */
+#define WT_STAT_CONN_CURSOR_SEARCH 1054
+/*! cursor: cursor search near calls */
+#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1055
+/*! cursor: cursor update calls */
+#define WT_STAT_CONN_CURSOR_UPDATE 1056
/*! data-handle: session dhandles swept */
-#define WT_STAT_CONN_DH_SESSION_HANDLES 1055
+#define WT_STAT_CONN_DH_SESSION_HANDLES 1057
/*! data-handle: session sweep attempts */
-#define WT_STAT_CONN_DH_SESSION_SWEEPS 1056
+#define WT_STAT_CONN_DH_SESSION_SWEEPS 1058
/*! connection: files currently open */
-#define WT_STAT_CONN_FILE_OPEN 1057
+#define WT_STAT_CONN_FILE_OPEN 1059
/*! log: log buffer size increases */
-#define WT_STAT_CONN_LOG_BUFFER_GROW 1058
+#define WT_STAT_CONN_LOG_BUFFER_GROW 1060
/*! log: total log buffer size */
-#define WT_STAT_CONN_LOG_BUFFER_SIZE 1059
+#define WT_STAT_CONN_LOG_BUFFER_SIZE 1061
/*! log: log bytes of payload data */
-#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1060
+#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1062
/*! log: log bytes written */
-#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1061
+#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1063
/*! log: yields waiting for previous log file close */
-#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1062
+#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1064
/*! log: maximum log file size */
-#define WT_STAT_CONN_LOG_MAX_FILESIZE 1063
+#define WT_STAT_CONN_LOG_MAX_FILESIZE 1065
/*! log: log read operations */
-#define WT_STAT_CONN_LOG_READS 1064
+#define WT_STAT_CONN_LOG_READS 1066
/*! log: records processed by log scan */
-#define WT_STAT_CONN_LOG_SCAN_RECORDS 1065
+#define WT_STAT_CONN_LOG_SCAN_RECORDS 1067
/*! log: log scan records requiring two reads */
-#define WT_STAT_CONN_LOG_SCAN_REREADS 1066
+#define WT_STAT_CONN_LOG_SCAN_REREADS 1068
/*! log: log scan operations */
-#define WT_STAT_CONN_LOG_SCANS 1067
+#define WT_STAT_CONN_LOG_SCANS 1069
/*! log: consolidated slot closures */
-#define WT_STAT_CONN_LOG_SLOT_CLOSES 1068
+#define WT_STAT_CONN_LOG_SLOT_CLOSES 1070
/*! log: logging bytes consolidated */
-#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1069
+#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1071
/*! log: consolidated slot joins */
-#define WT_STAT_CONN_LOG_SLOT_JOINS 1070
+#define WT_STAT_CONN_LOG_SLOT_JOINS 1072
/*! log: consolidated slot join races */
-#define WT_STAT_CONN_LOG_SLOT_RACES 1071
+#define WT_STAT_CONN_LOG_SLOT_RACES 1073
/*! log: slots selected for switching that were unavailable */
-#define WT_STAT_CONN_LOG_SLOT_SWITCH_FAILS 1072
+#define WT_STAT_CONN_LOG_SLOT_SWITCH_FAILS 1074
/*! log: record size exceeded maximum */
-#define WT_STAT_CONN_LOG_SLOT_TOOBIG 1073
+#define WT_STAT_CONN_LOG_SLOT_TOOBIG 1075
/*! log: failed to find a slot large enough for record */
-#define WT_STAT_CONN_LOG_SLOT_TOOSMALL 1074
+#define WT_STAT_CONN_LOG_SLOT_TOOSMALL 1076
/*! log: consolidated slot join transitions */
-#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1075
+#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1077
/*! log: log sync operations */
-#define WT_STAT_CONN_LOG_SYNC 1076
+#define WT_STAT_CONN_LOG_SYNC 1078
/*! log: log write operations */
-#define WT_STAT_CONN_LOG_WRITES 1077
+#define WT_STAT_CONN_LOG_WRITES 1079
/*! LSM: sleep for LSM checkpoint throttle */
-#define WT_STAT_CONN_LSM_CHECKPOINT_THROTTLE 1078
+#define WT_STAT_CONN_LSM_CHECKPOINT_THROTTLE 1080
/*! LSM: sleep for LSM merge throttle */
-#define WT_STAT_CONN_LSM_MERGE_THROTTLE 1079
+#define WT_STAT_CONN_LSM_MERGE_THROTTLE 1081
/*! LSM: rows merged in an LSM tree */
-#define WT_STAT_CONN_LSM_ROWS_MERGED 1080
+#define WT_STAT_CONN_LSM_ROWS_MERGED 1082
/*! LSM: application work units currently queued */
-#define WT_STAT_CONN_LSM_WORK_QUEUE_APP 1081
+#define WT_STAT_CONN_LSM_WORK_QUEUE_APP 1083
/*! LSM: merge work units currently queued */
-#define WT_STAT_CONN_LSM_WORK_QUEUE_MANAGER 1082
+#define WT_STAT_CONN_LSM_WORK_QUEUE_MANAGER 1084
/*! LSM: tree queue hit maximum */
-#define WT_STAT_CONN_LSM_WORK_QUEUE_MAX 1083
+#define WT_STAT_CONN_LSM_WORK_QUEUE_MAX 1085
/*! LSM: switch work units currently queued */
-#define WT_STAT_CONN_LSM_WORK_QUEUE_SWITCH 1084
+#define WT_STAT_CONN_LSM_WORK_QUEUE_SWITCH 1086
/*! LSM: tree maintenance operations scheduled */
-#define WT_STAT_CONN_LSM_WORK_UNITS_CREATED 1085
+#define WT_STAT_CONN_LSM_WORK_UNITS_CREATED 1087
/*! LSM: tree maintenance operations discarded */
-#define WT_STAT_CONN_LSM_WORK_UNITS_DISCARDED 1086
+#define WT_STAT_CONN_LSM_WORK_UNITS_DISCARDED 1088
/*! LSM: tree maintenance operations executed */
-#define WT_STAT_CONN_LSM_WORK_UNITS_DONE 1087
+#define WT_STAT_CONN_LSM_WORK_UNITS_DONE 1089
/*! connection: memory allocations */
-#define WT_STAT_CONN_MEMORY_ALLOCATION 1088
+#define WT_STAT_CONN_MEMORY_ALLOCATION 1090
/*! connection: memory frees */
-#define WT_STAT_CONN_MEMORY_FREE 1089
+#define WT_STAT_CONN_MEMORY_FREE 1091
/*! connection: memory re-allocations */
-#define WT_STAT_CONN_MEMORY_GROW 1090
+#define WT_STAT_CONN_MEMORY_GROW 1092
/*! connection: total read I/Os */
-#define WT_STAT_CONN_READ_IO 1091
+#define WT_STAT_CONN_READ_IO 1093
/*! reconciliation: page reconciliation calls */
-#define WT_STAT_CONN_REC_PAGES 1092
+#define WT_STAT_CONN_REC_PAGES 1094
/*! reconciliation: page reconciliation calls for eviction */
-#define WT_STAT_CONN_REC_PAGES_EVICTION 1093
+#define WT_STAT_CONN_REC_PAGES_EVICTION 1095
/*! reconciliation: split bytes currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1094
+#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1096
/*! reconciliation: split objects currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1095
+#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1097
/*! connection: pthread mutex shared lock read-lock calls */
-#define WT_STAT_CONN_RWLOCK_READ 1096
+#define WT_STAT_CONN_RWLOCK_READ 1098
/*! connection: pthread mutex shared lock write-lock calls */
-#define WT_STAT_CONN_RWLOCK_WRITE 1097
+#define WT_STAT_CONN_RWLOCK_WRITE 1099
/*! session: open cursor count */
-#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1098
+#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1100
/*! session: open session count */
-#define WT_STAT_CONN_SESSION_OPEN 1099
+#define WT_STAT_CONN_SESSION_OPEN 1101
/*! transaction: transaction begins */
-#define WT_STAT_CONN_TXN_BEGIN 1100
+#define WT_STAT_CONN_TXN_BEGIN 1102
/*! transaction: transaction checkpoints */
-#define WT_STAT_CONN_TXN_CHECKPOINT 1101
+#define WT_STAT_CONN_TXN_CHECKPOINT 1103
/*! transaction: transaction checkpoint currently running */
-#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1102
+#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1104
/*! transaction: transaction checkpoint max time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1103
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1105
/*! transaction: transaction checkpoint min time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1104
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1106
/*! transaction: transaction checkpoint most recent time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1105
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1107
/*! transaction: transaction checkpoint total time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1106
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1108
/*! transaction: transactions committed */
-#define WT_STAT_CONN_TXN_COMMIT 1107
+#define WT_STAT_CONN_TXN_COMMIT 1109
/*! transaction: transaction failures due to cache overflow */
-#define WT_STAT_CONN_TXN_FAIL_CACHE 1108
+#define WT_STAT_CONN_TXN_FAIL_CACHE 1110
/*! transaction: transaction range of IDs currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_RANGE 1109
+#define WT_STAT_CONN_TXN_PINNED_RANGE 1111
/*! transaction: transactions rolled back */
-#define WT_STAT_CONN_TXN_ROLLBACK 1110
+#define WT_STAT_CONN_TXN_ROLLBACK 1112
/*! connection: total write I/Os */
-#define WT_STAT_CONN_WRITE_IO 1111
+#define WT_STAT_CONN_WRITE_IO 1113
/*!
* @}
@@ -3365,98 +3369,100 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_DSRC_CACHE_EVICTION_HAZARD 2038
/*! cache: internal pages evicted */
#define WT_STAT_DSRC_CACHE_EVICTION_INTERNAL 2039
+/*! cache: in-memory page splits */
+#define WT_STAT_DSRC_CACHE_INMEM_SPLIT 2040
/*! cache: overflow values cached in memory */
-#define WT_STAT_DSRC_CACHE_OVERFLOW_VALUE 2040
+#define WT_STAT_DSRC_CACHE_OVERFLOW_VALUE 2041
/*! cache: pages read into cache */
-#define WT_STAT_DSRC_CACHE_READ 2041
+#define WT_STAT_DSRC_CACHE_READ 2042
/*! cache: overflow pages read into cache */
-#define WT_STAT_DSRC_CACHE_READ_OVERFLOW 2042
+#define WT_STAT_DSRC_CACHE_READ_OVERFLOW 2043
/*! cache: pages written from cache */
-#define WT_STAT_DSRC_CACHE_WRITE 2043
+#define WT_STAT_DSRC_CACHE_WRITE 2044
/*! compression: raw compression call failed, no additional data available */
-#define WT_STAT_DSRC_COMPRESS_RAW_FAIL 2044
+#define WT_STAT_DSRC_COMPRESS_RAW_FAIL 2045
/*! compression: raw compression call failed, additional data available */
-#define WT_STAT_DSRC_COMPRESS_RAW_FAIL_TEMPORARY 2045
+#define WT_STAT_DSRC_COMPRESS_RAW_FAIL_TEMPORARY 2046
/*! compression: raw compression call succeeded */
-#define WT_STAT_DSRC_COMPRESS_RAW_OK 2046
+#define WT_STAT_DSRC_COMPRESS_RAW_OK 2047
/*! compression: compressed pages read */
-#define WT_STAT_DSRC_COMPRESS_READ 2047
+#define WT_STAT_DSRC_COMPRESS_READ 2048
/*! compression: compressed pages written */
-#define WT_STAT_DSRC_COMPRESS_WRITE 2048
+#define WT_STAT_DSRC_COMPRESS_WRITE 2049
/*! compression: page written failed to compress */
-#define WT_STAT_DSRC_COMPRESS_WRITE_FAIL 2049
+#define WT_STAT_DSRC_COMPRESS_WRITE_FAIL 2050
/*! compression: page written was too small to compress */
-#define WT_STAT_DSRC_COMPRESS_WRITE_TOO_SMALL 2050
+#define WT_STAT_DSRC_COMPRESS_WRITE_TOO_SMALL 2051
/*! cursor: create calls */
-#define WT_STAT_DSRC_CURSOR_CREATE 2051
+#define WT_STAT_DSRC_CURSOR_CREATE 2052
/*! cursor: insert calls */
-#define WT_STAT_DSRC_CURSOR_INSERT 2052
+#define WT_STAT_DSRC_CURSOR_INSERT 2053
/*! cursor: bulk-loaded cursor-insert calls */
-#define WT_STAT_DSRC_CURSOR_INSERT_BULK 2053
+#define WT_STAT_DSRC_CURSOR_INSERT_BULK 2054
/*! cursor: cursor-insert key and value bytes inserted */
-#define WT_STAT_DSRC_CURSOR_INSERT_BYTES 2054
+#define WT_STAT_DSRC_CURSOR_INSERT_BYTES 2055
/*! cursor: next calls */
-#define WT_STAT_DSRC_CURSOR_NEXT 2055
+#define WT_STAT_DSRC_CURSOR_NEXT 2056
/*! cursor: prev calls */
-#define WT_STAT_DSRC_CURSOR_PREV 2056
+#define WT_STAT_DSRC_CURSOR_PREV 2057
/*! cursor: remove calls */
-#define WT_STAT_DSRC_CURSOR_REMOVE 2057
+#define WT_STAT_DSRC_CURSOR_REMOVE 2058
/*! cursor: cursor-remove key bytes removed */
-#define WT_STAT_DSRC_CURSOR_REMOVE_BYTES 2058
+#define WT_STAT_DSRC_CURSOR_REMOVE_BYTES 2059
/*! cursor: reset calls */
-#define WT_STAT_DSRC_CURSOR_RESET 2059
+#define WT_STAT_DSRC_CURSOR_RESET 2060
/*! cursor: search calls */
-#define WT_STAT_DSRC_CURSOR_SEARCH 2060
+#define WT_STAT_DSRC_CURSOR_SEARCH 2061
/*! cursor: search near calls */
-#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2061
+#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2062
/*! cursor: update calls */
-#define WT_STAT_DSRC_CURSOR_UPDATE 2062
+#define WT_STAT_DSRC_CURSOR_UPDATE 2063
/*! cursor: cursor-update value bytes updated */
-#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2063
+#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2064
/*! LSM: sleep for LSM checkpoint throttle */
-#define WT_STAT_DSRC_LSM_CHECKPOINT_THROTTLE 2064
+#define WT_STAT_DSRC_LSM_CHECKPOINT_THROTTLE 2065
/*! LSM: chunks in the LSM tree */
-#define WT_STAT_DSRC_LSM_CHUNK_COUNT 2065
+#define WT_STAT_DSRC_LSM_CHUNK_COUNT 2066
/*! LSM: highest merge generation in the LSM tree */
-#define WT_STAT_DSRC_LSM_GENERATION_MAX 2066
+#define WT_STAT_DSRC_LSM_GENERATION_MAX 2067
/*! LSM: queries that could have benefited from a Bloom filter that did
* not exist */
-#define WT_STAT_DSRC_LSM_LOOKUP_NO_BLOOM 2067
+#define WT_STAT_DSRC_LSM_LOOKUP_NO_BLOOM 2068
/*! LSM: sleep for LSM merge throttle */
-#define WT_STAT_DSRC_LSM_MERGE_THROTTLE 2068
+#define WT_STAT_DSRC_LSM_MERGE_THROTTLE 2069
/*! reconciliation: dictionary matches */
-#define WT_STAT_DSRC_REC_DICTIONARY 2069
+#define WT_STAT_DSRC_REC_DICTIONARY 2070
/*! reconciliation: internal page multi-block writes */
-#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2070
+#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2071
/*! reconciliation: leaf page multi-block writes */
-#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2071
+#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2072
/*! reconciliation: maximum blocks required for a page */
-#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2072
+#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2073
/*! reconciliation: internal-page overflow keys */
-#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2073
+#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2074
/*! reconciliation: leaf-page overflow keys */
-#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2074
+#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2075
/*! reconciliation: overflow values written */
-#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2075
+#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2076
/*! reconciliation: pages deleted */
-#define WT_STAT_DSRC_REC_PAGE_DELETE 2076
+#define WT_STAT_DSRC_REC_PAGE_DELETE 2077
/*! reconciliation: page checksum matches */
-#define WT_STAT_DSRC_REC_PAGE_MATCH 2077
+#define WT_STAT_DSRC_REC_PAGE_MATCH 2078
/*! reconciliation: page reconciliation calls */
-#define WT_STAT_DSRC_REC_PAGES 2078
+#define WT_STAT_DSRC_REC_PAGES 2079
/*! reconciliation: page reconciliation calls for eviction */
-#define WT_STAT_DSRC_REC_PAGES_EVICTION 2079
+#define WT_STAT_DSRC_REC_PAGES_EVICTION 2080
/*! reconciliation: leaf page key bytes discarded using prefix compression */
-#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2080
+#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2081
/*! reconciliation: internal page key bytes discarded using suffix
* compression */
-#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2081
+#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2082
/*! session: object compaction */
-#define WT_STAT_DSRC_SESSION_COMPACT 2082
+#define WT_STAT_DSRC_SESSION_COMPACT 2083
/*! session: open cursor count */
-#define WT_STAT_DSRC_SESSION_CURSOR_OPEN 2083
+#define WT_STAT_DSRC_SESSION_CURSOR_OPEN 2084
/*! transaction: update conflicts */
-#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2084
+#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2085
/*! @} */
/*
* Statistics section: END
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_manager.c b/src/third_party/wiredtiger/src/lsm/lsm_manager.c
index 71e4724f91c..5cdc4e0783e 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_manager.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_manager.c
@@ -406,23 +406,28 @@ static int
__lsm_manager_run_server(WT_SESSION_IMPL *session)
{
WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
WT_LSM_TREE *lsm_tree;
struct timespec now;
uint64_t fillms, pushms;
+ int dhandle_locked;
conn = S2C(session);
+ dhandle_locked = 0;
+
while (F_ISSET(conn, WT_CONN_SERVER_RUN)) {
- if (TAILQ_EMPTY(&conn->lsmqh)) {
- __wt_sleep(0, 10000);
- continue;
- }
__wt_sleep(0, 10000);
+ if (TAILQ_EMPTY(&conn->lsmqh))
+ continue;
+ __wt_spin_lock(session, &conn->dhandle_lock);
+ F_SET(session, WT_SESSION_HANDLE_LIST_LOCKED);
+ dhandle_locked = 1;
TAILQ_FOREACH(lsm_tree, &S2C(session)->lsmqh, q) {
if (!F_ISSET(lsm_tree, WT_LSM_TREE_ACTIVE))
continue;
- WT_RET(__lsm_manager_aggressive_update(
+ WT_ERR(__lsm_manager_aggressive_update(
session, lsm_tree));
- WT_RET(__wt_epoch(session, &now));
+ WT_ERR(__wt_epoch(session, &now));
pushms = lsm_tree->work_push_ts.tv_sec == 0 ? 0 :
WT_TIMEDIFF(
now, lsm_tree->work_push_ts) / WT_MILLION;
@@ -453,15 +458,15 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session)
(lsm_tree->merge_aggressiveness > 3 &&
!F_ISSET(lsm_tree, WT_LSM_TREE_COMPACTING)) ||
pushms > fillms) {
- WT_RET(__wt_lsm_manager_push_entry(
+ WT_ERR(__wt_lsm_manager_push_entry(
session, WT_LSM_WORK_SWITCH, 0, lsm_tree));
- WT_RET(__wt_lsm_manager_push_entry(
+ WT_ERR(__wt_lsm_manager_push_entry(
session, WT_LSM_WORK_DROP, 0, lsm_tree));
- WT_RET(__wt_lsm_manager_push_entry(
+ WT_ERR(__wt_lsm_manager_push_entry(
session, WT_LSM_WORK_FLUSH, 0, lsm_tree));
- WT_RET(__wt_lsm_manager_push_entry(
+ WT_ERR(__wt_lsm_manager_push_entry(
session, WT_LSM_WORK_BLOOM, 0, lsm_tree));
- WT_RET(__wt_verbose(session, WT_VERB_LSM,
+ WT_ERR(__wt_verbose(session, WT_VERB_LSM,
"MGR %s: queue %d mod %d nchunks %d"
" flags 0x%x aggressive %d pushms %" PRIu64
" fillms %" PRIu64,
@@ -470,13 +475,20 @@ __lsm_manager_run_server(WT_SESSION_IMPL *session)
lsm_tree->flags,
lsm_tree->merge_aggressiveness,
pushms, fillms));
- WT_RET(__wt_lsm_manager_push_entry(
+ WT_ERR(__wt_lsm_manager_push_entry(
session, WT_LSM_WORK_MERGE, 0, lsm_tree));
}
}
+ __wt_spin_unlock(session, &conn->dhandle_lock);
+ F_CLR(session, WT_SESSION_HANDLE_LIST_LOCKED);
+ dhandle_locked = 0;
}
- return (0);
+err: if (dhandle_locked) {
+ __wt_spin_unlock(session, &conn->dhandle_lock);
+ F_CLR(session, WT_SESSION_HANDLE_LIST_LOCKED);
+ }
+ return (ret);
}
/*
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_tree.c b/src/third_party/wiredtiger/src/lsm/lsm_tree.c
index 077b5564276..888f12bdd94 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_tree.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_tree.c
@@ -786,7 +786,7 @@ int
__wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
{
WT_DECL_RET;
- WT_LSM_CHUNK *chunk;
+ WT_LSM_CHUNK *chunk, *last_chunk;
uint32_t nchunks, new_id;
int first_switch;
@@ -795,21 +795,18 @@ __wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
nchunks = lsm_tree->nchunks;
first_switch = nchunks == 0 ? 1 : 0;
+
/*
* Check if a switch is still needed: we may have raced while waiting
* for a lock.
*/
- chunk = NULL;
+ last_chunk = NULL;
if (!first_switch &&
- (chunk = lsm_tree->chunk[nchunks - 1]) != NULL &&
- !F_ISSET(chunk, WT_LSM_CHUNK_ONDISK) &&
+ (last_chunk = lsm_tree->chunk[nchunks - 1]) != NULL &&
+ !F_ISSET(last_chunk, WT_LSM_CHUNK_ONDISK) &&
!F_ISSET(lsm_tree, WT_LSM_TREE_NEED_SWITCH))
goto err;
- /* Set the switch transaction in the previous chunk, if necessary. */
- if (chunk != NULL && chunk->switch_txn == WT_TXN_NONE)
- chunk->switch_txn = __wt_txn_new_id(session);
-
/* Update the throttle time. */
__wt_lsm_tree_throttle(session, lsm_tree, 0);
@@ -835,6 +832,10 @@ __wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
lsm_tree->modified = 1;
+ /* Set the switch transaction in the previous chunk, if necessary. */
+ if (last_chunk != NULL && last_chunk->switch_txn == WT_TXN_NONE)
+ last_chunk->switch_txn = __wt_txn_new_id(session);
+
err: WT_TRET(__wt_lsm_tree_writeunlock(session, lsm_tree));
/*
* Errors that happen during a tree switch leave the tree in a state
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
index 60c28a3cc06..c27b7edb234 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
@@ -69,9 +69,12 @@ int
__wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session,
WT_LSM_TREE *lsm_tree, int force, WT_LSM_CHUNK **chunkp)
{
+ WT_DECL_RET;
+ WT_LSM_CHUNK *chunk;
u_int i, end;
*chunkp = NULL;
+ chunk = NULL;
WT_ASSERT(session, lsm_tree->queue_ref > 0);
WT_RET(__wt_lsm_tree_readlock(session, lsm_tree));
@@ -86,29 +89,43 @@ __wt_lsm_get_chunk_to_flush(WT_SESSION_IMPL *session,
end = force ? lsm_tree->nchunks : lsm_tree->nchunks - 1;
for (i = 0; i < end; i++) {
if (!F_ISSET(lsm_tree->chunk[i], WT_LSM_CHUNK_ONDISK) ||
- (*chunkp == NULL &&
+ (chunk == NULL &&
!F_ISSET(lsm_tree->chunk[i], WT_LSM_CHUNK_STABLE) &&
!lsm_tree->chunk[i]->evicted)) {
- (void)WT_ATOMIC_ADD4(lsm_tree->chunk[i]->refcnt, 1);
- WT_RET(__wt_verbose(session, WT_VERB_LSM,
+ chunk = lsm_tree->chunk[i];
+ (void)WT_ATOMIC_ADD4(chunk->refcnt, 1);
+ WT_ERR(__wt_verbose(session, WT_VERB_LSM,
"Flush%s: return chunk %u of %u: %s",
- force ? " w/ force" : "", i, end - 1,
- lsm_tree->chunk[i]->uri));
- *chunkp = lsm_tree->chunk[i];
+ force ? " w/ force" : "", i, end - 1, chunk->uri));
+
/*
- * Discards are opportunistic, flip a coin to decide
- * whether to try, but take the first real flush we
- * find.
+ * If retrying a discard push an additional work unit
+ * so there are enough to trigger checkpoints.
*/
- if (!F_ISSET(lsm_tree->chunk[i], WT_LSM_CHUNK_ONDISK) ||
- __wt_random(session->rnd) & 1)
- break;
+ if (F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) {
+ /*
+ * Don't be overly zealous about pushing old
+ * chunks from cache. Attempting too many drops
+ * can interfere with checkpoints.
+ */
+ if (__wt_random(session->rnd) & 1) {
+ (void)WT_ATOMIC_SUB4(chunk->refcnt, 1);
+ chunk = NULL;
+ continue;
+ }
+ WT_ERR(__wt_lsm_manager_push_entry(
+ session, WT_LSM_WORK_FLUSH, 0, lsm_tree));
+ }
+ break;
}
}
+err: if (ret != 0 && chunk != NULL)
+ (void)WT_ATOMIC_SUB4(chunk->refcnt, 1);
WT_RET(__wt_lsm_tree_readunlock(session, lsm_tree));
- return (0);
+ *chunkp = chunk;
+ return (ret);
}
/*
diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c
index 2b53746bb46..ac0b854c9df 100644
--- a/src/third_party/wiredtiger/src/support/stat.c
+++ b/src/third_party/wiredtiger/src/support/stat.c
@@ -50,6 +50,7 @@ __wt_stat_init_dsrc_stats(WT_DSRC_STATS *stats)
"cache: data source pages selected for eviction unable to be evicted";
stats->cache_eviction_hazard.desc =
"cache: hazard pointer blocked page eviction";
+ stats->cache_inmem_split.desc = "cache: in-memory page splits";
stats->cache_eviction_internal.desc = "cache: internal pages evicted";
stats->cache_eviction_dirty.desc = "cache: modified pages evicted";
stats->cache_read_overflow.desc =
@@ -168,6 +169,7 @@ __wt_stat_refresh_dsrc_stats(void *stats_arg)
stats->cache_eviction_checkpoint.v = 0;
stats->cache_eviction_fail.v = 0;
stats->cache_eviction_hazard.v = 0;
+ stats->cache_inmem_split.v = 0;
stats->cache_eviction_internal.v = 0;
stats->cache_eviction_dirty.v = 0;
stats->cache_read_overflow.v = 0;
@@ -253,6 +255,7 @@ __wt_stat_aggregate_dsrc_stats(const void *child, const void *parent)
p->cache_eviction_checkpoint.v += c->cache_eviction_checkpoint.v;
p->cache_eviction_fail.v += c->cache_eviction_fail.v;
p->cache_eviction_hazard.v += c->cache_eviction_hazard.v;
+ p->cache_inmem_split.v += c->cache_inmem_split.v;
p->cache_eviction_internal.v += c->cache_eviction_internal.v;
p->cache_eviction_dirty.v += c->cache_eviction_dirty.v;
p->cache_read_overflow.v += c->cache_read_overflow.v;
@@ -341,15 +344,6 @@ __wt_stat_init_connection_stats(WT_CONNECTION_STATS *stats)
stats->block_byte_write.desc = "block-manager: bytes written";
stats->block_map_read.desc = "block-manager: mapped blocks read";
stats->block_byte_map_read.desc = "block-manager: mapped bytes read";
- stats->cursor_create.desc = "btree: cursor create calls";
- stats->cursor_insert.desc = "btree: cursor insert calls";
- stats->cursor_next.desc = "btree: cursor next calls";
- stats->cursor_prev.desc = "btree: cursor prev calls";
- stats->cursor_remove.desc = "btree: cursor remove calls";
- stats->cursor_reset.desc = "btree: cursor reset calls";
- stats->cursor_search.desc = "btree: cursor search calls";
- stats->cursor_search_near.desc = "btree: cursor search near calls";
- stats->cursor_update.desc = "btree: cursor update calls";
stats->cache_bytes_inuse.desc = "cache: bytes currently in the cache";
stats->cache_bytes_read.desc = "cache: bytes read into cache";
stats->cache_bytes_write.desc = "cache: bytes written from cache";
@@ -369,6 +363,7 @@ __wt_stat_init_connection_stats(WT_CONNECTION_STATS *stats)
"cache: failed eviction of pages that exceeded the in-memory maximum";
stats->cache_eviction_hazard.desc =
"cache: hazard pointer blocked page eviction";
+ stats->cache_inmem_split.desc = "cache: in-memory page splits";
stats->cache_eviction_internal.desc = "cache: internal pages evicted";
stats->cache_bytes_max.desc = "cache: maximum bytes configured";
stats->cache_eviction_dirty.desc = "cache: modified pages evicted";
@@ -378,6 +373,8 @@ __wt_stat_init_connection_stats(WT_CONNECTION_STATS *stats)
"cache: pages currently held in the cache";
stats->cache_eviction_force.desc =
"cache: pages evicted because they exceeded the in-memory maximum";
+ stats->cache_eviction_app.desc =
+ "cache: pages evicted by application threads";
stats->cache_read.desc = "cache: pages read into cache";
stats->cache_eviction_fail.desc =
"cache: pages selected for eviction unable to be evicted";
@@ -402,6 +399,15 @@ __wt_stat_init_connection_stats(WT_CONNECTION_STATS *stats)
"connection: pthread mutex shared lock write-lock calls";
stats->read_io.desc = "connection: total read I/Os";
stats->write_io.desc = "connection: total write I/Os";
+ stats->cursor_create.desc = "cursor: cursor create calls";
+ stats->cursor_insert.desc = "cursor: cursor insert calls";
+ stats->cursor_next.desc = "cursor: cursor next calls";
+ stats->cursor_prev.desc = "cursor: cursor prev calls";
+ stats->cursor_remove.desc = "cursor: cursor remove calls";
+ stats->cursor_reset.desc = "cursor: cursor reset calls";
+ stats->cursor_search.desc = "cursor: cursor search calls";
+ stats->cursor_search_near.desc = "cursor: cursor search near calls";
+ stats->cursor_update.desc = "cursor: cursor update calls";
stats->dh_session_handles.desc = "data-handle: session dhandles swept";
stats->dh_session_sweeps.desc = "data-handle: session sweep attempts";
stats->log_slot_closes.desc = "log: consolidated slot closures";
@@ -501,15 +507,6 @@ __wt_stat_refresh_connection_stats(void *stats_arg)
stats->block_byte_write.v = 0;
stats->block_map_read.v = 0;
stats->block_byte_map_read.v = 0;
- stats->cursor_create.v = 0;
- stats->cursor_insert.v = 0;
- stats->cursor_next.v = 0;
- stats->cursor_prev.v = 0;
- stats->cursor_remove.v = 0;
- stats->cursor_reset.v = 0;
- stats->cursor_search.v = 0;
- stats->cursor_search_near.v = 0;
- stats->cursor_update.v = 0;
stats->cache_bytes_read.v = 0;
stats->cache_bytes_write.v = 0;
stats->cache_eviction_checkpoint.v = 0;
@@ -520,10 +517,12 @@ __wt_stat_refresh_connection_stats(void *stats_arg)
stats->cache_eviction_slow.v = 0;
stats->cache_eviction_force_fail.v = 0;
stats->cache_eviction_hazard.v = 0;
+ stats->cache_inmem_split.v = 0;
stats->cache_eviction_internal.v = 0;
stats->cache_eviction_dirty.v = 0;
stats->cache_eviction_deepen.v = 0;
stats->cache_eviction_force.v = 0;
+ stats->cache_eviction_app.v = 0;
stats->cache_read.v = 0;
stats->cache_eviction_fail.v = 0;
stats->cache_eviction_split.v = 0;
@@ -540,6 +539,15 @@ __wt_stat_refresh_connection_stats(void *stats_arg)
stats->rwlock_write.v = 0;
stats->read_io.v = 0;
stats->write_io.v = 0;
+ stats->cursor_create.v = 0;
+ stats->cursor_insert.v = 0;
+ stats->cursor_next.v = 0;
+ stats->cursor_prev.v = 0;
+ stats->cursor_remove.v = 0;
+ stats->cursor_reset.v = 0;
+ stats->cursor_search.v = 0;
+ stats->cursor_search_near.v = 0;
+ stats->cursor_update.v = 0;
stats->dh_session_handles.v = 0;
stats->dh_session_sweeps.v = 0;
stats->log_slot_closes.v = 0;
diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
index 228b2919a39..9254692ea93 100644
--- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c
+++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
@@ -706,17 +706,18 @@ __checkpoint_worker(
if (F_ISSET(ckpt, WT_CKPT_DELETE))
++deleted;
/*
- * Complicated test: if we only deleted a single checkpoint, and
- * it was the last checkpoint in the object, and it has the same
- * name as the checkpoint we're taking (correcting for internal
- * checkpoint names with their generational suffix numbers), we
- * can skip the checkpoint, there's nothing to do.
+ * Complicated test: if the last checkpoint in the object has
+ * the same name as the checkpoint we're taking (correcting for
+ * internal checkpoint names with their generational suffix
+ * numbers), we can skip the checkpoint, there's nothing to do.
+ * The exception is if we're deleting two or more checkpoints:
+ * then we may save space.
*/
- if (deleted == 1 &&
- F_ISSET(ckpt - 1, WT_CKPT_DELETE) &&
+ if (ckpt > ckptbase &&
(strcmp(name, (ckpt - 1)->name) == 0 ||
(WT_PREFIX_MATCH(name, WT_CHECKPOINT) &&
- WT_PREFIX_MATCH((ckpt - 1)->name, WT_CHECKPOINT))))
+ WT_PREFIX_MATCH((ckpt - 1)->name, WT_CHECKPOINT))) &&
+ deleted < 2)
goto done;
}
diff --git a/src/third_party/wiredtiger/tools/stat_data.py b/src/third_party/wiredtiger/tools/stat_data.py
index 56218f497b7..e596fc014e7 100644
--- a/src/third_party/wiredtiger/tools/stat_data.py
+++ b/src/third_party/wiredtiger/tools/stat_data.py
@@ -72,3 +72,19 @@ no_clear_list = [
'transaction: transaction range of IDs currently pinned',
'session: open cursor count',
]
+prefix_list = [
+ 'data-handle',
+ 'reconciliation',
+ 'LSM',
+ 'log',
+ 'cache',
+ 'transaction',
+ 'cursor',
+ 'connection',
+ 'session',
+ 'block-manager',
+ 'async',
+ 'btree',
+ 'compression',
+]
+groups = {'cursor': ['cursor', 'session'], 'lsm': ['LSM', 'transaction'], 'system': ['connection', 'data-handle', 'session'], 'evict': ['cache', 'connection', 'block-manager'], 'memory': ['cache', 'connection', 'reconciliation']} \ No newline at end of file
diff --git a/src/third_party/wiredtiger/tools/wtstats.py b/src/third_party/wiredtiger/tools/wtstats.py
index 882a6fb5be2..cc2ebd80877 100644
--- a/src/third_party/wiredtiger/tools/wtstats.py
+++ b/src/third_party/wiredtiger/tools/wtstats.py
@@ -28,6 +28,7 @@
import fileinput, os, re, shutil, sys, textwrap
from collections import defaultdict
+from glob import glob
from time import mktime
from subprocess import call
@@ -39,7 +40,8 @@ tool_dir = os.path.split(sys.argv[0])[0]
sys.path = [ os.path.join(tool_dir, "3rdparty") ] + sys.path
try:
- from stat_data import no_scale_per_second_list, no_clear_list
+ from stat_data \
+ import groups, no_scale_per_second_list, no_clear_list, prefix_list
except ImportError:
print >>sys.stderr, "Could not import stat_data.py, it should be\
in the same directory as %s" % sys.argv[0]
@@ -118,6 +120,8 @@ import argparse
parser = argparse.ArgumentParser(description='Create graphs from WiredTiger statistics.')
parser.add_argument('--abstime', action='store_true',
help='use absolute time on the x axis')
+parser.add_argument('--all', '-A', action='store_true',
+ help='generate all series as separate HTML output files by category')
parser.add_argument('--clear', action='store_true',
help='WiredTiger stats gathered with clear set')
parser.add_argument('--focus', action='store_true',
@@ -127,15 +131,15 @@ parser.add_argument('--include', '-I', metavar='regexp',
help='include series with titles matching the specifed regexp')
parser.add_argument('--list', action='store_true',
help='list the series that would be displayed')
-parser.add_argument('--output', '-o', metavar='file', default='wtstats.html',
- help='HTML output file')
+parser.add_argument('--output', '-o', metavar='file', default='wtstats',
+ help='HTML output file prefix')
parser.add_argument('--right', '-R', metavar='regexp',
type=re.compile, action='append',
help='use the right axis for series with titles matching the specifed regexp')
parser.add_argument('--wtperf', '-w', action='store_true',
help='Plot wtperf statistics on the same graph')
parser.add_argument('files', metavar='file', nargs='+',
- help='input files generated by WiredTiger statistics logging')
+ help='input files or directories generated by WiredTiger statistics logging')
args = parser.parse_args()
# Don't require users to specify regexps twice for right axis
@@ -148,8 +152,17 @@ if args.include and args.right:
args.include += args.right
# Read the input file(s) into a dictionary of lists.
+def getfiles(l):
+ for f in l:
+ if os.path.isfile(f):
+ yield f
+ elif os.path.isdir(f):
+ for s in glob(os.path.join(f, 'WiredTigerStat*')):
+ print 'Processing ' + s
+ yield s
+
d = defaultdict(list)
-for f in args.files:
+for f in getfiles(args.files):
for line in open(f, 'rU'):
month, day, time, v, title = line.strip('\n').split(" ", 4)
d[title].append((month + " " + day + " " + time, v))
@@ -179,13 +192,75 @@ def common_suffix(a, b):
b = b[1:]
return b
+def output_series(results, prefix=None, grouplist=[]):
+ # open the output file based on prefix
+ if prefix == None:
+ outputname = args.output + '.html'
+ elif len(grouplist) == 0:
+ outputname = args.output +'.' + prefix + '.html'
+ else:
+ outputname = args.output +'.group.' + prefix + '.html'
+
+ if prefix != None and len(grouplist) == 0:
+ this_series = []
+ for title, yaxis, ydata in results:
+ if not prefix in title:
+ continue
+ #print 'Appending to dataset: ' + title
+ this_series.append((title, yaxis, ydata))
+ elif prefix != None and len(grouplist) > 0:
+ this_series = []
+ for title, yaxis, ydata in results:
+ for subgroup in grouplist:
+ if not subgroup in title:
+ continue
+ # print 'Appending to dataset: ' + title
+ this_series.append((title, yaxis, ydata))
+ else:
+ this_series = results
+
+ if len(this_series) == 0:
+ print 'Output: ' + outputname + ' has no data. Do not create.'
+ return
+
+ #---------------------------------------
+ if args.right:
+ charttype = multiChart
+ elif args.focus:
+ charttype = lineWithFocusChart
+ else:
+ charttype = lineChart
+
+ chart_extra = {}
+ # Add in the x axis if the user wants time.
+ if args.abstime:
+ chart_extra['x_axis_format'] = '%H:%M:%S'
+
+ # Create the chart, add the series
+ chart = charttype(name='statlog', height=450+10*len(this_series), resize=True, x_is_date=args.abstime, y_axis_format='g', assets_directory='http://source.wiredtiger.com/graphs/', **chart_extra)
+
+ for title, yaxis, ydata in this_series:
+ chart.add_serie(x=xdata, y=(ydata.get(x, 0) for x in xdata), name=title,
+ type="line", yaxis="2" if yaxis else "1")
+
+ if args.wtperf:
+ addPlotsToStatsChart(chart, os.path.dirname(args.files[0]), args.abstime)
+
+ chart.buildhtml()
+ output_file = open(outputname, 'w')
+ output_file.write(chart.htmlcontent)
+
+ #close Html file
+ output_file.close()
+
+
# Split out the data, convert timestamps
results = []
for title, values in sorted(d.iteritems()):
title, ydata = munge(title, values)
# Ignore entries if a list of regular expressions was given
if args.include and not [r for r in args.include if r.search(title)]:
- continue
+ continue
yaxis = args.right and [r for r in args.right if r.search(title)]
prefix = title if prefix is None else common_prefix(prefix, title)
suffix = title if suffix is None else common_suffix(title, suffix)
@@ -215,33 +290,11 @@ if args.list:
# Figure out the full set of x axis values
xdata = sorted(set(k for k in ydata.iterkeys() for ydata in results))
-# open the output file
-output_file = open(args.output, 'w')
-#---------------------------------------
-if args.right:
- charttype = multiChart
-elif args.focus:
- charttype = lineWithFocusChart
-else:
- charttype = lineChart
-
-chart_extra = {}
-# Add in the x axis if the user wants time.
-if args.abstime:
- chart_extra['x_axis_format'] = '%H:%M:%S'
-
-# Create the chart, add the series
-chart = charttype(name='statlog', height=450+10*len(results), resize=True, x_is_date=args.abstime, y_axis_format='g', assets_directory='http://source.wiredtiger.com/graphs/', **chart_extra)
-
-for title, yaxis, ydata in results:
- chart.add_serie(x=xdata, y=(ydata.get(x, 0) for x in xdata), name=title,
- type="line", yaxis="2" if yaxis else "1")
-
-if args.wtperf:
- addPlotsToStatsChart(chart, os.path.dirname(args.files[0]), args.abstime)
-
-chart.buildhtml()
-output_file.write(chart.htmlcontent)
+output_series(results)
-#close Html file
-output_file.close()
+# If the user wants the stats split up by prefix type do so.
+if args.all:
+ for prefix in prefix_list:
+ output_series(results, prefix)
+ for group in groups.keys():
+ output_series(results, group, groups[group])