author     Alex Gorrod <alexander.gorrod@mongodb.com>    2016-08-04 16:07:16 +1000
committer  Alex Gorrod <alexander.gorrod@mongodb.com>    2016-08-04 16:07:16 +1000
commit     a9e96961abc9dd20d464bdeb120d792166ee4cf9 (patch)
tree       aa569dc7ba001747a4c10902dcad759ae0999ae0
parent     d8fb874fc40989cb9675e56ca80b3b64e6fa2ee3 (diff)
parent     034ecbf55bb22d05c137aa8ef62a070964bf2748 (diff)
download   mongo-a9e96961abc9dd20d464bdeb120d792166ee4cf9.tar.gz
Merge branch 'develop' into mongodb-3.4
-rw-r--r--  SConstruct | 2
-rw-r--r--  bench/wtperf/Makefile.am | 12
-rw-r--r--  bench/wtperf/idle_table_cycle.c | 3
-rw-r--r--  bench/wtperf/runners/checkpoint_schema_race.wtperf | 20
-rw-r--r--  bench/wtperf/runners/evict-btree-stress-multi.wtperf | 12
-rw-r--r--  bench/wtperf/runners/fruit-lsm.wtperf | 22
-rw-r--r--  bench/wtperf/runners/fruit-short.wtperf | 20
-rw-r--r--  bench/wtperf/runners/log-append-large.wtperf | 10
-rw-r--r--  bench/wtperf/runners/log-append-zero.wtperf | 8
-rw-r--r--  bench/wtperf/runners/log-append.wtperf | 8
-rw-r--r--  bench/wtperf/runners/log-nockpt.wtperf | 12
-rw-r--r--  bench/wtperf/runners/log-noprealloc.wtperf | 11
-rw-r--r--  bench/wtperf/runners/log.wtperf | 27
-rw-r--r--  bench/wtperf/runners/multi-btree-zipfian-populate.wtperf | 19
-rw-r--r--  bench/wtperf/runners/multi-btree-zipfian-workload.wtperf | 18
-rw-r--r--  bench/wtperf/runners/overflow-10k-short.wtperf | 19
-rw-r--r--  bench/wtperf/runners/overflow-10k.wtperf | 16
-rw-r--r--  bench/wtperf/runners/overflow-130k-short.wtperf | 19
-rw-r--r--  bench/wtperf/runners/overflow-130k.wtperf | 18
-rw-r--r--  bench/wtperf/wtperf.c | 166
-rw-r--r--  bench/wtperf/wtperf.h | 84
-rw-r--r--  bench/wtperf/wtperf_opt.i | 1
-rw-r--r--  bench/wtperf/wtperf_throttle.c | 13
-rw-r--r--  build_posix/Make.subdirs | 8
-rw-r--r--  build_posix/aclocal/ax_pkg_swig.m4 | 10
-rw-r--r--  dist/api_data.py | 228
-rw-r--r--  dist/flags.py | 5
-rwxr-xr-x  dist/s_all | 16
-rw-r--r--  dist/s_string.ok | 9
-rwxr-xr-x  dist/s_style | 2
-rw-r--r--  dist/stat_data.py | 48
-rw-r--r--  examples/c/ex_all.c | 27
-rw-r--r--  examples/c/ex_file_system.c | 39
-rw-r--r--  examples/java/com/wiredtiger/examples/ex_all.java | 22
-rw-r--r--  ext/compressors/zlib/zlib_compress.c | 2
-rw-r--r--  src/async/async_api.c | 12
-rw-r--r--  src/block/block_ckpt.c | 2
-rw-r--r--  src/block/block_ext.c | 7
-rw-r--r--  src/block/block_mgr.c | 13
-rw-r--r--  src/block/block_open.c | 27
-rw-r--r--  src/block/block_session.c | 2
-rw-r--r--  src/block/block_write.c | 50
-rw-r--r--  src/btree/bt_curnext.c | 14
-rw-r--r--  src/btree/bt_curprev.c | 6
-rw-r--r--  src/btree/bt_discard.c | 4
-rw-r--r--  src/btree/bt_handle.c | 2
-rw-r--r--  src/btree/bt_huffman.c | 3
-rw-r--r--  src/btree/bt_io.c | 9
-rw-r--r--  src/btree/bt_ovfl.c | 2
-rw-r--r--  src/btree/bt_page.c | 1
-rw-r--r--  src/btree/bt_read.c | 27
-rw-r--r--  src/btree/bt_split.c | 141
-rw-r--r--  src/btree/bt_stat.c | 5
-rw-r--r--  src/btree/bt_sync.c | 58
-rw-r--r--  src/btree/bt_walk.c | 20
-rw-r--r--  src/btree/row_srch.c | 2
-rw-r--r--  src/config/config_def.c | 209
-rw-r--r--  src/conn/conn_api.c | 19
-rw-r--r--  src/conn/conn_cache.c | 40
-rw-r--r--  src/conn/conn_ckpt.c | 71
-rw-r--r--  src/conn/conn_dhandle.c | 10
-rw-r--r--  src/conn/conn_log.c | 52
-rw-r--r--  src/conn/conn_stat.c | 159
-rw-r--r--  src/cursor/cur_backup.c | 58
-rw-r--r--  src/cursor/cur_config.c | 4
-rw-r--r--  src/cursor/cur_ds.c | 5
-rw-r--r--  src/cursor/cur_dump.c | 4
-rw-r--r--  src/cursor/cur_file.c | 38
-rw-r--r--  src/cursor/cur_index.c | 52
-rw-r--r--  src/cursor/cur_log.c | 26
-rw-r--r--  src/cursor/cur_metadata.c | 11
-rw-r--r--  src/cursor/cur_stat.c | 28
-rw-r--r--  src/cursor/cur_table.c | 15
-rw-r--r--  src/docs/security.dox | 21
-rw-r--r--  src/docs/spell.ok | 3
-rw-r--r--  src/docs/statistics.dox | 31
-rw-r--r--  src/docs/upgrading.dox | 44
-rw-r--r--  src/docs/wtperf.dox | 2
-rw-r--r--  src/evict/evict_lru.c | 634
-rw-r--r--  src/evict/evict_page.c | 190
-rw-r--r--  src/include/api.h | 2
-rw-r--r--  src/include/block.h | 2
-rw-r--r--  src/include/btmem.h | 36
-rw-r--r--  src/include/btree.h | 15
-rw-r--r--  src/include/btree.i | 188
-rw-r--r--  src/include/cache.h | 35
-rw-r--r--  src/include/cache.i | 95
-rw-r--r--  src/include/connection.h | 2
-rw-r--r--  src/include/cursor.h | 9
-rw-r--r--  src/include/cursor.i | 7
-rw-r--r--  src/include/extern.h | 23
-rw-r--r--  src/include/flags.h | 13
-rw-r--r--  src/include/hardware.h | 11
-rw-r--r--  src/include/intpack.i | 8
-rw-r--r--  src/include/os_fhandle.i | 12
-rw-r--r--  src/include/os_fs.i | 75
-rw-r--r--  src/include/os_fstream.i | 2
-rw-r--r--  src/include/queue.h | 174
-rw-r--r--  src/include/stat.h | 38
-rw-r--r--  src/include/txn.h | 1
-rw-r--r--  src/include/txn.i | 14
-rw-r--r--  src/include/wiredtiger.in | 667
-rw-r--r--  src/log/log.c | 59
-rw-r--r--  src/lsm/lsm_cursor.c | 23
-rw-r--r--  src/lsm/lsm_tree.c | 14
-rw-r--r--  src/lsm/lsm_work_unit.c | 2
-rw-r--r--  src/meta/meta_table.c | 3
-rw-r--r--  src/meta/meta_track.c | 13
-rw-r--r--  src/meta/meta_turtle.c | 11
-rw-r--r--  src/os_common/filename.c | 54
-rw-r--r--  src/os_common/os_fhandle.c | 32
-rw-r--r--  src/os_common/os_fs_inmemory.c | 12
-rw-r--r--  src/os_common/os_fstream.c | 2
-rw-r--r--  src/os_posix/os_fs.c | 119
-rw-r--r--  src/os_win/os_fs.c | 28
-rw-r--r--  src/os_win/os_path.c | 26
-rw-r--r--  src/reconcile/rec_write.c | 319
-rw-r--r--  src/schema/schema_rename.c | 2
-rw-r--r--  src/session/session_api.c | 168
-rw-r--r--  src/session/session_compact.c | 14
-rw-r--r--  src/support/err.c | 6
-rw-r--r--  src/support/stat.c | 121
-rw-r--r--  src/txn/txn.c | 59
-rw-r--r--  src/txn/txn_ckpt.c | 474
-rw-r--r--  src/txn/txn_log.c | 2
-rw-r--r--  src/utilities/util_dump.c | 48
-rw-r--r--  test/cursor_order/cursor_order_ops.c | 3
-rw-r--r--  test/format/ops.c | 6
-rwxr-xr-x  test/format/smoke.sh | 2
-rw-r--r--  test/manydbs/Makefile.am | 3
-rwxr-xr-x  test/manydbs/smoke.sh | 18
-rw-r--r--  test/mciproject.yml | 32
-rw-r--r--  test/recovery/Makefile.am | 3
-rw-r--r--  test/recovery/random-abort.c | 18
-rwxr-xr-x  test/recovery/smoke.sh | 8
-rw-r--r--  test/suite/helper.py | 43
-rw-r--r--  test/suite/run.py | 41
-rw-r--r--  test/suite/test_async01.py | 4
-rw-r--r--  test/suite/test_async02.py | 4
-rw-r--r--  test/suite/test_backup02.py | 4
-rw-r--r--  test/suite/test_backup03.py | 36
-rw-r--r--  test/suite/test_backup04.py | 4
-rw-r--r--  test/suite/test_backup05.py | 1
-rw-r--r--  test/suite/test_base02.py | 4
-rw-r--r--  test/suite/test_base05.py | 4
-rw-r--r--  test/suite/test_bug003.py | 4
-rw-r--r--  test/suite/test_bug006.py | 4
-rw-r--r--  test/suite/test_bug008.py | 4
-rw-r--r--  test/suite/test_bug009.py | 1
-rw-r--r--  test/suite/test_bug011.py | 2
-rw-r--r--  test/suite/test_bug016.py | 109
-rw-r--r--  test/suite/test_bulk01.py | 4
-rw-r--r--  test/suite/test_bulk02.py | 7
-rw-r--r--  test/suite/test_checkpoint01.py | 14
-rw-r--r--  test/suite/test_checkpoint02.py | 4
-rw-r--r--  test/suite/test_colgap.py | 6
-rw-r--r--  test/suite/test_collator.py | 1
-rw-r--r--  test/suite/test_compact01.py | 4
-rw-r--r--  test/suite/test_compact02.py | 7
-rw-r--r--  test/suite/test_compress01.py | 4
-rw-r--r--  test/suite/test_config03.py | 7
-rw-r--r--  test/suite/test_cursor01.py | 4
-rw-r--r--  test/suite/test_cursor02.py | 4
-rw-r--r--  test/suite/test_cursor03.py | 4
-rw-r--r--  test/suite/test_cursor04.py | 4
-rw-r--r--  test/suite/test_cursor06.py | 4
-rw-r--r--  test/suite/test_cursor07.py | 4
-rw-r--r--  test/suite/test_cursor08.py | 12
-rw-r--r--  test/suite/test_cursor09.py | 4
-rw-r--r--  test/suite/test_cursor_compare.py | 4
-rw-r--r--  test/suite/test_cursor_pin.py | 4
-rw-r--r--  test/suite/test_cursor_random.py | 8
-rw-r--r--  test/suite/test_cursor_random02.py | 4
-rw-r--r--  test/suite/test_drop.py | 4
-rw-r--r--  test/suite/test_dump.py | 11
-rw-r--r--  test/suite/test_dupc.py | 4
-rw-r--r--  test/suite/test_durability01.py | 1
-rw-r--r--  test/suite/test_empty.py | 4
-rw-r--r--  test/suite/test_encrypt01.py | 5
-rw-r--r--  test/suite/test_encrypt02.py | 4
-rw-r--r--  test/suite/test_encrypt03.py | 4
-rw-r--r--  test/suite/test_encrypt04.py | 5
-rw-r--r--  test/suite/test_encrypt05.py | 5
-rw-r--r--  test/suite/test_encrypt06.py | 4
-rw-r--r--  test/suite/test_encrypt07.py | 1
-rw-r--r--  test/suite/test_excl.py | 4
-rw-r--r--  test/suite/test_huffman01.py | 4
-rw-r--r--  test/suite/test_huffman02.py | 4
-rw-r--r--  test/suite/test_index02.py | 68
-rw-r--r--  test/suite/test_inmem01.py | 88
-rw-r--r--  test/suite/test_intpack.py | 35
-rw-r--r--  test/suite/test_join01.py | 10
-rw-r--r--  test/suite/test_join02.py | 4
-rw-r--r--  test/suite/test_join03.py | 1
-rw-r--r--  test/suite/test_join04.py | 1
-rw-r--r--  test/suite/test_join05.py | 1
-rw-r--r--  test/suite/test_join06.py | 4
-rw-r--r--  test/suite/test_join07.py | 4
-rw-r--r--  test/suite/test_join08.py | 1
-rw-r--r--  test/suite/test_jsondump01.py | 50
-rw-r--r--  test/suite/test_lsm01.py | 8
-rw-r--r--  test/suite/test_metadata_cursor01.py | 4
-rw-r--r--  test/suite/test_nsnap01.py | 1
-rw-r--r--  test/suite/test_nsnap02.py | 1
-rw-r--r--  test/suite/test_nsnap03.py | 1
-rw-r--r--  test/suite/test_nsnap04.py | 1
-rw-r--r--  test/suite/test_overwrite.py | 4
-rw-r--r--  test/suite/test_perf001.py | 4
-rw-r--r--  test/suite/test_readonly01.py | 5
-rw-r--r--  test/suite/test_rebalance.py | 4
-rw-r--r--  test/suite/test_reconfig01.py | 19
-rw-r--r--  test/suite/test_reconfig02.py | 33
-rw-r--r--  test/suite/test_rename.py | 4
-rw-r--r--  test/suite/test_schema02.py | 4
-rw-r--r--  test/suite/test_schema03.py | 15
-rw-r--r--  test/suite/test_schema04.py | 4
-rw-r--r--  test/suite/test_schema05.py | 4
-rw-r--r--  test/suite/test_schema06.py | 5
-rw-r--r--  test/suite/test_split.py | 1
-rw-r--r--  test/suite/test_stat01.py | 4
-rw-r--r--  test/suite/test_stat02.py | 35
-rw-r--r--  test/suite/test_stat03.py | 4
-rw-r--r--  test/suite/test_stat04.py | 10
-rw-r--r--  test/suite/test_stat05.py | 13
-rw-r--r--  test/suite/test_stat_log01.py | 16
-rw-r--r--  test/suite/test_sweep01.py | 4
-rw-r--r--  test/suite/test_sweep03.py | 4
-rw-r--r--  test/suite/test_truncate01.py | 13
-rw-r--r--  test/suite/test_truncate02.py | 5
-rw-r--r--  test/suite/test_txn01.py | 4
-rw-r--r--  test/suite/test_txn02.py | 12
-rw-r--r--  test/suite/test_txn03.py | 4
-rw-r--r--  test/suite/test_txn04.py | 4
-rw-r--r--  test/suite/test_txn05.py | 5
-rw-r--r--  test/suite/test_txn06.py | 1
-rw-r--r--  test/suite/test_txn07.py | 5
-rw-r--r--  test/suite/test_txn08.py | 1
-rw-r--r--  test/suite/test_txn09.py | 9
-rw-r--r--  test/suite/test_txn10.py | 1
-rw-r--r--  test/suite/test_txn12.py | 1
-rw-r--r--  test/suite/test_txn13.py | 4
-rw-r--r--  test/suite/test_txn14.py | 4
-rw-r--r--  test/suite/test_txn15.py | 4
-rw-r--r--  test/suite/test_upgrade.py | 4
-rw-r--r--  test/suite/test_util02.py | 4
-rw-r--r--  test/suite/test_util03.py | 4
-rw-r--r--  test/suite/test_util13.py | 4
-rw-r--r--  test/suite/wtscenario.py | 36
-rw-r--r--  test/suite/wttest.py | 6
-rwxr-xr-x  test/thread/smoke.sh | 6
-rw-r--r--  test/utility/misc.c | 15
-rw-r--r--  test/utility/test_util.h | 1
-rw-r--r--  tools/wtstats/stat_data.py | 47
-rwxr-xr-x  tools/wtstats/wtstats.py | 2
254 files changed, 4447 insertions, 2911 deletions
diff --git a/SConstruct b/SConstruct
index a5dd8761d6c..b0ce771e9bd 100644
--- a/SConstruct
+++ b/SConstruct
@@ -484,7 +484,7 @@ t = env.Program("wtperf", [
"bench/wtperf/wtperf_throttle.c",
"bench/wtperf/wtperf_truncate.c",
],
- LIBS=[wtlib, shim] + wtlibs)
+ LIBS=[wtlib, shim, testutil] + wtlibs)
Default(t)
#Build the Examples
diff --git a/bench/wtperf/Makefile.am b/bench/wtperf/Makefile.am
index cc1f84b5406..57792e3887f 100644
--- a/bench/wtperf/Makefile.am
+++ b/bench/wtperf/Makefile.am
@@ -1,13 +1,17 @@
-AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include
-
-LDADD = $(top_builddir)/libwiredtiger.la -lm
+AM_CPPFLAGS = -I$(top_builddir)
+AM_CPPFLAGS +=-I$(top_srcdir)/src/include
+AM_CPPFLAGS +=-I$(top_srcdir)/test/utility
noinst_PROGRAMS = wtperf
-wtperf_LDFLAGS = -static
wtperf_SOURCES =\
config.c idle_table_cycle.c misc.c track.c wtperf.c \
wtperf.h wtperf_opt.i wtperf_throttle.c wtperf_truncate.c
+wtperf_LDADD = $(top_builddir)/test/utility/libtest_util.la
+wtperf_LDADD +=$(top_builddir)/libwiredtiger.la
+wtperf_LDADD +=-lm
+wtperf_LDFLAGS = -static
+
TESTS = smoke.sh
AM_TESTS_ENVIRONMENT = rm -rf WT_TEST ; mkdir WT_TEST ;
# automake 1.11 compatibility
diff --git a/bench/wtperf/idle_table_cycle.c b/bench/wtperf/idle_table_cycle.c
index b699b5b9dd1..3c079bb560f 100644
--- a/bench/wtperf/idle_table_cycle.c
+++ b/bench/wtperf/idle_table_cycle.c
@@ -129,7 +129,8 @@ cycle_idle_tables(void *arg)
* Drop the table. Keep retrying on EBUSY failure - it is an
* expected return when checkpoints are happening.
*/
- while ((ret = session->drop(session, uri, "force")) == EBUSY)
+ while ((ret = session->drop(
+ session, uri, "force,checkpoint_wait=false")) == EBUSY)
__wt_sleep(1, 0);
if (ret != 0 && ret != EBUSY) {
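The retry loop above now passes "force,checkpoint_wait=false", so the drop skips the checkpoint lock and simply returns EBUSY when it conflicts with a running checkpoint. A minimal standalone sketch of the same pattern against the public WiredTiger API (the URI and the one-second back-off are illustrative; wtperf itself uses the internal __wt_sleep()):

    /* Retry a non-blocking, forced drop while checkpoints are running. */
    #include <errno.h>
    #include <unistd.h>
    #include "wiredtiger.h"

    static int
    drop_with_retry(WT_SESSION *session, const char *uri)
    {
        int ret;

        /* checkpoint_wait=false avoids the checkpoint lock; EBUSY means retry. */
        while ((ret = session->drop(
            session, uri, "force,checkpoint_wait=false")) == EBUSY)
            sleep(1);
        return (ret);
    }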
diff --git a/bench/wtperf/runners/checkpoint_schema_race.wtperf b/bench/wtperf/runners/checkpoint_schema_race.wtperf
new file mode 100644
index 00000000000..ade8e88ee9b
--- /dev/null
+++ b/bench/wtperf/runners/checkpoint_schema_race.wtperf
@@ -0,0 +1,20 @@
+# Check create and drop behavior concurrent with checkpoints (WT-2798).
+# Set up multiple tables and a cache size large enough that checkpoints can
+# take a long time.
+conn_config="cache_size=8GB,log=(enabled=false),checkpoint=(wait=30)"
+table_config="leaf_page_max=4k,internal_page_max=16k,type=file"
+icount=10000000
+table_count=100
+table_count_idle=100
+# Turn on create/drop of idle tables, but don't worry if individual operations
+# take a long time.
+idle_table_cycle=120
+populate_threads=5
+checkpoint_threads=0
+report_interval=5
+# 100 million
+random_range=10000000
+run_time=300
+# Setup a workload that dirties a lot of the cache
+threads=((count=2,reads=1),(count=2,inserts=1),(count=2,updates=1))
+value_sz=500
diff --git a/bench/wtperf/runners/evict-btree-stress-multi.wtperf b/bench/wtperf/runners/evict-btree-stress-multi.wtperf
new file mode 100644
index 00000000000..9699b9ae3bb
--- /dev/null
+++ b/bench/wtperf/runners/evict-btree-stress-multi.wtperf
@@ -0,0 +1,12 @@
+conn_config="cache_size=1G,eviction=(threads_max=4),session_max=2000"
+table_config="type=file"
+table_count=100
+icount=100000000
+report_interval=5
+run_time=600
+populate_threads=1
+threads=((count=100,updates=1,reads=4,ops_per_txn=30))
+# Warn if a latency over a quarter second is seen
+max_latency=250
+sample_interval=5
+sample_rate=1
diff --git a/bench/wtperf/runners/fruit-lsm.wtperf b/bench/wtperf/runners/fruit-lsm.wtperf
deleted file mode 100644
index e5817554201..00000000000
--- a/bench/wtperf/runners/fruit-lsm.wtperf
+++ /dev/null
@@ -1,22 +0,0 @@
-# wtperf options file: simulate riak and its test1 and test2 configuration
-# The configuration for the connection and table are from riak and the
-# specification of the data (count, size, threads) is from basho_bench.
-#
-conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600),log=(enabled=true),transaction_sync=(enabled=true,method=none),checkpoint=(wait=180),lsm_manager=(worker_thread_max=12)"
-#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600)"
-#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024"
-compact=true
-sess_config="isolation=snapshot"
-table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB),type=lsm,leaf_page_max=16K,leaf_item_max=4K,os_cache_dirty_max=16MB"
-icount=25000000
-key_sz=40
-value_sz=800
-#max_latency=2000
-pareto=20
-populate_threads=20
-report_interval=10
-random_value=true
-run_time=18000
-sample_interval=10
-table_count=8
-threads=((count=20,read=6,update=1))
diff --git a/bench/wtperf/runners/fruit-short.wtperf b/bench/wtperf/runners/fruit-short.wtperf
deleted file mode 100644
index 10cb423a92d..00000000000
--- a/bench/wtperf/runners/fruit-short.wtperf
+++ /dev/null
@@ -1,20 +0,0 @@
-# wtperf options file: simulate riak and its test1 and test2 configuration
-# The configuration for the connection and table are from riak and the
-# specification of the data (count, size, threads) is from basho_bench.
-#
-#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600)"
-conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,lsm_manager=(worker_thread_max=6)"
-compact=true
-sess_config="isolation=snapshot"
-table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB),type=lsm,leaf_page_max=16K"
-icount=25000000
-key_sz=40
-value_sz=800
-max_latency=2000
-pareto=20
-populate_threads=20
-report_interval=10
-random_value=true
-run_time=1800
-sample_interval=10
-threads=((count=20,read=6,update=1))
diff --git a/bench/wtperf/runners/log-append-large.wtperf b/bench/wtperf/runners/log-append-large.wtperf
deleted file mode 100644
index c1364c17c28..00000000000
--- a/bench/wtperf/runners/log-append-large.wtperf
+++ /dev/null
@@ -1,10 +0,0 @@
-# wtperf options file: Test a log file with a multi-threaded
-# append workload. We want to create a very large number of log file
-# switches with fewer records per log file than we have active threads.
-conn_config="cache_size=1G,log=(enabled=true,file_max=20MB),checkpoint=(log_size=1G)"
-table_config="type=file"
-icount=1000
-report_interval=5
-run_time=0
-value_sz=5000000
-populate_threads=8
diff --git a/bench/wtperf/runners/log-append-zero.wtperf b/bench/wtperf/runners/log-append-zero.wtperf
deleted file mode 100644
index 973d2cddd0d..00000000000
--- a/bench/wtperf/runners/log-append-zero.wtperf
+++ /dev/null
@@ -1,8 +0,0 @@
-# wtperf options file: Test a log file with a multi-threaded
-# append workload.
-conn_config="cache_size=1G,log=(enabled=true,file_max=20MB,zero_fill=true),checkpoint=(log_size=1G)"
-table_config="type=file"
-icount=50000000
-report_interval=5
-run_time=0
-populate_threads=8
diff --git a/bench/wtperf/runners/log-append.wtperf b/bench/wtperf/runners/log-append.wtperf
deleted file mode 100644
index 9d0a78e3c61..00000000000
--- a/bench/wtperf/runners/log-append.wtperf
+++ /dev/null
@@ -1,8 +0,0 @@
-# wtperf options file: Test a log file with a multi-threaded
-# append workload.
-conn_config="cache_size=1G,log=(enabled=true,file_max=20MB),checkpoint=(log_size=1G)"
-table_config="type=file"
-icount=50000000
-report_interval=5
-run_time=0
-populate_threads=8
diff --git a/bench/wtperf/runners/log-nockpt.wtperf b/bench/wtperf/runners/log-nockpt.wtperf
deleted file mode 100644
index a078cead740..00000000000
--- a/bench/wtperf/runners/log-nockpt.wtperf
+++ /dev/null
@@ -1,12 +0,0 @@
-# wtperf options file: Test performance with a log file enabled.
-# Set the log file reasonably small to catch log-swtich bottle
-# necks.
-conn_config="cache_size=1G,log=(enabled=true,file_max=20MB)"
-table_config="type=file"
-icount=50000
-report_interval=5
-run_time=40
-populate_threads=1
-random_range=50000000
-threads=((count=8,inserts=1))
-
diff --git a/bench/wtperf/runners/log-noprealloc.wtperf b/bench/wtperf/runners/log-noprealloc.wtperf
deleted file mode 100644
index 66032f599aa..00000000000
--- a/bench/wtperf/runners/log-noprealloc.wtperf
+++ /dev/null
@@ -1,11 +0,0 @@
-# wtperf options file: Test performance with a log file enabled.
-# Set the log file reasonably small to catch log-swtich bottle
-# necks.
-conn_config="cache_size=1G,log=(enabled=true,file_max=200K,prealloc=false),checkpoint=(log_size=500MB)"
-table_config="type=file"
-icount=50000
-report_interval=5
-run_time=120
-populate_threads=1
-random_range=50000000
-threads=((count=8,inserts=1))
diff --git a/bench/wtperf/runners/log.wtperf b/bench/wtperf/runners/log.wtperf
index 32a9cc3b0a6..6cf50dfb5a5 100644
--- a/bench/wtperf/runners/log.wtperf
+++ b/bench/wtperf/runners/log.wtperf
@@ -1,10 +1,27 @@
+#
# wtperf options file: Test performance with a log file enabled.
# Set the log file small to catch log-swtich bottlenecks.
-conn_config="cache_size=1G,log=(enabled=true,file_max=200K),checkpoint=(log_size=500MB)"
+#
+# Perform updates instead of inserts to stress logging not eviction,
+# page splits or reconciliation. Have it fit in cache.
+#
+# We expect this test can and will be run in other forms from the command
+# line to change log file size, pre-allocation, zero filling, logging off
+# and checkpoint off.
+#
+# Jenkins runs for perf testing:
+# - Config as-is
+# - Config + "-C "log=(enabled,file_max=1M)": small log files and switching
+# - Config + "-C "log=(enabled,zero_fill=true,file_max=1M)": zero-filling
+# - Config + "-C "checkpoint=(wait=0)": no checkpoints
+# - Config + "-C "log=(enabled,prealloc=false,file_max=1M)": no pre-allocation
+#
+conn_config="cache_size=5G,log=(enabled=true),checkpoint=(log_size=500M),eviction=(threads_max=4)"
table_config="type=file"
-icount=50000
+icount=1000000
report_interval=5
-run_time=120
+run_time=180
populate_threads=1
-random_range=50000000
-threads=((count=8,inserts=1))
+threads=((count=8,updates=1))
+# Warm up the cache for a minute.
+warmup=60
diff --git a/bench/wtperf/runners/multi-btree-zipfian-populate.wtperf b/bench/wtperf/runners/multi-btree-zipfian-populate.wtperf
new file mode 100644
index 00000000000..ddd9c055eac
--- /dev/null
+++ b/bench/wtperf/runners/multi-btree-zipfian-populate.wtperf
@@ -0,0 +1,19 @@
+# Create a set of tables with uneven distribution of data
+conn_config="cache_size=1G,eviction=(threads_max=4),file_manager=(close_idle_time=100000),checkpoint=(wait=60,log_size=2GB),statistics=(fast),statistics_log=(wait=5,json),session_max=1000"
+table_config="type=file"
+table_count=100
+icount=0
+random_range=1000000000
+pareto=10
+range_partition=true
+report_interval=5
+
+run_ops=10000000
+populate_threads=0
+icount=0
+threads=((count=20,inserts=1))
+
+# Warn if a latency over 1 second is seen
+max_latency=1000
+sample_interval=5
+sample_rate=1
diff --git a/bench/wtperf/runners/multi-btree-zipfian-workload.wtperf b/bench/wtperf/runners/multi-btree-zipfian-workload.wtperf
new file mode 100644
index 00000000000..380350c88c8
--- /dev/null
+++ b/bench/wtperf/runners/multi-btree-zipfian-workload.wtperf
@@ -0,0 +1,18 @@
+# Read from a set of tables with uneven distribution of data
+conn_config="cache_size=1G,eviction=(threads_max=4),file_manager=(close_idle_time=100000),checkpoint=(wait=60,log_size=2GB),statistics=(fast),statistics_log=(wait=5,json),session_max=1000"
+table_config="type=file"
+table_count=100
+icount=0
+random_range=1000000000
+pareto=10
+range_partition=true
+report_interval=5
+create=false
+
+run_time=600
+threads=((count=20,reads=1))
+
+# Warn if a latency over 1 second is seen
+max_latency=1000
+sample_interval=5
+sample_rate=1
diff --git a/bench/wtperf/runners/overflow-10k-short.wtperf b/bench/wtperf/runners/overflow-10k-short.wtperf
deleted file mode 100644
index 47228079db8..00000000000
--- a/bench/wtperf/runners/overflow-10k-short.wtperf
+++ /dev/null
@@ -1,19 +0,0 @@
-# wtperf options file: simulate riak and a short form of its voxer config.
-# The configuration for the connection and table are from riak and the
-# specification of the data (count, size, threads) is from basho_bench.
-#
-#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600)"
-conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,lsm_manager=(worker_thread_max=6)"
-compact=true
-compression="snappy"
-sess_config="isolation=snapshot"
-table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB),type=lsm,leaf_page_max=16K,os_cache_dirty_max=16MB"
-icount=15000
-key_sz=40
-value_sz=10000
-max_latency=2000
-populate_threads=1
-report_interval=5
-random_value=true
-run_time=300
-threads=((count=10,read=1),(count=10,update=1))
diff --git a/bench/wtperf/runners/overflow-10k.wtperf b/bench/wtperf/runners/overflow-10k.wtperf
index 9b4ed2acaee..5d7eeea9cf2 100644
--- a/bench/wtperf/runners/overflow-10k.wtperf
+++ b/bench/wtperf/runners/overflow-10k.wtperf
@@ -1,9 +1,7 @@
-# wtperf options file: simulate riak and its test1 and test2 configuration
-# The configuration for the connection and table are from riak and the
-# specification of the data (count, size, threads) is from basho_bench.
#
-#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600)"
-conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,lsm_manager=(worker_thread_max=6)"
+# Run with overflow items and LSM.
+#
+conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,lsm_manager=(worker_thread_max=6),statistics=(fast),statistics_log=(wait=10)"
compact=true
compression="snappy"
sess_config="isolation=snapshot"
@@ -13,8 +11,8 @@ key_sz=40
value_sz=10000
max_latency=2000
populate_threads=1
-report_interval=10
+report_interval=5
random_value=true
-run_time=18000
-sample_interval=10
-threads=((count=20,read=1,update=1))
+run_time=300
+threads=((count=10,read=1),(count=10,update=1))
+warmup=30
diff --git a/bench/wtperf/runners/overflow-130k-short.wtperf b/bench/wtperf/runners/overflow-130k-short.wtperf
deleted file mode 100644
index 83f67062bf8..00000000000
--- a/bench/wtperf/runners/overflow-130k-short.wtperf
+++ /dev/null
@@ -1,19 +0,0 @@
-# wtperf options file: simulate riak and a short form of its voxer config.
-# The configuration for the connection and table are from riak and the
-# specification of the data (count, size, threads) is from basho_bench.
-#
-#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600)"
-conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,lsm_manager=(worker_thread_max=6)"
-compact=true
-compression="snappy"
-sess_config="isolation=snapshot"
-table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB),type=lsm,leaf_page_max=16K,os_cache_dirty_max=16MB"
-icount=15000
-key_sz=40
-value_sz=130000
-max_latency=2000
-populate_threads=1
-report_interval=5
-random_value=true
-run_time=300
-threads=((count=10,read=1),(count=10,update=1))
diff --git a/bench/wtperf/runners/overflow-130k.wtperf b/bench/wtperf/runners/overflow-130k.wtperf
index a3439f0c575..2be01afd08a 100644
--- a/bench/wtperf/runners/overflow-130k.wtperf
+++ b/bench/wtperf/runners/overflow-130k.wtperf
@@ -1,20 +1,18 @@
-# wtperf options file: simulate riak and its test1 and test2 configuration
-# The configuration for the connection and table are from riak and the
-# specification of the data (count, size, threads) is from basho_bench.
#
-#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600)"
-conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,lsm_manager=(worker_thread_max=6)"
+# Run with very large overflow items and btree.
+#
+conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,statistics=(fast),statistics_log=(wait=10)"
compact=true
compression="snappy"
sess_config="isolation=snapshot"
-table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB),type=lsm,leaf_page_max=16K,os_cache_dirty_max=16MB"
+table_config="internal_page_max=128K,type=file,leaf_page_max=16K,os_cache_dirty_max=16MB,leaf_value_max=32K"
icount=15000
key_sz=40
value_sz=130000
max_latency=2000
populate_threads=1
-report_interval=10
+report_interval=5
random_value=true
-run_time=18000
-sample_interval=10
-threads=((count=20,read=1,update=1))
+run_time=300
+threads=((count=10,read=1),(count=10,update=1))
+warmup=30
diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c
index 9d35f6fa640..58271106d61 100644
--- a/bench/wtperf/wtperf.c
+++ b/bench/wtperf/wtperf.c
@@ -36,7 +36,6 @@ static const CONFIG default_cfg = {
NULL, /* reopen config */
NULL, /* base_uri */
NULL, /* uris */
- NULL, /* helium_mount */
NULL, /* conn */
NULL, /* logf */
NULL, /* async */
@@ -73,14 +72,14 @@ static const char * const debug_cconfig = "";
static const char * const debug_tconfig = "";
static void *checkpoint_worker(void *);
-static int create_tables(CONFIG *);
-static int drop_all_tables(CONFIG *);
+static int drop_all_tables(CONFIG *);
static int execute_populate(CONFIG *);
static int execute_workload(CONFIG *);
static int find_table_count(CONFIG *);
static void *monitor(void *);
static void *populate_thread(void *);
static void randomize_value(CONFIG_THREAD *, char *);
+static void recreate_dir(const char *);
static int start_all_runs(CONFIG *);
static int start_run(CONFIG *);
static int start_threads(CONFIG *,
@@ -93,10 +92,6 @@ static void *worker(void *);
static uint64_t wtperf_rand(CONFIG_THREAD *);
static uint64_t wtperf_value_range(CONFIG *);
-#define HELIUM_NAME "dev1"
-#define HELIUM_PATH \
- "../../ext/test/helium/.libs/libwiredtiger_helium.so"
-#define HELIUM_CONFIG ",type=helium"
#define INDEX_COL_NAMES ",columns=(key,val)"
/* Retrieve an ID for the next insert operation. */
@@ -155,6 +150,23 @@ randomize_value(CONFIG_THREAD *thread, char *value_buf)
}
/*
+ * Partition data by key ranges.
+ */
+static uint32_t
+map_key_to_table(CONFIG *cfg, uint64_t k)
+{
+ if (cfg->range_partition) {
+ /* Take care to return a result in [0..table_count-1]. */
+ if (k > cfg->icount + cfg->random_range)
+ return (0);
+ return ((uint32_t)((k - 1) /
+ ((cfg->icount + cfg->random_range + cfg->table_count - 1) /
+ cfg->table_count)));
+ } else
+ return ((uint32_t)(k % cfg->table_count));
+}
+
+/*
* Figure out and extend the size of the value string, used for growing
* updates. We know that the value to be updated is in the threads value
* scratch buffer.
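The new map_key_to_table() above optionally partitions keys by range instead of the old k % table_count hashing. A small sketch of the same arithmetic, using the table_count and random_range values from the zipfian runner configs added in this change (icount=0, random_range=1000000000, table_count=100), so each table covers 10,000,000 keys:

    /* Print a few key-to-table mappings under range partitioning. */
    #include <inttypes.h>
    #include <stdio.h>

    int
    main(void)
    {
        uint64_t icount = 0, random_range = 1000000000, table_count = 100;
        uint64_t keys_per_table =
            (icount + random_range + table_count - 1) / table_count;
        uint64_t k;

        for (k = 1; k <= icount + random_range; k += 250000000)
            printf("key %" PRIu64 " -> table %" PRIu64 "\n",
                k, (k - 1) / keys_per_table);
        return (0);
    }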
@@ -393,7 +405,7 @@ worker_async(void *arg)
* Then retry to get an async op.
*/
while ((ret = conn->async_new_op(
- conn, cfg->uris[next_val % cfg->table_count],
+ conn, cfg->uris[map_key_to_table(cfg, next_val)],
NULL, &cb, &asyncop)) == EBUSY)
(void)usleep(10000);
if (ret != 0)
@@ -466,7 +478,7 @@ do_range_reads(CONFIG *cfg, WT_CURSOR *cursor)
range_key_buf = &buf[0];
/* Save where the first key is for comparisons. */
- cursor->get_key(cursor, &range_key_buf);
+ testutil_check(cursor->get_key(cursor, &range_key_buf));
extract_key(range_key_buf, &next_val);
for (range = 0; range < cfg->read_range; ++range) {
@@ -477,7 +489,7 @@ do_range_reads(CONFIG *cfg, WT_CURSOR *cursor)
break;
/* Retrieve and decode the key */
- cursor->get_key(cursor, &range_key_buf);
+ testutil_check(cursor->get_key(cursor, &range_key_buf));
extract_key(range_key_buf, &next_val);
if (next_val < prev_val) {
lprintf(cfg, EINVAL, 0,
@@ -547,9 +559,8 @@ worker(void *arg)
}
}
/* Setup the timer for throttling. */
- if (thread->workload->throttle != 0 &&
- (ret = setup_throttle(thread)) != 0)
- goto err;
+ if (thread->workload->throttle != 0)
+ setup_throttle(thread);
/* Setup for truncate */
if (thread->workload->truncate != 0)
@@ -611,7 +622,7 @@ worker(void *arg)
/*
* Spread the data out around the multiple databases.
*/
- cursor = cursors[next_val % cfg->table_count];
+ cursor = cursors[map_key_to_table(cfg, next_val)];
/*
* Skip the first time we do an operation, when trk->ops
@@ -1010,7 +1021,7 @@ populate_thread(void *arg)
/*
* Figure out which table this op belongs to.
*/
- cursor = cursors[op % cfg->table_count];
+ cursor = cursors[map_key_to_table(cfg, op)];
generate_key(cfg, key_buf, op);
measure_latency =
cfg->sample_interval != 0 &&
@@ -1148,7 +1159,7 @@ populate_async(void *arg)
* Allocate an async op for whichever table.
*/
while ((ret = conn->async_new_op(
- conn, cfg->uris[op % cfg->table_count],
+ conn, cfg->uris[map_key_to_table(cfg, op)],
NULL, &cb, &asyncop)) == EBUSY)
(void)usleep(10000);
if (ret != 0)
@@ -1858,7 +1869,7 @@ create_uris(CONFIG *cfg)
base_uri_len = strlen(cfg->base_uri);
cfg->uris = dcalloc(cfg->table_count, sizeof(char *));
for (i = 0; i < cfg->table_count; i++) {
- uri = cfg->uris[i] = dcalloc(base_uri_len + 5, 1);
+ uri = cfg->uris[i] = dcalloc(base_uri_len + 6, 1);
/*
* If there is only one table, just use base name.
*/
@@ -1877,9 +1888,6 @@ create_tables(CONFIG *cfg)
int ret;
char buf[512];
- if (cfg->create == 0)
- return (0);
-
if ((ret = cfg->conn->open_session(
cfg->conn, NULL, cfg->sess_config, &session)) != 0) {
lprintf(cfg, ret, 0,
@@ -1971,13 +1979,10 @@ start_all_runs(CONFIG *cfg)
if (strcmp(cfg->monitor_dir, cfg->home) == 0)
next_cfg->monitor_dir = new_home;
- /* Create clean home directories. */
- snprintf(cmd_buf, cmd_len, "rm -rf %s && mkdir %s",
- next_cfg->home, next_cfg->home);
- if ((ret = system(cmd_buf)) != 0) {
- fprintf(stderr, "%s: failed\n", cmd_buf);
- goto err;
- }
+ /* If creating the sub-database, recreate its home */
+ if (cfg->create != 0)
+ recreate_dir(next_cfg->home);
+
if ((ret = pthread_create(
&threads[i], NULL, thread_run_wtperf, next_cfg)) != 0) {
lprintf(cfg, ret, 0, "Error creating thread");
@@ -2024,8 +2029,8 @@ start_run(CONFIG *cfg)
{
pthread_t monitor_thread;
uint64_t total_ops;
+ uint32_t run_time;
int monitor_created, ret, t_ret;
- char helium_buf[256];
monitor_created = ret = 0;
/* [-Wconditional-uninitialized] */
@@ -2040,21 +2045,10 @@ start_run(CONFIG *cfg)
goto err;
}
- /* Configure optional Helium volume. */
- if (cfg->helium_mount != NULL) {
- snprintf(helium_buf, sizeof(helium_buf),
- "entry=wiredtiger_extension_init,config=["
- "%s=[helium_devices=\"he://./%s\","
- "helium_o_volume_truncate=1]]",
- HELIUM_NAME, cfg->helium_mount);
- if ((ret = cfg->conn->load_extension(
- cfg->conn, HELIUM_PATH, helium_buf)) != 0)
- lprintf(cfg,
- ret, 0, "Error loading Helium: %s", helium_buf);
- }
-
create_uris(cfg);
- if ((ret = create_tables(cfg)) != 0)
+
+ /* If creating, create the tables. */
+ if (cfg->create != 0 && (ret = create_tables(cfg)) != 0)
goto err;
/* Start the monitor thread. */
@@ -2083,7 +2077,8 @@ start_run(CONFIG *cfg)
goto err;
/* Didn't create, set insert count. */
- if (cfg->create == 0 && find_table_count(cfg) != 0)
+ if (cfg->create == 0 && cfg->random_range == 0 &&
+ find_table_count(cfg) != 0)
goto err;
/* Start the checkpoint thread. */
if (cfg->checkpoint_threads != 0) {
@@ -2108,26 +2103,27 @@ start_run(CONFIG *cfg)
cfg->ckpt_ops = sum_ckpt_ops(cfg);
total_ops = cfg->read_ops + cfg->insert_ops + cfg->update_ops;
+ run_time = cfg->run_time == 0 ? 1 : cfg->run_time;
lprintf(cfg, 0, 1,
"Executed %" PRIu64 " read operations (%" PRIu64
"%%) %" PRIu64 " ops/sec",
cfg->read_ops, (cfg->read_ops * 100) / total_ops,
- cfg->read_ops / cfg->run_time);
+ cfg->read_ops / run_time);
lprintf(cfg, 0, 1,
"Executed %" PRIu64 " insert operations (%" PRIu64
"%%) %" PRIu64 " ops/sec",
cfg->insert_ops, (cfg->insert_ops * 100) / total_ops,
- cfg->insert_ops / cfg->run_time);
+ cfg->insert_ops / run_time);
lprintf(cfg, 0, 1,
"Executed %" PRIu64 " truncate operations (%" PRIu64
"%%) %" PRIu64 " ops/sec",
cfg->truncate_ops, (cfg->truncate_ops * 100) / total_ops,
- cfg->truncate_ops / cfg->run_time);
+ cfg->truncate_ops / run_time);
lprintf(cfg, 0, 1,
"Executed %" PRIu64 " update operations (%" PRIu64
"%%) %" PRIu64 " ops/sec",
cfg->update_ops, (cfg->update_ops * 100) / total_ops,
- cfg->update_ops / cfg->run_time);
+ cfg->update_ops / run_time);
lprintf(cfg, 0, 1,
"Executed %" PRIu64 " checkpoint operations",
cfg->ckpt_ops);
@@ -2182,18 +2178,21 @@ err: if (ret == 0)
extern int __wt_optind, __wt_optreset;
extern char *__wt_optarg;
+void (*custom_die)(void) = NULL;
int
main(int argc, char *argv[])
{
CONFIG *cfg, _cfg;
size_t req_len, sreq_len;
- int ch, monitor_set, ret;
- const char *opts = "C:H:h:m:O:o:T:";
+ bool monitor_set;
+ int ch, ret;
+ const char *opts = "C:h:m:O:o:T:";
const char *config_opts;
char *cc_buf, *sess_cfg, *tc_buf, *user_cconfig, *user_tconfig;
- monitor_set = ret = 0;
+ monitor_set = false;
+ ret = 0;
config_opts = NULL;
cc_buf = sess_cfg = tc_buf = user_cconfig = user_tconfig = NULL;
@@ -2219,8 +2218,12 @@ main(int argc, char *argv[])
strcat(user_cconfig, __wt_optarg);
}
break;
- case 'H':
- cfg->helium_mount = __wt_optarg;
+ case 'h':
+ cfg->home = __wt_optarg;
+ break;
+ case 'm':
+ cfg->monitor_dir = __wt_optarg;
+ monitor_set = true;
break;
case 'O':
config_opts = __wt_optarg;
@@ -2236,15 +2239,7 @@ main(int argc, char *argv[])
strcat(user_tconfig, __wt_optarg);
}
break;
- case 'h':
- cfg->home = __wt_optarg;
- break;
- case 'm':
- cfg->monitor_dir = __wt_optarg;
- monitor_set = 1;
- break;
case '?':
- fprintf(stderr, "Invalid option\n");
usage();
goto einval;
}
@@ -2300,7 +2295,7 @@ main(int argc, char *argv[])
* to 4096 if needed.
*/
req_len = strlen(",async=(enabled=true,threads=)") + 4;
- cfg->async_config = dcalloc(req_len, 1);
+ cfg->async_config = dmalloc(req_len);
snprintf(cfg->async_config, req_len,
",async=(enabled=true,threads=%" PRIu32 ")",
cfg->async_threads);
@@ -2321,13 +2316,9 @@ main(int argc, char *argv[])
}
/* Build the URI from the table name. */
- req_len = strlen("table:") +
- strlen(HELIUM_NAME) + strlen(cfg->table_name) + 2;
- cfg->base_uri = dcalloc(req_len, 1);
- snprintf(cfg->base_uri, req_len, "table:%s%s%s",
- cfg->helium_mount == NULL ? "" : HELIUM_NAME,
- cfg->helium_mount == NULL ? "" : "/",
- cfg->table_name);
+ req_len = strlen("table:") + strlen(cfg->table_name) + 2;
+ cfg->base_uri = dmalloc(req_len);
+ snprintf(cfg->base_uri, req_len, "table:%s", cfg->table_name);
/* Make stdout line buffered, so verbose output appears quickly. */
__wt_stream_set_line_buffer(stdout);
@@ -2346,13 +2337,13 @@ main(int argc, char *argv[])
if (cfg->session_count_idle > 0) {
sreq_len = strlen(",session_max=") + 6;
req_len += sreq_len;
- sess_cfg = dcalloc(sreq_len, 1);
+ sess_cfg = dmalloc(sreq_len);
snprintf(sess_cfg, sreq_len,
",session_max=%" PRIu32,
cfg->session_count_idle + cfg->workers_cnt +
cfg->populate_threads + 10);
}
- cc_buf = dcalloc(req_len, 1);
+ cc_buf = dmalloc(req_len);
/*
* This is getting hard to parse.
*/
@@ -2368,36 +2359,34 @@ main(int argc, char *argv[])
if ((ret = config_opt_str(cfg, "conn_config", cc_buf)) != 0)
goto err;
}
- if (cfg->verbose > 1 || cfg->index || cfg->helium_mount != NULL ||
+ if (cfg->verbose > 1 || cfg->index ||
user_tconfig != NULL || cfg->compress_table != NULL) {
- req_len = strlen(cfg->table_config) + strlen(HELIUM_CONFIG) +
- strlen(debug_tconfig) + 3;
+ req_len = strlen(cfg->table_config) + strlen(debug_tconfig) + 3;
if (user_tconfig != NULL)
req_len += strlen(user_tconfig);
if (cfg->compress_table != NULL)
req_len += strlen(cfg->compress_table);
if (cfg->index)
req_len += strlen(INDEX_COL_NAMES);
- tc_buf = dcalloc(req_len, 1);
+ tc_buf = dmalloc(req_len);
/*
* This is getting hard to parse.
*/
- snprintf(tc_buf, req_len, "%s%s%s%s%s%s%s%s",
+ snprintf(tc_buf, req_len, "%s%s%s%s%s%s%s",
cfg->table_config,
cfg->index ? INDEX_COL_NAMES : "",
cfg->compress_table ? cfg->compress_table : "",
cfg->verbose > 1 ? ",": "",
cfg->verbose > 1 ? debug_tconfig : "",
user_tconfig ? ",": "",
- user_tconfig ? user_tconfig : "",
- cfg->helium_mount == NULL ? "" : HELIUM_CONFIG);
+ user_tconfig ? user_tconfig : "");
if ((ret = config_opt_str(cfg, "table_config", tc_buf)) != 0)
goto err;
}
if (cfg->log_partial && cfg->table_count > 1) {
req_len = strlen(cfg->table_config) +
strlen(LOG_PARTIAL_CONFIG) + 1;
- cfg->partial_config = dcalloc(req_len, 1);
+ cfg->partial_config = dmalloc(req_len);
snprintf(cfg->partial_config, req_len, "%s%s",
cfg->table_config, LOG_PARTIAL_CONFIG);
}
@@ -2410,7 +2399,7 @@ main(int argc, char *argv[])
strlen(READONLY_CONFIG) + 1;
else
req_len = strlen(cfg->conn_config) + 1;
- cfg->reopen_config = dcalloc(req_len, 1);
+ cfg->reopen_config = dmalloc(req_len);
if (cfg->readonly)
snprintf(cfg->reopen_config, req_len, "%s%s",
cfg->conn_config, READONLY_CONFIG);
@@ -2422,6 +2411,10 @@ main(int argc, char *argv[])
if ((ret = config_sanity(cfg)) != 0)
goto err;
+ /* If creating, remove and re-create the home directory. */
+ if (cfg->create != 0)
+ recreate_dir(cfg->home);
+
/* Write a copy of the config. */
config_to_file(cfg);
@@ -2536,6 +2529,19 @@ stop_threads(CONFIG *cfg, u_int num, CONFIG_THREAD *threads)
return (0);
}
+static void
+recreate_dir(const char *name)
+{
+ char *buf;
+ size_t len;
+
+ len = strlen(name) * 2 + 100;
+ buf = dmalloc(len);
+ (void)snprintf(buf, len, "rm -rf %s && mkdir %s", name, name);
+ testutil_checkfmt(system(buf), "system: %s", buf);
+ free(buf);
+}
+
static int
drop_all_tables(CONFIG *cfg)
{
@@ -2615,7 +2621,7 @@ wtperf_rand(CONFIG_THREAD *thread)
* first item in the table being "hot".
*/
if (rval > wtperf_value_range(cfg))
- rval = wtperf_value_range(cfg);
+ rval = 0;
}
/*
* Wrap the key to within the expected range and avoid zero: we never
diff --git a/bench/wtperf/wtperf.h b/bench/wtperf/wtperf.h
index d874fa4eefe..27c3832d316 100644
--- a/bench/wtperf/wtperf.h
+++ b/bench/wtperf/wtperf.h
@@ -29,14 +29,11 @@
#ifndef HAVE_WTPERF_H
#define HAVE_WTPERF_H
-#include <wt_internal.h>
+#include "test_util.h"
+
#include <assert.h>
#include <math.h>
-#ifdef _WIN32
-#include "windows_shim.h"
-#endif
-
#include "config_opt.h"
typedef struct __config CONFIG;
@@ -83,7 +80,6 @@ typedef struct {
typedef struct {
uint64_t stone_gap;
uint64_t needed_stones;
- uint64_t final_stone_gap;
uint64_t expected_total;
uint64_t total_inserts;
uint64_t last_total_inserts;
@@ -126,7 +122,6 @@ struct __config { /* Configuration structure */
char *reopen_config; /* Config string for conn reopen */
char *base_uri; /* Object URI */
char **uris; /* URIs if multiple tables */
- const char *helium_mount; /* Optional Helium mount point */
WT_CONNECTION *conn; /* Database connection */
@@ -281,7 +276,7 @@ void latency_print(CONFIG *);
int run_truncate(
CONFIG *, CONFIG_THREAD *, WT_CURSOR *, WT_SESSION *, int *);
int setup_log_file(CONFIG *);
-int setup_throttle(CONFIG_THREAD*);
+void setup_throttle(CONFIG_THREAD*);
int setup_truncate(CONFIG *, CONFIG_THREAD *, WT_SESSION *);
int start_idle_table_cycle(CONFIG *, pthread_t *);
int stop_idle_table_cycle(CONFIG *, pthread_t);
@@ -292,7 +287,7 @@ uint64_t sum_read_ops(CONFIG *);
uint64_t sum_truncate_ops(CONFIG *);
uint64_t sum_update_ops(CONFIG *);
void usage(void);
-int worker_throttle(CONFIG_THREAD*);
+void worker_throttle(CONFIG_THREAD*);
void lprintf(const CONFIG *, int err, uint32_t, const char *, ...)
#if defined(__GNUC__)
@@ -328,75 +323,4 @@ die(int e, const char *str)
fprintf(stderr, "Call to %s failed: %s", str, wiredtiger_strerror(e));
exit(EXIT_FAILURE);
}
-
-/*
- * dmalloc --
- * Call malloc, dying on failure.
- */
-static inline void *
-dmalloc(size_t len)
-{
- void *p;
-
- if ((p = malloc(len)) == NULL)
- die(errno, "malloc");
- return (p);
-}
-
-/*
- * dcalloc --
- * Call calloc, dying on failure.
- */
-static inline void *
-dcalloc(size_t num, size_t size)
-{
- void *p;
-
- if ((p = calloc(num, size)) == NULL)
- die(errno, "calloc");
- return (p);
-}
-
-/*
- * drealloc --
- * Call realloc, dying on failure.
- */
-static inline void *
-drealloc(void *p, size_t len)
-{
- void *repl;
-
- if ((repl = realloc(p, len)) == NULL)
- die(errno, "realloc");
- return (repl);
-}
-
-/*
- * dstrdup --
- * Call strdup, dying on failure.
- */
-static inline char *
-dstrdup(const char *str)
-{
- char *p;
-
- if ((p = strdup(str)) == NULL)
- die(errno, "strdup");
- return (p);
-}
-
-/*
- * dstrndup --
- * Call emulating strndup, dying on failure. Don't use actual strndup here
- * as it is not supported within MSVC.
- */
-static inline char *
-dstrndup(const char *str, const size_t len)
-{
- char *p;
-
- p = dcalloc(len + 1, sizeof(char));
- memcpy(p, str, len);
- return (p);
-}
#endif
diff --git a/bench/wtperf/wtperf_opt.i b/bench/wtperf/wtperf_opt.i
index 2afd20f777f..f6c96febc85 100644
--- a/bench/wtperf/wtperf_opt.i
+++ b/bench/wtperf/wtperf_opt.i
@@ -144,6 +144,7 @@ DEF_OPT_AS_UINT32(random_range, 0,
"if non zero choose a value from within this range as the key for "
"insert operations")
DEF_OPT_AS_BOOL(random_value, 0, "generate random content for the value")
+DEF_OPT_AS_BOOL(range_partition, 0, "partition data by range (vs hash)")
DEF_OPT_AS_UINT32(read_range, 0, "scan a range of keys after each search")
DEF_OPT_AS_BOOL(readonly, 0,
"reopen the connection between populate and workload phases in readonly "
diff --git a/bench/wtperf/wtperf_throttle.c b/bench/wtperf/wtperf_throttle.c
index a98fd9b18d7..e49bca00d07 100644
--- a/bench/wtperf/wtperf_throttle.c
+++ b/bench/wtperf/wtperf_throttle.c
@@ -31,7 +31,7 @@
/*
* Put the initial config together for running a throttled workload.
*/
-int
+void
setup_throttle(CONFIG_THREAD *thread)
{
THROTTLE_CONFIG *throttle_cfg;
@@ -70,15 +70,14 @@ setup_throttle(CONFIG_THREAD *thread)
throttle_cfg->ops_count = throttle_cfg->ops_per_increment;
/* Set the first timestamp of when we incremented */
- WT_RET(__wt_epoch(NULL, &throttle_cfg->last_increment));
- return (0);
+ testutil_check(__wt_epoch(NULL, &throttle_cfg->last_increment));
}
/*
* Run the throttle function. We will sleep if needed and then reload the
* counter to perform more operations.
*/
-int
+void
worker_throttle(CONFIG_THREAD *thread)
{
THROTTLE_CONFIG *throttle_cfg;
@@ -87,7 +86,7 @@ worker_throttle(CONFIG_THREAD *thread)
throttle_cfg = &thread->throttle_cfg;
- WT_RET(__wt_epoch(NULL, &now));
+ testutil_check(__wt_epoch(NULL, &now));
/*
* If we did enough operations in the current interval, sleep for
@@ -102,7 +101,7 @@ worker_throttle(CONFIG_THREAD *thread)
/*
* After sleeping, set the interval to the current time.
*/
- WT_RET(__wt_epoch(NULL, &throttle_cfg->last_increment));
+ testutil_check(__wt_epoch(NULL, &throttle_cfg->last_increment));
} else {
throttle_cfg->ops_count = (usecs_delta *
throttle_cfg->ops_per_increment) /
@@ -115,6 +114,4 @@ worker_throttle(CONFIG_THREAD *thread)
*/
throttle_cfg->ops_count =
WT_MIN(throttle_cfg->ops_count, thread->workload->throttle);
-
- return (0);
}
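Both throttle functions now return void and report failures through testutil_check() from the shared test/utility library instead of propagating WT_RET() errors. The real macro is defined in test/utility/test_util.h; the following is only a sketch of the check-and-die pattern it stands for, under a hypothetical name:

    /* Hypothetical check-and-die helper in the spirit of testutil_check(). */
    #include <stdio.h>
    #include <stdlib.h>
    #include "wiredtiger.h"

    #define check_or_die(call) do {                                 \
            int __r;                                                \
            if ((__r = (call)) != 0) {                              \
                    fprintf(stderr, "%s: %s\n",                     \
                        #call, wiredtiger_strerror(__r));           \
                    exit(EXIT_FAILURE);                             \
            }                                                       \
    } while (0)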
diff --git a/build_posix/Make.subdirs b/build_posix/Make.subdirs
index 64749378ed1..0b5175e4196 100644
--- a/build_posix/Make.subdirs
+++ b/build_posix/Make.subdirs
@@ -18,14 +18,15 @@ ext/extractors/csv
ext/test/kvs_bdb HAVE_BERKELEY_DB
.
api/leveldb LEVELDB
-bench/wtperf
examples/c
lang/java JAVA
examples/java JAVA
lang/python PYTHON
-# Make the tests
+# Test/Benchmark support library.
test/utility
+
+# Test programs.
test/bloom
test/checkpoint
test/csuite
@@ -39,3 +40,6 @@ test/readonly
test/recovery
test/salvage
test/thread
+
+# Benchmark programs.
+bench/wtperf
diff --git a/build_posix/aclocal/ax_pkg_swig.m4 b/build_posix/aclocal/ax_pkg_swig.m4
index 9ebdeb531b9..89941bc3fa9 100644
--- a/build_posix/aclocal/ax_pkg_swig.m4
+++ b/build_posix/aclocal/ax_pkg_swig.m4
@@ -32,9 +32,9 @@
# LICENSE
#
# Copyright (c) 2008 Sebastian Huber <sebastian-huber@web.de>
-# Copyright (c) 2008 Alan W. Irwin <irwin@beluga.phys.uvic.ca>
+# Copyright (c) 2008 Alan W. Irwin
# Copyright (c) 2008 Rafael Laboissiere <rafael@laboissiere.net>
-# Copyright (c) 2008 Andrew Collier <colliera@ukzn.ac.za>
+# Copyright (c) 2008 Andrew Collier
# Copyright (c) 2011 Murray Cumming <murrayc@openismus.com>
#
# This program is free software; you can redistribute it and/or modify it
@@ -63,11 +63,11 @@
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.
-#serial 8
+#serial 11
AC_DEFUN([AX_PKG_SWIG],[
- # Some systems have SWIG 2.0 named "swig2.0"
- AC_PATH_PROGS([SWIG],[swig2.0 swig])
+ # Ubuntu has swig 2.0 as /usr/bin/swig2.0
+ AC_PATH_PROGS([SWIG],[swig swig3.0 swig2.0])
if test -z "$SWIG" ; then
m4_ifval([$3],[$3],[:])
elif test -n "$1" ; then
diff --git a/dist/api_data.py b/dist/api_data.py
index 90b1c8378a2..1302247e88e 100644
--- a/dist/api_data.py
+++ b/dist/api_data.py
@@ -247,8 +247,8 @@ file_config = format_meta + [
Config('memory_page_max', '5MB', r'''
the maximum size a page can grow to in memory before being
reconciled to disk. The specified size will be adjusted to a lower
- bound of <code>50 * leaf_page_max</code>, and an upper bound of
- <code>cache_size / 2</code>. This limit is soft - it is possible
+ bound of <code>leaf_page_max</code>, and an upper bound of
+ <code>cache_size / 10</code>. This limit is soft - it is possible
for pages to be temporarily larger than this value. This setting
is ignored for LSM trees, see \c chunk_size''',
min='512B', max='10TB'),
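The documented bounds on memory_page_max tighten here, from a lower bound of 50 * leaf_page_max and an upper bound of cache_size / 2 to leaf_page_max and cache_size / 10. As an illustration of the documented clamp only (not the internal implementation):

    /* Clamp a requested memory_page_max to the documented bounds. */
    #include <inttypes.h>

    static uint64_t
    clamp_memory_page_max(uint64_t requested,
        uint64_t leaf_page_max, uint64_t cache_size)
    {
        if (requested < leaf_page_max)
            requested = leaf_page_max;      /* lower bound */
        if (requested > cache_size / 10)
            requested = cache_size / 10;    /* upper bound */
        return (requested);
    }

For example, the default 5MB setting stays at 5MB with a 1GB cache, while a 20MB cache would clamp it down to 2MB.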
@@ -373,8 +373,6 @@ connection_runtime_config = [
periodically checkpoint the database. Enabling the checkpoint server
uses a session from the configured session_max''',
type='category', subconfig=[
- Config('name', '"WiredTigerCheckpoint"', r'''
- the checkpoint name'''),
Config('log_size', '0', r'''
wait for this amount of log record bytes to be written to
the log between each checkpoint. A database can configure
@@ -388,16 +386,31 @@ connection_runtime_config = [
]),
Config('error_prefix', '', r'''
prefix string for error messages'''),
- Config('eviction_dirty_target', '80', r'''
+ Config('eviction', '', r'''
+ eviction configuration options.''',
+ type='category', subconfig=[
+ Config('threads_max', '1', r'''
+ maximum number of threads WiredTiger will start to help evict
+ pages from cache. The number of threads started will vary
+ depending on the current eviction load. Each eviction worker
+ thread uses a session from the configured session_max''',
+ min=1, max=20),
+ Config('threads_min', '1', r'''
+ minimum number of threads WiredTiger will start to help evict
+ pages from cache. The number of threads currently running will
+ vary depending on the current eviction load''',
+ min=1, max=20),
+ ]),
+ Config('eviction_dirty_target', '5', r'''
continue evicting until the cache has less dirty memory than the
value, as a percentage of the total cache size. Dirty pages will
only be evicted if the cache is full enough to trigger eviction''',
- min=5, max=99),
- Config('eviction_dirty_trigger', '95', r'''
+ min=1, max=99),
+ Config('eviction_dirty_trigger', '20', r'''
trigger eviction when the cache is using this much memory for dirty
content, as a percentage of the total cache size. This setting only
alters behavior if it is lower than eviction_trigger''',
- min=5, max=99),
+ min=1, max=99),
Config('eviction_target', '80', r'''
continue evicting until the cache has less total memory than the
value, as a percentage of the total cache size. Must be less than
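With the eviction=(threads_min,threads_max) category now part of the connection runtime configuration and the dirty defaults lowered to 5/20, these settings can be passed directly to wiredtiger_open. A minimal sketch (the home path and cache size are placeholders):

    /* Open a connection with explicit eviction settings. */
    #include "wiredtiger.h"

    static int
    open_with_eviction(WT_CONNECTION **connp)
    {
        return (wiredtiger_open("WT_HOME", NULL,
            "create,cache_size=1G,"
            "eviction=(threads_min=2,threads_max=8),"
            "eviction_dirty_target=5,eviction_dirty_trigger=20",
            connp));
    }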
@@ -420,40 +433,6 @@ connection_runtime_config = [
interval in seconds at which to check for files that are
inactive and close them''', min=1, max=100000),
]),
- Config('log', '', r'''
- enable logging. Enabling logging uses three sessions from the
- configured session_max''',
- type='category', subconfig=[
- Config('archive', 'true', r'''
- automatically archive unneeded log files''',
- type='boolean'),
- Config('compressor', 'none', r'''
- configure a compressor for log records. Permitted values are
- \c "none" or custom compression engine name created with
- WT_CONNECTION::add_compressor. If WiredTiger has builtin support
- for \c "snappy", \c "lz4" or \c "zlib" compression, these names
- are also available. See @ref compression for more information'''),
- Config('enabled', 'false', r'''
- enable logging subsystem''',
- type='boolean'),
- Config('file_max', '100MB', r'''
- the maximum size of log files''',
- min='100KB', max='2GB'),
- Config('path', '"."', r'''
- the path to a directory into which the log files are written.
- If the value is not an absolute path name, the files are created
- relative to the database home'''),
- Config('prealloc', 'true', r'''
- pre-allocate log files.''',
- type='boolean'),
- Config('recover', 'on', r'''
- run recovery or error if recovery needs to run after an
- unclean shutdown.''',
- choices=['error','on']),
- Config('zero_fill', 'false', r'''
- manually write zeroes into log files''',
- type='boolean'),
- ]),
Config('lsm_manager', '', r'''
configure database wide options for LSM tree management. The LSM
manager is started automatically the first time an LSM tree is opened.
@@ -472,21 +451,6 @@ connection_runtime_config = [
Config('lsm_merge', 'true', r'''
merge LSM chunks where possible (deprecated)''',
type='boolean', undoc=True),
- Config('eviction', '', r'''
- eviction configuration options.''',
- type='category', subconfig=[
- Config('threads_max', '1', r'''
- maximum number of threads WiredTiger will start to help evict
- pages from cache. The number of threads started will vary
- depending on the current eviction load. Each eviction worker
- thread uses a session from the configured session_max''',
- min=1, max=20),
- Config('threads_min', '1', r'''
- minimum number of threads WiredTiger will start to help evict
- pages from cache. The number of threads currently running will
- vary depending on the current eviction load''',
- min=1, max=20),
- ]),
Config('shared_cache', '', r'''
shared cache configuration options. A database should configure
either a cache_size or a shared_cache not both. Enabling a
@@ -525,38 +489,6 @@ connection_runtime_config = [
are logged using the \c statistics_log configuration. See
@ref statistics for more information''',
type='list', choices=['all', 'fast', 'none', 'clear']),
- Config('statistics_log', '', r'''
- log any statistics the database is configured to maintain,
- to a file. See @ref statistics for more information. Enabling
- the statistics log server uses a session from the configured
- session_max''',
- type='category', subconfig=[
- Config('json', 'false', r'''
- encode statistics in JSON format''',
- type='boolean'),
- Config('on_close', 'false', r'''log statistics on database close''',
- type='boolean'),
- Config('path', '"WiredTigerStat.%d.%H"', r'''
- the pathname to a file into which the log records are written,
- may contain ISO C standard strftime conversion specifications.
- If the value is not an absolute path name, the file is created
- relative to the database home'''),
- Config('sources', '', r'''
- if non-empty, include statistics for the list of data source
- URIs, if they are open at the time of the statistics logging.
- The list may include URIs matching a single data source
- ("table:mytable"), or a URI matching all data sources of a
- particular type ("table:")''',
- type='list'),
- Config('timestamp', '"%b %d %H:%M:%S"', r'''
- a timestamp prepended to each log record, may contain strftime
- conversion specifications, when \c json is configured, defaults
- to \c "%FT%Y.000Z"'''),
- Config('wait', '0', r'''
- seconds to wait between each write of the log records; setting
- this value above 0 configures statistics logging''',
- min='0', max='100000'),
- ]),
Config('verbose', '', r'''
enable messages for various events. Only available if WiredTiger
is configured with --enable-verbose. Options are given as a
@@ -590,13 +522,113 @@ connection_runtime_config = [
'write']),
]
+# wiredtiger_open and WT_CONNECTION.reconfigure log configurations.
+log_configuration_common = [
+ Config('archive', 'true', r'''
+ automatically archive unneeded log files''',
+ type='boolean'),
+ Config('prealloc', 'true', r'''
+ pre-allocate log files.''',
+ type='boolean'),
+ Config('zero_fill', 'false', r'''
+ manually write zeroes into log files''',
+ type='boolean')
+]
+connection_reconfigure_log_configuration = [
+ Config('log', '', r'''
+ enable logging. Enabling logging uses three sessions from the
+ configured session_max''',
+ type='category', subconfig=
+ log_configuration_common)
+]
+wiredtiger_open_log_configuration = [
+ Config('log', '', r'''
+ enable logging. Enabling logging uses three sessions from the
+ configured session_max''',
+ type='category', subconfig=
+ log_configuration_common + [
+ Config('enabled', 'false', r'''
+ enable logging subsystem''',
+ type='boolean'),
+ Config('compressor', 'none', r'''
+ configure a compressor for log records. Permitted values are
+ \c "none" or custom compression engine name created with
+ WT_CONNECTION::add_compressor. If WiredTiger has builtin support
+ for \c "snappy", \c "lz4" or \c "zlib" compression, these names
+ are also available. See @ref compression for more information'''),
+ Config('file_max', '100MB', r'''
+ the maximum size of log files''',
+ min='100KB', max='2GB'),
+ Config('path', '"."', r'''
+ the name of a directory into which log files are written. The
+ directory must already exist. If the value is not an absolute
+ path, the path is relative to the database home (see @ref
+ absolute_path for more information)'''),
+ Config('recover', 'on', r'''
+ run recovery or error if recovery needs to run after an
+ unclean shutdown''',
+ choices=['error','on'])
+ ]),
+]
+
+# wiredtiger_open and WT_CONNECTION.reconfigure statistics log configurations.
+statistics_log_configuration_common = [
+ Config('json', 'false', r'''
+ encode statistics in JSON format''',
+ type='boolean'),
+ Config('on_close', 'false', r'''log statistics on database close''',
+ type='boolean'),
+ Config('sources', '', r'''
+ if non-empty, include statistics for the list of data source
+ URIs, if they are open at the time of the statistics logging.
+ The list may include URIs matching a single data source
+ ("table:mytable"), or a URI matching all data sources of a
+ particular type ("table:")''',
+ type='list'),
+ Config('timestamp', '"%b %d %H:%M:%S"', r'''
+ a timestamp prepended to each log record, may contain strftime
+ conversion specifications, when \c json is configured, defaults
+ to \c "%FT%Y.000Z"'''),
+ Config('wait', '0', r'''
+ seconds to wait between each write of the log records; setting
+ this value above 0 configures statistics logging''',
+ min='0', max='100000'),
+]
+connection_reconfigure_statistics_log_configuration = [
+ Config('statistics_log', '', r'''
+ log any statistics the database is configured to maintain,
+ to a file. See @ref statistics for more information. Enabling
+ the statistics log server uses a session from the configured
+ session_max''',
+ type='category', subconfig=
+ statistics_log_configuration_common)
+]
+wiredtiger_open_statistics_log_configuration = [
+ Config('statistics_log', '', r'''
+ log any statistics the database is configured to maintain,
+ to a file. See @ref statistics for more information. Enabling
+ the statistics log server uses a session from the configured
+ session_max''',
+ type='category', subconfig=
+ statistics_log_configuration_common + [
+ Config('path', '"."', r'''
+ the name of a directory into which statistics files are written.
+ The directory must already exist. If the value is not an absolute
+ path, the path is relative to the database home (see @ref
+ absolute_path for more information)''')
+ ])
+]
+
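Statistics logging is typically paired with enabling statistics collection itself; a minimal sketch, again assuming home, ret and conn as in examples/c/ex_all.c:

	/* Collect fast statistics, logging them as JSON every 30 seconds. */
	ret = wiredtiger_open(home, NULL,
	    "create,statistics=(fast),"
	    "statistics_log=(json=true,on_close=true,wait=30)", &conn);
	if (ret == 0)
		(void)conn->close(conn, NULL);

Note that path stays an open-time-only setting; the shared options (json, on_close, sources, timestamp, wait) are the ones also accepted by WT_CONNECTION::reconfigure.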
session_config = [
Config('isolation', 'read-committed', r'''
the default isolation level for operations in this session''',
choices=['read-uncommitted', 'read-committed', 'snapshot']),
]
-wiredtiger_open_common = connection_runtime_config + [
+wiredtiger_open_common =\
+ connection_runtime_config +\
+ wiredtiger_open_log_configuration +\
+ wiredtiger_open_statistics_log_configuration + [
Config('buffer_alignment', '-1', r'''
in-memory alignment (in bytes) for buffers used for I/O. The
default value of -1 indicates a platform-specific alignment value
@@ -788,8 +820,9 @@ methods = {
'WT_SESSION.drop' : Method([
Config('checkpoint_wait', 'true', r'''
- wait for the checkpoint lock, if \c checkpoint_wait=false, fail if
- this lock is not available immediately''',
+ wait for the checkpoint lock, if \c checkpoint_wait=false, perform
+ the drop operation without taking a lock, returning EBUSY if the
+ operation conflicts with a running checkpoint''',
type='boolean', undoc=True),
Config('force', 'false', r'''
return success if the object does not exist''',
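A sketch of the behavior described above (the drop returns EBUSY rather than blocking when it conflicts with a running checkpoint), assuming an open WT_SESSION named session, <errno.h> and <unistd.h> included, and a hypothetical "table:scratch" object; checkpoint_wait remains an undocumented (undoc=True) setting:

	/* Don't block on the checkpoint lock; retry while a checkpoint runs. */
	while ((ret = session->drop(session,
	    "table:scratch", "force=true,checkpoint_wait=false")) == EBUSY)
		sleep(1);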
@@ -870,6 +903,11 @@ methods = {
"WiredTigerCheckpoint" opens the most recent internal
checkpoint taken for the object). The cursor does not
support data modification'''),
+ Config('checkpoint_wait', 'true', r'''
+ wait for the checkpoint lock, if \c checkpoint_wait=false, open the
+ cursor without taking a lock, returning EBUSY if the operation
+ conflicts with a running checkpoint''',
+ type='boolean', undoc=True),
Config('dump', '', r'''
configure the cursor for dump format inputs and outputs: "hex"
selects a simple hexadecimal format, "json" selects a JSON format
@@ -1084,7 +1122,11 @@ methods = {
don't free memory during close''',
type='boolean'),
]),
-'WT_CONNECTION.reconfigure' : Method(connection_runtime_config),
+'WT_CONNECTION.reconfigure' : Method(
+ connection_reconfigure_log_configuration +\
+ connection_reconfigure_statistics_log_configuration +\
+ connection_runtime_config
+),
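With the split definitions above, WT_CONNECTION::reconfigure accepts the log and statistics_log categories but only their runtime-safe options; a minimal sketch, assuming an open connection conn:

	/* Adjust archiving and statistics-log cadence without restarting. */
	ret = conn->reconfigure(conn,
	    "log=(archive=false,prealloc=false,zero_fill=true),"
	    "statistics_log=(json=true,wait=5)");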
'WT_CONNECTION.set_file_system' : Method([]),
'WT_CONNECTION.load_extension' : Method([
diff --git a/dist/flags.py b/dist/flags.py
index b5f36fb707a..8091283a8c0 100644
--- a/dist/flags.py
+++ b/dist/flags.py
@@ -37,10 +37,13 @@ flags = {
'READ_WONT_NEED',
],
'rec_write' : [
+ 'CHECKPOINTING',
+ 'EVICTING',
'EVICT_IN_MEMORY',
+ 'EVICT_INMEM_SPLIT',
'EVICT_LOOKASIDE',
+ 'EVICT_SCRUB',
'EVICT_UPDATE_RESTORE',
- 'EVICTING',
'VISIBILITY_ERR',
],
'txn_log_checkpoint' : [
diff --git a/dist/s_all b/dist/s_all
index 46a68864906..33b8f6a76ba 100755
--- a/dist/s_all
+++ b/dist/s_all
@@ -15,6 +15,8 @@ echo 'dist/s_all run started...'
force=
reconf=0
+errmode=0
+errfound=0
while :
do case "$1" in
-A) # Reconfigure the library build.
@@ -23,6 +25,9 @@ while :
-f) # Force versions to be updated
force="-f"
shift;;
+ -E) # Return an error code on failure
+ errmode=1
+ shift;;
*)
break;;
esac
@@ -48,6 +53,14 @@ errchk()
echo "#######################"
rm -f $2
+
+	# Some tests shouldn't return an error; exclude them here.
+ case "$1" in
+ *s_export*)
+ break;;
+ *)
+ errfound=1;;
+ esac
}
run()
@@ -108,3 +121,6 @@ for f in `find . -name ${t_pfx}\*`; do
done
echo 'dist/s_all run finished'
+if test $errmode -ne 0; then
+ exit $errfound;
+fi
diff --git a/dist/s_string.ok b/dist/s_string.ok
index 7966ff2cf2e..8c5f1e99bff 100644
--- a/dist/s_string.ok
+++ b/dist/s_string.ok
@@ -86,6 +86,7 @@ DbEnv
Decrement
Decrypt
DeleteFileA
+EACCES
EAGAIN
EB
EBUSY
@@ -117,6 +118,7 @@ FLv
FNV
FORALL
FOREACH
+FS
FULLFSYNC
FindClose
FindFirstFile
@@ -204,6 +206,7 @@ MERCHANTABILITY
METADATA
MONGODB
MSVC
+MULTI
MULTIBLOCK
MUTEX
Manos
@@ -326,6 +329,7 @@ UID
UIDs
UINT
ULINE
+UNC
URI
URIs
UTF
@@ -528,6 +532,7 @@ cust
customp
cv
cxa
+dT
data's
database's
datalen
@@ -557,6 +562,7 @@ dequeued
der
dereference
desc
+designator
dest
destSize
dev
@@ -932,6 +938,7 @@ prepend
prepended
prepending
presize
+presync
primary's
printf
printlog
@@ -1065,6 +1072,7 @@ tV
tablename
tcbench
td
+tempdir
testutil
th
tid
@@ -1091,6 +1099,7 @@ txn
txnc
txnid
txnmin
+txt
typedef
uB
uS
diff --git a/dist/s_style b/dist/s_style
index a222c004cc3..e33db5a5fab 100755
--- a/dist/s_style
+++ b/dist/s_style
@@ -33,7 +33,7 @@ else
exit 1;
fi
- egrep -w 'a a|an an|and and|are are|be be|by by|for for|from from|if if|in in|is is|it it|of of|the the|this this|to to|was was|were were|when when|with with|a an|an a|a the|the a' $f > $t
+ egrep -w 'a a|an an|and and|are are|be be|by by|for for|from from|if if|in in[^-]|is is|it it|of of|the the|this this|to to|was was|were were|when when|with with|a an|an a|a the|the a' $f > $t
test -s $t && {
echo "paired typo"
echo "============================"
diff --git a/dist/stat_data.py b/dist/stat_data.py
index 694ffc86ee4..51cc487f04c 100644
--- a/dist/stat_data.py
+++ b/dist/stat_data.py
@@ -81,10 +81,10 @@ class SessionStat(Stat):
prefix = 'session'
def __init__(self, name, desc, flags=''):
Stat.__init__(self, name, SessionStat.prefix, desc, flags)
-class ThreadState(Stat):
+class ThreadStat(Stat):
prefix = 'thread-state'
def __init__(self, name, desc, flags=''):
- Stat.__init__(self, name, ThreadState.prefix, desc, flags)
+ Stat.__init__(self, name, ThreadStat.prefix, desc, flags)
class TxnStat(Stat):
prefix = 'transaction'
def __init__(self, name, desc, flags=''):
@@ -105,7 +105,7 @@ groups['evict'] = [
BlockStat.prefix,
CacheStat.prefix,
ConnStat.prefix,
- ThreadState.prefix
+ ThreadStat.prefix
]
groups['lsm'] = [LSMStat.prefix, TxnStat.prefix]
groups['memory'] = [CacheStat.prefix, ConnStat.prefix, RecStat.prefix]
@@ -113,7 +113,7 @@ groups['system'] = [
ConnStat.prefix,
DhandleStat.prefix,
SessionStat.prefix,
- ThreadState.prefix
+ ThreadStat.prefix
]
##########################################
@@ -159,6 +159,7 @@ connection_stats = [
BlockStat('block_byte_map_read', 'mapped bytes read', 'size'),
BlockStat('block_byte_read', 'bytes read', 'size'),
BlockStat('block_byte_write', 'bytes written', 'size'),
+ BlockStat('block_byte_write_checkpoint', 'bytes written for checkpoint', 'size'),
BlockStat('block_map_read', 'mapped blocks read'),
BlockStat('block_preload', 'blocks pre-loaded'),
BlockStat('block_read', 'blocks read'),
@@ -168,11 +169,12 @@ connection_stats = [
# Cache and eviction statistics
##########################################
CacheStat('cache_bytes_dirty', 'tracked dirty bytes in the cache', 'no_clear,no_scale,size'),
+ CacheStat('cache_bytes_image', 'bytes belonging to page images in the cache', 'no_clear,no_scale,size'),
CacheStat('cache_bytes_internal', 'tracked bytes belonging to internal pages in the cache', 'no_clear,no_scale,size'),
CacheStat('cache_bytes_inuse', 'bytes currently in the cache', 'no_clear,no_scale,size'),
CacheStat('cache_bytes_leaf', 'tracked bytes belonging to leaf pages in the cache', 'no_clear,no_scale,size'),
CacheStat('cache_bytes_max', 'maximum bytes configured', 'no_clear,no_scale,size'),
- CacheStat('cache_bytes_overflow', 'tracked bytes belonging to overflow pages in the cache', 'no_clear,no_scale,size'),
+ CacheStat('cache_bytes_other', 'bytes not belonging to page images in the cache', 'no_clear,no_scale,size'),
CacheStat('cache_bytes_read', 'bytes read into cache', 'size'),
CacheStat('cache_bytes_write', 'bytes written from cache', 'size'),
CacheStat('cache_eviction_aggressive_set', 'eviction currently operating in aggressive mode', 'no_clear,no_scale'),
@@ -193,7 +195,8 @@ connection_stats = [
CacheStat('cache_eviction_internal', 'internal pages evicted'),
CacheStat('cache_eviction_maximum_page_size', 'maximum page size at eviction', 'no_clear,no_scale,size'),
CacheStat('cache_eviction_pages_queued', 'pages queued for eviction'),
- CacheStat('cache_eviction_pages_queued_oldest', 'pages queued for urgent eviction'),
+ CacheStat('cache_eviction_pages_queued_oldest', 'pages queued for urgent eviction during walk'),
+ CacheStat('cache_eviction_pages_queued_urgent', 'pages queued for urgent eviction'),
CacheStat('cache_eviction_pages_seen', 'pages seen by eviction walk'),
CacheStat('cache_eviction_queue_empty', 'eviction server candidate queue empty when topping up'),
CacheStat('cache_eviction_queue_not_empty', 'eviction server candidate queue not empty when topping up'),
@@ -215,12 +218,14 @@ connection_stats = [
CacheStat('cache_inmem_splittable', 'in-memory page passed criteria to be split'),
CacheStat('cache_lookaside_insert', 'lookaside table insert calls'),
CacheStat('cache_lookaside_remove', 'lookaside table remove calls'),
+ CacheStat('cache_overflow_value', 'overflow values cached in memory', 'no_scale'),
CacheStat('cache_overhead', 'percentage overhead', 'no_clear,no_scale'),
CacheStat('cache_pages_dirty', 'tracked dirty pages in the cache', 'no_clear,no_scale'),
CacheStat('cache_pages_inuse', 'pages currently held in the cache', 'no_clear,no_scale'),
CacheStat('cache_pages_requested', 'pages requested from the cache'),
CacheStat('cache_read', 'pages read into cache'),
CacheStat('cache_read_lookaside', 'pages read into cache requiring lookaside entries'),
+ CacheStat('cache_read_overflow', 'overflow pages read into cache'),
CacheStat('cache_write', 'pages written from cache'),
CacheStat('cache_write_lookaside', 'page written requiring lookaside records'),
CacheStat('cache_write_restore', 'pages written requiring in-memory restoration'),
@@ -294,11 +299,11 @@ connection_stats = [
TxnStat('txn_begin', 'transaction begins'),
TxnStat('txn_checkpoint', 'transaction checkpoints'),
TxnStat('txn_checkpoint_fsync_post', 'transaction fsync calls for checkpoint after allocating the transaction ID'),
- TxnStat('txn_checkpoint_fsync_post_duration', 'transaction fsync duration for checkpoint after allocating the transaction ID (usecs)'),
- TxnStat('txn_checkpoint_fsync_pre', 'transaction fsync calls for checkpoint before allocating the transaction ID'),
- TxnStat('txn_checkpoint_fsync_pre_duration', 'transaction fsync duration for checkpoint before allocating the transaction ID (usecs)'),
+ TxnStat('txn_checkpoint_fsync_post_duration', 'transaction fsync duration for checkpoint after allocating the transaction ID (usecs)', 'no_clear,no_scale'),
TxnStat('txn_checkpoint_generation', 'transaction checkpoint generation', 'no_clear,no_scale'),
TxnStat('txn_checkpoint_running', 'transaction checkpoint currently running', 'no_clear,no_scale'),
+ TxnStat('txn_checkpoint_scrub_target', 'transaction checkpoint scrub dirty target', 'no_clear,no_scale'),
+ TxnStat('txn_checkpoint_scrub_time', 'transaction checkpoint scrub time (msecs)', 'no_clear,no_scale'),
TxnStat('txn_checkpoint_time_max', 'transaction checkpoint max time (msecs)', 'no_clear,no_scale'),
TxnStat('txn_checkpoint_time_min', 'transaction checkpoint min time (msecs)', 'no_clear,no_scale'),
TxnStat('txn_checkpoint_time_recent', 'transaction checkpoint most recent time (msecs)', 'no_clear,no_scale'),
@@ -332,6 +337,22 @@ connection_stats = [
##########################################
SessionStat('session_cursor_open', 'open cursor count', 'no_clear,no_scale'),
SessionStat('session_open', 'open session count', 'no_clear,no_scale'),
+ SessionStat('session_table_compact_fail', 'table compact failed calls', 'no_clear,no_scale'),
+ SessionStat('session_table_compact_success', 'table compact successful calls', 'no_clear,no_scale'),
+ SessionStat('session_table_create_fail', 'table create failed calls', 'no_clear,no_scale'),
+ SessionStat('session_table_create_success', 'table create successful calls', 'no_clear,no_scale'),
+ SessionStat('session_table_drop_fail', 'table drop failed calls', 'no_clear,no_scale'),
+ SessionStat('session_table_drop_success', 'table drop successful calls', 'no_clear,no_scale'),
+ SessionStat('session_table_rebalance_fail', 'table rebalance failed calls', 'no_clear,no_scale'),
+ SessionStat('session_table_rebalance_success', 'table rebalance successful calls', 'no_clear,no_scale'),
+ SessionStat('session_table_rename_fail', 'table rename failed calls', 'no_clear,no_scale'),
+ SessionStat('session_table_rename_success', 'table rename successful calls', 'no_clear,no_scale'),
+ SessionStat('session_table_salvage_fail', 'table salvage failed calls', 'no_clear,no_scale'),
+ SessionStat('session_table_salvage_success', 'table salvage successful calls', 'no_clear,no_scale'),
+ SessionStat('session_table_truncate_fail', 'table truncate failed calls', 'no_clear,no_scale'),
+ SessionStat('session_table_truncate_success', 'table truncate successful calls', 'no_clear,no_scale'),
+ SessionStat('session_table_verify_fail', 'table verify failed calls', 'no_clear,no_scale'),
+ SessionStat('session_table_verify_success', 'table verify successful calls', 'no_clear,no_scale'),
##########################################
# Total cursor operations
@@ -349,11 +370,11 @@ connection_stats = [
CursorStat('cursor_update', 'cursor update calls'),
##########################################
- # Thread State statistics
+ # Thread Count statistics
##########################################
- ThreadState('fsync_active', 'active filesystem fsync calls','no_clear,no_scale'),
- ThreadState('read_active', 'active filesystem read calls','no_clear,no_scale'),
- ThreadState('write_active', 'active filesystem write calls','no_clear,no_scale'),
+ ThreadStat('thread_fsync_active', 'active filesystem fsync calls','no_clear,no_scale'),
+ ThreadStat('thread_read_active', 'active filesystem read calls','no_clear,no_scale'),
+ ThreadStat('thread_write_active', 'active filesystem write calls','no_clear,no_scale'),
##########################################
# Yield statistics
@@ -451,6 +472,7 @@ dsrc_stats = [
##########################################
# Cache and eviction statistics
##########################################
+ CacheStat('cache_bytes_inuse', 'bytes currently in the cache', 'no_clear,no_scale,size'),
CacheStat('cache_bytes_read', 'bytes read into cache', 'size'),
CacheStat('cache_bytes_write', 'bytes written from cache', 'size'),
CacheStat('cache_eviction_checkpoint', 'checkpoint blocked page eviction'),
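The renamed ThreadStat entries and the new per-connection counters (for example block_byte_write_checkpoint and cache_bytes_image) surface to applications through the usual statistics cursor; a minimal sketch, assuming an open session, an int ret as in the surrounding examples, and the standard three-field statistics value format:

	WT_CURSOR *cursor;
	const char *desc, *pvalue;
	int64_t value;

	/* Walk the connection statistics, printing description and value. */
	ret = session->open_cursor(
	    session, "statistics:", NULL, NULL, &cursor);
	if (ret == 0) {
		while ((ret = cursor->next(cursor)) == 0 &&
		    (ret = cursor->get_value(
		    cursor, &desc, &pvalue, &value)) == 0)
			printf("%s=%s\n", desc, pvalue);
		ret = cursor->close(cursor);
	}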
diff --git a/examples/c/ex_all.c b/examples/c/ex_all.c
index dd807922c10..e8727df3f60 100644
--- a/examples/c/ex_all.c
+++ b/examples/c/ex_all.c
@@ -1160,34 +1160,27 @@ main(void)
if (ret == 0)
(void)conn->close(conn, NULL);
+#ifdef MIGHT_NOT_RUN
+ /*
+ * Don't run this code, statistics logging doesn't yet support tables.
+ */
/*! [Statistics logging with a table] */
ret = wiredtiger_open(home, NULL,
"create, statistics_log=("
- "sources=(\"lsm:table1\",\"lsm:table2\"), wait=5)",
+ "sources=(\"table:table1\",\"table:table2\"), wait=5)",
&conn);
/*! [Statistics logging with a table] */
if (ret == 0)
(void)conn->close(conn, NULL);
- /*! [Statistics logging with all tables] */
- ret = wiredtiger_open(home, NULL,
- "create, statistics_log=(sources=(\"lsm:\"), wait=5)",
- &conn);
- /*! [Statistics logging with all tables] */
- if (ret == 0)
- (void)conn->close(conn, NULL);
-
-#ifdef MIGHT_NOT_RUN
/*
- * This example code gets run, and a non-existent log file path might
- * cause the open to fail. The documentation requires code snippets,
- * use #ifdef's to avoid running it.
+ * Don't run this code, statistics logging doesn't yet support indexes.
*/
- /*! [Statistics logging with path] */
+ /*! [Statistics logging with a source type] */
ret = wiredtiger_open(home, NULL,
- "create,"
- "statistics_log=(wait=120,path=/log/log.%m.%d.%y)", &conn);
- /*! [Statistics logging with path] */
+ "create, statistics_log=(sources=(\"index:\"), wait=5)",
+ &conn);
+ /*! [Statistics logging with a source type] */
if (ret == 0)
(void)conn->close(conn, NULL);
diff --git a/examples/c/ex_file_system.c b/examples/c/ex_file_system.c
index 77e8f40480b..55ee20e9331 100644
--- a/examples/c/ex_file_system.c
+++ b/examples/c/ex_file_system.c
@@ -118,18 +118,17 @@ int demo_file_system_create(WT_CONNECTION *, WT_CONFIG_ARG *);
/*
* Forward function declarations for file system API implementation
*/
-static int demo_fs_open(WT_FILE_SYSTEM *,
- WT_SESSION *, const char *, WT_OPEN_FILE_TYPE, uint32_t, WT_FILE_HANDLE **);
+static int demo_fs_open(WT_FILE_SYSTEM *, WT_SESSION *,
+ const char *, WT_FS_OPEN_FILE_TYPE, uint32_t, WT_FILE_HANDLE **);
static int demo_fs_directory_list(WT_FILE_SYSTEM *, WT_SESSION *,
const char *, const char *, char ***, uint32_t *);
static int demo_fs_directory_list_free(
WT_FILE_SYSTEM *, WT_SESSION *, char **, uint32_t);
-static int demo_fs_directory_sync(WT_FILE_SYSTEM *file_system,
- WT_SESSION *session, const char *directory);
static int demo_fs_exist(WT_FILE_SYSTEM *, WT_SESSION *, const char *, bool *);
-static int demo_fs_remove(WT_FILE_SYSTEM *, WT_SESSION *, const char *);
+static int demo_fs_remove(
+ WT_FILE_SYSTEM *, WT_SESSION *, const char *, uint32_t);
static int demo_fs_rename(
- WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *);
+ WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *, uint32_t);
static int demo_fs_size(
WT_FILE_SYSTEM *, WT_SESSION *, const char *, wt_off_t *);
static int demo_fs_terminate(WT_FILE_SYSTEM *, WT_SESSION *);
@@ -255,7 +254,6 @@ demo_file_system_create(WT_CONNECTION *conn, WT_CONFIG_ARG *config)
/* Initialize the in-memory jump table. */
file_system->fs_directory_list = demo_fs_directory_list;
file_system->fs_directory_list_free = demo_fs_directory_list_free;
- file_system->fs_directory_sync = demo_fs_directory_sync;
file_system->fs_exist = demo_fs_exist;
file_system->fs_open_file = demo_fs_open;
file_system->fs_remove = demo_fs_remove;
@@ -282,7 +280,7 @@ err: free(demo_fs);
*/
static int
demo_fs_open(WT_FILE_SYSTEM *file_system, WT_SESSION *session,
- const char *name, WT_OPEN_FILE_TYPE file_type, uint32_t flags,
+ const char *name, WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags,
WT_FILE_HANDLE **file_handlep)
{
DEMO_FILE_HANDLE *demo_fh;
@@ -469,21 +467,6 @@ demo_fs_directory_list_free(WT_FILE_SYSTEM *file_system,
}
/*
- * demo_fs_directory_sync --
- * Directory sync for our demo file system, which is a no-op.
- */
-static int
-demo_fs_directory_sync(WT_FILE_SYSTEM *file_system,
- WT_SESSION *session, const char *directory)
-{
- (void)file_system; /* Unused */
- (void)session; /* Unused */
- (void)directory; /* Unused */
-
- return (0);
-}
-
-/*
* demo_fs_exist --
* Return if the file exists.
*/
@@ -507,13 +490,15 @@ demo_fs_exist(WT_FILE_SYSTEM *file_system,
* POSIX remove.
*/
static int
-demo_fs_remove(
- WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *name)
+demo_fs_remove(WT_FILE_SYSTEM *file_system,
+ WT_SESSION *session, const char *name, uint32_t flags)
{
DEMO_FILE_SYSTEM *demo_fs;
DEMO_FILE_HANDLE *demo_fh;
int ret = 0;
+ (void)flags; /* Unused */
+
demo_fs = (DEMO_FILE_SYSTEM *)file_system;
ret = ENOENT;
@@ -531,13 +516,15 @@ demo_fs_remove(
*/
static int
demo_fs_rename(WT_FILE_SYSTEM *file_system,
- WT_SESSION *session, const char *from, const char *to)
+ WT_SESSION *session, const char *from, const char *to, uint32_t flags)
{
DEMO_FILE_HANDLE *demo_fh;
DEMO_FILE_SYSTEM *demo_fs;
char *copy;
int ret = 0;
+ (void)flags; /* Unused */
+
demo_fs = (DEMO_FILE_SYSTEM *)file_system;
LOCK_FILE_SYSTEM(session, demo_fs);
diff --git a/examples/java/com/wiredtiger/examples/ex_all.java b/examples/java/com/wiredtiger/examples/ex_all.java
index 48e85c9fade..83a37e9a6a5 100644
--- a/examples/java/com/wiredtiger/examples/ex_all.java
+++ b/examples/java/com/wiredtiger/examples/ex_all.java
@@ -988,6 +988,10 @@ allExample()
/*! [Statistics logging] */
conn.close(null);
+ if (false) { // MIGHT_NOT_RUN
+ /*
+ * Don't run this code, statistics logging doesn't yet support tables.
+ */
/*! [Statistics logging with a table] */
conn = wiredtiger.open(home,
"create," +
@@ -995,23 +999,13 @@ allExample()
/*! [Statistics logging with a table] */
conn.close(null);
- /*! [Statistics logging with all tables] */
- conn = wiredtiger.open(home,
- "create,statistics_log=(sources=(\"table:\"))");
- /*! [Statistics logging with all tables] */
- conn.close(null);
-
- if (false) { // MIGHT_NOT_RUN
/*
- * This example code gets run, and a non-existent log file path might
- * cause the open to fail. The documentation requires code snippets,
- * use if (false) to avoid running it.
+ * Don't run this code, statistics logging doesn't yet support indexes.
*/
- /*! [Statistics logging with path] */
+ /*! [Statistics logging with a source type] */
conn = wiredtiger.open(home,
- "create," +
- "statistics_log=(wait=120,path=/log/log.%m.%d.%y)");
- /*! [Statistics logging with path] */
+ "create,statistics_log=(sources=(\"index:\"))");
+ /*! [Statistics logging with a source type] */
conn.close(null);
/*
diff --git a/ext/compressors/zlib/zlib_compress.c b/ext/compressors/zlib/zlib_compress.c
index 9aede2ed907..484df0a6785 100644
--- a/ext/compressors/zlib/zlib_compress.c
+++ b/ext/compressors/zlib/zlib_compress.c
@@ -92,7 +92,7 @@ zalloc(void *cookie, uint32_t number, uint32_t size)
opaque = cookie;
wt_api = ((ZLIB_COMPRESSOR *)opaque->compressor)->wt_api;
return (wt_api->scr_alloc(
- wt_api, opaque->session, (size_t)(number * size)));
+ wt_api, opaque->session, (size_t)number * size));
}
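The repositioned cast matters because number * size is evaluated in 32-bit arithmetic before the old cast could widen it; a standalone illustration, assuming a 64-bit size_t:

	uint32_t number = 100000, size = 65536;

	/* The 32-bit product wraps first, then the cast widens the result. */
	size_t bad = (size_t)(number * size);

	/* Widening one operand first keeps the whole product in 64-bit range. */
	size_t good = (size_t)number * size;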
/*
diff --git a/src/async/async_api.c b/src/async/async_api.c
index fea8714176b..d53a6c65c1d 100644
--- a/src/async/async_api.c
+++ b/src/async/async_api.c
@@ -490,12 +490,24 @@ __wt_async_flush(WT_SESSION_IMPL *session)
WT_ASYNC *async;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
+ uint32_t i, workers;
conn = S2C(session);
if (!conn->async_cfg)
return (0);
async = conn->async;
+ /*
+ * Only add a flush operation if there are workers who can process
+ * it. Otherwise we will wait forever.
+ */
+ workers = 0;
+ for (i = 0; i < conn->async_workers; ++i)
+ if (async->worker_tids[i] != 0)
+ ++workers;
+ if (workers == 0)
+ return (0);
+
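From the API side, the guard above means WT_CONNECTION::async_flush returns immediately instead of blocking forever when no async worker threads are running; a minimal usage sketch, assuming a connection opened with async=(enabled=true):

	/* Wait for outstanding asynchronous operations, if any workers exist. */
	ret = conn->async_flush(conn);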
WT_STAT_FAST_CONN_INCR(session, async_flush);
/*
* We have to do several things. First we have to prevent
diff --git a/src/block/block_ckpt.c b/src/block/block_ckpt.c
index b9f0ec25d53..3584efc7671 100644
--- a/src/block/block_ckpt.c
+++ b/src/block/block_ckpt.c
@@ -252,7 +252,7 @@ __wt_block_checkpoint(WT_SESSION_IMPL *session,
} else
WT_ERR(__wt_block_write_off(session, block, buf,
&ci->root_offset, &ci->root_size, &ci->root_cksum,
- data_cksum, false));
+ data_cksum, true, false));
/*
* Checkpoints are potentially reading/writing/merging lots of blocks,
diff --git a/src/block/block_ext.c b/src/block/block_ext.c
index 0d3e7b54f17..bad4d8d7990 100644
--- a/src/block/block_ext.c
+++ b/src/block/block_ext.c
@@ -1245,8 +1245,7 @@ __wt_block_extlist_write(WT_SESSION_IMPL *session,
WT_DECL_RET;
WT_EXT *ext;
WT_PAGE_HEADER *dsk;
- size_t size;
- uint32_t entries;
+ size_t entries, size;
uint8_t *p;
WT_RET(__block_extlist_dump(session, block, el, "write"));
@@ -1311,7 +1310,7 @@ __wt_block_extlist_write(WT_SESSION_IMPL *session,
/* Write the extent list to disk. */
WT_ERR(__wt_block_write_off(session,
- block, tmp, &el->offset, &el->size, &el->cksum, true, true));
+ block, tmp, &el->offset, &el->size, &el->cksum, true, true, true));
/*
* Remove the allocated blocks from the system's allocation list, extent
@@ -1450,7 +1449,7 @@ __block_extlist_dump(
tag, el->name, el->entries,
__wt_buf_set_size(session, el->bytes, true, t1)));
- if (ret != 0 || el->entries == 0)
+ if (el->entries == 0)
goto done;
memset(sizes, 0, sizeof(sizes));
diff --git a/src/block/block_mgr.c b/src/block/block_mgr.c
index 971fe713f83..eff25f34304 100644
--- a/src/block/block_mgr.c
+++ b/src/block/block_mgr.c
@@ -479,11 +479,11 @@ __bm_verify_start(WT_BM *bm,
* Write a buffer into a block, returning the block's address cookie.
*/
static int
-__bm_write(WT_BM *bm, WT_SESSION_IMPL *session,
- WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool data_cksum)
+__bm_write(WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf,
+ uint8_t *addr, size_t *addr_sizep, bool data_cksum, bool checkpoint_io)
{
- return (__wt_block_write(
- session, bm->block, buf, addr, addr_sizep, data_cksum));
+ return (__wt_block_write(session,
+ bm->block, buf, addr, addr_sizep, data_cksum, checkpoint_io));
}
/*
@@ -492,13 +492,14 @@ __bm_write(WT_BM *bm, WT_SESSION_IMPL *session,
* readonly version.
*/
static int
-__bm_write_readonly(WT_BM *bm, WT_SESSION_IMPL *session,
- WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool data_cksum)
+__bm_write_readonly(WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf,
+ uint8_t *addr, size_t *addr_sizep, bool data_cksum, bool checkpoint_io)
{
WT_UNUSED(buf);
WT_UNUSED(addr);
WT_UNUSED(addr_sizep);
WT_UNUSED(data_cksum);
+ WT_UNUSED(checkpoint_io);
return (__bm_readonly(bm, session));
}
diff --git a/src/block/block_open.c b/src/block/block_open.c
index 1603b1574e7..7cff7eab629 100644
--- a/src/block/block_open.c
+++ b/src/block/block_open.c
@@ -15,9 +15,10 @@ static int __desc_read(WT_SESSION_IMPL *, WT_BLOCK *);
* Drop a file.
*/
int
-__wt_block_manager_drop(WT_SESSION_IMPL *session, const char *filename)
+__wt_block_manager_drop(
+ WT_SESSION_IMPL *session, const char *filename, bool durable)
{
- return (__wt_remove_if_exists(session, filename));
+ return (__wt_remove_if_exists(session, filename, durable));
}
/*
@@ -43,8 +44,9 @@ __wt_block_manager_create(
* in our space. Move any existing files out of the way and complain.
*/
for (;;) {
- if ((ret = __wt_open(session, filename, WT_OPEN_FILE_TYPE_DATA,
- WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, &fh)) == 0)
+ if ((ret = __wt_open(session, filename,
+ WT_FS_OPEN_FILE_TYPE_DATA, WT_FS_OPEN_CREATE |
+ WT_FS_OPEN_DURABLE | WT_FS_OPEN_EXCLUSIVE, &fh)) == 0)
break;
WT_ERR_TEST(ret != EEXIST, ret);
@@ -56,7 +58,7 @@ __wt_block_manager_create(
WT_ERR(__wt_fs_exist(session, tmp->data, &exists));
if (!exists) {
WT_ERR(__wt_fs_rename(
- session, filename, tmp->data));
+ session, filename, tmp->data, false));
WT_ERR(__wt_msg(session,
"unexpected file %s found, renamed to %s",
filename, (const char *)tmp->data));
@@ -77,16 +79,9 @@ __wt_block_manager_create(
/* Close the file handle. */
WT_TRET(__wt_close(session, &fh));
- /*
- * Some filesystems require that we sync the directory to be confident
- * that the file will appear.
- */
- if (ret == 0)
- WT_TRET(__wt_fs_directory_sync(session, filename));
-
/* Undo any create on error. */
if (ret != 0)
- WT_TRET(__wt_fs_remove(session, filename));
+ WT_TRET(__wt_fs_remove(session, filename, false));
err: __wt_scr_free(session, &tmp);
@@ -207,11 +202,11 @@ __wt_block_open(WT_SESSION_IMPL *session,
*/
flags = 0;
if (readonly && FLD_ISSET(conn->direct_io, WT_DIRECT_IO_CHECKPOINT))
- LF_SET(WT_OPEN_DIRECTIO);
+ LF_SET(WT_FS_OPEN_DIRECTIO);
if (!readonly && FLD_ISSET(conn->direct_io, WT_DIRECT_IO_DATA))
- LF_SET(WT_OPEN_DIRECTIO);
+ LF_SET(WT_FS_OPEN_DIRECTIO);
WT_ERR(__wt_open(
- session, filename, WT_OPEN_FILE_TYPE_DATA, flags, &block->fh));
+ session, filename, WT_FS_OPEN_FILE_TYPE_DATA, flags, &block->fh));
/* Set the file's size. */
WT_ERR(__wt_filesize(session, block->fh, &block->size));
diff --git a/src/block/block_session.c b/src/block/block_session.c
index 268adb530cf..6223751effa 100644
--- a/src/block/block_session.c
+++ b/src/block/block_session.c
@@ -28,7 +28,7 @@ __block_ext_alloc(WT_SESSION_IMPL *session, WT_EXT **extp)
{
WT_EXT *ext;
- u_int skipdepth;
+ size_t skipdepth;
skipdepth = __wt_skip_choose_depth(session);
WT_RET(__wt_calloc(session, 1,
diff --git a/src/block/block_write.c b/src/block/block_write.c
index 1fefeee09da..30d06e6259a 100644
--- a/src/block/block_write.c
+++ b/src/block/block_write.c
@@ -210,15 +210,15 @@ __wt_block_write_size(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t *sizep)
* Write a buffer into a block, returning the block's address cookie.
*/
int
-__wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK *block,
- WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool data_cksum)
+__wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf,
+ uint8_t *addr, size_t *addr_sizep, bool data_cksum, bool checkpoint_io)
{
wt_off_t offset;
uint32_t size, cksum;
uint8_t *endp;
- WT_RET(__wt_block_write_off(
- session, block, buf, &offset, &size, &cksum, data_cksum, false));
+ WT_RET(__wt_block_write_off(session, block,
+ buf, &offset, &size, &cksum, data_cksum, checkpoint_io, false));
endp = addr;
WT_RET(__wt_block_addr_to_buffer(block, &endp, offset, size, cksum));
@@ -228,14 +228,14 @@ __wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK *block,
}
/*
- * __wt_block_write_off --
+ * __block_write_off --
* Write a buffer into a block, returning the block's offset, size and
* checksum.
*/
-int
-__wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block,
+static int
+__block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block,
WT_ITEM *buf, wt_off_t *offsetp, uint32_t *sizep, uint32_t *cksump,
- bool data_cksum, bool caller_locked)
+ bool data_cksum, bool checkpoint_io, bool caller_locked)
{
WT_BLOCK_HEADER *blk;
WT_DECL_RET;
@@ -254,12 +254,6 @@ __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block,
blk = WT_BLOCK_HEADER_REF(buf->mem);
memset(blk, 0, sizeof(*blk));
- /*
- * Swap the page-header as needed; this doesn't belong here, but it's
- * the best place to catch all callers.
- */
- __wt_page_header_byteswap(buf->mem);
-
/* Buffers should be aligned for writing. */
if (!F_ISSET(buf, WT_ITEM_ALIGNED)) {
WT_ASSERT(session, F_ISSET(buf, WT_ITEM_ALIGNED));
@@ -380,6 +374,9 @@ __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block,
WT_STAT_FAST_CONN_INCR(session, block_write);
WT_STAT_FAST_CONN_INCRV(session, block_byte_write, align_size);
+ if (checkpoint_io)
+ WT_STAT_FAST_CONN_INCRV(
+ session, block_byte_write_checkpoint, align_size);
WT_RET(__wt_verbose(session, WT_VERB_WRITE,
"off %" PRIuMAX ", size %" PRIuMAX ", cksum %" PRIu32,
@@ -391,3 +388,28 @@ __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block,
return (0);
}
+
+/*
+ * __wt_block_write_off --
+ * Write a buffer into a block, returning the block's offset, size and
+ * checksum.
+ */
+int
+__wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block,
+ WT_ITEM *buf, wt_off_t *offsetp, uint32_t *sizep, uint32_t *cksump,
+ bool data_cksum, bool checkpoint_io, bool caller_locked)
+{
+ WT_DECL_RET;
+
+ /*
+ * Ensure the page header is in little endian order; this doesn't belong
+ * here, but it's the best place to catch all callers. After the write,
+ * swap values back to native order so callers never see anything other
+ * than their original content.
+ */
+ __wt_page_header_byteswap(buf->mem);
+ ret = __block_write_off(session, block, buf,
+ offsetp, sizep, cksump, data_cksum, checkpoint_io, caller_locked);
+ __wt_page_header_byteswap(buf->mem);
+ return (ret);
+}
diff --git a/src/btree/bt_curnext.c b/src/btree/bt_curnext.c
index 70b3ba56e31..e1b097c22a5 100644
--- a/src/btree/bt_curnext.c
+++ b/src/btree/bt_curnext.c
@@ -183,6 +183,7 @@ __cursor_var_next(WT_CURSOR_BTREE *cbt, bool newpage)
if (cbt->last_standard_recno == 0)
return (WT_NOTFOUND);
__cursor_set_recno(cbt, cbt->ref->ref_recno);
+ cbt->cip_saved = NULL;
goto new_page;
}
@@ -301,12 +302,13 @@ __cursor_row_next(WT_CURSOR_BTREE *cbt, bool newpage)
* WT_INSERT_HEAD[0], and so on. This means WT_INSERT lists are
* odd-numbered slots, and WT_ROW array slots are even-numbered slots.
*
- * New page configuration.
+ * Initialize for each new page.
*/
if (newpage) {
cbt->ins_head = WT_ROW_INSERT_SMALLEST(page);
cbt->ins = WT_SKIP_FIRST(cbt->ins_head);
cbt->row_iteration_slot = 1;
+ cbt->rip_saved = NULL;
goto new_insert;
}
@@ -517,11 +519,13 @@ __wt_btcur_iterate_setup(WT_CURSOR_BTREE *cbt)
*/
F_SET(cbt, WT_CBT_ITERATE_NEXT | WT_CBT_ITERATE_PREV);
- /*
- * Clear the count of deleted items on the page.
- */
+ /* Clear the count of deleted items on the page. */
cbt->page_deleted_count = 0;
+ /* Clear saved iteration cursor position information. */
+ cbt->cip_saved = NULL;
+ cbt->rip_saved = NULL;
+
/*
* If we don't have a search page, then we're done, we're starting at
* the beginning or end of the tree, not as a result of a search.
@@ -661,7 +665,7 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating)
if (page != NULL &&
(cbt->page_deleted_count > WT_BTREE_DELETE_THRESHOLD ||
(newpage && cbt->page_deleted_count > 0)))
- __wt_page_evict_soon(page);
+ WT_ERR(__wt_page_evict_soon(session, cbt->ref));
cbt->page_deleted_count = 0;
WT_ERR(__wt_tree_walk(session, &cbt->ref, flags));
diff --git a/src/btree/bt_curprev.c b/src/btree/bt_curprev.c
index 872f648446c..e39dffa357f 100644
--- a/src/btree/bt_curprev.c
+++ b/src/btree/bt_curprev.c
@@ -329,6 +329,7 @@ __cursor_var_prev(WT_CURSOR_BTREE *cbt, bool newpage)
if (cbt->last_standard_recno == 0)
return (WT_NOTFOUND);
__cursor_set_recno(cbt, cbt->last_standard_recno);
+ cbt->cip_saved = NULL;
goto new_page;
}
@@ -447,7 +448,7 @@ __cursor_row_prev(WT_CURSOR_BTREE *cbt, bool newpage)
* WT_INSERT_HEAD[0], and so on. This means WT_INSERT lists are
* odd-numbered slots, and WT_ROW array slots are even-numbered slots.
*
- * New page configuration.
+ * Initialize for each new page.
*/
if (newpage) {
/*
@@ -464,6 +465,7 @@ __cursor_row_prev(WT_CURSOR_BTREE *cbt, bool newpage)
WT_ROW_INSERT_SLOT(page, page->pg_row_entries - 1);
cbt->ins = WT_SKIP_LAST(cbt->ins_head);
cbt->row_iteration_slot = page->pg_row_entries * 2 + 1;
+ cbt->rip_saved = NULL;
goto new_insert;
}
@@ -619,7 +621,7 @@ __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating)
if (page != NULL &&
(cbt->page_deleted_count > WT_BTREE_DELETE_THRESHOLD ||
(newpage && cbt->page_deleted_count > 0)))
- __wt_page_evict_soon(page);
+ WT_ERR(__wt_page_evict_soon(session, cbt->ref));
cbt->page_deleted_count = 0;
WT_ERR(__wt_tree_walk(session, &cbt->ref, flags));
diff --git a/src/btree/bt_discard.c b/src/btree/bt_discard.c
index a00bb7dc2b5..965aec16fc2 100644
--- a/src/btree/bt_discard.c
+++ b/src/btree/bt_discard.c
@@ -131,8 +131,10 @@ __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep)
/* Discard any disk image. */
dsk = (WT_PAGE_HEADER *)page->dsk;
- if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_ALLOC))
+ if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_ALLOC)) {
+ __wt_cache_page_image_decr(session, dsk->mem_size);
__wt_overwrite_and_free_len(session, dsk, dsk->mem_size);
+ }
/* Discard any mapped image. */
if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_MAPPED))
diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c
index c97e05d74a7..cacf1369430 100644
--- a/src/btree/bt_handle.c
+++ b/src/btree/bt_handle.c
@@ -690,6 +690,8 @@ __btree_page_sizes(WT_SESSION_IMPL *session)
* Don't let pages grow large compared to the cache size or we can end
* up in a situation where nothing can be evicted. Take care getting
* the cache size: with a shared cache, it may not have been set.
+ * Don't forget to update the API documentation if you alter the
+ * bounds for any of the parameters here.
*/
WT_RET(__wt_config_gets(session, cfg, "memory_page_max", &cval));
btree->maxmempage = (uint64_t)cval.val;
diff --git a/src/btree/bt_huffman.c b/src/btree/bt_huffman.c
index 9e9d69c342e..918791d9c6e 100644
--- a/src/btree/bt_huffman.c
+++ b/src/btree/bt_huffman.c
@@ -157,7 +157,8 @@ __huffman_confchk_file(WT_SESSION_IMPL *session,
/* Check the file exists. */
WT_RET(__wt_strndup(session, v->str + len, v->len - len, &fname));
- WT_ERR(__wt_fopen(session, fname, WT_OPEN_FIXED, WT_STREAM_READ, &fs));
+ WT_ERR(__wt_fopen(
+ session, fname, WT_FS_OPEN_FIXED, WT_STREAM_READ, &fs));
/* Optionally return the file handle. */
if (fsp == NULL)
diff --git a/src/btree/bt_io.c b/src/btree/bt_io.c
index 4339de6f25c..6c2e2f1b3fb 100644
--- a/src/btree/bt_io.c
+++ b/src/btree/bt_io.c
@@ -117,7 +117,7 @@ __wt_bt_read(WT_SESSION_IMPL *session,
*/
if (ret != 0 ||
result_len != dsk->mem_size - WT_BLOCK_COMPRESS_SKIP) {
- fail_msg = "block decryption failed";
+ fail_msg = "block decompression failed";
goto corrupt;
}
} else
@@ -168,7 +168,8 @@ err: __wt_scr_free(session, &tmp);
*/
int
__wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf,
- uint8_t *addr, size_t *addr_sizep, bool checkpoint, bool compressed)
+ uint8_t *addr, size_t *addr_sizep,
+ bool checkpoint, bool checkpoint_io, bool compressed)
{
WT_BM *bm;
WT_BTREE *btree;
@@ -359,10 +360,12 @@ __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf,
/* Call the block manager to write the block. */
WT_ERR(checkpoint ?
bm->checkpoint(bm, session, ip, btree->ckpt, data_cksum) :
- bm->write(bm, session, ip, addr, addr_sizep, data_cksum));
+ bm->write(
+ bm, session, ip, addr, addr_sizep, data_cksum, checkpoint_io));
WT_STAT_FAST_CONN_INCR(session, cache_write);
WT_STAT_FAST_DATA_INCR(session, cache_write);
+ S2C(session)->cache->bytes_written += dsk->mem_size;
WT_STAT_FAST_CONN_INCRV(session, cache_bytes_write, dsk->mem_size);
WT_STAT_FAST_DATA_INCRV(session, cache_bytes_write, dsk->mem_size);
diff --git a/src/btree/bt_ovfl.c b/src/btree/bt_ovfl.c
index fbe361e000a..1f080041a23 100644
--- a/src/btree/bt_ovfl.c
+++ b/src/btree/bt_ovfl.c
@@ -33,6 +33,7 @@ __ovfl_read(WT_SESSION_IMPL *session,
store->data = WT_PAGE_HEADER_BYTE(btree, dsk);
store->size = dsk->u.datalen;
+ WT_STAT_FAST_CONN_INCR(session, cache_read_overflow);
WT_STAT_FAST_DATA_INCR(session, cache_read_overflow);
return (0);
@@ -208,6 +209,7 @@ __wt_ovfl_cache(WT_SESSION_IMPL *session,
*/
if (!visible) {
WT_RET(__ovfl_cache(session, page, vpack));
+ WT_STAT_FAST_CONN_INCR(session, cache_overflow_value);
WT_STAT_FAST_DATA_INCR(session, cache_overflow_value);
}
diff --git a/src/btree/bt_page.c b/src/btree/bt_page.c
index 00ec8aa4494..89e5f428628 100644
--- a/src/btree/bt_page.c
+++ b/src/btree/bt_page.c
@@ -219,6 +219,7 @@ __wt_page_inmem(WT_SESSION_IMPL *session, WT_REF *ref,
/* Update the page's in-memory size and the cache statistics. */
__wt_cache_page_inmem_incr(session, page, size);
+ __wt_cache_page_image_incr(session, dsk->mem_size);
/* Link the new internal page to the parent. */
if (ref != NULL) {
diff --git a/src/btree/bt_read.c b/src/btree/bt_read.c
index 086500c8b2f..3d396d5ae5b 100644
--- a/src/btree/bt_read.c
+++ b/src/btree/bt_read.c
@@ -296,7 +296,7 @@ err: WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));
* __evict_force_check --
* Check if a page matches the criteria for forced eviction.
*/
-static int
+static bool
__evict_force_check(WT_SESSION_IMPL *session, WT_REF *ref)
{
WT_BTREE *btree;
@@ -307,26 +307,26 @@ __evict_force_check(WT_SESSION_IMPL *session, WT_REF *ref)
/* Leaf pages only. */
if (WT_PAGE_IS_INTERNAL(page))
- return (0);
+ return (false);
/*
* It's hard to imagine a page with a huge memory footprint that has
* never been modified, but check to be sure.
*/
if (page->modify == NULL)
- return (0);
+ return (false);
/* Pages are usually small enough, check that first. */
if (page->memory_footprint < btree->splitmempage)
- return (0);
+ return (false);
else if (page->memory_footprint < btree->maxmempage)
return (__wt_leaf_page_can_split(session, page));
/* Trigger eviction on the next page release. */
- __wt_page_evict_soon(page);
+ (void)__wt_page_evict_soon(session, ref);
/* Bump the oldest ID, we're about to do some visibility checks. */
- WT_RET(__wt_txn_update_oldest(session, 0));
+ (void)__wt_txn_update_oldest(session, 0);
/* If eviction cannot succeed, don't try. */
return (__wt_page_can_evict(session, ref, NULL));
@@ -548,10 +548,14 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
* if the page qualifies for forced eviction and update
* the page's generation number. If eviction isn't being
* done on this file, we're done.
+ * In-memory split of large pages is allowed while
+ * no_eviction is set on btree, whereas reconciliation
+ * is not allowed.
*/
if (LF_ISSET(WT_READ_NO_EVICT) ||
F_ISSET(session, WT_SESSION_NO_EVICTION) ||
- F_ISSET(btree, WT_BTREE_NO_EVICTION))
+ (F_ISSET(btree, WT_BTREE_NO_EVICTION) &&
+ !F_ISSET(btree, WT_BTREE_NO_RECONCILE)))
goto skip_evict;
/*
@@ -595,7 +599,14 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
page = ref->page;
if (page->read_gen == WT_READGEN_NOTSET) {
if (evict_soon)
- __wt_page_evict_soon(page);
+ /*
+ * Ignore error returns, since the
+ * evict soon call is advisory and we
+ * are holding a hazard pointer to the
+ * page already.
+ */
+ (void)__wt_page_evict_soon(
+ session, ref);
else
__wt_cache_read_gen_new(session, page);
} else if (!LF_ISSET(WT_READ_NO_GEN))
diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c
index 7a05a883f83..4f6f300802e 100644
--- a/src/btree/bt_split.c
+++ b/src/btree/bt_split.c
@@ -298,7 +298,7 @@ static int
__split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home,
WT_REF **from_refp, size_t *decrp, WT_REF **to_refp, size_t *incrp)
{
- WT_ADDR *addr;
+ WT_ADDR *addr, *ref_addr;
WT_CELL_UNPACK unpack;
WT_DECL_RET;
WT_IKEY *ikey;
@@ -345,13 +345,18 @@ __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home,
}
/*
- * If there's no address (the page has never been written), or the
- * address has been instantiated, there's no work to do. Otherwise,
- * instantiate the address in-memory, from the on-page cell.
+ * If there's no address at all (the page has never been written), or
+ * the address has already been instantiated, there's no work to do.
+ * Otherwise, the address still references a split page on-page cell,
+ * instantiate it. We can race with reconciliation and/or eviction of
+ * the child pages, be cautious: read the address and verify it, and
+ * only update it if the value is unchanged from the original. In the
+ * case of a race, the address must no longer reference the split page,
+ * we're done.
*/
- addr = ref->addr;
- if (addr != NULL && !__wt_off_page(from_home, addr)) {
- __wt_cell_unpack((WT_CELL *)ref->addr, &unpack);
+ WT_ORDERED_READ(ref_addr, ref->addr);
+ if (ref_addr != NULL && !__wt_off_page(from_home, ref_addr)) {
+ __wt_cell_unpack((WT_CELL *)ref_addr, &unpack);
WT_RET(__wt_calloc_one(session, &addr));
if ((ret = __wt_strndup(
session, unpack.data, unpack.size, &addr->addr)) != 0) {
@@ -371,7 +376,10 @@ __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home,
break;
WT_ILLEGAL_VALUE(session);
}
- ref->addr = addr;
+ if (!__wt_atomic_cas_ptr(&ref->addr, ref_addr, addr)) {
+ __wt_free(session, addr->addr);
+ __wt_free(session, addr);
+ }
}
/* And finally, copy the WT_REF pointer itself. */
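The update above follows a publish-or-free pattern: read the current value, build a replacement off to the side, install it with a single compare-and-swap, and discard the replacement if another thread won the race. A generic sketch using C11 atomics rather than WiredTiger's WT_ORDERED_READ and __wt_atomic_cas_ptr macros:

	#include <stdatomic.h>
	#include <stdlib.h>

	struct addr {
		char *cookie;
	};

	/*
	 * Replace *slot with a freshly built copy only if it still holds the
	 * value we originally observed; tolerate concurrent updaters.
	 */
	static void
	publish_addr(_Atomic(struct addr *) *slot, struct addr *original)
	{
		struct addr *current, *replacement;

		current = atomic_load(slot);
		if (current != original)
			return;		/* Another thread already updated it. */

		if ((replacement = malloc(sizeof(*replacement))) == NULL)
			return;
		replacement->cookie = NULL;	/* Build the new copy here. */

		/* Publish the copy, or free it if we lost the race. */
		if (!atomic_compare_exchange_strong(slot, &current, replacement))
			free(replacement);
	}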
@@ -786,7 +794,9 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
*/
if (result_entries == 0) {
empty_parent = true;
- __wt_page_evict_soon(parent);
+ if (!__wt_ref_is_root(parent->pg_intl_parent_ref))
+ ret = __wt_page_evict_soon(
+ session, parent->pg_intl_parent_ref);
goto err;
}
@@ -1462,11 +1472,11 @@ err: if (parent != NULL)
/*
* __split_multi_inmem --
- * Instantiate a page in a multi-block set.
+ * Instantiate a page from a disk image.
*/
static int
__split_multi_inmem(
- WT_SESSION_IMPL *session, WT_PAGE *orig, WT_REF *ref, WT_MULTI *multi)
+ WT_SESSION_IMPL *session, WT_PAGE *orig, WT_MULTI *multi, WT_REF *ref)
{
WT_CURSOR_BTREE cbt;
WT_DECL_ITEM(key);
@@ -1487,13 +1497,12 @@ __split_multi_inmem(
orig->type != WT_PAGE_COL_VAR || ref->ref_recno != 0);
/*
- * This code re-creates an in-memory page that is part of a set created
- * while evicting a large page, and adds references to any unresolved
- * update chains to the new page. We get here due to choosing to keep
- * the results of a split in memory or because and update could not be
- * written when attempting to evict a page.
+ * This code re-creates an in-memory page from a disk image, and adds
+ * references to any unresolved update chains to the new page. We get
+ * here either because an update could not be written when evicting a
+ * page, or eviction chose to keep a page in memory.
*
- * Clear the disk image and link the page into the passed-in WT_REF to
+ * Steal the disk image and link the page into the passed-in WT_REF to
* simplify error handling: our caller will not discard the disk image
* when discarding the original page, and our caller will discard the
* allocated page on error, when discarding the allocated WT_REF.
@@ -1503,6 +1512,19 @@ __split_multi_inmem(
WT_PAGE_DISK_ALLOC, &page));
multi->disk_image = NULL;
+ /*
+ * Put the re-instantiated page in the same LRU queue location as the
+ * original page, unless this was a forced eviction, in which case we
+ * leave the new page with the read generation unset. Eviction will
+ * set the read generation next time it visits this page.
+ */
+ if (orig->read_gen != WT_READGEN_OLDEST)
+ page->read_gen = orig->read_gen;
+
+ /* If there are no updates to apply to the page, we're done. */
+ if (multi->supd_entries == 0)
+ return (0);
+
if (orig->type == WT_PAGE_ROW_LEAF)
WT_RET(__wt_scr_alloc(session, 0, &key));
@@ -1551,14 +1573,12 @@ __split_multi_inmem(
}
/*
- * If we modified the page above, it will have set the first dirty
- * transaction to the last transaction currently running. However, the
- * updates we installed may be older than that. Set the first dirty
- * transaction to an impossibly old value so this page is never skipped
- * in a checkpoint.
+ * When modifying the page we set the first dirty transaction to the
+ * last transaction currently running. However, the updates we made
+ * might be older than that. Set the first dirty transaction to an
+ * impossibly old value so this page is never skipped in a checkpoint.
*/
- if (page->modify != NULL)
- page->modify->first_dirty_txn = WT_TXN_FIRST;
+ page->modify->first_dirty_txn = WT_TXN_FIRST;
err: /* Free any resources that may have been cached in the cursor. */
WT_TRET(__wt_btcur_close(&cbt, true));
@@ -1629,19 +1649,17 @@ __split_multi_inmem_fail(WT_SESSION_IMPL *session, WT_PAGE *orig, WT_REF *ref)
*/
int
__wt_multi_to_ref(WT_SESSION_IMPL *session,
- WT_PAGE *page, WT_MULTI *multi, WT_REF **refp, size_t *incrp)
+ WT_PAGE *page, WT_MULTI *multi, WT_REF **refp, size_t *incrp, bool closing)
{
WT_ADDR *addr;
WT_IKEY *ikey;
WT_REF *ref;
- size_t incr;
-
- incr = 0;
/* Allocate an underlying WT_REF. */
WT_RET(__wt_calloc_one(session, refp));
ref = *refp;
- incr += sizeof(WT_REF);
+ if (incrp)
+ *incrp += sizeof(WT_REF);
/*
* Set the WT_REF key before (optionally) building the page, underlying
@@ -1653,21 +1671,34 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session,
ikey = multi->key.ikey;
WT_RET(__wt_row_ikey(
session, 0, WT_IKEY_DATA(ikey), ikey->size, ref));
- incr += sizeof(WT_IKEY) + ikey->size;
+ if (incrp)
+ *incrp += sizeof(WT_IKEY) + ikey->size;
break;
default:
ref->ref_recno = multi->key.recno;
break;
}
- /* If there's a disk image, build a page, otherwise set the address. */
- if (multi->disk_image == NULL) {
- /*
- * Copy the address: we could simply take the buffer, but that
- * would complicate error handling, freeing the reference array
- * would have to avoid freeing the memory, and it's not worth
- * the confusion.
- */
+ /* There should be an address or a disk image (or both). */
+ WT_ASSERT(session,
+ multi->addr.addr != NULL || multi->disk_image != NULL);
+
+ /* If we're closing the file, there better be an address. */
+ WT_ASSERT(session, multi->addr.addr != NULL || !closing);
+
+ /* Verify any disk image we have. */
+ WT_ASSERT(session, multi->disk_image == NULL ||
+ __wt_verify_dsk_image(session,
+ "[page instantiate]", multi->disk_image, 0, false) == 0);
+
+ /*
+ * If there's an address, the page was written, set it.
+ *
+ * Copy the address: we could simply take the buffer, but that would
+ * complicate error handling, freeing the reference array would have
+ * to avoid freeing the memory, and it's not worth the confusion.
+ */
+ if (multi->addr.addr != NULL) {
WT_RET(__wt_calloc_one(session, &addr));
ref->addr = addr;
addr->size = multi->addr.size;
@@ -1675,14 +1706,20 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session,
WT_RET(__wt_strndup(session,
multi->addr.addr, addr->size, &addr->addr));
ref->state = WT_REF_DISK;
- } else {
- WT_RET(__split_multi_inmem(session, page, ref, multi));
+ }
+
+ /*
+ * If we have a disk image and we're not closing the file,
+ * re-instantiate the page.
+ *
+ * Discard any page image we don't use.
+ */
+ if (multi->disk_image != NULL && !closing) {
+ WT_RET(__split_multi_inmem(session, page, multi, ref));
ref->state = WT_REF_MEM;
}
+ __wt_free(session, multi->disk_image);
- /* Optionally return changes in the memory footprint. */
- if (incrp != NULL)
- *incrp += incr;
return (0);
}
@@ -2086,8 +2123,8 @@ __split_multi(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
*/
WT_RET(__wt_calloc_def(session, new_entries, &ref_new));
for (i = 0; i < new_entries; ++i)
- WT_ERR(__wt_multi_to_ref(session,
- page, &mod->mod_multi[i], &ref_new[i], &parent_incr));
+ WT_ERR(__wt_multi_to_ref(session, page,
+ &mod->mod_multi[i], &ref_new[i], &parent_incr, closing));
/*
* Split into the parent; if we're closing the file, we hold it
@@ -2175,15 +2212,13 @@ __wt_split_reverse(WT_SESSION_IMPL *session, WT_REF *ref)
* Rewrite an in-memory page with a new version.
*/
int
-__wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref)
+__wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, WT_MULTI *multi)
{
WT_DECL_RET;
WT_PAGE *page;
- WT_PAGE_MODIFY *mod;
WT_REF *new;
page = ref->page;
- mod = page->modify;
WT_RET(__wt_verbose(
session, WT_VERB_SPLIT, "%p: split-rewrite", ref->page));
@@ -2198,14 +2233,14 @@ __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref)
*
* Build the new page.
*
- * Allocate a WT_REF because the error path uses routines that will ea
- * free memory. The only field we need to set is the record number, as
- * it's used by the search routines.
+ * Allocate a WT_REF, the error path calls routines that free memory.
+ * The only field we need to set is the record number, as it's used by
+ * the search routines.
*/
WT_RET(__wt_calloc_one(session, &new));
new->ref_recno = ref->ref_recno;
- WT_ERR(__split_multi_inmem(session, page, new, &mod->mod_multi[0]));
+ WT_ERR(__split_multi_inmem(session, page, multi, new));
/*
* The rewrite succeeded, we can no longer fail.
@@ -2213,7 +2248,7 @@ __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref)
* Finalize the move, discarding moved update lists from the original
* page.
*/
- __split_multi_inmem_final(page, &mod->mod_multi[0]);
+ __split_multi_inmem_final(page, multi);
/*
* Discard the original page.
diff --git a/src/btree/bt_stat.c b/src/btree/bt_stat.c
index 3d5abf34147..d3ddf33446e 100644
--- a/src/btree/bt_stat.c
+++ b/src/btree/bt_stat.c
@@ -41,6 +41,9 @@ __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst)
WT_STAT_SET(session, stats, btree_maxleafpage, btree->maxleafpage);
WT_STAT_SET(session, stats, btree_maxleafvalue, btree->maxleafvalue);
+ WT_STAT_SET(session, stats, cache_bytes_inuse,
+ __wt_btree_bytes_inuse(session));
+
/* Everything else is really, really expensive. */
if (!F_ISSET(cst, WT_CONN_STAT_ALL))
return (0);
@@ -139,7 +142,7 @@ __stat_page_col_var(
} else {
orig_deleted = false;
__wt_cell_unpack(cell, unpack);
- if (unpack->type == WT_CELL_ADDR_DEL)
+ if (unpack->type == WT_CELL_DEL)
orig_deleted = true;
else {
entry_cnt += __wt_cell_rle(unpack);
diff --git a/src/btree/bt_sync.c b/src/btree/bt_sync.c
index da6c53aa316..df794c96cda 100644
--- a/src/btree/bt_sync.c
+++ b/src/btree/bt_sync.c
@@ -84,7 +84,8 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
WT_ERR(__wt_txn_get_snapshot(session));
leaf_bytes += page->memory_footprint;
++leaf_pages;
- WT_ERR(__wt_reconcile(session, walk, NULL, 0));
+ WT_ERR(__wt_reconcile(
+ session, walk, NULL, WT_CHECKPOINTING));
}
}
break;
@@ -92,7 +93,7 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
/*
* If we are flushing a file at read-committed isolation, which
* is of particular interest for flushing the metadata to make
- * schema-changing operation durable, get a transactional
+ * a schema-changing operation durable, get a transactional
* snapshot now.
*
* All changes committed up to this point should be included.
@@ -126,7 +127,17 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
*/
WT_PUBLISH(btree->checkpointing, WT_CKPT_PREPARE);
- WT_ERR(__wt_evict_file_exclusive_on(session));
+ /*
+ * Sync for checkpoint allows splits to happen while the queue
+ * is being drained, but not reconciliation. We need to do this,
+ * since draining the queue can take long enough for hot pages
+ * to grow significantly larger than the configured maximum
+ * size.
+ */
+ F_SET(btree, WT_BTREE_NO_RECONCILE);
+ ret = __wt_evict_file_exclusive_on(session);
+ F_CLR(btree, WT_BTREE_NO_RECONCILE);
+ WT_ERR(ret);
__wt_evict_file_exclusive_off(session);
WT_PUBLISH(btree->checkpointing, WT_CKPT_RUNNING);
@@ -183,7 +194,8 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
leaf_bytes += page->memory_footprint;
++leaf_pages;
}
- WT_ERR(__wt_reconcile(session, walk, NULL, 0));
+ WT_ERR(__wt_reconcile(
+ session, walk, NULL, WT_CHECKPOINTING));
}
break;
case WT_SYNC_CLOSE:
@@ -217,41 +229,9 @@ err: /* On error, clear any left-over tree walk. */
saved_snap_min == WT_TXN_NONE)
__wt_txn_release_snapshot(session);
- if (btree->checkpointing != WT_CKPT_OFF) {
- /*
- * Update the checkpoint generation for this handle so visible
- * updates newer than the checkpoint can be evicted.
- *
- * This has to be published before eviction is enabled again,
- * so that eviction knows that the checkpoint has completed.
- */
- WT_PUBLISH(btree->checkpoint_gen,
- conn->txn_global.checkpoint_gen);
- WT_STAT_FAST_DATA_SET(session,
- btree_checkpoint_generation, btree->checkpoint_gen);
-
- /*
- * Clear the checkpoint flag and push the change; not required,
- * but publishing the change means stalled eviction gets moving
- * as soon as possible.
- */
- btree->checkpointing = WT_CKPT_OFF;
- WT_FULL_BARRIER();
-
- /*
- * If this tree was being skipped by the eviction server during
- * the checkpoint, clear the wait.
- */
- btree->evict_walk_period = 0;
-
- /*
- * Wake the eviction server, in case application threads have
- * stalled while the eviction server decided it couldn't make
- * progress. Without this, application threads will be stalled
- * until the eviction server next wakes.
- */
- WT_TRET(__wt_evict_server_wake(session));
- }
+ /* Clear the checkpoint flag and push the change. */
+ if (btree->checkpointing != WT_CKPT_OFF)
+ WT_PUBLISH(btree->checkpointing, WT_CKPT_OFF);
__wt_spin_unlock(session, &btree->flush_lock);
diff --git a/src/btree/bt_walk.c b/src/btree/bt_walk.c
index bb8a750d848..17d32d6ed63 100644
--- a/src/btree/bt_walk.c
+++ b/src/btree/bt_walk.c
@@ -381,16 +381,6 @@ restart: /*
__ref_ascend(session, &ref, &pindex, &slot);
/*
- * If we got all the way through an internal page and
- * all of the child pages were deleted, mark it for
- * eviction.
- */
- if (empty_internal && pindex->entries > 1) {
- __wt_page_evict_soon(ref->page);
- empty_internal = false;
- }
-
- /*
* If at the root and returning internal pages, return
* the root page, otherwise we're done. Regardless, no
* hazard pointer is required, release the one we hold.
@@ -404,6 +394,16 @@ restart: /*
}
/*
+ * If we got all the way through an internal page and
+ * all of the child pages were deleted, mark it for
+ * eviction.
+ */
+ if (empty_internal && pindex->entries > 1) {
+ WT_ERR(__wt_page_evict_soon(session, ref));
+ empty_internal = false;
+ }
+
+ /*
* Optionally return internal pages. Swap our previous
* hazard pointer for the page we'll return. We don't
* handle restart or not-found returns, it would require
diff --git a/src/btree/row_srch.c b/src/btree/row_srch.c
index 4afcd74520f..0f70e84de7e 100644
--- a/src/btree/row_srch.c
+++ b/src/btree/row_srch.c
@@ -775,7 +775,7 @@ __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
* traversing the skip list each time accumulates to real time.
*/
if (samples > 5000)
- __wt_page_evict_soon(page);
+ WT_RET(__wt_page_evict_soon(session, cbt->ref));
return (0);
}
diff --git a/src/config/config_def.c b/src/config/config_def.c
index 1b656c5a0aa..192b80bb359 100644
--- a/src/config/config_def.c
+++ b/src/config/config_def.c
@@ -42,7 +42,6 @@ static const WT_CONFIG_CHECK
static const WT_CONFIG_CHECK
confchk_wiredtiger_open_checkpoint_subconfigs[] = {
{ "log_size", "int", NULL, "min=0,max=2GB", NULL, 0 },
- { "name", "string", NULL, NULL, NULL, 0 },
{ "wait", "int", NULL, "min=0,max=100000", NULL, 0 },
{ NULL, NULL, NULL, NULL, NULL, 0 }
};
@@ -67,16 +66,9 @@ static const WT_CONFIG_CHECK
};
static const WT_CONFIG_CHECK
- confchk_wiredtiger_open_log_subconfigs[] = {
+ confchk_WT_CONNECTION_reconfigure_log_subconfigs[] = {
{ "archive", "boolean", NULL, NULL, NULL, 0 },
- { "compressor", "string", NULL, NULL, NULL, 0 },
- { "enabled", "boolean", NULL, NULL, NULL, 0 },
- { "file_max", "int", NULL, "min=100KB,max=2GB", NULL, 0 },
- { "path", "string", NULL, NULL, NULL, 0 },
{ "prealloc", "boolean", NULL, NULL, NULL, 0 },
- { "recover", "string",
- NULL, "choices=[\"error\",\"on\"]",
- NULL, 0 },
{ "zero_fill", "boolean", NULL, NULL, NULL, 0 },
{ NULL, NULL, NULL, NULL, NULL, 0 }
};
@@ -99,10 +91,9 @@ static const WT_CONFIG_CHECK
};
static const WT_CONFIG_CHECK
- confchk_wiredtiger_open_statistics_log_subconfigs[] = {
+ confchk_WT_CONNECTION_reconfigure_statistics_log_subconfigs[] = {
{ "json", "boolean", NULL, NULL, NULL, 0 },
{ "on_close", "boolean", NULL, NULL, NULL, 0 },
- { "path", "string", NULL, NULL, NULL, 0 },
{ "sources", "list", NULL, NULL, NULL, 0 },
{ "timestamp", "string", NULL, NULL, NULL, 0 },
{ "wait", "int", NULL, "min=0,max=100000", NULL, 0 },
@@ -117,16 +108,16 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = {
{ "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 },
{ "checkpoint", "category",
NULL, NULL,
- confchk_wiredtiger_open_checkpoint_subconfigs, 3 },
+ confchk_wiredtiger_open_checkpoint_subconfigs, 2 },
{ "error_prefix", "string", NULL, NULL, NULL, 0 },
{ "eviction", "category",
NULL, NULL,
confchk_wiredtiger_open_eviction_subconfigs, 2 },
{ "eviction_dirty_target", "int",
- NULL, "min=5,max=99",
+ NULL, "min=1,max=99",
NULL, 0 },
{ "eviction_dirty_trigger", "int",
- NULL, "min=5,max=99",
+ NULL, "min=1,max=99",
NULL, 0 },
{ "eviction_target", "int", NULL, "min=10,max=99", NULL, 0 },
{ "eviction_trigger", "int", NULL, "min=10,max=99", NULL, 0 },
@@ -135,7 +126,7 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = {
confchk_wiredtiger_open_file_manager_subconfigs, 3 },
{ "log", "category",
NULL, NULL,
- confchk_wiredtiger_open_log_subconfigs, 8 },
+ confchk_WT_CONNECTION_reconfigure_log_subconfigs, 3 },
{ "lsm_manager", "category",
NULL, NULL,
confchk_wiredtiger_open_lsm_manager_subconfigs, 2 },
@@ -148,7 +139,7 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = {
NULL, 0 },
{ "statistics_log", "category",
NULL, NULL,
- confchk_wiredtiger_open_statistics_log_subconfigs, 6 },
+ confchk_WT_CONNECTION_reconfigure_statistics_log_subconfigs, 5 },
{ "verbose", "list",
NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\","
"\"evict\",\"evictserver\",\"fileops\",\"handleops\",\"log\","
@@ -326,6 +317,7 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_open_cursor[] = {
{ "append", "boolean", NULL, NULL, NULL, 0 },
{ "bulk", "string", NULL, NULL, NULL, 0 },
{ "checkpoint", "string", NULL, NULL, NULL, 0 },
+ { "checkpoint_wait", "boolean", NULL, NULL, NULL, 0 },
{ "dump", "string",
NULL, "choices=[\"hex\",\"json\",\"print\"]",
NULL, 0 },
@@ -608,6 +600,32 @@ static const WT_CONFIG_CHECK
};
static const WT_CONFIG_CHECK
+ confchk_wiredtiger_open_log_subconfigs[] = {
+ { "archive", "boolean", NULL, NULL, NULL, 0 },
+ { "compressor", "string", NULL, NULL, NULL, 0 },
+ { "enabled", "boolean", NULL, NULL, NULL, 0 },
+ { "file_max", "int", NULL, "min=100KB,max=2GB", NULL, 0 },
+ { "path", "string", NULL, NULL, NULL, 0 },
+ { "prealloc", "boolean", NULL, NULL, NULL, 0 },
+ { "recover", "string",
+ NULL, "choices=[\"error\",\"on\"]",
+ NULL, 0 },
+ { "zero_fill", "boolean", NULL, NULL, NULL, 0 },
+ { NULL, NULL, NULL, NULL, NULL, 0 }
+};
+
+static const WT_CONFIG_CHECK
+ confchk_wiredtiger_open_statistics_log_subconfigs[] = {
+ { "json", "boolean", NULL, NULL, NULL, 0 },
+ { "on_close", "boolean", NULL, NULL, NULL, 0 },
+ { "path", "string", NULL, NULL, NULL, 0 },
+ { "sources", "list", NULL, NULL, NULL, 0 },
+ { "timestamp", "string", NULL, NULL, NULL, 0 },
+ { "wait", "int", NULL, "min=0,max=100000", NULL, 0 },
+ { NULL, NULL, NULL, NULL, NULL, 0 }
+};
+
+static const WT_CONFIG_CHECK
confchk_wiredtiger_open_transaction_sync_subconfigs[] = {
{ "enabled", "boolean", NULL, NULL, NULL, 0 },
{ "method", "string",
@@ -625,7 +643,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
{ "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 },
{ "checkpoint", "category",
NULL, NULL,
- confchk_wiredtiger_open_checkpoint_subconfigs, 3 },
+ confchk_wiredtiger_open_checkpoint_subconfigs, 2 },
{ "checkpoint_sync", "boolean", NULL, NULL, NULL, 0 },
{ "config_base", "boolean", NULL, NULL, NULL, 0 },
{ "create", "boolean", NULL, NULL, NULL, 0 },
@@ -640,10 +658,10 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
NULL, NULL,
confchk_wiredtiger_open_eviction_subconfigs, 2 },
{ "eviction_dirty_target", "int",
- NULL, "min=5,max=99",
+ NULL, "min=1,max=99",
NULL, 0 },
{ "eviction_dirty_trigger", "int",
- NULL, "min=5,max=99",
+ NULL, "min=1,max=99",
NULL, 0 },
{ "eviction_target", "int", NULL, "min=10,max=99", NULL, 0 },
{ "eviction_trigger", "int", NULL, "min=10,max=99", NULL, 0 },
@@ -706,7 +724,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = {
{ "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 },
{ "checkpoint", "category",
NULL, NULL,
- confchk_wiredtiger_open_checkpoint_subconfigs, 3 },
+ confchk_wiredtiger_open_checkpoint_subconfigs, 2 },
{ "checkpoint_sync", "boolean", NULL, NULL, NULL, 0 },
{ "config_base", "boolean", NULL, NULL, NULL, 0 },
{ "create", "boolean", NULL, NULL, NULL, 0 },
@@ -721,10 +739,10 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = {
NULL, NULL,
confchk_wiredtiger_open_eviction_subconfigs, 2 },
{ "eviction_dirty_target", "int",
- NULL, "min=5,max=99",
+ NULL, "min=1,max=99",
NULL, 0 },
{ "eviction_dirty_trigger", "int",
- NULL, "min=5,max=99",
+ NULL, "min=1,max=99",
NULL, 0 },
{ "eviction_target", "int", NULL, "min=10,max=99", NULL, 0 },
{ "eviction_trigger", "int", NULL, "min=10,max=99", NULL, 0 },
@@ -788,7 +806,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = {
{ "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 },
{ "checkpoint", "category",
NULL, NULL,
- confchk_wiredtiger_open_checkpoint_subconfigs, 3 },
+ confchk_wiredtiger_open_checkpoint_subconfigs, 2 },
{ "checkpoint_sync", "boolean", NULL, NULL, NULL, 0 },
{ "direct_io", "list",
NULL, "choices=[\"checkpoint\",\"data\",\"log\"]",
@@ -801,10 +819,10 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = {
NULL, NULL,
confchk_wiredtiger_open_eviction_subconfigs, 2 },
{ "eviction_dirty_target", "int",
- NULL, "min=5,max=99",
+ NULL, "min=1,max=99",
NULL, 0 },
{ "eviction_dirty_trigger", "int",
- NULL, "min=5,max=99",
+ NULL, "min=1,max=99",
NULL, 0 },
{ "eviction_target", "int", NULL, "min=10,max=99", NULL, 0 },
{ "eviction_trigger", "int", NULL, "min=10,max=99", NULL, 0 },
@@ -864,7 +882,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = {
{ "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 },
{ "checkpoint", "category",
NULL, NULL,
- confchk_wiredtiger_open_checkpoint_subconfigs, 3 },
+ confchk_wiredtiger_open_checkpoint_subconfigs, 2 },
{ "checkpoint_sync", "boolean", NULL, NULL, NULL, 0 },
{ "direct_io", "list",
NULL, "choices=[\"checkpoint\",\"data\",\"log\"]",
@@ -877,10 +895,10 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = {
NULL, NULL,
confchk_wiredtiger_open_eviction_subconfigs, 2 },
{ "eviction_dirty_target", "int",
- NULL, "min=5,max=99",
+ NULL, "min=1,max=99",
NULL, 0 },
{ "eviction_dirty_trigger", "int",
- NULL, "min=5,max=99",
+ NULL, "min=1,max=99",
NULL, 0 },
{ "eviction_target", "int", NULL, "min=10,max=99", NULL, 0 },
{ "eviction_trigger", "int", NULL, "min=10,max=99", NULL, 0 },
@@ -970,17 +988,14 @@ static const WT_CONFIG_ENTRY config_entries[] = {
},
{ "WT_CONNECTION.reconfigure",
"async=(enabled=0,ops_max=1024,threads=2),cache_overhead=8,"
- "cache_size=100MB,checkpoint=(log_size=0,"
- "name=\"WiredTigerCheckpoint\",wait=0),error_prefix=,"
- "eviction=(threads_max=1,threads_min=1),eviction_dirty_target=80,"
- "eviction_dirty_trigger=95,eviction_target=80,eviction_trigger=95"
+ "cache_size=100MB,checkpoint=(log_size=0,wait=0),error_prefix=,"
+ "eviction=(threads_max=1,threads_min=1),eviction_dirty_target=5,"
+ "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
",file_manager=(close_handle_minimum=250,close_idle_time=30,"
- "close_scan_interval=10),log=(archive=,compressor=,enabled=0,"
- "file_max=100MB,path=\".\",prealloc=,recover=on,zero_fill=0),"
+ "close_scan_interval=10),log=(archive=,prealloc=,zero_fill=0),"
"lsm_manager=(merge=,worker_thread_max=4),lsm_merge=,"
"shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB),"
- "statistics=none,statistics_log=(json=0,on_close=0,"
- "path=\"WiredTigerStat.%d.%H\",sources=,"
+ "statistics=none,statistics_log=(json=0,on_close=0,sources=,"
"timestamp=\"%b %d %H:%M:%S\",wait=0),verbose=",
confchk_WT_CONNECTION_reconfigure, 18
},
@@ -1052,10 +1067,10 @@ static const WT_CONFIG_ENTRY config_entries[] = {
NULL, 0
},
{ "WT_SESSION.open_cursor",
- "append=0,bulk=0,checkpoint=,dump=,next_random=0,"
- "next_random_sample_size=0,overwrite=,raw=0,readonly=0,"
+ "append=0,bulk=0,checkpoint=,checkpoint_wait=,dump=,next_random=0"
+ ",next_random_sample_size=0,overwrite=,raw=0,readonly=0,"
"skip_sort_check=0,statistics=,target=",
- confchk_WT_SESSION_open_cursor, 12
+ confchk_WT_SESSION_open_cursor, 13
},
{ "WT_SESSION.rebalance",
"",
@@ -1168,21 +1183,20 @@ static const WT_CONFIG_ENTRY config_entries[] = {
},
{ "wiredtiger_open",
"async=(enabled=0,ops_max=1024,threads=2),buffer_alignment=-1,"
- "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0,"
- "name=\"WiredTigerCheckpoint\",wait=0),checkpoint_sync=,"
- "config_base=,create=0,direct_io=,encryption=(keyid=,name=,"
- "secretkey=),error_prefix=,eviction=(threads_max=1,threads_min=1)"
- ",eviction_dirty_target=80,eviction_dirty_trigger=95,"
- "eviction_target=80,eviction_trigger=95,exclusive=0,extensions=,"
- "file_extend=,file_manager=(close_handle_minimum=250,"
- "close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
- "in_memory=0,log=(archive=,compressor=,enabled=0,file_max=100MB,"
- "path=\".\",prealloc=,recover=on,zero_fill=0),lsm_manager=(merge="
- ",worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0"
- ",session_max=100,session_scratch_max=2MB,"
- "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB),"
- "statistics=none,statistics_log=(json=0,on_close=0,"
- "path=\"WiredTigerStat.%d.%H\",sources=,"
+ "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0,wait=0)"
+ ",checkpoint_sync=,config_base=,create=0,direct_io=,"
+ "encryption=(keyid=,name=,secretkey=),error_prefix=,"
+ "eviction=(threads_max=1,threads_min=1),eviction_dirty_target=5,"
+ "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
+ ",exclusive=0,extensions=,file_extend=,"
+ "file_manager=(close_handle_minimum=250,close_idle_time=30,"
+ "close_scan_interval=10),hazard_max=1000,in_memory=0,"
+ "log=(archive=,compressor=,enabled=0,file_max=100MB,path=\".\","
+ "prealloc=,recover=on,zero_fill=0),lsm_manager=(merge=,"
+ "worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0,"
+ "session_max=100,session_scratch_max=2MB,shared_cache=(chunk=10MB"
+ ",name=,quota=0,reserve=0,size=500MB),statistics=none,"
+ "statistics_log=(json=0,on_close=0,path=\".\",sources=,"
"timestamp=\"%b %d %H:%M:%S\",wait=0),transaction_sync=(enabled=0"
",method=fsync),use_environment=,use_environment_priv=0,verbose=,"
"write_through=",
@@ -1190,21 +1204,20 @@ static const WT_CONFIG_ENTRY config_entries[] = {
},
{ "wiredtiger_open_all",
"async=(enabled=0,ops_max=1024,threads=2),buffer_alignment=-1,"
- "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0,"
- "name=\"WiredTigerCheckpoint\",wait=0),checkpoint_sync=,"
- "config_base=,create=0,direct_io=,encryption=(keyid=,name=,"
- "secretkey=),error_prefix=,eviction=(threads_max=1,threads_min=1)"
- ",eviction_dirty_target=80,eviction_dirty_trigger=95,"
- "eviction_target=80,eviction_trigger=95,exclusive=0,extensions=,"
- "file_extend=,file_manager=(close_handle_minimum=250,"
- "close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
- "in_memory=0,log=(archive=,compressor=,enabled=0,file_max=100MB,"
- "path=\".\",prealloc=,recover=on,zero_fill=0),lsm_manager=(merge="
- ",worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0"
- ",session_max=100,session_scratch_max=2MB,"
- "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB),"
- "statistics=none,statistics_log=(json=0,on_close=0,"
- "path=\"WiredTigerStat.%d.%H\",sources=,"
+ "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0,wait=0)"
+ ",checkpoint_sync=,config_base=,create=0,direct_io=,"
+ "encryption=(keyid=,name=,secretkey=),error_prefix=,"
+ "eviction=(threads_max=1,threads_min=1),eviction_dirty_target=5,"
+ "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
+ ",exclusive=0,extensions=,file_extend=,"
+ "file_manager=(close_handle_minimum=250,close_idle_time=30,"
+ "close_scan_interval=10),hazard_max=1000,in_memory=0,"
+ "log=(archive=,compressor=,enabled=0,file_max=100MB,path=\".\","
+ "prealloc=,recover=on,zero_fill=0),lsm_manager=(merge=,"
+ "worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0,"
+ "session_max=100,session_scratch_max=2MB,shared_cache=(chunk=10MB"
+ ",name=,quota=0,reserve=0,size=500MB),statistics=none,"
+ "statistics_log=(json=0,on_close=0,path=\".\",sources=,"
"timestamp=\"%b %d %H:%M:%S\",wait=0),transaction_sync=(enabled=0"
",method=fsync),use_environment=,use_environment_priv=0,verbose=,"
"version=(major=0,minor=0),write_through=",
@@ -1212,41 +1225,39 @@ static const WT_CONFIG_ENTRY config_entries[] = {
},
{ "wiredtiger_open_basecfg",
"async=(enabled=0,ops_max=1024,threads=2),buffer_alignment=-1,"
- "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0,"
- "name=\"WiredTigerCheckpoint\",wait=0),checkpoint_sync=,"
- "direct_io=,encryption=(keyid=,name=,secretkey=),error_prefix=,"
- "eviction=(threads_max=1,threads_min=1),eviction_dirty_target=80,"
- "eviction_dirty_trigger=95,eviction_target=80,eviction_trigger=95"
- ",extensions=,file_extend=,file_manager=(close_handle_minimum=250"
- ",close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
- "log=(archive=,compressor=,enabled=0,file_max=100MB,path=\".\","
- "prealloc=,recover=on,zero_fill=0),lsm_manager=(merge=,"
- "worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0,"
- "session_max=100,session_scratch_max=2MB,shared_cache=(chunk=10MB"
- ",name=,quota=0,reserve=0,size=500MB),statistics=none,"
- "statistics_log=(json=0,on_close=0,path=\"WiredTigerStat.%d.%H\","
- "sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
- "transaction_sync=(enabled=0,method=fsync),verbose=,"
+ "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0,wait=0)"
+ ",checkpoint_sync=,direct_io=,encryption=(keyid=,name=,"
+ "secretkey=),error_prefix=,eviction=(threads_max=1,threads_min=1)"
+ ",eviction_dirty_target=5,eviction_dirty_trigger=20,"
+ "eviction_target=80,eviction_trigger=95,extensions=,file_extend=,"
+ "file_manager=(close_handle_minimum=250,close_idle_time=30,"
+ "close_scan_interval=10),hazard_max=1000,log=(archive=,"
+ "compressor=,enabled=0,file_max=100MB,path=\".\",prealloc=,"
+ "recover=on,zero_fill=0),lsm_manager=(merge=,worker_thread_max=4)"
+ ",lsm_merge=,mmap=,multiprocess=0,readonly=0,session_max=100,"
+ "session_scratch_max=2MB,shared_cache=(chunk=10MB,name=,quota=0,"
+ "reserve=0,size=500MB),statistics=none,statistics_log=(json=0,"
+ "on_close=0,path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\","
+ "wait=0),transaction_sync=(enabled=0,method=fsync),verbose=,"
"version=(major=0,minor=0),write_through=",
confchk_wiredtiger_open_basecfg, 33
},
{ "wiredtiger_open_usercfg",
"async=(enabled=0,ops_max=1024,threads=2),buffer_alignment=-1,"
- "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0,"
- "name=\"WiredTigerCheckpoint\",wait=0),checkpoint_sync=,"
- "direct_io=,encryption=(keyid=,name=,secretkey=),error_prefix=,"
- "eviction=(threads_max=1,threads_min=1),eviction_dirty_target=80,"
- "eviction_dirty_trigger=95,eviction_target=80,eviction_trigger=95"
- ",extensions=,file_extend=,file_manager=(close_handle_minimum=250"
- ",close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
- "log=(archive=,compressor=,enabled=0,file_max=100MB,path=\".\","
- "prealloc=,recover=on,zero_fill=0),lsm_manager=(merge=,"
- "worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0,"
- "session_max=100,session_scratch_max=2MB,shared_cache=(chunk=10MB"
- ",name=,quota=0,reserve=0,size=500MB),statistics=none,"
- "statistics_log=(json=0,on_close=0,path=\"WiredTigerStat.%d.%H\","
- "sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
- "transaction_sync=(enabled=0,method=fsync),verbose=,"
+ "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0,wait=0)"
+ ",checkpoint_sync=,direct_io=,encryption=(keyid=,name=,"
+ "secretkey=),error_prefix=,eviction=(threads_max=1,threads_min=1)"
+ ",eviction_dirty_target=5,eviction_dirty_trigger=20,"
+ "eviction_target=80,eviction_trigger=95,extensions=,file_extend=,"
+ "file_manager=(close_handle_minimum=250,close_idle_time=30,"
+ "close_scan_interval=10),hazard_max=1000,log=(archive=,"
+ "compressor=,enabled=0,file_max=100MB,path=\".\",prealloc=,"
+ "recover=on,zero_fill=0),lsm_manager=(merge=,worker_thread_max=4)"
+ ",lsm_merge=,mmap=,multiprocess=0,readonly=0,session_max=100,"
+ "session_scratch_max=2MB,shared_cache=(chunk=10MB,name=,quota=0,"
+ "reserve=0,size=500MB),statistics=none,statistics_log=(json=0,"
+ "on_close=0,path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\","
+ "wait=0),transaction_sync=(enabled=0,method=fsync),verbose=,"
"write_through=",
confchk_wiredtiger_open_usercfg, 32
},
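
The generated tables above also change user-visible defaults: eviction_dirty_target drops from 80 to 5 and eviction_dirty_trigger from 95 to 20, the allowed minimum for both falls from 5 to 1, checkpoint.name disappears from wiredtiger_open and WT_CONNECTION.reconfigure, and checkpoint_wait is added to WT_SESSION.open_cursor. A hedged example of overriding the new dirty-eviction defaults at open time ("my_home" and the chosen percentages are placeholders):

    WT_CONNECTION *conn;
    int ret;

    /* Raise the dirty targets back toward the old behavior if desired. */
    ret = wiredtiger_open("my_home", NULL,
        "create,eviction_dirty_target=10,eviction_dirty_trigger=40", &conn);
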
diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c
index 98267eeeb2c..1c6b0c2b500 100644
--- a/src/conn/conn_api.c
+++ b/src/conn/conn_api.c
@@ -1217,7 +1217,8 @@ __conn_config_file(WT_SESSION_IMPL *session,
return (0);
/* Open the configuration file. */
- WT_RET(__wt_open(session, filename, WT_OPEN_FILE_TYPE_REGULAR, 0, &fh));
+ WT_RET(__wt_open(
+ session, filename, WT_FS_OPEN_FILE_TYPE_REGULAR, 0, &fh));
WT_ERR(__wt_filesize(session, fh, &size));
if (size == 0)
goto err;
@@ -1510,8 +1511,8 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[])
exist = false;
if (!is_create)
WT_ERR(__wt_fs_exist(session, WT_WIREDTIGER, &exist));
- ret = __wt_open(session, WT_SINGLETHREAD, WT_OPEN_FILE_TYPE_REGULAR,
- is_create || exist ? WT_OPEN_CREATE : 0, &conn->lock_fh);
+ ret = __wt_open(session, WT_SINGLETHREAD, WT_FS_OPEN_FILE_TYPE_REGULAR,
+ is_create || exist ? WT_FS_OPEN_CREATE : 0, &conn->lock_fh);
/*
* If this is a read-only connection and we cannot grab the lock
@@ -1554,7 +1555,7 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[])
*/
#define WT_SINGLETHREAD_STRING "WiredTiger lock file\n"
WT_ERR(__wt_filesize(session, conn->lock_fh, &size));
- if (size != strlen(WT_SINGLETHREAD_STRING))
+ if ((size_t)size != strlen(WT_SINGLETHREAD_STRING))
WT_ERR(__wt_write(session, conn->lock_fh, (wt_off_t)0,
strlen(WT_SINGLETHREAD_STRING),
WT_SINGLETHREAD_STRING));
@@ -1563,7 +1564,8 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[])
/* We own the lock file, optionally create the WiredTiger file. */
ret = __wt_open(session, WT_WIREDTIGER,
- WT_OPEN_FILE_TYPE_REGULAR, is_create ? WT_OPEN_CREATE : 0, &fh);
+ WT_FS_OPEN_FILE_TYPE_REGULAR, is_create ? WT_FS_OPEN_CREATE : 0,
+ &fh);
/*
* If we're read-only, check for handled errors. Even if able to open
@@ -1784,7 +1786,7 @@ __conn_write_base_config(WT_SESSION_IMPL *session, const char *cfg[])
* runs. This doesn't matter for correctness, it's just cleaning up
* random files.
*/
- WT_RET(__wt_remove_if_exists(session, WT_BASECONFIG_SET));
+ WT_RET(__wt_remove_if_exists(session, WT_BASECONFIG_SET, false));
/*
* The base configuration file is only written if creating the database,
@@ -1809,7 +1811,7 @@ __conn_write_base_config(WT_SESSION_IMPL *session, const char *cfg[])
return (0);
WT_RET(__wt_fopen(session, WT_BASECONFIG_SET,
- WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, WT_STREAM_WRITE, &fs));
+ WT_FS_OPEN_CREATE | WT_FS_OPEN_EXCLUSIVE, WT_STREAM_WRITE, &fs));
WT_ERR(__wt_fprintf(session, fs, "%s\n\n",
"# Do not modify this file.\n"
@@ -1870,7 +1872,8 @@ __conn_write_base_config(WT_SESSION_IMPL *session, const char *cfg[])
if (0) {
/* Close open file handle, remove any temporary file. */
err: WT_TRET(__wt_fclose(session, &fs));
- WT_TRET(__wt_remove_if_exists(session, WT_BASECONFIG_SET));
+ WT_TRET(
+ __wt_remove_if_exists(session, WT_BASECONFIG_SET, false));
}
__wt_free(session, base_config);
diff --git a/src/conn/conn_cache.c b/src/conn/conn_cache.c
index 9f15db5382b..e8bb7187418 100644
--- a/src/conn/conn_cache.c
+++ b/src/conn/conn_cache.c
@@ -176,6 +176,10 @@ __wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[])
&cache->evict_queues[i].evict_lock, "cache eviction"));
}
+ /* Ensure there is always a non-NULL current queue. */
+ cache->evict_current_queue =
+ &cache->evict_queues[WT_EVICT_URGENT_QUEUE + 1];
+
/*
* We get/set some values in the cache statistics (rather than have
* two copies), configure them.
@@ -197,7 +201,7 @@ __wt_cache_stats_update(WT_SESSION_IMPL *session)
WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
WT_CONNECTION_STATS **stats;
- uint64_t inuse, leaf, used;
+ uint64_t inuse, leaf;
conn = S2C(session);
cache = conn->cache;
@@ -208,26 +212,29 @@ __wt_cache_stats_update(WT_SESSION_IMPL *session)
* There are races updating the different cache tracking values so
* be paranoid calculating the leaf byte usage.
*/
- used = cache->bytes_overflow + cache->bytes_internal;
- leaf = inuse > used ? inuse - used : 0;
+ leaf = inuse > cache->bytes_internal ?
+ inuse - cache->bytes_internal : 0;
WT_STAT_SET(session, stats, cache_bytes_max, conn->cache_size);
WT_STAT_SET(session, stats, cache_bytes_inuse, inuse);
-
WT_STAT_SET(session, stats, cache_overhead, cache->overhead_pct);
- WT_STAT_SET(
- session, stats, cache_pages_inuse, __wt_cache_pages_inuse(cache));
+
WT_STAT_SET(
session, stats, cache_bytes_dirty, __wt_cache_dirty_inuse(cache));
- WT_STAT_SET(session, stats,
- cache_eviction_maximum_page_size, cache->evict_max_page_size);
- WT_STAT_SET(session, stats, cache_pages_dirty, cache->pages_dirty);
-
WT_STAT_SET(
- session, stats, cache_bytes_internal, cache->bytes_internal);
+ session, stats, cache_bytes_image, __wt_cache_bytes_image(cache));
WT_STAT_SET(
- session, stats, cache_bytes_overflow, cache->bytes_overflow);
+ session, stats, cache_pages_inuse, __wt_cache_pages_inuse(cache));
+ WT_STAT_SET(
+ session, stats, cache_bytes_internal, cache->bytes_internal);
WT_STAT_SET(session, stats, cache_bytes_leaf, leaf);
+ WT_STAT_SET(
+ session, stats, cache_bytes_other, __wt_cache_bytes_other(cache));
+
+ WT_STAT_SET(session, stats,
+ cache_eviction_maximum_page_size, cache->evict_max_page_size);
+ WT_STAT_SET(session, stats, cache_pages_dirty,
+ cache->pages_dirty_intl + cache->pages_dirty_leaf);
/*
* The number of files with active walks ~= number of hazard pointers
@@ -235,7 +242,7 @@ __wt_cache_stats_update(WT_SESSION_IMPL *session)
*/
if (conn->evict_session != NULL)
WT_STAT_SET(session, stats, cache_eviction_walks_active,
- conn->evict_session->nhazard);
+ cache->walk_session->nhazard);
}
/*
@@ -267,11 +274,13 @@ __wt_cache_destroy(WT_SESSION_IMPL *session)
__wt_errx(session,
"cache server: exiting with %" PRIu64 " bytes in memory",
cache->bytes_inmem);
- if (cache->bytes_dirty != 0 || cache->pages_dirty != 0)
+ if (cache->bytes_dirty_intl + cache->bytes_dirty_leaf != 0 ||
+ cache->pages_dirty_intl + cache->pages_dirty_leaf != 0)
__wt_errx(session,
"cache server: exiting with %" PRIu64
" bytes dirty and %" PRIu64 " pages dirty",
- cache->bytes_dirty, cache->pages_dirty);
+ cache->bytes_dirty_intl + cache->bytes_dirty_leaf,
+ cache->pages_dirty_intl + cache->pages_dirty_leaf);
WT_TRET(__wt_cond_auto_destroy(session, &cache->evict_cond));
WT_TRET(__wt_cond_destroy(session, &cache->evict_waiter_cond));
@@ -286,6 +295,7 @@ __wt_cache_destroy(WT_SESSION_IMPL *session)
__wt_spin_destroy(session, &cache->evict_queues[i].evict_lock);
__wt_free(session, cache->evict_queues[i].evict_queue);
}
+
__wt_free(session, conn->cache);
return (ret);
}
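
The cache-statistics rework above splits dirty tracking into internal and leaf counts and adds image/other byte breakdowns. Applications read these through an ordinary statistics cursor; a hedged sketch follows, assuming WT_STAT_CONN_CACHE_BYTES_DIRTY as the connection-level dirty-bytes key and the usual description/printable/value result triple:

    WT_CURSOR *stat;
    const char *desc, *pvalue;
    int64_t value;
    int ret;

    ret = session->open_cursor(session, "statistics:", NULL, NULL, &stat);
    stat->set_key(stat, WT_STAT_CONN_CACHE_BYTES_DIRTY);
    if ((ret = stat->search(stat)) == 0)
        ret = stat->get_value(stat, &desc, &pvalue, &value);
    ret = stat->close(stat);
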
diff --git a/src/conn/conn_ckpt.c b/src/conn/conn_ckpt.c
index a23350a5e46..d54c65c4767 100644
--- a/src/conn/conn_ckpt.c
+++ b/src/conn/conn_ckpt.c
@@ -19,61 +19,38 @@ __ckpt_server_config(WT_SESSION_IMPL *session, const char **cfg, bool *startp)
{
WT_CONFIG_ITEM cval;
WT_CONNECTION_IMPL *conn;
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- char *p;
+
+ *startp = false;
conn = S2C(session);
- /*
- * The checkpoint configuration requires a wait time and/or a log
- * size -- if one is not set, we're not running at all.
- * Checkpoints based on log size also require logging be enabled.
- */
WT_RET(__wt_config_gets(session, cfg, "checkpoint.wait", &cval));
conn->ckpt_usecs = (uint64_t)cval.val * WT_MILLION;
WT_RET(__wt_config_gets(session, cfg, "checkpoint.log_size", &cval));
conn->ckpt_logsize = (wt_off_t)cval.val;
- /* Checkpoints are incompatible with in-memory configuration */
- if (conn->ckpt_usecs != 0 || conn->ckpt_logsize != 0) {
+ /*
+ * The checkpoint configuration requires a wait time and/or a log size;
+ * if neither is set, we're not running at all. Checkpoints based on log
+ * size also require logging be enabled.
+ */
+ if (conn->ckpt_usecs != 0 ||
+ (conn->ckpt_logsize != 0 &&
+ FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))) {
+ /* Checkpoints are incompatible with in-memory configuration */
WT_RET(__wt_config_gets(session, cfg, "in_memory", &cval));
if (cval.val != 0)
WT_RET_MSG(session, EINVAL,
- "In memory configuration incompatible with "
- "checkpoints");
- }
+ "checkpoint configuration incompatible with "
+ "in-memory configuration");
- __wt_log_written_reset(session);
- if ((conn->ckpt_usecs == 0 && conn->ckpt_logsize == 0) ||
- (conn->ckpt_logsize && conn->ckpt_usecs == 0 &&
- !FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))) {
- *startp = false;
- return (0);
- }
- *startp = true;
+ __wt_log_written_reset(session);
- /*
- * The application can specify a checkpoint name, which we ignore if
- * it's our default.
- */
- WT_RET(__wt_config_gets(session, cfg, "checkpoint.name", &cval));
- if (cval.len != 0 &&
- !WT_STRING_MATCH(WT_CHECKPOINT, cval.str, cval.len)) {
- WT_RET(__wt_checkpoint_name_ok(session, cval.str, cval.len));
-
- WT_RET(__wt_scr_alloc(session, cval.len + 20, &tmp));
- WT_ERR(__wt_buf_fmt(
- session, tmp, "name=%.*s", (int)cval.len, cval.str));
- WT_ERR(__wt_strdup(session, tmp->data, &p));
-
- __wt_free(session, conn->ckpt_config);
- conn->ckpt_config = p;
+ *startp = true;
}
-err: __wt_scr_free(session, &tmp);
- return (ret);
+ return (0);
}
/*
@@ -103,7 +80,7 @@ __ckpt_server(void *arg)
__wt_cond_wait(session, conn->ckpt_cond, conn->ckpt_usecs));
/* Checkpoint the database. */
- WT_ERR(wt_session->checkpoint(wt_session, conn->ckpt_config));
+ WT_ERR(wt_session->checkpoint(wt_session, NULL));
/* Reset. */
if (conn->ckpt_logsize) {
@@ -179,7 +156,16 @@ __wt_checkpoint_server_create(WT_SESSION_IMPL *session, const char *cfg[])
conn = S2C(session);
start = false;
- /* If there is already a server running, shut it down. */
+ /*
+ * Stop any server that is already running. This means that each time
+ * reconfigure is called we'll bounce the server even if there are no
+ * configuration changes. This makes our life easier as the underlying
+ * configuration routine doesn't have to worry about freeing objects
+ * in the connection structure (it's guaranteed to always start with a
+ * blank slate), and we don't have to worry about races where a running
+ * server is reading configuration information that we're updating, and
+ * it's not expected that reconfiguration will happen a lot.
+ */
if (conn->ckpt_session != NULL)
WT_RET(__wt_checkpoint_server_destroy(session));
@@ -211,8 +197,6 @@ __wt_checkpoint_server_destroy(WT_SESSION_IMPL *session)
}
WT_TRET(__wt_cond_destroy(session, &conn->ckpt_cond));
- __wt_free(session, conn->ckpt_config);
-
/* Close the server thread's session. */
if (conn->ckpt_session != NULL) {
wt_session = &conn->ckpt_session->iface;
@@ -226,7 +210,6 @@ __wt_checkpoint_server_destroy(WT_SESSION_IMPL *session)
conn->ckpt_session = NULL;
conn->ckpt_tid_set = false;
conn->ckpt_cond = NULL;
- conn->ckpt_config = NULL;
conn->ckpt_usecs = 0;
return (ret);
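
With the changes above, the checkpoint server no longer honors a custom checkpoint.name, and a log_size trigger only starts the server when logging is enabled. A hedged example of configuring time-based checkpoints at open ("my_home" is a placeholder):

    WT_CONNECTION *conn;
    int ret;

    /*
     * Checkpoint every 60 seconds; add log=(enabled=true) if a
     * checkpoint=(log_size=...) trigger is also wanted.
     */
    ret = wiredtiger_open("my_home", NULL,
        "create,checkpoint=(wait=60)", &conn);
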
diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c
index 08fb2b24468..f52fccc7d1c 100644
--- a/src/conn/conn_dhandle.c
+++ b/src/conn/conn_dhandle.c
@@ -64,6 +64,16 @@ __conn_dhandle_alloc(WT_SESSION_IMPL *session,
F_SET(dhandle, WT_DHANDLE_IS_METADATA);
/*
+ * We are holding the data handle list lock, which protects most
+ * threads from seeing the new handle until that lock is released.
+ *
+ * However, the sweep server scans the list of handles without holding
+ * that lock, so we need a write barrier here to ensure the sweep
+ * server doesn't see a partially filled in structure.
+ */
+ WT_WRITE_BARRIER();
+
+ /*
* Prepend the handle to the connection list, assuming we're likely to
* need new files again soon, until they are cached by all sessions.
*/
diff --git a/src/conn/conn_log.c b/src/conn/conn_log.c
index 1ae370ef2fa..18ed71e4688 100644
--- a/src/conn/conn_log.c
+++ b/src/conn/conn_log.c
@@ -51,6 +51,25 @@ __logmgr_config(
WT_CONNECTION_IMPL *conn;
bool enabled;
+ /*
+ * A note on reconfiguration: the standard "is this configuration string
+ * allowed" checks should fail if reconfiguration has invalid strings,
+ * for example, "log=(enabled)", or "statistics_log=(path=XXX)", because
+ * the connection reconfiguration method doesn't allow those strings.
+ * Additionally, the base configuration values during reconfiguration
+ * are the currently configured values (so we don't revert to default
+ * values when repeatedly reconfiguring), and configuration processing
+ * of a currently set value should not change the currently set value.
+ *
+ * In this code path, log server reconfiguration does not stop/restart
+ * the log server, so there's no point in re-evaluating configuration
+ * strings that cannot be reconfigured, risking bugs in configuration
+ * setup, and depending on evaluation of currently set values to always
+ * result in the currently set value. Skip tests for any configuration
+ * strings which don't make sense during reconfiguration, but don't
+ * worry about error reporting because it should never happen.
+ */
+
conn = S2C(session);
WT_RET(__wt_config_gets(session, cfg, "log.enabled", &cval));
@@ -62,6 +81,8 @@ __logmgr_config(
*
* If it is off and the user it turning it on, or it is on
* and the user is turning it off, return an error.
+ *
+ * See above: should never happen.
*/
if (reconfig &&
((enabled && !FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) ||
@@ -83,6 +104,8 @@ __logmgr_config(
* Setup a log path and compression even if logging is disabled in case
* we are going to print a log. Only do this on creation. Once a
* compressor or log path are set they cannot be changed.
+ *
+ * See above: should never happen.
*/
if (!reconfig) {
conn->log_compressor = NULL;
@@ -95,6 +118,7 @@ __logmgr_config(
WT_RET(__wt_strndup(
session, cval.str, cval.len, &conn->log_path));
}
+
/* We are done if logging isn't enabled. */
if (!*runp)
return (0);
@@ -103,13 +127,14 @@ __logmgr_config(
if (cval.val != 0)
FLD_SET(conn->log_flags, WT_CONN_LOG_ARCHIVE);
+ /*
+ * The file size cannot be reconfigured. The amount of memory allocated
+ * to the log slots may be based on the log file size at creation and we
+ * don't want to re-allocate that memory while running.
+ *
+ * See above: should never happen.
+ */
if (!reconfig) {
- /*
- * Ignore if the user tries to change the file size. The
- * amount of memory allocated to the log slots may be based
- * on the log file size at creation and we don't want to
- * re-allocate that memory while running.
- */
WT_RET(__wt_config_gets(session, cfg, "log.file_max", &cval));
conn->log_file_max = (wt_off_t)cval.val;
WT_STAT_FAST_CONN_SET(session,
@@ -125,12 +150,17 @@ __logmgr_config(
conn->log_prealloc = 1;
/*
- * Note that it is meaningless to reconfigure this value during
- * runtime. It only matters on create before recovery runs.
+ * Note it's meaningless to reconfigure this value during runtime; it
+ * only matters on create before recovery runs.
+ *
+ * See above: should never happen.
*/
- WT_RET(__wt_config_gets_def(session, cfg, "log.recover", 0, &cval));
- if (cval.len != 0 && WT_STRING_MATCH("error", cval.str, cval.len))
- FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR);
+ if (!reconfig) {
+ WT_RET(__wt_config_gets_def(
+ session, cfg, "log.recover", 0, &cval));
+ if (WT_STRING_MATCH("error", cval.str, cval.len))
+ FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR);
+ }
WT_RET(__wt_config_gets(session, cfg, "log.zero_fill", &cval));
if (cval.val != 0) {
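
Per the reconfiguration notes added above, only archive, prealloc and zero_fill remain reconfigurable in the log category; enabling logging or changing the compressor, path, file_max or recover setting must be done at wiredtiger_open. A hedged example of a runtime change that is still allowed, reusing a conn handle opened as in the earlier sketches:

    /* Toggle archiving and pre-allocation on a running connection. */
    ret = conn->reconfigure(conn, "log=(archive=false,prealloc=true)");
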
diff --git a/src/conn/conn_stat.c b/src/conn/conn_stat.c
index 855ff57808e..4e7cac59c4a 100644
--- a/src/conn/conn_stat.c
+++ b/src/conn/conn_stat.c
@@ -36,6 +36,31 @@ __stat_sources_free(WT_SESSION_IMPL *session, char ***sources)
}
/*
+ * __stat_config_discard --
+ * Discard all statistics-log configuration.
+ */
+static int
+__stat_config_discard(WT_SESSION_IMPL *session)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+
+ conn = S2C(session);
+
+ /*
+ * Discard all statistics-log configuration information, called when
+ * reconfiguring or destroying the statistics logging setup,
+ */
+ __wt_free(session, conn->stat_format);
+ ret = __wt_fclose(session, &conn->stat_fs);
+ __wt_free(session, conn->stat_path);
+ __stat_sources_free(session, &conn->stat_sources);
+ conn->stat_stamp = NULL;
+ conn->stat_usecs = 0;
+ return (ret);
+}
+
+/*
* __wt_conn_stat_init --
* Initialize the per-connection statistics.
*/
@@ -73,20 +98,37 @@ __statlog_config(WT_SESSION_IMPL *session, const char **cfg, bool *runp)
WT_CONFIG objectconf;
WT_CONFIG_ITEM cval, k, v;
WT_CONNECTION_IMPL *conn;
+ WT_DECL_ITEM(tmp);
WT_DECL_RET;
int cnt;
char **sources;
+ /*
+ * A note on reconfiguration: the standard "is this configuration string
+ * allowed" checks should fail if reconfiguration has invalid strings,
+ * for example, "log=(enabled)", or "statistics_log=(path=XXX)", because
+ * the connection reconfiguration method doesn't allow those strings.
+ * Additionally, the base configuration values during reconfiguration
+ * are the currently configured values (so we don't revert to default
+ * values when repeatedly reconfiguring), and configuration processing
+ * of a currently set value should not change the currently set value.
+ *
+ * In this code path, a previous statistics log server reconfiguration
+ * may have stopped the server (and we're about to restart it). Because
+ * stopping the server discarded the configured information stored in
+ * the connection structure, we have to re-evaluate all configuration
+ * values, reconfiguration can't skip any of them.
+ */
+
conn = S2C(session);
sources = NULL;
- WT_RET(__wt_config_gets(session, cfg, "statistics_log.wait", &cval));
/* Only start the server if wait time is non-zero */
+ WT_RET(__wt_config_gets(session, cfg, "statistics_log.wait", &cval));
*runp = cval.val != 0;
conn->stat_usecs = (uint64_t)cval.val * WT_MILLION;
- WT_RET(__wt_config_gets(
- session, cfg, "statistics_log.json", &cval));
+ WT_RET(__wt_config_gets(session, cfg, "statistics_log.json", &cval));
if (cval.val != 0)
FLD_SET(conn->stat_flags, WT_CONN_STAT_JSON);
@@ -96,24 +138,30 @@ __statlog_config(WT_SESSION_IMPL *session, const char **cfg, bool *runp)
FLD_SET(conn->stat_flags, WT_CONN_STAT_ON_CLOSE);
/*
- * Statistics logging configuration requires either a wait time or an
- * on-close setting.
- */
- if (!*runp && !FLD_ISSET(conn->stat_flags, WT_CONN_STAT_ON_CLOSE))
- return (0);
-
- /*
- * If any statistics logging is done, this must not be a read-only
- * connection.
+ * We don't allow the log path to be reconfigured for security reasons.
+ * (Applications passing input strings directly to reconfigure would
+ * expose themselves to a potential security problem; the utility of
+ * reconfiguring a statistics log path isn't worth the security risk.)
+ *
+ * See above for the details, but during reconfiguration we're loading
+ * the path value from the saved configuration information, and it's
+ * required during reconfiguration because we potentially stopped and
+ * are restarting the server.
*/
- WT_RET(__wt_config_gets(session, cfg, "statistics_log.sources", &cval));
- WT_RET(__wt_config_subinit(session, &objectconf, &cval));
+ WT_RET(__wt_config_gets(session, cfg, "statistics_log.path", &cval));
+ WT_ERR(__wt_scr_alloc(session, 0, &tmp));
+ WT_ERR(__wt_buf_fmt(session,
+ tmp, "%.*s/%s", (int)cval.len, cval.str, WT_STATLOG_FILENAME));
+ WT_ERR(__wt_filename(session, tmp->data, &conn->stat_path));
+
+ WT_ERR(__wt_config_gets(session, cfg, "statistics_log.sources", &cval));
+ WT_ERR(__wt_config_subinit(session, &objectconf, &cval));
for (cnt = 0; (ret = __wt_config_next(&objectconf, &k, &v)) == 0; ++cnt)
;
- WT_RET_NOTFOUND_OK(ret);
+ WT_ERR_NOTFOUND_OK(ret);
if (cnt != 0) {
- WT_RET(__wt_calloc_def(session, cnt + 1, &sources));
- WT_RET(__wt_config_subinit(session, &objectconf, &cval));
+ WT_ERR(__wt_calloc_def(session, cnt + 1, &sources));
+ WT_ERR(__wt_config_subinit(session, &objectconf, &cval));
for (cnt = 0;
(ret = __wt_config_next(&objectconf, &k, &v)) == 0; ++cnt) {
/*
@@ -138,29 +186,37 @@ __statlog_config(WT_SESSION_IMPL *session, const char **cfg, bool *runp)
sources = NULL;
}
- WT_ERR(__wt_config_gets(session, cfg, "statistics_log.path", &cval));
- WT_ERR(__wt_nfilename(session, cval.str, cval.len, &conn->stat_path));
-
/*
* When using JSON format, use the same timestamp format as MongoDB by
- * default.
+ * default. This requires caution: the user might have set the timestamp
+ * in a previous reconfigure call and we don't want to override that, so
+ * compare the retrieved value with the default value to decide if we
+ * should use the JSON default.
+ *
+ * (This still implies if the user explicitly sets the timestamp to the
+ * default value, then sets the JSON flag in a separate reconfigure
+ * call, or vice-versa, we will incorrectly switch to the JSON default
+ * timestamp. But there's no way to detect that, and this is all a low
+ * probability path.)
+ *
+ * !!!
+ * Don't rewrite in the compressed "%FT%T.000Z" form, MSVC13 segfaults.
*/
- if (FLD_ISSET(conn->stat_flags, WT_CONN_STAT_JSON)) {
- ret = __wt_config_gets(
- session, &cfg[1], "statistics_log.timestamp", &cval);
- if (ret == WT_NOTFOUND)
- WT_ERR(__wt_strdup(
- session, "%FT%T.000Z", &conn->stat_format));
- WT_ERR_NOTFOUND_OK(ret);
- }
- if (conn->stat_format == NULL) {
- WT_ERR(__wt_config_gets(
- session, cfg, "statistics_log.timestamp", &cval));
+#define WT_TIMESTAMP_DEFAULT "%b %d %H:%M:%S"
+#define WT_TIMESTAMP_JSON_DEFAULT "%Y-%m-%dT%H:%M:%S.000Z"
+ WT_ERR(__wt_config_gets(
+ session, cfg, "statistics_log.timestamp", &cval));
+ if (FLD_ISSET(conn->stat_flags, WT_CONN_STAT_JSON) &&
+ WT_STRING_MATCH(WT_TIMESTAMP_DEFAULT, cval.str, cval.len))
+ WT_ERR(__wt_strdup(
+ session, WT_TIMESTAMP_JSON_DEFAULT, &conn->stat_format));
+ else
WT_ERR(__wt_strndup(
session, cval.str, cval.len, &conn->stat_format));
- }
err: __stat_sources_free(session, &sources);
+ __wt_scr_free(session, &tmp);
+
return (ret);
}
@@ -373,7 +429,7 @@ __statlog_log_one(WT_SESSION_IMPL *session, WT_ITEM *path, WT_ITEM *tmp)
if (path != NULL)
(void)strcpy(path->mem, tmp->mem);
WT_RET(__wt_fopen(session, tmp->mem,
- WT_OPEN_CREATE | WT_OPEN_FIXED, WT_STREAM_APPEND,
+ WT_FS_OPEN_CREATE | WT_FS_OPEN_FIXED, WT_STREAM_APPEND,
&log_stream));
}
conn->stat_fs = log_stream;
@@ -538,14 +594,23 @@ __wt_statlog_create(WT_SESSION_IMPL *session, const char *cfg[])
bool start;
conn = S2C(session);
- start = false;
/*
* Stop any server that is already running. This means that each time
* reconfigure is called we'll bounce the server even if there are no
- * configuration changes - but that makes our lives easier.
+ * configuration changes. This makes our life easier as the underlying
+ * configuration routine doesn't have to worry about freeing objects
+ * in the connection structure (it's guaranteed to always start with a
+ * blank slate), and we don't have to worry about races where a running
+ * server is reading configuration information that we're updating, and
+ * it's not expected that reconfiguration will happen a lot.
+ *
+ * If there's no server running, discard any configuration information
+ * so we don't leak memory during reconfiguration.
*/
- if (conn->stat_session != NULL)
+ if (conn->stat_session == NULL)
+ WT_RET(__stat_config_discard(session));
+ else
WT_RET(__wt_statlog_destroy(session, false));
WT_RET(__statlog_config(session, cfg, &start));
@@ -568,38 +633,28 @@ __wt_statlog_destroy(WT_SESSION_IMPL *session, bool is_close)
conn = S2C(session);
+ /* Stop the server thread. */
F_CLR(conn, WT_CONN_SERVER_STATISTICS);
if (conn->stat_tid_set) {
WT_TRET(__wt_cond_signal(session, conn->stat_cond));
WT_TRET(__wt_thread_join(session, conn->stat_tid));
conn->stat_tid_set = false;
}
+ WT_TRET(__wt_cond_destroy(session, &conn->stat_cond));
/* Log a set of statistics on shutdown if configured. */
if (is_close)
WT_TRET(__wt_statlog_log_one(session));
- WT_TRET(__wt_cond_destroy(session, &conn->stat_cond));
-
- __stat_sources_free(session, &conn->stat_sources);
- __wt_free(session, conn->stat_path);
- __wt_free(session, conn->stat_format);
+ /* Discard all configuration information. */
+ WT_TRET(__stat_config_discard(session));
/* Close the server thread's session. */
if (conn->stat_session != NULL) {
wt_session = &conn->stat_session->iface;
WT_TRET(wt_session->close(wt_session, NULL));
+ conn->stat_session = NULL;
}
- /* Clear connection settings so reconfigure is reliable. */
- conn->stat_session = NULL;
- conn->stat_tid_set = false;
- conn->stat_format = NULL;
- WT_TRET(__wt_fclose(session, &conn->stat_fs));
- conn->stat_path = NULL;
- conn->stat_sources = NULL;
- conn->stat_stamp = NULL;
- conn->stat_usecs = 0;
-
return (ret);
}
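
The statistics-log rework above means every reconfigure rebuilds the server's configuration from scratch, and the log path itself is deliberately not reconfigurable. A hedged example of a runtime change that is allowed, again reusing a previously opened conn handle:

    /* Switch to JSON output and a 30-second logging interval at runtime. */
    ret = conn->reconfigure(conn,
        "statistics=(fast),statistics_log=(json=true,wait=30)");
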
diff --git a/src/cursor/cur_backup.c b/src/cursor/cur_backup.c
index 4ee23008687..63952169566 100644
--- a/src/cursor/cur_backup.c
+++ b/src/cursor/cur_backup.c
@@ -9,13 +9,12 @@
#include "wt_internal.h"
static int __backup_all(WT_SESSION_IMPL *);
-static int __backup_cleanup_handles(WT_SESSION_IMPL *, WT_CURSOR_BACKUP *);
static int __backup_list_append(
WT_SESSION_IMPL *, WT_CURSOR_BACKUP *, const char *);
static int __backup_list_uri_append(WT_SESSION_IMPL *, const char *, bool *);
static int __backup_start(
WT_SESSION_IMPL *, WT_CURSOR_BACKUP *, const char *[]);
-static int __backup_stop(WT_SESSION_IMPL *);
+static int __backup_stop(WT_SESSION_IMPL *, WT_CURSOR_BACKUP *);
static int __backup_uri(WT_SESSION_IMPL *, const char *[], bool *, bool *);
/*
@@ -76,20 +75,26 @@ __curbackup_close(WT_CURSOR *cursor)
WT_CURSOR_BACKUP *cb;
WT_DECL_RET;
WT_SESSION_IMPL *session;
- int tret;
cb = (WT_CURSOR_BACKUP *)cursor;
CURSOR_API_CALL(cursor, session, close, NULL);
- WT_TRET(__backup_cleanup_handles(session, cb));
+ /*
+ * When starting a hot backup, we serialize hot backup cursors and set
+ * the connection's hot-backup flag. Once that's done, we set the
+ * cursor's backup-locker flag, implying the cursor owns all necessary
+ * cleanup (including removing temporary files), regardless of error or
+ * success. The cursor's backup-locker flag is never cleared (it's just
+ * discarded when the cursor is closed), because that cursor always
+ * remains responsible for cleanup.
+ */
+ if (F_ISSET(cb, WT_CURBACKUP_LOCKER))
+ WT_TRET(__backup_stop(session, cb));
+
WT_TRET(__wt_cursor_close(cursor));
session->bkp_cursor = NULL;
- WT_WITH_SCHEMA_LOCK(session, tret,
- tret = __backup_stop(session)); /* Stop the backup. */
- WT_TRET(tret);
-
err: API_END_RET(session, ret);
}
@@ -144,11 +149,11 @@ __wt_curbackup_open(WT_SESSION_IMPL *session,
ret = __backup_start(session, cb, cfg)));
WT_ERR(ret);
- /* __wt_cursor_init is last so we don't have to clean up on error. */
WT_ERR(__wt_cursor_init(cursor, uri, NULL, cfg, cursorp));
if (0) {
-err: __wt_free(session, cb);
+err: WT_TRET(__curbackup_close(cursor));
+ *cursorp = NULL;
}
return (ret);
@@ -226,6 +231,9 @@ __backup_start(
conn->hot_backup = true;
WT_ERR(__wt_writeunlock(session, conn->hot_backup_lock));
+ /* We're the lock holder, we own cleanup. */
+ F_SET(cb, WT_CURBACKUP_LOCKER);
+
/*
* Create a temporary backup file. This must be opened before
* generating the list of targets in backup_uri. This file will
@@ -235,7 +243,7 @@ __backup_start(
* doesn't confuse restarting in the source database.
*/
WT_ERR(__wt_fopen(session, WT_BACKUP_TMP,
- WT_OPEN_CREATE, WT_STREAM_WRITE, &cb->bfs));
+ WT_FS_OPEN_CREATE, WT_STREAM_WRITE, &cb->bfs));
/*
* If a list of targets was specified, work our way through them.
* Else, generate a list of all database objects.
@@ -261,7 +269,7 @@ __backup_start(
*/
dest = WT_INCREMENTAL_BACKUP;
WT_ERR(__wt_fopen(session, WT_INCREMENTAL_SRC,
- WT_OPEN_CREATE, WT_STREAM_WRITE, &srcfs));
+ WT_FS_OPEN_CREATE, WT_STREAM_WRITE, &srcfs));
WT_ERR(__backup_list_append(
session, cb, WT_INCREMENTAL_BACKUP));
} else {
@@ -282,12 +290,9 @@ err: /* Close the hot backup file. */
WT_TRET(__wt_fclose(session, &cb->bfs));
if (srcfs != NULL)
WT_TRET(__wt_fclose(session, &srcfs));
- if (ret != 0) {
- WT_TRET(__backup_cleanup_handles(session, cb));
- WT_TRET(__backup_stop(session));
- } else {
+ if (ret == 0) {
WT_ASSERT(session, dest != NULL);
- WT_TRET(__wt_fs_rename(session, WT_BACKUP_TMP, dest));
+ WT_TRET(__wt_fs_rename(session, WT_BACKUP_TMP, dest, false));
}
return (ret);
@@ -295,9 +300,7 @@ err: /* Close the hot backup file. */
/*
* __backup_cleanup_handles --
- * Release and free all btree handles held by the backup. This is kept
- * separate from __backup_stop because it can be called without the
- * schema lock held.
+ * Release and free all btree handles held by the backup.
*/
static int
__backup_cleanup_handles(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb)
@@ -325,15 +328,18 @@ __backup_cleanup_handles(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb)
* Stop a backup.
*/
static int
-__backup_stop(WT_SESSION_IMPL *session)
+__backup_stop(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb)
{
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
conn = S2C(session);
+ /* Release all btree handles held by the backup. */
+ WT_TRET(__backup_cleanup_handles(session, cb));
+
/* Remove any backup specific file. */
- ret = __wt_backup_file_remove(session);
+ WT_TRET(__wt_backup_file_remove(session));
/* Checkpoint deletion can proceed, as can the next hot backup. */
WT_TRET(__wt_writelock(session, conn->hot_backup_lock));
@@ -443,10 +449,10 @@ __wt_backup_file_remove(WT_SESSION_IMPL *session)
* always know we were a source directory while there's any chance of
* an incremental backup file existing.
*/
- WT_TRET(__wt_remove_if_exists(session, WT_BACKUP_TMP));
- WT_TRET(__wt_remove_if_exists(session, WT_INCREMENTAL_BACKUP));
- WT_TRET(__wt_remove_if_exists(session, WT_INCREMENTAL_SRC));
- WT_TRET(__wt_remove_if_exists(session, WT_METADATA_BACKUP));
+ WT_TRET(__wt_remove_if_exists(session, WT_BACKUP_TMP, true));
+ WT_TRET(__wt_remove_if_exists(session, WT_INCREMENTAL_BACKUP, true));
+ WT_TRET(__wt_remove_if_exists(session, WT_INCREMENTAL_SRC, true));
+ WT_TRET(__wt_remove_if_exists(session, WT_METADATA_BACKUP, true));
return (ret);
}
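
After the cleanup-ownership change above, closing the backup cursor is what releases the hot-backup state and removes the temporary files, regardless of how the backup ended. The calling pattern itself is unchanged; a hedged sketch:

    WT_CURSOR *backup;
    const char *filename;
    int ret;

    ret = session->open_cursor(session, "backup:", NULL, NULL, &backup);
    while ((ret = backup->next(backup)) == 0) {
        ret = backup->get_key(backup, &filename);
        /* Copy "filename" from the database directory to the backup target. */
    }
    /* Closing the cursor ends the backup and performs all cleanup. */
    ret = backup->close(backup);
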
diff --git a/src/cursor/cur_config.c b/src/cursor/cur_config.c
index e0d270e4245..2d3f3ffd176 100644
--- a/src/cursor/cur_config.c
+++ b/src/cursor/cur_config.c
@@ -58,11 +58,11 @@ __wt_curconfig_open(WT_SESSION_IMPL *session,
cursor->session = &session->iface;
cursor->key_format = cursor->value_format = "S";
- /* __wt_cursor_init is last so we don't have to clean up on error. */
WT_ERR(__wt_cursor_init(cursor, uri, NULL, cfg, cursorp));
if (0) {
-err: __wt_free(session, cconfig);
+err: WT_TRET(__curconfig_close(cursor));
+ *cursorp = NULL;
}
return (ret);
}
diff --git a/src/cursor/cur_ds.c b/src/cursor/cur_ds.c
index d2b8d81ab37..8d4b7a9384b 100644
--- a/src/cursor/cur_ds.c
+++ b/src/cursor/cur_ds.c
@@ -518,10 +518,7 @@ __wt_curds_open(
source->flags = 0;
if (0) {
-err: if (F_ISSET(cursor, WT_CURSTD_OPEN))
- WT_TRET(cursor->close(cursor));
- else
- __wt_free(session, data_source);
+err: WT_TRET(__curds_close(cursor));
*cursorp = NULL;
}
diff --git a/src/cursor/cur_dump.c b/src/cursor/cur_dump.c
index 595915df7b7..d7f18bb61ac 100644
--- a/src/cursor/cur_dump.c
+++ b/src/cursor/cur_dump.c
@@ -401,13 +401,13 @@ __wt_curdump_create(WT_CURSOR *child, WT_CURSOR *owner, WT_CURSOR **cursorp)
cursor->json_private = child->json_private = json;
}
- /* __wt_cursor_init is last so we don't have to clean up on error. */
cfg[0] = WT_CONFIG_BASE(session, WT_SESSION_open_cursor);
cfg[1] = NULL;
WT_ERR(__wt_cursor_init(cursor, NULL, owner, cfg, cursorp));
if (0) {
-err: __wt_free(session, cursor);
+err: WT_TRET(__curdump_close(cursor));
+ *cursorp = NULL;
}
return (ret);
}
diff --git a/src/cursor/cur_file.c b/src/cursor/cur_file.c
index fac903b4770..8e7bd4bbea5 100644
--- a/src/cursor/cur_file.c
+++ b/src/cursor/cur_file.c
@@ -388,11 +388,11 @@ err: API_END_RET(session, ret);
}
/*
- * __wt_curfile_create --
+ * __curfile_create --
* Open a cursor for a given btree handle.
*/
-int
-__wt_curfile_create(WT_SESSION_IMPL *session,
+static int
+__curfile_create(WT_SESSION_IMPL *session,
WT_CURSOR *owner, const char *cfg[], bool bulk, bool bitmap,
WT_CURSOR **cursorp)
{
@@ -439,6 +439,13 @@ __wt_curfile_create(WT_SESSION_IMPL *session,
cursor->value_format = btree->value_format;
cbt->btree = btree;
+ /*
+ * Increment the data-source's in-use counter; done now because closing
+ * the cursor will decrement it, and all failure paths from here close
+ * the cursor.
+ */
+ __wt_cursor_dhandle_incr_use(session);
+
if (session->dhandle->checkpoint != NULL)
F_SET(cbt, WT_CBT_NO_TXN);
@@ -478,7 +485,6 @@ __wt_curfile_create(WT_SESSION_IMPL *session,
/* Underlying btree initialization. */
__wt_btcur_open(cbt);
- /* __wt_cursor_init is last so we don't have to clean up on error. */
WT_ERR(__wt_cursor_init(
cursor, cursor->internal_uri, owner, cfg, cursorp));
@@ -486,7 +492,8 @@ __wt_curfile_create(WT_SESSION_IMPL *session,
WT_STAT_FAST_DATA_INCR(session, cursor_create);
if (0) {
-err: __wt_free(session, cbt);
+err: WT_TRET(__curfile_close(cursor));
+ *cursorp = NULL;
}
return (ret);
@@ -503,9 +510,10 @@ __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri,
WT_CONFIG_ITEM cval;
WT_DECL_RET;
uint32_t flags;
- bool bitmap, bulk;
+ bool bitmap, bulk, checkpoint_wait;
bitmap = bulk = false;
+ checkpoint_wait = true;
flags = 0;
/*
@@ -531,6 +539,12 @@ __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri,
else if (!WT_STRING_MATCH("unordered", cval.str, cval.len))
WT_RET_MSG(session, EINVAL,
"Value for 'bulk' must be a boolean or 'bitmap'");
+
+ if (bulk) {
+ WT_RET(__wt_config_gets(session,
+ cfg, "checkpoint_wait", &cval));
+ checkpoint_wait = cval.val != 0;
+ }
}
/* Bulk handles require exclusive access. */
@@ -540,11 +554,11 @@ __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri,
/* Get the handle and lock it while the cursor is using it. */
if (WT_PREFIX_MATCH(uri, "file:")) {
/*
- * If we are opening exclusive, get the handle while holding
- * the checkpoint lock. This prevents a bulk cursor open
- * failing with EBUSY due to a database-wide checkpoint.
+ * If we are opening exclusive and don't want a bulk cursor
+ * open to fail with EBUSY due to a database-wide checkpoint,
+ * get the handle while holding the checkpoint lock.
*/
- if (LF_ISSET(WT_DHANDLE_EXCLUSIVE))
+ if (LF_ISSET(WT_DHANDLE_EXCLUSIVE) && checkpoint_wait)
WT_WITH_CHECKPOINT_LOCK(session, ret,
ret = __wt_session_get_btree_ckpt(
session, uri, cfg, flags));
@@ -555,10 +569,8 @@ __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri,
} else
WT_RET(__wt_bad_object_type(session, uri));
- WT_ERR(__wt_curfile_create(session, owner, cfg, bulk, bitmap, cursorp));
+ WT_ERR(__curfile_create(session, owner, cfg, bulk, bitmap, cursorp));
- /* Increment the data-source's in-use counter. */
- __wt_cursor_dhandle_incr_use(session);
return (0);
err: /* If the cursor could not be opened, release the handle. */
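
The new checkpoint_wait flag surfaced here (and in the config tables earlier) lets a bulk-cursor open fail immediately with EBUSY instead of waiting behind a database-wide checkpoint. A hedged example ("table:bulkload" is a placeholder URI):

    WT_CURSOR *bulk;
    int ret;

    /* Don't block behind a running checkpoint; EBUSY is returned instead. */
    ret = session->open_cursor(session, "table:bulkload", NULL,
        "bulk,checkpoint_wait=false", &bulk);
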
diff --git a/src/cursor/cur_index.c b/src/cursor/cur_index.c
index 6de68d86778..82a27d65ce6 100644
--- a/src/cursor/cur_index.c
+++ b/src/cursor/cur_index.c
@@ -263,19 +263,57 @@ err: F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
static int
__curindex_search_near(WT_CURSOR *cursor, int *exact)
{
+ WT_CURSOR *child;
WT_CURSOR_INDEX *cindex;
WT_DECL_RET;
+ WT_ITEM found_key;
WT_SESSION_IMPL *session;
+ int cmp;
cindex = (WT_CURSOR_INDEX *)cursor;
- JOINABLE_CURSOR_API_CALL(cursor, session, search_near, NULL);
- __wt_cursor_set_raw_key(cindex->child, &cursor->key);
- if ((ret = cindex->child->search_near(cindex->child, exact)) == 0)
- ret = __curindex_move(cindex);
- else
- F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
+ child = cindex->child;
+ JOINABLE_CURSOR_API_CALL(cursor, session, search, NULL);
-err: API_END_RET(session, ret);
+ /*
+ * We are searching using the application-specified key, which
+ * (usually) doesn't contain the primary key, so it is just a prefix of
+ * any matching index key. That said, if there is an exact match, we
+ * want to find the first matching index entry and set exact equal to
+ * zero. Do a search_near, step to the next entry if we land on one
+ * that is too small, then check that the prefix matches.
+ */
+ __wt_cursor_set_raw_key(child, &cursor->key);
+ WT_ERR(child->search_near(child, &cmp));
+
+ if (cmp < 0)
+ WT_ERR(child->next(child));
+
+ /*
+ * We expect partial matches, and want the smallest record with a key
+ * greater than or equal to the search key.
+ *
+ * If the key we find is shorter than the search key, it can't possibly
+ * match.
+ *
+ * The only way for the key to be exactly equal is if there is an index
+ * on the primary key, because otherwise the primary key columns will
+ * be appended to the index key, but we don't disallow that (odd) case.
+ */
+ found_key = child->key;
+ if (found_key.size < cursor->key.size)
+ WT_ERR(WT_NOTFOUND);
+ found_key.size = cursor->key.size;
+
+ WT_ERR(__wt_compare(
+ session, cindex->index->collator, &cursor->key, &found_key, exact));
+
+ WT_ERR(__curindex_move(cindex));
+
+ if (0) {
+err: F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
+ }
+
+ API_END_RET(session, ret);
}
/*
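
The rewritten search_near above treats the application's key as a prefix of the stored index key (which normally has the primary key columns appended), so an exact prefix match now reports exact equal to zero. A hedged usage sketch, assuming a string-keyed index named "index:people:lastname":

    WT_CURSOR *idx;
    int exact, ret;

    ret = session->open_cursor(session, "index:people:lastname", NULL, NULL, &idx);
    idx->set_key(idx, "Smith");
    if ((ret = idx->search_near(idx, &exact)) == 0 && exact == 0) {
        /* The positioned index entry's key begins with "Smith" exactly. */
    }
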
diff --git a/src/cursor/cur_log.c b/src/cursor/cur_log.c
index 0a13803da5d..2adf0c2b8ab 100644
--- a/src/cursor/cur_log.c
+++ b/src/cursor/cur_log.c
@@ -315,16 +315,16 @@ __curlog_close(WT_CURSOR *cursor)
WT_CONNECTION_IMPL *conn;
WT_CURSOR_LOG *cl;
WT_DECL_RET;
- WT_LOG *log;
WT_SESSION_IMPL *session;
CURSOR_API_CALL(cursor, session, close, NULL);
cl = (WT_CURSOR_LOG *)cursor;
conn = S2C(session);
+
WT_ASSERT(session, FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED));
- log = conn->log;
- WT_TRET(__wt_readunlock(session, log->log_archive_lock));
- WT_TRET(__curlog_reset(cursor));
+ if (F_ISSET(cl, WT_CURLOG_ARCHIVE_LOCK))
+ WT_TRET(__wt_readunlock(session, conn->log->log_archive_lock));
+
__wt_free(session, cl->cur_lsn);
__wt_free(session, cl->next_lsn);
__wt_scr_free(session, &cl->logrec);
@@ -332,6 +332,7 @@ __curlog_close(WT_CURSOR *cursor)
__wt_scr_free(session, &cl->opvalue);
__wt_free(session, cl->packed_key);
__wt_free(session, cl->packed_value);
+
WT_TRET(__wt_cursor_close(cursor));
err: API_END_RET(session, ret);
@@ -401,23 +402,10 @@ __wt_curlog_open(WT_SESSION_IMPL *session,
/* Log cursors block archiving. */
WT_ERR(__wt_readlock(session, log->log_archive_lock));
+ F_SET(cl, WT_CURLOG_ARCHIVE_LOCK);
if (0) {
-err: if (F_ISSET(cursor, WT_CURSTD_OPEN))
- WT_TRET(cursor->close(cursor));
- else {
- __wt_free(session, cl->cur_lsn);
- __wt_free(session, cl->next_lsn);
- __wt_scr_free(session, &cl->logrec);
- __wt_scr_free(session, &cl->opkey);
- __wt_scr_free(session, &cl->opvalue);
- /*
- * NOTE: We cannot get on the error path with the
- * readlock held. No need to unlock it unless that
- * changes above.
- */
- __wt_free(session, cl);
- }
+err: WT_TRET(__curlog_close(cursor));
*cursorp = NULL;
}
diff --git a/src/cursor/cur_metadata.c b/src/cursor/cur_metadata.c
index 3d702e2ea8c..fc63ca13f7c 100644
--- a/src/cursor/cur_metadata.c
+++ b/src/cursor/cur_metadata.c
@@ -475,9 +475,11 @@ __curmetadata_close(WT_CURSOR *cursor)
mdc = (WT_CURSOR_METADATA *)cursor;
file_cursor = mdc->file_cursor;
CURSOR_API_CALL(cursor, session,
- close, ((WT_CURSOR_BTREE *)file_cursor)->btree);
+ close, file_cursor == NULL ?
+ NULL : ((WT_CURSOR_BTREE *)file_cursor)->btree);
- ret = file_cursor->close(file_cursor);
+ if (file_cursor != NULL)
+ ret = file_cursor->close(file_cursor);
WT_TRET(__wt_cursor_close(cursor));
err: API_END_RET(session, ret);
@@ -552,9 +554,8 @@ __wt_curmetadata_open(WT_SESSION_IMPL *session,
}
if (0) {
-err: if (mdc->file_cursor != NULL)
- WT_TRET(mdc->file_cursor->close(mdc->file_cursor));
- __wt_free(session, mdc);
+err: WT_TRET(__curmetadata_close(cursor));
+ *cursorp = NULL;
}
return (ret);
}
diff --git a/src/cursor/cur_stat.c b/src/cursor/cur_stat.c
index f7a8f5fc866..5c9159a4c0b 100644
--- a/src/cursor/cur_stat.c
+++ b/src/cursor/cur_stat.c
@@ -37,22 +37,6 @@ __curstat_print_value(WT_SESSION_IMPL *session, uint64_t v, WT_ITEM *buf)
}
/*
- * __curstat_free_config --
- * Free the saved configuration string stack
- */
-static void
-__curstat_free_config(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst)
-{
- size_t i;
-
- if (cst->cfg != NULL) {
- for (i = 0; cst->cfg[i] != NULL; ++i)
- __wt_free(session, cst->cfg[i]);
- __wt_free(session, cst->cfg);
- }
-}
-
-/*
* __curstat_get_key --
* WT_CURSOR->get_key for statistics cursors.
*/
@@ -334,11 +318,16 @@ __curstat_close(WT_CURSOR *cursor)
WT_CURSOR_STAT *cst;
WT_DECL_RET;
WT_SESSION_IMPL *session;
+ size_t i;
cst = (WT_CURSOR_STAT *)cursor;
CURSOR_API_CALL(cursor, session, close, NULL);
- __curstat_free_config(session, cst);
+ if (cst->cfg != NULL) {
+ for (i = 0; cst->cfg[i] != NULL; ++i)
+ __wt_free(session, cst->cfg[i]);
+ __wt_free(session, cst->cfg);
+ }
__wt_buf_free(session, &cst->pv);
__wt_free(session, cst->desc_buf);
@@ -691,7 +680,6 @@ __wt_curstat_open(WT_SESSION_IMPL *session,
/* The cursor isn't yet positioned. */
cst->notpositioned = true;
- /* __wt_cursor_init is last so we don't have to clean up on error. */
WT_ERR(__wt_cursor_init(cursor, uri, NULL, cfg, cursorp));
if (0) {
@@ -701,8 +689,8 @@ config_err: WT_ERR_MSG(session, EINVAL,
}
if (0) {
-err: __curstat_free_config(session, cst);
- __wt_free(session, cst);
+err: WT_TRET(__curstat_close(cursor));
+ *cursorp = NULL;
}
return (ret);
diff --git a/src/cursor/cur_table.c b/src/cursor/cur_table.c
index 6d50523043a..a14b40a1150 100644
--- a/src/cursor/cur_table.c
+++ b/src/cursor/cur_table.c
@@ -525,15 +525,20 @@ __curtable_insert(WT_CURSOR *cursor)
}
/*
- * WT_CURSOR.insert doesn't leave the cursor positioned, and the
- * application may want to free the memory used to configure the
- * insert; don't read that memory again (matching the underlying
- * file object cursor insert semantics).
+ * Insert is the one cursor operation that doesn't end with the cursor
+ * pointing to an on-page item (except for column-store appends, where
+ * we are returning a key). That is, the application's cursor continues
+ * to reference the application's memory after a successful cursor call,
+ * which isn't true anywhere else. We don't want to have to explain that
+ * scoping corner case, so we reset the application's cursor so it can
+ * free the referenced memory and continue on without risking subsequent
+ * core dumps.
*/
F_CLR(primary, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ if (F_ISSET(primary, WT_CURSTD_APPEND))
+ F_SET(primary, WT_CURSTD_KEY_INT);
err: CURSOR_UPDATE_API_END(session, ret);
-
return (ret);
}
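
To illustrate the behavior described in the comment above, here is a hedged, stand-alone sketch (the buffer names are hypothetical, a WT_CURSOR *cursor on a table is assumed, and most error handling is omitted): an application can release its key/value buffers once a table-cursor insert succeeds, because the cursor is reset and no longer references them.

    /* Sketch: the cursor drops its references once insert succeeds. */
    char *key_buf = strdup("key-1"), *value_buf = strdup("value-1");
    int ret;

    cursor->set_key(cursor, key_buf);
    cursor->set_value(cursor, value_buf);
    if ((ret = cursor->insert(cursor)) == 0) {
        /* Safe: a successful insert leaves the cursor unpositioned. */
        free(key_buf);
        free(value_buf);
    }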
diff --git a/src/docs/security.dox b/src/docs/security.dox
index 331f74d969b..82e13ae7ad3 100644
--- a/src/docs/security.dox
+++ b/src/docs/security.dox
@@ -2,10 +2,23 @@
@section directory_permissions Database directory permissions
-All WiredTiger files are stored in the database home directory, and the
-WiredTiger database directory should have its permissions set to ensure
-database objects are not accessible to users without appropriate
-permissions. See @ref home for more information.
+By default, WiredTiger files are stored beneath the database home directory.
+The WiredTiger database directory should have its permissions set to ensure
+database objects are not accessible to users without appropriate permissions.
+See @ref home for more information.
+
+@section absolute_path Absolute paths
+
+WiredTiger prepends the name of the database home to file names which
+do not appear to be absolute paths. (The absolute path test is
+simplistic, matching a leading slash character on POSIX systems or a
+leading alphabetic character and colon on Windows.) No file path
+sanitization or validation is done by WiredTiger; for example, file
+paths may use universal naming conventions (UNC) or include \c "../"
+(dot dot slash) components.
+
+Applications are responsible for validating user-supplied file paths as
+necessary to prevent directory traversal attacks.
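
As a hedged illustration of such application-side validation (the helper below is hypothetical and not part of the WiredTiger API), a check might reject parent-directory components and Windows drive or UNC prefixes before a name is passed to the library:

    #include <ctype.h>
    #include <stdbool.h>
    #include <string.h>

    /* Hypothetical application check: reject suspicious file names. */
    static bool
    app_file_name_is_safe(const char *name)
    {
        /* Reject absolute POSIX paths and parent-directory components. */
        if (name[0] == '/' || strstr(name, "..") != NULL)
            return (false);
        /* Reject Windows drive letters ("C:") and UNC prefixes ("\\"). */
        if ((isalpha((unsigned char)name[0]) && name[1] == ':') ||
            (name[0] == '\\' && name[1] == '\\'))
            return (false);
        return (true);
    }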
@section file_permissions File permissions
diff --git a/src/docs/spell.ok b/src/docs/spell.ok
index 96fe04d7426..e08eb7d1447 100644
--- a/src/docs/spell.ok
+++ b/src/docs/spell.ok
@@ -80,6 +80,7 @@ SCons
Seward's
SiH
TXT
+UNC
URIs
WILLNEED
WiredTiger
@@ -368,6 +369,7 @@ php
png
posix
pre
+prepends
primary's
printf
printlog
@@ -411,6 +413,7 @@ runtime
rwlock
sHQ
sHq
+sanitization
scalable
scanf
schemas
diff --git a/src/docs/statistics.dox b/src/docs/statistics.dox
index 0a29e351e4e..36ce2711dc5 100644
--- a/src/docs/statistics.dox
+++ b/src/docs/statistics.dox
@@ -90,11 +90,20 @@ cursor.
@section statistics_log Statistics logging
-WiredTiger will optionally log database statistics into a file when the
+WiredTiger will optionally log database statistics into files when
the ::wiredtiger_open \c statistics_log configuration is set.
-The resulting statistics can be displayed using the \c wtstats visualization
-tool. For more information, see @ref_single wtstats.
+The log files are named \c WiredTiger.%%d.%%H, where \c %%d is replaced
+with the day of the month as a decimal number (01-31), and \c %%H
+is replaced by the hour (24-hour clock) as a decimal number (00-23).
+Each log file contains the statistics for the hour specified in its name.
+
+The location of the log files may be changed with the \c statistics_log.path
+configuration string.
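
For example (an illustrative sketch, not one of the documented snippets; the "stat_logs" directory name is hypothetical), the path can be combined with the wait interval in the ::wiredtiger_open configuration:

    /* Log statistics every 30 seconds into the "stat_logs" directory. */
    WT_CONNECTION *conn;
    int ret;

    ret = wiredtiger_open(home, NULL,
        "create,statistics=(fast),"
        "statistics_log=(wait=30,path=\"stat_logs\")", &conn);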
+
+The resulting statistics can be displayed and interactively examined
+using the \c wtstats visualization tool. For more information, see
+@ref_single wtstats.
The following example logs statistics every 30 seconds:
@@ -120,7 +129,7 @@ Statistics for all underlying data sources of a particular type may be
included by adding a partial data source URI to the \c statistics_log
configuration string:
-@snippet ex_all.c Statistics logging with all tables
+@snippet ex_all.c Statistics logging with a source type
When database statistics are logged, the database home will be the first
space-separated entry for each record in the log file. For example:
@@ -151,23 +160,9 @@ currently open in the database, nor will any statistics requiring the
traversal of a tree (as if the \c statistics_fast configuration string
were set).
-The location of the log files may be changed with the \c statistics_log.path
-configuration string. The \c path value value may contain ISO C90 standard
-strftime conversion specifications. WiredTiger will not create non-existent
-directories in the path, they must exist before ::wiredtiger_open is called.
-
-The following example logs statistics into files named with the month,
-day and year:
-
-@snippet ex_all.c Statistics logging with path
-
A Python script that parses the default logging output and uses the
<a href="http://www.gnuplot.info/">gnuplot</a>, utility to generate
Portable Network Graphics (PNG) format graphs is included in the
WiredTiger distribution in the file \c tools/statlog.py.
-@m_if{c}
-To interactively examine statistics results, see @ref wtstats.
-@m_endif
-
*/
diff --git a/src/docs/upgrading.dox b/src/docs/upgrading.dox
index 5e824fee977..9d3d2239bb4 100644
--- a/src/docs/upgrading.dox
+++ b/src/docs/upgrading.dox
@@ -1,5 +1,48 @@
/*! @page upgrading Upgrading WiredTiger applications
+@section version_281 Upgrading to Version 2.8.1
+<dl>
+<dt>Cache management defaults</dt>
+<dd>
+The default values for the \c eviction_dirty_target and \c
+eviction_dirty_trigger settings to ::wiredtiger_open have changed to 5 and 20,
+respectively. This means that by default, WiredTiger will start writing dirty
+pages from cache when it becomes 5% dirty and will throttle activity to keep
+the volume of dirty data in cache under 20%. For write-heavy workloads, the
+new defaults may result in lower throughput and more threads writing to data
+files concurrently.
+
+These settings also now determine how much work is done at the beginning of a
+checkpoint to make the critical section of checkpoints complete more quickly.
+</dd>
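
Applications that prefer different behavior can set these values explicitly at open time. A minimal sketch follows; the 10/40 percentages are illustrative only, not a recommendation and not the previous defaults:

    /* Raise the dirty-eviction thresholds above the new defaults. */
    WT_CONNECTION *conn;
    int ret;

    ret = wiredtiger_open(home, NULL,
        "create,eviction_dirty_target=10,eviction_dirty_trigger=40", &conn);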
+
+<dt>Checkpoint server created checkpoint names</dt>
+<dd>
+The ::wiredtiger_open checkpoint configuration no longer supports the
+\c name configuration, and checkpoints created by the checkpoint server
+will always be given the default WiredTiger checkpoint name,
+"WiredTigerCheckpoint". Applications depending on the ability to set the
+checkpoint name for the checkpoint server will require modification.
+</dd>
+
+<dt>Statistics logging path</dt>
+<dd>
+The ::wiredtiger_open statistics logging path configuration has been
+simplified to be only a path to a directory, and the file name component
+of the path may no longer be specified. Applications depending on the
+ability to set statistics log file names will require modification.
+</dd>
+
+<dt>Deprecated statistics field</dt>
+<dd>
+The connection statistic \c WT_STAT_CONN_CACHE_BYTES_OVERFLOW has been
+removed. Overflow information is now available in the
+\c WT_STAT_CONN_CACHE_BYTES_OVERFLOW and \c WT_STAT_CONN_CACHE_OVERFLOW_VALUE
+statistics. Applications specifically looking for the removed statistic will
+require modification.
+</dd>
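
As a hedged sketch of reading the renamed information through the statistics interface (assuming an open WT_CONNECTION *conn; error handling omitted):

    /* Read one connection statistic with a statistics cursor. */
    WT_CURSOR *cursor;
    WT_SESSION *session;
    const char *desc, *pvalue;
    int64_t value;
    int ret;

    ret = conn->open_session(conn, NULL, NULL, &session);
    ret = session->open_cursor(session, "statistics:", NULL, NULL, &cursor);
    cursor->set_key(cursor, WT_STAT_CONN_CACHE_OVERFLOW_VALUE);
    ret = cursor->search(cursor);
    ret = cursor->get_value(cursor, &desc, &pvalue, &value);
    ret = cursor->close(cursor);
    ret = session->close(session, NULL);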
+
+</dl><hr>
@section version_280 Upgrading to Version 2.8.0
<dl>
<dt>LSM metadata</dt>
@@ -55,7 +98,6 @@ The WiredTiger public API used to define a structure that could encapsulate
log sequence numbers. That structure is no longer exposed publicly.
</dd>
-<dt>
</dl><hr>
@section version_270 Upgrading to Version 2.7.0
diff --git a/src/docs/wtperf.dox b/src/docs/wtperf.dox
index e06272d117c..17b95660f79 100644
--- a/src/docs/wtperf.dox
+++ b/src/docs/wtperf.dox
@@ -210,6 +210,8 @@ if non zero choose a value from within this range as the key for
insert operations
@par random_value (boolean, default=false)
generate random content for the value
+@par range_partition (boolean, default=false)
+partition data by range (vs hash)
@par read_range (unsigned int, default=0)
scan a range of keys after each search
@par readonly (boolean, default=false)
diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c
index 8ea487bbf83..7d3fd838dcd 100644
--- a/src/evict/evict_lru.c
+++ b/src/evict/evict_lru.c
@@ -17,7 +17,7 @@ static int __evict_page(WT_SESSION_IMPL *, bool);
static int __evict_pass(WT_SESSION_IMPL *);
static int __evict_server(WT_SESSION_IMPL *, bool *);
static int __evict_walk(WT_SESSION_IMPL *, uint32_t);
-static int __evict_walk_file(WT_SESSION_IMPL *, uint32_t, u_int *);
+static int __evict_walk_file(WT_SESSION_IMPL *, uint32_t, u_int, u_int *);
/*
* __evict_read_gen --
@@ -31,11 +31,6 @@ __evict_read_gen(const WT_EVICT_ENTRY *entry)
uint64_t read_gen;
btree = entry->btree;
-
- /* Never prioritize empty slots. */
- if (entry->ref == NULL)
- return (UINT64_MAX);
-
page = entry->ref->page;
/* Any page set to the oldest generation should be discarded. */
@@ -70,14 +65,15 @@ __evict_read_gen(const WT_EVICT_ENTRY *entry)
* Qsort function: sort the eviction array.
*/
static int WT_CDECL
-__evict_lru_cmp(const void *a, const void *b)
+__evict_lru_cmp(const void *a_arg, const void *b_arg)
{
- uint64_t a_lru, b_lru;
+ const WT_EVICT_ENTRY *a = a_arg, *b = b_arg;
+ uint64_t a_score, b_score;
- a_lru = __evict_read_gen(a);
- b_lru = __evict_read_gen(b);
+ a_score = (a->ref == NULL ? UINT64_MAX : a->score);
+ b_score = (b->ref == NULL ? UINT64_MAX : b->score);
- return ((a_lru < b_lru) ? -1 : (a_lru == b_lru) ? 0 : 1);
+ return ((a_score < b_score) ? -1 : (a_score == b_score) ? 0 : 1);
}
/*
@@ -542,54 +538,49 @@ __evict_update_work(WT_SESSION_IMPL *session)
cache->evict_max_refs_per_file =
WT_MAX(100, WT_MILLION / (conn->open_file_count + 1));
+ if (cache->evict_queues[WT_EVICT_URGENT_QUEUE].evict_current != NULL)
+ FLD_SET(cache->state, WT_EVICT_STATE_URGENT);
+
/*
- * Page eviction overrides the dirty target and other types of eviction,
- * that is, we don't care where we are with respect to the dirty target
- * if page eviction is configured.
+ * If we need space in the cache, try to find clean pages to evict.
*
* Avoid division by zero if the cache size has not yet been set in a
* shared cache.
*/
bytes_max = conn->cache_size + 1;
bytes_inuse = __wt_cache_bytes_inuse(cache);
- if (bytes_inuse > (cache->eviction_target * bytes_max) / 100) {
- FLD_SET(cache->state, WT_EVICT_PASS_ALL);
- goto done;
- }
+ if (bytes_inuse > (cache->eviction_target * bytes_max) / 100)
+ FLD_SET(cache->state, WT_EVICT_STATE_CLEAN);
/*
- * If the cache has been stuck and is now under control, clear the
- * stuck flag.
+ * Scrub dirty pages and keep them in cache if we are less than
+ * halfway between the cache target and trigger.
*/
- if (bytes_inuse < bytes_max)
- F_CLR(cache, WT_CACHE_STUCK);
+ if (bytes_inuse < ((cache->eviction_target + cache->eviction_trigger) *
+ bytes_max) / 200)
+ FLD_SET(cache->state, WT_EVICT_STATE_SCRUB);
- dirty_inuse = __wt_cache_dirty_inuse(cache);
- if (dirty_inuse > (cache->eviction_dirty_target * bytes_max) / 100) {
- FLD_SET(cache->state, WT_EVICT_PASS_DIRTY);
- goto done;
- }
+ dirty_inuse = __wt_cache_dirty_leaf_inuse(cache);
+ if (dirty_inuse > (cache->eviction_dirty_target * bytes_max) / 100)
+ FLD_SET(cache->state, WT_EVICT_STATE_DIRTY);
/*
- * Evict pages with oldest generation (which would otherwise block
- * application threads), set regardless of whether we have reached
- * the eviction trigger.
+ * If the cache has been stuck and is now under control, clear the
+ * stuck flag.
*/
- if (F_ISSET(cache, WT_CACHE_WOULD_BLOCK)) {
- FLD_SET(cache->state, WT_EVICT_PASS_WOULD_BLOCK);
-
- F_CLR(cache, WT_CACHE_WOULD_BLOCK);
- goto done;
- }
-
- return (false);
+ if (bytes_inuse < bytes_max &&
+ dirty_inuse < (cache->eviction_dirty_trigger * bytes_max) / 100)
+ F_CLR(cache, WT_CACHE_STUCK);
-done: if (F_ISSET(cache, WT_CACHE_STUCK)) {
+ if (F_ISSET(cache, WT_CACHE_STUCK)) {
+ WT_ASSERT(session, cache->state != 0);
WT_STAT_FAST_CONN_SET(session,
cache_eviction_aggressive_set, 1);
- FLD_SET(cache->state, WT_EVICT_PASS_AGGRESSIVE);
+ FLD_SET(cache->state, WT_EVICT_STATE_AGGRESSIVE);
}
- return (true);
+
+ return (FLD_ISSET(cache->state,
+ WT_EVICT_STATE_ALL | WT_EVICT_STATE_URGENT));
}
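
To make the threshold arithmetic above concrete, here is a stand-alone sketch of the same calculations. The 80/95 clean-cache percentages are illustrative assumptions; only the 5/20 dirty defaults are stated elsewhere in this change.

    #include <stdint.h>

    #define STATE_CLEAN 0x1u    /* need space: evict clean pages */
    #define STATE_DIRTY 0x2u    /* too much dirty data: write dirty pages */
    #define STATE_SCRUB 0x4u    /* below the midpoint: keep scrubbed pages */

    /* Sketch of the state decisions, mirroring __evict_update_work. */
    static uint32_t
    evict_state(uint64_t bytes_inuse, uint64_t dirty_inuse, uint64_t bytes_max)
    {
        uint64_t target = 80, trigger = 95, dirty_target = 5;  /* percents */
        uint32_t state = 0;

        if (bytes_inuse > (target * bytes_max) / 100)
            state |= STATE_CLEAN;
        if (bytes_inuse < ((target + trigger) * bytes_max) / 200)
            state |= STATE_SCRUB;  /* less than halfway to the trigger */
        if (dirty_inuse > (dirty_target * bytes_max) / 100)
            state |= STATE_DIRTY;
        return (state);
    }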
/*
@@ -603,7 +594,7 @@ __evict_pass(WT_SESSION_IMPL *session)
WT_CONNECTION_IMPL *conn;
WT_EVICT_WORKER *worker;
uint64_t pages_evicted;
- int loop;
+ u_int loop;
conn = S2C(session);
cache = conn->cache;
@@ -647,15 +638,14 @@ __evict_pass(WT_SESSION_IMPL *session)
if (loop > 10) {
WT_STAT_FAST_CONN_SET(session,
cache_eviction_aggressive_set, 1);
- FLD_SET(cache->state, WT_EVICT_PASS_AGGRESSIVE);
+ FLD_SET(cache->state, WT_EVICT_STATE_AGGRESSIVE);
}
/*
* Start a worker if we have capacity and we haven't reached
* the eviction targets.
*/
- if (FLD_ISSET(cache->state, WT_EVICT_PASS_ALL |
- WT_EVICT_PASS_DIRTY | WT_EVICT_PASS_WOULD_BLOCK) &&
+ if (FLD_ISSET(cache->state, WT_EVICT_STATE_ALL) &&
conn->evict_workers < conn->evict_workers_max) {
WT_RET(__wt_verbose(session, WT_VERB_EVICTSERVER,
"Starting evict worker: %"PRIu32"\n",
@@ -671,7 +661,8 @@ __evict_pass(WT_SESSION_IMPL *session)
WT_RET(__wt_verbose(session, WT_VERB_EVICTSERVER,
"Eviction pass with: Max: %" PRIu64
" In use: %" PRIu64 " Dirty: %" PRIu64,
- conn->cache_size, cache->bytes_inmem, cache->bytes_dirty));
+ conn->cache_size, cache->bytes_inmem,
+ cache->bytes_dirty_intl + cache->bytes_dirty_leaf));
WT_RET(__evict_lru_walk(session));
WT_RET_NOTFOUND_OK(__evict_lru_pages(session, true));
@@ -682,29 +673,32 @@ __evict_pass(WT_SESSION_IMPL *session)
* sleep, it's not something we can fix.
*/
if (pages_evicted == cache->pages_evict) {
- WT_STAT_FAST_CONN_INCR(session,
- cache_eviction_server_slept);
/*
* Back off if we aren't making progress: walks hold
- * the handle list lock, which blocks other operations
- * that can free space in cache, such as LSM discarding
+ * the handle list lock, blocking other operations that
+ * can free space in cache, such as LSM discarding
* handles.
+ *
+ * Allow this wait to be interrupted (e.g. if a
+ * checkpoint completes): make sure we wait for a
+ * non-zero number of microseconds.
*/
- __wt_sleep(0, WT_THOUSAND * (uint64_t)loop);
+ WT_STAT_FAST_CONN_INCR(session,
+ cache_eviction_server_slept);
+ WT_RET(__wt_cond_wait(session,
+ cache->evict_cond, WT_THOUSAND * WT_MAX(loop, 1)));
+
if (loop == 100) {
/*
* Mark the cache as stuck if we need space
* and aren't evicting any pages.
*/
- if (!FLD_ISSET(cache->state,
- WT_EVICT_PASS_WOULD_BLOCK)) {
- F_SET(cache, WT_CACHE_STUCK);
- WT_STAT_FAST_CONN_INCR(
- session, cache_eviction_slow);
- WT_RET(__wt_verbose(
- session, WT_VERB_EVICTSERVER,
- "unable to reach eviction goal"));
- }
+ F_SET(cache, WT_CACHE_STUCK);
+ WT_STAT_FAST_CONN_INCR(
+ session, cache_eviction_slow);
+ WT_RET(__wt_verbose(
+ session, WT_VERB_EVICTSERVER,
+ "unable to reach eviction goal"));
break;
}
} else {
@@ -927,26 +921,29 @@ __evict_lru_walk(WT_SESSION_IMPL *session)
{
WT_CACHE *cache;
WT_DECL_RET;
- WT_EVICT_QUEUE *evict_queue;
- uint64_t cutoff, read_gen_oldest;
+ WT_EVICT_QUEUE *queue;
+ uint64_t read_gen_oldest;
uint32_t candidates, entries, queue_index;
cache = S2C(session)->cache;
- queue_index = cache->evict_queue_fill++ % WT_EVICT_QUEUE_MAX;
- evict_queue = &cache->evict_queues[queue_index];
+ /* Fill the next queue (that isn't the urgent queue). */
+ queue_index =
+ 1 + (cache->evict_queue_fill++ % (WT_EVICT_QUEUE_MAX - 1));
+ queue = &cache->evict_queues[queue_index];
+
/* Get some more pages to consider for eviction. */
if ((ret = __evict_walk(cache->walk_session, queue_index)) != 0)
return (ret == EBUSY ? 0 : ret);
/* Sort the list into LRU order and restart. */
- __wt_spin_lock(session, &evict_queue->evict_lock);
+ __wt_spin_lock(session, &queue->evict_lock);
- entries = evict_queue->evict_entries;
- qsort(evict_queue->evict_queue,
+ entries = queue->evict_entries;
+ qsort(queue->evict_queue,
entries, sizeof(WT_EVICT_ENTRY), __evict_lru_cmp);
- while (entries > 0 && evict_queue->evict_queue[entries - 1].ref == NULL)
+ while (entries > 0 && queue->evict_queue[entries - 1].ref == NULL)
--entries;
/*
@@ -956,9 +953,9 @@ __evict_lru_walk(WT_SESSION_IMPL *session)
*/
while (entries > WT_EVICT_WALK_BASE)
__evict_list_clear(session,
- &evict_queue->evict_queue[--entries]);
+ &queue->evict_queue[--entries]);
- evict_queue->evict_entries = entries;
+ queue->evict_entries = entries;
if (entries == 0) {
/*
@@ -966,23 +963,19 @@ __evict_lru_walk(WT_SESSION_IMPL *session)
* Make sure application threads don't read past the end of the
* candidate list, or they may race with the next walk.
*/
- evict_queue->evict_candidates = 0;
- __wt_spin_unlock(session, &evict_queue->evict_lock);
- __wt_spin_lock(session, &cache->evict_queue_lock);
- cache->evict_current = NULL;
- cache->evict_current_queue = NULL;
- __wt_spin_unlock(session, &cache->evict_queue_lock);
+ queue->evict_candidates = 0;
+ queue->evict_current = NULL;
+ __wt_spin_unlock(session, &queue->evict_lock);
return (0);
}
/* Decide how many of the candidates we're going to try and evict. */
- if (FLD_ISSET(cache->state,
- WT_EVICT_PASS_AGGRESSIVE | WT_EVICT_PASS_WOULD_BLOCK)) {
+ if (FLD_ISSET(cache->state, WT_EVICT_STATE_AGGRESSIVE)) {
/*
* Take all candidates if we only gathered pages with an oldest
* read generation set.
*/
- evict_queue->evict_candidates = entries;
+ queue->evict_candidates = entries;
} else {
/*
* Find the oldest read generation we have in the queue, used
@@ -992,8 +985,7 @@ __evict_lru_walk(WT_SESSION_IMPL *session)
read_gen_oldest = WT_READGEN_OLDEST;
for (candidates = 0; candidates < entries; ++candidates) {
read_gen_oldest =
- __evict_read_gen(
- &evict_queue->evict_queue[candidates]);
+ queue->evict_queue[candidates].score;
if (read_gen_oldest != WT_READGEN_OLDEST)
break;
}
@@ -1002,51 +994,45 @@ __evict_lru_walk(WT_SESSION_IMPL *session)
* Take all candidates if we only gathered pages with an oldest
* read generation set.
*
- * We normally never take more than 50% of the entries; if 50%
- * of the entries were at the oldest read generation, take them.
+ * We normally never take more than 50% of the entries but if
+ * 50% of the entries were at the oldest read generation, take
+ * all of them.
*/
if (read_gen_oldest == WT_READGEN_OLDEST)
- evict_queue->evict_candidates = entries;
- else if (candidates >= entries / 2)
- evict_queue->evict_candidates = candidates;
+ queue->evict_candidates = entries;
+ else if (candidates > entries / 2)
+ queue->evict_candidates = candidates;
else {
- /* Save the calculated oldest generation. */
- cache->read_gen_oldest = read_gen_oldest;
-
- /* Find the bottom 25% of read generations. */
- cutoff =
- (3 * read_gen_oldest + __evict_read_gen(
- &evict_queue->evict_queue[entries - 1])) / 4;
-
/*
- * Don't take less than 10% or more than 50% of entries,
- * regardless. That said, if there is only one entry,
- * which is normal when populating an empty file, don't
- * exclude it.
+ * Take all of the urgent pages plus a third of
+ * ordinary candidates (which could be expressed as
+ * WT_EVICT_WALK_INCR / WT_EVICT_WALK_BASE). In the
+ * steady state, we want to get as many candidates as
+ * the eviction walk adds to the queue.
+ *
+ * That said, if there is only one entry, which is
+ * normal when populating an empty file, don't exclude
+ * it.
*/
- for (candidates = 1 + entries / 10;
- candidates < entries / 2;
- candidates++)
- if (__evict_read_gen(
- &evict_queue->evict_queue[candidates]) >
- cutoff)
- break;
- evict_queue->evict_candidates = candidates;
+ queue->evict_candidates =
+ 1 + candidates + ((entries - candidates) - 1) / 3;
+ cache->read_gen_oldest = read_gen_oldest;
}
}
- __wt_spin_unlock(session, &evict_queue->evict_lock);
+ queue->evict_current = queue->evict_queue;
+ __wt_spin_unlock(session, &queue->evict_lock);
+
/*
* Now we can set the next queue.
*/
__wt_spin_lock(session, &cache->evict_queue_lock);
- if (cache->evict_current == NULL)
+ if (cache->evict_current_queue->evict_current == NULL)
WT_STAT_FAST_CONN_INCR(session, cache_eviction_queue_empty);
else
WT_STAT_FAST_CONN_INCR(session, cache_eviction_queue_not_empty);
- cache->evict_current = evict_queue->evict_queue;
- cache->evict_current_queue = evict_queue;
+ cache->evict_current_queue = queue;
__wt_spin_unlock(session, &cache->evict_queue_lock);
/*
@@ -1070,9 +1056,8 @@ __evict_walk(WT_SESSION_IMPL *session, uint32_t queue_index)
WT_CONNECTION_IMPL *conn;
WT_DATA_HANDLE *dhandle;
WT_DECL_RET;
- WT_EVICT_QUEUE *evict_queue;
- u_int max_entries, prev_slot, retries;
- u_int slot, start_slot, spins;
+ WT_EVICT_QUEUE *queue;
+ u_int max_entries, prev_slot, retries, slot, start_slot, spins;
bool dhandle_locked, incr;
conn = S2C(session);
@@ -1086,9 +1071,9 @@ __evict_walk(WT_SESSION_IMPL *session, uint32_t queue_index)
* Set the starting slot in the queue and the maximum pages added
* per walk.
*/
- evict_queue = &cache->evict_queues[queue_index];
- start_slot = slot = evict_queue->evict_entries;
- max_entries = slot + WT_EVICT_WALK_INCR;
+ queue = &cache->evict_queues[queue_index];
+ start_slot = slot = queue->evict_entries;
+ max_entries = WT_MIN(slot + WT_EVICT_WALK_INCR, cache->evict_slots);
retry: while (slot < max_entries && ret == 0) {
/*
@@ -1158,7 +1143,7 @@ retry: while (slot < max_entries && ret == 0) {
*/
if ((btree->checkpointing != WT_CKPT_OFF ||
btree->evict_priority != 0) &&
- !FLD_ISSET(cache->state, WT_EVICT_PASS_AGGRESSIVE))
+ !FLD_ISSET(cache->state, WT_EVICT_STATE_AGGRESSIVE))
continue;
/* Skip files if we have used all available hazard pointers. */
@@ -1171,7 +1156,6 @@ retry: while (slot < max_entries && ret == 0) {
* useful in the past.
*/
if (btree->evict_walk_period != 0 &&
- evict_queue->evict_entries >= WT_EVICT_WALK_INCR &&
btree->evict_walk_skips++ < btree->evict_walk_period)
continue;
btree->evict_walk_skips = 0;
@@ -1197,8 +1181,8 @@ retry: while (slot < max_entries && ret == 0) {
if (!F_ISSET(btree, WT_BTREE_NO_EVICTION)) {
cache->evict_file_next = dhandle;
WT_WITH_DHANDLE(session, dhandle,
- ret = __evict_walk_file(
- session, queue_index, &slot));
+ ret = __evict_walk_file(session,
+ queue_index, max_entries, &slot));
WT_ASSERT(session, session->split_gen == 0);
}
__wt_spin_unlock(session, &cache->evict_walk_lock);
@@ -1234,39 +1218,49 @@ retry: while (slot < max_entries && ret == 0) {
if (cache->pass_intr == 0 && ret == 0 &&
slot < max_entries && (retries < 2 ||
(retries < 10 &&
- !FLD_ISSET(cache->state, WT_EVICT_PASS_WOULD_BLOCK) &&
- (slot == evict_queue->evict_entries || slot > start_slot)))) {
+ (slot == queue->evict_entries || slot > start_slot)))) {
start_slot = slot;
++retries;
goto retry;
}
- evict_queue->evict_entries = slot;
+ queue->evict_entries = slot;
return (ret);
}
/*
- * __evict_init_candidate --
+ * __evict_push_candidate --
* Initialize a WT_EVICT_ENTRY structure with a given page.
*/
-static void
-__evict_init_candidate(WT_SESSION_IMPL *session,
- WT_EVICT_QUEUE *evict_queue, WT_EVICT_ENTRY *evict, WT_REF *ref)
+static bool
+__evict_push_candidate(WT_SESSION_IMPL *session,
+ WT_EVICT_QUEUE *queue, WT_EVICT_ENTRY *evict, WT_REF *ref)
{
u_int slot;
+ uint8_t orig_flags, new_flags;
+
+ /*
+ * Threads can race to queue a page (e.g., an ordinary LRU walk can
+ * race with a page being queued for urgent eviction.
+ */
+ orig_flags = new_flags = ref->page->flags_atomic;
+ FLD_SET(new_flags, WT_PAGE_EVICT_LRU);
+ if (orig_flags == new_flags ||
+ !__wt_atomic_cas8(&ref->page->flags_atomic, orig_flags, new_flags))
+ return (false);
/* Keep track of the maximum slot we are using. */
- slot = (u_int)(evict - evict_queue->evict_queue);
- if (slot >= evict_queue->evict_max)
- evict_queue->evict_max = slot + 1;
+ slot = (u_int)(evict - queue->evict_queue);
+ if (slot >= queue->evict_max)
+ queue->evict_max = slot + 1;
if (evict->ref != NULL)
__evict_list_clear(session, evict);
- evict->ref = ref;
- evict->btree = S2BT(session);
- /* Mark the page on the list; set last to flush the other updates. */
- F_SET_ATOMIC(ref->page, WT_PAGE_EVICT_LRU);
+ evict->btree = S2BT(session);
+ evict->ref = ref;
+ evict->score = __evict_read_gen(evict);
+ return (true);
}
/*
@@ -1274,34 +1268,73 @@ __evict_init_candidate(WT_SESSION_IMPL *session,
* Get a few page eviction candidates from a single underlying file.
*/
static int
-__evict_walk_file(WT_SESSION_IMPL *session, uint32_t queue_index, u_int *slotp)
+__evict_walk_file(WT_SESSION_IMPL *session,
+ uint32_t queue_index, u_int max_entries, u_int *slotp)
{
WT_BTREE *btree;
WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_EVICT_ENTRY *end, *evict, *start;
- WT_EVICT_QUEUE *evict_queue;
+ WT_EVICT_QUEUE *queue;
WT_PAGE *page;
WT_PAGE_MODIFY *mod;
WT_REF *ref;
+ uint64_t btree_inuse, bytes_per_slot, cache_inuse;
uint64_t pages_seen, refs_walked;
- uint32_t walk_flags;
+ uint32_t remaining_slots, target_pages, total_slots, walk_flags;
int internal_pages, restarts;
bool enough, modified;
conn = S2C(session);
btree = S2BT(session);
cache = conn->cache;
- evict_queue = &cache->evict_queues[queue_index];
+ queue = &cache->evict_queues[queue_index];
internal_pages = restarts = 0;
enough = false;
- start = evict_queue->evict_queue + *slotp;
- end = start + WT_EVICT_WALK_PER_FILE;
+ /*
+ * Figure out how many slots to fill from this tree.
+ * Note that some care is taken in the calculation to avoid overflow.
+ */
+ start = queue->evict_queue + *slotp;
+ remaining_slots = max_entries - *slotp;
+ btree_inuse = __wt_btree_bytes_inuse(session);
+ cache_inuse = __wt_cache_bytes_inuse(cache);
+ total_slots = max_entries - queue->evict_entries;
+
+ /*
+ * The target number of pages for this tree is proportional to the
+ * space it is taking up in cache. Round to the nearest number of
+ * slots so we assign all of the slots to a tree filling 99+% of the
+ * cache (and only have to walk it once).
+ */
+ bytes_per_slot = cache_inuse / total_slots;
+ target_pages = (uint32_t)(
+ (btree_inuse + bytes_per_slot / 2) / bytes_per_slot);
+ if (target_pages == 0) {
+ /*
+ * Randomly walk trees with a tiny fraction of the cache in
+ * case there are so many trees that none of them use enough of
+ * the cache to be allocated slots.
+ *
+ * Map a random number into the range [0..1], and if the result
+ * is greater than the fraction of the cache used by this tree,
+ * give up. In other words, there is a small chance we will
+ * visit trees that use a small fraction of the cache. Arrange
+ * this calculation to avoid overflow (e.g., don't multiply
+ * anything by UINT32_MAX).
+ */
+ if (__wt_random(&session->rnd) / (double)UINT32_MAX >
+ btree_inuse / (double)cache_inuse)
+ return (0);
+ target_pages = 10;
+ }
+
if (F_ISSET(session->dhandle, WT_DHANDLE_DEAD) ||
- end > evict_queue->evict_queue + cache->evict_slots)
- end = evict_queue->evict_queue + cache->evict_slots;
+ target_pages > remaining_slots)
+ target_pages = remaining_slots;
+ end = start + target_pages;
walk_flags =
WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_GEN | WT_READ_NO_WAIT;
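
A stand-alone sketch of the proportional-fill calculation above, with made-up byte counts in the trailing comment:

    #include <stdint.h>

    /* Sketch: queue slots assigned to a tree, proportional to cache usage. */
    static uint32_t
    slots_for_tree(
        uint64_t btree_inuse, uint64_t cache_inuse, uint32_t total_slots)
    {
        uint64_t bytes_per_slot;

        bytes_per_slot = cache_inuse / total_slots;
        if (bytes_per_slot == 0)        /* Avoid division by zero. */
            bytes_per_slot = 1;
        /* Round to the nearest slot rather than truncating. */
        return ((uint32_t)(
            (btree_inuse + bytes_per_slot / 2) / bytes_per_slot));
    }

    /*
     * Example: with 10GB in cache and 100 slots, bytes_per_slot is 100MB; a
     * tree holding 2.55GB gets (2.55GB + 50MB) / 100MB = 26 slots, and a tree
     * holding 9.96GB (99.6% of the cache) rounds to all 100 slots, so it is
     * walked only once.
     */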
@@ -1352,14 +1385,11 @@ __evict_walk_file(WT_SESSION_IMPL *session, uint32_t queue_index, u_int *slotp)
/*
* It's possible (but unlikely) to visit a page without a read
* generation, if we race with the read instantiating the page.
- * Ignore those pages, but set the page's read generation here
- * to ensure a bug doesn't somehow leave a page without a read
- * generation.
+ * Set the page's read generation here to ensure a bug doesn't
+ * somehow leave a page without a read generation.
*/
- if (page->read_gen == WT_READGEN_NOTSET) {
+ if (page->read_gen == WT_READGEN_NOTSET)
__wt_cache_read_gen_new(session, page);
- continue;
- }
/* Pages we no longer need (clean or dirty), are found money. */
if (page->read_gen == WT_READGEN_OLDEST) {
@@ -1367,26 +1397,23 @@ __evict_walk_file(WT_SESSION_IMPL *session, uint32_t queue_index, u_int *slotp)
session, cache_eviction_pages_queued_oldest);
goto fast;
}
+
if (__wt_page_is_empty(page) ||
- F_ISSET(session->dhandle, WT_DHANDLE_DEAD))
+ F_ISSET(session->dhandle, WT_DHANDLE_DEAD) ||
+ FLD_ISSET(cache->state, WT_EVICT_STATE_AGGRESSIVE))
goto fast;
/* Skip clean pages if appropriate. */
if (!modified && (F_ISSET(conn, WT_CONN_IN_MEMORY) ||
- FLD_ISSET(cache->state, WT_EVICT_PASS_DIRTY)))
+ !FLD_ISSET(cache->state, WT_EVICT_STATE_CLEAN)))
continue;
- /*
- * If we are only trickling out pages marked for definite
- * eviction, skip anything that isn't marked.
- */
- if (FLD_ISSET(cache->state, WT_EVICT_PASS_WOULD_BLOCK) &&
- page->memory_footprint < btree->splitmempage)
+ /* Skip dirty pages if appropriate. */
+ if (modified && !FLD_ISSET(cache->state, WT_EVICT_STATE_DIRTY))
continue;
- /* Limit internal pages to 50% unless we get aggressive. */
+ /* Limit internal pages to 50% of the total. */
if (WT_PAGE_IS_INTERNAL(page) &&
- !FLD_ISSET(cache->state, WT_EVICT_PASS_AGGRESSIVE) &&
internal_pages >= (int)(evict - start) / 2)
continue;
@@ -1410,8 +1437,7 @@ fast: /* If the page can't be evicted, give up. */
* configure lookaside table writes in reconciliation, allowing
* us to evict pages we can't usually evict.
*/
- if (!FLD_ISSET(cache->state,
- WT_EVICT_PASS_AGGRESSIVE | WT_EVICT_PASS_WOULD_BLOCK)) {
+ if (!FLD_ISSET(cache->state, WT_EVICT_STATE_AGGRESSIVE)) {
/*
* If the page is clean but has modifications that
* appear too new to evict, skip it.
@@ -1422,7 +1448,8 @@ fast: /* If the page can't be evicted, give up. */
}
WT_ASSERT(session, evict->ref == NULL);
- __evict_init_candidate(session, evict_queue, evict, ref);
+ if (!__evict_push_candidate(session, queue, evict, ref))
+ continue;
++evict;
if (WT_PAGE_IS_INTERNAL(page))
@@ -1479,19 +1506,21 @@ __evict_check_entry_size(WT_SESSION_IMPL *session, WT_EVICT_ENTRY *entry)
cache = S2C(session)->cache;
- if (cache->pages_evict == 0)
+ if (cache->pages_evict == 0 || cache->bytes_evict < WT_MEGABYTE)
return (true);
max = (cache->bytes_evict / cache->pages_evict) * 4;
if ((ref = entry->ref) != NULL) {
if ((page = ref->page) == NULL)
return (true);
+
/*
- * If this page is more than four times the average evicted page
- * size then return false. Return true in all other cases.
- * XXX Should we care here if the page is dirty? Probably...
+ * If this page is dirty and more than four times the average
+ * evicted page size then return false. Return true in all
+ * other cases.
*/
- if (page->memory_footprint > max) {
+ if (__wt_page_is_modified(page) &&
+ page->memory_footprint > max) {
WT_STAT_FAST_CONN_INCR(
session, cache_eviction_server_toobig);
return (false);
@@ -1510,71 +1539,85 @@ __evict_get_ref(
{
WT_CACHE *cache;
WT_EVICT_ENTRY *evict;
- WT_EVICT_QUEUE *evict_queue;
+ WT_EVICT_QUEUE *queue, *urgent_queue;
uint32_t candidates;
cache = S2C(session)->cache;
+ urgent_queue = &cache->evict_queues[WT_EVICT_URGENT_QUEUE];
*btreep = NULL;
*refp = NULL;
- /*
- * Avoid the LRU lock if no pages are available.
- */
+ /* Avoid the LRU lock if no pages are available. */
WT_STAT_FAST_CONN_INCR(session, cache_eviction_get_ref);
- if (cache->evict_current == NULL) {
+ if (cache->evict_current_queue->evict_current == NULL &&
+ urgent_queue->evict_current == NULL) {
WT_STAT_FAST_CONN_INCR(session, cache_eviction_get_ref_empty);
return (WT_NOTFOUND);
}
+
__wt_spin_lock(session, &cache->evict_queue_lock);
+
+ /* Check the urgent queue first. */
+ queue = urgent_queue->evict_current != NULL &&
+ (FLD_ISSET(cache->state, WT_EVICT_STATE_AGGRESSIVE) ||
+ (F_ISSET(session, WT_SESSION_INTERNAL) &&
+ (!is_server || S2C(session)->evict_workers <= 1))) ?
+ urgent_queue : cache->evict_current_queue;
+
+ __wt_spin_unlock(session, &cache->evict_queue_lock);
+
/*
- * Verify there are still pages available.
+ * Only evict half of the pages before looking for more. The remainder
+ * are left to eviction workers (if configured), or application threads
+ * if necessary.
*/
- if (cache->evict_current == NULL) {
- __wt_spin_unlock(session, &cache->evict_queue_lock);
- WT_STAT_FAST_CONN_INCR(session, cache_eviction_get_ref_empty2);
- return (WT_NOTFOUND);
- }
+ candidates = queue->evict_candidates;
+ if (is_server && queue != urgent_queue && candidates > 1)
+ candidates /= 2;
+
/*
- * We got the queue lock, which should be fast, and now we want to
- * get the lock on the individual queue. We know that the shared
- * queue fields cannot change now.
+ * We got the queue lock, which should be fast, and chose a queue.
+ * Now we want to get the lock on the individual queue.
*/
- evict_queue = cache->evict_current_queue;
for (;;) {
- if (__wt_spin_trylock(session, &evict_queue->evict_lock) == 0)
- break;
- if (!F_ISSET(session, WT_SESSION_INTERNAL)) {
- __wt_spin_unlock(session, &cache->evict_queue_lock);
+ /* Verify there are still pages available. */
+ if (queue->evict_current == NULL || (uint32_t)
+ (queue->evict_current - queue->evict_queue) >= candidates) {
+ WT_STAT_FAST_CONN_INCR(
+ session, cache_eviction_get_ref_empty2);
return (WT_NOTFOUND);
}
- __wt_yield();
+ if (!is_server)
+ __wt_spin_lock(session, &queue->evict_lock);
+ else if (__wt_spin_trylock(session, &queue->evict_lock) != 0)
+ continue;
+ break;
}
- /*
- * Only evict half of the pages before looking for more. The remainder
- * are left to eviction workers (if configured), or application threads
- * if necessary.
- */
- candidates = evict_queue->evict_candidates;
- if (is_server && candidates > 1)
- candidates /= 2;
/* Get the next page queued for eviction. */
- while ((evict = cache->evict_current) != NULL &&
- evict < evict_queue->evict_queue + candidates &&
- evict->ref != NULL) {
+ for (evict = queue->evict_current;
+ evict >= queue->evict_queue &&
+ evict < queue->evict_queue + candidates;
+ ++evict) {
+ if (evict->ref == NULL)
+ continue;
WT_ASSERT(session, evict->btree != NULL);
+
/*
- * If the server is helping out and encounters an entry that
- * is too large, it stops helping. Evicting a very large
- * page in the server thread could stall eviction from finding
- * new work.
+ * If the server is helping out and encounters an entry that is
+ * too large, it stops helping. Evicting a very large page in
+ * the server thread could stall eviction from finding new
+ * work.
+ *
+ * However, we can't skip entries in the urgent queue or they
+ * may never be found again.
*/
- if (is_server && S2C(session)->evict_workers > 1 &&
- !__evict_check_entry_size(session, evict))
+ if (is_server && queue != urgent_queue &&
+ S2C(session)->evict_workers > 1 &&
+ !__evict_check_entry_size(session, evict)) {
+ --evict;
break;
-
- /* Move to the next item. */
- ++cache->evict_current;
+ }
/*
* Lock the page while holding the eviction mutex to prevent
@@ -1604,11 +1647,14 @@ __evict_get_ref(
break;
}
- /* Clear the current pointer if there are no more candidates. */
- if (evict >= evict_queue->evict_queue + evict_queue->evict_candidates)
- cache->evict_current = NULL;
- __wt_spin_unlock(session, &evict_queue->evict_lock);
- __wt_spin_unlock(session, &cache->evict_queue_lock);
+ /* Move to the next item. */
+ if (evict != NULL && evict + 1 <
+ queue->evict_queue + queue->evict_candidates)
+ queue->evict_current = evict + 1;
+ else /* Clear the current pointer if there are no more candidates. */
+ queue->evict_current = NULL;
+
+ __wt_spin_unlock(session, &queue->evict_lock);
return ((*refp == NULL) ? WT_NOTFOUND : 0);
}
@@ -1633,16 +1679,14 @@ __evict_page(WT_SESSION_IMPL *session, bool is_server)
* An internal session flags either the server itself or an eviction
* worker thread.
*/
- if (F_ISSET(session, WT_SESSION_INTERNAL)) {
- if (is_server) {
- WT_STAT_FAST_CONN_INCR(
- session, cache_eviction_server_evicting);
- cache->server_evicts++;
- } else {
- WT_STAT_FAST_CONN_INCR(
- session, cache_eviction_worker_evicting);
- cache->worker_evicts++;
- }
+ if (is_server) {
+ WT_STAT_FAST_CONN_INCR(
+ session, cache_eviction_server_evicting);
+ cache->server_evicts++;
+ } else if (F_ISSET(session, WT_SESSION_INTERNAL)) {
+ WT_STAT_FAST_CONN_INCR(
+ session, cache_eviction_worker_evicting);
+ cache->worker_evicts++;
} else {
if (__wt_page_is_modified(ref->page))
WT_STAT_FAST_CONN_INCR(
@@ -1768,6 +1812,64 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full)
}
/*
+ * __wt_page_evict_soon --
+ * Set a page to be evicted as soon as possible.
+ */
+int
+__wt_page_evict_soon(WT_SESSION_IMPL *session, WT_REF *ref)
+{
+ WT_CACHE *cache;
+ WT_EVICT_ENTRY *evict;
+ WT_EVICT_QUEUE *urgent_queue;
+ WT_PAGE *page;
+ bool queued;
+
+ /* Root pages should never be evicted via LRU. */
+ WT_ASSERT(session, !__wt_ref_is_root(ref));
+
+ page = ref->page;
+ page->read_gen = WT_READGEN_OLDEST;
+ if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU) ||
+ F_ISSET(S2BT(session), WT_BTREE_NO_EVICTION))
+ return (0);
+
+ /* Append to the urgent queue if we can. */
+ cache = S2C(session)->cache;
+ urgent_queue = &cache->evict_queues[WT_EVICT_URGENT_QUEUE];
+ queued = false;
+
+ __wt_spin_lock(session, &cache->evict_queue_lock);
+ if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU) ||
+ F_ISSET(S2BT(session), WT_BTREE_NO_EVICTION))
+ goto done;
+
+ __wt_spin_lock(session, &urgent_queue->evict_lock);
+ if (urgent_queue->evict_current == NULL) {
+ urgent_queue->evict_current = urgent_queue->evict_queue;
+ urgent_queue->evict_candidates = 0;
+ }
+ evict = urgent_queue->evict_queue + urgent_queue->evict_candidates;
+ if (evict < urgent_queue->evict_queue + WT_EVICT_QUEUE_MAX &&
+ __evict_push_candidate(session, urgent_queue, evict, ref)) {
+ ++urgent_queue->evict_candidates;
+ queued = true;
+ }
+ __wt_spin_unlock(session, &urgent_queue->evict_lock);
+
+done: __wt_spin_unlock(session, &cache->evict_queue_lock);
+ if (queued) {
+ WT_STAT_FAST_CONN_INCR(
+ session, cache_eviction_pages_queued_urgent);
+ if (S2C(session)->evict_workers > 1)
+ WT_RET(__wt_cond_signal(
+ session, cache->evict_waiter_cond));
+ else
+ WT_RET(__wt_evict_server_wake(session));
+ }
+ return (0);
+}
+
+/*
* __wt_evict_priority_set --
* Set a tree's eviction priority.
*/
@@ -1801,13 +1903,15 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile)
WT_DATA_HANDLE *dhandle, *saved_dhandle;
WT_PAGE *page;
WT_REF *next_walk;
- uint64_t dirty_bytes, dirty_pages, intl_bytes, intl_pages;
- uint64_t leaf_bytes, leaf_pages;
- uint64_t max_dirty_bytes, max_intl_bytes, max_leaf_bytes, total_bytes;
+ uint64_t intl_bytes, intl_bytes_max, intl_dirty_bytes;
+ uint64_t intl_dirty_bytes_max, intl_dirty_pages, intl_pages;
+ uint64_t leaf_bytes, leaf_bytes_max, leaf_dirty_bytes;
+ uint64_t leaf_dirty_bytes_max, leaf_dirty_pages, leaf_pages;
+ uint64_t total_bytes, total_dirty_bytes;
size_t size;
conn = S2C(session);
- total_bytes = 0;
+ total_bytes = total_dirty_bytes = 0;
if (ofile == NULL)
fp = stderr;
@@ -1823,9 +1927,10 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile)
!F_ISSET(dhandle, WT_DHANDLE_OPEN))
continue;
- dirty_bytes = dirty_pages = intl_bytes = intl_pages = 0;
- leaf_bytes = leaf_pages = 0;
- max_dirty_bytes = max_intl_bytes = max_leaf_bytes = 0;
+ intl_bytes = intl_bytes_max = intl_dirty_bytes = 0;
+ intl_dirty_bytes_max = intl_dirty_pages = intl_pages = 0;
+ leaf_bytes = leaf_bytes_max = leaf_dirty_bytes = 0;
+ leaf_dirty_bytes_max = leaf_dirty_pages = leaf_pages = 0;
next_walk = NULL;
session->dhandle = dhandle;
@@ -1838,17 +1943,23 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile)
if (WT_PAGE_IS_INTERNAL(page)) {
++intl_pages;
intl_bytes += size;
- max_intl_bytes = WT_MAX(max_intl_bytes, size);
+ intl_bytes_max = WT_MAX(intl_bytes_max, size);
+ if (__wt_page_is_modified(page)) {
+ ++intl_dirty_pages;
+ intl_dirty_bytes += size;
+ intl_dirty_bytes_max =
+ WT_MAX(intl_dirty_bytes_max, size);
+ }
} else {
++leaf_pages;
leaf_bytes += size;
- max_leaf_bytes = WT_MAX(max_leaf_bytes, size);
- }
- if (__wt_page_is_modified(page)) {
- ++dirty_pages;
- dirty_bytes += size;
- max_dirty_bytes =
- WT_MAX(max_dirty_bytes, size);
+ leaf_bytes_max = WT_MAX(leaf_bytes_max, size);
+ if (__wt_page_is_modified(page)) {
+ ++leaf_dirty_pages;
+ leaf_dirty_bytes += size;
+ leaf_dirty_bytes_max =
+ WT_MAX(leaf_dirty_bytes_max, size);
+ }
}
}
session->dhandle = NULL;
@@ -1860,21 +1971,41 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile)
dhandle->name, dhandle->checkpoint);
if (intl_pages != 0)
(void)fprintf(fp,
- "\t" "internal pages: %" PRIu64 " pages, %" PRIu64
- " max, %" PRIu64 "MB total\n",
- intl_pages, max_intl_bytes, intl_bytes >> 20);
+ "\t" "internal: "
+ "%" PRIu64 " pages, "
+ "%" PRIu64 "MB, "
+ "%" PRIu64 "/%" PRIu64 " clean/dirty pages, "
+ "%" PRIu64 "/%" PRIu64 " clean/dirty MB, "
+ "%" PRIu64 "MB max page, "
+ "%" PRIu64 "MB max dirty page\n",
+ intl_pages,
+ intl_bytes >> 20,
+ intl_pages - intl_dirty_pages,
+ intl_dirty_pages,
+ (intl_bytes - intl_dirty_bytes) >> 20,
+ intl_dirty_bytes >> 20,
+ intl_bytes_max >> 20,
+ intl_dirty_bytes_max >> 20);
if (leaf_pages != 0)
(void)fprintf(fp,
- "\t" "leaf pages: %" PRIu64 " pages, %" PRIu64
- " max, %" PRIu64 "MB total\n",
- leaf_pages, max_leaf_bytes, leaf_bytes >> 20);
- if (dirty_pages != 0)
- (void)fprintf(fp,
- "\t" "dirty pages: %" PRIu64 " pages, %" PRIu64
- " max, %" PRIu64 "MB total\n",
- dirty_pages, max_dirty_bytes, dirty_bytes >> 20);
+ "\t" "leaf: "
+ "%" PRIu64 " pages, "
+ "%" PRIu64 "MB, "
+ "%" PRIu64 "/%" PRIu64 " clean/dirty pages, "
+ "%" PRIu64 "/%" PRIu64 " clean/dirty MB, "
+ "%" PRIu64 "MB max page, "
+ "%" PRIu64 "MB max dirty page\n",
+ leaf_pages,
+ leaf_bytes >> 20,
+ leaf_pages - leaf_dirty_pages,
+ leaf_dirty_pages,
+ (leaf_bytes - leaf_dirty_bytes) >> 20,
+ leaf_dirty_bytes >> 20,
+ leaf_bytes_max >> 20,
+ leaf_dirty_bytes_max >> 20);
total_bytes += intl_bytes + leaf_bytes;
+ total_dirty_bytes += intl_dirty_bytes + leaf_dirty_bytes;
}
session->dhandle = saved_dhandle;
@@ -1886,10 +2017,13 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile)
total_bytes +=
(total_bytes * (uint64_t)conn->cache->overhead_pct) / 100;
(void)fprintf(fp,
- "cache dump: total found = %" PRIu64
- "MB vs tracked inuse %" PRIu64 "MB\n",
- total_bytes >> 20, __wt_cache_bytes_inuse(conn->cache) >> 20);
+ "cache dump: "
+ "total found = %" PRIu64 "MB vs tracked inuse %" PRIu64 "MB\n"
+ "total dirty bytes = %" PRIu64 "MB\n",
+ total_bytes >> 20, __wt_cache_bytes_inuse(conn->cache) >> 20,
+ total_dirty_bytes >> 20);
(void)fprintf(fp, "==========\n");
+
if (ofile != NULL && fclose(fp) != 0)
return (EIO);
return (0);
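
With the new format strings, the per-tree entries and the summary in the dump would look roughly like the following (numbers are made up for illustration):

        internal: 40 pages, 12MB, 38/2 clean/dirty pages, 11/1 clean/dirty MB, 1MB max page, 0MB max dirty page
        leaf: 1200 pages, 150MB, 1100/100 clean/dirty pages, 140/10 clean/dirty MB, 2MB max page, 1MB max dirty page
    cache dump: total found = 162MB vs tracked inuse 170MB
    total dirty bytes = 11MB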
diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c
index 305b81fe69e..d4c4e3e311a 100644
--- a/src/evict/evict_page.c
+++ b/src/evict/evict_page.c
@@ -10,7 +10,7 @@
static int __evict_page_clean_update(WT_SESSION_IMPL *, WT_REF *, bool);
static int __evict_page_dirty_update(WT_SESSION_IMPL *, WT_REF *, bool);
-static int __evict_review(WT_SESSION_IMPL *, WT_REF *, bool *, bool);
+static int __evict_review(WT_SESSION_IMPL *, WT_REF *, uint32_t *, bool);
/*
* __evict_exclusive_clear --
@@ -46,6 +46,55 @@ __evict_exclusive(WT_SESSION_IMPL *session, WT_REF *ref)
}
/*
+ * __wt_page_release_evict --
+ * Release a reference to a page, and attempt to immediately evict it.
+ */
+int
+__wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref)
+{
+ WT_BTREE *btree;
+ WT_DECL_RET;
+ WT_PAGE *page;
+ bool locked, too_big;
+
+ btree = S2BT(session);
+ page = ref->page;
+
+ /*
+ * Take some care with order of operations: if we release the hazard
+ * reference without first locking the page, it could be evicted in
+ * the meantime.
+ */
+ locked = __wt_atomic_casv32(&ref->state, WT_REF_MEM, WT_REF_LOCKED);
+ if ((ret = __wt_hazard_clear(session, page)) != 0 || !locked) {
+ if (locked)
+ ref->state = WT_REF_MEM;
+ return (ret == 0 ? EBUSY : ret);
+ }
+
+ (void)__wt_atomic_addv32(&btree->evict_busy, 1);
+
+ too_big = page->memory_footprint > btree->splitmempage;
+ if ((ret = __wt_evict(session, ref, false)) == 0) {
+ if (too_big)
+ WT_STAT_FAST_CONN_INCR(session, cache_eviction_force);
+ else
+ /*
+ * If the page isn't too big, we are evicting it because
+ * it had a chain of deleted entries that make traversal
+ * expensive.
+ */
+ WT_STAT_FAST_CONN_INCR(
+ session, cache_eviction_force_delete);
+ } else
+ WT_STAT_FAST_CONN_INCR(session, cache_eviction_force_fail);
+
+ (void)__wt_atomic_subv32(&btree->evict_busy, 1);
+
+ return (ret);
+}
+
+/*
* __wt_evict --
* Evict a page.
*/
@@ -56,7 +105,8 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
WT_DECL_RET;
WT_PAGE *page;
WT_PAGE_MODIFY *mod;
- bool clean_page, forced_eviction, inmem_split, tree_dead;
+ uint32_t flags;
+ bool clean_page, tree_dead;
conn = S2C(session);
@@ -64,8 +114,6 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
WT_ASSERT(session, !WT_SESSION_IS_CHECKPOINT(session));
page = ref->page;
- forced_eviction = page->read_gen == WT_READGEN_OLDEST;
- inmem_split = false;
tree_dead = F_ISSET(session->dhandle, WT_DHANDLE_DEAD);
WT_RET(__wt_verbose(session, WT_VERB_EVICT,
@@ -78,20 +126,14 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
* to make this check for clean pages, too: while unlikely eviction
* would choose an internal page with children, it's not disallowed.
*/
- WT_ERR(__evict_review(session, ref, &inmem_split, closing));
+ WT_ERR(__evict_review(session, ref, &flags, closing));
/*
* If there was an in-memory split, the tree has been left in the state
* we want: there is nothing more to do.
*/
- if (inmem_split)
- goto done;
-
- /*
- * Update the page's modification reference, reconciliation might have
- * changed it.
- */
- mod = page->modify;
+ if (LF_ISSET(WT_EVICT_INMEM_SPLIT))
+ return (0);
/* Count evictions of internal pages during normal operation. */
if (!closing && WT_PAGE_IS_INTERNAL(page)) {
@@ -108,12 +150,13 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
conn->cache->evict_max_page_size = page->memory_footprint;
/* Figure out whether reconciliation was done on the page */
+ mod = page->modify;
clean_page = mod == NULL || mod->rec_result == 0;
/* Update the reference and discard the page. */
if (__wt_ref_is_root(ref))
__wt_ref_out(session, ref);
- else if (tree_dead || (clean_page && !F_ISSET(conn, WT_CONN_IN_MEMORY)))
+ else if ((clean_page && !LF_ISSET(WT_EVICT_IN_MEMORY)) || tree_dead)
/*
* Pages that belong to dead trees never write back to disk
* and can't support page splits.
@@ -139,14 +182,9 @@ err: if (!closing)
WT_STAT_FAST_DATA_INCR(session, cache_eviction_fail);
}
-done: if (((inmem_split && ret == 0) || (forced_eviction && ret == EBUSY)) &&
- !F_ISSET(conn->cache, WT_CACHE_WOULD_BLOCK)) {
- F_SET(conn->cache, WT_CACHE_WOULD_BLOCK);
- WT_TRET(__wt_evict_server_wake(session));
- }
-
return (ret);
}
+
/*
* __evict_delete_ref --
* Mark a page reference deleted and check if the parent can reverse
@@ -210,13 +248,6 @@ __evict_page_clean_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
WT_DECL_RET;
/*
- * If doing normal system eviction, but only in the service of reducing
- * the number of dirty pages, leave the clean page in cache.
- */
- if (!closing && __wt_eviction_dirty_target(session))
- return (EBUSY);
-
- /*
* Discard the page and update the reference structure; if the page has
* an address, it's a disk page; if it has no address, it's a deleted
* page re-instantiated (for example, by searching) and never written.
@@ -242,6 +273,7 @@ __evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
WT_ADDR *addr;
WT_DECL_RET;
WT_PAGE_MODIFY *mod;
+ WT_MULTI multi;
mod = ref->page->modify;
@@ -284,24 +316,15 @@ __evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
* write. Take advantage of the fact we have exclusive access
* to the page and rewrite it in memory.
*/
- if (mod->mod_multi_entries == 1)
- WT_RET(__wt_split_rewrite(session, ref));
- else
+ if (mod->mod_multi_entries == 1) {
+ WT_ASSERT(session, closing == false);
+ WT_RET(__wt_split_rewrite(
+ session, ref, &mod->mod_multi[0]));
+ } else
WT_RET(__wt_split_multi(session, ref, closing));
break;
case WT_PM_REC_REPLACE: /* 1-for-1 page swap */
/*
- * If doing normal system eviction, but only in the service of
- * reducing the number of dirty pages, leave the clean page in
- * cache. Only do this when replacing a page with another one,
- * because when a page splits into multiple pages, we want to
- * push it out of cache (and read it back in, when needed), we
- * would rather have more, smaller pages than fewer large pages.
- */
- if (!closing && __wt_eviction_dirty_target(session))
- return (EBUSY);
-
- /*
* Update the parent to reference the replacement page.
*
* Publish: a barrier to ensure the structure fields are set
@@ -311,10 +334,26 @@ __evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
*addr = mod->mod_replace;
mod->mod_replace.addr = NULL;
mod->mod_replace.size = 0;
-
- __wt_ref_out(session, ref);
ref->addr = addr;
- WT_PUBLISH(ref->state, WT_REF_DISK);
+
+ /*
+ * Eviction wants to keep this page if we have a disk image:
+ * re-instantiate the page in memory, else discard the page.
+ */
+ if (mod->mod_disk_image == NULL) {
+ __wt_ref_out(session, ref);
+ WT_PUBLISH(ref->state, WT_REF_DISK);
+ } else {
+ /*
+ * The split code works with WT_MULTI structures, build
+ * one for the disk image.
+ */
+ memset(&multi, 0, sizeof(multi));
+ multi.disk_image = mod->mod_disk_image;
+
+ WT_RET(__wt_split_rewrite(session, ref, &multi));
+ }
+
break;
WT_ILLEGAL_VALUE(session);
}
@@ -351,13 +390,17 @@ __evict_child_check(WT_SESSION_IMPL *session, WT_REF *parent)
*/
static int
__evict_review(
- WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp, bool closing)
+ WT_SESSION_IMPL *session, WT_REF *ref, uint32_t *flagsp, bool closing)
{
+ WT_CACHE *cache;
WT_DECL_RET;
WT_PAGE *page;
uint32_t flags;
bool modified;
+ flags = WT_EVICTING;
+ *flagsp = flags;
+
/*
* Get exclusive access to the page if our caller doesn't have the tree
* locked down.
@@ -423,8 +466,9 @@ __evict_review(
WT_RET(__wt_txn_update_oldest(
session, WT_TXN_OLDEST_STRICT));
- if (!__wt_page_can_evict(session, ref, inmem_splitp))
+ if (!__wt_page_can_evict(session, ref, flagsp))
return (EBUSY);
+ flags = *flagsp;
/*
* Check for an append-only workload needing an in-memory
@@ -433,8 +477,12 @@ __evict_review(
* the page stays in memory and the tree is left in the desired
* state: avoid the usual cleanup.
*/
- if (*inmem_splitp)
+ if (LF_ISSET(WT_EVICT_INMEM_SPLIT))
return (__wt_split_insert(session, ref));
+
+ /* We are done if reconciliation is disabled. */
+ if (F_ISSET(S2BT(session), WT_BTREE_NO_RECONCILE))
+ return (EBUSY);
}
/* If the page is clean, we're done and we can evict. */
@@ -447,10 +495,15 @@ __evict_review(
* If we have an exclusive lock (we're discarding the tree), assert
* there are no updates we cannot read.
*
- * Otherwise, if the page we're evicting is a leaf page marked for
- * forced eviction, set the update-restore flag, so reconciliation will
- * write blocks it can write and create a list of skipped updates for
- * blocks it cannot write. This is how forced eviction of active, huge
+ * Don't set any other flags for internal pages: they don't have update
+ * lists to be saved and restored, nor can we re-create them in memory.
+ *
+ * For leaf pages:
+ *
+ * If this is an in-memory configuration, or the page is being forcibly
+ * evicted, set the update-restore flag so reconciliation will write the
+ * blocks it can write and create a list of skipped updates for blocks it
+ * cannot write, along with disk images. This is how eviction of active, huge
* pages works: we take a big page and reconcile it into blocks, some of
* which we write and discard, the rest of which we re-create as smaller
* in-memory pages, (restoring the updates that stopped us from writing
@@ -461,32 +514,43 @@ __evict_review(
* allowing the eviction of pages we'd otherwise have to retain in cache
* to support older readers.
*
- * Don't set the update-restore or lookaside table flags for internal
- * pages, they don't have update lists that can be saved and restored.
+ * Finally, if we don't need to do eviction at the moment, create disk
+ * images of split pages in order to re-instantiate them.
*/
- flags = WT_EVICTING;
+ cache = S2C(session)->cache;
if (closing)
LF_SET(WT_VISIBILITY_ERR);
else if (!WT_PAGE_IS_INTERNAL(page)) {
if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
- LF_SET(WT_EVICT_IN_MEMORY | WT_EVICT_UPDATE_RESTORE);
- else if (page->read_gen == WT_READGEN_OLDEST)
- LF_SET(WT_EVICT_UPDATE_RESTORE);
- else if (F_ISSET(S2C(session)->cache, WT_CACHE_STUCK))
+ LF_SET(WT_EVICT_IN_MEMORY |
+ WT_EVICT_SCRUB | WT_EVICT_UPDATE_RESTORE);
+ else if (F_ISSET(cache, WT_CACHE_STUCK))
LF_SET(WT_EVICT_LOOKASIDE);
+ else if (!__wt_txn_visible_all(
+ session, page->modify->update_txn))
+ LF_SET(WT_EVICT_UPDATE_RESTORE);
+
+ /*
+ * If we aren't trying to free space in the cache, scrub the
+ * page and keep it around.
+ */
+ if (!LF_ISSET(WT_EVICT_LOOKASIDE) &&
+ FLD_ISSET(cache->state, WT_EVICT_STATE_SCRUB))
+ LF_SET(WT_EVICT_SCRUB);
}
+ *flagsp = flags;
WT_RET(__wt_reconcile(session, ref, NULL, flags));
/*
* Success: assert the page is clean or reconciliation was configured
- * for an update/restore split. If the page is clean, assert that
- * reconciliation was configured for a lookaside table, or it's not a
- * durable object (currently the lookaside table), or all page updates
- * were globally visible.
+ * for update/restore. If the page is clean, assert that reconciliation
+ * was configured for a lookaside table, or it's not a durable object
+ * (currently the lookaside table), or all page updates were globally
+ * visible.
*/
WT_ASSERT(session,
- LF_ISSET(WT_EVICT_UPDATE_RESTORE) || !__wt_page_is_modified(page));
+ !__wt_page_is_modified(page) || LF_ISSET(WT_EVICT_UPDATE_RESTORE));
WT_ASSERT(session,
__wt_page_is_modified(page) ||
LF_ISSET(WT_EVICT_LOOKASIDE) ||
diff --git a/src/include/api.h b/src/include/api.h
index 50b2eab83b8..0a4593178dc 100644
--- a/src/include/api.h
+++ b/src/include/api.h
@@ -66,6 +66,8 @@
else if (ret == 0 && !F_ISSET(&(s)->txn, WT_TXN_ERROR)) \
ret = __wt_txn_commit((s), NULL); \
else { \
+ if (retry) \
+ WT_TRET(__wt_session_copy_values(s)); \
WT_TRET(__wt_txn_rollback((s), NULL)); \
if ((ret == 0 || ret == WT_ROLLBACK) && \
(retry)) { \
diff --git a/src/include/block.h b/src/include/block.h
index a8080c1651c..3342f9b1e5e 100644
--- a/src/include/block.h
+++ b/src/include/block.h
@@ -192,7 +192,7 @@ struct __wt_bm {
int (*verify_start)
(WT_BM *, WT_SESSION_IMPL *, WT_CKPT *, const char *[]);
int (*write) (WT_BM *,
- WT_SESSION_IMPL *, WT_ITEM *, uint8_t *, size_t *, bool);
+ WT_SESSION_IMPL *, WT_ITEM *, uint8_t *, size_t *, bool, bool);
int (*write_size)(WT_BM *, WT_SESSION_IMPL *, size_t *);
WT_BLOCK *block; /* Underlying file */
diff --git a/src/include/btmem.h b/src/include/btmem.h
index 9700b6f4761..817ce892952 100644
--- a/src/include/btmem.h
+++ b/src/include/btmem.h
@@ -250,9 +250,19 @@ struct __wt_page_modify {
* a replace address and multiple replacement blocks.
*/
union {
- WT_ADDR replace; /* Single, written replacement block */
+ struct { /* Single, written replacement block */
+ WT_ADDR replace;
+
+ /*
+ * A disk image that may or may not have been written, used to
+ * re-instantiate the page in memory.
+ */
+ void *disk_image;
+ } r;
#undef mod_replace
-#define mod_replace u1.replace
+#define mod_replace u1.r.replace
+#undef mod_disk_image
+#define mod_disk_image u1.r.disk_image
struct { /* Multiple replacement blocks */
struct __wt_multi {
@@ -266,14 +276,19 @@ struct __wt_page_modify {
} key;
/*
- * Eviction, but the block wasn't written: either an in-memory
- * configuration or unresolved updates prevented the write.
- * There may be a list of unresolved updates, there's always an
- * associated disk image.
+ * A disk image that may or may not have been written, used to
+ * re-instantiate the page in memory.
+ */
+ void *disk_image;
+
+ /*
+ * List of unresolved updates. Updates are either a WT_INSERT
+ * or a row-store leaf page entry; when creating lookaside
+ * records, there is an additional value, the committed item's
+ * transaction ID.
*
- * Saved updates are either a WT_INSERT, or a row-store leaf
- * page entry; in the case of creating lookaside records, there
- * is an additional value, the committed item's transaction ID.
+ * If there are unresolved updates, the block wasn't written and
+ * there will always be a disk image.
*/
struct __wt_save_upd {
WT_INSERT *ins;
@@ -281,10 +296,9 @@ struct __wt_page_modify {
uint64_t onpage_txn;
} *supd;
uint32_t supd_entries;
- void *disk_image;
/*
- * Block was written: address, size and checksum.
+ * Disk image was written: address, size and checksum.
* On subsequent reconciliations of this page, we avoid writing
* the block if it's unchanged by comparing size and checksum;
* the reuse flag is set when the block is unchanged and we're
diff --git a/src/include/btree.h b/src/include/btree.h
index fd921677751..432474f9dc1 100644
--- a/src/include/btree.h
+++ b/src/include/btree.h
@@ -126,12 +126,16 @@ struct __wt_btree {
u_int block_header; /* WT_PAGE_HEADER_BYTE_SIZE */
uint64_t checkpoint_gen; /* Checkpoint generation */
+ bool include_checkpoint_txn;/* ID checks include checkpoint */
uint64_t rec_max_txn; /* Maximum txn seen (clean trees) */
uint64_t write_gen; /* Write generation */
+ uint64_t bytes_inmem; /* Cache bytes in memory. */
+
WT_REF *evict_ref; /* Eviction thread's location */
uint64_t evict_priority; /* Relative priority of cached pages */
u_int evict_walk_period; /* Skip this many LRU walks */
+ u_int evict_walk_saved; /* Saved walk skips for checkpoints */
u_int evict_walk_skips; /* Number of walks skipped */
u_int evict_disabled; /* Eviction disabled count */
volatile uint32_t evict_busy; /* Count of threads in eviction */
@@ -154,11 +158,12 @@ struct __wt_btree {
#define WT_BTREE_NO_CHECKPOINT 0x00800 /* Disable checkpoints */
#define WT_BTREE_NO_EVICTION 0x01000 /* Disable eviction */
#define WT_BTREE_NO_LOGGING 0x02000 /* Disable logging */
-#define WT_BTREE_REBALANCE 0x04000 /* Handle is for rebalance */
-#define WT_BTREE_SALVAGE 0x08000 /* Handle is for salvage */
-#define WT_BTREE_SKIP_CKPT 0x10000 /* Handle skipped checkpoint */
-#define WT_BTREE_UPGRADE 0x20000 /* Handle is for upgrade */
-#define WT_BTREE_VERIFY 0x40000 /* Handle is for verify */
+#define WT_BTREE_NO_RECONCILE 0x04000 /* Allow splits, even with no evict */
+#define WT_BTREE_REBALANCE 0x08000 /* Handle is for rebalance */
+#define WT_BTREE_SALVAGE 0x10000 /* Handle is for salvage */
+#define WT_BTREE_SKIP_CKPT 0x20000 /* Handle skipped checkpoint */
+#define WT_BTREE_UPGRADE 0x40000 /* Handle is for upgrade */
+#define WT_BTREE_VERIFY 0x80000 /* Handle is for verify */
uint32_t flags;
};
diff --git a/src/include/btree.i b/src/include/btree.i
index e0102a11511..3234ad1ed41 100644
--- a/src/include/btree.i
+++ b/src/include/btree.i
@@ -55,6 +55,27 @@ __wt_btree_block_free(
}
/*
+ * __wt_btree_bytes_inuse --
+ * Return the number of bytes in use.
+ */
+static inline uint64_t
+__wt_btree_bytes_inuse(WT_SESSION_IMPL *session)
+{
+ WT_CACHE *cache;
+ uint64_t bytes_inuse;
+
+ cache = S2C(session)->cache;
+
+ /* Adjust the cache size to take allocation overhead into account. */
+ bytes_inuse = S2BT(session)->bytes_inmem;
+ if (cache->overhead_pct != 0)
+ bytes_inuse +=
+ (bytes_inuse * (uint64_t)cache->overhead_pct) / 100;
+
+ return (bytes_inuse);
+}
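A minimal sketch of the overhead arithmetic used above, assuming an 8% allocation overhead; the helper name and standalone framing are illustrative, not WiredTiger source:

	#include <stdint.h>

	/* Sketch: scale a byte count by an allocation-overhead percentage. */
	static uint64_t
	adjust_for_overhead(uint64_t bytes, uint64_t overhead_pct)
	{
		if (overhead_pct != 0)
			bytes += (bytes * overhead_pct) / 100;
		return (bytes);
	}
	/* adjust_for_overhead(1000, 8) == 1080: 8% overhead on 1000 bytes. */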
+
+/*
* __wt_cache_page_inmem_incr --
* Increment a page's memory footprint in the cache.
*/
@@ -66,17 +87,17 @@ __wt_cache_page_inmem_incr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size)
WT_ASSERT(session, size < WT_EXABYTE);
cache = S2C(session)->cache;
+ (void)__wt_atomic_add64(&S2BT(session)->bytes_inmem, size);
(void)__wt_atomic_add64(&cache->bytes_inmem, size);
(void)__wt_atomic_addsize(&page->memory_footprint, size);
if (__wt_page_is_modified(page)) {
- (void)__wt_atomic_add64(&cache->bytes_dirty, size);
(void)__wt_atomic_addsize(&page->modify->bytes_dirty, size);
+ (void)__wt_atomic_add64(WT_PAGE_IS_INTERNAL(page) ?
+ &cache->bytes_dirty_intl : &cache->bytes_dirty_leaf, size);
}
- /* Track internal and overflow size in cache. */
+ /* Track internal size in cache. */
if (WT_PAGE_IS_INTERNAL(page))
(void)__wt_atomic_add64(&cache->bytes_internal, size);
- else if (page->type == WT_PAGE_OVFL)
- (void)__wt_atomic_add64(&cache->bytes_overflow, size);
}
/*
@@ -144,10 +165,16 @@ __wt_cache_page_byte_dirty_decr(
WT_SESSION_IMPL *session, WT_PAGE *page, size_t size)
{
WT_CACHE *cache;
+ const char *destname;
+ uint64_t *dest;
size_t decr, orig;
int i;
cache = S2C(session)->cache;
+ dest = WT_PAGE_IS_INTERNAL(page) ?
+ &cache->bytes_dirty_intl : &cache->bytes_dirty_leaf;
+ destname = WT_PAGE_IS_INTERNAL(page) ?
+ "WT_CACHE.bytes_dirty_intl" : "WT_CACHE.bytes_dirty_leaf";
/*
* We don't have exclusive access and there are ways of decrementing the
@@ -175,8 +202,8 @@ __wt_cache_page_byte_dirty_decr(
decr = WT_MIN(size, orig);
if (__wt_atomic_cassize(
&page->modify->bytes_dirty, orig, orig - decr)) {
- __wt_cache_decr_check_uint64(session,
- &cache->bytes_dirty, decr, "WT_CACHE.bytes_dirty");
+ __wt_cache_decr_check_uint64(
+ session, dest, decr, destname);
break;
}
}
@@ -196,18 +223,17 @@ __wt_cache_page_inmem_decr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size)
WT_ASSERT(session, size < WT_EXABYTE);
__wt_cache_decr_check_uint64(
+ session, &S2BT(session)->bytes_inmem, size, "WT_BTREE.bytes_inmem");
+ __wt_cache_decr_check_uint64(
session, &cache->bytes_inmem, size, "WT_CACHE.bytes_inmem");
__wt_cache_decr_check_size(
session, &page->memory_footprint, size, "WT_PAGE.memory_footprint");
if (__wt_page_is_modified(page))
__wt_cache_page_byte_dirty_decr(session, page, size);
- /* Track internal and overflow size in cache. */
+ /* Track internal size in cache. */
if (WT_PAGE_IS_INTERNAL(page))
__wt_cache_decr_check_uint64(session,
&cache->bytes_internal, size, "WT_CACHE.bytes_internal");
- else if (page->type == WT_PAGE_OVFL)
- __wt_cache_decr_check_uint64(session,
- &cache->bytes_overflow, size, "WT_CACHE.bytes_overflow");
}
/*
@@ -222,14 +248,16 @@ __wt_cache_dirty_incr(WT_SESSION_IMPL *session, WT_PAGE *page)
size_t size;
cache = S2C(session)->cache;
- (void)__wt_atomic_add64(&cache->pages_dirty, 1);
+ (void)__wt_atomic_add64(WT_PAGE_IS_INTERNAL(page) ?
+ &cache->pages_dirty_intl : &cache->pages_dirty_leaf, 1);
/*
* Take care to read the memory_footprint once in case we are racing
* with updates.
*/
size = page->memory_footprint;
- (void)__wt_atomic_add64(&cache->bytes_dirty, size);
+ (void)__wt_atomic_add64(WT_PAGE_IS_INTERNAL(page) ?
+ &cache->bytes_dirty_intl : &cache->bytes_dirty_leaf, size);
(void)__wt_atomic_addsize(&page->modify->bytes_dirty, size);
}
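Dirty accounting is now split between internal and leaf counters, selected with a ternary before the atomic add. A minimal sketch of the routing pattern; the global counters and the non-atomic increment are simplifications for illustration:

	#include <stdbool.h>
	#include <stdint.h>

	static uint64_t bytes_dirty_intl, bytes_dirty_leaf;

	/* Sketch: route a dirty-byte increment to the internal or leaf counter. */
	static void
	dirty_bytes_incr(bool is_internal, uint64_t size)
	{
		uint64_t *dest;

		dest = is_internal ? &bytes_dirty_intl : &bytes_dirty_leaf;
		*dest += size;	/* the real code uses __wt_atomic_add64 */
	}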
@@ -243,16 +271,19 @@ __wt_cache_dirty_decr(WT_SESSION_IMPL *session, WT_PAGE *page)
{
WT_CACHE *cache;
WT_PAGE_MODIFY *modify;
+ uint64_t *pages_dirty;
cache = S2C(session)->cache;
+ pages_dirty = WT_PAGE_IS_INTERNAL(page) ?
+ &cache->pages_dirty_intl : &cache->pages_dirty_leaf;
- if (cache->pages_dirty < 1) {
+ if (*pages_dirty < 1) {
__wt_errx(session,
"cache eviction dirty-page decrement failed: dirty page"
"count went negative");
- cache->pages_dirty = 0;
+ *pages_dirty = 0;
} else
- (void)__wt_atomic_sub64(&cache->pages_dirty, 1);
+ (void)__wt_atomic_sub64(pages_dirty, 1);
modify = page->modify;
if (modify != NULL && modify->bytes_dirty != 0)
@@ -261,6 +292,34 @@ __wt_cache_dirty_decr(WT_SESSION_IMPL *session, WT_PAGE *page)
}
/*
+ * __wt_cache_page_image_decr --
+ * Decrement a page image's size in the cache.
+ */
+static inline void
+__wt_cache_page_image_decr(WT_SESSION_IMPL *session, uint32_t size)
+{
+ WT_CACHE *cache;
+
+ cache = S2C(session)->cache;
+
+ __wt_cache_decr_check_uint64(
+	    session, &cache->bytes_image, size, "WT_CACHE.bytes_image");
+}
+
+/*
+ * __wt_cache_page_image_incr --
+ * Increment a page image's size in the cache.
+ */
+static inline void
+__wt_cache_page_image_incr(WT_SESSION_IMPL *session, uint32_t size)
+{
+ WT_CACHE *cache;
+
+ cache = S2C(session)->cache;
+ (void)__wt_atomic_add64(&cache->bytes_image, size);
+}
+
+/*
* __wt_cache_page_evict --
* Evict pages from the cache.
*/
@@ -269,13 +328,20 @@ __wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page)
{
WT_CACHE *cache;
WT_PAGE_MODIFY *modify;
+ uint64_t *dest;
+ const char *destname;
cache = S2C(session)->cache;
+ dest = WT_PAGE_IS_INTERNAL(page) ?
+ &cache->bytes_dirty_intl : &cache->bytes_dirty_leaf;
+ destname = WT_PAGE_IS_INTERNAL(page) ?
+ "WT_CACHE.bytes_dirty_intl" : "WT_CACHE.bytes_dirty_leaf";
modify = page->modify;
/* Update the bytes in-memory to reflect the eviction. */
- __wt_cache_decr_check_uint64(session,
- &cache->bytes_inmem,
+ __wt_cache_decr_check_uint64(session, &S2BT(session)->bytes_inmem,
+ page->memory_footprint, "WT_BTREE.bytes_inmem");
+ __wt_cache_decr_check_uint64(session, &cache->bytes_inmem,
page->memory_footprint, "WT_CACHE.bytes_inmem");
/* Update the bytes_internal value to reflect the eviction */
@@ -286,15 +352,14 @@ __wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page)
/* Update the cache's dirty-byte count. */
if (modify != NULL && modify->bytes_dirty != 0) {
- if (cache->bytes_dirty < modify->bytes_dirty) {
+ if ((size_t)*dest < modify->bytes_dirty) {
__wt_errx(session,
- "cache eviction dirty-bytes decrement failed: "
- "dirty byte count went negative");
- cache->bytes_dirty = 0;
+ "%s decrement failed: "
+ "dirty byte count went negative", destname);
+ *dest = 0;
} else
- __wt_cache_decr_check_uint64(session,
- &cache->bytes_dirty,
- modify->bytes_dirty, "WT_CACHE.bytes_dirty");
+ __wt_cache_decr_check_uint64(session, dest,
+ modify->bytes_dirty, destname);
}
/* Update pages and bytes evicted. */
@@ -318,16 +383,6 @@ __wt_update_list_memsize(WT_UPDATE *upd)
}
/*
- * __wt_page_evict_soon --
- * Set a page to be evicted as soon as possible.
- */
-static inline void
-__wt_page_evict_soon(WT_PAGE *page)
-{
- page->read_gen = WT_READGEN_OLDEST;
-}
-
-/*
* __wt_page_modify_init --
* A page is about to be modified, allocate the modification structure.
*/
@@ -1099,16 +1154,14 @@ __wt_leaf_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page)
* Check whether a page can be evicted.
*/
static inline bool
-__wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp)
+__wt_page_can_evict(
+ WT_SESSION_IMPL *session, WT_REF *ref, uint32_t *evict_flagsp)
{
WT_BTREE *btree;
WT_PAGE *page;
WT_PAGE_MODIFY *mod;
bool modified;
- if (inmem_splitp != NULL)
- *inmem_splitp = false;
-
btree = S2BT(session);
page = ref->page;
mod = page->modify;
@@ -1124,8 +1177,8 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp)
* won't be written or discarded from the cache.
*/
if (__wt_leaf_page_can_split(session, page)) {
- if (inmem_splitp != NULL)
- *inmem_splitp = true;
+ if (evict_flagsp != NULL)
+ FLD_SET(*evict_flagsp, WT_EVICT_INMEM_SPLIT);
return (true);
}
@@ -1164,6 +1217,10 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp)
F_ISSET_ATOMIC(page, WT_PAGE_SPLIT_BLOCK))
return (false);
+ /* If the cache is stuck, try anything else. */
+ if (F_ISSET(S2C(session)->cache, WT_CACHE_STUCK))
+ return (true);
+
/*
* If the oldest transaction hasn't changed since the last time
* this page was written, it's unlikely we can make progress.
@@ -1172,7 +1229,6 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp)
* attempt to avoid repeated attempts to evict the same page.
*/
if (modified &&
- !F_ISSET(S2C(session)->cache, WT_CACHE_STUCK) &&
(mod->last_oldest_id == __wt_txn_oldest_id(session) ||
!__wt_txn_visible_all(session, mod->update_txn)))
return (false);
@@ -1181,56 +1237,6 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp)
}
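With the new signature, callers receive eviction hints through a flags word rather than a single in-memory-split boolean. A sketch of how a caller might consume it; only the signature and flag names come from this patch, the surrounding context is assumed:

	uint32_t evict_flags = 0;

	if (__wt_page_can_evict(session, ref, &evict_flags) &&
	    FLD_ISSET(evict_flags, WT_EVICT_INMEM_SPLIT)) {
		/* The page is splittable: prefer an in-memory split to eviction. */
	}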
/*
- * __wt_page_release_evict --
- * Release a reference to a page, and attempt to immediately evict it.
- */
-static inline int
-__wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref)
-{
- WT_BTREE *btree;
- WT_DECL_RET;
- WT_PAGE *page;
- bool locked, too_big;
-
- btree = S2BT(session);
- page = ref->page;
-
- /*
- * Take some care with order of operations: if we release the hazard
- * reference without first locking the page, it could be evicted in
- * between.
- */
- locked = __wt_atomic_casv32(
- &ref->state, WT_REF_MEM, WT_REF_LOCKED) ? true : false;
- if ((ret = __wt_hazard_clear(session, page)) != 0 || !locked) {
- if (locked)
- ref->state = WT_REF_MEM;
- return (ret == 0 ? EBUSY : ret);
- }
-
- (void)__wt_atomic_addv32(&btree->evict_busy, 1);
-
- too_big = page->memory_footprint > btree->maxmempage;
- if ((ret = __wt_evict(session, ref, false)) == 0) {
- if (too_big)
- WT_STAT_FAST_CONN_INCR(session, cache_eviction_force);
- else
- /*
- * If the page isn't too big, we are evicting it because
- * it had a chain of deleted entries that make traversal
- * expensive.
- */
- WT_STAT_FAST_CONN_INCR(
- session, cache_eviction_force_delete);
- } else
- WT_STAT_FAST_CONN_INCR(session, cache_eviction_force_fail);
-
- (void)__wt_atomic_subv32(&btree->evict_busy, 1);
-
- return (ret);
-}
-
-/*
* __wt_page_release --
* Release a reference to a page.
*/
diff --git a/src/include/cache.h b/src/include/cache.h
index f4a35de7201..e3a003ccc56 100644
--- a/src/include/cache.h
+++ b/src/include/cache.h
@@ -13,7 +13,6 @@
#define WT_EVICT_INT_SKEW (1<<20) /* Prefer leaf pages over internal
pages by this many increments of the
read generation. */
-#define WT_EVICT_WALK_PER_FILE 10 /* Pages to queue per file */
#define WT_EVICT_WALK_BASE 300 /* Pages tracked across file visits */
#define WT_EVICT_WALK_INCR 100 /* Pages added each walk */
@@ -24,9 +23,12 @@
struct __wt_evict_entry {
WT_BTREE *btree; /* Enclosing btree object */
WT_REF *ref; /* Page to flush/evict */
+ uint64_t score; /* Relative eviction priority */
};
-#define WT_EVICT_QUEUE_MAX 2
+#define WT_EVICT_URGENT_QUEUE 0 /* Urgent queue index */
+#define WT_EVICT_QUEUE_MAX 3 /* Urgent plus two ordinary queues */
+
/*
* WT_EVICT_QUEUE --
* Encapsulation of an eviction candidate queue.
@@ -34,6 +36,7 @@ struct __wt_evict_entry {
struct __wt_evict_queue {
WT_SPINLOCK evict_lock; /* Eviction LRU queue */
WT_EVICT_ENTRY *evict_queue; /* LRU pages being tracked */
+ WT_EVICT_ENTRY *evict_current; /* LRU current page to be evicted */
uint32_t evict_candidates; /* LRU list pages to evict */
uint32_t evict_entries; /* LRU entries in the queue */
volatile uint32_t evict_max; /* LRU maximum eviction slot used */
@@ -70,16 +73,19 @@ struct __wt_cache {
* be exact, they can't be garbage, we track what comes in and what goes
* out and calculate the difference as needed.
*/
- uint64_t bytes_inmem; /* Bytes/pages in memory */
- uint64_t pages_inmem;
- uint64_t bytes_internal; /* Bytes of internal pages */
- uint64_t bytes_overflow; /* Bytes of overflow pages */
+ uint64_t bytes_dirty_intl; /* Bytes/pages currently dirty */
+ uint64_t pages_dirty_intl;
+ uint64_t bytes_dirty_leaf;
+ uint64_t pages_dirty_leaf;
uint64_t bytes_evict; /* Bytes/pages discarded by eviction */
uint64_t pages_evict;
uint64_t pages_evicted; /* Pages evicted during a pass */
- uint64_t bytes_dirty; /* Bytes/pages currently dirty */
- uint64_t pages_dirty;
+ uint64_t bytes_image; /* Bytes of disk images */
+ uint64_t bytes_inmem; /* Bytes/pages in memory */
+ uint64_t pages_inmem;
+ uint64_t bytes_internal; /* Bytes of internal pages */
uint64_t bytes_read; /* Bytes read into memory */
+ uint64_t bytes_written;
uint64_t app_waits; /* User threads waited for cache */
uint64_t app_evicts; /* Pages evicted by user threads */
@@ -121,7 +127,6 @@ struct __wt_cache {
WT_SPINLOCK evict_queue_lock; /* Eviction current queue lock */
WT_EVICT_QUEUE evict_queues[WT_EVICT_QUEUE_MAX];
WT_EVICT_QUEUE *evict_current_queue;/* LRU current queue in use */
- WT_EVICT_ENTRY *evict_current; /* LRU current page to be evicted */
uint32_t evict_queue_fill; /* LRU eviction queue index to fill */
uint32_t evict_slots; /* LRU list eviction slots */
WT_DATA_HANDLE
@@ -145,10 +150,13 @@ struct __wt_cache {
/*
* Work state.
*/
-#define WT_EVICT_PASS_AGGRESSIVE 0x01
-#define WT_EVICT_PASS_ALL 0x02
-#define WT_EVICT_PASS_DIRTY 0x04
-#define WT_EVICT_PASS_WOULD_BLOCK 0x08
+#define WT_EVICT_STATE_AGGRESSIVE 0x01 /* Eviction isn't making progress:
+ try harder */
+#define WT_EVICT_STATE_CLEAN 0x02 /* Evict clean pages */
+#define WT_EVICT_STATE_DIRTY 0x04 /* Evict dirty pages */
+#define WT_EVICT_STATE_SCRUB 0x08 /* Scrub dirty pages */
+#define WT_EVICT_STATE_URGENT 0x10 /* Pages are in the urgent queue */
+#define WT_EVICT_STATE_ALL (WT_EVICT_STATE_CLEAN | WT_EVICT_STATE_DIRTY)
uint32_t state;
/*
* Pass interrupt counter.
@@ -162,7 +170,6 @@ struct __wt_cache {
#define WT_CACHE_POOL_RUN 0x02 /* Cache pool thread running */
#define WT_CACHE_STUCK 0x04 /* Eviction server is stuck */
#define WT_CACHE_WALK_REVERSE 0x08 /* Scan backwards for candidates */
-#define WT_CACHE_WOULD_BLOCK 0x10 /* Pages that would block apps */
uint32_t flags;
};
diff --git a/src/include/cache.i b/src/include/cache.i
index 72c8307756d..b5cb79afb3c 100644
--- a/src/include/cache.i
+++ b/src/include/cache.i
@@ -104,7 +104,7 @@ __wt_cache_dirty_inuse(WT_CACHE *cache)
{
uint64_t dirty_inuse;
- dirty_inuse = cache->bytes_dirty;
+ dirty_inuse = cache->bytes_dirty_intl + cache->bytes_dirty_leaf;
if (cache->overhead_pct != 0)
dirty_inuse +=
(dirty_inuse * (uint64_t)cache->overhead_pct) / 100;
@@ -113,6 +113,67 @@ __wt_cache_dirty_inuse(WT_CACHE *cache)
}
/*
+ * __wt_cache_dirty_leaf_inuse --
+ * Return the number of dirty bytes in use by leaf pages.
+ */
+static inline uint64_t
+__wt_cache_dirty_leaf_inuse(WT_CACHE *cache)
+{
+ uint64_t dirty_inuse;
+
+ dirty_inuse = cache->bytes_dirty_leaf;
+ if (cache->overhead_pct != 0)
+ dirty_inuse +=
+ (dirty_inuse * (uint64_t)cache->overhead_pct) / 100;
+
+ return (dirty_inuse);
+}
+
+/*
+ * __wt_cache_bytes_image --
+ * Return the number of page image bytes in use.
+ */
+static inline uint64_t
+__wt_cache_bytes_image(WT_CACHE *cache)
+{
+ uint64_t bytes_image;
+
+ bytes_image = cache->bytes_image;
+ if (cache->overhead_pct != 0)
+ bytes_image +=
+ (bytes_image * (uint64_t)cache->overhead_pct) / 100;
+
+ return (bytes_image);
+}
+
+/*
+ * __wt_cache_bytes_other --
+ * Return the number of bytes in use not for page images.
+ */
+static inline uint64_t
+__wt_cache_bytes_other(WT_CACHE *cache)
+{
+ uint64_t bytes_image, bytes_inmem, bytes_other;
+
+ bytes_image = cache->bytes_image;
+ bytes_inmem = cache->bytes_inmem;
+
+ /*
+ * The reads above could race with changes to the values, so protect
+ * against underflow.
+ */
+ if (bytes_image > bytes_inmem)
+ return (0);
+
+ bytes_other = bytes_inmem - bytes_image;
+ if (cache->overhead_pct != 0)
+ bytes_other +=
+ (bytes_other * (uint64_t)cache->overhead_pct) / 100;
+
+ return (bytes_other);
+}
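A worked example of why the underflow guard is needed, with assumed counter values:

	/*
	 * Suppose bytes_image is read as 1200 and, before bytes_inmem is read,
	 * pages are discarded so bytes_inmem reads as 1000.  Without the guard,
	 * 1000 - 1200 would wrap to a huge uint64_t; with it, the function
	 * reports 0 until the counters settle.
	 */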
+
+/*
* __wt_session_can_wait --
 * Return if a session is available for a potentially slow operation.
*/
@@ -139,20 +200,9 @@ __wt_session_can_wait(WT_SESSION_IMPL *session)
}
/*
- * __wt_eviction_dirty_target --
- * Return if the eviction server is running to reduce the number of dirty
- * pages (versus running to discard pages from the cache).
- */
-static inline bool
-__wt_eviction_dirty_target(WT_SESSION_IMPL *session)
-{
- return (FLD_ISSET(S2C(session)->cache->state, WT_EVICT_PASS_DIRTY));
-}
-
-/*
* __wt_eviction_needed --
* Return if an application thread should do eviction, and the cache full
- * percentage as a side-effect.
+ * percentage as a side-effect.
*/
static inline bool
__wt_eviction_needed(WT_SESSION_IMPL *session, u_int *pct_fullp)
@@ -186,22 +236,21 @@ __wt_eviction_needed(WT_SESSION_IMPL *session, u_int *pct_fullp)
pct_full = (u_int)((100 * bytes_inuse) / bytes_max);
if (pct_fullp != NULL)
*pct_fullp = pct_full;
- /*
- * If the connection is closing we do not need eviction from an
- * application thread. The eviction subsystem is already closed.
- * We return here because some callers depend on the percent full
- * having been filled in.
- */
- if (F_ISSET(conn, WT_CONN_CLOSING))
- return (false);
if (pct_full > cache->eviction_trigger)
return (true);
- /* Return if there are too many dirty bytes in cache. */
- if (__wt_cache_dirty_inuse(cache) >
+ /*
+ * Check if there are too many dirty bytes in cache.
+ *
+ * We try to avoid penalizing read-only operations by only checking the
+ * dirty limit once a transaction ID has been allocated, or if the last
+ * transaction did an update.
+ */
+ if (__wt_cache_dirty_leaf_inuse(cache) >
(cache->eviction_dirty_trigger * bytes_max) / 100)
return (true);
+
return (false);
}
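A worked example of the two trigger checks, assuming a 100MB cache (bytes_max), eviction_trigger of 95 and eviction_dirty_trigger of 20:

	/*
	 * bytes_inuse = 96MB: pct_full = (100 * 96MB) / 100MB = 96 > 95  -> true
	 * dirty leaf bytes = 25MB: 25MB > (20 * 100MB) / 100 = 20MB      -> true
	 * bytes_inuse = 80MB and dirty leaf bytes = 10MB                 -> false
	 */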
diff --git a/src/include/connection.h b/src/include/connection.h
index 0e0c357279a..a9855e42980 100644
--- a/src/include/connection.h
+++ b/src/include/connection.h
@@ -271,7 +271,6 @@ struct __wt_connection_impl {
wt_thread_t ckpt_tid; /* Checkpoint thread */
bool ckpt_tid_set; /* Checkpoint thread set */
WT_CONDVAR *ckpt_cond; /* Checkpoint wait mutex */
- const char *ckpt_config; /* Checkpoint configuration */
#define WT_CKPT_LOGSIZE(conn) ((conn)->ckpt_logsize != 0)
wt_off_t ckpt_logsize; /* Checkpoint log size period */
uint32_t ckpt_signalled;/* Checkpoint signalled */
@@ -314,6 +313,7 @@ struct __wt_connection_impl {
uint32_t evict_workers; /* Number of eviction workers */
WT_EVICT_WORKER *evict_workctx; /* Eviction worker context */
+#define WT_STATLOG_FILENAME "WiredTigerStat.%d.%H"
WT_SESSION_IMPL *stat_session; /* Statistics log session */
wt_thread_t stat_tid; /* Statistics log thread */
bool stat_tid_set; /* Statistics log thread set */
diff --git a/src/include/cursor.h b/src/include/cursor.h
index 6357523a03f..dce24f20844 100644
--- a/src/include/cursor.h
+++ b/src/include/cursor.h
@@ -73,6 +73,9 @@ struct __wt_cursor_backup {
WT_CURSOR_BACKUP_ENTRY *list; /* List of files to be copied. */
size_t list_allocated;
size_t list_next;
+
+#define WT_CURBACKUP_LOCKER 0x01 /* Hot-backup started */
+ uint8_t flags;
};
#define WT_CURSOR_BACKUP_ID(cursor) (((WT_CURSOR_BACKUP *)cursor)->maxid)
@@ -413,7 +416,9 @@ struct __wt_cursor_log {
uint32_t step_count; /* Intra-record count */
uint32_t rectype; /* Record type */
uint64_t txnid; /* Record txnid */
- uint32_t flags;
+
+#define WT_CURLOG_ARCHIVE_LOCK 0x01 /* Archive lock held */
+ uint8_t flags;
};
struct __wt_cursor_metadata {
@@ -424,7 +429,7 @@ struct __wt_cursor_metadata {
#define WT_MDC_CREATEONLY 0x01
#define WT_MDC_ONMETADATA 0x02
#define WT_MDC_POSITIONED 0x04
- uint32_t flags;
+ uint8_t flags;
};
struct __wt_join_stats_group {
diff --git a/src/include/cursor.i b/src/include/cursor.i
index 553dd03f958..76a08138afb 100644
--- a/src/include/cursor.i
+++ b/src/include/cursor.i
@@ -38,9 +38,6 @@ __cursor_pos_clear(WT_CURSOR_BTREE *cbt)
cbt->ins_head = NULL;
cbt->ins_stack[0] = NULL;
- cbt->cip_saved = NULL;
- cbt->rip_saved = NULL;
-
F_CLR(cbt, WT_CBT_POSITION_MASK);
}
@@ -120,7 +117,7 @@ __curfile_leave(WT_CURSOR_BTREE *cbt)
*/
if (cbt->ref != NULL &&
cbt->page_deleted_count > WT_BTREE_DELETE_THRESHOLD)
- __wt_page_evict_soon(cbt->ref->page);
+ WT_TRET(__wt_page_evict_soon(session, cbt->ref));
cbt->page_deleted_count = 0;
/*
@@ -130,7 +127,7 @@ __curfile_leave(WT_CURSOR_BTREE *cbt)
*
* Clear the reference regardless, so we don't try the release twice.
*/
- ret = __wt_page_release(session, cbt->ref, 0);
+ WT_TRET(__wt_page_release(session, cbt->ref, 0));
cbt->ref = NULL;
return (ret);
diff --git a/src/include/extern.h b/src/include/extern.h
index b0c0f6eccad..f3a639ac07f 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -44,7 +44,7 @@ extern void __wt_block_extlist_free(WT_SESSION_IMPL *session, WT_EXTLIST *el);
extern int __wt_block_map(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_regionp, size_t *lengthp, void *mapped_cookiep);
extern int __wt_block_unmap(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_region, size_t length, void *mapped_cookie);
extern int __wt_block_manager_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[], bool forced_salvage, bool readonly, uint32_t allocsize, WT_BM **bmp);
-extern int __wt_block_manager_drop(WT_SESSION_IMPL *session, const char *filename);
+extern int __wt_block_manager_drop( WT_SESSION_IMPL *session, const char *filename, bool durable);
extern int __wt_block_manager_create( WT_SESSION_IMPL *session, const char *filename, uint32_t allocsize);
extern void __wt_block_configure_first_fit(WT_BLOCK *block, bool on);
extern int __wt_block_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[], bool forced_salvage, bool readonly, uint32_t allocsize, WT_BLOCK **blockp);
@@ -76,8 +76,8 @@ extern int __wt_block_verify_addr(WT_SESSION_IMPL *session, WT_BLOCK *block, con
extern int __wt_block_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t len);
extern int __wt_block_discard(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t added_size);
extern int __wt_block_write_size(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t *sizep);
-extern int __wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool data_cksum);
-extern int __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t *offsetp, uint32_t *sizep, uint32_t *cksump, bool data_cksum, bool caller_locked);
+extern int __wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool data_cksum, bool checkpoint_io);
+extern int __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t *offsetp, uint32_t *sizep, uint32_t *cksump, bool data_cksum, bool checkpoint_io, bool caller_locked);
extern int __wt_bloom_create( WT_SESSION_IMPL *session, const char *uri, const char *config, uint64_t count, uint32_t factor, uint32_t k, WT_BLOOM **bloomp);
extern int __wt_bloom_open(WT_SESSION_IMPL *session, const char *uri, uint32_t factor, uint32_t k, WT_CURSOR *owner, WT_BLOOM **bloomp);
extern int __wt_bloom_insert(WT_BLOOM *bloom, WT_ITEM *key);
@@ -139,7 +139,7 @@ extern void __wt_btree_evictable(WT_SESSION_IMPL *session, bool on);
extern int __wt_btree_huffman_open(WT_SESSION_IMPL *session);
extern void __wt_btree_huffman_close(WT_SESSION_IMPL *session);
extern int __wt_bt_read(WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, size_t addr_size);
-extern int __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool checkpoint, bool compressed);
+extern int __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool checkpoint, bool checkpoint_io, bool compressed);
extern const char *__wt_page_type_string(u_int type);
extern const char *__wt_cell_type_string(uint8_t type);
extern const char *__wt_page_addr_string(WT_SESSION_IMPL *session, WT_REF *ref, WT_ITEM *buf);
@@ -161,11 +161,11 @@ extern int __wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPD
extern int __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[]);
extern void __wt_split_stash_discard(WT_SESSION_IMPL *session);
extern void __wt_split_stash_discard_all( WT_SESSION_IMPL *session_safe, WT_SESSION_IMPL *session);
-extern int __wt_multi_to_ref(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi, WT_REF **refp, size_t *incrp);
+extern int __wt_multi_to_ref(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi, WT_REF **refp, size_t *incrp, bool closing);
extern int __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref);
extern int __wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int closing);
extern int __wt_split_reverse(WT_SESSION_IMPL *session, WT_REF *ref);
-extern int __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref);
+extern int __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, WT_MULTI *multi);
extern int __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst);
extern int __wt_cache_op(WT_SESSION_IMPL *session, WT_CACHE_OP op);
extern int __wt_upgrade(WT_SESSION_IMPL *session, const char *cfg[]);
@@ -282,7 +282,6 @@ extern int __wt_curconfig_open(WT_SESSION_IMPL *session, const char *uri, const
extern int __wt_curds_open( WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_DATA_SOURCE *dsrc, WT_CURSOR **cursorp);
extern int __wt_curdump_create(WT_CURSOR *child, WT_CURSOR *owner, WT_CURSOR **cursorp);
extern int __wt_curfile_update_check(WT_CURSOR *cursor);
-extern int __wt_curfile_create(WT_SESSION_IMPL *session, WT_CURSOR *owner, const char *cfg[], bool bulk, bool bitmap, WT_CURSOR **cursorp);
extern int __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp);
extern int __wt_curindex_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp);
extern int __wt_curjoin_joined(WT_CURSOR *cursor);
@@ -346,9 +345,11 @@ extern int __wt_evict_destroy(WT_SESSION_IMPL *session);
extern int __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session);
extern void __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session);
extern int __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full);
+extern int __wt_page_evict_soon(WT_SESSION_IMPL *session, WT_REF *ref);
extern void __wt_evict_priority_set(WT_SESSION_IMPL *session, uint64_t v);
extern void __wt_evict_priority_clear(WT_SESSION_IMPL *session);
extern int __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile);
+extern int __wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref);
extern int __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing);
extern int __wt_log_ckpt(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn);
extern int __wt_log_flush_lsn(WT_SESSION_IMPL *session, WT_LSN *lsn, bool start);
@@ -485,8 +486,7 @@ extern int __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **va
extern int __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value);
extern int __wt_filename(WT_SESSION_IMPL *session, const char *name, char **path);
extern int __wt_nfilename( WT_SESSION_IMPL *session, const char *name, size_t namelen, char **path);
-extern int __wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name);
-extern int __wt_rename_and_sync_directory( WT_SESSION_IMPL *session, const char *from, const char *to);
+extern int __wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name, bool durable);
extern int __wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to);
extern void __wt_abort(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
extern int __wt_calloc(WT_SESSION_IMPL *session, size_t number, size_t size, void *retp);
@@ -500,7 +500,7 @@ extern int __wt_errno(void);
extern const char *__wt_strerror(WT_SESSION_IMPL *session, int error, char *errbuf, size_t errlen);
extern int __wt_ext_map_windows_error( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, uint32_t windows_error);
extern bool __wt_handle_is_open(WT_SESSION_IMPL *session, const char *name);
-extern int __wt_open(WT_SESSION_IMPL *session, const char *name, WT_OPEN_FILE_TYPE file_type, u_int flags, WT_FH **fhp);
+extern int __wt_open(WT_SESSION_IMPL *session, const char *name, WT_FS_OPEN_FILE_TYPE file_type, u_int flags, WT_FH **fhp);
extern int __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp);
extern int __wt_close_connection_close(WT_SESSION_IMPL *session);
extern int __wt_os_inmemory(WT_SESSION_IMPL *session);
@@ -585,7 +585,7 @@ extern WT_DATA_SOURCE *__wt_schema_get_source(WT_SESSION_IMPL *session, const ch
extern int __wt_str_name_check(WT_SESSION_IMPL *session, const char *str);
extern int __wt_name_check(WT_SESSION_IMPL *session, const char *str, size_t len);
extern int __wt_schema_worker(WT_SESSION_IMPL *session, const char *uri, int (*file_func)(WT_SESSION_IMPL *, const char *[]), int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[], uint32_t open_flags);
-extern int __wt_session_notsup(WT_SESSION *wt_session);
+extern int __wt_session_notsup(WT_SESSION_IMPL *session);
extern int __wt_session_reset_cursors(WT_SESSION_IMPL *session, bool free_buffers);
extern int __wt_session_copy_values(WT_SESSION_IMPL *session);
extern int __wt_session_release_resources(WT_SESSION_IMPL *session);
@@ -719,7 +719,6 @@ extern void __wt_txn_stats_update(WT_SESSION_IMPL *session);
extern void __wt_txn_destroy(WT_SESSION_IMPL *session);
extern int __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_txn_global_destroy(WT_SESSION_IMPL *session);
-extern int __wt_checkpoint_name_ok(WT_SESSION_IMPL *session, const char *name, size_t len);
extern int __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]);
diff --git a/src/include/flags.h b/src/include/flags.h
index f134af69d29..9346605ed24 100644
--- a/src/include/flags.h
+++ b/src/include/flags.h
@@ -2,6 +2,7 @@
* DO NOT EDIT: automatically built by dist/flags.py.
* flags section: BEGIN
*/
+#define WT_CHECKPOINTING 0x00000001
#define WT_CONN_CACHE_POOL 0x00000001
#define WT_CONN_CKPT_SYNC 0x00000002
#define WT_CONN_CLOSING 0x00000004
@@ -21,10 +22,12 @@
#define WT_CONN_SERVER_STATISTICS 0x00010000
#define WT_CONN_SERVER_SWEEP 0x00020000
#define WT_CONN_WAS_BACKUP 0x00040000
-#define WT_EVICTING 0x00000001
-#define WT_EVICT_IN_MEMORY 0x00000002
-#define WT_EVICT_LOOKASIDE 0x00000004
-#define WT_EVICT_UPDATE_RESTORE 0x00000008
+#define WT_EVICTING 0x00000002
+#define WT_EVICT_INMEM_SPLIT 0x00000004
+#define WT_EVICT_IN_MEMORY 0x00000008
+#define WT_EVICT_LOOKASIDE 0x00000010
+#define WT_EVICT_SCRUB 0x00000020
+#define WT_EVICT_UPDATE_RESTORE 0x00000040
#define WT_LOGSCAN_FIRST 0x00000001
#define WT_LOGSCAN_FROM_CKP 0x00000002
#define WT_LOGSCAN_ONE 0x00000004
@@ -100,7 +103,7 @@
#define WT_VERB_VERIFY 0x00800000
#define WT_VERB_VERSION 0x01000000
#define WT_VERB_WRITE 0x02000000
-#define WT_VISIBILITY_ERR 0x00000010
+#define WT_VISIBILITY_ERR 0x00000080
/*
* flags section: END
* DO NOT EDIT: automatically built by dist/flags.py.
diff --git a/src/include/hardware.h b/src/include/hardware.h
index 93ed8a868b6..0e52818ae05 100644
--- a/src/include/hardware.h
+++ b/src/include/hardware.h
@@ -45,7 +45,16 @@
&(p)->flags_atomic, __orig, __orig & ~(uint8_t)(mask))); \
} while (0)
-#define WT_CACHE_LINE_ALIGNMENT 64 /* Cache line alignment */
+/*
+ * Cache line alignment.
+ */
+#if defined(__PPC64__) || defined(PPC64)
+#define WT_CACHE_LINE_ALIGNMENT 128
+#elif defined(__s390x__)
+#define WT_CACHE_LINE_ALIGNMENT 256
+#else
+#define WT_CACHE_LINE_ALIGNMENT 64
+#endif
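The per-architecture constant is typically consumed by padding hot structures so independent counters don't share a cache line; a minimal sketch, with an illustrative structure that is not part of this patch:

	#include <stdint.h>

	/* Sketch: pad a hot counter to a full cache line to avoid false sharing. */
	struct padded_counter {
		uint64_t value;
		uint8_t	 pad[WT_CACHE_LINE_ALIGNMENT - sizeof(uint64_t)];
	};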
#define WT_CACHE_LINE_ALIGNMENT_VERIFY(session, a) \
WT_ASSERT(session, \
WT_PTRDIFF(&(a)[1], &(a)[0]) >= WT_CACHE_LINE_ALIGNMENT && \
diff --git a/src/include/intpack.i b/src/include/intpack.i
index b27afd24e6c..e8bea58cede 100644
--- a/src/include/intpack.i
+++ b/src/include/intpack.i
@@ -59,7 +59,7 @@
/* Count the leading zero bytes. */
#if defined(__GNUC__)
#define WT_LEADING_ZEROS(x, i) \
- (i = (x == 0) ? (int)sizeof (x) : __builtin_clzll(x) >> 3)
+ (i = (x == 0) ? (int)sizeof(x) : __builtin_clzll(x) >> 3)
#elif defined(_MSC_VER)
#define WT_LEADING_ZEROS(x, i) do { \
if (x == 0) i = (int)sizeof(x); \
@@ -89,7 +89,7 @@ __wt_vpack_posint(uint8_t **pp, size_t maxlen, uint64_t x)
int len, lz, shift;
WT_LEADING_ZEROS(x, lz);
- len = (int)sizeof (x) - lz;
+ len = (int)sizeof(x) - lz;
WT_SIZE_CHECK_PACK(len + 1, maxlen);
p = *pp;
@@ -114,7 +114,7 @@ __wt_vpack_negint(uint8_t **pp, size_t maxlen, uint64_t x)
int len, lz, shift;
WT_LEADING_ZEROS(~x, lz);
- len = (int)sizeof (x) - lz;
+ len = (int)sizeof(x) - lz;
WT_SIZE_CHECK_PACK(len + 1, maxlen);
p = *pp;
@@ -170,7 +170,7 @@ __wt_vunpack_negint(const uint8_t **pp, size_t maxlen, uint64_t *retp)
/* There are four length bits in the first byte. */
p = *pp;
- len = (int)sizeof (x) - (*p++ & 0xf);
+ len = (int)sizeof(x) - (*p++ & 0xf);
WT_SIZE_CHECK_UNPACK(len + 1, maxlen);
for (x = UINT64_MAX; len != 0; --len)
diff --git a/src/include/os_fhandle.i b/src/include/os_fhandle.i
index 313bf8eca3f..9bf5ce0e60b 100644
--- a/src/include/os_fhandle.i
+++ b/src/include/os_fhandle.i
@@ -26,7 +26,7 @@ __wt_fsync(WT_SESSION_IMPL *session, WT_FH *fh, bool block)
* There is no way to check when the non-blocking sync-file-range is
* complete, but we track the time taken in the call for completeness.
*/
- WT_STAT_FAST_CONN_INCR_ATOMIC(session, fsync_active);
+ WT_STAT_FAST_CONN_INCR_ATOMIC(session, thread_fsync_active);
WT_STAT_FAST_CONN_INCR(session, fsync_io);
if (block)
ret = (handle->fh_sync == NULL ? 0 :
@@ -34,7 +34,7 @@ __wt_fsync(WT_SESSION_IMPL *session, WT_FH *fh, bool block)
else
ret = (handle->fh_sync_nowait == NULL ? 0 :
handle->fh_sync_nowait(handle, (WT_SESSION *)session));
- WT_STAT_FAST_CONN_DECR_ATOMIC(session, fsync_active);
+ WT_STAT_FAST_CONN_DECR_ATOMIC(session, thread_fsync_active);
return (ret);
}
@@ -107,13 +107,13 @@ __wt_read(
"%s: handle-read: %" WT_SIZET_FMT " at %" PRIuMAX,
fh->handle->name, len, (uintmax_t)offset));
- WT_STAT_FAST_CONN_INCR_ATOMIC(session, read_active);
+ WT_STAT_FAST_CONN_INCR_ATOMIC(session, thread_read_active);
WT_STAT_FAST_CONN_INCR(session, read_io);
ret = fh->handle->fh_read(
fh->handle, (WT_SESSION *)session, offset, len, buf);
- WT_STAT_FAST_CONN_DECR_ATOMIC(session, read_active);
+ WT_STAT_FAST_CONN_DECR_ATOMIC(session, thread_read_active);
return (ret);
}
@@ -165,12 +165,12 @@ __wt_write(WT_SESSION_IMPL *session,
"%s: handle-write: %" WT_SIZET_FMT " at %" PRIuMAX,
fh->handle->name, len, (uintmax_t)offset));
- WT_STAT_FAST_CONN_INCR_ATOMIC(session, write_active);
+ WT_STAT_FAST_CONN_INCR_ATOMIC(session, thread_write_active);
WT_STAT_FAST_CONN_INCR(session, write_io);
ret = fh->handle->fh_write(
fh->handle, (WT_SESSION *)session, offset, len, buf);
- WT_STAT_FAST_CONN_DECR_ATOMIC(session, write_active);
+ WT_STAT_FAST_CONN_DECR_ATOMIC(session, thread_write_active);
return (ret);
}
diff --git a/src/include/os_fs.i b/src/include/os_fs.i
index 88ee71d953a..a3a2fe29b65 100644
--- a/src/include/os_fs.i
+++ b/src/include/os_fs.i
@@ -8,7 +8,7 @@
/*
* __wt_fs_directory_list --
- * Get a list of files from a directory.
+ * Return a list of files from a directory.
*/
static inline int
__wt_fs_directory_list(WT_SESSION_IMPL *session,
@@ -61,61 +61,6 @@ __wt_fs_directory_list_free(
}
/*
- * __wt_fs_directory_sync --
- * Flush a directory to ensure file creation is durable.
- */
-static inline int
-__wt_fs_directory_sync(WT_SESSION_IMPL *session, const char *name)
-{
- WT_DECL_RET;
- WT_FILE_SYSTEM *file_system;
- WT_SESSION *wt_session;
- char *copy, *dir;
-
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
-
- WT_RET(__wt_verbose(
- session, WT_VERB_FILEOPS, "%s: directory-sync", name));
-
- /*
- * POSIX 1003.1 does not require that fsync of a file handle ensures the
- * entry in the directory containing the file has also reached disk (and
- * there are historic Linux filesystems requiring it). If the underlying
- * filesystem method is set, do an explicit fsync on a file descriptor
- * for the directory to be sure.
- *
- * directory-sync is not a required call, no method means the call isn't
- * needed.
- */
- file_system = S2C(session)->file_system;
- if (file_system->fs_directory_sync == NULL)
- return (0);
-
- copy = NULL;
- if (name == NULL || strchr(name, '/') == NULL)
- name = S2C(session)->home;
- else {
- /*
- * File name construction should not return a path without any
- * slash separator, but caution isn't unreasonable.
- */
- WT_RET(__wt_filename(session, name, &copy));
- if ((dir = strrchr(copy, '/')) == NULL)
- name = S2C(session)->home;
- else {
- dir[1] = '\0';
- name = copy;
- }
- }
-
- wt_session = (WT_SESSION *)session;
- ret = file_system->fs_directory_sync(file_system, wt_session, name);
-
- __wt_free(session, copy);
- return (ret);
-}
-
-/*
* __wt_fs_exist --
* Return if the file exists.
*/
@@ -141,10 +86,10 @@ __wt_fs_exist(WT_SESSION_IMPL *session, const char *name, bool *existp)
/*
* __wt_fs_remove --
- * POSIX remove.
+ * Remove the file.
*/
static inline int
-__wt_fs_remove(WT_SESSION_IMPL *session, const char *name)
+__wt_fs_remove(WT_SESSION_IMPL *session, const char *name, bool durable)
{
WT_DECL_RET;
WT_FILE_SYSTEM *file_system;
@@ -169,7 +114,8 @@ __wt_fs_remove(WT_SESSION_IMPL *session, const char *name)
file_system = S2C(session)->file_system;
wt_session = (WT_SESSION *)session;
- ret = file_system->fs_remove(file_system, wt_session, path);
+ ret = file_system->fs_remove(
+ file_system, wt_session, path, durable ? WT_FS_DURABLE : 0);
__wt_free(session, path);
return (ret);
@@ -177,10 +123,11 @@ __wt_fs_remove(WT_SESSION_IMPL *session, const char *name)
/*
* __wt_fs_rename --
- * POSIX rename.
+ * Rename the file.
*/
static inline int
-__wt_fs_rename(WT_SESSION_IMPL *session, const char *from, const char *to)
+__wt_fs_rename(
+ WT_SESSION_IMPL *session, const char *from, const char *to, bool durable)
{
WT_DECL_RET;
WT_FILE_SYSTEM *file_system;
@@ -211,8 +158,8 @@ __wt_fs_rename(WT_SESSION_IMPL *session, const char *from, const char *to)
file_system = S2C(session)->file_system;
wt_session = (WT_SESSION *)session;
- ret = file_system->fs_rename(
- file_system, wt_session, from_path, to_path);
+ ret = file_system->fs_rename(file_system,
+ wt_session, from_path, to_path, durable ? WT_FS_DURABLE : 0);
err: __wt_free(session, from_path);
__wt_free(session, to_path);
@@ -221,7 +168,7 @@ err: __wt_free(session, from_path);
/*
* __wt_fs_size --
- * Get the size of a file in bytes, by file name.
+ * Return the size of a file in bytes, by file name.
*/
static inline int
__wt_fs_size(WT_SESSION_IMPL *session, const char *name, wt_off_t *sizep)
diff --git a/src/include/os_fstream.i b/src/include/os_fstream.i
index 8c0fdadbdb0..92274431011 100644
--- a/src/include/os_fstream.i
+++ b/src/include/os_fstream.i
@@ -93,5 +93,5 @@ __wt_sync_and_rename(WT_SESSION_IMPL *session,
WT_TRET(__wt_fclose(session, &fstr));
WT_RET(ret);
- return (__wt_rename_and_sync_directory(session, from, to));
+ return (__wt_fs_rename(session, from, to, true));
}
diff --git a/src/include/queue.h b/src/include/queue.h
index 1d494875cf6..e3d4daf0f4c 100644
--- a/src/include/queue.h
+++ b/src/include/queue.h
@@ -1,4 +1,4 @@
-/*
+/*-
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
@@ -27,28 +27,18 @@
* SUCH DAMAGE.
*
* @(#)queue.h 8.5 (Berkeley) 8/20/94
- * $FreeBSD: src/sys/sys/queue.h,v 1.54 2002/08/05 05:18:43 alfred Exp $
+ * $FreeBSD$
*/
-#ifndef _DB_QUEUE_H_
-#define _DB_QUEUE_H_
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
/*
+ * This is a stripped-down version of the FreeBSD sys/queue.h include file.
+ *
 * WiredTiger only uses the TAILQ macros: we've gotten into trouble in the past
 * by trying to use simpler queues, then discovering that a list we didn't
 * think would ever get large could, under some workloads, become large, and
 * the linear cost of removing elements from the simpler macros proved to be
 * more trouble than the memory savings were worth.
*
- * Additionally, we've altered the TAILQ_INSERT_XXX functions to include a write
- * barrier, in order to ensure we never insert a partially built structure onto
- * a list (this is required because the spinlocks we use don't necessarily imply
- * a write barrier).
- *
* We #undef all of the macros because there are incompatible versions of this
* file and these macros on various systems. What makes the problem worse is
* they are included and/or defined by system include files which we may have
@@ -57,13 +47,28 @@ extern "C" {
* several of the LIST_XXX macros. Visual C.NET 7.0 also defines some of these
* same macros in Vc7\PlatformSDK\Include\WinNT.h. Make sure we use ours.
*/
-
+#undef QMD_SAVELINK
+#undef QMD_TAILQ_CHECK_HEAD
+#undef QMD_TAILQ_CHECK_NEXT
+#undef QMD_TAILQ_CHECK_PREV
+#undef QMD_TAILQ_CHECK_TAIL
+#undef QMD_TRACE_ELEM
+#undef QMD_TRACE_HEAD
+#undef QUEUE_TYPEOF
+#undef TAILQ_CLASS_ENTRY
+#undef TAILQ_CLASS_HEAD
#undef TAILQ_CONCAT
#undef TAILQ_EMPTY
#undef TAILQ_ENTRY
#undef TAILQ_FIRST
#undef TAILQ_FOREACH
+#undef TAILQ_FOREACH_FROM
+#undef TAILQ_FOREACH_FROM_SAFE
#undef TAILQ_FOREACH_REVERSE
+#undef TAILQ_FOREACH_REVERSE_FROM
+#undef TAILQ_FOREACH_REVERSE_FROM_SAFE
+#undef TAILQ_FOREACH_REVERSE_SAFE
+#undef TAILQ_FOREACH_SAFE
#undef TAILQ_HEAD
#undef TAILQ_HEAD_INITIALIZER
#undef TAILQ_INIT
@@ -76,41 +81,25 @@ extern "C" {
#undef TAILQ_PREV
#undef TAILQ_REMOVE
#undef TRACEBUF
+#undef TRACEBUF_INITIALIZER
#undef TRASHIT
+#undef TAILQ_SWAP
-#define QUEUE_MACRO_DEBUG 0
-#if QUEUE_MACRO_DEBUG
-/* Store the last 2 places the queue element or head was altered */
-struct qm_trace {
- char * lastfile;
- int lastline;
- char * prevfile;
- int prevline;
-};
-
-#define TRACEBUF struct qm_trace trace;
-#define TRASHIT(x) do {(x) = (void *)-1;} while (0)
-
-#define QMD_TRACE_HEAD(head) do { \
- (head)->trace.prevline = (head)->trace.lastline; \
- (head)->trace.prevfile = (head)->trace.lastfile; \
- (head)->trace.lastline = __LINE__; \
- (head)->trace.lastfile = __FILE__; \
-} while (0)
-
-#define QMD_TRACE_ELEM(elem) do { \
- (elem)->trace.prevline = (elem)->trace.lastline; \
- (elem)->trace.prevfile = (elem)->trace.lastfile; \
- (elem)->trace.lastline = __LINE__; \
- (elem)->trace.lastfile = __FILE__; \
-} while (0)
-
-#else
#define QMD_TRACE_ELEM(elem)
#define QMD_TRACE_HEAD(head)
+#define QMD_SAVELINK(name, link)
#define TRACEBUF
+#define TRACEBUF_INITIALIZER
#define TRASHIT(x)
-#endif /* QUEUE_MACRO_DEBUG */
+
+#ifdef __cplusplus
+/*
+ * In C++ there can be structure lists and class lists:
+ */
+#define QUEUE_TYPEOF(type) type
+#else
+#define QUEUE_TYPEOF(type) struct type
+#endif
/*
* Tail queue declarations.
@@ -122,8 +111,15 @@ struct name { \
TRACEBUF \
}
+#define TAILQ_CLASS_HEAD(name, type) \
+struct name { \
+ class type *tqh_first; /* first element */ \
+ class type **tqh_last; /* addr of last next element */ \
+ TRACEBUF \
+}
+
#define TAILQ_HEAD_INITIALIZER(head) \
- { NULL, &(head).tqh_first }
+ { NULL, &(head).tqh_first, TRACEBUF_INITIALIZER }
#define TAILQ_ENTRY(type) \
struct { \
@@ -132,16 +128,28 @@ struct { \
TRACEBUF \
}
+#define TAILQ_CLASS_ENTRY(type) \
+struct { \
+ class type *tqe_next; /* next element */ \
+ class type **tqe_prev; /* address of previous next element */ \
+ TRACEBUF \
+}
+
/*
* Tail queue functions.
*/
+#define QMD_TAILQ_CHECK_HEAD(head, field)
+#define QMD_TAILQ_CHECK_TAIL(head, headname)
+#define QMD_TAILQ_CHECK_NEXT(elm, field)
+#define QMD_TAILQ_CHECK_PREV(elm, field)
+
#define TAILQ_CONCAT(head1, head2, field) do { \
if (!TAILQ_EMPTY(head2)) { \
*(head1)->tqh_last = (head2)->tqh_first; \
(head2)->tqh_first->field.tqe_prev = (head1)->tqh_last; \
(head1)->tqh_last = (head2)->tqh_last; \
TAILQ_INIT((head2)); \
- QMD_TRACE_HEAD(head); \
+ QMD_TRACE_HEAD(head1); \
QMD_TRACE_HEAD(head2); \
} \
} while (0)
@@ -155,11 +163,41 @@ struct { \
(var); \
(var) = TAILQ_NEXT((var), field))
+#define TAILQ_FOREACH_FROM(var, head, field) \
+ for ((var) = ((var) ? (var) : TAILQ_FIRST((head))); \
+ (var); \
+ (var) = TAILQ_NEXT((var), field))
+
+#define TAILQ_FOREACH_SAFE(var, head, field, tvar) \
+ for ((var) = TAILQ_FIRST((head)); \
+ (var) && ((tvar) = TAILQ_NEXT((var), field), 1); \
+ (var) = (tvar))
+
+#define TAILQ_FOREACH_FROM_SAFE(var, head, field, tvar) \
+ for ((var) = ((var) ? (var) : TAILQ_FIRST((head))); \
+ (var) && ((tvar) = TAILQ_NEXT((var), field), 1); \
+ (var) = (tvar))
+
#define TAILQ_FOREACH_REVERSE(var, head, headname, field) \
for ((var) = TAILQ_LAST((head), headname); \
(var); \
(var) = TAILQ_PREV((var), headname, field))
+#define TAILQ_FOREACH_REVERSE_FROM(var, head, headname, field) \
+ for ((var) = ((var) ? (var) : TAILQ_LAST((head), headname)); \
+ (var); \
+ (var) = TAILQ_PREV((var), headname, field))
+
+#define TAILQ_FOREACH_REVERSE_SAFE(var, head, headname, field, tvar) \
+ for ((var) = TAILQ_LAST((head), headname); \
+ (var) && ((tvar) = TAILQ_PREV((var), headname, field), 1); \
+ (var) = (tvar))
+
+#define TAILQ_FOREACH_REVERSE_FROM_SAFE(var, head, headname, field, tvar) \
+ for ((var) = ((var) ? (var) : TAILQ_LAST((head), headname)); \
+ (var) && ((tvar) = TAILQ_PREV((var), headname, field), 1); \
+ (var) = (tvar))
+
#define TAILQ_INIT(head) do { \
TAILQ_FIRST((head)) = NULL; \
(head)->tqh_last = &TAILQ_FIRST((head)); \
@@ -167,9 +205,9 @@ struct { \
} while (0)
#define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \
- WT_WRITE_BARRIER(); \
+ QMD_TAILQ_CHECK_NEXT(listelm, field); \
if ((TAILQ_NEXT((elm), field) = TAILQ_NEXT((listelm), field)) != NULL)\
- TAILQ_NEXT((elm), field)->field.tqe_prev = \
+ TAILQ_NEXT((elm), field)->field.tqe_prev = \
&TAILQ_NEXT((elm), field); \
else { \
(head)->tqh_last = &TAILQ_NEXT((elm), field); \
@@ -178,21 +216,21 @@ struct { \
TAILQ_NEXT((listelm), field) = (elm); \
(elm)->field.tqe_prev = &TAILQ_NEXT((listelm), field); \
QMD_TRACE_ELEM(&(elm)->field); \
- QMD_TRACE_ELEM(&listelm->field); \
+ QMD_TRACE_ELEM(&(listelm)->field); \
} while (0)
#define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \
- WT_WRITE_BARRIER(); \
+ QMD_TAILQ_CHECK_PREV(listelm, field); \
(elm)->field.tqe_prev = (listelm)->field.tqe_prev; \
TAILQ_NEXT((elm), field) = (listelm); \
*(listelm)->field.tqe_prev = (elm); \
(listelm)->field.tqe_prev = &TAILQ_NEXT((elm), field); \
QMD_TRACE_ELEM(&(elm)->field); \
- QMD_TRACE_ELEM(&listelm->field); \
+ QMD_TRACE_ELEM(&(listelm)->field); \
} while (0)
#define TAILQ_INSERT_HEAD(head, elm, field) do { \
- WT_WRITE_BARRIER(); \
+ QMD_TAILQ_CHECK_HEAD(head, field); \
if ((TAILQ_NEXT((elm), field) = TAILQ_FIRST((head))) != NULL) \
TAILQ_FIRST((head))->field.tqe_prev = \
&TAILQ_NEXT((elm), field); \
@@ -205,7 +243,7 @@ struct { \
} while (0)
#define TAILQ_INSERT_TAIL(head, elm, field) do { \
- WT_WRITE_BARRIER(); \
+ QMD_TAILQ_CHECK_TAIL(head, field); \
TAILQ_NEXT((elm), field) = NULL; \
(elm)->field.tqe_prev = (head)->tqh_last; \
*(head)->tqh_last = (elm); \
@@ -223,20 +261,36 @@ struct { \
(*(((struct headname *)((elm)->field.tqe_prev))->tqh_last))
#define TAILQ_REMOVE(head, elm, field) do { \
+ QMD_SAVELINK(oldnext, (elm)->field.tqe_next); \
+ QMD_SAVELINK(oldprev, (elm)->field.tqe_prev); \
+ QMD_TAILQ_CHECK_NEXT(elm, field); \
+ QMD_TAILQ_CHECK_PREV(elm, field); \
if ((TAILQ_NEXT((elm), field)) != NULL) \
- TAILQ_NEXT((elm), field)->field.tqe_prev = \
+ TAILQ_NEXT((elm), field)->field.tqe_prev = \
(elm)->field.tqe_prev; \
else { \
(head)->tqh_last = (elm)->field.tqe_prev; \
QMD_TRACE_HEAD(head); \
} \
*(elm)->field.tqe_prev = TAILQ_NEXT((elm), field); \
- TRASHIT((elm)->field.tqe_next); \
- TRASHIT((elm)->field.tqe_prev); \
+ TRASHIT(*oldnext); \
+ TRASHIT(*oldprev); \
QMD_TRACE_ELEM(&(elm)->field); \
} while (0)
-#if defined(__cplusplus)
-}
-#endif
-#endif /* !_DB_QUEUE_H_ */
+#define TAILQ_SWAP(head1, head2, type, field) do { \
+ QUEUE_TYPEOF(type) *swap_first = (head1)->tqh_first; \
+ QUEUE_TYPEOF(type) **swap_last = (head1)->tqh_last; \
+ (head1)->tqh_first = (head2)->tqh_first; \
+ (head1)->tqh_last = (head2)->tqh_last; \
+ (head2)->tqh_first = swap_first; \
+ (head2)->tqh_last = swap_last; \
+ if ((swap_first = (head1)->tqh_first) != NULL) \
+ swap_first->field.tqe_prev = &(head1)->tqh_first; \
+ else \
+ (head1)->tqh_last = &(head1)->tqh_first; \
+ if ((swap_first = (head2)->tqh_first) != NULL) \
+ swap_first->field.tqe_prev = &(head2)->tqh_first; \
+ else \
+ (head2)->tqh_last = &(head2)->tqh_first; \
+} while (0)
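A minimal usage sketch of the TAILQ macros kept above; the element type and function are illustrative:

	struct elem {
		int value;
		TAILQ_ENTRY(elem) q;		/* embedded list linkage */
	};
	TAILQ_HEAD(elem_list, elem) head = TAILQ_HEAD_INITIALIZER(head);

	static void
	example(struct elem *e)
	{
		TAILQ_INSERT_TAIL(&head, e, q);	/* append */
		TAILQ_REMOVE(&head, e, q);	/* constant-time removal */
	}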
diff --git a/src/include/stat.h b/src/include/stat.h
index 57126af8aa4..1df24382236 100644
--- a/src/include/stat.h
+++ b/src/include/stat.h
@@ -145,14 +145,14 @@ __wt_stats_clear(void *stats_arg, int slot)
#define WT_STAT_DECRV(session, stats, fld, value) \
(stats)[WT_STATS_SLOT_ID(session)]->fld -= (int64_t)(value)
#define WT_STAT_DECRV_ATOMIC(session, stats, fld, value) \
- __wt_atomic_addi64( \
+ __wt_atomic_subi64( \
&(stats)[WT_STATS_SLOT_ID(session)]->fld, (int64_t)(value))
#define WT_STAT_DECR(session, stats, fld) \
WT_STAT_DECRV(session, stats, fld, 1)
#define WT_STAT_INCRV(session, stats, fld, value) \
(stats)[WT_STATS_SLOT_ID(session)]->fld += (int64_t)(value)
#define WT_STAT_INCRV_ATOMIC(session, stats, fld, value) \
- __wt_atomic_subi64( \
+ __wt_atomic_addi64( \
&(stats)[WT_STATS_SLOT_ID(session)]->fld, (int64_t)(value))
#define WT_STAT_INCR(session, stats, fld) \
WT_STAT_INCRV(session, stats, fld, 1)
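The add/sub swap above makes the atomic variants match their names. A brief usage sketch; the thread_read_active field is added elsewhere in this patch, and the call sites are illustrative:

	WT_STAT_INCRV_ATOMIC(session, stats, thread_read_active, 1);	/* atomic += 1 */
	WT_STAT_DECRV_ATOMIC(session, stats, thread_read_active, 1);	/* atomic -= 1 */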
@@ -273,9 +273,12 @@ struct __wt_connection_stats {
int64_t block_write;
int64_t block_byte_read;
int64_t block_byte_write;
+ int64_t block_byte_write_checkpoint;
int64_t block_map_read;
int64_t block_byte_map_read;
+ int64_t cache_bytes_image;
int64_t cache_bytes_inuse;
+ int64_t cache_bytes_other;
int64_t cache_bytes_read;
int64_t cache_bytes_write;
int64_t cache_eviction_checkpoint;
@@ -309,6 +312,8 @@ struct __wt_connection_stats {
int64_t cache_eviction_maximum_page_size;
int64_t cache_eviction_dirty;
int64_t cache_eviction_app_dirty;
+ int64_t cache_read_overflow;
+ int64_t cache_overflow_value;
int64_t cache_eviction_deepen;
int64_t cache_write_lookaside;
int64_t cache_pages_inuse;
@@ -316,6 +321,7 @@ struct __wt_connection_stats {
int64_t cache_eviction_force_delete;
int64_t cache_eviction_app;
int64_t cache_eviction_pages_queued;
+ int64_t cache_eviction_pages_queued_urgent;
int64_t cache_eviction_pages_queued_oldest;
int64_t cache_read;
int64_t cache_read_lookaside;
@@ -328,7 +334,6 @@ struct __wt_connection_stats {
int64_t cache_overhead;
int64_t cache_bytes_internal;
int64_t cache_bytes_leaf;
- int64_t cache_bytes_overflow;
int64_t cache_bytes_dirty;
int64_t cache_pages_dirty;
int64_t cache_eviction_clean;
@@ -408,9 +413,25 @@ struct __wt_connection_stats {
int64_t rec_split_stashed_objects;
int64_t session_cursor_open;
int64_t session_open;
- int64_t fsync_active;
- int64_t read_active;
- int64_t write_active;
+ int64_t session_table_compact_fail;
+ int64_t session_table_compact_success;
+ int64_t session_table_create_fail;
+ int64_t session_table_create_success;
+ int64_t session_table_drop_fail;
+ int64_t session_table_drop_success;
+ int64_t session_table_rebalance_fail;
+ int64_t session_table_rebalance_success;
+ int64_t session_table_rename_fail;
+ int64_t session_table_rename_success;
+ int64_t session_table_salvage_fail;
+ int64_t session_table_salvage_success;
+ int64_t session_table_truncate_fail;
+ int64_t session_table_truncate_success;
+ int64_t session_table_verify_fail;
+ int64_t session_table_verify_success;
+ int64_t thread_fsync_active;
+ int64_t thread_read_active;
+ int64_t thread_write_active;
int64_t page_busy_blocked;
int64_t page_forcible_evict_blocked;
int64_t page_locked_blocked;
@@ -424,13 +445,13 @@ struct __wt_connection_stats {
int64_t txn_checkpoint_time_max;
int64_t txn_checkpoint_time_min;
int64_t txn_checkpoint_time_recent;
+ int64_t txn_checkpoint_scrub_target;
+ int64_t txn_checkpoint_scrub_time;
int64_t txn_checkpoint_time_total;
int64_t txn_checkpoint;
int64_t txn_fail_cache;
int64_t txn_checkpoint_fsync_post;
- int64_t txn_checkpoint_fsync_pre;
int64_t txn_checkpoint_fsync_post_duration;
- int64_t txn_checkpoint_fsync_pre_duration;
int64_t txn_pinned_range;
int64_t txn_pinned_checkpoint_range;
int64_t txn_pinned_snapshot_range;
@@ -484,6 +505,7 @@ struct __wt_dsrc_stats {
int64_t btree_compact_rewrite;
int64_t btree_row_internal;
int64_t btree_row_leaf;
+ int64_t cache_bytes_inuse;
int64_t cache_bytes_read;
int64_t cache_bytes_write;
int64_t cache_eviction_checkpoint;
diff --git a/src/include/txn.h b/src/include/txn.h
index d10738cc670..2e41ae8620d 100644
--- a/src/include/txn.h
+++ b/src/include/txn.h
@@ -98,6 +98,7 @@ struct __wt_txn_global {
volatile uint32_t checkpoint_id; /* Checkpoint's session ID */
volatile uint64_t checkpoint_gen;
volatile uint64_t checkpoint_pinned;
+ volatile uint64_t checkpoint_txnid; /* Checkpoint's txn ID */
/* Named snapshot state. */
WT_RWLOCK *nsnap_rwlock;
diff --git a/src/include/txn.i b/src/include/txn.i
index 96f7426e421..8f0f49d9676 100644
--- a/src/include/txn.i
+++ b/src/include/txn.i
@@ -105,7 +105,8 @@ __wt_txn_oldest_id(WT_SESSION_IMPL *session)
{
WT_BTREE *btree;
WT_TXN_GLOBAL *txn_global;
- uint64_t checkpoint_gen, checkpoint_pinned, oldest_id;
+ uint64_t checkpoint_pinned, oldest_id;
+ bool include_checkpoint_txn;
txn_global = &S2C(session)->txn_global;
btree = S2BT_SAFE(session);
@@ -117,7 +118,11 @@ __wt_txn_oldest_id(WT_SESSION_IMPL *session)
* we take the minimum of the other two IDs, which is what we want.
*/
oldest_id = txn_global->oldest_id;
- WT_ORDERED_READ(checkpoint_gen, txn_global->checkpoint_gen);
+ if (btree == NULL)
+ include_checkpoint_txn = false;
+ else
+ WT_ORDERED_READ(
+ include_checkpoint_txn, btree->include_checkpoint_txn);
checkpoint_pinned = txn_global->checkpoint_pinned;
/*
@@ -130,10 +135,9 @@ __wt_txn_oldest_id(WT_SESSION_IMPL *session)
* checkpoint, or this handle is up to date with the active checkpoint
* then it's safe to ignore the checkpoint ID in the visibility check.
*/
- if (checkpoint_pinned == WT_TXN_NONE ||
+ if (!include_checkpoint_txn || checkpoint_pinned == WT_TXN_NONE ||
WT_TXNID_LT(oldest_id, checkpoint_pinned) ||
- WT_SESSION_IS_CHECKPOINT(session) ||
- (btree != NULL && btree->checkpoint_gen == checkpoint_gen))
+ WT_SESSION_IS_CHECKPOINT(session))
return (oldest_id);
return (checkpoint_pinned);
diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in
index f578f4e6c08..0e022048835 100644
--- a/src/include/wiredtiger.in
+++ b/src/include/wiredtiger.in
@@ -131,13 +131,13 @@ struct __wt_item {
* The maximum packed size of a 64-bit integer. The ::wiredtiger_struct_pack
* function will pack single long integers into at most this many bytes.
*/
-#define WT_INTPACK64_MAXSIZE ((int)sizeof (int64_t) + 1)
+#define WT_INTPACK64_MAXSIZE ((int)sizeof(int64_t) + 1)
/*!
* The maximum packed size of a 32-bit integer. The ::wiredtiger_struct_pack
* function will pack single integers into at most this many bytes.
*/
-#define WT_INTPACK32_MAXSIZE ((int)sizeof (int32_t) + 1)
+#define WT_INTPACK32_MAXSIZE ((int)sizeof(int32_t) + 1)
/*!
* A WT_CURSOR handle is the interface to a cursor.
@@ -405,6 +405,12 @@ struct __wt_cursor {
* WT_CURSOR::next (WT_CURSOR::prev) method will iterate from the
* beginning (end) of the table.
*
+ * If the cursor does not have record number keys or was not configured
+ * with "append=true", the cursor ends with no key set and a subsequent
+ * call to the WT_CURSOR::get_key method will fail. The cursor ends with
+ * no value set and a subsequent call to the WT_CURSOR::get_value method
+ * will fail.
+ *
* Inserting a new record after the current maximum record in a
* fixed-length bit field column-store (that is, a store with an
* 'r' type key and 't' type value) may implicitly create the missing
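
/*
 * Illustrative sketch, not part of this diff: with record-number keys and an
 * append cursor, WT_CURSOR::insert leaves the newly allocated record number
 * available through WT_CURSOR::get_key; without them, neither key nor value
 * is set after insert.  Assumes a table created with
 * "key_format=r,value_format=S"; "session" and "ret" come from the caller.
 */
WT_CURSOR *cursor;
uint64_t recno;

ret = session->open_cursor(session, "table:mytable", NULL, "append", &cursor);
cursor->set_value(cursor, "new value");
ret = cursor->insert(cursor);
ret = cursor->get_key(cursor, &recno);  /* the record number just allocated */
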
@@ -1159,8 +1165,8 @@ struct __wt_session {
* @config{ ),,}
* @config{memory_page_max, the maximum size a page can grow to in
* memory before being reconciled to disk. The specified size will be
- * adjusted to a lower bound of <code>50 * leaf_page_max</code>\, and an
- * upper bound of <code>cache_size / 2</code>. This limit is soft - it
+ * adjusted to a lower bound of <code>leaf_page_max</code>\, and an
+ * upper bound of <code>cache_size / 10</code>. This limit is soft - it
* is possible for pages to be temporarily larger than this value. This
* setting is ignored for LSM trees\, see \c chunk_size., an integer
* between 512B and 10TB; default \c 5MB.}
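
/*
 * Illustrative sketch, not part of this diff: memory_page_max is applied per
 * object at create time and, per the updated wording, is clamped between
 * leaf_page_max and cache_size / 10.  The table name is hypothetical;
 * "session" and "ret" come from the caller.
 */
ret = session->create(session, "table:mytable",
    "key_format=S,value_format=S,leaf_page_max=32KB,memory_page_max=10MB");
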
@@ -1783,8 +1789,6 @@ struct __wt_connection {
* database can configure both log_size and wait to set an upper bound
* for checkpoints; setting this value above 0 configures periodic
* checkpoints., an integer between 0 and 2GB; default \c 0.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;name, the checkpoint name., a string;
- * default \c "WiredTigerCheckpoint".}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;wait, seconds to wait between each
* checkpoint; setting this value above 0 configures periodic
* checkpoints., an integer between 0 and 100000; default \c 0.}
@@ -1806,11 +1810,11 @@ struct __wt_connection {
* @config{eviction_dirty_target, continue evicting until the cache has
* less dirty memory than the value\, as a percentage of the total cache
* size. Dirty pages will only be evicted if the cache is full enough
- * to trigger eviction., an integer between 5 and 99; default \c 80.}
+ * to trigger eviction., an integer between 1 and 99; default \c 5.}
* @config{eviction_dirty_trigger, trigger eviction when the cache is
* using this much memory for dirty content\, as a percentage of the
* total cache size. This setting only alters behavior if it is lower
- * than eviction_trigger., an integer between 5 and 99; default \c 95.}
+ * than eviction_trigger., an integer between 1 and 99; default \c 20.}
* @config{eviction_target, continue evicting until the cache has less
* total memory than the value\, as a percentage of the total cache
* size. Must be less than \c eviction_trigger., an integer between 10
@@ -1836,25 +1840,8 @@ struct __wt_connection {
* configuration options defined below.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;archive, automatically archive
* unneeded log files., a boolean flag; default \c true.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;compressor, configure a compressor
- * for log records. Permitted values are \c "none" or custom
- * compression engine name created with WT_CONNECTION::add_compressor.
- * If WiredTiger has builtin support for \c "snappy"\, \c "lz4" or \c
- * "zlib" compression\, these names are also available. See @ref
- * compression for more information., a string; default \c none.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;enabled, enable logging subsystem., a
- * boolean flag; default \c false.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;file_max, the maximum size of log
- * files., an integer between 100KB and 2GB; default \c 100MB.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;path, the path to a directory into
- * which the log files are written. If the value is not an absolute
- * path name\, the files are created relative to the database home., a
- * string; default \c ".".}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;prealloc,
- * pre-allocate log files., a boolean flag; default \c true.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;recover, run recovery or error if
- * recovery needs to run after an unclean shutdown., a string\, chosen
- * from the following options: \c "error"\, \c "on"; default \c on.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;prealloc, pre-allocate log files., a
+ * boolean flag; default \c true.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;zero_fill, manually write zeroes into
* log files., a boolean flag; default \c false.}
* @config{ ),,}
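
/*
 * Illustrative sketch, not part of this diff: periodic checkpoints can still
 * be adjusted through WT_CONNECTION::reconfigure, but the checkpoint "name"
 * option is gone and only the archive, prealloc and zero_fill log settings
 * remain reconfigurable.  "conn" and "ret" come from the caller.
 */
ret = conn->reconfigure(conn,
    "checkpoint=(wait=60,log_size=1GB),log=(archive=true,prealloc=true)");
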
@@ -1914,11 +1901,6 @@ struct __wt_connection {
* statistics in JSON format., a boolean flag; default \c false.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;on_close, log statistics on database
* close., a boolean flag; default \c false.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;path, the pathname to a file into
- * which the log records are written\, may contain ISO C standard
- * strftime conversion specifications. If the value is not an absolute
- * path name\, the file is created relative to the database home., a
- * string; default \c "WiredTigerStat.%d.%H".}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;sources, if non-empty\, include
* statistics for the list of data source URIs\, if they are open at the
* time of the statistics logging. The list may include URIs matching a
@@ -2223,11 +2205,10 @@ struct __wt_connection {
* configure both log_size and wait to set an upper bound for checkpoints;
* setting this value above 0 configures periodic checkpoints., an integer
* between 0 and 2GB; default \c 0.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;name, the
- * checkpoint name., a string; default \c "WiredTigerCheckpoint".}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;wait, seconds to wait between each
- * checkpoint; setting this value above 0 configures periodic checkpoints., an
- * integer between 0 and 100000; default \c 0.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;wait,
+ * seconds to wait between each checkpoint; setting this value above 0
+ * configures periodic checkpoints., an integer between 0 and 100000; default \c
+ * 0.}
* @config{ ),,}
* @config{checkpoint_sync, flush files to stable storage when closing or
* writing checkpoints., a boolean flag; default \c true.}
@@ -2288,11 +2269,11 @@ struct __wt_connection {
* @config{eviction_dirty_target, continue evicting until the cache has less
* dirty memory than the value\, as a percentage of the total cache size. Dirty
* pages will only be evicted if the cache is full enough to trigger eviction.,
- * an integer between 5 and 99; default \c 80.}
+ * an integer between 1 and 99; default \c 5.}
* @config{eviction_dirty_trigger, trigger eviction when the cache is using this
* much memory for dirty content\, as a percentage of the total cache size.
* This setting only alters behavior if it is lower than eviction_trigger., an
- * integer between 5 and 99; default \c 95.}
+ * integer between 1 and 99; default \c 20.}
* @config{eviction_target, continue evicting until the cache has less total
* memory than the value\, as a percentage of the total cache size. Must be
* less than \c eviction_trigger., an integer between 10 and 99; default \c 80.}
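
/*
 * Illustrative sketch, not part of this diff: the new dirty-eviction defaults
 * (target 5%, trigger 20%) can be overridden when the connection is opened;
 * "home" and "ret" come from the caller.
 */
WT_CONNECTION *conn;

ret = wiredtiger_open(home, NULL,
    "create,cache_size=2GB,eviction_dirty_target=10,eviction_dirty_trigger=40",
    &conn);
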
@@ -2343,9 +2324,10 @@ struct __wt_connection {
* subsystem., a boolean flag; default \c false.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;file_max, the maximum size of log files., an
* integer between 100KB and 2GB; default \c 100MB.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;path, the path to a directory into which the
- * log files are written. If the value is not an absolute path name\, the files
- * are created relative to the database home., a string; default \c ".".}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;path, the name of a directory into which log
+ * files are written. The directory must already exist. If the value is not an
+ * absolute path\, the path is relative to the database home (see @ref
+ * absolute_path for more information)., a string; default \c ".".}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;prealloc, pre-allocate log files., a boolean
* flag; default \c true.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;recover, run recovery
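
/*
 * Illustrative sketch, not part of this diff: under the reworded option,
 * log.path names a directory that must already exist; a relative value is
 * resolved against the database home.  "journal" is a hypothetical directory
 * created by the application before opening; "home", "conn" and "ret" come
 * from the caller.
 */
ret = wiredtiger_open(home, NULL,
    "create,log=(enabled=true,path=journal)", &conn);
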
@@ -2415,16 +2397,15 @@ struct __wt_connection {
* boolean flag; default \c false.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;on_close,
* log statistics on database close., a boolean flag; default \c false.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;path, the pathname to a file into which the
- * log records are written\, may contain ISO C standard strftime conversion
- * specifications. If the value is not an absolute path name\, the file is
- * created relative to the database home., a string; default \c
- * "WiredTigerStat.%d.%H".}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;sources, if
- * non-empty\, include statistics for the list of data source URIs\, if they are
- * open at the time of the statistics logging. The list may include URIs
- * matching a single data source ("table:mytable")\, or a URI matching all data
- * sources of a particular type ("table:")., a list of strings; default empty.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;path, the name of a directory into which
+ * statistics files are written. The directory must already exist. If the
+ * value is not an absolute path\, the path is relative to the database home
+ * (see @ref absolute_path for more information)., a string; default \c ".".}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;sources, if non-empty\, include statistics
+ * for the list of data source URIs\, if they are open at the time of the
+ * statistics logging. The list may include URIs matching a single data source
+ * ("table:mytable")\, or a URI matching all data sources of a particular type
+ * ("table:")., a list of strings; default empty.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;timestamp, a timestamp prepended to each log
* record\, may contain strftime conversion specifications\, when \c json is
* configured\, defaults to \c "%FT%Y.000Z"., a string; default \c "%b %d
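
/*
 * Illustrative sketch, not part of this diff: statistics_log.path now names a
 * directory for the statistics files rather than a strftime file pattern, and
 * the directory must already exist.  Statistics gathering is enabled
 * separately, here with "statistics=(fast)"; "stats" is a hypothetical
 * directory and "home", "conn" and "ret" come from the caller.
 */
ret = wiredtiger_open(home, NULL,
    "create,statistics=(fast),statistics_log=(wait=30,json=true,path=stats)",
    &conn);
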
@@ -3701,24 +3682,34 @@ struct __wt_extractor {
#if !defined(SWIG)
/*! WT_FILE_SYSTEM::open_file file types */
typedef enum {
- WT_OPEN_FILE_TYPE_CHECKPOINT, /*!< open a data file checkpoint */
- WT_OPEN_FILE_TYPE_DATA, /*!< open a data file */
- WT_OPEN_FILE_TYPE_DIRECTORY, /*!< open a directory */
- WT_OPEN_FILE_TYPE_LOG, /*!< open a log file */
- WT_OPEN_FILE_TYPE_REGULAR /*!< open a regular file */
-} WT_OPEN_FILE_TYPE;
+ WT_FS_OPEN_FILE_TYPE_CHECKPOINT,/*!< open a data file checkpoint */
+ WT_FS_OPEN_FILE_TYPE_DATA, /*!< open a data file */
+ WT_FS_OPEN_FILE_TYPE_DIRECTORY, /*!< open a directory */
+ WT_FS_OPEN_FILE_TYPE_LOG, /*!< open a log file */
+ WT_FS_OPEN_FILE_TYPE_REGULAR /*!< open a regular file */
+} WT_FS_OPEN_FILE_TYPE;
/*! WT_FILE_SYSTEM::open_file flags: create if does not exist */
-#define WT_OPEN_CREATE 0x001
+#define WT_FS_OPEN_CREATE 0x001
/*! WT_FILE_SYSTEM::open_file flags: direct I/O requested */
-#define WT_OPEN_DIRECTIO 0x002
-/*! WT_FILE_SYSTEM::open_file flags: error if exclusive use not available */
-#define WT_OPEN_EXCLUSIVE 0x004
+#define WT_FS_OPEN_DIRECTIO 0x002
+/*! WT_FILE_SYSTEM::open_file flags: file creation must be durable */
+#define WT_FS_OPEN_DURABLE 0x004
+/*!
+ * WT_FILE_SYSTEM::open_file flags: return EBUSY if exclusive use not available
+ */
+#define WT_FS_OPEN_EXCLUSIVE 0x008
#ifndef DOXYGEN
-#define WT_OPEN_FIXED 0x008 /* Path not home relative (internal) */
+#define WT_FS_OPEN_FIXED 0x010 /* Path not home relative (internal) */
#endif
/*! WT_FILE_SYSTEM::open_file flags: open is read-only */
-#define WT_OPEN_READONLY 0x010
+#define WT_FS_OPEN_READONLY 0x020
+
+/*!
+ * WT_FILE_SYSTEM::remove or WT_FILE_SYSTEM::rename flags: the remove or rename
+ * operation must be durable
+ */
+#define WT_FS_DURABLE 0x001
/*!
* The interface implemented by applications to provide a custom file system
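
/*
 * Illustrative sketch, not part of this diff: how a custom file system might
 * interpret the renamed WT_FS_OPEN_* flags.  The function name and the POSIX
 * mapping are assumptions; allocating the WT_FILE_HANDLE and honoring
 * WT_FS_OPEN_DURABLE (flushing the parent directory) are omitted here.
 */
#include <errno.h>
#include <fcntl.h>

static int
my_fs_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *session,
    const char *name, WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags,
    WT_FILE_HANDLE **file_handlep)
{
        int f;

        (void)file_system; (void)session; (void)file_type; (void)file_handlep;

        f = (flags & WT_FS_OPEN_READONLY) ? O_RDONLY : O_RDWR;
        if (flags & WT_FS_OPEN_CREATE)
                f |= O_CREAT;
        if (flags & WT_FS_OPEN_EXCLUSIVE)       /* report EBUSY if in use */
                f |= O_EXCL;
#ifdef O_DIRECT
        if (flags & WT_FS_OPEN_DIRECTIO)
                f |= O_DIRECT;
#endif
        (void)name; (void)f;
        return (ENOTSUP);       /* handle allocation omitted from the sketch */
}
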
@@ -3748,7 +3739,7 @@ struct __wt_file_system {
* @param[out] dirlist the method returns an allocated array of
* individually allocated strings, one for each entry in the
* directory.
- * @param[out] countp the method the number of entries returned
+ * @param[out] countp the number of entries returned
*/
int (*fs_directory_list)(WT_FILE_SYSTEM *file_system,
WT_SESSION *session, const char *directory, const char *prefix,
@@ -3768,23 +3759,6 @@ struct __wt_file_system {
WT_SESSION *session, char **dirlist, uint32_t count);
/*!
- * Flush the named directory.
- *
- * This method is not required for readonly file systems or file systems
- * where it is not necessary to flush a file's directory to ensure the
- * durability of file system operations, and should be set to NULL when
- * not required by the file system.
- *
- * @errors
- *
- * @param file_system the WT_FILE_SYSTEM
- * @param session the current WiredTiger session
- * @param directory the name of the directory
- */
- int (*fs_directory_sync)(WT_FILE_SYSTEM *file_system,
- WT_SESSION *session, const char *directory);
-
- /*!
* Return if the named file system object exists.
*
* @errors
@@ -3800,6 +3774,16 @@ struct __wt_file_system {
/*!
* Open a handle for a named file system object
*
+ * The method should return ENOENT if the file is not being created and
+ * does not exist.
+ *
+ * The method should return EACCES if the file cannot be opened in the
+ * requested mode (for example, a file opened for writing in a readonly
+ * file system).
+ *
+ * The method should return EBUSY if ::WT_FS_OPEN_EXCLUSIVE is set and
+ * the file is in use.
+ *
* @errors
*
* @param file_system the WT_FILE_SYSTEM
@@ -3809,8 +3793,8 @@ struct __wt_file_system {
* The file type is provided to allow optimization for different file
* access patterns.
* @param flags flags indicating how to open the file, one or more of
- * ::WT_OPEN_CREATE, ::WT_OPEN_DIRECTIO, ::WT_OPEN_EXCLUSIVE or
- * ::WT_OPEN_READONLY.
+ * ::WT_FS_OPEN_CREATE, ::WT_FS_OPEN_DIRECTIO, ::WT_FS_OPEN_DURABLE,
+ * ::WT_FS_OPEN_EXCLUSIVE or ::WT_FS_OPEN_READONLY.
* @param[out] file_handlep the handle to the newly opened file. File
* system implementations must allocate memory for the handle and
* the WT_FILE_HANDLE::name field, and fill in the WT_FILE_HANDLE::
@@ -3819,7 +3803,7 @@ struct __wt_file_system {
* their own structure as a superset of a WT_FILE_HANDLE:: structure.
*/
int (*fs_open_file)(WT_FILE_SYSTEM *file_system, WT_SESSION *session,
- const char *name, WT_OPEN_FILE_TYPE file_type, uint32_t flags,
+ const char *name, WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags,
WT_FILE_HANDLE **file_handlep);
/*!
@@ -3833,9 +3817,11 @@ struct __wt_file_system {
* @param file_system the WT_FILE_SYSTEM
* @param session the current WiredTiger session
* @param name the name of the file system object
+ * @param flags 0 or ::WT_FS_DURABLE
*/
- int (*fs_remove)(
- WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *name);
+ int (*fs_remove)(WT_FILE_SYSTEM *file_system,
+ WT_SESSION *session, const char *name, uint32_t flags);
/*!
* Rename a named file system object
@@ -3849,9 +3835,10 @@ struct __wt_file_system {
* @param session the current WiredTiger session
* @param from the original name of the object
* @param to the new name for the object
+ * @param flags 0 or ::WT_FS_DURABLE
*/
- int (*fs_rename)(WT_FILE_SYSTEM *file_system,
- WT_SESSION *session, const char *from, const char *to);
+ int (*fs_rename)(WT_FILE_SYSTEM *file_system, WT_SESSION *session,
+ const char *from, const char *to, uint32_t flags);
/*!
* Return the size of a named file system object
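
/*
 * Illustrative sketch, not part of this diff: with fs_directory_sync removed,
 * durability is requested per operation.  A POSIX-style remove might honor
 * ::WT_FS_DURABLE like this; the function name and mapping are assumptions.
 */
#include <errno.h>
#include <unistd.h>

static int
my_fs_remove(WT_FILE_SYSTEM *file_system, WT_SESSION *session,
    const char *name, uint32_t flags)
{
        (void)file_system; (void)session;

        if (unlink(name) != 0)
                return (errno);
        if (flags & WT_FS_DURABLE) {
                /*
                 * Flush the file's parent directory here (open it and fsync
                 * the descriptor) so the remove survives a crash.
                 */
        }
        return (0);
}
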
@@ -3981,7 +3968,7 @@ struct __wt_file_handle {
/*!
* Lock/unlock a file from the perspective of other processes running
- * in the system.
+ * in the system, where necessary.
*
* @errors
*
@@ -4256,340 +4243,380 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_BLOCK_BYTE_READ 1026
/*! block-manager: bytes written */
#define WT_STAT_CONN_BLOCK_BYTE_WRITE 1027
+/*! block-manager: bytes written for checkpoint */
+#define WT_STAT_CONN_BLOCK_BYTE_WRITE_CHECKPOINT 1028
/*! block-manager: mapped blocks read */
-#define WT_STAT_CONN_BLOCK_MAP_READ 1028
+#define WT_STAT_CONN_BLOCK_MAP_READ 1029
/*! block-manager: mapped bytes read */
-#define WT_STAT_CONN_BLOCK_BYTE_MAP_READ 1029
+#define WT_STAT_CONN_BLOCK_BYTE_MAP_READ 1030
+/*! cache: bytes belonging to page images in the cache */
+#define WT_STAT_CONN_CACHE_BYTES_IMAGE 1031
/*! cache: bytes currently in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_INUSE 1030
+#define WT_STAT_CONN_CACHE_BYTES_INUSE 1032
+/*! cache: bytes not belonging to page images in the cache */
+#define WT_STAT_CONN_CACHE_BYTES_OTHER 1033
/*! cache: bytes read into cache */
-#define WT_STAT_CONN_CACHE_BYTES_READ 1031
+#define WT_STAT_CONN_CACHE_BYTES_READ 1034
/*! cache: bytes written from cache */
-#define WT_STAT_CONN_CACHE_BYTES_WRITE 1032
+#define WT_STAT_CONN_CACHE_BYTES_WRITE 1035
/*! cache: checkpoint blocked page eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_CHECKPOINT 1033
+#define WT_STAT_CONN_CACHE_EVICTION_CHECKPOINT 1036
/*! cache: eviction calls to get a page */
-#define WT_STAT_CONN_CACHE_EVICTION_GET_REF 1034
+#define WT_STAT_CONN_CACHE_EVICTION_GET_REF 1037
/*! cache: eviction calls to get a page found queue empty */
-#define WT_STAT_CONN_CACHE_EVICTION_GET_REF_EMPTY 1035
+#define WT_STAT_CONN_CACHE_EVICTION_GET_REF_EMPTY 1038
/*! cache: eviction calls to get a page found queue empty after locking */
-#define WT_STAT_CONN_CACHE_EVICTION_GET_REF_EMPTY2 1036
+#define WT_STAT_CONN_CACHE_EVICTION_GET_REF_EMPTY2 1039
/*! cache: eviction currently operating in aggressive mode */
-#define WT_STAT_CONN_CACHE_EVICTION_AGGRESSIVE_SET 1037
+#define WT_STAT_CONN_CACHE_EVICTION_AGGRESSIVE_SET 1040
/*! cache: eviction server candidate queue empty when topping up */
-#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_EMPTY 1038
+#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_EMPTY 1041
/*! cache: eviction server candidate queue not empty when topping up */
-#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_NOT_EMPTY 1039
+#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_NOT_EMPTY 1042
/*! cache: eviction server evicting pages */
-#define WT_STAT_CONN_CACHE_EVICTION_SERVER_EVICTING 1040
+#define WT_STAT_CONN_CACHE_EVICTION_SERVER_EVICTING 1043
/*! cache: eviction server populating queue, but not evicting pages */
-#define WT_STAT_CONN_CACHE_EVICTION_SERVER_NOT_EVICTING 1041
+#define WT_STAT_CONN_CACHE_EVICTION_SERVER_NOT_EVICTING 1044
/*! cache: eviction server skipped very large page */
-#define WT_STAT_CONN_CACHE_EVICTION_SERVER_TOOBIG 1042
+#define WT_STAT_CONN_CACHE_EVICTION_SERVER_TOOBIG 1045
/*! cache: eviction server slept, because we did not make progress with
* eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_SERVER_SLEPT 1043
+#define WT_STAT_CONN_CACHE_EVICTION_SERVER_SLEPT 1046
/*! cache: eviction server unable to reach eviction goal */
-#define WT_STAT_CONN_CACHE_EVICTION_SLOW 1044
+#define WT_STAT_CONN_CACHE_EVICTION_SLOW 1047
/*! cache: eviction worker thread evicting pages */
-#define WT_STAT_CONN_CACHE_EVICTION_WORKER_EVICTING 1045
+#define WT_STAT_CONN_CACHE_EVICTION_WORKER_EVICTING 1048
/*! cache: failed eviction of pages that exceeded the in-memory maximum */
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL 1046
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL 1049
/*! cache: files with active eviction walks */
-#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ACTIVE 1047
+#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ACTIVE 1050
/*! cache: files with new eviction walks started */
-#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STARTED 1048
+#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STARTED 1051
/*! cache: hazard pointer blocked page eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1049
+#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1052
/*! cache: hazard pointer check calls */
-#define WT_STAT_CONN_CACHE_HAZARD_CHECKS 1050
+#define WT_STAT_CONN_CACHE_HAZARD_CHECKS 1053
/*! cache: hazard pointer check entries walked */
-#define WT_STAT_CONN_CACHE_HAZARD_WALKS 1051
+#define WT_STAT_CONN_CACHE_HAZARD_WALKS 1054
/*! cache: hazard pointer maximum array length */
-#define WT_STAT_CONN_CACHE_HAZARD_MAX 1052
+#define WT_STAT_CONN_CACHE_HAZARD_MAX 1055
/*! cache: in-memory page passed criteria to be split */
-#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1053
+#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1056
/*! cache: in-memory page splits */
-#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1054
+#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1057
/*! cache: internal pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1055
+#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1058
/*! cache: internal pages split during eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1056
+#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1059
/*! cache: leaf pages split during eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1057
+#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1060
/*! cache: lookaside table insert calls */
-#define WT_STAT_CONN_CACHE_LOOKASIDE_INSERT 1058
+#define WT_STAT_CONN_CACHE_LOOKASIDE_INSERT 1061
/*! cache: lookaside table remove calls */
-#define WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE 1059
+#define WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE 1062
/*! cache: maximum bytes configured */
-#define WT_STAT_CONN_CACHE_BYTES_MAX 1060
+#define WT_STAT_CONN_CACHE_BYTES_MAX 1063
/*! cache: maximum page size at eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1061
+#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1064
/*! cache: modified pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1062
+#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1065
/*! cache: modified pages evicted by application threads */
-#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1063
+#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1066
+/*! cache: overflow pages read into cache */
+#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1067
+/*! cache: overflow values cached in memory */
+#define WT_STAT_CONN_CACHE_OVERFLOW_VALUE 1068
/*! cache: page split during eviction deepened the tree */
-#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1064
+#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1069
/*! cache: page written requiring lookaside records */
-#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1065
+#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1070
/*! cache: pages currently held in the cache */
-#define WT_STAT_CONN_CACHE_PAGES_INUSE 1066
+#define WT_STAT_CONN_CACHE_PAGES_INUSE 1071
/*! cache: pages evicted because they exceeded the in-memory maximum */
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1067
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1072
/*! cache: pages evicted because they had chains of deleted items */
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1068
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1073
/*! cache: pages evicted by application threads */
-#define WT_STAT_CONN_CACHE_EVICTION_APP 1069
+#define WT_STAT_CONN_CACHE_EVICTION_APP 1074
/*! cache: pages queued for eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1070
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1075
/*! cache: pages queued for urgent eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1071
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT 1076
+/*! cache: pages queued for urgent eviction during walk */
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1077
/*! cache: pages read into cache */
-#define WT_STAT_CONN_CACHE_READ 1072
+#define WT_STAT_CONN_CACHE_READ 1078
/*! cache: pages read into cache requiring lookaside entries */
-#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1073
+#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1079
/*! cache: pages requested from the cache */
-#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1074
+#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1080
/*! cache: pages seen by eviction walk */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1075
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1081
/*! cache: pages selected for eviction unable to be evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1076
+#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1082
/*! cache: pages walked for eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_WALK 1077
+#define WT_STAT_CONN_CACHE_EVICTION_WALK 1083
/*! cache: pages written from cache */
-#define WT_STAT_CONN_CACHE_WRITE 1078
+#define WT_STAT_CONN_CACHE_WRITE 1084
/*! cache: pages written requiring in-memory restoration */
-#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1079
+#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1085
/*! cache: percentage overhead */
-#define WT_STAT_CONN_CACHE_OVERHEAD 1080
+#define WT_STAT_CONN_CACHE_OVERHEAD 1086
/*! cache: tracked bytes belonging to internal pages in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1081
+#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1087
/*! cache: tracked bytes belonging to leaf pages in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_LEAF 1082
-/*! cache: tracked bytes belonging to overflow pages in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_OVERFLOW 1083
+#define WT_STAT_CONN_CACHE_BYTES_LEAF 1088
/*! cache: tracked dirty bytes in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1084
+#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1089
/*! cache: tracked dirty pages in the cache */
-#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1085
+#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1090
/*! cache: unmodified pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1086
+#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1091
/*! connection: auto adjusting condition resets */
-#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1087
+#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1092
/*! connection: auto adjusting condition wait calls */
-#define WT_STAT_CONN_COND_AUTO_WAIT 1088
+#define WT_STAT_CONN_COND_AUTO_WAIT 1093
/*! connection: files currently open */
-#define WT_STAT_CONN_FILE_OPEN 1089
+#define WT_STAT_CONN_FILE_OPEN 1094
/*! connection: memory allocations */
-#define WT_STAT_CONN_MEMORY_ALLOCATION 1090
+#define WT_STAT_CONN_MEMORY_ALLOCATION 1095
/*! connection: memory frees */
-#define WT_STAT_CONN_MEMORY_FREE 1091
+#define WT_STAT_CONN_MEMORY_FREE 1096
/*! connection: memory re-allocations */
-#define WT_STAT_CONN_MEMORY_GROW 1092
+#define WT_STAT_CONN_MEMORY_GROW 1097
/*! connection: pthread mutex condition wait calls */
-#define WT_STAT_CONN_COND_WAIT 1093
+#define WT_STAT_CONN_COND_WAIT 1098
/*! connection: pthread mutex shared lock read-lock calls */
-#define WT_STAT_CONN_RWLOCK_READ 1094
+#define WT_STAT_CONN_RWLOCK_READ 1099
/*! connection: pthread mutex shared lock write-lock calls */
-#define WT_STAT_CONN_RWLOCK_WRITE 1095
+#define WT_STAT_CONN_RWLOCK_WRITE 1100
/*! connection: total fsync I/Os */
-#define WT_STAT_CONN_FSYNC_IO 1096
+#define WT_STAT_CONN_FSYNC_IO 1101
/*! connection: total read I/Os */
-#define WT_STAT_CONN_READ_IO 1097
+#define WT_STAT_CONN_READ_IO 1102
/*! connection: total write I/Os */
-#define WT_STAT_CONN_WRITE_IO 1098
+#define WT_STAT_CONN_WRITE_IO 1103
/*! cursor: cursor create calls */
-#define WT_STAT_CONN_CURSOR_CREATE 1099
+#define WT_STAT_CONN_CURSOR_CREATE 1104
/*! cursor: cursor insert calls */
-#define WT_STAT_CONN_CURSOR_INSERT 1100
+#define WT_STAT_CONN_CURSOR_INSERT 1105
/*! cursor: cursor next calls */
-#define WT_STAT_CONN_CURSOR_NEXT 1101
+#define WT_STAT_CONN_CURSOR_NEXT 1106
/*! cursor: cursor prev calls */
-#define WT_STAT_CONN_CURSOR_PREV 1102
+#define WT_STAT_CONN_CURSOR_PREV 1107
/*! cursor: cursor remove calls */
-#define WT_STAT_CONN_CURSOR_REMOVE 1103
+#define WT_STAT_CONN_CURSOR_REMOVE 1108
/*! cursor: cursor reset calls */
-#define WT_STAT_CONN_CURSOR_RESET 1104
+#define WT_STAT_CONN_CURSOR_RESET 1109
/*! cursor: cursor restarted searches */
-#define WT_STAT_CONN_CURSOR_RESTART 1105
+#define WT_STAT_CONN_CURSOR_RESTART 1110
/*! cursor: cursor search calls */
-#define WT_STAT_CONN_CURSOR_SEARCH 1106
+#define WT_STAT_CONN_CURSOR_SEARCH 1111
/*! cursor: cursor search near calls */
-#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1107
+#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1112
/*! cursor: cursor update calls */
-#define WT_STAT_CONN_CURSOR_UPDATE 1108
+#define WT_STAT_CONN_CURSOR_UPDATE 1113
/*! cursor: truncate calls */
-#define WT_STAT_CONN_CURSOR_TRUNCATE 1109
+#define WT_STAT_CONN_CURSOR_TRUNCATE 1114
/*! data-handle: connection data handles currently active */
-#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1110
+#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1115
/*! data-handle: connection sweep candidate became referenced */
-#define WT_STAT_CONN_DH_SWEEP_REF 1111
+#define WT_STAT_CONN_DH_SWEEP_REF 1116
/*! data-handle: connection sweep dhandles closed */
-#define WT_STAT_CONN_DH_SWEEP_CLOSE 1112
+#define WT_STAT_CONN_DH_SWEEP_CLOSE 1117
/*! data-handle: connection sweep dhandles removed from hash list */
-#define WT_STAT_CONN_DH_SWEEP_REMOVE 1113
+#define WT_STAT_CONN_DH_SWEEP_REMOVE 1118
/*! data-handle: connection sweep time-of-death sets */
-#define WT_STAT_CONN_DH_SWEEP_TOD 1114
+#define WT_STAT_CONN_DH_SWEEP_TOD 1119
/*! data-handle: connection sweeps */
-#define WT_STAT_CONN_DH_SWEEPS 1115
+#define WT_STAT_CONN_DH_SWEEPS 1120
/*! data-handle: session dhandles swept */
-#define WT_STAT_CONN_DH_SESSION_HANDLES 1116
+#define WT_STAT_CONN_DH_SESSION_HANDLES 1121
/*! data-handle: session sweep attempts */
-#define WT_STAT_CONN_DH_SESSION_SWEEPS 1117
+#define WT_STAT_CONN_DH_SESSION_SWEEPS 1122
/*! log: busy returns attempting to switch slots */
-#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1118
+#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1123
/*! log: consolidated slot closures */
-#define WT_STAT_CONN_LOG_SLOT_CLOSES 1119
+#define WT_STAT_CONN_LOG_SLOT_CLOSES 1124
/*! log: consolidated slot join races */
-#define WT_STAT_CONN_LOG_SLOT_RACES 1120
+#define WT_STAT_CONN_LOG_SLOT_RACES 1125
/*! log: consolidated slot join transitions */
-#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1121
+#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1126
/*! log: consolidated slot joins */
-#define WT_STAT_CONN_LOG_SLOT_JOINS 1122
+#define WT_STAT_CONN_LOG_SLOT_JOINS 1127
/*! log: consolidated slot unbuffered writes */
-#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1123
+#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1128
/*! log: log bytes of payload data */
-#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1124
+#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1129
/*! log: log bytes written */
-#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1125
+#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1130
/*! log: log files manually zero-filled */
-#define WT_STAT_CONN_LOG_ZERO_FILLS 1126
+#define WT_STAT_CONN_LOG_ZERO_FILLS 1131
/*! log: log flush operations */
-#define WT_STAT_CONN_LOG_FLUSH 1127
+#define WT_STAT_CONN_LOG_FLUSH 1132
/*! log: log force write operations */
-#define WT_STAT_CONN_LOG_FORCE_WRITE 1128
+#define WT_STAT_CONN_LOG_FORCE_WRITE 1133
/*! log: log force write operations skipped */
-#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1129
+#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1134
/*! log: log records compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1130
+#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1135
/*! log: log records not compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1131
+#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1136
/*! log: log records too small to compress */
-#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1132
+#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1137
/*! log: log release advances write LSN */
-#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1133
+#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1138
/*! log: log scan operations */
-#define WT_STAT_CONN_LOG_SCANS 1134
+#define WT_STAT_CONN_LOG_SCANS 1139
/*! log: log scan records requiring two reads */
-#define WT_STAT_CONN_LOG_SCAN_REREADS 1135
+#define WT_STAT_CONN_LOG_SCAN_REREADS 1140
/*! log: log server thread advances write LSN */
-#define WT_STAT_CONN_LOG_WRITE_LSN 1136
+#define WT_STAT_CONN_LOG_WRITE_LSN 1141
/*! log: log server thread write LSN walk skipped */
-#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1137
+#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1142
/*! log: log sync operations */
-#define WT_STAT_CONN_LOG_SYNC 1138
+#define WT_STAT_CONN_LOG_SYNC 1143
/*! log: log sync time duration (usecs) */
-#define WT_STAT_CONN_LOG_SYNC_DURATION 1139
+#define WT_STAT_CONN_LOG_SYNC_DURATION 1144
/*! log: log sync_dir operations */
-#define WT_STAT_CONN_LOG_SYNC_DIR 1140
+#define WT_STAT_CONN_LOG_SYNC_DIR 1145
/*! log: log sync_dir time duration (usecs) */
-#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1141
+#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1146
/*! log: log write operations */
-#define WT_STAT_CONN_LOG_WRITES 1142
+#define WT_STAT_CONN_LOG_WRITES 1147
/*! log: logging bytes consolidated */
-#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1143
+#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1148
/*! log: maximum log file size */
-#define WT_STAT_CONN_LOG_MAX_FILESIZE 1144
+#define WT_STAT_CONN_LOG_MAX_FILESIZE 1149
/*! log: number of pre-allocated log files to create */
-#define WT_STAT_CONN_LOG_PREALLOC_MAX 1145
+#define WT_STAT_CONN_LOG_PREALLOC_MAX 1150
/*! log: pre-allocated log files not ready and missed */
-#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1146
+#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1151
/*! log: pre-allocated log files prepared */
-#define WT_STAT_CONN_LOG_PREALLOC_FILES 1147
+#define WT_STAT_CONN_LOG_PREALLOC_FILES 1152
/*! log: pre-allocated log files used */
-#define WT_STAT_CONN_LOG_PREALLOC_USED 1148
+#define WT_STAT_CONN_LOG_PREALLOC_USED 1153
/*! log: records processed by log scan */
-#define WT_STAT_CONN_LOG_SCAN_RECORDS 1149
+#define WT_STAT_CONN_LOG_SCAN_RECORDS 1154
/*! log: total in-memory size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_MEM 1150
+#define WT_STAT_CONN_LOG_COMPRESS_MEM 1155
/*! log: total log buffer size */
-#define WT_STAT_CONN_LOG_BUFFER_SIZE 1151
+#define WT_STAT_CONN_LOG_BUFFER_SIZE 1156
/*! log: total size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_LEN 1152
+#define WT_STAT_CONN_LOG_COMPRESS_LEN 1157
/*! log: written slots coalesced */
-#define WT_STAT_CONN_LOG_SLOT_COALESCED 1153
+#define WT_STAT_CONN_LOG_SLOT_COALESCED 1158
/*! log: yields waiting for previous log file close */
-#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1154
+#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1159
/*! reconciliation: fast-path pages deleted */
-#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1155
+#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1160
/*! reconciliation: page reconciliation calls */
-#define WT_STAT_CONN_REC_PAGES 1156
+#define WT_STAT_CONN_REC_PAGES 1161
/*! reconciliation: page reconciliation calls for eviction */
-#define WT_STAT_CONN_REC_PAGES_EVICTION 1157
+#define WT_STAT_CONN_REC_PAGES_EVICTION 1162
/*! reconciliation: pages deleted */
-#define WT_STAT_CONN_REC_PAGE_DELETE 1158
+#define WT_STAT_CONN_REC_PAGE_DELETE 1163
/*! reconciliation: split bytes currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1159
+#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1164
/*! reconciliation: split objects currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1160
+#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1165
/*! session: open cursor count */
-#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1161
+#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1166
/*! session: open session count */
-#define WT_STAT_CONN_SESSION_OPEN 1162
+#define WT_STAT_CONN_SESSION_OPEN 1167
+/*! session: table compact failed calls */
+#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1168
+/*! session: table compact successful calls */
+#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1169
+/*! session: table create failed calls */
+#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1170
+/*! session: table create successful calls */
+#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1171
+/*! session: table drop failed calls */
+#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1172
+/*! session: table drop successful calls */
+#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1173
+/*! session: table rebalance failed calls */
+#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1174
+/*! session: table rebalance successful calls */
+#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1175
+/*! session: table rename failed calls */
+#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1176
+/*! session: table rename successful calls */
+#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1177
+/*! session: table salvage failed calls */
+#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1178
+/*! session: table salvage successful calls */
+#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1179
+/*! session: table truncate failed calls */
+#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1180
+/*! session: table truncate successful calls */
+#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1181
+/*! session: table verify failed calls */
+#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1182
+/*! session: table verify successful calls */
+#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1183
/*! thread-state: active filesystem fsync calls */
-#define WT_STAT_CONN_FSYNC_ACTIVE 1163
+#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1184
/*! thread-state: active filesystem read calls */
-#define WT_STAT_CONN_READ_ACTIVE 1164
+#define WT_STAT_CONN_THREAD_READ_ACTIVE 1185
/*! thread-state: active filesystem write calls */
-#define WT_STAT_CONN_WRITE_ACTIVE 1165
+#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1186
/*! thread-yield: page acquire busy blocked */
-#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1166
+#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1187
/*! thread-yield: page acquire eviction blocked */
-#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1167
+#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1188
/*! thread-yield: page acquire locked blocked */
-#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1168
+#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1189
/*! thread-yield: page acquire read blocked */
-#define WT_STAT_CONN_PAGE_READ_BLOCKED 1169
+#define WT_STAT_CONN_PAGE_READ_BLOCKED 1190
/*! thread-yield: page acquire time sleeping (usecs) */
-#define WT_STAT_CONN_PAGE_SLEEP 1170
+#define WT_STAT_CONN_PAGE_SLEEP 1191
/*! transaction: number of named snapshots created */
-#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1171
+#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1192
/*! transaction: number of named snapshots dropped */
-#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1172
+#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1193
/*! transaction: transaction begins */
-#define WT_STAT_CONN_TXN_BEGIN 1173
+#define WT_STAT_CONN_TXN_BEGIN 1194
/*! transaction: transaction checkpoint currently running */
-#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1174
+#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1195
/*! transaction: transaction checkpoint generation */
-#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1175
+#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1196
/*! transaction: transaction checkpoint max time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1176
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1197
/*! transaction: transaction checkpoint min time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1177
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1198
/*! transaction: transaction checkpoint most recent time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1178
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1199
+/*! transaction: transaction checkpoint scrub dirty target */
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1200
+/*! transaction: transaction checkpoint scrub time (msecs) */
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1201
/*! transaction: transaction checkpoint total time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1179
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1202
/*! transaction: transaction checkpoints */
-#define WT_STAT_CONN_TXN_CHECKPOINT 1180
+#define WT_STAT_CONN_TXN_CHECKPOINT 1203
/*! transaction: transaction failures due to cache overflow */
-#define WT_STAT_CONN_TXN_FAIL_CACHE 1181
+#define WT_STAT_CONN_TXN_FAIL_CACHE 1204
/*! transaction: transaction fsync calls for checkpoint after allocating
* the transaction ID */
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1182
-/*! transaction: transaction fsync calls for checkpoint before allocating
- * the transaction ID */
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_PRE 1183
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1205
/*! transaction: transaction fsync duration for checkpoint after
* allocating the transaction ID (usecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1184
-/*! transaction: transaction fsync duration for checkpoint before
- * allocating the transaction ID (usecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_PRE_DURATION 1185
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1206
/*! transaction: transaction range of IDs currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_RANGE 1186
+#define WT_STAT_CONN_TXN_PINNED_RANGE 1207
/*! transaction: transaction range of IDs currently pinned by a checkpoint */
-#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1187
+#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1208
/*! transaction: transaction range of IDs currently pinned by named
* snapshots */
-#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1188
+#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1209
/*! transaction: transaction sync calls */
-#define WT_STAT_CONN_TXN_SYNC 1189
+#define WT_STAT_CONN_TXN_SYNC 1210
/*! transaction: transactions committed */
-#define WT_STAT_CONN_TXN_COMMIT 1190
+#define WT_STAT_CONN_TXN_COMMIT 1211
/*! transaction: transactions rolled back */
-#define WT_STAT_CONN_TXN_ROLLBACK 1191
+#define WT_STAT_CONN_TXN_ROLLBACK 1212
/*!
* @}
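
/*
 * Illustrative sketch, not part of this diff: the new per-operation session
 * statistics are read like any other connection statistic, by searching a
 * statistics cursor for the key.  "session" and "ret" come from the caller.
 */
WT_CURSOR *stat_cursor;
const char *desc, *pvalue;
int64_t value;

ret = session->open_cursor(session, "statistics:", NULL, NULL, &stat_cursor);
stat_cursor->set_key(stat_cursor, WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS);
ret = stat_cursor->search(stat_cursor);
ret = stat_cursor->get_value(stat_cursor, &desc, &pvalue, &value);
ret = stat_cursor->close(stat_cursor);
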
@@ -4678,127 +4705,129 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_DSRC_BTREE_ROW_INTERNAL 2038
/*! btree: row-store leaf pages */
#define WT_STAT_DSRC_BTREE_ROW_LEAF 2039
+/*! cache: bytes currently in the cache */
+#define WT_STAT_DSRC_CACHE_BYTES_INUSE 2040
/*! cache: bytes read into cache */
-#define WT_STAT_DSRC_CACHE_BYTES_READ 2040
+#define WT_STAT_DSRC_CACHE_BYTES_READ 2041
/*! cache: bytes written from cache */
-#define WT_STAT_DSRC_CACHE_BYTES_WRITE 2041
+#define WT_STAT_DSRC_CACHE_BYTES_WRITE 2042
/*! cache: checkpoint blocked page eviction */
-#define WT_STAT_DSRC_CACHE_EVICTION_CHECKPOINT 2042
+#define WT_STAT_DSRC_CACHE_EVICTION_CHECKPOINT 2043
/*! cache: data source pages selected for eviction unable to be evicted */
-#define WT_STAT_DSRC_CACHE_EVICTION_FAIL 2043
+#define WT_STAT_DSRC_CACHE_EVICTION_FAIL 2044
/*! cache: hazard pointer blocked page eviction */
-#define WT_STAT_DSRC_CACHE_EVICTION_HAZARD 2044
+#define WT_STAT_DSRC_CACHE_EVICTION_HAZARD 2045
/*! cache: in-memory page passed criteria to be split */
-#define WT_STAT_DSRC_CACHE_INMEM_SPLITTABLE 2045
+#define WT_STAT_DSRC_CACHE_INMEM_SPLITTABLE 2046
/*! cache: in-memory page splits */
-#define WT_STAT_DSRC_CACHE_INMEM_SPLIT 2046
+#define WT_STAT_DSRC_CACHE_INMEM_SPLIT 2047
/*! cache: internal pages evicted */
-#define WT_STAT_DSRC_CACHE_EVICTION_INTERNAL 2047
+#define WT_STAT_DSRC_CACHE_EVICTION_INTERNAL 2048
/*! cache: internal pages split during eviction */
-#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_INTERNAL 2048
+#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_INTERNAL 2049
/*! cache: leaf pages split during eviction */
-#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_LEAF 2049
+#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_LEAF 2050
/*! cache: modified pages evicted */
-#define WT_STAT_DSRC_CACHE_EVICTION_DIRTY 2050
+#define WT_STAT_DSRC_CACHE_EVICTION_DIRTY 2051
/*! cache: overflow pages read into cache */
-#define WT_STAT_DSRC_CACHE_READ_OVERFLOW 2051
+#define WT_STAT_DSRC_CACHE_READ_OVERFLOW 2052
/*! cache: overflow values cached in memory */
-#define WT_STAT_DSRC_CACHE_OVERFLOW_VALUE 2052
+#define WT_STAT_DSRC_CACHE_OVERFLOW_VALUE 2053
/*! cache: page split during eviction deepened the tree */
-#define WT_STAT_DSRC_CACHE_EVICTION_DEEPEN 2053
+#define WT_STAT_DSRC_CACHE_EVICTION_DEEPEN 2054
/*! cache: page written requiring lookaside records */
-#define WT_STAT_DSRC_CACHE_WRITE_LOOKASIDE 2054
+#define WT_STAT_DSRC_CACHE_WRITE_LOOKASIDE 2055
/*! cache: pages read into cache */
-#define WT_STAT_DSRC_CACHE_READ 2055
+#define WT_STAT_DSRC_CACHE_READ 2056
/*! cache: pages read into cache requiring lookaside entries */
-#define WT_STAT_DSRC_CACHE_READ_LOOKASIDE 2056
+#define WT_STAT_DSRC_CACHE_READ_LOOKASIDE 2057
/*! cache: pages requested from the cache */
-#define WT_STAT_DSRC_CACHE_PAGES_REQUESTED 2057
+#define WT_STAT_DSRC_CACHE_PAGES_REQUESTED 2058
/*! cache: pages written from cache */
-#define WT_STAT_DSRC_CACHE_WRITE 2058
+#define WT_STAT_DSRC_CACHE_WRITE 2059
/*! cache: pages written requiring in-memory restoration */
-#define WT_STAT_DSRC_CACHE_WRITE_RESTORE 2059
+#define WT_STAT_DSRC_CACHE_WRITE_RESTORE 2060
/*! cache: unmodified pages evicted */
-#define WT_STAT_DSRC_CACHE_EVICTION_CLEAN 2060
+#define WT_STAT_DSRC_CACHE_EVICTION_CLEAN 2061
/*! compression: compressed pages read */
-#define WT_STAT_DSRC_COMPRESS_READ 2061
+#define WT_STAT_DSRC_COMPRESS_READ 2062
/*! compression: compressed pages written */
-#define WT_STAT_DSRC_COMPRESS_WRITE 2062
+#define WT_STAT_DSRC_COMPRESS_WRITE 2063
/*! compression: page written failed to compress */
-#define WT_STAT_DSRC_COMPRESS_WRITE_FAIL 2063
+#define WT_STAT_DSRC_COMPRESS_WRITE_FAIL 2064
/*! compression: page written was too small to compress */
-#define WT_STAT_DSRC_COMPRESS_WRITE_TOO_SMALL 2064
+#define WT_STAT_DSRC_COMPRESS_WRITE_TOO_SMALL 2065
/*! compression: raw compression call failed, additional data available */
-#define WT_STAT_DSRC_COMPRESS_RAW_FAIL_TEMPORARY 2065
+#define WT_STAT_DSRC_COMPRESS_RAW_FAIL_TEMPORARY 2066
/*! compression: raw compression call failed, no additional data available */
-#define WT_STAT_DSRC_COMPRESS_RAW_FAIL 2066
+#define WT_STAT_DSRC_COMPRESS_RAW_FAIL 2067
/*! compression: raw compression call succeeded */
-#define WT_STAT_DSRC_COMPRESS_RAW_OK 2067
+#define WT_STAT_DSRC_COMPRESS_RAW_OK 2068
/*! cursor: bulk-loaded cursor-insert calls */
-#define WT_STAT_DSRC_CURSOR_INSERT_BULK 2068
+#define WT_STAT_DSRC_CURSOR_INSERT_BULK 2069
/*! cursor: create calls */
-#define WT_STAT_DSRC_CURSOR_CREATE 2069
+#define WT_STAT_DSRC_CURSOR_CREATE 2070
/*! cursor: cursor-insert key and value bytes inserted */
-#define WT_STAT_DSRC_CURSOR_INSERT_BYTES 2070
+#define WT_STAT_DSRC_CURSOR_INSERT_BYTES 2071
/*! cursor: cursor-remove key bytes removed */
-#define WT_STAT_DSRC_CURSOR_REMOVE_BYTES 2071
+#define WT_STAT_DSRC_CURSOR_REMOVE_BYTES 2072
/*! cursor: cursor-update value bytes updated */
-#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2072
+#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2073
/*! cursor: insert calls */
-#define WT_STAT_DSRC_CURSOR_INSERT 2073
+#define WT_STAT_DSRC_CURSOR_INSERT 2074
/*! cursor: next calls */
-#define WT_STAT_DSRC_CURSOR_NEXT 2074
+#define WT_STAT_DSRC_CURSOR_NEXT 2075
/*! cursor: prev calls */
-#define WT_STAT_DSRC_CURSOR_PREV 2075
+#define WT_STAT_DSRC_CURSOR_PREV 2076
/*! cursor: remove calls */
-#define WT_STAT_DSRC_CURSOR_REMOVE 2076
+#define WT_STAT_DSRC_CURSOR_REMOVE 2077
/*! cursor: reset calls */
-#define WT_STAT_DSRC_CURSOR_RESET 2077
+#define WT_STAT_DSRC_CURSOR_RESET 2078
/*! cursor: restarted searches */
-#define WT_STAT_DSRC_CURSOR_RESTART 2078
+#define WT_STAT_DSRC_CURSOR_RESTART 2079
/*! cursor: search calls */
-#define WT_STAT_DSRC_CURSOR_SEARCH 2079
+#define WT_STAT_DSRC_CURSOR_SEARCH 2080
/*! cursor: search near calls */
-#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2080
+#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2081
/*! cursor: truncate calls */
-#define WT_STAT_DSRC_CURSOR_TRUNCATE 2081
+#define WT_STAT_DSRC_CURSOR_TRUNCATE 2082
/*! cursor: update calls */
-#define WT_STAT_DSRC_CURSOR_UPDATE 2082
+#define WT_STAT_DSRC_CURSOR_UPDATE 2083
/*! reconciliation: dictionary matches */
-#define WT_STAT_DSRC_REC_DICTIONARY 2083
+#define WT_STAT_DSRC_REC_DICTIONARY 2084
/*! reconciliation: fast-path pages deleted */
-#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2084
+#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2085
/*! reconciliation: internal page key bytes discarded using suffix
* compression */
-#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2085
+#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2086
/*! reconciliation: internal page multi-block writes */
-#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2086
+#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2087
/*! reconciliation: internal-page overflow keys */
-#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2087
+#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2088
/*! reconciliation: leaf page key bytes discarded using prefix compression */
-#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2088
+#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2089
/*! reconciliation: leaf page multi-block writes */
-#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2089
+#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2090
/*! reconciliation: leaf-page overflow keys */
-#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2090
+#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2091
/*! reconciliation: maximum blocks required for a page */
-#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2091
+#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2092
/*! reconciliation: overflow values written */
-#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2092
+#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2093
/*! reconciliation: page checksum matches */
-#define WT_STAT_DSRC_REC_PAGE_MATCH 2093
+#define WT_STAT_DSRC_REC_PAGE_MATCH 2094
/*! reconciliation: page reconciliation calls */
-#define WT_STAT_DSRC_REC_PAGES 2094
+#define WT_STAT_DSRC_REC_PAGES 2095
/*! reconciliation: page reconciliation calls for eviction */
-#define WT_STAT_DSRC_REC_PAGES_EVICTION 2095
+#define WT_STAT_DSRC_REC_PAGES_EVICTION 2096
/*! reconciliation: pages deleted */
-#define WT_STAT_DSRC_REC_PAGE_DELETE 2096
+#define WT_STAT_DSRC_REC_PAGE_DELETE 2097
/*! session: object compaction */
-#define WT_STAT_DSRC_SESSION_COMPACT 2097
+#define WT_STAT_DSRC_SESSION_COMPACT 2098
/*! session: open cursor count */
-#define WT_STAT_DSRC_SESSION_CURSOR_OPEN 2098
+#define WT_STAT_DSRC_SESSION_CURSOR_OPEN 2099
/*! transaction: update conflicts */
-#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2099
+#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2100
/*!
* @}
diff --git a/src/log/log.c b/src/log/log.c
index bf83c280d8d..8ec910115ac 100644
--- a/src/log/log.c
+++ b/src/log/log.c
@@ -9,13 +9,17 @@
#include "wt_internal.h"
static int __log_openfile(
- WT_SESSION_IMPL *, bool, WT_FH **, const char *, uint32_t);
+ WT_SESSION_IMPL *, WT_FH **, const char *, uint32_t, uint32_t);
static int __log_write_internal(
WT_SESSION_IMPL *, WT_ITEM *, WT_LSN *, uint32_t);
#define WT_LOG_COMPRESS_SKIP (offsetof(WT_LOG_RECORD, record))
#define WT_LOG_ENCRYPT_SKIP (offsetof(WT_LOG_RECORD, record))
+/* Flags to __log_openfile */
+#define WT_LOG_OPEN_CREATE_OK 0x01
+#define WT_LOG_OPEN_VERIFY 0x02
+
/*
* __wt_log_ckpt --
* Record the given LSN as the checkpoint LSN and signal the archive
@@ -146,7 +150,7 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn)
* file than we want.
*/
WT_ERR(__log_openfile(session,
- false, &log_fh, WT_LOG_FILENAME, min_lsn->l.file));
+ &log_fh, WT_LOG_FILENAME, min_lsn->l.file, 0));
WT_ERR(__wt_verbose(session, WT_VERB_LOG,
"log_force_sync: sync %s to LSN %" PRIu32 "/%" PRIu32,
log_fh->name, min_lsn->l.file, min_lsn->l.offset));
@@ -277,7 +281,8 @@ __log_get_files(WT_SESSION_IMPL *session,
/*
* __wt_log_get_all_files --
* Retrieve the list of log files, either all of them or only the active
- * ones (those that are not candidates for archiving).
+ * ones (those that are not candidates for archiving). The caller is
+ * responsible for freeing the directory list returned.
*/
int
__wt_log_get_all_files(WT_SESSION_IMPL *session,
@@ -307,6 +312,10 @@ __wt_log_get_all_files(WT_SESSION_IMPL *session,
for (max = 0, i = 0; i < count; ) {
WT_ERR(__wt_log_extract_lognum(session, files[i], &id));
if (active_only && id < log->ckpt_lsn.l.file) {
+ /*
+ * Any files not being returned are individually freed
+ * and the array adjusted.
+ */
__wt_free(session, files[i]);
files[i] = files[count - 1];
files[--count] = NULL;
@@ -321,6 +330,10 @@ __wt_log_get_all_files(WT_SESSION_IMPL *session,
*filesp = files;
*countp = count;
+ /*
+ * Only free on error. The caller is responsible for calling free
+ * once it is done using the returned list.
+ */
if (0) {
err: WT_TRET(__wt_fs_directory_list_free(session, &files, count));
}
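
/*
 * Illustrative sketch, not part of this diff: a caller of
 * __wt_log_get_all_files owns the returned array and must release it with
 * __wt_fs_directory_list_free when finished; the parameter order shown is an
 * assumption for illustration only.
 */
char **files;
u_int count;
uint32_t max_id;

WT_RET(__wt_log_get_all_files(session, &files, &count, &max_id, true));
/* ... use the log file names ... */
WT_TRET(__wt_fs_directory_list_free(session, &files, count));
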
@@ -674,7 +687,7 @@ err: __wt_scr_free(session, &buf);
*/
static int
__log_openfile(WT_SESSION_IMPL *session,
- bool ok_create, WT_FH **fhp, const char *file_prefix, uint32_t id)
+ WT_FH **fhp, const char *file_prefix, uint32_t id, uint32_t flags)
{
WT_CONNECTION_IMPL *conn;
WT_DECL_ITEM(buf);
@@ -683,7 +696,7 @@ __log_openfile(WT_SESSION_IMPL *session,
WT_LOG_DESC *desc;
WT_LOG_RECORD *logrec;
uint32_t allocsize;
- u_int flags;
+ u_int wtopen_flags;
conn = S2C(session);
log = conn->log;
@@ -695,19 +708,19 @@ __log_openfile(WT_SESSION_IMPL *session,
WT_ERR(__log_filename(session, id, file_prefix, buf));
WT_ERR(__wt_verbose(session, WT_VERB_LOG,
"opening log %s", (const char *)buf->data));
- flags = 0;
- if (ok_create)
- LF_SET(WT_OPEN_CREATE);
+ wtopen_flags = 0;
+ if (LF_ISSET(WT_LOG_OPEN_CREATE_OK))
+ FLD_SET(wtopen_flags, WT_FS_OPEN_CREATE);
if (FLD_ISSET(conn->direct_io, WT_DIRECT_IO_LOG))
- LF_SET(WT_OPEN_DIRECTIO);
+ FLD_SET(wtopen_flags, WT_FS_OPEN_DIRECTIO);
WT_ERR(__wt_open(
- session, buf->data, WT_OPEN_FILE_TYPE_LOG, flags, fhp));
+ session, buf->data, WT_FS_OPEN_FILE_TYPE_LOG, wtopen_flags, fhp));
/*
* If we are not creating the log file but opening it for reading,
* check that the magic number and versions are correct.
*/
- if (!ok_create) {
+ if (LF_ISSET(WT_LOG_OPEN_VERIFY)) {
WT_ERR(__wt_buf_grow(session, buf, allocsize));
memset(buf->mem, 0, allocsize);
WT_ERR(__wt_read(session, *fhp, 0, allocsize, buf->mem));
@@ -773,7 +786,7 @@ __log_alloc_prealloc(WT_SESSION_IMPL *session, uint32_t to_num)
* All file setup, writing the header and pre-allocation was done
* before. We only need to rename it.
*/
- WT_ERR(__wt_fs_rename(session, from_path->data, to_path->data));
+ WT_ERR(__wt_fs_rename(session, from_path->data, to_path->data, false));
err: __wt_scr_free(session, &from_path);
__wt_scr_free(session, &to_path);
@@ -870,7 +883,7 @@ __log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created)
* window where another thread could see a NULL log file handle.
*/
WT_RET(__log_openfile(session,
- false, &log_fh, WT_LOG_FILENAME, log->fileid));
+ &log_fh, WT_LOG_FILENAME, log->fileid, 0));
WT_PUBLISH(log->log_fh, log_fh);
/*
* We need to setup the LSNs. Set the end LSN and alloc LSN to
@@ -978,7 +991,7 @@ __log_truncate(WT_SESSION_IMPL *session,
* Truncate the log file to the given LSN.
*/
WT_ERR(__log_openfile(session,
- false, &log_fh, file_prefix, lsn->l.file));
+ &log_fh, file_prefix, lsn->l.file, 0));
WT_ERR(__wt_ftruncate(session, log_fh, lsn->l.offset));
WT_ERR(__wt_fsync(session, log_fh, true));
WT_ERR(__wt_close(session, &log_fh));
@@ -995,7 +1008,7 @@ __log_truncate(WT_SESSION_IMPL *session,
if (lognum > lsn->l.file &&
lognum < log->trunc_lsn.l.file) {
WT_ERR(__log_openfile(session,
- false, &log_fh, file_prefix, lognum));
+ &log_fh, file_prefix, lognum, 0));
/*
* If there are intervening files pre-allocated,
* truncate them to the end of the log file header.
@@ -1047,7 +1060,8 @@ __wt_log_allocfile(
/*
* Set up the temporary file.
*/
- WT_ERR(__log_openfile(session, true, &log_fh, WT_LOG_TMPNAME, tmp_id));
+ WT_ERR(__log_openfile(session,
+ &log_fh, WT_LOG_TMPNAME, tmp_id, WT_LOG_OPEN_CREATE_OK));
WT_ERR(__log_file_header(session, log_fh, NULL, true));
WT_ERR(__log_prealloc(session, log_fh));
WT_ERR(__wt_fsync(session, log_fh, true));
@@ -1058,7 +1072,7 @@ __wt_log_allocfile(
/*
* Rename it into place and make it available.
*/
- WT_ERR(__wt_fs_rename(session, from_path->data, to_path->data));
+ WT_ERR(__wt_fs_rename(session, from_path->data, to_path->data, false));
err: __wt_scr_free(session, &from_path);
__wt_scr_free(session, &to_path);
@@ -1081,7 +1095,7 @@ __wt_log_remove(WT_SESSION_IMPL *session,
WT_ERR(__log_filename(session, lognum, file_prefix, path));
WT_ERR(__wt_verbose(session, WT_VERB_LOG,
"log_remove: remove log %s", (char *)path->data));
- WT_ERR(__wt_fs_remove(session, path->data));
+ WT_ERR(__wt_fs_remove(session, path->data, false));
err: __wt_scr_free(session, &path);
return (ret);
}
@@ -1117,7 +1131,7 @@ __wt_log_open(WT_SESSION_IMPL *session)
WT_RET(__wt_verbose(session, WT_VERB_LOG,
"log_open: open fh to directory %s", conn->log_path));
WT_RET(__wt_open(session, conn->log_path,
- WT_OPEN_FILE_TYPE_DIRECTORY, 0, &log->log_dir_fh));
+ WT_FS_OPEN_FILE_TYPE_DIRECTORY, 0, &log->log_dir_fh));
}
if (!F_ISSET(conn, WT_CONN_READONLY)) {
@@ -1587,8 +1601,8 @@ __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags,
WT_ERR(
__wt_fs_directory_list_free(session, &logfiles, logcount));
}
- WT_ERR(__log_openfile(
- session, false, &log_fh, WT_LOG_FILENAME, start_lsn.l.file));
+ WT_ERR(__log_openfile(session,
+ &log_fh, WT_LOG_FILENAME, start_lsn.l.file, WT_LOG_OPEN_VERIFY));
WT_ERR(__wt_filesize(session, log_fh, &log_size));
rd_lsn = start_lsn;
@@ -1637,7 +1651,8 @@ advance:
if (rd_lsn.l.file > end_lsn.l.file)
break;
WT_ERR(__log_openfile(session,
- false, &log_fh, WT_LOG_FILENAME, rd_lsn.l.file));
+ &log_fh, WT_LOG_FILENAME,
+ rd_lsn.l.file, WT_LOG_OPEN_VERIFY));
WT_ERR(__wt_filesize(session, log_fh, &log_size));
eol = false;
continue;
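
The log.c changes above replace __log_openfile's single ok_create boolean with a flags word so create-if-missing and header verification can be requested independently. A minimal standalone C sketch of the same boolean-to-flags pattern follows; the macro and function names are illustrative, not WiredTiger identifiers.

#include <stdint.h>
#include <stdio.h>

#define LOG_OPEN_CREATE_OK	0x01u	/* Create the file if missing. */
#define LOG_OPEN_VERIFY		0x02u	/* Verify the header after open. */

static int
log_openfile(const char *name, uint32_t flags)
{
	if (flags & LOG_OPEN_CREATE_OK)
		printf("%s: create if the file does not exist\n", name);
	if (flags & LOG_OPEN_VERIFY)
		printf("%s: verify the file header after opening\n", name);
	return (0);
}

int
main(void)
{
	/* Callers combine behaviors instead of passing a single bool. */
	(void)log_openfile("example.0000000001", LOG_OPEN_CREATE_OK);
	(void)log_openfile("example.0000000002",
	    LOG_OPEN_CREATE_OK | LOG_OPEN_VERIFY);
	return (0);
}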
diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c
index 78235fb6a92..bedef6a8596 100644
--- a/src/lsm/lsm_cursor.c
+++ b/src/lsm/lsm_cursor.c
@@ -205,6 +205,12 @@ __clsm_enter(WT_CURSOR_LSM *clsm, bool reset, bool update)
WT_RET(__wt_txn_id_check(session));
WT_RET(__clsm_enter_update(clsm));
+ /*
+ * Switching the tree will update the generation before
+ * updating the switch transaction. We test the
+ * transaction in clsm_enter_update. Now test the
+ * disk generation to avoid races.
+ */
if (clsm->dsk_gen != clsm->lsm_tree->dsk_gen)
goto open;
@@ -219,13 +225,20 @@ __clsm_enter(WT_CURSOR_LSM *clsm, bool reset, bool update)
* transaction ID in each chunk: any transaction ID
* that overlaps with our snapshot is a potential
* conflict.
+ *
+ * Note that the global snap_min is correct here: it
+ * tracks concurrent transactions excluding special
+ * transactions such as checkpoint (which we can't
+ * conflict with because checkpoint only writes the
+ * metadata, which is not an LSM tree).
*/
clsm->nupdates = 1;
if (txn->isolation == WT_ISO_SNAPSHOT &&
F_ISSET(clsm, WT_CLSM_OPEN_SNAPSHOT)) {
WT_ASSERT(session,
F_ISSET(txn, WT_TXN_HAS_SNAPSHOT));
- snap_min = txn->snap_min;
+ snap_min =
+ WT_SESSION_TXN_STATE(session)->snap_min;
for (switch_txnp =
&clsm->switch_txn[clsm->nchunks - 2];
clsm->nupdates < clsm->nchunks;
@@ -1521,6 +1534,8 @@ __wt_clsm_open(WT_SESSION_IMPL *session,
WT_LSM_TREE *lsm_tree;
bool bulk;
+ WT_STATIC_ASSERT(offsetof(WT_CURSOR_LSM, iface) == 0);
+
clsm = NULL;
cursor = NULL;
lsm_tree = NULL;
@@ -1566,6 +1581,7 @@ __wt_clsm_open(WT_SESSION_IMPL *session,
cursor->value_format = lsm_tree->value_format;
clsm->lsm_tree = lsm_tree;
+ lsm_tree = NULL;
/*
* The tree's dsk_gen starts at one, so starting the cursor on zero
@@ -1573,7 +1589,6 @@ __wt_clsm_open(WT_SESSION_IMPL *session,
*/
clsm->dsk_gen = 0;
- WT_STATIC_ASSERT(offsetof(WT_CURSOR_LSM, iface) == 0);
WT_ERR(__wt_cursor_init(cursor, cursor->uri, owner, cfg, cursorp));
if (bulk)
@@ -1585,10 +1600,6 @@ err: if (clsm != NULL)
else if (lsm_tree != NULL)
__wt_lsm_tree_release(session, lsm_tree);
- /*
- * We open bulk cursors after setting the returned cursor.
- * Fix that here.
- */
*cursorp = NULL;
}
diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c
index da106ae2089..2ecfb614eee 100644
--- a/src/lsm/lsm_tree.c
+++ b/src/lsm/lsm_tree.c
@@ -771,6 +771,11 @@ __wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
++lsm_tree->dsk_gen;
lsm_tree->modified = true;
+ /*
+ * Ensure the updated disk generation is visible to all other threads
+ * before updating the transaction ID.
+ */
+ WT_FULL_BARRIER();
/*
* Set the switch transaction in the previous chunk unless this is
@@ -1187,8 +1192,15 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp)
*/
if (lsm_tree->nchunks > 0 &&
(chunk = lsm_tree->chunk[lsm_tree->nchunks - 1]) != NULL) {
- if (chunk->switch_txn == WT_TXN_NONE)
+ if (chunk->switch_txn == WT_TXN_NONE) {
+ /*
+ * Make sure any cursors open on the tree see the
+ * new switch generation before updating.
+ */
+ ++lsm_tree->dsk_gen;
+ WT_FULL_BARRIER();
chunk->switch_txn = __wt_txn_id_alloc(session, false);
+ }
/*
* If we have a chunk, we want to look for it to be on-disk.
* So we need to add a reference to keep it available.
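
The WT_FULL_BARRIER added in __wt_lsm_tree_switch and __wt_lsm_compact orders the disk-generation bump before the switch-transaction update, matching the read-side check described in __clsm_enter. A standalone C11 sketch of that publish/observe ordering, using atomic fences in place of WiredTiger's barrier macro (names are illustrative, not the in-tree code):

#include <stdatomic.h>
#include <stdint.h>

static _Atomic uint64_t dsk_gen;
static _Atomic uint64_t switch_txn;

/* Writer: publish the new generation before the switch transaction ID. */
void
lsm_switch(uint64_t new_txn)
{
	atomic_fetch_add_explicit(&dsk_gen, 1, memory_order_relaxed);
	/* Full fence: the generation bump is visible before the ID. */
	atomic_thread_fence(memory_order_seq_cst);
	atomic_store_explicit(&switch_txn, new_txn, memory_order_relaxed);
}

/* Reader: if the transaction ID is visible, so is the bumped generation. */
int
lsm_switch_seen(uint64_t cached_gen)
{
	uint64_t txn;

	txn = atomic_load_explicit(&switch_txn, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);
	return (txn != 0 && atomic_load_explicit(
	    &dsk_gen, memory_order_relaxed) != cached_gen);
}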
diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c
index c19f42327be..0f2a407c70d 100644
--- a/src/lsm/lsm_work_unit.c
+++ b/src/lsm/lsm_work_unit.c
@@ -526,7 +526,7 @@ __lsm_drop_file(WT_SESSION_IMPL *session, const char *uri)
ret = __wt_schema_drop(session, uri, drop_cfg));
if (ret == 0)
- ret = __wt_fs_remove(session, uri + strlen("file:"));
+ ret = __wt_fs_remove(session, uri + strlen("file:"), false);
WT_RET(__wt_verbose(session, WT_VERB_LSM, "Dropped %s", uri));
if (ret == EBUSY || ret == ENOENT)
diff --git a/src/meta/meta_table.c b/src/meta/meta_table.c
index 38a2edd7219..d39df163daf 100644
--- a/src/meta/meta_table.c
+++ b/src/meta/meta_table.c
@@ -68,6 +68,9 @@ __wt_metadata_cursor_open(
if (F_ISSET(btree, WT_BTREE_NO_LOGGING))
F_CLR(btree, WT_BTREE_NO_LOGGING);
+ /* The metadata file always uses checkpoint IDs in visibility checks. */
+ btree->include_checkpoint_txn = true;
+
return (0);
}
diff --git a/src/meta/meta_track.c b/src/meta/meta_track.c
index eb06b2bed66..3d8b7c46500 100644
--- a/src/meta/meta_track.c
+++ b/src/meta/meta_track.c
@@ -141,7 +141,8 @@ __meta_track_apply(WT_SESSION_IMPL *session, WT_META_TRACK *trk)
ret = bm->checkpoint_resolve(bm, session));
break;
case WT_ST_DROP_COMMIT:
- if ((ret = __wt_block_manager_drop(session, trk->a)) != 0)
+ if ((ret =
+ __wt_block_manager_drop(session, trk->a, false)) != 0)
__wt_err(session, ret,
"metadata remove dropped file %s", trk->a);
break;
@@ -188,13 +189,15 @@ __meta_track_unroll(WT_SESSION_IMPL *session, WT_META_TRACK *trk)
* For removes, b is NULL.
*/
if (trk->a != NULL && trk->b != NULL &&
- (ret = __wt_rename_and_sync_directory(session,
- trk->b + strlen("file:"), trk->a + strlen("file:"))) != 0)
+ (ret = __wt_fs_rename(session,
+ trk->b + strlen("file:"), trk->a + strlen("file:"),
+ true)) != 0)
__wt_err(session, ret,
"metadata unroll rename %s to %s", trk->b, trk->a);
- if (trk->a == NULL && (ret =
- __wt_fs_remove(session, trk->b + strlen("file:"))) != 0)
+ if (trk->a == NULL &&
+ (ret = __wt_fs_remove(session,
+ trk->b + strlen("file:"), false)) != 0)
__wt_err(session, ret,
"metadata unroll create %s", trk->b);
diff --git a/src/meta/meta_turtle.c b/src/meta/meta_turtle.c
index 4d2b359bbed..ace0fabab48 100644
--- a/src/meta/meta_turtle.c
+++ b/src/meta/meta_turtle.c
@@ -158,7 +158,7 @@ __wt_turtle_init(WT_SESSION_IMPL *session)
* Discard any turtle setup file left-over from previous runs. This
* doesn't matter for correctness, it's just cleaning up random files.
*/
- WT_RET(__wt_remove_if_exists(session, WT_METADATA_TURTLE_SET));
+ WT_RET(__wt_remove_if_exists(session, WT_METADATA_TURTLE_SET, false));
/*
* We could die after creating the turtle file and before creating the
@@ -197,9 +197,10 @@ __wt_turtle_init(WT_SESSION_IMPL *session)
"Both %s and %s exist; recreating metadata from "
"backup",
WT_METADATA_TURTLE, WT_METADATA_BACKUP));
- WT_RET(__wt_remove_if_exists(session, WT_METAFILE));
+ WT_RET(
+ __wt_remove_if_exists(session, WT_METAFILE, false));
WT_RET(__wt_remove_if_exists(
- session, WT_METADATA_TURTLE));
+ session, WT_METADATA_TURTLE, false));
load = true;
}
} else
@@ -305,7 +306,7 @@ __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value)
* every time.
*/
WT_RET(__wt_fopen(session, WT_METADATA_TURTLE_SET,
- WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, WT_STREAM_WRITE, &fs));
+ WT_FS_OPEN_CREATE | WT_FS_OPEN_EXCLUSIVE, WT_STREAM_WRITE, &fs));
version = wiredtiger_version(&vmajor, &vminor, &vpatch);
WT_ERR(__wt_fprintf(session, fs,
@@ -320,7 +321,7 @@ __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value)
/* Close any file handle left open, remove any temporary file. */
err: WT_TRET(__wt_fclose(session, &fs));
- WT_TRET(__wt_remove_if_exists(session, WT_METADATA_TURTLE_SET));
+ WT_TRET(__wt_remove_if_exists(session, WT_METADATA_TURTLE_SET, false));
return (ret);
}
diff --git a/src/os_common/filename.c b/src/os_common/filename.c
index 5f174288350..8b6c1269829 100644
--- a/src/os_common/filename.c
+++ b/src/os_common/filename.c
@@ -56,55 +56,17 @@ __wt_nfilename(
* Remove a file if it exists.
*/
int
-__wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name)
+__wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name, bool durable)
{
bool exist;
WT_RET(__wt_fs_exist(session, name, &exist));
if (exist)
- WT_RET(__wt_fs_remove(session, name));
+ WT_RET(__wt_fs_remove(session, name, durable));
return (0);
}
/*
- * __wt_rename_and_sync_directory --
- * Rename a file and sync the enclosing directory.
- */
-int
-__wt_rename_and_sync_directory(
- WT_SESSION_IMPL *session, const char *from, const char *to)
-{
- const char *fp, *tp;
- bool same_directory;
-
- /* Rename the source file to the target. */
- WT_RET(__wt_fs_rename(session, from, to));
-
- /*
- * Flush the backing directory to guarantee the rename. My reading of
- * POSIX 1003.1 is there's no guarantee flushing only one of the from
- * or to directories, or flushing a common parent, is sufficient, and
- * even if POSIX were to make that guarantee, existing filesystems are
- * known to not provide the guarantee or only provide the guarantee
- * with specific mount options. Flush both of the from/to directories
- * until it's a performance problem.
- */
- WT_RET(__wt_fs_directory_sync(session, from));
-
- /*
- * In almost all cases, we're going to be renaming files in the same
- * directory, we can at least fast-path that.
- */
- fp = strrchr(from, '/');
- tp = strrchr(to, '/');
- same_directory = (fp == NULL && tp == NULL) ||
- (fp != NULL && tp != NULL &&
- fp - from == tp - to && memcmp(from, to, (size_t)(fp - from)) == 0);
-
- return (same_directory ? 0 : __wt_fs_directory_sync(session, to));
-}
-
-/*
* __wt_copy_and_sync --
* Copy a file safely; here to support the wt utility.
*/
@@ -134,13 +96,13 @@ __wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to)
WT_ERR(__wt_scr_alloc(session, 0, &tmp));
WT_ERR(__wt_buf_fmt(session, tmp, "%s.copy", to));
- WT_ERR(__wt_remove_if_exists(session, to));
- WT_ERR(__wt_remove_if_exists(session, tmp->data));
+ WT_ERR(__wt_remove_if_exists(session, to, false));
+ WT_ERR(__wt_remove_if_exists(session, tmp->data, false));
/* Open the from and temporary file handles. */
- WT_ERR(__wt_open(session, from, WT_OPEN_FILE_TYPE_REGULAR, 0, &ffh));
- WT_ERR(__wt_open(session, tmp->data, WT_OPEN_FILE_TYPE_REGULAR,
- WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, &tfh));
+ WT_ERR(__wt_open(session, from, WT_FS_OPEN_FILE_TYPE_REGULAR, 0, &ffh));
+ WT_ERR(__wt_open(session, tmp->data, WT_FS_OPEN_FILE_TYPE_REGULAR,
+ WT_FS_OPEN_CREATE | WT_FS_OPEN_EXCLUSIVE, &tfh));
/*
* Allocate a copy buffer. Don't use a scratch buffer, this thing is
@@ -162,7 +124,7 @@ __wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to)
WT_ERR(__wt_fsync(session, tfh, true));
WT_ERR(__wt_close(session, &tfh));
- ret = __wt_rename_and_sync_directory(session, tmp->data, to);
+ ret = __wt_fs_rename(session, tmp->data, to, true);
err: WT_TRET(__wt_close(session, &ffh));
WT_TRET(__wt_close(session, &tfh));
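
With __wt_rename_and_sync_directory removed, __wt_copy_and_sync relies on a durable rename for the final step. A reduced standalone POSIX sketch of the overall pattern (copy into a temporary file, fsync it, rename it into place); error handling is trimmed, and a fully durable version would also fsync the target directory after the rename:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* Copy "from" to "to" via a temporary name, flushing the data first. */
int
copy_and_sync(const char *from, const char *tmp, const char *to)
{
	char buf[4096];
	ssize_t n;
	int in, out;

	if ((in = open(from, O_RDONLY)) < 0)
		return (-1);
	if ((out = open(tmp, O_WRONLY | O_CREAT | O_EXCL, 0666)) < 0) {
		(void)close(in);
		return (-1);
	}
	while ((n = read(in, buf, sizeof(buf))) > 0)
		if (write(out, buf, (size_t)n) != n) {
			n = -1;
			break;
		}
	(void)close(in);
	if (n < 0 || fsync(out) != 0) {
		(void)close(out);
		return (-1);
	}
	if (close(out) != 0)
		return (-1);
	/* A durable rename would additionally fsync the parent directory. */
	return (rename(tmp, to) == 0 ? 0 : -1);
}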
diff --git a/src/os_common/os_fhandle.c b/src/os_common/os_fhandle.c
index 81e4cc14ccb..184a9df0e72 100644
--- a/src/os_common/os_fhandle.c
+++ b/src/os_common/os_fhandle.c
@@ -150,19 +150,19 @@ __open_verbose(
*/
switch (file_type) {
- case WT_OPEN_FILE_TYPE_CHECKPOINT:
+ case WT_FS_OPEN_FILE_TYPE_CHECKPOINT:
file_type_tag = "checkpoint";
break;
- case WT_OPEN_FILE_TYPE_DATA:
+ case WT_FS_OPEN_FILE_TYPE_DATA:
file_type_tag = "data";
break;
- case WT_OPEN_FILE_TYPE_DIRECTORY:
+ case WT_FS_OPEN_FILE_TYPE_DIRECTORY:
file_type_tag = "directory";
break;
- case WT_OPEN_FILE_TYPE_LOG:
+ case WT_FS_OPEN_FILE_TYPE_LOG:
file_type_tag = "log";
break;
- case WT_OPEN_FILE_TYPE_REGULAR:
+ case WT_FS_OPEN_FILE_TYPE_REGULAR:
file_type_tag = "regular";
break;
default:
@@ -172,18 +172,18 @@ __open_verbose(
WT_RET(__wt_scr_alloc(session, 0, &tmp));
sep = " (";
-#define WT_OPEN_VERBOSE_FLAG(f, name) \
+#define WT_FS_OPEN_VERBOSE_FLAG(f, name) \
if (LF_ISSET(f)) { \
WT_ERR(__wt_buf_catfmt( \
session, tmp, "%s%s", sep, name)); \
sep = ", "; \
}
- WT_OPEN_VERBOSE_FLAG(WT_OPEN_CREATE, "create");
- WT_OPEN_VERBOSE_FLAG(WT_OPEN_DIRECTIO, "direct-IO");
- WT_OPEN_VERBOSE_FLAG(WT_OPEN_EXCLUSIVE, "exclusive");
- WT_OPEN_VERBOSE_FLAG(WT_OPEN_FIXED, "fixed");
- WT_OPEN_VERBOSE_FLAG(WT_OPEN_READONLY, "readonly");
+ WT_FS_OPEN_VERBOSE_FLAG(WT_FS_OPEN_CREATE, "create");
+ WT_FS_OPEN_VERBOSE_FLAG(WT_FS_OPEN_DIRECTIO, "direct-IO");
+ WT_FS_OPEN_VERBOSE_FLAG(WT_FS_OPEN_EXCLUSIVE, "exclusive");
+ WT_FS_OPEN_VERBOSE_FLAG(WT_FS_OPEN_FIXED, "fixed");
+ WT_FS_OPEN_VERBOSE_FLAG(WT_FS_OPEN_READONLY, "readonly");
if (tmp->size != 0)
WT_ERR(__wt_buf_catfmt(session, tmp, ")"));
@@ -209,7 +209,7 @@ err: __wt_scr_free(session, &tmp);
*/
int
__wt_open(WT_SESSION_IMPL *session,
- const char *name, WT_OPEN_FILE_TYPE file_type, u_int flags, WT_FH **fhp)
+ const char *name, WT_FS_OPEN_FILE_TYPE file_type, u_int flags, WT_FH **fhp)
{
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
@@ -247,12 +247,12 @@ __wt_open(WT_SESSION_IMPL *session,
if (F_ISSET(conn, WT_CONN_READONLY)) {
lock_file = strcmp(name, WT_SINGLETHREAD) == 0;
if (!lock_file)
- LF_SET(WT_OPEN_READONLY);
- WT_ASSERT(session, lock_file || !LF_ISSET(WT_OPEN_CREATE));
+ LF_SET(WT_FS_OPEN_READONLY);
+ WT_ASSERT(session, lock_file || !LF_ISSET(WT_FS_OPEN_CREATE));
}
/* Create the path to the file. */
- if (!LF_ISSET(WT_OPEN_FIXED))
+ if (!LF_ISSET(WT_FS_OPEN_FIXED))
WT_ERR(__wt_filename(session, name, &path));
/* Call the underlying open function. */
@@ -261,7 +261,7 @@ __wt_open(WT_SESSION_IMPL *session,
open_called = true;
WT_ERR(__fhandle_method_finalize(
- session, fh->handle, LF_ISSET(WT_OPEN_READONLY)));
+ session, fh->handle, LF_ISSET(WT_FS_OPEN_READONLY)));
/*
* Repeat the check for a match: if there's no match, link our newly
diff --git a/src/os_common/os_fs_inmemory.c b/src/os_common/os_fs_inmemory.c
index 09c2e08db83..178adc1dac8 100644
--- a/src/os_common/os_fs_inmemory.c
+++ b/src/os_common/os_fs_inmemory.c
@@ -188,14 +188,16 @@ __im_fs_exist(WT_FILE_SYSTEM *file_system,
* POSIX remove.
*/
static int
-__im_fs_remove(
- WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name)
+__im_fs_remove(WT_FILE_SYSTEM *file_system,
+ WT_SESSION *wt_session, const char *name, uint32_t flags)
{
WT_DECL_RET;
WT_FILE_HANDLE_INMEM *im_fh;
WT_FILE_SYSTEM_INMEM *im_fs;
WT_SESSION_IMPL *session;
+ WT_UNUSED(flags);
+
im_fs = (WT_FILE_SYSTEM_INMEM *)file_system;
session = (WT_SESSION_IMPL *)wt_session;
@@ -215,7 +217,7 @@ __im_fs_remove(
*/
static int
__im_fs_rename(WT_FILE_SYSTEM *file_system,
- WT_SESSION *wt_session, const char *from, const char *to)
+ WT_SESSION *wt_session, const char *from, const char *to, uint32_t flags)
{
WT_DECL_RET;
WT_FILE_HANDLE_INMEM *im_fh;
@@ -224,6 +226,8 @@ __im_fs_rename(WT_FILE_SYSTEM *file_system,
uint64_t bucket;
char *copy;
+ WT_UNUSED(flags);
+
im_fs = (WT_FILE_SYSTEM_INMEM *)file_system;
session = (WT_SESSION_IMPL *)wt_session;
@@ -463,7 +467,7 @@ err: __wt_spin_unlock(session, &im_fs->lock);
*/
static int
__im_file_open(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
- const char *name, WT_OPEN_FILE_TYPE file_type, uint32_t flags,
+ const char *name, WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags,
WT_FILE_HANDLE **file_handlep)
{
WT_DECL_RET;
diff --git a/src/os_common/os_fstream.c b/src/os_common/os_fstream.c
index 0b199529e19..5a368ea75e6 100644
--- a/src/os_common/os_fstream.c
+++ b/src/os_common/os_fstream.c
@@ -187,7 +187,7 @@ __wt_fopen(WT_SESSION_IMPL *session,
fstr = NULL;
WT_RET(__wt_open(
- session, name, WT_OPEN_FILE_TYPE_REGULAR, open_flags, &fh));
+ session, name, WT_FS_OPEN_FILE_TYPE_REGULAR, open_flags, &fh));
WT_ERR(__wt_calloc_one(session, &fstr));
fstr->fh = fh;
diff --git a/src/os_posix/os_fs.c b/src/os_posix/os_fs.c
index 86fa2e8f117..11f38ec063b 100644
--- a/src/os_posix/os_fs.c
+++ b/src/os_posix/os_fs.c
@@ -30,7 +30,7 @@
/*
* __posix_sync --
- * Underlying support function to flush a file handle.
+ * Underlying support function to flush a file descriptor.
*/
static int
__posix_sync(
@@ -77,33 +77,42 @@ __posix_sync(
#ifdef __linux__
/*
* __posix_directory_sync --
- * Flush a directory to ensure file creation is durable.
+ * Flush a directory to ensure file creation, remove or rename is durable.
*/
static int
-__posix_directory_sync(
- WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *path)
+__posix_directory_sync(WT_SESSION_IMPL *session, const char *path)
{
+ WT_DECL_ITEM(tmp);
WT_DECL_RET;
- WT_SESSION_IMPL *session;
int fd, tret;
+ char *dir;
- WT_UNUSED(file_system);
+ WT_RET(__wt_scr_alloc(session, 0, &tmp));
+ WT_ERR(__wt_buf_setstr(session, tmp, path));
- session = (WT_SESSION_IMPL *)wt_session;
+ /*
+	 * This layer should never see a path that doesn't include a
+	 * directory separator; the dereference below relies on that fact.
+ */
+ dir = tmp->mem;
+ strrchr(dir, '/')[1] = '\0';
+ fd = -1; /* -Wconditional-uninitialized */
WT_SYSCALL_RETRY((
- (fd = open(path, O_RDONLY, 0444)) == -1 ? -1 : 0), ret);
+ (fd = open(dir, O_RDONLY, 0444)) == -1 ? -1 : 0), ret);
if (ret != 0)
- WT_RET_MSG(session, ret, "%s: directory-sync: open", path);
+ WT_ERR_MSG(session, ret, "%s: directory-sync: open", dir);
- ret = __posix_sync(session, fd, path, "directory-sync");
+ ret = __posix_sync(session, fd, dir, "directory-sync");
WT_SYSCALL(close(fd), tret);
if (tret != 0) {
- __wt_err(session, tret, "%s: directory-sync: close", path);
+ __wt_err(session, tret, "%s: directory-sync: close", dir);
if (ret == 0)
ret = tret;
}
+
+err: __wt_scr_free(session, &tmp);
return (ret);
}
#endif
@@ -141,8 +150,8 @@ __posix_fs_exist(WT_FILE_SYSTEM *file_system,
* Remove a file.
*/
static int
-__posix_fs_remove(
- WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name)
+__posix_fs_remove(WT_FILE_SYSTEM *file_system,
+ WT_SESSION *wt_session, const char *name, uint32_t flags)
{
WT_DECL_RET;
WT_SESSION_IMPL *session;
@@ -159,9 +168,17 @@ __posix_fs_remove(
* using unlink may be marginally safer.
*/
WT_SYSCALL(unlink(name), ret);
- if (ret == 0)
+ if (ret != 0)
+ WT_RET_MSG(session, ret, "%s: file-remove: unlink", name);
+
+ if (!LF_ISSET(WT_FS_DURABLE))
return (0);
- WT_RET_MSG(session, ret, "%s: file-remove: unlink", name);
+
+#ifdef __linux__
+ /* Flush the backing directory to guarantee the remove. */
+	WT_RET(__posix_directory_sync(session, name));
+#endif
+ return (0);
}
/*
@@ -170,7 +187,7 @@ __posix_fs_remove(
*/
static int
__posix_fs_rename(WT_FILE_SYSTEM *file_system,
- WT_SESSION *wt_session, const char *from, const char *to)
+ WT_SESSION *wt_session, const char *from, const char *to, uint32_t flags)
{
WT_DECL_RET;
WT_SESSION_IMPL *session;
@@ -187,9 +204,43 @@ __posix_fs_rename(WT_FILE_SYSTEM *file_system,
* return (if errno is 0), but we've done the best we can.
*/
WT_SYSCALL(rename(from, to) != 0 ? -1 : 0, ret);
- if (ret == 0)
+ if (ret != 0)
+ WT_RET_MSG(
+ session, ret, "%s to %s: file-rename: rename", from, to);
+
+ if (!LF_ISSET(WT_FS_DURABLE))
return (0);
- WT_RET_MSG(session, ret, "%s to %s: file-rename: rename", from, to);
+#ifdef __linux__
+ /*
+ * Flush the backing directory to guarantee the rename. My reading of
+ * POSIX 1003.1 is there's no guarantee flushing only one of the from
+ * or to directories, or flushing a common parent, is sufficient, and
+ * even if POSIX were to make that guarantee, existing filesystems are
+ * known to not provide the guarantee or only provide the guarantee
+ * with specific mount options. Flush both of the from/to directories
+ * until it's a performance problem.
+ */
+ WT_RET(__posix_directory_sync(session, from));
+
+ /*
+ * In almost all cases, we're going to be renaming files in the same
+ * directory, we can at least fast-path that.
+ */
+ {
+ bool same_directory;
+ const char *fp, *tp;
+
+ fp = strrchr(from, '/');
+ tp = strrchr(to, '/');
+ same_directory = (fp == NULL && tp == NULL) ||
+ (fp != NULL && tp != NULL &&
+ fp - from == tp - to && memcmp(from, to, (size_t)(fp - from)) == 0);
+
+ if (!same_directory)
+ WT_RET(__posix_directory_sync(session, to));
+ }
+#endif
+ return (0);
}
/*
@@ -513,7 +564,7 @@ __posix_open_file_cloexec(WT_SESSION_IMPL *session, int fd, const char *name)
*/
static int
__posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
- const char *name, WT_OPEN_FILE_TYPE file_type, uint32_t flags,
+ const char *name, WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags,
WT_FILE_HANDLE **file_handlep)
{
WT_CONNECTION_IMPL *conn;
@@ -536,7 +587,7 @@ __posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
/* Set up error handling. */
pfh->fd = -1;
- if (file_type == WT_OPEN_FILE_TYPE_DIRECTORY) {
+ if (file_type == WT_FS_OPEN_FILE_TYPE_DIRECTORY) {
f = O_RDONLY;
#ifdef O_CLOEXEC
/*
@@ -554,10 +605,10 @@ __posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
goto directory_open;
}
- f = LF_ISSET(WT_OPEN_READONLY) ? O_RDONLY : O_RDWR;
- if (LF_ISSET(WT_OPEN_CREATE)) {
+ f = LF_ISSET(WT_FS_OPEN_READONLY) ? O_RDONLY : O_RDWR;
+ if (LF_ISSET(WT_FS_OPEN_CREATE)) {
f |= O_CREAT;
- if (LF_ISSET(WT_OPEN_EXCLUSIVE))
+ if (LF_ISSET(WT_FS_OPEN_EXCLUSIVE))
f |= O_EXCL;
mode = 0666;
} else
@@ -577,7 +628,7 @@ __posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
#endif
#ifdef O_DIRECT
/* Direct I/O. */
- if (LF_ISSET(WT_OPEN_DIRECTIO)) {
+ if (LF_ISSET(WT_FS_OPEN_DIRECTIO)) {
f |= O_DIRECT;
pfh->direct_io = true;
} else
@@ -585,11 +636,11 @@ __posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
#endif
#ifdef O_NOATIME
/* Avoid updating metadata for read-only workloads. */
- if (file_type == WT_OPEN_FILE_TYPE_DATA)
+ if (file_type == WT_FS_OPEN_FILE_TYPE_DATA)
f |= O_NOATIME;
#endif
- if (file_type == WT_OPEN_FILE_TYPE_LOG &&
+ if (file_type == WT_FS_OPEN_FILE_TYPE_LOG &&
FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) {
#ifdef O_DSYNC
f |= O_DSYNC;
@@ -601,6 +652,7 @@ __posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
#endif
}
+ /* Create/Open the file. */
WT_SYSCALL_RETRY(((pfh->fd = open(name, f, mode)) == -1 ? -1 : 0), ret);
if (ret != 0)
WT_ERR_MSG(session, ret,
@@ -608,6 +660,16 @@ __posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
"%s: handle-open: open: failed with direct I/O configured, "
"some filesystem types do not support direct I/O" :
"%s: handle-open: open", name);
+
+#ifdef __linux__
+ /*
+ * Durability: some filesystems require a directory sync to be confident
+ * the file will appear.
+ */
+ if (LF_ISSET(WT_FS_OPEN_DURABLE))
+ WT_ERR(__posix_directory_sync(session, name));
+#endif
+
WT_ERR(__posix_open_file_cloexec(session, pfh->fd, name));
#if defined(HAVE_POSIX_FADVISE)
@@ -616,7 +678,7 @@ __posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
* Ignore fadvise when doing direct I/O, the kernel cache isn't
* interesting.
*/
- if (!pfh->direct_io && file_type == WT_OPEN_FILE_TYPE_DATA) {
+ if (!pfh->direct_io && file_type == WT_FS_OPEN_FILE_TYPE_DATA) {
WT_SYSCALL(
posix_fadvise(pfh->fd, 0, 0, POSIX_FADV_RANDOM), ret);
if (ret != 0)
@@ -705,9 +767,6 @@ __wt_os_posix(WT_SESSION_IMPL *session)
/* Initialize the POSIX jump table. */
file_system->fs_directory_list = __wt_posix_directory_list;
file_system->fs_directory_list_free = __wt_posix_directory_list_free;
-#ifdef __linux__
- file_system->fs_directory_sync = __posix_directory_sync;
-#endif
file_system->fs_exist = __posix_fs_exist;
file_system->fs_open_file = __posix_open_file;
file_system->fs_remove = __posix_fs_remove;
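
The durable path added to __posix_fs_rename flushes the parent directories of both names so the rename itself survives a crash, fast-pathing the common same-directory case. A standalone POSIX sketch of the same idea, without the fast path; the helper names are illustrative, not WiredTiger's __posix_directory_sync:

#include <fcntl.h>
#include <libgen.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* fsync the directory containing "path". */
static int
fsync_parent(const char *path)
{
	char copy[4096];
	int fd, ret;

	/* dirname may modify its argument, so operate on a copy. */
	(void)strncpy(copy, path, sizeof(copy) - 1);
	copy[sizeof(copy) - 1] = '\0';
	if ((fd = open(dirname(copy), O_RDONLY)) < 0)
		return (-1);
	ret = fsync(fd);
	(void)close(fd);
	return (ret);
}

/* Rename, then flush both parent directories to make it durable. */
int
durable_rename(const char *from, const char *to)
{
	if (rename(from, to) != 0)
		return (-1);
	if (fsync_parent(from) != 0)
		return (-1);
	return (fsync_parent(to));
}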
diff --git a/src/os_win/os_fs.c b/src/os_win/os_fs.c
index 5daba124e90..fc03e0a2595 100644
--- a/src/os_win/os_fs.c
+++ b/src/os_win/os_fs.c
@@ -36,13 +36,14 @@ __win_fs_exist(WT_FILE_SYSTEM *file_system,
* Remove a file.
*/
static int
-__win_fs_remove(
- WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name)
+__win_fs_remove(WT_FILE_SYSTEM *file_system,
+ WT_SESSION *wt_session, const char *name, uint32_t flags)
{
DWORD windows_error;
WT_SESSION_IMPL *session;
WT_UNUSED(file_system);
+ WT_UNUSED(flags);
session = (WT_SESSION_IMPL *)wt_session;
@@ -62,12 +63,13 @@ __win_fs_remove(
*/
static int
__win_fs_rename(WT_FILE_SYSTEM *file_system,
- WT_SESSION *wt_session, const char *from, const char *to)
+ WT_SESSION *wt_session, const char *from, const char *to, uint32_t flags)
{
DWORD windows_error;
WT_SESSION_IMPL *session;
WT_UNUSED(file_system);
+ WT_UNUSED(flags);
session = (WT_SESSION_IMPL *)wt_session;
@@ -426,7 +428,7 @@ __win_file_write(WT_FILE_HANDLE *file_handle,
*/
static int
__win_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
- const char *name, WT_OPEN_FILE_TYPE file_type, uint32_t flags,
+ const char *name, WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags,
WT_FILE_HANDLE **file_handlep)
{
DWORD dwCreationDisposition, windows_error;
@@ -458,11 +460,11 @@ __win_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
* require that functionality: create an empty WT_FH structure with
* invalid handles.
*/
- if (file_type == WT_OPEN_FILE_TYPE_DIRECTORY)
+ if (file_type == WT_FS_OPEN_FILE_TYPE_DIRECTORY)
goto directory_open;
desired_access = GENERIC_READ;
- if (!LF_ISSET(WT_OPEN_READONLY))
+ if (!LF_ISSET(WT_FS_OPEN_READONLY))
desired_access |= GENERIC_WRITE;
/*
@@ -476,15 +478,15 @@ __win_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
f = FILE_ATTRIBUTE_NORMAL;
dwCreationDisposition = 0;
- if (LF_ISSET(WT_OPEN_CREATE)) {
+ if (LF_ISSET(WT_FS_OPEN_CREATE)) {
dwCreationDisposition = CREATE_NEW;
- if (LF_ISSET(WT_OPEN_EXCLUSIVE))
+ if (LF_ISSET(WT_FS_OPEN_EXCLUSIVE))
dwCreationDisposition = CREATE_ALWAYS;
} else
dwCreationDisposition = OPEN_EXISTING;
/* Direct I/O. */
- if (LF_ISSET(WT_OPEN_DIRECTIO)) {
+ if (LF_ISSET(WT_FS_OPEN_DIRECTIO)) {
f |= FILE_FLAG_NO_BUFFERING;
win_fh->direct_io = true;
}
@@ -493,19 +495,19 @@ __win_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
if (FLD_ISSET(conn->write_through, file_type))
f |= FILE_FLAG_WRITE_THROUGH;
- if (file_type == WT_OPEN_FILE_TYPE_LOG &&
+ if (file_type == WT_FS_OPEN_FILE_TYPE_LOG &&
FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC))
f |= FILE_FLAG_WRITE_THROUGH;
/* Disable read-ahead on trees: it slows down random read workloads. */
- if (file_type == WT_OPEN_FILE_TYPE_DATA)
+ if (file_type == WT_FS_OPEN_FILE_TYPE_DATA)
f |= FILE_FLAG_RANDOM_ACCESS;
win_fh->filehandle = CreateFileA(name, desired_access,
FILE_SHARE_READ | FILE_SHARE_WRITE,
NULL, dwCreationDisposition, f, NULL);
if (win_fh->filehandle == INVALID_HANDLE_VALUE) {
- if (LF_ISSET(WT_OPEN_CREATE) &&
+ if (LF_ISSET(WT_FS_OPEN_CREATE) &&
GetLastError() == ERROR_FILE_EXISTS)
win_fh->filehandle = CreateFileA(name, desired_access,
FILE_SHARE_READ | FILE_SHARE_WRITE,
@@ -528,7 +530,7 @@ __win_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
* concurrently with reads on the file. Writes would also move the file
* pointer.
*/
- if (!LF_ISSET(WT_OPEN_READONLY)) {
+ if (!LF_ISSET(WT_FS_OPEN_READONLY)) {
win_fh->filehandle_secondary = CreateFileA(name, desired_access,
FILE_SHARE_READ | FILE_SHARE_WRITE,
NULL, OPEN_EXISTING, f, NULL);
diff --git a/src/os_win/os_path.c b/src/os_win/os_path.c
index 220752ce7a1..74050600417 100644
--- a/src/os_win/os_path.c
+++ b/src/os_win/os_path.c
@@ -16,8 +16,30 @@ bool
__wt_absolute_path(const char *path)
{
/*
- * Check for a drive name (for example, "D:"), allow both forward and
- * backward slashes.
+ * https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247
+ *
+ * For Windows API functions that manipulate files, file names can often
+ * be relative to the current directory, while some APIs require a fully
+ * qualified path. A file name is relative to the current directory if
+ * it does not begin with one of the following:
+ *
+ * -- A UNC name of any format, which always start with two backslash
+ * characters ("\\").
+ * -- A disk designator with a backslash, for example "C:\" or "d:\".
+ * -- A single backslash, for example, "\directory" or "\file.txt". This
+ * is also referred to as an absolute path.
+ *
+ * If a file name begins with only a disk designator but not the
+ * backslash after the colon, it is interpreted as a relative path to
+ * the current directory on the drive with the specified letter. Note
+ * that the current directory may or may not be the root directory
+ * depending on what it was set to during the most recent "change
+ * directory" operation on that disk.
+ *
+ * -- "C:tmp.txt" refers to a file named "tmp.txt" in the current
+ * directory on drive C.
+ * -- "C:tempdir\tmp.txt" refers to a file in a subdirectory to the
+ * current directory on drive C.
*/
if (strlen(path) >= 3 && __wt_isalpha(path[0]) && path[1] == ':')
path += 2;
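
The expanded comment in __wt_absolute_path spells out the Windows rules for absolute versus drive-relative names. A simplified standalone sketch of that check, assuming the function goes on to test for a leading slash or backslash after skipping a bare drive designator (the remainder of the function is not shown in this hunk):

#include <ctype.h>
#include <stdbool.h>
#include <string.h>

/* Return true if a Windows path is absolute (including UNC names). */
bool
win_absolute_path(const char *path)
{
	/* Skip a drive designator such as "C:"; "C:tmp.txt" stays relative. */
	if (strlen(path) >= 3 &&
	    isalpha((unsigned char)path[0]) && path[1] == ':')
		path += 2;
	return (path[0] == '/' || path[0] == '\\');
}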
diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c
index b49946bb10e..b96b34594b0 100644
--- a/src/reconcile/rec_write.c
+++ b/src/reconcile/rec_write.c
@@ -115,6 +115,7 @@ typedef struct {
*/
uint32_t page_size; /* Set page size */
uint32_t page_size_orig; /* Saved set page size */
+ uint32_t max_raw_page_size; /* Max page size with raw compression */
/*
* Second, the split size: if we're doing the page layout, split to a
@@ -159,9 +160,16 @@ typedef struct {
WT_ADDR addr; /* Split's written location */
uint32_t size; /* Split's size */
uint32_t cksum; /* Split's checksum */
+
void *disk_image; /* Split's disk image */
/*
+ * Raw compression, the disk image being written is already
+ * compressed.
+ */
+ bool already_compressed;
+
+ /*
* Saved update list, supporting the WT_EVICT_UPDATE_RESTORE and
* WT_EVICT_LOOKASIDE configurations.
*/
@@ -175,13 +183,6 @@ typedef struct {
* column-store key.
*/
WT_ITEM key; /* Promoted row-store key */
-
- /*
- * During wrapup, after reconciling the root page, we write a
- * final block as part of a checkpoint. If raw compression
- * was configured, that block may have already been compressed.
- */
- bool already_compressed;
} *bnd; /* Saved boundaries */
uint32_t bnd_next; /* Next boundary slot */
uint32_t bnd_next_max; /* Maximum boundary slots used */
@@ -445,17 +446,32 @@ __wt_reconcile(WT_SESSION_IMPL *session,
}
/*
- * Clean up reconciliation resources: some workloads have millions of
- * boundary structures, and if associated with an application session
- * pulled into doing forced eviction, they won't be discarded for the
- * life of the session (or until session.reset is called). Discard all
- * of the reconciliation resources if an application thread, not doing
- * a checkpoint.
- */
- __rec_bnd_cleanup(session, r,
- F_ISSET(session, WT_SESSION_INTERNAL) ||
- WT_SESSION_IS_CHECKPOINT(session) ? false : true);
+ * When application threads perform eviction, don't cache block manager
+ * or reconciliation structures (even across calls), we can have a
+ * significant number of application threads doing eviction at the same
+ * time with large items. We ignore checkpoints, once the checkpoint
+ * completes, all unnecessary session resources will be discarded.
+ *
+ * Even in application threads doing checkpoints or in internal threads
+ * doing any reconciliation, clean up reconciliation resources. Some
+ * workloads have millions of boundary structures in a reconciliation
+ * and we don't want to tie that memory down, even across calls.
+ */
+ if (WT_SESSION_IS_CHECKPOINT(session) ||
+ F_ISSET(session, WT_SESSION_INTERNAL))
+ __rec_bnd_cleanup(session, r, false);
+ else {
+ /*
+ * Clean up the underlying block manager memory too: it's not
+ * reconciliation, but threads discarding reconciliation
+ * structures want to clean up the block manager's structures
+ * as well, and there's no obvious place to do that.
+ */
+ if (session->block_manager_cleanup != NULL)
+ WT_TRET(session->block_manager_cleanup(session));
+ WT_TRET(__rec_destroy_session(session));
+ }
WT_RET(ret);
/*
@@ -652,7 +668,7 @@ __rec_root_write(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags)
WT_ASSERT(session, mod->mod_multi[i].supd == NULL);
WT_ERR(__wt_multi_to_ref(session,
- next, &mod->mod_multi[i], &pindex->index[i], NULL));
+ next, &mod->mod_multi[i], &pindex->index[i], NULL, false));
pindex->index[i]->home = next;
}
@@ -1135,8 +1151,20 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
*/
if (!skipped &&
(F_ISSET(btree, WT_BTREE_LOOKASIDE) ||
- __wt_txn_visible_all(session, max_txn)))
+ __wt_txn_visible_all(session, max_txn))) {
+#ifdef HAVE_DIAGNOSTIC
+ /*
+ * The checkpoint transaction is special. Make sure we never
+ * write (metadata) updates from a checkpoint in a concurrent
+ * session.
+ */
+ txnid = *updp == NULL ? WT_TXN_NONE : (*updp)->txnid;
+ WT_ASSERT(session, txnid == WT_TXN_NONE ||
+ txnid != S2C(session)->txn_global.checkpoint_txnid ||
+ WT_SESSION_IS_CHECKPOINT(session));
+#endif
return (0);
+ }
/*
* In some cases, there had better not be skipped updates or updates not
@@ -1845,18 +1873,19 @@ __rec_split_bnd_init(WT_SESSION_IMPL *session, WT_BOUNDARY *bnd)
WT_CLEAR(bnd->addr);
bnd->size = 0;
bnd->cksum = 0;
+
__wt_free(session, bnd->disk_image);
__wt_free(session, bnd->supd);
bnd->supd_next = 0;
bnd->supd_allocated = 0;
+ bnd->already_compressed = false;
+
/*
* Don't touch the key, we re-use that memory in each new
* reconciliation.
*/
-
- bnd->already_compressed = false;
}
/*
@@ -1950,10 +1979,19 @@ __rec_split_init(WT_SESSION_IMPL *session,
* additional data because we don't know how well it will compress, and
* we don't want to increment our way up to the amount of data needed by
* the application to successfully compress to the target page size.
+ * Ideally accumulate data several times the page size without
+ * approaching the memory page maximum, but at least have data worth
+ * one page.
+ *
+ * There are cases when we grow the page size to accommodate large
+ * records, in those cases we split the pages once they have crossed
+ * the maximum size for a page with raw compression.
*/
r->page_size = r->page_size_orig = max;
if (r->raw_compression)
- r->page_size *= 10;
+ r->max_raw_page_size = r->page_size =
+ (uint32_t)WT_MIN(r->page_size * 10,
+ WT_MAX(r->page_size, btree->maxmempage / 2));
/*
* Ensure the disk image buffer is large enough for the max object, as
@@ -2295,7 +2333,7 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len)
/* Hitting a page boundary resets the dictionary, in all cases. */
__rec_dictionary_reset(r);
- inuse = WT_PTRDIFF32(r->first_free, dsk);
+ inuse = WT_PTRDIFF(r->first_free, dsk);
switch (r->bnd_state) {
case SPLIT_BOUNDARY:
/*
@@ -2465,7 +2503,7 @@ __rec_split_raw_worker(WT_SESSION_IMPL *session,
WT_COMPRESSOR *compressor;
WT_DECL_RET;
WT_ITEM *dst, *write_ref;
- WT_PAGE_HEADER *dsk, *dsk_dst;
+ WT_PAGE_HEADER *dsk, *dsk_dst, *disk_image;
WT_SESSION *wt_session;
size_t corrected_page_size, extra_skip, len, result_len;
uint64_t recno;
@@ -2582,11 +2620,9 @@ __rec_split_raw_worker(WT_SESSION_IMPL *session,
/*
* Don't create an image so large that any future update will
- * cause a split in memory. Use half of the maximum size so
- * we split very compressible pages that have reached the
- * maximum size in memory into two equal blocks.
+ * cause a split in memory.
*/
- if (len > (size_t)btree->maxmempage / 2)
+ if (max_image_slot == 0 && len > (size_t)r->max_raw_page_size)
max_image_slot = slots;
}
@@ -2648,7 +2684,7 @@ __rec_split_raw_worker(WT_SESSION_IMPL *session,
r->page_size_orig, btree->split_pct,
WT_BLOCK_COMPRESS_SKIP + extra_skip,
(uint8_t *)dsk + WT_BLOCK_COMPRESS_SKIP, r->raw_offsets,
- no_more_rows || max_image_slot == 0 ? slots : max_image_slot,
+ max_image_slot == 0 ? slots : max_image_slot,
(uint8_t *)dst->mem + WT_BLOCK_COMPRESS_SKIP,
result_len,
no_more_rows || max_image_slot != 0,
@@ -2751,7 +2787,8 @@ no_slots:
if (result_slots != 0) {
/*
- * We have a block, finalize the header information.
+ * We have a block, finalize the compressed disk image's header
+ * information.
*/
dst->size = result_len + WT_BLOCK_COMPRESS_SKIP;
dsk_dst = dst->mem;
@@ -2761,6 +2798,26 @@ no_slots:
dsk_dst->u.entries = r->raw_entries[result_slots - 1];
/*
+ * Optionally keep the disk image in cache. Update the initial
+ * page-header fields to reflect the actual data being written.
+ *
+ * If updates are saved and need to be restored, we have to keep
+ * a copy of the disk image. Unfortunately, we don't yet know if
+ * there are updates to restore for the key range covered by the
+ * disk image just created. If there are any saved updates, take
+ * a copy of the disk image, it's freed later if not needed.
+ */
+ if (F_ISSET(r, WT_EVICT_SCRUB) ||
+ (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && r->supd_next > 0)) {
+ WT_RET(__wt_strndup(session, dsk,
+ dsk_dst->mem_size, &last->disk_image));
+ disk_image = last->disk_image;
+ disk_image->recno = last->recno;
+ disk_image->mem_size = dsk_dst->mem_size;
+ disk_image->u.entries = dsk_dst->u.entries;
+ }
+
+ /*
* There is likely a remnant in the working buffer that didn't
* get compressed; copy it down to the start of the buffer and
* update the starting record number, free space and so on.
@@ -2874,48 +2931,6 @@ split_grow: /*
}
/*
- * __rec_raw_decompress --
- * Decompress a raw-compressed image.
- */
-static int
-__rec_raw_decompress(
- WT_SESSION_IMPL *session, const void *image, size_t size, void *retp)
-{
- WT_BTREE *btree;
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- WT_PAGE_HEADER const *dsk;
- size_t result_len;
-
- btree = S2BT(session);
- dsk = image;
-
- /*
- * We skipped an update and we can't write a block, but unfortunately,
- * the block has already been compressed. Decompress the block so we
- * can subsequently re-instantiate it in memory.
- */
- WT_RET(__wt_scr_alloc(session, dsk->mem_size, &tmp));
- memcpy(tmp->mem, image, WT_BLOCK_COMPRESS_SKIP);
- WT_ERR(btree->compressor->decompress(btree->compressor,
- &session->iface,
- (uint8_t *)image + WT_BLOCK_COMPRESS_SKIP,
- size - WT_BLOCK_COMPRESS_SKIP,
- (uint8_t *)tmp->mem + WT_BLOCK_COMPRESS_SKIP,
- dsk->mem_size - WT_BLOCK_COMPRESS_SKIP,
- &result_len));
- if (result_len != dsk->mem_size - WT_BLOCK_COMPRESS_SKIP)
- WT_ERR(__wt_illegal_value(session, btree->dhandle->name));
-
- WT_ERR(__wt_strndup(session, tmp->data, dsk->mem_size, retp));
- WT_ASSERT(session, __wt_verify_dsk_image(session,
- "[raw evict split]", tmp->data, dsk->mem_size, false) == 0);
-
-err: __wt_scr_free(session, &tmp);
- return (ret);
-}
-
-/*
* __rec_split_raw --
* Raw compression split routine.
*/
@@ -3022,7 +3037,7 @@ __rec_split_finish(WT_SESSION_IMPL *session, WT_RECONCILE *r)
if (r->raw_compression && r->entries != 0) {
while (r->entries != 0) {
data_size =
- WT_PTRDIFF32(r->first_free, r->disk_image.mem);
+ WT_PTRDIFF(r->first_free, r->disk_image.mem);
if (data_size <= btree->allocsize)
break;
WT_RET(__rec_split_raw_worker(session, r, 0, true));
@@ -3145,14 +3160,13 @@ __rec_split_write(WT_SESSION_IMPL *session,
uint32_t bnd_slot, i, j;
int cmp;
uint8_t addr[WT_BTREE_MAX_ADDR_COOKIE];
+ bool need_image;
btree = S2BT(session);
dsk = buf->mem;
page = r->page;
mod = page->modify;
- WT_RET(__wt_scr_alloc(session, 0, &key));
-
/* Set the zero-length value flag in the page header. */
if (dsk->type == WT_PAGE_ROW_LEAF) {
F_CLR(dsk, WT_PAGE_EMPTY_V_ALL | WT_PAGE_EMPTY_V_NONE);
@@ -3163,6 +3177,8 @@ __rec_split_write(WT_SESSION_IMPL *session,
F_SET(dsk, WT_PAGE_EMPTY_V_NONE);
}
+ bnd->entries = r->entries;
+
/* Initialize the address (set the page type for the parent). */
switch (dsk->type) {
case WT_PAGE_COL_FIX:
@@ -3176,9 +3192,8 @@ __rec_split_write(WT_SESSION_IMPL *session,
case WT_PAGE_ROW_INT:
bnd->addr.type = WT_ADDR_INT;
break;
- WT_ILLEGAL_VALUE_ERR(session);
+ WT_ILLEGAL_VALUE(session);
}
-
bnd->size = (uint32_t)buf->size;
bnd->cksum = 0;
@@ -3190,6 +3205,8 @@ __rec_split_write(WT_SESSION_IMPL *session,
* This code requires a key be filled in for the next block (or the
* last block flag be set, if there's no next block).
*/
+ if (page->type == WT_PAGE_ROW_LEAF)
+ WT_RET(__wt_scr_alloc(session, 0, &key));
for (i = 0, supd = r->supd; i < r->supd_next; ++i, ++supd) {
/* The last block gets all remaining saved updates. */
if (last_block) {
@@ -3254,33 +3271,11 @@ supd_check_complete:
* image, we can't actually write it. Instead, we will re-instantiate
* the page using the disk image and any list of updates we skipped.
*/
- if (F_ISSET(r, WT_EVICT_IN_MEMORY) ||
- (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && bnd->supd != NULL)) {
-
- /* Statistics tracking that we used update/restore. */
- if (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && bnd->supd != NULL)
- r->cache_write_restore = true;
-
- /*
- * If the buffer is compressed (raw compression was configured),
- * we have to decompress it so we can instantiate it later. It's
- * a slow and convoluted path, but it's also a rare one and it's
- * not worth making it faster. Else, the disk image is ready,
- * copy it into place for later. It's possible the disk image
- * has no items; we have to flag that for verification, it's a
- * special case since read/writing empty pages isn't generally
- * allowed.
- */
- if (bnd->already_compressed)
- WT_ERR(__rec_raw_decompress(
- session, buf->data, buf->size, &bnd->disk_image));
- else {
- WT_ERR(__wt_strndup(
- session, buf->data, buf->size, &bnd->disk_image));
- WT_ASSERT(session, __wt_verify_dsk_image(session,
- "[evict split]", buf->data, buf->size, true) == 0);
- }
- goto done;
+ if (F_ISSET(r, WT_EVICT_IN_MEMORY))
+ goto copy_image;
+ if (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && bnd->supd != NULL) {
+ r->cache_write_restore = true;
+ goto copy_image;
}
/*
@@ -3324,13 +3319,11 @@ supd_check_complete:
bnd->addr = multi->addr;
WT_STAT_FAST_DATA_INCR(session, rec_page_match);
- goto done;
+ goto copy_image;
}
}
}
- bnd->entries = r->entries;
-
#ifdef HAVE_VERBOSE
/* Output a verbose message if we create a page without many entries */
if (WT_VERBOSE_ISSET(session, WT_VERB_SPLIT) && r->entries < 6)
@@ -3343,8 +3336,8 @@ supd_check_complete:
r->bnd_state));
#endif
- WT_ERR(__wt_bt_write(session,
- buf, addr, &addr_size, false, bnd->already_compressed));
+ WT_ERR(__wt_bt_write(session, buf, addr, &addr_size,
+ false, F_ISSET(r, WT_CHECKPOINTING), bnd->already_compressed));
WT_ERR(__wt_strndup(session, addr, addr_size, &bnd->addr.addr));
bnd->addr.size = (uint8_t)addr_size;
@@ -3354,9 +3347,29 @@ supd_check_complete:
* the database's lookaside store.
*/
if (F_ISSET(r, WT_EVICT_LOOKASIDE) && bnd->supd != NULL)
- ret = __rec_update_las(session, r, btree->id, bnd);
+ WT_ERR(__rec_update_las(session, r, btree->id, bnd));
+
+copy_image:
+ /*
+ * If re-instantiating this page in memory (either because eviction
+ * wants to, or because we skipped updates to build the disk image),
+ * save a copy of the disk image.
+ *
+ * Raw compression might have already saved a copy of the disk image
+ * before we could know if we skipped updates to create it, and now
+ * we know if we're going to need it.
+ *
+ * Copy the disk image if we need a copy and don't already have one,
+ * discard any already saved copy we don't need.
+ */
+ need_image = F_ISSET(r, WT_EVICT_SCRUB) ||
+ (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && bnd->supd != NULL);
+ if (need_image && bnd->disk_image == NULL)
+ WT_ERR(__wt_strndup(
+ session, buf->data, buf->size, &bnd->disk_image));
+ if (!need_image)
+ __wt_free(session, bnd->disk_image);
-done:
err: __wt_scr_free(session, &key);
return (ret);
}
@@ -3556,8 +3569,9 @@ __wt_bulk_wrapup(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
WT_PAGE *parent;
WT_RECONCILE *r;
- r = cbulk->reconcile;
btree = S2BT(session);
+ if ((r = cbulk->reconcile) == NULL)
+ return (0);
switch (btree->type) {
case BTREE_COL_FIX:
@@ -5601,9 +5615,10 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
WT_RET(__wt_btree_block_free(session,
mod->mod_replace.addr, mod->mod_replace.size));
- /* Discard the replacement page's address. */
+ /* Discard the replacement page's address and disk image. */
__wt_free(session, mod->mod_replace.addr);
mod->mod_replace.size = 0;
+ __wt_free(session, mod->mod_disk_image);
break;
WT_ILLEGAL_VALUE(session);
}
@@ -5651,26 +5666,33 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
bnd = &r->bnd[0];
/*
- * If saving/restoring changes for this page and there's only
- * one block, there's nothing to write. This is an in-memory
- * configuration or a special case of forced eviction: set up
+ * If in-memory, or saving/restoring changes for this page and
+ * there's only one block, there's nothing to write. Set up
* a single block as if to split, then use that disk image to
- * rewrite the page in memory.
+	 * rewrite the page in memory. This differs from a simple
+	 * replacement where eviction retains the page in memory: the
+	 * replacement path can't handle update lists, while the split
+	 * path can.
*/
- if (bnd->disk_image != NULL)
+ if (F_ISSET(r, WT_EVICT_IN_MEMORY) ||
+ (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && bnd->supd != NULL))
goto split;
/*
- * If this is a root page, then we don't have an address and we
- * have to create a sync point. The address was cleared when
- * we were about to write the buffer so we know what to do here.
+ * A root page, we don't have an address and we have to create
+ * a sync point. The address was cleared when we were about to
+ * write the buffer so we know what to do here.
*/
if (bnd->addr.addr == NULL)
WT_RET(__wt_bt_write(session, &r->disk_image,
- NULL, NULL, true, bnd->already_compressed));
+ NULL, NULL, true, F_ISSET(r, WT_CHECKPOINTING),
+ bnd->already_compressed));
else {
mod->mod_replace = bnd->addr;
bnd->addr.addr = NULL;
+
+ mod->mod_disk_image = bnd->disk_image;
+ bnd->disk_image = NULL;
}
mod->rec_result = WT_PM_REC_REPLACE;
@@ -5805,19 +5827,26 @@ __rec_split_row(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
WT_RET(__wt_row_ikey_alloc(session, 0,
bnd->key.data, bnd->key.size, &multi->key.ikey));
- if (bnd->disk_image == NULL) {
- multi->addr = bnd->addr;
- multi->addr.reuse = 0;
- multi->size = bnd->size;
- multi->cksum = bnd->cksum;
- bnd->addr.addr = NULL;
- } else {
+ /*
+ * Copy any disk image. Don't take saved updates without a
+ * disk image (which happens if they have been saved to the
+ * lookaside table): they should be discarded along with the
+ * original page.
+ */
+ multi->disk_image = bnd->disk_image;
+ bnd->disk_image = NULL;
+ if (multi->disk_image != NULL) {
multi->supd = bnd->supd;
multi->supd_entries = bnd->supd_next;
bnd->supd = NULL;
- multi->disk_image = bnd->disk_image;
- bnd->disk_image = NULL;
}
+
+ /* Copy any address. */
+ multi->addr = bnd->addr;
+ multi->addr.reuse = 0;
+ multi->size = bnd->size;
+ multi->cksum = bnd->cksum;
+ bnd->addr.addr = NULL;
}
mod->mod_multi_entries = r->bnd_next;
@@ -5845,19 +5874,26 @@ __rec_split_col(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
bnd = r->bnd, i = 0; i < r->bnd_next; ++multi, ++bnd, ++i) {
multi->key.recno = bnd->recno;
- if (bnd->disk_image == NULL) {
- multi->addr = bnd->addr;
- multi->addr.reuse = 0;
- multi->size = bnd->size;
- multi->cksum = bnd->cksum;
- bnd->addr.addr = NULL;
- } else {
+ /*
+ * Copy any disk image. Don't take saved updates without a
+ * disk image (which happens if they have been saved to the
+ * lookaside table): they should be discarded along with the
+ * original page.
+ */
+ multi->disk_image = bnd->disk_image;
+ bnd->disk_image = NULL;
+ if (multi->disk_image != NULL) {
multi->supd = bnd->supd;
multi->supd_entries = bnd->supd_next;
bnd->supd = NULL;
- multi->disk_image = bnd->disk_image;
- bnd->disk_image = NULL;
}
+
+ /* Copy any address. */
+ multi->addr = bnd->addr;
+ multi->addr.reuse = 0;
+ multi->size = bnd->size;
+ multi->cksum = bnd->cksum;
+ bnd->addr.addr = NULL;
}
mod->mod_multi_entries = r->bnd_next;
@@ -6133,7 +6169,8 @@ __rec_cell_build_ovfl(WT_SESSION_IMPL *session,
/* Write the buffer. */
addr = buf;
- WT_ERR(__wt_bt_write(session, tmp, addr, &size, false, false));
+ WT_ERR(__wt_bt_write(session, tmp,
+ addr, &size, false, F_ISSET(r, WT_CHECKPOINTING), false));
/*
* Track the overflow record (unless it's a bulk load, which
diff --git a/src/schema/schema_rename.c b/src/schema/schema_rename.c
index 8f4d374fd22..bc92c882117 100644
--- a/src/schema/schema_rename.c
+++ b/src/schema/schema_rename.c
@@ -64,7 +64,7 @@ __rename_file(
WT_ERR(__wt_metadata_insert(session, newuri, oldvalue));
/* Rename the underlying file. */
- WT_ERR(__wt_fs_rename(session, filename, newfile));
+ WT_ERR(__wt_fs_rename(session, filename, newfile, false));
if (WT_META_TRACKING(session))
WT_ERR(__wt_meta_track_fileop(session, uri, newuri));
diff --git a/src/session/session_api.c b/src/session/session_api.c
index 77d1dc74c84..0072d7e1445 100644
--- a/src/session/session_api.c
+++ b/src/session/session_api.c
@@ -17,12 +17,8 @@ static int __session_rollback_transaction(WT_SESSION *, const char *);
* Unsupported session method.
*/
int
-__wt_session_notsup(WT_SESSION *wt_session)
+__wt_session_notsup(WT_SESSION_IMPL *session)
{
- WT_SESSION_IMPL *session;
-
- session = (WT_SESSION_IMPL *)wt_session;
-
WT_RET_MSG(session, ENOTSUP, "Unsupported session method");
}
@@ -66,6 +62,17 @@ __wt_session_copy_values(WT_SESSION_IMPL *session)
TAILQ_FOREACH(cursor, &session->cursors, q)
if (F_ISSET(cursor, WT_CURSTD_VALUE_INT)) {
+#ifdef HAVE_DIAGNOSTIC
+ /*
+ * We have to do this with a transaction ID pinned
+ * unless the cursor is reading from a checkpoint.
+ */
+ WT_TXN_STATE *txn_state = WT_SESSION_TXN_STATE(session);
+ WT_ASSERT(session, txn_state->snap_min != WT_TXN_NONE ||
+ (WT_PREFIX_MATCH(cursor->uri, "file:") &&
+ F_ISSET((WT_CURSOR_BTREE *)cursor, WT_CBT_NO_TXN)));
+#endif
+
F_CLR(cursor, WT_CURSTD_VALUE_INT);
WT_RET(__wt_buf_set(session, &cursor->value,
cursor->value.data, cursor->value.size));
@@ -509,7 +516,11 @@ __session_create(WT_SESSION *wt_session, const char *uri, const char *config)
ret = __wt_session_create(session, uri, config);
-err: API_END_RET_NOTFOUND_MAP(session, ret);
+err: if (ret != 0)
+ WT_STAT_FAST_CONN_INCR(session, session_table_create_fail);
+ else
+ WT_STAT_FAST_CONN_INCR(session, session_table_create_success);
+ API_END_RET_NOTFOUND_MAP(session, ret);
}
/*
@@ -520,10 +531,18 @@ static int
__session_create_readonly(
WT_SESSION *wt_session, const char *uri, const char *config)
{
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
WT_UNUSED(uri);
WT_UNUSED(config);
- return (__wt_session_notsup(wt_session));
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, create);
+
+ WT_STAT_FAST_CONN_INCR(session, session_table_create_fail);
+ ret = __wt_session_notsup(session);
+err: API_END_RET(session, ret);
}
/*
@@ -570,9 +589,16 @@ err: API_END_RET(session, ret);
static int
__session_log_flush_readonly(WT_SESSION *wt_session, const char *config)
{
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
WT_UNUSED(config);
- return (__wt_session_notsup(wt_session));
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, log_flush);
+
+ ret = __wt_session_notsup(session);
+err: API_END_RET(session, ret);
}
/*
@@ -605,9 +631,16 @@ static int
__session_log_printf_readonly(WT_SESSION *wt_session, const char *fmt, ...)
WT_GCC_FUNC_ATTRIBUTE((format (printf, 2, 3)))
{
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
WT_UNUSED(fmt);
- return (__wt_session_notsup(wt_session));
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, log_printf);
+
+ ret = __wt_session_notsup(session);
+err: API_END_RET(session, ret);
}
/*
@@ -630,7 +663,12 @@ __session_rebalance(WT_SESSION *wt_session, const char *uri, const char *config)
ret = __wt_schema_worker(session, uri, __wt_bt_rebalance,
NULL, cfg, WT_DHANDLE_EXCLUSIVE | WT_BTREE_REBALANCE)));
-err: API_END_RET_NOTFOUND_MAP(session, ret);
+err: if (ret != 0)
+ WT_STAT_FAST_CONN_INCR(session, session_table_rebalance_fail);
+ else
+ WT_STAT_FAST_CONN_INCR(session,
+ session_table_rebalance_success);
+ API_END_RET_NOTFOUND_MAP(session, ret);
}
/*
@@ -641,10 +679,18 @@ static int
__session_rebalance_readonly(
WT_SESSION *wt_session, const char *uri, const char *config)
{
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
WT_UNUSED(uri);
WT_UNUSED(config);
- return (__wt_session_notsup(wt_session));
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, rebalance);
+
+ WT_STAT_FAST_CONN_INCR(session, session_table_rebalance_fail);
+ ret = __wt_session_notsup(session);
+err: API_END_RET(session, ret);
}
/*
@@ -670,7 +716,11 @@ __session_rename(WT_SESSION *wt_session,
WT_WITH_TABLE_LOCK(session, ret,
ret = __wt_schema_rename(session, uri, newuri, cfg))));
-err: API_END_RET_NOTFOUND_MAP(session, ret);
+err: if (ret != 0)
+ WT_STAT_FAST_CONN_INCR(session, session_table_rename_fail);
+ else
+ WT_STAT_FAST_CONN_INCR(session, session_table_rename_success);
+ API_END_RET_NOTFOUND_MAP(session, ret);
}
/*
@@ -681,11 +731,19 @@ static int
__session_rename_readonly(WT_SESSION *wt_session,
const char *uri, const char *newuri, const char *config)
{
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
WT_UNUSED(uri);
WT_UNUSED(newuri);
WT_UNUSED(config);
- return (__wt_session_notsup(wt_session));
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, rename);
+
+ WT_STAT_FAST_CONN_INCR(session, session_table_rename_fail);
+ ret = __wt_session_notsup(session);
+err: API_END_RET(session, ret);
}
/*
@@ -733,8 +791,8 @@ __wt_session_drop(WT_SESSION_IMPL *session, const char *uri, const char *cfg[])
F_SET(session, WT_SESSION_LOCK_NO_WAIT);
/*
- * The checkpoint lock only is needed to avoid a spurious EBUSY error
- * return.
+ * Take the checkpoint lock if there is a need to prevent the drop
+ * operation from failing with EBUSY due to an ongoing checkpoint.
*/
if (checkpoint_wait)
WT_WITH_CHECKPOINT_LOCK(session, ret,
@@ -770,7 +828,12 @@ __session_drop(WT_SESSION *wt_session, const char *uri, const char *config)
ret = __wt_session_drop(session, uri, cfg);
-err: /* Note: drop operations cannot be unrolled (yet?). */
+err: if (ret != 0)
+ WT_STAT_FAST_CONN_INCR(session, session_table_drop_fail);
+ else
+ WT_STAT_FAST_CONN_INCR(session, session_table_drop_success);
+
+ /* Note: drop operations cannot be unrolled (yet?). */
API_END_RET_NOTFOUND_MAP(session, ret);
}
@@ -782,10 +845,18 @@ static int
__session_drop_readonly(
WT_SESSION *wt_session, const char *uri, const char *config)
{
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
WT_UNUSED(uri);
WT_UNUSED(config);
- return (__wt_session_notsup(wt_session));
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, drop);
+
+ WT_STAT_FAST_CONN_INCR(session, session_table_drop_fail);
+ ret = __wt_session_notsup(session);
+err: API_END_RET(session, ret);
}
/*
@@ -943,7 +1014,11 @@ __session_salvage(WT_SESSION *wt_session, const char *uri, const char *config)
ret = __wt_schema_worker(session, uri, __wt_salvage,
NULL, cfg, WT_DHANDLE_EXCLUSIVE | WT_BTREE_SALVAGE)));
-err: API_END_RET_NOTFOUND_MAP(session, ret);
+err: if (ret != 0)
+ WT_STAT_FAST_CONN_INCR(session, session_table_salvage_fail);
+ else
+ WT_STAT_FAST_CONN_INCR(session, session_table_salvage_success);
+ API_END_RET_NOTFOUND_MAP(session, ret);
}
/*
@@ -954,10 +1029,18 @@ static int
__session_salvage_readonly(
WT_SESSION *wt_session, const char *uri, const char *config)
{
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
WT_UNUSED(uri);
WT_UNUSED(config);
- return (__wt_session_notsup(wt_session));
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, salvage);
+
+ WT_STAT_FAST_CONN_INCR(session, session_table_salvage_fail);
+ ret = __wt_session_notsup(session);
+err: API_END_RET(session, ret);
}
/*
@@ -1135,6 +1218,10 @@ __session_truncate(WT_SESSION *wt_session,
err: TXN_API_END_RETRY(session, ret, 0);
+ if (ret != 0)
+ WT_STAT_FAST_CONN_INCR(session, session_table_truncate_fail);
+ else
+ WT_STAT_FAST_CONN_INCR(session, session_table_truncate_success);
/*
* Only map WT_NOTFOUND to ENOENT if a URI was specified.
*/
@@ -1149,12 +1236,20 @@ static int
__session_truncate_readonly(WT_SESSION *wt_session,
const char *uri, WT_CURSOR *start, WT_CURSOR *stop, const char *config)
{
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
WT_UNUSED(uri);
WT_UNUSED(start);
WT_UNUSED(stop);
WT_UNUSED(config);
- return (__wt_session_notsup(wt_session));
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, truncate);
+
+ WT_STAT_FAST_CONN_INCR(session, session_table_truncate_fail);
+ ret = __wt_session_notsup(session);
+err: API_END_RET(session, ret);
}
/*
@@ -1187,10 +1282,17 @@ static int
__session_upgrade_readonly(
WT_SESSION *wt_session, const char *uri, const char *config)
{
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
WT_UNUSED(uri);
WT_UNUSED(config);
- return (__wt_session_notsup(wt_session));
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, upgrade);
+
+ ret = __wt_session_notsup(session);
+err: API_END_RET(session, ret);
}
/*
@@ -1216,7 +1318,11 @@ __session_verify(WT_SESSION *wt_session, const char *uri, const char *config)
ret = __wt_schema_worker(session, uri, __wt_verify,
NULL, cfg, WT_DHANDLE_EXCLUSIVE | WT_BTREE_VERIFY)));
-err: API_END_RET_NOTFOUND_MAP(session, ret);
+err: if (ret != 0)
+ WT_STAT_FAST_CONN_INCR(session, session_table_verify_fail);
+ else
+ WT_STAT_FAST_CONN_INCR(session, session_table_verify_success);
+ API_END_RET_NOTFOUND_MAP(session, ret);
}
/*
@@ -1421,9 +1527,16 @@ err: API_END_RET(session, ret);
static int
__session_transaction_sync_readonly(WT_SESSION *wt_session, const char *config)
{
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
WT_UNUSED(config);
- return (__wt_session_notsup(wt_session));
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, transaction_sync);
+
+ ret = __wt_session_notsup(session);
+err: API_END_RET(session, ret);
}
/*
@@ -1481,9 +1594,16 @@ err: API_END_RET_NOTFOUND_MAP(session, ret);
static int
__session_checkpoint_readonly(WT_SESSION *wt_session, const char *config)
{
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
WT_UNUSED(config);
- return (__wt_session_notsup(wt_session));
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, checkpoint);
+
+ ret = __wt_session_notsup(session);
+err: API_END_RET(session, ret);
}
/*
diff --git a/src/session/session_compact.c b/src/session/session_compact.c
index 3f7b34d132f..47ed5298304 100644
--- a/src/session/session_compact.c
+++ b/src/session/session_compact.c
@@ -333,6 +333,10 @@ err: session->compact = NULL;
*/
WT_TRET(__wt_session_release_resources(session));
+ if (ret != 0)
+ WT_STAT_FAST_CONN_INCR(session, session_table_compact_fail);
+ else
+ WT_STAT_FAST_CONN_INCR(session, session_table_compact_success);
API_END_RET_NOTFOUND_MAP(session, ret);
}
@@ -344,8 +348,16 @@ int
__wt_session_compact_readonly(
WT_SESSION *wt_session, const char *uri, const char *config)
{
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
WT_UNUSED(uri);
WT_UNUSED(config);
- return (__wt_session_notsup(wt_session));
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, compact);
+
+ WT_STAT_FAST_CONN_INCR(session, session_table_compact_fail);
+ ret = __wt_session_notsup(session);
+err: API_END_RET(session, ret);
}
diff --git a/src/support/err.c b/src/support/err.c
index 93c0af37328..60fc53cecd0 100644
--- a/src/support/err.c
+++ b/src/support/err.c
@@ -118,7 +118,13 @@ __handler_failure(WT_SESSION_IMPL *session,
handler->handle_error(handler, wt_session, error, s) == 0)
return;
+ /*
+ * In case there is a failure in the default error handler, make sure
+ * we don't recursively try to report *that* error.
+ */
+ session->event_handler = &__event_handler_default;
(void)__handle_error_default(NULL, wt_session, error, s);
+ session->event_handler = handler;
}
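
The err.c change above installs the default handler before reporting that the application's handler failed, so a second failure cannot re-enter the handler that just failed. A minimal sketch of that swap-and-restore guard, using a hypothetical handler type rather than WT_EVENT_HANDLER:

    #include <stdio.h>

    /* Hypothetical event-handler type; the real one is WT_EVENT_HANDLER. */
    typedef int (*error_handler_t)(const char *msg);

    static int
    default_handler(const char *msg)
    {
        return (fprintf(stderr, "%s\n", msg) < 0 ? -1 : 0);
    }

    /* An application handler that always fails, to trigger the fallback. */
    static int
    broken_handler(const char *msg)
    {
        (void)msg;
        return (-1);
    }

    struct session {
        error_handler_t handler;
    };

    static void
    handler_failure(struct session *s, const char *msg)
    {
        error_handler_t saved;

        /*
         * Point the session at the default handler while reporting the
         * failure, so a failure in that report can't recurse back into
         * the handler that just failed; restore it afterward.
         */
        saved = s->handler;
        s->handler = default_handler;
        (void)default_handler(msg);
        s->handler = saved;
    }

    int
    main(void)
    {
        struct session s = { broken_handler };

        if (s.handler("hello") != 0)
            handler_failure(&s, "error handler failed");
        return (0);
    }
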
/*
diff --git a/src/support/stat.c b/src/support/stat.c
index d972f0c140f..49cb3bebc07 100644
--- a/src/support/stat.c
+++ b/src/support/stat.c
@@ -43,6 +43,7 @@ static const char * const __stats_dsrc_desc[] = {
"btree: pages rewritten by compaction",
"btree: row-store internal pages",
"btree: row-store leaf pages",
+ "cache: bytes currently in the cache",
"cache: bytes read into cache",
"cache: bytes written from cache",
"cache: checkpoint blocked page eviction",
@@ -173,6 +174,7 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats)
stats->btree_compact_rewrite = 0;
stats->btree_row_internal = 0;
stats->btree_row_leaf = 0;
+ /* not clearing cache_bytes_inuse */
stats->cache_bytes_read = 0;
stats->cache_bytes_write = 0;
stats->cache_eviction_checkpoint = 0;
@@ -300,6 +302,7 @@ __wt_stat_dsrc_aggregate_single(
to->btree_compact_rewrite += from->btree_compact_rewrite;
to->btree_row_internal += from->btree_row_internal;
to->btree_row_leaf += from->btree_row_leaf;
+ to->cache_bytes_inuse += from->cache_bytes_inuse;
to->cache_bytes_read += from->cache_bytes_read;
to->cache_bytes_write += from->cache_bytes_write;
to->cache_eviction_checkpoint += from->cache_eviction_checkpoint;
@@ -433,6 +436,7 @@ __wt_stat_dsrc_aggregate(
WT_STAT_READ(from, btree_compact_rewrite);
to->btree_row_internal += WT_STAT_READ(from, btree_row_internal);
to->btree_row_leaf += WT_STAT_READ(from, btree_row_leaf);
+ to->cache_bytes_inuse += WT_STAT_READ(from, cache_bytes_inuse);
to->cache_bytes_read += WT_STAT_READ(from, cache_bytes_read);
to->cache_bytes_write += WT_STAT_READ(from, cache_bytes_write);
to->cache_eviction_checkpoint +=
@@ -542,9 +546,12 @@ static const char * const __stats_connection_desc[] = {
"block-manager: blocks written",
"block-manager: bytes read",
"block-manager: bytes written",
+ "block-manager: bytes written for checkpoint",
"block-manager: mapped blocks read",
"block-manager: mapped bytes read",
+ "cache: bytes belonging to page images in the cache",
"cache: bytes currently in the cache",
+ "cache: bytes not belonging to page images in the cache",
"cache: bytes read into cache",
"cache: bytes written from cache",
"cache: checkpoint blocked page eviction",
@@ -578,6 +585,8 @@ static const char * const __stats_connection_desc[] = {
"cache: maximum page size at eviction",
"cache: modified pages evicted",
"cache: modified pages evicted by application threads",
+ "cache: overflow pages read into cache",
+ "cache: overflow values cached in memory",
"cache: page split during eviction deepened the tree",
"cache: page written requiring lookaside records",
"cache: pages currently held in the cache",
@@ -586,6 +595,7 @@ static const char * const __stats_connection_desc[] = {
"cache: pages evicted by application threads",
"cache: pages queued for eviction",
"cache: pages queued for urgent eviction",
+ "cache: pages queued for urgent eviction during walk",
"cache: pages read into cache",
"cache: pages read into cache requiring lookaside entries",
"cache: pages requested from the cache",
@@ -597,7 +607,6 @@ static const char * const __stats_connection_desc[] = {
"cache: percentage overhead",
"cache: tracked bytes belonging to internal pages in the cache",
"cache: tracked bytes belonging to leaf pages in the cache",
- "cache: tracked bytes belonging to overflow pages in the cache",
"cache: tracked dirty bytes in the cache",
"cache: tracked dirty pages in the cache",
"cache: unmodified pages evicted",
@@ -677,6 +686,22 @@ static const char * const __stats_connection_desc[] = {
"reconciliation: split objects currently awaiting free",
"session: open cursor count",
"session: open session count",
+ "session: table compact failed calls",
+ "session: table compact successful calls",
+ "session: table create failed calls",
+ "session: table create successful calls",
+ "session: table drop failed calls",
+ "session: table drop successful calls",
+ "session: table rebalance failed calls",
+ "session: table rebalance successful calls",
+ "session: table rename failed calls",
+ "session: table rename successful calls",
+ "session: table salvage failed calls",
+ "session: table salvage successful calls",
+ "session: table truncate failed calls",
+ "session: table truncate successful calls",
+ "session: table verify failed calls",
+ "session: table verify successful calls",
"thread-state: active filesystem fsync calls",
"thread-state: active filesystem read calls",
"thread-state: active filesystem write calls",
@@ -693,13 +718,13 @@ static const char * const __stats_connection_desc[] = {
"transaction: transaction checkpoint max time (msecs)",
"transaction: transaction checkpoint min time (msecs)",
"transaction: transaction checkpoint most recent time (msecs)",
+ "transaction: transaction checkpoint scrub dirty target",
+ "transaction: transaction checkpoint scrub time (msecs)",
"transaction: transaction checkpoint total time (msecs)",
"transaction: transaction checkpoints",
"transaction: transaction failures due to cache overflow",
"transaction: transaction fsync calls for checkpoint after allocating the transaction ID",
- "transaction: transaction fsync calls for checkpoint before allocating the transaction ID",
"transaction: transaction fsync duration for checkpoint after allocating the transaction ID (usecs)",
- "transaction: transaction fsync duration for checkpoint before allocating the transaction ID (usecs)",
"transaction: transaction range of IDs currently pinned",
"transaction: transaction range of IDs currently pinned by a checkpoint",
"transaction: transaction range of IDs currently pinned by named snapshots",
@@ -764,9 +789,12 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->block_write = 0;
stats->block_byte_read = 0;
stats->block_byte_write = 0;
+ stats->block_byte_write_checkpoint = 0;
stats->block_map_read = 0;
stats->block_byte_map_read = 0;
+ /* not clearing cache_bytes_image */
/* not clearing cache_bytes_inuse */
+ /* not clearing cache_bytes_other */
stats->cache_bytes_read = 0;
stats->cache_bytes_write = 0;
stats->cache_eviction_checkpoint = 0;
@@ -800,6 +828,8 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
/* not clearing cache_eviction_maximum_page_size */
stats->cache_eviction_dirty = 0;
stats->cache_eviction_app_dirty = 0;
+ stats->cache_read_overflow = 0;
+ stats->cache_overflow_value = 0;
stats->cache_eviction_deepen = 0;
stats->cache_write_lookaside = 0;
/* not clearing cache_pages_inuse */
@@ -807,6 +837,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->cache_eviction_force_delete = 0;
stats->cache_eviction_app = 0;
stats->cache_eviction_pages_queued = 0;
+ stats->cache_eviction_pages_queued_urgent = 0;
stats->cache_eviction_pages_queued_oldest = 0;
stats->cache_read = 0;
stats->cache_read_lookaside = 0;
@@ -819,7 +850,6 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
/* not clearing cache_overhead */
/* not clearing cache_bytes_internal */
/* not clearing cache_bytes_leaf */
- /* not clearing cache_bytes_overflow */
/* not clearing cache_bytes_dirty */
/* not clearing cache_pages_dirty */
stats->cache_eviction_clean = 0;
@@ -899,9 +929,25 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
/* not clearing rec_split_stashed_objects */
/* not clearing session_cursor_open */
/* not clearing session_open */
- /* not clearing fsync_active */
- /* not clearing read_active */
- /* not clearing write_active */
+ /* not clearing session_table_compact_fail */
+ /* not clearing session_table_compact_success */
+ /* not clearing session_table_create_fail */
+ /* not clearing session_table_create_success */
+ /* not clearing session_table_drop_fail */
+ /* not clearing session_table_drop_success */
+ /* not clearing session_table_rebalance_fail */
+ /* not clearing session_table_rebalance_success */
+ /* not clearing session_table_rename_fail */
+ /* not clearing session_table_rename_success */
+ /* not clearing session_table_salvage_fail */
+ /* not clearing session_table_salvage_success */
+ /* not clearing session_table_truncate_fail */
+ /* not clearing session_table_truncate_success */
+ /* not clearing session_table_verify_fail */
+ /* not clearing session_table_verify_success */
+ /* not clearing thread_fsync_active */
+ /* not clearing thread_read_active */
+ /* not clearing thread_write_active */
stats->page_busy_blocked = 0;
stats->page_forcible_evict_blocked = 0;
stats->page_locked_blocked = 0;
@@ -915,13 +961,13 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
/* not clearing txn_checkpoint_time_max */
/* not clearing txn_checkpoint_time_min */
/* not clearing txn_checkpoint_time_recent */
+ /* not clearing txn_checkpoint_scrub_target */
+ /* not clearing txn_checkpoint_scrub_time */
/* not clearing txn_checkpoint_time_total */
stats->txn_checkpoint = 0;
stats->txn_fail_cache = 0;
stats->txn_checkpoint_fsync_post = 0;
- stats->txn_checkpoint_fsync_pre = 0;
- stats->txn_checkpoint_fsync_post_duration = 0;
- stats->txn_checkpoint_fsync_pre_duration = 0;
+ /* not clearing txn_checkpoint_fsync_post_duration */
/* not clearing txn_pinned_range */
/* not clearing txn_pinned_checkpoint_range */
/* not clearing txn_pinned_snapshot_range */
@@ -978,9 +1024,13 @@ __wt_stat_connection_aggregate(
to->block_write += WT_STAT_READ(from, block_write);
to->block_byte_read += WT_STAT_READ(from, block_byte_read);
to->block_byte_write += WT_STAT_READ(from, block_byte_write);
+ to->block_byte_write_checkpoint +=
+ WT_STAT_READ(from, block_byte_write_checkpoint);
to->block_map_read += WT_STAT_READ(from, block_map_read);
to->block_byte_map_read += WT_STAT_READ(from, block_byte_map_read);
+ to->cache_bytes_image += WT_STAT_READ(from, cache_bytes_image);
to->cache_bytes_inuse += WT_STAT_READ(from, cache_bytes_inuse);
+ to->cache_bytes_other += WT_STAT_READ(from, cache_bytes_other);
to->cache_bytes_read += WT_STAT_READ(from, cache_bytes_read);
to->cache_bytes_write += WT_STAT_READ(from, cache_bytes_write);
to->cache_eviction_checkpoint +=
@@ -1039,6 +1089,8 @@ __wt_stat_connection_aggregate(
to->cache_eviction_dirty += WT_STAT_READ(from, cache_eviction_dirty);
to->cache_eviction_app_dirty +=
WT_STAT_READ(from, cache_eviction_app_dirty);
+ to->cache_read_overflow += WT_STAT_READ(from, cache_read_overflow);
+ to->cache_overflow_value += WT_STAT_READ(from, cache_overflow_value);
to->cache_eviction_deepen +=
WT_STAT_READ(from, cache_eviction_deepen);
to->cache_write_lookaside +=
@@ -1050,6 +1102,8 @@ __wt_stat_connection_aggregate(
to->cache_eviction_app += WT_STAT_READ(from, cache_eviction_app);
to->cache_eviction_pages_queued +=
WT_STAT_READ(from, cache_eviction_pages_queued);
+ to->cache_eviction_pages_queued_urgent +=
+ WT_STAT_READ(from, cache_eviction_pages_queued_urgent);
to->cache_eviction_pages_queued_oldest +=
WT_STAT_READ(from, cache_eviction_pages_queued_oldest);
to->cache_read += WT_STAT_READ(from, cache_read);
@@ -1065,7 +1119,6 @@ __wt_stat_connection_aggregate(
to->cache_overhead += WT_STAT_READ(from, cache_overhead);
to->cache_bytes_internal += WT_STAT_READ(from, cache_bytes_internal);
to->cache_bytes_leaf += WT_STAT_READ(from, cache_bytes_leaf);
- to->cache_bytes_overflow += WT_STAT_READ(from, cache_bytes_overflow);
to->cache_bytes_dirty += WT_STAT_READ(from, cache_bytes_dirty);
to->cache_pages_dirty += WT_STAT_READ(from, cache_pages_dirty);
to->cache_eviction_clean += WT_STAT_READ(from, cache_eviction_clean);
@@ -1151,9 +1204,41 @@ __wt_stat_connection_aggregate(
WT_STAT_READ(from, rec_split_stashed_objects);
to->session_cursor_open += WT_STAT_READ(from, session_cursor_open);
to->session_open += WT_STAT_READ(from, session_open);
- to->fsync_active += WT_STAT_READ(from, fsync_active);
- to->read_active += WT_STAT_READ(from, read_active);
- to->write_active += WT_STAT_READ(from, write_active);
+ to->session_table_compact_fail +=
+ WT_STAT_READ(from, session_table_compact_fail);
+ to->session_table_compact_success +=
+ WT_STAT_READ(from, session_table_compact_success);
+ to->session_table_create_fail +=
+ WT_STAT_READ(from, session_table_create_fail);
+ to->session_table_create_success +=
+ WT_STAT_READ(from, session_table_create_success);
+ to->session_table_drop_fail +=
+ WT_STAT_READ(from, session_table_drop_fail);
+ to->session_table_drop_success +=
+ WT_STAT_READ(from, session_table_drop_success);
+ to->session_table_rebalance_fail +=
+ WT_STAT_READ(from, session_table_rebalance_fail);
+ to->session_table_rebalance_success +=
+ WT_STAT_READ(from, session_table_rebalance_success);
+ to->session_table_rename_fail +=
+ WT_STAT_READ(from, session_table_rename_fail);
+ to->session_table_rename_success +=
+ WT_STAT_READ(from, session_table_rename_success);
+ to->session_table_salvage_fail +=
+ WT_STAT_READ(from, session_table_salvage_fail);
+ to->session_table_salvage_success +=
+ WT_STAT_READ(from, session_table_salvage_success);
+ to->session_table_truncate_fail +=
+ WT_STAT_READ(from, session_table_truncate_fail);
+ to->session_table_truncate_success +=
+ WT_STAT_READ(from, session_table_truncate_success);
+ to->session_table_verify_fail +=
+ WT_STAT_READ(from, session_table_verify_fail);
+ to->session_table_verify_success +=
+ WT_STAT_READ(from, session_table_verify_success);
+ to->thread_fsync_active += WT_STAT_READ(from, thread_fsync_active);
+ to->thread_read_active += WT_STAT_READ(from, thread_read_active);
+ to->thread_write_active += WT_STAT_READ(from, thread_write_active);
to->page_busy_blocked += WT_STAT_READ(from, page_busy_blocked);
to->page_forcible_evict_blocked +=
WT_STAT_READ(from, page_forcible_evict_blocked);
@@ -1175,18 +1260,18 @@ __wt_stat_connection_aggregate(
WT_STAT_READ(from, txn_checkpoint_time_min);
to->txn_checkpoint_time_recent +=
WT_STAT_READ(from, txn_checkpoint_time_recent);
+ to->txn_checkpoint_scrub_target +=
+ WT_STAT_READ(from, txn_checkpoint_scrub_target);
+ to->txn_checkpoint_scrub_time +=
+ WT_STAT_READ(from, txn_checkpoint_scrub_time);
to->txn_checkpoint_time_total +=
WT_STAT_READ(from, txn_checkpoint_time_total);
to->txn_checkpoint += WT_STAT_READ(from, txn_checkpoint);
to->txn_fail_cache += WT_STAT_READ(from, txn_fail_cache);
to->txn_checkpoint_fsync_post +=
WT_STAT_READ(from, txn_checkpoint_fsync_post);
- to->txn_checkpoint_fsync_pre +=
- WT_STAT_READ(from, txn_checkpoint_fsync_pre);
to->txn_checkpoint_fsync_post_duration +=
WT_STAT_READ(from, txn_checkpoint_fsync_post_duration);
- to->txn_checkpoint_fsync_pre_duration +=
- WT_STAT_READ(from, txn_checkpoint_fsync_pre_duration);
to->txn_pinned_range += WT_STAT_READ(from, txn_pinned_range);
to->txn_pinned_checkpoint_range +=
WT_STAT_READ(from, txn_pinned_checkpoint_range);
diff --git a/src/txn/txn.c b/src/txn/txn.c
index dd4384d9a9a..87b74433769 100644
--- a/src/txn/txn.c
+++ b/src/txn/txn.c
@@ -124,6 +124,7 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session)
txn = &session->txn;
txn_global = &conn->txn_global;
txn_state = WT_SESSION_TXN_STATE(session);
+ n = 0;
/*
* Spin waiting for the lock: the sleeps in our blocking readlock
@@ -137,20 +138,26 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session)
current_id = snap_min = txn_global->current;
prev_oldest_id = txn_global->oldest_id;
+ /*
+ * Include the checkpoint transaction, if one is running: we should
+ * ignore any uncommitted changes the checkpoint has written to the
+	 * metadata. We don't have to keep the checkpoint's changes pinned, so
+	 * we don't include it in the published snap_min.
+ */
+ if ((id = txn_global->checkpoint_txnid) != WT_TXN_NONE)
+ txn->snapshot[n++] = id;
+
/* For pure read-only workloads, avoid scanning. */
if (prev_oldest_id == current_id) {
txn_state->snap_min = current_id;
- __txn_sort_snapshot(session, 0, current_id);
-
/* Check that the oldest ID has not moved in the meantime. */
WT_ASSERT(session, prev_oldest_id == txn_global->oldest_id);
- WT_RET(__wt_readunlock(session, txn_global->scan_rwlock));
- return (0);
+ goto done;
}
/* Walk the array of concurrent transactions. */
WT_ORDERED_READ(session_cnt, conn->session_cnt);
- for (i = n = 0, s = txn_global->states; i < session_cnt; i++, s++) {
+ for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) {
/*
* Build our snapshot of any concurrent transaction IDs.
*
@@ -178,8 +185,7 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session)
WT_ASSERT(session, prev_oldest_id == txn_global->oldest_id);
txn_state->snap_min = snap_min;
- WT_RET(__wt_readunlock(session, txn_global->scan_rwlock));
-
+done: WT_RET(__wt_readunlock(session, txn_global->scan_rwlock));
__txn_sort_snapshot(session, n, current_id);
return (0);
}
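
With this change the first slot of the snapshot is reserved for the running checkpoint's transaction ID, when there is one, before the other sessions are scanned, so the checkpoint's uncommitted metadata writes stay invisible without being counted into the published snap_min. A simplified, single-threaded sketch of building such a snapshot, with plain arrays and no locking, unlike the real code:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define TXN_NONE    0   /* stands in for WT_TXN_NONE */

    static int
    cmp_id(const void *a, const void *b)
    {
        uint64_t x = *(const uint64_t *)a, y = *(const uint64_t *)b;

        return (x < y ? -1 : (x > y ? 1 : 0));
    }

    /*
     * Build a snapshot: the checkpoint ID (if running) goes in first, then
     * any concurrent transaction IDs below the current ID.  snap_min is
     * computed from the concurrent IDs only, so the checkpoint pins nothing.
     */
    static uint32_t
    build_snapshot(uint64_t checkpoint_id, const uint64_t *active,
        uint32_t nactive, uint64_t current_id,
        uint64_t *snapshot, uint64_t *snap_minp)
    {
        uint64_t snap_min;
        uint32_t i, n;

        n = 0;
        snap_min = current_id;

        if (checkpoint_id != TXN_NONE)
            snapshot[n++] = checkpoint_id;

        for (i = 0; i < nactive; ++i)
            if (active[i] != TXN_NONE && active[i] < current_id) {
                snapshot[n++] = active[i];
                if (active[i] < snap_min)
                    snap_min = active[i];
            }

        qsort(snapshot, n, sizeof(snapshot[0]), cmp_id);
        *snap_minp = snap_min;
        return (n);
    }

    int
    main(void)
    {
        uint64_t active[] = { 12, TXN_NONE, 15 };
        uint64_t snapshot[8], snap_min;
        uint32_t i, n;

        n = build_snapshot(17, active, 3, 20, snapshot, &snap_min);
        printf("snap_min %llu, snapshot:", (unsigned long long)snap_min);
        for (i = 0; i < n; ++i)
            printf(" %llu", (unsigned long long)snapshot[i]);
        printf("\n");
        return (0);
    }
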
@@ -433,18 +439,22 @@ __wt_txn_release(WT_SESSION_IMPL *session)
WT_TXN_STATE *txn_state;
txn = &session->txn;
- WT_ASSERT(session, txn->mod_count == 0);
- txn->notify = NULL;
-
txn_global = &S2C(session)->txn_global;
txn_state = WT_SESSION_TXN_STATE(session);
+ WT_ASSERT(session, txn->mod_count == 0);
+ txn->notify = NULL;
+
/* Clear the transaction's ID from the global table. */
if (WT_SESSION_IS_CHECKPOINT(session)) {
WT_ASSERT(session, txn_state->id == WT_TXN_NONE);
- txn->id = WT_TXN_NONE;
+ txn->id = txn_global->checkpoint_txnid = WT_TXN_NONE;
- /* Clear the global checkpoint transaction IDs. */
+ /*
+ * Be extra careful to cleanup everything for checkpoints: once
+ * the global checkpoint ID is cleared, we can no longer tell
+ * if this session is doing a checkpoint.
+ */
txn_global->checkpoint_id = 0;
txn_global->checkpoint_pinned = WT_TXN_NONE;
} else if (F_ISSET(txn, WT_TXN_HAS_ID)) {
@@ -470,6 +480,7 @@ __wt_txn_release(WT_SESSION_IMPL *session)
*/
__wt_txn_release_snapshot(session);
txn->isolation = session->isolation;
+
/* Ensure the transaction flags are cleared on exit */
txn->flags = 0;
}
@@ -487,10 +498,12 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
WT_TXN *txn;
WT_TXN_OP *op;
u_int i;
+ bool did_update;
txn = &session->txn;
conn = S2C(session);
- WT_ASSERT(session, !F_ISSET(txn, WT_TXN_ERROR) || txn->mod_count == 0);
+ did_update = txn->mod_count != 0;
+ WT_ASSERT(session, !F_ISSET(txn, WT_TXN_ERROR) || !did_update);
if (!F_ISSET(txn, WT_TXN_RUNNING))
WT_RET_MSG(session, EINVAL, "No transaction is active");
@@ -540,8 +553,18 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
WT_TRET(txn->notify->notify(txn->notify,
(WT_SESSION *)session, txn->id, 1));
+ /*
+ * We are about to release the snapshot: copy values into any
+ * positioned cursors so they don't point to updates that could be
+ * freed once we don't have a snapshot.
+ */
+ if (session->ncursors > 0) {
+ WT_DIAGNOSTIC_YIELD;
+ WT_RET(__wt_session_copy_values(session));
+ }
+
/* If we are logging, write a commit log record. */
- if (ret == 0 && txn->mod_count > 0 &&
+ if (ret == 0 && did_update &&
FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) &&
!F_ISSET(session, WT_SESSION_NO_LOGGING)) {
/*
@@ -569,14 +592,6 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
__wt_txn_op_free(session, op);
txn->mod_count = 0;
- /*
- * We are about to release the snapshot: copy values into any
- * positioned cursors so they don't point to updates that could be
- * freed once we don't have a transaction ID pinned.
- */
- if (session->ncursors > 0)
- WT_RET(__wt_session_copy_values(session));
-
__wt_txn_release(session);
return (0);
}
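
The commit path now copies values out of any positioned cursors before the snapshot is released and the commit record is logged, because once the snapshot is gone the updates those cursors reference may be freed. A hypothetical sketch of that copy-out step, with a simplified cursor that may point into shared update memory:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Hypothetical cursor: it may reference memory owned by the update list. */
    struct cursor {
        const void *value;  /* points into shared update memory */
        size_t value_size;
        void *owned;        /* private copy, if one has been taken */
    };

    /*
     * Before releasing the snapshot that keeps updates alive, copy each
     * positioned cursor's value into memory the cursor owns.
     */
    static int
    copy_cursor_values(struct cursor **cursors, size_t ncursors)
    {
        struct cursor *c;
        size_t i;

        for (i = 0; i < ncursors; ++i) {
            c = cursors[i];
            if (c->value == NULL || c->owned != NULL)
                continue;
            if ((c->owned = malloc(c->value_size)) == NULL)
                return (-1);
            memcpy(c->owned, c->value, c->value_size);
            c->value = c->owned;    /* now safe after updates are freed */
        }
        return (0);
    }

    int
    main(void)
    {
        char shared[] = "value from an update that will be freed";
        struct cursor c = { shared, sizeof(shared), NULL };
        struct cursor *list[] = { &c };

        if (copy_cursor_values(list, 1) == 0)
            printf("cursor now owns: %s\n", (const char *)c.value);
        free(c.owned);
        return (0);
    }
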
diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c
index 51d26b9aed6..c23f293154a 100644
--- a/src/txn/txn_ckpt.c
+++ b/src/txn/txn_ckpt.c
@@ -10,14 +10,16 @@
static int __checkpoint_lock_tree(
WT_SESSION_IMPL *, bool, bool, const char *[]);
+static int __checkpoint_mark_deletes(WT_SESSION_IMPL *, const char *[]);
+static int __checkpoint_presync(WT_SESSION_IMPL *, const char *[]);
static int __checkpoint_tree_helper(WT_SESSION_IMPL *, const char *[]);
/*
- * __wt_checkpoint_name_ok --
+ * __checkpoint_name_ok --
* Complain if the checkpoint name isn't acceptable.
*/
-int
-__wt_checkpoint_name_ok(WT_SESSION_IMPL *session, const char *name, size_t len)
+static int
+__checkpoint_name_ok(WT_SESSION_IMPL *session, const char *name, size_t len)
{
/* Check for characters we don't want to see in a metadata file. */
WT_RET(__wt_name_check(session, name, len));
@@ -107,7 +109,7 @@ __checkpoint_apply_all(WT_SESSION_IMPL *session, const char *cfg[],
WT_RET(__wt_config_gets(session, cfg, "name", &cval));
named = cval.len != 0;
if (named)
- WT_RET(__wt_checkpoint_name_ok(session, cval.str, cval.len));
+ WT_RET(__checkpoint_name_ok(session, cval.str, cval.len));
/* Step through the targets and optionally operate on each one. */
WT_ERR(__wt_config_gets(session, cfg, "target", &cval));
@@ -183,6 +185,8 @@ __checkpoint_apply(WT_SESSION_IMPL *session, const char *cfg[],
/* If we have already locked the handles, apply the operation. */
for (i = 0; i < session->ckpt_handle_next; ++i) {
+ if (session->ckpt_handle[i] == NULL)
+ continue;
WT_WITH_DHANDLE(session, session->ckpt_handle[i],
ret = (*op)(session, cfg));
WT_RET(ret);
@@ -234,6 +238,7 @@ __checkpoint_data_source(WT_SESSION_IMPL *session, const char *cfg[])
int
__wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[])
{
+ WT_BTREE *btree;
WT_DECL_RET;
const char *name;
@@ -258,6 +263,14 @@ __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[])
if ((ret = __wt_session_get_btree(session, name, NULL, NULL, 0)) != 0)
return (ret == EBUSY ? 0 : ret);
+ /*
+ * Save the current eviction walk setting: checkpoint can interfere
+ * with eviction and we don't want to unfairly penalize (or promote)
+ * eviction in trees due to checkpoints.
+ */
+ btree = S2BT(session);
+ btree->evict_walk_saved = btree->evict_walk_period;
+
WT_SAVE_DHANDLE(session,
ret = __checkpoint_lock_tree(session, true, true, cfg));
if (ret != 0) {
@@ -265,20 +278,164 @@ __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[])
return (ret);
}
+ /*
+ * Flag that the handle is part of a checkpoint for the purposes
+ * of transaction visibility checks.
+ */
+ WT_PUBLISH(btree->include_checkpoint_txn, true);
+
session->ckpt_handle[session->ckpt_handle_next++] = session->dhandle;
return (0);
}
/*
- * __checkpoint_write_leaves --
- * Write any dirty leaf pages for all checkpoint handles.
+ * __checkpoint_update_generation --
+ * Update the checkpoint generation of the current tree.
+ *
+ * This indicates that the tree will not be visited again by the current
+ * checkpoint.
+ */
+static void
+__checkpoint_update_generation(WT_SESSION_IMPL *session)
+{
+ WT_BTREE *btree;
+
+ btree = S2BT(session);
+ if (!WT_IS_METADATA(session, session->dhandle))
+ WT_PUBLISH(btree->include_checkpoint_txn, false);
+
+ WT_PUBLISH(btree->checkpoint_gen,
+ S2C(session)->txn_global.checkpoint_gen);
+ WT_STAT_FAST_DATA_SET(session,
+ btree_checkpoint_generation, btree->checkpoint_gen);
+}
+
+/*
+ * __checkpoint_reduce_dirty_cache --
+ *	Reduce the amount of dirty data in cache before a checkpoint starts.
*/
static int
-__checkpoint_write_leaves(WT_SESSION_IMPL *session, const char *cfg[])
+__checkpoint_reduce_dirty_cache(WT_SESSION_IMPL *session)
{
- WT_UNUSED(cfg);
+ WT_CACHE *cache;
+ WT_CONNECTION_IMPL *conn;
+ struct timespec start, last, stop;
+ u_int current_dirty;
+ uint64_t bytes_written_last, bytes_written_start, bytes_written_total;
+ uint64_t current_us, stepdown_us, total_ms;
+ bool progress;
+
+ conn = S2C(session);
+ cache = conn->cache;
+
+ WT_RET(__wt_epoch(session, &start));
+ last = start;
+ bytes_written_last = 0;
+ bytes_written_start = cache->bytes_written;
+ stepdown_us = 10000;
+ progress = false;
+
+	/* Step the dirty trigger down until dirty data reaches the target. */
+ for (;;) {
+ current_dirty = (u_int)((100 *
+ __wt_cache_dirty_leaf_inuse(cache)) / conn->cache_size);
+ if (current_dirty <= cache->eviction_dirty_target)
+ break;
+
+ __wt_sleep(0, stepdown_us / 4);
+ WT_RET(__wt_epoch(session, &stop));
+ current_us = WT_TIMEDIFF_US(stop, last);
+ total_ms = WT_TIMEDIFF_MS(stop, start);
+ bytes_written_total =
+ cache->bytes_written - bytes_written_start;
- return (__wt_cache_op(session, WT_SYNC_WRITE_LEAVES));
+ /*
+ * Estimate how long the next step down of 1% of dirty data
+ * should take.
+ *
+ * The calculation here assumes that the system is writing from
+ * cache as fast as it can, and determines the write throughput
+ * based on the change in the bytes written from cache since
+ * the start of the call. We use that to estimate how long it
+ * will take to step the dirty target down by 1%.
+ *
+ * Take care to avoid dividing by zero.
+ */
+ if (bytes_written_total - bytes_written_last > WT_MEGABYTE &&
+ bytes_written_total > total_ms && total_ms > 0 &&
+ (!progress ||
+ current_dirty <= cache->eviction_dirty_trigger)) {
+ stepdown_us = (uint64_t)(WT_THOUSAND * (
+ (double)(conn->cache_size / 100) /
+ (double)(bytes_written_total / total_ms)));
+ if (!progress)
+ stepdown_us = WT_MIN(stepdown_us, 200000);
+ }
+
+ bytes_written_last = bytes_written_total;
+
+ if (current_dirty <= cache->eviction_dirty_trigger) {
+ progress = true;
+
+ /*
+ * Smooth out step down: try to limit the impact on
+ * performance to 10% by waiting once we reach the last
+ * level.
+ */
+ __wt_sleep(0, 10 * stepdown_us);
+ cache->eviction_dirty_trigger = current_dirty - 1;
+ WT_STAT_FAST_CONN_SET(session,
+ txn_checkpoint_scrub_target, current_dirty - 1);
+ WT_RET(__wt_epoch(session, &last));
+ continue;
+ }
+
+ /*
+ * We haven't reached the current target.
+ *
+ * Don't wait indefinitely: there might be dirty pages that
+ * can't be evicted. If we can't meet the target, give up
+ * and start the checkpoint for real.
+ */
+ if (current_us > 10 * stepdown_us)
+ break;
+ }
+
+ WT_RET(__wt_epoch(session, &stop));
+ total_ms = WT_TIMEDIFF_MS(stop, start);
+ WT_STAT_FAST_CONN_SET(session, txn_checkpoint_scrub_time, total_ms);
+
+ return (0);
+}
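
The scrub loop above estimates how long evicting another 1% of the cache should take from the observed write rate: bytes written per millisecond since the call started gives a throughput, and (cache_size / 100) divided by that throughput gives the next step time, scaled to microseconds by WT_THOUSAND. The same arithmetic in isolation, with made-up numbers:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define THOUSAND    1000            /* stands in for WT_THOUSAND */
    #define MEGABYTE    (1024 * 1024)

    /*
     * Estimate, in microseconds, how long stepping the dirty trigger down
     * by 1% of the cache should take, given bytes written since the start
     * of the call and the elapsed time in milliseconds.
     */
    static uint64_t
    estimate_stepdown_us(uint64_t cache_size,
        uint64_t bytes_written_total, uint64_t total_ms)
    {
        double bytes_per_ms, onepct;

        if (total_ms == 0 || bytes_written_total == 0)
            return (10000);     /* default used before any data */

        bytes_per_ms = (double)bytes_written_total / (double)total_ms;
        onepct = (double)(cache_size / 100);
        return ((uint64_t)(THOUSAND * (onepct / bytes_per_ms)));
    }

    int
    main(void)
    {
        /* 10GB cache, 200MB written from cache over the last 500ms. */
        uint64_t us = estimate_stepdown_us((uint64_t)10 * 1024 * MEGABYTE,
            (uint64_t)200 * MEGABYTE, 500);

        printf("next 1%% step should take about %" PRIu64 " us\n", us);
        return (0);
    }
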
+
+/*
+ * __checkpoint_release_clean_trees --
+ * Release clean trees from the list cached for checkpoints.
+ */
+static int
+__checkpoint_release_clean_trees(WT_SESSION_IMPL *session)
+{
+ WT_BTREE *btree;
+ WT_DATA_HANDLE *dhandle;
+ WT_DECL_RET;
+ u_int i;
+
+ for (i = 0; i < session->ckpt_handle_next; i++) {
+ dhandle = session->ckpt_handle[i];
+ btree = dhandle->handle;
+ if (!F_ISSET(btree, WT_BTREE_SKIP_CKPT))
+ continue;
+ __wt_meta_ckptlist_free(session, btree->ckpt);
+ btree->ckpt = NULL;
+ WT_WITH_DHANDLE(session, dhandle,
+ __checkpoint_update_generation(session));
+ session->ckpt_handle[i] = NULL;
+ WT_WITH_DHANDLE(session, dhandle,
+ ret = __wt_session_release_btree(session));
+ WT_RET(ret);
+ }
+
+ return (0);
}
/*
@@ -352,6 +509,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
{
struct timespec fsync_start, fsync_stop;
struct timespec start, stop, verb_timer;
+ WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_TXN *txn;
@@ -359,13 +517,15 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
WT_TXN_ISOLATION saved_isolation;
WT_TXN_STATE *txn_state;
void *saved_meta_next;
- u_int i;
+ u_int i, orig_trigger;
uint64_t fsync_duration_usecs;
bool full, idle, logging, tracking;
const char *txn_cfg[] = { WT_CONFIG_BASE(session,
WT_SESSION_begin_transaction), "isolation=snapshot", NULL };
conn = S2C(session);
+ cache = conn->cache;
+ orig_trigger = cache->eviction_dirty_trigger;
txn = &session->txn;
txn_global = &conn->txn_global;
txn_state = WT_SESSION_TXN_STATE(session);
@@ -384,21 +544,8 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
/* Configure logging only if doing a full checkpoint. */
logging = FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED);
- /* Keep track of handles acquired for locking. */
- WT_ERR(__wt_meta_track_on(session));
- tracking = true;
-
- /*
- * Get a list of handles we want to flush; this may pull closed objects
- * into the session cache, but we're going to do that eventually anyway.
- */
- WT_ASSERT(session, session->ckpt_handle_next == 0);
- WT_WITH_SCHEMA_LOCK(session, ret,
- WT_WITH_TABLE_LOCK(session, ret,
- WT_WITH_HANDLE_LIST_LOCK(session,
- ret = __checkpoint_apply_all(
- session, cfg, __wt_checkpoint_get_handles, NULL))));
- WT_ERR(ret);
+ /* Reset the maximum page size seen by eviction. */
+ conn->cache->evict_max_page_size = 0;
/*
* Update the global oldest ID so we do all possible cleanup.
@@ -412,28 +559,11 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
/* Flush data-sources before we start the checkpoint. */
WT_ERR(__checkpoint_data_source(session, cfg));
- WT_ERR(__wt_epoch(session, &verb_timer));
- WT_ERR(__checkpoint_verbose_track(session,
- "starting write leaves", &verb_timer));
-
- /* Flush dirty leaf pages before we start the checkpoint. */
- session->isolation = txn->isolation = WT_ISO_READ_COMMITTED;
- WT_ERR(__checkpoint_apply(session, cfg, __checkpoint_write_leaves));
-
/*
- * The underlying flush routine scheduled an asynchronous flush
- * after writing the leaf pages, but in order to minimize I/O
- * while holding the schema lock, do a flush and wait for the
- * completion. Do it after flushing the pages to give the
- * asynchronous flush as much time as possible before we wait.
+ * Try to reduce the amount of dirty data in cache so there is less
+	 * work to do during the critical section of the checkpoint.
*/
- WT_ERR(__wt_epoch(session, &fsync_start));
- WT_ERR(__checkpoint_apply(session, cfg, __wt_checkpoint_sync));
- WT_ERR(__wt_epoch(session, &fsync_stop));
- fsync_duration_usecs = WT_TIMEDIFF_US(fsync_stop, fsync_start);
- WT_STAT_FAST_CONN_INCR(session, txn_checkpoint_fsync_pre);
- WT_STAT_FAST_CONN_INCRV(session,
- txn_checkpoint_fsync_pre_duration, fsync_duration_usecs);
+ WT_ERR(__checkpoint_reduce_dirty_cache(session));
/* Tell logging that we are about to start a database checkpoint. */
if (full && logging)
@@ -462,6 +592,36 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
WT_STAT_FAST_CONN_SET(session,
txn_checkpoint_generation, txn_global->checkpoint_gen);
+ /* Keep track of handles acquired for locking. */
+ WT_ERR(__wt_meta_track_on(session));
+ tracking = true;
+
+ /*
+ * Get a list of handles we want to flush; for named checkpoints this
+ * may pull closed objects into the session cache.
+ *
+ * We want to skip checkpointing clean handles whenever possible. That
+ * is, when the checkpoint is not named or forced. However, we need to
+ * take care about ordering with respect to the checkpoint transaction.
+	 * be careful about ordering with respect to the checkpoint transaction.
+ * If we skip clean handles before starting the transaction, the
+ * checkpoint can miss updates in trees that become dirty as the
+ * checkpoint is starting. If we wait until the transaction has
+ * started before locking a handle, there could be a metadata-changing
+ * operation in between (e.g., salvage) that will cause a write
+ * conflict when the checkpoint goes to write the metadata.
+ *
+ * First, gather all handles, then start the checkpoint transaction,
+ * then release any clean handles.
+ */
+ WT_ASSERT(session, session->ckpt_handle_next == 0);
+ WT_WITH_SCHEMA_LOCK(session, ret,
+ WT_WITH_TABLE_LOCK(session, ret,
+ WT_WITH_HANDLE_LIST_LOCK(session,
+ ret = __checkpoint_apply_all(
+ session, cfg, __wt_checkpoint_get_handles, NULL))));
+ WT_ERR(ret);
+
/*
* Start a snapshot transaction for the checkpoint.
*
@@ -475,21 +635,22 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
WT_ERR(__wt_txn_id_check(session));
/*
- * Save the checkpoint session ID. We never do checkpoints in the
- * default session (with id zero).
+ * Save the checkpoint session ID.
+ *
+ * We never do checkpoints in the default session (with id zero).
*/
WT_ASSERT(session, session->id != 0 && txn_global->checkpoint_id == 0);
txn_global->checkpoint_id = session->id;
- txn_global->checkpoint_pinned =
- WT_MIN(txn_state->id, txn_state->snap_min);
-
/*
- * We're about to clear the checkpoint transaction from the global
- * state table so the oldest ID can move forward. Make sure everything
- * we've done above is scheduled.
+ * Remove the checkpoint transaction from the global table.
+ *
+ * This allows ordinary visibility checks to move forward because
+ * checkpoints often take a long time and only write to the metadata.
*/
- WT_FULL_BARRIER();
+ WT_ERR(__wt_writelock(session, txn_global->scan_rwlock));
+ txn_global->checkpoint_txnid = txn->id;
+ txn_global->checkpoint_pinned = WT_MIN(txn->id, txn->snap_min);
/*
* Sanity check that the oldest ID hasn't moved on before we have
@@ -507,6 +668,25 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
* details).
*/
txn_state->id = txn_state->snap_min = WT_TXN_NONE;
+ WT_ERR(__wt_writeunlock(session, txn_global->scan_rwlock));
+
+ /*
+ * Unblock updates -- we can figure out that any updates to clean pages
+ * after this point are too new to be written in the checkpoint.
+ */
+ cache->eviction_dirty_trigger = orig_trigger;
+ WT_STAT_FAST_CONN_SET(
+ session, txn_checkpoint_scrub_target, orig_trigger);
+
+ /*
+ * Mark old checkpoints that are being deleted and figure out which
+ * trees we can skip in this checkpoint.
+ *
+	 * Release clean trees. Any updates made after this point will not be
+ * visible to the checkpoint transaction.
+ */
+ WT_ERR(__checkpoint_apply(session, cfg, __checkpoint_mark_deletes));
+ WT_ERR(__checkpoint_release_clean_trees(session));
/* Tell logging that we have started a database checkpoint. */
if (full && logging)
@@ -522,9 +702,13 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
*/
session->dhandle = NULL;
- /* Release the snapshot so we aren't pinning pages in cache. */
+ /* Release the snapshot so we aren't pinning updates in cache. */
__wt_txn_release_snapshot(session);
+ /* Mark all trees as open for business (particularly eviction). */
+ WT_ERR(__checkpoint_apply(session, cfg, __checkpoint_presync));
+ WT_ERR(__wt_evict_server_wake(session));
+
WT_ERR(__checkpoint_verbose_track(session,
"committing transaction", &verb_timer));
@@ -587,6 +771,12 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
ret = __wt_txn_checkpoint_log(
session, false, WT_TXN_LOG_CKPT_SYNC, NULL));
+ /*
+ * Now that the metadata is stable, re-open the metadata file for
+ * regular eviction by clearing the checkpoint_pinned flag.
+	 * regular eviction by clearing the checkpoint_pinned transaction ID.
+ txn_global->checkpoint_pinned = WT_TXN_NONE;
+
if (full) {
WT_ERR(__wt_epoch(session, &stop));
__checkpoint_stats(session, &start, &stop);
@@ -609,6 +799,10 @@ err: /*
if (tracking)
WT_TRET(__wt_meta_track_off(session, false, ret != 0));
+ cache->eviction_dirty_trigger = orig_trigger;
+ WT_STAT_FAST_CONN_SET(
+ session, txn_checkpoint_scrub_target, orig_trigger);
+
if (F_ISSET(txn, WT_TXN_RUNNING)) {
/*
* Clear the dhandle so the visibility check doesn't get
@@ -634,9 +828,12 @@ err: /*
WT_TXN_LOG_CKPT_STOP : WT_TXN_LOG_CKPT_CLEANUP, NULL));
}
- for (i = 0; i < session->ckpt_handle_next; ++i)
+ for (i = 0; i < session->ckpt_handle_next; ++i) {
+ if (session->ckpt_handle[i] == NULL)
+ continue;
WT_WITH_DHANDLE(session, session->ckpt_handle[i],
WT_TRET(__wt_session_release_btree(session)));
+ }
__wt_free(session, session->ckpt_handle);
session->ckpt_handle_allocated = session->ckpt_handle_next = 0;
@@ -836,7 +1033,7 @@ __checkpoint_lock_tree(WT_SESSION_IMPL *session,
if (cval.len == 0)
name = WT_CHECKPOINT;
else {
- WT_ERR(__wt_checkpoint_name_ok(session, cval.str, cval.len));
+ WT_ERR(__checkpoint_name_ok(session, cval.str, cval.len));
WT_ERR(__wt_strndup(session, cval.str, cval.len, &name_alloc));
name = name_alloc;
}
@@ -851,10 +1048,10 @@ __checkpoint_lock_tree(WT_SESSION_IMPL *session,
__wt_config_next(&dropconf, &k, &v)) == 0) {
/* Disallow unsafe checkpoint names. */
if (v.len == 0)
- WT_ERR(__wt_checkpoint_name_ok(
+ WT_ERR(__checkpoint_name_ok(
session, k.str, k.len));
else
- WT_ERR(__wt_checkpoint_name_ok(
+ WT_ERR(__checkpoint_name_ok(
session, v.str, v.len));
if (v.len == 0)
@@ -986,42 +1183,23 @@ err: if (hot_backup_locked)
}
/*
- * __checkpoint_tree --
- * Checkpoint a single tree.
- * Assumes all necessary locks have been acquired by the caller.
+ * __checkpoint_mark_deletes --
+ * Figure out what old checkpoints will be deleted, and whether the
+ * checkpoint can be skipped entirely.
*/
static int
-__checkpoint_tree(
- WT_SESSION_IMPL *session, bool is_checkpoint, const char *cfg[])
+__checkpoint_mark_deletes(
+ WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_BM *bm;
WT_BTREE *btree;
WT_CKPT *ckpt, *ckptbase;
WT_CONFIG_ITEM cval;
- WT_CONNECTION_IMPL *conn;
- WT_DATA_HANDLE *dhandle;
- WT_DECL_RET;
- WT_LSN ckptlsn;
const char *name;
- int deleted, was_modified;
- bool fake_ckpt, force;
+ int deleted;
+ bool force;
btree = S2BT(session);
- bm = btree->bm;
ckptbase = btree->ckpt;
- conn = S2C(session);
- dhandle = session->dhandle;
- fake_ckpt = false;
- was_modified = btree->modified;
-
- /*
- * Set the checkpoint LSN to the maximum LSN so that if logging is
- * disabled, recovery will never roll old changes forward over the
- * non-logged changes in this checkpoint. If logging is enabled, a
- * real checkpoint LSN will be assigned for this checkpoint and
- * overwrite this.
- */
- WT_MAX_LSN(&ckptlsn);
/*
* Check for clean objects not requiring a checkpoint.
@@ -1050,20 +1228,15 @@ __checkpoint_tree(
force = false;
F_CLR(btree, WT_BTREE_SKIP_CKPT);
if (!btree->modified && cfg != NULL) {
- ret = __wt_config_gets(session, cfg, "force", &cval);
- if (ret != 0 && ret != WT_NOTFOUND)
- WT_ERR(ret);
- if (ret == 0 && cval.val != 0)
- force = true;
+ WT_RET(__wt_config_gets(session, cfg, "force", &cval));
+ force = cval.val != 0;
}
if (!btree->modified && !force) {
- if (!is_checkpoint)
- goto nockpt;
-
deleted = 0;
WT_CKPT_FOREACH(ckptbase, ckpt)
if (F_ISSET(ckpt, WT_CKPT_DELETE))
++deleted;
+
/*
* Complicated test: if the tree is clean and last two
* checkpoints have the same name (correcting for internal
@@ -1077,17 +1250,52 @@ __checkpoint_tree(
(strcmp(name, (ckpt - 2)->name) == 0 ||
(WT_PREFIX_MATCH(name, WT_CHECKPOINT) &&
WT_PREFIX_MATCH((ckpt - 2)->name, WT_CHECKPOINT)))) {
-nockpt: F_SET(btree, WT_BTREE_SKIP_CKPT);
- WT_PUBLISH(btree->checkpoint_gen,
- S2C(session)->txn_global.checkpoint_gen);
- WT_STAT_FAST_DATA_SET(session,
- btree_checkpoint_generation,
- btree->checkpoint_gen);
- ret = 0;
- goto err;
+ F_SET(btree, WT_BTREE_SKIP_CKPT);
+ return (0);
}
}
+ return (0);
+}
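
__checkpoint_mark_deletes keeps the existing skip test: a clean, unforced tree can skip the checkpoint when its most recent checkpoints carry the same name, or both carry internal checkpoint names. A condensed, simplified sketch of the shape of that decision follows; the real test also accounts for checkpoints being replaced, so treat it as illustrative only.

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    #define INTERNAL_PREFIX "WiredTigerCheckpoint"  /* like WT_CHECKPOINT */

    struct ckpt {
        const char *name;
        bool delete_flag;   /* marked for deletion */
    };

    static bool
    is_internal(const char *name)
    {
        return (strncmp(name, INTERNAL_PREFIX, strlen(INTERNAL_PREFIX)) == 0);
    }

    /*
     * Decide whether a clean tree's checkpoint can be skipped: nothing to
     * delete, at least two existing checkpoints, and the new name matches
     * the previous one (or both are internal checkpoints).
     */
    static bool
    can_skip_checkpoint(bool tree_modified, bool forced,
        const struct ckpt *ckpts, int nckpts, const char *new_name)
    {
        int deleted, i;
        const char *prev;

        if (tree_modified || forced || nckpts < 2)
            return (false);

        deleted = 0;
        for (i = 0; i < nckpts; ++i)
            if (ckpts[i].delete_flag)
                ++deleted;
        if (deleted != 0)
            return (false);

        prev = ckpts[nckpts - 1].name;
        return (strcmp(new_name, prev) == 0 ||
            (is_internal(new_name) && is_internal(prev)));
    }

    int
    main(void)
    {
        struct ckpt list[] = {
            { "WiredTigerCheckpoint.1", false },
            { "WiredTigerCheckpoint.2", false },
        };

        printf("skip? %s\n",
            can_skip_checkpoint(false, false, list, 2,
            "WiredTigerCheckpoint") ? "yes" : "no");
        return (0);
    }
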
+
+/*
+ * __checkpoint_tree --
+ * Checkpoint a single tree.
+ * Assumes all necessary locks have been acquired by the caller.
+ */
+static int
+__checkpoint_tree(
+ WT_SESSION_IMPL *session, bool is_checkpoint, const char *cfg[])
+{
+ WT_BM *bm;
+ WT_BTREE *btree;
+ WT_CKPT *ckpt, *ckptbase;
+ WT_CONNECTION_IMPL *conn;
+ WT_DATA_HANDLE *dhandle;
+ WT_DECL_RET;
+ WT_LSN ckptlsn;
+ int was_modified;
+ bool fake_ckpt;
+
+ WT_UNUSED(cfg);
+
+ btree = S2BT(session);
+ bm = btree->bm;
+ ckptbase = btree->ckpt;
+ conn = S2C(session);
+ dhandle = session->dhandle;
+ fake_ckpt = false;
+ was_modified = btree->modified;
+
+ /*
+ * Set the checkpoint LSN to the maximum LSN so that if logging is
+ * disabled, recovery will never roll old changes forward over the
+ * non-logged changes in this checkpoint. If logging is enabled, a
+ * real checkpoint LSN will be assigned for this checkpoint and
+ * overwrite this.
+ */
+ WT_MAX_LSN(&ckptlsn);
+
/*
* If an object has never been used (in other words, if it could become
* a bulk-loaded file), then we must fake the checkpoint. This is good
@@ -1183,10 +1391,10 @@ fake: /*
/*
* If we wrote a checkpoint (rather than faking one), pages may be
- * available for re-use. If tracking enabled, defer making pages
- * available until transaction end. The exception is if the handle
- * is being discarded, in which case the handle will be gone by the
- * time we try to apply or unroll the meta tracking event.
+ * available for re-use. If tracking is enabled, defer making pages
+ * available until transaction end. The exception is if the handle is
+ * being discarded, in which case the handle will be gone by the time
+ * we try to apply or unroll the meta tracking event.
*/
if (!fake_ckpt) {
if (WT_META_TRACKING(session) && is_checkpoint)
@@ -1214,13 +1422,59 @@ err: /*
}
/*
+ * __checkpoint_presync --
+ * Visit all handles after the checkpoint writes are complete and before
+ * syncing. At this point, all trees should be completely open for
+ * business.
+ */
+static int
+__checkpoint_presync(WT_SESSION_IMPL *session, const char *cfg[])
+{
+ WT_BTREE *btree;
+
+ WT_UNUSED(cfg);
+
+ btree = S2BT(session);
+ WT_ASSERT(session, !btree->include_checkpoint_txn);
+ btree->evict_walk_period = btree->evict_walk_saved;
+ return (0);
+}
+
+/*
* __checkpoint_tree_helper --
* Checkpoint a tree (suitable for use in *_apply functions).
*/
static int
__checkpoint_tree_helper(WT_SESSION_IMPL *session, const char *cfg[])
{
- return (__checkpoint_tree(session, true, cfg));
+ WT_BTREE *btree;
+ WT_DECL_RET;
+
+ btree = S2BT(session);
+
+ ret = __checkpoint_tree(session, true, cfg);
+
+ /*
+ * Whatever happened, we aren't visiting this tree again in this
+ * checkpoint. Don't keep updates pinned any longer.
+ */
+ __checkpoint_update_generation(session);
+
+ /*
+ * In case this tree was being skipped by the eviction server
+ * during the checkpoint, restore the previous state.
+ */
+ btree->evict_walk_period = btree->evict_walk_saved;
+
+ /*
+ * Wake the eviction server, in case application threads have
+ * stalled while the eviction server decided it couldn't make
+ * progress. Without this, application threads will be stalled
+ * until the eviction server next wakes.
+ */
+ WT_TRET(__wt_evict_server_wake(session));
+
+ return (ret);
}
/*
@@ -1242,6 +1496,9 @@ __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
WT_SAVE_DHANDLE(session,
ret = __checkpoint_lock_tree(session, true, true, cfg));
WT_RET(ret);
+ WT_SAVE_DHANDLE(session,
+ ret = __checkpoint_mark_deletes(session, cfg));
+ WT_RET(ret);
return (__checkpoint_tree(session, true, cfg));
}
@@ -1319,6 +1576,11 @@ __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final)
WT_SAVE_DHANDLE(session,
ret = __checkpoint_lock_tree(session, false, need_tracking, NULL));
WT_ASSERT(session, ret == 0);
+ if (ret == 0) {
+ WT_SAVE_DHANDLE(session,
+ ret = __checkpoint_mark_deletes(session, NULL));
+ WT_ASSERT(session, ret == 0);
+ }
if (ret == 0)
ret = __checkpoint_tree(session, false, NULL);
diff --git a/src/txn/txn_log.c b/src/txn/txn_log.c
index 470515244f3..e73ff00f5b7 100644
--- a/src/txn/txn_log.c
+++ b/src/txn/txn_log.c
@@ -329,7 +329,7 @@ __wt_txn_checkpoint_log(
case WT_TXN_LOG_CKPT_START:
/* Take a copy of the transaction snapshot. */
txn->ckpt_nsnapshot = txn->snapshot_count;
- recsize = txn->ckpt_nsnapshot * WT_INTPACK64_MAXSIZE;
+ recsize = (size_t)txn->ckpt_nsnapshot * WT_INTPACK64_MAXSIZE;
WT_ERR(__wt_scr_alloc(session, recsize, &txn->ckpt_snapshot));
p = txn->ckpt_snapshot->mem;
end = p + recsize;
diff --git a/src/utilities/util_dump.c b/src/utilities/util_dump.c
index da70aea35be..6344a90dddd 100644
--- a/src/utilities/util_dump.c
+++ b/src/utilities/util_dump.c
@@ -242,6 +242,7 @@ dump_table_config(
char *p, **cfg, *_cfg[4] = {NULL, NULL, NULL, NULL};
p = NULL;
+ srch = NULL;
cfg = &_cfg[3];
/* Get the table name. */
@@ -306,32 +307,31 @@ dump_table_config(
WT_ERR(print_config(session, uri, cfg, json, true));
- if (complex_table) {
- /*
- * The underlying table configuration function needs a second
- * cursor: open one before calling it, it makes error handling
- * hugely simpler.
- */
- if ((ret = session->open_cursor(
- session, "metadata:", NULL, NULL, &srch)) != 0)
- WT_ERR(util_cerr(cursor, "open_cursor", ret));
-
- if ((ret = dump_table_config_complex(
- session, cursor, srch, name, "colgroup:", json)) == 0)
- ret = dump_table_config_complex(
- session, cursor, srch, name, "index:", json);
-
- if ((tret = srch->close(srch)) != 0) {
- tret = util_cerr(cursor, "close", tret);
- if (ret == 0)
- ret = tret;
- }
- } else if (json && printf(
- " \"colgroups\" : [],\n"
- " \"indices\" : []\n") < 0)
+ /*
+ * The underlying table configuration function needs a second
+	 * cursor: open one before calling it; that makes error handling
+ * hugely simpler.
+ */
+ if ((ret = session->open_cursor(
+ session, "metadata:", NULL, NULL, &srch)) != 0)
+ WT_ERR(util_cerr(cursor, "open_cursor", ret));
+
+ if (complex_table)
+ WT_ERR(dump_table_config_complex(
+ session, cursor, srch, name, "colgroup:", json));
+ else if (json && printf(
+ " \"colgroups\" : [],\n") < 0)
WT_ERR(util_cerr(cursor, NULL, EIO));
-err: free(p);
+ WT_ERR(dump_table_config_complex(
+ session, cursor, srch, name, "index:", json));
+
+err: if (srch != NULL && (tret = srch->close(srch)) != 0) {
+ tret = util_cerr(cursor, "close", tret);
+ if (ret == 0)
+ ret = tret;
+ }
+ free(p);
free(_cfg[0]);
free(_cfg[1]);
free(_cfg[2]);
diff --git a/test/cursor_order/cursor_order_ops.c b/test/cursor_order/cursor_order_ops.c
index a2185dd123f..58da49b2991 100644
--- a/test/cursor_order/cursor_order_ops.c
+++ b/test/cursor_order/cursor_order_ops.c
@@ -130,7 +130,8 @@ ops_start(SHARED_CONFIG *cfg)
seconds = (stop.tv_sec - start.tv_sec) +
(stop.tv_usec - start.tv_usec) * 1e-6;
fprintf(stderr, "timer: %.2lf seconds (%d ops/second)\n",
- seconds, (int)(((cfg->reverse_scanners + cfg->append_inserters) *
+ seconds, (int)
+ (((double)(cfg->reverse_scanners + cfg->append_inserters) *
total_nops) / seconds));
/* Verify the files. */
diff --git a/test/format/ops.c b/test/format/ops.c
index c97d82809a1..283e2912daa 100644
--- a/test/format/ops.c
+++ b/test/format/ops.c
@@ -342,7 +342,7 @@ snap_check(WT_CURSOR *cursor,
switch (g.type) {
case FIX:
testutil_die(ret,
- "snap_check: %" PRIu64 " search: "
+ "snapshot-isolation: %" PRIu64 " search: "
"expected {0x%02x}, found {0x%02x}",
start->keyno,
start->deleted ? 0 : *(uint8_t *)start->vdata,
@@ -350,7 +350,7 @@ snap_check(WT_CURSOR *cursor,
/* NOTREACHED */
case ROW:
testutil_die(ret,
- "snap_check: %.*s search: "
+ "snapshot-isolation: %.*s search: "
"expected {%.*s}, found {%.*s}",
(int)key->size, key->data,
start->deleted ?
@@ -362,7 +362,7 @@ snap_check(WT_CURSOR *cursor,
/* NOTREACHED */
case VAR:
testutil_die(ret,
- "snap_check: %" PRIu64 " search: "
+ "snapshot-isolation: %" PRIu64 " search: "
"expected {%.*s}, found {%.*s}",
start->keyno,
start->deleted ?
diff --git a/test/format/smoke.sh b/test/format/smoke.sh
index 5fbc349f242..0c86b5e57c6 100755
--- a/test/format/smoke.sh
+++ b/test/format/smoke.sh
@@ -3,7 +3,7 @@
set -e
# Smoke-test format as part of running "make check".
-args="-1 -c "." data_source=table ops=100000 rows=10000 threads=4 compression=none logging_compression=none"
+args="-1 -c "." data_source=table ops=50000 rows=10000 threads=4 compression=none logging_compression=none"
$TEST_WRAPPER ./t $args file_type=fix
$TEST_WRAPPER ./t $args file_type=row
diff --git a/test/manydbs/Makefile.am b/test/manydbs/Makefile.am
index 2bc47ad7f2e..ff5985cf2a4 100644
--- a/test/manydbs/Makefile.am
+++ b/test/manydbs/Makefile.am
@@ -10,7 +10,8 @@ t_LDADD +=$(top_builddir)/libwiredtiger.la
t_LDFLAGS = -static
# Run this during a "make check" smoke test.
-TESTS = smoke.sh
+TESTS = $(noinst_PROGRAMS)
+LOG_COMPILER = $(TEST_WRAPPER)
clean-local:
rm -rf WT_TEST *.core
diff --git a/test/manydbs/smoke.sh b/test/manydbs/smoke.sh
deleted file mode 100755
index c0e2976f154..00000000000
--- a/test/manydbs/smoke.sh
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/bin/sh
-
-set -e
-
-# Smoke-test format as part of running "make check".
-# Run with:
-# 1. The defaults
-# 2. Set idle flag to turn off operations.
-# 3. More dbs.
-#
-echo "manydbs: default with operations turned on"
-$TEST_WRAPPER ./t
-echo "manydbs: totally idle databases"
-$TEST_WRAPPER ./t -I
-echo "manydbs: 40 databases with operations"
-$TEST_WRAPPER ./t -D 40
-echo "manydbs: 40 idle databases"
-$TEST_WRAPPER ./t -I -D 40
diff --git a/test/mciproject.yml b/test/mciproject.yml
index 3df1ce5805e..8825bb65052 100644
--- a/test/mciproject.yml
+++ b/test/mciproject.yml
@@ -8,12 +8,12 @@ functions:
command: git.get_project
params:
directory: wiredtiger
- "fetch artifacts" : &fetch_artifacts
+ "fetch binaries" : &fetch_binaries
- command: s3.get
params:
aws_key: ${aws_key}
aws_secret: ${aws_secret}
- remote_file: wiredtiger/${build_variant}/${revision}/artifacts/${build_id}.tgz
+ remote_file: wiredtiger/${build_variant}/${revision}/binaries/${build_id}.tgz
bucket: build_external
extract_to: wiredtiger
@@ -23,6 +23,22 @@ pre:
script: |
rm -rf "wiredtiger"
post:
+ - command: archive.targz_pack
+ params:
+ target: "wiredtiger.tgz"
+ source_dir: "wiredtiger"
+ include:
+ - "./**"
+ - command: s3.put
+ params:
+ aws_secret: ${aws_secret}
+ aws_key: ${aws_key}
+ local_file: wiredtiger.tgz
+ bucket: build_external
+ permissions: public-read
+ content_type: application/tar
+ display_name: Artifacts
+ remote_file: wiredtiger/${build_variant}/${revision}/artifacts/${build_id}.tgz
- command: shell.exec
params:
script: |
@@ -49,7 +65,7 @@ tasks:
./build_posix/reconf
${configure_env_vars|} ./configure --enable-diagnostic --enable-python --enable-zlib --enable-strict --enable-verbose
${make_command|make} ${smp_command|} 2>&1
- ${make_command|make} check 2>&1
+ ${make_command|make} VERBOSE=1 check 2>&1
fi
- command: archive.targz_pack
params:
@@ -65,14 +81,14 @@ tasks:
bucket: build_external
permissions: public-read
content_type: application/tar
- display_name: Artifacts
- remote_file: wiredtiger/${build_variant}/${revision}/artifacts/${build_id}.tgz
+ display_name: Binaries
+ remote_file: wiredtiger/${build_variant}/${revision}/binaries/${build_id}.tgz
- name: unit-test
depends_on:
- name: compile
commands:
- - func: "fetch artifacts"
+ - func: "fetch binaries"
- command: shell.exec
params:
working_dir: "wiredtiger"
@@ -85,7 +101,7 @@ tasks:
depends_on:
- name: compile
commands:
- - func: "fetch artifacts"
+ - func: "fetch binaries"
- command: shell.exec
params:
working_dir: "wiredtiger"
@@ -99,7 +115,7 @@ tasks:
depends_on:
- name: compile
commands:
- - func: "fetch artifacts"
+ - func: "fetch binaries"
- command: shell.exec
params:
working_dir: "wiredtiger"
diff --git a/test/recovery/Makefile.am b/test/recovery/Makefile.am
index 19fc48dce47..3e7fce17d0e 100644
--- a/test/recovery/Makefile.am
+++ b/test/recovery/Makefile.am
@@ -14,8 +14,7 @@ truncated_log_LDADD +=$(top_builddir)/libwiredtiger.la
truncated_log_LDFLAGS = -static
# Run this during a "make check" smoke test.
-TESTS = $(noinst_PROGRAMS)
-LOG_COMPILER = $(TEST_WRAPPER)
+TESTS = smoke.sh
clean-local:
rm -rf WT_TEST.* *.core
diff --git a/test/recovery/random-abort.c b/test/recovery/random-abort.c
index 85629eddec4..16065cec29e 100644
--- a/test/recovery/random-abort.c
+++ b/test/recovery/random-abort.c
@@ -91,7 +91,8 @@ thread_run(void *arg)
if ((fp = fopen(buf, "w")) == NULL)
testutil_die(errno, "fopen");
/*
- * Set to no buffering.
+ * Set to line buffering. But that is advisory only. We've seen
+ * cases where the result files end up with partial lines.
*/
__wt_stream_set_line_buffer(fp);
if ((ret = td->conn->open_session(td->conn, NULL, NULL, &session)) != 0)
@@ -188,7 +189,7 @@ main(int argc, char *argv[])
WT_CURSOR *cursor;
WT_SESSION *session;
WT_RAND_STATE rnd;
- uint64_t key;
+ uint64_t key, last_key;
uint32_t absent, count, i, nth, timeout;
int ch, status, ret;
pid_t pid;
@@ -317,12 +318,23 @@ main(int argc, char *argv[])
* in the table after recovery. Since we did write-no-sync, we
* expect every key to have been recovered.
*/
- for (;; ++count) {
+ for (last_key = UINT64_MAX;; ++count, last_key = key) {
ret = fscanf(fp, "%" SCNu64 "\n", &key);
if (ret != EOF && ret != 1)
testutil_die(errno, "fscanf");
if (ret == EOF)
break;
+ /*
+ * If we're unlucky, the last line may be a partially
+ * written key at the end that can result in a false
+ * negative error for a missing record. Detect it.
+ */
+ if (last_key != UINT64_MAX && key != last_key + 1) {
+ printf("%s: Ignore partial record %" PRIu64
+ " last valid key %" PRIu64 "\n",
+ fname, key, last_key);
+ break;
+ }
snprintf(kname, sizeof(kname), "%" PRIu64, key);
cursor->set_key(cursor, kname);
if ((ret = cursor->search(cursor)) != 0) {
diff --git a/test/recovery/smoke.sh b/test/recovery/smoke.sh
new file mode 100755
index 00000000000..c7677b64503
--- /dev/null
+++ b/test/recovery/smoke.sh
@@ -0,0 +1,8 @@
+#! /bin/sh
+
+set -e
+
+# Smoke-test recovery as part of running "make check".
+
+$TEST_WRAPPER ./random-abort -t 10 -T 5
+$TEST_WRAPPER ./truncated-log
diff --git a/test/suite/helper.py b/test/suite/helper.py
index f85d708880f..9f34b566b3c 100644
--- a/test/suite/helper.py
+++ b/test/suite/helper.py
@@ -179,6 +179,49 @@ def simple_populate_check(self, uri, rows):
simple_populate_check_cursor(self, cursor, rows)
cursor.close()
+# population of a simple object, with a single index
+# uri: object
+# config: prefix of the session.create configuration string (defaults
+# to string value formats)
+# rows: entries to insert
+def simple_index_populate(self, uri, config, rows):
+ self.pr('simple_index_populate: ' + uri + ' with ' + str(rows) + ' rows')
+ self.session.create(uri, 'value_format=S,columns=(key0,value0),' + config)
+ indxname = 'index:' + uri.split(":")[1]
+ self.session.create(indxname + ':index1', 'columns=(value0,key0)')
+ cursor = self.session.open_cursor(uri, None)
+ for i in range(1, rows + 1):
+ cursor[key_populate(cursor, i)] = value_populate(cursor, i)
+ cursor.close()
+
+def simple_index_populate_check_cursor(self, cursor, rows):
+ i = 0
+ for key,val in cursor:
+ i += 1
+ self.assertEqual(key, key_populate(cursor, i))
+ if cursor.value_format == '8t' and val == 0: # deleted
+ continue
+ self.assertEqual(val, value_populate(cursor, i))
+ self.assertEqual(i, rows)
+
+def simple_index_populate_check(self, uri, rows):
+ self.pr('simple_index_populate_check: ' + uri)
+
+ # Check values in the main table.
+ cursor = self.session.open_cursor(uri, None)
+ simple_index_populate_check_cursor(self, cursor, rows)
+
+ # Check values in the index.
+ indxname = 'index:' + uri.split(":")[1]
+ idxcursor = self.session.open_cursor(indxname + ':index1')
+ for i in range(1, rows + 1):
+ k = key_populate(cursor, i)
+ v = value_populate(cursor, i)
+ ik = (v,k) # The index key is columns=(v,k).
+ self.assertEqual(v, idxcursor[ik])
+ idxcursor.close()
+ cursor.close()
+
# Return the value stored in a complex object.
def complex_value_populate(cursor, i):
return [str(i) + ': abcdefghijklmnopqrstuvwxyz'[0:i%26],
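The new helpers create a (value0, key0) index next to the table and verify both sides: the table is checked row by row, then every expected (value, key) pair is looked up through the index cursor. A hypothetical test using them might look like the sketch below; the class name, URI and row count are made up for illustration:

    # Hypothetical usage of the new helpers, not an actual suite test.
    import wttest
    from helper import simple_index_populate, simple_index_populate_check

    class test_index_helpers_example(wttest.WiredTigerTestCase):
        def test_populate_and_check(self):
            uri = 'table:index_example'          # illustrative URI
            # Creates the table plus 'index:index_example:index1'
            # and inserts 100 rows.
            simple_index_populate(self, uri, 'key_format=S', 100)
            # Checks the table contents and each (value, key) index entry.
            simple_index_populate_check(self, uri, 100)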
diff --git a/test/suite/run.py b/test/suite/run.py
index 6e7421b8b96..c37093a2a55 100644
--- a/test/suite/run.py
+++ b/test/suite/run.py
@@ -87,6 +87,7 @@ Options:\n\
-j N | --parallel N run all tests in parallel using N processes\n\
-l | --long run the entire test suite\n\
-p | --preserve preserve output files in WT_TEST/<testname>\n\
+ -s N | --scenario N use scenario N (N can be number or symbolic)\n\
-t | --timestamp name WT_TEST according to timestamp\n\
-v N | --verbose N set verboseness to N (0<=N<=3, default=1)\n\
\n\
@@ -95,15 +96,27 @@ Tests:\n\
may be a subsuite name (e.g. \'base\' runs test_base*.py)\n\
\n\
When -C or -c are present, there may not be any tests named.\n\
+ When -s is present, there must be a test named.\n\
'
# capture the category (AKA 'subsuite') part of a test name,
# e.g. test_util03 -> util
reCatname = re.compile(r"test_([^0-9]+)[0-9]*")
-def addScenarioTests(tests, loader, testname):
+def restrictScenario(testcases, restrict):
+ if restrict == '':
+ return testcases
+ elif restrict.isdigit():
+ s = int(restrict)
+ return [t for t in testcases
+ if hasattr(t, 'scenario_number') and t.scenario_number == s]
+ else:
+ return [t for t in testcases
+ if hasattr(t, 'scenario_name') and t.scenario_name == restrict]
+
+def addScenarioTests(tests, loader, testname, scenario):
loaded = loader.loadTestsFromName(testname)
- tests.addTests(generate_scenarios(loaded))
+ tests.addTests(restrictScenario(generate_scenarios(loaded), scenario))
def configRecord(cmap, tup):
"""
@@ -195,20 +208,20 @@ def configApply(suites, configfilename, configwrite):
json.dump(configmap, f, sort_keys=True, indent=4)
return newsuite
-def testsFromArg(tests, loader, arg):
+def testsFromArg(tests, loader, arg, scenario):
# If a group of tests is mentioned, do all tests in that group
# e.g. 'run.py base'
groupedfiles = glob.glob(suitedir + os.sep + 'test_' + arg + '*.py')
if len(groupedfiles) > 0:
for file in groupedfiles:
- testsFromArg(tests, loader, os.path.basename(file))
+ testsFromArg(tests, loader, os.path.basename(file), scenario)
return
# Explicit test class names
if not arg[0].isdigit():
if arg.endswith('.py'):
arg = arg[:-3]
- addScenarioTests(tests, loader, arg)
+ addScenarioTests(tests, loader, arg, scenario)
return
# Deal with ranges
@@ -217,7 +230,7 @@ def testsFromArg(tests, loader, arg):
else:
start, end = int(arg), int(arg)
for t in xrange(start, end+1):
- addScenarioTests(tests, loader, 'test%03d' % t)
+ addScenarioTests(tests, loader, 'test%03d' % t, scenario)
if __name__ == '__main__':
tests = unittest.TestSuite()
@@ -228,6 +241,7 @@ if __name__ == '__main__':
configfile = None
configwrite = False
dirarg = None
+ scenario = ''
verbose = 1
args = sys.argv[1:]
testargs = []
@@ -265,6 +279,12 @@ if __name__ == '__main__':
if option == '-preserve' or option == 'p':
preserve = True
continue
+ if option == '-scenario' or option == 's':
+ if scenario != '' or len(args) == 0:
+ usage()
+ sys.exit(2)
+ scenario = args.pop(0)
+ continue
if option == '-timestamp' or option == 't':
timestamp = True
continue
@@ -303,15 +323,20 @@ if __name__ == '__main__':
# Without any tests listed as arguments, do discovery
if len(testargs) == 0:
+ if scenario != '':
+ sys.stderr.write(
+ 'run.py: specifying a scenario requires a test name\n')
+ usage()
+ sys.exit(2)
from discover import defaultTestLoader as loader
suites = loader.discover(suitedir)
suites = sorted(suites, key=lambda c: str(list(c)[0]))
if configfile != None:
suites = configApply(suites, configfile, configwrite)
- tests.addTests(generate_scenarios(suites))
+ tests.addTests(restrictScenario(generate_scenarios(suites), ''))
else:
for arg in testargs:
- testsFromArg(tests, loader, arg)
+ testsFromArg(tests, loader, arg, scenario)
if debug:
import pdb
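The new -s/--scenario option keeps only the generated test cases whose scenario_number or scenario_name matches the argument, and it requires an explicit test name so the filter is never applied to a full discovery run. A condensed sketch of the selection logic, with illustrative command lines in the comments:

    # Condensed restatement of restrictScenario() above.
    def restrict(testcases, wanted):
        if wanted == '':
            return testcases                  # no -s given: keep everything
        if wanted.isdigit():                  # numeric: match scenario_number
            n = int(wanted)
            return [t for t in testcases
                    if getattr(t, 'scenario_number', None) == n]
        return [t for t in testcases          # symbolic: match scenario_name
                if getattr(t, 'scenario_name', None) == wanted]

    # Example invocations (illustrative):
    #   python run.py test_backup03 -s 3            scenario number 3
    #   python run.py test_backup03 -s backup_5a    scenario named 'backup_5a'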
diff --git a/test/suite/test_async01.py b/test/suite/test_async01.py
index 71a18a68121..9322748c30f 100644
--- a/test/suite/test_async01.py
+++ b/test/suite/test_async01.py
@@ -29,7 +29,7 @@
import sys, threading, wiredtiger, wttest
from suite_subprocess import suite_subprocess
from wiredtiger import WiredTigerError
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# TODO - tmp code
def tty_pr(s):
@@ -122,7 +122,7 @@ class test_async01(wttest.WiredTigerTestCase, suite_subprocess):
async_threads = 3
current = {}
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file-col', dict(tablekind='col',uri='file')),
('file-fix', dict(tablekind='fix',uri='file')),
('file-row', dict(tablekind='row',uri='file')),
diff --git a/test/suite/test_async02.py b/test/suite/test_async02.py
index 7aa1b85a2f3..bc6b389fc27 100644
--- a/test/suite/test_async02.py
+++ b/test/suite/test_async02.py
@@ -29,7 +29,7 @@
import sys, threading, wiredtiger, wttest
from suite_subprocess import suite_subprocess
from wiredtiger import WiredTigerError
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
class Callback(wiredtiger.AsyncCallback):
def __init__(self, current):
@@ -119,7 +119,7 @@ class test_async02(wttest.WiredTigerTestCase, suite_subprocess):
async_threads = 3
current = {}
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file-col', dict(tablekind='col',uri='file')),
('file-fix', dict(tablekind='fix',uri='file')),
('file-row', dict(tablekind='row',uri='file')),
diff --git a/test/suite/test_backup02.py b/test/suite/test_backup02.py
index 095bfbe404a..398d55abd7a 100644
--- a/test/suite/test_backup02.py
+++ b/test/suite/test_backup02.py
@@ -30,13 +30,13 @@ import Queue
import threading, time, wiredtiger, wttest
from helper import key_populate, simple_populate
from wtthread import backup_thread, checkpoint_thread, op_thread
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_backup02.py
# Run background checkpoints and backups repeatedly while doing inserts
# in another thread
class test_backup02(wttest.WiredTigerTestCase):
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('table', dict(uri='table:test',fmt='L',dsize=100,nops=200,nthreads=1,time=30)),
])
diff --git a/test/suite/test_backup03.py b/test/suite/test_backup03.py
index e810a2ec714..053009c6edb 100644
--- a/test/suite/test_backup03.py
+++ b/test/suite/test_backup03.py
@@ -28,7 +28,7 @@
import glob, os, shutil, string
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
import wiredtiger, wttest
from helper import compare_files,\
complex_populate, complex_populate_lsm, simple_populate
@@ -56,25 +56,25 @@ class test_backup_target(wttest.WiredTigerTestCase, suite_subprocess):
('table:' + pfx + '.4', complex_populate_lsm, 3),
]
list = [
- ( '1', dict(big=0,list=[0])), # Target objects individually
- ( '2', dict(big=1,list=[1])),
- ( '3', dict(big=2,list=[2])),
- ( '4', dict(big=3,list=[3])),
- ('5a', dict(big=0,list=[0,2])), # Target groups of objects
- ('5b', dict(big=2,list=[0,2])),
- ('6a', dict(big=1,list=[1,3])),
- ('6b', dict(big=3,list=[1,3])),
- ('7a', dict(big=0,list=[0,1,2])),
- ('7b', dict(big=1,list=[0,1,2])),
- ('7c', dict(big=2,list=[0,1,2])),
- ('8a', dict(big=0,list=[0,1,2,3])),
- ('8b', dict(big=1,list=[0,1,2,3])),
- ('8c', dict(big=2,list=[0,1,2,3])),
- ('8d', dict(big=3,list=[0,1,2,3])),
- ( '9', dict(big=3,list=[])), # Backup everything
+ ( 'backup_1', dict(big=0,list=[0])), # Target objects individually
+ ( 'backup_2', dict(big=1,list=[1])),
+ ( 'backup_3', dict(big=2,list=[2])),
+ ( 'backup_4', dict(big=3,list=[3])),
+ ('backup_5a', dict(big=0,list=[0,2])), # Target groups of objects
+ ('backup_5b', dict(big=2,list=[0,2])),
+ ('backup_6a', dict(big=1,list=[1,3])),
+ ('backup_6b', dict(big=3,list=[1,3])),
+ ('backup_7a', dict(big=0,list=[0,1,2])),
+ ('backup_7b', dict(big=1,list=[0,1,2])),
+ ('backup_7c', dict(big=2,list=[0,1,2])),
+ ('backup_8a', dict(big=0,list=[0,1,2,3])),
+ ('backup_8b', dict(big=1,list=[0,1,2,3])),
+ ('backup_8c', dict(big=2,list=[0,1,2,3])),
+ ('backup_8d', dict(big=3,list=[0,1,2,3])),
+ ('backup_9', dict(big=3,list=[])), # Backup everything
]
- scenarios = number_scenarios(multiply_scenarios('.', list))
+ scenarios = make_scenarios(list)
# Create a large cache, otherwise this test runs quite slowly.
conn_config = 'cache_size=1G'
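The same conversion repeats across the suite: the old pattern of crossing scenario lists with multiply_scenarios, tagging them with number_scenarios and optionally capping them with prune_scenarios collapses into a single make_scenarios call, and scenario names gain descriptive prefixes (here backup_*) so they can be selected symbolically with -s. A small before/after sketch using the signatures that appear in this diff; the lists are illustrative:

    # Sketch of the conversion pattern, not code from any one test.
    from wtscenario import make_scenarios

    types = [('file', dict(uri='file:')), ('table', dict(uri='table:'))]
    ckpt  = [('no', dict(name=0)), ('yes', dict(name=1))]

    # Old: scenarios = number_scenarios(multiply_scenarios('.', types, ckpt))
    # New: one call crosses, numbers and (optionally) prunes the scenarios.
    scenarios = make_scenarios(types, ckpt)
    # scenarios = make_scenarios(types, ckpt, prune=1000)   # as in test_config03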
diff --git a/test/suite/test_backup04.py b/test/suite/test_backup04.py
index 852a22c1e0c..866e673dccb 100644
--- a/test/suite/test_backup04.py
+++ b/test/suite/test_backup04.py
@@ -30,7 +30,7 @@ import Queue
import threading, time, wiredtiger, wttest
import glob, os, shutil
from suite_subprocess import suite_subprocess
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
from wtthread import op_thread
from helper import compare_files, key_populate
@@ -54,7 +54,7 @@ class test_backup_target(wttest.WiredTigerTestCase, suite_subprocess):
# and that is not what we want here.
#
pfx = 'test_backup'
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('table', dict(uri='table:test',dsize=100,nops=2000,nthreads=1,time=30)),
])
diff --git a/test/suite/test_backup05.py b/test/suite/test_backup05.py
index fbe219d8de8..131732e9a89 100644
--- a/test/suite/test_backup05.py
+++ b/test/suite/test_backup05.py
@@ -35,7 +35,6 @@
import fnmatch, os, shutil, time
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios
from helper import copy_wiredtiger_home
import wiredtiger, wttest
diff --git a/test/suite/test_base02.py b/test/suite/test_base02.py
index 70117573241..2b51fe1b530 100644
--- a/test/suite/test_base02.py
+++ b/test/suite/test_base02.py
@@ -32,14 +32,14 @@
import json
import wiredtiger, wttest
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# Test configuration strings.
class test_base02(wttest.WiredTigerTestCase):
name = 'test_base02a'
extra_config = ''
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file', dict(uri='file:')),
('table', dict(uri='table:')),
('lsm', dict(uri='lsm:')),
diff --git a/test/suite/test_base05.py b/test/suite/test_base05.py
index f191f23561f..4bee0efcfe2 100644
--- a/test/suite/test_base05.py
+++ b/test/suite/test_base05.py
@@ -27,7 +27,7 @@
# OTHER DEALINGS IN THE SOFTWARE.
import wiredtiger, wttest
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_base05.py
# Cursor operations
@@ -40,7 +40,7 @@ class test_base05(wttest.WiredTigerTestCase):
table_name1 = 'test_base05a'
table_name2 = 'test_base05b'
nentries = 1000
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('no_huffman', dict(extraconfig='')),
('huffman_key', dict(extraconfig='huffman_key="english"')),
('huffman_val', dict(extraconfig='huffman_value="english"')),
diff --git a/test/suite/test_bug003.py b/test/suite/test_bug003.py
index 739279a0141..28d71a534e2 100644
--- a/test/suite/test_bug003.py
+++ b/test/suite/test_bug003.py
@@ -30,7 +30,7 @@
# Regression tests.
import wiredtiger, wttest
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# Regression tests.
class test_bug003(wttest.WiredTigerTestCase):
@@ -43,7 +43,7 @@ class test_bug003(wttest.WiredTigerTestCase):
('yes', dict(name=1)),
]
- scenarios = number_scenarios(multiply_scenarios('.', types, ckpt))
+ scenarios = make_scenarios(types, ckpt)
# Confirm bulk-load isn't stopped by checkpoints.
def test_bug003(self):
diff --git a/test/suite/test_bug006.py b/test/suite/test_bug006.py
index e522cdf96f7..314ba57038f 100644
--- a/test/suite/test_bug006.py
+++ b/test/suite/test_bug006.py
@@ -31,13 +31,13 @@
import wiredtiger, wttest
from helper import key_populate, value_populate
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# Check that verify and salvage both raise exceptions if there is an open
# cursor.
class test_bug006(wttest.WiredTigerTestCase):
name = 'test_bug006'
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file', dict(uri='file:')),
('table', dict(uri='table:')),
])
diff --git a/test/suite/test_bug008.py b/test/suite/test_bug008.py
index 0243887e258..c4fa411f55e 100644
--- a/test/suite/test_bug008.py
+++ b/test/suite/test_bug008.py
@@ -31,13 +31,13 @@
import wiredtiger, wttest
from helper import simple_populate, key_populate, value_populate
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# Test search/search-near operations, including invisible values and keys
# past the end of the table.
class test_bug008(wttest.WiredTigerTestCase):
uri = 'file:test_bug008' # This is a btree layer test.
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('fix', dict(fmt='key_format=r,value_format=8t', empty=1, colvar=0)),
('row', dict(fmt='key_format=S', empty=0, colvar=0)),
('var', dict(fmt='key_format=r', empty=0, colvar=1))
diff --git a/test/suite/test_bug009.py b/test/suite/test_bug009.py
index 4d10e4391d9..2bdfb7dec52 100644
--- a/test/suite/test_bug009.py
+++ b/test/suite/test_bug009.py
@@ -33,7 +33,6 @@
import wiredtiger, wttest
from wiredtiger import stat
-from wtscenario import multiply_scenarios, number_scenarios
class test_bug009(wttest.WiredTigerTestCase):
name = 'test_bug009'
diff --git a/test/suite/test_bug011.py b/test/suite/test_bug011.py
index 50dba1c48be..fceb7a22ddb 100644
--- a/test/suite/test_bug011.py
+++ b/test/suite/test_bug011.py
@@ -42,7 +42,7 @@ class test_bug011(wttest.WiredTigerTestCase):
nops = 10000
# Add connection configuration for this test.
def conn_config(self, dir):
- return 'cache_size=10MB,hazard_max=' + str(self.ntables / 2)
+ return 'cache_size=10MB,eviction_dirty_target=99,eviction_dirty_trigger=99,hazard_max=' + str(self.ntables / 2)
def test_eviction(self):
cursors = []
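Raising eviction_dirty_target and eviction_dirty_trigger to 99 effectively defers dirty-page eviction while the test runs (the same tweak appears in test_compact02 later in this diff); that reading is an inference from the configuration, not stated in the change itself. A minimal sketch of the per-test connection override, with the class and table count made up:

    # Hypothetical sketch of the conn_config hook pattern used above.
    import wttest

    class test_eviction_example(wttest.WiredTigerTestCase):
        ntables = 100                        # illustrative

        def conn_config(self, dir):
            # Small cache, but dirty eviction deferred until 99% of it is
            # dirty, so it does not disturb the scenario under test.
            return ('cache_size=10MB,'
                    'eviction_dirty_target=99,eviction_dirty_trigger=99,'
                    'hazard_max=' + str(self.ntables / 2))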
diff --git a/test/suite/test_bug016.py b/test/suite/test_bug016.py
new file mode 100644
index 00000000000..f7cb3c32559
--- /dev/null
+++ b/test/suite/test_bug016.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2016 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wiredtiger, wttest
+
+# test_bug016.py
+# WT-2757: WT_CURSOR.get_key() fails after WT_CURSOR.insert unless the
+# cursor has a record number key with append configured.
+class test_bug016(wttest.WiredTigerTestCase):
+
+ # Insert a row into a simple column-store table configured to append.
+ # WT_CURSOR.get_key should succeed.
+ def test_simple_column_store_append(self):
+ uri='file:bug016'
+ self.session.create(uri, 'key_format=r,value_format=S')
+ cursor = self.session.open_cursor(uri, None, 'append')
+ cursor.set_value('value')
+ cursor.insert()
+ self.assertEquals(cursor.get_key(), 1)
+
+ # Insert a row into a simple column-store table.
+ # WT_CURSOR.get_key should fail.
+ def test_simple_column_store(self):
+ uri='file:bug016'
+ self.session.create(uri, 'key_format=r,value_format=S')
+ cursor = self.session.open_cursor(uri, None)
+ cursor.set_key(37)
+ cursor.set_value('value')
+ cursor.insert()
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: cursor.get_key(), "/requires key be set/")
+
+ # Insert a row into a simple row-store table.
+ # WT_CURSOR.get_key should fail.
+ def test_simple_row_store(self):
+ uri='file:bug016'
+ self.session.create(uri, 'key_format=S,value_format=S')
+ cursor = self.session.open_cursor(uri, None)
+ cursor.set_key('key')
+ cursor.set_value('value')
+ cursor.insert()
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: cursor.get_key(), "/requires key be set/")
+
+ # Insert a row into a complex column-store table configured to append.
+ # WT_CURSOR.get_key should succeed.
+ def test_complex_column_store_append(self):
+ uri='table:bug016'
+ self.session.create(
+ uri, 'key_format=r,value_format=S,columns=(key,value)')
+ cursor = self.session.open_cursor(uri, None, 'append')
+ cursor.set_value('value')
+ cursor.insert()
+ self.assertEquals(cursor.get_key(), 1)
+
+ # Insert a row into a complex column-store table.
+ # WT_CURSOR.get_key should fail.
+ def test_complex_column_store(self):
+ uri='table:bug016'
+ self.session.create(
+ uri, 'key_format=r,value_format=S,columns=(key,value)')
+ cursor = self.session.open_cursor(uri, None)
+ cursor.set_key(37)
+ cursor.set_value('value')
+ cursor.insert()
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: cursor.get_key(), "/requires key be set/")
+
+ # Insert a row into a complex row-store table.
+ # WT_CURSOR.get_key should fail.
+ def test_complex_row_store(self):
+ uri='table:bug016'
+ self.session.create(
+ uri, 'key_format=S,value_format=S,columns=(key,value)')
+ cursor = self.session.open_cursor(uri, None)
+ cursor.set_key('key')
+ cursor.set_value('value')
+ cursor.insert()
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: cursor.get_key(), "/requires key be set/")
+
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/test/suite/test_bulk01.py b/test/suite/test_bulk01.py
index 1add11af26b..5bacfafaa20 100644
--- a/test/suite/test_bulk01.py
+++ b/test/suite/test_bulk01.py
@@ -32,7 +32,7 @@
import wiredtiger, wttest
from helper import key_populate, value_populate
-from wtscenario import check_scenarios, multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# Smoke test bulk-load.
class test_bulk_load(wttest.WiredTigerTestCase):
@@ -52,7 +52,7 @@ class test_bulk_load(wttest.WiredTigerTestCase):
('integer', dict(valfmt='i')),
('string', dict(valfmt='S')),
]
- scenarios = number_scenarios(multiply_scenarios('.', types, keyfmt, valfmt))
+ scenarios = make_scenarios(types, keyfmt, valfmt)
# Test a simple bulk-load
def test_bulk_load(self):
diff --git a/test/suite/test_bulk02.py b/test/suite/test_bulk02.py
index fe8118209f2..af0b6d4485d 100644
--- a/test/suite/test_bulk02.py
+++ b/test/suite/test_bulk02.py
@@ -32,7 +32,7 @@
import shutil, os
from helper import confirm_empty, key_populate, value_populate
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
import wiredtiger, wttest
# test_bulkload_checkpoint
@@ -47,7 +47,7 @@ class test_bulkload_checkpoint(wttest.WiredTigerTestCase, suite_subprocess):
('unnamed', dict(ckpt_type='unnamed')),
]
- scenarios = number_scenarios(multiply_scenarios('.', types, ckpt_type))
+ scenarios = make_scenarios(types, ckpt_type)
# Bulk-load handles are skipped by checkpoints.
# Named and unnamed checkpoint versions.
@@ -90,8 +90,7 @@ class test_bulkload_backup(wttest.WiredTigerTestCase, suite_subprocess):
('different', dict(session_type='different')),
('same', dict(session_type='same')),
]
- scenarios = number_scenarios(
- multiply_scenarios('.', types, ckpt_type, session_type))
+ scenarios = make_scenarios(types, ckpt_type, session_type)
# Backup a set of chosen tables/files using the wt backup command.
# The only files are bulk-load files, so they shouldn't be copied.
diff --git a/test/suite/test_checkpoint01.py b/test/suite/test_checkpoint01.py
index 6e1ad7814ed..78754dc82fa 100644
--- a/test/suite/test_checkpoint01.py
+++ b/test/suite/test_checkpoint01.py
@@ -28,7 +28,7 @@
import wiredtiger, wttest
from helper import key_populate, complex_populate_lsm, simple_populate
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_checkpoint01.py
# Checkpoint tests
@@ -36,7 +36,7 @@ from wtscenario import check_scenarios
# with a set of checkpoints, then confirm the checkpoint's values are correct,
# including after other checkpoints are dropped.
class test_checkpoint(wttest.WiredTigerTestCase):
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file', dict(uri='file:checkpoint',fmt='S')),
('table', dict(uri='table:checkpoint',fmt='S'))
])
@@ -139,7 +139,7 @@ class test_checkpoint(wttest.WiredTigerTestCase):
# Check some specific cursor checkpoint combinations.
class test_checkpoint_cursor(wttest.WiredTigerTestCase):
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file', dict(uri='file:checkpoint',fmt='S')),
('table', dict(uri='table:checkpoint',fmt='S'))
])
@@ -205,7 +205,7 @@ class test_checkpoint_cursor(wttest.WiredTigerTestCase):
# Check that you can checkpoint targets.
class test_checkpoint_target(wttest.WiredTigerTestCase):
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file', dict(uri='file:checkpoint',fmt='S')),
('table', dict(uri='table:checkpoint',fmt='S'))
])
@@ -252,7 +252,7 @@ class test_checkpoint_target(wttest.WiredTigerTestCase):
# Check that you can't write checkpoint cursors.
class test_checkpoint_cursor_update(wttest.WiredTigerTestCase):
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file-r', dict(uri='file:checkpoint',fmt='r')),
('file-S', dict(uri='file:checkpoint',fmt='S')),
('table-r', dict(uri='table:checkpoint',fmt='r')),
@@ -277,7 +277,7 @@ class test_checkpoint_cursor_update(wttest.WiredTigerTestCase):
# Check that WiredTigerCheckpoint works as a checkpoint specifier.
class test_checkpoint_last(wttest.WiredTigerTestCase):
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file', dict(uri='file:checkpoint',fmt='S')),
('table', dict(uri='table:checkpoint',fmt='S'))
])
@@ -343,7 +343,7 @@ class test_checkpoint_lsm_name(wttest.WiredTigerTestCase):
class test_checkpoint_empty(wttest.WiredTigerTestCase):
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file', dict(uri='file:checkpoint')),
('table', dict(uri='table:checkpoint')),
])
diff --git a/test/suite/test_checkpoint02.py b/test/suite/test_checkpoint02.py
index 71c8792359c..ac57499a9e4 100644
--- a/test/suite/test_checkpoint02.py
+++ b/test/suite/test_checkpoint02.py
@@ -30,13 +30,13 @@ import Queue
import threading, time, wiredtiger, wttest
from helper import key_populate, simple_populate
from wtthread import checkpoint_thread, op_thread
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_checkpoint02.py
# Run background checkpoints repeatedly while doing inserts and other
# operations in another thread
class test_checkpoint02(wttest.WiredTigerTestCase):
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('table-100', dict(uri='table:test',fmt='L',dsize=100,nops=50000,nthreads=10)),
('table-10', dict(uri='table:test',fmt='L',dsize=10,nops=50000,nthreads=30))
])
diff --git a/test/suite/test_colgap.py b/test/suite/test_colgap.py
index 46682c23167..5cc363dbd4a 100644
--- a/test/suite/test_colgap.py
+++ b/test/suite/test_colgap.py
@@ -28,7 +28,7 @@
import wiredtiger, wttest
from helper import simple_populate, key_populate, value_populate
-from wtscenario import check_scenarios, multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# test_colgap.py
# Test variable-length column-store gap performance.
@@ -149,8 +149,8 @@ class test_colmax(wttest.WiredTigerTestCase):
('not-single', dict(single=0)),
]
- scenarios = number_scenarios(multiply_scenarios(\
- '.', types, valfmt, record_number, bulk, reopen, single))
+ scenarios = make_scenarios(\
+ types, valfmt, record_number, bulk, reopen, single)
# Test that variable-length column-store correctly/efficiently handles big
# records (if it's not efficient, we'll just hang).
diff --git a/test/suite/test_collator.py b/test/suite/test_collator.py
index 34b5c20247f..a8103fb3671 100644
--- a/test/suite/test_collator.py
+++ b/test/suite/test_collator.py
@@ -28,7 +28,6 @@
import os
import wiredtiger, wttest, run
-from wtscenario import check_scenarios, number_scenarios
# test_collator.py
# Test indices using a custom extractor and collator.
diff --git a/test/suite/test_compact01.py b/test/suite/test_compact01.py
index 3af550708ed..183d75f9d31 100644
--- a/test/suite/test_compact01.py
+++ b/test/suite/test_compact01.py
@@ -30,7 +30,7 @@ import wiredtiger, wttest
from helper import complex_populate, simple_populate, key_populate
from suite_subprocess import suite_subprocess
from wiredtiger import stat
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# test_compact.py
# session level compact operation
@@ -53,7 +53,7 @@ class test_compact(wttest.WiredTigerTestCase, suite_subprocess):
('method_reopen', dict(utility=0,reopen=1)),
('utility', dict(utility=1,reopen=0)),
]
- scenarios = number_scenarios(multiply_scenarios('.', types, compact))
+ scenarios = make_scenarios(types, compact)
# We want a large cache so that eviction doesn't happen
# (which could skew our compaction results).
conn_config = 'cache_size=250MB,statistics=(all)'
diff --git a/test/suite/test_compact02.py b/test/suite/test_compact02.py
index 7ad05cd2536..eb21817bd90 100644
--- a/test/suite/test_compact02.py
+++ b/test/suite/test_compact02.py
@@ -32,7 +32,7 @@
import wiredtiger, wttest
from wiredtiger import stat
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# Test basic compression
class test_compact02(wttest.WiredTigerTestCase):
@@ -57,8 +57,7 @@ class test_compact02(wttest.WiredTigerTestCase):
('64KB', dict(fileConfig='leaf_page_max=64KB')),
('128KB', dict(fileConfig='leaf_page_max=128KB')),
]
- scenarios = \
- number_scenarios(multiply_scenarios('.', types, cacheSize, fileConfig))
+ scenarios = make_scenarios(types, cacheSize, fileConfig)
# We want about 22K records that total about 130Mb. That is an average
# of 6196 bytes per record. Half the records should be smaller, about
@@ -97,7 +96,7 @@ class test_compact02(wttest.WiredTigerTestCase):
self.home = '.'
conn_params = 'create,' + \
cacheSize + ',error_prefix="%s: ",' % self.shortid() + \
- 'statistics=(fast)'
+ 'statistics=(fast),eviction_dirty_target=99,eviction_dirty_trigger=99'
try:
self.conn = wiredtiger.wiredtiger_open(self.home, conn_params)
except wiredtiger.WiredTigerError as e:
diff --git a/test/suite/test_compress01.py b/test/suite/test_compress01.py
index 94c748fc3e5..2a7e2a7e1a8 100644
--- a/test/suite/test_compress01.py
+++ b/test/suite/test_compress01.py
@@ -32,7 +32,7 @@
import os, run
import wiredtiger, wttest
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# Test basic compression
class test_compress01(wttest.WiredTigerTestCase):
@@ -46,7 +46,7 @@ class test_compress01(wttest.WiredTigerTestCase):
('snappy', dict(compress='snappy')),
('none', dict(compress=None)),
]
- scenarios = number_scenarios(multiply_scenarios('.', types, compress))
+ scenarios = make_scenarios(types, compress)
nrecords = 10000
bigvalue = "abcdefghij" * 1000
diff --git a/test/suite/test_config03.py b/test/suite/test_config03.py
index e91c5de62f8..88ca6ae3f39 100644
--- a/test/suite/test_config03.py
+++ b/test/suite/test_config03.py
@@ -69,14 +69,11 @@ class test_config03(test_base03.test_base03):
'eviction_trigger', 'hazard_max', 'multiprocess',
'session_max', 'verbose' ]
- all_scenarios = wtscenario.multiply_scenarios('_',
+ scenarios = wtscenario.make_scenarios(
cache_size_scenarios, create_scenarios, error_prefix_scenarios,
eviction_target_scenarios, eviction_trigger_scenarios,
hazard_max_scenarios, multiprocess_scenarios, session_max_scenarios,
- transactional_scenarios, verbose_scenarios)
-
- scenarios = wtscenario.prune_scenarios(all_scenarios, 1000)
- scenarios = wtscenario.number_scenarios(scenarios)
+ transactional_scenarios, verbose_scenarios, prune=1000)
#wttest.WiredTigerTestCase.printVerbose(2, 'test_config03: running ' + \
# str(len(scenarios)) + ' of ' + \
diff --git a/test/suite/test_cursor01.py b/test/suite/test_cursor01.py
index cf39d4a4ba4..8c66042eec0 100644
--- a/test/suite/test_cursor01.py
+++ b/test/suite/test_cursor01.py
@@ -27,7 +27,7 @@
# OTHER DEALINGS IN THE SOFTWARE.
import wiredtiger, wttest
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_cursor01.py
# Cursor operations
@@ -41,7 +41,7 @@ class test_cursor01(wttest.WiredTigerTestCase):
table_name1 = 'test_cursor01'
nentries = 10
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file-col', dict(tablekind='col',uri='file')),
('file-fix', dict(tablekind='fix',uri='file')),
('file-row', dict(tablekind='row',uri='file')),
diff --git a/test/suite/test_cursor02.py b/test/suite/test_cursor02.py
index eb1ba4dfc41..a83d30def47 100644
--- a/test/suite/test_cursor02.py
+++ b/test/suite/test_cursor02.py
@@ -28,7 +28,7 @@
import wiredtiger
from test_cursor_tracker import TestCursorTracker
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_cursor02.py
# Cursor operations on small tables.
@@ -39,7 +39,7 @@ class test_cursor02(TestCursorTracker):
key/value content and to track/verify content
after inserts and removes.
"""
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('row', dict(tablekind='row', uri='table')),
('lsm-row', dict(tablekind='row', uri='lsm')),
('col', dict(tablekind='col', uri='table')),
diff --git a/test/suite/test_cursor03.py b/test/suite/test_cursor03.py
index 63237f942ca..b4598483c12 100644
--- a/test/suite/test_cursor03.py
+++ b/test/suite/test_cursor03.py
@@ -28,7 +28,7 @@
import wiredtiger
from test_cursor_tracker import TestCursorTracker
-from wtscenario import multiply_scenarios
+from wtscenario import make_scenarios
# test_cursor03.py
# Cursor operations on tables of various sizes, with key/values of various
@@ -40,7 +40,7 @@ class test_cursor03(TestCursorTracker):
key/value content and to track/verify content
after inserts and removes.
"""
- scenarios = multiply_scenarios('.', [
+ scenarios = make_scenarios([
('row', dict(tablekind='row', keysize=None, valsize=None, uri='table')),
('lsm-row', dict(tablekind='row', keysize=None, valsize=None, uri='lsm')),
('col', dict(tablekind='col', keysize=None, valsize=None, uri='table')),
diff --git a/test/suite/test_cursor04.py b/test/suite/test_cursor04.py
index 6576c623f8a..8cbf922b5eb 100644
--- a/test/suite/test_cursor04.py
+++ b/test/suite/test_cursor04.py
@@ -27,7 +27,7 @@
# OTHER DEALINGS IN THE SOFTWARE.
import wiredtiger, wttest
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_base04.py
# Cursor operations
@@ -38,7 +38,7 @@ class test_cursor04(wttest.WiredTigerTestCase):
table_name1 = 'test_cursor04'
nentries = 20
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('row', dict(tablekind='row', uri='table')),
('lsm-row', dict(tablekind='row', uri='lsm')),
('col', dict(tablekind='col', uri='table')),
diff --git a/test/suite/test_cursor06.py b/test/suite/test_cursor06.py
index 5545c862dd7..3a6240bc6c7 100644
--- a/test/suite/test_cursor06.py
+++ b/test/suite/test_cursor06.py
@@ -29,13 +29,13 @@
import wiredtiger, wttest
from helper import key_populate, value_populate, simple_populate
from helper import complex_value_populate, complex_populate
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_cursor06.py
# Test cursor reconfiguration.
class test_cursor06(wttest.WiredTigerTestCase):
name = 'reconfigure'
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file-r', dict(type='file:', config='key_format=r', complex=0)),
('file-S', dict(type='file:', config='key_format=S', complex=0)),
('lsm-S', dict(type='lsm:', config='key_format=S', complex=0)),
diff --git a/test/suite/test_cursor07.py b/test/suite/test_cursor07.py
index d8de0874d7f..d6078183fc1 100644
--- a/test/suite/test_cursor07.py
+++ b/test/suite/test_cursor07.py
@@ -33,7 +33,7 @@
import fnmatch, os, shutil, run, time
from suite_subprocess import suite_subprocess
from wiredtiger import stat
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
import wttest
class test_cursor07(wttest.WiredTigerTestCase, suite_subprocess):
@@ -44,7 +44,7 @@ class test_cursor07(wttest.WiredTigerTestCase, suite_subprocess):
# test that scenario for log cursors.
nkeys = 7000
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('regular', dict(reopen=False)),
('reopen', dict(reopen=True))
])
diff --git a/test/suite/test_cursor08.py b/test/suite/test_cursor08.py
index 1a379518224..3f8f50defa7 100644
--- a/test/suite/test_cursor08.py
+++ b/test/suite/test_cursor08.py
@@ -33,7 +33,7 @@
import fnmatch, os, shutil, run, time
from suite_subprocess import suite_subprocess
from wiredtiger import stat, WiredTigerError
-from wtscenario import multiply_scenarios, number_scenarios, check_scenarios
+from wtscenario import make_scenarios
import wttest
class test_cursor08(wttest.WiredTigerTestCase, suite_subprocess):
@@ -42,17 +42,17 @@ class test_cursor08(wttest.WiredTigerTestCase, suite_subprocess):
uri = 'table:' + tablename
nkeys = 500
- reopens = check_scenarios([
+ reopens = [
('regular', dict(reopen=False)),
('reopen', dict(reopen=True))
- ])
- compress = check_scenarios([
+ ]
+ compress = [
('nop', dict(compress='nop')),
('snappy', dict(compress='snappy')),
('zlib', dict(compress='zlib')),
('none', dict(compress='none')),
- ])
- scenarios = number_scenarios(multiply_scenarios('.', reopens, compress))
+ ]
+ scenarios = make_scenarios(reopens, compress)
# Load the compression extension, and enable it for logging.
def conn_config(self, dir):
return 'log=(archive=false,enabled,file_max=%s,' % self.logmax + \
diff --git a/test/suite/test_cursor09.py b/test/suite/test_cursor09.py
index b77336bc1d7..a05caea4f1f 100644
--- a/test/suite/test_cursor09.py
+++ b/test/suite/test_cursor09.py
@@ -29,12 +29,12 @@
import wiredtiger, wttest
from helper import key_populate, value_populate, simple_populate
from helper import complex_populate, complex_value_populate
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_cursor09.py
# JIRA WT-2217: insert resets key/value "set".
class test_cursor09(wttest.WiredTigerTestCase):
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file-r', dict(type='file:', config='key_format=r', complex=0)),
('file-S', dict(type='file:', config='key_format=S', complex=0)),
('lsm-S', dict(type='lsm:', config='key_format=S', complex=0)),
diff --git a/test/suite/test_cursor_compare.py b/test/suite/test_cursor_compare.py
index 130f4e8ca96..179e20682d2 100644
--- a/test/suite/test_cursor_compare.py
+++ b/test/suite/test_cursor_compare.py
@@ -29,7 +29,7 @@
import wiredtiger, wttest, exceptions
from helper import complex_populate, simple_populate, key_populate
from helper import complex_populate_index_name
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# Test cursor comparisons.
class test_cursor_comparison(wttest.WiredTigerTestCase):
@@ -45,7 +45,7 @@ class test_cursor_comparison(wttest.WiredTigerTestCase):
('recno', dict(keyfmt='r')),
('string', dict(keyfmt='S'))
]
- scenarios = number_scenarios(multiply_scenarios('.', types, keyfmt))
+ scenarios = make_scenarios(types, keyfmt)
def test_cursor_comparison(self):
uri = self.type + 'compare'
diff --git a/test/suite/test_cursor_pin.py b/test/suite/test_cursor_pin.py
index 329759d8fc8..1aea49c32b0 100644
--- a/test/suite/test_cursor_pin.py
+++ b/test/suite/test_cursor_pin.py
@@ -28,7 +28,7 @@
import wiredtiger, wttest
from helper import simple_populate, key_populate, value_populate
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_cursor_pin.py
# Smoke-test fast-path searching for pinned pages before re-descending
@@ -37,7 +37,7 @@ class test_cursor_pin(wttest.WiredTigerTestCase):
uri = 'file:cursor_pin'
nentries = 10000
config = 'allocation_size=512,leaf_page_max=512,value_format=S,key_format='
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('recno', dict(keyfmt='r')),
('string', dict(keyfmt='S')),
])
diff --git a/test/suite/test_cursor_random.py b/test/suite/test_cursor_random.py
index 16ce5cae685..8d7c230043b 100644
--- a/test/suite/test_cursor_random.py
+++ b/test/suite/test_cursor_random.py
@@ -29,7 +29,7 @@
import wiredtiger, wttest
from helper import complex_populate, simple_populate
from helper import key_populate, value_populate
-from wtscenario import check_scenarios, multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# test_cursor_random.py
# Cursor next_random operations
@@ -42,7 +42,7 @@ class test_cursor_random(wttest.WiredTigerTestCase):
('sample', dict(config='next_random=true,next_random_sample_size=35')),
('not-sample', dict(config='next_random=true'))
]
- scenarios =number_scenarios(multiply_scenarios('.', types, config))
+ scenarios = make_scenarios(types, config)
# Check that opening a random cursor on a row-store returns not-supported
# for methods other than next, reconfigure and reset, and next returns
@@ -136,7 +136,7 @@ class test_cursor_random(wttest.WiredTigerTestCase):
# Check that opening a random cursor on column-store returns not-supported.
class test_cursor_random_column(wttest.WiredTigerTestCase):
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file', dict(uri='file:random')),
('table', dict(uri='table:random'))
])
@@ -159,7 +159,7 @@ class test_cursor_random_invisible(wttest.WiredTigerTestCase):
('sample', dict(config='next_random=true,next_random_sample_size=35')),
('not-sample', dict(config='next_random=true'))
]
- scenarios =number_scenarios(multiply_scenarios('.', types, config))
+ scenarios = make_scenarios(types, config)
def test_cursor_random_invisible_all(self):
uri = self.type
diff --git a/test/suite/test_cursor_random02.py b/test/suite/test_cursor_random02.py
index 84ac0279fc4..93aa97f2282 100644
--- a/test/suite/test_cursor_random02.py
+++ b/test/suite/test_cursor_random02.py
@@ -29,7 +29,7 @@
import wiredtiger, wttest
from helper import complex_populate, simple_populate
from helper import key_populate, value_populate
-from wtscenario import check_scenarios, multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# test_cursor_random02.py
# Cursor next_random operations
@@ -46,7 +46,7 @@ class test_cursor_random02(wttest.WiredTigerTestCase):
('10000', dict(records=10000)),
('50000', dict(records=50000)),
]
- scenarios = number_scenarios(multiply_scenarios('.', config, records))
+ scenarios = make_scenarios(config, records)
# Check that next_random works in the presence of a larger set of values,
# where the values are in an insert list.
diff --git a/test/suite/test_drop.py b/test/suite/test_drop.py
index 52ea7251ab5..a3e80214295 100644
--- a/test/suite/test_drop.py
+++ b/test/suite/test_drop.py
@@ -30,7 +30,7 @@ import os, time
import wiredtiger, wttest
from helper import confirm_does_not_exist, complex_populate, \
complex_populate_index_name, simple_populate
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_drop.py
# session level drop operation
@@ -38,7 +38,7 @@ class test_drop(wttest.WiredTigerTestCase):
name = 'test_drop'
extra_config = ''
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file', dict(uri='file:')),
('table', dict(uri='table:')),
('table-lsm', dict(uri='table:', extra_config=',type=lsm')),
diff --git a/test/suite/test_dump.py b/test/suite/test_dump.py
index 85196174c1b..280d5870359 100644
--- a/test/suite/test_dump.py
+++ b/test/suite/test_dump.py
@@ -30,9 +30,10 @@ import os, shutil
import wiredtiger, wttest
from helper import \
complex_populate, complex_populate_check, \
- simple_populate, simple_populate_check
+ simple_populate, simple_populate_check, \
+ simple_index_populate, simple_index_populate_check
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# test_dump.py
# Utilities: wt dump
@@ -64,6 +65,9 @@ class test_dump(wttest.WiredTigerTestCase, suite_subprocess):
('table-simple', dict(uri='table:', config='', lsm=False,
populate=simple_populate,
populate_check=simple_populate_check)),
+ ('table-index', dict(uri='table:', config='', lsm=False,
+ populate=simple_index_populate,
+ populate_check=simple_index_populate_check)),
('table-simple-lsm', dict(uri='table:', config='type=lsm', lsm=True,
populate=simple_populate,
populate_check=simple_populate_check)),
@@ -74,8 +78,7 @@ class test_dump(wttest.WiredTigerTestCase, suite_subprocess):
populate=complex_populate,
populate_check=complex_populate_check))
]
- scenarios = number_scenarios(
- multiply_scenarios('.', types, keyfmt, dumpfmt))
+ scenarios = make_scenarios(types, keyfmt, dumpfmt)
# Extract the values lines from the dump output.
def value_lines(self, fname):
diff --git a/test/suite/test_dupc.py b/test/suite/test_dupc.py
index ec55a36df4c..12b18f1ba79 100644
--- a/test/suite/test_dupc.py
+++ b/test/suite/test_dupc.py
@@ -33,7 +33,7 @@
import os, time
import wiredtiger, wttest
from helper import complex_populate, key_populate, simple_populate
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# Test session.open_cursor with cursor duplication.
class test_duplicate_cursor(wttest.WiredTigerTestCase):
@@ -42,7 +42,7 @@ class test_duplicate_cursor(wttest.WiredTigerTestCase):
config = 'key_format='
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file-r', dict(uri='file:', fmt='r')),
('file-S', dict(uri='file:', fmt='S')),
('table-r', dict(uri='table:', fmt='r')),
diff --git a/test/suite/test_durability01.py b/test/suite/test_durability01.py
index f578a79baf1..32cdd795914 100644
--- a/test/suite/test_durability01.py
+++ b/test/suite/test_durability01.py
@@ -34,7 +34,6 @@
import fnmatch, os, shutil, time
from helper import copy_wiredtiger_home
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios
import wttest
class test_durability01(wttest.WiredTigerTestCase, suite_subprocess):
diff --git a/test/suite/test_empty.py b/test/suite/test_empty.py
index 50b79db70e4..9fe88107412 100644
--- a/test/suite/test_empty.py
+++ b/test/suite/test_empty.py
@@ -29,14 +29,14 @@
import os
import wiredtiger, wttest
from helper import key_populate
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_empty.py
# Test that empty objects don't write anything other than a single sector.
class test_empty(wttest.WiredTigerTestCase):
name = 'test_empty'
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file-r', dict(type='file:', fmt='r')),
('file-S', dict(type='file:', fmt='S')),
('table-r', dict(type='table:', fmt='r')),
diff --git a/test/suite/test_encrypt01.py b/test/suite/test_encrypt01.py
index 0f2782204d2..d48605aaa83 100644
--- a/test/suite/test_encrypt01.py
+++ b/test/suite/test_encrypt01.py
@@ -32,7 +32,7 @@
import os, run, random
import wiredtiger, wttest
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# Test basic encryption
class test_encrypt01(wttest.WiredTigerTestCase):
@@ -60,8 +60,7 @@ class test_encrypt01(wttest.WiredTigerTestCase):
('none-snappy', dict(log_compress=None, block_compress='snappy')),
('snappy-lz4', dict(log_compress='snappy', block_compress='lz4')),
]
- scenarios = number_scenarios(multiply_scenarios('.', types,
- encrypt, compress))
+ scenarios = make_scenarios(types, encrypt, compress)
nrecords = 5000
bigvalue = "abcdefghij" * 1001 # len(bigvalue) = 10010
diff --git a/test/suite/test_encrypt02.py b/test/suite/test_encrypt02.py
index 0376b3e42e4..648686274c4 100644
--- a/test/suite/test_encrypt02.py
+++ b/test/suite/test_encrypt02.py
@@ -33,7 +33,7 @@
import os, run, random
import wiredtiger, wttest
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# Test basic encryption
class test_encrypt02(wttest.WiredTigerTestCase, suite_subprocess):
@@ -48,7 +48,7 @@ class test_encrypt02(wttest.WiredTigerTestCase, suite_subprocess):
('keyid-pass', dict( encrypt='rotn', encrypt_args='name=rotn,keyid=11',
secret_arg='ABC')),
]
- scenarios = number_scenarios(encrypt_type)
+ scenarios = make_scenarios(encrypt_type)
nrecords = 5000
bigvalue = "abcdefghij" * 1001 # len(bigvalue) = 10010
diff --git a/test/suite/test_encrypt03.py b/test/suite/test_encrypt03.py
index 702d0a2369f..0dc1755d6eb 100644
--- a/test/suite/test_encrypt03.py
+++ b/test/suite/test_encrypt03.py
@@ -32,7 +32,7 @@
import os, run, random
import wiredtiger, wttest
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# Test basic encryption
class test_encrypt03(wttest.WiredTigerTestCase):
@@ -48,7 +48,7 @@ class test_encrypt03(wttest.WiredTigerTestCase):
#('noname', dict( sys_encrypt='rotn', sys_encrypt_args=',keyid=11',
# file_encrypt='none', file_encrypt_args=',keyid=13')),
]
- scenarios = number_scenarios(multiply_scenarios('.', types, encrypt))
+ scenarios = make_scenarios(types, encrypt)
# Override WiredTigerTestCase, we have extensions.
def setUpConnectionOpen(self, dir):
diff --git a/test/suite/test_encrypt04.py b/test/suite/test_encrypt04.py
index d7c12d2cba8..97d2cee03a0 100644
--- a/test/suite/test_encrypt04.py
+++ b/test/suite/test_encrypt04.py
@@ -32,7 +32,7 @@
import os, run, random
import wiredtiger, wttest
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
from suite_subprocess import suite_subprocess
# Test basic encryption with mismatched configuration
@@ -69,8 +69,7 @@ class test_encrypt04(wttest.WiredTigerTestCase, suite_subprocess):
('rotn11xyz_and_clear', dict( name2='rotn', keyid2='11',
secretkey2='XYZ', fileinclear2=True))
]
- scenarios = number_scenarios(multiply_scenarios \
- ('.', encrypt_scen_1, encrypt_scen_2))
+ scenarios = make_scenarios(encrypt_scen_1, encrypt_scen_2)
nrecords = 5000
bigvalue = "abcdefghij" * 1001 # len(bigvalue) = 10010
diff --git a/test/suite/test_encrypt05.py b/test/suite/test_encrypt05.py
index afd8a8103f9..19a3522b3d5 100644
--- a/test/suite/test_encrypt05.py
+++ b/test/suite/test_encrypt05.py
@@ -32,7 +32,7 @@
import os, run, random
import wiredtiger, wttest
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# Test raw compression with encryption
class test_encrypt05(wttest.WiredTigerTestCase):
@@ -44,8 +44,7 @@ class test_encrypt05(wttest.WiredTigerTestCase):
compress = [
('zlib', dict(log_compress='zlib', block_compress='zlib')),
]
- scenarios = number_scenarios(multiply_scenarios('.',
- encrypt, compress))
+ scenarios = make_scenarios(encrypt, compress)
nrecords = 500
bigvalue = 'a' * 500 # we use values that will definitely give compression
diff --git a/test/suite/test_encrypt06.py b/test/suite/test_encrypt06.py
index 5c88b698aeb..9300583d099 100644
--- a/test/suite/test_encrypt06.py
+++ b/test/suite/test_encrypt06.py
@@ -32,7 +32,7 @@
import os, run, random
import wiredtiger, wttest
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# Test encryption, when on, does not leak any information
class test_encrypt06(wttest.WiredTigerTestCase):
@@ -86,7 +86,7 @@ class test_encrypt06(wttest.WiredTigerTestCase):
file0_encrypt='rotn', file0_encrypt_args=key13, encrypt0=True,
file1_encrypt='none', file1_encrypt_args='', encrypt1=False)),
]
- scenarios = number_scenarios(multiply_scenarios('.', encrypt, storagetype))
+ scenarios = make_scenarios(encrypt, storagetype)
nrecords = 1000
# Override WiredTigerTestCase, we have extensions.
diff --git a/test/suite/test_encrypt07.py b/test/suite/test_encrypt07.py
index 30f28e096a8..97ab1987d4f 100644
--- a/test/suite/test_encrypt07.py
+++ b/test/suite/test_encrypt07.py
@@ -32,7 +32,6 @@
import os, run, string, codecs
import wiredtiger, wttest
-from wtscenario import multiply_scenarios, number_scenarios
import test_salvage
# Run the regular salvage test, but with encryption on
diff --git a/test/suite/test_excl.py b/test/suite/test_excl.py
index 90926f51877..cea5756dfbb 100644
--- a/test/suite/test_excl.py
+++ b/test/suite/test_excl.py
@@ -27,11 +27,11 @@
# OTHER DEALINGS IN THE SOFTWARE.
import wiredtiger, wttest
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# Test session.create with the exclusive configuration.
class test_create_excl(wttest.WiredTigerTestCase):
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file', dict(type='file:')),
('table', dict(type='table:'))
])
diff --git a/test/suite/test_huffman01.py b/test/suite/test_huffman01.py
index d71198e3151..be307550f2e 100644
--- a/test/suite/test_huffman01.py
+++ b/test/suite/test_huffman01.py
@@ -28,7 +28,7 @@
import os
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
import wiredtiger, wttest
# test_huffman01.py
@@ -52,7 +52,7 @@ class test_huffman01(wttest.WiredTigerTestCase, suite_subprocess):
('utf8', dict(huffval=',huffman_value=utf8t8file',vfile='t8file')),
('utf16', dict(huffval=',huffman_value=utf16t16file',vfile='t16file')),
]
- scenarios = number_scenarios(multiply_scenarios('.', huffkey, huffval))
+ scenarios = make_scenarios(huffkey, huffval)
def test_huffman(self):
dir = self.conn.get_home()
diff --git a/test/suite/test_huffman02.py b/test/suite/test_huffman02.py
index aa4329415a4..d74704daf58 100644
--- a/test/suite/test_huffman02.py
+++ b/test/suite/test_huffman02.py
@@ -28,7 +28,7 @@
import os
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
import wiredtiger, wttest
# test_huffman02.py
@@ -48,7 +48,7 @@ class test_huffman02(wttest.WiredTigerTestCase, suite_subprocess):
('file', dict(uri='file:huff')),
('table', dict(uri='table:huff')),
]
- scenarios = number_scenarios(multiply_scenarios('.',type,huffkey, huffval))
+ scenarios = make_scenarios(type, huffkey, huffval)
def test_huffman(self):
if self.keybad or self.valbad:
diff --git a/test/suite/test_index02.py b/test/suite/test_index02.py
new file mode 100644
index 00000000000..9f39df003b1
--- /dev/null
+++ b/test/suite/test_index02.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2016 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wiredtiger, wttest
+
+# test_index02.py
+# test search_near in indices
+class test_index02(wttest.WiredTigerTestCase):
+ '''Test search_near in indices'''
+
+ basename = 'test_index02'
+ tablename = 'table:' + basename
+ indexname = 'index:' + basename + ":inverse"
+
+ def test_search_near(self):
+ '''Create a table, look for a nonexistent key'''
+ self.session.create(self.tablename, 'key_format=r,value_format=Q,columns=(k,v)')
+ self.session.create(self.indexname, 'columns=(v)')
+ cur = self.session.open_cursor(self.tablename, None, "append")
+ cur.set_value(1)
+ cur.insert()
+ cur.set_value(5)
+ cur.insert()
+ cur.set_value(5)
+ cur.insert()
+ cur.set_value(5)
+ cur.insert()
+ cur.set_value(10)
+ cur.insert()
+
+ # search near should find a match
+ cur2 = self.session.open_cursor(self.indexname, None, None)
+ cur2.set_key(5)
+ self.assertEqual(cur2.search_near(), 0)
+
+ # Retry after reopening
+ self.reopen_conn()
+ cur3 = self.session.open_cursor(self.indexname, None, None)
+ cur3.set_key(5)
+ self.assertEqual(cur3.search_near(), 0)
+
+if __name__ == '__main__':
+ wttest.run()
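test_index02 inserts duplicate values (three rows with value 5) and asserts that an index lookup via search_near finds an exact match both before and after reopening the connection. The sketch below interprets the return value the assertion checks; in the Python API search_near() returns the comparison result directly (the C API reports it through an 'exact' out-parameter), and the helper here is illustrative, not part of the test:

    # Illustrative helper: interprets the value asserted on above.
    def describe_search_near(cursor, key):
        cursor.set_key(key)
        exact = cursor.search_near()
        if exact == 0:
            return 'positioned on an exact match'
        return ('positioned on the nearest smaller key' if exact < 0
                else 'positioned on the nearest larger key')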
diff --git a/test/suite/test_inmem01.py b/test/suite/test_inmem01.py
index 875ebb2bfa7..c6ae7ff6c4b 100644
--- a/test/suite/test_inmem01.py
+++ b/test/suite/test_inmem01.py
@@ -30,95 +30,73 @@ import wiredtiger, wttest
from time import sleep
from helper import simple_populate, simple_populate_check
from helper import key_populate, value_populate
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_inmem01.py
# Test in-memory configuration.
class test_inmem01(wttest.WiredTigerTestCase):
- name = 'inmem01'
- """
- In memory configuration still creates files on disk, but has limits
- in terms of how much data can be written.
- Test various scenarios including:
- - Add a small amount of data, ensure it is present.
- - Add more data than would fit into the configured cache.
- - Fill the cache with data, remove some data, ensure more data can be
- inserted (after a reasonable amount of time for space to be reclaimed)
- - Run queries after adding, removing and re-inserting data.
- - Try out keeping a cursor open while adding new data.
- """
- scenarios = check_scenarios([
- ('col', dict(tablekind='col')),
- # Fixed length is very slow, disable it for now
- #('fix', dict(tablekind='fix')),
- ('row', dict(tablekind='row'))
- ])
-
- # create an in-memory database
- conn_config = 'cache_size=5MB,' + \
- 'file_manager=(close_idle_time=0),in_memory=true'
+ uri = 'table:inmem01'
+ conn_config = \
+ 'cache_size=5MB,file_manager=(close_idle_time=0),in_memory=true'
+ table_config = ',memory_page_max=32k,leaf_page_max=4k'
- def get_table_config(self):
- kf = 'key_format='
- vf = 'value_format='
- if self.tablekind == 'row':
- kf = kf + 'S'
- else:
- kf = kf + 'r' # record format
- if self.tablekind == 'fix':
- vf = vf + '8t'
- else:
- vf = vf + 'S'
- return 'memory_page_max=32k,leaf_page_max=4k,' + kf + ',' + vf
+ scenarios = make_scenarios([
+ ('col', dict(fmt='key_format=r,value_format=S')),
+ ('fix', dict(fmt='key_format=r,value_format=8t')),
+ ('row', dict(fmt='key_format=S,value_format=S'))
+ ])
+ # Smoke-test in-memory configurations, add a small amount of data and
+ # ensure it's visible.
def test_insert(self):
- table_config = self.get_table_config()
- simple_populate(self,
- "table:" + self.name, table_config, 1000)
- # Ensure the data is visible.
- simple_populate_check(self, 'table:' + self.name, 1000)
+ config = self.fmt + self.table_config
+ simple_populate(self, self.uri, config, 1000)
+ simple_populate_check(self, self.uri, 1000)
+ # Add more data than fits into the configured cache and verify it fails.
def test_insert_over_capacity(self):
- table_config = self.get_table_config()
+ config = self.fmt + self.table_config
msg = '/WT_CACHE_FULL.*/'
self.assertRaisesHavingMessage(wiredtiger.WiredTigerError,
- lambda:simple_populate(self,
- "table:" + self.name, table_config, 10000000), msg)
+ lambda:simple_populate(self, self.uri, config, 10000000), msg)
- # Figure out the last key we inserted.
- cursor = self.session.open_cursor('table:' + self.name, None)
+ # Figure out the last key we successfully inserted, and check all
+ # previous inserts are still there.
+ cursor = self.session.open_cursor(self.uri, None)
cursor.prev()
last_key = int(cursor.get_key())
- simple_populate_check(self, 'table:' + self.name, last_key)
+ simple_populate_check(self, self.uri, last_key)
+ # Fill the cache with data, remove some data, ensure more data can be
+ # inserted (after a reasonable amount of time for space to be reclaimed).
def test_insert_over_delete(self):
- table_config = self.get_table_config()
+ config = self.fmt + self.table_config
msg = '/WT_CACHE_FULL.*/'
self.assertRaisesHavingMessage(wiredtiger.WiredTigerError,
- lambda:simple_populate(self,
- "table:" + self.name, table_config, 10000000), msg)
+ lambda:simple_populate(self, self.uri, config, 10000000), msg)
# Now that the database contains as much data as will fit into
# the configured cache, verify removes succeed.
- cursor = self.session.open_cursor('table:' + self.name, None)
+ cursor = self.session.open_cursor(self.uri, None)
for i in range(1, 100):
cursor.set_key(key_populate(cursor, i))
cursor.remove()
+ # Run queries after adding, removing and re-inserting data.
+ # Try out keeping a cursor open while adding new data.
def test_insert_over_delete_replace(self):
- table_config = self.get_table_config()
+ config = self.fmt + self.table_config
msg = '/WT_CACHE_FULL.*/'
self.assertRaisesHavingMessage(wiredtiger.WiredTigerError,
- lambda:simple_populate(self,
- "table:" + self.name, table_config, 10000000), msg)
+ lambda:simple_populate(self, self.uri, config, 10000000), msg)
- cursor = self.session.open_cursor('table:' + self.name, None)
+ cursor = self.session.open_cursor(self.uri, None)
cursor.prev()
last_key = int(cursor.get_key())
# Now that the database contains as much data as will fit into
# the configured cache, verify removes succeed.
- cursor = self.session.open_cursor('table:' + self.name, None)
+ cursor = self.session.open_cursor(self.uri, None)
for i in range(1, last_key / 4, 1):
cursor.set_key(key_populate(cursor, i))
cursor.remove()
diff --git a/test/suite/test_intpack.py b/test/suite/test_intpack.py
index 187b2d7f579..b0cece09494 100644
--- a/test/suite/test_intpack.py
+++ b/test/suite/test_intpack.py
@@ -31,7 +31,7 @@
#
import wiredtiger, wttest
-from wtscenario import check_scenarios, number_scenarios
+from wtscenario import make_scenarios
class PackTester:
def __init__(self, formatcode, validlow, validhigh, equals):
@@ -126,22 +126,27 @@ class PackTester:
class test_intpack(wttest.WiredTigerTestCase):
name = 'test_intpack'
- scenarios = check_scenarios([
- ('b', dict(formatcode='b', low=-128, high=127, nbits=8)),
- ('B', dict(formatcode='B', low=0, high=255, nbits=8)),
- ('8t', dict(formatcode='8t', low=0, high=255, nbits=8)),
- ('5t', dict(formatcode='5t', low=0, high=31, nbits=5)),
- ('h', dict(formatcode='h', low=-32768, high=32767, nbits=16)),
- ('H', dict(formatcode='H', low=0, high=65535, nbits=16)),
- ('i', dict(formatcode='i', low=-2147483648, high=2147483647, nbits=32)),
- ('I', dict(formatcode='I', low=0, high=4294967295, nbits=32)),
- ('l', dict(formatcode='l', low=-2147483648, high=2147483647, nbits=32)),
- ('L', dict(formatcode='L', low=0, high=4294967295, nbits=32)),
- ('q', dict(formatcode='q', low=-9223372036854775808,
+ # We have to be a bit verbose with naming here, as there can be problems
+ # with case-insensitive test names.
+
+ scenarios = make_scenarios([
+ ('int8_t_b', dict(formatcode='b', low=-128, high=127, nbits=8)),
+ ('uint8_t_B', dict(formatcode='B', low=0, high=255, nbits=8)),
+ ('fix_len_8t', dict(formatcode='8t', low=0, high=255, nbits=8)),
+ ('fix_len_5t', dict(formatcode='5t', low=0, high=31, nbits=5)),
+ ('int16_t_h', dict(formatcode='h', low=-32768, high=32767, nbits=16)),
+ ('uint16_t_H', dict(formatcode='H', low=0, high=65535, nbits=16)),
+ ('int32_t_i', dict(formatcode='i', low=-2147483648, high=2147483647,
+ nbits=32)),
+ ('uint32_t_I', dict(formatcode='I', low=0, high=4294967295, nbits=32)),
+ ('int32_t_l', dict(formatcode='l', low=-2147483648, high=2147483647,
+ nbits=32)),
+ ('uint32_t_L', dict(formatcode='L', low=0, high=4294967295, nbits=32)),
+ ('int64_t_q', dict(formatcode='q', low=-9223372036854775808,
high=9223372036854775807, nbits=64)),
- ('Q', dict(formatcode='Q', low=0, high=18446744073709551615, nbits=64)),
+ ('uint64_t_Q', dict(formatcode='Q', low=0, high=18446744073709551615,
+ nbits=64)),
])
- scenarios = check_scenarios(number_scenarios(scenarios))
def test_packing(self):
pt = PackTester(self.formatcode, self.low, self.high, self.assertEquals)
diff --git a/test/suite/test_join01.py b/test/suite/test_join01.py
index f8d96a2718a..f3b13026896 100644
--- a/test/suite/test_join01.py
+++ b/test/suite/test_join01.py
@@ -27,7 +27,7 @@
# OTHER DEALINGS IN THE SOFTWARE.
import wiredtiger, wttest
-from wtscenario import check_scenarios, multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# test_join01.py
# Join operations
@@ -67,11 +67,9 @@ class test_join01(wttest.WiredTigerTestCase):
('order=2', dict(join_order=2)),
('order=3', dict(join_order=3)),
]
- scenarios = number_scenarios(multiply_scenarios('.', type_scen,
- bloom0_scen, bloom1_scen,
- projection_scen,
- nested_scen, stats_scen,
- order_scen))
+ scenarios = make_scenarios(type_scen, bloom0_scen, bloom1_scen,
+ projection_scen, nested_scen, stats_scen,
+ order_scen)
# We need statistics for these tests.
conn_config = 'statistics=(all)'
diff --git a/test/suite/test_join02.py b/test/suite/test_join02.py
index a691c499cf6..db11ed01039 100644
--- a/test/suite/test_join02.py
+++ b/test/suite/test_join02.py
@@ -27,7 +27,7 @@
# OTHER DEALINGS IN THE SOFTWARE.
import wiredtiger, wttest, suite_random
-from wtscenario import check_scenarios, multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# test_join02.py
# Join operations
@@ -48,7 +48,7 @@ class test_join02(wttest.WiredTigerTestCase):
('nobloom', dict(usebloom=False))
]
- scenarios = number_scenarios(multiply_scenarios('.', keyscen, bloomscen))
+ scenarios = make_scenarios(keyscen, bloomscen)
# Start our range from 1, since WT record numbers start at 1,
# it makes things work out nicer.
diff --git a/test/suite/test_join03.py b/test/suite/test_join03.py
index 613d2396b07..af19d934d70 100644
--- a/test/suite/test_join03.py
+++ b/test/suite/test_join03.py
@@ -28,7 +28,6 @@
import os
import wiredtiger, wttest, run
-from wtscenario import check_scenarios, multiply_scenarios, number_scenarios
# test_join03.py
# Join operations
diff --git a/test/suite/test_join04.py b/test/suite/test_join04.py
index 7e2afb15285..b270cb7a21c 100644
--- a/test/suite/test_join04.py
+++ b/test/suite/test_join04.py
@@ -28,7 +28,6 @@
import os
import wiredtiger, wttest, run
-from wtscenario import check_scenarios, multiply_scenarios, number_scenarios
# test_join04.py
# Join operations
diff --git a/test/suite/test_join05.py b/test/suite/test_join05.py
index ef2be4c6460..7dcb3e08911 100644
--- a/test/suite/test_join05.py
+++ b/test/suite/test_join05.py
@@ -27,7 +27,6 @@
# OTHER DEALINGS IN THE SOFTWARE.
import wiredtiger, wttest
-from wtscenario import check_scenarios, multiply_scenarios, number_scenarios
# test_join05.py
# Tests based on JIRA reports
diff --git a/test/suite/test_join06.py b/test/suite/test_join06.py
index 9af6f93792f..5fedd365712 100644
--- a/test/suite/test_join06.py
+++ b/test/suite/test_join06.py
@@ -28,7 +28,7 @@
import os
import wiredtiger, wttest, run
-from wtscenario import check_scenarios, multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# test_join06.py
# Join operations
@@ -46,7 +46,7 @@ class test_join06(wttest.WiredTigerTestCase):
('nobloom', dict(bloom=False))
]
- scenarios = number_scenarios(multiply_scenarios('.', isoscen, bloomscen))
+ scenarios = make_scenarios(isoscen, bloomscen)
def gen_values(self, i):
s = str(i) # 345 => "345"
diff --git a/test/suite/test_join07.py b/test/suite/test_join07.py
index 36e91361329..2a32e678d72 100644
--- a/test/suite/test_join07.py
+++ b/test/suite/test_join07.py
@@ -28,7 +28,7 @@
import os, re, run
import wiredtiger, wttest, suite_random
-from wtscenario import check_scenarios, multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
class ParseException(Exception):
def __init__(self, msg):
@@ -198,7 +198,7 @@ class test_join07(wttest.WiredTigerTestCase):
('noextractor', dict(extractor=False))
]
- scenarios = number_scenarios(extractscen)
+ scenarios = make_scenarios(extractscen)
# Return the wiredtiger_open extension argument for a shared library.
def extensionArg(self, exts):
diff --git a/test/suite/test_join08.py b/test/suite/test_join08.py
index 6d674ab8193..d389fad706b 100644
--- a/test/suite/test_join08.py
+++ b/test/suite/test_join08.py
@@ -27,7 +27,6 @@
# OTHER DEALINGS IN THE SOFTWARE.
import wiredtiger, wttest
-from wtscenario import check_scenarios, multiply_scenarios, number_scenarios
# test_join08.py
# Test join error paths
diff --git a/test/suite/test_jsondump01.py b/test/suite/test_jsondump01.py
index 10262edc777..dc8027c2115 100644
--- a/test/suite/test_jsondump01.py
+++ b/test/suite/test_jsondump01.py
@@ -29,10 +29,12 @@
import os, json
import wiredtiger, wttest
from helper import \
- complex_populate, complex_populate_check_cursor,\
- simple_populate, simple_populate_check_cursor
+ complex_populate, complex_populate_check, complex_populate_check_cursor,\
+ simple_populate, simple_populate_check, simple_populate_check_cursor, \
+ simple_index_populate, simple_index_populate_check, \
+ simple_index_populate_check_cursor, compare_files
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# A 'fake' cursor based on a set of rows.
# It emulates a WT cursor well enough for the *_check_cursor methods.
@@ -79,25 +81,34 @@ class test_jsondump01(wttest.WiredTigerTestCase, suite_subprocess):
types = [
('file', dict(uri='file:', config='', lsm=False,
populate=simple_populate,
- populate_check=simple_populate_check_cursor)),
+ populate_check=simple_populate_check,
+ populate_check_cursor=simple_populate_check_cursor)),
('lsm', dict(uri='lsm:', config='', lsm=True,
populate=simple_populate,
- populate_check=simple_populate_check_cursor)),
+ populate_check=simple_populate_check,
+ populate_check_cursor=simple_populate_check_cursor)),
('table-simple', dict(uri='table:', config='', lsm=False,
populate=simple_populate,
- populate_check=simple_populate_check_cursor)),
+ populate_check=simple_populate_check,
+ populate_check_cursor=simple_populate_check_cursor)),
+ ('table-index', dict(uri='table:', config='', lsm=False,
+ populate=simple_index_populate,
+ populate_check=simple_index_populate_check,
+ populate_check_cursor=simple_index_populate_check_cursor)),
('table-simple-lsm', dict(uri='table:', config='type=lsm', lsm=True,
populate=simple_populate,
- populate_check=simple_populate_check_cursor)),
+ populate_check=simple_populate_check,
+ populate_check_cursor=simple_populate_check_cursor)),
('table-complex', dict(uri='table:', config='', lsm=False,
populate=complex_populate,
- populate_check=complex_populate_check_cursor)),
+ populate_check=complex_populate_check,
+ populate_check_cursor=complex_populate_check_cursor)),
('table-complex-lsm', dict(uri='table:', config='type=lsm', lsm=True,
populate=complex_populate,
- populate_check=complex_populate_check_cursor))
+ populate_check=complex_populate_check,
+ populate_check_cursor=complex_populate_check_cursor))
]
- scenarios = number_scenarios(
- multiply_scenarios('.', types, keyfmt))
+ scenarios = make_scenarios(types, keyfmt)
# Dump using util, re-load using python's JSON, and do a content comparison.
def test_jsondump_util(self):
@@ -132,7 +143,7 @@ class test_jsondump01(wttest.WiredTigerTestCase, suite_subprocess):
cursor = self.session.open_cursor(uri, None)
fake = FakeCursor(cursor.key_format, cursor.value_format, data)
cursor.close()
- self.populate_check(self, fake, self.nentries)
+ self.populate_check_cursor(self, fake, self.nentries)
# Dump using util, re-load using python's JSON, and do a content comparison.
def test_jsonload_util(self):
@@ -153,9 +164,18 @@ class test_jsondump01(wttest.WiredTigerTestCase, suite_subprocess):
loadcmd.append('-a')
self.runWt(loadcmd)
- # check the contents of the data we read.
- cursor = self.session.open_cursor(uri2, None)
- self.populate_check(self, cursor, self.nentries)
+ # Check the contents of the data we read.
+ self.populate_check(self, uri2, self.nentries)
+
+ # Reload into the original uri, and dump into another file.
+ self.session.drop(uri, None)
+ self.session.drop(uri2, None)
+ self.runWt(['load', '-jf', 'jsondump.out'])
+ self.runWt(['dump', '-j', uri], outfilename='jsondump2.out')
+
+ # Compare the two outputs, and check the content again.
+ compare_files(self, 'jsondump.out', 'jsondump2.out')
+ self.populate_check(self, uri, self.nentries)
if __name__ == '__main__':
wttest.run()
diff --git a/test/suite/test_lsm01.py b/test/suite/test_lsm01.py
index 1f89cf38d77..f6cee20e896 100644
--- a/test/suite/test_lsm01.py
+++ b/test/suite/test_lsm01.py
@@ -54,12 +54,10 @@ class test_lsm01(wttest.WiredTigerTestCase):
config_vars = [ 'chunk_size', 'merge_max', 'bloom',
'bloom_bit_count', 'bloom_hash_count' ]
- all_scenarios = wtscenario.multiply_scenarios('_',
+ scenarios = wtscenario.make_scenarios(
chunk_size_scenarios, merge_max_scenarios, bloom_scenarios,
- bloom_bit_scenarios, bloom_hash_scenarios, record_count_scenarios)
-
- scenarios = wtscenario.prune_scenarios(all_scenarios, 500)
- scenarios = wtscenario.number_scenarios(scenarios)
+ bloom_bit_scenarios, bloom_hash_scenarios, record_count_scenarios,
+ prune=500)
# Test drop of an object.
def test_lsm(self):
diff --git a/test/suite/test_metadata_cursor01.py b/test/suite/test_metadata_cursor01.py
index e759c14f846..7802f89f174 100644
--- a/test/suite/test_metadata_cursor01.py
+++ b/test/suite/test_metadata_cursor01.py
@@ -27,7 +27,7 @@
# OTHER DEALINGS IN THE SOFTWARE.
import wiredtiger, wttest
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_metadata_cursor01.py
# Metadata cursor operations
@@ -39,7 +39,7 @@ class test_metadata_cursor01(wttest.WiredTigerTestCase):
"""
table_name1 = 'test_metadata_cursor01'
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('plain', {'metauri' : 'metadata:'}),
('create', {'metauri' : 'metadata:create'}),
])
diff --git a/test/suite/test_nsnap01.py b/test/suite/test_nsnap01.py
index 5207b577ba4..7e8951750f8 100644
--- a/test/suite/test_nsnap01.py
+++ b/test/suite/test_nsnap01.py
@@ -30,7 +30,6 @@
# Named snapshots: basic API
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios
from helper import simple_populate
import wiredtiger, wttest
diff --git a/test/suite/test_nsnap02.py b/test/suite/test_nsnap02.py
index e4ed65ef72a..510c9d421ef 100644
--- a/test/suite/test_nsnap02.py
+++ b/test/suite/test_nsnap02.py
@@ -30,7 +30,6 @@
# Named snapshots: Combinations of dropping snapshots
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios
from helper import simple_populate
import wiredtiger, wttest
diff --git a/test/suite/test_nsnap03.py b/test/suite/test_nsnap03.py
index 0e853522940..3986c0c1a0a 100644
--- a/test/suite/test_nsnap03.py
+++ b/test/suite/test_nsnap03.py
@@ -30,7 +30,6 @@
# Named snapshots: Access and create from multiple sessions
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios
from helper import simple_populate
import wiredtiger, wttest
diff --git a/test/suite/test_nsnap04.py b/test/suite/test_nsnap04.py
index e8a5c9b6140..f9ef26b5600 100644
--- a/test/suite/test_nsnap04.py
+++ b/test/suite/test_nsnap04.py
@@ -30,7 +30,6 @@
# Named snapshots: Create snapshot from running transaction
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios
from helper import simple_populate
import wiredtiger, wttest
diff --git a/test/suite/test_overwrite.py b/test/suite/test_overwrite.py
index e22cdab4dea..4972a016bec 100644
--- a/test/suite/test_overwrite.py
+++ b/test/suite/test_overwrite.py
@@ -28,13 +28,13 @@
import wiredtiger, wttest
from helper import key_populate, simple_populate
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_overwrite.py
# cursor overwrite configuration method
class test_overwrite(wttest.WiredTigerTestCase):
name = 'overwrite'
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file-r', dict(type='file:',keyfmt='r')),
('file-S', dict(type='file:',keyfmt='S')),
('lsm-S', dict(type='lsm:',keyfmt='S')),
diff --git a/test/suite/test_perf001.py b/test/suite/test_perf001.py
index 1280639c9dd..b22ed2baeb0 100644
--- a/test/suite/test_perf001.py
+++ b/test/suite/test_perf001.py
@@ -32,13 +32,13 @@
import wiredtiger, wttest
import random
from time import clock, time
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# Test performance of inserting into a table with an index.
class test_perf001(wttest.WiredTigerTestCase):
table_name = 'test_perf001'
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
#('file-file', dict(tabletype='file',indextype='file')),
('file-lsm', dict(tabletype='file',indextype='lsm')),
#('lsm-file', dict(tabletype='lsm',indextype='file')),
diff --git a/test/suite/test_readonly01.py b/test/suite/test_readonly01.py
index 59e9743ab7e..e4b431ca1da 100644
--- a/test/suite/test_readonly01.py
+++ b/test/suite/test_readonly01.py
@@ -32,7 +32,7 @@
import fnmatch, os, shutil, time
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios
+from wtscenario import make_scenarios
import wttest
class test_readonly01(wttest.WiredTigerTestCase, suite_subprocess):
@@ -73,8 +73,7 @@ class test_readonly01(wttest.WiredTigerTestCase, suite_subprocess):
create_params = 'key_format=r,value_format=8t')),
]
- scenarios = multiply_scenarios('.',
- basecfg_list, dir_list, log_list, types)
+ scenarios = make_scenarios(basecfg_list, dir_list, log_list, types)
def conn_config(self, dir):
self.home = dir
diff --git a/test/suite/test_rebalance.py b/test/suite/test_rebalance.py
index f2167e864c9..98bd81de602 100644
--- a/test/suite/test_rebalance.py
+++ b/test/suite/test_rebalance.py
@@ -29,7 +29,7 @@
import os, time
import wiredtiger, wttest
from helper import complex_populate, simple_populate
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_rebalance.py
# session level rebalance operation
@@ -41,7 +41,7 @@ class test_rebalance(wttest.WiredTigerTestCase):
config = 'key_format=S,allocation_size=512,internal_page_max=512' + \
',leaf_page_max=1k,lsm=(chunk_size=512k,merge_min=10)'
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file', dict(uri='file:')),
('table', dict(uri='table:')),
('lsm', dict(uri='lsm:'))
diff --git a/test/suite/test_reconfig01.py b/test/suite/test_reconfig01.py
index 876de1fe5af..fb3fb7edac6 100644
--- a/test/suite/test_reconfig01.py
+++ b/test/suite/test_reconfig01.py
@@ -92,22 +92,25 @@ class test_reconfig01(wttest.WiredTigerTestCase):
self.conn.reconfigure("checkpoint=(wait=5)")
self.conn.reconfigure("checkpoint=(log_size=0)")
self.conn.reconfigure("checkpoint=(log_size=1M)")
- self.conn.reconfigure("checkpoint=(wait=0,name=hi)")
- self.conn.reconfigure("checkpoint=(wait=5,name=hi)")
- def test_reconfig_stat_log(self):
+ # Statistics logging: reconfigure the things we can reconfigure.
+ def test_reconfig_statistics_log_ok(self):
self.conn.reconfigure("statistics=[all],statistics_log=(wait=0)")
self.conn.reconfigure("statistics_log=(wait=0)")
- self.conn.reconfigure("statistics_log=(wait=2)")
+ self.conn.reconfigure("statistics_log=(wait=2,json=true)")
+ self.conn.reconfigure("statistics_log=(wait=0)")
+ self.conn.reconfigure("statistics_log=(wait=2,on_close=true)")
self.conn.reconfigure("statistics_log=(wait=0)")
self.conn.reconfigure("statistics_log=(wait=2,sources=[lsm:])")
self.conn.reconfigure("statistics_log=(wait=0)")
self.conn.reconfigure("statistics_log=(wait=2,timestamp=\"t%b %d\")")
self.conn.reconfigure("statistics_log=(wait=0)")
- self.conn.reconfigure("statistics_log=(wait=2,path=\"wts.%d.%H\")")
- self.conn.reconfigure("statistics_log=(wait=0)")
- self.conn.reconfigure(
- "statistics_log=(wait=2,sources=[lsm:],timestamp=\"%b\")")
+
+ # Statistics logging: reconfigure the things we can't reconfigure.
+ def test_reconfig_statistics_log_fail(self):
+ msg = '/unknown configuration key/'
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.conn.reconfigure("log=(path=foo)"), msg)
def test_file_manager(self):
self.conn.reconfigure("file_manager=(close_scan_interval=3)")
diff --git a/test/suite/test_reconfig02.py b/test/suite/test_reconfig02.py
index 85a9ceb2a34..9d9ac220aa7 100644
--- a/test/suite/test_reconfig02.py
+++ b/test/suite/test_reconfig02.py
@@ -41,24 +41,29 @@ class test_reconfig02(wttest.WiredTigerTestCase):
self.conn_config = self.init_config
return wttest.WiredTigerTestCase.setUpConnectionOpen(self, dir)
- # Call reconfigure for zero filling a file. There is nothing
- # we can actually look for to confirm it did anything.
- # Also changing the log file size is a no-op, but should not fail.
+ # Logging: reconfigure the things we can reconfigure.
def test_reconfig02_simple(self):
+ self.conn.reconfigure("log=(archive=false)")
+ self.conn.reconfigure("log=(prealloc=false)")
+ self.conn.reconfigure("log=(zero_fill=false)")
+
+ self.conn.reconfigure("log=(archive=true)")
+ self.conn.reconfigure("log=(prealloc=true)")
self.conn.reconfigure("log=(zero_fill=true)")
- self.conn.reconfigure("log=(file_max=1MB)")
- # Test that we get an error if we try to turn logging off.
+ # Logging: reconfigure the things we can't reconfigure.
def test_reconfig02_disable(self):
- msg = 'Invalid argument'
- gotException = False
- try:
- self.conn.reconfigure("log=(enabled=false)")
- except wiredtiger.WiredTigerError as e:
- gotException = True
- self.pr('got exception: ' + str(e))
- self.assertTrue(str(e).find(msg) >= 0)
- self.assertTrue(gotException)
+ msg = '/unknown configuration key/'
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.conn.reconfigure("log=(enabled=true)"), msg)
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.conn.reconfigure("log=(compressor=foo)"), msg)
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.conn.reconfigure("log=(file_max=1MB)"), msg)
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.conn.reconfigure("log=(path=foo)"), msg)
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.conn.reconfigure("log=(recovery=true)"), msg)
# Logging starts on, but prealloc is off. Verify it is off.
# Reconfigure it on and run again, making sure that log files
diff --git a/test/suite/test_rename.py b/test/suite/test_rename.py
index af968a4a38d..1979bbb802a 100644
--- a/test/suite/test_rename.py
+++ b/test/suite/test_rename.py
@@ -31,7 +31,7 @@ import wiredtiger, wttest
from helper import confirm_does_not_exist,\
complex_populate, complex_populate_check,\
simple_populate, simple_populate_check
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_rename.py
# session level rename operation
@@ -39,7 +39,7 @@ class test_rename(wttest.WiredTigerTestCase):
name1 = 'test_rename1'
name2 = 'test_rename2'
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file', dict(uri='file:')),
('table', dict(uri='table:'))
])
diff --git a/test/suite/test_schema02.py b/test/suite/test_schema02.py
index b404261c066..bccc7dfc728 100644
--- a/test/suite/test_schema02.py
+++ b/test/suite/test_schema02.py
@@ -27,7 +27,7 @@
# OTHER DEALINGS IN THE SOFTWARE.
import wiredtiger, wttest
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_schema02.py
# Columns, column groups, indexes
@@ -37,7 +37,7 @@ class test_schema02(wttest.WiredTigerTestCase):
"""
nentries = 1000
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('normal', { 'idx_config' : '' }),
('lsm', { 'idx_config' : ',type=lsm' }),
])
diff --git a/test/suite/test_schema03.py b/test/suite/test_schema03.py
index f48bfdf3cf8..81556393e78 100644
--- a/test/suite/test_schema03.py
+++ b/test/suite/test_schema03.py
@@ -29,7 +29,7 @@
import os
import suite_random
import wiredtiger, wtscenario, wttest
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
try:
# Windows does not getrlimit/setrlimit so we must catch the resource
@@ -249,7 +249,7 @@ class test_schema03(wttest.WiredTigerTestCase):
# but boost it up to this limit anyway.
OPEN_FILE_LIMIT = 1000
- restart_scenarios = check_scenarios([('table', dict(s_restart=['table'],P=0.3)),
+ restart_scenarios = [('table', dict(s_restart=['table'],P=0.3)),
('colgroup0', dict(s_restart=['colgroup0'],P=0.3)),
('index0', dict(s_restart=['index0'],P=0.3)),
('colgroup1', dict(s_restart=['colgroup1'],P=0.3)),
@@ -259,7 +259,7 @@ class test_schema03(wttest.WiredTigerTestCase):
('populate1', dict(s_restart=['populate1'],P=0.3)),
('ipop', dict(s_restart=['index0','populate0'],P=0.3)),
('all', dict(s_restart=['table','colgroup0','index0','colgroup1','index1','populate0','index2','populate1'],P=1.0)),
- ])
+ ]
ntable_scenarios = wtscenario.quick_scenarios('s_ntable',
[1,2,5,8], [1.0,0.4,0.5,0.5])
@@ -272,11 +272,10 @@ class test_schema03(wttest.WiredTigerTestCase):
table_args_scenarios = wtscenario.quick_scenarios('s_extra_table_args',
['', ',type=file', ',type=lsm'], [0.5, 0.3, 0.2])
- all_scenarios = wtscenario.multiply_scenarios('_', restart_scenarios, ntable_scenarios, ncolgroup_scenarios, nindex_scenarios, idx_args_scenarios, table_args_scenarios)
-
- # Prune the scenarios according to the probabilities given above.
- scenarios = wtscenario.prune_scenarios(all_scenarios, 30)
- scenarios = wtscenario.number_scenarios(scenarios)
+ scenarios = wtscenario.make_scenarios(
+ restart_scenarios, ntable_scenarios, ncolgroup_scenarios,
+ nindex_scenarios, idx_args_scenarios, table_args_scenarios,
+ prune=30)
# Note: the set can be reduced here for debugging, e.g.
# scenarios = scenarios[40:44]
diff --git a/test/suite/test_schema04.py b/test/suite/test_schema04.py
index cd41138deb0..8ac81690819 100644
--- a/test/suite/test_schema04.py
+++ b/test/suite/test_schema04.py
@@ -28,7 +28,7 @@
import os
import wiredtiger, wttest, run
-from wtscenario import check_scenarios, number_scenarios
+from wtscenario import make_scenarios
# test_schema04.py
# Test indices with duplicates
@@ -47,7 +47,7 @@ class test_schema04(wttest.WiredTigerTestCase):
"""
nentries = 100
- scenarios = number_scenarios([
+ scenarios = make_scenarios([
('index-before', { 'create_index' : 0 }),
('index-during', { 'create_index' : 1 }),
('index-after', { 'create_index' : 2 }),
diff --git a/test/suite/test_schema05.py b/test/suite/test_schema05.py
index 89722d5f89a..89484cfc7bd 100644
--- a/test/suite/test_schema05.py
+++ b/test/suite/test_schema05.py
@@ -28,7 +28,7 @@
import os
import wiredtiger, wttest, run
-from wtscenario import check_scenarios, number_scenarios
+from wtscenario import make_scenarios
# test_schema05.py
# Test indices using a custom extractor.
@@ -51,7 +51,7 @@ class test_schema05(wttest.WiredTigerTestCase):
nentries = 1000
nindices = 6
- scenarios = number_scenarios([
+ scenarios = make_scenarios([
('index-before', { 'create_index' : 0 }),
('index-during', { 'create_index' : 1 }),
('index-after', { 'create_index' : 2 }),
diff --git a/test/suite/test_schema06.py b/test/suite/test_schema06.py
index e72959edf2a..e0eec189137 100644
--- a/test/suite/test_schema06.py
+++ b/test/suite/test_schema06.py
@@ -27,6 +27,7 @@
# OTHER DEALINGS IN THE SOFTWARE.
import wiredtiger, wttest
+from wtscenario import make_scenarios
# test_schema06.py
# Repeatedly create and drop indices
@@ -36,10 +37,10 @@ class test_schema06(wttest.WiredTigerTestCase):
"""
nentries = 1000
- scenarios = [
+ scenarios = make_scenarios([
('normal', { 'idx_config' : '' }),
('lsm', { 'idx_config' : ',type=lsm' }),
- ]
+ ])
def flip(self, inum, val):
"""
diff --git a/test/suite/test_split.py b/test/suite/test_split.py
index d09613e1c52..28bf6bc59b0 100644
--- a/test/suite/test_split.py
+++ b/test/suite/test_split.py
@@ -35,7 +35,6 @@ from wiredtiger import stat
from helper import confirm_empty,\
key_populate, value_populate, simple_populate,\
complex_populate, complex_value_populate
-from wtscenario import multiply_scenarios, number_scenarios
# Test splits
class test_split(wttest.WiredTigerTestCase):
diff --git a/test/suite/test_stat01.py b/test/suite/test_stat01.py
index 5c3259696eb..1ad51ee9882 100644
--- a/test/suite/test_stat01.py
+++ b/test/suite/test_stat01.py
@@ -29,7 +29,7 @@
import helper, wiredtiger, wttest
from wiredtiger import stat
from helper import key_populate, simple_populate
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# test_stat01.py
# Statistics operations
@@ -49,7 +49,7 @@ class test_stat01(wttest.WiredTigerTestCase):
('recno', dict(keyfmt='r')),
('string', dict(keyfmt='S')),
]
- scenarios = number_scenarios(multiply_scenarios('.', types, keyfmt))
+ scenarios = make_scenarios(types, keyfmt)
conn_config = 'statistics=(all)'
diff --git a/test/suite/test_stat02.py b/test/suite/test_stat02.py
index 88371947b5b..ef3907e54b1 100644
--- a/test/suite/test_stat02.py
+++ b/test/suite/test_stat02.py
@@ -28,7 +28,7 @@
import itertools, wiredtiger, wttest
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
from wiredtiger import stat
from helper import complex_populate, complex_populate_lsm, simple_populate
@@ -57,8 +57,7 @@ class test_stat_cursor_config(wttest.WiredTigerTestCase):
('size', dict(cursor_config='size'))
]
- scenarios = number_scenarios(
- multiply_scenarios('.', uri, data_config, cursor_config))
+ scenarios = make_scenarios(uri, data_config, cursor_config)
# Turn on statistics for this test.
def conn_config(self, dir):
@@ -106,13 +105,13 @@ class test_stat_cursor_dsrc_clear(wttest.WiredTigerTestCase):
pfx = 'test_stat_cursor_dsrc_clear'
uri = [
- ('1', dict(uri='file:' + pfx, pop=simple_populate)),
- ('2', dict(uri='table:' + pfx, pop=simple_populate)),
- ('3', dict(uri='table:' + pfx, pop=complex_populate)),
- ('4', dict(uri='table:' + pfx, pop=complex_populate_lsm))
+ ('dsrc_clear_1', dict(uri='file:' + pfx, pop=simple_populate)),
+ ('dsrc_clear_2', dict(uri='table:' + pfx, pop=simple_populate)),
+ ('dsrc_clear_3', dict(uri='table:' + pfx, pop=complex_populate)),
+ ('dsrc_clear_4', dict(uri='table:' + pfx, pop=complex_populate_lsm))
]
- scenarios = number_scenarios(multiply_scenarios('.', uri))
+ scenarios = make_scenarios(uri)
conn_config = 'statistics=(all)'
def test_stat_cursor_dsrc_clear(self):
@@ -136,13 +135,13 @@ class test_stat_cursor_fast(wttest.WiredTigerTestCase):
pfx = 'test_stat_cursor_fast'
uri = [
- ('1', dict(uri='file:' + pfx, pop=simple_populate)),
- ('2', dict(uri='table:' + pfx, pop=simple_populate)),
- ('3', dict(uri='table:' + pfx, pop=complex_populate)),
- ('4', dict(uri='table:' + pfx, pop=complex_populate_lsm))
+ ('fast_1', dict(uri='file:' + pfx, pop=simple_populate)),
+ ('fast_2', dict(uri='table:' + pfx, pop=simple_populate)),
+ ('fast_3', dict(uri='table:' + pfx, pop=complex_populate)),
+ ('fast_4', dict(uri='table:' + pfx, pop=complex_populate_lsm))
]
- scenarios = number_scenarios(multiply_scenarios('.', uri))
+ scenarios = make_scenarios(uri)
conn_config = 'statistics=(all)'
def test_stat_cursor_fast(self):
@@ -180,13 +179,13 @@ class test_stat_cursor_dsrc_error(wttest.WiredTigerTestCase):
pfx = 'test_stat_cursor_dsrc_error'
uri = [
- ('1', dict(uri='file:' + pfx, pop=simple_populate)),
- ('2', dict(uri='table:' + pfx, pop=simple_populate)),
- ('3', dict(uri='table:' + pfx, pop=complex_populate)),
- ('4', dict(uri='table:' + pfx, pop=complex_populate_lsm))
+ ('dsrc_error_1', dict(uri='file:' + pfx, pop=simple_populate)),
+ ('dsrc_error_2', dict(uri='table:' + pfx, pop=simple_populate)),
+ ('dsrc_error_3', dict(uri='table:' + pfx, pop=complex_populate)),
+ ('dsrc_error_4', dict(uri='table:' + pfx, pop=complex_populate_lsm))
]
- scenarios = number_scenarios(multiply_scenarios('.', uri))
+ scenarios = make_scenarios(uri)
conn_config = 'statistics=(all)'
def test_stat_cursor_dsrc_error(self):
diff --git a/test/suite/test_stat03.py b/test/suite/test_stat03.py
index 039ad1f7f8d..b17fe6eb91c 100644
--- a/test/suite/test_stat03.py
+++ b/test/suite/test_stat03.py
@@ -34,7 +34,7 @@ from helper import complex_populate, complex_populate_lsm, simple_populate
from helper import key_populate, complex_value_populate, value_populate
from helper import complex_populate_colgroup_count, complex_populate_index_count
from helper import complex_populate_colgroup_name, complex_populate_index_name
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# test_stat03.py
# Statistics reset test.
@@ -51,7 +51,7 @@ class test_stat_cursor_reset(wttest.WiredTigerTestCase):
dict(uri='table:' + pfx, pop=complex_populate_lsm)),
]
- scenarios = number_scenarios(multiply_scenarios('.', uri))
+ scenarios = make_scenarios(uri)
conn_config = 'statistics=(all)'
def stat_cursor(self, uri):
diff --git a/test/suite/test_stat04.py b/test/suite/test_stat04.py
index e7c39371f80..b5309efff37 100644
--- a/test/suite/test_stat04.py
+++ b/test/suite/test_stat04.py
@@ -28,7 +28,7 @@
import os, struct
from suite_subprocess import suite_subprocess
-from wtscenario import number_scenarios, multiply_scenarios
+from wtscenario import make_scenarios
import wiredtiger, wttest
from wiredtiger import stat
@@ -49,7 +49,7 @@ class test_stat04(wttest.WiredTigerTestCase, suite_subprocess):
('large', dict(nentries=100000, valuesize=1)),
('jumboval', dict(nentries=100, valuesize=4200000)),
]
- scenarios = number_scenarios(multiply_scenarios('.', keyfmt, nentries))
+ scenarios = make_scenarios(keyfmt, nentries)
conn_config = 'statistics=(all)'
def init_test(self):
@@ -91,6 +91,7 @@ class test_stat04(wttest.WiredTigerTestCase, suite_subprocess):
self.checkcount(uri, count)
cursor[self.genkey(i)] = self.genvalue(i)
count += 1
+
# Remove a number of entries, at each step checking that stats match.
for i in range(0, self.nentries / 37):
cursor.set_key(self.genkey(i*11 % self.nentries))
@@ -99,5 +100,10 @@ class test_stat04(wttest.WiredTigerTestCase, suite_subprocess):
self.checkcount(uri, count)
cursor.close()
+ # Confirm the count is correct after writing to the backing file,
+ # which tests the on-disk format as well as the in-memory format.
+ self.reopen_conn()
+ self.checkcount(uri, count)
+
if __name__ == '__main__':
wttest.run()
diff --git a/test/suite/test_stat05.py b/test/suite/test_stat05.py
index 9bcedd65089..62562f78ed6 100644
--- a/test/suite/test_stat05.py
+++ b/test/suite/test_stat05.py
@@ -28,7 +28,7 @@
import itertools, wiredtiger, wttest
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
from wiredtiger import stat
from helper import complex_populate, complex_populate_lsm, simple_populate
from helper import complex_value_populate, key_populate, value_populate
@@ -43,16 +43,18 @@ class test_stat_cursor_config(wttest.WiredTigerTestCase):
('file', dict(uri='file:' + pfx, pop=simple_populate, cfg='')),
('table', dict(uri='table:' + pfx, pop=simple_populate, cfg='')),
('inmem', dict(uri='table:' + pfx, pop=simple_populate, cfg='',
- conn_config='in_memory,statistics=(fast)')),
+ conn_config = 'in_memory,statistics=(fast)')),
('table-lsm', dict(uri='table:' + pfx, pop=simple_populate,
- cfg=',type=lsm,lsm=(chunk_size=1MB,merge_min=2)')),
+ cfg=',type=lsm,lsm=(chunk_size=1MB,merge_min=2)',
+ conn_config = 'statistics=(fast),eviction_dirty_target=99,eviction_dirty_trigger=99')),
('complex', dict(uri='table:' + pfx, pop=complex_populate, cfg='')),
('complex-lsm',
dict(uri='table:' + pfx, pop=complex_populate_lsm,
- cfg=',lsm=(chunk_size=1MB,merge_min=2)')),
+ cfg=',lsm=(chunk_size=1MB,merge_min=2)',
+ conn_config = 'statistics=(fast),eviction_dirty_target=99,eviction_dirty_trigger=99')),
]
- scenarios = number_scenarios(uri)
+ scenarios = make_scenarios(uri)
def openAndWalkStatCursor(self):
c = self.session.open_cursor(
@@ -62,7 +64,6 @@ class test_stat_cursor_config(wttest.WiredTigerTestCase):
count += 1
c.close()
-
# Open a size-only statistics cursor on various table types. Ensure that
# the cursor open succeeds. Insert enough data that LSM tables need to
# switch and merge.
diff --git a/test/suite/test_stat_log01.py b/test/suite/test_stat_log01.py
index f6033d940c5..65ce80dfe7d 100644
--- a/test/suite/test_stat_log01.py
+++ b/test/suite/test_stat_log01.py
@@ -51,9 +51,10 @@ class test_stat_log01(wttest.WiredTigerTestCase):
None, "create,statistics=(fast),statistics_log=(wait=1)")
# Wait for the default interval, to ensure stats have been written.
time.sleep(2)
- self.check_stats_file("WiredTigerStat")
+ self.check_stats_file(".")
def test_stats_log_name(self):
+ os.mkdir("foo")
self.conn = self.wiredtiger_open(
None, "create,statistics=(fast),statistics_log=(wait=1,path=foo)")
# Wait for the default interval, to ensure stats have been written.
@@ -66,21 +67,18 @@ class test_stat_log01(wttest.WiredTigerTestCase):
# Wait for the default interval, to ensure stats have been written.
time.sleep(2)
self.close_conn()
- self.check_stats_file("WiredTigerStat")
+ self.check_stats_file(".")
def test_stats_log_on_close(self):
self.conn = self.wiredtiger_open(None,
"create,statistics=(fast),statistics_log=(on_close=true)")
# Close the connection to ensure the statistics get generated.
self.close_conn()
- self.check_stats_file("WiredTigerStat")
+ self.check_stats_file(".")
- def check_stats_file(self, filename):
- if filename == "WiredTigerStat":
- files = glob.glob(filename + '.[0-9]*')
- self.assertTrue(files)
- else:
- self.assertTrue(os.path.isfile(filename))
+ def check_stats_file(self, dir):
+ files = glob.glob(dir + '/' + 'WiredTigerStat.[0-9]*')
+ self.assertTrue(files)
if __name__ == '__main__':
wttest.run()
diff --git a/test/suite/test_sweep01.py b/test/suite/test_sweep01.py
index bccd2bce012..71f8fcb180e 100644
--- a/test/suite/test_sweep01.py
+++ b/test/suite/test_sweep01.py
@@ -33,8 +33,8 @@
import fnmatch, os, shutil, run, time
from suite_subprocess import suite_subprocess
+from wtscenario import make_scenarios
from wiredtiger import stat
-from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios
import wttest
class test_sweep01(wttest.WiredTigerTestCase, suite_subprocess):
@@ -55,7 +55,7 @@ class test_sweep01(wttest.WiredTigerTestCase, suite_subprocess):
create_params = 'key_format=r,value_format=8t')),
]
- scenarios = types
+ scenarios = make_scenarios(types)
def test_ops(self):
#
diff --git a/test/suite/test_sweep03.py b/test/suite/test_sweep03.py
index 061c2f5b37b..61078fa96b5 100644
--- a/test/suite/test_sweep03.py
+++ b/test/suite/test_sweep03.py
@@ -33,7 +33,7 @@
import fnmatch, os, shutil, run, time
from suite_subprocess import suite_subprocess
from wiredtiger import stat
-from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios
+from wtscenario import make_scenarios
import wttest
class test_sweep03(wttest.WiredTigerTestCase, suite_subprocess):
@@ -54,7 +54,7 @@ class test_sweep03(wttest.WiredTigerTestCase, suite_subprocess):
create_params = 'key_format=r,value_format=8t')),
]
- scenarios = types
+ scenarios = make_scenarios(types)
def test_disable_idle_timeout1(self):
#
diff --git a/test/suite/test_truncate01.py b/test/suite/test_truncate01.py
index 77a476e40c1..9a3518c6984 100644
--- a/test/suite/test_truncate01.py
+++ b/test/suite/test_truncate01.py
@@ -34,13 +34,13 @@ import wiredtiger, wttest
from helper import confirm_empty,\
key_populate, value_populate, simple_populate,\
complex_populate, complex_value_populate
-from wtscenario import check_scenarios, multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# Test truncation arguments.
class test_truncate_arguments(wttest.WiredTigerTestCase):
name = 'test_truncate'
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file', dict(type='file:')),
('table', dict(type='table:'))
])
@@ -80,7 +80,7 @@ class test_truncate_arguments(wttest.WiredTigerTestCase):
# Test truncation of an object using its URI.
class test_truncate_uri(wttest.WiredTigerTestCase):
name = 'test_truncate'
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file', dict(type='file:')),
('table', dict(type='table:'))
])
@@ -115,7 +115,7 @@ class test_truncate_cursor_order(wttest.WiredTigerTestCase):
('recno', dict(keyfmt='r')),
('string', dict(keyfmt='S')),
]
- scenarios = number_scenarios(multiply_scenarios('.', types, keyfmt))
+ scenarios = make_scenarios(types, keyfmt)
# Test an illegal order, then confirm that equal cursors works.
def test_truncate_cursor_order(self):
@@ -146,7 +146,7 @@ class test_truncate_cursor_end(wttest.WiredTigerTestCase):
('recno', dict(keyfmt='r')),
('string', dict(keyfmt='S')),
]
- scenarios = number_scenarios(multiply_scenarios('.', types, keyfmt))
+ scenarios = make_scenarios(types, keyfmt)
# Test truncation of cursors past the end of the object.
def test_truncate_cursor_order(self):
@@ -205,8 +205,7 @@ class test_truncate_cursor(wttest.WiredTigerTestCase):
('big', dict(nentries=1000,skip=37)),
]
- scenarios = number_scenarios(
- multiply_scenarios('.', types, keyfmt, size, reopen))
+ scenarios = make_scenarios(types, keyfmt, size, reopen)
# Set a cursor key.
def cursorKey(self, uri, key):
diff --git a/test/suite/test_truncate02.py b/test/suite/test_truncate02.py
index 6c11302787c..e57a65d2f97 100644
--- a/test/suite/test_truncate02.py
+++ b/test/suite/test_truncate02.py
@@ -32,7 +32,7 @@
import wiredtiger, wttest
from helper import key_populate, value_populate, simple_populate
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# test_truncate_fast_delete
# When deleting leaf pages that aren't in memory, we set transactional
@@ -86,8 +86,7 @@ class test_truncate_fast_delete(wttest.WiredTigerTestCase):
('txn2', dict(commit=False)),
]
- scenarios = number_scenarios(
- multiply_scenarios('.', types, keyfmt, overflow, reads, writes, txn))
+ scenarios = make_scenarios(types, keyfmt, overflow, reads, writes, txn)
# Return the number of records visible to the cursor; test both forward
# and backward iteration, they are different code paths in this case.
diff --git a/test/suite/test_txn01.py b/test/suite/test_txn01.py
index eb6963791fd..1ba74461088 100644
--- a/test/suite/test_txn01.py
+++ b/test/suite/test_txn01.py
@@ -27,13 +27,13 @@
# OTHER DEALINGS IN THE SOFTWARE.
import wiredtiger, wttest
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_txn01.py
# Transactions: basic functionality
class test_txn01(wttest.WiredTigerTestCase):
nentries = 1000
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('col-f', dict(uri='file:text_txn01',key_format='r',value_format='S')),
('col-t', dict(uri='table:text_txn01',key_format='r',value_format='S')),
('fix-f', dict(uri='file:text_txn01',key_format='r',value_format='8t')),
diff --git a/test/suite/test_txn02.py b/test/suite/test_txn02.py
index fccf123d3bc..a0c2c12a47c 100644
--- a/test/suite/test_txn02.py
+++ b/test/suite/test_txn02.py
@@ -32,7 +32,7 @@
import fnmatch, os, shutil, time
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios
+from wtscenario import make_scenarios
import wttest
class test_txn02(wttest.WiredTigerTestCase, suite_subprocess):
@@ -81,22 +81,18 @@ class test_txn02(wttest.WiredTigerTestCase, suite_subprocess):
txn3s = [('t3c', dict(txn3='commit')), ('t3r', dict(txn3='rollback'))]
txn4s = [('t4c', dict(txn4='commit')), ('t4r', dict(txn4='rollback'))]
- all_scenarios = multiply_scenarios('.', types,
- op1s, txn1s, op2s, txn2s, op3s, txn3s, op4s, txn4s)
-
# This test generates thousands of potential scenarios.
# For default runs, we'll use a small subset of them, for
# long runs (when --long is set) we'll set a much larger limit.
- scenarios = number_scenarios(prune_scenarios(all_scenarios, 20, 5000))
+ scenarios = make_scenarios(types,
+ op1s, txn1s, op2s, txn2s, op3s, txn3s, op4s, txn4s,
+ prune=20, prunelong=5000)
# Each check_log() call takes a second, so we don't call it for
# every scenario, we'll limit it to the value of checklog_calls.
checklog_calls = 100 if wttest.islongtest() else 2
checklog_mod = (len(scenarios) / checklog_calls + 1)
- # scenarios = number_scenarios(multiply_scenarios('.', types,
- # op1s, txn1s, op2s, txn2s, op3s, txn3s, op4s, txn4s)) [:3]
- # Overrides WiredTigerTestCase
def setUpConnectionOpen(self, dir):
self.home = dir
# Cycle through the different transaction_sync values in a
diff --git a/test/suite/test_txn03.py b/test/suite/test_txn03.py
index 97180a75949..18a0e096767 100644
--- a/test/suite/test_txn03.py
+++ b/test/suite/test_txn03.py
@@ -31,7 +31,7 @@
#
import wiredtiger, wttest
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
class test_txn03(wttest.WiredTigerTestCase):
tablename = 'test_txn03'
@@ -42,7 +42,7 @@ class test_txn03(wttest.WiredTigerTestCase):
data_str2 = "TEST_VAL1"
nentries = 1000
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('var', dict(create_params = "key_format=S,value_format=S")),
])
diff --git a/test/suite/test_txn04.py b/test/suite/test_txn04.py
index 9d9d2db62c6..ade39272f84 100644
--- a/test/suite/test_txn04.py
+++ b/test/suite/test_txn04.py
@@ -32,7 +32,7 @@
import shutil, os
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
import wttest
class test_txn04(wttest.WiredTigerTestCase, suite_subprocess):
@@ -62,7 +62,7 @@ class test_txn04(wttest.WiredTigerTestCase, suite_subprocess):
]
txn1s = [('t1c', dict(txn1='commit')), ('t1r', dict(txn1='rollback'))]
- scenarios = number_scenarios(multiply_scenarios('.', types, op1s, txn1s))
+ scenarios = make_scenarios(types, op1s, txn1s)
# Overrides WiredTigerTestCase
def setUpConnectionOpen(self, dir):
self.home = dir
diff --git a/test/suite/test_txn05.py b/test/suite/test_txn05.py
index bb68034ca04..9e84fe7d3fe 100644
--- a/test/suite/test_txn05.py
+++ b/test/suite/test_txn05.py
@@ -32,7 +32,7 @@
import fnmatch, os, shutil, time
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
import wttest
class test_txn05(wttest.WiredTigerTestCase, suite_subprocess):
@@ -63,8 +63,7 @@ class test_txn05(wttest.WiredTigerTestCase, suite_subprocess):
]
txn1s = [('t1c', dict(txn1='commit')), ('t1r', dict(txn1='rollback'))]
- scenarios = number_scenarios(multiply_scenarios('.', types, op1s, txn1s))
- # scenarios = number_scenarios(multiply_scenarios('.', types, op1s, txn1s))[:3]
+ scenarios = make_scenarios(types, op1s, txn1s)
# Overrides WiredTigerTestCase
def setUpConnectionOpen(self, dir):
self.home = dir
diff --git a/test/suite/test_txn06.py b/test/suite/test_txn06.py
index 9c1d0335d47..e4636e40e2e 100644
--- a/test/suite/test_txn06.py
+++ b/test/suite/test_txn06.py
@@ -30,7 +30,6 @@
# Transactions: test long-running snapshots
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios
from helper import simple_populate
import wiredtiger, wttest
diff --git a/test/suite/test_txn07.py b/test/suite/test_txn07.py
index f74120e3590..8dd8238343d 100644
--- a/test/suite/test_txn07.py
+++ b/test/suite/test_txn07.py
@@ -33,7 +33,7 @@
import fnmatch, os, shutil, run, time
from suite_subprocess import suite_subprocess
from wiredtiger import stat
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
import wttest
class test_txn07(wttest.WiredTigerTestCase, suite_subprocess):
@@ -70,8 +70,7 @@ class test_txn07(wttest.WiredTigerTestCase, suite_subprocess):
('none', dict(compress='')),
]
- scenarios = number_scenarios(multiply_scenarios('.', types, op1s, txn1s,
- compress))
+ scenarios = make_scenarios(types, op1s, txn1s, compress)
# Overrides WiredTigerTestCase
def setUpConnectionOpen(self, dir):
self.home = dir
diff --git a/test/suite/test_txn08.py b/test/suite/test_txn08.py
index 36253856285..f0cdf08df07 100644
--- a/test/suite/test_txn08.py
+++ b/test/suite/test_txn08.py
@@ -33,7 +33,6 @@
import fnmatch, os, shutil, run, time
from suite_subprocess import suite_subprocess
from wiredtiger import stat
-from wtscenario import multiply_scenarios, number_scenarios
import wttest
class test_txn08(wttest.WiredTigerTestCase, suite_subprocess):
diff --git a/test/suite/test_txn09.py b/test/suite/test_txn09.py
index f536d65205d..cfad8270ab1 100644
--- a/test/suite/test_txn09.py
+++ b/test/suite/test_txn09.py
@@ -32,7 +32,7 @@
import fnmatch, os, shutil, time
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios
+from wtscenario import make_scenarios
import wttest
class test_txn09(wttest.WiredTigerTestCase, suite_subprocess):
@@ -73,13 +73,12 @@ class test_txn09(wttest.WiredTigerTestCase, suite_subprocess):
txn3s = [('t3c', dict(txn3='commit')), ('t3r', dict(txn3='rollback'))]
txn4s = [('t4c', dict(txn4='commit')), ('t4r', dict(txn4='rollback'))]
- all_scenarios = multiply_scenarios('.', types,
- op1s, txn1s, op2s, txn2s, op3s, txn3s, op4s, txn4s)
-
# This test generates thousands of potential scenarios.
# For default runs, we'll use a small subset of them, for
# long runs (when --long is set) we'll set a much larger limit.
- scenarios = number_scenarios(prune_scenarios(all_scenarios, 20, 5000))
+ scenarios = make_scenarios(types,
+ op1s, txn1s, op2s, txn2s, op3s, txn3s, op4s, txn4s,
+ prune=20, prunelong=5000)
# Overrides WiredTigerTestCase
def setUpConnectionOpen(self, dir):
diff --git a/test/suite/test_txn10.py b/test/suite/test_txn10.py
index cf9c11dd4ab..a4745e60066 100644
--- a/test/suite/test_txn10.py
+++ b/test/suite/test_txn10.py
@@ -32,7 +32,6 @@
import fnmatch, os, shutil, time
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios
import wttest
class test_txn10(wttest.WiredTigerTestCase, suite_subprocess):
diff --git a/test/suite/test_txn12.py b/test/suite/test_txn12.py
index 8ae9df33990..32c058bea85 100644
--- a/test/suite/test_txn12.py
+++ b/test/suite/test_txn12.py
@@ -29,7 +29,6 @@
import wiredtiger, wttest
from suite_subprocess import suite_subprocess
from wiredtiger import stat
-from wtscenario import multiply_scenarios, number_scenarios
# test_txn12.py
# test of commit following failed op in a read only transaction.
diff --git a/test/suite/test_txn13.py b/test/suite/test_txn13.py
index dd6a6dbcd6d..ae0250c06e8 100644
--- a/test/suite/test_txn13.py
+++ b/test/suite/test_txn13.py
@@ -33,7 +33,7 @@
#import fnmatch, os, shutil, run, time
from suite_subprocess import suite_subprocess
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
import wiredtiger, wttest
class test_txn13(wttest.WiredTigerTestCase, suite_subprocess):
@@ -43,7 +43,7 @@ class test_txn13(wttest.WiredTigerTestCase, suite_subprocess):
nops = 1024
create_params = 'key_format=i,value_format=S'
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('1gb', dict(expect_err=False, valuesize=1048576)),
('2gb', dict(expect_err=False, valuesize=2097152)),
('4gb', dict(expect_err=True, valuesize=4194304))
diff --git a/test/suite/test_txn14.py b/test/suite/test_txn14.py
index 371f4402567..f9ccabaab8b 100644
--- a/test/suite/test_txn14.py
+++ b/test/suite/test_txn14.py
@@ -32,7 +32,7 @@
import fnmatch, os, shutil, time
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios
+from wtscenario import make_scenarios
import wttest
class test_txn14(wttest.WiredTigerTestCase, suite_subprocess):
@@ -47,7 +47,7 @@ class test_txn14(wttest.WiredTigerTestCase, suite_subprocess):
('sync', dict(sync='on')),
('bg', dict(sync='background')),
]
- scenarios = multiply_scenarios('.', sync_list)
+ scenarios = make_scenarios(sync_list)
def simulate_crash_restart(self, olddir, newdir):
''' Simulate a crash from olddir and restart in newdir. '''
diff --git a/test/suite/test_txn15.py b/test/suite/test_txn15.py
index 809dce4ebfa..c061c093b02 100644
--- a/test/suite/test_txn15.py
+++ b/test/suite/test_txn15.py
@@ -33,7 +33,7 @@
import fnmatch, os, shutil, time
from suite_subprocess import suite_subprocess
from wiredtiger import stat
-from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios
+from wtscenario import make_scenarios
import wttest
class test_txn15(wttest.WiredTigerTestCase, suite_subprocess):
@@ -71,7 +71,7 @@ class test_txn15(wttest.WiredTigerTestCase, suite_subprocess):
('c_none', dict(commit_sync=None)),
('c_off', dict(commit_sync='sync=off')),
]
- scenarios = multiply_scenarios('.', conn_sync_enabled, conn_sync_method,
+ scenarios = make_scenarios(conn_sync_enabled, conn_sync_method,
begin_sync, commit_sync)
# Given the different configuration settings determine if this group
diff --git a/test/suite/test_upgrade.py b/test/suite/test_upgrade.py
index 357e437f14d..e4f92f8f8d8 100644
--- a/test/suite/test_upgrade.py
+++ b/test/suite/test_upgrade.py
@@ -29,14 +29,14 @@
import os, time
import wiredtiger, wttest
from helper import complex_populate, simple_populate
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_upgrade.py
# session level upgrade operation
class test_upgrade(wttest.WiredTigerTestCase):
name = 'test_upgrade'
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file', dict(uri='file:')),
('table', dict(uri='table:'))
])
diff --git a/test/suite/test_util02.py b/test/suite/test_util02.py
index 475e856052a..421b0104484 100644
--- a/test/suite/test_util02.py
+++ b/test/suite/test_util02.py
@@ -29,7 +29,7 @@
import string, os
import wiredtiger, wttest
from suite_subprocess import suite_subprocess
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
from helper import complex_populate
# test_util02.py
@@ -44,7 +44,7 @@ class test_util02(wttest.WiredTigerTestCase, suite_subprocess):
nentries = 1000
stringclass = ''.__class__
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('SS', dict(key_format='S',value_format='S')),
('rS', dict(key_format='r',value_format='S')),
('ri', dict(key_format='r',value_format='i')),
diff --git a/test/suite/test_util03.py b/test/suite/test_util03.py
index c3ea48b8f5e..e341c79ff9e 100644
--- a/test/suite/test_util03.py
+++ b/test/suite/test_util03.py
@@ -28,7 +28,7 @@
from suite_subprocess import suite_subprocess
import wiredtiger, wttest
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_util03.py
# Utilities: wt create
@@ -36,7 +36,7 @@ class test_util03(wttest.WiredTigerTestCase, suite_subprocess):
tablename = 'test_util03.a'
nentries = 1000
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('none', dict(key_format=None,value_format=None)),
('SS', dict(key_format='S',value_format='S')),
('rS', dict(key_format='r',value_format='S')),
diff --git a/test/suite/test_util13.py b/test/suite/test_util13.py
index 222f42cd7f1..9804dc700ba 100644
--- a/test/suite/test_util13.py
+++ b/test/suite/test_util13.py
@@ -33,7 +33,7 @@ import itertools, wiredtiger, wttest
from helper import complex_populate_cgconfig, complex_populate_cgconfig_lsm
from helper import simple_populate
from helper import complex_populate_check, simple_populate_check
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# test_util13.py
# Utilities: wt dump, as well as the dump cursor
@@ -73,7 +73,7 @@ class test_util13(wttest.WiredTigerTestCase, suite_subprocess):
cfg='merge_max=5')),
]
- scenarios = number_scenarios(multiply_scenarios('.', types))
+ scenarios = make_scenarios(types)
def compare_config(self, expected_cfg, actual_cfg):
# Replace '(' characters so configuration groups don't break parsing.
diff --git a/test/suite/wtscenario.py b/test/suite/wtscenario.py
index 7fad7c228fb..8576b3ac876 100644
--- a/test/suite/wtscenario.py
+++ b/test/suite/wtscenario.py
@@ -64,11 +64,37 @@ def log2chr(val):
megabyte = 1024 * 1024
+def make_scenarios(*args, **kwargs):
+ """
+ The standard way to create scenarios for WT tests.
+ Scenarios can be combined by listing them all as arguments.
+ A final prune= and/or prunelong= argument may be given that
+ forces the list of entries in the scenario to be pruned.
+ The result is a (combined) scenario that has been checked
+ for name duplicates and has been given names and numbers.
+ """
+ scenes = multiply_scenarios('.', *args)
+ pruneval = None
+ prunelong = None
+ for key in kwargs:
+ if key == 'prune':
+ pruneval = kwargs[key]
+ elif key == 'prunelong':
+ prunelong = kwargs[key]
+ else:
+ raise AssertionError(
+ 'make_scenarios: unexpected named arg: ' + key)
+ if pruneval != None or prunelong != None:
+ pruneval = pruneval if pruneval != None else -1
+ prunelong = prunelong if prunelong != None else -1
+ scenes = prune_scenarios(scenes, pruneval, prunelong)
+ return number_scenarios(scenes)
+
def check_scenarios(scenes):
"""
- Make sure all scenarios have unique names
+ Make sure all scenarios have unique case insensitive names
"""
- assert len(scenes) == len(dict(scenes))
+ assert len(scenes) == len(dict((k.lower(), v) for k, v in scenes))
return scenes
def multiply_scenarios(sep, *args):
@@ -81,8 +107,8 @@ def multiply_scenarios(sep, *args):
result = scenes
else:
total = []
- for scena in scenes:
- for scenb in result:
+ for scena in result:
+ for scenb in scenes:
# Create a merged scenario with a concatenated name
name = scena[0] + sep + scenb[0]
tdict = {}
@@ -235,7 +261,7 @@ class wtscenario:
scen.lmax = lmax
scen.cache_size = cache
s.append((scen.shortName(), dict(session_create_scenario=scen)))
- return s
+ return make_scenarios(s)
def shortName(self):
"""
diff --git a/test/suite/wttest.py b/test/suite/wttest.py
index 9e430fcdba7..788dd5d0307 100644
--- a/test/suite/wttest.py
+++ b/test/suite/wttest.py
@@ -212,8 +212,8 @@ class WiredTigerTestCase(unittest.TestCase):
# help distinguish tests.
scen = ''
if hasattr(self, 'scenario_number') and hasattr(self, 'scenario_name'):
- scen = '(scenario ' + str(self.scenario_number) + \
- ': ' + self.scenario_name + ')'
+ scen = ' -s ' + str(self.scenario_number) + \
+ ' (' + self.scenario_name + ')'
return self.simpleName() + scen
def simpleName(self):
@@ -293,6 +293,8 @@ class WiredTigerTestCase(unittest.TestCase):
raise Exception(self.testdir + ": cannot remove directory")
os.makedirs(self.testdir)
os.chdir(self.testdir)
+ with open('testname.txt', 'w+') as namefile:
+ namefile.write(str(self) + '\n')
self.fdSetUp()
# tearDown needs a conn field, set it here in case the open fails.
self.conn = None
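
The net effect of the two wttest.py changes above: a scenario test now prints its name as 'simpleName -s <scenario_number> (<scenario_name>)', and setUp() records that string in a testname.txt file inside the test's work directory. A minimal stand-in reproducing only the new suffix formatting (the class, test name and scenario values below are invented; the real logic is the WiredTigerTestCase code shown above):

# Invented stand-in for the name formatting; illustrative only.
class FakeScenarioTest(object):
    scenario_number = 7
    scenario_name = 's_on.c_off'

    def simpleName(self):
        return 'test_txn15.test_sync'

    def __str__(self):
        scen = ''
        if hasattr(self, 'scenario_number') and hasattr(self, 'scenario_name'):
            scen = ' -s ' + str(self.scenario_number) + \
                ' (' + self.scenario_name + ')'
        return self.simpleName() + scen

print(FakeScenarioTest())   # test_txn15.test_sync -s 7 (s_on.c_off)
# setUp() now writes the same string into testname.txt in the test's
# work directory, so a failed directory can be matched back to its test.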
diff --git a/test/thread/smoke.sh b/test/thread/smoke.sh
index 9a235b1d8e9..aa2f86c1def 100755
--- a/test/thread/smoke.sh
+++ b/test/thread/smoke.sh
@@ -4,10 +4,10 @@ set -e
# Smoke-test format as part of running "make check".
$TEST_WRAPPER ./t -t f
-$TEST_WRAPPER ./t -S -F -t f
+$TEST_WRAPPER ./t -S -F -n 1000 -t f
$TEST_WRAPPER ./t -t r
-$TEST_WRAPPER ./t -S -F -t r
+$TEST_WRAPPER ./t -S -F -n 1000 -t r
$TEST_WRAPPER ./t -t v
-$TEST_WRAPPER ./t -S -F -t v
+$TEST_WRAPPER ./t -S -F -n 1000 -t v
diff --git a/test/utility/misc.c b/test/utility/misc.c
index dfc655dec1a..dffd29a5b6a 100644
--- a/test/utility/misc.c
+++ b/test/utility/misc.c
@@ -192,3 +192,18 @@ dstrdup(const void *str)
return (p);
testutil_die(errno, "strdup");
}
+
+/*
+ * dstrndup --
+ * Call emulating strndup, dying on failure. Don't use actual strndup here
+ * as it is not supported within MSVC.
+ */
+void *
+dstrndup(const char *str, size_t len)
+{
+ char *p;
+
+ p = dcalloc(len + 1, sizeof(char));
+ memcpy(p, str, len);
+ return (p);
+}
diff --git a/test/utility/test_util.h b/test/utility/test_util.h
index 66ff8de2d19..821e06084d2 100644
--- a/test/utility/test_util.h
+++ b/test/utility/test_util.h
@@ -115,6 +115,7 @@ void *dcalloc(size_t, size_t);
void *dmalloc(size_t);
void *drealloc(void *, size_t);
void *dstrdup(const void *);
+void *dstrndup(const char *, size_t);
void testutil_clean_work_dir(char *);
void testutil_cleanup(TEST_OPTS *);
void testutil_make_work_dir(char *);
diff --git a/tools/wtstats/stat_data.py b/tools/wtstats/stat_data.py
index a79cf1faf5e..b93f2449c63 100644
--- a/tools/wtstats/stat_data.py
+++ b/tools/wtstats/stat_data.py
@@ -3,17 +3,19 @@
no_scale_per_second_list = [
'async: current work queue length',
'async: maximum work queue length',
+ 'cache: bytes belonging to page images in the cache',
'cache: bytes currently in the cache',
+ 'cache: bytes not belonging to page images in the cache',
'cache: eviction currently operating in aggressive mode',
'cache: files with active eviction walks',
'cache: hazard pointer maximum array length',
'cache: maximum bytes configured',
'cache: maximum page size at eviction',
+ 'cache: overflow values cached in memory',
'cache: pages currently held in the cache',
'cache: percentage overhead',
'cache: tracked bytes belonging to internal pages in the cache',
'cache: tracked bytes belonging to leaf pages in the cache',
- 'cache: tracked bytes belonging to overflow pages in the cache',
'cache: tracked dirty bytes in the cache',
'cache: tracked dirty pages in the cache',
'connection: files currently open',
@@ -28,6 +30,22 @@ no_scale_per_second_list = [
'reconciliation: split objects currently awaiting free',
'session: open cursor count',
'session: open session count',
+ 'session: table compact failed calls',
+ 'session: table compact successful calls',
+ 'session: table create failed calls',
+ 'session: table create successful calls',
+ 'session: table drop failed calls',
+ 'session: table drop successful calls',
+ 'session: table rebalance failed calls',
+ 'session: table rebalance successful calls',
+ 'session: table rename failed calls',
+ 'session: table rename successful calls',
+ 'session: table salvage failed calls',
+ 'session: table salvage successful calls',
+ 'session: table truncate failed calls',
+ 'session: table truncate successful calls',
+ 'session: table verify failed calls',
+ 'session: table verify successful calls',
'thread-state: active filesystem fsync calls',
'thread-state: active filesystem read calls',
'thread-state: active filesystem write calls',
@@ -36,7 +54,10 @@ no_scale_per_second_list = [
'transaction: transaction checkpoint max time (msecs)',
'transaction: transaction checkpoint min time (msecs)',
'transaction: transaction checkpoint most recent time (msecs)',
+ 'transaction: transaction checkpoint scrub dirty target',
+ 'transaction: transaction checkpoint scrub time (msecs)',
'transaction: transaction checkpoint total time (msecs)',
+ 'transaction: transaction fsync duration for checkpoint after allocating the transaction ID (usecs)',
'transaction: transaction range of IDs currently pinned',
'transaction: transaction range of IDs currently pinned by a checkpoint',
'transaction: transaction range of IDs currently pinned by named snapshots',
@@ -64,6 +85,7 @@ no_scale_per_second_list = [
'btree: overflow pages',
'btree: row-store internal pages',
'btree: row-store leaf pages',
+ 'cache: bytes currently in the cache',
'cache: overflow values cached in memory',
'LSM: bloom filters in the LSM tree',
'LSM: chunks in the LSM tree',
@@ -74,7 +96,9 @@ no_scale_per_second_list = [
]
no_clear_list = [
'async: maximum work queue length',
+ 'cache: bytes belonging to page images in the cache',
'cache: bytes currently in the cache',
+ 'cache: bytes not belonging to page images in the cache',
'cache: eviction currently operating in aggressive mode',
'cache: files with active eviction walks',
'cache: maximum bytes configured',
@@ -83,7 +107,6 @@ no_clear_list = [
'cache: percentage overhead',
'cache: tracked bytes belonging to internal pages in the cache',
'cache: tracked bytes belonging to leaf pages in the cache',
- 'cache: tracked bytes belonging to overflow pages in the cache',
'cache: tracked dirty bytes in the cache',
'cache: tracked dirty pages in the cache',
'connection: files currently open',
@@ -98,6 +121,22 @@ no_clear_list = [
'reconciliation: split objects currently awaiting free',
'session: open cursor count',
'session: open session count',
+ 'session: table compact failed calls',
+ 'session: table compact successful calls',
+ 'session: table create failed calls',
+ 'session: table create successful calls',
+ 'session: table drop failed calls',
+ 'session: table drop successful calls',
+ 'session: table rebalance failed calls',
+ 'session: table rebalance successful calls',
+ 'session: table rename failed calls',
+ 'session: table rename successful calls',
+ 'session: table salvage failed calls',
+ 'session: table salvage successful calls',
+ 'session: table truncate failed calls',
+ 'session: table truncate successful calls',
+ 'session: table verify failed calls',
+ 'session: table verify successful calls',
'thread-state: active filesystem fsync calls',
'thread-state: active filesystem read calls',
'thread-state: active filesystem write calls',
@@ -106,11 +145,15 @@ no_clear_list = [
'transaction: transaction checkpoint max time (msecs)',
'transaction: transaction checkpoint min time (msecs)',
'transaction: transaction checkpoint most recent time (msecs)',
+ 'transaction: transaction checkpoint scrub dirty target',
+ 'transaction: transaction checkpoint scrub time (msecs)',
'transaction: transaction checkpoint total time (msecs)',
+ 'transaction: transaction fsync duration for checkpoint after allocating the transaction ID (usecs)',
'transaction: transaction range of IDs currently pinned',
'transaction: transaction range of IDs currently pinned by a checkpoint',
'transaction: transaction range of IDs currently pinned by named snapshots',
'btree: btree checkpoint generation',
+ 'cache: bytes currently in the cache',
'session: open cursor count',
]
prefix_list = [
diff --git a/tools/wtstats/wtstats.py b/tools/wtstats/wtstats.py
index ff62d99e825..3549031c30f 100755
--- a/tools/wtstats/wtstats.py
+++ b/tools/wtstats/wtstats.py
@@ -137,6 +137,8 @@ def parse_wtperf_file(file, result):
for i, v in enumerate(values):
if v == 'N':
v = 0
+ if v == 'Y':
+ v = 1
# convert us to ms
if '(ms)' in headings[i]:
v = float(v) / 1000.0
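
For clarity, a standalone sketch of the per-value normalization parse_wtperf_file() performs after this change: 'N'/'Y' monitor flags become 0/1 so they can be plotted, and, as before, columns whose heading carries '(ms)' are scaled from microseconds to milliseconds. The headings and row below are invented for illustration; this is not the wtstats parser itself.

# Standalone sketch of the value normalization in the hunk above.
def normalize(headings, values):
    out = []
    for i, v in enumerate(values):
        if v == 'N':
            v = 0
        if v == 'Y':
            v = 1
        # scale microsecond readings to ms for '(ms)'-tagged columns,
        # matching the pre-existing conversion
        if '(ms)' in headings[i]:
            v = float(v) / 1000.0
        out.append(v)
    return out

print(normalize(['checkpoint running', 'read maximum latency(ms)'],
                ['Y', '2500']))
# [1, 2.5]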