author     Alex Gorrod <alexander.gorrod@mongodb.com>    2016-08-04 16:07:16 +1000
committer  Alex Gorrod <alexander.gorrod@mongodb.com>    2016-08-04 16:07:16 +1000
commit     a9e96961abc9dd20d464bdeb120d792166ee4cf9 (patch)
tree       aa569dc7ba001747a4c10902dcad759ae0999ae0
parent     d8fb874fc40989cb9675e56ca80b3b64e6fa2ee3 (diff)
parent     034ecbf55bb22d05c137aa8ef62a070964bf2748 (diff)
download   mongo-a9e96961abc9dd20d464bdeb120d792166ee4cf9.tar.gz
Merge branch 'develop' into mongodb-3.4
-rw-r--r--  SConstruct | 2
-rw-r--r--  bench/wtperf/Makefile.am | 12
-rw-r--r--  bench/wtperf/idle_table_cycle.c | 3
-rw-r--r--  bench/wtperf/runners/checkpoint_schema_race.wtperf | 20
-rw-r--r--  bench/wtperf/runners/evict-btree-stress-multi.wtperf | 12
-rw-r--r--  bench/wtperf/runners/fruit-lsm.wtperf | 22
-rw-r--r--  bench/wtperf/runners/fruit-short.wtperf | 20
-rw-r--r--  bench/wtperf/runners/log-append-large.wtperf | 10
-rw-r--r--  bench/wtperf/runners/log-append-zero.wtperf | 8
-rw-r--r--  bench/wtperf/runners/log-append.wtperf | 8
-rw-r--r--  bench/wtperf/runners/log-nockpt.wtperf | 12
-rw-r--r--  bench/wtperf/runners/log-noprealloc.wtperf | 11
-rw-r--r--  bench/wtperf/runners/log.wtperf | 27
-rw-r--r--  bench/wtperf/runners/multi-btree-zipfian-populate.wtperf | 19
-rw-r--r--  bench/wtperf/runners/multi-btree-zipfian-workload.wtperf | 18
-rw-r--r--  bench/wtperf/runners/overflow-10k-short.wtperf | 19
-rw-r--r--  bench/wtperf/runners/overflow-10k.wtperf | 16
-rw-r--r--  bench/wtperf/runners/overflow-130k-short.wtperf | 19
-rw-r--r--  bench/wtperf/runners/overflow-130k.wtperf | 18
-rw-r--r--  bench/wtperf/wtperf.c | 166
-rw-r--r--  bench/wtperf/wtperf.h | 84
-rw-r--r--  bench/wtperf/wtperf_opt.i | 1
-rw-r--r--  bench/wtperf/wtperf_throttle.c | 13
-rw-r--r--  build_posix/Make.subdirs | 8
-rw-r--r--  build_posix/aclocal/ax_pkg_swig.m4 | 10
-rw-r--r--  dist/api_data.py | 228
-rw-r--r--  dist/flags.py | 5
-rwxr-xr-x  dist/s_all | 16
-rw-r--r--  dist/s_string.ok | 9
-rwxr-xr-x  dist/s_style | 2
-rw-r--r--  dist/stat_data.py | 48
-rw-r--r--  examples/c/ex_all.c | 27
-rw-r--r--  examples/c/ex_file_system.c | 39
-rw-r--r--  examples/java/com/wiredtiger/examples/ex_all.java | 22
-rw-r--r--  ext/compressors/zlib/zlib_compress.c | 2
-rw-r--r--  src/async/async_api.c | 12
-rw-r--r--  src/block/block_ckpt.c | 2
-rw-r--r--  src/block/block_ext.c | 7
-rw-r--r--  src/block/block_mgr.c | 13
-rw-r--r--  src/block/block_open.c | 27
-rw-r--r--  src/block/block_session.c | 2
-rw-r--r--  src/block/block_write.c | 50
-rw-r--r--  src/btree/bt_curnext.c | 14
-rw-r--r--  src/btree/bt_curprev.c | 6
-rw-r--r--  src/btree/bt_discard.c | 4
-rw-r--r--  src/btree/bt_handle.c | 2
-rw-r--r--  src/btree/bt_huffman.c | 3
-rw-r--r--  src/btree/bt_io.c | 9
-rw-r--r--  src/btree/bt_ovfl.c | 2
-rw-r--r--  src/btree/bt_page.c | 1
-rw-r--r--  src/btree/bt_read.c | 27
-rw-r--r--  src/btree/bt_split.c | 141
-rw-r--r--  src/btree/bt_stat.c | 5
-rw-r--r--  src/btree/bt_sync.c | 58
-rw-r--r--  src/btree/bt_walk.c | 20
-rw-r--r--  src/btree/row_srch.c | 2
-rw-r--r--  src/config/config_def.c | 209
-rw-r--r--  src/conn/conn_api.c | 19
-rw-r--r--  src/conn/conn_cache.c | 40
-rw-r--r--  src/conn/conn_ckpt.c | 71
-rw-r--r--  src/conn/conn_dhandle.c | 10
-rw-r--r--  src/conn/conn_log.c | 52
-rw-r--r--  src/conn/conn_stat.c | 159
-rw-r--r--  src/cursor/cur_backup.c | 58
-rw-r--r--  src/cursor/cur_config.c | 4
-rw-r--r--  src/cursor/cur_ds.c | 5
-rw-r--r--  src/cursor/cur_dump.c | 4
-rw-r--r--  src/cursor/cur_file.c | 38
-rw-r--r--  src/cursor/cur_index.c | 52
-rw-r--r--  src/cursor/cur_log.c | 26
-rw-r--r--  src/cursor/cur_metadata.c | 11
-rw-r--r--  src/cursor/cur_stat.c | 28
-rw-r--r--  src/cursor/cur_table.c | 15
-rw-r--r--  src/docs/security.dox | 21
-rw-r--r--  src/docs/spell.ok | 3
-rw-r--r--  src/docs/statistics.dox | 31
-rw-r--r--  src/docs/upgrading.dox | 44
-rw-r--r--  src/docs/wtperf.dox | 2
-rw-r--r--  src/evict/evict_lru.c | 634
-rw-r--r--  src/evict/evict_page.c | 190
-rw-r--r--  src/include/api.h | 2
-rw-r--r--  src/include/block.h | 2
-rw-r--r--  src/include/btmem.h | 36
-rw-r--r--  src/include/btree.h | 15
-rw-r--r--  src/include/btree.i | 188
-rw-r--r--  src/include/cache.h | 35
-rw-r--r--  src/include/cache.i | 95
-rw-r--r--  src/include/connection.h | 2
-rw-r--r--  src/include/cursor.h | 9
-rw-r--r--  src/include/cursor.i | 7
-rw-r--r--  src/include/extern.h | 23
-rw-r--r--  src/include/flags.h | 13
-rw-r--r--  src/include/hardware.h | 11
-rw-r--r--  src/include/intpack.i | 8
-rw-r--r--  src/include/os_fhandle.i | 12
-rw-r--r--  src/include/os_fs.i | 75
-rw-r--r--  src/include/os_fstream.i | 2
-rw-r--r--  src/include/queue.h | 174
-rw-r--r--  src/include/stat.h | 38
-rw-r--r--  src/include/txn.h | 1
-rw-r--r--  src/include/txn.i | 14
-rw-r--r--  src/include/wiredtiger.in | 667
-rw-r--r--  src/log/log.c | 59
-rw-r--r--  src/lsm/lsm_cursor.c | 23
-rw-r--r--  src/lsm/lsm_tree.c | 14
-rw-r--r--  src/lsm/lsm_work_unit.c | 2
-rw-r--r--  src/meta/meta_table.c | 3
-rw-r--r--  src/meta/meta_track.c | 13
-rw-r--r--  src/meta/meta_turtle.c | 11
-rw-r--r--  src/os_common/filename.c | 54
-rw-r--r--  src/os_common/os_fhandle.c | 32
-rw-r--r--  src/os_common/os_fs_inmemory.c | 12
-rw-r--r--  src/os_common/os_fstream.c | 2
-rw-r--r--  src/os_posix/os_fs.c | 119
-rw-r--r--  src/os_win/os_fs.c | 28
-rw-r--r--  src/os_win/os_path.c | 26
-rw-r--r--  src/reconcile/rec_write.c | 319
-rw-r--r--  src/schema/schema_rename.c | 2
-rw-r--r--  src/session/session_api.c | 168
-rw-r--r--  src/session/session_compact.c | 14
-rw-r--r--  src/support/err.c | 6
-rw-r--r--  src/support/stat.c | 121
-rw-r--r--  src/txn/txn.c | 59
-rw-r--r--  src/txn/txn_ckpt.c | 474
-rw-r--r--  src/txn/txn_log.c | 2
-rw-r--r--  src/utilities/util_dump.c | 48
-rw-r--r--  test/cursor_order/cursor_order_ops.c | 3
-rw-r--r--  test/format/ops.c | 6
-rwxr-xr-x  test/format/smoke.sh | 2
-rw-r--r--  test/manydbs/Makefile.am | 3
-rwxr-xr-x  test/manydbs/smoke.sh | 18
-rw-r--r--  test/mciproject.yml | 32
-rw-r--r--  test/recovery/Makefile.am | 3
-rw-r--r--  test/recovery/random-abort.c | 18
-rwxr-xr-x  test/recovery/smoke.sh | 8
-rw-r--r--  test/suite/helper.py | 43
-rw-r--r--  test/suite/run.py | 41
-rw-r--r--  test/suite/test_async01.py | 4
-rw-r--r--  test/suite/test_async02.py | 4
-rw-r--r--  test/suite/test_backup02.py | 4
-rw-r--r--  test/suite/test_backup03.py | 36
-rw-r--r--  test/suite/test_backup04.py | 4
-rw-r--r--  test/suite/test_backup05.py | 1
-rw-r--r--  test/suite/test_base02.py | 4
-rw-r--r--  test/suite/test_base05.py | 4
-rw-r--r--  test/suite/test_bug003.py | 4
-rw-r--r--  test/suite/test_bug006.py | 4
-rw-r--r--  test/suite/test_bug008.py | 4
-rw-r--r--  test/suite/test_bug009.py | 1
-rw-r--r--  test/suite/test_bug011.py | 2
-rw-r--r--  test/suite/test_bug016.py | 109
-rw-r--r--  test/suite/test_bulk01.py | 4
-rw-r--r--  test/suite/test_bulk02.py | 7
-rw-r--r--  test/suite/test_checkpoint01.py | 14
-rw-r--r--  test/suite/test_checkpoint02.py | 4
-rw-r--r--  test/suite/test_colgap.py | 6
-rw-r--r--  test/suite/test_collator.py | 1
-rw-r--r--  test/suite/test_compact01.py | 4
-rw-r--r--  test/suite/test_compact02.py | 7
-rw-r--r--  test/suite/test_compress01.py | 4
-rw-r--r--  test/suite/test_config03.py | 7
-rw-r--r--  test/suite/test_cursor01.py | 4
-rw-r--r--  test/suite/test_cursor02.py | 4
-rw-r--r--  test/suite/test_cursor03.py | 4
-rw-r--r--  test/suite/test_cursor04.py | 4
-rw-r--r--  test/suite/test_cursor06.py | 4
-rw-r--r--  test/suite/test_cursor07.py | 4
-rw-r--r--  test/suite/test_cursor08.py | 12
-rw-r--r--  test/suite/test_cursor09.py | 4
-rw-r--r--  test/suite/test_cursor_compare.py | 4
-rw-r--r--  test/suite/test_cursor_pin.py | 4
-rw-r--r--  test/suite/test_cursor_random.py | 8
-rw-r--r--  test/suite/test_cursor_random02.py | 4
-rw-r--r--  test/suite/test_drop.py | 4
-rw-r--r--  test/suite/test_dump.py | 11
-rw-r--r--  test/suite/test_dupc.py | 4
-rw-r--r--  test/suite/test_durability01.py | 1
-rw-r--r--  test/suite/test_empty.py | 4
-rw-r--r--  test/suite/test_encrypt01.py | 5
-rw-r--r--  test/suite/test_encrypt02.py | 4
-rw-r--r--  test/suite/test_encrypt03.py | 4
-rw-r--r--  test/suite/test_encrypt04.py | 5
-rw-r--r--  test/suite/test_encrypt05.py | 5
-rw-r--r--  test/suite/test_encrypt06.py | 4
-rw-r--r--  test/suite/test_encrypt07.py | 1
-rw-r--r--  test/suite/test_excl.py | 4
-rw-r--r--  test/suite/test_huffman01.py | 4
-rw-r--r--  test/suite/test_huffman02.py | 4
-rw-r--r--  test/suite/test_index02.py | 68
-rw-r--r--  test/suite/test_inmem01.py | 88
-rw-r--r--  test/suite/test_intpack.py | 35
-rw-r--r--  test/suite/test_join01.py | 10
-rw-r--r--  test/suite/test_join02.py | 4
-rw-r--r--  test/suite/test_join03.py | 1
-rw-r--r--  test/suite/test_join04.py | 1
-rw-r--r--  test/suite/test_join05.py | 1
-rw-r--r--  test/suite/test_join06.py | 4
-rw-r--r--  test/suite/test_join07.py | 4
-rw-r--r--  test/suite/test_join08.py | 1
-rw-r--r--  test/suite/test_jsondump01.py | 50
-rw-r--r--  test/suite/test_lsm01.py | 8
-rw-r--r--  test/suite/test_metadata_cursor01.py | 4
-rw-r--r--  test/suite/test_nsnap01.py | 1
-rw-r--r--  test/suite/test_nsnap02.py | 1
-rw-r--r--  test/suite/test_nsnap03.py | 1
-rw-r--r--  test/suite/test_nsnap04.py | 1
-rw-r--r--  test/suite/test_overwrite.py | 4
-rw-r--r--  test/suite/test_perf001.py | 4
-rw-r--r--  test/suite/test_readonly01.py | 5
-rw-r--r--  test/suite/test_rebalance.py | 4
-rw-r--r--  test/suite/test_reconfig01.py | 19
-rw-r--r--  test/suite/test_reconfig02.py | 33
-rw-r--r--  test/suite/test_rename.py | 4
-rw-r--r--  test/suite/test_schema02.py | 4
-rw-r--r--  test/suite/test_schema03.py | 15
-rw-r--r--  test/suite/test_schema04.py | 4
-rw-r--r--  test/suite/test_schema05.py | 4
-rw-r--r--  test/suite/test_schema06.py | 5
-rw-r--r--  test/suite/test_split.py | 1
-rw-r--r--  test/suite/test_stat01.py | 4
-rw-r--r--  test/suite/test_stat02.py | 35
-rw-r--r--  test/suite/test_stat03.py | 4
-rw-r--r--  test/suite/test_stat04.py | 10
-rw-r--r--  test/suite/test_stat05.py | 13
-rw-r--r--  test/suite/test_stat_log01.py | 16
-rw-r--r--  test/suite/test_sweep01.py | 4
-rw-r--r--  test/suite/test_sweep03.py | 4
-rw-r--r--  test/suite/test_truncate01.py | 13
-rw-r--r--  test/suite/test_truncate02.py | 5
-rw-r--r--  test/suite/test_txn01.py | 4
-rw-r--r--  test/suite/test_txn02.py | 12
-rw-r--r--  test/suite/test_txn03.py | 4
-rw-r--r--  test/suite/test_txn04.py | 4
-rw-r--r--  test/suite/test_txn05.py | 5
-rw-r--r--  test/suite/test_txn06.py | 1
-rw-r--r--  test/suite/test_txn07.py | 5
-rw-r--r--  test/suite/test_txn08.py | 1
-rw-r--r--  test/suite/test_txn09.py | 9
-rw-r--r--  test/suite/test_txn10.py | 1
-rw-r--r--  test/suite/test_txn12.py | 1
-rw-r--r--  test/suite/test_txn13.py | 4
-rw-r--r--  test/suite/test_txn14.py | 4
-rw-r--r--  test/suite/test_txn15.py | 4
-rw-r--r--  test/suite/test_upgrade.py | 4
-rw-r--r--  test/suite/test_util02.py | 4
-rw-r--r--  test/suite/test_util03.py | 4
-rw-r--r--  test/suite/test_util13.py | 4
-rw-r--r--  test/suite/wtscenario.py | 36
-rw-r--r--  test/suite/wttest.py | 6
-rwxr-xr-x  test/thread/smoke.sh | 6
-rw-r--r--  test/utility/misc.c | 15
-rw-r--r--  test/utility/test_util.h | 1
-rw-r--r--  tools/wtstats/stat_data.py | 47
-rwxr-xr-x  tools/wtstats/wtstats.py | 2
254 files changed, 4447 insertions, 2911 deletions
diff --git a/SConstruct b/SConstruct
index a5dd8761d6c..b0ce771e9bd 100644
--- a/SConstruct
+++ b/SConstruct
@@ -484,7 +484,7 @@ t = env.Program("wtperf", [
"bench/wtperf/wtperf_throttle.c",
"bench/wtperf/wtperf_truncate.c",
],
- LIBS=[wtlib, shim] + wtlibs)
+ LIBS=[wtlib, shim, testutil] + wtlibs)
Default(t)
#Build the Examples
diff --git a/bench/wtperf/Makefile.am b/bench/wtperf/Makefile.am
index cc1f84b5406..57792e3887f 100644
--- a/bench/wtperf/Makefile.am
+++ b/bench/wtperf/Makefile.am
@@ -1,13 +1,17 @@
-AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)/src/include
-
-LDADD = $(top_builddir)/libwiredtiger.la -lm
+AM_CPPFLAGS = -I$(top_builddir)
+AM_CPPFLAGS +=-I$(top_srcdir)/src/include
+AM_CPPFLAGS +=-I$(top_srcdir)/test/utility
noinst_PROGRAMS = wtperf
-wtperf_LDFLAGS = -static
wtperf_SOURCES =\
config.c idle_table_cycle.c misc.c track.c wtperf.c \
wtperf.h wtperf_opt.i wtperf_throttle.c wtperf_truncate.c
+wtperf_LDADD = $(top_builddir)/test/utility/libtest_util.la
+wtperf_LDADD +=$(top_builddir)/libwiredtiger.la
+wtperf_LDADD +=-lm
+wtperf_LDFLAGS = -static
+
TESTS = smoke.sh
AM_TESTS_ENVIRONMENT = rm -rf WT_TEST ; mkdir WT_TEST ;
# automake 1.11 compatibility
diff --git a/bench/wtperf/idle_table_cycle.c b/bench/wtperf/idle_table_cycle.c
index b699b5b9dd1..3c079bb560f 100644
--- a/bench/wtperf/idle_table_cycle.c
+++ b/bench/wtperf/idle_table_cycle.c
@@ -129,7 +129,8 @@ cycle_idle_tables(void *arg)
* Drop the table. Keep retrying on EBUSY failure - it is an
* expected return when checkpoints are happening.
*/
- while ((ret = session->drop(session, uri, "force")) == EBUSY)
+ while ((ret = session->drop(
+ session, uri, "force,checkpoint_wait=false")) == EBUSY)
__wt_sleep(1, 0);
if (ret != 0 && ret != EBUSY) {
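The retry loop above now passes "force,checkpoint_wait=false", so the drop skips the checkpoint lock and simply returns EBUSY when it conflicts with a running checkpoint. A minimal standalone sketch of the same pattern against the public WiredTiger API (the URI and the one-second back-off are illustrative; wtperf itself uses the internal __wt_sleep()):

    /* Retry a non-blocking, forced drop while checkpoints are running. */
    #include <errno.h>
    #include <unistd.h>
    #include "wiredtiger.h"

    static int
    drop_with_retry(WT_SESSION *session, const char *uri)
    {
        int ret;

        /* checkpoint_wait=false avoids the checkpoint lock; EBUSY means retry. */
        while ((ret = session->drop(
            session, uri, "force,checkpoint_wait=false")) == EBUSY)
            sleep(1);
        return (ret);
    }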
diff --git a/bench/wtperf/runners/checkpoint_schema_race.wtperf b/bench/wtperf/runners/checkpoint_schema_race.wtperf
new file mode 100644
index 00000000000..ade8e88ee9b
--- /dev/null
+++ b/bench/wtperf/runners/checkpoint_schema_race.wtperf
@@ -0,0 +1,20 @@
+# Check create and drop behavior concurrent with checkpoints (WT-2798).
+# Set up multiple tables and a cache size large enough that checkpoints can
+# take a long time.
+conn_config="cache_size=8GB,log=(enabled=false),checkpoint=(wait=30)"
+table_config="leaf_page_max=4k,internal_page_max=16k,type=file"
+icount=10000000
+table_count=100
+table_count_idle=100
+# Turn on create/drop of idle tables, but don't worry if individual operations
+# take a long time.
+idle_table_cycle=120
+populate_threads=5
+checkpoint_threads=0
+report_interval=5
+# 100 million
+random_range=10000000
+run_time=300
+# Setup a workload that dirties a lot of the cache
+threads=((count=2,reads=1),(count=2,inserts=1),(count=2,updates=1))
+value_sz=500
diff --git a/bench/wtperf/runners/evict-btree-stress-multi.wtperf b/bench/wtperf/runners/evict-btree-stress-multi.wtperf
new file mode 100644
index 00000000000..9699b9ae3bb
--- /dev/null
+++ b/bench/wtperf/runners/evict-btree-stress-multi.wtperf
@@ -0,0 +1,12 @@
+conn_config="cache_size=1G,eviction=(threads_max=4),session_max=2000"
+table_config="type=file"
+table_count=100
+icount=100000000
+report_interval=5
+run_time=600
+populate_threads=1
+threads=((count=100,updates=1,reads=4,ops_per_txn=30))
+# Warn if a latency over a quarter second is seen
+max_latency=250
+sample_interval=5
+sample_rate=1
diff --git a/bench/wtperf/runners/fruit-lsm.wtperf b/bench/wtperf/runners/fruit-lsm.wtperf
deleted file mode 100644
index e5817554201..00000000000
--- a/bench/wtperf/runners/fruit-lsm.wtperf
+++ /dev/null
@@ -1,22 +0,0 @@
-# wtperf options file: simulate riak and its test1 and test2 configuration
-# The configuration for the connection and table are from riak and the
-# specification of the data (count, size, threads) is from basho_bench.
-#
-conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600),log=(enabled=true),transaction_sync=(enabled=true,method=none),checkpoint=(wait=180),lsm_manager=(worker_thread_max=12)"
-#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600)"
-#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024"
-compact=true
-sess_config="isolation=snapshot"
-table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB),type=lsm,leaf_page_max=16K,leaf_item_max=4K,os_cache_dirty_max=16MB"
-icount=25000000
-key_sz=40
-value_sz=800
-#max_latency=2000
-pareto=20
-populate_threads=20
-report_interval=10
-random_value=true
-run_time=18000
-sample_interval=10
-table_count=8
-threads=((count=20,read=6,update=1))
diff --git a/bench/wtperf/runners/fruit-short.wtperf b/bench/wtperf/runners/fruit-short.wtperf
deleted file mode 100644
index 10cb423a92d..00000000000
--- a/bench/wtperf/runners/fruit-short.wtperf
+++ /dev/null
@@ -1,20 +0,0 @@
-# wtperf options file: simulate riak and its test1 and test2 configuration
-# The configuration for the connection and table are from riak and the
-# specification of the data (count, size, threads) is from basho_bench.
-#
-#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600)"
-conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,lsm_manager=(worker_thread_max=6)"
-compact=true
-sess_config="isolation=snapshot"
-table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB),type=lsm,leaf_page_max=16K"
-icount=25000000
-key_sz=40
-value_sz=800
-max_latency=2000
-pareto=20
-populate_threads=20
-report_interval=10
-random_value=true
-run_time=1800
-sample_interval=10
-threads=((count=20,read=6,update=1))
diff --git a/bench/wtperf/runners/log-append-large.wtperf b/bench/wtperf/runners/log-append-large.wtperf
deleted file mode 100644
index c1364c17c28..00000000000
--- a/bench/wtperf/runners/log-append-large.wtperf
+++ /dev/null
@@ -1,10 +0,0 @@
-# wtperf options file: Test a log file with a multi-threaded
-# append workload. We want to create a very large number of log file
-# switches with fewer records per log file than we have active threads.
-conn_config="cache_size=1G,log=(enabled=true,file_max=20MB),checkpoint=(log_size=1G)"
-table_config="type=file"
-icount=1000
-report_interval=5
-run_time=0
-value_sz=5000000
-populate_threads=8
diff --git a/bench/wtperf/runners/log-append-zero.wtperf b/bench/wtperf/runners/log-append-zero.wtperf
deleted file mode 100644
index 973d2cddd0d..00000000000
--- a/bench/wtperf/runners/log-append-zero.wtperf
+++ /dev/null
@@ -1,8 +0,0 @@
-# wtperf options file: Test a log file with a multi-threaded
-# append workload.
-conn_config="cache_size=1G,log=(enabled=true,file_max=20MB,zero_fill=true),checkpoint=(log_size=1G)"
-table_config="type=file"
-icount=50000000
-report_interval=5
-run_time=0
-populate_threads=8
diff --git a/bench/wtperf/runners/log-append.wtperf b/bench/wtperf/runners/log-append.wtperf
deleted file mode 100644
index 9d0a78e3c61..00000000000
--- a/bench/wtperf/runners/log-append.wtperf
+++ /dev/null
@@ -1,8 +0,0 @@
-# wtperf options file: Test a log file with a multi-threaded
-# append workload.
-conn_config="cache_size=1G,log=(enabled=true,file_max=20MB),checkpoint=(log_size=1G)"
-table_config="type=file"
-icount=50000000
-report_interval=5
-run_time=0
-populate_threads=8
diff --git a/bench/wtperf/runners/log-nockpt.wtperf b/bench/wtperf/runners/log-nockpt.wtperf
deleted file mode 100644
index a078cead740..00000000000
--- a/bench/wtperf/runners/log-nockpt.wtperf
+++ /dev/null
@@ -1,12 +0,0 @@
-# wtperf options file: Test performance with a log file enabled.
-# Set the log file reasonably small to catch log-swtich bottle
-# necks.
-conn_config="cache_size=1G,log=(enabled=true,file_max=20MB)"
-table_config="type=file"
-icount=50000
-report_interval=5
-run_time=40
-populate_threads=1
-random_range=50000000
-threads=((count=8,inserts=1))
-
diff --git a/bench/wtperf/runners/log-noprealloc.wtperf b/bench/wtperf/runners/log-noprealloc.wtperf
deleted file mode 100644
index 66032f599aa..00000000000
--- a/bench/wtperf/runners/log-noprealloc.wtperf
+++ /dev/null
@@ -1,11 +0,0 @@
-# wtperf options file: Test performance with a log file enabled.
-# Set the log file reasonably small to catch log-swtich bottle
-# necks.
-conn_config="cache_size=1G,log=(enabled=true,file_max=200K,prealloc=false),checkpoint=(log_size=500MB)"
-table_config="type=file"
-icount=50000
-report_interval=5
-run_time=120
-populate_threads=1
-random_range=50000000
-threads=((count=8,inserts=1))
diff --git a/bench/wtperf/runners/log.wtperf b/bench/wtperf/runners/log.wtperf
index 32a9cc3b0a6..6cf50dfb5a5 100644
--- a/bench/wtperf/runners/log.wtperf
+++ b/bench/wtperf/runners/log.wtperf
@@ -1,10 +1,27 @@
+#
# wtperf options file: Test performance with a log file enabled.
# Set the log file small to catch log-swtich bottlenecks.
-conn_config="cache_size=1G,log=(enabled=true,file_max=200K),checkpoint=(log_size=500MB)"
+#
+# Perform updates instead of inserts to stress logging not eviction,
+# page splits or reconciliation. Have it fit in cache.
+#
+# We expect this test can and will be run in other forms from the command
+# line to change log file size, pre-allocation, zero filling, logging off
+# and checkpoint off.
+#
+# Jenkins runs for perf testing:
+# - Config as-is
+# - Config + "-C "log=(enabled,file_max=1M)": small log files and switching
+# - Config + "-C "log=(enabled,zero_fill=true,file_max=1M)": zero-filling
+# - Config + "-C "checkpoint=(wait=0)": no checkpoints
+# - Config + "-C "log=(enabled,prealloc=false,file_max=1M)": no pre-allocation
+#
+conn_config="cache_size=5G,log=(enabled=true),checkpoint=(log_size=500M),eviction=(threads_max=4)"
table_config="type=file"
-icount=50000
+icount=1000000
report_interval=5
-run_time=120
+run_time=180
populate_threads=1
-random_range=50000000
-threads=((count=8,inserts=1))
+threads=((count=8,updates=1))
+# Warm up the cache for a minute.
+warmup=60
diff --git a/bench/wtperf/runners/multi-btree-zipfian-populate.wtperf b/bench/wtperf/runners/multi-btree-zipfian-populate.wtperf
new file mode 100644
index 00000000000..ddd9c055eac
--- /dev/null
+++ b/bench/wtperf/runners/multi-btree-zipfian-populate.wtperf
@@ -0,0 +1,19 @@
+# Create a set of tables with uneven distribution of data
+conn_config="cache_size=1G,eviction=(threads_max=4),file_manager=(close_idle_time=100000),checkpoint=(wait=60,log_size=2GB),statistics=(fast),statistics_log=(wait=5,json),session_max=1000"
+table_config="type=file"
+table_count=100
+icount=0
+random_range=1000000000
+pareto=10
+range_partition=true
+report_interval=5
+
+run_ops=10000000
+populate_threads=0
+icount=0
+threads=((count=20,inserts=1))
+
+# Warn if a latency over 1 second is seen
+max_latency=1000
+sample_interval=5
+sample_rate=1
diff --git a/bench/wtperf/runners/multi-btree-zipfian-workload.wtperf b/bench/wtperf/runners/multi-btree-zipfian-workload.wtperf
new file mode 100644
index 00000000000..380350c88c8
--- /dev/null
+++ b/bench/wtperf/runners/multi-btree-zipfian-workload.wtperf
@@ -0,0 +1,18 @@
+# Read from a set of tables with uneven distribution of data
+conn_config="cache_size=1G,eviction=(threads_max=4),file_manager=(close_idle_time=100000),checkpoint=(wait=60,log_size=2GB),statistics=(fast),statistics_log=(wait=5,json),session_max=1000"
+table_config="type=file"
+table_count=100
+icount=0
+random_range=1000000000
+pareto=10
+range_partition=true
+report_interval=5
+create=false
+
+run_time=600
+threads=((count=20,reads=1))
+
+# Warn if a latency over 1 second is seen
+max_latency=1000
+sample_interval=5
+sample_rate=1
diff --git a/bench/wtperf/runners/overflow-10k-short.wtperf b/bench/wtperf/runners/overflow-10k-short.wtperf
deleted file mode 100644
index 47228079db8..00000000000
--- a/bench/wtperf/runners/overflow-10k-short.wtperf
+++ /dev/null
@@ -1,19 +0,0 @@
-# wtperf options file: simulate riak and a short form of its voxer config.
-# The configuration for the connection and table are from riak and the
-# specification of the data (count, size, threads) is from basho_bench.
-#
-#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600)"
-conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,lsm_manager=(worker_thread_max=6)"
-compact=true
-compression="snappy"
-sess_config="isolation=snapshot"
-table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB),type=lsm,leaf_page_max=16K,os_cache_dirty_max=16MB"
-icount=15000
-key_sz=40
-value_sz=10000
-max_latency=2000
-populate_threads=1
-report_interval=5
-random_value=true
-run_time=300
-threads=((count=10,read=1),(count=10,update=1))
diff --git a/bench/wtperf/runners/overflow-10k.wtperf b/bench/wtperf/runners/overflow-10k.wtperf
index 9b4ed2acaee..5d7eeea9cf2 100644
--- a/bench/wtperf/runners/overflow-10k.wtperf
+++ b/bench/wtperf/runners/overflow-10k.wtperf
@@ -1,9 +1,7 @@
-# wtperf options file: simulate riak and its test1 and test2 configuration
-# The configuration for the connection and table are from riak and the
-# specification of the data (count, size, threads) is from basho_bench.
#
-#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600)"
-conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,lsm_manager=(worker_thread_max=6)"
+# Run with overflow items and LSM.
+#
+conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,lsm_manager=(worker_thread_max=6),statistics=(fast),statistics_log=(wait=10)"
compact=true
compression="snappy"
sess_config="isolation=snapshot"
@@ -13,8 +11,8 @@ key_sz=40
value_sz=10000
max_latency=2000
populate_threads=1
-report_interval=10
+report_interval=5
random_value=true
-run_time=18000
-sample_interval=10
-threads=((count=20,read=1,update=1))
+run_time=300
+threads=((count=10,read=1),(count=10,update=1))
+warmup=30
diff --git a/bench/wtperf/runners/overflow-130k-short.wtperf b/bench/wtperf/runners/overflow-130k-short.wtperf
deleted file mode 100644
index 83f67062bf8..00000000000
--- a/bench/wtperf/runners/overflow-130k-short.wtperf
+++ /dev/null
@@ -1,19 +0,0 @@
-# wtperf options file: simulate riak and a short form of its voxer config.
-# The configuration for the connection and table are from riak and the
-# specification of the data (count, size, threads) is from basho_bench.
-#
-#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600)"
-conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,lsm_manager=(worker_thread_max=6)"
-compact=true
-compression="snappy"
-sess_config="isolation=snapshot"
-table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB),type=lsm,leaf_page_max=16K,os_cache_dirty_max=16MB"
-icount=15000
-key_sz=40
-value_sz=130000
-max_latency=2000
-populate_threads=1
-report_interval=5
-random_value=true
-run_time=300
-threads=((count=10,read=1),(count=10,update=1))
diff --git a/bench/wtperf/runners/overflow-130k.wtperf b/bench/wtperf/runners/overflow-130k.wtperf
index a3439f0c575..2be01afd08a 100644
--- a/bench/wtperf/runners/overflow-130k.wtperf
+++ b/bench/wtperf/runners/overflow-130k.wtperf
@@ -1,20 +1,18 @@
-# wtperf options file: simulate riak and its test1 and test2 configuration
-# The configuration for the connection and table are from riak and the
-# specification of the data (count, size, threads) is from basho_bench.
#
-#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600)"
-conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,lsm_manager=(worker_thread_max=6)"
+# Run with very large overflow items and btree.
+#
+conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,statistics=(fast),statistics_log=(wait=10)"
compact=true
compression="snappy"
sess_config="isolation=snapshot"
-table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB),type=lsm,leaf_page_max=16K,os_cache_dirty_max=16MB"
+table_config="internal_page_max=128K,type=file,leaf_page_max=16K,os_cache_dirty_max=16MB,leaf_value_max=32K"
icount=15000
key_sz=40
value_sz=130000
max_latency=2000
populate_threads=1
-report_interval=10
+report_interval=5
random_value=true
-run_time=18000
-sample_interval=10
-threads=((count=20,read=1,update=1))
+run_time=300
+threads=((count=10,read=1),(count=10,update=1))
+warmup=30
diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c
index 9d35f6fa640..58271106d61 100644
--- a/bench/wtperf/wtperf.c
+++ b/bench/wtperf/wtperf.c
@@ -36,7 +36,6 @@ static const CONFIG default_cfg = {
NULL, /* reopen config */
NULL, /* base_uri */
NULL, /* uris */
- NULL, /* helium_mount */
NULL, /* conn */
NULL, /* logf */
NULL, /* async */
@@ -73,14 +72,14 @@ static const char * const debug_cconfig = "";
static const char * const debug_tconfig = "";
static void *checkpoint_worker(void *);
-static int create_tables(CONFIG *);
-static int drop_all_tables(CONFIG *);
+static int drop_all_tables(CONFIG *);
static int execute_populate(CONFIG *);
static int execute_workload(CONFIG *);
static int find_table_count(CONFIG *);
static void *monitor(void *);
static void *populate_thread(void *);
static void randomize_value(CONFIG_THREAD *, char *);
+static void recreate_dir(const char *);
static int start_all_runs(CONFIG *);
static int start_run(CONFIG *);
static int start_threads(CONFIG *,
@@ -93,10 +92,6 @@ static void *worker(void *);
static uint64_t wtperf_rand(CONFIG_THREAD *);
static uint64_t wtperf_value_range(CONFIG *);
-#define HELIUM_NAME "dev1"
-#define HELIUM_PATH \
- "../../ext/test/helium/.libs/libwiredtiger_helium.so"
-#define HELIUM_CONFIG ",type=helium"
#define INDEX_COL_NAMES ",columns=(key,val)"
/* Retrieve an ID for the next insert operation. */
@@ -155,6 +150,23 @@ randomize_value(CONFIG_THREAD *thread, char *value_buf)
}
/*
+ * Partition data by key ranges.
+ */
+static uint32_t
+map_key_to_table(CONFIG *cfg, uint64_t k)
+{
+ if (cfg->range_partition) {
+ /* Take care to return a result in [0..table_count-1]. */
+ if (k > cfg->icount + cfg->random_range)
+ return (0);
+ return ((uint32_t)((k - 1) /
+ ((cfg->icount + cfg->random_range + cfg->table_count - 1) /
+ cfg->table_count)));
+ } else
+ return ((uint32_t)(k % cfg->table_count));
+}
+
+/*
* Figure out and extend the size of the value string, used for growing
* updates. We know that the value to be updated is in the threads value
* scratch buffer.
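The new map_key_to_table() above optionally partitions keys by range instead of the old k % table_count hashing. A small sketch of the same arithmetic, using the table_count and random_range values from the zipfian runner configs added in this change (icount=0, random_range=1000000000, table_count=100), so each table covers 10,000,000 keys:

    /* Print a few key-to-table mappings under range partitioning. */
    #include <inttypes.h>
    #include <stdio.h>

    int
    main(void)
    {
        uint64_t icount = 0, random_range = 1000000000, table_count = 100;
        uint64_t keys_per_table =
            (icount + random_range + table_count - 1) / table_count;
        uint64_t k;

        for (k = 1; k <= icount + random_range; k += 250000000)
            printf("key %" PRIu64 " -> table %" PRIu64 "\n",
                k, (k - 1) / keys_per_table);
        return (0);
    }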
@@ -393,7 +405,7 @@ worker_async(void *arg)
* Then retry to get an async op.
*/
while ((ret = conn->async_new_op(
- conn, cfg->uris[next_val % cfg->table_count],
+ conn, cfg->uris[map_key_to_table(cfg, next_val)],
NULL, &cb, &asyncop)) == EBUSY)
(void)usleep(10000);
if (ret != 0)
@@ -466,7 +478,7 @@ do_range_reads(CONFIG *cfg, WT_CURSOR *cursor)
range_key_buf = &buf[0];
/* Save where the first key is for comparisons. */
- cursor->get_key(cursor, &range_key_buf);
+ testutil_check(cursor->get_key(cursor, &range_key_buf));
extract_key(range_key_buf, &next_val);
for (range = 0; range < cfg->read_range; ++range) {
@@ -477,7 +489,7 @@ do_range_reads(CONFIG *cfg, WT_CURSOR *cursor)
break;
/* Retrieve and decode the key */
- cursor->get_key(cursor, &range_key_buf);
+ testutil_check(cursor->get_key(cursor, &range_key_buf));
extract_key(range_key_buf, &next_val);
if (next_val < prev_val) {
lprintf(cfg, EINVAL, 0,
@@ -547,9 +559,8 @@ worker(void *arg)
}
}
/* Setup the timer for throttling. */
- if (thread->workload->throttle != 0 &&
- (ret = setup_throttle(thread)) != 0)
- goto err;
+ if (thread->workload->throttle != 0)
+ setup_throttle(thread);
/* Setup for truncate */
if (thread->workload->truncate != 0)
@@ -611,7 +622,7 @@ worker(void *arg)
/*
* Spread the data out around the multiple databases.
*/
- cursor = cursors[next_val % cfg->table_count];
+ cursor = cursors[map_key_to_table(cfg, next_val)];
/*
* Skip the first time we do an operation, when trk->ops
@@ -1010,7 +1021,7 @@ populate_thread(void *arg)
/*
* Figure out which table this op belongs to.
*/
- cursor = cursors[op % cfg->table_count];
+ cursor = cursors[map_key_to_table(cfg, op)];
generate_key(cfg, key_buf, op);
measure_latency =
cfg->sample_interval != 0 &&
@@ -1148,7 +1159,7 @@ populate_async(void *arg)
* Allocate an async op for whichever table.
*/
while ((ret = conn->async_new_op(
- conn, cfg->uris[op % cfg->table_count],
+ conn, cfg->uris[map_key_to_table(cfg, op)],
NULL, &cb, &asyncop)) == EBUSY)
(void)usleep(10000);
if (ret != 0)
@@ -1858,7 +1869,7 @@ create_uris(CONFIG *cfg)
base_uri_len = strlen(cfg->base_uri);
cfg->uris = dcalloc(cfg->table_count, sizeof(char *));
for (i = 0; i < cfg->table_count; i++) {
- uri = cfg->uris[i] = dcalloc(base_uri_len + 5, 1);
+ uri = cfg->uris[i] = dcalloc(base_uri_len + 6, 1);
/*
* If there is only one table, just use base name.
*/
@@ -1877,9 +1888,6 @@ create_tables(CONFIG *cfg)
int ret;
char buf[512];
- if (cfg->create == 0)
- return (0);
-
if ((ret = cfg->conn->open_session(
cfg->conn, NULL, cfg->sess_config, &session)) != 0) {
lprintf(cfg, ret, 0,
@@ -1971,13 +1979,10 @@ start_all_runs(CONFIG *cfg)
if (strcmp(cfg->monitor_dir, cfg->home) == 0)
next_cfg->monitor_dir = new_home;
- /* Create clean home directories. */
- snprintf(cmd_buf, cmd_len, "rm -rf %s && mkdir %s",
- next_cfg->home, next_cfg->home);
- if ((ret = system(cmd_buf)) != 0) {
- fprintf(stderr, "%s: failed\n", cmd_buf);
- goto err;
- }
+ /* If creating the sub-database, recreate its home */
+ if (cfg->create != 0)
+ recreate_dir(next_cfg->home);
+
if ((ret = pthread_create(
&threads[i], NULL, thread_run_wtperf, next_cfg)) != 0) {
lprintf(cfg, ret, 0, "Error creating thread");
@@ -2024,8 +2029,8 @@ start_run(CONFIG *cfg)
{
pthread_t monitor_thread;
uint64_t total_ops;
+ uint32_t run_time;
int monitor_created, ret, t_ret;
- char helium_buf[256];
monitor_created = ret = 0;
/* [-Wconditional-uninitialized] */
@@ -2040,21 +2045,10 @@ start_run(CONFIG *cfg)
goto err;
}
- /* Configure optional Helium volume. */
- if (cfg->helium_mount != NULL) {
- snprintf(helium_buf, sizeof(helium_buf),
- "entry=wiredtiger_extension_init,config=["
- "%s=[helium_devices=\"he://./%s\","
- "helium_o_volume_truncate=1]]",
- HELIUM_NAME, cfg->helium_mount);
- if ((ret = cfg->conn->load_extension(
- cfg->conn, HELIUM_PATH, helium_buf)) != 0)
- lprintf(cfg,
- ret, 0, "Error loading Helium: %s", helium_buf);
- }
-
create_uris(cfg);
- if ((ret = create_tables(cfg)) != 0)
+
+ /* If creating, create the tables. */
+ if (cfg->create != 0 && (ret = create_tables(cfg)) != 0)
goto err;
/* Start the monitor thread. */
@@ -2083,7 +2077,8 @@ start_run(CONFIG *cfg)
goto err;
/* Didn't create, set insert count. */
- if (cfg->create == 0 && find_table_count(cfg) != 0)
+ if (cfg->create == 0 && cfg->random_range == 0 &&
+ find_table_count(cfg) != 0)
goto err;
/* Start the checkpoint thread. */
if (cfg->checkpoint_threads != 0) {
@@ -2108,26 +2103,27 @@ start_run(CONFIG *cfg)
cfg->ckpt_ops = sum_ckpt_ops(cfg);
total_ops = cfg->read_ops + cfg->insert_ops + cfg->update_ops;
+ run_time = cfg->run_time == 0 ? 1 : cfg->run_time;
lprintf(cfg, 0, 1,
"Executed %" PRIu64 " read operations (%" PRIu64
"%%) %" PRIu64 " ops/sec",
cfg->read_ops, (cfg->read_ops * 100) / total_ops,
- cfg->read_ops / cfg->run_time);
+ cfg->read_ops / run_time);
lprintf(cfg, 0, 1,
"Executed %" PRIu64 " insert operations (%" PRIu64
"%%) %" PRIu64 " ops/sec",
cfg->insert_ops, (cfg->insert_ops * 100) / total_ops,
- cfg->insert_ops / cfg->run_time);
+ cfg->insert_ops / run_time);
lprintf(cfg, 0, 1,
"Executed %" PRIu64 " truncate operations (%" PRIu64
"%%) %" PRIu64 " ops/sec",
cfg->truncate_ops, (cfg->truncate_ops * 100) / total_ops,
- cfg->truncate_ops / cfg->run_time);
+ cfg->truncate_ops / run_time);
lprintf(cfg, 0, 1,
"Executed %" PRIu64 " update operations (%" PRIu64
"%%) %" PRIu64 " ops/sec",
cfg->update_ops, (cfg->update_ops * 100) / total_ops,
- cfg->update_ops / cfg->run_time);
+ cfg->update_ops / run_time);
lprintf(cfg, 0, 1,
"Executed %" PRIu64 " checkpoint operations",
cfg->ckpt_ops);
@@ -2182,18 +2178,21 @@ err: if (ret == 0)
extern int __wt_optind, __wt_optreset;
extern char *__wt_optarg;
+void (*custom_die)(void) = NULL;
int
main(int argc, char *argv[])
{
CONFIG *cfg, _cfg;
size_t req_len, sreq_len;
- int ch, monitor_set, ret;
- const char *opts = "C:H:h:m:O:o:T:";
+ bool monitor_set;
+ int ch, ret;
+ const char *opts = "C:h:m:O:o:T:";
const char *config_opts;
char *cc_buf, *sess_cfg, *tc_buf, *user_cconfig, *user_tconfig;
- monitor_set = ret = 0;
+ monitor_set = false;
+ ret = 0;
config_opts = NULL;
cc_buf = sess_cfg = tc_buf = user_cconfig = user_tconfig = NULL;
@@ -2219,8 +2218,12 @@ main(int argc, char *argv[])
strcat(user_cconfig, __wt_optarg);
}
break;
- case 'H':
- cfg->helium_mount = __wt_optarg;
+ case 'h':
+ cfg->home = __wt_optarg;
+ break;
+ case 'm':
+ cfg->monitor_dir = __wt_optarg;
+ monitor_set = true;
break;
case 'O':
config_opts = __wt_optarg;
@@ -2236,15 +2239,7 @@ main(int argc, char *argv[])
strcat(user_tconfig, __wt_optarg);
}
break;
- case 'h':
- cfg->home = __wt_optarg;
- break;
- case 'm':
- cfg->monitor_dir = __wt_optarg;
- monitor_set = 1;
- break;
case '?':
- fprintf(stderr, "Invalid option\n");
usage();
goto einval;
}
@@ -2300,7 +2295,7 @@ main(int argc, char *argv[])
* to 4096 if needed.
*/
req_len = strlen(",async=(enabled=true,threads=)") + 4;
- cfg->async_config = dcalloc(req_len, 1);
+ cfg->async_config = dmalloc(req_len);
snprintf(cfg->async_config, req_len,
",async=(enabled=true,threads=%" PRIu32 ")",
cfg->async_threads);
@@ -2321,13 +2316,9 @@ main(int argc, char *argv[])
}
/* Build the URI from the table name. */
- req_len = strlen("table:") +
- strlen(HELIUM_NAME) + strlen(cfg->table_name) + 2;
- cfg->base_uri = dcalloc(req_len, 1);
- snprintf(cfg->base_uri, req_len, "table:%s%s%s",
- cfg->helium_mount == NULL ? "" : HELIUM_NAME,
- cfg->helium_mount == NULL ? "" : "/",
- cfg->table_name);
+ req_len = strlen("table:") + strlen(cfg->table_name) + 2;
+ cfg->base_uri = dmalloc(req_len);
+ snprintf(cfg->base_uri, req_len, "table:%s", cfg->table_name);
/* Make stdout line buffered, so verbose output appears quickly. */
__wt_stream_set_line_buffer(stdout);
@@ -2346,13 +2337,13 @@ main(int argc, char *argv[])
if (cfg->session_count_idle > 0) {
sreq_len = strlen(",session_max=") + 6;
req_len += sreq_len;
- sess_cfg = dcalloc(sreq_len, 1);
+ sess_cfg = dmalloc(sreq_len);
snprintf(sess_cfg, sreq_len,
",session_max=%" PRIu32,
cfg->session_count_idle + cfg->workers_cnt +
cfg->populate_threads + 10);
}
- cc_buf = dcalloc(req_len, 1);
+ cc_buf = dmalloc(req_len);
/*
* This is getting hard to parse.
*/
@@ -2368,36 +2359,34 @@ main(int argc, char *argv[])
if ((ret = config_opt_str(cfg, "conn_config", cc_buf)) != 0)
goto err;
}
- if (cfg->verbose > 1 || cfg->index || cfg->helium_mount != NULL ||
+ if (cfg->verbose > 1 || cfg->index ||
user_tconfig != NULL || cfg->compress_table != NULL) {
- req_len = strlen(cfg->table_config) + strlen(HELIUM_CONFIG) +
- strlen(debug_tconfig) + 3;
+ req_len = strlen(cfg->table_config) + strlen(debug_tconfig) + 3;
if (user_tconfig != NULL)
req_len += strlen(user_tconfig);
if (cfg->compress_table != NULL)
req_len += strlen(cfg->compress_table);
if (cfg->index)
req_len += strlen(INDEX_COL_NAMES);
- tc_buf = dcalloc(req_len, 1);
+ tc_buf = dmalloc(req_len);
/*
* This is getting hard to parse.
*/
- snprintf(tc_buf, req_len, "%s%s%s%s%s%s%s%s",
+ snprintf(tc_buf, req_len, "%s%s%s%s%s%s%s",
cfg->table_config,
cfg->index ? INDEX_COL_NAMES : "",
cfg->compress_table ? cfg->compress_table : "",
cfg->verbose > 1 ? ",": "",
cfg->verbose > 1 ? debug_tconfig : "",
user_tconfig ? ",": "",
- user_tconfig ? user_tconfig : "",
- cfg->helium_mount == NULL ? "" : HELIUM_CONFIG);
+ user_tconfig ? user_tconfig : "");
if ((ret = config_opt_str(cfg, "table_config", tc_buf)) != 0)
goto err;
}
if (cfg->log_partial && cfg->table_count > 1) {
req_len = strlen(cfg->table_config) +
strlen(LOG_PARTIAL_CONFIG) + 1;
- cfg->partial_config = dcalloc(req_len, 1);
+ cfg->partial_config = dmalloc(req_len);
snprintf(cfg->partial_config, req_len, "%s%s",
cfg->table_config, LOG_PARTIAL_CONFIG);
}
@@ -2410,7 +2399,7 @@ main(int argc, char *argv[])
strlen(READONLY_CONFIG) + 1;
else
req_len = strlen(cfg->conn_config) + 1;
- cfg->reopen_config = dcalloc(req_len, 1);
+ cfg->reopen_config = dmalloc(req_len);
if (cfg->readonly)
snprintf(cfg->reopen_config, req_len, "%s%s",
cfg->conn_config, READONLY_CONFIG);
@@ -2422,6 +2411,10 @@ main(int argc, char *argv[])
if ((ret = config_sanity(cfg)) != 0)
goto err;
+ /* If creating, remove and re-create the home directory. */
+ if (cfg->create != 0)
+ recreate_dir(cfg->home);
+
/* Write a copy of the config. */
config_to_file(cfg);
@@ -2536,6 +2529,19 @@ stop_threads(CONFIG *cfg, u_int num, CONFIG_THREAD *threads)
return (0);
}
+static void
+recreate_dir(const char *name)
+{
+ char *buf;
+ size_t len;
+
+ len = strlen(name) * 2 + 100;
+ buf = dmalloc(len);
+ (void)snprintf(buf, len, "rm -rf %s && mkdir %s", name, name);
+ testutil_checkfmt(system(buf), "system: %s", buf);
+ free(buf);
+}
+
static int
drop_all_tables(CONFIG *cfg)
{
@@ -2615,7 +2621,7 @@ wtperf_rand(CONFIG_THREAD *thread)
* first item in the table being "hot".
*/
if (rval > wtperf_value_range(cfg))
- rval = wtperf_value_range(cfg);
+ rval = 0;
}
/*
* Wrap the key to within the expected range and avoid zero: we never
diff --git a/bench/wtperf/wtperf.h b/bench/wtperf/wtperf.h
index d874fa4eefe..27c3832d316 100644
--- a/bench/wtperf/wtperf.h
+++ b/bench/wtperf/wtperf.h
@@ -29,14 +29,11 @@
#ifndef HAVE_WTPERF_H
#define HAVE_WTPERF_H
-#include <wt_internal.h>
+#include "test_util.h"
+
#include <assert.h>
#include <math.h>
-#ifdef _WIN32
-#include "windows_shim.h"
-#endif
-
#include "config_opt.h"
typedef struct __config CONFIG;
@@ -83,7 +80,6 @@ typedef struct {
typedef struct {
uint64_t stone_gap;
uint64_t needed_stones;
- uint64_t final_stone_gap;
uint64_t expected_total;
uint64_t total_inserts;
uint64_t last_total_inserts;
@@ -126,7 +122,6 @@ struct __config { /* Configuration structure */
char *reopen_config; /* Config string for conn reopen */
char *base_uri; /* Object URI */
char **uris; /* URIs if multiple tables */
- const char *helium_mount; /* Optional Helium mount point */
WT_CONNECTION *conn; /* Database connection */
@@ -281,7 +276,7 @@ void latency_print(CONFIG *);
int run_truncate(
CONFIG *, CONFIG_THREAD *, WT_CURSOR *, WT_SESSION *, int *);
int setup_log_file(CONFIG *);
-int setup_throttle(CONFIG_THREAD*);
+void setup_throttle(CONFIG_THREAD*);
int setup_truncate(CONFIG *, CONFIG_THREAD *, WT_SESSION *);
int start_idle_table_cycle(CONFIG *, pthread_t *);
int stop_idle_table_cycle(CONFIG *, pthread_t);
@@ -292,7 +287,7 @@ uint64_t sum_read_ops(CONFIG *);
uint64_t sum_truncate_ops(CONFIG *);
uint64_t sum_update_ops(CONFIG *);
void usage(void);
-int worker_throttle(CONFIG_THREAD*);
+void worker_throttle(CONFIG_THREAD*);
void lprintf(const CONFIG *, int err, uint32_t, const char *, ...)
#if defined(__GNUC__)
@@ -328,75 +323,4 @@ die(int e, const char *str)
fprintf(stderr, "Call to %s failed: %s", str, wiredtiger_strerror(e));
exit(EXIT_FAILURE);
}
-
-/*
- * dmalloc --
- * Call malloc, dying on failure.
- */
-static inline void *
-dmalloc(size_t len)
-{
- void *p;
-
- if ((p = malloc(len)) == NULL)
- die(errno, "malloc");
- return (p);
-}
-
-/*
- * dcalloc --
- * Call calloc, dying on failure.
- */
-static inline void *
-dcalloc(size_t num, size_t size)
-{
- void *p;
-
- if ((p = calloc(num, size)) == NULL)
- die(errno, "calloc");
- return (p);
-}
-
-/*
- * drealloc --
- * Call realloc, dying on failure.
- */
-static inline void *
-drealloc(void *p, size_t len)
-{
- void *repl;
-
- if ((repl = realloc(p, len)) == NULL)
- die(errno, "realloc");
- return (repl);
-}
-
-/*
- * dstrdup --
- * Call strdup, dying on failure.
- */
-static inline char *
-dstrdup(const char *str)
-{
- char *p;
-
- if ((p = strdup(str)) == NULL)
- die(errno, "strdup");
- return (p);
-}
-
-/*
- * dstrndup --
- * Call emulating strndup, dying on failure. Don't use actual strndup here
- * as it is not supported within MSVC.
- */
-static inline char *
-dstrndup(const char *str, const size_t len)
-{
- char *p;
-
- p = dcalloc(len + 1, sizeof(char));
- memcpy(p, str, len);
- return (p);
-}
#endif
diff --git a/bench/wtperf/wtperf_opt.i b/bench/wtperf/wtperf_opt.i
index 2afd20f777f..f6c96febc85 100644
--- a/bench/wtperf/wtperf_opt.i
+++ b/bench/wtperf/wtperf_opt.i
@@ -144,6 +144,7 @@ DEF_OPT_AS_UINT32(random_range, 0,
"if non zero choose a value from within this range as the key for "
"insert operations")
DEF_OPT_AS_BOOL(random_value, 0, "generate random content for the value")
+DEF_OPT_AS_BOOL(range_partition, 0, "partition data by range (vs hash)")
DEF_OPT_AS_UINT32(read_range, 0, "scan a range of keys after each search")
DEF_OPT_AS_BOOL(readonly, 0,
"reopen the connection between populate and workload phases in readonly "
diff --git a/bench/wtperf/wtperf_throttle.c b/bench/wtperf/wtperf_throttle.c
index a98fd9b18d7..e49bca00d07 100644
--- a/bench/wtperf/wtperf_throttle.c
+++ b/bench/wtperf/wtperf_throttle.c
@@ -31,7 +31,7 @@
/*
* Put the initial config together for running a throttled workload.
*/
-int
+void
setup_throttle(CONFIG_THREAD *thread)
{
THROTTLE_CONFIG *throttle_cfg;
@@ -70,15 +70,14 @@ setup_throttle(CONFIG_THREAD *thread)
throttle_cfg->ops_count = throttle_cfg->ops_per_increment;
/* Set the first timestamp of when we incremented */
- WT_RET(__wt_epoch(NULL, &throttle_cfg->last_increment));
- return (0);
+ testutil_check(__wt_epoch(NULL, &throttle_cfg->last_increment));
}
/*
* Run the throttle function. We will sleep if needed and then reload the
* counter to perform more operations.
*/
-int
+void
worker_throttle(CONFIG_THREAD *thread)
{
THROTTLE_CONFIG *throttle_cfg;
@@ -87,7 +86,7 @@ worker_throttle(CONFIG_THREAD *thread)
throttle_cfg = &thread->throttle_cfg;
- WT_RET(__wt_epoch(NULL, &now));
+ testutil_check(__wt_epoch(NULL, &now));
/*
* If we did enough operations in the current interval, sleep for
@@ -102,7 +101,7 @@ worker_throttle(CONFIG_THREAD *thread)
/*
* After sleeping, set the interval to the current time.
*/
- WT_RET(__wt_epoch(NULL, &throttle_cfg->last_increment));
+ testutil_check(__wt_epoch(NULL, &throttle_cfg->last_increment));
} else {
throttle_cfg->ops_count = (usecs_delta *
throttle_cfg->ops_per_increment) /
@@ -115,6 +114,4 @@ worker_throttle(CONFIG_THREAD *thread)
*/
throttle_cfg->ops_count =
WT_MIN(throttle_cfg->ops_count, thread->workload->throttle);
-
- return (0);
}
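Both throttle functions now return void and report failures through testutil_check() from the shared test/utility library instead of propagating WT_RET() errors. The real macro is defined in test/utility/test_util.h; the following is only a sketch of the check-and-die pattern it stands for, under a hypothetical name:

    /* Hypothetical check-and-die helper in the spirit of testutil_check(). */
    #include <stdio.h>
    #include <stdlib.h>
    #include "wiredtiger.h"

    #define check_or_die(call) do {                                 \
            int __r;                                                \
            if ((__r = (call)) != 0) {                              \
                    fprintf(stderr, "%s: %s\n",                     \
                        #call, wiredtiger_strerror(__r));           \
                    exit(EXIT_FAILURE);                             \
            }                                                       \
    } while (0)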
diff --git a/build_posix/Make.subdirs b/build_posix/Make.subdirs
index 64749378ed1..0b5175e4196 100644
--- a/build_posix/Make.subdirs
+++ b/build_posix/Make.subdirs
@@ -18,14 +18,15 @@ ext/extractors/csv
ext/test/kvs_bdb HAVE_BERKELEY_DB
.
api/leveldb LEVELDB
-bench/wtperf
examples/c
lang/java JAVA
examples/java JAVA
lang/python PYTHON
-# Make the tests
+# Test/Benchmark support library.
test/utility
+
+# Test programs.
test/bloom
test/checkpoint
test/csuite
@@ -39,3 +40,6 @@ test/readonly
test/recovery
test/salvage
test/thread
+
+# Benchmark programs.
+bench/wtperf
diff --git a/build_posix/aclocal/ax_pkg_swig.m4 b/build_posix/aclocal/ax_pkg_swig.m4
index 9ebdeb531b9..89941bc3fa9 100644
--- a/build_posix/aclocal/ax_pkg_swig.m4
+++ b/build_posix/aclocal/ax_pkg_swig.m4
@@ -32,9 +32,9 @@
# LICENSE
#
# Copyright (c) 2008 Sebastian Huber <sebastian-huber@web.de>
-# Copyright (c) 2008 Alan W. Irwin <irwin@beluga.phys.uvic.ca>
+# Copyright (c) 2008 Alan W. Irwin
# Copyright (c) 2008 Rafael Laboissiere <rafael@laboissiere.net>
-# Copyright (c) 2008 Andrew Collier <colliera@ukzn.ac.za>
+# Copyright (c) 2008 Andrew Collier
# Copyright (c) 2011 Murray Cumming <murrayc@openismus.com>
#
# This program is free software; you can redistribute it and/or modify it
@@ -63,11 +63,11 @@
# modified version of the Autoconf Macro, you may extend this special
# exception to the GPL to apply to your modified version as well.
-#serial 8
+#serial 11
AC_DEFUN([AX_PKG_SWIG],[
- # Some systems have SWIG 2.0 named "swig2.0"
- AC_PATH_PROGS([SWIG],[swig2.0 swig])
+ # Ubuntu has swig 2.0 as /usr/bin/swig2.0
+ AC_PATH_PROGS([SWIG],[swig swig3.0 swig2.0])
if test -z "$SWIG" ; then
m4_ifval([$3],[$3],[:])
elif test -n "$1" ; then
diff --git a/dist/api_data.py b/dist/api_data.py
index 90b1c8378a2..1302247e88e 100644
--- a/dist/api_data.py
+++ b/dist/api_data.py
@@ -247,8 +247,8 @@ file_config = format_meta + [
Config('memory_page_max', '5MB', r'''
the maximum size a page can grow to in memory before being
reconciled to disk. The specified size will be adjusted to a lower
- bound of <code>50 * leaf_page_max</code>, and an upper bound of
- <code>cache_size / 2</code>. This limit is soft - it is possible
+ bound of <code>leaf_page_max</code>, and an upper bound of
+ <code>cache_size / 10</code>. This limit is soft - it is possible
for pages to be temporarily larger than this value. This setting
is ignored for LSM trees, see \c chunk_size''',
min='512B', max='10TB'),
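The documented bounds on memory_page_max tighten here, from a lower bound of 50 * leaf_page_max and an upper bound of cache_size / 2 to leaf_page_max and cache_size / 10. As an illustration of the documented clamp only (not the internal implementation):

    /* Clamp a requested memory_page_max to the documented bounds. */
    #include <inttypes.h>

    static uint64_t
    clamp_memory_page_max(uint64_t requested,
        uint64_t leaf_page_max, uint64_t cache_size)
    {
        if (requested < leaf_page_max)
            requested = leaf_page_max;      /* lower bound */
        if (requested > cache_size / 10)
            requested = cache_size / 10;    /* upper bound */
        return (requested);
    }

For example, the default 5MB setting stays at 5MB with a 1GB cache, while a 20MB cache would clamp it down to 2MB.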
@@ -373,8 +373,6 @@ connection_runtime_config = [
periodically checkpoint the database. Enabling the checkpoint server
uses a session from the configured session_max''',
type='category', subconfig=[
- Config('name', '"WiredTigerCheckpoint"', r'''
- the checkpoint name'''),
Config('log_size', '0', r'''
wait for this amount of log record bytes to be written to
the log between each checkpoint. A database can configure
@@ -388,16 +386,31 @@ connection_runtime_config = [
]),
Config('error_prefix', '', r'''
prefix string for error messages'''),
- Config('eviction_dirty_target', '80', r'''
+ Config('eviction', '', r'''
+ eviction configuration options.''',
+ type='category', subconfig=[
+ Config('threads_max', '1', r'''
+ maximum number of threads WiredTiger will start to help evict
+ pages from cache. The number of threads started will vary
+ depending on the current eviction load. Each eviction worker
+ thread uses a session from the configured session_max''',
+ min=1, max=20),
+ Config('threads_min', '1', r'''
+ minimum number of threads WiredTiger will start to help evict
+ pages from cache. The number of threads currently running will
+ vary depending on the current eviction load''',
+ min=1, max=20),
+ ]),
+ Config('eviction_dirty_target', '5', r'''
continue evicting until the cache has less dirty memory than the
value, as a percentage of the total cache size. Dirty pages will
only be evicted if the cache is full enough to trigger eviction''',
- min=5, max=99),
- Config('eviction_dirty_trigger', '95', r'''
+ min=1, max=99),
+ Config('eviction_dirty_trigger', '20', r'''
trigger eviction when the cache is using this much memory for dirty
content, as a percentage of the total cache size. This setting only
alters behavior if it is lower than eviction_trigger''',
- min=5, max=99),
+ min=1, max=99),
Config('eviction_target', '80', r'''
continue evicting until the cache has less total memory than the
value, as a percentage of the total cache size. Must be less than
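With the eviction=(threads_min,threads_max) category now part of the connection runtime configuration and the dirty defaults lowered to 5/20, these settings can be passed directly to wiredtiger_open. A minimal sketch (the home path and cache size are placeholders):

    /* Open a connection with explicit eviction settings. */
    #include "wiredtiger.h"

    static int
    open_with_eviction(WT_CONNECTION **connp)
    {
        return (wiredtiger_open("WT_HOME", NULL,
            "create,cache_size=1G,"
            "eviction=(threads_min=2,threads_max=8),"
            "eviction_dirty_target=5,eviction_dirty_trigger=20",
            connp));
    }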
@@ -420,40 +433,6 @@ connection_runtime_config = [
interval in seconds at which to check for files that are
inactive and close them''', min=1, max=100000),
]),
- Config('log', '', r'''
- enable logging. Enabling logging uses three sessions from the
- configured session_max''',
- type='category', subconfig=[
- Config('archive', 'true', r'''
- automatically archive unneeded log files''',
- type='boolean'),
- Config('compressor', 'none', r'''
- configure a compressor for log records. Permitted values are
- \c "none" or custom compression engine name created with
- WT_CONNECTION::add_compressor. If WiredTiger has builtin support
- for \c "snappy", \c "lz4" or \c "zlib" compression, these names
- are also available. See @ref compression for more information'''),
- Config('enabled', 'false', r'''
- enable logging subsystem''',
- type='boolean'),
- Config('file_max', '100MB', r'''
- the maximum size of log files''',
- min='100KB', max='2GB'),
- Config('path', '"."', r'''
- the path to a directory into which the log files are written.
- If the value is not an absolute path name, the files are created
- relative to the database home'''),
- Config('prealloc', 'true', r'''
- pre-allocate log files.''',
- type='boolean'),
- Config('recover', 'on', r'''
- run recovery or error if recovery needs to run after an
- unclean shutdown.''',
- choices=['error','on']),
- Config('zero_fill', 'false', r'''
- manually write zeroes into log files''',
- type='boolean'),
- ]),
Config('lsm_manager', '', r'''
configure database wide options for LSM tree management. The LSM
manager is started automatically the first time an LSM tree is opened.
@@ -472,21 +451,6 @@ connection_runtime_config = [
Config('lsm_merge', 'true', r'''
merge LSM chunks where possible (deprecated)''',
type='boolean', undoc=True),
- Config('eviction', '', r'''
- eviction configuration options.''',
- type='category', subconfig=[
- Config('threads_max', '1', r'''
- maximum number of threads WiredTiger will start to help evict
- pages from cache. The number of threads started will vary
- depending on the current eviction load. Each eviction worker
- thread uses a session from the configured session_max''',
- min=1, max=20),
- Config('threads_min', '1', r'''
- minimum number of threads WiredTiger will start to help evict
- pages from cache. The number of threads currently running will
- vary depending on the current eviction load''',
- min=1, max=20),
- ]),
Config('shared_cache', '', r'''
shared cache configuration options. A database should configure
either a cache_size or a shared_cache not both. Enabling a
@@ -525,38 +489,6 @@ connection_runtime_config = [
are logged using the \c statistics_log configuration. See
@ref statistics for more information''',
type='list', choices=['all', 'fast', 'none', 'clear']),
- Config('statistics_log', '', r'''
- log any statistics the database is configured to maintain,
- to a file. See @ref statistics for more information. Enabling
- the statistics log server uses a session from the configured
- session_max''',
- type='category', subconfig=[
- Config('json', 'false', r'''
- encode statistics in JSON format''',
- type='boolean'),
- Config('on_close', 'false', r'''log statistics on database close''',
- type='boolean'),
- Config('path', '"WiredTigerStat.%d.%H"', r'''
- the pathname to a file into which the log records are written,
- may contain ISO C standard strftime conversion specifications.
- If the value is not an absolute path name, the file is created
- relative to the database home'''),
- Config('sources', '', r'''
- if non-empty, include statistics for the list of data source
- URIs, if they are open at the time of the statistics logging.
- The list may include URIs matching a single data source
- ("table:mytable"), or a URI matching all data sources of a
- particular type ("table:")''',
- type='list'),
- Config('timestamp', '"%b %d %H:%M:%S"', r'''
- a timestamp prepended to each log record, may contain strftime
- conversion specifications, when \c json is configured, defaults
- to \c "%FT%Y.000Z"'''),
- Config('wait', '0', r'''
- seconds to wait between each write of the log records; setting
- this value above 0 configures statistics logging''',
- min='0', max='100000'),
- ]),
Config('verbose', '', r'''
enable messages for various events. Only available if WiredTiger
is configured with --enable-verbose. Options are given as a
@@ -590,13 +522,113 @@ connection_runtime_config = [
'write']),
]
+# wiredtiger_open and WT_CONNECTION.reconfigure log configurations.
+log_configuration_common = [
+ Config('archive', 'true', r'''
+ automatically archive unneeded log files''',
+ type='boolean'),
+ Config('prealloc', 'true', r'''
+ pre-allocate log files.''',
+ type='boolean'),
+ Config('zero_fill', 'false', r'''
+ manually write zeroes into log files''',
+ type='boolean')
+]
+connection_reconfigure_log_configuration = [
+ Config('log', '', r'''
+ enable logging. Enabling logging uses three sessions from the
+ configured session_max''',
+ type='category', subconfig=
+ log_configuration_common)
+]
+wiredtiger_open_log_configuration = [
+ Config('log', '', r'''
+ enable logging. Enabling logging uses three sessions from the
+ configured session_max''',
+ type='category', subconfig=
+ log_configuration_common + [
+ Config('enabled', 'false', r'''
+ enable logging subsystem''',
+ type='boolean'),
+ Config('compressor', 'none', r'''
+ configure a compressor for log records. Permitted values are
+ \c "none" or custom compression engine name created with
+ WT_CONNECTION::add_compressor. If WiredTiger has builtin support
+ for \c "snappy", \c "lz4" or \c "zlib" compression, these names
+ are also available. See @ref compression for more information'''),
+ Config('file_max', '100MB', r'''
+ the maximum size of log files''',
+ min='100KB', max='2GB'),
+ Config('path', '"."', r'''
+ the name of a directory into which log files are written. The
+ directory must already exist. If the value is not an absolute
+ path, the path is relative to the database home (see @ref
+ absolute_path for more information)'''),
+ Config('recover', 'on', r'''
+ run recovery or error if recovery needs to run after an
+ unclean shutdown''',
+ choices=['error','on'])
+ ]),
+]
+
+# wiredtiger_open and WT_CONNECTION.reconfigure statistics log configurations.
+statistics_log_configuration_common = [
+ Config('json', 'false', r'''
+ encode statistics in JSON format''',
+ type='boolean'),
+ Config('on_close', 'false', r'''log statistics on database close''',
+ type='boolean'),
+ Config('sources', '', r'''
+ if non-empty, include statistics for the list of data source
+ URIs, if they are open at the time of the statistics logging.
+ The list may include URIs matching a single data source
+ ("table:mytable"), or a URI matching all data sources of a
+ particular type ("table:")''',
+ type='list'),
+ Config('timestamp', '"%b %d %H:%M:%S"', r'''
+ a timestamp prepended to each log record, may contain strftime
+ conversion specifications, when \c json is configured, defaults
+ to \c "%FT%Y.000Z"'''),
+ Config('wait', '0', r'''
+ seconds to wait between each write of the log records; setting
+ this value above 0 configures statistics logging''',
+ min='0', max='100000'),
+]
+connection_reconfigure_statistics_log_configuration = [
+ Config('statistics_log', '', r'''
+ log any statistics the database is configured to maintain,
+ to a file. See @ref statistics for more information. Enabling
+ the statistics log server uses a session from the configured
+ session_max''',
+ type='category', subconfig=
+ statistics_log_configuration_common)
+]
+wiredtiger_open_statistics_log_configuration = [
+ Config('statistics_log', '', r'''
+ log any statistics the database is configured to maintain,
+ to a file. See @ref statistics for more information. Enabling
+ the statistics log server uses a session from the configured
+ session_max''',
+ type='category', subconfig=
+ statistics_log_configuration_common + [
+ Config('path', '"."', r'''
+ the name of a directory into which statistics files are written.
+ The directory must already exist. If the value is not an absolute
+ path, the path is relative to the database home (see @ref
+ absolute_path for more information)''')
+ ])
+]
+
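Statistics logging is typically paired with enabling statistics collection itself; a minimal sketch, again assuming home, ret and conn as in examples/c/ex_all.c:

	/* Collect fast statistics, logging them as JSON every 30 seconds. */
	ret = wiredtiger_open(home, NULL,
	    "create,statistics=(fast),"
	    "statistics_log=(json=true,on_close=true,wait=30)", &conn);
	if (ret == 0)
		(void)conn->close(conn, NULL);

Note that path stays an open-time-only setting; the shared options (json, on_close, sources, timestamp, wait) are the ones also accepted by WT_CONNECTION::reconfigure.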
session_config = [
Config('isolation', 'read-committed', r'''
the default isolation level for operations in this session''',
choices=['read-uncommitted', 'read-committed', 'snapshot']),
]
-wiredtiger_open_common = connection_runtime_config + [
+wiredtiger_open_common =\
+ connection_runtime_config +\
+ wiredtiger_open_log_configuration +\
+ wiredtiger_open_statistics_log_configuration + [
Config('buffer_alignment', '-1', r'''
in-memory alignment (in bytes) for buffers used for I/O. The
default value of -1 indicates a platform-specific alignment value
@@ -788,8 +820,9 @@ methods = {
'WT_SESSION.drop' : Method([
Config('checkpoint_wait', 'true', r'''
- wait for the checkpoint lock, if \c checkpoint_wait=false, fail if
- this lock is not available immediately''',
+ wait for the checkpoint lock, if \c checkpoint_wait=false, perform
+ the drop operation without taking a lock, returning EBUSY if the
+ operation conflicts with a running checkpoint''',
type='boolean', undoc=True),
Config('force', 'false', r'''
return success if the object does not exist''',
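A sketch of the behavior described above (the drop returns EBUSY rather than blocking when it conflicts with a running checkpoint), assuming an open WT_SESSION named session, <errno.h> and <unistd.h> included, and a hypothetical "table:scratch" object; checkpoint_wait remains an undocumented (undoc=True) setting:

	/* Don't block on the checkpoint lock; retry while a checkpoint runs. */
	while ((ret = session->drop(session,
	    "table:scratch", "force=true,checkpoint_wait=false")) == EBUSY)
		sleep(1);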
@@ -870,6 +903,11 @@ methods = {
"WiredTigerCheckpoint" opens the most recent internal
checkpoint taken for the object). The cursor does not
support data modification'''),
+ Config('checkpoint_wait', 'true', r'''
+ wait for the checkpoint lock, if \c checkpoint_wait=false, open the
+ cursor without taking a lock, returning EBUSY if the operation
+ conflicts with a running checkpoint''',
+ type='boolean', undoc=True),
Config('dump', '', r'''
configure the cursor for dump format inputs and outputs: "hex"
selects a simple hexadecimal format, "json" selects a JSON format
@@ -1084,7 +1122,11 @@ methods = {
don't free memory during close''',
type='boolean'),
]),
-'WT_CONNECTION.reconfigure' : Method(connection_runtime_config),
+'WT_CONNECTION.reconfigure' : Method(
+ connection_reconfigure_log_configuration +\
+ connection_reconfigure_statistics_log_configuration +\
+ connection_runtime_config
+),
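With the split definitions above, WT_CONNECTION::reconfigure accepts the log and statistics_log categories but only their runtime-safe options; a minimal sketch, assuming an open connection conn:

	/* Adjust archiving and statistics-log cadence without restarting. */
	ret = conn->reconfigure(conn,
	    "log=(archive=false,prealloc=false,zero_fill=true),"
	    "statistics_log=(json=true,wait=5)");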
'WT_CONNECTION.set_file_system' : Method([]),
'WT_CONNECTION.load_extension' : Method([
diff --git a/dist/flags.py b/dist/flags.py
index b5f36fb707a..8091283a8c0 100644
--- a/dist/flags.py
+++ b/dist/flags.py
@@ -37,10 +37,13 @@ flags = {
'READ_WONT_NEED',
],
'rec_write' : [
+ 'CHECKPOINTING',
+ 'EVICTING',
'EVICT_IN_MEMORY',
+ 'EVICT_INMEM_SPLIT',
'EVICT_LOOKASIDE',
+ 'EVICT_SCRUB',
'EVICT_UPDATE_RESTORE',
- 'EVICTING',
'VISIBILITY_ERR',
],
'txn_log_checkpoint' : [
diff --git a/dist/s_all b/dist/s_all
index 46a68864906..33b8f6a76ba 100755
--- a/dist/s_all
+++ b/dist/s_all
@@ -15,6 +15,8 @@ echo 'dist/s_all run started...'
force=
reconf=0
+errmode=0
+errfound=0
while :
do case "$1" in
-A) # Reconfigure the library build.
@@ -23,6 +25,9 @@ while :
-f) # Force versions to be updated
force="-f"
shift;;
+ -E) # Return an error code on failure
+ errmode=1
+ shift;;
*)
break;;
esac
@@ -48,6 +53,14 @@ errchk()
echo "#######################"
rm -f $2
+
+	# Some tests shouldn't return an error; exclude them here.
+ case "$1" in
+ *s_export*)
+ break;;
+ *)
+ errfound=1;;
+ esac
}
run()
@@ -108,3 +121,6 @@ for f in `find . -name ${t_pfx}\*`; do
done
echo 'dist/s_all run finished'
+if test $errmode -ne 0; then
+ exit $errfound;
+fi
diff --git a/dist/s_string.ok b/dist/s_string.ok
index 7966ff2cf2e..8c5f1e99bff 100644
--- a/dist/s_string.ok
+++ b/dist/s_string.ok
@@ -86,6 +86,7 @@ DbEnv
Decrement
Decrypt
DeleteFileA
+EACCES
EAGAIN
EB
EBUSY
@@ -117,6 +118,7 @@ FLv
FNV
FORALL
FOREACH
+FS
FULLFSYNC
FindClose
FindFirstFile
@@ -204,6 +206,7 @@ MERCHANTABILITY
METADATA
MONGODB
MSVC
+MULTI
MULTIBLOCK
MUTEX
Manos
@@ -326,6 +329,7 @@ UID
UIDs
UINT
ULINE
+UNC
URI
URIs
UTF
@@ -528,6 +532,7 @@ cust
customp
cv
cxa
+dT
data's
database's
datalen
@@ -557,6 +562,7 @@ dequeued
der
dereference
desc
+designator
dest
destSize
dev
@@ -932,6 +938,7 @@ prepend
prepended
prepending
presize
+presync
primary's
printf
printlog
@@ -1065,6 +1072,7 @@ tV
tablename
tcbench
td
+tempdir
testutil
th
tid
@@ -1091,6 +1099,7 @@ txn
txnc
txnid
txnmin
+txt
typedef
uB
uS
diff --git a/dist/s_style b/dist/s_style
index a222c004cc3..e33db5a5fab 100755
--- a/dist/s_style
+++ b/dist/s_style
@@ -33,7 +33,7 @@ else
exit 1;
fi
- egrep -w 'a a|an an|and and|are are|be be|by by|for for|from from|if if|in in|is is|it it|of of|the the|this this|to to|was was|were were|when when|with with|a an|an a|a the|the a' $f > $t
+ egrep -w 'a a|an an|and and|are are|be be|by by|for for|from from|if if|in in[^-]|is is|it it|of of|the the|this this|to to|was was|were were|when when|with with|a an|an a|a the|the a' $f > $t
test -s $t && {
echo "paired typo"
echo "============================"
diff --git a/dist/stat_data.py b/dist/stat_data.py
index 694ffc86ee4..51cc487f04c 100644
--- a/dist/stat_data.py
+++ b/dist/stat_data.py
@@ -81,10 +81,10 @@ class SessionStat(Stat):
prefix = 'session'
def __init__(self, name, desc, flags=''):
Stat.__init__(self, name, SessionStat.prefix, desc, flags)
-class ThreadState(Stat):
+class ThreadStat(Stat):
prefix = 'thread-state'
def __init__(self, name, desc, flags=''):
- Stat.__init__(self, name, ThreadState.prefix, desc, flags)
+ Stat.__init__(self, name, ThreadStat.prefix, desc, flags)
class TxnStat(Stat):
prefix = 'transaction'
def __init__(self, name, desc, flags=''):
@@ -105,7 +105,7 @@ groups['evict'] = [
BlockStat.prefix,
CacheStat.prefix,
ConnStat.prefix,
- ThreadState.prefix
+ ThreadStat.prefix
]
groups['lsm'] = [LSMStat.prefix, TxnStat.prefix]
groups['memory'] = [CacheStat.prefix, ConnStat.prefix, RecStat.prefix]
@@ -113,7 +113,7 @@ groups['system'] = [
ConnStat.prefix,
DhandleStat.prefix,
SessionStat.prefix,
- ThreadState.prefix
+ ThreadStat.prefix
]
##########################################
@@ -159,6 +159,7 @@ connection_stats = [
BlockStat('block_byte_map_read', 'mapped bytes read', 'size'),
BlockStat('block_byte_read', 'bytes read', 'size'),
BlockStat('block_byte_write', 'bytes written', 'size'),
+ BlockStat('block_byte_write_checkpoint', 'bytes written for checkpoint', 'size'),
BlockStat('block_map_read', 'mapped blocks read'),
BlockStat('block_preload', 'blocks pre-loaded'),
BlockStat('block_read', 'blocks read'),
@@ -168,11 +169,12 @@ connection_stats = [
# Cache and eviction statistics
##########################################
CacheStat('cache_bytes_dirty', 'tracked dirty bytes in the cache', 'no_clear,no_scale,size'),
+ CacheStat('cache_bytes_image', 'bytes belonging to page images in the cache', 'no_clear,no_scale,size'),
CacheStat('cache_bytes_internal', 'tracked bytes belonging to internal pages in the cache', 'no_clear,no_scale,size'),
CacheStat('cache_bytes_inuse', 'bytes currently in the cache', 'no_clear,no_scale,size'),
CacheStat('cache_bytes_leaf', 'tracked bytes belonging to leaf pages in the cache', 'no_clear,no_scale,size'),
CacheStat('cache_bytes_max', 'maximum bytes configured', 'no_clear,no_scale,size'),
- CacheStat('cache_bytes_overflow', 'tracked bytes belonging to overflow pages in the cache', 'no_clear,no_scale,size'),
+ CacheStat('cache_bytes_other', 'bytes not belonging to page images in the cache', 'no_clear,no_scale,size'),
CacheStat('cache_bytes_read', 'bytes read into cache', 'size'),
CacheStat('cache_bytes_write', 'bytes written from cache', 'size'),
CacheStat('cache_eviction_aggressive_set', 'eviction currently operating in aggressive mode', 'no_clear,no_scale'),
@@ -193,7 +195,8 @@ connection_stats = [
CacheStat('cache_eviction_internal', 'internal pages evicted'),
CacheStat('cache_eviction_maximum_page_size', 'maximum page size at eviction', 'no_clear,no_scale,size'),
CacheStat('cache_eviction_pages_queued', 'pages queued for eviction'),
- CacheStat('cache_eviction_pages_queued_oldest', 'pages queued for urgent eviction'),
+ CacheStat('cache_eviction_pages_queued_oldest', 'pages queued for urgent eviction during walk'),
+ CacheStat('cache_eviction_pages_queued_urgent', 'pages queued for urgent eviction'),
CacheStat('cache_eviction_pages_seen', 'pages seen by eviction walk'),
CacheStat('cache_eviction_queue_empty', 'eviction server candidate queue empty when topping up'),
CacheStat('cache_eviction_queue_not_empty', 'eviction server candidate queue not empty when topping up'),
@@ -215,12 +218,14 @@ connection_stats = [
CacheStat('cache_inmem_splittable', 'in-memory page passed criteria to be split'),
CacheStat('cache_lookaside_insert', 'lookaside table insert calls'),
CacheStat('cache_lookaside_remove', 'lookaside table remove calls'),
+ CacheStat('cache_overflow_value', 'overflow values cached in memory', 'no_scale'),
CacheStat('cache_overhead', 'percentage overhead', 'no_clear,no_scale'),
CacheStat('cache_pages_dirty', 'tracked dirty pages in the cache', 'no_clear,no_scale'),
CacheStat('cache_pages_inuse', 'pages currently held in the cache', 'no_clear,no_scale'),
CacheStat('cache_pages_requested', 'pages requested from the cache'),
CacheStat('cache_read', 'pages read into cache'),
CacheStat('cache_read_lookaside', 'pages read into cache requiring lookaside entries'),
+ CacheStat('cache_read_overflow', 'overflow pages read into cache'),
CacheStat('cache_write', 'pages written from cache'),
CacheStat('cache_write_lookaside', 'page written requiring lookaside records'),
CacheStat('cache_write_restore', 'pages written requiring in-memory restoration'),
@@ -294,11 +299,11 @@ connection_stats = [
TxnStat('txn_begin', 'transaction begins'),
TxnStat('txn_checkpoint', 'transaction checkpoints'),
TxnStat('txn_checkpoint_fsync_post', 'transaction fsync calls for checkpoint after allocating the transaction ID'),
- TxnStat('txn_checkpoint_fsync_post_duration', 'transaction fsync duration for checkpoint after allocating the transaction ID (usecs)'),
- TxnStat('txn_checkpoint_fsync_pre', 'transaction fsync calls for checkpoint before allocating the transaction ID'),
- TxnStat('txn_checkpoint_fsync_pre_duration', 'transaction fsync duration for checkpoint before allocating the transaction ID (usecs)'),
+ TxnStat('txn_checkpoint_fsync_post_duration', 'transaction fsync duration for checkpoint after allocating the transaction ID (usecs)', 'no_clear,no_scale'),
TxnStat('txn_checkpoint_generation', 'transaction checkpoint generation', 'no_clear,no_scale'),
TxnStat('txn_checkpoint_running', 'transaction checkpoint currently running', 'no_clear,no_scale'),
+ TxnStat('txn_checkpoint_scrub_target', 'transaction checkpoint scrub dirty target', 'no_clear,no_scale'),
+ TxnStat('txn_checkpoint_scrub_time', 'transaction checkpoint scrub time (msecs)', 'no_clear,no_scale'),
TxnStat('txn_checkpoint_time_max', 'transaction checkpoint max time (msecs)', 'no_clear,no_scale'),
TxnStat('txn_checkpoint_time_min', 'transaction checkpoint min time (msecs)', 'no_clear,no_scale'),
TxnStat('txn_checkpoint_time_recent', 'transaction checkpoint most recent time (msecs)', 'no_clear,no_scale'),
@@ -332,6 +337,22 @@ connection_stats = [
##########################################
SessionStat('session_cursor_open', 'open cursor count', 'no_clear,no_scale'),
SessionStat('session_open', 'open session count', 'no_clear,no_scale'),
+ SessionStat('session_table_compact_fail', 'table compact failed calls', 'no_clear,no_scale'),
+ SessionStat('session_table_compact_success', 'table compact successful calls', 'no_clear,no_scale'),
+ SessionStat('session_table_create_fail', 'table create failed calls', 'no_clear,no_scale'),
+ SessionStat('session_table_create_success', 'table create successful calls', 'no_clear,no_scale'),
+ SessionStat('session_table_drop_fail', 'table drop failed calls', 'no_clear,no_scale'),
+ SessionStat('session_table_drop_success', 'table drop successful calls', 'no_clear,no_scale'),
+ SessionStat('session_table_rebalance_fail', 'table rebalance failed calls', 'no_clear,no_scale'),
+ SessionStat('session_table_rebalance_success', 'table rebalance successful calls', 'no_clear,no_scale'),
+ SessionStat('session_table_rename_fail', 'table rename failed calls', 'no_clear,no_scale'),
+ SessionStat('session_table_rename_success', 'table rename successful calls', 'no_clear,no_scale'),
+ SessionStat('session_table_salvage_fail', 'table salvage failed calls', 'no_clear,no_scale'),
+ SessionStat('session_table_salvage_success', 'table salvage successful calls', 'no_clear,no_scale'),
+ SessionStat('session_table_truncate_fail', 'table truncate failed calls', 'no_clear,no_scale'),
+ SessionStat('session_table_truncate_success', 'table truncate successful calls', 'no_clear,no_scale'),
+ SessionStat('session_table_verify_fail', 'table verify failed calls', 'no_clear,no_scale'),
+ SessionStat('session_table_verify_success', 'table verify successful calls', 'no_clear,no_scale'),
##########################################
# Total cursor operations
@@ -349,11 +370,11 @@ connection_stats = [
CursorStat('cursor_update', 'cursor update calls'),
##########################################
- # Thread State statistics
+ # Thread Count statistics
##########################################
- ThreadState('fsync_active', 'active filesystem fsync calls','no_clear,no_scale'),
- ThreadState('read_active', 'active filesystem read calls','no_clear,no_scale'),
- ThreadState('write_active', 'active filesystem write calls','no_clear,no_scale'),
+ ThreadStat('thread_fsync_active', 'active filesystem fsync calls','no_clear,no_scale'),
+ ThreadStat('thread_read_active', 'active filesystem read calls','no_clear,no_scale'),
+ ThreadStat('thread_write_active', 'active filesystem write calls','no_clear,no_scale'),
##########################################
# Yield statistics
@@ -451,6 +472,7 @@ dsrc_stats = [
##########################################
# Cache and eviction statistics
##########################################
+ CacheStat('cache_bytes_inuse', 'bytes currently in the cache', 'no_clear,no_scale,size'),
CacheStat('cache_bytes_read', 'bytes read into cache', 'size'),
CacheStat('cache_bytes_write', 'bytes written from cache', 'size'),
CacheStat('cache_eviction_checkpoint', 'checkpoint blocked page eviction'),
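The renamed ThreadStat entries and the new per-connection counters (for example block_byte_write_checkpoint and cache_bytes_image) surface to applications through the usual statistics cursor; a minimal sketch, assuming an open session, an int ret as in the surrounding examples, and the standard three-field statistics value format:

	WT_CURSOR *cursor;
	const char *desc, *pvalue;
	int64_t value;

	/* Walk the connection statistics, printing description and value. */
	ret = session->open_cursor(
	    session, "statistics:", NULL, NULL, &cursor);
	if (ret == 0) {
		while ((ret = cursor->next(cursor)) == 0 &&
		    (ret = cursor->get_value(
		    cursor, &desc, &pvalue, &value)) == 0)
			printf("%s=%s\n", desc, pvalue);
		ret = cursor->close(cursor);
	}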
diff --git a/examples/c/ex_all.c b/examples/c/ex_all.c
index dd807922c10..e8727df3f60 100644
--- a/examples/c/ex_all.c
+++ b/examples/c/ex_all.c
@@ -1160,34 +1160,27 @@ main(void)
if (ret == 0)
(void)conn->close(conn, NULL);
+#ifdef MIGHT_NOT_RUN
+ /*
+ * Don't run this code, statistics logging doesn't yet support tables.
+ */
/*! [Statistics logging with a table] */
ret = wiredtiger_open(home, NULL,
"create, statistics_log=("
- "sources=(\"lsm:table1\",\"lsm:table2\"), wait=5)",
+ "sources=(\"table:table1\",\"table:table2\"), wait=5)",
&conn);
/*! [Statistics logging with a table] */
if (ret == 0)
(void)conn->close(conn, NULL);
- /*! [Statistics logging with all tables] */
- ret = wiredtiger_open(home, NULL,
- "create, statistics_log=(sources=(\"lsm:\"), wait=5)",
- &conn);
- /*! [Statistics logging with all tables] */
- if (ret == 0)
- (void)conn->close(conn, NULL);
-
-#ifdef MIGHT_NOT_RUN
/*
- * This example code gets run, and a non-existent log file path might
- * cause the open to fail. The documentation requires code snippets,
- * use #ifdef's to avoid running it.
+ * Don't run this code, statistics logging doesn't yet support indexes.
*/
- /*! [Statistics logging with path] */
+ /*! [Statistics logging with a source type] */
ret = wiredtiger_open(home, NULL,
- "create,"
- "statistics_log=(wait=120,path=/log/log.%m.%d.%y)", &conn);
- /*! [Statistics logging with path] */
+ "create, statistics_log=(sources=(\"index:\"), wait=5)",
+ &conn);
+ /*! [Statistics logging with a source type] */
if (ret == 0)
(void)conn->close(conn, NULL);
diff --git a/examples/c/ex_file_system.c b/examples/c/ex_file_system.c
index 77e8f40480b..55ee20e9331 100644
--- a/examples/c/ex_file_system.c
+++ b/examples/c/ex_file_system.c
@@ -118,18 +118,17 @@ int demo_file_system_create(WT_CONNECTION *, WT_CONFIG_ARG *);
/*
* Forward function declarations for file system API implementation
*/
-static int demo_fs_open(WT_FILE_SYSTEM *,
- WT_SESSION *, const char *, WT_OPEN_FILE_TYPE, uint32_t, WT_FILE_HANDLE **);
+static int demo_fs_open(WT_FILE_SYSTEM *, WT_SESSION *,
+ const char *, WT_FS_OPEN_FILE_TYPE, uint32_t, WT_FILE_HANDLE **);
static int demo_fs_directory_list(WT_FILE_SYSTEM *, WT_SESSION *,
const char *, const char *, char ***, uint32_t *);
static int demo_fs_directory_list_free(
WT_FILE_SYSTEM *, WT_SESSION *, char **, uint32_t);
-static int demo_fs_directory_sync(WT_FILE_SYSTEM *file_system,
- WT_SESSION *session, const char *directory);
static int demo_fs_exist(WT_FILE_SYSTEM *, WT_SESSION *, const char *, bool *);
-static int demo_fs_remove(WT_FILE_SYSTEM *, WT_SESSION *, const char *);
+static int demo_fs_remove(
+ WT_FILE_SYSTEM *, WT_SESSION *, const char *, uint32_t);
static int demo_fs_rename(
- WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *);
+ WT_FILE_SYSTEM *, WT_SESSION *, const char *, const char *, uint32_t);
static int demo_fs_size(
WT_FILE_SYSTEM *, WT_SESSION *, const char *, wt_off_t *);
static int demo_fs_terminate(WT_FILE_SYSTEM *, WT_SESSION *);
@@ -255,7 +254,6 @@ demo_file_system_create(WT_CONNECTION *conn, WT_CONFIG_ARG *config)
/* Initialize the in-memory jump table. */
file_system->fs_directory_list = demo_fs_directory_list;
file_system->fs_directory_list_free = demo_fs_directory_list_free;
- file_system->fs_directory_sync = demo_fs_directory_sync;
file_system->fs_exist = demo_fs_exist;
file_system->fs_open_file = demo_fs_open;
file_system->fs_remove = demo_fs_remove;
@@ -282,7 +280,7 @@ err: free(demo_fs);
*/
static int
demo_fs_open(WT_FILE_SYSTEM *file_system, WT_SESSION *session,
- const char *name, WT_OPEN_FILE_TYPE file_type, uint32_t flags,
+ const char *name, WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags,
WT_FILE_HANDLE **file_handlep)
{
DEMO_FILE_HANDLE *demo_fh;
@@ -469,21 +467,6 @@ demo_fs_directory_list_free(WT_FILE_SYSTEM *file_system,
}
/*
- * demo_fs_directory_sync --
- * Directory sync for our demo file system, which is a no-op.
- */
-static int
-demo_fs_directory_sync(WT_FILE_SYSTEM *file_system,
- WT_SESSION *session, const char *directory)
-{
- (void)file_system; /* Unused */
- (void)session; /* Unused */
- (void)directory; /* Unused */
-
- return (0);
-}
-
-/*
* demo_fs_exist --
* Return if the file exists.
*/
@@ -507,13 +490,15 @@ demo_fs_exist(WT_FILE_SYSTEM *file_system,
* POSIX remove.
*/
static int
-demo_fs_remove(
- WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *name)
+demo_fs_remove(WT_FILE_SYSTEM *file_system,
+ WT_SESSION *session, const char *name, uint32_t flags)
{
DEMO_FILE_SYSTEM *demo_fs;
DEMO_FILE_HANDLE *demo_fh;
int ret = 0;
+ (void)flags; /* Unused */
+
demo_fs = (DEMO_FILE_SYSTEM *)file_system;
ret = ENOENT;
@@ -531,13 +516,15 @@ demo_fs_remove(
*/
static int
demo_fs_rename(WT_FILE_SYSTEM *file_system,
- WT_SESSION *session, const char *from, const char *to)
+ WT_SESSION *session, const char *from, const char *to, uint32_t flags)
{
DEMO_FILE_HANDLE *demo_fh;
DEMO_FILE_SYSTEM *demo_fs;
char *copy;
int ret = 0;
+ (void)flags; /* Unused */
+
demo_fs = (DEMO_FILE_SYSTEM *)file_system;
LOCK_FILE_SYSTEM(session, demo_fs);
diff --git a/examples/java/com/wiredtiger/examples/ex_all.java b/examples/java/com/wiredtiger/examples/ex_all.java
index 48e85c9fade..83a37e9a6a5 100644
--- a/examples/java/com/wiredtiger/examples/ex_all.java
+++ b/examples/java/com/wiredtiger/examples/ex_all.java
@@ -988,6 +988,10 @@ allExample()
/*! [Statistics logging] */
conn.close(null);
+ if (false) { // MIGHT_NOT_RUN
+ /*
+ * Don't run this code, statistics logging doesn't yet support tables.
+ */
/*! [Statistics logging with a table] */
conn = wiredtiger.open(home,
"create," +
@@ -995,23 +999,13 @@ allExample()
/*! [Statistics logging with a table] */
conn.close(null);
- /*! [Statistics logging with all tables] */
- conn = wiredtiger.open(home,
- "create,statistics_log=(sources=(\"table:\"))");
- /*! [Statistics logging with all tables] */
- conn.close(null);
-
- if (false) { // MIGHT_NOT_RUN
/*
- * This example code gets run, and a non-existent log file path might
- * cause the open to fail. The documentation requires code snippets,
- * use if (false) to avoid running it.
+ * Don't run this code, statistics logging doesn't yet support indexes.
*/
- /*! [Statistics logging with path] */
+ /*! [Statistics logging with a source type] */
conn = wiredtiger.open(home,
- "create," +
- "statistics_log=(wait=120,path=/log/log.%m.%d.%y)");
- /*! [Statistics logging with path] */
+ "create,statistics_log=(sources=(\"index:\"))");
+ /*! [Statistics logging with a source type] */
conn.close(null);
/*
diff --git a/ext/compressors/zlib/zlib_compress.c b/ext/compressors/zlib/zlib_compress.c
index 9aede2ed907..484df0a6785 100644
--- a/ext/compressors/zlib/zlib_compress.c
+++ b/ext/compressors/zlib/zlib_compress.c
@@ -92,7 +92,7 @@ zalloc(void *cookie, uint32_t number, uint32_t size)
opaque = cookie;
wt_api = ((ZLIB_COMPRESSOR *)opaque->compressor)->wt_api;
return (wt_api->scr_alloc(
- wt_api, opaque->session, (size_t)(number * size)));
+ wt_api, opaque->session, (size_t)number * size));
}
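The repositioned cast matters because number * size is evaluated in 32-bit arithmetic before the old cast could widen it; a standalone illustration, assuming a 64-bit size_t:

	uint32_t number = 100000, size = 65536;

	/* The 32-bit product wraps first, then the cast widens the result. */
	size_t bad = (size_t)(number * size);

	/* Widening one operand first keeps the whole product in 64-bit range. */
	size_t good = (size_t)number * size;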
/*
diff --git a/src/async/async_api.c b/src/async/async_api.c
index fea8714176b..d53a6c65c1d 100644
--- a/src/async/async_api.c
+++ b/src/async/async_api.c
@@ -490,12 +490,24 @@ __wt_async_flush(WT_SESSION_IMPL *session)
WT_ASYNC *async;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
+ uint32_t i, workers;
conn = S2C(session);
if (!conn->async_cfg)
return (0);
async = conn->async;
+ /*
+ * Only add a flush operation if there are workers who can process
+ * it. Otherwise we will wait forever.
+ */
+ workers = 0;
+ for (i = 0; i < conn->async_workers; ++i)
+ if (async->worker_tids[i] != 0)
+ ++workers;
+ if (workers == 0)
+ return (0);
+
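From the API side, the guard above means WT_CONNECTION::async_flush returns immediately instead of blocking forever when no async worker threads are running; a minimal usage sketch, assuming a connection opened with async=(enabled=true):

	/* Wait for outstanding asynchronous operations, if any workers exist. */
	ret = conn->async_flush(conn);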
WT_STAT_FAST_CONN_INCR(session, async_flush);
/*
* We have to do several things. First we have to prevent
diff --git a/src/block/block_ckpt.c b/src/block/block_ckpt.c
index b9f0ec25d53..3584efc7671 100644
--- a/src/block/block_ckpt.c
+++ b/src/block/block_ckpt.c
@@ -252,7 +252,7 @@ __wt_block_checkpoint(WT_SESSION_IMPL *session,
} else
WT_ERR(__wt_block_write_off(session, block, buf,
&ci->root_offset, &ci->root_size, &ci->root_cksum,
- data_cksum, false));
+ data_cksum, true, false));
/*
* Checkpoints are potentially reading/writing/merging lots of blocks,
diff --git a/src/block/block_ext.c b/src/block/block_ext.c
index 0d3e7b54f17..bad4d8d7990 100644
--- a/src/block/block_ext.c
+++ b/src/block/block_ext.c
@@ -1245,8 +1245,7 @@ __wt_block_extlist_write(WT_SESSION_IMPL *session,
WT_DECL_RET;
WT_EXT *ext;
WT_PAGE_HEADER *dsk;
- size_t size;
- uint32_t entries;
+ size_t entries, size;
uint8_t *p;
WT_RET(__block_extlist_dump(session, block, el, "write"));
@@ -1311,7 +1310,7 @@ __wt_block_extlist_write(WT_SESSION_IMPL *session,
/* Write the extent list to disk. */
WT_ERR(__wt_block_write_off(session,
- block, tmp, &el->offset, &el->size, &el->cksum, true, true));
+ block, tmp, &el->offset, &el->size, &el->cksum, true, true, true));
/*
* Remove the allocated blocks from the system's allocation list, extent
@@ -1450,7 +1449,7 @@ __block_extlist_dump(
tag, el->name, el->entries,
__wt_buf_set_size(session, el->bytes, true, t1)));
- if (ret != 0 || el->entries == 0)
+ if (el->entries == 0)
goto done;
memset(sizes, 0, sizeof(sizes));
diff --git a/src/block/block_mgr.c b/src/block/block_mgr.c
index 971fe713f83..eff25f34304 100644
--- a/src/block/block_mgr.c
+++ b/src/block/block_mgr.c
@@ -479,11 +479,11 @@ __bm_verify_start(WT_BM *bm,
* Write a buffer into a block, returning the block's address cookie.
*/
static int
-__bm_write(WT_BM *bm, WT_SESSION_IMPL *session,
- WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool data_cksum)
+__bm_write(WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf,
+ uint8_t *addr, size_t *addr_sizep, bool data_cksum, bool checkpoint_io)
{
- return (__wt_block_write(
- session, bm->block, buf, addr, addr_sizep, data_cksum));
+ return (__wt_block_write(session,
+ bm->block, buf, addr, addr_sizep, data_cksum, checkpoint_io));
}
/*
@@ -492,13 +492,14 @@ __bm_write(WT_BM *bm, WT_SESSION_IMPL *session,
* readonly version.
*/
static int
-__bm_write_readonly(WT_BM *bm, WT_SESSION_IMPL *session,
- WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool data_cksum)
+__bm_write_readonly(WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf,
+ uint8_t *addr, size_t *addr_sizep, bool data_cksum, bool checkpoint_io)
{
WT_UNUSED(buf);
WT_UNUSED(addr);
WT_UNUSED(addr_sizep);
WT_UNUSED(data_cksum);
+ WT_UNUSED(checkpoint_io);
return (__bm_readonly(bm, session));
}
diff --git a/src/block/block_open.c b/src/block/block_open.c
index 1603b1574e7..7cff7eab629 100644
--- a/src/block/block_open.c
+++ b/src/block/block_open.c
@@ -15,9 +15,10 @@ static int __desc_read(WT_SESSION_IMPL *, WT_BLOCK *);
* Drop a file.
*/
int
-__wt_block_manager_drop(WT_SESSION_IMPL *session, const char *filename)
+__wt_block_manager_drop(
+ WT_SESSION_IMPL *session, const char *filename, bool durable)
{
- return (__wt_remove_if_exists(session, filename));
+ return (__wt_remove_if_exists(session, filename, durable));
}
/*
@@ -43,8 +44,9 @@ __wt_block_manager_create(
* in our space. Move any existing files out of the way and complain.
*/
for (;;) {
- if ((ret = __wt_open(session, filename, WT_OPEN_FILE_TYPE_DATA,
- WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, &fh)) == 0)
+ if ((ret = __wt_open(session, filename,
+ WT_FS_OPEN_FILE_TYPE_DATA, WT_FS_OPEN_CREATE |
+ WT_FS_OPEN_DURABLE | WT_FS_OPEN_EXCLUSIVE, &fh)) == 0)
break;
WT_ERR_TEST(ret != EEXIST, ret);
@@ -56,7 +58,7 @@ __wt_block_manager_create(
WT_ERR(__wt_fs_exist(session, tmp->data, &exists));
if (!exists) {
WT_ERR(__wt_fs_rename(
- session, filename, tmp->data));
+ session, filename, tmp->data, false));
WT_ERR(__wt_msg(session,
"unexpected file %s found, renamed to %s",
filename, (const char *)tmp->data));
@@ -77,16 +79,9 @@ __wt_block_manager_create(
/* Close the file handle. */
WT_TRET(__wt_close(session, &fh));
- /*
- * Some filesystems require that we sync the directory to be confident
- * that the file will appear.
- */
- if (ret == 0)
- WT_TRET(__wt_fs_directory_sync(session, filename));
-
/* Undo any create on error. */
if (ret != 0)
- WT_TRET(__wt_fs_remove(session, filename));
+ WT_TRET(__wt_fs_remove(session, filename, false));
err: __wt_scr_free(session, &tmp);
@@ -207,11 +202,11 @@ __wt_block_open(WT_SESSION_IMPL *session,
*/
flags = 0;
if (readonly && FLD_ISSET(conn->direct_io, WT_DIRECT_IO_CHECKPOINT))
- LF_SET(WT_OPEN_DIRECTIO);
+ LF_SET(WT_FS_OPEN_DIRECTIO);
if (!readonly && FLD_ISSET(conn->direct_io, WT_DIRECT_IO_DATA))
- LF_SET(WT_OPEN_DIRECTIO);
+ LF_SET(WT_FS_OPEN_DIRECTIO);
WT_ERR(__wt_open(
- session, filename, WT_OPEN_FILE_TYPE_DATA, flags, &block->fh));
+ session, filename, WT_FS_OPEN_FILE_TYPE_DATA, flags, &block->fh));
/* Set the file's size. */
WT_ERR(__wt_filesize(session, block->fh, &block->size));
diff --git a/src/block/block_session.c b/src/block/block_session.c
index 268adb530cf..6223751effa 100644
--- a/src/block/block_session.c
+++ b/src/block/block_session.c
@@ -28,7 +28,7 @@ __block_ext_alloc(WT_SESSION_IMPL *session, WT_EXT **extp)
{
WT_EXT *ext;
- u_int skipdepth;
+ size_t skipdepth;
skipdepth = __wt_skip_choose_depth(session);
WT_RET(__wt_calloc(session, 1,
diff --git a/src/block/block_write.c b/src/block/block_write.c
index 1fefeee09da..30d06e6259a 100644
--- a/src/block/block_write.c
+++ b/src/block/block_write.c
@@ -210,15 +210,15 @@ __wt_block_write_size(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t *sizep)
* Write a buffer into a block, returning the block's address cookie.
*/
int
-__wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK *block,
- WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool data_cksum)
+__wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf,
+ uint8_t *addr, size_t *addr_sizep, bool data_cksum, bool checkpoint_io)
{
wt_off_t offset;
uint32_t size, cksum;
uint8_t *endp;
- WT_RET(__wt_block_write_off(
- session, block, buf, &offset, &size, &cksum, data_cksum, false));
+ WT_RET(__wt_block_write_off(session, block,
+ buf, &offset, &size, &cksum, data_cksum, checkpoint_io, false));
endp = addr;
WT_RET(__wt_block_addr_to_buffer(block, &endp, offset, size, cksum));
@@ -228,14 +228,14 @@ __wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK *block,
}
/*
- * __wt_block_write_off --
+ * __block_write_off --
* Write a buffer into a block, returning the block's offset, size and
* checksum.
*/
-int
-__wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block,
+static int
+__block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block,
WT_ITEM *buf, wt_off_t *offsetp, uint32_t *sizep, uint32_t *cksump,
- bool data_cksum, bool caller_locked)
+ bool data_cksum, bool checkpoint_io, bool caller_locked)
{
WT_BLOCK_HEADER *blk;
WT_DECL_RET;
@@ -254,12 +254,6 @@ __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block,
blk = WT_BLOCK_HEADER_REF(buf->mem);
memset(blk, 0, sizeof(*blk));
- /*
- * Swap the page-header as needed; this doesn't belong here, but it's
- * the best place to catch all callers.
- */
- __wt_page_header_byteswap(buf->mem);
-
/* Buffers should be aligned for writing. */
if (!F_ISSET(buf, WT_ITEM_ALIGNED)) {
WT_ASSERT(session, F_ISSET(buf, WT_ITEM_ALIGNED));
@@ -380,6 +374,9 @@ __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block,
WT_STAT_FAST_CONN_INCR(session, block_write);
WT_STAT_FAST_CONN_INCRV(session, block_byte_write, align_size);
+ if (checkpoint_io)
+ WT_STAT_FAST_CONN_INCRV(
+ session, block_byte_write_checkpoint, align_size);
WT_RET(__wt_verbose(session, WT_VERB_WRITE,
"off %" PRIuMAX ", size %" PRIuMAX ", cksum %" PRIu32,
@@ -391,3 +388,28 @@ __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block,
return (0);
}
+
+/*
+ * __wt_block_write_off --
+ * Write a buffer into a block, returning the block's offset, size and
+ * checksum.
+ */
+int
+__wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block,
+ WT_ITEM *buf, wt_off_t *offsetp, uint32_t *sizep, uint32_t *cksump,
+ bool data_cksum, bool checkpoint_io, bool caller_locked)
+{
+ WT_DECL_RET;
+
+ /*
+ * Ensure the page header is in little endian order; this doesn't belong
+ * here, but it's the best place to catch all callers. After the write,
+ * swap values back to native order so callers never see anything other
+ * than their original content.
+ */
+ __wt_page_header_byteswap(buf->mem);
+ ret = __block_write_off(session, block, buf,
+ offsetp, sizep, cksump, data_cksum, checkpoint_io, caller_locked);
+ __wt_page_header_byteswap(buf->mem);
+ return (ret);
+}
diff --git a/src/btree/bt_curnext.c b/src/btree/bt_curnext.c
index 70b3ba56e31..e1b097c22a5 100644
--- a/src/btree/bt_curnext.c
+++ b/src/btree/bt_curnext.c
@@ -183,6 +183,7 @@ __cursor_var_next(WT_CURSOR_BTREE *cbt, bool newpage)
if (cbt->last_standard_recno == 0)
return (WT_NOTFOUND);
__cursor_set_recno(cbt, cbt->ref->ref_recno);
+ cbt->cip_saved = NULL;
goto new_page;
}
@@ -301,12 +302,13 @@ __cursor_row_next(WT_CURSOR_BTREE *cbt, bool newpage)
* WT_INSERT_HEAD[0], and so on. This means WT_INSERT lists are
* odd-numbered slots, and WT_ROW array slots are even-numbered slots.
*
- * New page configuration.
+ * Initialize for each new page.
*/
if (newpage) {
cbt->ins_head = WT_ROW_INSERT_SMALLEST(page);
cbt->ins = WT_SKIP_FIRST(cbt->ins_head);
cbt->row_iteration_slot = 1;
+ cbt->rip_saved = NULL;
goto new_insert;
}
@@ -517,11 +519,13 @@ __wt_btcur_iterate_setup(WT_CURSOR_BTREE *cbt)
*/
F_SET(cbt, WT_CBT_ITERATE_NEXT | WT_CBT_ITERATE_PREV);
- /*
- * Clear the count of deleted items on the page.
- */
+ /* Clear the count of deleted items on the page. */
cbt->page_deleted_count = 0;
+ /* Clear saved iteration cursor position information. */
+ cbt->cip_saved = NULL;
+ cbt->rip_saved = NULL;
+
/*
* If we don't have a search page, then we're done, we're starting at
* the beginning or end of the tree, not as a result of a search.
@@ -661,7 +665,7 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating)
if (page != NULL &&
(cbt->page_deleted_count > WT_BTREE_DELETE_THRESHOLD ||
(newpage && cbt->page_deleted_count > 0)))
- __wt_page_evict_soon(page);
+ WT_ERR(__wt_page_evict_soon(session, cbt->ref));
cbt->page_deleted_count = 0;
WT_ERR(__wt_tree_walk(session, &cbt->ref, flags));
diff --git a/src/btree/bt_curprev.c b/src/btree/bt_curprev.c
index 872f648446c..e39dffa357f 100644
--- a/src/btree/bt_curprev.c
+++ b/src/btree/bt_curprev.c
@@ -329,6 +329,7 @@ __cursor_var_prev(WT_CURSOR_BTREE *cbt, bool newpage)
if (cbt->last_standard_recno == 0)
return (WT_NOTFOUND);
__cursor_set_recno(cbt, cbt->last_standard_recno);
+ cbt->cip_saved = NULL;
goto new_page;
}
@@ -447,7 +448,7 @@ __cursor_row_prev(WT_CURSOR_BTREE *cbt, bool newpage)
* WT_INSERT_HEAD[0], and so on. This means WT_INSERT lists are
* odd-numbered slots, and WT_ROW array slots are even-numbered slots.
*
- * New page configuration.
+ * Initialize for each new page.
*/
if (newpage) {
/*
@@ -464,6 +465,7 @@ __cursor_row_prev(WT_CURSOR_BTREE *cbt, bool newpage)
WT_ROW_INSERT_SLOT(page, page->pg_row_entries - 1);
cbt->ins = WT_SKIP_LAST(cbt->ins_head);
cbt->row_iteration_slot = page->pg_row_entries * 2 + 1;
+ cbt->rip_saved = NULL;
goto new_insert;
}
@@ -619,7 +621,7 @@ __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating)
if (page != NULL &&
(cbt->page_deleted_count > WT_BTREE_DELETE_THRESHOLD ||
(newpage && cbt->page_deleted_count > 0)))
- __wt_page_evict_soon(page);
+ WT_ERR(__wt_page_evict_soon(session, cbt->ref));
cbt->page_deleted_count = 0;
WT_ERR(__wt_tree_walk(session, &cbt->ref, flags));
diff --git a/src/btree/bt_discard.c b/src/btree/bt_discard.c
index a00bb7dc2b5..965aec16fc2 100644
--- a/src/btree/bt_discard.c
+++ b/src/btree/bt_discard.c
@@ -131,8 +131,10 @@ __wt_page_out(WT_SESSION_IMPL *session, WT_PAGE **pagep)
/* Discard any disk image. */
dsk = (WT_PAGE_HEADER *)page->dsk;
- if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_ALLOC))
+ if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_ALLOC)) {
+ __wt_cache_page_image_decr(session, dsk->mem_size);
__wt_overwrite_and_free_len(session, dsk, dsk->mem_size);
+ }
/* Discard any mapped image. */
if (F_ISSET_ATOMIC(page, WT_PAGE_DISK_MAPPED))
diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c
index c97e05d74a7..cacf1369430 100644
--- a/src/btree/bt_handle.c
+++ b/src/btree/bt_handle.c
@@ -690,6 +690,8 @@ __btree_page_sizes(WT_SESSION_IMPL *session)
* Don't let pages grow large compared to the cache size or we can end
* up in a situation where nothing can be evicted. Take care getting
* the cache size: with a shared cache, it may not have been set.
+ * Don't forget to update the API documentation if you alter the
+ * bounds for any of the parameters here.
*/
WT_RET(__wt_config_gets(session, cfg, "memory_page_max", &cval));
btree->maxmempage = (uint64_t)cval.val;
diff --git a/src/btree/bt_huffman.c b/src/btree/bt_huffman.c
index 9e9d69c342e..918791d9c6e 100644
--- a/src/btree/bt_huffman.c
+++ b/src/btree/bt_huffman.c
@@ -157,7 +157,8 @@ __huffman_confchk_file(WT_SESSION_IMPL *session,
/* Check the file exists. */
WT_RET(__wt_strndup(session, v->str + len, v->len - len, &fname));
- WT_ERR(__wt_fopen(session, fname, WT_OPEN_FIXED, WT_STREAM_READ, &fs));
+ WT_ERR(__wt_fopen(
+ session, fname, WT_FS_OPEN_FIXED, WT_STREAM_READ, &fs));
/* Optionally return the file handle. */
if (fsp == NULL)
diff --git a/src/btree/bt_io.c b/src/btree/bt_io.c
index 4339de6f25c..6c2e2f1b3fb 100644
--- a/src/btree/bt_io.c
+++ b/src/btree/bt_io.c
@@ -117,7 +117,7 @@ __wt_bt_read(WT_SESSION_IMPL *session,
*/
if (ret != 0 ||
result_len != dsk->mem_size - WT_BLOCK_COMPRESS_SKIP) {
- fail_msg = "block decryption failed";
+ fail_msg = "block decompression failed";
goto corrupt;
}
} else
@@ -168,7 +168,8 @@ err: __wt_scr_free(session, &tmp);
*/
int
__wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf,
- uint8_t *addr, size_t *addr_sizep, bool checkpoint, bool compressed)
+ uint8_t *addr, size_t *addr_sizep,
+ bool checkpoint, bool checkpoint_io, bool compressed)
{
WT_BM *bm;
WT_BTREE *btree;
@@ -359,10 +360,12 @@ __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf,
/* Call the block manager to write the block. */
WT_ERR(checkpoint ?
bm->checkpoint(bm, session, ip, btree->ckpt, data_cksum) :
- bm->write(bm, session, ip, addr, addr_sizep, data_cksum));
+ bm->write(
+ bm, session, ip, addr, addr_sizep, data_cksum, checkpoint_io));
WT_STAT_FAST_CONN_INCR(session, cache_write);
WT_STAT_FAST_DATA_INCR(session, cache_write);
+ S2C(session)->cache->bytes_written += dsk->mem_size;
WT_STAT_FAST_CONN_INCRV(session, cache_bytes_write, dsk->mem_size);
WT_STAT_FAST_DATA_INCRV(session, cache_bytes_write, dsk->mem_size);
diff --git a/src/btree/bt_ovfl.c b/src/btree/bt_ovfl.c
index fbe361e000a..1f080041a23 100644
--- a/src/btree/bt_ovfl.c
+++ b/src/btree/bt_ovfl.c
@@ -33,6 +33,7 @@ __ovfl_read(WT_SESSION_IMPL *session,
store->data = WT_PAGE_HEADER_BYTE(btree, dsk);
store->size = dsk->u.datalen;
+ WT_STAT_FAST_CONN_INCR(session, cache_read_overflow);
WT_STAT_FAST_DATA_INCR(session, cache_read_overflow);
return (0);
@@ -208,6 +209,7 @@ __wt_ovfl_cache(WT_SESSION_IMPL *session,
*/
if (!visible) {
WT_RET(__ovfl_cache(session, page, vpack));
+ WT_STAT_FAST_CONN_INCR(session, cache_overflow_value);
WT_STAT_FAST_DATA_INCR(session, cache_overflow_value);
}
diff --git a/src/btree/bt_page.c b/src/btree/bt_page.c
index 00ec8aa4494..89e5f428628 100644
--- a/src/btree/bt_page.c
+++ b/src/btree/bt_page.c
@@ -219,6 +219,7 @@ __wt_page_inmem(WT_SESSION_IMPL *session, WT_REF *ref,
/* Update the page's in-memory size and the cache statistics. */
__wt_cache_page_inmem_incr(session, page, size);
+ __wt_cache_page_image_incr(session, dsk->mem_size);
/* Link the new internal page to the parent. */
if (ref != NULL) {
diff --git a/src/btree/bt_read.c b/src/btree/bt_read.c
index 086500c8b2f..3d396d5ae5b 100644
--- a/src/btree/bt_read.c
+++ b/src/btree/bt_read.c
@@ -296,7 +296,7 @@ err: WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));
* __evict_force_check --
* Check if a page matches the criteria for forced eviction.
*/
-static int
+static bool
__evict_force_check(WT_SESSION_IMPL *session, WT_REF *ref)
{
WT_BTREE *btree;
@@ -307,26 +307,26 @@ __evict_force_check(WT_SESSION_IMPL *session, WT_REF *ref)
/* Leaf pages only. */
if (WT_PAGE_IS_INTERNAL(page))
- return (0);
+ return (false);
/*
* It's hard to imagine a page with a huge memory footprint that has
* never been modified, but check to be sure.
*/
if (page->modify == NULL)
- return (0);
+ return (false);
/* Pages are usually small enough, check that first. */
if (page->memory_footprint < btree->splitmempage)
- return (0);
+ return (false);
else if (page->memory_footprint < btree->maxmempage)
return (__wt_leaf_page_can_split(session, page));
/* Trigger eviction on the next page release. */
- __wt_page_evict_soon(page);
+ (void)__wt_page_evict_soon(session, ref);
/* Bump the oldest ID, we're about to do some visibility checks. */
- WT_RET(__wt_txn_update_oldest(session, 0));
+ (void)__wt_txn_update_oldest(session, 0);
/* If eviction cannot succeed, don't try. */
return (__wt_page_can_evict(session, ref, NULL));
@@ -548,10 +548,14 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
* if the page qualifies for forced eviction and update
* the page's generation number. If eviction isn't being
* done on this file, we're done.
+ * In-memory split of large pages is allowed while
+ * no_eviction is set on btree, whereas reconciliation
+ * is not allowed.
*/
if (LF_ISSET(WT_READ_NO_EVICT) ||
F_ISSET(session, WT_SESSION_NO_EVICTION) ||
- F_ISSET(btree, WT_BTREE_NO_EVICTION))
+ (F_ISSET(btree, WT_BTREE_NO_EVICTION) &&
+ !F_ISSET(btree, WT_BTREE_NO_RECONCILE)))
goto skip_evict;
/*
@@ -595,7 +599,14 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
page = ref->page;
if (page->read_gen == WT_READGEN_NOTSET) {
if (evict_soon)
- __wt_page_evict_soon(page);
+ /*
+ * Ignore error returns, since the
+ * evict soon call is advisory and we
+ * are holding a hazard pointer to the
+ * page already.
+ */
+ (void)__wt_page_evict_soon(
+ session, ref);
else
__wt_cache_read_gen_new(session, page);
} else if (!LF_ISSET(WT_READ_NO_GEN))
diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c
index 7a05a883f83..4f6f300802e 100644
--- a/src/btree/bt_split.c
+++ b/src/btree/bt_split.c
@@ -298,7 +298,7 @@ static int
__split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home,
WT_REF **from_refp, size_t *decrp, WT_REF **to_refp, size_t *incrp)
{
- WT_ADDR *addr;
+ WT_ADDR *addr, *ref_addr;
WT_CELL_UNPACK unpack;
WT_DECL_RET;
WT_IKEY *ikey;
@@ -345,13 +345,18 @@ __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home,
}
/*
- * If there's no address (the page has never been written), or the
- * address has been instantiated, there's no work to do. Otherwise,
- * instantiate the address in-memory, from the on-page cell.
+ * If there's no address at all (the page has never been written), or
+ * the address has already been instantiated, there's no work to do.
+ * Otherwise, the address still references a split page on-page cell,
+ * instantiate it. We can race with reconciliation and/or eviction of
+ * the child pages, be cautious: read the address and verify it, and
+ * only update it if the value is unchanged from the original. In the
+ * case of a race, the address must no longer reference the split page,
+ * we're done.
*/
- addr = ref->addr;
- if (addr != NULL && !__wt_off_page(from_home, addr)) {
- __wt_cell_unpack((WT_CELL *)ref->addr, &unpack);
+ WT_ORDERED_READ(ref_addr, ref->addr);
+ if (ref_addr != NULL && !__wt_off_page(from_home, ref_addr)) {
+ __wt_cell_unpack((WT_CELL *)ref_addr, &unpack);
WT_RET(__wt_calloc_one(session, &addr));
if ((ret = __wt_strndup(
session, unpack.data, unpack.size, &addr->addr)) != 0) {
@@ -371,7 +376,10 @@ __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home,
break;
WT_ILLEGAL_VALUE(session);
}
- ref->addr = addr;
+ if (!__wt_atomic_cas_ptr(&ref->addr, ref_addr, addr)) {
+ __wt_free(session, addr->addr);
+ __wt_free(session, addr);
+ }
}
/* And finally, copy the WT_REF pointer itself. */
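The update above follows a publish-or-free pattern: read the current value, build a replacement off to the side, install it with a single compare-and-swap, and discard the replacement if another thread won the race. A generic sketch using C11 atomics rather than WiredTiger's WT_ORDERED_READ and __wt_atomic_cas_ptr macros:

	#include <stdatomic.h>
	#include <stdlib.h>

	struct addr {
		char *cookie;
	};

	/*
	 * Replace *slot with a freshly built copy only if it still holds the
	 * value we originally observed; tolerate concurrent updaters.
	 */
	static void
	publish_addr(_Atomic(struct addr *) *slot, struct addr *original)
	{
		struct addr *current, *replacement;

		current = atomic_load(slot);
		if (current != original)
			return;		/* Another thread already updated it. */

		if ((replacement = malloc(sizeof(*replacement))) == NULL)
			return;
		replacement->cookie = NULL;	/* Build the new copy here. */

		/* Publish the copy, or free it if we lost the race. */
		if (!atomic_compare_exchange_strong(slot, &current, replacement))
			free(replacement);
	}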
@@ -786,7 +794,9 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new,
*/
if (result_entries == 0) {
empty_parent = true;
- __wt_page_evict_soon(parent);
+ if (!__wt_ref_is_root(parent->pg_intl_parent_ref))
+ ret = __wt_page_evict_soon(
+ session, parent->pg_intl_parent_ref);
goto err;
}
@@ -1462,11 +1472,11 @@ err: if (parent != NULL)
/*
* __split_multi_inmem --
- * Instantiate a page in a multi-block set.
+ * Instantiate a page from a disk image.
*/
static int
__split_multi_inmem(
- WT_SESSION_IMPL *session, WT_PAGE *orig, WT_REF *ref, WT_MULTI *multi)
+ WT_SESSION_IMPL *session, WT_PAGE *orig, WT_MULTI *multi, WT_REF *ref)
{
WT_CURSOR_BTREE cbt;
WT_DECL_ITEM(key);
@@ -1487,13 +1497,12 @@ __split_multi_inmem(
orig->type != WT_PAGE_COL_VAR || ref->ref_recno != 0);
/*
- * This code re-creates an in-memory page that is part of a set created
- * while evicting a large page, and adds references to any unresolved
- * update chains to the new page. We get here due to choosing to keep
- * the results of a split in memory or because and update could not be
- * written when attempting to evict a page.
+ * This code re-creates an in-memory page from a disk image, and adds
+ * references to any unresolved update chains to the new page. We get
+ * here either because an update could not be written when evicting a
+ * page, or eviction chose to keep a page in memory.
*
- * Clear the disk image and link the page into the passed-in WT_REF to
+ * Steal the disk image and link the page into the passed-in WT_REF to
* simplify error handling: our caller will not discard the disk image
* when discarding the original page, and our caller will discard the
* allocated page on error, when discarding the allocated WT_REF.
@@ -1503,6 +1512,19 @@ __split_multi_inmem(
WT_PAGE_DISK_ALLOC, &page));
multi->disk_image = NULL;
+ /*
+ * Put the re-instantiated page in the same LRU queue location as the
+ * original page, unless this was a forced eviction, in which case we
+ * leave the new page with the read generation unset. Eviction will
+ * set the read generation next time it visits this page.
+ */
+ if (orig->read_gen != WT_READGEN_OLDEST)
+ page->read_gen = orig->read_gen;
+
+ /* If there are no updates to apply to the page, we're done. */
+ if (multi->supd_entries == 0)
+ return (0);
+
if (orig->type == WT_PAGE_ROW_LEAF)
WT_RET(__wt_scr_alloc(session, 0, &key));
@@ -1551,14 +1573,12 @@ __split_multi_inmem(
}
/*
- * If we modified the page above, it will have set the first dirty
- * transaction to the last transaction currently running. However, the
- * updates we installed may be older than that. Set the first dirty
- * transaction to an impossibly old value so this page is never skipped
- * in a checkpoint.
+ * When modifying the page we set the first dirty transaction to the
+ * last transaction currently running. However, the updates we made
+ * might be older than that. Set the first dirty transaction to an
+ * impossibly old value so this page is never skipped in a checkpoint.
*/
- if (page->modify != NULL)
- page->modify->first_dirty_txn = WT_TXN_FIRST;
+ page->modify->first_dirty_txn = WT_TXN_FIRST;
err: /* Free any resources that may have been cached in the cursor. */
WT_TRET(__wt_btcur_close(&cbt, true));
@@ -1629,19 +1649,17 @@ __split_multi_inmem_fail(WT_SESSION_IMPL *session, WT_PAGE *orig, WT_REF *ref)
*/
int
__wt_multi_to_ref(WT_SESSION_IMPL *session,
- WT_PAGE *page, WT_MULTI *multi, WT_REF **refp, size_t *incrp)
+ WT_PAGE *page, WT_MULTI *multi, WT_REF **refp, size_t *incrp, bool closing)
{
WT_ADDR *addr;
WT_IKEY *ikey;
WT_REF *ref;
- size_t incr;
-
- incr = 0;
/* Allocate an underlying WT_REF. */
WT_RET(__wt_calloc_one(session, refp));
ref = *refp;
- incr += sizeof(WT_REF);
+ if (incrp)
+ *incrp += sizeof(WT_REF);
/*
* Set the WT_REF key before (optionally) building the page, underlying
@@ -1653,21 +1671,34 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session,
ikey = multi->key.ikey;
WT_RET(__wt_row_ikey(
session, 0, WT_IKEY_DATA(ikey), ikey->size, ref));
- incr += sizeof(WT_IKEY) + ikey->size;
+ if (incrp)
+ *incrp += sizeof(WT_IKEY) + ikey->size;
break;
default:
ref->ref_recno = multi->key.recno;
break;
}
- /* If there's a disk image, build a page, otherwise set the address. */
- if (multi->disk_image == NULL) {
- /*
- * Copy the address: we could simply take the buffer, but that
- * would complicate error handling, freeing the reference array
- * would have to avoid freeing the memory, and it's not worth
- * the confusion.
- */
+ /* There should be an address or a disk image (or both). */
+ WT_ASSERT(session,
+ multi->addr.addr != NULL || multi->disk_image != NULL);
+
+ /* If we're closing the file, there better be an address. */
+ WT_ASSERT(session, multi->addr.addr != NULL || !closing);
+
+ /* Verify any disk image we have. */
+ WT_ASSERT(session, multi->disk_image == NULL ||
+ __wt_verify_dsk_image(session,
+ "[page instantiate]", multi->disk_image, 0, false) == 0);
+
+ /*
+ * If there's an address, the page was written, set it.
+ *
+ * Copy the address: we could simply take the buffer, but that would
+ * complicate error handling, freeing the reference array would have
+ * to avoid freeing the memory, and it's not worth the confusion.
+ */
+ if (multi->addr.addr != NULL) {
WT_RET(__wt_calloc_one(session, &addr));
ref->addr = addr;
addr->size = multi->addr.size;
@@ -1675,14 +1706,20 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session,
WT_RET(__wt_strndup(session,
multi->addr.addr, addr->size, &addr->addr));
ref->state = WT_REF_DISK;
- } else {
- WT_RET(__split_multi_inmem(session, page, ref, multi));
+ }
+
+ /*
+ * If we have a disk image and we're not closing the file,
+ * re-instantiate the page.
+ *
+ * Discard any page image we don't use.
+ */
+ if (multi->disk_image != NULL && !closing) {
+ WT_RET(__split_multi_inmem(session, page, multi, ref));
ref->state = WT_REF_MEM;
}
+ __wt_free(session, multi->disk_image);
- /* Optionally return changes in the memory footprint. */
- if (incrp != NULL)
- *incrp += incr;
return (0);
}
@@ -2086,8 +2123,8 @@ __split_multi(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
*/
WT_RET(__wt_calloc_def(session, new_entries, &ref_new));
for (i = 0; i < new_entries; ++i)
- WT_ERR(__wt_multi_to_ref(session,
- page, &mod->mod_multi[i], &ref_new[i], &parent_incr));
+ WT_ERR(__wt_multi_to_ref(session, page,
+ &mod->mod_multi[i], &ref_new[i], &parent_incr, closing));
/*
* Split into the parent; if we're closing the file, we hold it
@@ -2175,15 +2212,13 @@ __wt_split_reverse(WT_SESSION_IMPL *session, WT_REF *ref)
* Rewrite an in-memory page with a new version.
*/
int
-__wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref)
+__wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, WT_MULTI *multi)
{
WT_DECL_RET;
WT_PAGE *page;
- WT_PAGE_MODIFY *mod;
WT_REF *new;
page = ref->page;
- mod = page->modify;
WT_RET(__wt_verbose(
session, WT_VERB_SPLIT, "%p: split-rewrite", ref->page));
@@ -2198,14 +2233,14 @@ __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref)
*
* Build the new page.
*
- * Allocate a WT_REF because the error path uses routines that will ea
- * free memory. The only field we need to set is the record number, as
- * it's used by the search routines.
+ * Allocate a WT_REF, the error path calls routines that free memory.
+ * The only field we need to set is the record number, as it's used by
+ * the search routines.
*/
WT_RET(__wt_calloc_one(session, &new));
new->ref_recno = ref->ref_recno;
- WT_ERR(__split_multi_inmem(session, page, new, &mod->mod_multi[0]));
+ WT_ERR(__split_multi_inmem(session, page, multi, new));
/*
* The rewrite succeeded, we can no longer fail.
@@ -2213,7 +2248,7 @@ __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref)
* Finalize the move, discarding moved update lists from the original
* page.
*/
- __split_multi_inmem_final(page, &mod->mod_multi[0]);
+ __split_multi_inmem_final(page, multi);
/*
* Discard the original page.
diff --git a/src/btree/bt_stat.c b/src/btree/bt_stat.c
index 3d5abf34147..d3ddf33446e 100644
--- a/src/btree/bt_stat.c
+++ b/src/btree/bt_stat.c
@@ -41,6 +41,9 @@ __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst)
WT_STAT_SET(session, stats, btree_maxleafpage, btree->maxleafpage);
WT_STAT_SET(session, stats, btree_maxleafvalue, btree->maxleafvalue);
+ WT_STAT_SET(session, stats, cache_bytes_inuse,
+ __wt_btree_bytes_inuse(session));
+
/* Everything else is really, really expensive. */
if (!F_ISSET(cst, WT_CONN_STAT_ALL))
return (0);
@@ -139,7 +142,7 @@ __stat_page_col_var(
} else {
orig_deleted = false;
__wt_cell_unpack(cell, unpack);
- if (unpack->type == WT_CELL_ADDR_DEL)
+ if (unpack->type == WT_CELL_DEL)
orig_deleted = true;
else {
entry_cnt += __wt_cell_rle(unpack);
diff --git a/src/btree/bt_sync.c b/src/btree/bt_sync.c
index da6c53aa316..df794c96cda 100644
--- a/src/btree/bt_sync.c
+++ b/src/btree/bt_sync.c
@@ -84,7 +84,8 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
WT_ERR(__wt_txn_get_snapshot(session));
leaf_bytes += page->memory_footprint;
++leaf_pages;
- WT_ERR(__wt_reconcile(session, walk, NULL, 0));
+ WT_ERR(__wt_reconcile(
+ session, walk, NULL, WT_CHECKPOINTING));
}
}
break;
@@ -92,7 +93,7 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
/*
* If we are flushing a file at read-committed isolation, which
* is of particular interest for flushing the metadata to make
- * schema-changing operation durable, get a transactional
+ * a schema-changing operation durable, get a transactional
* snapshot now.
*
* All changes committed up to this point should be included.
@@ -126,7 +127,17 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
*/
WT_PUBLISH(btree->checkpointing, WT_CKPT_PREPARE);
- WT_ERR(__wt_evict_file_exclusive_on(session));
+ /*
+ * Sync for checkpoint allows splits to happen while the queue
+ * is being drained, but not reconciliation. We need to do this,
+ * since draining the queue can take long enough for hot pages
+ * to grow significantly larger than the configured maximum
+ * size.
+ */
+ F_SET(btree, WT_BTREE_NO_RECONCILE);
+ ret = __wt_evict_file_exclusive_on(session);
+ F_CLR(btree, WT_BTREE_NO_RECONCILE);
+ WT_ERR(ret);
__wt_evict_file_exclusive_off(session);
WT_PUBLISH(btree->checkpointing, WT_CKPT_RUNNING);
@@ -183,7 +194,8 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
leaf_bytes += page->memory_footprint;
++leaf_pages;
}
- WT_ERR(__wt_reconcile(session, walk, NULL, 0));
+ WT_ERR(__wt_reconcile(
+ session, walk, NULL, WT_CHECKPOINTING));
}
break;
case WT_SYNC_CLOSE:
@@ -217,41 +229,9 @@ err: /* On error, clear any left-over tree walk. */
saved_snap_min == WT_TXN_NONE)
__wt_txn_release_snapshot(session);
- if (btree->checkpointing != WT_CKPT_OFF) {
- /*
- * Update the checkpoint generation for this handle so visible
- * updates newer than the checkpoint can be evicted.
- *
- * This has to be published before eviction is enabled again,
- * so that eviction knows that the checkpoint has completed.
- */
- WT_PUBLISH(btree->checkpoint_gen,
- conn->txn_global.checkpoint_gen);
- WT_STAT_FAST_DATA_SET(session,
- btree_checkpoint_generation, btree->checkpoint_gen);
-
- /*
- * Clear the checkpoint flag and push the change; not required,
- * but publishing the change means stalled eviction gets moving
- * as soon as possible.
- */
- btree->checkpointing = WT_CKPT_OFF;
- WT_FULL_BARRIER();
-
- /*
- * If this tree was being skipped by the eviction server during
- * the checkpoint, clear the wait.
- */
- btree->evict_walk_period = 0;
-
- /*
- * Wake the eviction server, in case application threads have
- * stalled while the eviction server decided it couldn't make
- * progress. Without this, application threads will be stalled
- * until the eviction server next wakes.
- */
- WT_TRET(__wt_evict_server_wake(session));
- }
+ /* Clear the checkpoint flag and push the change. */
+ if (btree->checkpointing != WT_CKPT_OFF)
+ WT_PUBLISH(btree->checkpointing, WT_CKPT_OFF);
__wt_spin_unlock(session, &btree->flush_lock);
diff --git a/src/btree/bt_walk.c b/src/btree/bt_walk.c
index bb8a750d848..17d32d6ed63 100644
--- a/src/btree/bt_walk.c
+++ b/src/btree/bt_walk.c
@@ -381,16 +381,6 @@ restart: /*
__ref_ascend(session, &ref, &pindex, &slot);
/*
- * If we got all the way through an internal page and
- * all of the child pages were deleted, mark it for
- * eviction.
- */
- if (empty_internal && pindex->entries > 1) {
- __wt_page_evict_soon(ref->page);
- empty_internal = false;
- }
-
- /*
* If at the root and returning internal pages, return
* the root page, otherwise we're done. Regardless, no
* hazard pointer is required, release the one we hold.
@@ -404,6 +394,16 @@ restart: /*
}
/*
+ * If we got all the way through an internal page and
+ * all of the child pages were deleted, mark it for
+ * eviction.
+ */
+ if (empty_internal && pindex->entries > 1) {
+ WT_ERR(__wt_page_evict_soon(session, ref));
+ empty_internal = false;
+ }
+
+ /*
* Optionally return internal pages. Swap our previous
* hazard pointer for the page we'll return. We don't
* handle restart or not-found returns, it would require
diff --git a/src/btree/row_srch.c b/src/btree/row_srch.c
index 4afcd74520f..0f70e84de7e 100644
--- a/src/btree/row_srch.c
+++ b/src/btree/row_srch.c
@@ -775,7 +775,7 @@ __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
* traversing the skip list each time accumulates to real time.
*/
if (samples > 5000)
- __wt_page_evict_soon(page);
+ WT_RET(__wt_page_evict_soon(session, cbt->ref));
return (0);
}
diff --git a/src/config/config_def.c b/src/config/config_def.c
index 1b656c5a0aa..192b80bb359 100644
--- a/src/config/config_def.c
+++ b/src/config/config_def.c
@@ -42,7 +42,6 @@ static const WT_CONFIG_CHECK
static const WT_CONFIG_CHECK
confchk_wiredtiger_open_checkpoint_subconfigs[] = {
{ "log_size", "int", NULL, "min=0,max=2GB", NULL, 0 },
- { "name", "string", NULL, NULL, NULL, 0 },
{ "wait", "int", NULL, "min=0,max=100000", NULL, 0 },
{ NULL, NULL, NULL, NULL, NULL, 0 }
};
@@ -67,16 +66,9 @@ static const WT_CONFIG_CHECK
};
static const WT_CONFIG_CHECK
- confchk_wiredtiger_open_log_subconfigs[] = {
+ confchk_WT_CONNECTION_reconfigure_log_subconfigs[] = {
{ "archive", "boolean", NULL, NULL, NULL, 0 },
- { "compressor", "string", NULL, NULL, NULL, 0 },
- { "enabled", "boolean", NULL, NULL, NULL, 0 },
- { "file_max", "int", NULL, "min=100KB,max=2GB", NULL, 0 },
- { "path", "string", NULL, NULL, NULL, 0 },
{ "prealloc", "boolean", NULL, NULL, NULL, 0 },
- { "recover", "string",
- NULL, "choices=[\"error\",\"on\"]",
- NULL, 0 },
{ "zero_fill", "boolean", NULL, NULL, NULL, 0 },
{ NULL, NULL, NULL, NULL, NULL, 0 }
};
@@ -99,10 +91,9 @@ static const WT_CONFIG_CHECK
};
static const WT_CONFIG_CHECK
- confchk_wiredtiger_open_statistics_log_subconfigs[] = {
+ confchk_WT_CONNECTION_reconfigure_statistics_log_subconfigs[] = {
{ "json", "boolean", NULL, NULL, NULL, 0 },
{ "on_close", "boolean", NULL, NULL, NULL, 0 },
- { "path", "string", NULL, NULL, NULL, 0 },
{ "sources", "list", NULL, NULL, NULL, 0 },
{ "timestamp", "string", NULL, NULL, NULL, 0 },
{ "wait", "int", NULL, "min=0,max=100000", NULL, 0 },
@@ -117,16 +108,16 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = {
{ "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 },
{ "checkpoint", "category",
NULL, NULL,
- confchk_wiredtiger_open_checkpoint_subconfigs, 3 },
+ confchk_wiredtiger_open_checkpoint_subconfigs, 2 },
{ "error_prefix", "string", NULL, NULL, NULL, 0 },
{ "eviction", "category",
NULL, NULL,
confchk_wiredtiger_open_eviction_subconfigs, 2 },
{ "eviction_dirty_target", "int",
- NULL, "min=5,max=99",
+ NULL, "min=1,max=99",
NULL, 0 },
{ "eviction_dirty_trigger", "int",
- NULL, "min=5,max=99",
+ NULL, "min=1,max=99",
NULL, 0 },
{ "eviction_target", "int", NULL, "min=10,max=99", NULL, 0 },
{ "eviction_trigger", "int", NULL, "min=10,max=99", NULL, 0 },
@@ -135,7 +126,7 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = {
confchk_wiredtiger_open_file_manager_subconfigs, 3 },
{ "log", "category",
NULL, NULL,
- confchk_wiredtiger_open_log_subconfigs, 8 },
+ confchk_WT_CONNECTION_reconfigure_log_subconfigs, 3 },
{ "lsm_manager", "category",
NULL, NULL,
confchk_wiredtiger_open_lsm_manager_subconfigs, 2 },
@@ -148,7 +139,7 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = {
NULL, 0 },
{ "statistics_log", "category",
NULL, NULL,
- confchk_wiredtiger_open_statistics_log_subconfigs, 6 },
+ confchk_WT_CONNECTION_reconfigure_statistics_log_subconfigs, 5 },
{ "verbose", "list",
NULL, "choices=[\"api\",\"block\",\"checkpoint\",\"compact\","
"\"evict\",\"evictserver\",\"fileops\",\"handleops\",\"log\","
@@ -326,6 +317,7 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_open_cursor[] = {
{ "append", "boolean", NULL, NULL, NULL, 0 },
{ "bulk", "string", NULL, NULL, NULL, 0 },
{ "checkpoint", "string", NULL, NULL, NULL, 0 },
+ { "checkpoint_wait", "boolean", NULL, NULL, NULL, 0 },
{ "dump", "string",
NULL, "choices=[\"hex\",\"json\",\"print\"]",
NULL, 0 },
@@ -608,6 +600,32 @@ static const WT_CONFIG_CHECK
};
static const WT_CONFIG_CHECK
+ confchk_wiredtiger_open_log_subconfigs[] = {
+ { "archive", "boolean", NULL, NULL, NULL, 0 },
+ { "compressor", "string", NULL, NULL, NULL, 0 },
+ { "enabled", "boolean", NULL, NULL, NULL, 0 },
+ { "file_max", "int", NULL, "min=100KB,max=2GB", NULL, 0 },
+ { "path", "string", NULL, NULL, NULL, 0 },
+ { "prealloc", "boolean", NULL, NULL, NULL, 0 },
+ { "recover", "string",
+ NULL, "choices=[\"error\",\"on\"]",
+ NULL, 0 },
+ { "zero_fill", "boolean", NULL, NULL, NULL, 0 },
+ { NULL, NULL, NULL, NULL, NULL, 0 }
+};
+
+static const WT_CONFIG_CHECK
+ confchk_wiredtiger_open_statistics_log_subconfigs[] = {
+ { "json", "boolean", NULL, NULL, NULL, 0 },
+ { "on_close", "boolean", NULL, NULL, NULL, 0 },
+ { "path", "string", NULL, NULL, NULL, 0 },
+ { "sources", "list", NULL, NULL, NULL, 0 },
+ { "timestamp", "string", NULL, NULL, NULL, 0 },
+ { "wait", "int", NULL, "min=0,max=100000", NULL, 0 },
+ { NULL, NULL, NULL, NULL, NULL, 0 }
+};
+
+static const WT_CONFIG_CHECK
confchk_wiredtiger_open_transaction_sync_subconfigs[] = {
{ "enabled", "boolean", NULL, NULL, NULL, 0 },
{ "method", "string",
@@ -625,7 +643,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
{ "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 },
{ "checkpoint", "category",
NULL, NULL,
- confchk_wiredtiger_open_checkpoint_subconfigs, 3 },
+ confchk_wiredtiger_open_checkpoint_subconfigs, 2 },
{ "checkpoint_sync", "boolean", NULL, NULL, NULL, 0 },
{ "config_base", "boolean", NULL, NULL, NULL, 0 },
{ "create", "boolean", NULL, NULL, NULL, 0 },
@@ -640,10 +658,10 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
NULL, NULL,
confchk_wiredtiger_open_eviction_subconfigs, 2 },
{ "eviction_dirty_target", "int",
- NULL, "min=5,max=99",
+ NULL, "min=1,max=99",
NULL, 0 },
{ "eviction_dirty_trigger", "int",
- NULL, "min=5,max=99",
+ NULL, "min=1,max=99",
NULL, 0 },
{ "eviction_target", "int", NULL, "min=10,max=99", NULL, 0 },
{ "eviction_trigger", "int", NULL, "min=10,max=99", NULL, 0 },
@@ -706,7 +724,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = {
{ "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 },
{ "checkpoint", "category",
NULL, NULL,
- confchk_wiredtiger_open_checkpoint_subconfigs, 3 },
+ confchk_wiredtiger_open_checkpoint_subconfigs, 2 },
{ "checkpoint_sync", "boolean", NULL, NULL, NULL, 0 },
{ "config_base", "boolean", NULL, NULL, NULL, 0 },
{ "create", "boolean", NULL, NULL, NULL, 0 },
@@ -721,10 +739,10 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = {
NULL, NULL,
confchk_wiredtiger_open_eviction_subconfigs, 2 },
{ "eviction_dirty_target", "int",
- NULL, "min=5,max=99",
+ NULL, "min=1,max=99",
NULL, 0 },
{ "eviction_dirty_trigger", "int",
- NULL, "min=5,max=99",
+ NULL, "min=1,max=99",
NULL, 0 },
{ "eviction_target", "int", NULL, "min=10,max=99", NULL, 0 },
{ "eviction_trigger", "int", NULL, "min=10,max=99", NULL, 0 },
@@ -788,7 +806,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = {
{ "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 },
{ "checkpoint", "category",
NULL, NULL,
- confchk_wiredtiger_open_checkpoint_subconfigs, 3 },
+ confchk_wiredtiger_open_checkpoint_subconfigs, 2 },
{ "checkpoint_sync", "boolean", NULL, NULL, NULL, 0 },
{ "direct_io", "list",
NULL, "choices=[\"checkpoint\",\"data\",\"log\"]",
@@ -801,10 +819,10 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = {
NULL, NULL,
confchk_wiredtiger_open_eviction_subconfigs, 2 },
{ "eviction_dirty_target", "int",
- NULL, "min=5,max=99",
+ NULL, "min=1,max=99",
NULL, 0 },
{ "eviction_dirty_trigger", "int",
- NULL, "min=5,max=99",
+ NULL, "min=1,max=99",
NULL, 0 },
{ "eviction_target", "int", NULL, "min=10,max=99", NULL, 0 },
{ "eviction_trigger", "int", NULL, "min=10,max=99", NULL, 0 },
@@ -864,7 +882,7 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = {
{ "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 },
{ "checkpoint", "category",
NULL, NULL,
- confchk_wiredtiger_open_checkpoint_subconfigs, 3 },
+ confchk_wiredtiger_open_checkpoint_subconfigs, 2 },
{ "checkpoint_sync", "boolean", NULL, NULL, NULL, 0 },
{ "direct_io", "list",
NULL, "choices=[\"checkpoint\",\"data\",\"log\"]",
@@ -877,10 +895,10 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = {
NULL, NULL,
confchk_wiredtiger_open_eviction_subconfigs, 2 },
{ "eviction_dirty_target", "int",
- NULL, "min=5,max=99",
+ NULL, "min=1,max=99",
NULL, 0 },
{ "eviction_dirty_trigger", "int",
- NULL, "min=5,max=99",
+ NULL, "min=1,max=99",
NULL, 0 },
{ "eviction_target", "int", NULL, "min=10,max=99", NULL, 0 },
{ "eviction_trigger", "int", NULL, "min=10,max=99", NULL, 0 },
@@ -970,17 +988,14 @@ static const WT_CONFIG_ENTRY config_entries[] = {
},
{ "WT_CONNECTION.reconfigure",
"async=(enabled=0,ops_max=1024,threads=2),cache_overhead=8,"
- "cache_size=100MB,checkpoint=(log_size=0,"
- "name=\"WiredTigerCheckpoint\",wait=0),error_prefix=,"
- "eviction=(threads_max=1,threads_min=1),eviction_dirty_target=80,"
- "eviction_dirty_trigger=95,eviction_target=80,eviction_trigger=95"
+ "cache_size=100MB,checkpoint=(log_size=0,wait=0),error_prefix=,"
+ "eviction=(threads_max=1,threads_min=1),eviction_dirty_target=5,"
+ "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
",file_manager=(close_handle_minimum=250,close_idle_time=30,"
- "close_scan_interval=10),log=(archive=,compressor=,enabled=0,"
- "file_max=100MB,path=\".\",prealloc=,recover=on,zero_fill=0),"
+ "close_scan_interval=10),log=(archive=,prealloc=,zero_fill=0),"
"lsm_manager=(merge=,worker_thread_max=4),lsm_merge=,"
"shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB),"
- "statistics=none,statistics_log=(json=0,on_close=0,"
- "path=\"WiredTigerStat.%d.%H\",sources=,"
+ "statistics=none,statistics_log=(json=0,on_close=0,sources=,"
"timestamp=\"%b %d %H:%M:%S\",wait=0),verbose=",
confchk_WT_CONNECTION_reconfigure, 18
},
@@ -1052,10 +1067,10 @@ static const WT_CONFIG_ENTRY config_entries[] = {
NULL, 0
},
{ "WT_SESSION.open_cursor",
- "append=0,bulk=0,checkpoint=,dump=,next_random=0,"
- "next_random_sample_size=0,overwrite=,raw=0,readonly=0,"
+ "append=0,bulk=0,checkpoint=,checkpoint_wait=,dump=,next_random=0"
+ ",next_random_sample_size=0,overwrite=,raw=0,readonly=0,"
"skip_sort_check=0,statistics=,target=",
- confchk_WT_SESSION_open_cursor, 12
+ confchk_WT_SESSION_open_cursor, 13
},
{ "WT_SESSION.rebalance",
"",
@@ -1168,21 +1183,20 @@ static const WT_CONFIG_ENTRY config_entries[] = {
},
{ "wiredtiger_open",
"async=(enabled=0,ops_max=1024,threads=2),buffer_alignment=-1,"
- "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0,"
- "name=\"WiredTigerCheckpoint\",wait=0),checkpoint_sync=,"
- "config_base=,create=0,direct_io=,encryption=(keyid=,name=,"
- "secretkey=),error_prefix=,eviction=(threads_max=1,threads_min=1)"
- ",eviction_dirty_target=80,eviction_dirty_trigger=95,"
- "eviction_target=80,eviction_trigger=95,exclusive=0,extensions=,"
- "file_extend=,file_manager=(close_handle_minimum=250,"
- "close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
- "in_memory=0,log=(archive=,compressor=,enabled=0,file_max=100MB,"
- "path=\".\",prealloc=,recover=on,zero_fill=0),lsm_manager=(merge="
- ",worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0"
- ",session_max=100,session_scratch_max=2MB,"
- "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB),"
- "statistics=none,statistics_log=(json=0,on_close=0,"
- "path=\"WiredTigerStat.%d.%H\",sources=,"
+ "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0,wait=0)"
+ ",checkpoint_sync=,config_base=,create=0,direct_io=,"
+ "encryption=(keyid=,name=,secretkey=),error_prefix=,"
+ "eviction=(threads_max=1,threads_min=1),eviction_dirty_target=5,"
+ "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
+ ",exclusive=0,extensions=,file_extend=,"
+ "file_manager=(close_handle_minimum=250,close_idle_time=30,"
+ "close_scan_interval=10),hazard_max=1000,in_memory=0,"
+ "log=(archive=,compressor=,enabled=0,file_max=100MB,path=\".\","
+ "prealloc=,recover=on,zero_fill=0),lsm_manager=(merge=,"
+ "worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0,"
+ "session_max=100,session_scratch_max=2MB,shared_cache=(chunk=10MB"
+ ",name=,quota=0,reserve=0,size=500MB),statistics=none,"
+ "statistics_log=(json=0,on_close=0,path=\".\",sources=,"
"timestamp=\"%b %d %H:%M:%S\",wait=0),transaction_sync=(enabled=0"
",method=fsync),use_environment=,use_environment_priv=0,verbose=,"
"write_through=",
@@ -1190,21 +1204,20 @@ static const WT_CONFIG_ENTRY config_entries[] = {
},
{ "wiredtiger_open_all",
"async=(enabled=0,ops_max=1024,threads=2),buffer_alignment=-1,"
- "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0,"
- "name=\"WiredTigerCheckpoint\",wait=0),checkpoint_sync=,"
- "config_base=,create=0,direct_io=,encryption=(keyid=,name=,"
- "secretkey=),error_prefix=,eviction=(threads_max=1,threads_min=1)"
- ",eviction_dirty_target=80,eviction_dirty_trigger=95,"
- "eviction_target=80,eviction_trigger=95,exclusive=0,extensions=,"
- "file_extend=,file_manager=(close_handle_minimum=250,"
- "close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
- "in_memory=0,log=(archive=,compressor=,enabled=0,file_max=100MB,"
- "path=\".\",prealloc=,recover=on,zero_fill=0),lsm_manager=(merge="
- ",worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0"
- ",session_max=100,session_scratch_max=2MB,"
- "shared_cache=(chunk=10MB,name=,quota=0,reserve=0,size=500MB),"
- "statistics=none,statistics_log=(json=0,on_close=0,"
- "path=\"WiredTigerStat.%d.%H\",sources=,"
+ "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0,wait=0)"
+ ",checkpoint_sync=,config_base=,create=0,direct_io=,"
+ "encryption=(keyid=,name=,secretkey=),error_prefix=,"
+ "eviction=(threads_max=1,threads_min=1),eviction_dirty_target=5,"
+ "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
+ ",exclusive=0,extensions=,file_extend=,"
+ "file_manager=(close_handle_minimum=250,close_idle_time=30,"
+ "close_scan_interval=10),hazard_max=1000,in_memory=0,"
+ "log=(archive=,compressor=,enabled=0,file_max=100MB,path=\".\","
+ "prealloc=,recover=on,zero_fill=0),lsm_manager=(merge=,"
+ "worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0,"
+ "session_max=100,session_scratch_max=2MB,shared_cache=(chunk=10MB"
+ ",name=,quota=0,reserve=0,size=500MB),statistics=none,"
+ "statistics_log=(json=0,on_close=0,path=\".\",sources=,"
"timestamp=\"%b %d %H:%M:%S\",wait=0),transaction_sync=(enabled=0"
",method=fsync),use_environment=,use_environment_priv=0,verbose=,"
"version=(major=0,minor=0),write_through=",
@@ -1212,41 +1225,39 @@ static const WT_CONFIG_ENTRY config_entries[] = {
},
{ "wiredtiger_open_basecfg",
"async=(enabled=0,ops_max=1024,threads=2),buffer_alignment=-1,"
- "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0,"
- "name=\"WiredTigerCheckpoint\",wait=0),checkpoint_sync=,"
- "direct_io=,encryption=(keyid=,name=,secretkey=),error_prefix=,"
- "eviction=(threads_max=1,threads_min=1),eviction_dirty_target=80,"
- "eviction_dirty_trigger=95,eviction_target=80,eviction_trigger=95"
- ",extensions=,file_extend=,file_manager=(close_handle_minimum=250"
- ",close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
- "log=(archive=,compressor=,enabled=0,file_max=100MB,path=\".\","
- "prealloc=,recover=on,zero_fill=0),lsm_manager=(merge=,"
- "worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0,"
- "session_max=100,session_scratch_max=2MB,shared_cache=(chunk=10MB"
- ",name=,quota=0,reserve=0,size=500MB),statistics=none,"
- "statistics_log=(json=0,on_close=0,path=\"WiredTigerStat.%d.%H\","
- "sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
- "transaction_sync=(enabled=0,method=fsync),verbose=,"
+ "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0,wait=0)"
+ ",checkpoint_sync=,direct_io=,encryption=(keyid=,name=,"
+ "secretkey=),error_prefix=,eviction=(threads_max=1,threads_min=1)"
+ ",eviction_dirty_target=5,eviction_dirty_trigger=20,"
+ "eviction_target=80,eviction_trigger=95,extensions=,file_extend=,"
+ "file_manager=(close_handle_minimum=250,close_idle_time=30,"
+ "close_scan_interval=10),hazard_max=1000,log=(archive=,"
+ "compressor=,enabled=0,file_max=100MB,path=\".\",prealloc=,"
+ "recover=on,zero_fill=0),lsm_manager=(merge=,worker_thread_max=4)"
+ ",lsm_merge=,mmap=,multiprocess=0,readonly=0,session_max=100,"
+ "session_scratch_max=2MB,shared_cache=(chunk=10MB,name=,quota=0,"
+ "reserve=0,size=500MB),statistics=none,statistics_log=(json=0,"
+ "on_close=0,path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\","
+ "wait=0),transaction_sync=(enabled=0,method=fsync),verbose=,"
"version=(major=0,minor=0),write_through=",
confchk_wiredtiger_open_basecfg, 33
},
{ "wiredtiger_open_usercfg",
"async=(enabled=0,ops_max=1024,threads=2),buffer_alignment=-1,"
- "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0,"
- "name=\"WiredTigerCheckpoint\",wait=0),checkpoint_sync=,"
- "direct_io=,encryption=(keyid=,name=,secretkey=),error_prefix=,"
- "eviction=(threads_max=1,threads_min=1),eviction_dirty_target=80,"
- "eviction_dirty_trigger=95,eviction_target=80,eviction_trigger=95"
- ",extensions=,file_extend=,file_manager=(close_handle_minimum=250"
- ",close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
- "log=(archive=,compressor=,enabled=0,file_max=100MB,path=\".\","
- "prealloc=,recover=on,zero_fill=0),lsm_manager=(merge=,"
- "worker_thread_max=4),lsm_merge=,mmap=,multiprocess=0,readonly=0,"
- "session_max=100,session_scratch_max=2MB,shared_cache=(chunk=10MB"
- ",name=,quota=0,reserve=0,size=500MB),statistics=none,"
- "statistics_log=(json=0,on_close=0,path=\"WiredTigerStat.%d.%H\","
- "sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
- "transaction_sync=(enabled=0,method=fsync),verbose=,"
+ "cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0,wait=0)"
+ ",checkpoint_sync=,direct_io=,encryption=(keyid=,name=,"
+ "secretkey=),error_prefix=,eviction=(threads_max=1,threads_min=1)"
+ ",eviction_dirty_target=5,eviction_dirty_trigger=20,"
+ "eviction_target=80,eviction_trigger=95,extensions=,file_extend=,"
+ "file_manager=(close_handle_minimum=250,close_idle_time=30,"
+ "close_scan_interval=10),hazard_max=1000,log=(archive=,"
+ "compressor=,enabled=0,file_max=100MB,path=\".\",prealloc=,"
+ "recover=on,zero_fill=0),lsm_manager=(merge=,worker_thread_max=4)"
+ ",lsm_merge=,mmap=,multiprocess=0,readonly=0,session_max=100,"
+ "session_scratch_max=2MB,shared_cache=(chunk=10MB,name=,quota=0,"
+ "reserve=0,size=500MB),statistics=none,statistics_log=(json=0,"
+ "on_close=0,path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\","
+ "wait=0),transaction_sync=(enabled=0,method=fsync),verbose=,"
"write_through=",
confchk_wiredtiger_open_usercfg, 32
},
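
The generated tables above also change user-visible defaults: eviction_dirty_target drops from 80 to 5 and eviction_dirty_trigger from 95 to 20, the allowed minimum for both falls from 5 to 1, checkpoint.name disappears from wiredtiger_open and WT_CONNECTION.reconfigure, and checkpoint_wait is added to WT_SESSION.open_cursor. A hedged example of overriding the new dirty-eviction defaults at open time ("my_home" and the chosen percentages are placeholders):

    WT_CONNECTION *conn;
    int ret;

    /* Raise the dirty targets back toward the old behavior if desired. */
    ret = wiredtiger_open("my_home", NULL,
        "create,eviction_dirty_target=10,eviction_dirty_trigger=40", &conn);
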
diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c
index 98267eeeb2c..1c6b0c2b500 100644
--- a/src/conn/conn_api.c
+++ b/src/conn/conn_api.c
@@ -1217,7 +1217,8 @@ __conn_config_file(WT_SESSION_IMPL *session,
return (0);
/* Open the configuration file. */
- WT_RET(__wt_open(session, filename, WT_OPEN_FILE_TYPE_REGULAR, 0, &fh));
+ WT_RET(__wt_open(
+ session, filename, WT_FS_OPEN_FILE_TYPE_REGULAR, 0, &fh));
WT_ERR(__wt_filesize(session, fh, &size));
if (size == 0)
goto err;
@@ -1510,8 +1511,8 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[])
exist = false;
if (!is_create)
WT_ERR(__wt_fs_exist(session, WT_WIREDTIGER, &exist));
- ret = __wt_open(session, WT_SINGLETHREAD, WT_OPEN_FILE_TYPE_REGULAR,
- is_create || exist ? WT_OPEN_CREATE : 0, &conn->lock_fh);
+ ret = __wt_open(session, WT_SINGLETHREAD, WT_FS_OPEN_FILE_TYPE_REGULAR,
+ is_create || exist ? WT_FS_OPEN_CREATE : 0, &conn->lock_fh);
/*
* If this is a read-only connection and we cannot grab the lock
@@ -1554,7 +1555,7 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[])
*/
#define WT_SINGLETHREAD_STRING "WiredTiger lock file\n"
WT_ERR(__wt_filesize(session, conn->lock_fh, &size));
- if (size != strlen(WT_SINGLETHREAD_STRING))
+ if ((size_t)size != strlen(WT_SINGLETHREAD_STRING))
WT_ERR(__wt_write(session, conn->lock_fh, (wt_off_t)0,
strlen(WT_SINGLETHREAD_STRING),
WT_SINGLETHREAD_STRING));
@@ -1563,7 +1564,8 @@ __conn_single(WT_SESSION_IMPL *session, const char *cfg[])
/* We own the lock file, optionally create the WiredTiger file. */
ret = __wt_open(session, WT_WIREDTIGER,
- WT_OPEN_FILE_TYPE_REGULAR, is_create ? WT_OPEN_CREATE : 0, &fh);
+ WT_FS_OPEN_FILE_TYPE_REGULAR, is_create ? WT_FS_OPEN_CREATE : 0,
+ &fh);
/*
* If we're read-only, check for handled errors. Even if able to open
@@ -1784,7 +1786,7 @@ __conn_write_base_config(WT_SESSION_IMPL *session, const char *cfg[])
* runs. This doesn't matter for correctness, it's just cleaning up
* random files.
*/
- WT_RET(__wt_remove_if_exists(session, WT_BASECONFIG_SET));
+ WT_RET(__wt_remove_if_exists(session, WT_BASECONFIG_SET, false));
/*
* The base configuration file is only written if creating the database,
@@ -1809,7 +1811,7 @@ __conn_write_base_config(WT_SESSION_IMPL *session, const char *cfg[])
return (0);
WT_RET(__wt_fopen(session, WT_BASECONFIG_SET,
- WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, WT_STREAM_WRITE, &fs));
+ WT_FS_OPEN_CREATE | WT_FS_OPEN_EXCLUSIVE, WT_STREAM_WRITE, &fs));
WT_ERR(__wt_fprintf(session, fs, "%s\n\n",
"# Do not modify this file.\n"
@@ -1870,7 +1872,8 @@ __conn_write_base_config(WT_SESSION_IMPL *session, const char *cfg[])
if (0) {
/* Close open file handle, remove any temporary file. */
err: WT_TRET(__wt_fclose(session, &fs));
- WT_TRET(__wt_remove_if_exists(session, WT_BASECONFIG_SET));
+ WT_TRET(
+ __wt_remove_if_exists(session, WT_BASECONFIG_SET, false));
}
__wt_free(session, base_config);
diff --git a/src/conn/conn_cache.c b/src/conn/conn_cache.c
index 9f15db5382b..e8bb7187418 100644
--- a/src/conn/conn_cache.c
+++ b/src/conn/conn_cache.c
@@ -176,6 +176,10 @@ __wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[])
&cache->evict_queues[i].evict_lock, "cache eviction"));
}
+ /* Ensure there is always a non-NULL current queue. */
+ cache->evict_current_queue =
+ &cache->evict_queues[WT_EVICT_URGENT_QUEUE + 1];
+
/*
* We get/set some values in the cache statistics (rather than have
* two copies), configure them.
@@ -197,7 +201,7 @@ __wt_cache_stats_update(WT_SESSION_IMPL *session)
WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
WT_CONNECTION_STATS **stats;
- uint64_t inuse, leaf, used;
+ uint64_t inuse, leaf;
conn = S2C(session);
cache = conn->cache;
@@ -208,26 +212,29 @@ __wt_cache_stats_update(WT_SESSION_IMPL *session)
* There are races updating the different cache tracking values so
* be paranoid calculating the leaf byte usage.
*/
- used = cache->bytes_overflow + cache->bytes_internal;
- leaf = inuse > used ? inuse - used : 0;
+ leaf = inuse > cache->bytes_internal ?
+ inuse - cache->bytes_internal : 0;
WT_STAT_SET(session, stats, cache_bytes_max, conn->cache_size);
WT_STAT_SET(session, stats, cache_bytes_inuse, inuse);
-
WT_STAT_SET(session, stats, cache_overhead, cache->overhead_pct);
- WT_STAT_SET(
- session, stats, cache_pages_inuse, __wt_cache_pages_inuse(cache));
+
WT_STAT_SET(
session, stats, cache_bytes_dirty, __wt_cache_dirty_inuse(cache));
- WT_STAT_SET(session, stats,
- cache_eviction_maximum_page_size, cache->evict_max_page_size);
- WT_STAT_SET(session, stats, cache_pages_dirty, cache->pages_dirty);
-
WT_STAT_SET(
- session, stats, cache_bytes_internal, cache->bytes_internal);
+ session, stats, cache_bytes_image, __wt_cache_bytes_image(cache));
WT_STAT_SET(
- session, stats, cache_bytes_overflow, cache->bytes_overflow);
+ session, stats, cache_pages_inuse, __wt_cache_pages_inuse(cache));
+ WT_STAT_SET(
+ session, stats, cache_bytes_internal, cache->bytes_internal);
WT_STAT_SET(session, stats, cache_bytes_leaf, leaf);
+ WT_STAT_SET(
+ session, stats, cache_bytes_other, __wt_cache_bytes_other(cache));
+
+ WT_STAT_SET(session, stats,
+ cache_eviction_maximum_page_size, cache->evict_max_page_size);
+ WT_STAT_SET(session, stats, cache_pages_dirty,
+ cache->pages_dirty_intl + cache->pages_dirty_leaf);
/*
* The number of files with active walks ~= number of hazard pointers
@@ -235,7 +242,7 @@ __wt_cache_stats_update(WT_SESSION_IMPL *session)
*/
if (conn->evict_session != NULL)
WT_STAT_SET(session, stats, cache_eviction_walks_active,
- conn->evict_session->nhazard);
+ cache->walk_session->nhazard);
}
/*
@@ -267,11 +274,13 @@ __wt_cache_destroy(WT_SESSION_IMPL *session)
__wt_errx(session,
"cache server: exiting with %" PRIu64 " bytes in memory",
cache->bytes_inmem);
- if (cache->bytes_dirty != 0 || cache->pages_dirty != 0)
+ if (cache->bytes_dirty_intl + cache->bytes_dirty_leaf != 0 ||
+ cache->pages_dirty_intl + cache->pages_dirty_leaf != 0)
__wt_errx(session,
"cache server: exiting with %" PRIu64
" bytes dirty and %" PRIu64 " pages dirty",
- cache->bytes_dirty, cache->pages_dirty);
+ cache->bytes_dirty_intl + cache->bytes_dirty_leaf,
+ cache->pages_dirty_intl + cache->pages_dirty_leaf);
WT_TRET(__wt_cond_auto_destroy(session, &cache->evict_cond));
WT_TRET(__wt_cond_destroy(session, &cache->evict_waiter_cond));
@@ -286,6 +295,7 @@ __wt_cache_destroy(WT_SESSION_IMPL *session)
__wt_spin_destroy(session, &cache->evict_queues[i].evict_lock);
__wt_free(session, cache->evict_queues[i].evict_queue);
}
+
__wt_free(session, conn->cache);
return (ret);
}
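
The cache-statistics rework above splits dirty tracking into internal and leaf counts and adds image/other byte breakdowns. Applications read these through an ordinary statistics cursor; a hedged sketch follows, assuming WT_STAT_CONN_CACHE_BYTES_DIRTY as the connection-level dirty-bytes key and the usual description/printable/value result triple:

    WT_CURSOR *stat;
    const char *desc, *pvalue;
    int64_t value;
    int ret;

    ret = session->open_cursor(session, "statistics:", NULL, NULL, &stat);
    stat->set_key(stat, WT_STAT_CONN_CACHE_BYTES_DIRTY);
    if ((ret = stat->search(stat)) == 0)
        ret = stat->get_value(stat, &desc, &pvalue, &value);
    ret = stat->close(stat);
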
diff --git a/src/conn/conn_ckpt.c b/src/conn/conn_ckpt.c
index a23350a5e46..d54c65c4767 100644
--- a/src/conn/conn_ckpt.c
+++ b/src/conn/conn_ckpt.c
@@ -19,61 +19,38 @@ __ckpt_server_config(WT_SESSION_IMPL *session, const char **cfg, bool *startp)
{
WT_CONFIG_ITEM cval;
WT_CONNECTION_IMPL *conn;
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- char *p;
+
+ *startp = false;
conn = S2C(session);
- /*
- * The checkpoint configuration requires a wait time and/or a log
- * size -- if one is not set, we're not running at all.
- * Checkpoints based on log size also require logging be enabled.
- */
WT_RET(__wt_config_gets(session, cfg, "checkpoint.wait", &cval));
conn->ckpt_usecs = (uint64_t)cval.val * WT_MILLION;
WT_RET(__wt_config_gets(session, cfg, "checkpoint.log_size", &cval));
conn->ckpt_logsize = (wt_off_t)cval.val;
- /* Checkpoints are incompatible with in-memory configuration */
- if (conn->ckpt_usecs != 0 || conn->ckpt_logsize != 0) {
+ /*
+ * The checkpoint configuration requires a wait time and/or a log size;
+ * if neither is set, we're not running at all. Checkpoints based on log
+ * size also require logging be enabled.
+ */
+ if (conn->ckpt_usecs != 0 ||
+ (conn->ckpt_logsize != 0 &&
+ FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))) {
+ /* Checkpoints are incompatible with in-memory configuration */
WT_RET(__wt_config_gets(session, cfg, "in_memory", &cval));
if (cval.val != 0)
WT_RET_MSG(session, EINVAL,
- "In memory configuration incompatible with "
- "checkpoints");
- }
+ "checkpoint configuration incompatible with "
+ "in-memory configuration");
- __wt_log_written_reset(session);
- if ((conn->ckpt_usecs == 0 && conn->ckpt_logsize == 0) ||
- (conn->ckpt_logsize && conn->ckpt_usecs == 0 &&
- !FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED))) {
- *startp = false;
- return (0);
- }
- *startp = true;
+ __wt_log_written_reset(session);
- /*
- * The application can specify a checkpoint name, which we ignore if
- * it's our default.
- */
- WT_RET(__wt_config_gets(session, cfg, "checkpoint.name", &cval));
- if (cval.len != 0 &&
- !WT_STRING_MATCH(WT_CHECKPOINT, cval.str, cval.len)) {
- WT_RET(__wt_checkpoint_name_ok(session, cval.str, cval.len));
-
- WT_RET(__wt_scr_alloc(session, cval.len + 20, &tmp));
- WT_ERR(__wt_buf_fmt(
- session, tmp, "name=%.*s", (int)cval.len, cval.str));
- WT_ERR(__wt_strdup(session, tmp->data, &p));
-
- __wt_free(session, conn->ckpt_config);
- conn->ckpt_config = p;
+ *startp = true;
}
-err: __wt_scr_free(session, &tmp);
- return (ret);
+ return (0);
}
/*
@@ -103,7 +80,7 @@ __ckpt_server(void *arg)
__wt_cond_wait(session, conn->ckpt_cond, conn->ckpt_usecs));
/* Checkpoint the database. */
- WT_ERR(wt_session->checkpoint(wt_session, conn->ckpt_config));
+ WT_ERR(wt_session->checkpoint(wt_session, NULL));
/* Reset. */
if (conn->ckpt_logsize) {
@@ -179,7 +156,16 @@ __wt_checkpoint_server_create(WT_SESSION_IMPL *session, const char *cfg[])
conn = S2C(session);
start = false;
- /* If there is already a server running, shut it down. */
+ /*
+ * Stop any server that is already running. This means that each time
+ * reconfigure is called we'll bounce the server even if there are no
+ * configuration changes. This makes our life easier as the underlying
+ * configuration routine doesn't have to worry about freeing objects
+ * in the connection structure (it's guaranteed to always start with a
+ * blank slate), and we don't have to worry about races where a running
+ * server is reading configuration information that we're updating, and
+ * it's not expected that reconfiguration will happen a lot.
+ */
if (conn->ckpt_session != NULL)
WT_RET(__wt_checkpoint_server_destroy(session));
@@ -211,8 +197,6 @@ __wt_checkpoint_server_destroy(WT_SESSION_IMPL *session)
}
WT_TRET(__wt_cond_destroy(session, &conn->ckpt_cond));
- __wt_free(session, conn->ckpt_config);
-
/* Close the server thread's session. */
if (conn->ckpt_session != NULL) {
wt_session = &conn->ckpt_session->iface;
@@ -226,7 +210,6 @@ __wt_checkpoint_server_destroy(WT_SESSION_IMPL *session)
conn->ckpt_session = NULL;
conn->ckpt_tid_set = false;
conn->ckpt_cond = NULL;
- conn->ckpt_config = NULL;
conn->ckpt_usecs = 0;
return (ret);
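
With the changes above, the checkpoint server no longer honors a custom checkpoint.name, and a log_size trigger only starts the server when logging is enabled. A hedged example of configuring time-based checkpoints at open ("my_home" is a placeholder):

    WT_CONNECTION *conn;
    int ret;

    /*
     * Checkpoint every 60 seconds; add log=(enabled=true) if a
     * checkpoint=(log_size=...) trigger is also wanted.
     */
    ret = wiredtiger_open("my_home", NULL,
        "create,checkpoint=(wait=60)", &conn);
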
diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c
index 08fb2b24468..f52fccc7d1c 100644
--- a/src/conn/conn_dhandle.c
+++ b/src/conn/conn_dhandle.c
@@ -64,6 +64,16 @@ __conn_dhandle_alloc(WT_SESSION_IMPL *session,
F_SET(dhandle, WT_DHANDLE_IS_METADATA);
/*
+ * We are holding the data handle list lock, which protects most
+ * threads from seeing the new handle until that lock is released.
+ *
+ * However, the sweep server scans the list of handles without holding
+ * that lock, so we need a write barrier here to ensure the sweep
+ * server doesn't see a partially filled in structure.
+ */
+ WT_WRITE_BARRIER();
+
+ /*
* Prepend the handle to the connection list, assuming we're likely to
* need new files again soon, until they are cached by all sessions.
*/
diff --git a/src/conn/conn_log.c b/src/conn/conn_log.c
index 1ae370ef2fa..18ed71e4688 100644
--- a/src/conn/conn_log.c
+++ b/src/conn/conn_log.c
@@ -51,6 +51,25 @@ __logmgr_config(
WT_CONNECTION_IMPL *conn;
bool enabled;
+ /*
+ * A note on reconfiguration: the standard "is this configuration string
+ * allowed" checks should fail if reconfiguration has invalid strings,
+ * for example, "log=(enabled)", or "statistics_log=(path=XXX)", because
+ * the connection reconfiguration method doesn't allow those strings.
+ * Additionally, the base configuration values during reconfiguration
+ * are the currently configured values (so we don't revert to default
+ * values when repeatedly reconfiguring), and configuration processing
+ * of a currently set value should not change the currently set value.
+ *
+ * In this code path, log server reconfiguration does not stop/restart
+ * the log server, so there's no point in re-evaluating configuration
+ * strings that cannot be reconfigured, risking bugs in configuration
+ * setup, and depending on evaluation of currently set values to always
+ * result in the currently set value. Skip tests for any configuration
+ * strings which don't make sense during reconfiguration, but don't
+ * worry about error reporting because it should never happen.
+ */
+
conn = S2C(session);
WT_RET(__wt_config_gets(session, cfg, "log.enabled", &cval));
@@ -62,6 +81,8 @@ __logmgr_config(
*
* If it is off and the user it turning it on, or it is on
* and the user is turning it off, return an error.
+ *
+ * See above: should never happen.
*/
if (reconfig &&
((enabled && !FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED)) ||
@@ -83,6 +104,8 @@ __logmgr_config(
* Setup a log path and compression even if logging is disabled in case
* we are going to print a log. Only do this on creation. Once a
* compressor or log path are set they cannot be changed.
+ *
+ * See above: should never happen.
*/
if (!reconfig) {
conn->log_compressor = NULL;
@@ -95,6 +118,7 @@ __logmgr_config(
WT_RET(__wt_strndup(
session, cval.str, cval.len, &conn->log_path));
}
+
/* We are done if logging isn't enabled. */
if (!*runp)
return (0);
@@ -103,13 +127,14 @@ __logmgr_config(
if (cval.val != 0)
FLD_SET(conn->log_flags, WT_CONN_LOG_ARCHIVE);
+ /*
+ * The file size cannot be reconfigured. The amount of memory allocated
+ * to the log slots may be based on the log file size at creation and we
+ * don't want to re-allocate that memory while running.
+ *
+ * See above: should never happen.
+ */
if (!reconfig) {
- /*
- * Ignore if the user tries to change the file size. The
- * amount of memory allocated to the log slots may be based
- * on the log file size at creation and we don't want to
- * re-allocate that memory while running.
- */
WT_RET(__wt_config_gets(session, cfg, "log.file_max", &cval));
conn->log_file_max = (wt_off_t)cval.val;
WT_STAT_FAST_CONN_SET(session,
@@ -125,12 +150,17 @@ __logmgr_config(
conn->log_prealloc = 1;
/*
- * Note that it is meaningless to reconfigure this value during
- * runtime. It only matters on create before recovery runs.
+ * Note it's meaningless to reconfigure this value during runtime; it
+ * only matters on create before recovery runs.
+ *
+ * See above: should never happen.
*/
- WT_RET(__wt_config_gets_def(session, cfg, "log.recover", 0, &cval));
- if (cval.len != 0 && WT_STRING_MATCH("error", cval.str, cval.len))
- FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR);
+ if (!reconfig) {
+ WT_RET(__wt_config_gets_def(
+ session, cfg, "log.recover", 0, &cval));
+ if (WT_STRING_MATCH("error", cval.str, cval.len))
+ FLD_SET(conn->log_flags, WT_CONN_LOG_RECOVER_ERR);
+ }
WT_RET(__wt_config_gets(session, cfg, "log.zero_fill", &cval));
if (cval.val != 0) {
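
Per the reconfiguration notes added above, only archive, prealloc and zero_fill remain reconfigurable in the log category; enabling logging or changing the compressor, path, file_max or recover setting must be done at wiredtiger_open. A hedged example of a runtime change that is still allowed, reusing a conn handle opened as in the earlier sketches:

    /* Toggle archiving and pre-allocation on a running connection. */
    ret = conn->reconfigure(conn, "log=(archive=false,prealloc=true)");
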
diff --git a/src/conn/conn_stat.c b/src/conn/conn_stat.c
index 855ff57808e..4e7cac59c4a 100644
--- a/src/conn/conn_stat.c
+++ b/src/conn/conn_stat.c
@@ -36,6 +36,31 @@ __stat_sources_free(WT_SESSION_IMPL *session, char ***sources)
}
/*
+ * __stat_config_discard --
+ * Discard all statistics-log configuration.
+ */
+static int
+__stat_config_discard(WT_SESSION_IMPL *session)
+{
+ WT_CONNECTION_IMPL *conn;
+ WT_DECL_RET;
+
+ conn = S2C(session);
+
+ /*
+ * Discard all statistics-log configuration information, called when
+ * reconfiguring or destroying the statistics logging setup,
+ */
+ __wt_free(session, conn->stat_format);
+ ret = __wt_fclose(session, &conn->stat_fs);
+ __wt_free(session, conn->stat_path);
+ __stat_sources_free(session, &conn->stat_sources);
+ conn->stat_stamp = NULL;
+ conn->stat_usecs = 0;
+ return (ret);
+}
+
+/*
* __wt_conn_stat_init --
* Initialize the per-connection statistics.
*/
@@ -73,20 +98,37 @@ __statlog_config(WT_SESSION_IMPL *session, const char **cfg, bool *runp)
WT_CONFIG objectconf;
WT_CONFIG_ITEM cval, k, v;
WT_CONNECTION_IMPL *conn;
+ WT_DECL_ITEM(tmp);
WT_DECL_RET;
int cnt;
char **sources;
+ /*
+ * A note on reconfiguration: the standard "is this configuration string
+ * allowed" checks should fail if reconfiguration has invalid strings,
+ * for example, "log=(enabled)", or "statistics_log=(path=XXX)", because
+ * the connection reconfiguration method doesn't allow those strings.
+ * Additionally, the base configuration values during reconfiguration
+ * are the currently configured values (so we don't revert to default
+ * values when repeatedly reconfiguring), and configuration processing
+ * of a currently set value should not change the currently set value.
+ *
+ * In this code path, a previous statistics log server reconfiguration
+ * may have stopped the server (and we're about to restart it). Because
+ * stopping the server discarded the configured information stored in
+ * the connection structure, we have to re-evaluate all configuration
+ * values, reconfiguration can't skip any of them.
+ */
+
conn = S2C(session);
sources = NULL;
- WT_RET(__wt_config_gets(session, cfg, "statistics_log.wait", &cval));
/* Only start the server if wait time is non-zero */
+ WT_RET(__wt_config_gets(session, cfg, "statistics_log.wait", &cval));
*runp = cval.val != 0;
conn->stat_usecs = (uint64_t)cval.val * WT_MILLION;
- WT_RET(__wt_config_gets(
- session, cfg, "statistics_log.json", &cval));
+ WT_RET(__wt_config_gets(session, cfg, "statistics_log.json", &cval));
if (cval.val != 0)
FLD_SET(conn->stat_flags, WT_CONN_STAT_JSON);
@@ -96,24 +138,30 @@ __statlog_config(WT_SESSION_IMPL *session, const char **cfg, bool *runp)
FLD_SET(conn->stat_flags, WT_CONN_STAT_ON_CLOSE);
/*
- * Statistics logging configuration requires either a wait time or an
- * on-close setting.
- */
- if (!*runp && !FLD_ISSET(conn->stat_flags, WT_CONN_STAT_ON_CLOSE))
- return (0);
-
- /*
- * If any statistics logging is done, this must not be a read-only
- * connection.
+ * We don't allow the log path to be reconfigured for security reasons.
+ * (Applications passing input strings directly to reconfigure would
+ * expose themselves to a potential security problem; the utility of
+ * reconfiguring a statistics log path isn't worth the security risk.)
+ *
+ * See above for the details, but during reconfiguration we're loading
+ * the path value from the saved configuration information, and it's
+ * required during reconfiguration because we potentially stopped and
+ * are restarting the server.
*/
- WT_RET(__wt_config_gets(session, cfg, "statistics_log.sources", &cval));
- WT_RET(__wt_config_subinit(session, &objectconf, &cval));
+ WT_RET(__wt_config_gets(session, cfg, "statistics_log.path", &cval));
+ WT_ERR(__wt_scr_alloc(session, 0, &tmp));
+ WT_ERR(__wt_buf_fmt(session,
+ tmp, "%.*s/%s", (int)cval.len, cval.str, WT_STATLOG_FILENAME));
+ WT_ERR(__wt_filename(session, tmp->data, &conn->stat_path));
+
+ WT_ERR(__wt_config_gets(session, cfg, "statistics_log.sources", &cval));
+ WT_ERR(__wt_config_subinit(session, &objectconf, &cval));
for (cnt = 0; (ret = __wt_config_next(&objectconf, &k, &v)) == 0; ++cnt)
;
- WT_RET_NOTFOUND_OK(ret);
+ WT_ERR_NOTFOUND_OK(ret);
if (cnt != 0) {
- WT_RET(__wt_calloc_def(session, cnt + 1, &sources));
- WT_RET(__wt_config_subinit(session, &objectconf, &cval));
+ WT_ERR(__wt_calloc_def(session, cnt + 1, &sources));
+ WT_ERR(__wt_config_subinit(session, &objectconf, &cval));
for (cnt = 0;
(ret = __wt_config_next(&objectconf, &k, &v)) == 0; ++cnt) {
/*
@@ -138,29 +186,37 @@ __statlog_config(WT_SESSION_IMPL *session, const char **cfg, bool *runp)
sources = NULL;
}
- WT_ERR(__wt_config_gets(session, cfg, "statistics_log.path", &cval));
- WT_ERR(__wt_nfilename(session, cval.str, cval.len, &conn->stat_path));
-
/*
* When using JSON format, use the same timestamp format as MongoDB by
- * default.
+ * default. This requires caution: the user might have set the timestamp
+ * in a previous reconfigure call and we don't want to override that, so
+ * compare the retrieved value with the default value to decide if we
+ * should use the JSON default.
+ *
+ * (This still implies if the user explicitly sets the timestamp to the
+ * default value, then sets the JSON flag in a separate reconfigure
+ * call, or vice-versa, we will incorrectly switch to the JSON default
+ * timestamp. But there's no way to detect that, and this is all a low
+ * probability path.)
+ *
+ * !!!
+ * Don't rewrite in the compressed "%FT%T.000Z" form, MSVC13 segfaults.
*/
- if (FLD_ISSET(conn->stat_flags, WT_CONN_STAT_JSON)) {
- ret = __wt_config_gets(
- session, &cfg[1], "statistics_log.timestamp", &cval);
- if (ret == WT_NOTFOUND)
- WT_ERR(__wt_strdup(
- session, "%FT%T.000Z", &conn->stat_format));
- WT_ERR_NOTFOUND_OK(ret);
- }
- if (conn->stat_format == NULL) {
- WT_ERR(__wt_config_gets(
- session, cfg, "statistics_log.timestamp", &cval));
+#define WT_TIMESTAMP_DEFAULT "%b %d %H:%M:%S"
+#define WT_TIMESTAMP_JSON_DEFAULT "%Y-%m-%dT%H:%M:%S.000Z"
+ WT_ERR(__wt_config_gets(
+ session, cfg, "statistics_log.timestamp", &cval));
+ if (FLD_ISSET(conn->stat_flags, WT_CONN_STAT_JSON) &&
+ WT_STRING_MATCH(WT_TIMESTAMP_DEFAULT, cval.str, cval.len))
+ WT_ERR(__wt_strdup(
+ session, WT_TIMESTAMP_JSON_DEFAULT, &conn->stat_format));
+ else
WT_ERR(__wt_strndup(
session, cval.str, cval.len, &conn->stat_format));
- }
err: __stat_sources_free(session, &sources);
+ __wt_scr_free(session, &tmp);
+
return (ret);
}
@@ -373,7 +429,7 @@ __statlog_log_one(WT_SESSION_IMPL *session, WT_ITEM *path, WT_ITEM *tmp)
if (path != NULL)
(void)strcpy(path->mem, tmp->mem);
WT_RET(__wt_fopen(session, tmp->mem,
- WT_OPEN_CREATE | WT_OPEN_FIXED, WT_STREAM_APPEND,
+ WT_FS_OPEN_CREATE | WT_FS_OPEN_FIXED, WT_STREAM_APPEND,
&log_stream));
}
conn->stat_fs = log_stream;
@@ -538,14 +594,23 @@ __wt_statlog_create(WT_SESSION_IMPL *session, const char *cfg[])
bool start;
conn = S2C(session);
- start = false;
/*
* Stop any server that is already running. This means that each time
* reconfigure is called we'll bounce the server even if there are no
- * configuration changes - but that makes our lives easier.
+ * configuration changes. This makes our life easier as the underlying
+ * configuration routine doesn't have to worry about freeing objects
+ * in the connection structure (it's guaranteed to always start with a
+ * blank slate), and we don't have to worry about races where a running
+ * server is reading configuration information that we're updating, and
+ * it's not expected that reconfiguration will happen a lot.
+ *
+ * If there's no server running, discard any configuration information
+ * so we don't leak memory during reconfiguration.
*/
- if (conn->stat_session != NULL)
+ if (conn->stat_session == NULL)
+ WT_RET(__stat_config_discard(session));
+ else
WT_RET(__wt_statlog_destroy(session, false));
WT_RET(__statlog_config(session, cfg, &start));
@@ -568,38 +633,28 @@ __wt_statlog_destroy(WT_SESSION_IMPL *session, bool is_close)
conn = S2C(session);
+ /* Stop the server thread. */
F_CLR(conn, WT_CONN_SERVER_STATISTICS);
if (conn->stat_tid_set) {
WT_TRET(__wt_cond_signal(session, conn->stat_cond));
WT_TRET(__wt_thread_join(session, conn->stat_tid));
conn->stat_tid_set = false;
}
+ WT_TRET(__wt_cond_destroy(session, &conn->stat_cond));
/* Log a set of statistics on shutdown if configured. */
if (is_close)
WT_TRET(__wt_statlog_log_one(session));
- WT_TRET(__wt_cond_destroy(session, &conn->stat_cond));
-
- __stat_sources_free(session, &conn->stat_sources);
- __wt_free(session, conn->stat_path);
- __wt_free(session, conn->stat_format);
+ /* Discard all configuration information. */
+ WT_TRET(__stat_config_discard(session));
/* Close the server thread's session. */
if (conn->stat_session != NULL) {
wt_session = &conn->stat_session->iface;
WT_TRET(wt_session->close(wt_session, NULL));
+ conn->stat_session = NULL;
}
- /* Clear connection settings so reconfigure is reliable. */
- conn->stat_session = NULL;
- conn->stat_tid_set = false;
- conn->stat_format = NULL;
- WT_TRET(__wt_fclose(session, &conn->stat_fs));
- conn->stat_path = NULL;
- conn->stat_sources = NULL;
- conn->stat_stamp = NULL;
- conn->stat_usecs = 0;
-
return (ret);
}
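
The statistics-log rework above means every reconfigure rebuilds the server's configuration from scratch, and the log path itself is deliberately not reconfigurable. A hedged example of a runtime change that is allowed, again reusing a previously opened conn handle:

    /* Switch to JSON output and a 30-second logging interval at runtime. */
    ret = conn->reconfigure(conn,
        "statistics=(fast),statistics_log=(json=true,wait=30)");
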
diff --git a/src/cursor/cur_backup.c b/src/cursor/cur_backup.c
index 4ee23008687..63952169566 100644
--- a/src/cursor/cur_backup.c
+++ b/src/cursor/cur_backup.c
@@ -9,13 +9,12 @@
#include "wt_internal.h"
static int __backup_all(WT_SESSION_IMPL *);
-static int __backup_cleanup_handles(WT_SESSION_IMPL *, WT_CURSOR_BACKUP *);
static int __backup_list_append(
WT_SESSION_IMPL *, WT_CURSOR_BACKUP *, const char *);
static int __backup_list_uri_append(WT_SESSION_IMPL *, const char *, bool *);
static int __backup_start(
WT_SESSION_IMPL *, WT_CURSOR_BACKUP *, const char *[]);
-static int __backup_stop(WT_SESSION_IMPL *);
+static int __backup_stop(WT_SESSION_IMPL *, WT_CURSOR_BACKUP *);
static int __backup_uri(WT_SESSION_IMPL *, const char *[], bool *, bool *);
/*
@@ -76,20 +75,26 @@ __curbackup_close(WT_CURSOR *cursor)
WT_CURSOR_BACKUP *cb;
WT_DECL_RET;
WT_SESSION_IMPL *session;
- int tret;
cb = (WT_CURSOR_BACKUP *)cursor;
CURSOR_API_CALL(cursor, session, close, NULL);
- WT_TRET(__backup_cleanup_handles(session, cb));
+ /*
+ * When starting a hot backup, we serialize hot backup cursors and set
+ * the connection's hot-backup flag. Once that's done, we set the
+ * cursor's backup-locker flag, implying the cursor owns all necessary
+ * cleanup (including removing temporary files), regardless of error or
+ * success. The cursor's backup-locker flag is never cleared (it's just
+ * discarded when the cursor is closed), because that cursor always
+ * remains responsible for cleanup.
+ */
+ if (F_ISSET(cb, WT_CURBACKUP_LOCKER))
+ WT_TRET(__backup_stop(session, cb));
+
WT_TRET(__wt_cursor_close(cursor));
session->bkp_cursor = NULL;
- WT_WITH_SCHEMA_LOCK(session, tret,
- tret = __backup_stop(session)); /* Stop the backup. */
- WT_TRET(tret);
-
err: API_END_RET(session, ret);
}
@@ -144,11 +149,11 @@ __wt_curbackup_open(WT_SESSION_IMPL *session,
ret = __backup_start(session, cb, cfg)));
WT_ERR(ret);
- /* __wt_cursor_init is last so we don't have to clean up on error. */
WT_ERR(__wt_cursor_init(cursor, uri, NULL, cfg, cursorp));
if (0) {
-err: __wt_free(session, cb);
+err: WT_TRET(__curbackup_close(cursor));
+ *cursorp = NULL;
}
return (ret);
@@ -226,6 +231,9 @@ __backup_start(
conn->hot_backup = true;
WT_ERR(__wt_writeunlock(session, conn->hot_backup_lock));
+ /* We're the lock holder, we own cleanup. */
+ F_SET(cb, WT_CURBACKUP_LOCKER);
+
/*
* Create a temporary backup file. This must be opened before
* generating the list of targets in backup_uri. This file will
@@ -235,7 +243,7 @@ __backup_start(
* doesn't confuse restarting in the source database.
*/
WT_ERR(__wt_fopen(session, WT_BACKUP_TMP,
- WT_OPEN_CREATE, WT_STREAM_WRITE, &cb->bfs));
+ WT_FS_OPEN_CREATE, WT_STREAM_WRITE, &cb->bfs));
/*
* If a list of targets was specified, work our way through them.
* Else, generate a list of all database objects.
@@ -261,7 +269,7 @@ __backup_start(
*/
dest = WT_INCREMENTAL_BACKUP;
WT_ERR(__wt_fopen(session, WT_INCREMENTAL_SRC,
- WT_OPEN_CREATE, WT_STREAM_WRITE, &srcfs));
+ WT_FS_OPEN_CREATE, WT_STREAM_WRITE, &srcfs));
WT_ERR(__backup_list_append(
session, cb, WT_INCREMENTAL_BACKUP));
} else {
@@ -282,12 +290,9 @@ err: /* Close the hot backup file. */
WT_TRET(__wt_fclose(session, &cb->bfs));
if (srcfs != NULL)
WT_TRET(__wt_fclose(session, &srcfs));
- if (ret != 0) {
- WT_TRET(__backup_cleanup_handles(session, cb));
- WT_TRET(__backup_stop(session));
- } else {
+ if (ret == 0) {
WT_ASSERT(session, dest != NULL);
- WT_TRET(__wt_fs_rename(session, WT_BACKUP_TMP, dest));
+ WT_TRET(__wt_fs_rename(session, WT_BACKUP_TMP, dest, false));
}
return (ret);
@@ -295,9 +300,7 @@ err: /* Close the hot backup file. */
/*
* __backup_cleanup_handles --
- * Release and free all btree handles held by the backup. This is kept
- * separate from __backup_stop because it can be called without the
- * schema lock held.
+ * Release and free all btree handles held by the backup.
*/
static int
__backup_cleanup_handles(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb)
@@ -325,15 +328,18 @@ __backup_cleanup_handles(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb)
* Stop a backup.
*/
static int
-__backup_stop(WT_SESSION_IMPL *session)
+__backup_stop(WT_SESSION_IMPL *session, WT_CURSOR_BACKUP *cb)
{
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
conn = S2C(session);
+ /* Release all btree handles held by the backup. */
+ WT_TRET(__backup_cleanup_handles(session, cb));
+
/* Remove any backup specific file. */
- ret = __wt_backup_file_remove(session);
+ WT_TRET(__wt_backup_file_remove(session));
/* Checkpoint deletion can proceed, as can the next hot backup. */
WT_TRET(__wt_writelock(session, conn->hot_backup_lock));
@@ -443,10 +449,10 @@ __wt_backup_file_remove(WT_SESSION_IMPL *session)
* always know we were a source directory while there's any chance of
* an incremental backup file existing.
*/
- WT_TRET(__wt_remove_if_exists(session, WT_BACKUP_TMP));
- WT_TRET(__wt_remove_if_exists(session, WT_INCREMENTAL_BACKUP));
- WT_TRET(__wt_remove_if_exists(session, WT_INCREMENTAL_SRC));
- WT_TRET(__wt_remove_if_exists(session, WT_METADATA_BACKUP));
+ WT_TRET(__wt_remove_if_exists(session, WT_BACKUP_TMP, true));
+ WT_TRET(__wt_remove_if_exists(session, WT_INCREMENTAL_BACKUP, true));
+ WT_TRET(__wt_remove_if_exists(session, WT_INCREMENTAL_SRC, true));
+ WT_TRET(__wt_remove_if_exists(session, WT_METADATA_BACKUP, true));
return (ret);
}
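
After the cleanup-ownership change above, closing the backup cursor is what releases the hot-backup state and removes the temporary files, regardless of how the backup ended. The calling pattern itself is unchanged; a hedged sketch:

    WT_CURSOR *backup;
    const char *filename;
    int ret;

    ret = session->open_cursor(session, "backup:", NULL, NULL, &backup);
    while ((ret = backup->next(backup)) == 0) {
        ret = backup->get_key(backup, &filename);
        /* Copy "filename" from the database directory to the backup target. */
    }
    /* Closing the cursor ends the backup and performs all cleanup. */
    ret = backup->close(backup);
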
diff --git a/src/cursor/cur_config.c b/src/cursor/cur_config.c
index e0d270e4245..2d3f3ffd176 100644
--- a/src/cursor/cur_config.c
+++ b/src/cursor/cur_config.c
@@ -58,11 +58,11 @@ __wt_curconfig_open(WT_SESSION_IMPL *session,
cursor->session = &session->iface;
cursor->key_format = cursor->value_format = "S";
- /* __wt_cursor_init is last so we don't have to clean up on error. */
WT_ERR(__wt_cursor_init(cursor, uri, NULL, cfg, cursorp));
if (0) {
-err: __wt_free(session, cconfig);
+err: WT_TRET(__curconfig_close(cursor));
+ *cursorp = NULL;
}
return (ret);
}
diff --git a/src/cursor/cur_ds.c b/src/cursor/cur_ds.c
index d2b8d81ab37..8d4b7a9384b 100644
--- a/src/cursor/cur_ds.c
+++ b/src/cursor/cur_ds.c
@@ -518,10 +518,7 @@ __wt_curds_open(
source->flags = 0;
if (0) {
-err: if (F_ISSET(cursor, WT_CURSTD_OPEN))
- WT_TRET(cursor->close(cursor));
- else
- __wt_free(session, data_source);
+err: WT_TRET(__curds_close(cursor));
*cursorp = NULL;
}
diff --git a/src/cursor/cur_dump.c b/src/cursor/cur_dump.c
index 595915df7b7..d7f18bb61ac 100644
--- a/src/cursor/cur_dump.c
+++ b/src/cursor/cur_dump.c
@@ -401,13 +401,13 @@ __wt_curdump_create(WT_CURSOR *child, WT_CURSOR *owner, WT_CURSOR **cursorp)
cursor->json_private = child->json_private = json;
}
- /* __wt_cursor_init is last so we don't have to clean up on error. */
cfg[0] = WT_CONFIG_BASE(session, WT_SESSION_open_cursor);
cfg[1] = NULL;
WT_ERR(__wt_cursor_init(cursor, NULL, owner, cfg, cursorp));
if (0) {
-err: __wt_free(session, cursor);
+err: WT_TRET(__curdump_close(cursor));
+ *cursorp = NULL;
}
return (ret);
}
diff --git a/src/cursor/cur_file.c b/src/cursor/cur_file.c
index fac903b4770..8e7bd4bbea5 100644
--- a/src/cursor/cur_file.c
+++ b/src/cursor/cur_file.c
@@ -388,11 +388,11 @@ err: API_END_RET(session, ret);
}
/*
- * __wt_curfile_create --
+ * __curfile_create --
* Open a cursor for a given btree handle.
*/
-int
-__wt_curfile_create(WT_SESSION_IMPL *session,
+static int
+__curfile_create(WT_SESSION_IMPL *session,
WT_CURSOR *owner, const char *cfg[], bool bulk, bool bitmap,
WT_CURSOR **cursorp)
{
@@ -439,6 +439,13 @@ __wt_curfile_create(WT_SESSION_IMPL *session,
cursor->value_format = btree->value_format;
cbt->btree = btree;
+ /*
+ * Increment the data-source's in-use counter; done now because closing
+ * the cursor will decrement it, and all failure paths from here close
+ * the cursor.
+ */
+ __wt_cursor_dhandle_incr_use(session);
+
if (session->dhandle->checkpoint != NULL)
F_SET(cbt, WT_CBT_NO_TXN);
@@ -478,7 +485,6 @@ __wt_curfile_create(WT_SESSION_IMPL *session,
/* Underlying btree initialization. */
__wt_btcur_open(cbt);
- /* __wt_cursor_init is last so we don't have to clean up on error. */
WT_ERR(__wt_cursor_init(
cursor, cursor->internal_uri, owner, cfg, cursorp));
@@ -486,7 +492,8 @@ __wt_curfile_create(WT_SESSION_IMPL *session,
WT_STAT_FAST_DATA_INCR(session, cursor_create);
if (0) {
-err: __wt_free(session, cbt);
+err: WT_TRET(__curfile_close(cursor));
+ *cursorp = NULL;
}
return (ret);
@@ -503,9 +510,10 @@ __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri,
WT_CONFIG_ITEM cval;
WT_DECL_RET;
uint32_t flags;
- bool bitmap, bulk;
+ bool bitmap, bulk, checkpoint_wait;
bitmap = bulk = false;
+ checkpoint_wait = true;
flags = 0;
/*
@@ -531,6 +539,12 @@ __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri,
else if (!WT_STRING_MATCH("unordered", cval.str, cval.len))
WT_RET_MSG(session, EINVAL,
"Value for 'bulk' must be a boolean or 'bitmap'");
+
+ if (bulk) {
+ WT_RET(__wt_config_gets(session,
+ cfg, "checkpoint_wait", &cval));
+ checkpoint_wait = cval.val != 0;
+ }
}
/* Bulk handles require exclusive access. */
@@ -540,11 +554,11 @@ __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri,
/* Get the handle and lock it while the cursor is using it. */
if (WT_PREFIX_MATCH(uri, "file:")) {
/*
- * If we are opening exclusive, get the handle while holding
- * the checkpoint lock. This prevents a bulk cursor open
- * failing with EBUSY due to a database-wide checkpoint.
+ * If we are opening exclusive and don't want a bulk cursor
+ * open to fail with EBUSY due to a database-wide checkpoint,
+ * get the handle while holding the checkpoint lock.
*/
- if (LF_ISSET(WT_DHANDLE_EXCLUSIVE))
+ if (LF_ISSET(WT_DHANDLE_EXCLUSIVE) && checkpoint_wait)
WT_WITH_CHECKPOINT_LOCK(session, ret,
ret = __wt_session_get_btree_ckpt(
session, uri, cfg, flags));
@@ -555,10 +569,8 @@ __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri,
} else
WT_RET(__wt_bad_object_type(session, uri));
- WT_ERR(__wt_curfile_create(session, owner, cfg, bulk, bitmap, cursorp));
+ WT_ERR(__curfile_create(session, owner, cfg, bulk, bitmap, cursorp));
- /* Increment the data-source's in-use counter. */
- __wt_cursor_dhandle_incr_use(session);
return (0);
err: /* If the cursor could not be opened, release the handle. */
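
The new checkpoint_wait flag surfaced here (and in the config tables earlier) lets a bulk-cursor open fail immediately with EBUSY instead of waiting behind a database-wide checkpoint. A hedged example ("table:bulkload" is a placeholder URI):

    WT_CURSOR *bulk;
    int ret;

    /* Don't block behind a running checkpoint; EBUSY is returned instead. */
    ret = session->open_cursor(session, "table:bulkload", NULL,
        "bulk,checkpoint_wait=false", &bulk);
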
diff --git a/src/cursor/cur_index.c b/src/cursor/cur_index.c
index 6de68d86778..82a27d65ce6 100644
--- a/src/cursor/cur_index.c
+++ b/src/cursor/cur_index.c
@@ -263,19 +263,57 @@ err: F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
static int
__curindex_search_near(WT_CURSOR *cursor, int *exact)
{
+ WT_CURSOR *child;
WT_CURSOR_INDEX *cindex;
WT_DECL_RET;
+ WT_ITEM found_key;
WT_SESSION_IMPL *session;
+ int cmp;
cindex = (WT_CURSOR_INDEX *)cursor;
- JOINABLE_CURSOR_API_CALL(cursor, session, search_near, NULL);
- __wt_cursor_set_raw_key(cindex->child, &cursor->key);
- if ((ret = cindex->child->search_near(cindex->child, exact)) == 0)
- ret = __curindex_move(cindex);
- else
- F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
+ child = cindex->child;
+ JOINABLE_CURSOR_API_CALL(cursor, session, search, NULL);
-err: API_END_RET(session, ret);
+ /*
+ * We are searching using the application-specified key, which
+ * (usually) doesn't contain the primary key, so it is just a prefix of
+ * any matching index key. That said, if there is an exact match, we
+ * want to find the first matching index entry and set exact equal to
+ * zero. Do a search_near, step to the next entry if we land on one
+ * that is too small, then check that the prefix matches.
+ */
+ __wt_cursor_set_raw_key(child, &cursor->key);
+ WT_ERR(child->search_near(child, &cmp));
+
+ if (cmp < 0)
+ WT_ERR(child->next(child));
+
+ /*
+ * We expect partial matches, and want the smallest record with a key
+ * greater than or equal to the search key.
+ *
+ * If the key we find is shorter than the search key, it can't possibly
+ * match.
+ *
+ * The only way for the key to be exactly equal is if there is an index
+ * on the primary key, because otherwise the primary key columns will
+ * be appended to the index key, but we don't disallow that (odd) case.
+ */
+ found_key = child->key;
+ if (found_key.size < cursor->key.size)
+ WT_ERR(WT_NOTFOUND);
+ found_key.size = cursor->key.size;
+
+ WT_ERR(__wt_compare(
+ session, cindex->index->collator, &cursor->key, &found_key, exact));
+
+ WT_ERR(__curindex_move(cindex));
+
+ if (0) {
+err: F_CLR(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
+ }
+
+ API_END_RET(session, ret);
}
/*
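
The rewritten search_near above treats the application's key as a prefix of the stored index key (which normally has the primary key columns appended), so an exact prefix match now reports exact equal to zero. A hedged usage sketch, assuming a string-keyed index named "index:people:lastname":

    WT_CURSOR *idx;
    int exact, ret;

    ret = session->open_cursor(session, "index:people:lastname", NULL, NULL, &idx);
    idx->set_key(idx, "Smith");
    if ((ret = idx->search_near(idx, &exact)) == 0 && exact == 0) {
        /* The positioned index entry's key begins with "Smith" exactly. */
    }
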
diff --git a/src/cursor/cur_log.c b/src/cursor/cur_log.c
index 0a13803da5d..2adf0c2b8ab 100644
--- a/src/cursor/cur_log.c
+++ b/src/cursor/cur_log.c
@@ -315,16 +315,16 @@ __curlog_close(WT_CURSOR *cursor)
WT_CONNECTION_IMPL *conn;
WT_CURSOR_LOG *cl;
WT_DECL_RET;
- WT_LOG *log;
WT_SESSION_IMPL *session;
CURSOR_API_CALL(cursor, session, close, NULL);
cl = (WT_CURSOR_LOG *)cursor;
conn = S2C(session);
+
WT_ASSERT(session, FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED));
- log = conn->log;
- WT_TRET(__wt_readunlock(session, log->log_archive_lock));
- WT_TRET(__curlog_reset(cursor));
+ if (F_ISSET(cl, WT_CURLOG_ARCHIVE_LOCK))
+ WT_TRET(__wt_readunlock(session, conn->log->log_archive_lock));
+
__wt_free(session, cl->cur_lsn);
__wt_free(session, cl->next_lsn);
__wt_scr_free(session, &cl->logrec);
@@ -332,6 +332,7 @@ __curlog_close(WT_CURSOR *cursor)
__wt_scr_free(session, &cl->opvalue);
__wt_free(session, cl->packed_key);
__wt_free(session, cl->packed_value);
+
WT_TRET(__wt_cursor_close(cursor));
err: API_END_RET(session, ret);
@@ -401,23 +402,10 @@ __wt_curlog_open(WT_SESSION_IMPL *session,
/* Log cursors block archiving. */
WT_ERR(__wt_readlock(session, log->log_archive_lock));
+ F_SET(cl, WT_CURLOG_ARCHIVE_LOCK);
if (0) {
-err: if (F_ISSET(cursor, WT_CURSTD_OPEN))
- WT_TRET(cursor->close(cursor));
- else {
- __wt_free(session, cl->cur_lsn);
- __wt_free(session, cl->next_lsn);
- __wt_scr_free(session, &cl->logrec);
- __wt_scr_free(session, &cl->opkey);
- __wt_scr_free(session, &cl->opvalue);
- /*
- * NOTE: We cannot get on the error path with the
- * readlock held. No need to unlock it unless that
- * changes above.
- */
- __wt_free(session, cl);
- }
+err: WT_TRET(__curlog_close(cursor));
*cursorp = NULL;
}
diff --git a/src/cursor/cur_metadata.c b/src/cursor/cur_metadata.c
index 3d702e2ea8c..fc63ca13f7c 100644
--- a/src/cursor/cur_metadata.c
+++ b/src/cursor/cur_metadata.c
@@ -475,9 +475,11 @@ __curmetadata_close(WT_CURSOR *cursor)
mdc = (WT_CURSOR_METADATA *)cursor;
file_cursor = mdc->file_cursor;
CURSOR_API_CALL(cursor, session,
- close, ((WT_CURSOR_BTREE *)file_cursor)->btree);
+ close, file_cursor == NULL ?
+ NULL : ((WT_CURSOR_BTREE *)file_cursor)->btree);
- ret = file_cursor->close(file_cursor);
+ if (file_cursor != NULL)
+ ret = file_cursor->close(file_cursor);
WT_TRET(__wt_cursor_close(cursor));
err: API_END_RET(session, ret);
@@ -552,9 +554,8 @@ __wt_curmetadata_open(WT_SESSION_IMPL *session,
}
if (0) {
-err: if (mdc->file_cursor != NULL)
- WT_TRET(mdc->file_cursor->close(mdc->file_cursor));
- __wt_free(session, mdc);
+err: WT_TRET(__curmetadata_close(cursor));
+ *cursorp = NULL;
}
return (ret);
}
diff --git a/src/cursor/cur_stat.c b/src/cursor/cur_stat.c
index f7a8f5fc866..5c9159a4c0b 100644
--- a/src/cursor/cur_stat.c
+++ b/src/cursor/cur_stat.c
@@ -37,22 +37,6 @@ __curstat_print_value(WT_SESSION_IMPL *session, uint64_t v, WT_ITEM *buf)
}
/*
- * __curstat_free_config --
- * Free the saved configuration string stack
- */
-static void
-__curstat_free_config(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst)
-{
- size_t i;
-
- if (cst->cfg != NULL) {
- for (i = 0; cst->cfg[i] != NULL; ++i)
- __wt_free(session, cst->cfg[i]);
- __wt_free(session, cst->cfg);
- }
-}
-
-/*
* __curstat_get_key --
* WT_CURSOR->get_key for statistics cursors.
*/
@@ -334,11 +318,16 @@ __curstat_close(WT_CURSOR *cursor)
WT_CURSOR_STAT *cst;
WT_DECL_RET;
WT_SESSION_IMPL *session;
+ size_t i;
cst = (WT_CURSOR_STAT *)cursor;
CURSOR_API_CALL(cursor, session, close, NULL);
- __curstat_free_config(session, cst);
+ if (cst->cfg != NULL) {
+ for (i = 0; cst->cfg[i] != NULL; ++i)
+ __wt_free(session, cst->cfg[i]);
+ __wt_free(session, cst->cfg);
+ }
__wt_buf_free(session, &cst->pv);
__wt_free(session, cst->desc_buf);
@@ -691,7 +680,6 @@ __wt_curstat_open(WT_SESSION_IMPL *session,
/* The cursor isn't yet positioned. */
cst->notpositioned = true;
- /* __wt_cursor_init is last so we don't have to clean up on error. */
WT_ERR(__wt_cursor_init(cursor, uri, NULL, cfg, cursorp));
if (0) {
@@ -701,8 +689,8 @@ config_err: WT_ERR_MSG(session, EINVAL,
}
if (0) {
-err: __curstat_free_config(session, cst);
- __wt_free(session, cst);
+err: WT_TRET(__curstat_close(cursor));
+ *cursorp = NULL;
}
return (ret);
diff --git a/src/cursor/cur_table.c b/src/cursor/cur_table.c
index 6d50523043a..a14b40a1150 100644
--- a/src/cursor/cur_table.c
+++ b/src/cursor/cur_table.c
@@ -525,15 +525,20 @@ __curtable_insert(WT_CURSOR *cursor)
}
/*
- * WT_CURSOR.insert doesn't leave the cursor positioned, and the
- * application may want to free the memory used to configure the
- * insert; don't read that memory again (matching the underlying
- * file object cursor insert semantics).
+ * Insert is the one cursor operation that doesn't end with the cursor
+ * pointing to an on-page item (except for column-store appends, where
+ * we are returning a key). That is, the application's cursor continues
+ * to reference the application's memory after a successful cursor call,
+ * which isn't true anywhere else. We don't want to have to explain that
+ * scoping corner case, so we reset the application's cursor so it can
+ * free the referenced memory and continue on without risking subsequent
+ * core dumps.
*/
F_CLR(primary, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
+ if (F_ISSET(primary, WT_CURSTD_APPEND))
+ F_SET(primary, WT_CURSTD_KEY_INT);
err: CURSOR_UPDATE_API_END(session, ret);
-
return (ret);
}
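
To illustrate the behavior described in the comment above, here is a hedged, stand-alone sketch (the buffer names are hypothetical, a WT_CURSOR *cursor on a table is assumed, and most error handling is omitted): an application can release its key/value buffers once a table-cursor insert succeeds, because the cursor is reset and no longer references them.

    /* Sketch: the cursor drops its references once insert succeeds. */
    char *key_buf = strdup("key-1"), *value_buf = strdup("value-1");
    int ret;

    cursor->set_key(cursor, key_buf);
    cursor->set_value(cursor, value_buf);
    if ((ret = cursor->insert(cursor)) == 0) {
        /* Safe: a successful insert leaves the cursor unpositioned. */
        free(key_buf);
        free(value_buf);
    }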
diff --git a/src/docs/security.dox b/src/docs/security.dox
index 331f74d969b..82e13ae7ad3 100644
--- a/src/docs/security.dox
+++ b/src/docs/security.dox
@@ -2,10 +2,23 @@
@section directory_permissions Database directory permissions
-All WiredTiger files are stored in the database home directory, and the
-WiredTiger database directory should have its permissions set to ensure
-database objects are not accessible to users without appropriate
-permissions. See @ref home for more information.
+By default, WiredTiger files are stored beneath the database home directory.
+The WiredTiger database directory should have its permissions set to ensure
+database objects are not accessible to users without appropriate permissions.
+See @ref home for more information.
+
+@section absolute_path Absolute paths
+
+WiredTiger prepends the name of the database home to file names which
+do not appear to be absolute paths. (The absolute path test is
+simplistic, matching a leading slash character on POSIX systems or a
+leading alphabetic character and colon on Windows.) No file path
+sanitization or validation is done by WiredTiger; for example, file
+paths may use universal naming conventions (UNC) or include \c "../"
+(dot dot slash) components.
+
+Applications are responsible for validating user-supplied file paths as
+necessary to prevent directory traversal attacks.
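
As a hedged illustration of such application-side validation (the helper below is hypothetical and not part of the WiredTiger API), a check might reject parent-directory components and Windows drive or UNC prefixes before a name is passed to the library:

    #include <ctype.h>
    #include <stdbool.h>
    #include <string.h>

    /* Hypothetical application check: reject suspicious file names. */
    static bool
    app_file_name_is_safe(const char *name)
    {
        /* Reject absolute POSIX paths and parent-directory components. */
        if (name[0] == '/' || strstr(name, "..") != NULL)
            return (false);
        /* Reject Windows drive letters ("C:") and UNC prefixes ("\\"). */
        if ((isalpha((unsigned char)name[0]) && name[1] == ':') ||
            (name[0] == '\\' && name[1] == '\\'))
            return (false);
        return (true);
    }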
@section file_permissions File permissions
diff --git a/src/docs/spell.ok b/src/docs/spell.ok
index 96fe04d7426..e08eb7d1447 100644
--- a/src/docs/spell.ok
+++ b/src/docs/spell.ok
@@ -80,6 +80,7 @@ SCons
Seward's
SiH
TXT
+UNC
URIs
WILLNEED
WiredTiger
@@ -368,6 +369,7 @@ php
png
posix
pre
+prepends
primary's
printf
printlog
@@ -411,6 +413,7 @@ runtime
rwlock
sHQ
sHq
+sanitization
scalable
scanf
schemas
diff --git a/src/docs/statistics.dox b/src/docs/statistics.dox
index 0a29e351e4e..36ce2711dc5 100644
--- a/src/docs/statistics.dox
+++ b/src/docs/statistics.dox
@@ -90,11 +90,20 @@ cursor.
@section statistics_log Statistics logging
-WiredTiger will optionally log database statistics into a file when the
+WiredTiger will optionally log database statistics into files when
the ::wiredtiger_open \c statistics_log configuration is set.
-The resulting statistics can be displayed using the \c wtstats visualization
-tool. For more information, see @ref_single wtstats.
+The log files are named \c WiredTiger.%%d.%%H, where \c %%d is replaced
+with the day of the month as a decimal number (01-31), and \c %%H
+is replaced by the hour (24-hour clock) as a decimal number (00-23).
+Each log file contains the statistics for the hour specified in its name.
+
+The location of the log files may be changed with the \c statistics_log.path
+configuration string.
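
For example (an illustrative sketch, not one of the documented snippets; the "stat_logs" directory name is hypothetical), the path can be combined with the wait interval in the ::wiredtiger_open configuration:

    /* Log statistics every 30 seconds into the "stat_logs" directory. */
    WT_CONNECTION *conn;
    int ret;

    ret = wiredtiger_open(home, NULL,
        "create,statistics=(fast),"
        "statistics_log=(wait=30,path=\"stat_logs\")", &conn);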
+
+The resulting statistics can be displayed and interactively examined
+using the \c wtstats visualization tool. For more information, see
+@ref_single wtstats.
The following example logs statistics every 30 seconds:
@@ -120,7 +129,7 @@ Statistics for all underlying data sources of a particular type may be
included by adding a partial data source URI to the \c statistics_log
configuration string:
-@snippet ex_all.c Statistics logging with all tables
+@snippet ex_all.c Statistics logging with a source type
When database statistics are logged, the database home will be the first
space-separated entry for each record in the log file. For example:
@@ -151,23 +160,9 @@ currently open in the database, nor will any statistics requiring the
traversal of a tree (as if the \c statistics_fast configuration string
were set).
-The location of the log files may be changed with the \c statistics_log.path
-configuration string. The \c path value value may contain ISO C90 standard
-strftime conversion specifications. WiredTiger will not create non-existent
-directories in the path, they must exist before ::wiredtiger_open is called.
-
-The following example logs statistics into files named with the month,
-day and year:
-
-@snippet ex_all.c Statistics logging with path
-
A Python script that parses the default logging output and uses the
<a href="http://www.gnuplot.info/">gnuplot</a>, utility to generate
Portable Network Graphics (PNG) format graphs is included in the
WiredTiger distribution in the file \c tools/statlog.py.
-@m_if{c}
-To interactively examine statistics results, see @ref wtstats.
-@m_endif
-
*/
diff --git a/src/docs/upgrading.dox b/src/docs/upgrading.dox
index 5e824fee977..9d3d2239bb4 100644
--- a/src/docs/upgrading.dox
+++ b/src/docs/upgrading.dox
@@ -1,5 +1,48 @@
/*! @page upgrading Upgrading WiredTiger applications
+@section version_281 Upgrading to Version 2.8.1
+<dl>
+<dt>Cache management defaults</dt>
+<dd>
+The default values for the \c eviction_dirty_target and \c
+eviction_dirty_trigger settings to ::wiredtiger_open have changed to 5 and 20,
+respectively. This means that by default, WiredTiger will start writing dirty
+pages from cache when it becomes 5% dirty and will throttle activity to keep
+the volume of dirty data in cache under 20%. For write-heavy workloads, the
+new defaults may result in lower throughput and more threads writing to data
+files concurrently.
+
+These settings also now determine how much work is done at the beginning of a
+checkpoint to make the critical section of checkpoints complete more quickly.
+</dd>
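
Applications that prefer different behavior can set these values explicitly at open time. A minimal sketch follows; the 10/40 percentages are illustrative only, not a recommendation and not the previous defaults:

    /* Raise the dirty-eviction thresholds above the new defaults. */
    WT_CONNECTION *conn;
    int ret;

    ret = wiredtiger_open(home, NULL,
        "create,eviction_dirty_target=10,eviction_dirty_trigger=40", &conn);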
+
+<dt>Checkpoint server created checkpoint names</dt>
+<dd>
+The ::wiredtiger_open checkpoint configuration no longer supports the
+\c name configuration, and checkpoints created by the checkpoint server
+will always be given the default WiredTiger checkpoint name,
+"WiredTigerCheckpoint". Applications depending on the ability to set the
+checkpoint name for the checkpoint server will require modification.
+</dd>
+
+<dt>Statistics logging path</dt>
+<dd>
+The ::wiredtiger_open statistics logging path configuration has been
+simplified to be only a path to a directory, and the file name component
+of the path may no longer be specified. Applications depending on the
+ability to set statistics log file names will require modification.
+</dd>
+
+<dt>Deprecated statistics field</dt>
+<dd>
+The connection statistic \c WT_STAT_CONN_CACHE_BYTES_OVERFLOW has been
+removed. Overflow information is now available in the
+\c WT_STAT_CONN_CACHE_BYTES_OVERFLOW and \c WT_STAT_CONN_CACHE_OVERFLOW_VALUE
+statistics. Applications specifically looking for the removed statistic will
+require modification.
+</dd>
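
As a hedged sketch of reading the renamed information through the statistics interface (assuming an open WT_CONNECTION *conn; error handling omitted):

    /* Read one connection statistic with a statistics cursor. */
    WT_CURSOR *cursor;
    WT_SESSION *session;
    const char *desc, *pvalue;
    int64_t value;
    int ret;

    ret = conn->open_session(conn, NULL, NULL, &session);
    ret = session->open_cursor(session, "statistics:", NULL, NULL, &cursor);
    cursor->set_key(cursor, WT_STAT_CONN_CACHE_OVERFLOW_VALUE);
    ret = cursor->search(cursor);
    ret = cursor->get_value(cursor, &desc, &pvalue, &value);
    ret = cursor->close(cursor);
    ret = session->close(session, NULL);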
+
+</dl><hr>
@section version_280 Upgrading to Version 2.8.0
<dl>
<dt>LSM metadata</dt>
@@ -55,7 +98,6 @@ The WiredTiger public API used to define a structure that could encapsulate
log sequence numbers. That structure is no longer exposed publicly.
</dd>
-<dt>
</dl><hr>
@section version_270 Upgrading to Version 2.7.0
diff --git a/src/docs/wtperf.dox b/src/docs/wtperf.dox
index e06272d117c..17b95660f79 100644
--- a/src/docs/wtperf.dox
+++ b/src/docs/wtperf.dox
@@ -210,6 +210,8 @@ if non zero choose a value from within this range as the key for
insert operations
@par random_value (boolean, default=false)
generate random content for the value
+@par range_partition (boolean, default=false)
+partition data by range (vs hash)
@par read_range (unsigned int, default=0)
scan a range of keys after each search
@par readonly (boolean, default=false)
diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c
index 8ea487bbf83..7d3fd838dcd 100644
--- a/src/evict/evict_lru.c
+++ b/src/evict/evict_lru.c
@@ -17,7 +17,7 @@ static int __evict_page(WT_SESSION_IMPL *, bool);
static int __evict_pass(WT_SESSION_IMPL *);
static int __evict_server(WT_SESSION_IMPL *, bool *);
static int __evict_walk(WT_SESSION_IMPL *, uint32_t);
-static int __evict_walk_file(WT_SESSION_IMPL *, uint32_t, u_int *);
+static int __evict_walk_file(WT_SESSION_IMPL *, uint32_t, u_int, u_int *);
/*
* __evict_read_gen --
@@ -31,11 +31,6 @@ __evict_read_gen(const WT_EVICT_ENTRY *entry)
uint64_t read_gen;
btree = entry->btree;
-
- /* Never prioritize empty slots. */
- if (entry->ref == NULL)
- return (UINT64_MAX);
-
page = entry->ref->page;
/* Any page set to the oldest generation should be discarded. */
@@ -70,14 +65,15 @@ __evict_read_gen(const WT_EVICT_ENTRY *entry)
* Qsort function: sort the eviction array.
*/
static int WT_CDECL
-__evict_lru_cmp(const void *a, const void *b)
+__evict_lru_cmp(const void *a_arg, const void *b_arg)
{
- uint64_t a_lru, b_lru;
+ const WT_EVICT_ENTRY *a = a_arg, *b = b_arg;
+ uint64_t a_score, b_score;
- a_lru = __evict_read_gen(a);
- b_lru = __evict_read_gen(b);
+ a_score = (a->ref == NULL ? UINT64_MAX : a->score);
+ b_score = (b->ref == NULL ? UINT64_MAX : b->score);
- return ((a_lru < b_lru) ? -1 : (a_lru == b_lru) ? 0 : 1);
+ return ((a_score < b_score) ? -1 : (a_score == b_score) ? 0 : 1);
}
/*
@@ -542,54 +538,49 @@ __evict_update_work(WT_SESSION_IMPL *session)
cache->evict_max_refs_per_file =
WT_MAX(100, WT_MILLION / (conn->open_file_count + 1));
+ if (cache->evict_queues[WT_EVICT_URGENT_QUEUE].evict_current != NULL)
+ FLD_SET(cache->state, WT_EVICT_STATE_URGENT);
+
/*
- * Page eviction overrides the dirty target and other types of eviction,
- * that is, we don't care where we are with respect to the dirty target
- * if page eviction is configured.
+ * If we need space in the cache, try to find clean pages to evict.
*
* Avoid division by zero if the cache size has not yet been set in a
* shared cache.
*/
bytes_max = conn->cache_size + 1;
bytes_inuse = __wt_cache_bytes_inuse(cache);
- if (bytes_inuse > (cache->eviction_target * bytes_max) / 100) {
- FLD_SET(cache->state, WT_EVICT_PASS_ALL);
- goto done;
- }
+ if (bytes_inuse > (cache->eviction_target * bytes_max) / 100)
+ FLD_SET(cache->state, WT_EVICT_STATE_CLEAN);
/*
- * If the cache has been stuck and is now under control, clear the
- * stuck flag.
+ * Scrub dirty pages and keep them in cache if we are less than
+ * halfway between the cache target and trigger.
*/
- if (bytes_inuse < bytes_max)
- F_CLR(cache, WT_CACHE_STUCK);
+ if (bytes_inuse < ((cache->eviction_target + cache->eviction_trigger) *
+ bytes_max) / 200)
+ FLD_SET(cache->state, WT_EVICT_STATE_SCRUB);
- dirty_inuse = __wt_cache_dirty_inuse(cache);
- if (dirty_inuse > (cache->eviction_dirty_target * bytes_max) / 100) {
- FLD_SET(cache->state, WT_EVICT_PASS_DIRTY);
- goto done;
- }
+ dirty_inuse = __wt_cache_dirty_leaf_inuse(cache);
+ if (dirty_inuse > (cache->eviction_dirty_target * bytes_max) / 100)
+ FLD_SET(cache->state, WT_EVICT_STATE_DIRTY);
/*
- * Evict pages with oldest generation (which would otherwise block
- * application threads), set regardless of whether we have reached
- * the eviction trigger.
+ * If the cache has been stuck and is now under control, clear the
+ * stuck flag.
*/
- if (F_ISSET(cache, WT_CACHE_WOULD_BLOCK)) {
- FLD_SET(cache->state, WT_EVICT_PASS_WOULD_BLOCK);
-
- F_CLR(cache, WT_CACHE_WOULD_BLOCK);
- goto done;
- }
-
- return (false);
+ if (bytes_inuse < bytes_max &&
+ dirty_inuse < (cache->eviction_dirty_trigger * bytes_max) / 100)
+ F_CLR(cache, WT_CACHE_STUCK);
-done: if (F_ISSET(cache, WT_CACHE_STUCK)) {
+ if (F_ISSET(cache, WT_CACHE_STUCK)) {
+ WT_ASSERT(session, cache->state != 0);
WT_STAT_FAST_CONN_SET(session,
cache_eviction_aggressive_set, 1);
- FLD_SET(cache->state, WT_EVICT_PASS_AGGRESSIVE);
+ FLD_SET(cache->state, WT_EVICT_STATE_AGGRESSIVE);
}
- return (true);
+
+ return (FLD_ISSET(cache->state,
+ WT_EVICT_STATE_ALL | WT_EVICT_STATE_URGENT));
}
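
To make the threshold arithmetic above concrete, here is a stand-alone sketch of the same calculations. The 80/95 clean-cache percentages are illustrative assumptions; only the 5/20 dirty defaults are stated elsewhere in this change.

    #include <stdint.h>

    #define STATE_CLEAN 0x1u    /* need space: evict clean pages */
    #define STATE_DIRTY 0x2u    /* too much dirty data: write dirty pages */
    #define STATE_SCRUB 0x4u    /* below the midpoint: keep scrubbed pages */

    /* Sketch of the state decisions, mirroring __evict_update_work. */
    static uint32_t
    evict_state(uint64_t bytes_inuse, uint64_t dirty_inuse, uint64_t bytes_max)
    {
        uint64_t target = 80, trigger = 95, dirty_target = 5;  /* percents */
        uint32_t state = 0;

        if (bytes_inuse > (target * bytes_max) / 100)
            state |= STATE_CLEAN;
        if (bytes_inuse < ((target + trigger) * bytes_max) / 200)
            state |= STATE_SCRUB;  /* less than halfway to the trigger */
        if (dirty_inuse > (dirty_target * bytes_max) / 100)
            state |= STATE_DIRTY;
        return (state);
    }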
/*
@@ -603,7 +594,7 @@ __evict_pass(WT_SESSION_IMPL *session)
WT_CONNECTION_IMPL *conn;
WT_EVICT_WORKER *worker;
uint64_t pages_evicted;
- int loop;
+ u_int loop;
conn = S2C(session);
cache = conn->cache;
@@ -647,15 +638,14 @@ __evict_pass(WT_SESSION_IMPL *session)
if (loop > 10) {
WT_STAT_FAST_CONN_SET(session,
cache_eviction_aggressive_set, 1);
- FLD_SET(cache->state, WT_EVICT_PASS_AGGRESSIVE);
+ FLD_SET(cache->state, WT_EVICT_STATE_AGGRESSIVE);
}
/*
* Start a worker if we have capacity and we haven't reached
* the eviction targets.
*/
- if (FLD_ISSET(cache->state, WT_EVICT_PASS_ALL |
- WT_EVICT_PASS_DIRTY | WT_EVICT_PASS_WOULD_BLOCK) &&
+ if (FLD_ISSET(cache->state, WT_EVICT_STATE_ALL) &&
conn->evict_workers < conn->evict_workers_max) {
WT_RET(__wt_verbose(session, WT_VERB_EVICTSERVER,
"Starting evict worker: %"PRIu32"\n",
@@ -671,7 +661,8 @@ __evict_pass(WT_SESSION_IMPL *session)
WT_RET(__wt_verbose(session, WT_VERB_EVICTSERVER,
"Eviction pass with: Max: %" PRIu64
" In use: %" PRIu64 " Dirty: %" PRIu64,
- conn->cache_size, cache->bytes_inmem, cache->bytes_dirty));
+ conn->cache_size, cache->bytes_inmem,
+ cache->bytes_dirty_intl + cache->bytes_dirty_leaf));
WT_RET(__evict_lru_walk(session));
WT_RET_NOTFOUND_OK(__evict_lru_pages(session, true));
@@ -682,29 +673,32 @@ __evict_pass(WT_SESSION_IMPL *session)
* sleep, it's not something we can fix.
*/
if (pages_evicted == cache->pages_evict) {
- WT_STAT_FAST_CONN_INCR(session,
- cache_eviction_server_slept);
/*
* Back off if we aren't making progress: walks hold
- * the handle list lock, which blocks other operations
- * that can free space in cache, such as LSM discarding
+ * the handle list lock, blocking other operations that
+ * can free space in cache, such as LSM discarding
* handles.
+ *
+ * Allow this wait to be interrupted (e.g. if a
+ * checkpoint completes): make sure we wait for a
+ * non-zero number of microseconds.
*/
- __wt_sleep(0, WT_THOUSAND * (uint64_t)loop);
+ WT_STAT_FAST_CONN_INCR(session,
+ cache_eviction_server_slept);
+ WT_RET(__wt_cond_wait(session,
+ cache->evict_cond, WT_THOUSAND * WT_MAX(loop, 1)));
+
if (loop == 100) {
/*
* Mark the cache as stuck if we need space
* and aren't evicting any pages.
*/
- if (!FLD_ISSET(cache->state,
- WT_EVICT_PASS_WOULD_BLOCK)) {
- F_SET(cache, WT_CACHE_STUCK);
- WT_STAT_FAST_CONN_INCR(
- session, cache_eviction_slow);
- WT_RET(__wt_verbose(
- session, WT_VERB_EVICTSERVER,
- "unable to reach eviction goal"));
- }
+ F_SET(cache, WT_CACHE_STUCK);
+ WT_STAT_FAST_CONN_INCR(
+ session, cache_eviction_slow);
+ WT_RET(__wt_verbose(
+ session, WT_VERB_EVICTSERVER,
+ "unable to reach eviction goal"));
break;
}
} else {
@@ -927,26 +921,29 @@ __evict_lru_walk(WT_SESSION_IMPL *session)
{
WT_CACHE *cache;
WT_DECL_RET;
- WT_EVICT_QUEUE *evict_queue;
- uint64_t cutoff, read_gen_oldest;
+ WT_EVICT_QUEUE *queue;
+ uint64_t read_gen_oldest;
uint32_t candidates, entries, queue_index;
cache = S2C(session)->cache;
- queue_index = cache->evict_queue_fill++ % WT_EVICT_QUEUE_MAX;
- evict_queue = &cache->evict_queues[queue_index];
+ /* Fill the next queue (that isn't the urgent queue). */
+ queue_index =
+ 1 + (cache->evict_queue_fill++ % (WT_EVICT_QUEUE_MAX - 1));
+ queue = &cache->evict_queues[queue_index];
+
/* Get some more pages to consider for eviction. */
if ((ret = __evict_walk(cache->walk_session, queue_index)) != 0)
return (ret == EBUSY ? 0 : ret);
/* Sort the list into LRU order and restart. */
- __wt_spin_lock(session, &evict_queue->evict_lock);
+ __wt_spin_lock(session, &queue->evict_lock);
- entries = evict_queue->evict_entries;
- qsort(evict_queue->evict_queue,
+ entries = queue->evict_entries;
+ qsort(queue->evict_queue,
entries, sizeof(WT_EVICT_ENTRY), __evict_lru_cmp);
- while (entries > 0 && evict_queue->evict_queue[entries - 1].ref == NULL)
+ while (entries > 0 && queue->evict_queue[entries - 1].ref == NULL)
--entries;
/*
@@ -956,9 +953,9 @@ __evict_lru_walk(WT_SESSION_IMPL *session)
*/
while (entries > WT_EVICT_WALK_BASE)
__evict_list_clear(session,
- &evict_queue->evict_queue[--entries]);
+ &queue->evict_queue[--entries]);
- evict_queue->evict_entries = entries;
+ queue->evict_entries = entries;
if (entries == 0) {
/*
@@ -966,23 +963,19 @@ __evict_lru_walk(WT_SESSION_IMPL *session)
* Make sure application threads don't read past the end of the
* candidate list, or they may race with the next walk.
*/
- evict_queue->evict_candidates = 0;
- __wt_spin_unlock(session, &evict_queue->evict_lock);
- __wt_spin_lock(session, &cache->evict_queue_lock);
- cache->evict_current = NULL;
- cache->evict_current_queue = NULL;
- __wt_spin_unlock(session, &cache->evict_queue_lock);
+ queue->evict_candidates = 0;
+ queue->evict_current = NULL;
+ __wt_spin_unlock(session, &queue->evict_lock);
return (0);
}
/* Decide how many of the candidates we're going to try and evict. */
- if (FLD_ISSET(cache->state,
- WT_EVICT_PASS_AGGRESSIVE | WT_EVICT_PASS_WOULD_BLOCK)) {
+ if (FLD_ISSET(cache->state, WT_EVICT_STATE_AGGRESSIVE)) {
/*
* Take all candidates if we only gathered pages with an oldest
* read generation set.
*/
- evict_queue->evict_candidates = entries;
+ queue->evict_candidates = entries;
} else {
/*
* Find the oldest read generation we have in the queue, used
@@ -992,8 +985,7 @@ __evict_lru_walk(WT_SESSION_IMPL *session)
read_gen_oldest = WT_READGEN_OLDEST;
for (candidates = 0; candidates < entries; ++candidates) {
read_gen_oldest =
- __evict_read_gen(
- &evict_queue->evict_queue[candidates]);
+ queue->evict_queue[candidates].score;
if (read_gen_oldest != WT_READGEN_OLDEST)
break;
}
@@ -1002,51 +994,45 @@ __evict_lru_walk(WT_SESSION_IMPL *session)
* Take all candidates if we only gathered pages with an oldest
* read generation set.
*
- * We normally never take more than 50% of the entries; if 50%
- * of the entries were at the oldest read generation, take them.
+ * We normally never take more than 50% of the entries but if
+ * 50% of the entries were at the oldest read generation, take
+ * all of them.
*/
if (read_gen_oldest == WT_READGEN_OLDEST)
- evict_queue->evict_candidates = entries;
- else if (candidates >= entries / 2)
- evict_queue->evict_candidates = candidates;
+ queue->evict_candidates = entries;
+ else if (candidates > entries / 2)
+ queue->evict_candidates = candidates;
else {
- /* Save the calculated oldest generation. */
- cache->read_gen_oldest = read_gen_oldest;
-
- /* Find the bottom 25% of read generations. */
- cutoff =
- (3 * read_gen_oldest + __evict_read_gen(
- &evict_queue->evict_queue[entries - 1])) / 4;
-
/*
- * Don't take less than 10% or more than 50% of entries,
- * regardless. That said, if there is only one entry,
- * which is normal when populating an empty file, don't
- * exclude it.
+ * Take all of the urgent pages plus a third of
+ * ordinary candidates (which could be expressed as
+ * WT_EVICT_WALK_INCR / WT_EVICT_WALK_BASE). In the
+ * steady state, we want to get as many candidates as
+ * the eviction walk adds to the queue.
+ *
+ * That said, if there is only one entry, which is
+ * normal when populating an empty file, don't exclude
+ * it.
*/
- for (candidates = 1 + entries / 10;
- candidates < entries / 2;
- candidates++)
- if (__evict_read_gen(
- &evict_queue->evict_queue[candidates]) >
- cutoff)
- break;
- evict_queue->evict_candidates = candidates;
+ queue->evict_candidates =
+ 1 + candidates + ((entries - candidates) - 1) / 3;
+ cache->read_gen_oldest = read_gen_oldest;
}
}
- __wt_spin_unlock(session, &evict_queue->evict_lock);
+ queue->evict_current = queue->evict_queue;
+ __wt_spin_unlock(session, &queue->evict_lock);
+
/*
* Now we can set the next queue.
*/
__wt_spin_lock(session, &cache->evict_queue_lock);
- if (cache->evict_current == NULL)
+ if (cache->evict_current_queue->evict_current == NULL)
WT_STAT_FAST_CONN_INCR(session, cache_eviction_queue_empty);
else
WT_STAT_FAST_CONN_INCR(session, cache_eviction_queue_not_empty);
- cache->evict_current = evict_queue->evict_queue;
- cache->evict_current_queue = evict_queue;
+ cache->evict_current_queue = queue;
__wt_spin_unlock(session, &cache->evict_queue_lock);
/*
@@ -1070,9 +1056,8 @@ __evict_walk(WT_SESSION_IMPL *session, uint32_t queue_index)
WT_CONNECTION_IMPL *conn;
WT_DATA_HANDLE *dhandle;
WT_DECL_RET;
- WT_EVICT_QUEUE *evict_queue;
- u_int max_entries, prev_slot, retries;
- u_int slot, start_slot, spins;
+ WT_EVICT_QUEUE *queue;
+ u_int max_entries, prev_slot, retries, slot, start_slot, spins;
bool dhandle_locked, incr;
conn = S2C(session);
@@ -1086,9 +1071,9 @@ __evict_walk(WT_SESSION_IMPL *session, uint32_t queue_index)
* Set the starting slot in the queue and the maximum pages added
* per walk.
*/
- evict_queue = &cache->evict_queues[queue_index];
- start_slot = slot = evict_queue->evict_entries;
- max_entries = slot + WT_EVICT_WALK_INCR;
+ queue = &cache->evict_queues[queue_index];
+ start_slot = slot = queue->evict_entries;
+ max_entries = WT_MIN(slot + WT_EVICT_WALK_INCR, cache->evict_slots);
retry: while (slot < max_entries && ret == 0) {
/*
@@ -1158,7 +1143,7 @@ retry: while (slot < max_entries && ret == 0) {
*/
if ((btree->checkpointing != WT_CKPT_OFF ||
btree->evict_priority != 0) &&
- !FLD_ISSET(cache->state, WT_EVICT_PASS_AGGRESSIVE))
+ !FLD_ISSET(cache->state, WT_EVICT_STATE_AGGRESSIVE))
continue;
/* Skip files if we have used all available hazard pointers. */
@@ -1171,7 +1156,6 @@ retry: while (slot < max_entries && ret == 0) {
* useful in the past.
*/
if (btree->evict_walk_period != 0 &&
- evict_queue->evict_entries >= WT_EVICT_WALK_INCR &&
btree->evict_walk_skips++ < btree->evict_walk_period)
continue;
btree->evict_walk_skips = 0;
@@ -1197,8 +1181,8 @@ retry: while (slot < max_entries && ret == 0) {
if (!F_ISSET(btree, WT_BTREE_NO_EVICTION)) {
cache->evict_file_next = dhandle;
WT_WITH_DHANDLE(session, dhandle,
- ret = __evict_walk_file(
- session, queue_index, &slot));
+ ret = __evict_walk_file(session,
+ queue_index, max_entries, &slot));
WT_ASSERT(session, session->split_gen == 0);
}
__wt_spin_unlock(session, &cache->evict_walk_lock);
@@ -1234,39 +1218,49 @@ retry: while (slot < max_entries && ret == 0) {
if (cache->pass_intr == 0 && ret == 0 &&
slot < max_entries && (retries < 2 ||
(retries < 10 &&
- !FLD_ISSET(cache->state, WT_EVICT_PASS_WOULD_BLOCK) &&
- (slot == evict_queue->evict_entries || slot > start_slot)))) {
+ (slot == queue->evict_entries || slot > start_slot)))) {
start_slot = slot;
++retries;
goto retry;
}
- evict_queue->evict_entries = slot;
+ queue->evict_entries = slot;
return (ret);
}
/*
- * __evict_init_candidate --
+ * __evict_push_candidate --
* Initialize a WT_EVICT_ENTRY structure with a given page.
*/
-static void
-__evict_init_candidate(WT_SESSION_IMPL *session,
- WT_EVICT_QUEUE *evict_queue, WT_EVICT_ENTRY *evict, WT_REF *ref)
+static bool
+__evict_push_candidate(WT_SESSION_IMPL *session,
+ WT_EVICT_QUEUE *queue, WT_EVICT_ENTRY *evict, WT_REF *ref)
{
u_int slot;
+ uint8_t orig_flags, new_flags;
+
+ /*
+ * Threads can race to queue a page (e.g., an ordinary LRU walk can
+ * race with a page being queued for urgent eviction.
+ */
+ orig_flags = new_flags = ref->page->flags_atomic;
+ FLD_SET(new_flags, WT_PAGE_EVICT_LRU);
+ if (orig_flags == new_flags ||
+ !__wt_atomic_cas8(&ref->page->flags_atomic, orig_flags, new_flags))
+ return (false);
/* Keep track of the maximum slot we are using. */
- slot = (u_int)(evict - evict_queue->evict_queue);
- if (slot >= evict_queue->evict_max)
- evict_queue->evict_max = slot + 1;
+ slot = (u_int)(evict - queue->evict_queue);
+ if (slot >= queue->evict_max)
+ queue->evict_max = slot + 1;
if (evict->ref != NULL)
__evict_list_clear(session, evict);
- evict->ref = ref;
- evict->btree = S2BT(session);
- /* Mark the page on the list; set last to flush the other updates. */
- F_SET_ATOMIC(ref->page, WT_PAGE_EVICT_LRU);
+ evict->btree = S2BT(session);
+ evict->ref = ref;
+ evict->score = __evict_read_gen(evict);
+ return (true);
}
/*
@@ -1274,34 +1268,73 @@ __evict_init_candidate(WT_SESSION_IMPL *session,
* Get a few page eviction candidates from a single underlying file.
*/
static int
-__evict_walk_file(WT_SESSION_IMPL *session, uint32_t queue_index, u_int *slotp)
+__evict_walk_file(WT_SESSION_IMPL *session,
+ uint32_t queue_index, u_int max_entries, u_int *slotp)
{
WT_BTREE *btree;
WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_EVICT_ENTRY *end, *evict, *start;
- WT_EVICT_QUEUE *evict_queue;
+ WT_EVICT_QUEUE *queue;
WT_PAGE *page;
WT_PAGE_MODIFY *mod;
WT_REF *ref;
+ uint64_t btree_inuse, bytes_per_slot, cache_inuse;
uint64_t pages_seen, refs_walked;
- uint32_t walk_flags;
+ uint32_t remaining_slots, target_pages, total_slots, walk_flags;
int internal_pages, restarts;
bool enough, modified;
conn = S2C(session);
btree = S2BT(session);
cache = conn->cache;
- evict_queue = &cache->evict_queues[queue_index];
+ queue = &cache->evict_queues[queue_index];
internal_pages = restarts = 0;
enough = false;
- start = evict_queue->evict_queue + *slotp;
- end = start + WT_EVICT_WALK_PER_FILE;
+ /*
+ * Figure out how many slots to fill from this tree.
+ * Note that some care is taken in the calculation to avoid overflow.
+ */
+ start = queue->evict_queue + *slotp;
+ remaining_slots = max_entries - *slotp;
+ btree_inuse = __wt_btree_bytes_inuse(session);
+ cache_inuse = __wt_cache_bytes_inuse(cache);
+ total_slots = max_entries - queue->evict_entries;
+
+ /*
+ * The target number of pages for this tree is proportional to the
+ * space it is taking up in cache. Round to the nearest number of
+ * slots so we assign all of the slots to a tree filling 99+% of the
+ * cache (and only have to walk it once).
+ */
+ bytes_per_slot = cache_inuse / total_slots;
+ target_pages = (uint32_t)(
+ (btree_inuse + bytes_per_slot / 2) / bytes_per_slot);
+ if (target_pages == 0) {
+ /*
+ * Randomly walk trees with a tiny fraction of the cache in
+ * case there are so many trees that none of them use enough of
+ * the cache to be allocated slots.
+ *
+ * Map a random number into the range [0..1], and if the result
+ * is greater than the fraction of the cache used by this tree,
+ * give up. In other words, there is a small chance we will
+ * visit trees that use a small fraction of the cache. Arrange
+ * this calculation to avoid overflow (e.g., don't multiply
+ * anything by UINT32_MAX).
+ */
+ if (__wt_random(&session->rnd) / (double)UINT32_MAX >
+ btree_inuse / (double)cache_inuse)
+ return (0);
+ target_pages = 10;
+ }
+
if (F_ISSET(session->dhandle, WT_DHANDLE_DEAD) ||
- end > evict_queue->evict_queue + cache->evict_slots)
- end = evict_queue->evict_queue + cache->evict_slots;
+ target_pages > remaining_slots)
+ target_pages = remaining_slots;
+ end = start + target_pages;
walk_flags =
WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_GEN | WT_READ_NO_WAIT;
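
A stand-alone sketch of the proportional-fill calculation above, with made-up byte counts in the trailing comment:

    #include <stdint.h>

    /* Sketch: queue slots assigned to a tree, proportional to cache usage. */
    static uint32_t
    slots_for_tree(
        uint64_t btree_inuse, uint64_t cache_inuse, uint32_t total_slots)
    {
        uint64_t bytes_per_slot;

        bytes_per_slot = cache_inuse / total_slots;
        if (bytes_per_slot == 0)        /* Avoid division by zero. */
            bytes_per_slot = 1;
        /* Round to the nearest slot rather than truncating. */
        return ((uint32_t)(
            (btree_inuse + bytes_per_slot / 2) / bytes_per_slot));
    }

    /*
     * Example: with 10GB in cache and 100 slots, bytes_per_slot is 100MB; a
     * tree holding 2.55GB gets (2.55GB + 50MB) / 100MB = 26 slots, and a tree
     * holding 9.96GB (99.6% of the cache) rounds to all 100 slots, so it is
     * walked only once.
     */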
@@ -1352,14 +1385,11 @@ __evict_walk_file(WT_SESSION_IMPL *session, uint32_t queue_index, u_int *slotp)
/*
* It's possible (but unlikely) to visit a page without a read
* generation, if we race with the read instantiating the page.
- * Ignore those pages, but set the page's read generation here
- * to ensure a bug doesn't somehow leave a page without a read
- * generation.
+ * Set the page's read generation here to ensure a bug doesn't
+ * somehow leave a page without a read generation.
*/
- if (page->read_gen == WT_READGEN_NOTSET) {
+ if (page->read_gen == WT_READGEN_NOTSET)
__wt_cache_read_gen_new(session, page);
- continue;
- }
/* Pages we no longer need (clean or dirty), are found money. */
if (page->read_gen == WT_READGEN_OLDEST) {
@@ -1367,26 +1397,23 @@ __evict_walk_file(WT_SESSION_IMPL *session, uint32_t queue_index, u_int *slotp)
session, cache_eviction_pages_queued_oldest);
goto fast;
}
+
if (__wt_page_is_empty(page) ||
- F_ISSET(session->dhandle, WT_DHANDLE_DEAD))
+ F_ISSET(session->dhandle, WT_DHANDLE_DEAD) ||
+ FLD_ISSET(cache->state, WT_EVICT_STATE_AGGRESSIVE))
goto fast;
/* Skip clean pages if appropriate. */
if (!modified && (F_ISSET(conn, WT_CONN_IN_MEMORY) ||
- FLD_ISSET(cache->state, WT_EVICT_PASS_DIRTY)))
+ !FLD_ISSET(cache->state, WT_EVICT_STATE_CLEAN)))
continue;
- /*
- * If we are only trickling out pages marked for definite
- * eviction, skip anything that isn't marked.
- */
- if (FLD_ISSET(cache->state, WT_EVICT_PASS_WOULD_BLOCK) &&
- page->memory_footprint < btree->splitmempage)
+ /* Skip dirty pages if appropriate. */
+ if (modified && !FLD_ISSET(cache->state, WT_EVICT_STATE_DIRTY))
continue;
- /* Limit internal pages to 50% unless we get aggressive. */
+ /* Limit internal pages to 50% of the total. */
if (WT_PAGE_IS_INTERNAL(page) &&
- !FLD_ISSET(cache->state, WT_EVICT_PASS_AGGRESSIVE) &&
internal_pages >= (int)(evict - start) / 2)
continue;
@@ -1410,8 +1437,7 @@ fast: /* If the page can't be evicted, give up. */
* configure lookaside table writes in reconciliation, allowing
* us to evict pages we can't usually evict.
*/
- if (!FLD_ISSET(cache->state,
- WT_EVICT_PASS_AGGRESSIVE | WT_EVICT_PASS_WOULD_BLOCK)) {
+ if (!FLD_ISSET(cache->state, WT_EVICT_STATE_AGGRESSIVE)) {
/*
* If the page is clean but has modifications that
* appear too new to evict, skip it.
@@ -1422,7 +1448,8 @@ fast: /* If the page can't be evicted, give up. */
}
WT_ASSERT(session, evict->ref == NULL);
- __evict_init_candidate(session, evict_queue, evict, ref);
+ if (!__evict_push_candidate(session, queue, evict, ref))
+ continue;
++evict;
if (WT_PAGE_IS_INTERNAL(page))
@@ -1479,19 +1506,21 @@ __evict_check_entry_size(WT_SESSION_IMPL *session, WT_EVICT_ENTRY *entry)
cache = S2C(session)->cache;
- if (cache->pages_evict == 0)
+ if (cache->pages_evict == 0 || cache->bytes_evict < WT_MEGABYTE)
return (true);
max = (cache->bytes_evict / cache->pages_evict) * 4;
if ((ref = entry->ref) != NULL) {
if ((page = ref->page) == NULL)
return (true);
+
/*
- * If this page is more than four times the average evicted page
- * size then return false. Return true in all other cases.
- * XXX Should we care here if the page is dirty? Probably...
+ * If this page is dirty and more than four times the average
+ * evicted page size then return false. Return true in all
+ * other cases.
*/
- if (page->memory_footprint > max) {
+ if (__wt_page_is_modified(page) &&
+ page->memory_footprint > max) {
WT_STAT_FAST_CONN_INCR(
session, cache_eviction_server_toobig);
return (false);
@@ -1510,71 +1539,85 @@ __evict_get_ref(
{
WT_CACHE *cache;
WT_EVICT_ENTRY *evict;
- WT_EVICT_QUEUE *evict_queue;
+ WT_EVICT_QUEUE *queue, *urgent_queue;
uint32_t candidates;
cache = S2C(session)->cache;
+ urgent_queue = &cache->evict_queues[WT_EVICT_URGENT_QUEUE];
*btreep = NULL;
*refp = NULL;
- /*
- * Avoid the LRU lock if no pages are available.
- */
+ /* Avoid the LRU lock if no pages are available. */
WT_STAT_FAST_CONN_INCR(session, cache_eviction_get_ref);
- if (cache->evict_current == NULL) {
+ if (cache->evict_current_queue->evict_current == NULL &&
+ urgent_queue->evict_current == NULL) {
WT_STAT_FAST_CONN_INCR(session, cache_eviction_get_ref_empty);
return (WT_NOTFOUND);
}
+
__wt_spin_lock(session, &cache->evict_queue_lock);
+
+ /* Check the urgent queue first. */
+ queue = urgent_queue->evict_current != NULL &&
+ (FLD_ISSET(cache->state, WT_EVICT_STATE_AGGRESSIVE) ||
+ (F_ISSET(session, WT_SESSION_INTERNAL) &&
+ (!is_server || S2C(session)->evict_workers <= 1))) ?
+ urgent_queue : cache->evict_current_queue;
+
+ __wt_spin_unlock(session, &cache->evict_queue_lock);
+
/*
- * Verify there are still pages available.
+ * Only evict half of the pages before looking for more. The remainder
+ * are left to eviction workers (if configured), or application threads
+ * if necessary.
*/
- if (cache->evict_current == NULL) {
- __wt_spin_unlock(session, &cache->evict_queue_lock);
- WT_STAT_FAST_CONN_INCR(session, cache_eviction_get_ref_empty2);
- return (WT_NOTFOUND);
- }
+ candidates = queue->evict_candidates;
+ if (is_server && queue != urgent_queue && candidates > 1)
+ candidates /= 2;
+
/*
- * We got the queue lock, which should be fast, and now we want to
- * get the lock on the individual queue. We know that the shared
- * queue fields cannot change now.
+ * We got the queue lock, which should be fast, and chose a queue.
+ * Now we want to get the lock on the individual queue.
*/
- evict_queue = cache->evict_current_queue;
for (;;) {
- if (__wt_spin_trylock(session, &evict_queue->evict_lock) == 0)
- break;
- if (!F_ISSET(session, WT_SESSION_INTERNAL)) {
- __wt_spin_unlock(session, &cache->evict_queue_lock);
+ /* Verify there are still pages available. */
+ if (queue->evict_current == NULL || (uint32_t)
+ (queue->evict_current - queue->evict_queue) >= candidates) {
+ WT_STAT_FAST_CONN_INCR(
+ session, cache_eviction_get_ref_empty2);
return (WT_NOTFOUND);
}
- __wt_yield();
+ if (!is_server)
+ __wt_spin_lock(session, &queue->evict_lock);
+ else if (__wt_spin_trylock(session, &queue->evict_lock) != 0)
+ continue;
+ break;
}
- /*
- * Only evict half of the pages before looking for more. The remainder
- * are left to eviction workers (if configured), or application threads
- * if necessary.
- */
- candidates = evict_queue->evict_candidates;
- if (is_server && candidates > 1)
- candidates /= 2;
/* Get the next page queued for eviction. */
- while ((evict = cache->evict_current) != NULL &&
- evict < evict_queue->evict_queue + candidates &&
- evict->ref != NULL) {
+ for (evict = queue->evict_current;
+ evict >= queue->evict_queue &&
+ evict < queue->evict_queue + candidates;
+ ++evict) {
+ if (evict->ref == NULL)
+ continue;
WT_ASSERT(session, evict->btree != NULL);
+
/*
- * If the server is helping out and encounters an entry that
- * is too large, it stops helping. Evicting a very large
- * page in the server thread could stall eviction from finding
- * new work.
+ * If the server is helping out and encounters an entry that is
+ * too large, it stops helping. Evicting a very large page in
+ * the server thread could stall eviction from finding new
+ * work.
+ *
+ * However, we can't skip entries in the urgent queue or they
+ * may never be found again.
*/
- if (is_server && S2C(session)->evict_workers > 1 &&
- !__evict_check_entry_size(session, evict))
+ if (is_server && queue != urgent_queue &&
+ S2C(session)->evict_workers > 1 &&
+ !__evict_check_entry_size(session, evict)) {
+ --evict;
break;
-
- /* Move to the next item. */
- ++cache->evict_current;
+ }
/*
* Lock the page while holding the eviction mutex to prevent
@@ -1604,11 +1647,14 @@ __evict_get_ref(
break;
}
- /* Clear the current pointer if there are no more candidates. */
- if (evict >= evict_queue->evict_queue + evict_queue->evict_candidates)
- cache->evict_current = NULL;
- __wt_spin_unlock(session, &evict_queue->evict_lock);
- __wt_spin_unlock(session, &cache->evict_queue_lock);
+ /* Move to the next item. */
+ if (evict != NULL && evict + 1 <
+ queue->evict_queue + queue->evict_candidates)
+ queue->evict_current = evict + 1;
+ else /* Clear the current pointer if there are no more candidates. */
+ queue->evict_current = NULL;
+
+ __wt_spin_unlock(session, &queue->evict_lock);
return ((*refp == NULL) ? WT_NOTFOUND : 0);
}
@@ -1633,16 +1679,14 @@ __evict_page(WT_SESSION_IMPL *session, bool is_server)
* An internal session flags either the server itself or an eviction
* worker thread.
*/
- if (F_ISSET(session, WT_SESSION_INTERNAL)) {
- if (is_server) {
- WT_STAT_FAST_CONN_INCR(
- session, cache_eviction_server_evicting);
- cache->server_evicts++;
- } else {
- WT_STAT_FAST_CONN_INCR(
- session, cache_eviction_worker_evicting);
- cache->worker_evicts++;
- }
+ if (is_server) {
+ WT_STAT_FAST_CONN_INCR(
+ session, cache_eviction_server_evicting);
+ cache->server_evicts++;
+ } else if (F_ISSET(session, WT_SESSION_INTERNAL)) {
+ WT_STAT_FAST_CONN_INCR(
+ session, cache_eviction_worker_evicting);
+ cache->worker_evicts++;
} else {
if (__wt_page_is_modified(ref->page))
WT_STAT_FAST_CONN_INCR(
@@ -1768,6 +1812,64 @@ __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full)
}
/*
+ * __wt_page_evict_soon --
+ * Set a page to be evicted as soon as possible.
+ */
+int
+__wt_page_evict_soon(WT_SESSION_IMPL *session, WT_REF *ref)
+{
+ WT_CACHE *cache;
+ WT_EVICT_ENTRY *evict;
+ WT_EVICT_QUEUE *urgent_queue;
+ WT_PAGE *page;
+ bool queued;
+
+ /* Root pages should never be evicted via LRU. */
+ WT_ASSERT(session, !__wt_ref_is_root(ref));
+
+ page = ref->page;
+ page->read_gen = WT_READGEN_OLDEST;
+ if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU) ||
+ F_ISSET(S2BT(session), WT_BTREE_NO_EVICTION))
+ return (0);
+
+ /* Append to the urgent queue if we can. */
+ cache = S2C(session)->cache;
+ urgent_queue = &cache->evict_queues[WT_EVICT_URGENT_QUEUE];
+ queued = false;
+
+ __wt_spin_lock(session, &cache->evict_queue_lock);
+ if (F_ISSET_ATOMIC(page, WT_PAGE_EVICT_LRU) ||
+ F_ISSET(S2BT(session), WT_BTREE_NO_EVICTION))
+ goto done;
+
+ __wt_spin_lock(session, &urgent_queue->evict_lock);
+ if (urgent_queue->evict_current == NULL) {
+ urgent_queue->evict_current = urgent_queue->evict_queue;
+ urgent_queue->evict_candidates = 0;
+ }
+ evict = urgent_queue->evict_queue + urgent_queue->evict_candidates;
+ if (evict < urgent_queue->evict_queue + WT_EVICT_QUEUE_MAX &&
+ __evict_push_candidate(session, urgent_queue, evict, ref)) {
+ ++urgent_queue->evict_candidates;
+ queued = true;
+ }
+ __wt_spin_unlock(session, &urgent_queue->evict_lock);
+
+done: __wt_spin_unlock(session, &cache->evict_queue_lock);
+ if (queued) {
+ WT_STAT_FAST_CONN_INCR(
+ session, cache_eviction_pages_queued_urgent);
+ if (S2C(session)->evict_workers > 1)
+ WT_RET(__wt_cond_signal(
+ session, cache->evict_waiter_cond));
+ else
+ WT_RET(__wt_evict_server_wake(session));
+ }
+ return (0);
+}
+
+/*
* __wt_evict_priority_set --
* Set a tree's eviction priority.
*/
@@ -1801,13 +1903,15 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile)
WT_DATA_HANDLE *dhandle, *saved_dhandle;
WT_PAGE *page;
WT_REF *next_walk;
- uint64_t dirty_bytes, dirty_pages, intl_bytes, intl_pages;
- uint64_t leaf_bytes, leaf_pages;
- uint64_t max_dirty_bytes, max_intl_bytes, max_leaf_bytes, total_bytes;
+ uint64_t intl_bytes, intl_bytes_max, intl_dirty_bytes;
+ uint64_t intl_dirty_bytes_max, intl_dirty_pages, intl_pages;
+ uint64_t leaf_bytes, leaf_bytes_max, leaf_dirty_bytes;
+ uint64_t leaf_dirty_bytes_max, leaf_dirty_pages, leaf_pages;
+ uint64_t total_bytes, total_dirty_bytes;
size_t size;
conn = S2C(session);
- total_bytes = 0;
+ total_bytes = total_dirty_bytes = 0;
if (ofile == NULL)
fp = stderr;
@@ -1823,9 +1927,10 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile)
!F_ISSET(dhandle, WT_DHANDLE_OPEN))
continue;
- dirty_bytes = dirty_pages = intl_bytes = intl_pages = 0;
- leaf_bytes = leaf_pages = 0;
- max_dirty_bytes = max_intl_bytes = max_leaf_bytes = 0;
+ intl_bytes = intl_bytes_max = intl_dirty_bytes = 0;
+ intl_dirty_bytes_max = intl_dirty_pages = intl_pages = 0;
+ leaf_bytes = leaf_bytes_max = leaf_dirty_bytes = 0;
+ leaf_dirty_bytes_max = leaf_dirty_pages = leaf_pages = 0;
next_walk = NULL;
session->dhandle = dhandle;
@@ -1838,17 +1943,23 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile)
if (WT_PAGE_IS_INTERNAL(page)) {
++intl_pages;
intl_bytes += size;
- max_intl_bytes = WT_MAX(max_intl_bytes, size);
+ intl_bytes_max = WT_MAX(intl_bytes_max, size);
+ if (__wt_page_is_modified(page)) {
+ ++intl_dirty_pages;
+ intl_dirty_bytes += size;
+ intl_dirty_bytes_max =
+ WT_MAX(intl_dirty_bytes_max, size);
+ }
} else {
++leaf_pages;
leaf_bytes += size;
- max_leaf_bytes = WT_MAX(max_leaf_bytes, size);
- }
- if (__wt_page_is_modified(page)) {
- ++dirty_pages;
- dirty_bytes += size;
- max_dirty_bytes =
- WT_MAX(max_dirty_bytes, size);
+ leaf_bytes_max = WT_MAX(leaf_bytes_max, size);
+ if (__wt_page_is_modified(page)) {
+ ++leaf_dirty_pages;
+ leaf_dirty_bytes += size;
+ leaf_dirty_bytes_max =
+ WT_MAX(leaf_dirty_bytes_max, size);
+ }
}
}
session->dhandle = NULL;
@@ -1860,21 +1971,41 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile)
dhandle->name, dhandle->checkpoint);
if (intl_pages != 0)
(void)fprintf(fp,
- "\t" "internal pages: %" PRIu64 " pages, %" PRIu64
- " max, %" PRIu64 "MB total\n",
- intl_pages, max_intl_bytes, intl_bytes >> 20);
+ "\t" "internal: "
+ "%" PRIu64 " pages, "
+ "%" PRIu64 "MB, "
+ "%" PRIu64 "/%" PRIu64 " clean/dirty pages, "
+ "%" PRIu64 "/%" PRIu64 " clean/dirty MB, "
+ "%" PRIu64 "MB max page, "
+ "%" PRIu64 "MB max dirty page\n",
+ intl_pages,
+ intl_bytes >> 20,
+ intl_pages - intl_dirty_pages,
+ intl_dirty_pages,
+ (intl_bytes - intl_dirty_bytes) >> 20,
+ intl_dirty_bytes >> 20,
+ intl_bytes_max >> 20,
+ intl_dirty_bytes_max >> 20);
if (leaf_pages != 0)
(void)fprintf(fp,
- "\t" "leaf pages: %" PRIu64 " pages, %" PRIu64
- " max, %" PRIu64 "MB total\n",
- leaf_pages, max_leaf_bytes, leaf_bytes >> 20);
- if (dirty_pages != 0)
- (void)fprintf(fp,
- "\t" "dirty pages: %" PRIu64 " pages, %" PRIu64
- " max, %" PRIu64 "MB total\n",
- dirty_pages, max_dirty_bytes, dirty_bytes >> 20);
+ "\t" "leaf: "
+ "%" PRIu64 " pages, "
+ "%" PRIu64 "MB, "
+ "%" PRIu64 "/%" PRIu64 " clean/dirty pages, "
+ "%" PRIu64 "/%" PRIu64 " clean/dirty MB, "
+ "%" PRIu64 "MB max page, "
+ "%" PRIu64 "MB max dirty page\n",
+ leaf_pages,
+ leaf_bytes >> 20,
+ leaf_pages - leaf_dirty_pages,
+ leaf_dirty_pages,
+ (leaf_bytes - leaf_dirty_bytes) >> 20,
+ leaf_dirty_bytes >> 20,
+ leaf_bytes_max >> 20,
+ leaf_dirty_bytes_max >> 20);
total_bytes += intl_bytes + leaf_bytes;
+ total_dirty_bytes += intl_dirty_bytes + leaf_dirty_bytes;
}
session->dhandle = saved_dhandle;
@@ -1886,10 +2017,13 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile)
total_bytes +=
(total_bytes * (uint64_t)conn->cache->overhead_pct) / 100;
(void)fprintf(fp,
- "cache dump: total found = %" PRIu64
- "MB vs tracked inuse %" PRIu64 "MB\n",
- total_bytes >> 20, __wt_cache_bytes_inuse(conn->cache) >> 20);
+ "cache dump: "
+ "total found = %" PRIu64 "MB vs tracked inuse %" PRIu64 "MB\n"
+ "total dirty bytes = %" PRIu64 "MB\n",
+ total_bytes >> 20, __wt_cache_bytes_inuse(conn->cache) >> 20,
+ total_dirty_bytes >> 20);
(void)fprintf(fp, "==========\n");
+
if (ofile != NULL && fclose(fp) != 0)
return (EIO);
return (0);
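
With the new format strings, the per-tree entries and the summary in the dump would look roughly like the following (numbers are made up for illustration):

        internal: 40 pages, 12MB, 38/2 clean/dirty pages, 11/1 clean/dirty MB, 1MB max page, 0MB max dirty page
        leaf: 1200 pages, 150MB, 1100/100 clean/dirty pages, 140/10 clean/dirty MB, 2MB max page, 1MB max dirty page
    cache dump: total found = 162MB vs tracked inuse 170MB
    total dirty bytes = 11MB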
diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c
index 305b81fe69e..d4c4e3e311a 100644
--- a/src/evict/evict_page.c
+++ b/src/evict/evict_page.c
@@ -10,7 +10,7 @@
static int __evict_page_clean_update(WT_SESSION_IMPL *, WT_REF *, bool);
static int __evict_page_dirty_update(WT_SESSION_IMPL *, WT_REF *, bool);
-static int __evict_review(WT_SESSION_IMPL *, WT_REF *, bool *, bool);
+static int __evict_review(WT_SESSION_IMPL *, WT_REF *, uint32_t *, bool);
/*
* __evict_exclusive_clear --
@@ -46,6 +46,55 @@ __evict_exclusive(WT_SESSION_IMPL *session, WT_REF *ref)
}
/*
+ * __wt_page_release_evict --
+ * Release a reference to a page, and attempt to immediately evict it.
+ */
+int
+__wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref)
+{
+ WT_BTREE *btree;
+ WT_DECL_RET;
+ WT_PAGE *page;
+ bool locked, too_big;
+
+ btree = S2BT(session);
+ page = ref->page;
+
+ /*
+ * Take some care with order of operations: if we release the hazard
+ * reference without first locking the page, it could be evicted in
+ * the meantime.
+ */
+ locked = __wt_atomic_casv32(&ref->state, WT_REF_MEM, WT_REF_LOCKED);
+ if ((ret = __wt_hazard_clear(session, page)) != 0 || !locked) {
+ if (locked)
+ ref->state = WT_REF_MEM;
+ return (ret == 0 ? EBUSY : ret);
+ }
+
+ (void)__wt_atomic_addv32(&btree->evict_busy, 1);
+
+ too_big = page->memory_footprint > btree->splitmempage;
+ if ((ret = __wt_evict(session, ref, false)) == 0) {
+ if (too_big)
+ WT_STAT_FAST_CONN_INCR(session, cache_eviction_force);
+ else
+ /*
+ * If the page isn't too big, we are evicting it because
+ * it had a chain of deleted entries that make traversal
+ * expensive.
+ */
+ WT_STAT_FAST_CONN_INCR(
+ session, cache_eviction_force_delete);
+ } else
+ WT_STAT_FAST_CONN_INCR(session, cache_eviction_force_fail);
+
+ (void)__wt_atomic_subv32(&btree->evict_busy, 1);
+
+ return (ret);
+}
+
+/*
* __wt_evict --
* Evict a page.
*/
@@ -56,7 +105,8 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
WT_DECL_RET;
WT_PAGE *page;
WT_PAGE_MODIFY *mod;
- bool clean_page, forced_eviction, inmem_split, tree_dead;
+ uint32_t flags;
+ bool clean_page, tree_dead;
conn = S2C(session);
@@ -64,8 +114,6 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
WT_ASSERT(session, !WT_SESSION_IS_CHECKPOINT(session));
page = ref->page;
- forced_eviction = page->read_gen == WT_READGEN_OLDEST;
- inmem_split = false;
tree_dead = F_ISSET(session->dhandle, WT_DHANDLE_DEAD);
WT_RET(__wt_verbose(session, WT_VERB_EVICT,
@@ -78,20 +126,14 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
* to make this check for clean pages, too: while unlikely eviction
* would choose an internal page with children, it's not disallowed.
*/
- WT_ERR(__evict_review(session, ref, &inmem_split, closing));
+ WT_ERR(__evict_review(session, ref, &flags, closing));
/*
* If there was an in-memory split, the tree has been left in the state
* we want: there is nothing more to do.
*/
- if (inmem_split)
- goto done;
-
- /*
- * Update the page's modification reference, reconciliation might have
- * changed it.
- */
- mod = page->modify;
+ if (LF_ISSET(WT_EVICT_INMEM_SPLIT))
+ return (0);
/* Count evictions of internal pages during normal operation. */
if (!closing && WT_PAGE_IS_INTERNAL(page)) {
@@ -108,12 +150,13 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
conn->cache->evict_max_page_size = page->memory_footprint;
/* Figure out whether reconciliation was done on the page */
+ mod = page->modify;
clean_page = mod == NULL || mod->rec_result == 0;
/* Update the reference and discard the page. */
if (__wt_ref_is_root(ref))
__wt_ref_out(session, ref);
- else if (tree_dead || (clean_page && !F_ISSET(conn, WT_CONN_IN_MEMORY)))
+ else if ((clean_page && !LF_ISSET(WT_EVICT_IN_MEMORY)) || tree_dead)
/*
* Pages that belong to dead trees never write back to disk
* and can't support page splits.
@@ -139,14 +182,9 @@ err: if (!closing)
WT_STAT_FAST_DATA_INCR(session, cache_eviction_fail);
}
-done: if (((inmem_split && ret == 0) || (forced_eviction && ret == EBUSY)) &&
- !F_ISSET(conn->cache, WT_CACHE_WOULD_BLOCK)) {
- F_SET(conn->cache, WT_CACHE_WOULD_BLOCK);
- WT_TRET(__wt_evict_server_wake(session));
- }
-
return (ret);
}
+
/*
* __evict_delete_ref --
* Mark a page reference deleted and check if the parent can reverse
@@ -210,13 +248,6 @@ __evict_page_clean_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
WT_DECL_RET;
/*
- * If doing normal system eviction, but only in the service of reducing
- * the number of dirty pages, leave the clean page in cache.
- */
- if (!closing && __wt_eviction_dirty_target(session))
- return (EBUSY);
-
- /*
* Discard the page and update the reference structure; if the page has
* an address, it's a disk page; if it has no address, it's a deleted
* page re-instantiated (for example, by searching) and never written.
@@ -242,6 +273,7 @@ __evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
WT_ADDR *addr;
WT_DECL_RET;
WT_PAGE_MODIFY *mod;
+ WT_MULTI multi;
mod = ref->page->modify;
@@ -284,24 +316,15 @@ __evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
* write. Take advantage of the fact we have exclusive access
* to the page and rewrite it in memory.
*/
- if (mod->mod_multi_entries == 1)
- WT_RET(__wt_split_rewrite(session, ref));
- else
+ if (mod->mod_multi_entries == 1) {
+ WT_ASSERT(session, closing == false);
+ WT_RET(__wt_split_rewrite(
+ session, ref, &mod->mod_multi[0]));
+ } else
WT_RET(__wt_split_multi(session, ref, closing));
break;
case WT_PM_REC_REPLACE: /* 1-for-1 page swap */
/*
- * If doing normal system eviction, but only in the service of
- * reducing the number of dirty pages, leave the clean page in
- * cache. Only do this when replacing a page with another one,
- * because when a page splits into multiple pages, we want to
- * push it out of cache (and read it back in, when needed), we
- * would rather have more, smaller pages than fewer large pages.
- */
- if (!closing && __wt_eviction_dirty_target(session))
- return (EBUSY);
-
- /*
* Update the parent to reference the replacement page.
*
* Publish: a barrier to ensure the structure fields are set
@@ -311,10 +334,26 @@ __evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
*addr = mod->mod_replace;
mod->mod_replace.addr = NULL;
mod->mod_replace.size = 0;
-
- __wt_ref_out(session, ref);
ref->addr = addr;
- WT_PUBLISH(ref->state, WT_REF_DISK);
+
+ /*
+ * Eviction wants to keep this page if we have a disk image:
+ * re-instantiate the page in memory, else discard the page.
+ */
+ if (mod->mod_disk_image == NULL) {
+ __wt_ref_out(session, ref);
+ WT_PUBLISH(ref->state, WT_REF_DISK);
+ } else {
+ /*
+ * The split code works with WT_MULTI structures, build
+ * one for the disk image.
+ */
+ memset(&multi, 0, sizeof(multi));
+ multi.disk_image = mod->mod_disk_image;
+
+ WT_RET(__wt_split_rewrite(session, ref, &multi));
+ }
+
break;
WT_ILLEGAL_VALUE(session);
}
@@ -351,13 +390,17 @@ __evict_child_check(WT_SESSION_IMPL *session, WT_REF *parent)
*/
static int
__evict_review(
- WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp, bool closing)
+ WT_SESSION_IMPL *session, WT_REF *ref, uint32_t *flagsp, bool closing)
{
+ WT_CACHE *cache;
WT_DECL_RET;
WT_PAGE *page;
uint32_t flags;
bool modified;
+ flags = WT_EVICTING;
+ *flagsp = flags;
+
/*
* Get exclusive access to the page if our caller doesn't have the tree
* locked down.
@@ -423,8 +466,9 @@ __evict_review(
WT_RET(__wt_txn_update_oldest(
session, WT_TXN_OLDEST_STRICT));
- if (!__wt_page_can_evict(session, ref, inmem_splitp))
+ if (!__wt_page_can_evict(session, ref, flagsp))
return (EBUSY);
+ flags = *flagsp;
/*
* Check for an append-only workload needing an in-memory
@@ -433,8 +477,12 @@ __evict_review(
* the page stays in memory and the tree is left in the desired
* state: avoid the usual cleanup.
*/
- if (*inmem_splitp)
+ if (LF_ISSET(WT_EVICT_INMEM_SPLIT))
return (__wt_split_insert(session, ref));
+
+ /* We are done if reconciliation is disabled. */
+ if (F_ISSET(S2BT(session), WT_BTREE_NO_RECONCILE))
+ return (EBUSY);
}
/* If the page is clean, we're done and we can evict. */
@@ -447,10 +495,15 @@ __evict_review(
* If we have an exclusive lock (we're discarding the tree), assert
* there are no updates we cannot read.
*
- * Otherwise, if the page we're evicting is a leaf page marked for
- * forced eviction, set the update-restore flag, so reconciliation will
- * write blocks it can write and create a list of skipped updates for
- * blocks it cannot write. This is how forced eviction of active, huge
+ * Don't set any other flags for internal pages: they don't have update
+ * lists to be saved and restored, nor can we re-create them in memory.
+ *
+ * For leaf pages:
+ *
+ * If this is an in-memory configuration, or the page is being forcibly
+ * evicted, set the update-restore flag so reconciliation will write the
+ * blocks it can write and create a list of skipped updates for blocks it
+ * cannot write, along with disk images. This is how eviction of active, huge
* pages works: we take a big page and reconcile it into blocks, some of
* which we write and discard, the rest of which we re-create as smaller
* in-memory pages, (restoring the updates that stopped us from writing
@@ -461,32 +514,43 @@ __evict_review(
* allowing the eviction of pages we'd otherwise have to retain in cache
* to support older readers.
*
- * Don't set the update-restore or lookaside table flags for internal
- * pages, they don't have update lists that can be saved and restored.
+ * Finally, if we don't need to do eviction at the moment, create disk
+ * images of split pages in order to re-instantiate them.
*/
- flags = WT_EVICTING;
+ cache = S2C(session)->cache;
if (closing)
LF_SET(WT_VISIBILITY_ERR);
else if (!WT_PAGE_IS_INTERNAL(page)) {
if (F_ISSET(S2C(session), WT_CONN_IN_MEMORY))
- LF_SET(WT_EVICT_IN_MEMORY | WT_EVICT_UPDATE_RESTORE);
- else if (page->read_gen == WT_READGEN_OLDEST)
- LF_SET(WT_EVICT_UPDATE_RESTORE);
- else if (F_ISSET(S2C(session)->cache, WT_CACHE_STUCK))
+ LF_SET(WT_EVICT_IN_MEMORY |
+ WT_EVICT_SCRUB | WT_EVICT_UPDATE_RESTORE);
+ else if (F_ISSET(cache, WT_CACHE_STUCK))
LF_SET(WT_EVICT_LOOKASIDE);
+ else if (!__wt_txn_visible_all(
+ session, page->modify->update_txn))
+ LF_SET(WT_EVICT_UPDATE_RESTORE);
+
+ /*
+ * If we aren't trying to free space in the cache, scrub the
+ * page and keep it around.
+ */
+ if (!LF_ISSET(WT_EVICT_LOOKASIDE) &&
+ FLD_ISSET(cache->state, WT_EVICT_STATE_SCRUB))
+ LF_SET(WT_EVICT_SCRUB);
}
+ *flagsp = flags;
WT_RET(__wt_reconcile(session, ref, NULL, flags));
/*
* Success: assert the page is clean or reconciliation was configured
- * for an update/restore split. If the page is clean, assert that
- * reconciliation was configured for a lookaside table, or it's not a
- * durable object (currently the lookaside table), or all page updates
- * were globally visible.
+ * for update/restore. If the page is clean, assert that reconciliation
+ * was configured for a lookaside table, or it's not a durable object
+ * (currently the lookaside table), or all page updates were globally
+ * visible.
*/
WT_ASSERT(session,
- LF_ISSET(WT_EVICT_UPDATE_RESTORE) || !__wt_page_is_modified(page));
+ !__wt_page_is_modified(page) || LF_ISSET(WT_EVICT_UPDATE_RESTORE));
WT_ASSERT(session,
__wt_page_is_modified(page) ||
LF_ISSET(WT_EVICT_LOOKASIDE) ||
diff --git a/src/include/api.h b/src/include/api.h
index 50b2eab83b8..0a4593178dc 100644
--- a/src/include/api.h
+++ b/src/include/api.h
@@ -66,6 +66,8 @@
else if (ret == 0 && !F_ISSET(&(s)->txn, WT_TXN_ERROR)) \
ret = __wt_txn_commit((s), NULL); \
else { \
+ if (retry) \
+ WT_TRET(__wt_session_copy_values(s)); \
WT_TRET(__wt_txn_rollback((s), NULL)); \
if ((ret == 0 || ret == WT_ROLLBACK) && \
(retry)) { \
diff --git a/src/include/block.h b/src/include/block.h
index a8080c1651c..3342f9b1e5e 100644
--- a/src/include/block.h
+++ b/src/include/block.h
@@ -192,7 +192,7 @@ struct __wt_bm {
int (*verify_start)
(WT_BM *, WT_SESSION_IMPL *, WT_CKPT *, const char *[]);
int (*write) (WT_BM *,
- WT_SESSION_IMPL *, WT_ITEM *, uint8_t *, size_t *, bool);
+ WT_SESSION_IMPL *, WT_ITEM *, uint8_t *, size_t *, bool, bool);
int (*write_size)(WT_BM *, WT_SESSION_IMPL *, size_t *);
WT_BLOCK *block; /* Underlying file */
diff --git a/src/include/btmem.h b/src/include/btmem.h
index 9700b6f4761..817ce892952 100644
--- a/src/include/btmem.h
+++ b/src/include/btmem.h
@@ -250,9 +250,19 @@ struct __wt_page_modify {
* a replace address and multiple replacement blocks.
*/
union {
- WT_ADDR replace; /* Single, written replacement block */
+ struct { /* Single, written replacement block */
+ WT_ADDR replace;
+
+ /*
+ * A disk image that may or may not have been written, used to
+ * re-instantiate the page in memory.
+ */
+ void *disk_image;
+ } r;
#undef mod_replace
-#define mod_replace u1.replace
+#define mod_replace u1.r.replace
+#undef mod_disk_image
+#define mod_disk_image u1.r.disk_image
struct { /* Multiple replacement blocks */
struct __wt_multi {
@@ -266,14 +276,19 @@ struct __wt_page_modify {
} key;
/*
- * Eviction, but the block wasn't written: either an in-memory
- * configuration or unresolved updates prevented the write.
- * There may be a list of unresolved updates, there's always an
- * associated disk image.
+ * A disk image that may or may not have been written, used to
+ * re-instantiate the page in memory.
+ */
+ void *disk_image;
+
+ /*
+ * List of unresolved updates. Updates are either a WT_INSERT
+ * or a row-store leaf page entry; when creating lookaside
+ * records, there is an additional value, the committed item's
+ * transaction ID.
*
- * Saved updates are either a WT_INSERT, or a row-store leaf
- * page entry; in the case of creating lookaside records, there
- * is an additional value, the committed item's transaction ID.
+ * If there are unresolved updates, the block wasn't written and
+ * there will always be a disk image.
*/
struct __wt_save_upd {
WT_INSERT *ins;
@@ -281,10 +296,9 @@ struct __wt_page_modify {
uint64_t onpage_txn;
} *supd;
uint32_t supd_entries;
- void *disk_image;
/*
- * Block was written: address, size and checksum.
+ * Disk image was written: address, size and checksum.
* On subsequent reconciliations of this page, we avoid writing
* the block if it's unchanged by comparing size and checksum;
* the reuse flag is set when the block is unchanged and we're
diff --git a/src/include/btree.h b/src/include/btree.h
index fd921677751..432474f9dc1 100644
--- a/src/include/btree.h
+++ b/src/include/btree.h
@@ -126,12 +126,16 @@ struct __wt_btree {
u_int block_header; /* WT_PAGE_HEADER_BYTE_SIZE */
uint64_t checkpoint_gen; /* Checkpoint generation */
+ bool include_checkpoint_txn;/* ID checks include checkpoint */
uint64_t rec_max_txn; /* Maximum txn seen (clean trees) */
uint64_t write_gen; /* Write generation */
+ uint64_t bytes_inmem; /* Cache bytes in memory. */
+
WT_REF *evict_ref; /* Eviction thread's location */
uint64_t evict_priority; /* Relative priority of cached pages */
u_int evict_walk_period; /* Skip this many LRU walks */
+ u_int evict_walk_saved; /* Saved walk skips for checkpoints */
u_int evict_walk_skips; /* Number of walks skipped */
u_int evict_disabled; /* Eviction disabled count */
volatile uint32_t evict_busy; /* Count of threads in eviction */
@@ -154,11 +158,12 @@ struct __wt_btree {
#define WT_BTREE_NO_CHECKPOINT 0x00800 /* Disable checkpoints */
#define WT_BTREE_NO_EVICTION 0x01000 /* Disable eviction */
#define WT_BTREE_NO_LOGGING 0x02000 /* Disable logging */
-#define WT_BTREE_REBALANCE 0x04000 /* Handle is for rebalance */
-#define WT_BTREE_SALVAGE 0x08000 /* Handle is for salvage */
-#define WT_BTREE_SKIP_CKPT 0x10000 /* Handle skipped checkpoint */
-#define WT_BTREE_UPGRADE 0x20000 /* Handle is for upgrade */
-#define WT_BTREE_VERIFY 0x40000 /* Handle is for verify */
+#define WT_BTREE_NO_RECONCILE 0x04000 /* Allow splits, even with no evict */
+#define WT_BTREE_REBALANCE 0x08000 /* Handle is for rebalance */
+#define WT_BTREE_SALVAGE 0x10000 /* Handle is for salvage */
+#define WT_BTREE_SKIP_CKPT 0x20000 /* Handle skipped checkpoint */
+#define WT_BTREE_UPGRADE 0x40000 /* Handle is for upgrade */
+#define WT_BTREE_VERIFY 0x80000 /* Handle is for verify */
uint32_t flags;
};
diff --git a/src/include/btree.i b/src/include/btree.i
index e0102a11511..3234ad1ed41 100644
--- a/src/include/btree.i
+++ b/src/include/btree.i
@@ -55,6 +55,27 @@ __wt_btree_block_free(
}
/*
+ * __wt_btree_bytes_inuse --
+ * Return the number of bytes in use.
+ */
+static inline uint64_t
+__wt_btree_bytes_inuse(WT_SESSION_IMPL *session)
+{
+ WT_CACHE *cache;
+ uint64_t bytes_inuse;
+
+ cache = S2C(session)->cache;
+
+ /* Adjust the cache size to take allocation overhead into account. */
+ bytes_inuse = S2BT(session)->bytes_inmem;
+ if (cache->overhead_pct != 0)
+ bytes_inuse +=
+ (bytes_inuse * (uint64_t)cache->overhead_pct) / 100;
+
+ return (bytes_inuse);
+}
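A minimal sketch of the overhead arithmetic used above, assuming an 8% allocation overhead; the helper name and standalone framing are illustrative, not WiredTiger source:

	#include <stdint.h>

	/* Sketch: scale a byte count by an allocation-overhead percentage. */
	static uint64_t
	adjust_for_overhead(uint64_t bytes, uint64_t overhead_pct)
	{
		if (overhead_pct != 0)
			bytes += (bytes * overhead_pct) / 100;
		return (bytes);
	}
	/* adjust_for_overhead(1000, 8) == 1080: 8% overhead on 1000 bytes. */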
+
+/*
* __wt_cache_page_inmem_incr --
* Increment a page's memory footprint in the cache.
*/
@@ -66,17 +87,17 @@ __wt_cache_page_inmem_incr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size)
WT_ASSERT(session, size < WT_EXABYTE);
cache = S2C(session)->cache;
+ (void)__wt_atomic_add64(&S2BT(session)->bytes_inmem, size);
(void)__wt_atomic_add64(&cache->bytes_inmem, size);
(void)__wt_atomic_addsize(&page->memory_footprint, size);
if (__wt_page_is_modified(page)) {
- (void)__wt_atomic_add64(&cache->bytes_dirty, size);
(void)__wt_atomic_addsize(&page->modify->bytes_dirty, size);
+ (void)__wt_atomic_add64(WT_PAGE_IS_INTERNAL(page) ?
+ &cache->bytes_dirty_intl : &cache->bytes_dirty_leaf, size);
}
- /* Track internal and overflow size in cache. */
+ /* Track internal size in cache. */
if (WT_PAGE_IS_INTERNAL(page))
(void)__wt_atomic_add64(&cache->bytes_internal, size);
- else if (page->type == WT_PAGE_OVFL)
- (void)__wt_atomic_add64(&cache->bytes_overflow, size);
}
/*
@@ -144,10 +165,16 @@ __wt_cache_page_byte_dirty_decr(
WT_SESSION_IMPL *session, WT_PAGE *page, size_t size)
{
WT_CACHE *cache;
+ const char *destname;
+ uint64_t *dest;
size_t decr, orig;
int i;
cache = S2C(session)->cache;
+ dest = WT_PAGE_IS_INTERNAL(page) ?
+ &cache->bytes_dirty_intl : &cache->bytes_dirty_leaf;
+ destname = WT_PAGE_IS_INTERNAL(page) ?
+ "WT_CACHE.bytes_dirty_intl" : "WT_CACHE.bytes_dirty_leaf";
/*
* We don't have exclusive access and there are ways of decrementing the
@@ -175,8 +202,8 @@ __wt_cache_page_byte_dirty_decr(
decr = WT_MIN(size, orig);
if (__wt_atomic_cassize(
&page->modify->bytes_dirty, orig, orig - decr)) {
- __wt_cache_decr_check_uint64(session,
- &cache->bytes_dirty, decr, "WT_CACHE.bytes_dirty");
+ __wt_cache_decr_check_uint64(
+ session, dest, decr, destname);
break;
}
}
@@ -196,18 +223,17 @@ __wt_cache_page_inmem_decr(WT_SESSION_IMPL *session, WT_PAGE *page, size_t size)
WT_ASSERT(session, size < WT_EXABYTE);
__wt_cache_decr_check_uint64(
+ session, &S2BT(session)->bytes_inmem, size, "WT_BTREE.bytes_inmem");
+ __wt_cache_decr_check_uint64(
session, &cache->bytes_inmem, size, "WT_CACHE.bytes_inmem");
__wt_cache_decr_check_size(
session, &page->memory_footprint, size, "WT_PAGE.memory_footprint");
if (__wt_page_is_modified(page))
__wt_cache_page_byte_dirty_decr(session, page, size);
- /* Track internal and overflow size in cache. */
+ /* Track internal size in cache. */
if (WT_PAGE_IS_INTERNAL(page))
__wt_cache_decr_check_uint64(session,
&cache->bytes_internal, size, "WT_CACHE.bytes_internal");
- else if (page->type == WT_PAGE_OVFL)
- __wt_cache_decr_check_uint64(session,
- &cache->bytes_overflow, size, "WT_CACHE.bytes_overflow");
}
/*
@@ -222,14 +248,16 @@ __wt_cache_dirty_incr(WT_SESSION_IMPL *session, WT_PAGE *page)
size_t size;
cache = S2C(session)->cache;
- (void)__wt_atomic_add64(&cache->pages_dirty, 1);
+ (void)__wt_atomic_add64(WT_PAGE_IS_INTERNAL(page) ?
+ &cache->pages_dirty_intl : &cache->pages_dirty_leaf, 1);
/*
* Take care to read the memory_footprint once in case we are racing
* with updates.
*/
size = page->memory_footprint;
- (void)__wt_atomic_add64(&cache->bytes_dirty, size);
+ (void)__wt_atomic_add64(WT_PAGE_IS_INTERNAL(page) ?
+ &cache->bytes_dirty_intl : &cache->bytes_dirty_leaf, size);
(void)__wt_atomic_addsize(&page->modify->bytes_dirty, size);
}
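Dirty accounting is now split between internal and leaf counters, selected with a ternary before the atomic add. A minimal sketch of the routing pattern; the global counters and the non-atomic increment are simplifications for illustration:

	#include <stdbool.h>
	#include <stdint.h>

	static uint64_t bytes_dirty_intl, bytes_dirty_leaf;

	/* Sketch: route a dirty-byte increment to the internal or leaf counter. */
	static void
	dirty_bytes_incr(bool is_internal, uint64_t size)
	{
		uint64_t *dest;

		dest = is_internal ? &bytes_dirty_intl : &bytes_dirty_leaf;
		*dest += size;	/* the real code uses __wt_atomic_add64 */
	}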
@@ -243,16 +271,19 @@ __wt_cache_dirty_decr(WT_SESSION_IMPL *session, WT_PAGE *page)
{
WT_CACHE *cache;
WT_PAGE_MODIFY *modify;
+ uint64_t *pages_dirty;
cache = S2C(session)->cache;
+ pages_dirty = WT_PAGE_IS_INTERNAL(page) ?
+ &cache->pages_dirty_intl : &cache->pages_dirty_leaf;
- if (cache->pages_dirty < 1) {
+ if (*pages_dirty < 1) {
__wt_errx(session,
"cache eviction dirty-page decrement failed: dirty page"
"count went negative");
- cache->pages_dirty = 0;
+ *pages_dirty = 0;
} else
- (void)__wt_atomic_sub64(&cache->pages_dirty, 1);
+ (void)__wt_atomic_sub64(pages_dirty, 1);
modify = page->modify;
if (modify != NULL && modify->bytes_dirty != 0)
@@ -261,6 +292,34 @@ __wt_cache_dirty_decr(WT_SESSION_IMPL *session, WT_PAGE *page)
}
/*
+ * __wt_cache_page_image_decr --
+ * Decrement a page image's size in the cache.
+ */
+static inline void
+__wt_cache_page_image_decr(WT_SESSION_IMPL *session, uint32_t size)
+{
+ WT_CACHE *cache;
+
+ cache = S2C(session)->cache;
+
+ __wt_cache_decr_check_uint64(
+	    session, &cache->bytes_image, size, "WT_CACHE.bytes_image");
+}
+
+/*
+ * __wt_cache_page_image_incr --
+ * Increment a page image's size in the cache.
+ */
+static inline void
+__wt_cache_page_image_incr(WT_SESSION_IMPL *session, uint32_t size)
+{
+ WT_CACHE *cache;
+
+ cache = S2C(session)->cache;
+ (void)__wt_atomic_add64(&cache->bytes_image, size);
+}
+
+/*
* __wt_cache_page_evict --
* Evict pages from the cache.
*/
@@ -269,13 +328,20 @@ __wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page)
{
WT_CACHE *cache;
WT_PAGE_MODIFY *modify;
+ uint64_t *dest;
+ const char *destname;
cache = S2C(session)->cache;
+ dest = WT_PAGE_IS_INTERNAL(page) ?
+ &cache->bytes_dirty_intl : &cache->bytes_dirty_leaf;
+ destname = WT_PAGE_IS_INTERNAL(page) ?
+ "WT_CACHE.bytes_dirty_intl" : "WT_CACHE.bytes_dirty_leaf";
modify = page->modify;
/* Update the bytes in-memory to reflect the eviction. */
- __wt_cache_decr_check_uint64(session,
- &cache->bytes_inmem,
+ __wt_cache_decr_check_uint64(session, &S2BT(session)->bytes_inmem,
+ page->memory_footprint, "WT_BTREE.bytes_inmem");
+ __wt_cache_decr_check_uint64(session, &cache->bytes_inmem,
page->memory_footprint, "WT_CACHE.bytes_inmem");
/* Update the bytes_internal value to reflect the eviction */
@@ -286,15 +352,14 @@ __wt_cache_page_evict(WT_SESSION_IMPL *session, WT_PAGE *page)
/* Update the cache's dirty-byte count. */
if (modify != NULL && modify->bytes_dirty != 0) {
- if (cache->bytes_dirty < modify->bytes_dirty) {
+ if ((size_t)*dest < modify->bytes_dirty) {
__wt_errx(session,
- "cache eviction dirty-bytes decrement failed: "
- "dirty byte count went negative");
- cache->bytes_dirty = 0;
+ "%s decrement failed: "
+ "dirty byte count went negative", destname);
+ *dest = 0;
} else
- __wt_cache_decr_check_uint64(session,
- &cache->bytes_dirty,
- modify->bytes_dirty, "WT_CACHE.bytes_dirty");
+ __wt_cache_decr_check_uint64(session, dest,
+ modify->bytes_dirty, destname);
}
/* Update pages and bytes evicted. */
@@ -318,16 +383,6 @@ __wt_update_list_memsize(WT_UPDATE *upd)
}
/*
- * __wt_page_evict_soon --
- * Set a page to be evicted as soon as possible.
- */
-static inline void
-__wt_page_evict_soon(WT_PAGE *page)
-{
- page->read_gen = WT_READGEN_OLDEST;
-}
-
-/*
* __wt_page_modify_init --
* A page is about to be modified, allocate the modification structure.
*/
@@ -1099,16 +1154,14 @@ __wt_leaf_page_can_split(WT_SESSION_IMPL *session, WT_PAGE *page)
* Check whether a page can be evicted.
*/
static inline bool
-__wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp)
+__wt_page_can_evict(
+ WT_SESSION_IMPL *session, WT_REF *ref, uint32_t *evict_flagsp)
{
WT_BTREE *btree;
WT_PAGE *page;
WT_PAGE_MODIFY *mod;
bool modified;
- if (inmem_splitp != NULL)
- *inmem_splitp = false;
-
btree = S2BT(session);
page = ref->page;
mod = page->modify;
@@ -1124,8 +1177,8 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp)
* won't be written or discarded from the cache.
*/
if (__wt_leaf_page_can_split(session, page)) {
- if (inmem_splitp != NULL)
- *inmem_splitp = true;
+ if (evict_flagsp != NULL)
+ FLD_SET(*evict_flagsp, WT_EVICT_INMEM_SPLIT);
return (true);
}
@@ -1164,6 +1217,10 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp)
F_ISSET_ATOMIC(page, WT_PAGE_SPLIT_BLOCK))
return (false);
+ /* If the cache is stuck, try anything else. */
+ if (F_ISSET(S2C(session)->cache, WT_CACHE_STUCK))
+ return (true);
+
/*
* If the oldest transaction hasn't changed since the last time
* this page was written, it's unlikely we can make progress.
@@ -1172,7 +1229,6 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp)
* attempt to avoid repeated attempts to evict the same page.
*/
if (modified &&
- !F_ISSET(S2C(session)->cache, WT_CACHE_STUCK) &&
(mod->last_oldest_id == __wt_txn_oldest_id(session) ||
!__wt_txn_visible_all(session, mod->update_txn)))
return (false);
@@ -1181,56 +1237,6 @@ __wt_page_can_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool *inmem_splitp)
}
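With the new signature, callers receive eviction hints through a flags word rather than a single in-memory-split boolean. A sketch of how a caller might consume it; only the signature and flag names come from this patch, the surrounding context is assumed:

	uint32_t evict_flags = 0;

	if (__wt_page_can_evict(session, ref, &evict_flags) &&
	    FLD_ISSET(evict_flags, WT_EVICT_INMEM_SPLIT)) {
		/* The page is splittable: prefer an in-memory split to eviction. */
	}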
/*
- * __wt_page_release_evict --
- * Release a reference to a page, and attempt to immediately evict it.
- */
-static inline int
-__wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref)
-{
- WT_BTREE *btree;
- WT_DECL_RET;
- WT_PAGE *page;
- bool locked, too_big;
-
- btree = S2BT(session);
- page = ref->page;
-
- /*
- * Take some care with order of operations: if we release the hazard
- * reference without first locking the page, it could be evicted in
- * between.
- */
- locked = __wt_atomic_casv32(
- &ref->state, WT_REF_MEM, WT_REF_LOCKED) ? true : false;
- if ((ret = __wt_hazard_clear(session, page)) != 0 || !locked) {
- if (locked)
- ref->state = WT_REF_MEM;
- return (ret == 0 ? EBUSY : ret);
- }
-
- (void)__wt_atomic_addv32(&btree->evict_busy, 1);
-
- too_big = page->memory_footprint > btree->maxmempage;
- if ((ret = __wt_evict(session, ref, false)) == 0) {
- if (too_big)
- WT_STAT_FAST_CONN_INCR(session, cache_eviction_force);
- else
- /*
- * If the page isn't too big, we are evicting it because
- * it had a chain of deleted entries that make traversal
- * expensive.
- */
- WT_STAT_FAST_CONN_INCR(
- session, cache_eviction_force_delete);
- } else
- WT_STAT_FAST_CONN_INCR(session, cache_eviction_force_fail);
-
- (void)__wt_atomic_subv32(&btree->evict_busy, 1);
-
- return (ret);
-}
-
-/*
* __wt_page_release --
* Release a reference to a page.
*/
diff --git a/src/include/cache.h b/src/include/cache.h
index f4a35de7201..e3a003ccc56 100644
--- a/src/include/cache.h
+++ b/src/include/cache.h
@@ -13,7 +13,6 @@
#define WT_EVICT_INT_SKEW (1<<20) /* Prefer leaf pages over internal
pages by this many increments of the
read generation. */
-#define WT_EVICT_WALK_PER_FILE 10 /* Pages to queue per file */
#define WT_EVICT_WALK_BASE 300 /* Pages tracked across file visits */
#define WT_EVICT_WALK_INCR 100 /* Pages added each walk */
@@ -24,9 +23,12 @@
struct __wt_evict_entry {
WT_BTREE *btree; /* Enclosing btree object */
WT_REF *ref; /* Page to flush/evict */
+ uint64_t score; /* Relative eviction priority */
};
-#define WT_EVICT_QUEUE_MAX 2
+#define WT_EVICT_URGENT_QUEUE 0 /* Urgent queue index */
+#define WT_EVICT_QUEUE_MAX 3 /* Urgent plus two ordinary queues */
+
/*
* WT_EVICT_QUEUE --
* Encapsulation of an eviction candidate queue.
@@ -34,6 +36,7 @@ struct __wt_evict_entry {
struct __wt_evict_queue {
WT_SPINLOCK evict_lock; /* Eviction LRU queue */
WT_EVICT_ENTRY *evict_queue; /* LRU pages being tracked */
+ WT_EVICT_ENTRY *evict_current; /* LRU current page to be evicted */
uint32_t evict_candidates; /* LRU list pages to evict */
uint32_t evict_entries; /* LRU entries in the queue */
volatile uint32_t evict_max; /* LRU maximum eviction slot used */
@@ -70,16 +73,19 @@ struct __wt_cache {
* be exact, they can't be garbage, we track what comes in and what goes
* out and calculate the difference as needed.
*/
- uint64_t bytes_inmem; /* Bytes/pages in memory */
- uint64_t pages_inmem;
- uint64_t bytes_internal; /* Bytes of internal pages */
- uint64_t bytes_overflow; /* Bytes of overflow pages */
+ uint64_t bytes_dirty_intl; /* Bytes/pages currently dirty */
+ uint64_t pages_dirty_intl;
+ uint64_t bytes_dirty_leaf;
+ uint64_t pages_dirty_leaf;
uint64_t bytes_evict; /* Bytes/pages discarded by eviction */
uint64_t pages_evict;
uint64_t pages_evicted; /* Pages evicted during a pass */
- uint64_t bytes_dirty; /* Bytes/pages currently dirty */
- uint64_t pages_dirty;
+ uint64_t bytes_image; /* Bytes of disk images */
+ uint64_t bytes_inmem; /* Bytes/pages in memory */
+ uint64_t pages_inmem;
+ uint64_t bytes_internal; /* Bytes of internal pages */
uint64_t bytes_read; /* Bytes read into memory */
+ uint64_t bytes_written;
uint64_t app_waits; /* User threads waited for cache */
uint64_t app_evicts; /* Pages evicted by user threads */
@@ -121,7 +127,6 @@ struct __wt_cache {
WT_SPINLOCK evict_queue_lock; /* Eviction current queue lock */
WT_EVICT_QUEUE evict_queues[WT_EVICT_QUEUE_MAX];
WT_EVICT_QUEUE *evict_current_queue;/* LRU current queue in use */
- WT_EVICT_ENTRY *evict_current; /* LRU current page to be evicted */
uint32_t evict_queue_fill; /* LRU eviction queue index to fill */
uint32_t evict_slots; /* LRU list eviction slots */
WT_DATA_HANDLE
@@ -145,10 +150,13 @@ struct __wt_cache {
/*
* Work state.
*/
-#define WT_EVICT_PASS_AGGRESSIVE 0x01
-#define WT_EVICT_PASS_ALL 0x02
-#define WT_EVICT_PASS_DIRTY 0x04
-#define WT_EVICT_PASS_WOULD_BLOCK 0x08
+#define WT_EVICT_STATE_AGGRESSIVE 0x01 /* Eviction isn't making progress:
+ try harder */
+#define WT_EVICT_STATE_CLEAN 0x02 /* Evict clean pages */
+#define WT_EVICT_STATE_DIRTY 0x04 /* Evict dirty pages */
+#define WT_EVICT_STATE_SCRUB 0x08 /* Scrub dirty pages */
+#define WT_EVICT_STATE_URGENT 0x10 /* Pages are in the urgent queue */
+#define WT_EVICT_STATE_ALL (WT_EVICT_STATE_CLEAN | WT_EVICT_STATE_DIRTY)
uint32_t state;
/*
* Pass interrupt counter.
@@ -162,7 +170,6 @@ struct __wt_cache {
#define WT_CACHE_POOL_RUN 0x02 /* Cache pool thread running */
#define WT_CACHE_STUCK 0x04 /* Eviction server is stuck */
#define WT_CACHE_WALK_REVERSE 0x08 /* Scan backwards for candidates */
-#define WT_CACHE_WOULD_BLOCK 0x10 /* Pages that would block apps */
uint32_t flags;
};
diff --git a/src/include/cache.i b/src/include/cache.i
index 72c8307756d..b5cb79afb3c 100644
--- a/src/include/cache.i
+++ b/src/include/cache.i
@@ -104,7 +104,7 @@ __wt_cache_dirty_inuse(WT_CACHE *cache)
{
uint64_t dirty_inuse;
- dirty_inuse = cache->bytes_dirty;
+ dirty_inuse = cache->bytes_dirty_intl + cache->bytes_dirty_leaf;
if (cache->overhead_pct != 0)
dirty_inuse +=
(dirty_inuse * (uint64_t)cache->overhead_pct) / 100;
@@ -113,6 +113,67 @@ __wt_cache_dirty_inuse(WT_CACHE *cache)
}
/*
+ * __wt_cache_dirty_leaf_inuse --
+ * Return the number of dirty bytes in use by leaf pages.
+ */
+static inline uint64_t
+__wt_cache_dirty_leaf_inuse(WT_CACHE *cache)
+{
+ uint64_t dirty_inuse;
+
+ dirty_inuse = cache->bytes_dirty_leaf;
+ if (cache->overhead_pct != 0)
+ dirty_inuse +=
+ (dirty_inuse * (uint64_t)cache->overhead_pct) / 100;
+
+ return (dirty_inuse);
+}
+
+/*
+ * __wt_cache_bytes_image --
+ * Return the number of page image bytes in use.
+ */
+static inline uint64_t
+__wt_cache_bytes_image(WT_CACHE *cache)
+{
+ uint64_t bytes_image;
+
+ bytes_image = cache->bytes_image;
+ if (cache->overhead_pct != 0)
+ bytes_image +=
+ (bytes_image * (uint64_t)cache->overhead_pct) / 100;
+
+ return (bytes_image);
+}
+
+/*
+ * __wt_cache_bytes_other --
+ * Return the number of bytes in use not for page images.
+ */
+static inline uint64_t
+__wt_cache_bytes_other(WT_CACHE *cache)
+{
+ uint64_t bytes_image, bytes_inmem, bytes_other;
+
+ bytes_image = cache->bytes_image;
+ bytes_inmem = cache->bytes_inmem;
+
+ /*
+ * The reads above could race with changes to the values, so protect
+ * against underflow.
+ */
+ if (bytes_image > bytes_inmem)
+ return (0);
+
+ bytes_other = bytes_inmem - bytes_image;
+ if (cache->overhead_pct != 0)
+ bytes_other +=
+ (bytes_other * (uint64_t)cache->overhead_pct) / 100;
+
+ return (bytes_other);
+}
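A worked example of why the underflow guard is needed, with assumed counter values:

	/*
	 * Suppose bytes_image is read as 1200 and, before bytes_inmem is read,
	 * pages are discarded so bytes_inmem reads as 1000.  Without the guard,
	 * 1000 - 1200 would wrap to a huge uint64_t; with it, the function
	 * reports 0 until the counters settle.
	 */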
+
+/*
* __wt_session_can_wait --
 * Return if a session is available for a potentially slow operation.
*/
@@ -139,20 +200,9 @@ __wt_session_can_wait(WT_SESSION_IMPL *session)
}
/*
- * __wt_eviction_dirty_target --
- * Return if the eviction server is running to reduce the number of dirty
- * pages (versus running to discard pages from the cache).
- */
-static inline bool
-__wt_eviction_dirty_target(WT_SESSION_IMPL *session)
-{
- return (FLD_ISSET(S2C(session)->cache->state, WT_EVICT_PASS_DIRTY));
-}
-
-/*
* __wt_eviction_needed --
* Return if an application thread should do eviction, and the cache full
- * percentage as a side-effect.
+ * percentage as a side-effect.
*/
static inline bool
__wt_eviction_needed(WT_SESSION_IMPL *session, u_int *pct_fullp)
@@ -186,22 +236,21 @@ __wt_eviction_needed(WT_SESSION_IMPL *session, u_int *pct_fullp)
pct_full = (u_int)((100 * bytes_inuse) / bytes_max);
if (pct_fullp != NULL)
*pct_fullp = pct_full;
- /*
- * If the connection is closing we do not need eviction from an
- * application thread. The eviction subsystem is already closed.
- * We return here because some callers depend on the percent full
- * having been filled in.
- */
- if (F_ISSET(conn, WT_CONN_CLOSING))
- return (false);
if (pct_full > cache->eviction_trigger)
return (true);
- /* Return if there are too many dirty bytes in cache. */
- if (__wt_cache_dirty_inuse(cache) >
+ /*
+ * Check if there are too many dirty bytes in cache.
+ *
+ * We try to avoid penalizing read-only operations by only checking the
+ * dirty limit once a transaction ID has been allocated, or if the last
+ * transaction did an update.
+ */
+ if (__wt_cache_dirty_leaf_inuse(cache) >
(cache->eviction_dirty_trigger * bytes_max) / 100)
return (true);
+
return (false);
}
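A worked example of the two trigger checks, assuming a 100MB cache (bytes_max), eviction_trigger of 95 and eviction_dirty_trigger of 20:

	/*
	 * bytes_inuse = 96MB: pct_full = (100 * 96MB) / 100MB = 96 > 95  -> true
	 * dirty leaf bytes = 25MB: 25MB > (20 * 100MB) / 100 = 20MB      -> true
	 * bytes_inuse = 80MB and dirty leaf bytes = 10MB                 -> false
	 */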
diff --git a/src/include/connection.h b/src/include/connection.h
index 0e0c357279a..a9855e42980 100644
--- a/src/include/connection.h
+++ b/src/include/connection.h
@@ -271,7 +271,6 @@ struct __wt_connection_impl {
wt_thread_t ckpt_tid; /* Checkpoint thread */
bool ckpt_tid_set; /* Checkpoint thread set */
WT_CONDVAR *ckpt_cond; /* Checkpoint wait mutex */
- const char *ckpt_config; /* Checkpoint configuration */
#define WT_CKPT_LOGSIZE(conn) ((conn)->ckpt_logsize != 0)
wt_off_t ckpt_logsize; /* Checkpoint log size period */
uint32_t ckpt_signalled;/* Checkpoint signalled */
@@ -314,6 +313,7 @@ struct __wt_connection_impl {
uint32_t evict_workers; /* Number of eviction workers */
WT_EVICT_WORKER *evict_workctx; /* Eviction worker context */
+#define WT_STATLOG_FILENAME "WiredTigerStat.%d.%H"
WT_SESSION_IMPL *stat_session; /* Statistics log session */
wt_thread_t stat_tid; /* Statistics log thread */
bool stat_tid_set; /* Statistics log thread set */
diff --git a/src/include/cursor.h b/src/include/cursor.h
index 6357523a03f..dce24f20844 100644
--- a/src/include/cursor.h
+++ b/src/include/cursor.h
@@ -73,6 +73,9 @@ struct __wt_cursor_backup {
WT_CURSOR_BACKUP_ENTRY *list; /* List of files to be copied. */
size_t list_allocated;
size_t list_next;
+
+#define WT_CURBACKUP_LOCKER 0x01 /* Hot-backup started */
+ uint8_t flags;
};
#define WT_CURSOR_BACKUP_ID(cursor) (((WT_CURSOR_BACKUP *)cursor)->maxid)
@@ -413,7 +416,9 @@ struct __wt_cursor_log {
uint32_t step_count; /* Intra-record count */
uint32_t rectype; /* Record type */
uint64_t txnid; /* Record txnid */
- uint32_t flags;
+
+#define WT_CURLOG_ARCHIVE_LOCK 0x01 /* Archive lock held */
+ uint8_t flags;
};
struct __wt_cursor_metadata {
@@ -424,7 +429,7 @@ struct __wt_cursor_metadata {
#define WT_MDC_CREATEONLY 0x01
#define WT_MDC_ONMETADATA 0x02
#define WT_MDC_POSITIONED 0x04
- uint32_t flags;
+ uint8_t flags;
};
struct __wt_join_stats_group {
diff --git a/src/include/cursor.i b/src/include/cursor.i
index 553dd03f958..76a08138afb 100644
--- a/src/include/cursor.i
+++ b/src/include/cursor.i
@@ -38,9 +38,6 @@ __cursor_pos_clear(WT_CURSOR_BTREE *cbt)
cbt->ins_head = NULL;
cbt->ins_stack[0] = NULL;
- cbt->cip_saved = NULL;
- cbt->rip_saved = NULL;
-
F_CLR(cbt, WT_CBT_POSITION_MASK);
}
@@ -120,7 +117,7 @@ __curfile_leave(WT_CURSOR_BTREE *cbt)
*/
if (cbt->ref != NULL &&
cbt->page_deleted_count > WT_BTREE_DELETE_THRESHOLD)
- __wt_page_evict_soon(cbt->ref->page);
+ WT_TRET(__wt_page_evict_soon(session, cbt->ref));
cbt->page_deleted_count = 0;
/*
@@ -130,7 +127,7 @@ __curfile_leave(WT_CURSOR_BTREE *cbt)
*
* Clear the reference regardless, so we don't try the release twice.
*/
- ret = __wt_page_release(session, cbt->ref, 0);
+ WT_TRET(__wt_page_release(session, cbt->ref, 0));
cbt->ref = NULL;
return (ret);
diff --git a/src/include/extern.h b/src/include/extern.h
index b0c0f6eccad..f3a639ac07f 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -44,7 +44,7 @@ extern void __wt_block_extlist_free(WT_SESSION_IMPL *session, WT_EXTLIST *el);
extern int __wt_block_map(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_regionp, size_t *lengthp, void *mapped_cookiep);
extern int __wt_block_unmap(WT_SESSION_IMPL *session, WT_BLOCK *block, void *mapped_region, size_t length, void *mapped_cookie);
extern int __wt_block_manager_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[], bool forced_salvage, bool readonly, uint32_t allocsize, WT_BM **bmp);
-extern int __wt_block_manager_drop(WT_SESSION_IMPL *session, const char *filename);
+extern int __wt_block_manager_drop( WT_SESSION_IMPL *session, const char *filename, bool durable);
extern int __wt_block_manager_create( WT_SESSION_IMPL *session, const char *filename, uint32_t allocsize);
extern void __wt_block_configure_first_fit(WT_BLOCK *block, bool on);
extern int __wt_block_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[], bool forced_salvage, bool readonly, uint32_t allocsize, WT_BLOCK **blockp);
@@ -76,8 +76,8 @@ extern int __wt_block_verify_addr(WT_SESSION_IMPL *session, WT_BLOCK *block, con
extern int __wt_block_truncate(WT_SESSION_IMPL *session, WT_BLOCK *block, wt_off_t len);
extern int __wt_block_discard(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t added_size);
extern int __wt_block_write_size(WT_SESSION_IMPL *session, WT_BLOCK *block, size_t *sizep);
-extern int __wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool data_cksum);
-extern int __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t *offsetp, uint32_t *sizep, uint32_t *cksump, bool data_cksum, bool caller_locked);
+extern int __wt_block_write(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool data_cksum, bool checkpoint_io);
+extern int __wt_block_write_off(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_ITEM *buf, wt_off_t *offsetp, uint32_t *sizep, uint32_t *cksump, bool data_cksum, bool checkpoint_io, bool caller_locked);
extern int __wt_bloom_create( WT_SESSION_IMPL *session, const char *uri, const char *config, uint64_t count, uint32_t factor, uint32_t k, WT_BLOOM **bloomp);
extern int __wt_bloom_open(WT_SESSION_IMPL *session, const char *uri, uint32_t factor, uint32_t k, WT_CURSOR *owner, WT_BLOOM **bloomp);
extern int __wt_bloom_insert(WT_BLOOM *bloom, WT_ITEM *key);
@@ -139,7 +139,7 @@ extern void __wt_btree_evictable(WT_SESSION_IMPL *session, bool on);
extern int __wt_btree_huffman_open(WT_SESSION_IMPL *session);
extern void __wt_btree_huffman_close(WT_SESSION_IMPL *session);
extern int __wt_bt_read(WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, size_t addr_size);
-extern int __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool checkpoint, bool compressed);
+extern int __wt_bt_write(WT_SESSION_IMPL *session, WT_ITEM *buf, uint8_t *addr, size_t *addr_sizep, bool checkpoint, bool checkpoint_io, bool compressed);
extern const char *__wt_page_type_string(u_int type);
extern const char *__wt_cell_type_string(uint8_t type);
extern const char *__wt_page_addr_string(WT_SESSION_IMPL *session, WT_REF *ref, WT_ITEM *buf);
@@ -161,11 +161,11 @@ extern int __wt_kv_return(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_UPD
extern int __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[]);
extern void __wt_split_stash_discard(WT_SESSION_IMPL *session);
extern void __wt_split_stash_discard_all( WT_SESSION_IMPL *session_safe, WT_SESSION_IMPL *session);
-extern int __wt_multi_to_ref(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi, WT_REF **refp, size_t *incrp);
+extern int __wt_multi_to_ref(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi, WT_REF **refp, size_t *incrp, bool closing);
extern int __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref);
extern int __wt_split_multi(WT_SESSION_IMPL *session, WT_REF *ref, int closing);
extern int __wt_split_reverse(WT_SESSION_IMPL *session, WT_REF *ref);
-extern int __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref);
+extern int __wt_split_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, WT_MULTI *multi);
extern int __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst);
extern int __wt_cache_op(WT_SESSION_IMPL *session, WT_CACHE_OP op);
extern int __wt_upgrade(WT_SESSION_IMPL *session, const char *cfg[]);
@@ -282,7 +282,6 @@ extern int __wt_curconfig_open(WT_SESSION_IMPL *session, const char *uri, const
extern int __wt_curds_open( WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_DATA_SOURCE *dsrc, WT_CURSOR **cursorp);
extern int __wt_curdump_create(WT_CURSOR *child, WT_CURSOR *owner, WT_CURSOR **cursorp);
extern int __wt_curfile_update_check(WT_CURSOR *cursor);
-extern int __wt_curfile_create(WT_SESSION_IMPL *session, WT_CURSOR *owner, const char *cfg[], bool bulk, bool bitmap, WT_CURSOR **cursorp);
extern int __wt_curfile_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp);
extern int __wt_curindex_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp);
extern int __wt_curjoin_joined(WT_CURSOR *cursor);
@@ -346,9 +345,11 @@ extern int __wt_evict_destroy(WT_SESSION_IMPL *session);
extern int __wt_evict_file_exclusive_on(WT_SESSION_IMPL *session);
extern void __wt_evict_file_exclusive_off(WT_SESSION_IMPL *session);
extern int __wt_cache_eviction_worker(WT_SESSION_IMPL *session, bool busy, u_int pct_full);
+extern int __wt_page_evict_soon(WT_SESSION_IMPL *session, WT_REF *ref);
extern void __wt_evict_priority_set(WT_SESSION_IMPL *session, uint64_t v);
extern void __wt_evict_priority_clear(WT_SESSION_IMPL *session);
extern int __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile);
+extern int __wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref);
extern int __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing);
extern int __wt_log_ckpt(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn);
extern int __wt_log_flush_lsn(WT_SESSION_IMPL *session, WT_LSN *lsn, bool start);
@@ -485,8 +486,7 @@ extern int __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **va
extern int __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value);
extern int __wt_filename(WT_SESSION_IMPL *session, const char *name, char **path);
extern int __wt_nfilename( WT_SESSION_IMPL *session, const char *name, size_t namelen, char **path);
-extern int __wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name);
-extern int __wt_rename_and_sync_directory( WT_SESSION_IMPL *session, const char *from, const char *to);
+extern int __wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name, bool durable);
extern int __wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to);
extern void __wt_abort(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
extern int __wt_calloc(WT_SESSION_IMPL *session, size_t number, size_t size, void *retp);
@@ -500,7 +500,7 @@ extern int __wt_errno(void);
extern const char *__wt_strerror(WT_SESSION_IMPL *session, int error, char *errbuf, size_t errlen);
extern int __wt_ext_map_windows_error( WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, uint32_t windows_error);
extern bool __wt_handle_is_open(WT_SESSION_IMPL *session, const char *name);
-extern int __wt_open(WT_SESSION_IMPL *session, const char *name, WT_OPEN_FILE_TYPE file_type, u_int flags, WT_FH **fhp);
+extern int __wt_open(WT_SESSION_IMPL *session, const char *name, WT_FS_OPEN_FILE_TYPE file_type, u_int flags, WT_FH **fhp);
extern int __wt_close(WT_SESSION_IMPL *session, WT_FH **fhp);
extern int __wt_close_connection_close(WT_SESSION_IMPL *session);
extern int __wt_os_inmemory(WT_SESSION_IMPL *session);
@@ -585,7 +585,7 @@ extern WT_DATA_SOURCE *__wt_schema_get_source(WT_SESSION_IMPL *session, const ch
extern int __wt_str_name_check(WT_SESSION_IMPL *session, const char *str);
extern int __wt_name_check(WT_SESSION_IMPL *session, const char *str, size_t len);
extern int __wt_schema_worker(WT_SESSION_IMPL *session, const char *uri, int (*file_func)(WT_SESSION_IMPL *, const char *[]), int (*name_func)(WT_SESSION_IMPL *, const char *, bool *), const char *cfg[], uint32_t open_flags);
-extern int __wt_session_notsup(WT_SESSION *wt_session);
+extern int __wt_session_notsup(WT_SESSION_IMPL *session);
extern int __wt_session_reset_cursors(WT_SESSION_IMPL *session, bool free_buffers);
extern int __wt_session_copy_values(WT_SESSION_IMPL *session);
extern int __wt_session_release_resources(WT_SESSION_IMPL *session);
@@ -719,7 +719,6 @@ extern void __wt_txn_stats_update(WT_SESSION_IMPL *session);
extern void __wt_txn_destroy(WT_SESSION_IMPL *session);
extern int __wt_txn_global_init(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_txn_global_destroy(WT_SESSION_IMPL *session);
-extern int __wt_checkpoint_name_ok(WT_SESSION_IMPL *session, const char *name, size_t len);
extern int __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]);
extern int __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[]);
diff --git a/src/include/flags.h b/src/include/flags.h
index f134af69d29..9346605ed24 100644
--- a/src/include/flags.h
+++ b/src/include/flags.h
@@ -2,6 +2,7 @@
* DO NOT EDIT: automatically built by dist/flags.py.
* flags section: BEGIN
*/
+#define WT_CHECKPOINTING 0x00000001
#define WT_CONN_CACHE_POOL 0x00000001
#define WT_CONN_CKPT_SYNC 0x00000002
#define WT_CONN_CLOSING 0x00000004
@@ -21,10 +22,12 @@
#define WT_CONN_SERVER_STATISTICS 0x00010000
#define WT_CONN_SERVER_SWEEP 0x00020000
#define WT_CONN_WAS_BACKUP 0x00040000
-#define WT_EVICTING 0x00000001
-#define WT_EVICT_IN_MEMORY 0x00000002
-#define WT_EVICT_LOOKASIDE 0x00000004
-#define WT_EVICT_UPDATE_RESTORE 0x00000008
+#define WT_EVICTING 0x00000002
+#define WT_EVICT_INMEM_SPLIT 0x00000004
+#define WT_EVICT_IN_MEMORY 0x00000008
+#define WT_EVICT_LOOKASIDE 0x00000010
+#define WT_EVICT_SCRUB 0x00000020
+#define WT_EVICT_UPDATE_RESTORE 0x00000040
#define WT_LOGSCAN_FIRST 0x00000001
#define WT_LOGSCAN_FROM_CKP 0x00000002
#define WT_LOGSCAN_ONE 0x00000004
@@ -100,7 +103,7 @@
#define WT_VERB_VERIFY 0x00800000
#define WT_VERB_VERSION 0x01000000
#define WT_VERB_WRITE 0x02000000
-#define WT_VISIBILITY_ERR 0x00000010
+#define WT_VISIBILITY_ERR 0x00000080
/*
* flags section: END
* DO NOT EDIT: automatically built by dist/flags.py.
diff --git a/src/include/hardware.h b/src/include/hardware.h
index 93ed8a868b6..0e52818ae05 100644
--- a/src/include/hardware.h
+++ b/src/include/hardware.h
@@ -45,7 +45,16 @@
&(p)->flags_atomic, __orig, __orig & ~(uint8_t)(mask))); \
} while (0)
-#define WT_CACHE_LINE_ALIGNMENT 64 /* Cache line alignment */
+/*
+ * Cache line alignment.
+ */
+#if defined(__PPC64__) || defined(PPC64)
+#define WT_CACHE_LINE_ALIGNMENT 128
+#elif defined(__s390x__)
+#define WT_CACHE_LINE_ALIGNMENT 256
+#else
+#define WT_CACHE_LINE_ALIGNMENT 64
+#endif
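The per-architecture constant is typically consumed by padding hot structures so independent counters don't share a cache line; a minimal sketch, with an illustrative structure that is not part of this patch:

	#include <stdint.h>

	/* Sketch: pad a hot counter to a full cache line to avoid false sharing. */
	struct padded_counter {
		uint64_t value;
		uint8_t	 pad[WT_CACHE_LINE_ALIGNMENT - sizeof(uint64_t)];
	};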
#define WT_CACHE_LINE_ALIGNMENT_VERIFY(session, a) \
WT_ASSERT(session, \
WT_PTRDIFF(&(a)[1], &(a)[0]) >= WT_CACHE_LINE_ALIGNMENT && \
diff --git a/src/include/intpack.i b/src/include/intpack.i
index b27afd24e6c..e8bea58cede 100644
--- a/src/include/intpack.i
+++ b/src/include/intpack.i
@@ -59,7 +59,7 @@
/* Count the leading zero bytes. */
#if defined(__GNUC__)
#define WT_LEADING_ZEROS(x, i) \
- (i = (x == 0) ? (int)sizeof (x) : __builtin_clzll(x) >> 3)
+ (i = (x == 0) ? (int)sizeof(x) : __builtin_clzll(x) >> 3)
#elif defined(_MSC_VER)
#define WT_LEADING_ZEROS(x, i) do { \
if (x == 0) i = (int)sizeof(x); \
@@ -89,7 +89,7 @@ __wt_vpack_posint(uint8_t **pp, size_t maxlen, uint64_t x)
int len, lz, shift;
WT_LEADING_ZEROS(x, lz);
- len = (int)sizeof (x) - lz;
+ len = (int)sizeof(x) - lz;
WT_SIZE_CHECK_PACK(len + 1, maxlen);
p = *pp;
@@ -114,7 +114,7 @@ __wt_vpack_negint(uint8_t **pp, size_t maxlen, uint64_t x)
int len, lz, shift;
WT_LEADING_ZEROS(~x, lz);
- len = (int)sizeof (x) - lz;
+ len = (int)sizeof(x) - lz;
WT_SIZE_CHECK_PACK(len + 1, maxlen);
p = *pp;
@@ -170,7 +170,7 @@ __wt_vunpack_negint(const uint8_t **pp, size_t maxlen, uint64_t *retp)
/* There are four length bits in the first byte. */
p = *pp;
- len = (int)sizeof (x) - (*p++ & 0xf);
+ len = (int)sizeof(x) - (*p++ & 0xf);
WT_SIZE_CHECK_UNPACK(len + 1, maxlen);
for (x = UINT64_MAX; len != 0; --len)
diff --git a/src/include/os_fhandle.i b/src/include/os_fhandle.i
index 313bf8eca3f..9bf5ce0e60b 100644
--- a/src/include/os_fhandle.i
+++ b/src/include/os_fhandle.i
@@ -26,7 +26,7 @@ __wt_fsync(WT_SESSION_IMPL *session, WT_FH *fh, bool block)
* There is no way to check when the non-blocking sync-file-range is
* complete, but we track the time taken in the call for completeness.
*/
- WT_STAT_FAST_CONN_INCR_ATOMIC(session, fsync_active);
+ WT_STAT_FAST_CONN_INCR_ATOMIC(session, thread_fsync_active);
WT_STAT_FAST_CONN_INCR(session, fsync_io);
if (block)
ret = (handle->fh_sync == NULL ? 0 :
@@ -34,7 +34,7 @@ __wt_fsync(WT_SESSION_IMPL *session, WT_FH *fh, bool block)
else
ret = (handle->fh_sync_nowait == NULL ? 0 :
handle->fh_sync_nowait(handle, (WT_SESSION *)session));
- WT_STAT_FAST_CONN_DECR_ATOMIC(session, fsync_active);
+ WT_STAT_FAST_CONN_DECR_ATOMIC(session, thread_fsync_active);
return (ret);
}
@@ -107,13 +107,13 @@ __wt_read(
"%s: handle-read: %" WT_SIZET_FMT " at %" PRIuMAX,
fh->handle->name, len, (uintmax_t)offset));
- WT_STAT_FAST_CONN_INCR_ATOMIC(session, read_active);
+ WT_STAT_FAST_CONN_INCR_ATOMIC(session, thread_read_active);
WT_STAT_FAST_CONN_INCR(session, read_io);
ret = fh->handle->fh_read(
fh->handle, (WT_SESSION *)session, offset, len, buf);
- WT_STAT_FAST_CONN_DECR_ATOMIC(session, read_active);
+ WT_STAT_FAST_CONN_DECR_ATOMIC(session, thread_read_active);
return (ret);
}
@@ -165,12 +165,12 @@ __wt_write(WT_SESSION_IMPL *session,
"%s: handle-write: %" WT_SIZET_FMT " at %" PRIuMAX,
fh->handle->name, len, (uintmax_t)offset));
- WT_STAT_FAST_CONN_INCR_ATOMIC(session, write_active);
+ WT_STAT_FAST_CONN_INCR_ATOMIC(session, thread_write_active);
WT_STAT_FAST_CONN_INCR(session, write_io);
ret = fh->handle->fh_write(
fh->handle, (WT_SESSION *)session, offset, len, buf);
- WT_STAT_FAST_CONN_DECR_ATOMIC(session, write_active);
+ WT_STAT_FAST_CONN_DECR_ATOMIC(session, thread_write_active);
return (ret);
}
diff --git a/src/include/os_fs.i b/src/include/os_fs.i
index 88ee71d953a..a3a2fe29b65 100644
--- a/src/include/os_fs.i
+++ b/src/include/os_fs.i
@@ -8,7 +8,7 @@
/*
* __wt_fs_directory_list --
- * Get a list of files from a directory.
+ * Return a list of files from a directory.
*/
static inline int
__wt_fs_directory_list(WT_SESSION_IMPL *session,
@@ -61,61 +61,6 @@ __wt_fs_directory_list_free(
}
/*
- * __wt_fs_directory_sync --
- * Flush a directory to ensure file creation is durable.
- */
-static inline int
-__wt_fs_directory_sync(WT_SESSION_IMPL *session, const char *name)
-{
- WT_DECL_RET;
- WT_FILE_SYSTEM *file_system;
- WT_SESSION *wt_session;
- char *copy, *dir;
-
- WT_ASSERT(session, !F_ISSET(S2C(session), WT_CONN_READONLY));
-
- WT_RET(__wt_verbose(
- session, WT_VERB_FILEOPS, "%s: directory-sync", name));
-
- /*
- * POSIX 1003.1 does not require that fsync of a file handle ensures the
- * entry in the directory containing the file has also reached disk (and
- * there are historic Linux filesystems requiring it). If the underlying
- * filesystem method is set, do an explicit fsync on a file descriptor
- * for the directory to be sure.
- *
- * directory-sync is not a required call, no method means the call isn't
- * needed.
- */
- file_system = S2C(session)->file_system;
- if (file_system->fs_directory_sync == NULL)
- return (0);
-
- copy = NULL;
- if (name == NULL || strchr(name, '/') == NULL)
- name = S2C(session)->home;
- else {
- /*
- * File name construction should not return a path without any
- * slash separator, but caution isn't unreasonable.
- */
- WT_RET(__wt_filename(session, name, &copy));
- if ((dir = strrchr(copy, '/')) == NULL)
- name = S2C(session)->home;
- else {
- dir[1] = '\0';
- name = copy;
- }
- }
-
- wt_session = (WT_SESSION *)session;
- ret = file_system->fs_directory_sync(file_system, wt_session, name);
-
- __wt_free(session, copy);
- return (ret);
-}
-
-/*
* __wt_fs_exist --
* Return if the file exists.
*/
@@ -141,10 +86,10 @@ __wt_fs_exist(WT_SESSION_IMPL *session, const char *name, bool *existp)
/*
* __wt_fs_remove --
- * POSIX remove.
+ * Remove the file.
*/
static inline int
-__wt_fs_remove(WT_SESSION_IMPL *session, const char *name)
+__wt_fs_remove(WT_SESSION_IMPL *session, const char *name, bool durable)
{
WT_DECL_RET;
WT_FILE_SYSTEM *file_system;
@@ -169,7 +114,8 @@ __wt_fs_remove(WT_SESSION_IMPL *session, const char *name)
file_system = S2C(session)->file_system;
wt_session = (WT_SESSION *)session;
- ret = file_system->fs_remove(file_system, wt_session, path);
+ ret = file_system->fs_remove(
+ file_system, wt_session, path, durable ? WT_FS_DURABLE : 0);
__wt_free(session, path);
return (ret);
@@ -177,10 +123,11 @@ __wt_fs_remove(WT_SESSION_IMPL *session, const char *name)
/*
* __wt_fs_rename --
- * POSIX rename.
+ * Rename the file.
*/
static inline int
-__wt_fs_rename(WT_SESSION_IMPL *session, const char *from, const char *to)
+__wt_fs_rename(
+ WT_SESSION_IMPL *session, const char *from, const char *to, bool durable)
{
WT_DECL_RET;
WT_FILE_SYSTEM *file_system;
@@ -211,8 +158,8 @@ __wt_fs_rename(WT_SESSION_IMPL *session, const char *from, const char *to)
file_system = S2C(session)->file_system;
wt_session = (WT_SESSION *)session;
- ret = file_system->fs_rename(
- file_system, wt_session, from_path, to_path);
+ ret = file_system->fs_rename(file_system,
+ wt_session, from_path, to_path, durable ? WT_FS_DURABLE : 0);
err: __wt_free(session, from_path);
__wt_free(session, to_path);
@@ -221,7 +168,7 @@ err: __wt_free(session, from_path);
/*
* __wt_fs_size --
- * Get the size of a file in bytes, by file name.
+ * Return the size of a file in bytes, by file name.
*/
static inline int
__wt_fs_size(WT_SESSION_IMPL *session, const char *name, wt_off_t *sizep)
diff --git a/src/include/os_fstream.i b/src/include/os_fstream.i
index 8c0fdadbdb0..92274431011 100644
--- a/src/include/os_fstream.i
+++ b/src/include/os_fstream.i
@@ -93,5 +93,5 @@ __wt_sync_and_rename(WT_SESSION_IMPL *session,
WT_TRET(__wt_fclose(session, &fstr));
WT_RET(ret);
- return (__wt_rename_and_sync_directory(session, from, to));
+ return (__wt_fs_rename(session, from, to, true));
}
diff --git a/src/include/queue.h b/src/include/queue.h
index 1d494875cf6..e3d4daf0f4c 100644
--- a/src/include/queue.h
+++ b/src/include/queue.h
@@ -1,4 +1,4 @@
-/*
+/*-
* Copyright (c) 1991, 1993
* The Regents of the University of California. All rights reserved.
*
@@ -27,28 +27,18 @@
* SUCH DAMAGE.
*
* @(#)queue.h 8.5 (Berkeley) 8/20/94
- * $FreeBSD: src/sys/sys/queue.h,v 1.54 2002/08/05 05:18:43 alfred Exp $
+ * $FreeBSD$
*/
-#ifndef _DB_QUEUE_H_
-#define _DB_QUEUE_H_
-
-#if defined(__cplusplus)
-extern "C" {
-#endif
-
/*
+ * This is a stripped-down version of the FreeBSD sys/queue.h include file.
+ *
 * WiredTiger only uses the TAILQ macros: we've gotten into trouble in the past
 * by trying to use simpler queues, then discovering that a list we didn't
 * think would ever get large could, under some workloads, become large, and
 * the linear cost of removing elements from the simpler macros proved to be
 * more trouble than the memory savings were worth.
*
- * Additionally, we've altered the TAILQ_INSERT_XXX functions to include a write
- * barrier, in order to ensure we never insert a partially built structure onto
- * a list (this is required because the spinlocks we use don't necessarily imply
- * a write barrier).
- *
* We #undef all of the macros because there are incompatible versions of this
* file and these macros on various systems. What makes the problem worse is
* they are included and/or defined by system include files which we may have
@@ -57,13 +47,28 @@ extern "C" {
* several of the LIST_XXX macros. Visual C.NET 7.0 also defines some of these
* same macros in Vc7\PlatformSDK\Include\WinNT.h. Make sure we use ours.
*/
-
+#undef QMD_SAVELINK
+#undef QMD_TAILQ_CHECK_HEAD
+#undef QMD_TAILQ_CHECK_NEXT
+#undef QMD_TAILQ_CHECK_PREV
+#undef QMD_TAILQ_CHECK_TAIL
+#undef QMD_TRACE_ELEM
+#undef QMD_TRACE_HEAD
+#undef QUEUE_TYPEOF
+#undef TAILQ_CLASS_ENTRY
+#undef TAILQ_CLASS_HEAD
#undef TAILQ_CONCAT
#undef TAILQ_EMPTY
#undef TAILQ_ENTRY
#undef TAILQ_FIRST
#undef TAILQ_FOREACH
+#undef TAILQ_FOREACH_FROM
+#undef TAILQ_FOREACH_FROM_SAFE
#undef TAILQ_FOREACH_REVERSE
+#undef TAILQ_FOREACH_REVERSE_FROM
+#undef TAILQ_FOREACH_REVERSE_FROM_SAFE
+#undef TAILQ_FOREACH_REVERSE_SAFE
+#undef TAILQ_FOREACH_SAFE
#undef TAILQ_HEAD
#undef TAILQ_HEAD_INITIALIZER
#undef TAILQ_INIT
@@ -76,41 +81,25 @@ extern "C" {
#undef TAILQ_PREV
#undef TAILQ_REMOVE
#undef TRACEBUF
+#undef TRACEBUF_INITIALIZER
#undef TRASHIT
+#undef TAILQ_SWAP
-#define QUEUE_MACRO_DEBUG 0
-#if QUEUE_MACRO_DEBUG
-/* Store the last 2 places the queue element or head was altered */
-struct qm_trace {
- char * lastfile;
- int lastline;
- char * prevfile;
- int prevline;
-};
-
-#define TRACEBUF struct qm_trace trace;
-#define TRASHIT(x) do {(x) = (void *)-1;} while (0)
-
-#define QMD_TRACE_HEAD(head) do { \
- (head)->trace.prevline = (head)->trace.lastline; \
- (head)->trace.prevfile = (head)->trace.lastfile; \
- (head)->trace.lastline = __LINE__; \
- (head)->trace.lastfile = __FILE__; \
-} while (0)
-
-#define QMD_TRACE_ELEM(elem) do { \
- (elem)->trace.prevline = (elem)->trace.lastline; \
- (elem)->trace.prevfile = (elem)->trace.lastfile; \
- (elem)->trace.lastline = __LINE__; \
- (elem)->trace.lastfile = __FILE__; \
-} while (0)
-
-#else
#define QMD_TRACE_ELEM(elem)
#define QMD_TRACE_HEAD(head)
+#define QMD_SAVELINK(name, link)
#define TRACEBUF
+#define TRACEBUF_INITIALIZER
#define TRASHIT(x)
-#endif /* QUEUE_MACRO_DEBUG */
+
+#ifdef __cplusplus
+/*
+ * In C++ there can be structure lists and class lists:
+ */
+#define QUEUE_TYPEOF(type) type
+#else
+#define QUEUE_TYPEOF(type) struct type
+#endif
/*
* Tail queue declarations.
@@ -122,8 +111,15 @@ struct name { \
TRACEBUF \
}
+#define TAILQ_CLASS_HEAD(name, type) \
+struct name { \
+ class type *tqh_first; /* first element */ \
+ class type **tqh_last; /* addr of last next element */ \
+ TRACEBUF \
+}
+
#define TAILQ_HEAD_INITIALIZER(head) \
- { NULL, &(head).tqh_first }
+ { NULL, &(head).tqh_first, TRACEBUF_INITIALIZER }
#define TAILQ_ENTRY(type) \
struct { \
@@ -132,16 +128,28 @@ struct { \
TRACEBUF \
}
+#define TAILQ_CLASS_ENTRY(type) \
+struct { \
+ class type *tqe_next; /* next element */ \
+ class type **tqe_prev; /* address of previous next element */ \
+ TRACEBUF \
+}
+
/*
* Tail queue functions.
*/
+#define QMD_TAILQ_CHECK_HEAD(head, field)
+#define QMD_TAILQ_CHECK_TAIL(head, headname)
+#define QMD_TAILQ_CHECK_NEXT(elm, field)
+#define QMD_TAILQ_CHECK_PREV(elm, field)
+
#define TAILQ_CONCAT(head1, head2, field) do { \
if (!TAILQ_EMPTY(head2)) { \
*(head1)->tqh_last = (head2)->tqh_first; \
(head2)->tqh_first->field.tqe_prev = (head1)->tqh_last; \
(head1)->tqh_last = (head2)->tqh_last; \
TAILQ_INIT((head2)); \
- QMD_TRACE_HEAD(head); \
+ QMD_TRACE_HEAD(head1); \
QMD_TRACE_HEAD(head2); \
} \
} while (0)
@@ -155,11 +163,41 @@ struct { \
(var); \
(var) = TAILQ_NEXT((var), field))
+#define TAILQ_FOREACH_FROM(var, head, field) \
+ for ((var) = ((var) ? (var) : TAILQ_FIRST((head))); \
+ (var); \
+ (var) = TAILQ_NEXT((var), field))
+
+#define TAILQ_FOREACH_SAFE(var, head, field, tvar) \
+ for ((var) = TAILQ_FIRST((head)); \
+ (var) && ((tvar) = TAILQ_NEXT((var), field), 1); \
+ (var) = (tvar))
+
+#define TAILQ_FOREACH_FROM_SAFE(var, head, field, tvar) \
+ for ((var) = ((var) ? (var) : TAILQ_FIRST((head))); \
+ (var) && ((tvar) = TAILQ_NEXT((var), field), 1); \
+ (var) = (tvar))
+
#define TAILQ_FOREACH_REVERSE(var, head, headname, field) \
for ((var) = TAILQ_LAST((head), headname); \
(var); \
(var) = TAILQ_PREV((var), headname, field))
+#define TAILQ_FOREACH_REVERSE_FROM(var, head, headname, field) \
+ for ((var) = ((var) ? (var) : TAILQ_LAST((head), headname)); \
+ (var); \
+ (var) = TAILQ_PREV((var), headname, field))
+
+#define TAILQ_FOREACH_REVERSE_SAFE(var, head, headname, field, tvar) \
+ for ((var) = TAILQ_LAST((head), headname); \
+ (var) && ((tvar) = TAILQ_PREV((var), headname, field), 1); \
+ (var) = (tvar))
+
+#define TAILQ_FOREACH_REVERSE_FROM_SAFE(var, head, headname, field, tvar) \
+ for ((var) = ((var) ? (var) : TAILQ_LAST((head), headname)); \
+ (var) && ((tvar) = TAILQ_PREV((var), headname, field), 1); \
+ (var) = (tvar))
+
#define TAILQ_INIT(head) do { \
TAILQ_FIRST((head)) = NULL; \
(head)->tqh_last = &TAILQ_FIRST((head)); \
@@ -167,9 +205,9 @@ struct { \
} while (0)
#define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \
- WT_WRITE_BARRIER(); \
+ QMD_TAILQ_CHECK_NEXT(listelm, field); \
if ((TAILQ_NEXT((elm), field) = TAILQ_NEXT((listelm), field)) != NULL)\
- TAILQ_NEXT((elm), field)->field.tqe_prev = \
+ TAILQ_NEXT((elm), field)->field.tqe_prev = \
&TAILQ_NEXT((elm), field); \
else { \
(head)->tqh_last = &TAILQ_NEXT((elm), field); \
@@ -178,21 +216,21 @@ struct { \
TAILQ_NEXT((listelm), field) = (elm); \
(elm)->field.tqe_prev = &TAILQ_NEXT((listelm), field); \
QMD_TRACE_ELEM(&(elm)->field); \
- QMD_TRACE_ELEM(&listelm->field); \
+ QMD_TRACE_ELEM(&(listelm)->field); \
} while (0)
#define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \
- WT_WRITE_BARRIER(); \
+ QMD_TAILQ_CHECK_PREV(listelm, field); \
(elm)->field.tqe_prev = (listelm)->field.tqe_prev; \
TAILQ_NEXT((elm), field) = (listelm); \
*(listelm)->field.tqe_prev = (elm); \
(listelm)->field.tqe_prev = &TAILQ_NEXT((elm), field); \
QMD_TRACE_ELEM(&(elm)->field); \
- QMD_TRACE_ELEM(&listelm->field); \
+ QMD_TRACE_ELEM(&(listelm)->field); \
} while (0)
#define TAILQ_INSERT_HEAD(head, elm, field) do { \
- WT_WRITE_BARRIER(); \
+ QMD_TAILQ_CHECK_HEAD(head, field); \
if ((TAILQ_NEXT((elm), field) = TAILQ_FIRST((head))) != NULL) \
TAILQ_FIRST((head))->field.tqe_prev = \
&TAILQ_NEXT((elm), field); \
@@ -205,7 +243,7 @@ struct { \
} while (0)
#define TAILQ_INSERT_TAIL(head, elm, field) do { \
- WT_WRITE_BARRIER(); \
+ QMD_TAILQ_CHECK_TAIL(head, field); \
TAILQ_NEXT((elm), field) = NULL; \
(elm)->field.tqe_prev = (head)->tqh_last; \
*(head)->tqh_last = (elm); \
@@ -223,20 +261,36 @@ struct { \
(*(((struct headname *)((elm)->field.tqe_prev))->tqh_last))
#define TAILQ_REMOVE(head, elm, field) do { \
+ QMD_SAVELINK(oldnext, (elm)->field.tqe_next); \
+ QMD_SAVELINK(oldprev, (elm)->field.tqe_prev); \
+ QMD_TAILQ_CHECK_NEXT(elm, field); \
+ QMD_TAILQ_CHECK_PREV(elm, field); \
if ((TAILQ_NEXT((elm), field)) != NULL) \
- TAILQ_NEXT((elm), field)->field.tqe_prev = \
+ TAILQ_NEXT((elm), field)->field.tqe_prev = \
(elm)->field.tqe_prev; \
else { \
(head)->tqh_last = (elm)->field.tqe_prev; \
QMD_TRACE_HEAD(head); \
} \
*(elm)->field.tqe_prev = TAILQ_NEXT((elm), field); \
- TRASHIT((elm)->field.tqe_next); \
- TRASHIT((elm)->field.tqe_prev); \
+ TRASHIT(*oldnext); \
+ TRASHIT(*oldprev); \
QMD_TRACE_ELEM(&(elm)->field); \
} while (0)
-#if defined(__cplusplus)
-}
-#endif
-#endif /* !_DB_QUEUE_H_ */
+#define TAILQ_SWAP(head1, head2, type, field) do { \
+ QUEUE_TYPEOF(type) *swap_first = (head1)->tqh_first; \
+ QUEUE_TYPEOF(type) **swap_last = (head1)->tqh_last; \
+ (head1)->tqh_first = (head2)->tqh_first; \
+ (head1)->tqh_last = (head2)->tqh_last; \
+ (head2)->tqh_first = swap_first; \
+ (head2)->tqh_last = swap_last; \
+ if ((swap_first = (head1)->tqh_first) != NULL) \
+ swap_first->field.tqe_prev = &(head1)->tqh_first; \
+ else \
+ (head1)->tqh_last = &(head1)->tqh_first; \
+ if ((swap_first = (head2)->tqh_first) != NULL) \
+ swap_first->field.tqe_prev = &(head2)->tqh_first; \
+ else \
+ (head2)->tqh_last = &(head2)->tqh_first; \
+} while (0)
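A minimal usage sketch of the TAILQ macros kept above; the element type and function are illustrative:

	struct elem {
		int value;
		TAILQ_ENTRY(elem) q;		/* embedded list linkage */
	};
	TAILQ_HEAD(elem_list, elem) head = TAILQ_HEAD_INITIALIZER(head);

	static void
	example(struct elem *e)
	{
		TAILQ_INSERT_TAIL(&head, e, q);	/* append */
		TAILQ_REMOVE(&head, e, q);	/* constant-time removal */
	}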
diff --git a/src/include/stat.h b/src/include/stat.h
index 57126af8aa4..1df24382236 100644
--- a/src/include/stat.h
+++ b/src/include/stat.h
@@ -145,14 +145,14 @@ __wt_stats_clear(void *stats_arg, int slot)
#define WT_STAT_DECRV(session, stats, fld, value) \
(stats)[WT_STATS_SLOT_ID(session)]->fld -= (int64_t)(value)
#define WT_STAT_DECRV_ATOMIC(session, stats, fld, value) \
- __wt_atomic_addi64( \
+ __wt_atomic_subi64( \
&(stats)[WT_STATS_SLOT_ID(session)]->fld, (int64_t)(value))
#define WT_STAT_DECR(session, stats, fld) \
WT_STAT_DECRV(session, stats, fld, 1)
#define WT_STAT_INCRV(session, stats, fld, value) \
(stats)[WT_STATS_SLOT_ID(session)]->fld += (int64_t)(value)
#define WT_STAT_INCRV_ATOMIC(session, stats, fld, value) \
- __wt_atomic_subi64( \
+ __wt_atomic_addi64( \
&(stats)[WT_STATS_SLOT_ID(session)]->fld, (int64_t)(value))
#define WT_STAT_INCR(session, stats, fld) \
WT_STAT_INCRV(session, stats, fld, 1)
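The add/sub swap above makes the atomic variants match their names. A brief usage sketch; the thread_read_active field is added elsewhere in this patch, and the call sites are illustrative:

	WT_STAT_INCRV_ATOMIC(session, stats, thread_read_active, 1);	/* atomic += 1 */
	WT_STAT_DECRV_ATOMIC(session, stats, thread_read_active, 1);	/* atomic -= 1 */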
@@ -273,9 +273,12 @@ struct __wt_connection_stats {
int64_t block_write;
int64_t block_byte_read;
int64_t block_byte_write;
+ int64_t block_byte_write_checkpoint;
int64_t block_map_read;
int64_t block_byte_map_read;
+ int64_t cache_bytes_image;
int64_t cache_bytes_inuse;
+ int64_t cache_bytes_other;
int64_t cache_bytes_read;
int64_t cache_bytes_write;
int64_t cache_eviction_checkpoint;
@@ -309,6 +312,8 @@ struct __wt_connection_stats {
int64_t cache_eviction_maximum_page_size;
int64_t cache_eviction_dirty;
int64_t cache_eviction_app_dirty;
+ int64_t cache_read_overflow;
+ int64_t cache_overflow_value;
int64_t cache_eviction_deepen;
int64_t cache_write_lookaside;
int64_t cache_pages_inuse;
@@ -316,6 +321,7 @@ struct __wt_connection_stats {
int64_t cache_eviction_force_delete;
int64_t cache_eviction_app;
int64_t cache_eviction_pages_queued;
+ int64_t cache_eviction_pages_queued_urgent;
int64_t cache_eviction_pages_queued_oldest;
int64_t cache_read;
int64_t cache_read_lookaside;
@@ -328,7 +334,6 @@ struct __wt_connection_stats {
int64_t cache_overhead;
int64_t cache_bytes_internal;
int64_t cache_bytes_leaf;
- int64_t cache_bytes_overflow;
int64_t cache_bytes_dirty;
int64_t cache_pages_dirty;
int64_t cache_eviction_clean;
@@ -408,9 +413,25 @@ struct __wt_connection_stats {
int64_t rec_split_stashed_objects;
int64_t session_cursor_open;
int64_t session_open;
- int64_t fsync_active;
- int64_t read_active;
- int64_t write_active;
+ int64_t session_table_compact_fail;
+ int64_t session_table_compact_success;
+ int64_t session_table_create_fail;
+ int64_t session_table_create_success;
+ int64_t session_table_drop_fail;
+ int64_t session_table_drop_success;
+ int64_t session_table_rebalance_fail;
+ int64_t session_table_rebalance_success;
+ int64_t session_table_rename_fail;
+ int64_t session_table_rename_success;
+ int64_t session_table_salvage_fail;
+ int64_t session_table_salvage_success;
+ int64_t session_table_truncate_fail;
+ int64_t session_table_truncate_success;
+ int64_t session_table_verify_fail;
+ int64_t session_table_verify_success;
+ int64_t thread_fsync_active;
+ int64_t thread_read_active;
+ int64_t thread_write_active;
int64_t page_busy_blocked;
int64_t page_forcible_evict_blocked;
int64_t page_locked_blocked;
@@ -424,13 +445,13 @@ struct __wt_connection_stats {
int64_t txn_checkpoint_time_max;
int64_t txn_checkpoint_time_min;
int64_t txn_checkpoint_time_recent;
+ int64_t txn_checkpoint_scrub_target;
+ int64_t txn_checkpoint_scrub_time;
int64_t txn_checkpoint_time_total;
int64_t txn_checkpoint;
int64_t txn_fail_cache;
int64_t txn_checkpoint_fsync_post;
- int64_t txn_checkpoint_fsync_pre;
int64_t txn_checkpoint_fsync_post_duration;
- int64_t txn_checkpoint_fsync_pre_duration;
int64_t txn_pinned_range;
int64_t txn_pinned_checkpoint_range;
int64_t txn_pinned_snapshot_range;
@@ -484,6 +505,7 @@ struct __wt_dsrc_stats {
int64_t btree_compact_rewrite;
int64_t btree_row_internal;
int64_t btree_row_leaf;
+ int64_t cache_bytes_inuse;
int64_t cache_bytes_read;
int64_t cache_bytes_write;
int64_t cache_eviction_checkpoint;
diff --git a/src/include/txn.h b/src/include/txn.h
index d10738cc670..2e41ae8620d 100644
--- a/src/include/txn.h
+++ b/src/include/txn.h
@@ -98,6 +98,7 @@ struct __wt_txn_global {
volatile uint32_t checkpoint_id; /* Checkpoint's session ID */
volatile uint64_t checkpoint_gen;
volatile uint64_t checkpoint_pinned;
+ volatile uint64_t checkpoint_txnid; /* Checkpoint's txn ID */
/* Named snapshot state. */
WT_RWLOCK *nsnap_rwlock;
diff --git a/src/include/txn.i b/src/include/txn.i
index 96f7426e421..8f0f49d9676 100644
--- a/src/include/txn.i
+++ b/src/include/txn.i
@@ -105,7 +105,8 @@ __wt_txn_oldest_id(WT_SESSION_IMPL *session)
{
WT_BTREE *btree;
WT_TXN_GLOBAL *txn_global;
- uint64_t checkpoint_gen, checkpoint_pinned, oldest_id;
+ uint64_t checkpoint_pinned, oldest_id;
+ bool include_checkpoint_txn;
txn_global = &S2C(session)->txn_global;
btree = S2BT_SAFE(session);
@@ -117,7 +118,11 @@ __wt_txn_oldest_id(WT_SESSION_IMPL *session)
* we take the minimum of the other two IDs, which is what we want.
*/
oldest_id = txn_global->oldest_id;
- WT_ORDERED_READ(checkpoint_gen, txn_global->checkpoint_gen);
+ if (btree == NULL)
+ include_checkpoint_txn = false;
+ else
+ WT_ORDERED_READ(
+ include_checkpoint_txn, btree->include_checkpoint_txn);
checkpoint_pinned = txn_global->checkpoint_pinned;
/*
@@ -130,10 +135,9 @@ __wt_txn_oldest_id(WT_SESSION_IMPL *session)
* checkpoint, or this handle is up to date with the active checkpoint
* then it's safe to ignore the checkpoint ID in the visibility check.
*/
- if (checkpoint_pinned == WT_TXN_NONE ||
+ if (!include_checkpoint_txn || checkpoint_pinned == WT_TXN_NONE ||
WT_TXNID_LT(oldest_id, checkpoint_pinned) ||
- WT_SESSION_IS_CHECKPOINT(session) ||
- (btree != NULL && btree->checkpoint_gen == checkpoint_gen))
+ WT_SESSION_IS_CHECKPOINT(session))
return (oldest_id);
return (checkpoint_pinned);
diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in
index f578f4e6c08..0e022048835 100644
--- a/src/include/wiredtiger.in
+++ b/src/include/wiredtiger.in
@@ -131,13 +131,13 @@ struct __wt_item {
* The maximum packed size of a 64-bit integer. The ::wiredtiger_struct_pack
* function will pack single long integers into at most this many bytes.
*/
-#define WT_INTPACK64_MAXSIZE ((int)sizeof (int64_t) + 1)
+#define WT_INTPACK64_MAXSIZE ((int)sizeof(int64_t) + 1)
/*!
* The maximum packed size of a 32-bit integer. The ::wiredtiger_struct_pack
* function will pack single integers into at most this many bytes.
*/
-#define WT_INTPACK32_MAXSIZE ((int)sizeof (int32_t) + 1)
+#define WT_INTPACK32_MAXSIZE ((int)sizeof(int32_t) + 1)
/*!
* A WT_CURSOR handle is the interface to a cursor.
@@ -405,6 +405,12 @@ struct __wt_cursor {
* WT_CURSOR::next (WT_CURSOR::prev) method will iterate from the
* beginning (end) of the table.
*
+ * If the cursor does not have record number keys or was not configured
+ * with "append=true", the cursor ends with no key set and a subsequent
+ * call to the WT_CURSOR::get_key method will fail. The cursor ends with
+ * no value set and a subsequent call to the WT_CURSOR::get_value method
+ * will fail.
+ *
* Inserting a new record after the current maximum record in a
* fixed-length bit field column-store (that is, a store with an
* 'r' type key and 't' type value) may implicitly create the missing
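
/*
 * Illustrative sketch, not part of this diff: with record-number keys and an
 * append cursor, WT_CURSOR::insert leaves the newly allocated record number
 * available through WT_CURSOR::get_key; without them, neither key nor value
 * is set after insert.  Assumes a table created with
 * "key_format=r,value_format=S"; "session" and "ret" come from the caller.
 */
WT_CURSOR *cursor;
uint64_t recno;

ret = session->open_cursor(session, "table:mytable", NULL, "append", &cursor);
cursor->set_value(cursor, "new value");
ret = cursor->insert(cursor);
ret = cursor->get_key(cursor, &recno);  /* the record number just allocated */
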
@@ -1159,8 +1165,8 @@ struct __wt_session {
* @config{ ),,}
* @config{memory_page_max, the maximum size a page can grow to in
* memory before being reconciled to disk. The specified size will be
- * adjusted to a lower bound of <code>50 * leaf_page_max</code>\, and an
- * upper bound of <code>cache_size / 2</code>. This limit is soft - it
+ * adjusted to a lower bound of <code>leaf_page_max</code>\, and an
+ * upper bound of <code>cache_size / 10</code>. This limit is soft - it
* is possible for pages to be temporarily larger than this value. This
* setting is ignored for LSM trees\, see \c chunk_size., an integer
* between 512B and 10TB; default \c 5MB.}
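
/*
 * Illustrative sketch, not part of this diff: memory_page_max is applied per
 * object at create time and, per the updated wording, is clamped between
 * leaf_page_max and cache_size / 10.  The table name is hypothetical;
 * "session" and "ret" come from the caller.
 */
ret = session->create(session, "table:mytable",
    "key_format=S,value_format=S,leaf_page_max=32KB,memory_page_max=10MB");
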
@@ -1783,8 +1789,6 @@ struct __wt_connection {
* database can configure both log_size and wait to set an upper bound
* for checkpoints; setting this value above 0 configures periodic
* checkpoints., an integer between 0 and 2GB; default \c 0.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;name, the checkpoint name., a string;
- * default \c "WiredTigerCheckpoint".}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;wait, seconds to wait between each
* checkpoint; setting this value above 0 configures periodic
* checkpoints., an integer between 0 and 100000; default \c 0.}
@@ -1806,11 +1810,11 @@ struct __wt_connection {
* @config{eviction_dirty_target, continue evicting until the cache has
* less dirty memory than the value\, as a percentage of the total cache
* size. Dirty pages will only be evicted if the cache is full enough
- * to trigger eviction., an integer between 5 and 99; default \c 80.}
+ * to trigger eviction., an integer between 1 and 99; default \c 5.}
* @config{eviction_dirty_trigger, trigger eviction when the cache is
* using this much memory for dirty content\, as a percentage of the
* total cache size. This setting only alters behavior if it is lower
- * than eviction_trigger., an integer between 5 and 99; default \c 95.}
+ * than eviction_trigger., an integer between 1 and 99; default \c 20.}
* @config{eviction_target, continue evicting until the cache has less
* total memory than the value\, as a percentage of the total cache
* size. Must be less than \c eviction_trigger., an integer between 10
@@ -1836,25 +1840,8 @@ struct __wt_connection {
* configuration options defined below.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;archive, automatically archive
* unneeded log files., a boolean flag; default \c true.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;compressor, configure a compressor
- * for log records. Permitted values are \c "none" or custom
- * compression engine name created with WT_CONNECTION::add_compressor.
- * If WiredTiger has builtin support for \c "snappy"\, \c "lz4" or \c
- * "zlib" compression\, these names are also available. See @ref
- * compression for more information., a string; default \c none.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;enabled, enable logging subsystem., a
- * boolean flag; default \c false.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;file_max, the maximum size of log
- * files., an integer between 100KB and 2GB; default \c 100MB.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;path, the path to a directory into
- * which the log files are written. If the value is not an absolute
- * path name\, the files are created relative to the database home., a
- * string; default \c ".".}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;prealloc,
- * pre-allocate log files., a boolean flag; default \c true.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;recover, run recovery or error if
- * recovery needs to run after an unclean shutdown., a string\, chosen
- * from the following options: \c "error"\, \c "on"; default \c on.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;prealloc, pre-allocate log files., a
+ * boolean flag; default \c true.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;zero_fill, manually write zeroes into
* log files., a boolean flag; default \c false.}
* @config{ ),,}
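
/*
 * Illustrative sketch, not part of this diff: periodic checkpoints can still
 * be adjusted through WT_CONNECTION::reconfigure, but the checkpoint "name"
 * option is gone and only the archive, prealloc and zero_fill log settings
 * remain reconfigurable.  "conn" and "ret" come from the caller.
 */
ret = conn->reconfigure(conn,
    "checkpoint=(wait=60,log_size=1GB),log=(archive=true,prealloc=true)");
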
@@ -1914,11 +1901,6 @@ struct __wt_connection {
* statistics in JSON format., a boolean flag; default \c false.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;on_close, log statistics on database
* close., a boolean flag; default \c false.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;path, the pathname to a file into
- * which the log records are written\, may contain ISO C standard
- * strftime conversion specifications. If the value is not an absolute
- * path name\, the file is created relative to the database home., a
- * string; default \c "WiredTigerStat.%d.%H".}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;sources, if non-empty\, include
* statistics for the list of data source URIs\, if they are open at the
* time of the statistics logging. The list may include URIs matching a
@@ -2223,11 +2205,10 @@ struct __wt_connection {
* configure both log_size and wait to set an upper bound for checkpoints;
* setting this value above 0 configures periodic checkpoints., an integer
* between 0 and 2GB; default \c 0.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;name, the
- * checkpoint name., a string; default \c "WiredTigerCheckpoint".}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;wait, seconds to wait between each
- * checkpoint; setting this value above 0 configures periodic checkpoints., an
- * integer between 0 and 100000; default \c 0.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;wait,
+ * seconds to wait between each checkpoint; setting this value above 0
+ * configures periodic checkpoints., an integer between 0 and 100000; default \c
+ * 0.}
* @config{ ),,}
* @config{checkpoint_sync, flush files to stable storage when closing or
* writing checkpoints., a boolean flag; default \c true.}
@@ -2288,11 +2269,11 @@ struct __wt_connection {
* @config{eviction_dirty_target, continue evicting until the cache has less
* dirty memory than the value\, as a percentage of the total cache size. Dirty
* pages will only be evicted if the cache is full enough to trigger eviction.,
- * an integer between 5 and 99; default \c 80.}
+ * an integer between 1 and 99; default \c 5.}
* @config{eviction_dirty_trigger, trigger eviction when the cache is using this
* much memory for dirty content\, as a percentage of the total cache size.
* This setting only alters behavior if it is lower than eviction_trigger., an
- * integer between 5 and 99; default \c 95.}
+ * integer between 1 and 99; default \c 20.}
* @config{eviction_target, continue evicting until the cache has less total
* memory than the value\, as a percentage of the total cache size. Must be
* less than \c eviction_trigger., an integer between 10 and 99; default \c 80.}
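
/*
 * Illustrative sketch, not part of this diff: the new dirty-eviction defaults
 * (target 5%, trigger 20%) can be overridden when the connection is opened;
 * "home" and "ret" come from the caller.
 */
WT_CONNECTION *conn;

ret = wiredtiger_open(home, NULL,
    "create,cache_size=2GB,eviction_dirty_target=10,eviction_dirty_trigger=40",
    &conn);
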
@@ -2343,9 +2324,10 @@ struct __wt_connection {
* subsystem., a boolean flag; default \c false.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;file_max, the maximum size of log files., an
* integer between 100KB and 2GB; default \c 100MB.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;path, the path to a directory into which the
- * log files are written. If the value is not an absolute path name\, the files
- * are created relative to the database home., a string; default \c ".".}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;path, the name of a directory into which log
+ * files are written. The directory must already exist. If the value is not an
+ * absolute path\, the path is relative to the database home (see @ref
+ * absolute_path for more information)., a string; default \c ".".}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;prealloc, pre-allocate log files., a boolean
* flag; default \c true.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;recover, run recovery
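
/*
 * Illustrative sketch, not part of this diff: under the reworded option,
 * log.path names a directory that must already exist; a relative value is
 * resolved against the database home.  "journal" is a hypothetical directory
 * created by the application before opening; "home", "conn" and "ret" come
 * from the caller.
 */
ret = wiredtiger_open(home, NULL,
    "create,log=(enabled=true,path=journal)", &conn);
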
@@ -2415,16 +2397,15 @@ struct __wt_connection {
* boolean flag; default \c false.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;on_close,
* log statistics on database close., a boolean flag; default \c false.}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;path, the pathname to a file into which the
- * log records are written\, may contain ISO C standard strftime conversion
- * specifications. If the value is not an absolute path name\, the file is
- * created relative to the database home., a string; default \c
- * "WiredTigerStat.%d.%H".}
- * @config{&nbsp;&nbsp;&nbsp;&nbsp;sources, if
- * non-empty\, include statistics for the list of data source URIs\, if they are
- * open at the time of the statistics logging. The list may include URIs
- * matching a single data source ("table:mytable")\, or a URI matching all data
- * sources of a particular type ("table:")., a list of strings; default empty.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;path, the name of a directory into which
+ * statistics files are written. The directory must already exist. If the
+ * value is not an absolute path\, the path is relative to the database home
+ * (see @ref absolute_path for more information)., a string; default \c ".".}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;sources, if non-empty\, include statistics
+ * for the list of data source URIs\, if they are open at the time of the
+ * statistics logging. The list may include URIs matching a single data source
+ * ("table:mytable")\, or a URI matching all data sources of a particular type
+ * ("table:")., a list of strings; default empty.}
* @config{&nbsp;&nbsp;&nbsp;&nbsp;timestamp, a timestamp prepended to each log
* record\, may contain strftime conversion specifications\, when \c json is
* configured\, defaults to \c "%FT%Y.000Z"., a string; default \c "%b %d
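
/*
 * Illustrative sketch, not part of this diff: statistics_log.path now names a
 * directory for the statistics files rather than a strftime file pattern, and
 * the directory must already exist.  Statistics gathering is enabled
 * separately, here with "statistics=(fast)"; "stats" is a hypothetical
 * directory and "home", "conn" and "ret" come from the caller.
 */
ret = wiredtiger_open(home, NULL,
    "create,statistics=(fast),statistics_log=(wait=30,json=true,path=stats)",
    &conn);
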
@@ -3701,24 +3682,34 @@ struct __wt_extractor {
#if !defined(SWIG)
/*! WT_FILE_SYSTEM::open_file file types */
typedef enum {
- WT_OPEN_FILE_TYPE_CHECKPOINT, /*!< open a data file checkpoint */
- WT_OPEN_FILE_TYPE_DATA, /*!< open a data file */
- WT_OPEN_FILE_TYPE_DIRECTORY, /*!< open a directory */
- WT_OPEN_FILE_TYPE_LOG, /*!< open a log file */
- WT_OPEN_FILE_TYPE_REGULAR /*!< open a regular file */
-} WT_OPEN_FILE_TYPE;
+ WT_FS_OPEN_FILE_TYPE_CHECKPOINT,/*!< open a data file checkpoint */
+ WT_FS_OPEN_FILE_TYPE_DATA, /*!< open a data file */
+ WT_FS_OPEN_FILE_TYPE_DIRECTORY, /*!< open a directory */
+ WT_FS_OPEN_FILE_TYPE_LOG, /*!< open a log file */
+ WT_FS_OPEN_FILE_TYPE_REGULAR /*!< open a regular file */
+} WT_FS_OPEN_FILE_TYPE;
/*! WT_FILE_SYSTEM::open_file flags: create if does not exist */
-#define WT_OPEN_CREATE 0x001
+#define WT_FS_OPEN_CREATE 0x001
/*! WT_FILE_SYSTEM::open_file flags: direct I/O requested */
-#define WT_OPEN_DIRECTIO 0x002
-/*! WT_FILE_SYSTEM::open_file flags: error if exclusive use not available */
-#define WT_OPEN_EXCLUSIVE 0x004
+#define WT_FS_OPEN_DIRECTIO 0x002
+/*! WT_FILE_SYSTEM::open_file flags: file creation must be durable */
+#define WT_FS_OPEN_DURABLE 0x004
+/*!
+ * WT_FILE_SYSTEM::open_file flags: return EBUSY if exclusive use not available
+ */
+#define WT_FS_OPEN_EXCLUSIVE 0x008
#ifndef DOXYGEN
-#define WT_OPEN_FIXED 0x008 /* Path not home relative (internal) */
+#define WT_FS_OPEN_FIXED 0x010 /* Path not home relative (internal) */
#endif
/*! WT_FILE_SYSTEM::open_file flags: open is read-only */
-#define WT_OPEN_READONLY 0x010
+#define WT_FS_OPEN_READONLY 0x020
+
+/*!
+ * WT_FILE_SYSTEM::remove or WT_FILE_SYSTEM::rename flags: the remove or rename
+ * operation must be durable
+ */
+#define WT_FS_DURABLE 0x001
/*!
* The interface implemented by applications to provide a custom file system
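
/*
 * Illustrative sketch, not part of this diff: how a custom file system might
 * interpret the renamed WT_FS_OPEN_* flags.  The function name and the POSIX
 * mapping are assumptions; allocating the WT_FILE_HANDLE and honoring
 * WT_FS_OPEN_DURABLE (flushing the parent directory) are omitted here.
 */
#include <errno.h>
#include <fcntl.h>

static int
my_fs_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *session,
    const char *name, WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags,
    WT_FILE_HANDLE **file_handlep)
{
        int f;

        (void)file_system; (void)session; (void)file_type; (void)file_handlep;

        f = (flags & WT_FS_OPEN_READONLY) ? O_RDONLY : O_RDWR;
        if (flags & WT_FS_OPEN_CREATE)
                f |= O_CREAT;
        if (flags & WT_FS_OPEN_EXCLUSIVE)       /* report EBUSY if in use */
                f |= O_EXCL;
#ifdef O_DIRECT
        if (flags & WT_FS_OPEN_DIRECTIO)
                f |= O_DIRECT;
#endif
        (void)name; (void)f;
        return (ENOTSUP);       /* handle allocation omitted from the sketch */
}
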
@@ -3748,7 +3739,7 @@ struct __wt_file_system {
* @param[out] dirlist the method returns an allocated array of
* individually allocated strings, one for each entry in the
* directory.
- * @param[out] countp the method the number of entries returned
+ * @param[out] countp the number of entries returned
*/
int (*fs_directory_list)(WT_FILE_SYSTEM *file_system,
WT_SESSION *session, const char *directory, const char *prefix,
@@ -3768,23 +3759,6 @@ struct __wt_file_system {
WT_SESSION *session, char **dirlist, uint32_t count);
/*!
- * Flush the named directory.
- *
- * This method is not required for readonly file systems or file systems
- * where it is not necessary to flush a file's directory to ensure the
- * durability of file system operations, and should be set to NULL when
- * not required by the file system.
- *
- * @errors
- *
- * @param file_system the WT_FILE_SYSTEM
- * @param session the current WiredTiger session
- * @param directory the name of the directory
- */
- int (*fs_directory_sync)(WT_FILE_SYSTEM *file_system,
- WT_SESSION *session, const char *directory);
-
- /*!
* Return if the named file system object exists.
*
* @errors
@@ -3800,6 +3774,16 @@ struct __wt_file_system {
/*!
* Open a handle for a named file system object
*
+ * The method should return ENOENT if the file is not being created and
+ * does not exist.
+ *
+ * The method should return EACCES if the file cannot be opened in the
+ * requested mode (for example, a file opened for writing in a readonly
+ * file system).
+ *
+ * The method should return EBUSY if ::WT_FS_OPEN_EXCLUSIVE is set and
+ * the file is in use.
+ *
* @errors
*
* @param file_system the WT_FILE_SYSTEM
@@ -3809,8 +3793,8 @@ struct __wt_file_system {
* The file type is provided to allow optimization for different file
* access patterns.
* @param flags flags indicating how to open the file, one or more of
- * ::WT_OPEN_CREATE, ::WT_OPEN_DIRECTIO, ::WT_OPEN_EXCLUSIVE or
- * ::WT_OPEN_READONLY.
+ * ::WT_FS_OPEN_CREATE, ::WT_FS_OPEN_DIRECTIO, ::WT_FS_OPEN_DURABLE,
+ * ::WT_FS_OPEN_EXCLUSIVE or ::WT_FS_OPEN_READONLY.
* @param[out] file_handlep the handle to the newly opened file. File
* system implementations must allocate memory for the handle and
* the WT_FILE_HANDLE::name field, and fill in the WT_FILE_HANDLE::
@@ -3819,7 +3803,7 @@ struct __wt_file_system {
* their own structure as a superset of a WT_FILE_HANDLE:: structure.
*/
int (*fs_open_file)(WT_FILE_SYSTEM *file_system, WT_SESSION *session,
- const char *name, WT_OPEN_FILE_TYPE file_type, uint32_t flags,
+ const char *name, WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags,
WT_FILE_HANDLE **file_handlep);
/*!
@@ -3833,9 +3817,11 @@ struct __wt_file_system {
* @param file_system the WT_FILE_SYSTEM
* @param session the current WiredTiger session
* @param name the name of the file system object
+ * @param flags 0 or ::WT_FS_DURABLE
*/
- int (*fs_remove)(
- WT_FILE_SYSTEM *file_system, WT_SESSION *session, const char *name);
+ int (*fs_remove)(WT_FILE_SYSTEM *file_system,
+ WT_SESSION *session, const char *name, uint32_t flags);
/*!
* Rename a named file system object
@@ -3849,9 +3835,10 @@ struct __wt_file_system {
* @param session the current WiredTiger session
* @param from the original name of the object
* @param to the new name for the object
+ * @param flags 0 or ::WT_FS_DURABLE
*/
- int (*fs_rename)(WT_FILE_SYSTEM *file_system,
- WT_SESSION *session, const char *from, const char *to);
+ int (*fs_rename)(WT_FILE_SYSTEM *file_system, WT_SESSION *session,
+ const char *from, const char *to, uint32_t flags);
/*!
* Return the size of a named file system object
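
/*
 * Illustrative sketch, not part of this diff: with fs_directory_sync removed,
 * durability is requested per operation.  A POSIX-style remove might honor
 * ::WT_FS_DURABLE like this; the function name and mapping are assumptions.
 */
#include <errno.h>
#include <unistd.h>

static int
my_fs_remove(WT_FILE_SYSTEM *file_system, WT_SESSION *session,
    const char *name, uint32_t flags)
{
        (void)file_system; (void)session;

        if (unlink(name) != 0)
                return (errno);
        if (flags & WT_FS_DURABLE) {
                /*
                 * Flush the file's parent directory here (open it and fsync
                 * the descriptor) so the remove survives a crash.
                 */
        }
        return (0);
}
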
@@ -3981,7 +3968,7 @@ struct __wt_file_handle {
/*!
* Lock/unlock a file from the perspective of other processes running
- * in the system.
+ * in the system, where necessary.
*
* @errors
*
@@ -4256,340 +4243,380 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_BLOCK_BYTE_READ 1026
/*! block-manager: bytes written */
#define WT_STAT_CONN_BLOCK_BYTE_WRITE 1027
+/*! block-manager: bytes written for checkpoint */
+#define WT_STAT_CONN_BLOCK_BYTE_WRITE_CHECKPOINT 1028
/*! block-manager: mapped blocks read */
-#define WT_STAT_CONN_BLOCK_MAP_READ 1028
+#define WT_STAT_CONN_BLOCK_MAP_READ 1029
/*! block-manager: mapped bytes read */
-#define WT_STAT_CONN_BLOCK_BYTE_MAP_READ 1029
+#define WT_STAT_CONN_BLOCK_BYTE_MAP_READ 1030
+/*! cache: bytes belonging to page images in the cache */
+#define WT_STAT_CONN_CACHE_BYTES_IMAGE 1031
/*! cache: bytes currently in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_INUSE 1030
+#define WT_STAT_CONN_CACHE_BYTES_INUSE 1032
+/*! cache: bytes not belonging to page images in the cache */
+#define WT_STAT_CONN_CACHE_BYTES_OTHER 1033
/*! cache: bytes read into cache */
-#define WT_STAT_CONN_CACHE_BYTES_READ 1031
+#define WT_STAT_CONN_CACHE_BYTES_READ 1034
/*! cache: bytes written from cache */
-#define WT_STAT_CONN_CACHE_BYTES_WRITE 1032
+#define WT_STAT_CONN_CACHE_BYTES_WRITE 1035
/*! cache: checkpoint blocked page eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_CHECKPOINT 1033
+#define WT_STAT_CONN_CACHE_EVICTION_CHECKPOINT 1036
/*! cache: eviction calls to get a page */
-#define WT_STAT_CONN_CACHE_EVICTION_GET_REF 1034
+#define WT_STAT_CONN_CACHE_EVICTION_GET_REF 1037
/*! cache: eviction calls to get a page found queue empty */
-#define WT_STAT_CONN_CACHE_EVICTION_GET_REF_EMPTY 1035
+#define WT_STAT_CONN_CACHE_EVICTION_GET_REF_EMPTY 1038
/*! cache: eviction calls to get a page found queue empty after locking */
-#define WT_STAT_CONN_CACHE_EVICTION_GET_REF_EMPTY2 1036
+#define WT_STAT_CONN_CACHE_EVICTION_GET_REF_EMPTY2 1039
/*! cache: eviction currently operating in aggressive mode */
-#define WT_STAT_CONN_CACHE_EVICTION_AGGRESSIVE_SET 1037
+#define WT_STAT_CONN_CACHE_EVICTION_AGGRESSIVE_SET 1040
/*! cache: eviction server candidate queue empty when topping up */
-#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_EMPTY 1038
+#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_EMPTY 1041
/*! cache: eviction server candidate queue not empty when topping up */
-#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_NOT_EMPTY 1039
+#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_NOT_EMPTY 1042
/*! cache: eviction server evicting pages */
-#define WT_STAT_CONN_CACHE_EVICTION_SERVER_EVICTING 1040
+#define WT_STAT_CONN_CACHE_EVICTION_SERVER_EVICTING 1043
/*! cache: eviction server populating queue, but not evicting pages */
-#define WT_STAT_CONN_CACHE_EVICTION_SERVER_NOT_EVICTING 1041
+#define WT_STAT_CONN_CACHE_EVICTION_SERVER_NOT_EVICTING 1044
/*! cache: eviction server skipped very large page */
-#define WT_STAT_CONN_CACHE_EVICTION_SERVER_TOOBIG 1042
+#define WT_STAT_CONN_CACHE_EVICTION_SERVER_TOOBIG 1045
/*! cache: eviction server slept, because we did not make progress with
* eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_SERVER_SLEPT 1043
+#define WT_STAT_CONN_CACHE_EVICTION_SERVER_SLEPT 1046
/*! cache: eviction server unable to reach eviction goal */
-#define WT_STAT_CONN_CACHE_EVICTION_SLOW 1044
+#define WT_STAT_CONN_CACHE_EVICTION_SLOW 1047
/*! cache: eviction worker thread evicting pages */
-#define WT_STAT_CONN_CACHE_EVICTION_WORKER_EVICTING 1045
+#define WT_STAT_CONN_CACHE_EVICTION_WORKER_EVICTING 1048
/*! cache: failed eviction of pages that exceeded the in-memory maximum */
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL 1046
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL 1049
/*! cache: files with active eviction walks */
-#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ACTIVE 1047
+#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ACTIVE 1050
/*! cache: files with new eviction walks started */
-#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STARTED 1048
+#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STARTED 1051
/*! cache: hazard pointer blocked page eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1049
+#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1052
/*! cache: hazard pointer check calls */
-#define WT_STAT_CONN_CACHE_HAZARD_CHECKS 1050
+#define WT_STAT_CONN_CACHE_HAZARD_CHECKS 1053
/*! cache: hazard pointer check entries walked */
-#define WT_STAT_CONN_CACHE_HAZARD_WALKS 1051
+#define WT_STAT_CONN_CACHE_HAZARD_WALKS 1054
/*! cache: hazard pointer maximum array length */
-#define WT_STAT_CONN_CACHE_HAZARD_MAX 1052
+#define WT_STAT_CONN_CACHE_HAZARD_MAX 1055
/*! cache: in-memory page passed criteria to be split */
-#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1053
+#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1056
/*! cache: in-memory page splits */
-#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1054
+#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1057
/*! cache: internal pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1055
+#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1058
/*! cache: internal pages split during eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1056
+#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1059
/*! cache: leaf pages split during eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1057
+#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1060
/*! cache: lookaside table insert calls */
-#define WT_STAT_CONN_CACHE_LOOKASIDE_INSERT 1058
+#define WT_STAT_CONN_CACHE_LOOKASIDE_INSERT 1061
/*! cache: lookaside table remove calls */
-#define WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE 1059
+#define WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE 1062
/*! cache: maximum bytes configured */
-#define WT_STAT_CONN_CACHE_BYTES_MAX 1060
+#define WT_STAT_CONN_CACHE_BYTES_MAX 1063
/*! cache: maximum page size at eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1061
+#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1064
/*! cache: modified pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1062
+#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1065
/*! cache: modified pages evicted by application threads */
-#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1063
+#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1066
+/*! cache: overflow pages read into cache */
+#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1067
+/*! cache: overflow values cached in memory */
+#define WT_STAT_CONN_CACHE_OVERFLOW_VALUE 1068
/*! cache: page split during eviction deepened the tree */
-#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1064
+#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1069
/*! cache: page written requiring lookaside records */
-#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1065
+#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1070
/*! cache: pages currently held in the cache */
-#define WT_STAT_CONN_CACHE_PAGES_INUSE 1066
+#define WT_STAT_CONN_CACHE_PAGES_INUSE 1071
/*! cache: pages evicted because they exceeded the in-memory maximum */
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1067
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1072
/*! cache: pages evicted because they had chains of deleted items */
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1068
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1073
/*! cache: pages evicted by application threads */
-#define WT_STAT_CONN_CACHE_EVICTION_APP 1069
+#define WT_STAT_CONN_CACHE_EVICTION_APP 1074
/*! cache: pages queued for eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1070
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1075
/*! cache: pages queued for urgent eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1071
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT 1076
+/*! cache: pages queued for urgent eviction during walk */
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1077
/*! cache: pages read into cache */
-#define WT_STAT_CONN_CACHE_READ 1072
+#define WT_STAT_CONN_CACHE_READ 1078
/*! cache: pages read into cache requiring lookaside entries */
-#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1073
+#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1079
/*! cache: pages requested from the cache */
-#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1074
+#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1080
/*! cache: pages seen by eviction walk */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1075
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1081
/*! cache: pages selected for eviction unable to be evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1076
+#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1082
/*! cache: pages walked for eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_WALK 1077
+#define WT_STAT_CONN_CACHE_EVICTION_WALK 1083
/*! cache: pages written from cache */
-#define WT_STAT_CONN_CACHE_WRITE 1078
+#define WT_STAT_CONN_CACHE_WRITE 1084
/*! cache: pages written requiring in-memory restoration */
-#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1079
+#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1085
/*! cache: percentage overhead */
-#define WT_STAT_CONN_CACHE_OVERHEAD 1080
+#define WT_STAT_CONN_CACHE_OVERHEAD 1086
/*! cache: tracked bytes belonging to internal pages in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1081
+#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1087
/*! cache: tracked bytes belonging to leaf pages in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_LEAF 1082
-/*! cache: tracked bytes belonging to overflow pages in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_OVERFLOW 1083
+#define WT_STAT_CONN_CACHE_BYTES_LEAF 1088
/*! cache: tracked dirty bytes in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1084
+#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1089
/*! cache: tracked dirty pages in the cache */
-#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1085
+#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1090
/*! cache: unmodified pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1086
+#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1091
/*! connection: auto adjusting condition resets */
-#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1087
+#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1092
/*! connection: auto adjusting condition wait calls */
-#define WT_STAT_CONN_COND_AUTO_WAIT 1088
+#define WT_STAT_CONN_COND_AUTO_WAIT 1093
/*! connection: files currently open */
-#define WT_STAT_CONN_FILE_OPEN 1089
+#define WT_STAT_CONN_FILE_OPEN 1094
/*! connection: memory allocations */
-#define WT_STAT_CONN_MEMORY_ALLOCATION 1090
+#define WT_STAT_CONN_MEMORY_ALLOCATION 1095
/*! connection: memory frees */
-#define WT_STAT_CONN_MEMORY_FREE 1091
+#define WT_STAT_CONN_MEMORY_FREE 1096
/*! connection: memory re-allocations */
-#define WT_STAT_CONN_MEMORY_GROW 1092
+#define WT_STAT_CONN_MEMORY_GROW 1097
/*! connection: pthread mutex condition wait calls */
-#define WT_STAT_CONN_COND_WAIT 1093
+#define WT_STAT_CONN_COND_WAIT 1098
/*! connection: pthread mutex shared lock read-lock calls */
-#define WT_STAT_CONN_RWLOCK_READ 1094
+#define WT_STAT_CONN_RWLOCK_READ 1099
/*! connection: pthread mutex shared lock write-lock calls */
-#define WT_STAT_CONN_RWLOCK_WRITE 1095
+#define WT_STAT_CONN_RWLOCK_WRITE 1100
/*! connection: total fsync I/Os */
-#define WT_STAT_CONN_FSYNC_IO 1096
+#define WT_STAT_CONN_FSYNC_IO 1101
/*! connection: total read I/Os */
-#define WT_STAT_CONN_READ_IO 1097
+#define WT_STAT_CONN_READ_IO 1102
/*! connection: total write I/Os */
-#define WT_STAT_CONN_WRITE_IO 1098
+#define WT_STAT_CONN_WRITE_IO 1103
/*! cursor: cursor create calls */
-#define WT_STAT_CONN_CURSOR_CREATE 1099
+#define WT_STAT_CONN_CURSOR_CREATE 1104
/*! cursor: cursor insert calls */
-#define WT_STAT_CONN_CURSOR_INSERT 1100
+#define WT_STAT_CONN_CURSOR_INSERT 1105
/*! cursor: cursor next calls */
-#define WT_STAT_CONN_CURSOR_NEXT 1101
+#define WT_STAT_CONN_CURSOR_NEXT 1106
/*! cursor: cursor prev calls */
-#define WT_STAT_CONN_CURSOR_PREV 1102
+#define WT_STAT_CONN_CURSOR_PREV 1107
/*! cursor: cursor remove calls */
-#define WT_STAT_CONN_CURSOR_REMOVE 1103
+#define WT_STAT_CONN_CURSOR_REMOVE 1108
/*! cursor: cursor reset calls */
-#define WT_STAT_CONN_CURSOR_RESET 1104
+#define WT_STAT_CONN_CURSOR_RESET 1109
/*! cursor: cursor restarted searches */
-#define WT_STAT_CONN_CURSOR_RESTART 1105
+#define WT_STAT_CONN_CURSOR_RESTART 1110
/*! cursor: cursor search calls */
-#define WT_STAT_CONN_CURSOR_SEARCH 1106
+#define WT_STAT_CONN_CURSOR_SEARCH 1111
/*! cursor: cursor search near calls */
-#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1107
+#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1112
/*! cursor: cursor update calls */
-#define WT_STAT_CONN_CURSOR_UPDATE 1108
+#define WT_STAT_CONN_CURSOR_UPDATE 1113
/*! cursor: truncate calls */
-#define WT_STAT_CONN_CURSOR_TRUNCATE 1109
+#define WT_STAT_CONN_CURSOR_TRUNCATE 1114
/*! data-handle: connection data handles currently active */
-#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1110
+#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1115
/*! data-handle: connection sweep candidate became referenced */
-#define WT_STAT_CONN_DH_SWEEP_REF 1111
+#define WT_STAT_CONN_DH_SWEEP_REF 1116
/*! data-handle: connection sweep dhandles closed */
-#define WT_STAT_CONN_DH_SWEEP_CLOSE 1112
+#define WT_STAT_CONN_DH_SWEEP_CLOSE 1117
/*! data-handle: connection sweep dhandles removed from hash list */
-#define WT_STAT_CONN_DH_SWEEP_REMOVE 1113
+#define WT_STAT_CONN_DH_SWEEP_REMOVE 1118
/*! data-handle: connection sweep time-of-death sets */
-#define WT_STAT_CONN_DH_SWEEP_TOD 1114
+#define WT_STAT_CONN_DH_SWEEP_TOD 1119
/*! data-handle: connection sweeps */
-#define WT_STAT_CONN_DH_SWEEPS 1115
+#define WT_STAT_CONN_DH_SWEEPS 1120
/*! data-handle: session dhandles swept */
-#define WT_STAT_CONN_DH_SESSION_HANDLES 1116
+#define WT_STAT_CONN_DH_SESSION_HANDLES 1121
/*! data-handle: session sweep attempts */
-#define WT_STAT_CONN_DH_SESSION_SWEEPS 1117
+#define WT_STAT_CONN_DH_SESSION_SWEEPS 1122
/*! log: busy returns attempting to switch slots */
-#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1118
+#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1123
/*! log: consolidated slot closures */
-#define WT_STAT_CONN_LOG_SLOT_CLOSES 1119
+#define WT_STAT_CONN_LOG_SLOT_CLOSES 1124
/*! log: consolidated slot join races */
-#define WT_STAT_CONN_LOG_SLOT_RACES 1120
+#define WT_STAT_CONN_LOG_SLOT_RACES 1125
/*! log: consolidated slot join transitions */
-#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1121
+#define WT_STAT_CONN_LOG_SLOT_TRANSITIONS 1126
/*! log: consolidated slot joins */
-#define WT_STAT_CONN_LOG_SLOT_JOINS 1122
+#define WT_STAT_CONN_LOG_SLOT_JOINS 1127
/*! log: consolidated slot unbuffered writes */
-#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1123
+#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1128
/*! log: log bytes of payload data */
-#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1124
+#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1129
/*! log: log bytes written */
-#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1125
+#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1130
/*! log: log files manually zero-filled */
-#define WT_STAT_CONN_LOG_ZERO_FILLS 1126
+#define WT_STAT_CONN_LOG_ZERO_FILLS 1131
/*! log: log flush operations */
-#define WT_STAT_CONN_LOG_FLUSH 1127
+#define WT_STAT_CONN_LOG_FLUSH 1132
/*! log: log force write operations */
-#define WT_STAT_CONN_LOG_FORCE_WRITE 1128
+#define WT_STAT_CONN_LOG_FORCE_WRITE 1133
/*! log: log force write operations skipped */
-#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1129
+#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1134
/*! log: log records compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1130
+#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1135
/*! log: log records not compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1131
+#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1136
/*! log: log records too small to compress */
-#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1132
+#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1137
/*! log: log release advances write LSN */
-#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1133
+#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1138
/*! log: log scan operations */
-#define WT_STAT_CONN_LOG_SCANS 1134
+#define WT_STAT_CONN_LOG_SCANS 1139
/*! log: log scan records requiring two reads */
-#define WT_STAT_CONN_LOG_SCAN_REREADS 1135
+#define WT_STAT_CONN_LOG_SCAN_REREADS 1140
/*! log: log server thread advances write LSN */
-#define WT_STAT_CONN_LOG_WRITE_LSN 1136
+#define WT_STAT_CONN_LOG_WRITE_LSN 1141
/*! log: log server thread write LSN walk skipped */
-#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1137
+#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1142
/*! log: log sync operations */
-#define WT_STAT_CONN_LOG_SYNC 1138
+#define WT_STAT_CONN_LOG_SYNC 1143
/*! log: log sync time duration (usecs) */
-#define WT_STAT_CONN_LOG_SYNC_DURATION 1139
+#define WT_STAT_CONN_LOG_SYNC_DURATION 1144
/*! log: log sync_dir operations */
-#define WT_STAT_CONN_LOG_SYNC_DIR 1140
+#define WT_STAT_CONN_LOG_SYNC_DIR 1145
/*! log: log sync_dir time duration (usecs) */
-#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1141
+#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1146
/*! log: log write operations */
-#define WT_STAT_CONN_LOG_WRITES 1142
+#define WT_STAT_CONN_LOG_WRITES 1147
/*! log: logging bytes consolidated */
-#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1143
+#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1148
/*! log: maximum log file size */
-#define WT_STAT_CONN_LOG_MAX_FILESIZE 1144
+#define WT_STAT_CONN_LOG_MAX_FILESIZE 1149
/*! log: number of pre-allocated log files to create */
-#define WT_STAT_CONN_LOG_PREALLOC_MAX 1145
+#define WT_STAT_CONN_LOG_PREALLOC_MAX 1150
/*! log: pre-allocated log files not ready and missed */
-#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1146
+#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1151
/*! log: pre-allocated log files prepared */
-#define WT_STAT_CONN_LOG_PREALLOC_FILES 1147
+#define WT_STAT_CONN_LOG_PREALLOC_FILES 1152
/*! log: pre-allocated log files used */
-#define WT_STAT_CONN_LOG_PREALLOC_USED 1148
+#define WT_STAT_CONN_LOG_PREALLOC_USED 1153
/*! log: records processed by log scan */
-#define WT_STAT_CONN_LOG_SCAN_RECORDS 1149
+#define WT_STAT_CONN_LOG_SCAN_RECORDS 1154
/*! log: total in-memory size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_MEM 1150
+#define WT_STAT_CONN_LOG_COMPRESS_MEM 1155
/*! log: total log buffer size */
-#define WT_STAT_CONN_LOG_BUFFER_SIZE 1151
+#define WT_STAT_CONN_LOG_BUFFER_SIZE 1156
/*! log: total size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_LEN 1152
+#define WT_STAT_CONN_LOG_COMPRESS_LEN 1157
/*! log: written slots coalesced */
-#define WT_STAT_CONN_LOG_SLOT_COALESCED 1153
+#define WT_STAT_CONN_LOG_SLOT_COALESCED 1158
/*! log: yields waiting for previous log file close */
-#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1154
+#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1159
/*! reconciliation: fast-path pages deleted */
-#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1155
+#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1160
/*! reconciliation: page reconciliation calls */
-#define WT_STAT_CONN_REC_PAGES 1156
+#define WT_STAT_CONN_REC_PAGES 1161
/*! reconciliation: page reconciliation calls for eviction */
-#define WT_STAT_CONN_REC_PAGES_EVICTION 1157
+#define WT_STAT_CONN_REC_PAGES_EVICTION 1162
/*! reconciliation: pages deleted */
-#define WT_STAT_CONN_REC_PAGE_DELETE 1158
+#define WT_STAT_CONN_REC_PAGE_DELETE 1163
/*! reconciliation: split bytes currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1159
+#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1164
/*! reconciliation: split objects currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1160
+#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1165
/*! session: open cursor count */
-#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1161
+#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1166
/*! session: open session count */
-#define WT_STAT_CONN_SESSION_OPEN 1162
+#define WT_STAT_CONN_SESSION_OPEN 1167
+/*! session: table compact failed calls */
+#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1168
+/*! session: table compact successful calls */
+#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1169
+/*! session: table create failed calls */
+#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1170
+/*! session: table create successful calls */
+#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1171
+/*! session: table drop failed calls */
+#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1172
+/*! session: table drop successful calls */
+#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1173
+/*! session: table rebalance failed calls */
+#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1174
+/*! session: table rebalance successful calls */
+#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1175
+/*! session: table rename failed calls */
+#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1176
+/*! session: table rename successful calls */
+#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1177
+/*! session: table salvage failed calls */
+#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1178
+/*! session: table salvage successful calls */
+#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1179
+/*! session: table truncate failed calls */
+#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1180
+/*! session: table truncate successful calls */
+#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1181
+/*! session: table verify failed calls */
+#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1182
+/*! session: table verify successful calls */
+#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1183
/*! thread-state: active filesystem fsync calls */
-#define WT_STAT_CONN_FSYNC_ACTIVE 1163
+#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1184
/*! thread-state: active filesystem read calls */
-#define WT_STAT_CONN_READ_ACTIVE 1164
+#define WT_STAT_CONN_THREAD_READ_ACTIVE 1185
/*! thread-state: active filesystem write calls */
-#define WT_STAT_CONN_WRITE_ACTIVE 1165
+#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1186
/*! thread-yield: page acquire busy blocked */
-#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1166
+#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1187
/*! thread-yield: page acquire eviction blocked */
-#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1167
+#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1188
/*! thread-yield: page acquire locked blocked */
-#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1168
+#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1189
/*! thread-yield: page acquire read blocked */
-#define WT_STAT_CONN_PAGE_READ_BLOCKED 1169
+#define WT_STAT_CONN_PAGE_READ_BLOCKED 1190
/*! thread-yield: page acquire time sleeping (usecs) */
-#define WT_STAT_CONN_PAGE_SLEEP 1170
+#define WT_STAT_CONN_PAGE_SLEEP 1191
/*! transaction: number of named snapshots created */
-#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1171
+#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1192
/*! transaction: number of named snapshots dropped */
-#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1172
+#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1193
/*! transaction: transaction begins */
-#define WT_STAT_CONN_TXN_BEGIN 1173
+#define WT_STAT_CONN_TXN_BEGIN 1194
/*! transaction: transaction checkpoint currently running */
-#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1174
+#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1195
/*! transaction: transaction checkpoint generation */
-#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1175
+#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1196
/*! transaction: transaction checkpoint max time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1176
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1197
/*! transaction: transaction checkpoint min time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1177
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1198
/*! transaction: transaction checkpoint most recent time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1178
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1199
+/*! transaction: transaction checkpoint scrub dirty target */
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1200
+/*! transaction: transaction checkpoint scrub time (msecs) */
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1201
/*! transaction: transaction checkpoint total time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1179
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1202
/*! transaction: transaction checkpoints */
-#define WT_STAT_CONN_TXN_CHECKPOINT 1180
+#define WT_STAT_CONN_TXN_CHECKPOINT 1203
/*! transaction: transaction failures due to cache overflow */
-#define WT_STAT_CONN_TXN_FAIL_CACHE 1181
+#define WT_STAT_CONN_TXN_FAIL_CACHE 1204
/*! transaction: transaction fsync calls for checkpoint after allocating
* the transaction ID */
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1182
-/*! transaction: transaction fsync calls for checkpoint before allocating
- * the transaction ID */
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_PRE 1183
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1205
/*! transaction: transaction fsync duration for checkpoint after
* allocating the transaction ID (usecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1184
-/*! transaction: transaction fsync duration for checkpoint before
- * allocating the transaction ID (usecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_PRE_DURATION 1185
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1206
/*! transaction: transaction range of IDs currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_RANGE 1186
+#define WT_STAT_CONN_TXN_PINNED_RANGE 1207
/*! transaction: transaction range of IDs currently pinned by a checkpoint */
-#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1187
+#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1208
/*! transaction: transaction range of IDs currently pinned by named
* snapshots */
-#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1188
+#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1209
/*! transaction: transaction sync calls */
-#define WT_STAT_CONN_TXN_SYNC 1189
+#define WT_STAT_CONN_TXN_SYNC 1210
/*! transaction: transactions committed */
-#define WT_STAT_CONN_TXN_COMMIT 1190
+#define WT_STAT_CONN_TXN_COMMIT 1211
/*! transaction: transactions rolled back */
-#define WT_STAT_CONN_TXN_ROLLBACK 1191
+#define WT_STAT_CONN_TXN_ROLLBACK 1212
/*!
* @}
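
/*
 * Illustrative sketch, not part of this diff: the new per-operation session
 * statistics are read like any other connection statistic, by searching a
 * statistics cursor for the key.  "session" and "ret" come from the caller.
 */
WT_CURSOR *stat_cursor;
const char *desc, *pvalue;
int64_t value;

ret = session->open_cursor(session, "statistics:", NULL, NULL, &stat_cursor);
stat_cursor->set_key(stat_cursor, WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS);
ret = stat_cursor->search(stat_cursor);
ret = stat_cursor->get_value(stat_cursor, &desc, &pvalue, &value);
ret = stat_cursor->close(stat_cursor);
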
@@ -4678,127 +4705,129 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_DSRC_BTREE_ROW_INTERNAL 2038
/*! btree: row-store leaf pages */
#define WT_STAT_DSRC_BTREE_ROW_LEAF 2039
+/*! cache: bytes currently in the cache */
+#define WT_STAT_DSRC_CACHE_BYTES_INUSE 2040
/*! cache: bytes read into cache */
-#define WT_STAT_DSRC_CACHE_BYTES_READ 2040
+#define WT_STAT_DSRC_CACHE_BYTES_READ 2041
/*! cache: bytes written from cache */
-#define WT_STAT_DSRC_CACHE_BYTES_WRITE 2041
+#define WT_STAT_DSRC_CACHE_BYTES_WRITE 2042
/*! cache: checkpoint blocked page eviction */
-#define WT_STAT_DSRC_CACHE_EVICTION_CHECKPOINT 2042
+#define WT_STAT_DSRC_CACHE_EVICTION_CHECKPOINT 2043
/*! cache: data source pages selected for eviction unable to be evicted */
-#define WT_STAT_DSRC_CACHE_EVICTION_FAIL 2043
+#define WT_STAT_DSRC_CACHE_EVICTION_FAIL 2044
/*! cache: hazard pointer blocked page eviction */
-#define WT_STAT_DSRC_CACHE_EVICTION_HAZARD 2044
+#define WT_STAT_DSRC_CACHE_EVICTION_HAZARD 2045
/*! cache: in-memory page passed criteria to be split */
-#define WT_STAT_DSRC_CACHE_INMEM_SPLITTABLE 2045
+#define WT_STAT_DSRC_CACHE_INMEM_SPLITTABLE 2046
/*! cache: in-memory page splits */
-#define WT_STAT_DSRC_CACHE_INMEM_SPLIT 2046
+#define WT_STAT_DSRC_CACHE_INMEM_SPLIT 2047
/*! cache: internal pages evicted */
-#define WT_STAT_DSRC_CACHE_EVICTION_INTERNAL 2047
+#define WT_STAT_DSRC_CACHE_EVICTION_INTERNAL 2048
/*! cache: internal pages split during eviction */
-#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_INTERNAL 2048
+#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_INTERNAL 2049
/*! cache: leaf pages split during eviction */
-#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_LEAF 2049
+#define WT_STAT_DSRC_CACHE_EVICTION_SPLIT_LEAF 2050
/*! cache: modified pages evicted */
-#define WT_STAT_DSRC_CACHE_EVICTION_DIRTY 2050
+#define WT_STAT_DSRC_CACHE_EVICTION_DIRTY 2051
/*! cache: overflow pages read into cache */
-#define WT_STAT_DSRC_CACHE_READ_OVERFLOW 2051
+#define WT_STAT_DSRC_CACHE_READ_OVERFLOW 2052
/*! cache: overflow values cached in memory */
-#define WT_STAT_DSRC_CACHE_OVERFLOW_VALUE 2052
+#define WT_STAT_DSRC_CACHE_OVERFLOW_VALUE 2053
/*! cache: page split during eviction deepened the tree */
-#define WT_STAT_DSRC_CACHE_EVICTION_DEEPEN 2053
+#define WT_STAT_DSRC_CACHE_EVICTION_DEEPEN 2054
/*! cache: page written requiring lookaside records */
-#define WT_STAT_DSRC_CACHE_WRITE_LOOKASIDE 2054
+#define WT_STAT_DSRC_CACHE_WRITE_LOOKASIDE 2055
/*! cache: pages read into cache */
-#define WT_STAT_DSRC_CACHE_READ 2055
+#define WT_STAT_DSRC_CACHE_READ 2056
/*! cache: pages read into cache requiring lookaside entries */
-#define WT_STAT_DSRC_CACHE_READ_LOOKASIDE 2056
+#define WT_STAT_DSRC_CACHE_READ_LOOKASIDE 2057
/*! cache: pages requested from the cache */
-#define WT_STAT_DSRC_CACHE_PAGES_REQUESTED 2057
+#define WT_STAT_DSRC_CACHE_PAGES_REQUESTED 2058
/*! cache: pages written from cache */
-#define WT_STAT_DSRC_CACHE_WRITE 2058
+#define WT_STAT_DSRC_CACHE_WRITE 2059
/*! cache: pages written requiring in-memory restoration */
-#define WT_STAT_DSRC_CACHE_WRITE_RESTORE 2059
+#define WT_STAT_DSRC_CACHE_WRITE_RESTORE 2060
/*! cache: unmodified pages evicted */
-#define WT_STAT_DSRC_CACHE_EVICTION_CLEAN 2060
+#define WT_STAT_DSRC_CACHE_EVICTION_CLEAN 2061
/*! compression: compressed pages read */
-#define WT_STAT_DSRC_COMPRESS_READ 2061
+#define WT_STAT_DSRC_COMPRESS_READ 2062
/*! compression: compressed pages written */
-#define WT_STAT_DSRC_COMPRESS_WRITE 2062
+#define WT_STAT_DSRC_COMPRESS_WRITE 2063
/*! compression: page written failed to compress */
-#define WT_STAT_DSRC_COMPRESS_WRITE_FAIL 2063
+#define WT_STAT_DSRC_COMPRESS_WRITE_FAIL 2064
/*! compression: page written was too small to compress */
-#define WT_STAT_DSRC_COMPRESS_WRITE_TOO_SMALL 2064
+#define WT_STAT_DSRC_COMPRESS_WRITE_TOO_SMALL 2065
/*! compression: raw compression call failed, additional data available */
-#define WT_STAT_DSRC_COMPRESS_RAW_FAIL_TEMPORARY 2065
+#define WT_STAT_DSRC_COMPRESS_RAW_FAIL_TEMPORARY 2066
/*! compression: raw compression call failed, no additional data available */
-#define WT_STAT_DSRC_COMPRESS_RAW_FAIL 2066
+#define WT_STAT_DSRC_COMPRESS_RAW_FAIL 2067
/*! compression: raw compression call succeeded */
-#define WT_STAT_DSRC_COMPRESS_RAW_OK 2067
+#define WT_STAT_DSRC_COMPRESS_RAW_OK 2068
/*! cursor: bulk-loaded cursor-insert calls */
-#define WT_STAT_DSRC_CURSOR_INSERT_BULK 2068
+#define WT_STAT_DSRC_CURSOR_INSERT_BULK 2069
/*! cursor: create calls */
-#define WT_STAT_DSRC_CURSOR_CREATE 2069
+#define WT_STAT_DSRC_CURSOR_CREATE 2070
/*! cursor: cursor-insert key and value bytes inserted */
-#define WT_STAT_DSRC_CURSOR_INSERT_BYTES 2070
+#define WT_STAT_DSRC_CURSOR_INSERT_BYTES 2071
/*! cursor: cursor-remove key bytes removed */
-#define WT_STAT_DSRC_CURSOR_REMOVE_BYTES 2071
+#define WT_STAT_DSRC_CURSOR_REMOVE_BYTES 2072
/*! cursor: cursor-update value bytes updated */
-#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2072
+#define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2073
/*! cursor: insert calls */
-#define WT_STAT_DSRC_CURSOR_INSERT 2073
+#define WT_STAT_DSRC_CURSOR_INSERT 2074
/*! cursor: next calls */
-#define WT_STAT_DSRC_CURSOR_NEXT 2074
+#define WT_STAT_DSRC_CURSOR_NEXT 2075
/*! cursor: prev calls */
-#define WT_STAT_DSRC_CURSOR_PREV 2075
+#define WT_STAT_DSRC_CURSOR_PREV 2076
/*! cursor: remove calls */
-#define WT_STAT_DSRC_CURSOR_REMOVE 2076
+#define WT_STAT_DSRC_CURSOR_REMOVE 2077
/*! cursor: reset calls */
-#define WT_STAT_DSRC_CURSOR_RESET 2077
+#define WT_STAT_DSRC_CURSOR_RESET 2078
/*! cursor: restarted searches */
-#define WT_STAT_DSRC_CURSOR_RESTART 2078
+#define WT_STAT_DSRC_CURSOR_RESTART 2079
/*! cursor: search calls */
-#define WT_STAT_DSRC_CURSOR_SEARCH 2079
+#define WT_STAT_DSRC_CURSOR_SEARCH 2080
/*! cursor: search near calls */
-#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2080
+#define WT_STAT_DSRC_CURSOR_SEARCH_NEAR 2081
/*! cursor: truncate calls */
-#define WT_STAT_DSRC_CURSOR_TRUNCATE 2081
+#define WT_STAT_DSRC_CURSOR_TRUNCATE 2082
/*! cursor: update calls */
-#define WT_STAT_DSRC_CURSOR_UPDATE 2082
+#define WT_STAT_DSRC_CURSOR_UPDATE 2083
/*! reconciliation: dictionary matches */
-#define WT_STAT_DSRC_REC_DICTIONARY 2083
+#define WT_STAT_DSRC_REC_DICTIONARY 2084
/*! reconciliation: fast-path pages deleted */
-#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2084
+#define WT_STAT_DSRC_REC_PAGE_DELETE_FAST 2085
/*! reconciliation: internal page key bytes discarded using suffix
* compression */
-#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2085
+#define WT_STAT_DSRC_REC_SUFFIX_COMPRESSION 2086
/*! reconciliation: internal page multi-block writes */
-#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2086
+#define WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL 2087
/*! reconciliation: internal-page overflow keys */
-#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2087
+#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2088
/*! reconciliation: leaf page key bytes discarded using prefix compression */
-#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2088
+#define WT_STAT_DSRC_REC_PREFIX_COMPRESSION 2089
/*! reconciliation: leaf page multi-block writes */
-#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2089
+#define WT_STAT_DSRC_REC_MULTIBLOCK_LEAF 2090
/*! reconciliation: leaf-page overflow keys */
-#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2090
+#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2091
/*! reconciliation: maximum blocks required for a page */
-#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2091
+#define WT_STAT_DSRC_REC_MULTIBLOCK_MAX 2092
/*! reconciliation: overflow values written */
-#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2092
+#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2093
/*! reconciliation: page checksum matches */
-#define WT_STAT_DSRC_REC_PAGE_MATCH 2093
+#define WT_STAT_DSRC_REC_PAGE_MATCH 2094
/*! reconciliation: page reconciliation calls */
-#define WT_STAT_DSRC_REC_PAGES 2094
+#define WT_STAT_DSRC_REC_PAGES 2095
/*! reconciliation: page reconciliation calls for eviction */
-#define WT_STAT_DSRC_REC_PAGES_EVICTION 2095
+#define WT_STAT_DSRC_REC_PAGES_EVICTION 2096
/*! reconciliation: pages deleted */
-#define WT_STAT_DSRC_REC_PAGE_DELETE 2096
+#define WT_STAT_DSRC_REC_PAGE_DELETE 2097
/*! session: object compaction */
-#define WT_STAT_DSRC_SESSION_COMPACT 2097
+#define WT_STAT_DSRC_SESSION_COMPACT 2098
/*! session: open cursor count */
-#define WT_STAT_DSRC_SESSION_CURSOR_OPEN 2098
+#define WT_STAT_DSRC_SESSION_CURSOR_OPEN 2099
/*! transaction: update conflicts */
-#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2099
+#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2100
/*!
* @}
diff --git a/src/log/log.c b/src/log/log.c
index bf83c280d8d..8ec910115ac 100644
--- a/src/log/log.c
+++ b/src/log/log.c
@@ -9,13 +9,17 @@
#include "wt_internal.h"
static int __log_openfile(
- WT_SESSION_IMPL *, bool, WT_FH **, const char *, uint32_t);
+ WT_SESSION_IMPL *, WT_FH **, const char *, uint32_t, uint32_t);
static int __log_write_internal(
WT_SESSION_IMPL *, WT_ITEM *, WT_LSN *, uint32_t);
#define WT_LOG_COMPRESS_SKIP (offsetof(WT_LOG_RECORD, record))
#define WT_LOG_ENCRYPT_SKIP (offsetof(WT_LOG_RECORD, record))
+/* Flags to __log_openfile */
+#define WT_LOG_OPEN_CREATE_OK 0x01
+#define WT_LOG_OPEN_VERIFY 0x02
+
/*
* __wt_log_ckpt --
* Record the given LSN as the checkpoint LSN and signal the archive
@@ -146,7 +150,7 @@ __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn)
* file than we want.
*/
WT_ERR(__log_openfile(session,
- false, &log_fh, WT_LOG_FILENAME, min_lsn->l.file));
+ &log_fh, WT_LOG_FILENAME, min_lsn->l.file, 0));
WT_ERR(__wt_verbose(session, WT_VERB_LOG,
"log_force_sync: sync %s to LSN %" PRIu32 "/%" PRIu32,
log_fh->name, min_lsn->l.file, min_lsn->l.offset));
@@ -277,7 +281,8 @@ __log_get_files(WT_SESSION_IMPL *session,
/*
* __wt_log_get_all_files --
* Retrieve the list of log files, either all of them or only the active
- * ones (those that are not candidates for archiving).
+ * ones (those that are not candidates for archiving). The caller is
+ * responsible for freeing the directory list returned.
*/
int
__wt_log_get_all_files(WT_SESSION_IMPL *session,
@@ -307,6 +312,10 @@ __wt_log_get_all_files(WT_SESSION_IMPL *session,
for (max = 0, i = 0; i < count; ) {
WT_ERR(__wt_log_extract_lognum(session, files[i], &id));
if (active_only && id < log->ckpt_lsn.l.file) {
+ /*
+ * Any files not being returned are individually freed
+ * and the array adjusted.
+ */
__wt_free(session, files[i]);
files[i] = files[count - 1];
files[--count] = NULL;
@@ -321,6 +330,10 @@ __wt_log_get_all_files(WT_SESSION_IMPL *session,
*filesp = files;
*countp = count;
+ /*
+ * Only free on error. The caller is responsible for calling free
+ * once it is done using the returned list.
+ */
if (0) {
err: WT_TRET(__wt_fs_directory_list_free(session, &files, count));
}
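
/*
 * Illustrative sketch, not part of this diff: a caller of
 * __wt_log_get_all_files owns the returned array and must release it with
 * __wt_fs_directory_list_free when finished; the parameter order shown is an
 * assumption for illustration only.
 */
char **files;
u_int count;
uint32_t max_id;

WT_RET(__wt_log_get_all_files(session, &files, &count, &max_id, true));
/* ... use the log file names ... */
WT_TRET(__wt_fs_directory_list_free(session, &files, count));
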
@@ -674,7 +687,7 @@ err: __wt_scr_free(session, &buf);
*/
static int
__log_openfile(WT_SESSION_IMPL *session,
- bool ok_create, WT_FH **fhp, const char *file_prefix, uint32_t id)
+ WT_FH **fhp, const char *file_prefix, uint32_t id, uint32_t flags)
{
WT_CONNECTION_IMPL *conn;
WT_DECL_ITEM(buf);
@@ -683,7 +696,7 @@ __log_openfile(WT_SESSION_IMPL *session,
WT_LOG_DESC *desc;
WT_LOG_RECORD *logrec;
uint32_t allocsize;
- u_int flags;
+ u_int wtopen_flags;
conn = S2C(session);
log = conn->log;
@@ -695,19 +708,19 @@ __log_openfile(WT_SESSION_IMPL *session,
WT_ERR(__log_filename(session, id, file_prefix, buf));
WT_ERR(__wt_verbose(session, WT_VERB_LOG,
"opening log %s", (const char *)buf->data));
- flags = 0;
- if (ok_create)
- LF_SET(WT_OPEN_CREATE);
+ wtopen_flags = 0;
+ if (LF_ISSET(WT_LOG_OPEN_CREATE_OK))
+ FLD_SET(wtopen_flags, WT_FS_OPEN_CREATE);
if (FLD_ISSET(conn->direct_io, WT_DIRECT_IO_LOG))
- LF_SET(WT_OPEN_DIRECTIO);
+ FLD_SET(wtopen_flags, WT_FS_OPEN_DIRECTIO);
WT_ERR(__wt_open(
- session, buf->data, WT_OPEN_FILE_TYPE_LOG, flags, fhp));
+ session, buf->data, WT_FS_OPEN_FILE_TYPE_LOG, wtopen_flags, fhp));
/*
* If we are not creating the log file but opening it for reading,
* check that the magic number and versions are correct.
*/
- if (!ok_create) {
+ if (LF_ISSET(WT_LOG_OPEN_VERIFY)) {
WT_ERR(__wt_buf_grow(session, buf, allocsize));
memset(buf->mem, 0, allocsize);
WT_ERR(__wt_read(session, *fhp, 0, allocsize, buf->mem));
@@ -773,7 +786,7 @@ __log_alloc_prealloc(WT_SESSION_IMPL *session, uint32_t to_num)
* All file setup, writing the header and pre-allocation was done
* before. We only need to rename it.
*/
- WT_ERR(__wt_fs_rename(session, from_path->data, to_path->data));
+ WT_ERR(__wt_fs_rename(session, from_path->data, to_path->data, false));
err: __wt_scr_free(session, &from_path);
__wt_scr_free(session, &to_path);
@@ -870,7 +883,7 @@ __log_newfile(WT_SESSION_IMPL *session, bool conn_open, bool *created)
* window where another thread could see a NULL log file handle.
*/
WT_RET(__log_openfile(session,
- false, &log_fh, WT_LOG_FILENAME, log->fileid));
+ &log_fh, WT_LOG_FILENAME, log->fileid, 0));
WT_PUBLISH(log->log_fh, log_fh);
/*
* We need to setup the LSNs. Set the end LSN and alloc LSN to
@@ -978,7 +991,7 @@ __log_truncate(WT_SESSION_IMPL *session,
* Truncate the log file to the given LSN.
*/
WT_ERR(__log_openfile(session,
- false, &log_fh, file_prefix, lsn->l.file));
+ &log_fh, file_prefix, lsn->l.file, 0));
WT_ERR(__wt_ftruncate(session, log_fh, lsn->l.offset));
WT_ERR(__wt_fsync(session, log_fh, true));
WT_ERR(__wt_close(session, &log_fh));
@@ -995,7 +1008,7 @@ __log_truncate(WT_SESSION_IMPL *session,
if (lognum > lsn->l.file &&
lognum < log->trunc_lsn.l.file) {
WT_ERR(__log_openfile(session,
- false, &log_fh, file_prefix, lognum));
+ &log_fh, file_prefix, lognum, 0));
/*
* If there are intervening files pre-allocated,
* truncate them to the end of the log file header.
@@ -1047,7 +1060,8 @@ __wt_log_allocfile(
/*
* Set up the temporary file.
*/
- WT_ERR(__log_openfile(session, true, &log_fh, WT_LOG_TMPNAME, tmp_id));
+ WT_ERR(__log_openfile(session,
+ &log_fh, WT_LOG_TMPNAME, tmp_id, WT_LOG_OPEN_CREATE_OK));
WT_ERR(__log_file_header(session, log_fh, NULL, true));
WT_ERR(__log_prealloc(session, log_fh));
WT_ERR(__wt_fsync(session, log_fh, true));
@@ -1058,7 +1072,7 @@ __wt_log_allocfile(
/*
* Rename it into place and make it available.
*/
- WT_ERR(__wt_fs_rename(session, from_path->data, to_path->data));
+ WT_ERR(__wt_fs_rename(session, from_path->data, to_path->data, false));
err: __wt_scr_free(session, &from_path);
__wt_scr_free(session, &to_path);
@@ -1081,7 +1095,7 @@ __wt_log_remove(WT_SESSION_IMPL *session,
WT_ERR(__log_filename(session, lognum, file_prefix, path));
WT_ERR(__wt_verbose(session, WT_VERB_LOG,
"log_remove: remove log %s", (char *)path->data));
- WT_ERR(__wt_fs_remove(session, path->data));
+ WT_ERR(__wt_fs_remove(session, path->data, false));
err: __wt_scr_free(session, &path);
return (ret);
}
@@ -1117,7 +1131,7 @@ __wt_log_open(WT_SESSION_IMPL *session)
WT_RET(__wt_verbose(session, WT_VERB_LOG,
"log_open: open fh to directory %s", conn->log_path));
WT_RET(__wt_open(session, conn->log_path,
- WT_OPEN_FILE_TYPE_DIRECTORY, 0, &log->log_dir_fh));
+ WT_FS_OPEN_FILE_TYPE_DIRECTORY, 0, &log->log_dir_fh));
}
if (!F_ISSET(conn, WT_CONN_READONLY)) {
@@ -1587,8 +1601,8 @@ __wt_log_scan(WT_SESSION_IMPL *session, WT_LSN *lsnp, uint32_t flags,
WT_ERR(
__wt_fs_directory_list_free(session, &logfiles, logcount));
}
- WT_ERR(__log_openfile(
- session, false, &log_fh, WT_LOG_FILENAME, start_lsn.l.file));
+ WT_ERR(__log_openfile(session,
+ &log_fh, WT_LOG_FILENAME, start_lsn.l.file, WT_LOG_OPEN_VERIFY));
WT_ERR(__wt_filesize(session, log_fh, &log_size));
rd_lsn = start_lsn;
@@ -1637,7 +1651,8 @@ advance:
if (rd_lsn.l.file > end_lsn.l.file)
break;
WT_ERR(__log_openfile(session,
- false, &log_fh, WT_LOG_FILENAME, rd_lsn.l.file));
+ &log_fh, WT_LOG_FILENAME,
+ rd_lsn.l.file, WT_LOG_OPEN_VERIFY));
WT_ERR(__wt_filesize(session, log_fh, &log_size));
eol = false;
continue;
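
The log.c changes above replace __log_openfile's single ok_create boolean with a flags word so create-if-missing and header verification can be requested independently. A minimal standalone C sketch of the same boolean-to-flags pattern follows; the macro and function names are illustrative, not WiredTiger identifiers.

#include <stdint.h>
#include <stdio.h>

#define LOG_OPEN_CREATE_OK	0x01u	/* Create the file if missing. */
#define LOG_OPEN_VERIFY		0x02u	/* Verify the header after open. */

static int
log_openfile(const char *name, uint32_t flags)
{
	if (flags & LOG_OPEN_CREATE_OK)
		printf("%s: create if the file does not exist\n", name);
	if (flags & LOG_OPEN_VERIFY)
		printf("%s: verify the file header after opening\n", name);
	return (0);
}

int
main(void)
{
	/* Callers combine behaviors instead of passing a single bool. */
	(void)log_openfile("example.0000000001", LOG_OPEN_CREATE_OK);
	(void)log_openfile("example.0000000002",
	    LOG_OPEN_CREATE_OK | LOG_OPEN_VERIFY);
	return (0);
}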
diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c
index 78235fb6a92..bedef6a8596 100644
--- a/src/lsm/lsm_cursor.c
+++ b/src/lsm/lsm_cursor.c
@@ -205,6 +205,12 @@ __clsm_enter(WT_CURSOR_LSM *clsm, bool reset, bool update)
WT_RET(__wt_txn_id_check(session));
WT_RET(__clsm_enter_update(clsm));
+ /*
+ * Switching the tree will update the generation before
+ * updating the switch transaction. We test the
+ * transaction in clsm_enter_update. Now test the
+ * disk generation to avoid races.
+ */
if (clsm->dsk_gen != clsm->lsm_tree->dsk_gen)
goto open;
@@ -219,13 +225,20 @@ __clsm_enter(WT_CURSOR_LSM *clsm, bool reset, bool update)
* transaction ID in each chunk: any transaction ID
* that overlaps with our snapshot is a potential
* conflict.
+ *
+ * Note that the global snap_min is correct here: it
+ * tracks concurrent transactions excluding special
+ * transactions such as checkpoint (which we can't
+ * conflict with because checkpoint only writes the
+ * metadata, which is not an LSM tree).
*/
clsm->nupdates = 1;
if (txn->isolation == WT_ISO_SNAPSHOT &&
F_ISSET(clsm, WT_CLSM_OPEN_SNAPSHOT)) {
WT_ASSERT(session,
F_ISSET(txn, WT_TXN_HAS_SNAPSHOT));
- snap_min = txn->snap_min;
+ snap_min =
+ WT_SESSION_TXN_STATE(session)->snap_min;
for (switch_txnp =
&clsm->switch_txn[clsm->nchunks - 2];
clsm->nupdates < clsm->nchunks;
@@ -1521,6 +1534,8 @@ __wt_clsm_open(WT_SESSION_IMPL *session,
WT_LSM_TREE *lsm_tree;
bool bulk;
+ WT_STATIC_ASSERT(offsetof(WT_CURSOR_LSM, iface) == 0);
+
clsm = NULL;
cursor = NULL;
lsm_tree = NULL;
@@ -1566,6 +1581,7 @@ __wt_clsm_open(WT_SESSION_IMPL *session,
cursor->value_format = lsm_tree->value_format;
clsm->lsm_tree = lsm_tree;
+ lsm_tree = NULL;
/*
* The tree's dsk_gen starts at one, so starting the cursor on zero
@@ -1573,7 +1589,6 @@ __wt_clsm_open(WT_SESSION_IMPL *session,
*/
clsm->dsk_gen = 0;
- WT_STATIC_ASSERT(offsetof(WT_CURSOR_LSM, iface) == 0);
WT_ERR(__wt_cursor_init(cursor, cursor->uri, owner, cfg, cursorp));
if (bulk)
@@ -1585,10 +1600,6 @@ err: if (clsm != NULL)
else if (lsm_tree != NULL)
__wt_lsm_tree_release(session, lsm_tree);
- /*
- * We open bulk cursors after setting the returned cursor.
- * Fix that here.
- */
*cursorp = NULL;
}
diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c
index da106ae2089..2ecfb614eee 100644
--- a/src/lsm/lsm_tree.c
+++ b/src/lsm/lsm_tree.c
@@ -771,6 +771,11 @@ __wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
++lsm_tree->dsk_gen;
lsm_tree->modified = true;
+ /*
+ * Ensure the updated disk generation is visible to all other threads
+ * before updating the transaction ID.
+ */
+ WT_FULL_BARRIER();
/*
* Set the switch transaction in the previous chunk unless this is
@@ -1187,8 +1192,15 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, bool *skipp)
*/
if (lsm_tree->nchunks > 0 &&
(chunk = lsm_tree->chunk[lsm_tree->nchunks - 1]) != NULL) {
- if (chunk->switch_txn == WT_TXN_NONE)
+ if (chunk->switch_txn == WT_TXN_NONE) {
+ /*
+ * Make sure any cursors open on the tree see the
+ * new switch generation before updating.
+ */
+ ++lsm_tree->dsk_gen;
+ WT_FULL_BARRIER();
chunk->switch_txn = __wt_txn_id_alloc(session, false);
+ }
/*
* If we have a chunk, we want to look for it to be on-disk.
* So we need to add a reference to keep it available.
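
The WT_FULL_BARRIER added in __wt_lsm_tree_switch and __wt_lsm_compact orders the disk-generation bump before the switch-transaction update, matching the read-side check described in __clsm_enter. A standalone C11 sketch of that publish/observe ordering, using atomic fences in place of WiredTiger's barrier macro (names are illustrative, not the in-tree code):

#include <stdatomic.h>
#include <stdint.h>

static _Atomic uint64_t dsk_gen;
static _Atomic uint64_t switch_txn;

/* Writer: publish the new generation before the switch transaction ID. */
void
lsm_switch(uint64_t new_txn)
{
	atomic_fetch_add_explicit(&dsk_gen, 1, memory_order_relaxed);
	/* Full fence: the generation bump is visible before the ID. */
	atomic_thread_fence(memory_order_seq_cst);
	atomic_store_explicit(&switch_txn, new_txn, memory_order_relaxed);
}

/* Reader: if the transaction ID is visible, so is the bumped generation. */
int
lsm_switch_seen(uint64_t cached_gen)
{
	uint64_t txn;

	txn = atomic_load_explicit(&switch_txn, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);
	return (txn != 0 && atomic_load_explicit(
	    &dsk_gen, memory_order_relaxed) != cached_gen);
}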
diff --git a/src/lsm/lsm_work_unit.c b/src/lsm/lsm_work_unit.c
index c19f42327be..0f2a407c70d 100644
--- a/src/lsm/lsm_work_unit.c
+++ b/src/lsm/lsm_work_unit.c
@@ -526,7 +526,7 @@ __lsm_drop_file(WT_SESSION_IMPL *session, const char *uri)
ret = __wt_schema_drop(session, uri, drop_cfg));
if (ret == 0)
- ret = __wt_fs_remove(session, uri + strlen("file:"));
+ ret = __wt_fs_remove(session, uri + strlen("file:"), false);
WT_RET(__wt_verbose(session, WT_VERB_LSM, "Dropped %s", uri));
if (ret == EBUSY || ret == ENOENT)
diff --git a/src/meta/meta_table.c b/src/meta/meta_table.c
index 38a2edd7219..d39df163daf 100644
--- a/src/meta/meta_table.c
+++ b/src/meta/meta_table.c
@@ -68,6 +68,9 @@ __wt_metadata_cursor_open(
if (F_ISSET(btree, WT_BTREE_NO_LOGGING))
F_CLR(btree, WT_BTREE_NO_LOGGING);
+ /* The metadata file always uses checkpoint IDs in visibility checks. */
+ btree->include_checkpoint_txn = true;
+
return (0);
}
diff --git a/src/meta/meta_track.c b/src/meta/meta_track.c
index eb06b2bed66..3d8b7c46500 100644
--- a/src/meta/meta_track.c
+++ b/src/meta/meta_track.c
@@ -141,7 +141,8 @@ __meta_track_apply(WT_SESSION_IMPL *session, WT_META_TRACK *trk)
ret = bm->checkpoint_resolve(bm, session));
break;
case WT_ST_DROP_COMMIT:
- if ((ret = __wt_block_manager_drop(session, trk->a)) != 0)
+ if ((ret =
+ __wt_block_manager_drop(session, trk->a, false)) != 0)
__wt_err(session, ret,
"metadata remove dropped file %s", trk->a);
break;
@@ -188,13 +189,15 @@ __meta_track_unroll(WT_SESSION_IMPL *session, WT_META_TRACK *trk)
* For removes, b is NULL.
*/
if (trk->a != NULL && trk->b != NULL &&
- (ret = __wt_rename_and_sync_directory(session,
- trk->b + strlen("file:"), trk->a + strlen("file:"))) != 0)
+ (ret = __wt_fs_rename(session,
+ trk->b + strlen("file:"), trk->a + strlen("file:"),
+ true)) != 0)
__wt_err(session, ret,
"metadata unroll rename %s to %s", trk->b, trk->a);
- if (trk->a == NULL && (ret =
- __wt_fs_remove(session, trk->b + strlen("file:"))) != 0)
+ if (trk->a == NULL &&
+ (ret = __wt_fs_remove(session,
+ trk->b + strlen("file:"), false)) != 0)
__wt_err(session, ret,
"metadata unroll create %s", trk->b);
diff --git a/src/meta/meta_turtle.c b/src/meta/meta_turtle.c
index 4d2b359bbed..ace0fabab48 100644
--- a/src/meta/meta_turtle.c
+++ b/src/meta/meta_turtle.c
@@ -158,7 +158,7 @@ __wt_turtle_init(WT_SESSION_IMPL *session)
* Discard any turtle setup file left-over from previous runs. This
* doesn't matter for correctness, it's just cleaning up random files.
*/
- WT_RET(__wt_remove_if_exists(session, WT_METADATA_TURTLE_SET));
+ WT_RET(__wt_remove_if_exists(session, WT_METADATA_TURTLE_SET, false));
/*
* We could die after creating the turtle file and before creating the
@@ -197,9 +197,10 @@ __wt_turtle_init(WT_SESSION_IMPL *session)
"Both %s and %s exist; recreating metadata from "
"backup",
WT_METADATA_TURTLE, WT_METADATA_BACKUP));
- WT_RET(__wt_remove_if_exists(session, WT_METAFILE));
+ WT_RET(
+ __wt_remove_if_exists(session, WT_METAFILE, false));
WT_RET(__wt_remove_if_exists(
- session, WT_METADATA_TURTLE));
+ session, WT_METADATA_TURTLE, false));
load = true;
}
} else
@@ -305,7 +306,7 @@ __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value)
* every time.
*/
WT_RET(__wt_fopen(session, WT_METADATA_TURTLE_SET,
- WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, WT_STREAM_WRITE, &fs));
+ WT_FS_OPEN_CREATE | WT_FS_OPEN_EXCLUSIVE, WT_STREAM_WRITE, &fs));
version = wiredtiger_version(&vmajor, &vminor, &vpatch);
WT_ERR(__wt_fprintf(session, fs,
@@ -320,7 +321,7 @@ __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value)
/* Close any file handle left open, remove any temporary file. */
err: WT_TRET(__wt_fclose(session, &fs));
- WT_TRET(__wt_remove_if_exists(session, WT_METADATA_TURTLE_SET));
+ WT_TRET(__wt_remove_if_exists(session, WT_METADATA_TURTLE_SET, false));
return (ret);
}
diff --git a/src/os_common/filename.c b/src/os_common/filename.c
index 5f174288350..8b6c1269829 100644
--- a/src/os_common/filename.c
+++ b/src/os_common/filename.c
@@ -56,55 +56,17 @@ __wt_nfilename(
* Remove a file if it exists.
*/
int
-__wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name)
+__wt_remove_if_exists(WT_SESSION_IMPL *session, const char *name, bool durable)
{
bool exist;
WT_RET(__wt_fs_exist(session, name, &exist));
if (exist)
- WT_RET(__wt_fs_remove(session, name));
+ WT_RET(__wt_fs_remove(session, name, durable));
return (0);
}
/*
- * __wt_rename_and_sync_directory --
- * Rename a file and sync the enclosing directory.
- */
-int
-__wt_rename_and_sync_directory(
- WT_SESSION_IMPL *session, const char *from, const char *to)
-{
- const char *fp, *tp;
- bool same_directory;
-
- /* Rename the source file to the target. */
- WT_RET(__wt_fs_rename(session, from, to));
-
- /*
- * Flush the backing directory to guarantee the rename. My reading of
- * POSIX 1003.1 is there's no guarantee flushing only one of the from
- * or to directories, or flushing a common parent, is sufficient, and
- * even if POSIX were to make that guarantee, existing filesystems are
- * known to not provide the guarantee or only provide the guarantee
- * with specific mount options. Flush both of the from/to directories
- * until it's a performance problem.
- */
- WT_RET(__wt_fs_directory_sync(session, from));
-
- /*
- * In almost all cases, we're going to be renaming files in the same
- * directory, we can at least fast-path that.
- */
- fp = strrchr(from, '/');
- tp = strrchr(to, '/');
- same_directory = (fp == NULL && tp == NULL) ||
- (fp != NULL && tp != NULL &&
- fp - from == tp - to && memcmp(from, to, (size_t)(fp - from)) == 0);
-
- return (same_directory ? 0 : __wt_fs_directory_sync(session, to));
-}
-
-/*
* __wt_copy_and_sync --
* Copy a file safely; here to support the wt utility.
*/
@@ -134,13 +96,13 @@ __wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to)
WT_ERR(__wt_scr_alloc(session, 0, &tmp));
WT_ERR(__wt_buf_fmt(session, tmp, "%s.copy", to));
- WT_ERR(__wt_remove_if_exists(session, to));
- WT_ERR(__wt_remove_if_exists(session, tmp->data));
+ WT_ERR(__wt_remove_if_exists(session, to, false));
+ WT_ERR(__wt_remove_if_exists(session, tmp->data, false));
/* Open the from and temporary file handles. */
- WT_ERR(__wt_open(session, from, WT_OPEN_FILE_TYPE_REGULAR, 0, &ffh));
- WT_ERR(__wt_open(session, tmp->data, WT_OPEN_FILE_TYPE_REGULAR,
- WT_OPEN_CREATE | WT_OPEN_EXCLUSIVE, &tfh));
+ WT_ERR(__wt_open(session, from, WT_FS_OPEN_FILE_TYPE_REGULAR, 0, &ffh));
+ WT_ERR(__wt_open(session, tmp->data, WT_FS_OPEN_FILE_TYPE_REGULAR,
+ WT_FS_OPEN_CREATE | WT_FS_OPEN_EXCLUSIVE, &tfh));
/*
* Allocate a copy buffer. Don't use a scratch buffer, this thing is
@@ -162,7 +124,7 @@ __wt_copy_and_sync(WT_SESSION *wt_session, const char *from, const char *to)
WT_ERR(__wt_fsync(session, tfh, true));
WT_ERR(__wt_close(session, &tfh));
- ret = __wt_rename_and_sync_directory(session, tmp->data, to);
+ ret = __wt_fs_rename(session, tmp->data, to, true);
err: WT_TRET(__wt_close(session, &ffh));
WT_TRET(__wt_close(session, &tfh));
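
With __wt_rename_and_sync_directory removed, __wt_copy_and_sync relies on a durable rename for the final step. A reduced standalone POSIX sketch of the overall pattern (copy into a temporary file, fsync it, rename it into place); error handling is trimmed, and a fully durable version would also fsync the target directory after the rename:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

/* Copy "from" to "to" via a temporary name, flushing the data first. */
int
copy_and_sync(const char *from, const char *tmp, const char *to)
{
	char buf[4096];
	ssize_t n;
	int in, out;

	if ((in = open(from, O_RDONLY)) < 0)
		return (-1);
	if ((out = open(tmp, O_WRONLY | O_CREAT | O_EXCL, 0666)) < 0) {
		(void)close(in);
		return (-1);
	}
	while ((n = read(in, buf, sizeof(buf))) > 0)
		if (write(out, buf, (size_t)n) != n) {
			n = -1;
			break;
		}
	(void)close(in);
	if (n < 0 || fsync(out) != 0) {
		(void)close(out);
		return (-1);
	}
	if (close(out) != 0)
		return (-1);
	/* A durable rename would additionally fsync the parent directory. */
	return (rename(tmp, to) == 0 ? 0 : -1);
}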
diff --git a/src/os_common/os_fhandle.c b/src/os_common/os_fhandle.c
index 81e4cc14ccb..184a9df0e72 100644
--- a/src/os_common/os_fhandle.c
+++ b/src/os_common/os_fhandle.c
@@ -150,19 +150,19 @@ __open_verbose(
*/
switch (file_type) {
- case WT_OPEN_FILE_TYPE_CHECKPOINT:
+ case WT_FS_OPEN_FILE_TYPE_CHECKPOINT:
file_type_tag = "checkpoint";
break;
- case WT_OPEN_FILE_TYPE_DATA:
+ case WT_FS_OPEN_FILE_TYPE_DATA:
file_type_tag = "data";
break;
- case WT_OPEN_FILE_TYPE_DIRECTORY:
+ case WT_FS_OPEN_FILE_TYPE_DIRECTORY:
file_type_tag = "directory";
break;
- case WT_OPEN_FILE_TYPE_LOG:
+ case WT_FS_OPEN_FILE_TYPE_LOG:
file_type_tag = "log";
break;
- case WT_OPEN_FILE_TYPE_REGULAR:
+ case WT_FS_OPEN_FILE_TYPE_REGULAR:
file_type_tag = "regular";
break;
default:
@@ -172,18 +172,18 @@ __open_verbose(
WT_RET(__wt_scr_alloc(session, 0, &tmp));
sep = " (";
-#define WT_OPEN_VERBOSE_FLAG(f, name) \
+#define WT_FS_OPEN_VERBOSE_FLAG(f, name) \
if (LF_ISSET(f)) { \
WT_ERR(__wt_buf_catfmt( \
session, tmp, "%s%s", sep, name)); \
sep = ", "; \
}
- WT_OPEN_VERBOSE_FLAG(WT_OPEN_CREATE, "create");
- WT_OPEN_VERBOSE_FLAG(WT_OPEN_DIRECTIO, "direct-IO");
- WT_OPEN_VERBOSE_FLAG(WT_OPEN_EXCLUSIVE, "exclusive");
- WT_OPEN_VERBOSE_FLAG(WT_OPEN_FIXED, "fixed");
- WT_OPEN_VERBOSE_FLAG(WT_OPEN_READONLY, "readonly");
+ WT_FS_OPEN_VERBOSE_FLAG(WT_FS_OPEN_CREATE, "create");
+ WT_FS_OPEN_VERBOSE_FLAG(WT_FS_OPEN_DIRECTIO, "direct-IO");
+ WT_FS_OPEN_VERBOSE_FLAG(WT_FS_OPEN_EXCLUSIVE, "exclusive");
+ WT_FS_OPEN_VERBOSE_FLAG(WT_FS_OPEN_FIXED, "fixed");
+ WT_FS_OPEN_VERBOSE_FLAG(WT_FS_OPEN_READONLY, "readonly");
if (tmp->size != 0)
WT_ERR(__wt_buf_catfmt(session, tmp, ")"));
@@ -209,7 +209,7 @@ err: __wt_scr_free(session, &tmp);
*/
int
__wt_open(WT_SESSION_IMPL *session,
- const char *name, WT_OPEN_FILE_TYPE file_type, u_int flags, WT_FH **fhp)
+ const char *name, WT_FS_OPEN_FILE_TYPE file_type, u_int flags, WT_FH **fhp)
{
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
@@ -247,12 +247,12 @@ __wt_open(WT_SESSION_IMPL *session,
if (F_ISSET(conn, WT_CONN_READONLY)) {
lock_file = strcmp(name, WT_SINGLETHREAD) == 0;
if (!lock_file)
- LF_SET(WT_OPEN_READONLY);
- WT_ASSERT(session, lock_file || !LF_ISSET(WT_OPEN_CREATE));
+ LF_SET(WT_FS_OPEN_READONLY);
+ WT_ASSERT(session, lock_file || !LF_ISSET(WT_FS_OPEN_CREATE));
}
/* Create the path to the file. */
- if (!LF_ISSET(WT_OPEN_FIXED))
+ if (!LF_ISSET(WT_FS_OPEN_FIXED))
WT_ERR(__wt_filename(session, name, &path));
/* Call the underlying open function. */
@@ -261,7 +261,7 @@ __wt_open(WT_SESSION_IMPL *session,
open_called = true;
WT_ERR(__fhandle_method_finalize(
- session, fh->handle, LF_ISSET(WT_OPEN_READONLY)));
+ session, fh->handle, LF_ISSET(WT_FS_OPEN_READONLY)));
/*
* Repeat the check for a match: if there's no match, link our newly
diff --git a/src/os_common/os_fs_inmemory.c b/src/os_common/os_fs_inmemory.c
index 09c2e08db83..178adc1dac8 100644
--- a/src/os_common/os_fs_inmemory.c
+++ b/src/os_common/os_fs_inmemory.c
@@ -188,14 +188,16 @@ __im_fs_exist(WT_FILE_SYSTEM *file_system,
* POSIX remove.
*/
static int
-__im_fs_remove(
- WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name)
+__im_fs_remove(WT_FILE_SYSTEM *file_system,
+ WT_SESSION *wt_session, const char *name, uint32_t flags)
{
WT_DECL_RET;
WT_FILE_HANDLE_INMEM *im_fh;
WT_FILE_SYSTEM_INMEM *im_fs;
WT_SESSION_IMPL *session;
+ WT_UNUSED(flags);
+
im_fs = (WT_FILE_SYSTEM_INMEM *)file_system;
session = (WT_SESSION_IMPL *)wt_session;
@@ -215,7 +217,7 @@ __im_fs_remove(
*/
static int
__im_fs_rename(WT_FILE_SYSTEM *file_system,
- WT_SESSION *wt_session, const char *from, const char *to)
+ WT_SESSION *wt_session, const char *from, const char *to, uint32_t flags)
{
WT_DECL_RET;
WT_FILE_HANDLE_INMEM *im_fh;
@@ -224,6 +226,8 @@ __im_fs_rename(WT_FILE_SYSTEM *file_system,
uint64_t bucket;
char *copy;
+ WT_UNUSED(flags);
+
im_fs = (WT_FILE_SYSTEM_INMEM *)file_system;
session = (WT_SESSION_IMPL *)wt_session;
@@ -463,7 +467,7 @@ err: __wt_spin_unlock(session, &im_fs->lock);
*/
static int
__im_file_open(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
- const char *name, WT_OPEN_FILE_TYPE file_type, uint32_t flags,
+ const char *name, WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags,
WT_FILE_HANDLE **file_handlep)
{
WT_DECL_RET;
diff --git a/src/os_common/os_fstream.c b/src/os_common/os_fstream.c
index 0b199529e19..5a368ea75e6 100644
--- a/src/os_common/os_fstream.c
+++ b/src/os_common/os_fstream.c
@@ -187,7 +187,7 @@ __wt_fopen(WT_SESSION_IMPL *session,
fstr = NULL;
WT_RET(__wt_open(
- session, name, WT_OPEN_FILE_TYPE_REGULAR, open_flags, &fh));
+ session, name, WT_FS_OPEN_FILE_TYPE_REGULAR, open_flags, &fh));
WT_ERR(__wt_calloc_one(session, &fstr));
fstr->fh = fh;
diff --git a/src/os_posix/os_fs.c b/src/os_posix/os_fs.c
index 86fa2e8f117..11f38ec063b 100644
--- a/src/os_posix/os_fs.c
+++ b/src/os_posix/os_fs.c
@@ -30,7 +30,7 @@
/*
* __posix_sync --
- * Underlying support function to flush a file handle.
+ * Underlying support function to flush a file descriptor.
*/
static int
__posix_sync(
@@ -77,33 +77,42 @@ __posix_sync(
#ifdef __linux__
/*
* __posix_directory_sync --
- * Flush a directory to ensure file creation is durable.
+ * Flush a directory to ensure file creation, remove or rename is durable.
*/
static int
-__posix_directory_sync(
- WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *path)
+__posix_directory_sync(WT_SESSION_IMPL *session, const char *path)
{
+ WT_DECL_ITEM(tmp);
WT_DECL_RET;
- WT_SESSION_IMPL *session;
int fd, tret;
+ char *dir;
- WT_UNUSED(file_system);
+ WT_RET(__wt_scr_alloc(session, 0, &tmp));
+ WT_ERR(__wt_buf_setstr(session, tmp, path));
- session = (WT_SESSION_IMPL *)wt_session;
+ /*
+	 * This layer should never see a path that doesn't include a
+	 * directory separator; the dereference below relies on that fact.
+ */
+ dir = tmp->mem;
+ strrchr(dir, '/')[1] = '\0';
+ fd = -1; /* -Wconditional-uninitialized */
WT_SYSCALL_RETRY((
- (fd = open(path, O_RDONLY, 0444)) == -1 ? -1 : 0), ret);
+ (fd = open(dir, O_RDONLY, 0444)) == -1 ? -1 : 0), ret);
if (ret != 0)
- WT_RET_MSG(session, ret, "%s: directory-sync: open", path);
+ WT_ERR_MSG(session, ret, "%s: directory-sync: open", dir);
- ret = __posix_sync(session, fd, path, "directory-sync");
+ ret = __posix_sync(session, fd, dir, "directory-sync");
WT_SYSCALL(close(fd), tret);
if (tret != 0) {
- __wt_err(session, tret, "%s: directory-sync: close", path);
+ __wt_err(session, tret, "%s: directory-sync: close", dir);
if (ret == 0)
ret = tret;
}
+
+err: __wt_scr_free(session, &tmp);
return (ret);
}
#endif
@@ -141,8 +150,8 @@ __posix_fs_exist(WT_FILE_SYSTEM *file_system,
* Remove a file.
*/
static int
-__posix_fs_remove(
- WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name)
+__posix_fs_remove(WT_FILE_SYSTEM *file_system,
+ WT_SESSION *wt_session, const char *name, uint32_t flags)
{
WT_DECL_RET;
WT_SESSION_IMPL *session;
@@ -159,9 +168,17 @@ __posix_fs_remove(
* using unlink may be marginally safer.
*/
WT_SYSCALL(unlink(name), ret);
- if (ret == 0)
+ if (ret != 0)
+ WT_RET_MSG(session, ret, "%s: file-remove: unlink", name);
+
+ if (!LF_ISSET(WT_FS_DURABLE))
return (0);
- WT_RET_MSG(session, ret, "%s: file-remove: unlink", name);
+
+#ifdef __linux__
+ /* Flush the backing directory to guarantee the remove. */
+	WT_RET(__posix_directory_sync(session, name));
+#endif
+ return (0);
}
/*
@@ -170,7 +187,7 @@ __posix_fs_remove(
*/
static int
__posix_fs_rename(WT_FILE_SYSTEM *file_system,
- WT_SESSION *wt_session, const char *from, const char *to)
+ WT_SESSION *wt_session, const char *from, const char *to, uint32_t flags)
{
WT_DECL_RET;
WT_SESSION_IMPL *session;
@@ -187,9 +204,43 @@ __posix_fs_rename(WT_FILE_SYSTEM *file_system,
* return (if errno is 0), but we've done the best we can.
*/
WT_SYSCALL(rename(from, to) != 0 ? -1 : 0, ret);
- if (ret == 0)
+ if (ret != 0)
+ WT_RET_MSG(
+ session, ret, "%s to %s: file-rename: rename", from, to);
+
+ if (!LF_ISSET(WT_FS_DURABLE))
return (0);
- WT_RET_MSG(session, ret, "%s to %s: file-rename: rename", from, to);
+#ifdef __linux__
+ /*
+ * Flush the backing directory to guarantee the rename. My reading of
+ * POSIX 1003.1 is there's no guarantee flushing only one of the from
+ * or to directories, or flushing a common parent, is sufficient, and
+ * even if POSIX were to make that guarantee, existing filesystems are
+ * known to not provide the guarantee or only provide the guarantee
+ * with specific mount options. Flush both of the from/to directories
+ * until it's a performance problem.
+ */
+ WT_RET(__posix_directory_sync(session, from));
+
+ /*
+ * In almost all cases, we're going to be renaming files in the same
+ * directory, we can at least fast-path that.
+ */
+ {
+ bool same_directory;
+ const char *fp, *tp;
+
+ fp = strrchr(from, '/');
+ tp = strrchr(to, '/');
+ same_directory = (fp == NULL && tp == NULL) ||
+ (fp != NULL && tp != NULL &&
+ fp - from == tp - to && memcmp(from, to, (size_t)(fp - from)) == 0);
+
+ if (!same_directory)
+ WT_RET(__posix_directory_sync(session, to));
+ }
+#endif
+ return (0);
}
/*
@@ -513,7 +564,7 @@ __posix_open_file_cloexec(WT_SESSION_IMPL *session, int fd, const char *name)
*/
static int
__posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
- const char *name, WT_OPEN_FILE_TYPE file_type, uint32_t flags,
+ const char *name, WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags,
WT_FILE_HANDLE **file_handlep)
{
WT_CONNECTION_IMPL *conn;
@@ -536,7 +587,7 @@ __posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
/* Set up error handling. */
pfh->fd = -1;
- if (file_type == WT_OPEN_FILE_TYPE_DIRECTORY) {
+ if (file_type == WT_FS_OPEN_FILE_TYPE_DIRECTORY) {
f = O_RDONLY;
#ifdef O_CLOEXEC
/*
@@ -554,10 +605,10 @@ __posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
goto directory_open;
}
- f = LF_ISSET(WT_OPEN_READONLY) ? O_RDONLY : O_RDWR;
- if (LF_ISSET(WT_OPEN_CREATE)) {
+ f = LF_ISSET(WT_FS_OPEN_READONLY) ? O_RDONLY : O_RDWR;
+ if (LF_ISSET(WT_FS_OPEN_CREATE)) {
f |= O_CREAT;
- if (LF_ISSET(WT_OPEN_EXCLUSIVE))
+ if (LF_ISSET(WT_FS_OPEN_EXCLUSIVE))
f |= O_EXCL;
mode = 0666;
} else
@@ -577,7 +628,7 @@ __posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
#endif
#ifdef O_DIRECT
/* Direct I/O. */
- if (LF_ISSET(WT_OPEN_DIRECTIO)) {
+ if (LF_ISSET(WT_FS_OPEN_DIRECTIO)) {
f |= O_DIRECT;
pfh->direct_io = true;
} else
@@ -585,11 +636,11 @@ __posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
#endif
#ifdef O_NOATIME
/* Avoid updating metadata for read-only workloads. */
- if (file_type == WT_OPEN_FILE_TYPE_DATA)
+ if (file_type == WT_FS_OPEN_FILE_TYPE_DATA)
f |= O_NOATIME;
#endif
- if (file_type == WT_OPEN_FILE_TYPE_LOG &&
+ if (file_type == WT_FS_OPEN_FILE_TYPE_LOG &&
FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC)) {
#ifdef O_DSYNC
f |= O_DSYNC;
@@ -601,6 +652,7 @@ __posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
#endif
}
+ /* Create/Open the file. */
WT_SYSCALL_RETRY(((pfh->fd = open(name, f, mode)) == -1 ? -1 : 0), ret);
if (ret != 0)
WT_ERR_MSG(session, ret,
@@ -608,6 +660,16 @@ __posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
"%s: handle-open: open: failed with direct I/O configured, "
"some filesystem types do not support direct I/O" :
"%s: handle-open: open", name);
+
+#ifdef __linux__
+ /*
+ * Durability: some filesystems require a directory sync to be confident
+ * the file will appear.
+ */
+ if (LF_ISSET(WT_FS_OPEN_DURABLE))
+ WT_ERR(__posix_directory_sync(session, name));
+#endif
+
WT_ERR(__posix_open_file_cloexec(session, pfh->fd, name));
#if defined(HAVE_POSIX_FADVISE)
@@ -616,7 +678,7 @@ __posix_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
* Ignore fadvise when doing direct I/O, the kernel cache isn't
* interesting.
*/
- if (!pfh->direct_io && file_type == WT_OPEN_FILE_TYPE_DATA) {
+ if (!pfh->direct_io && file_type == WT_FS_OPEN_FILE_TYPE_DATA) {
WT_SYSCALL(
posix_fadvise(pfh->fd, 0, 0, POSIX_FADV_RANDOM), ret);
if (ret != 0)
@@ -705,9 +767,6 @@ __wt_os_posix(WT_SESSION_IMPL *session)
/* Initialize the POSIX jump table. */
file_system->fs_directory_list = __wt_posix_directory_list;
file_system->fs_directory_list_free = __wt_posix_directory_list_free;
-#ifdef __linux__
- file_system->fs_directory_sync = __posix_directory_sync;
-#endif
file_system->fs_exist = __posix_fs_exist;
file_system->fs_open_file = __posix_open_file;
file_system->fs_remove = __posix_fs_remove;
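
The durable path added to __posix_fs_rename flushes the parent directories of both names so the rename itself survives a crash, fast-pathing the common same-directory case. A standalone POSIX sketch of the same idea, without the fast path; the helper names are illustrative, not WiredTiger's __posix_directory_sync:

#include <fcntl.h>
#include <libgen.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* fsync the directory containing "path". */
static int
fsync_parent(const char *path)
{
	char copy[4096];
	int fd, ret;

	/* dirname may modify its argument, so operate on a copy. */
	(void)strncpy(copy, path, sizeof(copy) - 1);
	copy[sizeof(copy) - 1] = '\0';
	if ((fd = open(dirname(copy), O_RDONLY)) < 0)
		return (-1);
	ret = fsync(fd);
	(void)close(fd);
	return (ret);
}

/* Rename, then flush both parent directories to make it durable. */
int
durable_rename(const char *from, const char *to)
{
	if (rename(from, to) != 0)
		return (-1);
	if (fsync_parent(from) != 0)
		return (-1);
	return (fsync_parent(to));
}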
diff --git a/src/os_win/os_fs.c b/src/os_win/os_fs.c
index 5daba124e90..fc03e0a2595 100644
--- a/src/os_win/os_fs.c
+++ b/src/os_win/os_fs.c
@@ -36,13 +36,14 @@ __win_fs_exist(WT_FILE_SYSTEM *file_system,
* Remove a file.
*/
static int
-__win_fs_remove(
- WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session, const char *name)
+__win_fs_remove(WT_FILE_SYSTEM *file_system,
+ WT_SESSION *wt_session, const char *name, uint32_t flags)
{
DWORD windows_error;
WT_SESSION_IMPL *session;
WT_UNUSED(file_system);
+ WT_UNUSED(flags);
session = (WT_SESSION_IMPL *)wt_session;
@@ -62,12 +63,13 @@ __win_fs_remove(
*/
static int
__win_fs_rename(WT_FILE_SYSTEM *file_system,
- WT_SESSION *wt_session, const char *from, const char *to)
+ WT_SESSION *wt_session, const char *from, const char *to, uint32_t flags)
{
DWORD windows_error;
WT_SESSION_IMPL *session;
WT_UNUSED(file_system);
+ WT_UNUSED(flags);
session = (WT_SESSION_IMPL *)wt_session;
@@ -426,7 +428,7 @@ __win_file_write(WT_FILE_HANDLE *file_handle,
*/
static int
__win_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
- const char *name, WT_OPEN_FILE_TYPE file_type, uint32_t flags,
+ const char *name, WT_FS_OPEN_FILE_TYPE file_type, uint32_t flags,
WT_FILE_HANDLE **file_handlep)
{
DWORD dwCreationDisposition, windows_error;
@@ -458,11 +460,11 @@ __win_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
* require that functionality: create an empty WT_FH structure with
* invalid handles.
*/
- if (file_type == WT_OPEN_FILE_TYPE_DIRECTORY)
+ if (file_type == WT_FS_OPEN_FILE_TYPE_DIRECTORY)
goto directory_open;
desired_access = GENERIC_READ;
- if (!LF_ISSET(WT_OPEN_READONLY))
+ if (!LF_ISSET(WT_FS_OPEN_READONLY))
desired_access |= GENERIC_WRITE;
/*
@@ -476,15 +478,15 @@ __win_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
f = FILE_ATTRIBUTE_NORMAL;
dwCreationDisposition = 0;
- if (LF_ISSET(WT_OPEN_CREATE)) {
+ if (LF_ISSET(WT_FS_OPEN_CREATE)) {
dwCreationDisposition = CREATE_NEW;
- if (LF_ISSET(WT_OPEN_EXCLUSIVE))
+ if (LF_ISSET(WT_FS_OPEN_EXCLUSIVE))
dwCreationDisposition = CREATE_ALWAYS;
} else
dwCreationDisposition = OPEN_EXISTING;
/* Direct I/O. */
- if (LF_ISSET(WT_OPEN_DIRECTIO)) {
+ if (LF_ISSET(WT_FS_OPEN_DIRECTIO)) {
f |= FILE_FLAG_NO_BUFFERING;
win_fh->direct_io = true;
}
@@ -493,19 +495,19 @@ __win_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
if (FLD_ISSET(conn->write_through, file_type))
f |= FILE_FLAG_WRITE_THROUGH;
- if (file_type == WT_OPEN_FILE_TYPE_LOG &&
+ if (file_type == WT_FS_OPEN_FILE_TYPE_LOG &&
FLD_ISSET(conn->txn_logsync, WT_LOG_DSYNC))
f |= FILE_FLAG_WRITE_THROUGH;
/* Disable read-ahead on trees: it slows down random read workloads. */
- if (file_type == WT_OPEN_FILE_TYPE_DATA)
+ if (file_type == WT_FS_OPEN_FILE_TYPE_DATA)
f |= FILE_FLAG_RANDOM_ACCESS;
win_fh->filehandle = CreateFileA(name, desired_access,
FILE_SHARE_READ | FILE_SHARE_WRITE,
NULL, dwCreationDisposition, f, NULL);
if (win_fh->filehandle == INVALID_HANDLE_VALUE) {
- if (LF_ISSET(WT_OPEN_CREATE) &&
+ if (LF_ISSET(WT_FS_OPEN_CREATE) &&
GetLastError() == ERROR_FILE_EXISTS)
win_fh->filehandle = CreateFileA(name, desired_access,
FILE_SHARE_READ | FILE_SHARE_WRITE,
@@ -528,7 +530,7 @@ __win_open_file(WT_FILE_SYSTEM *file_system, WT_SESSION *wt_session,
* concurrently with reads on the file. Writes would also move the file
* pointer.
*/
- if (!LF_ISSET(WT_OPEN_READONLY)) {
+ if (!LF_ISSET(WT_FS_OPEN_READONLY)) {
win_fh->filehandle_secondary = CreateFileA(name, desired_access,
FILE_SHARE_READ | FILE_SHARE_WRITE,
NULL, OPEN_EXISTING, f, NULL);
diff --git a/src/os_win/os_path.c b/src/os_win/os_path.c
index 220752ce7a1..74050600417 100644
--- a/src/os_win/os_path.c
+++ b/src/os_win/os_path.c
@@ -16,8 +16,30 @@ bool
__wt_absolute_path(const char *path)
{
/*
- * Check for a drive name (for example, "D:"), allow both forward and
- * backward slashes.
+ * https://msdn.microsoft.com/en-us/library/windows/desktop/aa365247
+ *
+ * For Windows API functions that manipulate files, file names can often
+ * be relative to the current directory, while some APIs require a fully
+ * qualified path. A file name is relative to the current directory if
+ * it does not begin with one of the following:
+ *
+ * -- A UNC name of any format, which always start with two backslash
+ * characters ("\\").
+ * -- A disk designator with a backslash, for example "C:\" or "d:\".
+ * -- A single backslash, for example, "\directory" or "\file.txt". This
+ * is also referred to as an absolute path.
+ *
+ * If a file name begins with only a disk designator but not the
+ * backslash after the colon, it is interpreted as a relative path to
+ * the current directory on the drive with the specified letter. Note
+ * that the current directory may or may not be the root directory
+ * depending on what it was set to during the most recent "change
+ * directory" operation on that disk.
+ *
+ * -- "C:tmp.txt" refers to a file named "tmp.txt" in the current
+ * directory on drive C.
+ * -- "C:tempdir\tmp.txt" refers to a file in a subdirectory to the
+ * current directory on drive C.
*/
if (strlen(path) >= 3 && __wt_isalpha(path[0]) && path[1] == ':')
path += 2;
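
The expanded comment in __wt_absolute_path spells out the Windows rules for absolute versus drive-relative names. A simplified standalone sketch of that check, assuming the function goes on to test for a leading slash or backslash after skipping a bare drive designator (the remainder of the function is not shown in this hunk):

#include <ctype.h>
#include <stdbool.h>
#include <string.h>

/* Return true if a Windows path is absolute (including UNC names). */
bool
win_absolute_path(const char *path)
{
	/* Skip a drive designator such as "C:"; "C:tmp.txt" stays relative. */
	if (strlen(path) >= 3 &&
	    isalpha((unsigned char)path[0]) && path[1] == ':')
		path += 2;
	return (path[0] == '/' || path[0] == '\\');
}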
diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c
index b49946bb10e..b96b34594b0 100644
--- a/src/reconcile/rec_write.c
+++ b/src/reconcile/rec_write.c
@@ -115,6 +115,7 @@ typedef struct {
*/
uint32_t page_size; /* Set page size */
uint32_t page_size_orig; /* Saved set page size */
+ uint32_t max_raw_page_size; /* Max page size with raw compression */
/*
* Second, the split size: if we're doing the page layout, split to a
@@ -159,9 +160,16 @@ typedef struct {
WT_ADDR addr; /* Split's written location */
uint32_t size; /* Split's size */
uint32_t cksum; /* Split's checksum */
+
void *disk_image; /* Split's disk image */
/*
+ * Raw compression, the disk image being written is already
+ * compressed.
+ */
+ bool already_compressed;
+
+ /*
* Saved update list, supporting the WT_EVICT_UPDATE_RESTORE and
* WT_EVICT_LOOKASIDE configurations.
*/
@@ -175,13 +183,6 @@ typedef struct {
* column-store key.
*/
WT_ITEM key; /* Promoted row-store key */
-
- /*
- * During wrapup, after reconciling the root page, we write a
- * final block as part of a checkpoint. If raw compression
- * was configured, that block may have already been compressed.
- */
- bool already_compressed;
} *bnd; /* Saved boundaries */
uint32_t bnd_next; /* Next boundary slot */
uint32_t bnd_next_max; /* Maximum boundary slots used */
@@ -445,17 +446,32 @@ __wt_reconcile(WT_SESSION_IMPL *session,
}
/*
- * Clean up reconciliation resources: some workloads have millions of
- * boundary structures, and if associated with an application session
- * pulled into doing forced eviction, they won't be discarded for the
- * life of the session (or until session.reset is called). Discard all
- * of the reconciliation resources if an application thread, not doing
- * a checkpoint.
- */
- __rec_bnd_cleanup(session, r,
- F_ISSET(session, WT_SESSION_INTERNAL) ||
- WT_SESSION_IS_CHECKPOINT(session) ? false : true);
+ * When application threads perform eviction, don't cache block manager
+ * or reconciliation structures (even across calls), we can have a
+ * significant number of application threads doing eviction at the same
+ * time with large items. We ignore checkpoints, once the checkpoint
+ * completes, all unnecessary session resources will be discarded.
+ *
+ * Even in application threads doing checkpoints or in internal threads
+ * doing any reconciliation, clean up reconciliation resources. Some
+ * workloads have millions of boundary structures in a reconciliation
+ * and we don't want to tie that memory down, even across calls.
+ */
+ if (WT_SESSION_IS_CHECKPOINT(session) ||
+ F_ISSET(session, WT_SESSION_INTERNAL))
+ __rec_bnd_cleanup(session, r, false);
+ else {
+ /*
+ * Clean up the underlying block manager memory too: it's not
+ * reconciliation, but threads discarding reconciliation
+ * structures want to clean up the block manager's structures
+ * as well, and there's no obvious place to do that.
+ */
+ if (session->block_manager_cleanup != NULL)
+ WT_TRET(session->block_manager_cleanup(session));
+ WT_TRET(__rec_destroy_session(session));
+ }
WT_RET(ret);
/*
@@ -652,7 +668,7 @@ __rec_root_write(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags)
WT_ASSERT(session, mod->mod_multi[i].supd == NULL);
WT_ERR(__wt_multi_to_ref(session,
- next, &mod->mod_multi[i], &pindex->index[i], NULL));
+ next, &mod->mod_multi[i], &pindex->index[i], NULL, false));
pindex->index[i]->home = next;
}
@@ -1135,8 +1151,20 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
*/
if (!skipped &&
(F_ISSET(btree, WT_BTREE_LOOKASIDE) ||
- __wt_txn_visible_all(session, max_txn)))
+ __wt_txn_visible_all(session, max_txn))) {
+#ifdef HAVE_DIAGNOSTIC
+ /*
+ * The checkpoint transaction is special. Make sure we never
+ * write (metadata) updates from a checkpoint in a concurrent
+ * session.
+ */
+ txnid = *updp == NULL ? WT_TXN_NONE : (*updp)->txnid;
+ WT_ASSERT(session, txnid == WT_TXN_NONE ||
+ txnid != S2C(session)->txn_global.checkpoint_txnid ||
+ WT_SESSION_IS_CHECKPOINT(session));
+#endif
return (0);
+ }
/*
* In some cases, there had better not be skipped updates or updates not
@@ -1845,18 +1873,19 @@ __rec_split_bnd_init(WT_SESSION_IMPL *session, WT_BOUNDARY *bnd)
WT_CLEAR(bnd->addr);
bnd->size = 0;
bnd->cksum = 0;
+
__wt_free(session, bnd->disk_image);
__wt_free(session, bnd->supd);
bnd->supd_next = 0;
bnd->supd_allocated = 0;
+ bnd->already_compressed = false;
+
/*
* Don't touch the key, we re-use that memory in each new
* reconciliation.
*/
-
- bnd->already_compressed = false;
}
/*
@@ -1950,10 +1979,19 @@ __rec_split_init(WT_SESSION_IMPL *session,
* additional data because we don't know how well it will compress, and
* we don't want to increment our way up to the amount of data needed by
* the application to successfully compress to the target page size.
+ * Ideally accumulate data several times the page size without
+ * approaching the memory page maximum, but at least have data worth
+ * one page.
+ *
+ * There are cases when we grow the page size to accommodate large
+ * records, in those cases we split the pages once they have crossed
+ * the maximum size for a page with raw compression.
*/
r->page_size = r->page_size_orig = max;
if (r->raw_compression)
- r->page_size *= 10;
+ r->max_raw_page_size = r->page_size =
+ (uint32_t)WT_MIN(r->page_size * 10,
+ WT_MAX(r->page_size, btree->maxmempage / 2));
/*
* Ensure the disk image buffer is large enough for the max object, as
@@ -2295,7 +2333,7 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len)
/* Hitting a page boundary resets the dictionary, in all cases. */
__rec_dictionary_reset(r);
- inuse = WT_PTRDIFF32(r->first_free, dsk);
+ inuse = WT_PTRDIFF(r->first_free, dsk);
switch (r->bnd_state) {
case SPLIT_BOUNDARY:
/*
@@ -2465,7 +2503,7 @@ __rec_split_raw_worker(WT_SESSION_IMPL *session,
WT_COMPRESSOR *compressor;
WT_DECL_RET;
WT_ITEM *dst, *write_ref;
- WT_PAGE_HEADER *dsk, *dsk_dst;
+ WT_PAGE_HEADER *dsk, *dsk_dst, *disk_image;
WT_SESSION *wt_session;
size_t corrected_page_size, extra_skip, len, result_len;
uint64_t recno;
@@ -2582,11 +2620,9 @@ __rec_split_raw_worker(WT_SESSION_IMPL *session,
/*
* Don't create an image so large that any future update will
- * cause a split in memory. Use half of the maximum size so
- * we split very compressible pages that have reached the
- * maximum size in memory into two equal blocks.
+ * cause a split in memory.
*/
- if (len > (size_t)btree->maxmempage / 2)
+ if (max_image_slot == 0 && len > (size_t)r->max_raw_page_size)
max_image_slot = slots;
}
@@ -2648,7 +2684,7 @@ __rec_split_raw_worker(WT_SESSION_IMPL *session,
r->page_size_orig, btree->split_pct,
WT_BLOCK_COMPRESS_SKIP + extra_skip,
(uint8_t *)dsk + WT_BLOCK_COMPRESS_SKIP, r->raw_offsets,
- no_more_rows || max_image_slot == 0 ? slots : max_image_slot,
+ max_image_slot == 0 ? slots : max_image_slot,
(uint8_t *)dst->mem + WT_BLOCK_COMPRESS_SKIP,
result_len,
no_more_rows || max_image_slot != 0,
@@ -2751,7 +2787,8 @@ no_slots:
if (result_slots != 0) {
/*
- * We have a block, finalize the header information.
+ * We have a block, finalize the compressed disk image's header
+ * information.
*/
dst->size = result_len + WT_BLOCK_COMPRESS_SKIP;
dsk_dst = dst->mem;
@@ -2761,6 +2798,26 @@ no_slots:
dsk_dst->u.entries = r->raw_entries[result_slots - 1];
/*
+ * Optionally keep the disk image in cache. Update the initial
+ * page-header fields to reflect the actual data being written.
+ *
+ * If updates are saved and need to be restored, we have to keep
+ * a copy of the disk image. Unfortunately, we don't yet know if
+ * there are updates to restore for the key range covered by the
+ * disk image just created. If there are any saved updates, take
+ * a copy of the disk image, it's freed later if not needed.
+ */
+ if (F_ISSET(r, WT_EVICT_SCRUB) ||
+ (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && r->supd_next > 0)) {
+ WT_RET(__wt_strndup(session, dsk,
+ dsk_dst->mem_size, &last->disk_image));
+ disk_image = last->disk_image;
+ disk_image->recno = last->recno;
+ disk_image->mem_size = dsk_dst->mem_size;
+ disk_image->u.entries = dsk_dst->u.entries;
+ }
+
+ /*
* There is likely a remnant in the working buffer that didn't
* get compressed; copy it down to the start of the buffer and
* update the starting record number, free space and so on.
@@ -2874,48 +2931,6 @@ split_grow: /*
}
/*
- * __rec_raw_decompress --
- * Decompress a raw-compressed image.
- */
-static int
-__rec_raw_decompress(
- WT_SESSION_IMPL *session, const void *image, size_t size, void *retp)
-{
- WT_BTREE *btree;
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- WT_PAGE_HEADER const *dsk;
- size_t result_len;
-
- btree = S2BT(session);
- dsk = image;
-
- /*
- * We skipped an update and we can't write a block, but unfortunately,
- * the block has already been compressed. Decompress the block so we
- * can subsequently re-instantiate it in memory.
- */
- WT_RET(__wt_scr_alloc(session, dsk->mem_size, &tmp));
- memcpy(tmp->mem, image, WT_BLOCK_COMPRESS_SKIP);
- WT_ERR(btree->compressor->decompress(btree->compressor,
- &session->iface,
- (uint8_t *)image + WT_BLOCK_COMPRESS_SKIP,
- size - WT_BLOCK_COMPRESS_SKIP,
- (uint8_t *)tmp->mem + WT_BLOCK_COMPRESS_SKIP,
- dsk->mem_size - WT_BLOCK_COMPRESS_SKIP,
- &result_len));
- if (result_len != dsk->mem_size - WT_BLOCK_COMPRESS_SKIP)
- WT_ERR(__wt_illegal_value(session, btree->dhandle->name));
-
- WT_ERR(__wt_strndup(session, tmp->data, dsk->mem_size, retp));
- WT_ASSERT(session, __wt_verify_dsk_image(session,
- "[raw evict split]", tmp->data, dsk->mem_size, false) == 0);
-
-err: __wt_scr_free(session, &tmp);
- return (ret);
-}
-
-/*
* __rec_split_raw --
* Raw compression split routine.
*/
@@ -3022,7 +3037,7 @@ __rec_split_finish(WT_SESSION_IMPL *session, WT_RECONCILE *r)
if (r->raw_compression && r->entries != 0) {
while (r->entries != 0) {
data_size =
- WT_PTRDIFF32(r->first_free, r->disk_image.mem);
+ WT_PTRDIFF(r->first_free, r->disk_image.mem);
if (data_size <= btree->allocsize)
break;
WT_RET(__rec_split_raw_worker(session, r, 0, true));
@@ -3145,14 +3160,13 @@ __rec_split_write(WT_SESSION_IMPL *session,
uint32_t bnd_slot, i, j;
int cmp;
uint8_t addr[WT_BTREE_MAX_ADDR_COOKIE];
+ bool need_image;
btree = S2BT(session);
dsk = buf->mem;
page = r->page;
mod = page->modify;
- WT_RET(__wt_scr_alloc(session, 0, &key));
-
/* Set the zero-length value flag in the page header. */
if (dsk->type == WT_PAGE_ROW_LEAF) {
F_CLR(dsk, WT_PAGE_EMPTY_V_ALL | WT_PAGE_EMPTY_V_NONE);
@@ -3163,6 +3177,8 @@ __rec_split_write(WT_SESSION_IMPL *session,
F_SET(dsk, WT_PAGE_EMPTY_V_NONE);
}
+ bnd->entries = r->entries;
+
/* Initialize the address (set the page type for the parent). */
switch (dsk->type) {
case WT_PAGE_COL_FIX:
@@ -3176,9 +3192,8 @@ __rec_split_write(WT_SESSION_IMPL *session,
case WT_PAGE_ROW_INT:
bnd->addr.type = WT_ADDR_INT;
break;
- WT_ILLEGAL_VALUE_ERR(session);
+ WT_ILLEGAL_VALUE(session);
}
-
bnd->size = (uint32_t)buf->size;
bnd->cksum = 0;
@@ -3190,6 +3205,8 @@ __rec_split_write(WT_SESSION_IMPL *session,
* This code requires a key be filled in for the next block (or the
* last block flag be set, if there's no next block).
*/
+ if (page->type == WT_PAGE_ROW_LEAF)
+ WT_RET(__wt_scr_alloc(session, 0, &key));
for (i = 0, supd = r->supd; i < r->supd_next; ++i, ++supd) {
/* The last block gets all remaining saved updates. */
if (last_block) {
@@ -3254,33 +3271,11 @@ supd_check_complete:
* image, we can't actually write it. Instead, we will re-instantiate
* the page using the disk image and any list of updates we skipped.
*/
- if (F_ISSET(r, WT_EVICT_IN_MEMORY) ||
- (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && bnd->supd != NULL)) {
-
- /* Statistics tracking that we used update/restore. */
- if (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && bnd->supd != NULL)
- r->cache_write_restore = true;
-
- /*
- * If the buffer is compressed (raw compression was configured),
- * we have to decompress it so we can instantiate it later. It's
- * a slow and convoluted path, but it's also a rare one and it's
- * not worth making it faster. Else, the disk image is ready,
- * copy it into place for later. It's possible the disk image
- * has no items; we have to flag that for verification, it's a
- * special case since read/writing empty pages isn't generally
- * allowed.
- */
- if (bnd->already_compressed)
- WT_ERR(__rec_raw_decompress(
- session, buf->data, buf->size, &bnd->disk_image));
- else {
- WT_ERR(__wt_strndup(
- session, buf->data, buf->size, &bnd->disk_image));
- WT_ASSERT(session, __wt_verify_dsk_image(session,
- "[evict split]", buf->data, buf->size, true) == 0);
- }
- goto done;
+ if (F_ISSET(r, WT_EVICT_IN_MEMORY))
+ goto copy_image;
+ if (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && bnd->supd != NULL) {
+ r->cache_write_restore = true;
+ goto copy_image;
}
/*
@@ -3324,13 +3319,11 @@ supd_check_complete:
bnd->addr = multi->addr;
WT_STAT_FAST_DATA_INCR(session, rec_page_match);
- goto done;
+ goto copy_image;
}
}
}
- bnd->entries = r->entries;
-
#ifdef HAVE_VERBOSE
/* Output a verbose message if we create a page without many entries */
if (WT_VERBOSE_ISSET(session, WT_VERB_SPLIT) && r->entries < 6)
@@ -3343,8 +3336,8 @@ supd_check_complete:
r->bnd_state));
#endif
- WT_ERR(__wt_bt_write(session,
- buf, addr, &addr_size, false, bnd->already_compressed));
+ WT_ERR(__wt_bt_write(session, buf, addr, &addr_size,
+ false, F_ISSET(r, WT_CHECKPOINTING), bnd->already_compressed));
WT_ERR(__wt_strndup(session, addr, addr_size, &bnd->addr.addr));
bnd->addr.size = (uint8_t)addr_size;
@@ -3354,9 +3347,29 @@ supd_check_complete:
* the database's lookaside store.
*/
if (F_ISSET(r, WT_EVICT_LOOKASIDE) && bnd->supd != NULL)
- ret = __rec_update_las(session, r, btree->id, bnd);
+ WT_ERR(__rec_update_las(session, r, btree->id, bnd));
+
+copy_image:
+ /*
+ * If re-instantiating this page in memory (either because eviction
+ * wants to, or because we skipped updates to build the disk image),
+ * save a copy of the disk image.
+ *
+ * Raw compression might have already saved a copy of the disk image
+ * before we could know if we skipped updates to create it, and now
+ * we know if we're going to need it.
+ *
+ * Copy the disk image if we need a copy and don't already have one,
+ * discard any already saved copy we don't need.
+ */
+ need_image = F_ISSET(r, WT_EVICT_SCRUB) ||
+ (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && bnd->supd != NULL);
+ if (need_image && bnd->disk_image == NULL)
+ WT_ERR(__wt_strndup(
+ session, buf->data, buf->size, &bnd->disk_image));
+ if (!need_image)
+ __wt_free(session, bnd->disk_image);
-done:
err: __wt_scr_free(session, &key);
return (ret);
}
@@ -3556,8 +3569,9 @@ __wt_bulk_wrapup(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
WT_PAGE *parent;
WT_RECONCILE *r;
- r = cbulk->reconcile;
btree = S2BT(session);
+ if ((r = cbulk->reconcile) == NULL)
+ return (0);
switch (btree->type) {
case BTREE_COL_FIX:
@@ -5601,9 +5615,10 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
WT_RET(__wt_btree_block_free(session,
mod->mod_replace.addr, mod->mod_replace.size));
- /* Discard the replacement page's address. */
+ /* Discard the replacement page's address and disk image. */
__wt_free(session, mod->mod_replace.addr);
mod->mod_replace.size = 0;
+ __wt_free(session, mod->mod_disk_image);
break;
WT_ILLEGAL_VALUE(session);
}
@@ -5651,26 +5666,33 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
bnd = &r->bnd[0];
/*
- * If saving/restoring changes for this page and there's only
- * one block, there's nothing to write. This is an in-memory
- * configuration or a special case of forced eviction: set up
+ * If in-memory, or saving/restoring changes for this page and
+ * there's only one block, there's nothing to write. Set up
* a single block as if to split, then use that disk image to
- * rewrite the page in memory.
+	 * rewrite the page in memory. This differs from a simple
+	 * replacement where eviction retains the page in memory: the
+	 * replacement path can't handle update lists, while the split
+	 * path can.
*/
- if (bnd->disk_image != NULL)
+ if (F_ISSET(r, WT_EVICT_IN_MEMORY) ||
+ (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && bnd->supd != NULL))
goto split;
/*
- * If this is a root page, then we don't have an address and we
- * have to create a sync point. The address was cleared when
- * we were about to write the buffer so we know what to do here.
+ * A root page, we don't have an address and we have to create
+ * a sync point. The address was cleared when we were about to
+ * write the buffer so we know what to do here.
*/
if (bnd->addr.addr == NULL)
WT_RET(__wt_bt_write(session, &r->disk_image,
- NULL, NULL, true, bnd->already_compressed));
+ NULL, NULL, true, F_ISSET(r, WT_CHECKPOINTING),
+ bnd->already_compressed));
else {
mod->mod_replace = bnd->addr;
bnd->addr.addr = NULL;
+
+ mod->mod_disk_image = bnd->disk_image;
+ bnd->disk_image = NULL;
}
mod->rec_result = WT_PM_REC_REPLACE;
@@ -5805,19 +5827,26 @@ __rec_split_row(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
WT_RET(__wt_row_ikey_alloc(session, 0,
bnd->key.data, bnd->key.size, &multi->key.ikey));
- if (bnd->disk_image == NULL) {
- multi->addr = bnd->addr;
- multi->addr.reuse = 0;
- multi->size = bnd->size;
- multi->cksum = bnd->cksum;
- bnd->addr.addr = NULL;
- } else {
+ /*
+ * Copy any disk image. Don't take saved updates without a
+ * disk image (which happens if they have been saved to the
+ * lookaside table): they should be discarded along with the
+ * original page.
+ */
+ multi->disk_image = bnd->disk_image;
+ bnd->disk_image = NULL;
+ if (multi->disk_image != NULL) {
multi->supd = bnd->supd;
multi->supd_entries = bnd->supd_next;
bnd->supd = NULL;
- multi->disk_image = bnd->disk_image;
- bnd->disk_image = NULL;
}
+
+ /* Copy any address. */
+ multi->addr = bnd->addr;
+ multi->addr.reuse = 0;
+ multi->size = bnd->size;
+ multi->cksum = bnd->cksum;
+ bnd->addr.addr = NULL;
}
mod->mod_multi_entries = r->bnd_next;
@@ -5845,19 +5874,26 @@ __rec_split_col(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
bnd = r->bnd, i = 0; i < r->bnd_next; ++multi, ++bnd, ++i) {
multi->key.recno = bnd->recno;
- if (bnd->disk_image == NULL) {
- multi->addr = bnd->addr;
- multi->addr.reuse = 0;
- multi->size = bnd->size;
- multi->cksum = bnd->cksum;
- bnd->addr.addr = NULL;
- } else {
+ /*
+ * Copy any disk image. Don't take saved updates without a
+ * disk image (which happens if they have been saved to the
+ * lookaside table): they should be discarded along with the
+ * original page.
+ */
+ multi->disk_image = bnd->disk_image;
+ bnd->disk_image = NULL;
+ if (multi->disk_image != NULL) {
multi->supd = bnd->supd;
multi->supd_entries = bnd->supd_next;
bnd->supd = NULL;
- multi->disk_image = bnd->disk_image;
- bnd->disk_image = NULL;
}
+
+ /* Copy any address. */
+ multi->addr = bnd->addr;
+ multi->addr.reuse = 0;
+ multi->size = bnd->size;
+ multi->cksum = bnd->cksum;
+ bnd->addr.addr = NULL;
}
mod->mod_multi_entries = r->bnd_next;
@@ -6133,7 +6169,8 @@ __rec_cell_build_ovfl(WT_SESSION_IMPL *session,
/* Write the buffer. */
addr = buf;
- WT_ERR(__wt_bt_write(session, tmp, addr, &size, false, false));
+ WT_ERR(__wt_bt_write(session, tmp,
+ addr, &size, false, F_ISSET(r, WT_CHECKPOINTING), false));
/*
* Track the overflow record (unless it's a bulk load, which
diff --git a/src/schema/schema_rename.c b/src/schema/schema_rename.c
index 8f4d374fd22..bc92c882117 100644
--- a/src/schema/schema_rename.c
+++ b/src/schema/schema_rename.c
@@ -64,7 +64,7 @@ __rename_file(
WT_ERR(__wt_metadata_insert(session, newuri, oldvalue));
/* Rename the underlying file. */
- WT_ERR(__wt_fs_rename(session, filename, newfile));
+ WT_ERR(__wt_fs_rename(session, filename, newfile, false));
if (WT_META_TRACKING(session))
WT_ERR(__wt_meta_track_fileop(session, uri, newuri));
diff --git a/src/session/session_api.c b/src/session/session_api.c
index 77d1dc74c84..0072d7e1445 100644
--- a/src/session/session_api.c
+++ b/src/session/session_api.c
@@ -17,12 +17,8 @@ static int __session_rollback_transaction(WT_SESSION *, const char *);
* Unsupported session method.
*/
int
-__wt_session_notsup(WT_SESSION *wt_session)
+__wt_session_notsup(WT_SESSION_IMPL *session)
{
- WT_SESSION_IMPL *session;
-
- session = (WT_SESSION_IMPL *)wt_session;
-
WT_RET_MSG(session, ENOTSUP, "Unsupported session method");
}
@@ -66,6 +62,17 @@ __wt_session_copy_values(WT_SESSION_IMPL *session)
TAILQ_FOREACH(cursor, &session->cursors, q)
if (F_ISSET(cursor, WT_CURSTD_VALUE_INT)) {
+#ifdef HAVE_DIAGNOSTIC
+ /*
+ * We have to do this with a transaction ID pinned
+ * unless the cursor is reading from a checkpoint.
+ */
+ WT_TXN_STATE *txn_state = WT_SESSION_TXN_STATE(session);
+ WT_ASSERT(session, txn_state->snap_min != WT_TXN_NONE ||
+ (WT_PREFIX_MATCH(cursor->uri, "file:") &&
+ F_ISSET((WT_CURSOR_BTREE *)cursor, WT_CBT_NO_TXN)));
+#endif
+
F_CLR(cursor, WT_CURSTD_VALUE_INT);
WT_RET(__wt_buf_set(session, &cursor->value,
cursor->value.data, cursor->value.size));
@@ -509,7 +516,11 @@ __session_create(WT_SESSION *wt_session, const char *uri, const char *config)
ret = __wt_session_create(session, uri, config);
-err: API_END_RET_NOTFOUND_MAP(session, ret);
+err: if (ret != 0)
+ WT_STAT_FAST_CONN_INCR(session, session_table_create_fail);
+ else
+ WT_STAT_FAST_CONN_INCR(session, session_table_create_success);
+ API_END_RET_NOTFOUND_MAP(session, ret);
}
/*
@@ -520,10 +531,18 @@ static int
__session_create_readonly(
WT_SESSION *wt_session, const char *uri, const char *config)
{
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
WT_UNUSED(uri);
WT_UNUSED(config);
- return (__wt_session_notsup(wt_session));
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, create);
+
+ WT_STAT_FAST_CONN_INCR(session, session_table_create_fail);
+ ret = __wt_session_notsup(session);
+err: API_END_RET(session, ret);
}
/*
@@ -570,9 +589,16 @@ err: API_END_RET(session, ret);
static int
__session_log_flush_readonly(WT_SESSION *wt_session, const char *config)
{
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
WT_UNUSED(config);
- return (__wt_session_notsup(wt_session));
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, log_flush);
+
+ ret = __wt_session_notsup(session);
+err: API_END_RET(session, ret);
}
/*
@@ -605,9 +631,16 @@ static int
__session_log_printf_readonly(WT_SESSION *wt_session, const char *fmt, ...)
WT_GCC_FUNC_ATTRIBUTE((format (printf, 2, 3)))
{
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
WT_UNUSED(fmt);
- return (__wt_session_notsup(wt_session));
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, log_printf);
+
+ ret = __wt_session_notsup(session);
+err: API_END_RET(session, ret);
}
/*
@@ -630,7 +663,12 @@ __session_rebalance(WT_SESSION *wt_session, const char *uri, const char *config)
ret = __wt_schema_worker(session, uri, __wt_bt_rebalance,
NULL, cfg, WT_DHANDLE_EXCLUSIVE | WT_BTREE_REBALANCE)));
-err: API_END_RET_NOTFOUND_MAP(session, ret);
+err: if (ret != 0)
+ WT_STAT_FAST_CONN_INCR(session, session_table_rebalance_fail);
+ else
+ WT_STAT_FAST_CONN_INCR(session,
+ session_table_rebalance_success);
+ API_END_RET_NOTFOUND_MAP(session, ret);
}
/*
@@ -641,10 +679,18 @@ static int
__session_rebalance_readonly(
WT_SESSION *wt_session, const char *uri, const char *config)
{
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
WT_UNUSED(uri);
WT_UNUSED(config);
- return (__wt_session_notsup(wt_session));
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, rebalance);
+
+ WT_STAT_FAST_CONN_INCR(session, session_table_rebalance_fail);
+ ret = __wt_session_notsup(session);
+err: API_END_RET(session, ret);
}
/*
@@ -670,7 +716,11 @@ __session_rename(WT_SESSION *wt_session,
WT_WITH_TABLE_LOCK(session, ret,
ret = __wt_schema_rename(session, uri, newuri, cfg))));
-err: API_END_RET_NOTFOUND_MAP(session, ret);
+err: if (ret != 0)
+ WT_STAT_FAST_CONN_INCR(session, session_table_rename_fail);
+ else
+ WT_STAT_FAST_CONN_INCR(session, session_table_rename_success);
+ API_END_RET_NOTFOUND_MAP(session, ret);
}
/*
@@ -681,11 +731,19 @@ static int
__session_rename_readonly(WT_SESSION *wt_session,
const char *uri, const char *newuri, const char *config)
{
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
WT_UNUSED(uri);
WT_UNUSED(newuri);
WT_UNUSED(config);
- return (__wt_session_notsup(wt_session));
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, rename);
+
+ WT_STAT_FAST_CONN_INCR(session, session_table_rename_fail);
+ ret = __wt_session_notsup(session);
+err: API_END_RET(session, ret);
}
/*
@@ -733,8 +791,8 @@ __wt_session_drop(WT_SESSION_IMPL *session, const char *uri, const char *cfg[])
F_SET(session, WT_SESSION_LOCK_NO_WAIT);
/*
- * The checkpoint lock only is needed to avoid a spurious EBUSY error
- * return.
+ * Take the checkpoint lock if there is a need to prevent the drop
+ * operation from failing with EBUSY due to an ongoing checkpoint.
*/
if (checkpoint_wait)
WT_WITH_CHECKPOINT_LOCK(session, ret,
@@ -770,7 +828,12 @@ __session_drop(WT_SESSION *wt_session, const char *uri, const char *config)
ret = __wt_session_drop(session, uri, cfg);
-err: /* Note: drop operations cannot be unrolled (yet?). */
+err: if (ret != 0)
+ WT_STAT_FAST_CONN_INCR(session, session_table_drop_fail);
+ else
+ WT_STAT_FAST_CONN_INCR(session, session_table_drop_success);
+
+ /* Note: drop operations cannot be unrolled (yet?). */
API_END_RET_NOTFOUND_MAP(session, ret);
}
@@ -782,10 +845,18 @@ static int
__session_drop_readonly(
WT_SESSION *wt_session, const char *uri, const char *config)
{
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
WT_UNUSED(uri);
WT_UNUSED(config);
- return (__wt_session_notsup(wt_session));
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, drop);
+
+ WT_STAT_FAST_CONN_INCR(session, session_table_drop_fail);
+ ret = __wt_session_notsup(session);
+err: API_END_RET(session, ret);
}
/*
@@ -943,7 +1014,11 @@ __session_salvage(WT_SESSION *wt_session, const char *uri, const char *config)
ret = __wt_schema_worker(session, uri, __wt_salvage,
NULL, cfg, WT_DHANDLE_EXCLUSIVE | WT_BTREE_SALVAGE)));
-err: API_END_RET_NOTFOUND_MAP(session, ret);
+err: if (ret != 0)
+ WT_STAT_FAST_CONN_INCR(session, session_table_salvage_fail);
+ else
+ WT_STAT_FAST_CONN_INCR(session, session_table_salvage_success);
+ API_END_RET_NOTFOUND_MAP(session, ret);
}
/*
@@ -954,10 +1029,18 @@ static int
__session_salvage_readonly(
WT_SESSION *wt_session, const char *uri, const char *config)
{
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
WT_UNUSED(uri);
WT_UNUSED(config);
- return (__wt_session_notsup(wt_session));
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, salvage);
+
+ WT_STAT_FAST_CONN_INCR(session, session_table_salvage_fail);
+ ret = __wt_session_notsup(session);
+err: API_END_RET(session, ret);
}
/*
@@ -1135,6 +1218,10 @@ __session_truncate(WT_SESSION *wt_session,
err: TXN_API_END_RETRY(session, ret, 0);
+ if (ret != 0)
+ WT_STAT_FAST_CONN_INCR(session, session_table_truncate_fail);
+ else
+ WT_STAT_FAST_CONN_INCR(session, session_table_truncate_success);
/*
* Only map WT_NOTFOUND to ENOENT if a URI was specified.
*/
@@ -1149,12 +1236,20 @@ static int
__session_truncate_readonly(WT_SESSION *wt_session,
const char *uri, WT_CURSOR *start, WT_CURSOR *stop, const char *config)
{
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
WT_UNUSED(uri);
WT_UNUSED(start);
WT_UNUSED(stop);
WT_UNUSED(config);
- return (__wt_session_notsup(wt_session));
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, truncate);
+
+ WT_STAT_FAST_CONN_INCR(session, session_table_truncate_fail);
+ ret = __wt_session_notsup(session);
+err: API_END_RET(session, ret);
}
/*
@@ -1187,10 +1282,17 @@ static int
__session_upgrade_readonly(
WT_SESSION *wt_session, const char *uri, const char *config)
{
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
WT_UNUSED(uri);
WT_UNUSED(config);
- return (__wt_session_notsup(wt_session));
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, upgrade);
+
+ ret = __wt_session_notsup(session);
+err: API_END_RET(session, ret);
}
/*
@@ -1216,7 +1318,11 @@ __session_verify(WT_SESSION *wt_session, const char *uri, const char *config)
ret = __wt_schema_worker(session, uri, __wt_verify,
NULL, cfg, WT_DHANDLE_EXCLUSIVE | WT_BTREE_VERIFY)));
-err: API_END_RET_NOTFOUND_MAP(session, ret);
+err: if (ret != 0)
+ WT_STAT_FAST_CONN_INCR(session, session_table_verify_fail);
+ else
+ WT_STAT_FAST_CONN_INCR(session, session_table_verify_success);
+ API_END_RET_NOTFOUND_MAP(session, ret);
}
/*
@@ -1421,9 +1527,16 @@ err: API_END_RET(session, ret);
static int
__session_transaction_sync_readonly(WT_SESSION *wt_session, const char *config)
{
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
WT_UNUSED(config);
- return (__wt_session_notsup(wt_session));
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, transaction_sync);
+
+ ret = __wt_session_notsup(session);
+err: API_END_RET(session, ret);
}
/*
@@ -1481,9 +1594,16 @@ err: API_END_RET_NOTFOUND_MAP(session, ret);
static int
__session_checkpoint_readonly(WT_SESSION *wt_session, const char *config)
{
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
WT_UNUSED(config);
- return (__wt_session_notsup(wt_session));
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, checkpoint);
+
+ ret = __wt_session_notsup(session);
+err: API_END_RET(session, ret);
}
/*
diff --git a/src/session/session_compact.c b/src/session/session_compact.c
index 3f7b34d132f..47ed5298304 100644
--- a/src/session/session_compact.c
+++ b/src/session/session_compact.c
@@ -333,6 +333,10 @@ err: session->compact = NULL;
*/
WT_TRET(__wt_session_release_resources(session));
+ if (ret != 0)
+ WT_STAT_FAST_CONN_INCR(session, session_table_compact_fail);
+ else
+ WT_STAT_FAST_CONN_INCR(session, session_table_compact_success);
API_END_RET_NOTFOUND_MAP(session, ret);
}
@@ -344,8 +348,16 @@ int
__wt_session_compact_readonly(
WT_SESSION *wt_session, const char *uri, const char *config)
{
+ WT_DECL_RET;
+ WT_SESSION_IMPL *session;
+
WT_UNUSED(uri);
WT_UNUSED(config);
- return (__wt_session_notsup(wt_session));
+ session = (WT_SESSION_IMPL *)wt_session;
+ SESSION_API_CALL_NOCONF(session, compact);
+
+ WT_STAT_FAST_CONN_INCR(session, session_table_compact_fail);
+ ret = __wt_session_notsup(session);
+err: API_END_RET(session, ret);
}
diff --git a/src/support/err.c b/src/support/err.c
index 93c0af37328..60fc53cecd0 100644
--- a/src/support/err.c
+++ b/src/support/err.c
@@ -118,7 +118,13 @@ __handler_failure(WT_SESSION_IMPL *session,
handler->handle_error(handler, wt_session, error, s) == 0)
return;
+ /*
+ * In case there is a failure in the default error handler, make sure
+ * we don't recursively try to report *that* error.
+ */
+ session->event_handler = &__event_handler_default;
(void)__handle_error_default(NULL, wt_session, error, s);
+ session->event_handler = handler;
}
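
The err.c change above installs the default handler before reporting that the application's handler failed, so a second failure cannot re-enter the handler that just failed. A minimal sketch of that swap-and-restore guard, using a hypothetical handler type rather than WT_EVENT_HANDLER:

    #include <stdio.h>

    /* Hypothetical event-handler type; the real one is WT_EVENT_HANDLER. */
    typedef int (*error_handler_t)(const char *msg);

    static int
    default_handler(const char *msg)
    {
        return (fprintf(stderr, "%s\n", msg) < 0 ? -1 : 0);
    }

    /* An application handler that always fails, to trigger the fallback. */
    static int
    broken_handler(const char *msg)
    {
        (void)msg;
        return (-1);
    }

    struct session {
        error_handler_t handler;
    };

    static void
    handler_failure(struct session *s, const char *msg)
    {
        error_handler_t saved;

        /*
         * Point the session at the default handler while reporting the
         * failure, so a failure in that report can't recurse back into
         * the handler that just failed; restore it afterward.
         */
        saved = s->handler;
        s->handler = default_handler;
        (void)default_handler(msg);
        s->handler = saved;
    }

    int
    main(void)
    {
        struct session s = { broken_handler };

        if (s.handler("hello") != 0)
            handler_failure(&s, "error handler failed");
        return (0);
    }
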
/*
diff --git a/src/support/stat.c b/src/support/stat.c
index d972f0c140f..49cb3bebc07 100644
--- a/src/support/stat.c
+++ b/src/support/stat.c
@@ -43,6 +43,7 @@ static const char * const __stats_dsrc_desc[] = {
"btree: pages rewritten by compaction",
"btree: row-store internal pages",
"btree: row-store leaf pages",
+ "cache: bytes currently in the cache",
"cache: bytes read into cache",
"cache: bytes written from cache",
"cache: checkpoint blocked page eviction",
@@ -173,6 +174,7 @@ __wt_stat_dsrc_clear_single(WT_DSRC_STATS *stats)
stats->btree_compact_rewrite = 0;
stats->btree_row_internal = 0;
stats->btree_row_leaf = 0;
+ /* not clearing cache_bytes_inuse */
stats->cache_bytes_read = 0;
stats->cache_bytes_write = 0;
stats->cache_eviction_checkpoint = 0;
@@ -300,6 +302,7 @@ __wt_stat_dsrc_aggregate_single(
to->btree_compact_rewrite += from->btree_compact_rewrite;
to->btree_row_internal += from->btree_row_internal;
to->btree_row_leaf += from->btree_row_leaf;
+ to->cache_bytes_inuse += from->cache_bytes_inuse;
to->cache_bytes_read += from->cache_bytes_read;
to->cache_bytes_write += from->cache_bytes_write;
to->cache_eviction_checkpoint += from->cache_eviction_checkpoint;
@@ -433,6 +436,7 @@ __wt_stat_dsrc_aggregate(
WT_STAT_READ(from, btree_compact_rewrite);
to->btree_row_internal += WT_STAT_READ(from, btree_row_internal);
to->btree_row_leaf += WT_STAT_READ(from, btree_row_leaf);
+ to->cache_bytes_inuse += WT_STAT_READ(from, cache_bytes_inuse);
to->cache_bytes_read += WT_STAT_READ(from, cache_bytes_read);
to->cache_bytes_write += WT_STAT_READ(from, cache_bytes_write);
to->cache_eviction_checkpoint +=
@@ -542,9 +546,12 @@ static const char * const __stats_connection_desc[] = {
"block-manager: blocks written",
"block-manager: bytes read",
"block-manager: bytes written",
+ "block-manager: bytes written for checkpoint",
"block-manager: mapped blocks read",
"block-manager: mapped bytes read",
+ "cache: bytes belonging to page images in the cache",
"cache: bytes currently in the cache",
+ "cache: bytes not belonging to page images in the cache",
"cache: bytes read into cache",
"cache: bytes written from cache",
"cache: checkpoint blocked page eviction",
@@ -578,6 +585,8 @@ static const char * const __stats_connection_desc[] = {
"cache: maximum page size at eviction",
"cache: modified pages evicted",
"cache: modified pages evicted by application threads",
+ "cache: overflow pages read into cache",
+ "cache: overflow values cached in memory",
"cache: page split during eviction deepened the tree",
"cache: page written requiring lookaside records",
"cache: pages currently held in the cache",
@@ -586,6 +595,7 @@ static const char * const __stats_connection_desc[] = {
"cache: pages evicted by application threads",
"cache: pages queued for eviction",
"cache: pages queued for urgent eviction",
+ "cache: pages queued for urgent eviction during walk",
"cache: pages read into cache",
"cache: pages read into cache requiring lookaside entries",
"cache: pages requested from the cache",
@@ -597,7 +607,6 @@ static const char * const __stats_connection_desc[] = {
"cache: percentage overhead",
"cache: tracked bytes belonging to internal pages in the cache",
"cache: tracked bytes belonging to leaf pages in the cache",
- "cache: tracked bytes belonging to overflow pages in the cache",
"cache: tracked dirty bytes in the cache",
"cache: tracked dirty pages in the cache",
"cache: unmodified pages evicted",
@@ -677,6 +686,22 @@ static const char * const __stats_connection_desc[] = {
"reconciliation: split objects currently awaiting free",
"session: open cursor count",
"session: open session count",
+ "session: table compact failed calls",
+ "session: table compact successful calls",
+ "session: table create failed calls",
+ "session: table create successful calls",
+ "session: table drop failed calls",
+ "session: table drop successful calls",
+ "session: table rebalance failed calls",
+ "session: table rebalance successful calls",
+ "session: table rename failed calls",
+ "session: table rename successful calls",
+ "session: table salvage failed calls",
+ "session: table salvage successful calls",
+ "session: table truncate failed calls",
+ "session: table truncate successful calls",
+ "session: table verify failed calls",
+ "session: table verify successful calls",
"thread-state: active filesystem fsync calls",
"thread-state: active filesystem read calls",
"thread-state: active filesystem write calls",
@@ -693,13 +718,13 @@ static const char * const __stats_connection_desc[] = {
"transaction: transaction checkpoint max time (msecs)",
"transaction: transaction checkpoint min time (msecs)",
"transaction: transaction checkpoint most recent time (msecs)",
+ "transaction: transaction checkpoint scrub dirty target",
+ "transaction: transaction checkpoint scrub time (msecs)",
"transaction: transaction checkpoint total time (msecs)",
"transaction: transaction checkpoints",
"transaction: transaction failures due to cache overflow",
"transaction: transaction fsync calls for checkpoint after allocating the transaction ID",
- "transaction: transaction fsync calls for checkpoint before allocating the transaction ID",
"transaction: transaction fsync duration for checkpoint after allocating the transaction ID (usecs)",
- "transaction: transaction fsync duration for checkpoint before allocating the transaction ID (usecs)",
"transaction: transaction range of IDs currently pinned",
"transaction: transaction range of IDs currently pinned by a checkpoint",
"transaction: transaction range of IDs currently pinned by named snapshots",
@@ -764,9 +789,12 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->block_write = 0;
stats->block_byte_read = 0;
stats->block_byte_write = 0;
+ stats->block_byte_write_checkpoint = 0;
stats->block_map_read = 0;
stats->block_byte_map_read = 0;
+ /* not clearing cache_bytes_image */
/* not clearing cache_bytes_inuse */
+ /* not clearing cache_bytes_other */
stats->cache_bytes_read = 0;
stats->cache_bytes_write = 0;
stats->cache_eviction_checkpoint = 0;
@@ -800,6 +828,8 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
/* not clearing cache_eviction_maximum_page_size */
stats->cache_eviction_dirty = 0;
stats->cache_eviction_app_dirty = 0;
+ stats->cache_read_overflow = 0;
+ stats->cache_overflow_value = 0;
stats->cache_eviction_deepen = 0;
stats->cache_write_lookaside = 0;
/* not clearing cache_pages_inuse */
@@ -807,6 +837,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
stats->cache_eviction_force_delete = 0;
stats->cache_eviction_app = 0;
stats->cache_eviction_pages_queued = 0;
+ stats->cache_eviction_pages_queued_urgent = 0;
stats->cache_eviction_pages_queued_oldest = 0;
stats->cache_read = 0;
stats->cache_read_lookaside = 0;
@@ -819,7 +850,6 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
/* not clearing cache_overhead */
/* not clearing cache_bytes_internal */
/* not clearing cache_bytes_leaf */
- /* not clearing cache_bytes_overflow */
/* not clearing cache_bytes_dirty */
/* not clearing cache_pages_dirty */
stats->cache_eviction_clean = 0;
@@ -899,9 +929,25 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
/* not clearing rec_split_stashed_objects */
/* not clearing session_cursor_open */
/* not clearing session_open */
- /* not clearing fsync_active */
- /* not clearing read_active */
- /* not clearing write_active */
+ /* not clearing session_table_compact_fail */
+ /* not clearing session_table_compact_success */
+ /* not clearing session_table_create_fail */
+ /* not clearing session_table_create_success */
+ /* not clearing session_table_drop_fail */
+ /* not clearing session_table_drop_success */
+ /* not clearing session_table_rebalance_fail */
+ /* not clearing session_table_rebalance_success */
+ /* not clearing session_table_rename_fail */
+ /* not clearing session_table_rename_success */
+ /* not clearing session_table_salvage_fail */
+ /* not clearing session_table_salvage_success */
+ /* not clearing session_table_truncate_fail */
+ /* not clearing session_table_truncate_success */
+ /* not clearing session_table_verify_fail */
+ /* not clearing session_table_verify_success */
+ /* not clearing thread_fsync_active */
+ /* not clearing thread_read_active */
+ /* not clearing thread_write_active */
stats->page_busy_blocked = 0;
stats->page_forcible_evict_blocked = 0;
stats->page_locked_blocked = 0;
@@ -915,13 +961,13 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
/* not clearing txn_checkpoint_time_max */
/* not clearing txn_checkpoint_time_min */
/* not clearing txn_checkpoint_time_recent */
+ /* not clearing txn_checkpoint_scrub_target */
+ /* not clearing txn_checkpoint_scrub_time */
/* not clearing txn_checkpoint_time_total */
stats->txn_checkpoint = 0;
stats->txn_fail_cache = 0;
stats->txn_checkpoint_fsync_post = 0;
- stats->txn_checkpoint_fsync_pre = 0;
- stats->txn_checkpoint_fsync_post_duration = 0;
- stats->txn_checkpoint_fsync_pre_duration = 0;
+ /* not clearing txn_checkpoint_fsync_post_duration */
/* not clearing txn_pinned_range */
/* not clearing txn_pinned_checkpoint_range */
/* not clearing txn_pinned_snapshot_range */
@@ -978,9 +1024,13 @@ __wt_stat_connection_aggregate(
to->block_write += WT_STAT_READ(from, block_write);
to->block_byte_read += WT_STAT_READ(from, block_byte_read);
to->block_byte_write += WT_STAT_READ(from, block_byte_write);
+ to->block_byte_write_checkpoint +=
+ WT_STAT_READ(from, block_byte_write_checkpoint);
to->block_map_read += WT_STAT_READ(from, block_map_read);
to->block_byte_map_read += WT_STAT_READ(from, block_byte_map_read);
+ to->cache_bytes_image += WT_STAT_READ(from, cache_bytes_image);
to->cache_bytes_inuse += WT_STAT_READ(from, cache_bytes_inuse);
+ to->cache_bytes_other += WT_STAT_READ(from, cache_bytes_other);
to->cache_bytes_read += WT_STAT_READ(from, cache_bytes_read);
to->cache_bytes_write += WT_STAT_READ(from, cache_bytes_write);
to->cache_eviction_checkpoint +=
@@ -1039,6 +1089,8 @@ __wt_stat_connection_aggregate(
to->cache_eviction_dirty += WT_STAT_READ(from, cache_eviction_dirty);
to->cache_eviction_app_dirty +=
WT_STAT_READ(from, cache_eviction_app_dirty);
+ to->cache_read_overflow += WT_STAT_READ(from, cache_read_overflow);
+ to->cache_overflow_value += WT_STAT_READ(from, cache_overflow_value);
to->cache_eviction_deepen +=
WT_STAT_READ(from, cache_eviction_deepen);
to->cache_write_lookaside +=
@@ -1050,6 +1102,8 @@ __wt_stat_connection_aggregate(
to->cache_eviction_app += WT_STAT_READ(from, cache_eviction_app);
to->cache_eviction_pages_queued +=
WT_STAT_READ(from, cache_eviction_pages_queued);
+ to->cache_eviction_pages_queued_urgent +=
+ WT_STAT_READ(from, cache_eviction_pages_queued_urgent);
to->cache_eviction_pages_queued_oldest +=
WT_STAT_READ(from, cache_eviction_pages_queued_oldest);
to->cache_read += WT_STAT_READ(from, cache_read);
@@ -1065,7 +1119,6 @@ __wt_stat_connection_aggregate(
to->cache_overhead += WT_STAT_READ(from, cache_overhead);
to->cache_bytes_internal += WT_STAT_READ(from, cache_bytes_internal);
to->cache_bytes_leaf += WT_STAT_READ(from, cache_bytes_leaf);
- to->cache_bytes_overflow += WT_STAT_READ(from, cache_bytes_overflow);
to->cache_bytes_dirty += WT_STAT_READ(from, cache_bytes_dirty);
to->cache_pages_dirty += WT_STAT_READ(from, cache_pages_dirty);
to->cache_eviction_clean += WT_STAT_READ(from, cache_eviction_clean);
@@ -1151,9 +1204,41 @@ __wt_stat_connection_aggregate(
WT_STAT_READ(from, rec_split_stashed_objects);
to->session_cursor_open += WT_STAT_READ(from, session_cursor_open);
to->session_open += WT_STAT_READ(from, session_open);
- to->fsync_active += WT_STAT_READ(from, fsync_active);
- to->read_active += WT_STAT_READ(from, read_active);
- to->write_active += WT_STAT_READ(from, write_active);
+ to->session_table_compact_fail +=
+ WT_STAT_READ(from, session_table_compact_fail);
+ to->session_table_compact_success +=
+ WT_STAT_READ(from, session_table_compact_success);
+ to->session_table_create_fail +=
+ WT_STAT_READ(from, session_table_create_fail);
+ to->session_table_create_success +=
+ WT_STAT_READ(from, session_table_create_success);
+ to->session_table_drop_fail +=
+ WT_STAT_READ(from, session_table_drop_fail);
+ to->session_table_drop_success +=
+ WT_STAT_READ(from, session_table_drop_success);
+ to->session_table_rebalance_fail +=
+ WT_STAT_READ(from, session_table_rebalance_fail);
+ to->session_table_rebalance_success +=
+ WT_STAT_READ(from, session_table_rebalance_success);
+ to->session_table_rename_fail +=
+ WT_STAT_READ(from, session_table_rename_fail);
+ to->session_table_rename_success +=
+ WT_STAT_READ(from, session_table_rename_success);
+ to->session_table_salvage_fail +=
+ WT_STAT_READ(from, session_table_salvage_fail);
+ to->session_table_salvage_success +=
+ WT_STAT_READ(from, session_table_salvage_success);
+ to->session_table_truncate_fail +=
+ WT_STAT_READ(from, session_table_truncate_fail);
+ to->session_table_truncate_success +=
+ WT_STAT_READ(from, session_table_truncate_success);
+ to->session_table_verify_fail +=
+ WT_STAT_READ(from, session_table_verify_fail);
+ to->session_table_verify_success +=
+ WT_STAT_READ(from, session_table_verify_success);
+ to->thread_fsync_active += WT_STAT_READ(from, thread_fsync_active);
+ to->thread_read_active += WT_STAT_READ(from, thread_read_active);
+ to->thread_write_active += WT_STAT_READ(from, thread_write_active);
to->page_busy_blocked += WT_STAT_READ(from, page_busy_blocked);
to->page_forcible_evict_blocked +=
WT_STAT_READ(from, page_forcible_evict_blocked);
@@ -1175,18 +1260,18 @@ __wt_stat_connection_aggregate(
WT_STAT_READ(from, txn_checkpoint_time_min);
to->txn_checkpoint_time_recent +=
WT_STAT_READ(from, txn_checkpoint_time_recent);
+ to->txn_checkpoint_scrub_target +=
+ WT_STAT_READ(from, txn_checkpoint_scrub_target);
+ to->txn_checkpoint_scrub_time +=
+ WT_STAT_READ(from, txn_checkpoint_scrub_time);
to->txn_checkpoint_time_total +=
WT_STAT_READ(from, txn_checkpoint_time_total);
to->txn_checkpoint += WT_STAT_READ(from, txn_checkpoint);
to->txn_fail_cache += WT_STAT_READ(from, txn_fail_cache);
to->txn_checkpoint_fsync_post +=
WT_STAT_READ(from, txn_checkpoint_fsync_post);
- to->txn_checkpoint_fsync_pre +=
- WT_STAT_READ(from, txn_checkpoint_fsync_pre);
to->txn_checkpoint_fsync_post_duration +=
WT_STAT_READ(from, txn_checkpoint_fsync_post_duration);
- to->txn_checkpoint_fsync_pre_duration +=
- WT_STAT_READ(from, txn_checkpoint_fsync_pre_duration);
to->txn_pinned_range += WT_STAT_READ(from, txn_pinned_range);
to->txn_pinned_checkpoint_range +=
WT_STAT_READ(from, txn_pinned_checkpoint_range);
diff --git a/src/txn/txn.c b/src/txn/txn.c
index dd4384d9a9a..87b74433769 100644
--- a/src/txn/txn.c
+++ b/src/txn/txn.c
@@ -124,6 +124,7 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session)
txn = &session->txn;
txn_global = &conn->txn_global;
txn_state = WT_SESSION_TXN_STATE(session);
+ n = 0;
/*
* Spin waiting for the lock: the sleeps in our blocking readlock
@@ -137,20 +138,26 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session)
current_id = snap_min = txn_global->current;
prev_oldest_id = txn_global->oldest_id;
+ /*
+ * Include the checkpoint transaction, if one is running: we should
+ * ignore any uncommitted changes the checkpoint has written to the
+	 * metadata. We don't have to keep the checkpoint's changes pinned, so
+	 * we don't include it in the published snap_min.
+ */
+ if ((id = txn_global->checkpoint_txnid) != WT_TXN_NONE)
+ txn->snapshot[n++] = id;
+
/* For pure read-only workloads, avoid scanning. */
if (prev_oldest_id == current_id) {
txn_state->snap_min = current_id;
- __txn_sort_snapshot(session, 0, current_id);
-
/* Check that the oldest ID has not moved in the meantime. */
WT_ASSERT(session, prev_oldest_id == txn_global->oldest_id);
- WT_RET(__wt_readunlock(session, txn_global->scan_rwlock));
- return (0);
+ goto done;
}
/* Walk the array of concurrent transactions. */
WT_ORDERED_READ(session_cnt, conn->session_cnt);
- for (i = n = 0, s = txn_global->states; i < session_cnt; i++, s++) {
+ for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) {
/*
* Build our snapshot of any concurrent transaction IDs.
*
@@ -178,8 +185,7 @@ __wt_txn_get_snapshot(WT_SESSION_IMPL *session)
WT_ASSERT(session, prev_oldest_id == txn_global->oldest_id);
txn_state->snap_min = snap_min;
- WT_RET(__wt_readunlock(session, txn_global->scan_rwlock));
-
+done: WT_RET(__wt_readunlock(session, txn_global->scan_rwlock));
__txn_sort_snapshot(session, n, current_id);
return (0);
}
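
With this change the first slot of the snapshot is reserved for the running checkpoint's transaction ID, when there is one, before the other sessions are scanned, so the checkpoint's uncommitted metadata writes stay invisible without being counted into the published snap_min. A simplified, single-threaded sketch of building such a snapshot, with plain arrays and no locking, unlike the real code:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    #define TXN_NONE    0   /* stands in for WT_TXN_NONE */

    static int
    cmp_id(const void *a, const void *b)
    {
        uint64_t x = *(const uint64_t *)a, y = *(const uint64_t *)b;

        return (x < y ? -1 : (x > y ? 1 : 0));
    }

    /*
     * Build a snapshot: the checkpoint ID (if running) goes in first, then
     * any concurrent transaction IDs below the current ID.  snap_min is
     * computed from the concurrent IDs only, so the checkpoint pins nothing.
     */
    static uint32_t
    build_snapshot(uint64_t checkpoint_id, const uint64_t *active,
        uint32_t nactive, uint64_t current_id,
        uint64_t *snapshot, uint64_t *snap_minp)
    {
        uint64_t snap_min;
        uint32_t i, n;

        n = 0;
        snap_min = current_id;

        if (checkpoint_id != TXN_NONE)
            snapshot[n++] = checkpoint_id;

        for (i = 0; i < nactive; ++i)
            if (active[i] != TXN_NONE && active[i] < current_id) {
                snapshot[n++] = active[i];
                if (active[i] < snap_min)
                    snap_min = active[i];
            }

        qsort(snapshot, n, sizeof(snapshot[0]), cmp_id);
        *snap_minp = snap_min;
        return (n);
    }

    int
    main(void)
    {
        uint64_t active[] = { 12, TXN_NONE, 15 };
        uint64_t snapshot[8], snap_min;
        uint32_t i, n;

        n = build_snapshot(17, active, 3, 20, snapshot, &snap_min);
        printf("snap_min %llu, snapshot:", (unsigned long long)snap_min);
        for (i = 0; i < n; ++i)
            printf(" %llu", (unsigned long long)snapshot[i]);
        printf("\n");
        return (0);
    }
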
@@ -433,18 +439,22 @@ __wt_txn_release(WT_SESSION_IMPL *session)
WT_TXN_STATE *txn_state;
txn = &session->txn;
- WT_ASSERT(session, txn->mod_count == 0);
- txn->notify = NULL;
-
txn_global = &S2C(session)->txn_global;
txn_state = WT_SESSION_TXN_STATE(session);
+ WT_ASSERT(session, txn->mod_count == 0);
+ txn->notify = NULL;
+
/* Clear the transaction's ID from the global table. */
if (WT_SESSION_IS_CHECKPOINT(session)) {
WT_ASSERT(session, txn_state->id == WT_TXN_NONE);
- txn->id = WT_TXN_NONE;
+ txn->id = txn_global->checkpoint_txnid = WT_TXN_NONE;
- /* Clear the global checkpoint transaction IDs. */
+ /*
+ * Be extra careful to cleanup everything for checkpoints: once
+ * the global checkpoint ID is cleared, we can no longer tell
+ * if this session is doing a checkpoint.
+ */
txn_global->checkpoint_id = 0;
txn_global->checkpoint_pinned = WT_TXN_NONE;
} else if (F_ISSET(txn, WT_TXN_HAS_ID)) {
@@ -470,6 +480,7 @@ __wt_txn_release(WT_SESSION_IMPL *session)
*/
__wt_txn_release_snapshot(session);
txn->isolation = session->isolation;
+
/* Ensure the transaction flags are cleared on exit */
txn->flags = 0;
}
@@ -487,10 +498,12 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
WT_TXN *txn;
WT_TXN_OP *op;
u_int i;
+ bool did_update;
txn = &session->txn;
conn = S2C(session);
- WT_ASSERT(session, !F_ISSET(txn, WT_TXN_ERROR) || txn->mod_count == 0);
+ did_update = txn->mod_count != 0;
+ WT_ASSERT(session, !F_ISSET(txn, WT_TXN_ERROR) || !did_update);
if (!F_ISSET(txn, WT_TXN_RUNNING))
WT_RET_MSG(session, EINVAL, "No transaction is active");
@@ -540,8 +553,18 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
WT_TRET(txn->notify->notify(txn->notify,
(WT_SESSION *)session, txn->id, 1));
+ /*
+ * We are about to release the snapshot: copy values into any
+ * positioned cursors so they don't point to updates that could be
+ * freed once we don't have a snapshot.
+ */
+ if (session->ncursors > 0) {
+ WT_DIAGNOSTIC_YIELD;
+ WT_RET(__wt_session_copy_values(session));
+ }
+
/* If we are logging, write a commit log record. */
- if (ret == 0 && txn->mod_count > 0 &&
+ if (ret == 0 && did_update &&
FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) &&
!F_ISSET(session, WT_SESSION_NO_LOGGING)) {
/*
@@ -569,14 +592,6 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
__wt_txn_op_free(session, op);
txn->mod_count = 0;
- /*
- * We are about to release the snapshot: copy values into any
- * positioned cursors so they don't point to updates that could be
- * freed once we don't have a transaction ID pinned.
- */
- if (session->ncursors > 0)
- WT_RET(__wt_session_copy_values(session));
-
__wt_txn_release(session);
return (0);
}
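
The commit path now copies values out of any positioned cursors before the snapshot is released and the commit record is logged, because once the snapshot is gone the updates those cursors reference may be freed. A hypothetical sketch of that copy-out step, with a simplified cursor that may point into shared update memory:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Hypothetical cursor: it may reference memory owned by the update list. */
    struct cursor {
        const void *value;  /* points into shared update memory */
        size_t value_size;
        void *owned;        /* private copy, if one has been taken */
    };

    /*
     * Before releasing the snapshot that keeps updates alive, copy each
     * positioned cursor's value into memory the cursor owns.
     */
    static int
    copy_cursor_values(struct cursor **cursors, size_t ncursors)
    {
        struct cursor *c;
        size_t i;

        for (i = 0; i < ncursors; ++i) {
            c = cursors[i];
            if (c->value == NULL || c->owned != NULL)
                continue;
            if ((c->owned = malloc(c->value_size)) == NULL)
                return (-1);
            memcpy(c->owned, c->value, c->value_size);
            c->value = c->owned;    /* now safe after updates are freed */
        }
        return (0);
    }

    int
    main(void)
    {
        char shared[] = "value from an update that will be freed";
        struct cursor c = { shared, sizeof(shared), NULL };
        struct cursor *list[] = { &c };

        if (copy_cursor_values(list, 1) == 0)
            printf("cursor now owns: %s\n", (const char *)c.value);
        free(c.owned);
        return (0);
    }
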
diff --git a/src/txn/txn_ckpt.c b/src/txn/txn_ckpt.c
index 51d26b9aed6..c23f293154a 100644
--- a/src/txn/txn_ckpt.c
+++ b/src/txn/txn_ckpt.c
@@ -10,14 +10,16 @@
static int __checkpoint_lock_tree(
WT_SESSION_IMPL *, bool, bool, const char *[]);
+static int __checkpoint_mark_deletes(WT_SESSION_IMPL *, const char *[]);
+static int __checkpoint_presync(WT_SESSION_IMPL *, const char *[]);
static int __checkpoint_tree_helper(WT_SESSION_IMPL *, const char *[]);
/*
- * __wt_checkpoint_name_ok --
+ * __checkpoint_name_ok --
* Complain if the checkpoint name isn't acceptable.
*/
-int
-__wt_checkpoint_name_ok(WT_SESSION_IMPL *session, const char *name, size_t len)
+static int
+__checkpoint_name_ok(WT_SESSION_IMPL *session, const char *name, size_t len)
{
/* Check for characters we don't want to see in a metadata file. */
WT_RET(__wt_name_check(session, name, len));
@@ -107,7 +109,7 @@ __checkpoint_apply_all(WT_SESSION_IMPL *session, const char *cfg[],
WT_RET(__wt_config_gets(session, cfg, "name", &cval));
named = cval.len != 0;
if (named)
- WT_RET(__wt_checkpoint_name_ok(session, cval.str, cval.len));
+ WT_RET(__checkpoint_name_ok(session, cval.str, cval.len));
/* Step through the targets and optionally operate on each one. */
WT_ERR(__wt_config_gets(session, cfg, "target", &cval));
@@ -183,6 +185,8 @@ __checkpoint_apply(WT_SESSION_IMPL *session, const char *cfg[],
/* If we have already locked the handles, apply the operation. */
for (i = 0; i < session->ckpt_handle_next; ++i) {
+ if (session->ckpt_handle[i] == NULL)
+ continue;
WT_WITH_DHANDLE(session, session->ckpt_handle[i],
ret = (*op)(session, cfg));
WT_RET(ret);
@@ -234,6 +238,7 @@ __checkpoint_data_source(WT_SESSION_IMPL *session, const char *cfg[])
int
__wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[])
{
+ WT_BTREE *btree;
WT_DECL_RET;
const char *name;
@@ -258,6 +263,14 @@ __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[])
if ((ret = __wt_session_get_btree(session, name, NULL, NULL, 0)) != 0)
return (ret == EBUSY ? 0 : ret);
+ /*
+ * Save the current eviction walk setting: checkpoint can interfere
+ * with eviction and we don't want to unfairly penalize (or promote)
+ * eviction in trees due to checkpoints.
+ */
+ btree = S2BT(session);
+ btree->evict_walk_saved = btree->evict_walk_period;
+
WT_SAVE_DHANDLE(session,
ret = __checkpoint_lock_tree(session, true, true, cfg));
if (ret != 0) {
@@ -265,20 +278,164 @@ __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[])
return (ret);
}
+ /*
+ * Flag that the handle is part of a checkpoint for the purposes
+ * of transaction visibility checks.
+ */
+ WT_PUBLISH(btree->include_checkpoint_txn, true);
+
session->ckpt_handle[session->ckpt_handle_next++] = session->dhandle;
return (0);
}
/*
- * __checkpoint_write_leaves --
- * Write any dirty leaf pages for all checkpoint handles.
+ * __checkpoint_update_generation --
+ * Update the checkpoint generation of the current tree.
+ *
+ * This indicates that the tree will not be visited again by the current
+ * checkpoint.
+ */
+static void
+__checkpoint_update_generation(WT_SESSION_IMPL *session)
+{
+ WT_BTREE *btree;
+
+ btree = S2BT(session);
+ if (!WT_IS_METADATA(session, session->dhandle))
+ WT_PUBLISH(btree->include_checkpoint_txn, false);
+
+ WT_PUBLISH(btree->checkpoint_gen,
+ S2C(session)->txn_global.checkpoint_gen);
+ WT_STAT_FAST_DATA_SET(session,
+ btree_checkpoint_generation, btree->checkpoint_gen);
+}
+
+/*
+ * __checkpoint_reduce_dirty_cache --
+ *	Reduce the amount of dirty data in cache before a checkpoint starts.
*/
static int
-__checkpoint_write_leaves(WT_SESSION_IMPL *session, const char *cfg[])
+__checkpoint_reduce_dirty_cache(WT_SESSION_IMPL *session)
{
- WT_UNUSED(cfg);
+ WT_CACHE *cache;
+ WT_CONNECTION_IMPL *conn;
+ struct timespec start, last, stop;
+ u_int current_dirty;
+ uint64_t bytes_written_last, bytes_written_start, bytes_written_total;
+ uint64_t current_us, stepdown_us, total_ms;
+ bool progress;
+
+ conn = S2C(session);
+ cache = conn->cache;
+
+ WT_RET(__wt_epoch(session, &start));
+ last = start;
+ bytes_written_last = 0;
+ bytes_written_start = cache->bytes_written;
+ stepdown_us = 10000;
+ progress = false;
+
+	/* Step the dirty trigger down until dirty data reaches the target. */
+ for (;;) {
+ current_dirty = (u_int)((100 *
+ __wt_cache_dirty_leaf_inuse(cache)) / conn->cache_size);
+ if (current_dirty <= cache->eviction_dirty_target)
+ break;
+
+ __wt_sleep(0, stepdown_us / 4);
+ WT_RET(__wt_epoch(session, &stop));
+ current_us = WT_TIMEDIFF_US(stop, last);
+ total_ms = WT_TIMEDIFF_MS(stop, start);
+ bytes_written_total =
+ cache->bytes_written - bytes_written_start;
- return (__wt_cache_op(session, WT_SYNC_WRITE_LEAVES));
+ /*
+ * Estimate how long the next step down of 1% of dirty data
+ * should take.
+ *
+ * The calculation here assumes that the system is writing from
+ * cache as fast as it can, and determines the write throughput
+ * based on the change in the bytes written from cache since
+ * the start of the call. We use that to estimate how long it
+ * will take to step the dirty target down by 1%.
+ *
+ * Take care to avoid dividing by zero.
+ */
+ if (bytes_written_total - bytes_written_last > WT_MEGABYTE &&
+ bytes_written_total > total_ms && total_ms > 0 &&
+ (!progress ||
+ current_dirty <= cache->eviction_dirty_trigger)) {
+ stepdown_us = (uint64_t)(WT_THOUSAND * (
+ (double)(conn->cache_size / 100) /
+ (double)(bytes_written_total / total_ms)));
+ if (!progress)
+ stepdown_us = WT_MIN(stepdown_us, 200000);
+ }
+
+ bytes_written_last = bytes_written_total;
+
+ if (current_dirty <= cache->eviction_dirty_trigger) {
+ progress = true;
+
+ /*
+ * Smooth out step down: try to limit the impact on
+ * performance to 10% by waiting once we reach the last
+ * level.
+ */
+ __wt_sleep(0, 10 * stepdown_us);
+ cache->eviction_dirty_trigger = current_dirty - 1;
+ WT_STAT_FAST_CONN_SET(session,
+ txn_checkpoint_scrub_target, current_dirty - 1);
+ WT_RET(__wt_epoch(session, &last));
+ continue;
+ }
+
+ /*
+ * We haven't reached the current target.
+ *
+ * Don't wait indefinitely: there might be dirty pages that
+ * can't be evicted. If we can't meet the target, give up
+ * and start the checkpoint for real.
+ */
+ if (current_us > 10 * stepdown_us)
+ break;
+ }
+
+ WT_RET(__wt_epoch(session, &stop));
+ total_ms = WT_TIMEDIFF_MS(stop, start);
+ WT_STAT_FAST_CONN_SET(session, txn_checkpoint_scrub_time, total_ms);
+
+ return (0);
+}
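
The scrub loop above estimates how long evicting another 1% of the cache should take from the observed write rate: bytes written per millisecond since the call started gives a throughput, and (cache_size / 100) divided by that throughput gives the next step time, scaled to microseconds by WT_THOUSAND. The same arithmetic in isolation, with made-up numbers:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    #define THOUSAND    1000            /* stands in for WT_THOUSAND */
    #define MEGABYTE    (1024 * 1024)

    /*
     * Estimate, in microseconds, how long stepping the dirty trigger down
     * by 1% of the cache should take, given bytes written since the start
     * of the call and the elapsed time in milliseconds.
     */
    static uint64_t
    estimate_stepdown_us(uint64_t cache_size,
        uint64_t bytes_written_total, uint64_t total_ms)
    {
        double bytes_per_ms, onepct;

        if (total_ms == 0 || bytes_written_total == 0)
            return (10000);     /* default used before any data */

        bytes_per_ms = (double)bytes_written_total / (double)total_ms;
        onepct = (double)(cache_size / 100);
        return ((uint64_t)(THOUSAND * (onepct / bytes_per_ms)));
    }

    int
    main(void)
    {
        /* 10GB cache, 200MB written from cache over the last 500ms. */
        uint64_t us = estimate_stepdown_us((uint64_t)10 * 1024 * MEGABYTE,
            (uint64_t)200 * MEGABYTE, 500);

        printf("next 1%% step should take about %" PRIu64 " us\n", us);
        return (0);
    }
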
+
+/*
+ * __checkpoint_release_clean_trees --
+ * Release clean trees from the list cached for checkpoints.
+ */
+static int
+__checkpoint_release_clean_trees(WT_SESSION_IMPL *session)
+{
+ WT_BTREE *btree;
+ WT_DATA_HANDLE *dhandle;
+ WT_DECL_RET;
+ u_int i;
+
+ for (i = 0; i < session->ckpt_handle_next; i++) {
+ dhandle = session->ckpt_handle[i];
+ btree = dhandle->handle;
+ if (!F_ISSET(btree, WT_BTREE_SKIP_CKPT))
+ continue;
+ __wt_meta_ckptlist_free(session, btree->ckpt);
+ btree->ckpt = NULL;
+ WT_WITH_DHANDLE(session, dhandle,
+ __checkpoint_update_generation(session));
+ session->ckpt_handle[i] = NULL;
+ WT_WITH_DHANDLE(session, dhandle,
+ ret = __wt_session_release_btree(session));
+ WT_RET(ret);
+ }
+
+ return (0);
}
/*
@@ -352,6 +509,7 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
{
struct timespec fsync_start, fsync_stop;
struct timespec start, stop, verb_timer;
+ WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_TXN *txn;
@@ -359,13 +517,15 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
WT_TXN_ISOLATION saved_isolation;
WT_TXN_STATE *txn_state;
void *saved_meta_next;
- u_int i;
+ u_int i, orig_trigger;
uint64_t fsync_duration_usecs;
bool full, idle, logging, tracking;
const char *txn_cfg[] = { WT_CONFIG_BASE(session,
WT_SESSION_begin_transaction), "isolation=snapshot", NULL };
conn = S2C(session);
+ cache = conn->cache;
+ orig_trigger = cache->eviction_dirty_trigger;
txn = &session->txn;
txn_global = &conn->txn_global;
txn_state = WT_SESSION_TXN_STATE(session);
@@ -384,21 +544,8 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
/* Configure logging only if doing a full checkpoint. */
logging = FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED);
- /* Keep track of handles acquired for locking. */
- WT_ERR(__wt_meta_track_on(session));
- tracking = true;
-
- /*
- * Get a list of handles we want to flush; this may pull closed objects
- * into the session cache, but we're going to do that eventually anyway.
- */
- WT_ASSERT(session, session->ckpt_handle_next == 0);
- WT_WITH_SCHEMA_LOCK(session, ret,
- WT_WITH_TABLE_LOCK(session, ret,
- WT_WITH_HANDLE_LIST_LOCK(session,
- ret = __checkpoint_apply_all(
- session, cfg, __wt_checkpoint_get_handles, NULL))));
- WT_ERR(ret);
+ /* Reset the maximum page size seen by eviction. */
+ conn->cache->evict_max_page_size = 0;
/*
* Update the global oldest ID so we do all possible cleanup.
@@ -412,28 +559,11 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
/* Flush data-sources before we start the checkpoint. */
WT_ERR(__checkpoint_data_source(session, cfg));
- WT_ERR(__wt_epoch(session, &verb_timer));
- WT_ERR(__checkpoint_verbose_track(session,
- "starting write leaves", &verb_timer));
-
- /* Flush dirty leaf pages before we start the checkpoint. */
- session->isolation = txn->isolation = WT_ISO_READ_COMMITTED;
- WT_ERR(__checkpoint_apply(session, cfg, __checkpoint_write_leaves));
-
/*
- * The underlying flush routine scheduled an asynchronous flush
- * after writing the leaf pages, but in order to minimize I/O
- * while holding the schema lock, do a flush and wait for the
- * completion. Do it after flushing the pages to give the
- * asynchronous flush as much time as possible before we wait.
+ * Try to reduce the amount of dirty data in cache so there is less
+	 * work to do during the critical section of the checkpoint.
*/
- WT_ERR(__wt_epoch(session, &fsync_start));
- WT_ERR(__checkpoint_apply(session, cfg, __wt_checkpoint_sync));
- WT_ERR(__wt_epoch(session, &fsync_stop));
- fsync_duration_usecs = WT_TIMEDIFF_US(fsync_stop, fsync_start);
- WT_STAT_FAST_CONN_INCR(session, txn_checkpoint_fsync_pre);
- WT_STAT_FAST_CONN_INCRV(session,
- txn_checkpoint_fsync_pre_duration, fsync_duration_usecs);
+ WT_ERR(__checkpoint_reduce_dirty_cache(session));
/* Tell logging that we are about to start a database checkpoint. */
if (full && logging)
@@ -462,6 +592,36 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
WT_STAT_FAST_CONN_SET(session,
txn_checkpoint_generation, txn_global->checkpoint_gen);
+ /* Keep track of handles acquired for locking. */
+ WT_ERR(__wt_meta_track_on(session));
+ tracking = true;
+
+ /*
+ * Get a list of handles we want to flush; for named checkpoints this
+ * may pull closed objects into the session cache.
+ *
+ * We want to skip checkpointing clean handles whenever possible. That
+ * is, when the checkpoint is not named or forced. However, we need to
+ * take care about ordering with respect to the checkpoint transaction.
+	 * be careful about ordering with respect to the checkpoint transaction.
+ * If we skip clean handles before starting the transaction, the
+ * checkpoint can miss updates in trees that become dirty as the
+ * checkpoint is starting. If we wait until the transaction has
+ * started before locking a handle, there could be a metadata-changing
+ * operation in between (e.g., salvage) that will cause a write
+ * conflict when the checkpoint goes to write the metadata.
+ *
+ * First, gather all handles, then start the checkpoint transaction,
+ * then release any clean handles.
+ */
+ WT_ASSERT(session, session->ckpt_handle_next == 0);
+ WT_WITH_SCHEMA_LOCK(session, ret,
+ WT_WITH_TABLE_LOCK(session, ret,
+ WT_WITH_HANDLE_LIST_LOCK(session,
+ ret = __checkpoint_apply_all(
+ session, cfg, __wt_checkpoint_get_handles, NULL))));
+ WT_ERR(ret);
+
/*
* Start a snapshot transaction for the checkpoint.
*
@@ -475,21 +635,22 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
WT_ERR(__wt_txn_id_check(session));
/*
- * Save the checkpoint session ID. We never do checkpoints in the
- * default session (with id zero).
+ * Save the checkpoint session ID.
+ *
+ * We never do checkpoints in the default session (with id zero).
*/
WT_ASSERT(session, session->id != 0 && txn_global->checkpoint_id == 0);
txn_global->checkpoint_id = session->id;
- txn_global->checkpoint_pinned =
- WT_MIN(txn_state->id, txn_state->snap_min);
-
/*
- * We're about to clear the checkpoint transaction from the global
- * state table so the oldest ID can move forward. Make sure everything
- * we've done above is scheduled.
+ * Remove the checkpoint transaction from the global table.
+ *
+ * This allows ordinary visibility checks to move forward because
+ * checkpoints often take a long time and only write to the metadata.
*/
- WT_FULL_BARRIER();
+ WT_ERR(__wt_writelock(session, txn_global->scan_rwlock));
+ txn_global->checkpoint_txnid = txn->id;
+ txn_global->checkpoint_pinned = WT_MIN(txn->id, txn->snap_min);
/*
* Sanity check that the oldest ID hasn't moved on before we have
@@ -507,6 +668,25 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
* details).
*/
txn_state->id = txn_state->snap_min = WT_TXN_NONE;
+ WT_ERR(__wt_writeunlock(session, txn_global->scan_rwlock));
+
+ /*
+ * Unblock updates -- we can figure out that any updates to clean pages
+ * after this point are too new to be written in the checkpoint.
+ */
+ cache->eviction_dirty_trigger = orig_trigger;
+ WT_STAT_FAST_CONN_SET(
+ session, txn_checkpoint_scrub_target, orig_trigger);
+
+ /*
+ * Mark old checkpoints that are being deleted and figure out which
+ * trees we can skip in this checkpoint.
+ *
+	 * Release clean trees. Any updates made after this point will not be
+ * visible to the checkpoint transaction.
+ */
+ WT_ERR(__checkpoint_apply(session, cfg, __checkpoint_mark_deletes));
+ WT_ERR(__checkpoint_release_clean_trees(session));
/* Tell logging that we have started a database checkpoint. */
if (full && logging)
@@ -522,9 +702,13 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
*/
session->dhandle = NULL;
- /* Release the snapshot so we aren't pinning pages in cache. */
+ /* Release the snapshot so we aren't pinning updates in cache. */
__wt_txn_release_snapshot(session);
+ /* Mark all trees as open for business (particularly eviction). */
+ WT_ERR(__checkpoint_apply(session, cfg, __checkpoint_presync));
+ WT_ERR(__wt_evict_server_wake(session));
+
WT_ERR(__checkpoint_verbose_track(session,
"committing transaction", &verb_timer));
@@ -587,6 +771,12 @@ __txn_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
ret = __wt_txn_checkpoint_log(
session, false, WT_TXN_LOG_CKPT_SYNC, NULL));
+ /*
+ * Now that the metadata is stable, re-open the metadata file for
+ * regular eviction by clearing the checkpoint_pinned flag.
+	 * regular eviction by clearing the checkpoint_pinned transaction ID.
+ txn_global->checkpoint_pinned = WT_TXN_NONE;
+
if (full) {
WT_ERR(__wt_epoch(session, &stop));
__checkpoint_stats(session, &start, &stop);
@@ -609,6 +799,10 @@ err: /*
if (tracking)
WT_TRET(__wt_meta_track_off(session, false, ret != 0));
+ cache->eviction_dirty_trigger = orig_trigger;
+ WT_STAT_FAST_CONN_SET(
+ session, txn_checkpoint_scrub_target, orig_trigger);
+
if (F_ISSET(txn, WT_TXN_RUNNING)) {
/*
* Clear the dhandle so the visibility check doesn't get
@@ -634,9 +828,12 @@ err: /*
WT_TXN_LOG_CKPT_STOP : WT_TXN_LOG_CKPT_CLEANUP, NULL));
}
- for (i = 0; i < session->ckpt_handle_next; ++i)
+ for (i = 0; i < session->ckpt_handle_next; ++i) {
+ if (session->ckpt_handle[i] == NULL)
+ continue;
WT_WITH_DHANDLE(session, session->ckpt_handle[i],
WT_TRET(__wt_session_release_btree(session)));
+ }
__wt_free(session, session->ckpt_handle);
session->ckpt_handle_allocated = session->ckpt_handle_next = 0;
@@ -836,7 +1033,7 @@ __checkpoint_lock_tree(WT_SESSION_IMPL *session,
if (cval.len == 0)
name = WT_CHECKPOINT;
else {
- WT_ERR(__wt_checkpoint_name_ok(session, cval.str, cval.len));
+ WT_ERR(__checkpoint_name_ok(session, cval.str, cval.len));
WT_ERR(__wt_strndup(session, cval.str, cval.len, &name_alloc));
name = name_alloc;
}
@@ -851,10 +1048,10 @@ __checkpoint_lock_tree(WT_SESSION_IMPL *session,
__wt_config_next(&dropconf, &k, &v)) == 0) {
/* Disallow unsafe checkpoint names. */
if (v.len == 0)
- WT_ERR(__wt_checkpoint_name_ok(
+ WT_ERR(__checkpoint_name_ok(
session, k.str, k.len));
else
- WT_ERR(__wt_checkpoint_name_ok(
+ WT_ERR(__checkpoint_name_ok(
session, v.str, v.len));
if (v.len == 0)
@@ -986,42 +1183,23 @@ err: if (hot_backup_locked)
}
/*
- * __checkpoint_tree --
- * Checkpoint a single tree.
- * Assumes all necessary locks have been acquired by the caller.
+ * __checkpoint_mark_deletes --
+ * Figure out what old checkpoints will be deleted, and whether the
+ * checkpoint can be skipped entirely.
*/
static int
-__checkpoint_tree(
- WT_SESSION_IMPL *session, bool is_checkpoint, const char *cfg[])
+__checkpoint_mark_deletes(
+ WT_SESSION_IMPL *session, const char *cfg[])
{
- WT_BM *bm;
WT_BTREE *btree;
WT_CKPT *ckpt, *ckptbase;
WT_CONFIG_ITEM cval;
- WT_CONNECTION_IMPL *conn;
- WT_DATA_HANDLE *dhandle;
- WT_DECL_RET;
- WT_LSN ckptlsn;
const char *name;
- int deleted, was_modified;
- bool fake_ckpt, force;
+ int deleted;
+ bool force;
btree = S2BT(session);
- bm = btree->bm;
ckptbase = btree->ckpt;
- conn = S2C(session);
- dhandle = session->dhandle;
- fake_ckpt = false;
- was_modified = btree->modified;
-
- /*
- * Set the checkpoint LSN to the maximum LSN so that if logging is
- * disabled, recovery will never roll old changes forward over the
- * non-logged changes in this checkpoint. If logging is enabled, a
- * real checkpoint LSN will be assigned for this checkpoint and
- * overwrite this.
- */
- WT_MAX_LSN(&ckptlsn);
/*
* Check for clean objects not requiring a checkpoint.
@@ -1050,20 +1228,15 @@ __checkpoint_tree(
force = false;
F_CLR(btree, WT_BTREE_SKIP_CKPT);
if (!btree->modified && cfg != NULL) {
- ret = __wt_config_gets(session, cfg, "force", &cval);
- if (ret != 0 && ret != WT_NOTFOUND)
- WT_ERR(ret);
- if (ret == 0 && cval.val != 0)
- force = true;
+ WT_RET(__wt_config_gets(session, cfg, "force", &cval));
+ force = cval.val != 0;
}
if (!btree->modified && !force) {
- if (!is_checkpoint)
- goto nockpt;
-
deleted = 0;
WT_CKPT_FOREACH(ckptbase, ckpt)
if (F_ISSET(ckpt, WT_CKPT_DELETE))
++deleted;
+
/*
* Complicated test: if the tree is clean and last two
* checkpoints have the same name (correcting for internal
@@ -1077,17 +1250,52 @@ __checkpoint_tree(
(strcmp(name, (ckpt - 2)->name) == 0 ||
(WT_PREFIX_MATCH(name, WT_CHECKPOINT) &&
WT_PREFIX_MATCH((ckpt - 2)->name, WT_CHECKPOINT)))) {
-nockpt: F_SET(btree, WT_BTREE_SKIP_CKPT);
- WT_PUBLISH(btree->checkpoint_gen,
- S2C(session)->txn_global.checkpoint_gen);
- WT_STAT_FAST_DATA_SET(session,
- btree_checkpoint_generation,
- btree->checkpoint_gen);
- ret = 0;
- goto err;
+ F_SET(btree, WT_BTREE_SKIP_CKPT);
+ return (0);
}
}
+ return (0);
+}
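
__checkpoint_mark_deletes keeps the existing skip test: a clean, unforced tree can skip the checkpoint when its most recent checkpoints carry the same name, or both carry internal checkpoint names. A condensed, simplified sketch of the shape of that decision follows; the real test also accounts for checkpoints being replaced, so treat it as illustrative only.

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    #define INTERNAL_PREFIX "WiredTigerCheckpoint"  /* like WT_CHECKPOINT */

    struct ckpt {
        const char *name;
        bool delete_flag;   /* marked for deletion */
    };

    static bool
    is_internal(const char *name)
    {
        return (strncmp(name, INTERNAL_PREFIX, strlen(INTERNAL_PREFIX)) == 0);
    }

    /*
     * Decide whether a clean tree's checkpoint can be skipped: nothing to
     * delete, at least two existing checkpoints, and the new name matches
     * the previous one (or both are internal checkpoints).
     */
    static bool
    can_skip_checkpoint(bool tree_modified, bool forced,
        const struct ckpt *ckpts, int nckpts, const char *new_name)
    {
        int deleted, i;
        const char *prev;

        if (tree_modified || forced || nckpts < 2)
            return (false);

        deleted = 0;
        for (i = 0; i < nckpts; ++i)
            if (ckpts[i].delete_flag)
                ++deleted;
        if (deleted != 0)
            return (false);

        prev = ckpts[nckpts - 1].name;
        return (strcmp(new_name, prev) == 0 ||
            (is_internal(new_name) && is_internal(prev)));
    }

    int
    main(void)
    {
        struct ckpt list[] = {
            { "WiredTigerCheckpoint.1", false },
            { "WiredTigerCheckpoint.2", false },
        };

        printf("skip? %s\n",
            can_skip_checkpoint(false, false, list, 2,
            "WiredTigerCheckpoint") ? "yes" : "no");
        return (0);
    }
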
+
+/*
+ * __checkpoint_tree --
+ * Checkpoint a single tree.
+ * Assumes all necessary locks have been acquired by the caller.
+ */
+static int
+__checkpoint_tree(
+ WT_SESSION_IMPL *session, bool is_checkpoint, const char *cfg[])
+{
+ WT_BM *bm;
+ WT_BTREE *btree;
+ WT_CKPT *ckpt, *ckptbase;
+ WT_CONNECTION_IMPL *conn;
+ WT_DATA_HANDLE *dhandle;
+ WT_DECL_RET;
+ WT_LSN ckptlsn;
+ int was_modified;
+ bool fake_ckpt;
+
+ WT_UNUSED(cfg);
+
+ btree = S2BT(session);
+ bm = btree->bm;
+ ckptbase = btree->ckpt;
+ conn = S2C(session);
+ dhandle = session->dhandle;
+ fake_ckpt = false;
+ was_modified = btree->modified;
+
+ /*
+ * Set the checkpoint LSN to the maximum LSN so that if logging is
+ * disabled, recovery will never roll old changes forward over the
+ * non-logged changes in this checkpoint. If logging is enabled, a
+ * real checkpoint LSN will be assigned for this checkpoint and
+ * overwrite this.
+ */
+ WT_MAX_LSN(&ckptlsn);
+
/*
* If an object has never been used (in other words, if it could become
* a bulk-loaded file), then we must fake the checkpoint. This is good
@@ -1183,10 +1391,10 @@ fake: /*
/*
* If we wrote a checkpoint (rather than faking one), pages may be
- * available for re-use. If tracking enabled, defer making pages
- * available until transaction end. The exception is if the handle
- * is being discarded, in which case the handle will be gone by the
- * time we try to apply or unroll the meta tracking event.
+ * available for re-use. If tracking is enabled, defer making pages
+ * available until transaction end. The exception is if the handle is
+ * being discarded, in which case the handle will be gone by the time
+ * we try to apply or unroll the meta tracking event.
*/
if (!fake_ckpt) {
if (WT_META_TRACKING(session) && is_checkpoint)
@@ -1214,13 +1422,59 @@ err: /*
}
/*
+ * __checkpoint_presync --
+ * Visit all handles after the checkpoint writes are complete and before
+ * syncing. At this point, all trees should be completely open for
+ * business.
+ */
+static int
+__checkpoint_presync(WT_SESSION_IMPL *session, const char *cfg[])
+{
+ WT_BTREE *btree;
+
+ WT_UNUSED(cfg);
+
+ btree = S2BT(session);
+ WT_ASSERT(session, !btree->include_checkpoint_txn);
+ btree->evict_walk_period = btree->evict_walk_saved;
+ return (0);
+}
+
+/*
* __checkpoint_tree_helper --
* Checkpoint a tree (suitable for use in *_apply functions).
*/
static int
__checkpoint_tree_helper(WT_SESSION_IMPL *session, const char *cfg[])
{
- return (__checkpoint_tree(session, true, cfg));
+ WT_BTREE *btree;
+ WT_DECL_RET;
+
+ btree = S2BT(session);
+
+ ret = __checkpoint_tree(session, true, cfg);
+
+ /*
+ * Whatever happened, we aren't visiting this tree again in this
+ * checkpoint. Don't keep updates pinned any longer.
+ */
+ __checkpoint_update_generation(session);
+
+ /*
+ * In case this tree was being skipped by the eviction server
+ * during the checkpoint, restore the previous state.
+ */
+ btree->evict_walk_period = btree->evict_walk_saved;
+
+ /*
+ * Wake the eviction server, in case application threads have
+ * stalled while the eviction server decided it couldn't make
+ * progress. Without this, application threads will be stalled
+ * until the eviction server next wakes.
+ */
+ WT_TRET(__wt_evict_server_wake(session));
+
+ return (ret);
}
/*
@@ -1242,6 +1496,9 @@ __wt_checkpoint(WT_SESSION_IMPL *session, const char *cfg[])
WT_SAVE_DHANDLE(session,
ret = __checkpoint_lock_tree(session, true, true, cfg));
WT_RET(ret);
+ WT_SAVE_DHANDLE(session,
+ ret = __checkpoint_mark_deletes(session, cfg));
+ WT_RET(ret);
return (__checkpoint_tree(session, true, cfg));
}
@@ -1319,6 +1576,11 @@ __wt_checkpoint_close(WT_SESSION_IMPL *session, bool final)
WT_SAVE_DHANDLE(session,
ret = __checkpoint_lock_tree(session, false, need_tracking, NULL));
WT_ASSERT(session, ret == 0);
+ if (ret == 0) {
+ WT_SAVE_DHANDLE(session,
+ ret = __checkpoint_mark_deletes(session, NULL));
+ WT_ASSERT(session, ret == 0);
+ }
if (ret == 0)
ret = __checkpoint_tree(session, false, NULL);
diff --git a/src/txn/txn_log.c b/src/txn/txn_log.c
index 470515244f3..e73ff00f5b7 100644
--- a/src/txn/txn_log.c
+++ b/src/txn/txn_log.c
@@ -329,7 +329,7 @@ __wt_txn_checkpoint_log(
case WT_TXN_LOG_CKPT_START:
/* Take a copy of the transaction snapshot. */
txn->ckpt_nsnapshot = txn->snapshot_count;
- recsize = txn->ckpt_nsnapshot * WT_INTPACK64_MAXSIZE;
+ recsize = (size_t)txn->ckpt_nsnapshot * WT_INTPACK64_MAXSIZE;
WT_ERR(__wt_scr_alloc(session, recsize, &txn->ckpt_snapshot));
p = txn->ckpt_snapshot->mem;
end = p + recsize;
diff --git a/src/utilities/util_dump.c b/src/utilities/util_dump.c
index da70aea35be..6344a90dddd 100644
--- a/src/utilities/util_dump.c
+++ b/src/utilities/util_dump.c
@@ -242,6 +242,7 @@ dump_table_config(
char *p, **cfg, *_cfg[4] = {NULL, NULL, NULL, NULL};
p = NULL;
+ srch = NULL;
cfg = &_cfg[3];
/* Get the table name. */
@@ -306,32 +307,31 @@ dump_table_config(
WT_ERR(print_config(session, uri, cfg, json, true));
- if (complex_table) {
- /*
- * The underlying table configuration function needs a second
- * cursor: open one before calling it, it makes error handling
- * hugely simpler.
- */
- if ((ret = session->open_cursor(
- session, "metadata:", NULL, NULL, &srch)) != 0)
- WT_ERR(util_cerr(cursor, "open_cursor", ret));
-
- if ((ret = dump_table_config_complex(
- session, cursor, srch, name, "colgroup:", json)) == 0)
- ret = dump_table_config_complex(
- session, cursor, srch, name, "index:", json);
-
- if ((tret = srch->close(srch)) != 0) {
- tret = util_cerr(cursor, "close", tret);
- if (ret == 0)
- ret = tret;
- }
- } else if (json && printf(
- " \"colgroups\" : [],\n"
- " \"indices\" : []\n") < 0)
+ /*
+ * The underlying table configuration function needs a second
+	 * cursor: open one before calling it; that makes error handling
+ * hugely simpler.
+ */
+ if ((ret = session->open_cursor(
+ session, "metadata:", NULL, NULL, &srch)) != 0)
+ WT_ERR(util_cerr(cursor, "open_cursor", ret));
+
+ if (complex_table)
+ WT_ERR(dump_table_config_complex(
+ session, cursor, srch, name, "colgroup:", json));
+ else if (json && printf(
+ " \"colgroups\" : [],\n") < 0)
WT_ERR(util_cerr(cursor, NULL, EIO));
-err: free(p);
+ WT_ERR(dump_table_config_complex(
+ session, cursor, srch, name, "index:", json));
+
+err: if (srch != NULL && (tret = srch->close(srch)) != 0) {
+ tret = util_cerr(cursor, "close", tret);
+ if (ret == 0)
+ ret = tret;
+ }
+ free(p);
free(_cfg[0]);
free(_cfg[1]);
free(_cfg[2]);
diff --git a/test/cursor_order/cursor_order_ops.c b/test/cursor_order/cursor_order_ops.c
index a2185dd123f..58da49b2991 100644
--- a/test/cursor_order/cursor_order_ops.c
+++ b/test/cursor_order/cursor_order_ops.c
@@ -130,7 +130,8 @@ ops_start(SHARED_CONFIG *cfg)
seconds = (stop.tv_sec - start.tv_sec) +
(stop.tv_usec - start.tv_usec) * 1e-6;
fprintf(stderr, "timer: %.2lf seconds (%d ops/second)\n",
- seconds, (int)(((cfg->reverse_scanners + cfg->append_inserters) *
+ seconds, (int)
+ (((double)(cfg->reverse_scanners + cfg->append_inserters) *
total_nops) / seconds));
/* Verify the files. */
diff --git a/test/format/ops.c b/test/format/ops.c
index c97d82809a1..283e2912daa 100644
--- a/test/format/ops.c
+++ b/test/format/ops.c
@@ -342,7 +342,7 @@ snap_check(WT_CURSOR *cursor,
switch (g.type) {
case FIX:
testutil_die(ret,
- "snap_check: %" PRIu64 " search: "
+ "snapshot-isolation: %" PRIu64 " search: "
"expected {0x%02x}, found {0x%02x}",
start->keyno,
start->deleted ? 0 : *(uint8_t *)start->vdata,
@@ -350,7 +350,7 @@ snap_check(WT_CURSOR *cursor,
/* NOTREACHED */
case ROW:
testutil_die(ret,
- "snap_check: %.*s search: "
+ "snapshot-isolation: %.*s search: "
"expected {%.*s}, found {%.*s}",
(int)key->size, key->data,
start->deleted ?
@@ -362,7 +362,7 @@ snap_check(WT_CURSOR *cursor,
/* NOTREACHED */
case VAR:
testutil_die(ret,
- "snap_check: %" PRIu64 " search: "
+ "snapshot-isolation: %" PRIu64 " search: "
"expected {%.*s}, found {%.*s}",
start->keyno,
start->deleted ?
diff --git a/test/format/smoke.sh b/test/format/smoke.sh
index 5fbc349f242..0c86b5e57c6 100755
--- a/test/format/smoke.sh
+++ b/test/format/smoke.sh
@@ -3,7 +3,7 @@
set -e
# Smoke-test format as part of running "make check".
-args="-1 -c "." data_source=table ops=100000 rows=10000 threads=4 compression=none logging_compression=none"
+args="-1 -c "." data_source=table ops=50000 rows=10000 threads=4 compression=none logging_compression=none"
$TEST_WRAPPER ./t $args file_type=fix
$TEST_WRAPPER ./t $args file_type=row
diff --git a/test/manydbs/Makefile.am b/test/manydbs/Makefile.am
index 2bc47ad7f2e..ff5985cf2a4 100644
--- a/test/manydbs/Makefile.am
+++ b/test/manydbs/Makefile.am
@@ -10,7 +10,8 @@ t_LDADD +=$(top_builddir)/libwiredtiger.la
t_LDFLAGS = -static
# Run this during a "make check" smoke test.
-TESTS = smoke.sh
+TESTS = $(noinst_PROGRAMS)
+LOG_COMPILER = $(TEST_WRAPPER)
clean-local:
rm -rf WT_TEST *.core
diff --git a/test/manydbs/smoke.sh b/test/manydbs/smoke.sh
deleted file mode 100755
index c0e2976f154..00000000000
--- a/test/manydbs/smoke.sh
+++ /dev/null
@@ -1,18 +0,0 @@
-#!/bin/sh
-
-set -e
-
-# Smoke-test format as part of running "make check".
-# Run with:
-# 1. The defaults
-# 2. Set idle flag to turn off operations.
-# 3. More dbs.
-#
-echo "manydbs: default with operations turned on"
-$TEST_WRAPPER ./t
-echo "manydbs: totally idle databases"
-$TEST_WRAPPER ./t -I
-echo "manydbs: 40 databases with operations"
-$TEST_WRAPPER ./t -D 40
-echo "manydbs: 40 idle databases"
-$TEST_WRAPPER ./t -I -D 40
diff --git a/test/mciproject.yml b/test/mciproject.yml
index 3df1ce5805e..8825bb65052 100644
--- a/test/mciproject.yml
+++ b/test/mciproject.yml
@@ -8,12 +8,12 @@ functions:
command: git.get_project
params:
directory: wiredtiger
- "fetch artifacts" : &fetch_artifacts
+ "fetch binaries" : &fetch_binaries
- command: s3.get
params:
aws_key: ${aws_key}
aws_secret: ${aws_secret}
- remote_file: wiredtiger/${build_variant}/${revision}/artifacts/${build_id}.tgz
+ remote_file: wiredtiger/${build_variant}/${revision}/binaries/${build_id}.tgz
bucket: build_external
extract_to: wiredtiger
@@ -23,6 +23,22 @@ pre:
script: |
rm -rf "wiredtiger"
post:
+ - command: archive.targz_pack
+ params:
+ target: "wiredtiger.tgz"
+ source_dir: "wiredtiger"
+ include:
+ - "./**"
+ - command: s3.put
+ params:
+ aws_secret: ${aws_secret}
+ aws_key: ${aws_key}
+ local_file: wiredtiger.tgz
+ bucket: build_external
+ permissions: public-read
+ content_type: application/tar
+ display_name: Artifacts
+ remote_file: wiredtiger/${build_variant}/${revision}/artifacts/${build_id}.tgz
- command: shell.exec
params:
script: |
@@ -49,7 +65,7 @@ tasks:
./build_posix/reconf
${configure_env_vars|} ./configure --enable-diagnostic --enable-python --enable-zlib --enable-strict --enable-verbose
${make_command|make} ${smp_command|} 2>&1
- ${make_command|make} check 2>&1
+ ${make_command|make} VERBOSE=1 check 2>&1
fi
- command: archive.targz_pack
params:
@@ -65,14 +81,14 @@ tasks:
bucket: build_external
permissions: public-read
content_type: application/tar
- display_name: Artifacts
- remote_file: wiredtiger/${build_variant}/${revision}/artifacts/${build_id}.tgz
+ display_name: Binaries
+ remote_file: wiredtiger/${build_variant}/${revision}/binaries/${build_id}.tgz
- name: unit-test
depends_on:
- name: compile
commands:
- - func: "fetch artifacts"
+ - func: "fetch binaries"
- command: shell.exec
params:
working_dir: "wiredtiger"
@@ -85,7 +101,7 @@ tasks:
depends_on:
- name: compile
commands:
- - func: "fetch artifacts"
+ - func: "fetch binaries"
- command: shell.exec
params:
working_dir: "wiredtiger"
@@ -99,7 +115,7 @@ tasks:
depends_on:
- name: compile
commands:
- - func: "fetch artifacts"
+ - func: "fetch binaries"
- command: shell.exec
params:
working_dir: "wiredtiger"
diff --git a/test/recovery/Makefile.am b/test/recovery/Makefile.am
index 19fc48dce47..3e7fce17d0e 100644
--- a/test/recovery/Makefile.am
+++ b/test/recovery/Makefile.am
@@ -14,8 +14,7 @@ truncated_log_LDADD +=$(top_builddir)/libwiredtiger.la
truncated_log_LDFLAGS = -static
# Run this during a "make check" smoke test.
-TESTS = $(noinst_PROGRAMS)
-LOG_COMPILER = $(TEST_WRAPPER)
+TESTS = smoke.sh
clean-local:
rm -rf WT_TEST.* *.core
diff --git a/test/recovery/random-abort.c b/test/recovery/random-abort.c
index 85629eddec4..16065cec29e 100644
--- a/test/recovery/random-abort.c
+++ b/test/recovery/random-abort.c
@@ -91,7 +91,8 @@ thread_run(void *arg)
if ((fp = fopen(buf, "w")) == NULL)
testutil_die(errno, "fopen");
/*
- * Set to no buffering.
+ * Set to line buffering. But that is advisory only. We've seen
+ * cases where the result files end up with partial lines.
*/
__wt_stream_set_line_buffer(fp);
if ((ret = td->conn->open_session(td->conn, NULL, NULL, &session)) != 0)
@@ -188,7 +189,7 @@ main(int argc, char *argv[])
WT_CURSOR *cursor;
WT_SESSION *session;
WT_RAND_STATE rnd;
- uint64_t key;
+ uint64_t key, last_key;
uint32_t absent, count, i, nth, timeout;
int ch, status, ret;
pid_t pid;
@@ -317,12 +318,23 @@ main(int argc, char *argv[])
* in the table after recovery. Since we did write-no-sync, we
* expect every key to have been recovered.
*/
- for (;; ++count) {
+ for (last_key = UINT64_MAX;; ++count, last_key = key) {
ret = fscanf(fp, "%" SCNu64 "\n", &key);
if (ret != EOF && ret != 1)
testutil_die(errno, "fscanf");
if (ret == EOF)
break;
+ /*
+ * If we're unlucky, the last line may be a partially
+ * written key at the end that can result in a false
+ * negative error for a missing record. Detect it.
+ */
+ if (last_key != UINT64_MAX && key != last_key + 1) {
+ printf("%s: Ignore partial record %" PRIu64
+ " last valid key %" PRIu64 "\n",
+ fname, key, last_key);
+ break;
+ }
snprintf(kname, sizeof(kname), "%" PRIu64, key);
cursor->set_key(cursor, kname);
if ((ret = cursor->search(cursor)) != 0) {
diff --git a/test/recovery/smoke.sh b/test/recovery/smoke.sh
new file mode 100755
index 00000000000..c7677b64503
--- /dev/null
+++ b/test/recovery/smoke.sh
@@ -0,0 +1,8 @@
+#! /bin/sh
+
+set -e
+
+# Smoke-test recovery as part of running "make check".
+
+$TEST_WRAPPER ./random-abort -t 10 -T 5
+$TEST_WRAPPER ./truncated-log
diff --git a/test/suite/helper.py b/test/suite/helper.py
index f85d708880f..9f34b566b3c 100644
--- a/test/suite/helper.py
+++ b/test/suite/helper.py
@@ -179,6 +179,49 @@ def simple_populate_check(self, uri, rows):
simple_populate_check_cursor(self, cursor, rows)
cursor.close()
+# population of a simple object, with a single index
+# uri: object
+# config: prefix of the session.create configuration string (defaults
+# to string value formats)
+# rows: entries to insert
+def simple_index_populate(self, uri, config, rows):
+ self.pr('simple_index_populate: ' + uri + ' with ' + str(rows) + ' rows')
+ self.session.create(uri, 'value_format=S,columns=(key0,value0),' + config)
+ indxname = 'index:' + uri.split(":")[1]
+ self.session.create(indxname + ':index1', 'columns=(value0,key0)')
+ cursor = self.session.open_cursor(uri, None)
+ for i in range(1, rows + 1):
+ cursor[key_populate(cursor, i)] = value_populate(cursor, i)
+ cursor.close()
+
+def simple_index_populate_check_cursor(self, cursor, rows):
+ i = 0
+ for key,val in cursor:
+ i += 1
+ self.assertEqual(key, key_populate(cursor, i))
+ if cursor.value_format == '8t' and val == 0: # deleted
+ continue
+ self.assertEqual(val, value_populate(cursor, i))
+ self.assertEqual(i, rows)
+
+def simple_index_populate_check(self, uri, rows):
+ self.pr('simple_index_populate_check: ' + uri)
+
+ # Check values in the main table.
+ cursor = self.session.open_cursor(uri, None)
+ simple_index_populate_check_cursor(self, cursor, rows)
+
+ # Check values in the index.
+ indxname = 'index:' + uri.split(":")[1]
+ idxcursor = self.session.open_cursor(indxname + ':index1')
+ for i in range(1, rows + 1):
+ k = key_populate(cursor, i)
+ v = value_populate(cursor, i)
+ ik = (v,k) # The index key is columns=(v,k).
+ self.assertEqual(v, idxcursor[ik])
+ idxcursor.close()
+ cursor.close()
+
# Return the value stored in a complex object.
def complex_value_populate(cursor, i):
return [str(i) + ': abcdefghijklmnopqrstuvwxyz'[0:i%26],
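The new helpers create a (value0, key0) index next to the table and verify both sides: the table is checked row by row, then every expected (value, key) pair is looked up through the index cursor. A hypothetical test using them might look like the sketch below; the class name, URI and row count are made up for illustration:

    # Hypothetical usage of the new helpers, not an actual suite test.
    import wttest
    from helper import simple_index_populate, simple_index_populate_check

    class test_index_helpers_example(wttest.WiredTigerTestCase):
        def test_populate_and_check(self):
            uri = 'table:index_example'          # illustrative URI
            # Creates the table plus 'index:index_example:index1'
            # and inserts 100 rows.
            simple_index_populate(self, uri, 'key_format=S', 100)
            # Checks the table contents and each (value, key) index entry.
            simple_index_populate_check(self, uri, 100)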
diff --git a/test/suite/run.py b/test/suite/run.py
index 6e7421b8b96..c37093a2a55 100644
--- a/test/suite/run.py
+++ b/test/suite/run.py
@@ -87,6 +87,7 @@ Options:\n\
-j N | --parallel N run all tests in parallel using N processes\n\
-l | --long run the entire test suite\n\
-p | --preserve preserve output files in WT_TEST/<testname>\n\
+ -s N | --scenario N use scenario N (N can be number or symbolic)\n\
-t | --timestamp name WT_TEST according to timestamp\n\
-v N | --verbose N set verboseness to N (0<=N<=3, default=1)\n\
\n\
@@ -95,15 +96,27 @@ Tests:\n\
may be a subsuite name (e.g. \'base\' runs test_base*.py)\n\
\n\
When -C or -c are present, there may not be any tests named.\n\
+ When -s is present, there must be a test named.\n\
'
# capture the category (AKA 'subsuite') part of a test name,
# e.g. test_util03 -> util
reCatname = re.compile(r"test_([^0-9]+)[0-9]*")
-def addScenarioTests(tests, loader, testname):
+def restrictScenario(testcases, restrict):
+ if restrict == '':
+ return testcases
+ elif restrict.isdigit():
+ s = int(restrict)
+ return [t for t in testcases
+ if hasattr(t, 'scenario_number') and t.scenario_number == s]
+ else:
+ return [t for t in testcases
+ if hasattr(t, 'scenario_name') and t.scenario_name == restrict]
+
+def addScenarioTests(tests, loader, testname, scenario):
loaded = loader.loadTestsFromName(testname)
- tests.addTests(generate_scenarios(loaded))
+ tests.addTests(restrictScenario(generate_scenarios(loaded), scenario))
def configRecord(cmap, tup):
"""
@@ -195,20 +208,20 @@ def configApply(suites, configfilename, configwrite):
json.dump(configmap, f, sort_keys=True, indent=4)
return newsuite
-def testsFromArg(tests, loader, arg):
+def testsFromArg(tests, loader, arg, scenario):
# If a group of tests is mentioned, do all tests in that group
# e.g. 'run.py base'
groupedfiles = glob.glob(suitedir + os.sep + 'test_' + arg + '*.py')
if len(groupedfiles) > 0:
for file in groupedfiles:
- testsFromArg(tests, loader, os.path.basename(file))
+ testsFromArg(tests, loader, os.path.basename(file), scenario)
return
# Explicit test class names
if not arg[0].isdigit():
if arg.endswith('.py'):
arg = arg[:-3]
- addScenarioTests(tests, loader, arg)
+ addScenarioTests(tests, loader, arg, scenario)
return
# Deal with ranges
@@ -217,7 +230,7 @@ def testsFromArg(tests, loader, arg):
else:
start, end = int(arg), int(arg)
for t in xrange(start, end+1):
- addScenarioTests(tests, loader, 'test%03d' % t)
+ addScenarioTests(tests, loader, 'test%03d' % t, scenario)
if __name__ == '__main__':
tests = unittest.TestSuite()
@@ -228,6 +241,7 @@ if __name__ == '__main__':
configfile = None
configwrite = False
dirarg = None
+ scenario = ''
verbose = 1
args = sys.argv[1:]
testargs = []
@@ -265,6 +279,12 @@ if __name__ == '__main__':
if option == '-preserve' or option == 'p':
preserve = True
continue
+ if option == '-scenario' or option == 's':
+ if scenario != '' or len(args) == 0:
+ usage()
+ sys.exit(2)
+ scenario = args.pop(0)
+ continue
if option == '-timestamp' or option == 't':
timestamp = True
continue
@@ -303,15 +323,20 @@ if __name__ == '__main__':
# Without any tests listed as arguments, do discovery
if len(testargs) == 0:
+ if scenario != '':
+ sys.stderr.write(
+ 'run.py: specifying a scenario requires a test name\n')
+ usage()
+ sys.exit(2)
from discover import defaultTestLoader as loader
suites = loader.discover(suitedir)
suites = sorted(suites, key=lambda c: str(list(c)[0]))
if configfile != None:
suites = configApply(suites, configfile, configwrite)
- tests.addTests(generate_scenarios(suites))
+ tests.addTests(restrictScenario(generate_scenarios(suites), ''))
else:
for arg in testargs:
- testsFromArg(tests, loader, arg)
+ testsFromArg(tests, loader, arg, scenario)
if debug:
import pdb
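The new -s/--scenario option keeps only the generated test cases whose scenario_number or scenario_name matches the argument, and it requires an explicit test name so the filter is never applied to a full discovery run. A condensed sketch of the selection logic, with illustrative command lines in the comments:

    # Condensed restatement of restrictScenario() above.
    def restrict(testcases, wanted):
        if wanted == '':
            return testcases                  # no -s given: keep everything
        if wanted.isdigit():                  # numeric: match scenario_number
            n = int(wanted)
            return [t for t in testcases
                    if getattr(t, 'scenario_number', None) == n]
        return [t for t in testcases          # symbolic: match scenario_name
                if getattr(t, 'scenario_name', None) == wanted]

    # Example invocations (illustrative):
    #   python run.py test_backup03 -s 3            scenario number 3
    #   python run.py test_backup03 -s backup_5a    scenario named 'backup_5a'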
diff --git a/test/suite/test_async01.py b/test/suite/test_async01.py
index 71a18a68121..9322748c30f 100644
--- a/test/suite/test_async01.py
+++ b/test/suite/test_async01.py
@@ -29,7 +29,7 @@
import sys, threading, wiredtiger, wttest
from suite_subprocess import suite_subprocess
from wiredtiger import WiredTigerError
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# TODO - tmp code
def tty_pr(s):
@@ -122,7 +122,7 @@ class test_async01(wttest.WiredTigerTestCase, suite_subprocess):
async_threads = 3
current = {}
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file-col', dict(tablekind='col',uri='file')),
('file-fix', dict(tablekind='fix',uri='file')),
('file-row', dict(tablekind='row',uri='file')),
diff --git a/test/suite/test_async02.py b/test/suite/test_async02.py
index 7aa1b85a2f3..bc6b389fc27 100644
--- a/test/suite/test_async02.py
+++ b/test/suite/test_async02.py
@@ -29,7 +29,7 @@
import sys, threading, wiredtiger, wttest
from suite_subprocess import suite_subprocess
from wiredtiger import WiredTigerError
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
class Callback(wiredtiger.AsyncCallback):
def __init__(self, current):
@@ -119,7 +119,7 @@ class test_async02(wttest.WiredTigerTestCase, suite_subprocess):
async_threads = 3
current = {}
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file-col', dict(tablekind='col',uri='file')),
('file-fix', dict(tablekind='fix',uri='file')),
('file-row', dict(tablekind='row',uri='file')),
diff --git a/test/suite/test_backup02.py b/test/suite/test_backup02.py
index 095bfbe404a..398d55abd7a 100644
--- a/test/suite/test_backup02.py
+++ b/test/suite/test_backup02.py
@@ -30,13 +30,13 @@ import Queue
import threading, time, wiredtiger, wttest
from helper import key_populate, simple_populate
from wtthread import backup_thread, checkpoint_thread, op_thread
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_backup02.py
# Run background checkpoints and backups repeatedly while doing inserts
# in another thread
class test_backup02(wttest.WiredTigerTestCase):
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('table', dict(uri='table:test',fmt='L',dsize=100,nops=200,nthreads=1,time=30)),
])
diff --git a/test/suite/test_backup03.py b/test/suite/test_backup03.py
index e810a2ec714..053009c6edb 100644
--- a/test/suite/test_backup03.py
+++ b/test/suite/test_backup03.py
@@ -28,7 +28,7 @@
import glob, os, shutil, string
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
import wiredtiger, wttest
from helper import compare_files,\
complex_populate, complex_populate_lsm, simple_populate
@@ -56,25 +56,25 @@ class test_backup_target(wttest.WiredTigerTestCase, suite_subprocess):
('table:' + pfx + '.4', complex_populate_lsm, 3),
]
list = [
- ( '1', dict(big=0,list=[0])), # Target objects individually
- ( '2', dict(big=1,list=[1])),
- ( '3', dict(big=2,list=[2])),
- ( '4', dict(big=3,list=[3])),
- ('5a', dict(big=0,list=[0,2])), # Target groups of objects
- ('5b', dict(big=2,list=[0,2])),
- ('6a', dict(big=1,list=[1,3])),
- ('6b', dict(big=3,list=[1,3])),
- ('7a', dict(big=0,list=[0,1,2])),
- ('7b', dict(big=1,list=[0,1,2])),
- ('7c', dict(big=2,list=[0,1,2])),
- ('8a', dict(big=0,list=[0,1,2,3])),
- ('8b', dict(big=1,list=[0,1,2,3])),
- ('8c', dict(big=2,list=[0,1,2,3])),
- ('8d', dict(big=3,list=[0,1,2,3])),
- ( '9', dict(big=3,list=[])), # Backup everything
+ ( 'backup_1', dict(big=0,list=[0])), # Target objects individually
+ ( 'backup_2', dict(big=1,list=[1])),
+ ( 'backup_3', dict(big=2,list=[2])),
+ ( 'backup_4', dict(big=3,list=[3])),
+ ('backup_5a', dict(big=0,list=[0,2])), # Target groups of objects
+ ('backup_5b', dict(big=2,list=[0,2])),
+ ('backup_6a', dict(big=1,list=[1,3])),
+ ('backup_6b', dict(big=3,list=[1,3])),
+ ('backup_7a', dict(big=0,list=[0,1,2])),
+ ('backup_7b', dict(big=1,list=[0,1,2])),
+ ('backup_7c', dict(big=2,list=[0,1,2])),
+ ('backup_8a', dict(big=0,list=[0,1,2,3])),
+ ('backup_8b', dict(big=1,list=[0,1,2,3])),
+ ('backup_8c', dict(big=2,list=[0,1,2,3])),
+ ('backup_8d', dict(big=3,list=[0,1,2,3])),
+ ('backup_9', dict(big=3,list=[])), # Backup everything
]
- scenarios = number_scenarios(multiply_scenarios('.', list))
+ scenarios = make_scenarios(list)
# Create a large cache, otherwise this test runs quite slowly.
conn_config = 'cache_size=1G'
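The same conversion repeats across the suite: the old pattern of crossing scenario lists with multiply_scenarios, tagging them with number_scenarios and optionally capping them with prune_scenarios collapses into a single make_scenarios call, and scenario names gain descriptive prefixes (here backup_*) so they can be selected symbolically with -s. A small before/after sketch using the signatures that appear in this diff; the lists are illustrative:

    # Sketch of the conversion pattern, not code from any one test.
    from wtscenario import make_scenarios

    types = [('file', dict(uri='file:')), ('table', dict(uri='table:'))]
    ckpt  = [('no', dict(name=0)), ('yes', dict(name=1))]

    # Old: scenarios = number_scenarios(multiply_scenarios('.', types, ckpt))
    # New: one call crosses, numbers and (optionally) prunes the scenarios.
    scenarios = make_scenarios(types, ckpt)
    # scenarios = make_scenarios(types, ckpt, prune=1000)   # as in test_config03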
diff --git a/test/suite/test_backup04.py b/test/suite/test_backup04.py
index 852a22c1e0c..866e673dccb 100644
--- a/test/suite/test_backup04.py
+++ b/test/suite/test_backup04.py
@@ -30,7 +30,7 @@ import Queue
import threading, time, wiredtiger, wttest
import glob, os, shutil
from suite_subprocess import suite_subprocess
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
from wtthread import op_thread
from helper import compare_files, key_populate
@@ -54,7 +54,7 @@ class test_backup_target(wttest.WiredTigerTestCase, suite_subprocess):
# and that is not what we want here.
#
pfx = 'test_backup'
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('table', dict(uri='table:test',dsize=100,nops=2000,nthreads=1,time=30)),
])
diff --git a/test/suite/test_backup05.py b/test/suite/test_backup05.py
index fbe219d8de8..131732e9a89 100644
--- a/test/suite/test_backup05.py
+++ b/test/suite/test_backup05.py
@@ -35,7 +35,6 @@
import fnmatch, os, shutil, time
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios
from helper import copy_wiredtiger_home
import wiredtiger, wttest
diff --git a/test/suite/test_base02.py b/test/suite/test_base02.py
index 70117573241..2b51fe1b530 100644
--- a/test/suite/test_base02.py
+++ b/test/suite/test_base02.py
@@ -32,14 +32,14 @@
import json
import wiredtiger, wttest
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# Test configuration strings.
class test_base02(wttest.WiredTigerTestCase):
name = 'test_base02a'
extra_config = ''
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file', dict(uri='file:')),
('table', dict(uri='table:')),
('lsm', dict(uri='lsm:')),
diff --git a/test/suite/test_base05.py b/test/suite/test_base05.py
index f191f23561f..4bee0efcfe2 100644
--- a/test/suite/test_base05.py
+++ b/test/suite/test_base05.py
@@ -27,7 +27,7 @@
# OTHER DEALINGS IN THE SOFTWARE.
import wiredtiger, wttest
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_base05.py
# Cursor operations
@@ -40,7 +40,7 @@ class test_base05(wttest.WiredTigerTestCase):
table_name1 = 'test_base05a'
table_name2 = 'test_base05b'
nentries = 1000
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('no_huffman', dict(extraconfig='')),
('huffman_key', dict(extraconfig='huffman_key="english"')),
('huffman_val', dict(extraconfig='huffman_value="english"')),
diff --git a/test/suite/test_bug003.py b/test/suite/test_bug003.py
index 739279a0141..28d71a534e2 100644
--- a/test/suite/test_bug003.py
+++ b/test/suite/test_bug003.py
@@ -30,7 +30,7 @@
# Regression tests.
import wiredtiger, wttest
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# Regression tests.
class test_bug003(wttest.WiredTigerTestCase):
@@ -43,7 +43,7 @@ class test_bug003(wttest.WiredTigerTestCase):
('yes', dict(name=1)),
]
- scenarios = number_scenarios(multiply_scenarios('.', types, ckpt))
+ scenarios = make_scenarios(types, ckpt)
# Confirm bulk-load isn't stopped by checkpoints.
def test_bug003(self):
diff --git a/test/suite/test_bug006.py b/test/suite/test_bug006.py
index e522cdf96f7..314ba57038f 100644
--- a/test/suite/test_bug006.py
+++ b/test/suite/test_bug006.py
@@ -31,13 +31,13 @@
import wiredtiger, wttest
from helper import key_populate, value_populate
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# Check that verify and salvage both raise exceptions if there is an open
# cursor.
class test_bug006(wttest.WiredTigerTestCase):
name = 'test_bug006'
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file', dict(uri='file:')),
('table', dict(uri='table:')),
])
diff --git a/test/suite/test_bug008.py b/test/suite/test_bug008.py
index 0243887e258..c4fa411f55e 100644
--- a/test/suite/test_bug008.py
+++ b/test/suite/test_bug008.py
@@ -31,13 +31,13 @@
import wiredtiger, wttest
from helper import simple_populate, key_populate, value_populate
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# Test search/search-near operations, including invisible values and keys
# past the end of the table.
class test_bug008(wttest.WiredTigerTestCase):
uri = 'file:test_bug008' # This is a btree layer test.
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('fix', dict(fmt='key_format=r,value_format=8t', empty=1, colvar=0)),
('row', dict(fmt='key_format=S', empty=0, colvar=0)),
('var', dict(fmt='key_format=r', empty=0, colvar=1))
diff --git a/test/suite/test_bug009.py b/test/suite/test_bug009.py
index 4d10e4391d9..2bdfb7dec52 100644
--- a/test/suite/test_bug009.py
+++ b/test/suite/test_bug009.py
@@ -33,7 +33,6 @@
import wiredtiger, wttest
from wiredtiger import stat
-from wtscenario import multiply_scenarios, number_scenarios
class test_bug009(wttest.WiredTigerTestCase):
name = 'test_bug009'
diff --git a/test/suite/test_bug011.py b/test/suite/test_bug011.py
index 50dba1c48be..fceb7a22ddb 100644
--- a/test/suite/test_bug011.py
+++ b/test/suite/test_bug011.py
@@ -42,7 +42,7 @@ class test_bug011(wttest.WiredTigerTestCase):
nops = 10000
# Add connection configuration for this test.
def conn_config(self, dir):
- return 'cache_size=10MB,hazard_max=' + str(self.ntables / 2)
+ return 'cache_size=10MB,eviction_dirty_target=99,eviction_dirty_trigger=99,hazard_max=' + str(self.ntables / 2)
def test_eviction(self):
cursors = []
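Raising eviction_dirty_target and eviction_dirty_trigger to 99 effectively defers dirty-page eviction while the test runs (the same tweak appears in test_compact02 later in this diff); that reading is an inference from the configuration, not stated in the change itself. A minimal sketch of the per-test connection override, with the class and table count made up:

    # Hypothetical sketch of the conn_config hook pattern used above.
    import wttest

    class test_eviction_example(wttest.WiredTigerTestCase):
        ntables = 100                        # illustrative

        def conn_config(self, dir):
            # Small cache, but dirty eviction deferred until 99% of it is
            # dirty, so it does not disturb the scenario under test.
            return ('cache_size=10MB,'
                    'eviction_dirty_target=99,eviction_dirty_trigger=99,'
                    'hazard_max=' + str(self.ntables / 2))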
diff --git a/test/suite/test_bug016.py b/test/suite/test_bug016.py
new file mode 100644
index 00000000000..f7cb3c32559
--- /dev/null
+++ b/test/suite/test_bug016.py
@@ -0,0 +1,109 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2016 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wiredtiger, wttest
+
+# test_bug016.py
+# WT-2757: WT_CURSOR.get_key() fails after WT_CURSOR.insert unless the
+# cursor has a record number key with append configured.
+class test_bug016(wttest.WiredTigerTestCase):
+
+ # Insert a row into a simple column-store table configured to append.
+ # WT_CURSOR.get_key should succeed.
+ def test_simple_column_store_append(self):
+ uri='file:bug016'
+ self.session.create(uri, 'key_format=r,value_format=S')
+ cursor = self.session.open_cursor(uri, None, 'append')
+ cursor.set_value('value')
+ cursor.insert()
+ self.assertEquals(cursor.get_key(), 1)
+
+ # Insert a row into a simple column-store table.
+ # WT_CURSOR.get_key should fail.
+ def test_simple_column_store(self):
+ uri='file:bug016'
+ self.session.create(uri, 'key_format=r,value_format=S')
+ cursor = self.session.open_cursor(uri, None)
+ cursor.set_key(37)
+ cursor.set_value('value')
+ cursor.insert()
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: cursor.get_key(), "/requires key be set/")
+
+ # Insert a row into a simple row-store table.
+ # WT_CURSOR.get_key should fail.
+ def test_simple_row_store(self):
+ uri='file:bug016'
+ self.session.create(uri, 'key_format=S,value_format=S')
+ cursor = self.session.open_cursor(uri, None)
+ cursor.set_key('key')
+ cursor.set_value('value')
+ cursor.insert()
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: cursor.get_key(), "/requires key be set/")
+
+ # Insert a row into a complex column-store table configured to append.
+ # WT_CURSOR.get_key should succeed.
+ def test_complex_column_store_append(self):
+ uri='table:bug016'
+ self.session.create(
+ uri, 'key_format=r,value_format=S,columns=(key,value)')
+ cursor = self.session.open_cursor(uri, None, 'append')
+ cursor.set_value('value')
+ cursor.insert()
+ self.assertEquals(cursor.get_key(), 1)
+
+ # Insert a row into a complex column-store table.
+ # WT_CURSOR.get_key should fail.
+ def test_complex_column_store(self):
+ uri='table:bug016'
+ self.session.create(
+ uri, 'key_format=r,value_format=S,columns=(key,value)')
+ cursor = self.session.open_cursor(uri, None)
+ cursor.set_key(37)
+ cursor.set_value('value')
+ cursor.insert()
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: cursor.get_key(), "/requires key be set/")
+
+ # Insert a row into a complex row-store table.
+ # WT_CURSOR.get_key should fail.
+ def test_complex_row_store(self):
+ uri='table:bug016'
+ self.session.create(
+ uri, 'key_format=S,value_format=S,columns=(key,value)')
+ cursor = self.session.open_cursor(uri, None)
+ cursor.set_key('key')
+ cursor.set_value('value')
+ cursor.insert()
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: cursor.get_key(), "/requires key be set/")
+
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/test/suite/test_bulk01.py b/test/suite/test_bulk01.py
index 1add11af26b..5bacfafaa20 100644
--- a/test/suite/test_bulk01.py
+++ b/test/suite/test_bulk01.py
@@ -32,7 +32,7 @@
import wiredtiger, wttest
from helper import key_populate, value_populate
-from wtscenario import check_scenarios, multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# Smoke test bulk-load.
class test_bulk_load(wttest.WiredTigerTestCase):
@@ -52,7 +52,7 @@ class test_bulk_load(wttest.WiredTigerTestCase):
('integer', dict(valfmt='i')),
('string', dict(valfmt='S')),
]
- scenarios = number_scenarios(multiply_scenarios('.', types, keyfmt, valfmt))
+ scenarios = make_scenarios(types, keyfmt, valfmt)
# Test a simple bulk-load
def test_bulk_load(self):
diff --git a/test/suite/test_bulk02.py b/test/suite/test_bulk02.py
index fe8118209f2..af0b6d4485d 100644
--- a/test/suite/test_bulk02.py
+++ b/test/suite/test_bulk02.py
@@ -32,7 +32,7 @@
import shutil, os
from helper import confirm_empty, key_populate, value_populate
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
import wiredtiger, wttest
# test_bulkload_checkpoint
@@ -47,7 +47,7 @@ class test_bulkload_checkpoint(wttest.WiredTigerTestCase, suite_subprocess):
('unnamed', dict(ckpt_type='unnamed')),
]
- scenarios = number_scenarios(multiply_scenarios('.', types, ckpt_type))
+ scenarios = make_scenarios(types, ckpt_type)
# Bulk-load handles are skipped by checkpoints.
# Named and unnamed checkpoint versions.
@@ -90,8 +90,7 @@ class test_bulkload_backup(wttest.WiredTigerTestCase, suite_subprocess):
('different', dict(session_type='different')),
('same', dict(session_type='same')),
]
- scenarios = number_scenarios(
- multiply_scenarios('.', types, ckpt_type, session_type))
+ scenarios = make_scenarios(types, ckpt_type, session_type)
# Backup a set of chosen tables/files using the wt backup command.
# The only files are bulk-load files, so they shouldn't be copied.
diff --git a/test/suite/test_checkpoint01.py b/test/suite/test_checkpoint01.py
index 6e1ad7814ed..78754dc82fa 100644
--- a/test/suite/test_checkpoint01.py
+++ b/test/suite/test_checkpoint01.py
@@ -28,7 +28,7 @@
import wiredtiger, wttest
from helper import key_populate, complex_populate_lsm, simple_populate
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_checkpoint01.py
# Checkpoint tests
@@ -36,7 +36,7 @@ from wtscenario import check_scenarios
# with a set of checkpoints, then confirm the checkpoint's values are correct,
# including after other checkpoints are dropped.
class test_checkpoint(wttest.WiredTigerTestCase):
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file', dict(uri='file:checkpoint',fmt='S')),
('table', dict(uri='table:checkpoint',fmt='S'))
])
@@ -139,7 +139,7 @@ class test_checkpoint(wttest.WiredTigerTestCase):
# Check some specific cursor checkpoint combinations.
class test_checkpoint_cursor(wttest.WiredTigerTestCase):
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file', dict(uri='file:checkpoint',fmt='S')),
('table', dict(uri='table:checkpoint',fmt='S'))
])
@@ -205,7 +205,7 @@ class test_checkpoint_cursor(wttest.WiredTigerTestCase):
# Check that you can checkpoint targets.
class test_checkpoint_target(wttest.WiredTigerTestCase):
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file', dict(uri='file:checkpoint',fmt='S')),
('table', dict(uri='table:checkpoint',fmt='S'))
])
@@ -252,7 +252,7 @@ class test_checkpoint_target(wttest.WiredTigerTestCase):
# Check that you can't write checkpoint cursors.
class test_checkpoint_cursor_update(wttest.WiredTigerTestCase):
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file-r', dict(uri='file:checkpoint',fmt='r')),
('file-S', dict(uri='file:checkpoint',fmt='S')),
('table-r', dict(uri='table:checkpoint',fmt='r')),
@@ -277,7 +277,7 @@ class test_checkpoint_cursor_update(wttest.WiredTigerTestCase):
# Check that WiredTigerCheckpoint works as a checkpoint specifier.
class test_checkpoint_last(wttest.WiredTigerTestCase):
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file', dict(uri='file:checkpoint',fmt='S')),
('table', dict(uri='table:checkpoint',fmt='S'))
])
@@ -343,7 +343,7 @@ class test_checkpoint_lsm_name(wttest.WiredTigerTestCase):
class test_checkpoint_empty(wttest.WiredTigerTestCase):
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file', dict(uri='file:checkpoint')),
('table', dict(uri='table:checkpoint')),
])
diff --git a/test/suite/test_checkpoint02.py b/test/suite/test_checkpoint02.py
index 71c8792359c..ac57499a9e4 100644
--- a/test/suite/test_checkpoint02.py
+++ b/test/suite/test_checkpoint02.py
@@ -30,13 +30,13 @@ import Queue
import threading, time, wiredtiger, wttest
from helper import key_populate, simple_populate
from wtthread import checkpoint_thread, op_thread
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_checkpoint02.py
# Run background checkpoints repeatedly while doing inserts and other
# operations in another thread
class test_checkpoint02(wttest.WiredTigerTestCase):
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('table-100', dict(uri='table:test',fmt='L',dsize=100,nops=50000,nthreads=10)),
('table-10', dict(uri='table:test',fmt='L',dsize=10,nops=50000,nthreads=30))
])
diff --git a/test/suite/test_colgap.py b/test/suite/test_colgap.py
index 46682c23167..5cc363dbd4a 100644
--- a/test/suite/test_colgap.py
+++ b/test/suite/test_colgap.py
@@ -28,7 +28,7 @@
import wiredtiger, wttest
from helper import simple_populate, key_populate, value_populate
-from wtscenario import check_scenarios, multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# test_colgap.py
# Test variable-length column-store gap performance.
@@ -149,8 +149,8 @@ class test_colmax(wttest.WiredTigerTestCase):
('not-single', dict(single=0)),
]
- scenarios = number_scenarios(multiply_scenarios(\
- '.', types, valfmt, record_number, bulk, reopen, single))
+ scenarios = make_scenarios(\
+ types, valfmt, record_number, bulk, reopen, single)
# Test that variable-length column-store correctly/efficiently handles big
# records (if it's not efficient, we'll just hang).
diff --git a/test/suite/test_collator.py b/test/suite/test_collator.py
index 34b5c20247f..a8103fb3671 100644
--- a/test/suite/test_collator.py
+++ b/test/suite/test_collator.py
@@ -28,7 +28,6 @@
import os
import wiredtiger, wttest, run
-from wtscenario import check_scenarios, number_scenarios
# test_collator.py
# Test indices using a custom extractor and collator.
diff --git a/test/suite/test_compact01.py b/test/suite/test_compact01.py
index 3af550708ed..183d75f9d31 100644
--- a/test/suite/test_compact01.py
+++ b/test/suite/test_compact01.py
@@ -30,7 +30,7 @@ import wiredtiger, wttest
from helper import complex_populate, simple_populate, key_populate
from suite_subprocess import suite_subprocess
from wiredtiger import stat
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# test_compact.py
# session level compact operation
@@ -53,7 +53,7 @@ class test_compact(wttest.WiredTigerTestCase, suite_subprocess):
('method_reopen', dict(utility=0,reopen=1)),
('utility', dict(utility=1,reopen=0)),
]
- scenarios = number_scenarios(multiply_scenarios('.', types, compact))
+ scenarios = make_scenarios(types, compact)
# We want a large cache so that eviction doesn't happen
# (which could skew our compaction results).
conn_config = 'cache_size=250MB,statistics=(all)'
diff --git a/test/suite/test_compact02.py b/test/suite/test_compact02.py
index 7ad05cd2536..eb21817bd90 100644
--- a/test/suite/test_compact02.py
+++ b/test/suite/test_compact02.py
@@ -32,7 +32,7 @@
import wiredtiger, wttest
from wiredtiger import stat
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# Test basic compression
class test_compact02(wttest.WiredTigerTestCase):
@@ -57,8 +57,7 @@ class test_compact02(wttest.WiredTigerTestCase):
('64KB', dict(fileConfig='leaf_page_max=64KB')),
('128KB', dict(fileConfig='leaf_page_max=128KB')),
]
- scenarios = \
- number_scenarios(multiply_scenarios('.', types, cacheSize, fileConfig))
+ scenarios = make_scenarios(types, cacheSize, fileConfig)
# We want about 22K records that total about 130Mb. That is an average
# of 6196 bytes per record. Half the records should be smaller, about
@@ -97,7 +96,7 @@ class test_compact02(wttest.WiredTigerTestCase):
self.home = '.'
conn_params = 'create,' + \
cacheSize + ',error_prefix="%s: ",' % self.shortid() + \
- 'statistics=(fast)'
+ 'statistics=(fast),eviction_dirty_target=99,eviction_dirty_trigger=99'
try:
self.conn = wiredtiger.wiredtiger_open(self.home, conn_params)
except wiredtiger.WiredTigerError as e:
diff --git a/test/suite/test_compress01.py b/test/suite/test_compress01.py
index 94c748fc3e5..2a7e2a7e1a8 100644
--- a/test/suite/test_compress01.py
+++ b/test/suite/test_compress01.py
@@ -32,7 +32,7 @@
import os, run
import wiredtiger, wttest
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# Test basic compression
class test_compress01(wttest.WiredTigerTestCase):
@@ -46,7 +46,7 @@ class test_compress01(wttest.WiredTigerTestCase):
('snappy', dict(compress='snappy')),
('none', dict(compress=None)),
]
- scenarios = number_scenarios(multiply_scenarios('.', types, compress))
+ scenarios = make_scenarios(types, compress)
nrecords = 10000
bigvalue = "abcdefghij" * 1000
diff --git a/test/suite/test_config03.py b/test/suite/test_config03.py
index e91c5de62f8..88ca6ae3f39 100644
--- a/test/suite/test_config03.py
+++ b/test/suite/test_config03.py
@@ -69,14 +69,11 @@ class test_config03(test_base03.test_base03):
'eviction_trigger', 'hazard_max', 'multiprocess',
'session_max', 'verbose' ]
- all_scenarios = wtscenario.multiply_scenarios('_',
+ scenarios = wtscenario.make_scenarios(
cache_size_scenarios, create_scenarios, error_prefix_scenarios,
eviction_target_scenarios, eviction_trigger_scenarios,
hazard_max_scenarios, multiprocess_scenarios, session_max_scenarios,
- transactional_scenarios, verbose_scenarios)
-
- scenarios = wtscenario.prune_scenarios(all_scenarios, 1000)
- scenarios = wtscenario.number_scenarios(scenarios)
+ transactional_scenarios, verbose_scenarios, prune=1000)
#wttest.WiredTigerTestCase.printVerbose(2, 'test_config03: running ' + \
# str(len(scenarios)) + ' of ' + \
diff --git a/test/suite/test_cursor01.py b/test/suite/test_cursor01.py
index cf39d4a4ba4..8c66042eec0 100644
--- a/test/suite/test_cursor01.py
+++ b/test/suite/test_cursor01.py
@@ -27,7 +27,7 @@
# OTHER DEALINGS IN THE SOFTWARE.
import wiredtiger, wttest
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_cursor01.py
# Cursor operations
@@ -41,7 +41,7 @@ class test_cursor01(wttest.WiredTigerTestCase):
table_name1 = 'test_cursor01'
nentries = 10
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file-col', dict(tablekind='col',uri='file')),
('file-fix', dict(tablekind='fix',uri='file')),
('file-row', dict(tablekind='row',uri='file')),
diff --git a/test/suite/test_cursor02.py b/test/suite/test_cursor02.py
index eb1ba4dfc41..a83d30def47 100644
--- a/test/suite/test_cursor02.py
+++ b/test/suite/test_cursor02.py
@@ -28,7 +28,7 @@
import wiredtiger
from test_cursor_tracker import TestCursorTracker
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_cursor02.py
# Cursor operations on small tables.
@@ -39,7 +39,7 @@ class test_cursor02(TestCursorTracker):
key/value content and to track/verify content
after inserts and removes.
"""
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('row', dict(tablekind='row', uri='table')),
('lsm-row', dict(tablekind='row', uri='lsm')),
('col', dict(tablekind='col', uri='table')),
diff --git a/test/suite/test_cursor03.py b/test/suite/test_cursor03.py
index 63237f942ca..b4598483c12 100644
--- a/test/suite/test_cursor03.py
+++ b/test/suite/test_cursor03.py
@@ -28,7 +28,7 @@
import wiredtiger
from test_cursor_tracker import TestCursorTracker
-from wtscenario import multiply_scenarios
+from wtscenario import make_scenarios
# test_cursor03.py
# Cursor operations on tables of various sizes, with key/values of various
@@ -40,7 +40,7 @@ class test_cursor03(TestCursorTracker):
key/value content and to track/verify content
after inserts and removes.
"""
- scenarios = multiply_scenarios('.', [
+ scenarios = make_scenarios([
('row', dict(tablekind='row', keysize=None, valsize=None, uri='table')),
('lsm-row', dict(tablekind='row', keysize=None, valsize=None, uri='lsm')),
('col', dict(tablekind='col', keysize=None, valsize=None, uri='table')),
diff --git a/test/suite/test_cursor04.py b/test/suite/test_cursor04.py
index 6576c623f8a..8cbf922b5eb 100644
--- a/test/suite/test_cursor04.py
+++ b/test/suite/test_cursor04.py
@@ -27,7 +27,7 @@
# OTHER DEALINGS IN THE SOFTWARE.
import wiredtiger, wttest
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_base04.py
# Cursor operations
@@ -38,7 +38,7 @@ class test_cursor04(wttest.WiredTigerTestCase):
table_name1 = 'test_cursor04'
nentries = 20
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('row', dict(tablekind='row', uri='table')),
('lsm-row', dict(tablekind='row', uri='lsm')),
('col', dict(tablekind='col', uri='table')),
diff --git a/test/suite/test_cursor06.py b/test/suite/test_cursor06.py
index 5545c862dd7..3a6240bc6c7 100644
--- a/test/suite/test_cursor06.py
+++ b/test/suite/test_cursor06.py
@@ -29,13 +29,13 @@
import wiredtiger, wttest
from helper import key_populate, value_populate, simple_populate
from helper import complex_value_populate, complex_populate
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_cursor06.py
# Test cursor reconfiguration.
class test_cursor06(wttest.WiredTigerTestCase):
name = 'reconfigure'
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file-r', dict(type='file:', config='key_format=r', complex=0)),
('file-S', dict(type='file:', config='key_format=S', complex=0)),
('lsm-S', dict(type='lsm:', config='key_format=S', complex=0)),
diff --git a/test/suite/test_cursor07.py b/test/suite/test_cursor07.py
index d8de0874d7f..d6078183fc1 100644
--- a/test/suite/test_cursor07.py
+++ b/test/suite/test_cursor07.py
@@ -33,7 +33,7 @@
import fnmatch, os, shutil, run, time
from suite_subprocess import suite_subprocess
from wiredtiger import stat
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
import wttest
class test_cursor07(wttest.WiredTigerTestCase, suite_subprocess):
@@ -44,7 +44,7 @@ class test_cursor07(wttest.WiredTigerTestCase, suite_subprocess):
# test that scenario for log cursors.
nkeys = 7000
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('regular', dict(reopen=False)),
('reopen', dict(reopen=True))
])
diff --git a/test/suite/test_cursor08.py b/test/suite/test_cursor08.py
index 1a379518224..3f8f50defa7 100644
--- a/test/suite/test_cursor08.py
+++ b/test/suite/test_cursor08.py
@@ -33,7 +33,7 @@
import fnmatch, os, shutil, run, time
from suite_subprocess import suite_subprocess
from wiredtiger import stat, WiredTigerError
-from wtscenario import multiply_scenarios, number_scenarios, check_scenarios
+from wtscenario import make_scenarios
import wttest
class test_cursor08(wttest.WiredTigerTestCase, suite_subprocess):
@@ -42,17 +42,17 @@ class test_cursor08(wttest.WiredTigerTestCase, suite_subprocess):
uri = 'table:' + tablename
nkeys = 500
- reopens = check_scenarios([
+ reopens = [
('regular', dict(reopen=False)),
('reopen', dict(reopen=True))
- ])
- compress = check_scenarios([
+ ]
+ compress = [
('nop', dict(compress='nop')),
('snappy', dict(compress='snappy')),
('zlib', dict(compress='zlib')),
('none', dict(compress='none')),
- ])
- scenarios = number_scenarios(multiply_scenarios('.', reopens, compress))
+ ]
+ scenarios = make_scenarios(reopens, compress)
# Load the compression extension, and enable it for logging.
def conn_config(self, dir):
return 'log=(archive=false,enabled,file_max=%s,' % self.logmax + \
diff --git a/test/suite/test_cursor09.py b/test/suite/test_cursor09.py
index b77336bc1d7..a05caea4f1f 100644
--- a/test/suite/test_cursor09.py
+++ b/test/suite/test_cursor09.py
@@ -29,12 +29,12 @@
import wiredtiger, wttest
from helper import key_populate, value_populate, simple_populate
from helper import complex_populate, complex_value_populate
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_cursor09.py
# JIRA WT-2217: insert resets key/value "set".
class test_cursor09(wttest.WiredTigerTestCase):
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file-r', dict(type='file:', config='key_format=r', complex=0)),
('file-S', dict(type='file:', config='key_format=S', complex=0)),
('lsm-S', dict(type='lsm:', config='key_format=S', complex=0)),
diff --git a/test/suite/test_cursor_compare.py b/test/suite/test_cursor_compare.py
index 130f4e8ca96..179e20682d2 100644
--- a/test/suite/test_cursor_compare.py
+++ b/test/suite/test_cursor_compare.py
@@ -29,7 +29,7 @@
import wiredtiger, wttest, exceptions
from helper import complex_populate, simple_populate, key_populate
from helper import complex_populate_index_name
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# Test cursor comparisons.
class test_cursor_comparison(wttest.WiredTigerTestCase):
@@ -45,7 +45,7 @@ class test_cursor_comparison(wttest.WiredTigerTestCase):
('recno', dict(keyfmt='r')),
('string', dict(keyfmt='S'))
]
- scenarios = number_scenarios(multiply_scenarios('.', types, keyfmt))
+ scenarios = make_scenarios(types, keyfmt)
def test_cursor_comparison(self):
uri = self.type + 'compare'
diff --git a/test/suite/test_cursor_pin.py b/test/suite/test_cursor_pin.py
index 329759d8fc8..1aea49c32b0 100644
--- a/test/suite/test_cursor_pin.py
+++ b/test/suite/test_cursor_pin.py
@@ -28,7 +28,7 @@
import wiredtiger, wttest
from helper import simple_populate, key_populate, value_populate
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_cursor_pin.py
# Smoke-test fast-path searching for pinned pages before re-descending
@@ -37,7 +37,7 @@ class test_cursor_pin(wttest.WiredTigerTestCase):
uri = 'file:cursor_pin'
nentries = 10000
config = 'allocation_size=512,leaf_page_max=512,value_format=S,key_format='
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('recno', dict(keyfmt='r')),
('string', dict(keyfmt='S')),
])
diff --git a/test/suite/test_cursor_random.py b/test/suite/test_cursor_random.py
index 16ce5cae685..8d7c230043b 100644
--- a/test/suite/test_cursor_random.py
+++ b/test/suite/test_cursor_random.py
@@ -29,7 +29,7 @@
import wiredtiger, wttest
from helper import complex_populate, simple_populate
from helper import key_populate, value_populate
-from wtscenario import check_scenarios, multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# test_cursor_random.py
# Cursor next_random operations
@@ -42,7 +42,7 @@ class test_cursor_random(wttest.WiredTigerTestCase):
('sample', dict(config='next_random=true,next_random_sample_size=35')),
('not-sample', dict(config='next_random=true'))
]
- scenarios =number_scenarios(multiply_scenarios('.', types, config))
+ scenarios = make_scenarios(types, config)
# Check that opening a random cursor on a row-store returns not-supported
# for methods other than next, reconfigure and reset, and next returns
@@ -136,7 +136,7 @@ class test_cursor_random(wttest.WiredTigerTestCase):
# Check that opening a random cursor on column-store returns not-supported.
class test_cursor_random_column(wttest.WiredTigerTestCase):
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file', dict(uri='file:random')),
('table', dict(uri='table:random'))
])
@@ -159,7 +159,7 @@ class test_cursor_random_invisible(wttest.WiredTigerTestCase):
('sample', dict(config='next_random=true,next_random_sample_size=35')),
('not-sample', dict(config='next_random=true'))
]
- scenarios =number_scenarios(multiply_scenarios('.', types, config))
+ scenarios = make_scenarios(types, config)
def test_cursor_random_invisible_all(self):
uri = self.type
diff --git a/test/suite/test_cursor_random02.py b/test/suite/test_cursor_random02.py
index 84ac0279fc4..93aa97f2282 100644
--- a/test/suite/test_cursor_random02.py
+++ b/test/suite/test_cursor_random02.py
@@ -29,7 +29,7 @@
import wiredtiger, wttest
from helper import complex_populate, simple_populate
from helper import key_populate, value_populate
-from wtscenario import check_scenarios, multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# test_cursor_random02.py
# Cursor next_random operations
@@ -46,7 +46,7 @@ class test_cursor_random02(wttest.WiredTigerTestCase):
('10000', dict(records=10000)),
('50000', dict(records=50000)),
]
- scenarios = number_scenarios(multiply_scenarios('.', config, records))
+ scenarios = make_scenarios(config, records)
# Check that next_random works in the presence of a larger set of values,
# where the values are in an insert list.
diff --git a/test/suite/test_drop.py b/test/suite/test_drop.py
index 52ea7251ab5..a3e80214295 100644
--- a/test/suite/test_drop.py
+++ b/test/suite/test_drop.py
@@ -30,7 +30,7 @@ import os, time
import wiredtiger, wttest
from helper import confirm_does_not_exist, complex_populate, \
complex_populate_index_name, simple_populate
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_drop.py
# session level drop operation
@@ -38,7 +38,7 @@ class test_drop(wttest.WiredTigerTestCase):
name = 'test_drop'
extra_config = ''
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file', dict(uri='file:')),
('table', dict(uri='table:')),
('table-lsm', dict(uri='table:', extra_config=',type=lsm')),
diff --git a/test/suite/test_dump.py b/test/suite/test_dump.py
index 85196174c1b..280d5870359 100644
--- a/test/suite/test_dump.py
+++ b/test/suite/test_dump.py
@@ -30,9 +30,10 @@ import os, shutil
import wiredtiger, wttest
from helper import \
complex_populate, complex_populate_check, \
- simple_populate, simple_populate_check
+ simple_populate, simple_populate_check, \
+ simple_index_populate, simple_index_populate_check
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# test_dump.py
# Utilities: wt dump
@@ -64,6 +65,9 @@ class test_dump(wttest.WiredTigerTestCase, suite_subprocess):
('table-simple', dict(uri='table:', config='', lsm=False,
populate=simple_populate,
populate_check=simple_populate_check)),
+ ('table-index', dict(uri='table:', config='', lsm=False,
+ populate=simple_index_populate,
+ populate_check=simple_index_populate_check)),
('table-simple-lsm', dict(uri='table:', config='type=lsm', lsm=True,
populate=simple_populate,
populate_check=simple_populate_check)),
@@ -74,8 +78,7 @@ class test_dump(wttest.WiredTigerTestCase, suite_subprocess):
populate=complex_populate,
populate_check=complex_populate_check))
]
- scenarios = number_scenarios(
- multiply_scenarios('.', types, keyfmt, dumpfmt))
+ scenarios = make_scenarios(types, keyfmt, dumpfmt)
# Extract the values lines from the dump output.
def value_lines(self, fname):
diff --git a/test/suite/test_dupc.py b/test/suite/test_dupc.py
index ec55a36df4c..12b18f1ba79 100644
--- a/test/suite/test_dupc.py
+++ b/test/suite/test_dupc.py
@@ -33,7 +33,7 @@
import os, time
import wiredtiger, wttest
from helper import complex_populate, key_populate, simple_populate
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# Test session.open_cursor with cursor duplication.
class test_duplicate_cursor(wttest.WiredTigerTestCase):
@@ -42,7 +42,7 @@ class test_duplicate_cursor(wttest.WiredTigerTestCase):
config = 'key_format='
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file-r', dict(uri='file:', fmt='r')),
('file-S', dict(uri='file:', fmt='S')),
('table-r', dict(uri='table:', fmt='r')),
diff --git a/test/suite/test_durability01.py b/test/suite/test_durability01.py
index f578a79baf1..32cdd795914 100644
--- a/test/suite/test_durability01.py
+++ b/test/suite/test_durability01.py
@@ -34,7 +34,6 @@
import fnmatch, os, shutil, time
from helper import copy_wiredtiger_home
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios
import wttest
class test_durability01(wttest.WiredTigerTestCase, suite_subprocess):
diff --git a/test/suite/test_empty.py b/test/suite/test_empty.py
index 50b79db70e4..9fe88107412 100644
--- a/test/suite/test_empty.py
+++ b/test/suite/test_empty.py
@@ -29,14 +29,14 @@
import os
import wiredtiger, wttest
from helper import key_populate
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_empty.py
# Test that empty objects don't write anything other than a single sector.
class test_empty(wttest.WiredTigerTestCase):
name = 'test_empty'
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file-r', dict(type='file:', fmt='r')),
('file-S', dict(type='file:', fmt='S')),
('table-r', dict(type='table:', fmt='r')),
diff --git a/test/suite/test_encrypt01.py b/test/suite/test_encrypt01.py
index 0f2782204d2..d48605aaa83 100644
--- a/test/suite/test_encrypt01.py
+++ b/test/suite/test_encrypt01.py
@@ -32,7 +32,7 @@
import os, run, random
import wiredtiger, wttest
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# Test basic encryption
class test_encrypt01(wttest.WiredTigerTestCase):
@@ -60,8 +60,7 @@ class test_encrypt01(wttest.WiredTigerTestCase):
('none-snappy', dict(log_compress=None, block_compress='snappy')),
('snappy-lz4', dict(log_compress='snappy', block_compress='lz4')),
]
- scenarios = number_scenarios(multiply_scenarios('.', types,
- encrypt, compress))
+ scenarios = make_scenarios(types, encrypt, compress)
nrecords = 5000
bigvalue = "abcdefghij" * 1001 # len(bigvalue) = 10010
diff --git a/test/suite/test_encrypt02.py b/test/suite/test_encrypt02.py
index 0376b3e42e4..648686274c4 100644
--- a/test/suite/test_encrypt02.py
+++ b/test/suite/test_encrypt02.py
@@ -33,7 +33,7 @@
import os, run, random
import wiredtiger, wttest
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# Test basic encryption
class test_encrypt02(wttest.WiredTigerTestCase, suite_subprocess):
@@ -48,7 +48,7 @@ class test_encrypt02(wttest.WiredTigerTestCase, suite_subprocess):
('keyid-pass', dict( encrypt='rotn', encrypt_args='name=rotn,keyid=11',
secret_arg='ABC')),
]
- scenarios = number_scenarios(encrypt_type)
+ scenarios = make_scenarios(encrypt_type)
nrecords = 5000
bigvalue = "abcdefghij" * 1001 # len(bigvalue) = 10010
diff --git a/test/suite/test_encrypt03.py b/test/suite/test_encrypt03.py
index 702d0a2369f..0dc1755d6eb 100644
--- a/test/suite/test_encrypt03.py
+++ b/test/suite/test_encrypt03.py
@@ -32,7 +32,7 @@
import os, run, random
import wiredtiger, wttest
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# Test basic encryption
class test_encrypt03(wttest.WiredTigerTestCase):
@@ -48,7 +48,7 @@ class test_encrypt03(wttest.WiredTigerTestCase):
#('noname', dict( sys_encrypt='rotn', sys_encrypt_args=',keyid=11',
# file_encrypt='none', file_encrypt_args=',keyid=13')),
]
- scenarios = number_scenarios(multiply_scenarios('.', types, encrypt))
+ scenarios = make_scenarios(types, encrypt)
# Override WiredTigerTestCase, we have extensions.
def setUpConnectionOpen(self, dir):
diff --git a/test/suite/test_encrypt04.py b/test/suite/test_encrypt04.py
index d7c12d2cba8..97d2cee03a0 100644
--- a/test/suite/test_encrypt04.py
+++ b/test/suite/test_encrypt04.py
@@ -32,7 +32,7 @@
import os, run, random
import wiredtiger, wttest
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
from suite_subprocess import suite_subprocess
# Test basic encryption with mismatched configuration
@@ -69,8 +69,7 @@ class test_encrypt04(wttest.WiredTigerTestCase, suite_subprocess):
('rotn11xyz_and_clear', dict( name2='rotn', keyid2='11',
secretkey2='XYZ', fileinclear2=True))
]
- scenarios = number_scenarios(multiply_scenarios \
- ('.', encrypt_scen_1, encrypt_scen_2))
+ scenarios = make_scenarios(encrypt_scen_1, encrypt_scen_2)
nrecords = 5000
bigvalue = "abcdefghij" * 1001 # len(bigvalue) = 10010
diff --git a/test/suite/test_encrypt05.py b/test/suite/test_encrypt05.py
index afd8a8103f9..19a3522b3d5 100644
--- a/test/suite/test_encrypt05.py
+++ b/test/suite/test_encrypt05.py
@@ -32,7 +32,7 @@
import os, run, random
import wiredtiger, wttest
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# Test raw compression with encryption
class test_encrypt05(wttest.WiredTigerTestCase):
@@ -44,8 +44,7 @@ class test_encrypt05(wttest.WiredTigerTestCase):
compress = [
('zlib', dict(log_compress='zlib', block_compress='zlib')),
]
- scenarios = number_scenarios(multiply_scenarios('.',
- encrypt, compress))
+ scenarios = make_scenarios(encrypt, compress)
nrecords = 500
bigvalue = 'a' * 500 # we use values that will definitely give compression
diff --git a/test/suite/test_encrypt06.py b/test/suite/test_encrypt06.py
index 5c88b698aeb..9300583d099 100644
--- a/test/suite/test_encrypt06.py
+++ b/test/suite/test_encrypt06.py
@@ -32,7 +32,7 @@
import os, run, random
import wiredtiger, wttest
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# Test encryption, when on, does not leak any information
class test_encrypt06(wttest.WiredTigerTestCase):
@@ -86,7 +86,7 @@ class test_encrypt06(wttest.WiredTigerTestCase):
file0_encrypt='rotn', file0_encrypt_args=key13, encrypt0=True,
file1_encrypt='none', file1_encrypt_args='', encrypt1=False)),
]
- scenarios = number_scenarios(multiply_scenarios('.', encrypt, storagetype))
+ scenarios = make_scenarios(encrypt, storagetype)
nrecords = 1000
# Override WiredTigerTestCase, we have extensions.
diff --git a/test/suite/test_encrypt07.py b/test/suite/test_encrypt07.py
index 30f28e096a8..97ab1987d4f 100644
--- a/test/suite/test_encrypt07.py
+++ b/test/suite/test_encrypt07.py
@@ -32,7 +32,6 @@
import os, run, string, codecs
import wiredtiger, wttest
-from wtscenario import multiply_scenarios, number_scenarios
import test_salvage
# Run the regular salvage test, but with encryption on
diff --git a/test/suite/test_excl.py b/test/suite/test_excl.py
index 90926f51877..cea5756dfbb 100644
--- a/test/suite/test_excl.py
+++ b/test/suite/test_excl.py
@@ -27,11 +27,11 @@
# OTHER DEALINGS IN THE SOFTWARE.
import wiredtiger, wttest
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# Test session.create with the exclusive configuration.
class test_create_excl(wttest.WiredTigerTestCase):
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file', dict(type='file:')),
('table', dict(type='table:'))
])
diff --git a/test/suite/test_huffman01.py b/test/suite/test_huffman01.py
index d71198e3151..be307550f2e 100644
--- a/test/suite/test_huffman01.py
+++ b/test/suite/test_huffman01.py
@@ -28,7 +28,7 @@
import os
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
import wiredtiger, wttest
# test_huffman01.py
@@ -52,7 +52,7 @@ class test_huffman01(wttest.WiredTigerTestCase, suite_subprocess):
('utf8', dict(huffval=',huffman_value=utf8t8file',vfile='t8file')),
('utf16', dict(huffval=',huffman_value=utf16t16file',vfile='t16file')),
]
- scenarios = number_scenarios(multiply_scenarios('.', huffkey, huffval))
+ scenarios = make_scenarios(huffkey, huffval)
def test_huffman(self):
dir = self.conn.get_home()
diff --git a/test/suite/test_huffman02.py b/test/suite/test_huffman02.py
index aa4329415a4..d74704daf58 100644
--- a/test/suite/test_huffman02.py
+++ b/test/suite/test_huffman02.py
@@ -28,7 +28,7 @@
import os
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
import wiredtiger, wttest
# test_huffman02.py
@@ -48,7 +48,7 @@ class test_huffman02(wttest.WiredTigerTestCase, suite_subprocess):
('file', dict(uri='file:huff')),
('table', dict(uri='table:huff')),
]
- scenarios = number_scenarios(multiply_scenarios('.',type,huffkey, huffval))
+ scenarios = make_scenarios(type, huffkey, huffval)
def test_huffman(self):
if self.keybad or self.valbad:
diff --git a/test/suite/test_index02.py b/test/suite/test_index02.py
new file mode 100644
index 00000000000..9f39df003b1
--- /dev/null
+++ b/test/suite/test_index02.py
@@ -0,0 +1,68 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2016 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wiredtiger, wttest
+
+# test_index02.py
+# test search_near in indices
+class test_index02(wttest.WiredTigerTestCase):
+ '''Test search_near in indices'''
+
+ basename = 'test_index02'
+ tablename = 'table:' + basename
+ indexname = 'index:' + basename + ":inverse"
+
+ def test_search_near(self):
+ '''Create a table, look for a nonexistent key'''
+ self.session.create(self.tablename, 'key_format=r,value_format=Q,columns=(k,v)')
+ self.session.create(self.indexname, 'columns=(v)')
+ cur = self.session.open_cursor(self.tablename, None, "append")
+ cur.set_value(1)
+ cur.insert()
+ cur.set_value(5)
+ cur.insert()
+ cur.set_value(5)
+ cur.insert()
+ cur.set_value(5)
+ cur.insert()
+ cur.set_value(10)
+ cur.insert()
+
+ # search near should find a match
+ cur2 = self.session.open_cursor(self.indexname, None, None)
+ cur2.set_key(5)
+ self.assertEqual(cur2.search_near(), 0)
+
+ # Retry after reopening
+ self.reopen_conn()
+ cur3 = self.session.open_cursor(self.indexname, None, None)
+ cur3.set_key(5)
+ self.assertEqual(cur3.search_near(), 0)
+
+if __name__ == '__main__':
+ wttest.run()
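test_index02 inserts duplicate values (three rows with value 5) and asserts that an index lookup via search_near finds an exact match both before and after reopening the connection. The sketch below interprets the return value the assertion checks; in the Python API search_near() returns the comparison result directly (the C API reports it through an 'exact' out-parameter), and the helper here is illustrative, not part of the test:

    # Illustrative helper: interprets the value asserted on above.
    def describe_search_near(cursor, key):
        cursor.set_key(key)
        exact = cursor.search_near()
        if exact == 0:
            return 'positioned on an exact match'
        return ('positioned on the nearest smaller key' if exact < 0
                else 'positioned on the nearest larger key')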
diff --git a/test/suite/test_inmem01.py b/test/suite/test_inmem01.py
index 875ebb2bfa7..c6ae7ff6c4b 100644
--- a/test/suite/test_inmem01.py
+++ b/test/suite/test_inmem01.py
@@ -30,95 +30,73 @@ import wiredtiger, wttest
from time import sleep
from helper import simple_populate, simple_populate_check
from helper import key_populate, value_populate
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_inmem01.py
# Test in-memory configuration.
class test_inmem01(wttest.WiredTigerTestCase):
- name = 'inmem01'
- """
- In memory configuration still creates files on disk, but has limits
- in terms of how much data can be written.
- Test various scenarios including:
- - Add a small amount of data, ensure it is present.
- - Add more data than would fit into the configured cache.
- - Fill the cache with data, remove some data, ensure more data can be
- inserted (after a reasonable amount of time for space to be reclaimed)
- - Run queries after adding, removing and re-inserting data.
- - Try out keeping a cursor open while adding new data.
- """
- scenarios = check_scenarios([
- ('col', dict(tablekind='col')),
- # Fixed length is very slow, disable it for now
- #('fix', dict(tablekind='fix')),
- ('row', dict(tablekind='row'))
- ])
-
- # create an in-memory database
- conn_config = 'cache_size=5MB,' + \
- 'file_manager=(close_idle_time=0),in_memory=true'
+ uri = 'table:inmem01'
+ conn_config = \
+ 'cache_size=5MB,file_manager=(close_idle_time=0),in_memory=true'
+ table_config = ',memory_page_max=32k,leaf_page_max=4k'
- def get_table_config(self):
- kf = 'key_format='
- vf = 'value_format='
- if self.tablekind == 'row':
- kf = kf + 'S'
- else:
- kf = kf + 'r' # record format
- if self.tablekind == 'fix':
- vf = vf + '8t'
- else:
- vf = vf + 'S'
- return 'memory_page_max=32k,leaf_page_max=4k,' + kf + ',' + vf
+ scenarios = make_scenarios([
+ ('col', dict(fmt='key_format=r,value_format=S')),
+ ('fix', dict(fmt='key_format=r,value_format=8t')),
+ ('row', dict(fmt='key_format=S,value_format=S'))
+ ])
+ # Smoke-test in-memory configurations, add a small amount of data and
+ # ensure it's visible.
def test_insert(self):
- table_config = self.get_table_config()
- simple_populate(self,
- "table:" + self.name, table_config, 1000)
- # Ensure the data is visible.
- simple_populate_check(self, 'table:' + self.name, 1000)
+ config = self.fmt + self.table_config
+ simple_populate(self, self.uri, config, 1000)
+ simple_populate_check(self, self.uri, 1000)
+ # Add more data than fits into the configured cache and verify it fails.
def test_insert_over_capacity(self):
- table_config = self.get_table_config()
+ config = self.fmt + self.table_config
msg = '/WT_CACHE_FULL.*/'
self.assertRaisesHavingMessage(wiredtiger.WiredTigerError,
- lambda:simple_populate(self,
- "table:" + self.name, table_config, 10000000), msg)
+ lambda:simple_populate(self, self.uri, config, 10000000), msg)
- # Figure out the last key we inserted.
- cursor = self.session.open_cursor('table:' + self.name, None)
+ # Figure out the last key we successfully inserted, and check all
+ # previous inserts are still there.
+ cursor = self.session.open_cursor(self.uri, None)
cursor.prev()
last_key = int(cursor.get_key())
- simple_populate_check(self, 'table:' + self.name, last_key)
+ simple_populate_check(self, self.uri, last_key)
+ # Fill the cache with data, remove some data, ensure more data can be
+ # inserted (after a reasonable amount of time for space to be reclaimed).
def test_insert_over_delete(self):
- table_config = self.get_table_config()
+ config = self.fmt + self.table_config
msg = '/WT_CACHE_FULL.*/'
self.assertRaisesHavingMessage(wiredtiger.WiredTigerError,
- lambda:simple_populate(self,
- "table:" + self.name, table_config, 10000000), msg)
+ lambda:simple_populate(self, self.uri, config, 10000000), msg)
# Now that the database contains as much data as will fit into
# the configured cache, verify removes succeed.
- cursor = self.session.open_cursor('table:' + self.name, None)
+ cursor = self.session.open_cursor(self.uri, None)
for i in range(1, 100):
cursor.set_key(key_populate(cursor, i))
cursor.remove()
+ # Run queries after adding, removing and re-inserting data.
+ # Try out keeping a cursor open while adding new data.
def test_insert_over_delete_replace(self):
- table_config = self.get_table_config()
+ config = self.fmt + self.table_config
msg = '/WT_CACHE_FULL.*/'
self.assertRaisesHavingMessage(wiredtiger.WiredTigerError,
- lambda:simple_populate(self,
- "table:" + self.name, table_config, 10000000), msg)
+ lambda:simple_populate(self, self.uri, config, 10000000), msg)
- cursor = self.session.open_cursor('table:' + self.name, None)
+ cursor = self.session.open_cursor(self.uri, None)
cursor.prev()
last_key = int(cursor.get_key())
# Now that the database contains as much data as will fit into
# the configured cache, verify removes succeed.
- cursor = self.session.open_cursor('table:' + self.name, None)
+ cursor = self.session.open_cursor(self.uri, None)
for i in range(1, last_key / 4, 1):
cursor.set_key(key_populate(cursor, i))
cursor.remove()
diff --git a/test/suite/test_intpack.py b/test/suite/test_intpack.py
index 187b2d7f579..b0cece09494 100644
--- a/test/suite/test_intpack.py
+++ b/test/suite/test_intpack.py
@@ -31,7 +31,7 @@
#
import wiredtiger, wttest
-from wtscenario import check_scenarios, number_scenarios
+from wtscenario import make_scenarios
class PackTester:
def __init__(self, formatcode, validlow, validhigh, equals):
@@ -126,22 +126,27 @@ class PackTester:
class test_intpack(wttest.WiredTigerTestCase):
name = 'test_intpack'
- scenarios = check_scenarios([
- ('b', dict(formatcode='b', low=-128, high=127, nbits=8)),
- ('B', dict(formatcode='B', low=0, high=255, nbits=8)),
- ('8t', dict(formatcode='8t', low=0, high=255, nbits=8)),
- ('5t', dict(formatcode='5t', low=0, high=31, nbits=5)),
- ('h', dict(formatcode='h', low=-32768, high=32767, nbits=16)),
- ('H', dict(formatcode='H', low=0, high=65535, nbits=16)),
- ('i', dict(formatcode='i', low=-2147483648, high=2147483647, nbits=32)),
- ('I', dict(formatcode='I', low=0, high=4294967295, nbits=32)),
- ('l', dict(formatcode='l', low=-2147483648, high=2147483647, nbits=32)),
- ('L', dict(formatcode='L', low=0, high=4294967295, nbits=32)),
- ('q', dict(formatcode='q', low=-9223372036854775808,
+ # We have to be a bit verbose with naming here, as there can be problems
+ # with case-insensitive test names.
+
+ scenarios = make_scenarios([
+ ('int8_t_b', dict(formatcode='b', low=-128, high=127, nbits=8)),
+ ('uint8_t_B', dict(formatcode='B', low=0, high=255, nbits=8)),
+ ('fix_len_8t', dict(formatcode='8t', low=0, high=255, nbits=8)),
+ ('fix_len_5t', dict(formatcode='5t', low=0, high=31, nbits=5)),
+ ('int16_t_h', dict(formatcode='h', low=-32768, high=32767, nbits=16)),
+ ('uint16_t_H', dict(formatcode='H', low=0, high=65535, nbits=16)),
+ ('int32_t_i', dict(formatcode='i', low=-2147483648, high=2147483647,
+ nbits=32)),
+ ('uint32_t_I', dict(formatcode='I', low=0, high=4294967295, nbits=32)),
+ ('int32_t_l', dict(formatcode='l', low=-2147483648, high=2147483647,
+ nbits=32)),
+ ('uint32_t_L', dict(formatcode='L', low=0, high=4294967295, nbits=32)),
+ ('int64_t_q', dict(formatcode='q', low=-9223372036854775808,
high=9223372036854775807, nbits=64)),
- ('Q', dict(formatcode='Q', low=0, high=18446744073709551615, nbits=64)),
+ ('uint64_t_Q', dict(formatcode='Q', low=0, high=18446744073709551615,
+ nbits=64)),
])
- scenarios = check_scenarios(number_scenarios(scenarios))
def test_packing(self):
pt = PackTester(self.formatcode, self.low, self.high, self.assertEquals)
diff --git a/test/suite/test_join01.py b/test/suite/test_join01.py
index f8d96a2718a..f3b13026896 100644
--- a/test/suite/test_join01.py
+++ b/test/suite/test_join01.py
@@ -27,7 +27,7 @@
# OTHER DEALINGS IN THE SOFTWARE.
import wiredtiger, wttest
-from wtscenario import check_scenarios, multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# test_join01.py
# Join operations
@@ -67,11 +67,9 @@ class test_join01(wttest.WiredTigerTestCase):
('order=2', dict(join_order=2)),
('order=3', dict(join_order=3)),
]
- scenarios = number_scenarios(multiply_scenarios('.', type_scen,
- bloom0_scen, bloom1_scen,
- projection_scen,
- nested_scen, stats_scen,
- order_scen))
+ scenarios = make_scenarios(type_scen, bloom0_scen, bloom1_scen,
+ projection_scen, nested_scen, stats_scen,
+ order_scen)
# We need statistics for these tests.
conn_config = 'statistics=(all)'
diff --git a/test/suite/test_join02.py b/test/suite/test_join02.py
index a691c499cf6..db11ed01039 100644
--- a/test/suite/test_join02.py
+++ b/test/suite/test_join02.py
@@ -27,7 +27,7 @@
# OTHER DEALINGS IN THE SOFTWARE.
import wiredtiger, wttest, suite_random
-from wtscenario import check_scenarios, multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# test_join02.py
# Join operations
@@ -48,7 +48,7 @@ class test_join02(wttest.WiredTigerTestCase):
('nobloom', dict(usebloom=False))
]
- scenarios = number_scenarios(multiply_scenarios('.', keyscen, bloomscen))
+ scenarios = make_scenarios(keyscen, bloomscen)
# Start our range from 1, since WT record numbers start at 1,
# it makes things work out nicer.
diff --git a/test/suite/test_join03.py b/test/suite/test_join03.py
index 613d2396b07..af19d934d70 100644
--- a/test/suite/test_join03.py
+++ b/test/suite/test_join03.py
@@ -28,7 +28,6 @@
import os
import wiredtiger, wttest, run
-from wtscenario import check_scenarios, multiply_scenarios, number_scenarios
# test_join03.py
# Join operations
diff --git a/test/suite/test_join04.py b/test/suite/test_join04.py
index 7e2afb15285..b270cb7a21c 100644
--- a/test/suite/test_join04.py
+++ b/test/suite/test_join04.py
@@ -28,7 +28,6 @@
import os
import wiredtiger, wttest, run
-from wtscenario import check_scenarios, multiply_scenarios, number_scenarios
# test_join04.py
# Join operations
diff --git a/test/suite/test_join05.py b/test/suite/test_join05.py
index ef2be4c6460..7dcb3e08911 100644
--- a/test/suite/test_join05.py
+++ b/test/suite/test_join05.py
@@ -27,7 +27,6 @@
# OTHER DEALINGS IN THE SOFTWARE.
import wiredtiger, wttest
-from wtscenario import check_scenarios, multiply_scenarios, number_scenarios
# test_join05.py
# Tests based on JIRA reports
diff --git a/test/suite/test_join06.py b/test/suite/test_join06.py
index 9af6f93792f..5fedd365712 100644
--- a/test/suite/test_join06.py
+++ b/test/suite/test_join06.py
@@ -28,7 +28,7 @@
import os
import wiredtiger, wttest, run
-from wtscenario import check_scenarios, multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# test_join06.py
# Join operations
@@ -46,7 +46,7 @@ class test_join06(wttest.WiredTigerTestCase):
('nobloom', dict(bloom=False))
]
- scenarios = number_scenarios(multiply_scenarios('.', isoscen, bloomscen))
+ scenarios = make_scenarios(isoscen, bloomscen)
def gen_values(self, i):
s = str(i) # 345 => "345"
diff --git a/test/suite/test_join07.py b/test/suite/test_join07.py
index 36e91361329..2a32e678d72 100644
--- a/test/suite/test_join07.py
+++ b/test/suite/test_join07.py
@@ -28,7 +28,7 @@
import os, re, run
import wiredtiger, wttest, suite_random
-from wtscenario import check_scenarios, multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
class ParseException(Exception):
def __init__(self, msg):
@@ -198,7 +198,7 @@ class test_join07(wttest.WiredTigerTestCase):
('noextractor', dict(extractor=False))
]
- scenarios = number_scenarios(extractscen)
+ scenarios = make_scenarios(extractscen)
# Return the wiredtiger_open extension argument for a shared library.
def extensionArg(self, exts):
diff --git a/test/suite/test_join08.py b/test/suite/test_join08.py
index 6d674ab8193..d389fad706b 100644
--- a/test/suite/test_join08.py
+++ b/test/suite/test_join08.py
@@ -27,7 +27,6 @@
# OTHER DEALINGS IN THE SOFTWARE.
import wiredtiger, wttest
-from wtscenario import check_scenarios, multiply_scenarios, number_scenarios
# test_join08.py
# Test join error paths
diff --git a/test/suite/test_jsondump01.py b/test/suite/test_jsondump01.py
index 10262edc777..dc8027c2115 100644
--- a/test/suite/test_jsondump01.py
+++ b/test/suite/test_jsondump01.py
@@ -29,10 +29,12 @@
import os, json
import wiredtiger, wttest
from helper import \
- complex_populate, complex_populate_check_cursor,\
- simple_populate, simple_populate_check_cursor
+ complex_populate, complex_populate_check, complex_populate_check_cursor,\
+ simple_populate, simple_populate_check, simple_populate_check_cursor, \
+ simple_index_populate, simple_index_populate_check, \
+ simple_index_populate_check_cursor, compare_files
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# A 'fake' cursor based on a set of rows.
# It emulates a WT cursor well enough for the *_check_cursor methods.
@@ -79,25 +81,34 @@ class test_jsondump01(wttest.WiredTigerTestCase, suite_subprocess):
types = [
('file', dict(uri='file:', config='', lsm=False,
populate=simple_populate,
- populate_check=simple_populate_check_cursor)),
+ populate_check=simple_populate_check,
+ populate_check_cursor=simple_populate_check_cursor)),
('lsm', dict(uri='lsm:', config='', lsm=True,
populate=simple_populate,
- populate_check=simple_populate_check_cursor)),
+ populate_check=simple_populate_check,
+ populate_check_cursor=simple_populate_check_cursor)),
('table-simple', dict(uri='table:', config='', lsm=False,
populate=simple_populate,
- populate_check=simple_populate_check_cursor)),
+ populate_check=simple_populate_check,
+ populate_check_cursor=simple_populate_check_cursor)),
+ ('table-index', dict(uri='table:', config='', lsm=False,
+ populate=simple_index_populate,
+ populate_check=simple_index_populate_check,
+ populate_check_cursor=simple_index_populate_check_cursor)),
('table-simple-lsm', dict(uri='table:', config='type=lsm', lsm=True,
populate=simple_populate,
- populate_check=simple_populate_check_cursor)),
+ populate_check=simple_populate_check,
+ populate_check_cursor=simple_populate_check_cursor)),
('table-complex', dict(uri='table:', config='', lsm=False,
populate=complex_populate,
- populate_check=complex_populate_check_cursor)),
+ populate_check=complex_populate_check,
+ populate_check_cursor=complex_populate_check_cursor)),
('table-complex-lsm', dict(uri='table:', config='type=lsm', lsm=True,
populate=complex_populate,
- populate_check=complex_populate_check_cursor))
+ populate_check=complex_populate_check,
+ populate_check_cursor=complex_populate_check_cursor))
]
- scenarios = number_scenarios(
- multiply_scenarios('.', types, keyfmt))
+ scenarios = make_scenarios(types, keyfmt)
# Dump using util, re-load using python's JSON, and do a content comparison.
def test_jsondump_util(self):
@@ -132,7 +143,7 @@ class test_jsondump01(wttest.WiredTigerTestCase, suite_subprocess):
cursor = self.session.open_cursor(uri, None)
fake = FakeCursor(cursor.key_format, cursor.value_format, data)
cursor.close()
- self.populate_check(self, fake, self.nentries)
+ self.populate_check_cursor(self, fake, self.nentries)
# Dump using util, re-load using python's JSON, and do a content comparison.
def test_jsonload_util(self):
@@ -153,9 +164,18 @@ class test_jsondump01(wttest.WiredTigerTestCase, suite_subprocess):
loadcmd.append('-a')
self.runWt(loadcmd)
- # check the contents of the data we read.
- cursor = self.session.open_cursor(uri2, None)
- self.populate_check(self, cursor, self.nentries)
+ # Check the contents of the data we read.
+ self.populate_check(self, uri2, self.nentries)
+
+ # Reload into the original uri, and dump into another file.
+ self.session.drop(uri, None)
+ self.session.drop(uri2, None)
+ self.runWt(['load', '-jf', 'jsondump.out'])
+ self.runWt(['dump', '-j', uri], outfilename='jsondump2.out')
+
+ # Compare the two outputs, and check the content again.
+ compare_files(self, 'jsondump.out', 'jsondump2.out')
+ self.populate_check(self, uri, self.nentries)
if __name__ == '__main__':
wttest.run()
diff --git a/test/suite/test_lsm01.py b/test/suite/test_lsm01.py
index 1f89cf38d77..f6cee20e896 100644
--- a/test/suite/test_lsm01.py
+++ b/test/suite/test_lsm01.py
@@ -54,12 +54,10 @@ class test_lsm01(wttest.WiredTigerTestCase):
config_vars = [ 'chunk_size', 'merge_max', 'bloom',
'bloom_bit_count', 'bloom_hash_count' ]
- all_scenarios = wtscenario.multiply_scenarios('_',
+ scenarios = wtscenario.make_scenarios(
chunk_size_scenarios, merge_max_scenarios, bloom_scenarios,
- bloom_bit_scenarios, bloom_hash_scenarios, record_count_scenarios)
-
- scenarios = wtscenario.prune_scenarios(all_scenarios, 500)
- scenarios = wtscenario.number_scenarios(scenarios)
+ bloom_bit_scenarios, bloom_hash_scenarios, record_count_scenarios,
+ prune=500)
# Test drop of an object.
def test_lsm(self):
diff --git a/test/suite/test_metadata_cursor01.py b/test/suite/test_metadata_cursor01.py
index e759c14f846..7802f89f174 100644
--- a/test/suite/test_metadata_cursor01.py
+++ b/test/suite/test_metadata_cursor01.py
@@ -27,7 +27,7 @@
# OTHER DEALINGS IN THE SOFTWARE.
import wiredtiger, wttest
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_metadata_cursor01.py
# Metadata cursor operations
@@ -39,7 +39,7 @@ class test_metadata_cursor01(wttest.WiredTigerTestCase):
"""
table_name1 = 'test_metadata_cursor01'
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('plain', {'metauri' : 'metadata:'}),
('create', {'metauri' : 'metadata:create'}),
])
diff --git a/test/suite/test_nsnap01.py b/test/suite/test_nsnap01.py
index 5207b577ba4..7e8951750f8 100644
--- a/test/suite/test_nsnap01.py
+++ b/test/suite/test_nsnap01.py
@@ -30,7 +30,6 @@
# Named snapshots: basic API
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios
from helper import simple_populate
import wiredtiger, wttest
diff --git a/test/suite/test_nsnap02.py b/test/suite/test_nsnap02.py
index e4ed65ef72a..510c9d421ef 100644
--- a/test/suite/test_nsnap02.py
+++ b/test/suite/test_nsnap02.py
@@ -30,7 +30,6 @@
# Named snapshots: Combinations of dropping snapshots
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios
from helper import simple_populate
import wiredtiger, wttest
diff --git a/test/suite/test_nsnap03.py b/test/suite/test_nsnap03.py
index 0e853522940..3986c0c1a0a 100644
--- a/test/suite/test_nsnap03.py
+++ b/test/suite/test_nsnap03.py
@@ -30,7 +30,6 @@
# Named snapshots: Access and create from multiple sessions
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios
from helper import simple_populate
import wiredtiger, wttest
diff --git a/test/suite/test_nsnap04.py b/test/suite/test_nsnap04.py
index e8a5c9b6140..f9ef26b5600 100644
--- a/test/suite/test_nsnap04.py
+++ b/test/suite/test_nsnap04.py
@@ -30,7 +30,6 @@
# Named snapshots: Create snapshot from running transaction
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios
from helper import simple_populate
import wiredtiger, wttest
diff --git a/test/suite/test_overwrite.py b/test/suite/test_overwrite.py
index e22cdab4dea..4972a016bec 100644
--- a/test/suite/test_overwrite.py
+++ b/test/suite/test_overwrite.py
@@ -28,13 +28,13 @@
import wiredtiger, wttest
from helper import key_populate, simple_populate
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_overwrite.py
# cursor overwrite configuration method
class test_overwrite(wttest.WiredTigerTestCase):
name = 'overwrite'
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file-r', dict(type='file:',keyfmt='r')),
('file-S', dict(type='file:',keyfmt='S')),
('lsm-S', dict(type='lsm:',keyfmt='S')),
diff --git a/test/suite/test_perf001.py b/test/suite/test_perf001.py
index 1280639c9dd..b22ed2baeb0 100644
--- a/test/suite/test_perf001.py
+++ b/test/suite/test_perf001.py
@@ -32,13 +32,13 @@
import wiredtiger, wttest
import random
from time import clock, time
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# Test performance of inserting into a table with an index.
class test_perf001(wttest.WiredTigerTestCase):
table_name = 'test_perf001'
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
#('file-file', dict(tabletype='file',indextype='file')),
('file-lsm', dict(tabletype='file',indextype='lsm')),
#('lsm-file', dict(tabletype='lsm',indextype='file')),
diff --git a/test/suite/test_readonly01.py b/test/suite/test_readonly01.py
index 59e9743ab7e..e4b431ca1da 100644
--- a/test/suite/test_readonly01.py
+++ b/test/suite/test_readonly01.py
@@ -32,7 +32,7 @@
import fnmatch, os, shutil, time
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios
+from wtscenario import make_scenarios
import wttest
class test_readonly01(wttest.WiredTigerTestCase, suite_subprocess):
@@ -73,8 +73,7 @@ class test_readonly01(wttest.WiredTigerTestCase, suite_subprocess):
create_params = 'key_format=r,value_format=8t')),
]
- scenarios = multiply_scenarios('.',
- basecfg_list, dir_list, log_list, types)
+ scenarios = make_scenarios(basecfg_list, dir_list, log_list, types)
def conn_config(self, dir):
self.home = dir
diff --git a/test/suite/test_rebalance.py b/test/suite/test_rebalance.py
index f2167e864c9..98bd81de602 100644
--- a/test/suite/test_rebalance.py
+++ b/test/suite/test_rebalance.py
@@ -29,7 +29,7 @@
import os, time
import wiredtiger, wttest
from helper import complex_populate, simple_populate
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_rebalance.py
# session level rebalance operation
@@ -41,7 +41,7 @@ class test_rebalance(wttest.WiredTigerTestCase):
config = 'key_format=S,allocation_size=512,internal_page_max=512' + \
',leaf_page_max=1k,lsm=(chunk_size=512k,merge_min=10)'
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file', dict(uri='file:')),
('table', dict(uri='table:')),
('lsm', dict(uri='lsm:'))
diff --git a/test/suite/test_reconfig01.py b/test/suite/test_reconfig01.py
index 876de1fe5af..fb3fb7edac6 100644
--- a/test/suite/test_reconfig01.py
+++ b/test/suite/test_reconfig01.py
@@ -92,22 +92,25 @@ class test_reconfig01(wttest.WiredTigerTestCase):
self.conn.reconfigure("checkpoint=(wait=5)")
self.conn.reconfigure("checkpoint=(log_size=0)")
self.conn.reconfigure("checkpoint=(log_size=1M)")
- self.conn.reconfigure("checkpoint=(wait=0,name=hi)")
- self.conn.reconfigure("checkpoint=(wait=5,name=hi)")
- def test_reconfig_stat_log(self):
+ # Statistics logging: reconfigure the things we can reconfigure.
+ def test_reconfig_statistics_log_ok(self):
self.conn.reconfigure("statistics=[all],statistics_log=(wait=0)")
self.conn.reconfigure("statistics_log=(wait=0)")
- self.conn.reconfigure("statistics_log=(wait=2)")
+ self.conn.reconfigure("statistics_log=(wait=2,json=true)")
+ self.conn.reconfigure("statistics_log=(wait=0)")
+ self.conn.reconfigure("statistics_log=(wait=2,on_close=true)")
self.conn.reconfigure("statistics_log=(wait=0)")
self.conn.reconfigure("statistics_log=(wait=2,sources=[lsm:])")
self.conn.reconfigure("statistics_log=(wait=0)")
self.conn.reconfigure("statistics_log=(wait=2,timestamp=\"t%b %d\")")
self.conn.reconfigure("statistics_log=(wait=0)")
- self.conn.reconfigure("statistics_log=(wait=2,path=\"wts.%d.%H\")")
- self.conn.reconfigure("statistics_log=(wait=0)")
- self.conn.reconfigure(
- "statistics_log=(wait=2,sources=[lsm:],timestamp=\"%b\")")
+
+ # Statistics logging: reconfigure the things we can't reconfigure.
+ def test_reconfig_statistics_log_fail(self):
+ msg = '/unknown configuration key/'
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.conn.reconfigure("log=(path=foo)"), msg)
def test_file_manager(self):
self.conn.reconfigure("file_manager=(close_scan_interval=3)")
diff --git a/test/suite/test_reconfig02.py b/test/suite/test_reconfig02.py
index 85a9ceb2a34..9d9ac220aa7 100644
--- a/test/suite/test_reconfig02.py
+++ b/test/suite/test_reconfig02.py
@@ -41,24 +41,29 @@ class test_reconfig02(wttest.WiredTigerTestCase):
self.conn_config = self.init_config
return wttest.WiredTigerTestCase.setUpConnectionOpen(self, dir)
- # Call reconfigure for zero filling a file. There is nothing
- # we can actually look for to confirm it did anything.
- # Also changing the log file size is a no-op, but should not fail.
+ # Logging: reconfigure the things we can reconfigure.
def test_reconfig02_simple(self):
+ self.conn.reconfigure("log=(archive=false)")
+ self.conn.reconfigure("log=(prealloc=false)")
+ self.conn.reconfigure("log=(zero_fill=false)")
+
+ self.conn.reconfigure("log=(archive=true)")
+ self.conn.reconfigure("log=(prealloc=true)")
self.conn.reconfigure("log=(zero_fill=true)")
- self.conn.reconfigure("log=(file_max=1MB)")
- # Test that we get an error if we try to turn logging off.
+ # Logging: reconfigure the things we can't reconfigure.
def test_reconfig02_disable(self):
- msg = 'Invalid argument'
- gotException = False
- try:
- self.conn.reconfigure("log=(enabled=false)")
- except wiredtiger.WiredTigerError as e:
- gotException = True
- self.pr('got exception: ' + str(e))
- self.assertTrue(str(e).find(msg) >= 0)
- self.assertTrue(gotException)
+ msg = '/unknown configuration key/'
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.conn.reconfigure("log=(enabled=true)"), msg)
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.conn.reconfigure("log=(compressor=foo)"), msg)
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.conn.reconfigure("log=(file_max=1MB)"), msg)
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.conn.reconfigure("log=(path=foo)"), msg)
+ self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+ lambda: self.conn.reconfigure("log=(recovery=true)"), msg)
# Logging starts on, but prealloc is off. Verify it is off.
# Reconfigure it on and run again, making sure that log files
diff --git a/test/suite/test_rename.py b/test/suite/test_rename.py
index af968a4a38d..1979bbb802a 100644
--- a/test/suite/test_rename.py
+++ b/test/suite/test_rename.py
@@ -31,7 +31,7 @@ import wiredtiger, wttest
from helper import confirm_does_not_exist,\
complex_populate, complex_populate_check,\
simple_populate, simple_populate_check
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_rename.py
# session level rename operation
@@ -39,7 +39,7 @@ class test_rename(wttest.WiredTigerTestCase):
name1 = 'test_rename1'
name2 = 'test_rename2'
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file', dict(uri='file:')),
('table', dict(uri='table:'))
])
diff --git a/test/suite/test_schema02.py b/test/suite/test_schema02.py
index b404261c066..bccc7dfc728 100644
--- a/test/suite/test_schema02.py
+++ b/test/suite/test_schema02.py
@@ -27,7 +27,7 @@
# OTHER DEALINGS IN THE SOFTWARE.
import wiredtiger, wttest
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_schema02.py
# Columns, column groups, indexes
@@ -37,7 +37,7 @@ class test_schema02(wttest.WiredTigerTestCase):
"""
nentries = 1000
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('normal', { 'idx_config' : '' }),
('lsm', { 'idx_config' : ',type=lsm' }),
])
diff --git a/test/suite/test_schema03.py b/test/suite/test_schema03.py
index f48bfdf3cf8..81556393e78 100644
--- a/test/suite/test_schema03.py
+++ b/test/suite/test_schema03.py
@@ -29,7 +29,7 @@
import os
import suite_random
import wiredtiger, wtscenario, wttest
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
try:
# Windows does not getrlimit/setrlimit so we must catch the resource
@@ -249,7 +249,7 @@ class test_schema03(wttest.WiredTigerTestCase):
# but boost it up to this limit anyway.
OPEN_FILE_LIMIT = 1000
- restart_scenarios = check_scenarios([('table', dict(s_restart=['table'],P=0.3)),
+ restart_scenarios = [('table', dict(s_restart=['table'],P=0.3)),
('colgroup0', dict(s_restart=['colgroup0'],P=0.3)),
('index0', dict(s_restart=['index0'],P=0.3)),
('colgroup1', dict(s_restart=['colgroup1'],P=0.3)),
@@ -259,7 +259,7 @@ class test_schema03(wttest.WiredTigerTestCase):
('populate1', dict(s_restart=['populate1'],P=0.3)),
('ipop', dict(s_restart=['index0','populate0'],P=0.3)),
('all', dict(s_restart=['table','colgroup0','index0','colgroup1','index1','populate0','index2','populate1'],P=1.0)),
- ])
+ ]
ntable_scenarios = wtscenario.quick_scenarios('s_ntable',
[1,2,5,8], [1.0,0.4,0.5,0.5])
@@ -272,11 +272,10 @@ class test_schema03(wttest.WiredTigerTestCase):
table_args_scenarios = wtscenario.quick_scenarios('s_extra_table_args',
['', ',type=file', ',type=lsm'], [0.5, 0.3, 0.2])
- all_scenarios = wtscenario.multiply_scenarios('_', restart_scenarios, ntable_scenarios, ncolgroup_scenarios, nindex_scenarios, idx_args_scenarios, table_args_scenarios)
-
- # Prune the scenarios according to the probabilities given above.
- scenarios = wtscenario.prune_scenarios(all_scenarios, 30)
- scenarios = wtscenario.number_scenarios(scenarios)
+ scenarios = wtscenario.make_scenarios(
+ restart_scenarios, ntable_scenarios, ncolgroup_scenarios,
+ nindex_scenarios, idx_args_scenarios, table_args_scenarios,
+ prune=30)
# Note: the set can be reduced here for debugging, e.g.
# scenarios = scenarios[40:44]
diff --git a/test/suite/test_schema04.py b/test/suite/test_schema04.py
index cd41138deb0..8ac81690819 100644
--- a/test/suite/test_schema04.py
+++ b/test/suite/test_schema04.py
@@ -28,7 +28,7 @@
import os
import wiredtiger, wttest, run
-from wtscenario import check_scenarios, number_scenarios
+from wtscenario import make_scenarios
# test_schema04.py
# Test indices with duplicates
@@ -47,7 +47,7 @@ class test_schema04(wttest.WiredTigerTestCase):
"""
nentries = 100
- scenarios = number_scenarios([
+ scenarios = make_scenarios([
('index-before', { 'create_index' : 0 }),
('index-during', { 'create_index' : 1 }),
('index-after', { 'create_index' : 2 }),
diff --git a/test/suite/test_schema05.py b/test/suite/test_schema05.py
index 89722d5f89a..89484cfc7bd 100644
--- a/test/suite/test_schema05.py
+++ b/test/suite/test_schema05.py
@@ -28,7 +28,7 @@
import os
import wiredtiger, wttest, run
-from wtscenario import check_scenarios, number_scenarios
+from wtscenario import make_scenarios
# test_schema05.py
# Test indices using a custom extractor.
@@ -51,7 +51,7 @@ class test_schema05(wttest.WiredTigerTestCase):
nentries = 1000
nindices = 6
- scenarios = number_scenarios([
+ scenarios = make_scenarios([
('index-before', { 'create_index' : 0 }),
('index-during', { 'create_index' : 1 }),
('index-after', { 'create_index' : 2 }),
diff --git a/test/suite/test_schema06.py b/test/suite/test_schema06.py
index e72959edf2a..e0eec189137 100644
--- a/test/suite/test_schema06.py
+++ b/test/suite/test_schema06.py
@@ -27,6 +27,7 @@
# OTHER DEALINGS IN THE SOFTWARE.
import wiredtiger, wttest
+from wtscenario import make_scenarios
# test_schema06.py
# Repeatedly create and drop indices
@@ -36,10 +37,10 @@ class test_schema06(wttest.WiredTigerTestCase):
"""
nentries = 1000
- scenarios = [
+ scenarios = make_scenarios([
('normal', { 'idx_config' : '' }),
('lsm', { 'idx_config' : ',type=lsm' }),
- ]
+ ])
def flip(self, inum, val):
"""
diff --git a/test/suite/test_split.py b/test/suite/test_split.py
index d09613e1c52..28bf6bc59b0 100644
--- a/test/suite/test_split.py
+++ b/test/suite/test_split.py
@@ -35,7 +35,6 @@ from wiredtiger import stat
from helper import confirm_empty,\
key_populate, value_populate, simple_populate,\
complex_populate, complex_value_populate
-from wtscenario import multiply_scenarios, number_scenarios
# Test splits
class test_split(wttest.WiredTigerTestCase):
diff --git a/test/suite/test_stat01.py b/test/suite/test_stat01.py
index 5c3259696eb..1ad51ee9882 100644
--- a/test/suite/test_stat01.py
+++ b/test/suite/test_stat01.py
@@ -29,7 +29,7 @@
import helper, wiredtiger, wttest
from wiredtiger import stat
from helper import key_populate, simple_populate
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# test_stat01.py
# Statistics operations
@@ -49,7 +49,7 @@ class test_stat01(wttest.WiredTigerTestCase):
('recno', dict(keyfmt='r')),
('string', dict(keyfmt='S')),
]
- scenarios = number_scenarios(multiply_scenarios('.', types, keyfmt))
+ scenarios = make_scenarios(types, keyfmt)
conn_config = 'statistics=(all)'
diff --git a/test/suite/test_stat02.py b/test/suite/test_stat02.py
index 88371947b5b..ef3907e54b1 100644
--- a/test/suite/test_stat02.py
+++ b/test/suite/test_stat02.py
@@ -28,7 +28,7 @@
import itertools, wiredtiger, wttest
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
from wiredtiger import stat
from helper import complex_populate, complex_populate_lsm, simple_populate
@@ -57,8 +57,7 @@ class test_stat_cursor_config(wttest.WiredTigerTestCase):
('size', dict(cursor_config='size'))
]
- scenarios = number_scenarios(
- multiply_scenarios('.', uri, data_config, cursor_config))
+ scenarios = make_scenarios(uri, data_config, cursor_config)
# Turn on statistics for this test.
def conn_config(self, dir):
@@ -106,13 +105,13 @@ class test_stat_cursor_dsrc_clear(wttest.WiredTigerTestCase):
pfx = 'test_stat_cursor_dsrc_clear'
uri = [
- ('1', dict(uri='file:' + pfx, pop=simple_populate)),
- ('2', dict(uri='table:' + pfx, pop=simple_populate)),
- ('3', dict(uri='table:' + pfx, pop=complex_populate)),
- ('4', dict(uri='table:' + pfx, pop=complex_populate_lsm))
+ ('dsrc_clear_1', dict(uri='file:' + pfx, pop=simple_populate)),
+ ('dsrc_clear_2', dict(uri='table:' + pfx, pop=simple_populate)),
+ ('dsrc_clear_3', dict(uri='table:' + pfx, pop=complex_populate)),
+ ('dsrc_clear_4', dict(uri='table:' + pfx, pop=complex_populate_lsm))
]
- scenarios = number_scenarios(multiply_scenarios('.', uri))
+ scenarios = make_scenarios(uri)
conn_config = 'statistics=(all)'
def test_stat_cursor_dsrc_clear(self):
@@ -136,13 +135,13 @@ class test_stat_cursor_fast(wttest.WiredTigerTestCase):
pfx = 'test_stat_cursor_fast'
uri = [
- ('1', dict(uri='file:' + pfx, pop=simple_populate)),
- ('2', dict(uri='table:' + pfx, pop=simple_populate)),
- ('3', dict(uri='table:' + pfx, pop=complex_populate)),
- ('4', dict(uri='table:' + pfx, pop=complex_populate_lsm))
+ ('fast_1', dict(uri='file:' + pfx, pop=simple_populate)),
+ ('fast_2', dict(uri='table:' + pfx, pop=simple_populate)),
+ ('fast_3', dict(uri='table:' + pfx, pop=complex_populate)),
+ ('fast_4', dict(uri='table:' + pfx, pop=complex_populate_lsm))
]
- scenarios = number_scenarios(multiply_scenarios('.', uri))
+ scenarios = make_scenarios(uri)
conn_config = 'statistics=(all)'
def test_stat_cursor_fast(self):
@@ -180,13 +179,13 @@ class test_stat_cursor_dsrc_error(wttest.WiredTigerTestCase):
pfx = 'test_stat_cursor_dsrc_error'
uri = [
- ('1', dict(uri='file:' + pfx, pop=simple_populate)),
- ('2', dict(uri='table:' + pfx, pop=simple_populate)),
- ('3', dict(uri='table:' + pfx, pop=complex_populate)),
- ('4', dict(uri='table:' + pfx, pop=complex_populate_lsm))
+ ('dsrc_error_1', dict(uri='file:' + pfx, pop=simple_populate)),
+ ('dsrc_error_2', dict(uri='table:' + pfx, pop=simple_populate)),
+ ('dsrc_error_3', dict(uri='table:' + pfx, pop=complex_populate)),
+ ('dsrc_error_4', dict(uri='table:' + pfx, pop=complex_populate_lsm))
]
- scenarios = number_scenarios(multiply_scenarios('.', uri))
+ scenarios = make_scenarios(uri)
conn_config = 'statistics=(all)'
def test_stat_cursor_dsrc_error(self):
diff --git a/test/suite/test_stat03.py b/test/suite/test_stat03.py
index 039ad1f7f8d..b17fe6eb91c 100644
--- a/test/suite/test_stat03.py
+++ b/test/suite/test_stat03.py
@@ -34,7 +34,7 @@ from helper import complex_populate, complex_populate_lsm, simple_populate
from helper import key_populate, complex_value_populate, value_populate
from helper import complex_populate_colgroup_count, complex_populate_index_count
from helper import complex_populate_colgroup_name, complex_populate_index_name
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# test_stat03.py
# Statistics reset test.
@@ -51,7 +51,7 @@ class test_stat_cursor_reset(wttest.WiredTigerTestCase):
dict(uri='table:' + pfx, pop=complex_populate_lsm)),
]
- scenarios = number_scenarios(multiply_scenarios('.', uri))
+ scenarios = make_scenarios(uri)
conn_config = 'statistics=(all)'
def stat_cursor(self, uri):
diff --git a/test/suite/test_stat04.py b/test/suite/test_stat04.py
index e7c39371f80..b5309efff37 100644
--- a/test/suite/test_stat04.py
+++ b/test/suite/test_stat04.py
@@ -28,7 +28,7 @@
import os, struct
from suite_subprocess import suite_subprocess
-from wtscenario import number_scenarios, multiply_scenarios
+from wtscenario import make_scenarios
import wiredtiger, wttest
from wiredtiger import stat
@@ -49,7 +49,7 @@ class test_stat04(wttest.WiredTigerTestCase, suite_subprocess):
('large', dict(nentries=100000, valuesize=1)),
('jumboval', dict(nentries=100, valuesize=4200000)),
]
- scenarios = number_scenarios(multiply_scenarios('.', keyfmt, nentries))
+ scenarios = make_scenarios(keyfmt, nentries)
conn_config = 'statistics=(all)'
def init_test(self):
@@ -91,6 +91,7 @@ class test_stat04(wttest.WiredTigerTestCase, suite_subprocess):
self.checkcount(uri, count)
cursor[self.genkey(i)] = self.genvalue(i)
count += 1
+
# Remove a number of entries, at each step checking that stats match.
for i in range(0, self.nentries / 37):
cursor.set_key(self.genkey(i*11 % self.nentries))
@@ -99,5 +100,10 @@ class test_stat04(wttest.WiredTigerTestCase, suite_subprocess):
self.checkcount(uri, count)
cursor.close()
+ # Confirm the count is correct after writing to the backing file,
+ # which tests the on-disk format as well as the in-memory format.
+ self.reopen_conn()
+ self.checkcount(uri, count)
+
if __name__ == '__main__':
wttest.run()
diff --git a/test/suite/test_stat05.py b/test/suite/test_stat05.py
index 9bcedd65089..62562f78ed6 100644
--- a/test/suite/test_stat05.py
+++ b/test/suite/test_stat05.py
@@ -28,7 +28,7 @@
import itertools, wiredtiger, wttest
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
from wiredtiger import stat
from helper import complex_populate, complex_populate_lsm, simple_populate
from helper import complex_value_populate, key_populate, value_populate
@@ -43,16 +43,18 @@ class test_stat_cursor_config(wttest.WiredTigerTestCase):
('file', dict(uri='file:' + pfx, pop=simple_populate, cfg='')),
('table', dict(uri='table:' + pfx, pop=simple_populate, cfg='')),
('inmem', dict(uri='table:' + pfx, pop=simple_populate, cfg='',
- conn_config='in_memory,statistics=(fast)')),
+ conn_config = 'in_memory,statistics=(fast)')),
('table-lsm', dict(uri='table:' + pfx, pop=simple_populate,
- cfg=',type=lsm,lsm=(chunk_size=1MB,merge_min=2)')),
+ cfg=',type=lsm,lsm=(chunk_size=1MB,merge_min=2)',
+ conn_config = 'statistics=(fast),eviction_dirty_target=99,eviction_dirty_trigger=99')),
('complex', dict(uri='table:' + pfx, pop=complex_populate, cfg='')),
('complex-lsm',
dict(uri='table:' + pfx, pop=complex_populate_lsm,
- cfg=',lsm=(chunk_size=1MB,merge_min=2)')),
+ cfg=',lsm=(chunk_size=1MB,merge_min=2)',
+ conn_config = 'statistics=(fast),eviction_dirty_target=99,eviction_dirty_trigger=99')),
]
- scenarios = number_scenarios(uri)
+ scenarios = make_scenarios(uri)
def openAndWalkStatCursor(self):
c = self.session.open_cursor(
@@ -62,7 +64,6 @@ class test_stat_cursor_config(wttest.WiredTigerTestCase):
count += 1
c.close()
-
# Open a size-only statistics cursor on various table types. Ensure that
# the cursor open succeeds. Insert enough data that LSM tables need to
# switch and merge.
diff --git a/test/suite/test_stat_log01.py b/test/suite/test_stat_log01.py
index f6033d940c5..65ce80dfe7d 100644
--- a/test/suite/test_stat_log01.py
+++ b/test/suite/test_stat_log01.py
@@ -51,9 +51,10 @@ class test_stat_log01(wttest.WiredTigerTestCase):
None, "create,statistics=(fast),statistics_log=(wait=1)")
# Wait for the default interval, to ensure stats have been written.
time.sleep(2)
- self.check_stats_file("WiredTigerStat")
+ self.check_stats_file(".")
def test_stats_log_name(self):
+ os.mkdir("foo")
self.conn = self.wiredtiger_open(
None, "create,statistics=(fast),statistics_log=(wait=1,path=foo)")
# Wait for the default interval, to ensure stats have been written.
@@ -66,21 +67,18 @@ class test_stat_log01(wttest.WiredTigerTestCase):
# Wait for the default interval, to ensure stats have been written.
time.sleep(2)
self.close_conn()
- self.check_stats_file("WiredTigerStat")
+ self.check_stats_file(".")
def test_stats_log_on_close(self):
self.conn = self.wiredtiger_open(None,
"create,statistics=(fast),statistics_log=(on_close=true)")
# Close the connection to ensure the statistics get generated.
self.close_conn()
- self.check_stats_file("WiredTigerStat")
+ self.check_stats_file(".")
- def check_stats_file(self, filename):
- if filename == "WiredTigerStat":
- files = glob.glob(filename + '.[0-9]*')
- self.assertTrue(files)
- else:
- self.assertTrue(os.path.isfile(filename))
+ def check_stats_file(self, dir):
+ files = glob.glob(dir + '/' + 'WiredTigerStat.[0-9]*')
+ self.assertTrue(files)
if __name__ == '__main__':
wttest.run()
diff --git a/test/suite/test_sweep01.py b/test/suite/test_sweep01.py
index bccd2bce012..71f8fcb180e 100644
--- a/test/suite/test_sweep01.py
+++ b/test/suite/test_sweep01.py
@@ -33,8 +33,8 @@
import fnmatch, os, shutil, run, time
from suite_subprocess import suite_subprocess
+from wtscenario import make_scenarios
from wiredtiger import stat
-from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios
import wttest
class test_sweep01(wttest.WiredTigerTestCase, suite_subprocess):
@@ -55,7 +55,7 @@ class test_sweep01(wttest.WiredTigerTestCase, suite_subprocess):
create_params = 'key_format=r,value_format=8t')),
]
- scenarios = types
+ scenarios = make_scenarios(types)
def test_ops(self):
#
diff --git a/test/suite/test_sweep03.py b/test/suite/test_sweep03.py
index 061c2f5b37b..61078fa96b5 100644
--- a/test/suite/test_sweep03.py
+++ b/test/suite/test_sweep03.py
@@ -33,7 +33,7 @@
import fnmatch, os, shutil, run, time
from suite_subprocess import suite_subprocess
from wiredtiger import stat
-from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios
+from wtscenario import make_scenarios
import wttest
class test_sweep03(wttest.WiredTigerTestCase, suite_subprocess):
@@ -54,7 +54,7 @@ class test_sweep03(wttest.WiredTigerTestCase, suite_subprocess):
create_params = 'key_format=r,value_format=8t')),
]
- scenarios = types
+ scenarios = make_scenarios(types)
def test_disable_idle_timeout1(self):
#
diff --git a/test/suite/test_truncate01.py b/test/suite/test_truncate01.py
index 77a476e40c1..9a3518c6984 100644
--- a/test/suite/test_truncate01.py
+++ b/test/suite/test_truncate01.py
@@ -34,13 +34,13 @@ import wiredtiger, wttest
from helper import confirm_empty,\
key_populate, value_populate, simple_populate,\
complex_populate, complex_value_populate
-from wtscenario import check_scenarios, multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# Test truncation arguments.
class test_truncate_arguments(wttest.WiredTigerTestCase):
name = 'test_truncate'
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file', dict(type='file:')),
('table', dict(type='table:'))
])
@@ -80,7 +80,7 @@ class test_truncate_arguments(wttest.WiredTigerTestCase):
# Test truncation of an object using its URI.
class test_truncate_uri(wttest.WiredTigerTestCase):
name = 'test_truncate'
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file', dict(type='file:')),
('table', dict(type='table:'))
])
@@ -115,7 +115,7 @@ class test_truncate_cursor_order(wttest.WiredTigerTestCase):
('recno', dict(keyfmt='r')),
('string', dict(keyfmt='S')),
]
- scenarios = number_scenarios(multiply_scenarios('.', types, keyfmt))
+ scenarios = make_scenarios(types, keyfmt)
# Test an illegal order, then confirm that equal cursors works.
def test_truncate_cursor_order(self):
@@ -146,7 +146,7 @@ class test_truncate_cursor_end(wttest.WiredTigerTestCase):
('recno', dict(keyfmt='r')),
('string', dict(keyfmt='S')),
]
- scenarios = number_scenarios(multiply_scenarios('.', types, keyfmt))
+ scenarios = make_scenarios(types, keyfmt)
# Test truncation of cursors past the end of the object.
def test_truncate_cursor_order(self):
@@ -205,8 +205,7 @@ class test_truncate_cursor(wttest.WiredTigerTestCase):
('big', dict(nentries=1000,skip=37)),
]
- scenarios = number_scenarios(
- multiply_scenarios('.', types, keyfmt, size, reopen))
+ scenarios = make_scenarios(types, keyfmt, size, reopen)
# Set a cursor key.
def cursorKey(self, uri, key):
diff --git a/test/suite/test_truncate02.py b/test/suite/test_truncate02.py
index 6c11302787c..e57a65d2f97 100644
--- a/test/suite/test_truncate02.py
+++ b/test/suite/test_truncate02.py
@@ -32,7 +32,7 @@
import wiredtiger, wttest
from helper import key_populate, value_populate, simple_populate
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# test_truncate_fast_delete
# When deleting leaf pages that aren't in memory, we set transactional
@@ -86,8 +86,7 @@ class test_truncate_fast_delete(wttest.WiredTigerTestCase):
('txn2', dict(commit=False)),
]
- scenarios = number_scenarios(
- multiply_scenarios('.', types, keyfmt, overflow, reads, writes, txn))
+ scenarios = make_scenarios(types, keyfmt, overflow, reads, writes, txn)
# Return the number of records visible to the cursor; test both forward
# and backward iteration, they are different code paths in this case.
diff --git a/test/suite/test_txn01.py b/test/suite/test_txn01.py
index eb6963791fd..1ba74461088 100644
--- a/test/suite/test_txn01.py
+++ b/test/suite/test_txn01.py
@@ -27,13 +27,13 @@
# OTHER DEALINGS IN THE SOFTWARE.
import wiredtiger, wttest
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_txn01.py
# Transactions: basic functionality
class test_txn01(wttest.WiredTigerTestCase):
nentries = 1000
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('col-f', dict(uri='file:text_txn01',key_format='r',value_format='S')),
('col-t', dict(uri='table:text_txn01',key_format='r',value_format='S')),
('fix-f', dict(uri='file:text_txn01',key_format='r',value_format='8t')),
diff --git a/test/suite/test_txn02.py b/test/suite/test_txn02.py
index fccf123d3bc..a0c2c12a47c 100644
--- a/test/suite/test_txn02.py
+++ b/test/suite/test_txn02.py
@@ -32,7 +32,7 @@
import fnmatch, os, shutil, time
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios
+from wtscenario import make_scenarios
import wttest
class test_txn02(wttest.WiredTigerTestCase, suite_subprocess):
@@ -81,22 +81,18 @@ class test_txn02(wttest.WiredTigerTestCase, suite_subprocess):
txn3s = [('t3c', dict(txn3='commit')), ('t3r', dict(txn3='rollback'))]
txn4s = [('t4c', dict(txn4='commit')), ('t4r', dict(txn4='rollback'))]
- all_scenarios = multiply_scenarios('.', types,
- op1s, txn1s, op2s, txn2s, op3s, txn3s, op4s, txn4s)
-
# This test generates thousands of potential scenarios.
# For default runs, we'll use a small subset of them, for
# long runs (when --long is set) we'll set a much larger limit.
- scenarios = number_scenarios(prune_scenarios(all_scenarios, 20, 5000))
+ scenarios = make_scenarios(types,
+ op1s, txn1s, op2s, txn2s, op3s, txn3s, op4s, txn4s,
+ prune=20, prunelong=5000)
# Each check_log() call takes a second, so we don't call it for
# every scenario, we'll limit it to the value of checklog_calls.
checklog_calls = 100 if wttest.islongtest() else 2
checklog_mod = (len(scenarios) / checklog_calls + 1)
- # scenarios = number_scenarios(multiply_scenarios('.', types,
- # op1s, txn1s, op2s, txn2s, op3s, txn3s, op4s, txn4s)) [:3]
- # Overrides WiredTigerTestCase
def setUpConnectionOpen(self, dir):
self.home = dir
# Cycle through the different transaction_sync values in a
diff --git a/test/suite/test_txn03.py b/test/suite/test_txn03.py
index 97180a75949..18a0e096767 100644
--- a/test/suite/test_txn03.py
+++ b/test/suite/test_txn03.py
@@ -31,7 +31,7 @@
#
import wiredtiger, wttest
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
class test_txn03(wttest.WiredTigerTestCase):
tablename = 'test_txn03'
@@ -42,7 +42,7 @@ class test_txn03(wttest.WiredTigerTestCase):
data_str2 = "TEST_VAL1"
nentries = 1000
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('var', dict(create_params = "key_format=S,value_format=S")),
])
diff --git a/test/suite/test_txn04.py b/test/suite/test_txn04.py
index 9d9d2db62c6..ade39272f84 100644
--- a/test/suite/test_txn04.py
+++ b/test/suite/test_txn04.py
@@ -32,7 +32,7 @@
import shutil, os
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
import wttest
class test_txn04(wttest.WiredTigerTestCase, suite_subprocess):
@@ -62,7 +62,7 @@ class test_txn04(wttest.WiredTigerTestCase, suite_subprocess):
]
txn1s = [('t1c', dict(txn1='commit')), ('t1r', dict(txn1='rollback'))]
- scenarios = number_scenarios(multiply_scenarios('.', types, op1s, txn1s))
+ scenarios = make_scenarios(types, op1s, txn1s)
# Overrides WiredTigerTestCase
def setUpConnectionOpen(self, dir):
self.home = dir
diff --git a/test/suite/test_txn05.py b/test/suite/test_txn05.py
index bb68034ca04..9e84fe7d3fe 100644
--- a/test/suite/test_txn05.py
+++ b/test/suite/test_txn05.py
@@ -32,7 +32,7 @@
import fnmatch, os, shutil, time
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
import wttest
class test_txn05(wttest.WiredTigerTestCase, suite_subprocess):
@@ -63,8 +63,7 @@ class test_txn05(wttest.WiredTigerTestCase, suite_subprocess):
]
txn1s = [('t1c', dict(txn1='commit')), ('t1r', dict(txn1='rollback'))]
- scenarios = number_scenarios(multiply_scenarios('.', types, op1s, txn1s))
- # scenarios = number_scenarios(multiply_scenarios('.', types, op1s, txn1s))[:3]
+ scenarios = make_scenarios(types, op1s, txn1s)
# Overrides WiredTigerTestCase
def setUpConnectionOpen(self, dir):
self.home = dir
diff --git a/test/suite/test_txn06.py b/test/suite/test_txn06.py
index 9c1d0335d47..e4636e40e2e 100644
--- a/test/suite/test_txn06.py
+++ b/test/suite/test_txn06.py
@@ -30,7 +30,6 @@
# Transactions: test long-running snapshots
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios
from helper import simple_populate
import wiredtiger, wttest
diff --git a/test/suite/test_txn07.py b/test/suite/test_txn07.py
index f74120e3590..8dd8238343d 100644
--- a/test/suite/test_txn07.py
+++ b/test/suite/test_txn07.py
@@ -33,7 +33,7 @@
import fnmatch, os, shutil, run, time
from suite_subprocess import suite_subprocess
from wiredtiger import stat
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
import wttest
class test_txn07(wttest.WiredTigerTestCase, suite_subprocess):
@@ -70,8 +70,7 @@ class test_txn07(wttest.WiredTigerTestCase, suite_subprocess):
('none', dict(compress='')),
]
- scenarios = number_scenarios(multiply_scenarios('.', types, op1s, txn1s,
- compress))
+ scenarios = make_scenarios(types, op1s, txn1s, compress)
# Overrides WiredTigerTestCase
def setUpConnectionOpen(self, dir):
self.home = dir
diff --git a/test/suite/test_txn08.py b/test/suite/test_txn08.py
index 36253856285..f0cdf08df07 100644
--- a/test/suite/test_txn08.py
+++ b/test/suite/test_txn08.py
@@ -33,7 +33,6 @@
import fnmatch, os, shutil, run, time
from suite_subprocess import suite_subprocess
from wiredtiger import stat
-from wtscenario import multiply_scenarios, number_scenarios
import wttest
class test_txn08(wttest.WiredTigerTestCase, suite_subprocess):
diff --git a/test/suite/test_txn09.py b/test/suite/test_txn09.py
index f536d65205d..cfad8270ab1 100644
--- a/test/suite/test_txn09.py
+++ b/test/suite/test_txn09.py
@@ -32,7 +32,7 @@
import fnmatch, os, shutil, time
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios
+from wtscenario import make_scenarios
import wttest
class test_txn09(wttest.WiredTigerTestCase, suite_subprocess):
@@ -73,13 +73,12 @@ class test_txn09(wttest.WiredTigerTestCase, suite_subprocess):
txn3s = [('t3c', dict(txn3='commit')), ('t3r', dict(txn3='rollback'))]
txn4s = [('t4c', dict(txn4='commit')), ('t4r', dict(txn4='rollback'))]
- all_scenarios = multiply_scenarios('.', types,
- op1s, txn1s, op2s, txn2s, op3s, txn3s, op4s, txn4s)
-
# This test generates thousands of potential scenarios.
# For default runs, we'll use a small subset of them, for
# long runs (when --long is set) we'll set a much larger limit.
- scenarios = number_scenarios(prune_scenarios(all_scenarios, 20, 5000))
+ scenarios = make_scenarios(types,
+ op1s, txn1s, op2s, txn2s, op3s, txn3s, op4s, txn4s,
+ prune=20, prunelong=5000)
# Overrides WiredTigerTestCase
def setUpConnectionOpen(self, dir):
diff --git a/test/suite/test_txn10.py b/test/suite/test_txn10.py
index cf9c11dd4ab..a4745e60066 100644
--- a/test/suite/test_txn10.py
+++ b/test/suite/test_txn10.py
@@ -32,7 +32,6 @@
import fnmatch, os, shutil, time
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios
import wttest
class test_txn10(wttest.WiredTigerTestCase, suite_subprocess):
diff --git a/test/suite/test_txn12.py b/test/suite/test_txn12.py
index 8ae9df33990..32c058bea85 100644
--- a/test/suite/test_txn12.py
+++ b/test/suite/test_txn12.py
@@ -29,7 +29,6 @@
import wiredtiger, wttest
from suite_subprocess import suite_subprocess
from wiredtiger import stat
-from wtscenario import multiply_scenarios, number_scenarios
# test_txn12.py
# test of commit following failed op in a read only transaction.
diff --git a/test/suite/test_txn13.py b/test/suite/test_txn13.py
index dd6a6dbcd6d..ae0250c06e8 100644
--- a/test/suite/test_txn13.py
+++ b/test/suite/test_txn13.py
@@ -33,7 +33,7 @@
#import fnmatch, os, shutil, run, time
from suite_subprocess import suite_subprocess
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
import wiredtiger, wttest
class test_txn13(wttest.WiredTigerTestCase, suite_subprocess):
@@ -43,7 +43,7 @@ class test_txn13(wttest.WiredTigerTestCase, suite_subprocess):
nops = 1024
create_params = 'key_format=i,value_format=S'
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('1gb', dict(expect_err=False, valuesize=1048576)),
('2gb', dict(expect_err=False, valuesize=2097152)),
('4gb', dict(expect_err=True, valuesize=4194304))
diff --git a/test/suite/test_txn14.py b/test/suite/test_txn14.py
index 371f4402567..f9ccabaab8b 100644
--- a/test/suite/test_txn14.py
+++ b/test/suite/test_txn14.py
@@ -32,7 +32,7 @@
import fnmatch, os, shutil, time
from suite_subprocess import suite_subprocess
-from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios
+from wtscenario import make_scenarios
import wttest
class test_txn14(wttest.WiredTigerTestCase, suite_subprocess):
@@ -47,7 +47,7 @@ class test_txn14(wttest.WiredTigerTestCase, suite_subprocess):
('sync', dict(sync='on')),
('bg', dict(sync='background')),
]
- scenarios = multiply_scenarios('.', sync_list)
+ scenarios = make_scenarios(sync_list)
def simulate_crash_restart(self, olddir, newdir):
''' Simulate a crash from olddir and restart in newdir. '''
diff --git a/test/suite/test_txn15.py b/test/suite/test_txn15.py
index 809dce4ebfa..c061c093b02 100644
--- a/test/suite/test_txn15.py
+++ b/test/suite/test_txn15.py
@@ -33,7 +33,7 @@
import fnmatch, os, shutil, time
from suite_subprocess import suite_subprocess
from wiredtiger import stat
-from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios
+from wtscenario import make_scenarios
import wttest
class test_txn15(wttest.WiredTigerTestCase, suite_subprocess):
@@ -71,7 +71,7 @@ class test_txn15(wttest.WiredTigerTestCase, suite_subprocess):
('c_none', dict(commit_sync=None)),
('c_off', dict(commit_sync='sync=off')),
]
- scenarios = multiply_scenarios('.', conn_sync_enabled, conn_sync_method,
+ scenarios = make_scenarios(conn_sync_enabled, conn_sync_method,
begin_sync, commit_sync)
# Given the different configuration settings determine if this group
diff --git a/test/suite/test_upgrade.py b/test/suite/test_upgrade.py
index 357e437f14d..e4f92f8f8d8 100644
--- a/test/suite/test_upgrade.py
+++ b/test/suite/test_upgrade.py
@@ -29,14 +29,14 @@
import os, time
import wiredtiger, wttest
from helper import complex_populate, simple_populate
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_upgrade.py
# session level upgrade operation
class test_upgrade(wttest.WiredTigerTestCase):
name = 'test_upgrade'
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('file', dict(uri='file:')),
('table', dict(uri='table:'))
])
diff --git a/test/suite/test_util02.py b/test/suite/test_util02.py
index 475e856052a..421b0104484 100644
--- a/test/suite/test_util02.py
+++ b/test/suite/test_util02.py
@@ -29,7 +29,7 @@
import string, os
import wiredtiger, wttest
from suite_subprocess import suite_subprocess
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
from helper import complex_populate
# test_util02.py
@@ -44,7 +44,7 @@ class test_util02(wttest.WiredTigerTestCase, suite_subprocess):
nentries = 1000
stringclass = ''.__class__
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('SS', dict(key_format='S',value_format='S')),
('rS', dict(key_format='r',value_format='S')),
('ri', dict(key_format='r',value_format='i')),
diff --git a/test/suite/test_util03.py b/test/suite/test_util03.py
index c3ea48b8f5e..e341c79ff9e 100644
--- a/test/suite/test_util03.py
+++ b/test/suite/test_util03.py
@@ -28,7 +28,7 @@
from suite_subprocess import suite_subprocess
import wiredtiger, wttest
-from wtscenario import check_scenarios
+from wtscenario import make_scenarios
# test_util03.py
# Utilities: wt create
@@ -36,7 +36,7 @@ class test_util03(wttest.WiredTigerTestCase, suite_subprocess):
tablename = 'test_util03.a'
nentries = 1000
- scenarios = check_scenarios([
+ scenarios = make_scenarios([
('none', dict(key_format=None,value_format=None)),
('SS', dict(key_format='S',value_format='S')),
('rS', dict(key_format='r',value_format='S')),
diff --git a/test/suite/test_util13.py b/test/suite/test_util13.py
index 222f42cd7f1..9804dc700ba 100644
--- a/test/suite/test_util13.py
+++ b/test/suite/test_util13.py
@@ -33,7 +33,7 @@ import itertools, wiredtiger, wttest
from helper import complex_populate_cgconfig, complex_populate_cgconfig_lsm
from helper import simple_populate
from helper import complex_populate_check, simple_populate_check
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import make_scenarios
# test_util13.py
# Utilities: wt dump, as well as the dump cursor
@@ -73,7 +73,7 @@ class test_util13(wttest.WiredTigerTestCase, suite_subprocess):
cfg='merge_max=5')),
]
- scenarios = number_scenarios(multiply_scenarios('.', types))
+ scenarios = make_scenarios(types)
def compare_config(self, expected_cfg, actual_cfg):
# Replace '(' characters so configuration groups don't break parsing.
diff --git a/test/suite/wtscenario.py b/test/suite/wtscenario.py
index 7fad7c228fb..8576b3ac876 100644
--- a/test/suite/wtscenario.py
+++ b/test/suite/wtscenario.py
@@ -64,11 +64,37 @@ def log2chr(val):
megabyte = 1024 * 1024
+def make_scenarios(*args, **kwargs):
+ """
+ The standard way to create scenarios for WT tests.
+ Scenarios can be combined by listing them all as arguments.
+ A final prune= and/or prunelong= argument may be given that
+ forces the list of entries in the scenario to be pruned.
+ The result is a (combined) scenario that has been checked
+ for name duplicates and has been given names and numbers.
+ """
+ scenes = multiply_scenarios('.', *args)
+ pruneval = None
+ prunelong = None
+ for key in kwargs:
+ if key == 'prune':
+ pruneval = kwargs[key]
+ elif key == 'prunelong':
+ prunelong = kwargs[key]
+ else:
+ raise AssertionError(
+ 'make_scenarios: unexpected named arg: ' + key)
+ if pruneval != None or prunelong != None:
+ pruneval = pruneval if pruneval != None else -1
+ prunelong = prunelong if prunelong != None else -1
+ scenes = prune_scenarios(scenes, pruneval, prunelong)
+ return number_scenarios(scenes)
+
def check_scenarios(scenes):
"""
- Make sure all scenarios have unique names
+ Make sure all scenarios have unique case insensitive names
"""
- assert len(scenes) == len(dict(scenes))
+ assert len(scenes) == len(dict((k.lower(), v) for k, v in scenes))
return scenes
def multiply_scenarios(sep, *args):
@@ -81,8 +107,8 @@ def multiply_scenarios(sep, *args):
result = scenes
else:
total = []
- for scena in scenes:
- for scenb in result:
+ for scena in result:
+ for scenb in scenes:
# Create a merged scenario with a concatenated name
name = scena[0] + sep + scenb[0]
tdict = {}
@@ -235,7 +261,7 @@ class wtscenario:
scen.lmax = lmax
scen.cache_size = cache
s.append((scen.shortName(), dict(session_create_scenario=scen)))
- return s
+ return make_scenarios(s)
def shortName(self):
"""
diff --git a/test/suite/wttest.py b/test/suite/wttest.py
index 9e430fcdba7..788dd5d0307 100644
--- a/test/suite/wttest.py
+++ b/test/suite/wttest.py
@@ -212,8 +212,8 @@ class WiredTigerTestCase(unittest.TestCase):
# help distinguish tests.
scen = ''
if hasattr(self, 'scenario_number') and hasattr(self, 'scenario_name'):
- scen = '(scenario ' + str(self.scenario_number) + \
- ': ' + self.scenario_name + ')'
+ scen = ' -s ' + str(self.scenario_number) + \
+ ' (' + self.scenario_name + ')'
return self.simpleName() + scen
def simpleName(self):
@@ -293,6 +293,8 @@ class WiredTigerTestCase(unittest.TestCase):
raise Exception(self.testdir + ": cannot remove directory")
os.makedirs(self.testdir)
os.chdir(self.testdir)
+ with open('testname.txt', 'w+') as namefile:
+ namefile.write(str(self) + '\n')
self.fdSetUp()
# tearDown needs a conn field, set it here in case the open fails.
self.conn = None
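
The net effect of the two wttest.py changes above: a scenario test now prints its name as 'simpleName -s <scenario_number> (<scenario_name>)', and setUp() records that string in a testname.txt file inside the test's work directory. A minimal stand-in reproducing only the new suffix formatting (the class, test name and scenario values below are invented; the real logic is the WiredTigerTestCase code shown above):

# Invented stand-in for the name formatting; illustrative only.
class FakeScenarioTest(object):
    scenario_number = 7
    scenario_name = 's_on.c_off'

    def simpleName(self):
        return 'test_txn15.test_sync'

    def __str__(self):
        scen = ''
        if hasattr(self, 'scenario_number') and hasattr(self, 'scenario_name'):
            scen = ' -s ' + str(self.scenario_number) + \
                ' (' + self.scenario_name + ')'
        return self.simpleName() + scen

print(FakeScenarioTest())   # test_txn15.test_sync -s 7 (s_on.c_off)
# setUp() now writes the same string into testname.txt in the test's
# work directory, so a failed directory can be matched back to its test.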
diff --git a/test/thread/smoke.sh b/test/thread/smoke.sh
index 9a235b1d8e9..aa2f86c1def 100755
--- a/test/thread/smoke.sh
+++ b/test/thread/smoke.sh
@@ -4,10 +4,10 @@ set -e
# Smoke-test format as part of running "make check".
$TEST_WRAPPER ./t -t f
-$TEST_WRAPPER ./t -S -F -t f
+$TEST_WRAPPER ./t -S -F -n 1000 -t f
$TEST_WRAPPER ./t -t r
-$TEST_WRAPPER ./t -S -F -t r
+$TEST_WRAPPER ./t -S -F -n 1000 -t r
$TEST_WRAPPER ./t -t v
-$TEST_WRAPPER ./t -S -F -t v
+$TEST_WRAPPER ./t -S -F -n 1000 -t v
diff --git a/test/utility/misc.c b/test/utility/misc.c
index dfc655dec1a..dffd29a5b6a 100644
--- a/test/utility/misc.c
+++ b/test/utility/misc.c
@@ -192,3 +192,18 @@ dstrdup(const void *str)
return (p);
testutil_die(errno, "strdup");
}
+
+/*
+ * dstrndup --
+ * Call emulating strndup, dying on failure. Don't use actual strndup here
+ * as it is not supported within MSVC.
+ */
+void *
+dstrndup(const char *str, size_t len)
+{
+ char *p;
+
+ p = dcalloc(len + 1, sizeof(char));
+ memcpy(p, str, len);
+ return (p);
+}
diff --git a/test/utility/test_util.h b/test/utility/test_util.h
index 66ff8de2d19..821e06084d2 100644
--- a/test/utility/test_util.h
+++ b/test/utility/test_util.h
@@ -115,6 +115,7 @@ void *dcalloc(size_t, size_t);
void *dmalloc(size_t);
void *drealloc(void *, size_t);
void *dstrdup(const void *);
+void *dstrndup(const char *, size_t);
void testutil_clean_work_dir(char *);
void testutil_cleanup(TEST_OPTS *);
void testutil_make_work_dir(char *);
diff --git a/tools/wtstats/stat_data.py b/tools/wtstats/stat_data.py
index a79cf1faf5e..b93f2449c63 100644
--- a/tools/wtstats/stat_data.py
+++ b/tools/wtstats/stat_data.py
@@ -3,17 +3,19 @@
no_scale_per_second_list = [
'async: current work queue length',
'async: maximum work queue length',
+ 'cache: bytes belonging to page images in the cache',
'cache: bytes currently in the cache',
+ 'cache: bytes not belonging to page images in the cache',
'cache: eviction currently operating in aggressive mode',
'cache: files with active eviction walks',
'cache: hazard pointer maximum array length',
'cache: maximum bytes configured',
'cache: maximum page size at eviction',
+ 'cache: overflow values cached in memory',
'cache: pages currently held in the cache',
'cache: percentage overhead',
'cache: tracked bytes belonging to internal pages in the cache',
'cache: tracked bytes belonging to leaf pages in the cache',
- 'cache: tracked bytes belonging to overflow pages in the cache',
'cache: tracked dirty bytes in the cache',
'cache: tracked dirty pages in the cache',
'connection: files currently open',
@@ -28,6 +30,22 @@ no_scale_per_second_list = [
'reconciliation: split objects currently awaiting free',
'session: open cursor count',
'session: open session count',
+ 'session: table compact failed calls',
+ 'session: table compact successful calls',
+ 'session: table create failed calls',
+ 'session: table create successful calls',
+ 'session: table drop failed calls',
+ 'session: table drop successful calls',
+ 'session: table rebalance failed calls',
+ 'session: table rebalance successful calls',
+ 'session: table rename failed calls',
+ 'session: table rename successful calls',
+ 'session: table salvage failed calls',
+ 'session: table salvage successful calls',
+ 'session: table truncate failed calls',
+ 'session: table truncate successful calls',
+ 'session: table verify failed calls',
+ 'session: table verify successful calls',
'thread-state: active filesystem fsync calls',
'thread-state: active filesystem read calls',
'thread-state: active filesystem write calls',
@@ -36,7 +54,10 @@ no_scale_per_second_list = [
'transaction: transaction checkpoint max time (msecs)',
'transaction: transaction checkpoint min time (msecs)',
'transaction: transaction checkpoint most recent time (msecs)',
+ 'transaction: transaction checkpoint scrub dirty target',
+ 'transaction: transaction checkpoint scrub time (msecs)',
'transaction: transaction checkpoint total time (msecs)',
+ 'transaction: transaction fsync duration for checkpoint after allocating the transaction ID (usecs)',
'transaction: transaction range of IDs currently pinned',
'transaction: transaction range of IDs currently pinned by a checkpoint',
'transaction: transaction range of IDs currently pinned by named snapshots',
@@ -64,6 +85,7 @@ no_scale_per_second_list = [
'btree: overflow pages',
'btree: row-store internal pages',
'btree: row-store leaf pages',
+ 'cache: bytes currently in the cache',
'cache: overflow values cached in memory',
'LSM: bloom filters in the LSM tree',
'LSM: chunks in the LSM tree',
@@ -74,7 +96,9 @@ no_scale_per_second_list = [
]
no_clear_list = [
'async: maximum work queue length',
+ 'cache: bytes belonging to page images in the cache',
'cache: bytes currently in the cache',
+ 'cache: bytes not belonging to page images in the cache',
'cache: eviction currently operating in aggressive mode',
'cache: files with active eviction walks',
'cache: maximum bytes configured',
@@ -83,7 +107,6 @@ no_clear_list = [
'cache: percentage overhead',
'cache: tracked bytes belonging to internal pages in the cache',
'cache: tracked bytes belonging to leaf pages in the cache',
- 'cache: tracked bytes belonging to overflow pages in the cache',
'cache: tracked dirty bytes in the cache',
'cache: tracked dirty pages in the cache',
'connection: files currently open',
@@ -98,6 +121,22 @@ no_clear_list = [
'reconciliation: split objects currently awaiting free',
'session: open cursor count',
'session: open session count',
+ 'session: table compact failed calls',
+ 'session: table compact successful calls',
+ 'session: table create failed calls',
+ 'session: table create successful calls',
+ 'session: table drop failed calls',
+ 'session: table drop successful calls',
+ 'session: table rebalance failed calls',
+ 'session: table rebalance successful calls',
+ 'session: table rename failed calls',
+ 'session: table rename successful calls',
+ 'session: table salvage failed calls',
+ 'session: table salvage successful calls',
+ 'session: table truncate failed calls',
+ 'session: table truncate successful calls',
+ 'session: table verify failed calls',
+ 'session: table verify successful calls',
'thread-state: active filesystem fsync calls',
'thread-state: active filesystem read calls',
'thread-state: active filesystem write calls',
@@ -106,11 +145,15 @@ no_clear_list = [
'transaction: transaction checkpoint max time (msecs)',
'transaction: transaction checkpoint min time (msecs)',
'transaction: transaction checkpoint most recent time (msecs)',
+ 'transaction: transaction checkpoint scrub dirty target',
+ 'transaction: transaction checkpoint scrub time (msecs)',
'transaction: transaction checkpoint total time (msecs)',
+ 'transaction: transaction fsync duration for checkpoint after allocating the transaction ID (usecs)',
'transaction: transaction range of IDs currently pinned',
'transaction: transaction range of IDs currently pinned by a checkpoint',
'transaction: transaction range of IDs currently pinned by named snapshots',
'btree: btree checkpoint generation',
+ 'cache: bytes currently in the cache',
'session: open cursor count',
]
prefix_list = [
diff --git a/tools/wtstats/wtstats.py b/tools/wtstats/wtstats.py
index ff62d99e825..3549031c30f 100755
--- a/tools/wtstats/wtstats.py
+++ b/tools/wtstats/wtstats.py
@@ -137,6 +137,8 @@ def parse_wtperf_file(file, result):
for i, v in enumerate(values):
if v == 'N':
v = 0
+ if v == 'Y':
+ v = 1
# convert us to ms
if '(ms)' in headings[i]:
v = float(v) / 1000.0
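
For clarity, a standalone sketch of the per-value normalization parse_wtperf_file() performs after this change: 'N'/'Y' monitor flags become 0/1 so they can be plotted, and, as before, columns whose heading carries '(ms)' are scaled from microseconds to milliseconds. The headings and row below are invented for illustration; this is not the wtstats parser itself.

# Standalone sketch of the value normalization in the hunk above.
def normalize(headings, values):
    out = []
    for i, v in enumerate(values):
        if v == 'N':
            v = 0
        if v == 'Y':
            v = 1
        # scale microsecond readings to ms for '(ms)'-tagged columns,
        # matching the pre-existing conversion
        if '(ms)' in headings[i]:
            v = float(v) / 1000.0
        out.append(v)
    return out

print(normalize(['checkpoint running', 'read maximum latency(ms)'],
                ['Y', '2500']))
# [1, 2.5]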