author     Luke Chen <luke.chen@mongodb.com>   2019-07-01 16:37:06 +1000
committer  Luke Chen <luke.chen@mongodb.com>   2019-07-01 16:37:06 +1000
commit     663bbd278e0a11d37cb2ba02f5e1349a9ce985bb (patch)
tree       7c7ce5088a2bc015d9e1d86ca7421637516bb39f
parent     ce5e56f2c2c82b0ec65c8c8957326fe96bf1a7e7 (diff)
download   mongo-663bbd278e0a11d37cb2ba02f5e1349a9ce985bb.tar.gz
Import wiredtiger: 4a3194b043b8cffb5339c12e1554d0bd42ed1b1f from branch mongodb-4.0
ref: 4051e4941c..4a3194b043
for: 4.0.11

WT-4477 Add eviction debug mode and extra checks
WT-4690 Make sure eviction does not split during checkpoints
WT-4706 Add a statistic to track the lookaside table size
WT-4712 Add debug log op record for non-logged tables
WT-4723 Restructure the reconciliation code
WT-4760 Checkpoint should not read past a stable update
WT-4776 Modify operations should be equivalent to updates
WT-4803 Implement file_max configuration for Cache Overflow mechanism
WT-4817 heap-buffer-overflow failure in timestamp_abort
WT-4823 Add check for uninitialised lookaside resources
WT-4827 Apply commit timestamps when a truncated page is read
WT-4848 Fix perf regression when calculating differences
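
(Illustration only, not part of the import.) The two user-visible options this import introduces, WT-4803's cache_overflow.file_max and WT-4477's debug_mode category, are ordinary wiredtiger_open configuration strings. A minimal sketch of enabling them from an application, with arbitrary example values, might look like:

#include <wiredtiger.h>

/*
 * Sketch only: open a connection with the options added by this import.
 * The values (500MB, rollback_error=20) are arbitrary examples, not
 * recommendations from the patch.
 */
static int
open_with_new_options(const char *home, WT_CONNECTION **connp)
{
	return (wiredtiger_open(home, NULL,
	    "create,"
	    "cache_overflow=(file_max=500MB),"
	    "debug_mode=(eviction=true,rollback_error=20,table_logging=true)",
	    connp));
}
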
-rw-r--r--  src/third_party/wiredtiger/dist/api_data.py | 37
-rw-r--r--  src/third_party/wiredtiger/dist/filelist | 5
-rw-r--r--  src/third_party/wiredtiger/dist/log.py | 3
-rw-r--r--  src/third_party/wiredtiger/dist/log_data.py | 32
-rw-r--r--  src/third_party/wiredtiger/dist/s_define.list | 1
-rw-r--r--  src/third_party/wiredtiger/dist/s_string.ok | 1
-rwxr-xr-x  src/third_party/wiredtiger/dist/s_void | 1
-rw-r--r--  src/third_party/wiredtiger/dist/stat_data.py | 2
-rw-r--r--  src/third_party/wiredtiger/import.data | 2
-rw-r--r--  src/third_party/wiredtiger/src/btree/bt_cursor.c | 9
-rw-r--r--  src/third_party/wiredtiger/src/btree/bt_delete.c | 2
-rw-r--r--  src/third_party/wiredtiger/src/btree/bt_read.c | 2
-rw-r--r--  src/third_party/wiredtiger/src/btree/bt_ret.c | 7
-rw-r--r--  src/third_party/wiredtiger/src/btree/bt_slvg.c | 9
-rw-r--r--  src/third_party/wiredtiger/src/btree/bt_split.c | 27
-rw-r--r--  src/third_party/wiredtiger/src/btree/bt_sync.c | 4
-rw-r--r--  src/third_party/wiredtiger/src/cache/cache_las.c | 66
-rw-r--r--  src/third_party/wiredtiger/src/config/config_def.c | 137
-rw-r--r--  src/third_party/wiredtiger/src/conn/api_calc_modify.c | 25
-rw-r--r--  src/third_party/wiredtiger/src/conn/conn_api.c | 57
-rw-r--r--  src/third_party/wiredtiger/src/conn/conn_handle.c | 3
-rw-r--r--  src/third_party/wiredtiger/src/conn/conn_log.c | 16
-rw-r--r--  src/third_party/wiredtiger/src/conn/conn_open.c | 2
-rw-r--r--  src/third_party/wiredtiger/src/conn/conn_reconfig.c | 4
-rw-r--r--  src/third_party/wiredtiger/src/evict/evict_file.c | 3
-rw-r--r--  src/third_party/wiredtiger/src/evict/evict_lru.c | 42
-rw-r--r--  src/third_party/wiredtiger/src/evict/evict_page.c | 72
-rw-r--r--  src/third_party/wiredtiger/src/include/btmem.h | 4
-rw-r--r--  src/third_party/wiredtiger/src/include/btree.h | 6
-rw-r--r--  src/third_party/wiredtiger/src/include/btree.i | 6
-rw-r--r--  src/third_party/wiredtiger/src/include/cache.h | 20
-rw-r--r--  src/third_party/wiredtiger/src/include/cell.h | 162
-rw-r--r--  src/third_party/wiredtiger/src/include/cell.i | 155
-rw-r--r--  src/third_party/wiredtiger/src/include/connection.h | 22
-rw-r--r--  src/third_party/wiredtiger/src/include/extern.h | 41
-rw-r--r--  src/third_party/wiredtiger/src/include/log.h | 3
-rw-r--r--  src/third_party/wiredtiger/src/include/meta.h | 1
-rw-r--r--  src/third_party/wiredtiger/src/include/reconcile.h | 294
-rw-r--r--  src/third_party/wiredtiger/src/include/reconcile.i | 257
-rw-r--r--  src/third_party/wiredtiger/src/include/stat.h | 2
-rw-r--r--  src/third_party/wiredtiger/src/include/txn.h | 2
-rw-r--r--  src/third_party/wiredtiger/src/include/txn.i | 46
-rw-r--r--  src/third_party/wiredtiger/src/include/wiredtiger.in | 740
-rw-r--r--  src/third_party/wiredtiger/src/include/wt_internal.h | 9
-rw-r--r--  src/third_party/wiredtiger/src/log/log.c | 14
-rw-r--r--  src/third_party/wiredtiger/src/log/log_auto.c | 96
-rw-r--r--  src/third_party/wiredtiger/src/reconcile/rec_child.c | 329
-rw-r--r--  src/third_party/wiredtiger/src/reconcile/rec_col.c | 1077
-rw-r--r--  src/third_party/wiredtiger/src/reconcile/rec_dictionary.c | 200
-rw-r--r--  src/third_party/wiredtiger/src/reconcile/rec_row.c | 1025
-rw-r--r--  src/third_party/wiredtiger/src/reconcile/rec_visibility.c | 405
-rw-r--r--  src/third_party/wiredtiger/src/reconcile/rec_write.c | 3647
-rw-r--r--  src/third_party/wiredtiger/src/support/stat.c | 8
-rw-r--r--  src/third_party/wiredtiger/src/txn/txn.c | 10
-rw-r--r--  src/third_party/wiredtiger/src/txn/txn_log.c | 78
-rw-r--r--  src/third_party/wiredtiger/src/txn/txn_recover.c | 28
-rw-r--r--  src/third_party/wiredtiger/src/txn/txn_timestamp.c | 13
-rw-r--r--  src/third_party/wiredtiger/test/csuite/Makefile.am | 4
-rw-r--r--  src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c | 63
-rw-r--r--  src/third_party/wiredtiger/test/csuite/wt4803_cache_overflow_abort/main.c | 239
-rw-r--r--  src/third_party/wiredtiger/test/evergreen.yml | 33
-rw-r--r--  src/third_party/wiredtiger/test/suite/test_debug_mode01.py | 94
-rw-r--r--  src/third_party/wiredtiger/test/suite/test_debug_mode02.py | 108
-rw-r--r--  src/third_party/wiredtiger/test/suite/test_debug_mode03.py | 79
-rw-r--r--  src/third_party/wiredtiger/test/suite/test_debug_mode04.py | 58
-rw-r--r--  src/third_party/wiredtiger/test/suite/test_las04.py | 110
-rw-r--r--  src/third_party/wiredtiger/test/suite/test_timestamp17.py | 173
-rw-r--r--  src/third_party/wiredtiger/test/suite/wttest.py | 21
68 files changed, 5987 insertions, 4266 deletions
diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py
index 6908a52f5e0..3e5f2806de0 100644
--- a/src/third_party/wiredtiger/dist/api_data.py
+++ b/src/third_party/wiredtiger/dist/api_data.py
@@ -430,6 +430,18 @@ connection_runtime_config = [
for space to be available in cache before giving up. Default will
wait forever''',
min=0),
+ Config('cache_overflow', '', r'''
+ cache overflow configuration options''',
+ type='category', subconfig=[
+ Config('file_max', '0', r'''
+ The maximum number of bytes that WiredTiger is allowed to use for
+ its cache overflow mechanism. If the cache overflow file exceeds
+ this size, a panic will be triggered. The default value means that
+ the cache overflow file is unbounded and may use as much space as
+ the filesystem will accommodate. The minimum non-zero setting is
+ 100MB.''', # !!! Must match WT_LAS_FILE_MIN
+ min='0')
+ ]),
Config('cache_overhead', '8', r'''
assume the heap allocator overhead is the specified percentage, and
adjust the cache usage by that amount (for example, if there is 10GB
@@ -456,6 +468,31 @@ connection_runtime_config = [
above 0 configures periodic checkpoints''',
min='0', max='100000'),
]),
+ Config('debug_mode', '', r'''
+ control the settings of various extended debugging features''',
+ type='category', subconfig=[
+ Config('checkpoint_retention', '0', r'''
+ adjust log archiving to retain the log records of this number
+ of checkpoints. Zero or one means perform normal archiving.''',
+ min='0', max='1024'),
+ Config('eviction', 'false', r'''
+ if true, modify internal algorithms to change skew to force
+ lookaside eviction to happen more aggressively. This includes but
+ is not limited to not skewing newest, not favoring leaf pages,
+ and modifying the eviction score mechanism.''',
+ type='boolean'),
+ Config('rollback_error', '0', r'''
+ return a WT_ROLLBACK error from a transaction operation about
+ every Nth operation to simulate a collision''',
+ min='0', max='10M'),
+ Config('table_logging', 'false', r'''
+ if true, write transaction related information to the log for all
+ operations, even operations for tables with logging turned off.
+ This setting introduces a log format change that may break older
+ versions of WiredTiger. These operations are informational and
+ skipped in recovery.''',
+ type='boolean'),
+ ]),
Config('error_prefix', '', r'''
prefix string for error messages'''),
Config('eviction', '', r'''
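
(Illustration only, not part of the patch.) The config_def.c hunks later in this diff also register the new cache_overflow and debug_mode categories with WT_CONNECTION.reconfigure, so the same options can be toggled on a live connection. A sketch with arbitrary example values:

#include <wiredtiger.h>

/*
 * Sketch only: adjust the new options on an already-open connection.
 * The specific values are arbitrary examples.
 */
static int
toggle_new_options(WT_CONNECTION *conn)
{
	int ret;

	/* Turn on debug table logging and simulated rollbacks. */
	if ((ret = conn->reconfigure(conn,
	    "debug_mode=(table_logging=true,rollback_error=20)")) != 0)
		return (ret);

	/* Cap the cache overflow (lookaside) file at 1GB. */
	return (conn->reconfigure(conn, "cache_overflow=(file_max=1GB)"));
}
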
diff --git a/src/third_party/wiredtiger/dist/filelist b/src/third_party/wiredtiger/dist/filelist
index 73fa6819e94..036b1a8b1a9 100644
--- a/src/third_party/wiredtiger/dist/filelist
+++ b/src/third_party/wiredtiger/dist/filelist
@@ -163,7 +163,12 @@ src/os_win/os_yield.c WINDOWS_HOST
src/packing/pack_api.c
src/packing/pack_impl.c
src/packing/pack_stream.c
+src/reconcile/rec_child.c
+src/reconcile/rec_col.c
+src/reconcile/rec_dictionary.c
+src/reconcile/rec_row.c
src/reconcile/rec_track.c
+src/reconcile/rec_visibility.c
src/reconcile/rec_write.c
src/schema/schema_alter.c
src/schema/schema_create.c
diff --git a/src/third_party/wiredtiger/dist/log.py b/src/third_party/wiredtiger/dist/log.py
index 4669b6bcc73..b3e6a71b63a 100644
--- a/src/third_party/wiredtiger/dist/log.py
+++ b/src/third_party/wiredtiger/dist/log.py
@@ -18,6 +18,9 @@ field_types = {
'WT_ERR(__logrec_make_hex_str(session, &escaped, &arg));']),
'recno' : ('uint64_t', 'r', '%" PRIu64 "', 'arg', [ '' ]),
'uint32' : ('uint32_t', 'I', '%" PRIu32 "', 'arg', [ '' ]),
+ # The fileid may have the high bit set. Print in both decimal and hex.
+ 'uint32_id' : ('uint32_t', 'I',
+ '%" PRIu32 " 0x%" PRIx32 "', 'arg, arg', [ '' ]),
'uint64' : ('uint64_t', 'Q', '%" PRIu64 "', 'arg', [ '' ]),
}
diff --git a/src/third_party/wiredtiger/dist/log_data.py b/src/third_party/wiredtiger/dist/log_data.py
index 9e1538ccf04..18f368eaad0 100644
--- a/src/third_party/wiredtiger/dist/log_data.py
+++ b/src/third_party/wiredtiger/dist/log_data.py
@@ -36,7 +36,7 @@ rectypes = [
# the allocated LSN to reduce the amount of work recovery has to do, and
# they are useful for debugging recovery.
LogRecordType('file_sync', 'file sync', [
- ('uint32', 'fileid'), ('int', 'start')]),
+ ('uint32_id', 'fileid'), ('int', 'start')]),
# Debugging message in the log
LogRecordType('message', 'message', [('string', 'message')]),
@@ -62,25 +62,39 @@ class LogOperationType:
optypes = [
# commit operations
LogOperationType('col_modify', 'column modify',
- [('uint32', 'fileid'), ('recno', 'recno'), ('item', 'value')]),
+ [('uint32_id', 'fileid'), ('recno', 'recno'), ('item', 'value')]),
LogOperationType('col_put', 'column put',
- [('uint32', 'fileid'), ('recno', 'recno'), ('item', 'value')]),
+ [('uint32_id', 'fileid'), ('recno', 'recno'), ('item', 'value')]),
LogOperationType('col_remove', 'column remove',
- [('uint32', 'fileid'), ('recno', 'recno')]),
+ [('uint32_id', 'fileid'), ('recno', 'recno')]),
LogOperationType('col_truncate', 'column truncate',
- [('uint32', 'fileid'), ('recno', 'start'), ('recno', 'stop')]),
+ [('uint32_id', 'fileid'), ('recno', 'start'), ('recno', 'stop')]),
LogOperationType('row_modify', 'row modify',
- [('uint32', 'fileid'), ('item', 'key'), ('item', 'value')]),
+ [('uint32_id', 'fileid'), ('item', 'key'), ('item', 'value')]),
LogOperationType('row_put', 'row put',
- [('uint32', 'fileid'), ('item', 'key'), ('item', 'value')]),
+ [('uint32_id', 'fileid'), ('item', 'key'), ('item', 'value')]),
LogOperationType('row_remove', 'row remove',
- [('uint32', 'fileid'), ('item', 'key')]),
+ [('uint32_id', 'fileid'), ('item', 'key')]),
LogOperationType('row_truncate', 'row truncate',
- [('uint32', 'fileid'), ('item', 'start'), ('item', 'stop'),
+ [('uint32_id', 'fileid'), ('item', 'start'), ('item', 'stop'),
('uint32', 'mode')]),
# system operations
LogOperationType('checkpoint_start', 'checkpoint start', []),
LogOperationType('prev_lsn', 'previous LSN',
[('WT_LSN', 'prev_lsn')]),
+
+# diagnostic operations
+# Operations used only for diagnostic purposes should have their type
+# values in the diagnostic range in src/include/wiredtiger.in so that they
+# are always ignored by recovery.
+ #
+ # We need to know the base size/type of a 'struct timespec'. Cast its
+ # parts to uint64_t and split it into seconds and nanoseconds.
+ #
+ LogOperationType('txn_timestamp', 'txn_timestamp',
+ [('uint64', 'time_sec'), ('uint64', 'time_nsec'),
+ ('uint64', 'commit_ts'), ('uint64', 'durable_ts'),
+ ('uint64', 'first_ts'), ('uint64', 'prepare_ts'),
+ ('uint64', 'read_ts')]),
]
diff --git a/src/third_party/wiredtiger/dist/s_define.list b/src/third_party/wiredtiger/dist/s_define.list
index f199900e860..4ed32778cbb 100644
--- a/src/third_party/wiredtiger/dist/s_define.list
+++ b/src/third_party/wiredtiger/dist/s_define.list
@@ -20,6 +20,7 @@ WT_BLOCK_HEADER_SIZE
WT_CACHE_LINE_ALIGNMENT
WT_CACHE_LINE_PAD_BEGIN
WT_CACHE_LINE_PAD_END
+WT_CELL_UNUSED_BIT4
WT_CLOCKDIFF_NS
WT_CONN_CHECK_PANIC
WT_DEADLOCK
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok
index c251c99f2fe..3f336d0443b 100644
--- a/src/third_party/wiredtiger/dist/s_string.ok
+++ b/src/third_party/wiredtiger/dist/s_string.ok
@@ -103,6 +103,7 @@ DbEnv
Decrement
Decrypt
DeleteFileW
+Dh
EACCES
EAGAIN
EB
diff --git a/src/third_party/wiredtiger/dist/s_void b/src/third_party/wiredtiger/dist/s_void
index 6c2b8b34040..2bb8b7abf0a 100755
--- a/src/third_party/wiredtiger/dist/s_void
+++ b/src/third_party/wiredtiger/dist/s_void
@@ -119,6 +119,7 @@ func_ok()
-e '/int snappy_pre_size$/d' \
-e '/int snappy_terminate$/d' \
-e '/int subtest_error_handler$/d' \
+ -e '/int test_las_workload$/d' \
-e '/int uri2name$/d' \
-e '/int usage$/d' \
-e '/int util_err$/d' \
diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py
index 8b26fa2e9af..34d957a75ec 100644
--- a/src/third_party/wiredtiger/dist/stat_data.py
+++ b/src/third_party/wiredtiger/dist/stat_data.py
@@ -274,6 +274,8 @@ connection_stats = [
CacheStat('cache_lookaside_cursor_wait_internal', 'cache overflow cursor internal thread wait time (usecs)'),
CacheStat('cache_lookaside_entries', 'cache overflow table entries', 'no_clear,no_scale'),
CacheStat('cache_lookaside_insert', 'cache overflow table insert calls'),
+ CacheStat('cache_lookaside_ondisk', 'cache overflow table on-disk size', 'no_clear,no_scale,size'),
+ CacheStat('cache_lookaside_ondisk_max', 'cache overflow table max on-disk size', 'no_clear,no_scale,size'),
CacheStat('cache_lookaside_remove', 'cache overflow table remove calls'),
CacheStat('cache_lookaside_score', 'cache overflow score', 'no_clear,no_scale'),
CacheStat('cache_overhead', 'percentage overhead', 'no_clear,no_scale'),
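
(Illustration only, not part of the patch.) The two statistics added above are readable through an ordinary statistics cursor. A sketch, assuming the generated key macro follows the usual WT_STAT_CONN_* naming produced from stat_data.py:

#include <inttypes.h>
#include <stdio.h>
#include <wiredtiger.h>

/*
 * Sketch only: read the new cache overflow on-disk size statistic.
 * Assumes the generated key macro is WT_STAT_CONN_CACHE_LOOKASIDE_ONDISK.
 */
static int
print_las_ondisk_size(WT_SESSION *session)
{
	WT_CURSOR *cursor;
	const char *desc, *pvalue;
	int64_t value;
	int ret;

	if ((ret = session->open_cursor(
	    session, "statistics:", NULL, NULL, &cursor)) != 0)
		return (ret);
	cursor->set_key(cursor, WT_STAT_CONN_CACHE_LOOKASIDE_ONDISK);
	if ((ret = cursor->search(cursor)) == 0 &&
	    (ret = cursor->get_value(cursor, &desc, &pvalue, &value)) == 0)
		printf("%s: %" PRId64 "\n", desc, value);
	return (cursor->close(cursor));
}
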
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 43dc53c86e3..a6cf0bc879e 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -1,5 +1,5 @@
{
- "commit": "4051e4941c894655cdb7d3dec97a7e32e7defbe6",
+ "commit": "4a3194b043b8cffb5339c12e1554d0bd42ed1b1f",
"github": "wiredtiger/wiredtiger.git",
"vendor": "wiredtiger",
"branch": "mongodb-4.0"
diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c
index 6047be0be14..55b41ad4b21 100644
--- a/src/third_party/wiredtiger/src/btree/bt_cursor.c
+++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c
@@ -1423,12 +1423,13 @@ __cursor_chain_exceeded(WT_CURSOR_BTREE *cbt)
upd != NULL && upd->type == WT_UPDATE_MODIFY;
++i, upd = upd->next) {
upd_size += WT_UPDATE_MEMSIZE(upd);
- if (upd_size >= WT_MODIFY_MEM_FACTOR * cursor->value.size)
+ if (i >= WT_MAX_MODIFY_UPDATE &&
+ upd_size * WT_MODIFY_MEM_FRACTION >= cursor->value.size)
return (true);
}
- if (upd != NULL && upd->type == WT_UPDATE_STANDARD &&
- __wt_txn_upd_visible_all(session, upd) &&
- i >= WT_MAX_MODIFY_UPDATE)
+ if (i >= WT_MAX_MODIFY_UPDATE && upd != NULL &&
+ upd->type == WT_UPDATE_STANDARD &&
+ __wt_txn_upd_visible_all(session, upd))
return (true);
return (false);
}
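
(Context only, not part of the patch.) The rewritten test pairs the existing WT_MAX_MODIFY_UPDATE length requirement with a size check against WT_MODIFY_MEM_FRACTION, which the btmem.h hunk later in this diff sets to 10: the modify chain may grow to roughly a tenth of the base value before a full update is forced. A toy restatement, assuming WT_MAX_MODIFY_UPDATE is 10:

#include <stdbool.h>
#include <stddef.h>

/*
 * Toy restatement of the new threshold, not code from the patch.
 * Assumes WT_MAX_MODIFY_UPDATE == 10 and WT_MODIFY_MEM_FRACTION == 10.
 */
static bool
modify_chain_exceeded(size_t entries, size_t chain_bytes, size_t value_bytes)
{
	/* e.g. a 1000-byte value tolerates roughly 100 bytes of modifies. */
	return (entries >= 10 && chain_bytes * 10 >= value_bytes);
}
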
diff --git a/src/third_party/wiredtiger/src/btree/bt_delete.c b/src/third_party/wiredtiger/src/btree/bt_delete.c
index b0fd6a58edf..63ee4a3bc7c 100644
--- a/src/third_party/wiredtiger/src/btree/bt_delete.c
+++ b/src/third_party/wiredtiger/src/btree/bt_delete.c
@@ -81,7 +81,7 @@ __wt_delete_page(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
}
(void)__wt_atomic_addv32(&S2BT(session)->evict_busy, 1);
- ret = __wt_evict(session, ref, false, previous_state);
+ ret = __wt_evict(session, ref, previous_state, 0);
(void)__wt_atomic_subv32(&S2BT(session)->evict_busy, 1);
WT_RET_BUSY_OK(ret);
ret = 0;
diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c
index 8dd918e8011..87f47f20aeb 100644
--- a/src/third_party/wiredtiger/src/btree/bt_read.c
+++ b/src/third_party/wiredtiger/src/btree/bt_read.c
@@ -776,7 +776,7 @@ read: /*
if (force_attempts < 10 &&
__evict_force_check(session, ref)) {
++force_attempts;
- ret = __wt_page_release_evict(session, ref);
+ ret = __wt_page_release_evict(session, ref, 0);
/* If forced eviction fails, stall. */
if (ret == EBUSY) {
WT_NOT_READ(ret, 0);
diff --git a/src/third_party/wiredtiger/src/btree/bt_ret.c b/src/third_party/wiredtiger/src/btree/bt_ret.c
index bc85dcee4f5..4b42221865e 100644
--- a/src/third_party/wiredtiger/src/btree/bt_ret.c
+++ b/src/third_party/wiredtiger/src/btree/bt_ret.c
@@ -201,6 +201,13 @@ __wt_value_return_upd(WT_SESSION_IMPL *session,
memcpy(listp, list, sizeof(list));
}
listp[i++] = upd;
+
+ /*
+ * Once a modify is found, all previously committed
+ * modifications should be applied regardless of
+ * visibility.
+ */
+ ignore_visibility = true;
}
}
diff --git a/src/third_party/wiredtiger/src/btree/bt_slvg.c b/src/third_party/wiredtiger/src/btree/bt_slvg.c
index f8f2552dc0a..5b0f2a5569a 100644
--- a/src/third_party/wiredtiger/src/btree/bt_slvg.c
+++ b/src/third_party/wiredtiger/src/btree/bt_slvg.c
@@ -328,7 +328,8 @@ __wt_bt_salvage(WT_SESSION_IMPL *session, WT_CKPT *ckptbase, const char *cfg[])
*/
if (ss->root_ref.page != NULL) {
btree->ckpt = ckptbase;
- ret = __wt_evict(session, &ss->root_ref, true, WT_REF_MEM);
+ ret = __wt_evict(session, &ss->root_ref, WT_REF_MEM,
+ WT_EVICT_CALL_CLOSING);
ss->root_ref.page = NULL;
btree->ckpt = NULL;
}
@@ -1300,7 +1301,8 @@ __slvg_col_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref)
ret = __wt_page_release(session, ref, 0);
if (ret == 0)
- ret = __wt_evict(session, ref, true, WT_REF_MEM);
+ ret = __wt_evict(session, ref, WT_REF_MEM,
+ WT_EVICT_CALL_CLOSING);
if (0) {
err: WT_TRET(__wt_page_release(session, ref, 0));
@@ -2019,7 +2021,8 @@ __slvg_row_build_leaf(
*/
ret = __wt_page_release(session, ref, 0);
if (ret == 0)
- ret = __wt_evict(session, ref, true, WT_REF_MEM);
+ ret = __wt_evict(session, ref, WT_REF_MEM,
+ WT_EVICT_CALL_CLOSING);
if (0) {
err: WT_TRET(__wt_page_release(session, ref, 0));
diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c
index 113b95e6ff9..9321cc88282 100644
--- a/src/third_party/wiredtiger/src/btree/bt_split.c
+++ b/src/third_party/wiredtiger/src/btree/bt_split.c
@@ -1406,6 +1406,25 @@ err: if (parent != NULL)
return (0);
}
+#ifdef HAVE_DIAGNOSTIC
+/*
+ * __check_upd_list --
+ * Sanity check an update list.
+ * In particular, make sure there are no birthmarks.
+ */
+static void
+__check_upd_list(WT_SESSION_IMPL *session, WT_UPDATE *upd)
+{
+ int birthmark_count;
+
+ for (birthmark_count = 0; upd != NULL; upd = upd->next)
+ if (upd->type == WT_UPDATE_BIRTHMARK)
+ ++birthmark_count;
+
+ WT_ASSERT(session, birthmark_count <= 1);
+}
+#endif
+
/*
* __split_multi_inmem --
* Instantiate a page from a disk image.
@@ -1501,6 +1520,10 @@ __split_multi_inmem(
key->size = WT_INSERT_KEY_SIZE(supd->ins);
}
+#ifdef HAVE_DIAGNOSTIC
+ __check_upd_list(session, upd);
+#endif
+
/* Search the page. */
WT_ERR(__wt_row_search(
session, key, ref, &cbt, true, true));
@@ -1802,9 +1825,11 @@ __split_insert(WT_SESSION_IMPL *session, WT_REF *ref)
WT_SKIP_FIRST(WT_ROW_INSERT_SMALLEST(page))) != NULL) {
key->data = WT_INSERT_KEY(ins);
key->size = WT_INSERT_KEY_SIZE(ins);
- } else
+ } else {
+ WT_ASSERT(session, page->entries > 0);
WT_ERR(__wt_row_leaf_key(
session, page, &page->pg_row[0], key, true));
+ }
WT_ERR(__wt_row_ikey(session, 0, key->data, key->size, child));
parent_incr += sizeof(WT_IKEY) + key->size;
__wt_scr_free(session, &key);
diff --git a/src/third_party/wiredtiger/src/btree/bt_sync.c b/src/third_party/wiredtiger/src/btree/bt_sync.c
index c7d17abd202..7113f4d9724 100644
--- a/src/third_party/wiredtiger/src/btree/bt_sync.c
+++ b/src/third_party/wiredtiger/src/btree/bt_sync.c
@@ -143,7 +143,7 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
* checkpoint, the on-disk version is correct. If the truncate is
* visible, we skip over the child page when writing its parent. We
* check whether a truncate is visible in the checkpoint as part of
- * reconciling internal pages (specifically in __rec_child_modify).
+ * reconciling internal pages (specifically in __wt_rec_child_modify).
*/
LF_SET(WT_READ_DELETED_SKIP);
@@ -326,7 +326,7 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
page->read_gen == WT_READGEN_WONT_NEED &&
!tried_eviction) {
WT_ERR_BUSY_OK(
- __wt_page_release_evict(session, walk));
+ __wt_page_release_evict(session, walk, 0));
walk = prev;
prev = NULL;
tried_eviction = true;
diff --git a/src/third_party/wiredtiger/src/cache/cache_las.c b/src/third_party/wiredtiger/src/cache/cache_las.c
index 77614e9c9e4..0e9f4f04f46 100644
--- a/src/third_party/wiredtiger/src/cache/cache_las.c
+++ b/src/third_party/wiredtiger/src/cache/cache_las.c
@@ -57,6 +57,46 @@ __las_entry_count(WT_CACHE *cache)
}
/*
+ * __wt_las_config --
+ * Configure the lookaside table.
+ */
+int
+__wt_las_config(WT_SESSION_IMPL *session, const char **cfg)
+{
+ WT_CONFIG_ITEM cval;
+ WT_CURSOR_BTREE *las_cursor;
+ WT_SESSION_IMPL *las_session;
+
+ WT_RET(__wt_config_gets(
+ session, cfg, "cache_overflow.file_max", &cval));
+
+ if (cval.val != 0 && cval.val < WT_LAS_FILE_MIN)
+ WT_RET_MSG(session, EINVAL,
+ "max cache overflow size %" PRId64 " below minimum %d",
+ cval.val, WT_LAS_FILE_MIN);
+
+ /* This is expected for in-memory configurations. */
+ las_session = S2C(session)->cache->las_session[0];
+ WT_ASSERT(session,
+ las_session != NULL || F_ISSET(S2C(session), WT_CONN_IN_MEMORY));
+
+ if (las_session == NULL)
+ return (0);
+
+ /*
+ * We need to set file_max on the btree associated with one of the
+ * lookaside sessions.
+ */
+ las_cursor = (WT_CURSOR_BTREE *)las_session->las_cursor;
+ las_cursor->btree->file_max = (uint64_t)cval.val;
+
+ WT_STAT_CONN_SET(
+ session, cache_lookaside_ondisk_max, las_cursor->btree->file_max);
+
+ return (0);
+}
+
+/*
* __wt_las_empty --
* Return when there are entries in the lookaside table.
*/
@@ -126,7 +166,7 @@ __wt_las_stats_update(WT_SESSION_IMPL *session)
* Initialize the database's lookaside store.
*/
int
-__wt_las_create(WT_SESSION_IMPL *session)
+__wt_las_create(WT_SESSION_IMPL *session, const char **cfg)
{
WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
@@ -166,6 +206,8 @@ __wt_las_create(WT_SESSION_IMPL *session)
WT_RET(__wt_las_cursor_open(cache->las_session[i]));
}
+ WT_RET(__wt_las_config(session, cfg));
+
/* The statistics server is already running, make sure we don't race. */
WT_WRITE_BARRIER();
F_SET(conn, WT_CONN_LOOKASIDE_OPEN);
@@ -609,8 +651,10 @@ __wt_las_insert_block(WT_CURSOR *cursor,
WT_SAVE_UPD *list;
WT_SESSION_IMPL *session;
WT_TXN_ISOLATION saved_isolation;
- WT_UPDATE *upd;
- uint64_t insert_cnt, las_counter, las_pageid, prepared_insert_cnt;
+ WT_UPDATE *first_upd, *upd;
+ wt_off_t las_size;
+ uint64_t insert_cnt, las_counter, las_pageid, max_las_size;
+ uint64_t prepared_insert_cnt;
uint32_t btree_id, i, slot;
uint8_t *p;
bool local_txn;
@@ -688,7 +732,7 @@ __wt_las_insert_block(WT_CURSOR *cursor,
slot = page->type == WT_PAGE_ROW_LEAF ?
WT_ROW_SLOT(page, list->ripcip) :
WT_COL_SLOT(page, list->ripcip);
- upd = list->ins == NULL ?
+ first_upd = upd = list->ins == NULL ?
page->modify->mod_row_update[slot] : list->ins->upd;
/*
@@ -707,6 +751,9 @@ __wt_las_insert_block(WT_CURSOR *cursor,
las_value.size = upd->size;
break;
case WT_UPDATE_BIRTHMARK:
+ WT_ASSERT(session, upd != first_upd ||
+ multi->page_las.skew_newest);
+ /* FALLTHROUGH */
case WT_UPDATE_TOMBSTONE:
las_value.size = 0;
break;
@@ -727,6 +774,8 @@ __wt_las_insert_block(WT_CURSOR *cursor,
(upd->type == WT_UPDATE_STANDARD ||
upd->type == WT_UPDATE_MODIFY)) {
las_value.size = 0;
+ WT_ASSERT(session, upd != first_upd ||
+ multi->page_las.skew_newest);
cursor->set_value(cursor, upd->txnid,
upd->timestamp, upd->prepare_state,
WT_UPDATE_BIRTHMARK, &las_value);
@@ -748,6 +797,14 @@ __wt_las_insert_block(WT_CURSOR *cursor,
} while ((upd = upd->next) != NULL);
}
+ WT_ERR(__wt_block_manager_named_size(session, WT_LAS_FILE, &las_size));
+ WT_STAT_CONN_SET(session, cache_lookaside_ondisk, las_size);
+ max_las_size = ((WT_CURSOR_BTREE *)cursor)->btree->file_max;
+ if (max_las_size != 0 && (uint64_t)las_size > max_las_size)
+ WT_PANIC_MSG(session, WT_PANIC,
+ "WiredTigerLAS: file size of %" PRIu64 " exceeds maximum "
+ "size %" PRIu64, (uint64_t)las_size, max_las_size);
+
err: /* Resolve the transaction. */
if (local_txn) {
if (ret == 0)
@@ -773,6 +830,7 @@ err: /* Resolve the transaction. */
__las_insert_block_verbose(session, btree, multi);
}
+ WT_UNUSED(first_upd);
return (ret);
}
diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c
index 521f3d4bdc8..9e78e669cbb 100644
--- a/src/third_party/wiredtiger/src/config/config_def.c
+++ b/src/third_party/wiredtiger/src/config/config_def.c
@@ -61,6 +61,12 @@ static const WT_CONFIG_CHECK
};
static const WT_CONFIG_CHECK
+ confchk_wiredtiger_open_cache_overflow_subconfigs[] = {
+ { "file_max", "int", NULL, "min=0", NULL, 0 },
+ { NULL, NULL, NULL, NULL, NULL, 0 }
+};
+
+static const WT_CONFIG_CHECK
confchk_wiredtiger_open_checkpoint_subconfigs[] = {
{ "log_size", "int", NULL, "min=0,max=2GB", NULL, 0 },
{ "wait", "int", NULL, "min=0,max=100000", NULL, 0 },
@@ -74,6 +80,17 @@ static const WT_CONFIG_CHECK
};
static const WT_CONFIG_CHECK
+ confchk_wiredtiger_open_debug_mode_subconfigs[] = {
+ { "checkpoint_retention", "int",
+ NULL, "min=0,max=1024",
+ NULL, 0 },
+ { "eviction", "boolean", NULL, NULL, NULL, 0 },
+ { "rollback_error", "int", NULL, "min=0,max=10M", NULL, 0 },
+ { "table_logging", "boolean", NULL, NULL, NULL, 0 },
+ { NULL, NULL, NULL, NULL, NULL, 0 }
+};
+
+static const WT_CONFIG_CHECK
confchk_wiredtiger_open_eviction_subconfigs[] = {
{ "threads_max", "int", NULL, "min=1,max=20", NULL, 0 },
{ "threads_min", "int", NULL, "min=1,max=20", NULL, 0 },
@@ -148,6 +165,9 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = {
NULL, NULL,
confchk_wiredtiger_open_async_subconfigs, 3 },
{ "cache_max_wait_ms", "int", NULL, "min=0", NULL, 0 },
+ { "cache_overflow", "category",
+ NULL, NULL,
+ confchk_wiredtiger_open_cache_overflow_subconfigs, 1 },
{ "cache_overhead", "int", NULL, "min=0,max=30", NULL, 0 },
{ "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 },
{ "checkpoint", "category",
@@ -156,6 +176,9 @@ static const WT_CONFIG_CHECK confchk_WT_CONNECTION_reconfigure[] = {
{ "compatibility", "category",
NULL, NULL,
confchk_WT_CONNECTION_reconfigure_compatibility_subconfigs, 1 },
+ { "debug_mode", "category",
+ NULL, NULL,
+ confchk_wiredtiger_open_debug_mode_subconfigs, 4 },
{ "error_prefix", "string", NULL, NULL, NULL, 0 },
{ "eviction", "category",
NULL, NULL,
@@ -839,6 +862,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
{ "builtin_extension_config", "string", NULL, NULL, NULL, 0 },
{ "cache_cursors", "boolean", NULL, NULL, NULL, 0 },
{ "cache_max_wait_ms", "int", NULL, "min=0", NULL, 0 },
+ { "cache_overflow", "category",
+ NULL, NULL,
+ confchk_wiredtiger_open_cache_overflow_subconfigs, 1 },
{ "cache_overhead", "int", NULL, "min=0,max=30", NULL, 0 },
{ "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 },
{ "checkpoint", "category",
@@ -850,6 +876,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = {
confchk_wiredtiger_open_compatibility_subconfigs, 3 },
{ "config_base", "boolean", NULL, NULL, NULL, 0 },
{ "create", "boolean", NULL, NULL, NULL, 0 },
+ { "debug_mode", "category",
+ NULL, NULL,
+ confchk_wiredtiger_open_debug_mode_subconfigs, 4 },
{ "direct_io", "list",
NULL, "choices=[\"checkpoint\",\"data\",\"log\"]",
NULL, 0 },
@@ -948,6 +977,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = {
{ "builtin_extension_config", "string", NULL, NULL, NULL, 0 },
{ "cache_cursors", "boolean", NULL, NULL, NULL, 0 },
{ "cache_max_wait_ms", "int", NULL, "min=0", NULL, 0 },
+ { "cache_overflow", "category",
+ NULL, NULL,
+ confchk_wiredtiger_open_cache_overflow_subconfigs, 1 },
{ "cache_overhead", "int", NULL, "min=0,max=30", NULL, 0 },
{ "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 },
{ "checkpoint", "category",
@@ -959,6 +991,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_all[] = {
confchk_wiredtiger_open_compatibility_subconfigs, 3 },
{ "config_base", "boolean", NULL, NULL, NULL, 0 },
{ "create", "boolean", NULL, NULL, NULL, 0 },
+ { "debug_mode", "category",
+ NULL, NULL,
+ confchk_wiredtiger_open_debug_mode_subconfigs, 4 },
{ "direct_io", "list",
NULL, "choices=[\"checkpoint\",\"data\",\"log\"]",
NULL, 0 },
@@ -1058,6 +1093,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = {
{ "builtin_extension_config", "string", NULL, NULL, NULL, 0 },
{ "cache_cursors", "boolean", NULL, NULL, NULL, 0 },
{ "cache_max_wait_ms", "int", NULL, "min=0", NULL, 0 },
+ { "cache_overflow", "category",
+ NULL, NULL,
+ confchk_wiredtiger_open_cache_overflow_subconfigs, 1 },
{ "cache_overhead", "int", NULL, "min=0,max=30", NULL, 0 },
{ "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 },
{ "checkpoint", "category",
@@ -1067,6 +1105,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_basecfg[] = {
{ "compatibility", "category",
NULL, NULL,
confchk_wiredtiger_open_compatibility_subconfigs, 3 },
+ { "debug_mode", "category",
+ NULL, NULL,
+ confchk_wiredtiger_open_debug_mode_subconfigs, 4 },
{ "direct_io", "list",
NULL, "choices=[\"checkpoint\",\"data\",\"log\"]",
NULL, 0 },
@@ -1162,6 +1203,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = {
{ "builtin_extension_config", "string", NULL, NULL, NULL, 0 },
{ "cache_cursors", "boolean", NULL, NULL, NULL, 0 },
{ "cache_max_wait_ms", "int", NULL, "min=0", NULL, 0 },
+ { "cache_overflow", "category",
+ NULL, NULL,
+ confchk_wiredtiger_open_cache_overflow_subconfigs, 1 },
{ "cache_overhead", "int", NULL, "min=0,max=30", NULL, 0 },
{ "cache_size", "int", NULL, "min=1MB,max=10TB", NULL, 0 },
{ "checkpoint", "category",
@@ -1171,6 +1215,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open_usercfg[] = {
{ "compatibility", "category",
NULL, NULL,
confchk_wiredtiger_open_compatibility_subconfigs, 3 },
+ { "debug_mode", "category",
+ NULL, NULL,
+ confchk_wiredtiger_open_debug_mode_subconfigs, 4 },
{ "direct_io", "list",
NULL, "choices=[\"checkpoint\",\"data\",\"log\"]",
NULL, 0 },
@@ -1307,8 +1354,10 @@ static const WT_CONFIG_ENTRY config_entries[] = {
},
{ "WT_CONNECTION.reconfigure",
"async=(enabled=false,ops_max=1024,threads=2),cache_max_wait_ms=0"
- ",cache_overhead=8,cache_size=100MB,checkpoint=(log_size=0,"
- "wait=0),compatibility=(release=),error_prefix=,"
+ ",cache_overflow=(file_max=0),cache_overhead=8,cache_size=100MB,"
+ "checkpoint=(log_size=0,wait=0),compatibility=(release=),"
+ "debug_mode=(checkpoint_retention=0,eviction=false,"
+ "rollback_error=0,table_logging=false),error_prefix=,"
"eviction=(threads_max=8,threads_min=1),"
"eviction_checkpoint_target=1,eviction_dirty_target=5,"
"eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
@@ -1321,7 +1370,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"statistics=none,statistics_log=(json=false,on_close=false,"
"sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
"timing_stress_for_test=,verbose=",
- confchk_WT_CONNECTION_reconfigure, 24
+ confchk_WT_CONNECTION_reconfigure, 26
},
{ "WT_CONNECTION.rollback_to_stable",
"",
@@ -1556,19 +1605,22 @@ static const WT_CONFIG_ENTRY config_entries[] = {
{ "wiredtiger_open",
"async=(enabled=false,ops_max=1024,threads=2),buffer_alignment=-1"
",builtin_extension_config=,cache_cursors=true,"
- "cache_max_wait_ms=0,cache_overhead=8,cache_size=100MB,"
- "checkpoint=(log_size=0,wait=0),checkpoint_sync=true,"
- "compatibility=(release=,require_max=,require_min=),"
- "config_base=true,create=false,direct_io=,encryption=(keyid=,"
- "name=,secretkey=),error_prefix=,eviction=(threads_max=8,"
- "threads_min=1),eviction_checkpoint_target=1,"
- "eviction_dirty_target=5,eviction_dirty_trigger=20,"
- "eviction_target=80,eviction_trigger=95,exclusive=false,"
- "extensions=,file_extend=,file_manager=(close_handle_minimum=250,"
- "close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
- "in_memory=false,io_capacity=(total=0),log=(archive=true,"
- "compressor=,enabled=false,file_max=100MB,os_cache_dirty_pct=0,"
- "path=\".\",prealloc=true,recover=on,zero_fill=false),"
+ "cache_max_wait_ms=0,cache_overflow=(file_max=0),cache_overhead=8"
+ ",cache_size=100MB,checkpoint=(log_size=0,wait=0),"
+ "checkpoint_sync=true,compatibility=(release=,require_max=,"
+ "require_min=),config_base=true,create=false,"
+ "debug_mode=(checkpoint_retention=0,eviction=false,"
+ "rollback_error=0,table_logging=false),direct_io=,"
+ "encryption=(keyid=,name=,secretkey=),error_prefix=,"
+ "eviction=(threads_max=8,threads_min=1),"
+ "eviction_checkpoint_target=1,eviction_dirty_target=5,"
+ "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
+ ",exclusive=false,extensions=,file_extend=,"
+ "file_manager=(close_handle_minimum=250,close_idle_time=30,"
+ "close_scan_interval=10),hazard_max=1000,in_memory=false,"
+ "io_capacity=(total=0),log=(archive=true,compressor=,"
+ "enabled=false,file_max=100MB,os_cache_dirty_pct=0,path=\".\","
+ "prealloc=true,recover=on,zero_fill=false),"
"lsm_manager=(merge=true,worker_thread_max=4),lsm_merge=true,"
"mmap=true,multiprocess=false,operation_tracking=(enabled=false,"
"path=\".\"),readonly=false,salvage=false,session_max=100,"
@@ -1579,24 +1631,27 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"timing_stress_for_test=,transaction_sync=(enabled=false,"
"method=fsync),use_environment=true,use_environment_priv=false,"
"verbose=,write_through=",
- confchk_wiredtiger_open, 48
+ confchk_wiredtiger_open, 50
},
{ "wiredtiger_open_all",
"async=(enabled=false,ops_max=1024,threads=2),buffer_alignment=-1"
",builtin_extension_config=,cache_cursors=true,"
- "cache_max_wait_ms=0,cache_overhead=8,cache_size=100MB,"
- "checkpoint=(log_size=0,wait=0),checkpoint_sync=true,"
- "compatibility=(release=,require_max=,require_min=),"
- "config_base=true,create=false,direct_io=,encryption=(keyid=,"
- "name=,secretkey=),error_prefix=,eviction=(threads_max=8,"
- "threads_min=1),eviction_checkpoint_target=1,"
- "eviction_dirty_target=5,eviction_dirty_trigger=20,"
- "eviction_target=80,eviction_trigger=95,exclusive=false,"
- "extensions=,file_extend=,file_manager=(close_handle_minimum=250,"
- "close_idle_time=30,close_scan_interval=10),hazard_max=1000,"
- "in_memory=false,io_capacity=(total=0),log=(archive=true,"
- "compressor=,enabled=false,file_max=100MB,os_cache_dirty_pct=0,"
- "path=\".\",prealloc=true,recover=on,zero_fill=false),"
+ "cache_max_wait_ms=0,cache_overflow=(file_max=0),cache_overhead=8"
+ ",cache_size=100MB,checkpoint=(log_size=0,wait=0),"
+ "checkpoint_sync=true,compatibility=(release=,require_max=,"
+ "require_min=),config_base=true,create=false,"
+ "debug_mode=(checkpoint_retention=0,eviction=false,"
+ "rollback_error=0,table_logging=false),direct_io=,"
+ "encryption=(keyid=,name=,secretkey=),error_prefix=,"
+ "eviction=(threads_max=8,threads_min=1),"
+ "eviction_checkpoint_target=1,eviction_dirty_target=5,"
+ "eviction_dirty_trigger=20,eviction_target=80,eviction_trigger=95"
+ ",exclusive=false,extensions=,file_extend=,"
+ "file_manager=(close_handle_minimum=250,close_idle_time=30,"
+ "close_scan_interval=10),hazard_max=1000,in_memory=false,"
+ "io_capacity=(total=0),log=(archive=true,compressor=,"
+ "enabled=false,file_max=100MB,os_cache_dirty_pct=0,path=\".\","
+ "prealloc=true,recover=on,zero_fill=false),"
"lsm_manager=(merge=true,worker_thread_max=4),lsm_merge=true,"
"mmap=true,multiprocess=false,operation_tracking=(enabled=false,"
"path=\".\"),readonly=false,salvage=false,session_max=100,"
@@ -1607,14 +1662,16 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"timing_stress_for_test=,transaction_sync=(enabled=false,"
"method=fsync),use_environment=true,use_environment_priv=false,"
"verbose=,version=(major=0,minor=0),write_through=",
- confchk_wiredtiger_open_all, 49
+ confchk_wiredtiger_open_all, 51
},
{ "wiredtiger_open_basecfg",
"async=(enabled=false,ops_max=1024,threads=2),buffer_alignment=-1"
",builtin_extension_config=,cache_cursors=true,"
- "cache_max_wait_ms=0,cache_overhead=8,cache_size=100MB,"
- "checkpoint=(log_size=0,wait=0),checkpoint_sync=true,"
- "compatibility=(release=,require_max=,require_min=),direct_io=,"
+ "cache_max_wait_ms=0,cache_overflow=(file_max=0),cache_overhead=8"
+ ",cache_size=100MB,checkpoint=(log_size=0,wait=0),"
+ "checkpoint_sync=true,compatibility=(release=,require_max=,"
+ "require_min=),debug_mode=(checkpoint_retention=0,eviction=false,"
+ "rollback_error=0,table_logging=false),direct_io=,"
"encryption=(keyid=,name=,secretkey=),error_prefix=,"
"eviction=(threads_max=8,threads_min=1),"
"eviction_checkpoint_target=1,eviction_dirty_target=5,"
@@ -1633,14 +1690,16 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
"timing_stress_for_test=,transaction_sync=(enabled=false,"
"method=fsync),verbose=,version=(major=0,minor=0),write_through=",
- confchk_wiredtiger_open_basecfg, 43
+ confchk_wiredtiger_open_basecfg, 45
},
{ "wiredtiger_open_usercfg",
"async=(enabled=false,ops_max=1024,threads=2),buffer_alignment=-1"
",builtin_extension_config=,cache_cursors=true,"
- "cache_max_wait_ms=0,cache_overhead=8,cache_size=100MB,"
- "checkpoint=(log_size=0,wait=0),checkpoint_sync=true,"
- "compatibility=(release=,require_max=,require_min=),direct_io=,"
+ "cache_max_wait_ms=0,cache_overflow=(file_max=0),cache_overhead=8"
+ ",cache_size=100MB,checkpoint=(log_size=0,wait=0),"
+ "checkpoint_sync=true,compatibility=(release=,require_max=,"
+ "require_min=),debug_mode=(checkpoint_retention=0,eviction=false,"
+ "rollback_error=0,table_logging=false),direct_io=,"
"encryption=(keyid=,name=,secretkey=),error_prefix=,"
"eviction=(threads_max=8,threads_min=1),"
"eviction_checkpoint_target=1,eviction_dirty_target=5,"
@@ -1659,7 +1718,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {
"path=\".\",sources=,timestamp=\"%b %d %H:%M:%S\",wait=0),"
"timing_stress_for_test=,transaction_sync=(enabled=false,"
"method=fsync),verbose=,write_through=",
- confchk_wiredtiger_open_usercfg, 42
+ confchk_wiredtiger_open_usercfg, 44
},
{ NULL, NULL, NULL, 0 }
};
diff --git a/src/third_party/wiredtiger/src/conn/api_calc_modify.c b/src/third_party/wiredtiger/src/conn/api_calc_modify.c
index 4a435a85ef1..a8091498ee6 100644
--- a/src/third_party/wiredtiger/src/conn/api_calc_modify.c
+++ b/src/third_party/wiredtiger/src/conn/api_calc_modify.c
@@ -69,16 +69,31 @@ static void
__cm_extend(WT_CM_STATE *cms,
const uint8_t *m1, const uint8_t *m2, WT_CM_MATCH *match)
{
+ ptrdiff_t n;
const uint8_t *p1, *p2;
- /* Step past the end and before the beginning of the matching block. */
+ p1 = m1;
+ p2 = m2;
+
+ /*
+ * Keep skipping half of the remaining bytes while they compare equal.
+ * This is significantly faster than our byte-at-a-time loop below.
+ */
for (p1 = m1, p2 = m2;
- p1 < cms->e1 && p2 < cms->e2 && *p1 == *p2;
- p1++, p2++)
+ (n = WT_MIN(cms->e1 - p1, cms->e2 - p2) / 2) > 8 &&
+ memcmp(p1, p2, (size_t)n) == 0;
+ p1 += n, p2 += n)
+ ;
+
+ /* Step past the end and before the beginning of the matching block. */
+ for (n = WT_MIN(cms->e1 - p1, cms->e2 - p2);
+ n > 0 && *p1 == *p2;
+ n--, p1++, p2++)
;
- for (; m1 >= cms->used1 && m2 >= cms->used2 && *m1 == *m2;
- m1--, m2--)
+ for (n = WT_MIN(m1 - cms->used1, m2 - cms->used2);
+ n > 0 && *m1 == *m2;
+ n--, m1--, m2--)
;
match->m1 = m1 + 1;
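
(Illustration only, not part of the patch.) The new forward loop halves the remaining window with memcmp before falling back to the original byte-at-a-time scan. The same idea, stripped of the WT_CM_STATE bookkeeping:

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/*
 * Standalone sketch of the technique used in __cm_extend above: extend a
 * matching region forward by repeatedly testing half of the remaining
 * bytes with memcmp, then finishing byte-at-a-time.
 */
static size_t
common_prefix(const uint8_t *a, const uint8_t *b, size_t max)
{
	size_t matched, n;

	matched = 0;
	/* Skip half of what remains while the halves compare equal. */
	while ((n = (max - matched) / 2) > 8 &&
	    memcmp(a + matched, b + matched, n) == 0)
		matched += n;
	/* Finish with the byte-at-a-time loop. */
	while (matched < max && a[matched] == b[matched])
		++matched;
	return (matched);
}
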
diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c
index ef0072c45ac..54199fd38ad 100644
--- a/src/third_party/wiredtiger/src/conn/conn_api.c
+++ b/src/third_party/wiredtiger/src/conn/conn_api.c
@@ -1816,6 +1816,57 @@ err: /*
return (ret);
}
+/*
+ * __wt_debug_mode_config --
+ * Set debugging configuration.
+ */
+int
+__wt_debug_mode_config(WT_SESSION_IMPL *session, const char *cfg[])
+{
+ WT_CACHE *cache;
+ WT_CONFIG_ITEM cval;
+ WT_CONNECTION_IMPL *conn;
+ WT_TXN_GLOBAL *txn_global;
+
+ conn = S2C(session);
+ cache = conn->cache;
+ txn_global = &conn->txn_global;
+
+ WT_RET(__wt_config_gets(session,
+ cfg, "debug_mode.checkpoint_retention", &cval));
+ conn->debug_ckpt_cnt = (uint32_t)cval.val;
+ if (cval.val == 0) {
+ if (conn->debug_ckpt != NULL)
+ __wt_free(session, conn->debug_ckpt);
+ conn->debug_ckpt = NULL;
+ } else if (conn->debug_ckpt != NULL)
+ WT_RET(__wt_realloc(session, NULL,
+ conn->debug_ckpt_cnt, &conn->debug_ckpt));
+ else
+ WT_RET(__wt_calloc_def(session,
+ conn->debug_ckpt_cnt, &conn->debug_ckpt));
+
+ WT_RET(__wt_config_gets(session,
+ cfg, "debug_mode.eviction", &cval));
+ if (cval.val)
+ F_SET(cache, WT_CACHE_EVICT_DEBUG_MODE);
+ else
+ F_CLR(cache, WT_CACHE_EVICT_DEBUG_MODE);
+
+ WT_RET(__wt_config_gets(session,
+ cfg, "debug_mode.rollback_error", &cval));
+ txn_global->debug_rollback = (uint64_t)cval.val;
+
+ WT_RET(__wt_config_gets(session,
+ cfg, "debug_mode.table_logging", &cval));
+ if (cval.val)
+ FLD_SET(conn->log_flags, WT_CONN_LOG_DEBUG_MODE);
+ else
+ FLD_CLR(conn->log_flags, WT_CONN_LOG_DEBUG_MODE);
+
+ return (0);
+}
+
/* Simple structure for name and flag configuration searches. */
typedef struct {
const char *name;
@@ -2707,6 +2758,12 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
session = conn->default_session;
/*
+ * This function expects the cache to be created so parse this after
+ * the rest of the connection is set up.
+ */
+ WT_ERR(__wt_debug_mode_config(session, cfg));
+
+ /*
* Load the extensions after initialization completes; extensions expect
* everything else to be in place, and the extensions call back into the
* library.
diff --git a/src/third_party/wiredtiger/src/conn/conn_handle.c b/src/third_party/wiredtiger/src/conn/conn_handle.c
index a3818b3c914..faee6216ed7 100644
--- a/src/third_party/wiredtiger/src/conn/conn_handle.c
+++ b/src/third_party/wiredtiger/src/conn/conn_handle.c
@@ -132,8 +132,9 @@ __wt_connection_destroy(WT_CONNECTION_IMPL *conn)
/* Free allocated memory. */
__wt_free(session, conn->cfg);
- __wt_free(session, conn->home);
+ __wt_free(session, conn->debug_ckpt);
__wt_free(session, conn->error_prefix);
+ __wt_free(session, conn->home);
__wt_free(session, conn->sessions);
__wt_stat_connection_discard(session, conn);
diff --git a/src/third_party/wiredtiger/src/conn/conn_log.c b/src/third_party/wiredtiger/src/conn/conn_log.c
index 8bc111346c5..cd93e459e0a 100644
--- a/src/third_party/wiredtiger/src/conn/conn_log.c
+++ b/src/third_party/wiredtiger/src/conn/conn_log.c
@@ -372,9 +372,19 @@ __log_archive_once(WT_SESSION_IMPL *session, uint32_t backup_file)
*/
if (backup_file != 0)
min_lognum = WT_MIN(log->ckpt_lsn.l.file, backup_file);
- else
- min_lognum = WT_MIN(
- log->ckpt_lsn.l.file, log->sync_lsn.l.file);
+ else {
+ /*
+ * Figure out the minimum log file to archive. Use the
+ * LSN in the debugging array if necessary.
+ */
+ if (conn->debug_ckpt_cnt == 0)
+ min_lognum = WT_MIN(
+ log->ckpt_lsn.l.file, log->sync_lsn.l.file);
+ else
+ min_lognum = WT_MIN(
+ conn->debug_ckpt[conn->debug_ckpt_cnt - 1].l.file,
+ log->sync_lsn.l.file);
+ }
__wt_verbose(session, WT_VERB_LOG,
"log_archive: archive to log number %" PRIu32, min_lognum);
diff --git a/src/third_party/wiredtiger/src/conn/conn_open.c b/src/third_party/wiredtiger/src/conn/conn_open.c
index 7a2b52f40f9..fba1132ecb7 100644
--- a/src/third_party/wiredtiger/src/conn/conn_open.c
+++ b/src/third_party/wiredtiger/src/conn/conn_open.c
@@ -238,7 +238,7 @@ __wt_connection_workers(WT_SESSION_IMPL *session, const char *cfg[])
WT_RET(__wt_meta_track_init(session));
/* Create the lookaside table. */
- WT_RET(__wt_las_create(session));
+ WT_RET(__wt_las_create(session, cfg));
/*
* Start eviction threads.
diff --git a/src/third_party/wiredtiger/src/conn/conn_reconfig.c b/src/third_party/wiredtiger/src/conn/conn_reconfig.c
index 1cd589c32c9..fa0726a1306 100644
--- a/src/third_party/wiredtiger/src/conn/conn_reconfig.c
+++ b/src/third_party/wiredtiger/src/conn/conn_reconfig.c
@@ -488,12 +488,14 @@ __wt_conn_reconfig(WT_SESSION_IMPL *session, const char **cfg)
WT_ERR(__wt_cache_config(session, true, cfg));
WT_ERR(__wt_capacity_server_create(session, cfg));
WT_ERR(__wt_checkpoint_server_create(session, cfg));
+ WT_ERR(__wt_debug_mode_config(session, cfg));
+ WT_ERR(__wt_las_config(session, cfg));
WT_ERR(__wt_logmgr_reconfig(session, cfg));
WT_ERR(__wt_lsm_manager_reconfig(session, cfg));
WT_ERR(__wt_statlog_create(session, cfg));
WT_ERR(__wt_sweep_config(session, cfg));
- WT_ERR(__wt_verbose_config(session, cfg));
WT_ERR(__wt_timing_stress_config(session, cfg));
+ WT_ERR(__wt_verbose_config(session, cfg));
/* Third, merge everything together, creating a new connection state. */
WT_ERR(__wt_config_merge(session, cfg, NULL, &p));
diff --git a/src/third_party/wiredtiger/src/evict/evict_file.c b/src/third_party/wiredtiger/src/evict/evict_file.c
index b9747d1b681..0e806f20608 100644
--- a/src/third_party/wiredtiger/src/evict/evict_file.c
+++ b/src/third_party/wiredtiger/src/evict/evict_file.c
@@ -95,7 +95,8 @@ __wt_evict_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
* Ensure the ref state is restored to the previous
* value if eviction fails.
*/
- WT_ERR(__wt_evict(session, ref, true, ref->state));
+ WT_ERR(__wt_evict(session, ref, ref->state,
+ WT_EVICT_CALL_CLOSING));
break;
case WT_SYNC_DISCARD:
/*
diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c
index 3001f3d23da..f40ed758a19 100644
--- a/src/third_party/wiredtiger/src/evict/evict_lru.c
+++ b/src/third_party/wiredtiger/src/evict/evict_lru.c
@@ -107,6 +107,25 @@ __evict_entry_priority(WT_SESSION_IMPL *session, WT_REF *ref)
}
/*
+ * __evict_lru_cmp_debug --
+ * Qsort function: sort the eviction array.
+ * Version for eviction debug mode.
+ */
+static int WT_CDECL
+__evict_lru_cmp_debug(const void *a_arg, const void *b_arg)
+{
+ const WT_EVICT_ENTRY *a, *b;
+ uint64_t a_score, b_score;
+
+ a = a_arg;
+ b = b_arg;
+ a_score = (a->ref == NULL ? UINT64_MAX : 0);
+ b_score = (b->ref == NULL ? UINT64_MAX : 0);
+
+ return ((a_score < b_score) ? -1 : (a_score == b_score) ? 0 : 1);
+}
+
+/*
* __evict_lru_cmp --
* Qsort function: sort the eviction array.
*/
@@ -1257,8 +1276,17 @@ __evict_lru_walk(WT_SESSION_IMPL *session)
queue->evict_current = NULL;
entries = queue->evict_entries;
- __wt_qsort(queue->evict_queue,
- entries, sizeof(WT_EVICT_ENTRY), __evict_lru_cmp);
+ /*
+ * Style note: __wt_qsort is a macro that can leave a dangling
+ * else. Full curly braces are needed here for the compiler.
+ */
+ if (F_ISSET(cache, WT_CACHE_EVICT_DEBUG_MODE)) {
+ __wt_qsort(queue->evict_queue,
+ entries, sizeof(WT_EVICT_ENTRY), __evict_lru_cmp_debug);
+ } else {
+ __wt_qsort(queue->evict_queue,
+ entries, sizeof(WT_EVICT_ENTRY), __evict_lru_cmp);
+ }
/* Trim empty entries from the end. */
while (entries > 0 && queue->evict_queue[entries - 1].ref == NULL)
@@ -1975,12 +2003,14 @@ __evict_walk_tree(WT_SESSION_IMPL *session,
* cache (indicated by seeing an internal page that is the
* parent of the last page we saw).
*
- * Also skip internal page unless we get aggressive or the tree
- * is idle (indicated by the tree being skipped for walks).
+ * Also skip internal page unless we get aggressive, the tree
+ * is idle (indicated by the tree being skipped for walks),
+ * or we are in eviction debug mode.
* The goal here is that if trees become completely idle, we
* eventually push them out of cache completely.
*/
- if (WT_PAGE_IS_INTERNAL(page)) {
+ if (!F_ISSET(cache, WT_CACHE_EVICT_DEBUG_MODE) &&
+ WT_PAGE_IS_INTERNAL(page)) {
if (page == last_parent)
continue;
if (btree->evict_walk_period == 0 &&
@@ -2320,7 +2350,7 @@ __evict_page(WT_SESSION_IMPL *session, bool is_server)
__wt_cache_read_gen_bump(session, ref->page);
WT_WITH_BTREE(session, btree,
- ret = __wt_evict(session, ref, false, previous_state));
+ ret = __wt_evict(session, ref, previous_state, 0));
(void)__wt_atomic_subv32(&btree->evict_busy, 1);
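
(Illustration only, not part of the patch.) The "style note" added in the __evict_lru_walk hunk above refers to the dangling-else hazard with statement-like macros. A minimal standalone example, where SORT() merely stands in for such a macro and is not WiredTiger's actual __wt_qsort definition:

#include <stdio.h>
#include <stdlib.h>

/* A stand-in for any macro that expands to an if without an else. */
#define SORT(arr, n)							\
	if ((n) > 1)							\
		qsort(arr, n, sizeof((arr)[0]), cmp_int)

static int
cmp_int(const void *a, const void *b)
{
	return (*(const int *)a - *(const int *)b);
}

int
main(void)
{
	int a[3] = { 3, 1, 2 }, b[3] = { 9, 8, 7 };

	/* Without braces, the else binds to the if hidden inside SORT. */
	if (a[0] > b[0])
		SORT(a, 3);
	else
		SORT(b, 3);

	/* Full braces around each branch restore the intended structure. */
	if (a[0] > b[0]) {
		SORT(a, 3);
	} else {
		SORT(b, 3);
	}
	printf("%d %d\n", a[0], b[0]);
	return (0);
}
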
diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c
index e75f0ef1bed..2510815401f 100644
--- a/src/third_party/wiredtiger/src/evict/evict_page.c
+++ b/src/third_party/wiredtiger/src/evict/evict_page.c
@@ -8,9 +8,9 @@
#include "wt_internal.h"
-static int __evict_page_clean_update(WT_SESSION_IMPL *, WT_REF *, bool);
-static int __evict_page_dirty_update(WT_SESSION_IMPL *, WT_REF *, bool);
-static int __evict_review(WT_SESSION_IMPL *, WT_REF *, bool, bool *);
+static int __evict_page_clean_update(WT_SESSION_IMPL *, WT_REF *, uint32_t);
+static int __evict_page_dirty_update(WT_SESSION_IMPL *, WT_REF *, uint32_t);
+static int __evict_review(WT_SESSION_IMPL *, WT_REF *, uint32_t, bool *);
/*
* __evict_exclusive_clear --
@@ -51,19 +51,20 @@ __evict_exclusive(WT_SESSION_IMPL *session, WT_REF *ref)
* Release a reference to a page, and attempt to immediately evict it.
*/
int
-__wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref)
+__wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
{
WT_BTREE *btree;
WT_DECL_RET;
WT_PAGE *page;
uint64_t time_start, time_stop;
- uint32_t previous_state;
+ uint32_t evict_flags, previous_state;
bool locked, too_big;
btree = S2BT(session);
locked = false;
page = ref->page;
time_start = __wt_clock(session);
+ evict_flags = LF_ISSET(WT_READ_NO_SPLIT) ? WT_EVICT_CALL_NO_SPLIT : 0;
/*
* This function always releases the hazard pointer - ensure that's
@@ -89,7 +90,7 @@ __wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref)
* Track how long the call to evict took. If eviction is successful then
* we have one of two pairs of stats to increment.
*/
- ret = __wt_evict(session, ref, false, previous_state);
+ ret = __wt_evict(session, ref, previous_state, evict_flags);
time_stop = __wt_clock(session);
if (ret == 0) {
if (too_big) {
@@ -124,20 +125,25 @@ __wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref)
*/
int
__wt_evict(WT_SESSION_IMPL *session,
- WT_REF *ref, bool closing, uint32_t previous_state)
+ WT_REF *ref, uint32_t previous_state, uint32_t flags)
{
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_PAGE *page;
- bool clean_page, inmem_split, local_gen, tree_dead;
+ bool clean_page, closing, inmem_split, local_gen, tree_dead;
conn = S2C(session);
page = ref->page;
+ closing = LF_ISSET(WT_EVICT_CALL_CLOSING);
local_gen = false;
__wt_verbose(session, WT_VERB_EVICT,
"page %p (%s)", (void *)page, __wt_page_type_string(page->type));
+ tree_dead = F_ISSET(session->dhandle, WT_DHANDLE_DEAD);
+ if (tree_dead)
+ LF_SET(WT_EVICT_CALL_NO_SPLIT);
+
/*
* Enter the eviction generation. If we re-enter eviction, leave the
* previous eviction generation (which must be as low as the current
@@ -171,7 +177,7 @@ __wt_evict(WT_SESSION_IMPL *session,
* Make this check for clean pages, too: while unlikely eviction would
* choose an internal page with children, it's not disallowed.
*/
- WT_ERR(__evict_review(session, ref, closing, &inmem_split));
+ WT_ERR(__evict_review(session, ref, flags, &inmem_split));
/*
* If there was an in-memory split, the tree has been left in the state
@@ -208,7 +214,6 @@ __wt_evict(WT_SESSION_IMPL *session,
}
/* Update the reference and discard the page. */
- tree_dead = F_ISSET(session->dhandle, WT_DHANDLE_DEAD);
if (__wt_ref_is_root(ref))
__wt_ref_out(session, ref);
else if ((clean_page && !F_ISSET(conn, WT_CONN_IN_MEMORY)) || tree_dead)
@@ -216,10 +221,9 @@ __wt_evict(WT_SESSION_IMPL *session,
* Pages that belong to dead trees never write back to disk
* and can't support page splits.
*/
- WT_ERR(__evict_page_clean_update(
- session, ref, tree_dead || closing));
+ WT_ERR(__evict_page_clean_update(session, ref, flags));
else
- WT_ERR(__evict_page_dirty_update(session, ref, closing));
+ WT_ERR(__evict_page_dirty_update(session, ref, flags));
if (clean_page) {
WT_STAT_CONN_INCR(session, cache_eviction_clean);
@@ -250,7 +254,7 @@ done: /* Leave any local eviction generation. */
* split.
*/
static int
-__evict_delete_ref(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
+__evict_delete_ref(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
{
WT_DECL_RET;
WT_PAGE *parent;
@@ -264,7 +268,7 @@ __evict_delete_ref(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
* Avoid doing reverse splits when closing the file, it is wasted work
* and some structures may have already been freed.
*/
- if (!closing) {
+ if (!LF_ISSET(WT_EVICT_CALL_NO_SPLIT | WT_EVICT_CALL_CLOSING)) {
parent = ref->home;
WT_INTL_INDEX_GET(session, parent, pindex);
ndeleted = __wt_atomic_addv32(&pindex->deleted_entries, 1);
@@ -302,9 +306,12 @@ __evict_delete_ref(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
* Update a clean page's reference on eviction.
*/
static int
-__evict_page_clean_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
+__evict_page_clean_update(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
{
WT_DECL_RET;
+ bool closing;
+
+ closing = LF_ISSET(WT_EVICT_CALL_CLOSING);
/*
* Before discarding a page, assert that all updates are globally
@@ -334,7 +341,7 @@ __evict_page_clean_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
WT_REF_SET_STATE(ref, WT_REF_LOOKASIDE);
} else if (ref->addr == NULL) {
WT_WITH_PAGE_INDEX(session,
- ret = __evict_delete_ref(session, ref, closing));
+ ret = __evict_delete_ref(session, ref, flags));
WT_RET_BUSY_OK(ret);
} else
WT_REF_SET_STATE(ref, WT_REF_DISK);
@@ -347,14 +354,17 @@ __evict_page_clean_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
* Update a dirty page's reference on eviction.
*/
static int
-__evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
+__evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref,
+ uint32_t evict_flags)
{
WT_ADDR *addr;
WT_DECL_RET;
WT_MULTI multi;
WT_PAGE_MODIFY *mod;
+ bool closing;
mod = ref->page->modify;
+ closing = FLD_ISSET(evict_flags, WT_EVICT_CALL_CLOSING);
WT_ASSERT(session, ref->addr == NULL);
@@ -370,7 +380,7 @@ __evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
*/
__wt_ref_out(session, ref);
WT_WITH_PAGE_INDEX(session,
- ret = __evict_delete_ref(session, ref, closing));
+ ret = __evict_delete_ref(session, ref, evict_flags));
WT_RET_BUSY_OK(ret);
break;
case WT_PM_REC_MULTIBLOCK: /* Multiple blocks */
@@ -511,20 +521,22 @@ __evict_child_check(WT_SESSION_IMPL *session, WT_REF *parent)
*/
static int
__evict_review(
- WT_SESSION_IMPL *session, WT_REF *ref, bool closing, bool *inmem_splitp)
+ WT_SESSION_IMPL *session, WT_REF *ref, uint32_t evict_flags,
+ bool *inmem_splitp)
{
WT_CACHE *cache;
WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_PAGE *page;
uint32_t flags;
- bool lookaside_retry, *lookaside_retryp, modified;
+ bool closing, lookaside_retry, *lookaside_retryp, modified;
*inmem_splitp = false;
conn = S2C(session);
page = ref->page;
flags = WT_REC_EVICT;
+ closing = FLD_ISSET(evict_flags, WT_EVICT_CALL_CLOSING);
if (!WT_SESSION_BTREE_SYNC(session))
LF_SET(WT_REC_VISIBLE_ALL);
@@ -644,7 +656,13 @@ __evict_review(
else if (!WT_IS_METADATA(session->dhandle)) {
LF_SET(WT_REC_UPDATE_RESTORE);
- if (F_ISSET(cache, WT_CACHE_EVICT_SCRUB))
+ /*
+ * Scrub if we're supposed to or toss it in sometimes
+ * if we are in debugging mode.
+ */
+ if (F_ISSET(cache, WT_CACHE_EVICT_SCRUB) ||
+ (F_ISSET(cache, WT_CACHE_EVICT_DEBUG_MODE) &&
+ __wt_random(&session->rnd) % 3 == 0))
LF_SET(WT_REC_SCRUB);
/*
@@ -653,8 +671,16 @@ __evict_review(
* suggests trying the lookaside table.
*/
if (F_ISSET(cache, WT_CACHE_EVICT_LOOKASIDE) &&
- !F_ISSET(conn, WT_CONN_EVICTION_NO_LOOKASIDE))
+ !F_ISSET(conn, WT_CONN_EVICTION_NO_LOOKASIDE)) {
+ if (F_ISSET(cache,
+ WT_CACHE_EVICT_DEBUG_MODE) &&
+ __wt_random(&session->rnd) % 10 == 0) {
+ LF_CLR(WT_REC_SCRUB |
+ WT_REC_UPDATE_RESTORE);
+ LF_SET(WT_REC_LOOKASIDE);
+ }
lookaside_retryp = &lookaside_retry;
+ }
}
}
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index 46f507ebedf..9859b3b607a 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -1114,9 +1114,9 @@ struct __wt_update {
/*
* WT_MODIFY_MEM_FACTOR --
- * Limit update chains to a factor of the base document size.
+ * Limit update chains to a fraction of the base document size.
*/
-#define WT_MODIFY_MEM_FACTOR 1
+#define WT_MODIFY_MEM_FRACTION 10
/*
* WT_INSERT --
diff --git a/src/third_party/wiredtiger/src/include/btree.h b/src/third_party/wiredtiger/src/include/btree.h
index f7ff274cfb8..17722a806e5 100644
--- a/src/third_party/wiredtiger/src/include/btree.h
+++ b/src/third_party/wiredtiger/src/include/btree.h
@@ -194,6 +194,12 @@ struct __wt_btree {
uint64_t bytes_dirty_total; /* Bytes ever dirtied in cache. */
/*
+ * The maximum bytes allowed to be used for the table on disk. This is
+ * currently only used for the lookaside table.
+ */
+ uint64_t file_max;
+
+ /*
* We flush pages from the tree (in order to make checkpoint faster),
* without a high-level lock. To avoid multiple threads flushing at
* the same time, lock the tree.
diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i
index 5e0f0521ded..e728790b02c 100644
--- a/src/third_party/wiredtiger/src/include/btree.i
+++ b/src/third_party/wiredtiger/src/include/btree.i
@@ -918,6 +918,7 @@ __wt_row_leaf_key_set_cell(WT_PAGE *page, WT_ROW *rip, WT_CELL *cell)
*/
v = WT_CELL_ENCODE_OFFSET(WT_PAGE_DISK_OFFSET(page, cell)) |
WT_CELL_FLAG;
+ WT_ASSERT(NULL, WT_ROW_SLOT(page, rip) < page->entries);
WT_ROW_KEY_SET(rip, v);
}
@@ -937,6 +938,7 @@ __wt_row_leaf_key_set(WT_PAGE *page, WT_ROW *rip, WT_CELL_UNPACK *unpack)
v = WT_K_ENCODE_KEY_LEN(unpack->size) |
WT_K_ENCODE_KEY_OFFSET(WT_PAGE_DISK_OFFSET(page, unpack->data)) |
WT_K_FLAG;
+ WT_ASSERT(NULL, WT_ROW_SLOT(page, rip) < page->entries);
WT_ROW_KEY_SET(rip, v);
}
@@ -975,6 +977,7 @@ __wt_row_leaf_value_set(WT_PAGE *page, WT_ROW *rip, WT_CELL_UNPACK *unpack)
WT_KV_ENCODE_VALUE_LEN(unpack->size) |
WT_KV_ENCODE_KEY_OFFSET(key_offset) |
WT_KV_ENCODE_VALUE_OFFSET(value_offset) | WT_KV_FLAG;
+ WT_ASSERT(NULL, WT_ROW_SLOT(page, rip) < page->entries);
WT_ROW_KEY_SET(rip, v);
}
@@ -1516,7 +1519,8 @@ __wt_page_release(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags)
WT_IGNORE_RET(
__wt_page_evict_urgent(session, ref));
} else {
- WT_RET_BUSY_OK(__wt_page_release_evict(session, ref));
+ WT_RET_BUSY_OK(__wt_page_release_evict(session, ref,
+ flags));
return (0);
}
}
diff --git a/src/third_party/wiredtiger/src/include/cache.h b/src/third_party/wiredtiger/src/include/cache.h
index 7966d9802b3..c4c0ee5d5d4 100644
--- a/src/third_party/wiredtiger/src/include/cache.h
+++ b/src/third_party/wiredtiger/src/include/cache.h
@@ -54,6 +54,7 @@ typedef enum __wt_cache_op {
WT_SYNC_WRITE_LEAVES
} WT_CACHE_OP;
+#define WT_LAS_FILE_MIN (100 * WT_MEGABYTE)
#define WT_LAS_NUM_SESSIONS 5
#define WT_LAS_SWEEP_ENTRIES (20 * WT_THOUSAND)
#define WT_LAS_SWEEP_SEC 2
@@ -171,7 +172,7 @@ struct __wt_cache {
* Score of how aggressive eviction should be about selecting eviction
* candidates. If eviction is struggling to make progress, this score
* rises (up to a maximum of 100), at which point the cache is "stuck"
- * and transaction will be rolled back.
+ * and transactions will be rolled back.
*/
uint32_t evict_aggressive_score;
@@ -251,11 +252,12 @@ struct __wt_cache {
/* AUTOMATIC FLAG VALUE GENERATION START */
#define WT_CACHE_EVICT_CLEAN 0x01u /* Evict clean pages */
#define WT_CACHE_EVICT_CLEAN_HARD 0x02u /* Clean % blocking app threads */
-#define WT_CACHE_EVICT_DIRTY 0x04u /* Evict dirty pages */
-#define WT_CACHE_EVICT_DIRTY_HARD 0x08u /* Dirty % blocking app threads */
-#define WT_CACHE_EVICT_LOOKASIDE 0x10u /* Try lookaside eviction */
-#define WT_CACHE_EVICT_SCRUB 0x20u /* Scrub dirty pages */
-#define WT_CACHE_EVICT_URGENT 0x40u /* Pages are in the urgent queue */
+#define WT_CACHE_EVICT_DEBUG_MODE 0x04u /* Aggressive debugging mode */
+#define WT_CACHE_EVICT_DIRTY 0x08u /* Evict dirty pages */
+#define WT_CACHE_EVICT_DIRTY_HARD 0x10u /* Dirty % blocking app threads */
+#define WT_CACHE_EVICT_LOOKASIDE 0x20u /* Try lookaside eviction */
+#define WT_CACHE_EVICT_SCRUB 0x40u /* Scrub dirty pages */
+#define WT_CACHE_EVICT_URGENT 0x80u /* Pages are in the urgent queue */
/* AUTOMATIC FLAG VALUE GENERATION STOP */
#define WT_CACHE_EVICT_ALL (WT_CACHE_EVICT_CLEAN | WT_CACHE_EVICT_DIRTY)
uint32_t flags;
@@ -290,3 +292,9 @@ struct __wt_cache_pool {
/* AUTOMATIC FLAG VALUE GENERATION STOP */
uint8_t flags;
};
+
+/* Flags used with __wt_evict */
+/* AUTOMATIC FLAG VALUE GENERATION START */
+#define WT_EVICT_CALL_CLOSING 0x1u /* Closing connection or tree */
+#define WT_EVICT_CALL_NO_SPLIT 0x2u /* Splits not allowed */
+/* AUTOMATIC FLAG VALUE GENERATION STOP */
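
These flags replace the boolean closing parameter previously threaded through the eviction paths: callers now pass a flags word to __wt_evict and __wt_page_release_evict, and the helpers derive the old booleans with flag tests. A minimal sketch of the pattern, with a simplified stand-in for the FLD_ISSET macro:

#include <stdbool.h>
#include <stdint.h>

#define WT_EVICT_CALL_CLOSING  0x1u     /* Closing connection or tree */
#define WT_EVICT_CALL_NO_SPLIT 0x2u     /* Splits not allowed */

/* Simplified stand-in for the field-flag test used by the patch. */
#define FLD_ISSET(field, mask) (((field) & (uint32_t)(mask)) != 0)

static int
evict_example(uint32_t evict_flags)
{
    bool closing, no_split;

    closing = FLD_ISSET(evict_flags, WT_EVICT_CALL_CLOSING);
    no_split = FLD_ISSET(evict_flags, WT_EVICT_CALL_NO_SPLIT);

    if (closing) {
        /* Skip checks that don't apply when the tree is being closed. */
    }
    if (no_split) {
        /* Refuse to split, e.g. while a checkpoint is in progress. */
    }
    return (0);
}
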
diff --git a/src/third_party/wiredtiger/src/include/cell.h b/src/third_party/wiredtiger/src/include/cell.h
new file mode 100644
index 00000000000..5e079a613ad
--- /dev/null
+++ b/src/third_party/wiredtiger/src/include/cell.h
@@ -0,0 +1,162 @@
+/*-
+ * Copyright (c) 2014-2019 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+/*
+ * WT_CELL --
+ * Variable-length cell type.
+ *
+ * Pages containing variable-length keys or values data (the WT_PAGE_ROW_INT,
+ * WT_PAGE_ROW_LEAF, WT_PAGE_COL_INT and WT_PAGE_COL_VAR page types), have
+ * cells after the page header.
+ *
+ * There are 4 basic cell types: keys and data (each of which has an overflow
+ * form), deleted cells and off-page references. The cell is usually followed
+ * by additional data, varying by type: a key or data cell is followed by a set
+ * of bytes, an address cookie follows overflow or off-page cells.
+ *
+ * Deleted cells are place-holders for column-store files, where entries cannot
+ * be removed in order to preserve the record count.
+ *
+ * Here's the cell use by page type:
+ *
+ * WT_PAGE_ROW_INT (row-store internal page):
+ * Keys and offpage-reference pairs (a WT_CELL_KEY or WT_CELL_KEY_OVFL
+ * cell followed by a WT_CELL_ADDR_XXX cell).
+ *
+ * WT_PAGE_ROW_LEAF (row-store leaf page):
+ * Keys with optional data cells (a WT_CELL_KEY or WT_CELL_KEY_OVFL cell,
+ * normally followed by a WT_CELL_{VALUE,VALUE_COPY,VALUE_OVFL} cell).
+ *
+ * WT_PAGE_ROW_LEAF pages optionally prefix-compress keys, using a single
+ * byte count immediately following the cell.
+ *
+ * WT_PAGE_COL_INT (Column-store internal page):
+ * Off-page references (a WT_CELL_ADDR_XXX cell).
+ *
+ * WT_PAGE_COL_VAR (Column-store leaf page storing variable-length cells):
+ * Data cells (a WT_CELL_{VALUE,VALUE_COPY,VALUE_OVFL} cell), or deleted
+ * cells (a WT_CELL_DEL cell).
+ *
+ * Each cell starts with a descriptor byte:
+ *
+ * Bits 1 and 2 are reserved for "short" key and value cells (that is, a cell
+ * carrying data less than 64B, where we can store the data length in the cell
+ * descriptor byte):
+ * 0x00 Not a short key/data cell
+ * 0x01 Short key cell
+ * 0x10 Short key cell, with a following prefix-compression byte
+ * 0x11 Short value cell
+ * In these cases, the other 6 bits of the descriptor byte are the data length.
+ *
+ * Bit 3 marks an 8B packed, uint64_t value following the cell description byte.
+ * (A run-length counter or a record number for variable-length column store.)
+ *
+ * Bit 4 is unused.
+ *
+ * Bits 5-8 are cell "types".
+ */
+#define WT_CELL_KEY_SHORT 0x01 /* Short key */
+#define WT_CELL_KEY_SHORT_PFX 0x02 /* Short key with prefix byte */
+#define WT_CELL_VALUE_SHORT 0x03 /* Short data */
+#define WT_CELL_SHORT_TYPE(v) ((v) & 0x03U)
+
+#define WT_CELL_SHORT_MAX 63 /* Maximum short key/value */
+#define WT_CELL_SHORT_SHIFT 2 /* Shift for short key/value */
+
+#define WT_CELL_64V 0x04 /* Associated value */
+
+/*
+ * We could use bit 4 as a single bit (similar to bit 3), or as a type bit in a
+ * backward compatible way by adding bit 4 to the type mask and adding new types
+ * that incorporate it.
+ */
+#define WT_CELL_UNUSED_BIT4 0x08 /* Unused */
+
+/*
+ * WT_CELL_ADDR_INT is an internal block location, WT_CELL_ADDR_LEAF is a leaf
+ * block location, and WT_CELL_ADDR_LEAF_NO is a leaf block location where the
+ * page has no overflow items. (The goal is to speed up truncation as we don't
+ * have to read pages without overflow items in order to delete them. Note,
+ * WT_CELL_ADDR_LEAF_NO is not guaranteed to be set on every page without
+ * overflow items, the only guarantee is that if set, the page has no overflow
+ * items.)
+ *
+ * WT_CELL_VALUE_COPY is a reference to a previous cell on the page, supporting
+ * value dictionaries: if the two values are the same, we only store them once
+ * and have the second and subsequent use reference the original.
+ */
+#define WT_CELL_ADDR_DEL (0) /* Address: deleted */
+#define WT_CELL_ADDR_INT (1 << 4) /* Address: internal */
+#define WT_CELL_ADDR_LEAF (2 << 4) /* Address: leaf */
+#define WT_CELL_ADDR_LEAF_NO (3 << 4) /* Address: leaf no overflow */
+#define WT_CELL_DEL (4 << 4) /* Deleted value */
+#define WT_CELL_KEY (5 << 4) /* Key */
+#define WT_CELL_KEY_OVFL (6 << 4) /* Overflow key */
+#define WT_CELL_KEY_OVFL_RM (12 << 4) /* Overflow key (removed) */
+#define WT_CELL_KEY_PFX (7 << 4) /* Key with prefix byte */
+#define WT_CELL_VALUE (8 << 4) /* Value */
+#define WT_CELL_VALUE_COPY (9 << 4) /* Value copy */
+#define WT_CELL_VALUE_OVFL (10 << 4) /* Overflow value */
+#define WT_CELL_VALUE_OVFL_RM (11 << 4) /* Overflow value (removed) */
+
+#define WT_CELL_TYPE_MASK (0x0fU << 4) /* Maximum 16 cell types */
+#define WT_CELL_TYPE(v) ((v) & WT_CELL_TYPE_MASK)
+
+/*
+ * When we aren't able to create a short key or value (and, in the case of a
+ * value, there's no associated RLE), the key or value is at least 64B, else
+ * we'd have been able to store it as a short cell. Decrement/Increment the
+ * size before storing it, in the hopes that relatively small key/value sizes
+ * will pack into a single byte instead of two bytes.
+ */
+#define WT_CELL_SIZE_ADJUST 64
+
+/*
+ * WT_CELL --
+ * Variable-length, on-page cell header.
+ */
+struct __wt_cell {
+ /*
+ * Maximum of 16 bytes:
+ * 1: cell descriptor byte
+ * 1: prefix compression count
+ * 9: associated 64-bit value (uint64_t encoding, max 9 bytes)
+ * 5: data length (uint32_t encoding, max 5 bytes)
+ *
+ * This calculation is pessimistic: the prefix compression count and
+ * 64V value overlap, the 64V value and data length are optional.
+ */
+ uint8_t __chunk[1 + 1 + WT_INTPACK64_MAXSIZE + WT_INTPACK32_MAXSIZE];
+};
+
+/*
+ * WT_CELL_UNPACK --
+ * Unpacked cell.
+ */
+struct __wt_cell_unpack {
+ WT_CELL *cell; /* Cell's disk image address */
+
+ uint64_t v; /* RLE count or recno */
+
+ /*
+ * !!!
+ * The size and __len fields would reasonably be type size_t, but don't
+ * change the type: performance drops significantly if they're size_t.
+ */
+ const void *data; /* Data */
+ uint32_t size; /* Data size */
+
+ uint32_t __len; /* Cell + data length (usually) */
+
+ uint8_t prefix; /* Cell prefix length */
+
+ uint8_t raw; /* Raw cell type (include "shorts") */
+ uint8_t type; /* Cell type */
+
+ uint8_t ovfl; /* boolean: cell is an overflow */
+};
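
The descriptor-byte layout documented at the top of the new cell.h can be decoded with the macros it defines. A small standalone sketch; the macro definitions are repeated so the snippet compiles on its own, and the classification follows the comment above rather than any internal WiredTiger routine.

#include <stdint.h>
#include <stdio.h>

#define WT_CELL_SHORT_TYPE(v)  ((v) & 0x03U)
#define WT_CELL_SHORT_SHIFT    2
#define WT_CELL_64V            0x04
#define WT_CELL_TYPE_MASK      (0x0fU << 4)
#define WT_CELL_TYPE(v)        ((v) & WT_CELL_TYPE_MASK)

static void
describe_descriptor(uint8_t desc)
{
    if (WT_CELL_SHORT_TYPE(desc) != 0) {
        /* Bits 1-2: a short key/value; the other 6 bits are the length. */
        printf("short cell, %u data bytes\n",
            (unsigned)(desc >> WT_CELL_SHORT_SHIFT));
    } else {
        /* Bits 5-8 are the type; bit 3 means a packed 64-bit value follows. */
        printf("cell type 0x%x, %s 64V\n",
            (unsigned)WT_CELL_TYPE(desc),
            (desc & WT_CELL_64V) ? "with" : "without");
    }
}
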
diff --git a/src/third_party/wiredtiger/src/include/cell.i b/src/third_party/wiredtiger/src/include/cell.i
index f518acfcbb0..c807737c494 100644
--- a/src/third_party/wiredtiger/src/include/cell.i
+++ b/src/third_party/wiredtiger/src/include/cell.i
@@ -7,161 +7,6 @@
*/
/*
- * WT_CELL --
- * Variable-length cell type.
- *
- * Pages containing variable-length keys or values data (the WT_PAGE_ROW_INT,
- * WT_PAGE_ROW_LEAF, WT_PAGE_COL_INT and WT_PAGE_COL_VAR page types), have
- * cells after the page header.
- *
- * There are 4 basic cell types: keys and data (each of which has an overflow
- * form), deleted cells and off-page references. The cell is usually followed
- * by additional data, varying by type: a key or data cell is followed by a set
- * of bytes, an address cookie follows overflow or off-page cells.
- *
- * Deleted cells are place-holders for column-store files, where entries cannot
- * be removed in order to preserve the record count.
- *
- * Here's the cell use by page type:
- *
- * WT_PAGE_ROW_INT (row-store internal page):
- * Keys and offpage-reference pairs (a WT_CELL_KEY or WT_CELL_KEY_OVFL
- * cell followed by a WT_CELL_ADDR_XXX cell).
- *
- * WT_PAGE_ROW_LEAF (row-store leaf page):
- * Keys with optional data cells (a WT_CELL_KEY or WT_CELL_KEY_OVFL cell,
- * normally followed by a WT_CELL_{VALUE,VALUE_COPY,VALUE_OVFL} cell).
- *
- * WT_PAGE_ROW_LEAF pages optionally prefix-compress keys, using a single
- * byte count immediately following the cell.
- *
- * WT_PAGE_COL_INT (Column-store internal page):
- * Off-page references (a WT_CELL_ADDR_XXX cell).
- *
- * WT_PAGE_COL_VAR (Column-store leaf page storing variable-length cells):
- * Data cells (a WT_CELL_{VALUE,VALUE_COPY,VALUE_OVFL} cell), or deleted
- * cells (a WT_CELL_DEL cell).
- *
- * Each cell starts with a descriptor byte:
- *
- * Bits 1 and 2 are reserved for "short" key and value cells (that is, a cell
- * carrying data less than 64B, where we can store the data length in the cell
- * descriptor byte):
- * 0x00 Not a short key/data cell
- * 0x01 Short key cell
- * 0x10 Short key cell, with a following prefix-compression byte
- * 0x11 Short value cell
- * In these cases, the other 6 bits of the descriptor byte are the data length.
- *
- * Bit 3 marks an 8B packed, uint64_t value following the cell description byte.
- * (A run-length counter or a record number for variable-length column store.)
- *
- * Bit 4 is unused.
- *
- * Bits 5-8 are cell "types".
- */
-#define WT_CELL_KEY_SHORT 0x01 /* Short key */
-#define WT_CELL_KEY_SHORT_PFX 0x02 /* Short key with prefix byte */
-#define WT_CELL_VALUE_SHORT 0x03 /* Short data */
-#define WT_CELL_SHORT_TYPE(v) ((v) & 0x03U)
-
-#define WT_CELL_SHORT_MAX 63 /* Maximum short key/value */
-#define WT_CELL_SHORT_SHIFT 2 /* Shift for short key/value */
-
-#define WT_CELL_64V 0x04 /* Associated value */
-
-/*
- * We could use bit 4 as a single bit (similar to bit 3), or as a type bit in a
- * backward compatible way by adding bit 4 to the type mask and adding new types
- * that incorporate it.
- */
-#define WT_CELL_UNUSED_BIT4 0x08 /* Unused */
-
-/*
- * WT_CELL_ADDR_INT is an internal block location, WT_CELL_ADDR_LEAF is a leaf
- * block location, and WT_CELL_ADDR_LEAF_NO is a leaf block location where the
- * page has no overflow items. (The goal is to speed up truncation as we don't
- * have to read pages without overflow items in order to delete them. Note,
- * WT_CELL_ADDR_LEAF_NO is not guaranteed to be set on every page without
- * overflow items, the only guarantee is that if set, the page has no overflow
- * items.)
- *
- * WT_CELL_VALUE_COPY is a reference to a previous cell on the page, supporting
- * value dictionaries: if the two values are the same, we only store them once
- * and have the second and subsequent use reference the original.
- */
-#define WT_CELL_ADDR_DEL (0) /* Address: deleted */
-#define WT_CELL_ADDR_INT (1 << 4) /* Address: internal */
-#define WT_CELL_ADDR_LEAF (2 << 4) /* Address: leaf */
-#define WT_CELL_ADDR_LEAF_NO (3 << 4) /* Address: leaf no overflow */
-#define WT_CELL_DEL (4 << 4) /* Deleted value */
-#define WT_CELL_KEY (5 << 4) /* Key */
-#define WT_CELL_KEY_OVFL (6 << 4) /* Overflow key */
-#define WT_CELL_KEY_OVFL_RM (12 << 4) /* Overflow key (removed) */
-#define WT_CELL_KEY_PFX (7 << 4) /* Key with prefix byte */
-#define WT_CELL_VALUE (8 << 4) /* Value */
-#define WT_CELL_VALUE_COPY (9 << 4) /* Value copy */
-#define WT_CELL_VALUE_OVFL (10 << 4) /* Overflow value */
-#define WT_CELL_VALUE_OVFL_RM (11 << 4) /* Overflow value (removed) */
-
-#define WT_CELL_TYPE_MASK (0x0fU << 4) /* Maximum 16 cell types */
-#define WT_CELL_TYPE(v) ((v) & WT_CELL_TYPE_MASK)
-
-/*
- * When we aren't able to create a short key or value (and, in the case of a
- * value, there's no associated RLE), the key or value is at least 64B, else
- * we'd have been able to store it as a short cell. Decrement/Increment the
- * size before storing it, in the hopes that relatively small key/value sizes
- * will pack into a single byte instead of two bytes.
- */
-#define WT_CELL_SIZE_ADJUST 64
-
-/*
- * WT_CELL --
- * Variable-length, on-page cell header.
- */
-struct __wt_cell {
- /*
- * Maximum of 16 bytes:
- * 1: cell descriptor byte
- * 1: prefix compression count
- * 9: associated 64-bit value (uint64_t encoding, max 9 bytes)
- * 5: data length (uint32_t encoding, max 5 bytes)
- *
- * This calculation is pessimistic: the prefix compression count and
- * 64V value overlap, the 64V value and data length are optional.
- */
- uint8_t __chunk[1 + 1 + WT_INTPACK64_MAXSIZE + WT_INTPACK32_MAXSIZE];
-};
-
-/*
- * WT_CELL_UNPACK --
- * Unpacked cell.
- */
-struct __wt_cell_unpack {
- WT_CELL *cell; /* Cell's disk image address */
-
- uint64_t v; /* RLE count or recno */
-
- /*
- * !!!
- * The size and __len fields are reasonably type size_t; don't change
- * the type, performance drops significantly if they're type size_t.
- */
- const void *data; /* Data */
- uint32_t size; /* Data size */
-
- uint32_t __len; /* Cell + data length (usually) */
-
- uint8_t prefix; /* Cell prefix length */
-
- uint8_t raw; /* Raw cell type (include "shorts") */
- uint8_t type; /* Cell type */
-
- uint8_t ovfl; /* boolean: cell is an overflow */
-};
-
-/*
* WT_CELL_FOREACH --
* Walk the cells on a page.
*/
diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h
index a23434ea9e2..73ac6c85522 100644
--- a/src/third_party/wiredtiger/src/include/connection.h
+++ b/src/third_party/wiredtiger/src/include/connection.h
@@ -193,6 +193,9 @@ struct __wt_connection_impl {
WT_SPINLOCK optrack_map_spinlock; /* Translation file spinlock. */
uintmax_t optrack_pid; /* Cache the process ID. */
+ WT_LSN *debug_ckpt; /* Debug mode checkpoint LSNs. */
+ uint32_t debug_ckpt_cnt;/* Checkpoint retention number */
+
void **foc; /* Free-on-close array */
size_t foc_cnt; /* Array entries */
size_t foc_size; /* Array size */
@@ -321,15 +324,16 @@ struct __wt_connection_impl {
/* AUTOMATIC FLAG VALUE GENERATION START */
#define WT_CONN_LOG_ARCHIVE 0x001u /* Archive is enabled */
-#define WT_CONN_LOG_DOWNGRADED 0x002u /* Running older version */
-#define WT_CONN_LOG_ENABLED 0x004u /* Logging is enabled */
-#define WT_CONN_LOG_EXISTED 0x008u /* Log files found */
-#define WT_CONN_LOG_FORCE_DOWNGRADE 0x010u /* Force downgrade */
-#define WT_CONN_LOG_RECOVER_DIRTY 0x020u /* Recovering unclean */
-#define WT_CONN_LOG_RECOVER_DONE 0x040u /* Recovery completed */
-#define WT_CONN_LOG_RECOVER_ERR 0x080u /* Error if recovery required */
-#define WT_CONN_LOG_RECOVER_FAILED 0x100u /* Recovery failed */
-#define WT_CONN_LOG_ZERO_FILL 0x200u /* Manually zero files */
+#define WT_CONN_LOG_DEBUG_MODE 0x002u /* Debug-mode logging enabled */
+#define WT_CONN_LOG_DOWNGRADED 0x004u /* Running older version */
+#define WT_CONN_LOG_ENABLED 0x008u /* Logging is enabled */
+#define WT_CONN_LOG_EXISTED 0x010u /* Log files found */
+#define WT_CONN_LOG_FORCE_DOWNGRADE 0x020u /* Force downgrade */
+#define WT_CONN_LOG_RECOVER_DIRTY 0x040u /* Recovering unclean */
+#define WT_CONN_LOG_RECOVER_DONE 0x080u /* Recovery completed */
+#define WT_CONN_LOG_RECOVER_ERR 0x100u /* Error if recovery required */
+#define WT_CONN_LOG_RECOVER_FAILED 0x200u /* Recovery failed */
+#define WT_CONN_LOG_ZERO_FILL 0x400u /* Manually zero files */
/* AUTOMATIC FLAG VALUE GENERATION STOP */
uint32_t log_flags; /* Global logging configuration */
WT_CONDVAR *log_cond; /* Log server wait mutex */
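
The debug_ckpt fields and the WT_CONN_LOG_DEBUG_MODE flag back the new debug_mode connection configuration documented in the wiredtiger.in changes later in this patch. A minimal sketch of enabling it at open time, assuming the option names from that documentation; error handling is reduced to the bare minimum.

#include <stdlib.h>
#include <wiredtiger.h>

int
main(void)
{
    WT_CONNECTION *conn;

    /*
     * Retain the logs of the last five checkpoints and log operations even
     * for tables with logging disabled (option names taken from the
     * debug_mode documentation added by this patch).
     */
    if (wiredtiger_open("WT_HOME", NULL,
        "create,debug_mode=(checkpoint_retention=5,table_logging=true)",
        &conn) != 0)
        return (EXIT_FAILURE);

    return (conn->close(conn, NULL) == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}
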
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index aa313fa2caf..1ecfaf6eef6 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -202,9 +202,10 @@ extern int __wt_update_alloc(WT_SESSION_IMPL *session, const WT_ITEM *value, WT_
extern WT_UPDATE *__wt_update_obsolete_check(WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd);
extern int __wt_search_insert(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_INSERT_HEAD *ins_head, WT_ITEM *srch_key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_row_search(WT_SESSION_IMPL *session, WT_ITEM *srch_key, WT_REF *leaf, WT_CURSOR_BTREE *cbt, bool insert, bool restore) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_las_config(WT_SESSION_IMPL *session, const char **cfg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern bool __wt_las_empty(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_las_stats_update(WT_SESSION_IMPL *session);
-extern int __wt_las_create(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_las_create(WT_SESSION_IMPL *session, const char **cfg) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_las_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_las_cursor_open(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_las_cursor(WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t *session_flags);
@@ -254,6 +255,7 @@ extern int __wt_encryptor_config(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cval,
extern int __wt_conn_remove_encryptor(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_extractor_config(WT_SESSION_IMPL *session, const char *uri, const char *config, WT_EXTRACTOR **extractorp, int *ownp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_conn_remove_extractor(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_debug_mode_config(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_verbose_config(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_verbose_dump_sessions(WT_SESSION_IMPL *session, bool show_cursors) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_timing_stress_config(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -386,11 +388,11 @@ extern bool __wt_page_evict_urgent(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC
extern void __wt_evict_priority_set(WT_SESSION_IMPL *session, uint64_t v);
extern void __wt_evict_priority_clear(WT_SESSION_IMPL *session);
extern int __wt_verbose_dump_cache(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing, uint32_t previous_state) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_page_release_evict(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t previous_state, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_curstat_cache_walk(WT_SESSION_IMPL *session);
extern int __wt_log_printf(WT_SESSION_IMPL *session, const char *format, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern void __wt_log_ckpt(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn);
+extern void __wt_log_ckpt(WT_SESSION_IMPL *session, WT_LSN *ckpt_lsn);
extern int __wt_log_flush_lsn(WT_SESSION_IMPL *session, WT_LSN *lsn, bool start) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_log_background(WT_SESSION_IMPL *session, WT_LSN *lsn);
extern int __wt_log_force_sync(WT_SESSION_IMPL *session, WT_LSN *min_lsn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -446,6 +448,9 @@ extern int __wt_logop_checkpoint_start_print(WT_SESSION_IMPL *session, const uin
extern int __wt_logop_prev_lsn_pack(WT_SESSION_IMPL *session, WT_ITEM *logrec, WT_LSN *prev_lsn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_logop_prev_lsn_unpack(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_LSN *prev_lsnp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_logop_prev_lsn_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_txn_timestamp_pack(WT_SESSION_IMPL *session, WT_ITEM *logrec, uint64_t time_sec, uint64_t time_nsec, uint64_t commit_ts, uint64_t durable_ts, uint64_t first_ts, uint64_t prepare_ts, uint64_t read_ts) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_txn_timestamp_unpack(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint64_t *time_secp, uint64_t *time_nsecp, uint64_t *commit_tsp, uint64_t *durable_tsp, uint64_t *first_tsp, uint64_t *prepare_tsp, uint64_t *read_tsp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_logop_txn_timestamp_print(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_op_printlog(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_log_slot_activate(WT_SESSION_IMPL *session, WT_LOGSLOT *slot);
extern int __wt_log_slot_switch(WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool retry, bool forced, bool *did_work) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -597,6 +602,21 @@ extern int __wt_ext_unpack_item(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, WT
extern int __wt_ext_unpack_int(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, int64_t *ip) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_ext_unpack_str(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, const char **sp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_ext_unpack_uint(WT_EXTENSION_API *wt_api, WT_PACK_STREAM *ps, uint64_t *up) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_rec_child_modify(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *ref, bool *hazardp, WT_CHILD_STATE *statep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_bulk_insert_fix(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool deleted) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_bulk_insert_fix_bitmap(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_bulk_insert_var(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool deleted) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_rec_col_fix_slvg(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref, WT_SALVAGE_COOKIE *salvage) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_rec_col_var(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref, WT_SALVAGE_COOKIE *salvage) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_rec_dictionary_init(WT_SESSION_IMPL *session, WT_RECONCILE *r, u_int slots) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern void __wt_rec_dictionary_free(WT_SESSION_IMPL *session, WT_RECONCILE *r);
+extern void __wt_rec_dictionary_reset(WT_RECONCILE *r);
+extern int __wt_rec_dictionary_lookup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_KV *val, WT_REC_DICTIONARY **dpp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_rec_row_leaf(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page, WT_SALVAGE_COOKIE *salvage) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_ovfl_track_init(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_ovfl_discard_add(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL *cell) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_ovfl_discard_free(WT_SESSION_IMPL *session, WT_PAGE *page);
@@ -605,14 +625,16 @@ extern int __wt_ovfl_reuse_add(WT_SESSION_IMPL *session, WT_PAGE *page, const ui
extern void __wt_ovfl_reuse_free(WT_SESSION_IMPL *session, WT_PAGE *page);
extern int __wt_ovfl_track_wrapup(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_ovfl_track_wrapup_err(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins, void *ripcip, WT_CELL_UNPACK *vpack, bool *upd_savedp, WT_UPDATE **updp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref, WT_SALVAGE_COOKIE *salvage, uint32_t flags, bool *lookaside_retryp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern uint32_t __wt_split_page_size(int split_pct, uint32_t maxpagesize, uint32_t allocsize);
+extern int __wt_rec_split_init(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page, uint64_t recno, uint64_t max) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_rec_split_crossing_bnd(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_rec_split_finish(WT_SESSION_IMPL *session, WT_RECONCILE *r) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_bulk_init(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_bulk_wrapup(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_bulk_insert_fix(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool deleted) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_bulk_insert_fix_bitmap(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_bulk_insert_var(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool deleted) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_rec_cell_build_ovfl(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_KV *kv, uint8_t type, uint64_t rle) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_schema_alter(WT_SESSION_IMPL *session, const char *uri, const char *newcfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_direct_io_size_check(WT_SESSION_IMPL *session, const char **cfg, const char *config_name, uint32_t *allocsizep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_schema_colgroup_source(WT_SESSION_IMPL *session, WT_TABLE *table, const char *cgname, const char *config, WT_ITEM *buf) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -840,6 +862,7 @@ extern void __wt_txn_op_free(WT_SESSION_IMPL *session, WT_TXN_OP *op);
extern int __wt_txn_log_op(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_log_commit(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_checkpoint_logread(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, WT_LSN *ckpt_lsn) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_ts_log(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_checkpoint_log(WT_SESSION_IMPL *session, bool full, uint32_t flags, WT_LSN *lsnp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_truncate_log(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_txn_truncate_end(WT_SESSION_IMPL *session);
@@ -861,7 +884,7 @@ extern int __wt_txn_global_set_timestamp(WT_SESSION_IMPL *session, const char *c
extern int __wt_timestamp_validate(WT_SESSION_IMPL *session, const char *name, wt_timestamp_t ts, WT_CONFIG_ITEM *cval) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern int __wt_txn_parse_prepare_timestamp(WT_SESSION_IMPL *session, const char *cfg[], wt_timestamp_t *timestamp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_txn_parse_read_timestamp(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_txn_parse_read_timestamp(WT_SESSION_IMPL *session, const char *cfg[], bool *set_tsp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
extern void __wt_txn_set_commit_timestamp(WT_SESSION_IMPL *session);
extern void __wt_txn_clear_commit_timestamp(WT_SESSION_IMPL *session);
extern void __wt_txn_set_read_timestamp(WT_SESSION_IMPL *session);
diff --git a/src/third_party/wiredtiger/src/include/log.h b/src/third_party/wiredtiger/src/include/log.h
index 463f92a34a3..e7e49b8b0ce 100644
--- a/src/third_party/wiredtiger/src/include/log.h
+++ b/src/third_party/wiredtiger/src/include/log.h
@@ -21,6 +21,9 @@
#define WT_LOG_SYNC_ENABLED 0x10u
/* AUTOMATIC FLAG VALUE GENERATION STOP */
+#define WT_LOGOP_IGNORE 0x80000000
+#define WT_LOGOP_IS_IGNORED(val) (val & WT_LOGOP_IGNORE)
+
/*
* WT_LSN --
* A log sequence number, representing a position in the transaction log.
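
WT_LOGOP_IGNORE sets the high bit of a log operation type so readers can tell the record is informational only; per the debug_mode documentation added in this patch, such records (for example the new transaction-timestamp record) are skipped during recovery. A small sketch of setting and testing the bit; the function names are invented for the example.

#include <stdbool.h>
#include <stdint.h>

#define WT_LOGOP_IGNORE        0x80000000
#define WT_LOGOP_IS_IGNORED(val) (val & WT_LOGOP_IGNORE)

/* Writing side: mark a record as informational before packing it. */
static uint32_t
mark_informational(uint32_t optype)
{
    return (optype | WT_LOGOP_IGNORE);
}

/* Recovery side: informational records are not replayed. */
static bool
should_apply(uint32_t optype)
{
    return (!WT_LOGOP_IS_IGNORED(optype));
}
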
diff --git a/src/third_party/wiredtiger/src/include/meta.h b/src/third_party/wiredtiger/src/include/meta.h
index e221cad1481..e4b369f736d 100644
--- a/src/third_party/wiredtiger/src/include/meta.h
+++ b/src/third_party/wiredtiger/src/include/meta.h
@@ -27,6 +27,7 @@
#define WT_METAFILE_SLVG "WiredTiger.wt.orig" /* Metadata copy */
#define WT_METAFILE_URI "file:WiredTiger.wt" /* Metadata table URI */
+#define WT_LAS_FILE "WiredTigerLAS.wt" /* Lookaside table */
#define WT_LAS_URI "file:WiredTigerLAS.wt" /* Lookaside table URI*/
#define WT_SYSTEM_PREFIX "system:" /* System URI prefix */
diff --git a/src/third_party/wiredtiger/src/include/reconcile.h b/src/third_party/wiredtiger/src/include/reconcile.h
new file mode 100644
index 00000000000..fdb47f3d3d9
--- /dev/null
+++ b/src/third_party/wiredtiger/src/include/reconcile.h
@@ -0,0 +1,294 @@
+/*-
+ * Copyright (c) 2014-2019 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * Reconciliation is the process of taking an in-memory page, walking each entry
+ * in the page, building a backing disk image in a temporary buffer representing
+ * that information, and writing that buffer to disk. What could be simpler?
+ *
+ * WT_RECONCILE --
+ * Information tracking a single page reconciliation.
+ */
+typedef struct {
+ WT_REF *ref; /* Page being reconciled */
+ WT_PAGE *page;
+ uint32_t flags; /* Caller's configuration */
+
+ /*
+ * Track start/stop write generation to decide if all changes to the
+ * page are written.
+ */
+ uint32_t orig_write_gen;
+
+ /*
+ * Track start/stop checkpoint generations to decide if lookaside table
+ * records are correct.
+ */
+ uint64_t orig_btree_checkpoint_gen;
+ uint64_t orig_txn_checkpoint_gen;
+
+ /*
+ * Track the oldest running transaction and whether to skew lookaside
+ * to the newest update.
+ */
+ bool las_skew_newest;
+ uint64_t last_running;
+
+ /* Track the page's min/maximum transactions. */
+ uint64_t max_txn;
+ wt_timestamp_t max_timestamp;
+
+ /* Lookaside boundary tracking. */
+ uint64_t unstable_txn;
+ wt_timestamp_t unstable_timestamp;
+
+ u_int updates_seen; /* Count of updates seen. */
+ u_int updates_unstable; /* Count of updates not visible_all. */
+
+ bool update_uncommitted; /* An update was uncommitted */
+ bool update_used; /* An update could be used */
+
+ /*
+ * When we can't mark the page clean (for example, checkpoint found some
+ * uncommitted updates), there's a leave-dirty flag.
+ */
+ bool leave_dirty;
+
+ /*
+ * Track if reconciliation has seen any overflow items. If a leaf page
+ * with no overflow items is written, the parent page's address cell is
+ * set to the leaf-no-overflow type. This means we can delete the leaf
+ * page without reading it because we don't have to discard any overflow
+ * items it might reference.
+ *
+ * The test is per-page reconciliation, that is, once we see an
+ * overflow item on the page, all subsequent leaf pages written for the
+ * page will not be leaf-no-overflow type, regardless of whether or not
+ * they contain overflow items. In other words, leaf-no-overflow is not
+ * guaranteed to be set on every page that doesn't contain an overflow
+ * item, only that if it is set, the page contains no overflow items.
+ * XXX
+ * This was originally done because raw compression couldn't do better,
+ * now that raw compression has been removed, we should do better.
+ */
+ bool ovfl_items;
+
+ /*
+ * Track if reconciliation of a row-store leaf page has seen empty (zero
+ * length) values. We don't write out anything for empty values, so if
+ * there are empty values on a page, we have to make two passes over the
+ * page when it's read to figure out how many keys it has, expensive in
+ * the common case of no empty values and (entries / 2) keys. Likewise,
+ * a page with only empty values is another common data set, and keys on
+ * that page will be equal to the number of entries. In both cases, set
+ * a flag in the page's on-disk header.
+ *
+ * The test is per-page reconciliation as described above for the
+ * overflow-item test.
+ */
+ bool all_empty_value, any_empty_value;
+
+ /*
+ * Reconciliation gets tricky if we have to split a page, which happens
+ * when the disk image we create exceeds the page type's maximum disk
+ * image size.
+ *
+ * First, the target size of the page we're building.
+ */
+ uint32_t page_size; /* Page size */
+
+ /*
+ * Second, the split size: if we're doing the page layout, split to a
+ * smaller-than-maximum page size when a split is required so we don't
+ * repeatedly split a packed page.
+ */
+ uint32_t split_size; /* Split page size */
+ uint32_t min_split_size; /* Minimum split page size */
+
+ /*
+ * We maintain two split chunks in the memory during reconciliation to
+ * be written out as pages. As we get to the end of the data, if the
+ * last one turns out to be smaller than the minimum split size, we go
+ * back into the penultimate chunk and split at this minimum split size
+ * boundary. This moves some data from the penultimate chunk to the last
+ * chunk, hence increasing the size of the last page written without
+ * decreasing the penultimate page size beyond the minimum split size.
+ * For this reason, we maintain an expected split percentage boundary
+ * and a minimum split percentage boundary.
+ *
+ * Chunks are referenced by current and previous pointers. In case of a
+ * split, previous references the first chunk and current switches to
+ * the second chunk. If reconciliation generates more split chunks, the
+ * previous chunk is written to the disk and current and previous
+ * swap.
+ */
+ struct __wt_rec_chunk {
+ /*
+ * The recno and entries fields are the starting record number
+ * of the split chunk (for column-store splits), and the number
+ * of entries in the split chunk.
+ *
+ * The key for a row-store page; no column-store key is needed
+ * because the page's recno, stored in the recno field, is the
+ * column-store key.
+ */
+ uint32_t entries;
+ uint64_t recno;
+ WT_ITEM key;
+
+ uint32_t min_entries;
+ uint64_t min_recno;
+ WT_ITEM min_key;
+
+ /* Minimum split-size boundary buffer offset. */
+ size_t min_offset;
+
+ WT_ITEM image; /* disk-image */
+ } chunkA, chunkB, *cur_ptr, *prev_ptr;
+
+ /*
+ * We track current information about the current record number, the
+ * number of entries copied into the disk image buffer, where we are
+ * in the buffer, and how much memory remains. Those values are
+ * packaged here rather than passing pointers to stack locations
+ * around the code.
+ */
+ uint64_t recno; /* Current record number */
+ uint32_t entries; /* Current number of entries */
+ uint8_t *first_free; /* Current first free byte */
+ size_t space_avail; /* Remaining space in this chunk */
+ /* Remaining space in this chunk to put a minimum size boundary */
+ size_t min_space_avail;
+
+ /*
+ * Saved update list, supporting the WT_REC_UPDATE_RESTORE and
+ * WT_REC_LOOKASIDE configurations. While reviewing updates for each
+ * page, we save WT_UPDATE lists here, and then move them to per-block
+ * areas as the blocks are defined.
+ */
+ WT_SAVE_UPD *supd; /* Saved updates */
+ uint32_t supd_next;
+ size_t supd_allocated;
+ size_t supd_memsize; /* Size of saved update structures */
+
+ /* List of pages we've written so far. */
+ WT_MULTI *multi;
+ uint32_t multi_next;
+ size_t multi_allocated;
+
+ /*
+ * Root pages are written when wrapping up the reconciliation, remember
+ * the image we're going to write.
+ */
+ WT_ITEM *wrapup_checkpoint;
+ bool wrapup_checkpoint_compressed;
+
+ /*
+ * We don't need to keep the 0th key around on internal pages, the
+ * search code ignores them as nothing can sort less by definition.
+ * There's some trickiness here, see the code for comments on how
+ * these fields work.
+ */
+ bool cell_zero; /* Row-store internal page 0th key */
+
+ /*
+ * We calculate checksums to find previously written identical blocks,
+ * but once a match fails during an eviction, there's no point trying
+ * again.
+ */
+ bool evict_matching_checksum_failed;
+
+ /*
+ * WT_REC_DICTIONARY --
+ * We optionally build a dictionary of values for leaf pages. Where
+ * two value cells are identical, only write the value once, the second
+ * and subsequent copies point to the original cell. The dictionary is
+ * fixed size, but organized in a skip-list to make searches faster.
+ */
+ struct __wt_rec_dictionary {
+ uint64_t hash; /* Hash value */
+ uint32_t offset; /* Matching cell */
+
+ u_int depth; /* Skiplist */
+ WT_REC_DICTIONARY *next[0];
+ } **dictionary; /* Dictionary */
+ u_int dictionary_next, dictionary_slots; /* Next, max entries */
+ /* Skiplist head. */
+ WT_REC_DICTIONARY *dictionary_head[WT_SKIP_MAXDEPTH];
+
+ /*
+ * WT_REC_KV--
+ * An on-page key/value item we're building.
+ */
+ struct __wt_rec_kv {
+ WT_ITEM buf; /* Data */
+ WT_CELL cell; /* Cell and cell's length */
+ size_t cell_len;
+ size_t len; /* Total length of cell + data */
+ } k, v; /* Key/Value being built */
+
+ WT_ITEM *cur, _cur; /* Key/Value being built */
+ WT_ITEM *last, _last; /* Last key/value built */
+
+ bool key_pfx_compress; /* If can prefix-compress next key */
+ bool key_pfx_compress_conf; /* If prefix compression configured */
+ bool key_sfx_compress; /* If can suffix-compress next key */
+ bool key_sfx_compress_conf; /* If suffix compression configured */
+
+ bool is_bulk_load; /* If it's a bulk load */
+
+ WT_SALVAGE_COOKIE *salvage; /* If it's a salvage operation */
+
+ bool cache_write_lookaside; /* Used the lookaside table */
+ bool cache_write_restore; /* Used update/restoration */
+
+ uint32_t tested_ref_state; /* Debugging information */
+
+ /*
+ * XXX
+ * In the case of a modified update, we may need a copy of the current
+ * value as a set of bytes. We call back into the btree code using a
+ * fake cursor to do that work. This a layering violation and fragile,
+ * we need a better solution.
+ */
+ WT_CURSOR_BTREE update_modify_cbt;
+} WT_RECONCILE;
+
+/*
+ * WT_CHILD_RELEASE, WT_CHILD_RELEASE_ERR --
+ * Macros to clean up during internal-page reconciliation, releasing the
+ * hazard pointer we're holding on child pages.
+ */
+#define WT_CHILD_RELEASE(session, hazard, ref) do { \
+ if (hazard) { \
+ (hazard) = false; \
+ WT_TRET( \
+ __wt_page_release(session, ref, WT_READ_NO_EVICT)); \
+ } \
+} while (0)
+#define WT_CHILD_RELEASE_ERR(session, hazard, ref) do { \
+ WT_CHILD_RELEASE(session, hazard, ref); \
+ WT_ERR(ret); \
+} while (0)
+
+typedef enum {
+ WT_CHILD_IGNORE, /* Ignored child */
+ WT_CHILD_MODIFIED, /* Modified child */
+ WT_CHILD_ORIGINAL, /* Original child */
+ WT_CHILD_PROXY /* Deleted child: proxy */
+} WT_CHILD_STATE;
+
+/*
+ * Macros from fixed-length entries to/from bytes.
+ */
+#define WT_FIX_BYTES_TO_ENTRIES(btree, bytes) \
+ ((uint32_t)((((bytes) * 8) / (btree)->bitcnt)))
+#define WT_FIX_ENTRIES_TO_BYTES(btree, entries) \
+ ((uint32_t)WT_ALIGN((entries) * (btree)->bitcnt, 8))
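
The two-chunk arrangement described in the WT_RECONCILE comments above (chunkA/chunkB referenced by cur_ptr and prev_ptr) is a simple double-buffering scheme. A standalone sketch of the pointer handling under the behaviour stated there: on the first split the current pointer moves to the second chunk, and on later splits the finished chunk is written out and the pointers swap; write_chunk() is a hypothetical placeholder.

#include <stddef.h>
#include <stdint.h>

struct chunk {
    uint32_t entries;   /* ... plus key, recno, disk image ... */
};

struct rec {
    struct chunk chunkA, chunkB;
    struct chunk *cur_ptr, *prev_ptr;
};

static void
split_chunk(struct rec *r)
{
    struct chunk *tmp;

    if (r->prev_ptr == NULL) {
        /* First split: keep the first chunk as "previous". */
        r->prev_ptr = r->cur_ptr;
        r->cur_ptr = (r->cur_ptr == &r->chunkA) ? &r->chunkB : &r->chunkA;
        return;
    }

    /* Later splits: write the finished chunk, then swap the pointers. */
    /* write_chunk(r->prev_ptr); -- hypothetical */
    tmp = r->prev_ptr;
    r->prev_ptr = r->cur_ptr;
    r->cur_ptr = tmp;
}
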
diff --git a/src/third_party/wiredtiger/src/include/reconcile.i b/src/third_party/wiredtiger/src/include/reconcile.i
new file mode 100644
index 00000000000..b56b8dc1404
--- /dev/null
+++ b/src/third_party/wiredtiger/src/include/reconcile.i
@@ -0,0 +1,257 @@
+/*-
+ * Copyright (c) 2014-2019 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+#define WT_CROSSING_MIN_BND(r, next_len) \
+ ((r)->cur_ptr->min_offset == 0 && \
+ (next_len) > (r)->min_space_avail)
+#define WT_CROSSING_SPLIT_BND(r, next_len) ((next_len) > (r)->space_avail)
+#define WT_CHECK_CROSSING_BND(r, next_len) \
+ (WT_CROSSING_MIN_BND(r, next_len) || WT_CROSSING_SPLIT_BND(r, next_len))
+
+/*
+ * __wt_rec_vtype --
+ * Return a value cell's address type.
+ */
+static inline u_int
+__wt_rec_vtype(WT_ADDR *addr)
+{
+ if (addr->type == WT_ADDR_INT)
+ return (WT_CELL_ADDR_INT);
+ if (addr->type == WT_ADDR_LEAF)
+ return (WT_CELL_ADDR_LEAF);
+ return (WT_CELL_ADDR_LEAF_NO);
+}
+
+/*
+ * __wt_rec_need_split --
+ * Check whether adding some bytes to the page requires a split.
+ */
+static inline bool
+__wt_rec_need_split(WT_RECONCILE *r, size_t len)
+{
+ /*
+ * In the case of a row-store leaf page, trigger a split if a threshold
+ * number of saved updates is reached. This allows pages to split for
+ * update/restore and lookaside eviction when there is no visible data
+ * causing the disk image to grow.
+ *
+ * In the case of small pages or large keys, we might try to split when
+ * a page has no updates or entries, which isn't possible. To consider
+ * update/restore or lookaside information, require either page entries
+ * or updates that will be attached to the image. The limit is one of
+ * either, but it doesn't make sense to create pages or images with few
+ * entries or updates, even where page sizes are small (especially as
+ * updates that will eventually become overflow items can throw off our
+ * calculations). Bound the combination at something reasonable.
+ */
+ if (r->page->type == WT_PAGE_ROW_LEAF && r->entries + r->supd_next > 10)
+ len += r->supd_memsize;
+
+ /* Check for the disk image crossing a boundary. */
+ return (WT_CHECK_CROSSING_BND(r, len));
+}
+
+/*
+ * __wt_rec_incr --
+ * Update the memory tracking structure for a set of new entries.
+ */
+static inline void
+__wt_rec_incr(
+ WT_SESSION_IMPL *session, WT_RECONCILE *r, uint32_t v, size_t size)
+{
+ /*
+ * The buffer code is fragile and prone to off-by-one errors -- check
+ * for overflow in diagnostic mode.
+ */
+ WT_ASSERT(session, r->space_avail >= size);
+ WT_ASSERT(session, WT_BLOCK_FITS(r->first_free, size,
+ r->cur_ptr->image.mem, r->cur_ptr->image.memsize));
+
+ r->entries += v;
+ r->space_avail -= size;
+ r->first_free += size;
+
+ /*
+ * If offset for the minimum split size boundary is not set, we have not
+ * yet reached the minimum boundary, reduce the space available for it.
+ */
+ if (r->cur_ptr->min_offset == 0) {
+ if (r->min_space_avail >= size)
+ r->min_space_avail -= size;
+ else
+ r->min_space_avail = 0;
+ }
+}
+
+/*
+ * __wt_rec_copy_incr --
+ * Copy a key/value cell and buffer pair into the new image.
+ */
+static inline void
+__wt_rec_copy_incr(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REC_KV *kv)
+{
+ size_t len;
+ uint8_t *p, *t;
+
+ /*
+ * If there's only one chunk of data to copy (because the cell and data
+ * are being copied from the original disk page), the cell length won't
+ * be set, the WT_ITEM data/length will reference the data to be copied.
+ *
+ * WT_CELLs are typically small, 1 or 2 bytes -- don't call memcpy, do
+ * the copy in-line.
+ */
+ for (p = r->first_free,
+ t = (uint8_t *)&kv->cell, len = kv->cell_len; len > 0; --len)
+ *p++ = *t++;
+
+ /* The data can be quite large -- call memcpy. */
+ if (kv->buf.size != 0)
+ memcpy(p, kv->buf.data, kv->buf.size);
+
+ WT_ASSERT(session, kv->len == kv->cell_len + kv->buf.size);
+ __wt_rec_incr(session, r, 1, kv->len);
+}
+
+/*
+ * __wt_rec_cell_build_addr --
+ * Process an address reference and return a cell structure to be stored
+ * on the page.
+ */
+static inline void
+__wt_rec_cell_build_addr(WT_SESSION_IMPL *session, WT_RECONCILE *r,
+ const void *addr, size_t size, u_int cell_type, uint64_t recno)
+{
+ WT_REC_KV *val;
+
+ val = &r->v;
+
+ WT_ASSERT(session, size != 0 || cell_type == WT_CELL_ADDR_DEL);
+
+ /*
+ * We don't check the address size because we can't store an address on
+ * an overflow page: if the address won't fit, the overflow page's
+ * address won't fit either. This possibility must be handled by Btree
+ * configuration, we have to disallow internal page sizes that are too
+ * small with respect to the largest address cookie the underlying block
+ * manager might return.
+ */
+
+ /*
+ * We don't copy the data into the buffer, it's not necessary; just
+ * re-point the buffer's data/length fields.
+ */
+ val->buf.data = addr;
+ val->buf.size = size;
+ val->cell_len =
+ __wt_cell_pack_addr(&val->cell, cell_type, recno, val->buf.size);
+ val->len = val->cell_len + val->buf.size;
+}
+
+/*
+ * __wt_rec_cell_build_val --
+ * Process a data item and return a WT_CELL structure and byte string to
+ * be stored on the page.
+ */
+static inline int
+__wt_rec_cell_build_val(WT_SESSION_IMPL *session,
+ WT_RECONCILE *r, const void *data, size_t size, uint64_t rle)
+{
+ WT_BTREE *btree;
+ WT_REC_KV *val;
+
+ btree = S2BT(session);
+
+ val = &r->v;
+
+ /*
+ * We don't copy the data into the buffer, it's not necessary; just
+ * re-point the buffer's data/length fields.
+ */
+ val->buf.data = data;
+ val->buf.size = size;
+
+ /* Handle zero-length cells quickly. */
+ if (size != 0) {
+ /* Optionally compress the data using the Huffman engine. */
+ if (btree->huffman_value != NULL)
+ WT_RET(__wt_huffman_encode(
+ session, btree->huffman_value,
+ val->buf.data, (uint32_t)val->buf.size, &val->buf));
+
+ /* Create an overflow object if the data won't fit. */
+ if (val->buf.size > btree->maxleafvalue) {
+ WT_STAT_DATA_INCR(session, rec_overflow_value);
+
+ return (__wt_rec_cell_build_ovfl(
+ session, r, val, WT_CELL_VALUE_OVFL, rle));
+ }
+ }
+ val->cell_len = __wt_cell_pack_data(&val->cell, rle, val->buf.size);
+ val->len = val->cell_len + val->buf.size;
+
+ return (0);
+}
+
+/*
+ * __wt_rec_dict_replace --
+ * Check for a dictionary match.
+ */
+static inline int
+__wt_rec_dict_replace(
+ WT_SESSION_IMPL *session, WT_RECONCILE *r, uint64_t rle, WT_REC_KV *val)
+{
+ WT_REC_DICTIONARY *dp;
+ uint64_t offset;
+
+ /*
+ * We optionally create a dictionary of values and only write a unique
+ * value once per page, using a special "copy" cell for all subsequent
+ * copies of the value. We have to do the cell build and resolution at
+ * this low level because we need physical cell offsets for the page.
+ *
+ * Sanity check: short-data cells can be smaller than dictionary-copy
+ * cells. If the data is already small, don't bother doing the work.
+ * This isn't just work avoidance: on-page cells can't grow as a result
+ * of writing a dictionary-copy cell, the reconciliation functions do a
+ * split-boundary test based on the size required by the value's cell;
+ * if we grow the cell after that test we'll potentially write off the
+ * end of the buffer's memory.
+ */
+ if (val->buf.size <= WT_INTPACK32_MAXSIZE)
+ return (0);
+ WT_RET(__wt_rec_dictionary_lookup(session, r, val, &dp));
+ if (dp == NULL)
+ return (0);
+
+ /*
+ * If the dictionary offset isn't set, we're creating a new entry in the
+ * dictionary, set its location.
+ *
+ * If the dictionary offset is set, we have a matching value. Create a
+ * copy cell instead.
+ */
+ if (dp->offset == 0)
+ dp->offset = WT_PTRDIFF32(r->first_free, r->cur_ptr->image.mem);
+ else {
+ /*
+ * The offset is the byte offset from this cell to the previous,
+ * matching cell, NOT the byte offset from the beginning of the
+ * page.
+ */
+ offset = (uint64_t)WT_PTRDIFF(r->first_free,
+ (uint8_t *)r->cur_ptr->image.mem + dp->offset);
+ val->len = val->cell_len =
+ __wt_cell_pack_copy(&val->cell, rle, offset);
+ val->buf.data = NULL;
+ val->buf.size = 0;
+ }
+ return (0);
+}
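
The split decision in this file reduces to the three boundary macros at the top: a chunk crosses the minimum split boundary the first time the smaller budget is exhausted (before min_offset is recorded), and crosses the split boundary whenever the next item no longer fits. A simplified standalone mirror of those checks; it leaves out the row-store saved-update adjustment made by __wt_rec_need_split.

#include <stdbool.h>
#include <stddef.h>

struct bounds {
    size_t space_avail;     /* Room left before the split size */
    size_t min_space_avail; /* Room left before the minimum split size */
    size_t min_offset;      /* 0 until the minimum boundary is recorded */
};

static bool
crossing_min_bnd(const struct bounds *b, size_t next_len)
{
    return (b->min_offset == 0 && next_len > b->min_space_avail);
}

static bool
crossing_split_bnd(const struct bounds *b, size_t next_len)
{
    return (next_len > b->space_avail);
}

static bool
need_split(const struct bounds *b, size_t next_len)
{
    return (crossing_min_bnd(b, next_len) || crossing_split_bnd(b, next_len));
}
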
diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h
index a1fc065d263..b0e66d69743 100644
--- a/src/third_party/wiredtiger/src/include/stat.h
+++ b/src/third_party/wiredtiger/src/include/stat.h
@@ -371,6 +371,8 @@ struct __wt_connection_stats {
int64_t cache_lookaside_score;
int64_t cache_lookaside_entries;
int64_t cache_lookaside_insert;
+ int64_t cache_lookaside_ondisk_max;
+ int64_t cache_lookaside_ondisk;
int64_t cache_lookaside_remove;
int64_t cache_eviction_checkpoint;
int64_t cache_eviction_get_ref;
diff --git a/src/third_party/wiredtiger/src/include/txn.h b/src/third_party/wiredtiger/src/include/txn.h
index a1f6634922a..928c3c13ad5 100644
--- a/src/third_party/wiredtiger/src/include/txn.h
+++ b/src/third_party/wiredtiger/src/include/txn.h
@@ -153,6 +153,8 @@ struct __wt_txn_global {
WT_TXN_STATE checkpoint_state; /* Checkpoint's txn state */
wt_timestamp_t checkpoint_timestamp; /* Checkpoint's timestamp */
+ volatile uint64_t debug_ops; /* Debug mode op counter */
+ uint64_t debug_rollback; /* Debug mode rollback */
volatile uint64_t metadata_pinned; /* Oldest ID for metadata */
/* Named snapshot state. */
diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i
index 1cd615fa3bd..de10e8c44b9 100644
--- a/src/third_party/wiredtiger/src/include/txn.i
+++ b/src/third_party/wiredtiger/src/include/txn.i
@@ -425,6 +425,42 @@ __wt_txn_op_apply_prepare_state(
}
/*
+ * __wt_txn_op_delete_commit_apply_timestamps --
+ * Apply the correct start and durable timestamps to any
+ * updates in the page del update list.
+ */
+static inline void
+__wt_txn_op_delete_commit_apply_timestamps(
+ WT_SESSION_IMPL *session, WT_REF *ref)
+{
+ WT_TXN *txn;
+ WT_UPDATE **updp;
+ uint32_t previous_state;
+
+ txn = &session->txn;
+
+ /*
+ * Lock the ref to ensure we don't race with eviction freeing the page
+ * deleted update list or with a page instantiate.
+ */
+ for (;; __wt_yield()) {
+ previous_state = ref->state;
+ WT_ASSERT(session, previous_state != WT_REF_READING);
+ if (previous_state != WT_REF_LOCKED && WT_REF_CAS_STATE(
+ session, ref, previous_state, WT_REF_LOCKED))
+ break;
+ }
+
+ for (updp = ref->page_del->update_list;
+ updp != NULL && *updp != NULL; ++updp) {
+ (*updp)->timestamp = txn->commit_timestamp;
+ }
+
+ /* Unlock the page by setting it back to its previous state */
+ WT_REF_SET_STATE(ref, previous_state);
+}
+
+/*
* __wt_txn_op_set_timestamp --
* Decide whether to copy a commit timestamp into an update. If the op
* structure doesn't have a populated update or ref field or in prepared
@@ -471,6 +507,10 @@ __wt_txn_op_set_timestamp(WT_SESSION_IMPL *session, WT_TXN_OP *op)
&op->u.ref->page_del->timestamp : &op->u.op_upd->timestamp;
if (*timestamp == 0)
*timestamp = txn->commit_timestamp;
+
+ if (op->type == WT_TXN_OP_REF_DELETE)
+ __wt_txn_op_delete_commit_apply_timestamps(
+ session, op->u.ref);
}
}
@@ -1075,13 +1115,19 @@ static inline int
__wt_txn_update_check(WT_SESSION_IMPL *session, WT_UPDATE *upd)
{
WT_TXN *txn;
+ WT_TXN_GLOBAL *txn_global;
bool ignore_prepare_set;
txn = &session->txn;
+ txn_global = &S2C(session)->txn_global;
if (txn->isolation != WT_ISO_SNAPSHOT)
return (0);
+ if (txn_global->debug_rollback != 0 &&
+ ++txn_global->debug_ops % txn_global->debug_rollback == 0)
+ return (__wt_txn_rollback_required(session,
+ "debug mode simulated conflict"));
/*
* Always include prepared transactions in this check: they are not
* supposed to affect visibility for update operations.
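
The debug_rollback path above fails roughly every Nth update with a simulated WT_ROLLBACK: a shared counter is incremented on each visibility check and tested modulo the configured interval. A single-threaded sketch of the counting pattern (the real counter is a volatile shared across all sessions):

#include <stdbool.h>
#include <stdint.h>

struct debug_state {
    uint64_t debug_ops;      /* Operations seen so far */
    uint64_t debug_rollback; /* Force a rollback every Nth op; 0 disables */
};

/* Return true when this operation should fail with a simulated conflict. */
static bool
simulate_conflict(struct debug_state *s)
{
    return (s->debug_rollback != 0 &&
        ++s->debug_ops % s->debug_rollback == 0);
}
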
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index 17bfb813151..2fe91e312e4 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -2196,6 +2196,16 @@ struct __wt_connection {
* application thread will wait for space to be available in cache
* before giving up. Default will wait forever., an integer greater
* than or equal to 0; default \c 0.}
+ * @config{cache_overflow = (, cache overflow configuration options., a
+ * set of related configuration options defined below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;file_max, The maximum number of bytes
+ * that WiredTiger is allowed to use for its cache overflow mechanism.
+ * If the cache overflow file exceeds this size\, a panic will be
+ * triggered. The default value means that the cache overflow file is
+ * unbounded and may use as much space as the filesystem will
+ * accommodate. The minimum non-zero setting is 100MB., an integer
+ * greater than or equal to 0; default \c 0.}
+ * @config{ ),,}
* @config{cache_overhead, assume the heap allocator overhead is the
* specified percentage\, and adjust the cache usage by that amount (for
* example\, if there is 10GB of data in cache\, a percentage of 10
@@ -2228,6 +2238,28 @@ struct __wt_connection {
* @config{&nbsp;&nbsp;&nbsp;&nbsp;release, compatibility release
* version string., a string; default empty.}
* @config{ ),,}
+ * @config{debug_mode = (, control the settings of various extended
+ * debugging features., a set of related configuration options defined
+ * below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;checkpoint_retention, adjust
+ * log archiving to retain the log records of this number of
+ * checkpoints. Zero or one means perform normal archiving., an integer
+ * between 0 and 1024; default \c 0.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;eviction, if true\, modify internal
+ * algorithms to change skew to force lookaside eviction to happen more
+ * aggressively. This includes but is not limited to not skewing
+ * newest\, not favoring leaf pages\, and modifying the eviction score
+ * mechanism., a boolean flag; default \c false.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;rollback_error, return a WT_ROLLBACK
+ * error from a transaction operation about every Nth operation to
+ * simulate a collision., an integer between 0 and 10M; default \c 0.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;table_logging, if true\, write
+ * transaction related information to the log for all operations\, even
+ * operations for tables with logging turned off. This setting
+ * introduces a log format change that may break older versions of
+ * WiredTiger. These operations are informational and skipped in
+ * recovery., a boolean flag; default \c false.}
+ * @config{ ),,}
* @config{error_prefix, prefix string for error messages., a string;
* default empty.}
* @config{eviction = (, eviction configuration options., a set of
@@ -2795,6 +2827,15 @@ struct __wt_connection {
* thread will wait for space to be available in cache before giving up.
* Default will wait forever., an integer greater than or equal to 0; default \c
* 0.}
+ * @config{cache_overflow = (, cache overflow configuration options., a set of
+ * related configuration options defined below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;file_max, The maximum number of bytes that
+ * WiredTiger is allowed to use for its cache overflow mechanism. If the cache
+ * overflow file exceeds this size\, a panic will be triggered. The default
+ * value means that the cache overflow file is unbounded and may use as much
+ * space as the filesystem will accommodate. The minimum non-zero setting is
+ * 100MB., an integer greater than or equal to 0; default \c 0.}
+ * @config{ ),,}
* @config{cache_overhead, assume the heap allocator overhead is the specified
* percentage\, and adjust the cache usage by that amount (for example\, if
* there is 10GB of data in cache\, a percentage of 10 means WiredTiger treats
@@ -2843,6 +2884,27 @@ struct __wt_connection {
* true.}
* @config{create, create the database if it does not exist., a boolean flag;
* default \c false.}
+ * @config{debug_mode = (, control the settings of various extended debugging
+ * features., a set of related configuration options defined below.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;checkpoint_retention, adjust log archiving to
+ * retain the log records of this number of checkpoints. Zero or one means
+ * perform normal archiving., an integer between 0 and 1024; default \c 0.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;eviction, if true\, modify internal
+ * algorithms to change skew to force lookaside eviction to happen more
+ * aggressively. This includes but is not limited to not skewing newest\, not
+ * favoring leaf pages\, and modifying the eviction score mechanism., a boolean
+ * flag; default \c false.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;rollback_error,
+ * return a WT_ROLLBACK error from a transaction operation about every Nth
+ * operation to simulate a collision., an integer between 0 and 10M; default \c
+ * 0.}
+ * @config{&nbsp;&nbsp;&nbsp;&nbsp;table_logging, if true\, write
+ * transaction related information to the log for all operations\, even
+ * operations for tables with logging turned off. This setting introduces a log
+ * format change that may break older versions of WiredTiger. These operations
+ * are informational and skipped in recovery., a boolean flag; default \c
+ * false.}
+ * @config{ ),,}
* @config{direct_io, Use \c O_DIRECT on POSIX systems\, and \c
* FILE_FLAG_NO_BUFFERING on Windows to access files. Options are given as a
* list\, such as <code>"direct_io=[data]"</code>. Configuring \c direct_io
@@ -4921,6 +4983,12 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_LOGOP_COL_MODIFY 9
/*! row-store modify */
#define WT_LOGOP_ROW_MODIFY 10
+/*
+ * NOTE: Diagnostic-only log operations should have values in
+ * the ignore range.
+ */
+/*! Diagnostic: transaction timestamps */
+#define WT_LOGOP_TXN_TIMESTAMP (WT_LOGOP_IGNORE | 11)
/*! @} */
/*******************************************
@@ -5034,737 +5102,741 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
#define WT_STAT_CONN_CACHE_LOOKASIDE_ENTRIES 1045
/*! cache: cache overflow table insert calls */
#define WT_STAT_CONN_CACHE_LOOKASIDE_INSERT 1046
+/*! cache: cache overflow table max on-disk size */
+#define WT_STAT_CONN_CACHE_LOOKASIDE_ONDISK_MAX 1047
+/*! cache: cache overflow table on-disk size */
+#define WT_STAT_CONN_CACHE_LOOKASIDE_ONDISK 1048
/*! cache: cache overflow table remove calls */
-#define WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE 1047
+#define WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE 1049
/*! cache: checkpoint blocked page eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_CHECKPOINT 1048
+#define WT_STAT_CONN_CACHE_EVICTION_CHECKPOINT 1050
/*! cache: eviction calls to get a page */
-#define WT_STAT_CONN_CACHE_EVICTION_GET_REF 1049
+#define WT_STAT_CONN_CACHE_EVICTION_GET_REF 1051
/*! cache: eviction calls to get a page found queue empty */
-#define WT_STAT_CONN_CACHE_EVICTION_GET_REF_EMPTY 1050
+#define WT_STAT_CONN_CACHE_EVICTION_GET_REF_EMPTY 1052
/*! cache: eviction calls to get a page found queue empty after locking */
-#define WT_STAT_CONN_CACHE_EVICTION_GET_REF_EMPTY2 1051
+#define WT_STAT_CONN_CACHE_EVICTION_GET_REF_EMPTY2 1053
/*! cache: eviction currently operating in aggressive mode */
-#define WT_STAT_CONN_CACHE_EVICTION_AGGRESSIVE_SET 1052
+#define WT_STAT_CONN_CACHE_EVICTION_AGGRESSIVE_SET 1054
/*! cache: eviction empty score */
-#define WT_STAT_CONN_CACHE_EVICTION_EMPTY_SCORE 1053
+#define WT_STAT_CONN_CACHE_EVICTION_EMPTY_SCORE 1055
/*! cache: eviction passes of a file */
-#define WT_STAT_CONN_CACHE_EVICTION_WALK_PASSES 1054
+#define WT_STAT_CONN_CACHE_EVICTION_WALK_PASSES 1056
/*! cache: eviction server candidate queue empty when topping up */
-#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_EMPTY 1055
+#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_EMPTY 1057
/*! cache: eviction server candidate queue not empty when topping up */
-#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_NOT_EMPTY 1056
+#define WT_STAT_CONN_CACHE_EVICTION_QUEUE_NOT_EMPTY 1058
/*! cache: eviction server evicting pages */
-#define WT_STAT_CONN_CACHE_EVICTION_SERVER_EVICTING 1057
+#define WT_STAT_CONN_CACHE_EVICTION_SERVER_EVICTING 1059
/*!
* cache: eviction server slept, because we did not make progress with
* eviction
*/
-#define WT_STAT_CONN_CACHE_EVICTION_SERVER_SLEPT 1058
+#define WT_STAT_CONN_CACHE_EVICTION_SERVER_SLEPT 1060
/*! cache: eviction server unable to reach eviction goal */
-#define WT_STAT_CONN_CACHE_EVICTION_SLOW 1059
+#define WT_STAT_CONN_CACHE_EVICTION_SLOW 1061
/*! cache: eviction state */
-#define WT_STAT_CONN_CACHE_EVICTION_STATE 1060
+#define WT_STAT_CONN_CACHE_EVICTION_STATE 1062
/*! cache: eviction walk target pages histogram - 0-9 */
-#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT10 1061
+#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT10 1063
/*! cache: eviction walk target pages histogram - 10-31 */
-#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT32 1062
+#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT32 1064
/*! cache: eviction walk target pages histogram - 128 and higher */
-#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_GE128 1063
+#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_GE128 1065
/*! cache: eviction walk target pages histogram - 32-63 */
-#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT64 1064
+#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT64 1066
/*! cache: eviction walk target pages histogram - 64-128 */
-#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT128 1065
+#define WT_STAT_CONN_CACHE_EVICTION_TARGET_PAGE_LT128 1067
/*! cache: eviction walks abandoned */
-#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ABANDONED 1066
+#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ABANDONED 1068
/*! cache: eviction walks gave up because they restarted their walk twice */
-#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STOPPED 1067
+#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STOPPED 1069
/*!
* cache: eviction walks gave up because they saw too many pages and
* found no candidates
*/
-#define WT_STAT_CONN_CACHE_EVICTION_WALKS_GAVE_UP_NO_TARGETS 1068
+#define WT_STAT_CONN_CACHE_EVICTION_WALKS_GAVE_UP_NO_TARGETS 1070
/*!
* cache: eviction walks gave up because they saw too many pages and
* found too few candidates
*/
-#define WT_STAT_CONN_CACHE_EVICTION_WALKS_GAVE_UP_RATIO 1069
+#define WT_STAT_CONN_CACHE_EVICTION_WALKS_GAVE_UP_RATIO 1071
/*! cache: eviction walks reached end of tree */
-#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ENDED 1070
+#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ENDED 1072
/*! cache: eviction walks started from root of tree */
-#define WT_STAT_CONN_CACHE_EVICTION_WALK_FROM_ROOT 1071
+#define WT_STAT_CONN_CACHE_EVICTION_WALK_FROM_ROOT 1073
/*! cache: eviction walks started from saved location in tree */
-#define WT_STAT_CONN_CACHE_EVICTION_WALK_SAVED_POS 1072
+#define WT_STAT_CONN_CACHE_EVICTION_WALK_SAVED_POS 1074
/*! cache: eviction worker thread active */
-#define WT_STAT_CONN_CACHE_EVICTION_ACTIVE_WORKERS 1073
+#define WT_STAT_CONN_CACHE_EVICTION_ACTIVE_WORKERS 1075
/*! cache: eviction worker thread created */
-#define WT_STAT_CONN_CACHE_EVICTION_WORKER_CREATED 1074
+#define WT_STAT_CONN_CACHE_EVICTION_WORKER_CREATED 1076
/*! cache: eviction worker thread evicting pages */
-#define WT_STAT_CONN_CACHE_EVICTION_WORKER_EVICTING 1075
+#define WT_STAT_CONN_CACHE_EVICTION_WORKER_EVICTING 1077
/*! cache: eviction worker thread removed */
-#define WT_STAT_CONN_CACHE_EVICTION_WORKER_REMOVED 1076
+#define WT_STAT_CONN_CACHE_EVICTION_WORKER_REMOVED 1078
/*! cache: eviction worker thread stable number */
-#define WT_STAT_CONN_CACHE_EVICTION_STABLE_STATE_WORKERS 1077
+#define WT_STAT_CONN_CACHE_EVICTION_STABLE_STATE_WORKERS 1079
/*!
* cache: failed eviction of pages that exceeded the in-memory maximum
* count
*/
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL 1078
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL 1080
/*!
* cache: failed eviction of pages that exceeded the in-memory maximum
* time (usecs)
*/
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL_TIME 1079
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE_FAIL_TIME 1081
/*! cache: files with active eviction walks */
-#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ACTIVE 1080
+#define WT_STAT_CONN_CACHE_EVICTION_WALKS_ACTIVE 1082
/*! cache: files with new eviction walks started */
-#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STARTED 1081
+#define WT_STAT_CONN_CACHE_EVICTION_WALKS_STARTED 1083
/*! cache: force re-tuning of eviction workers once in a while */
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE_RETUNE 1082
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE_RETUNE 1084
/*! cache: hazard pointer blocked page eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1083
+#define WT_STAT_CONN_CACHE_EVICTION_HAZARD 1085
/*! cache: hazard pointer check calls */
-#define WT_STAT_CONN_CACHE_HAZARD_CHECKS 1084
+#define WT_STAT_CONN_CACHE_HAZARD_CHECKS 1086
/*! cache: hazard pointer check entries walked */
-#define WT_STAT_CONN_CACHE_HAZARD_WALKS 1085
+#define WT_STAT_CONN_CACHE_HAZARD_WALKS 1087
/*! cache: hazard pointer maximum array length */
-#define WT_STAT_CONN_CACHE_HAZARD_MAX 1086
+#define WT_STAT_CONN_CACHE_HAZARD_MAX 1088
/*! cache: in-memory page passed criteria to be split */
-#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1087
+#define WT_STAT_CONN_CACHE_INMEM_SPLITTABLE 1089
/*! cache: in-memory page splits */
-#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1088
+#define WT_STAT_CONN_CACHE_INMEM_SPLIT 1090
/*! cache: internal pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1089
+#define WT_STAT_CONN_CACHE_EVICTION_INTERNAL 1091
/*! cache: internal pages split during eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1090
+#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL 1092
/*! cache: leaf pages split during eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1091
+#define WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF 1093
/*! cache: maximum bytes configured */
-#define WT_STAT_CONN_CACHE_BYTES_MAX 1092
+#define WT_STAT_CONN_CACHE_BYTES_MAX 1094
/*! cache: maximum page size at eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1093
+#define WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE 1095
/*! cache: modified pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1094
+#define WT_STAT_CONN_CACHE_EVICTION_DIRTY 1096
/*! cache: modified pages evicted by application threads */
-#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1095
+#define WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY 1097
/*! cache: operations timed out waiting for space in cache */
-#define WT_STAT_CONN_CACHE_TIMED_OUT_OPS 1096
+#define WT_STAT_CONN_CACHE_TIMED_OUT_OPS 1098
/*! cache: overflow pages read into cache */
-#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1097
+#define WT_STAT_CONN_CACHE_READ_OVERFLOW 1099
/*! cache: page split during eviction deepened the tree */
-#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1098
+#define WT_STAT_CONN_CACHE_EVICTION_DEEPEN 1100
/*! cache: page written requiring cache overflow records */
-#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1099
+#define WT_STAT_CONN_CACHE_WRITE_LOOKASIDE 1101
/*! cache: pages currently held in the cache */
-#define WT_STAT_CONN_CACHE_PAGES_INUSE 1100
+#define WT_STAT_CONN_CACHE_PAGES_INUSE 1102
/*! cache: pages evicted because they exceeded the in-memory maximum count */
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1101
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE 1103
/*!
* cache: pages evicted because they exceeded the in-memory maximum time
* (usecs)
*/
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE_TIME 1102
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE_TIME 1104
/*! cache: pages evicted because they had chains of deleted items count */
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1103
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE 1105
/*!
* cache: pages evicted because they had chains of deleted items time
* (usecs)
*/
-#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE_TIME 1104
+#define WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE_TIME 1106
/*! cache: pages evicted by application threads */
-#define WT_STAT_CONN_CACHE_EVICTION_APP 1105
+#define WT_STAT_CONN_CACHE_EVICTION_APP 1107
/*! cache: pages queued for eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1106
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED 1108
/*! cache: pages queued for urgent eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT 1107
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT 1109
/*! cache: pages queued for urgent eviction during walk */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1108
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST 1110
/*! cache: pages read into cache */
-#define WT_STAT_CONN_CACHE_READ 1109
+#define WT_STAT_CONN_CACHE_READ 1111
/*! cache: pages read into cache after truncate */
-#define WT_STAT_CONN_CACHE_READ_DELETED 1110
+#define WT_STAT_CONN_CACHE_READ_DELETED 1112
/*! cache: pages read into cache after truncate in prepare state */
-#define WT_STAT_CONN_CACHE_READ_DELETED_PREPARED 1111
+#define WT_STAT_CONN_CACHE_READ_DELETED_PREPARED 1113
/*! cache: pages read into cache requiring cache overflow entries */
-#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1112
+#define WT_STAT_CONN_CACHE_READ_LOOKASIDE 1114
/*! cache: pages read into cache requiring cache overflow for checkpoint */
-#define WT_STAT_CONN_CACHE_READ_LOOKASIDE_CHECKPOINT 1113
+#define WT_STAT_CONN_CACHE_READ_LOOKASIDE_CHECKPOINT 1115
/*! cache: pages read into cache skipping older cache overflow entries */
-#define WT_STAT_CONN_CACHE_READ_LOOKASIDE_SKIPPED 1114
+#define WT_STAT_CONN_CACHE_READ_LOOKASIDE_SKIPPED 1116
/*!
* cache: pages read into cache with skipped cache overflow entries
* needed later
*/
-#define WT_STAT_CONN_CACHE_READ_LOOKASIDE_DELAY 1115
+#define WT_STAT_CONN_CACHE_READ_LOOKASIDE_DELAY 1117
/*!
* cache: pages read into cache with skipped cache overflow entries
* needed later by checkpoint
*/
-#define WT_STAT_CONN_CACHE_READ_LOOKASIDE_DELAY_CHECKPOINT 1116
+#define WT_STAT_CONN_CACHE_READ_LOOKASIDE_DELAY_CHECKPOINT 1118
/*! cache: pages requested from the cache */
-#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1117
+#define WT_STAT_CONN_CACHE_PAGES_REQUESTED 1119
/*! cache: pages seen by eviction walk */
-#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1118
+#define WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN 1120
/*! cache: pages selected for eviction unable to be evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1119
+#define WT_STAT_CONN_CACHE_EVICTION_FAIL 1121
/*! cache: pages walked for eviction */
-#define WT_STAT_CONN_CACHE_EVICTION_WALK 1120
+#define WT_STAT_CONN_CACHE_EVICTION_WALK 1122
/*! cache: pages written from cache */
-#define WT_STAT_CONN_CACHE_WRITE 1121
+#define WT_STAT_CONN_CACHE_WRITE 1123
/*! cache: pages written requiring in-memory restoration */
-#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1122
+#define WT_STAT_CONN_CACHE_WRITE_RESTORE 1124
/*! cache: percentage overhead */
-#define WT_STAT_CONN_CACHE_OVERHEAD 1123
+#define WT_STAT_CONN_CACHE_OVERHEAD 1125
/*! cache: tracked bytes belonging to internal pages in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1124
+#define WT_STAT_CONN_CACHE_BYTES_INTERNAL 1126
/*! cache: tracked bytes belonging to leaf pages in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_LEAF 1125
+#define WT_STAT_CONN_CACHE_BYTES_LEAF 1127
/*! cache: tracked dirty bytes in the cache */
-#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1126
+#define WT_STAT_CONN_CACHE_BYTES_DIRTY 1128
/*! cache: tracked dirty pages in the cache */
-#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1127
+#define WT_STAT_CONN_CACHE_PAGES_DIRTY 1129
/*! cache: unmodified pages evicted */
-#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1128
+#define WT_STAT_CONN_CACHE_EVICTION_CLEAN 1130
/*! capacity: background fsync file handles considered */
-#define WT_STAT_CONN_FSYNC_ALL_FH_TOTAL 1129
+#define WT_STAT_CONN_FSYNC_ALL_FH_TOTAL 1131
/*! capacity: background fsync file handles synced */
-#define WT_STAT_CONN_FSYNC_ALL_FH 1130
+#define WT_STAT_CONN_FSYNC_ALL_FH 1132
/*! capacity: background fsync time (msecs) */
-#define WT_STAT_CONN_FSYNC_ALL_TIME 1131
+#define WT_STAT_CONN_FSYNC_ALL_TIME 1133
/*! capacity: threshold to call fsync */
-#define WT_STAT_CONN_CAPACITY_THRESHOLD 1132
+#define WT_STAT_CONN_CAPACITY_THRESHOLD 1134
/*! capacity: throttled bytes read */
-#define WT_STAT_CONN_CAPACITY_BYTES_READ 1133
+#define WT_STAT_CONN_CAPACITY_BYTES_READ 1135
/*! capacity: throttled bytes written for checkpoint */
-#define WT_STAT_CONN_CAPACITY_BYTES_CKPT 1134
+#define WT_STAT_CONN_CAPACITY_BYTES_CKPT 1136
/*! capacity: throttled bytes written for eviction */
-#define WT_STAT_CONN_CAPACITY_BYTES_EVICT 1135
+#define WT_STAT_CONN_CAPACITY_BYTES_EVICT 1137
/*! capacity: throttled bytes written for log */
-#define WT_STAT_CONN_CAPACITY_BYTES_LOG 1136
+#define WT_STAT_CONN_CAPACITY_BYTES_LOG 1138
/*! capacity: throttled bytes written total */
-#define WT_STAT_CONN_CAPACITY_BYTES_WRITTEN 1137
+#define WT_STAT_CONN_CAPACITY_BYTES_WRITTEN 1139
/*! capacity: time waiting due to total capacity (usecs) */
-#define WT_STAT_CONN_CAPACITY_TIME_TOTAL 1138
+#define WT_STAT_CONN_CAPACITY_TIME_TOTAL 1140
/*! capacity: time waiting during checkpoint (usecs) */
-#define WT_STAT_CONN_CAPACITY_TIME_CKPT 1139
+#define WT_STAT_CONN_CAPACITY_TIME_CKPT 1141
/*! capacity: time waiting during eviction (usecs) */
-#define WT_STAT_CONN_CAPACITY_TIME_EVICT 1140
+#define WT_STAT_CONN_CAPACITY_TIME_EVICT 1142
/*! capacity: time waiting during logging (usecs) */
-#define WT_STAT_CONN_CAPACITY_TIME_LOG 1141
+#define WT_STAT_CONN_CAPACITY_TIME_LOG 1143
/*! capacity: time waiting during read (usecs) */
-#define WT_STAT_CONN_CAPACITY_TIME_READ 1142
+#define WT_STAT_CONN_CAPACITY_TIME_READ 1144
/*! connection: auto adjusting condition resets */
-#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1143
+#define WT_STAT_CONN_COND_AUTO_WAIT_RESET 1145
/*! connection: auto adjusting condition wait calls */
-#define WT_STAT_CONN_COND_AUTO_WAIT 1144
+#define WT_STAT_CONN_COND_AUTO_WAIT 1146
/*! connection: detected system time went backwards */
-#define WT_STAT_CONN_TIME_TRAVEL 1145
+#define WT_STAT_CONN_TIME_TRAVEL 1147
/*! connection: files currently open */
-#define WT_STAT_CONN_FILE_OPEN 1146
+#define WT_STAT_CONN_FILE_OPEN 1148
/*! connection: memory allocations */
-#define WT_STAT_CONN_MEMORY_ALLOCATION 1147
+#define WT_STAT_CONN_MEMORY_ALLOCATION 1149
/*! connection: memory frees */
-#define WT_STAT_CONN_MEMORY_FREE 1148
+#define WT_STAT_CONN_MEMORY_FREE 1150
/*! connection: memory re-allocations */
-#define WT_STAT_CONN_MEMORY_GROW 1149
+#define WT_STAT_CONN_MEMORY_GROW 1151
/*! connection: pthread mutex condition wait calls */
-#define WT_STAT_CONN_COND_WAIT 1150
+#define WT_STAT_CONN_COND_WAIT 1152
/*! connection: pthread mutex shared lock read-lock calls */
-#define WT_STAT_CONN_RWLOCK_READ 1151
+#define WT_STAT_CONN_RWLOCK_READ 1153
/*! connection: pthread mutex shared lock write-lock calls */
-#define WT_STAT_CONN_RWLOCK_WRITE 1152
+#define WT_STAT_CONN_RWLOCK_WRITE 1154
/*! connection: total fsync I/Os */
-#define WT_STAT_CONN_FSYNC_IO 1153
+#define WT_STAT_CONN_FSYNC_IO 1155
/*! connection: total read I/Os */
-#define WT_STAT_CONN_READ_IO 1154
+#define WT_STAT_CONN_READ_IO 1156
/*! connection: total write I/Os */
-#define WT_STAT_CONN_WRITE_IO 1155
+#define WT_STAT_CONN_WRITE_IO 1157
/*! cursor: cached cursor count */
-#define WT_STAT_CONN_CURSOR_CACHED_COUNT 1156
+#define WT_STAT_CONN_CURSOR_CACHED_COUNT 1158
/*! cursor: cursor close calls that result in cache */
-#define WT_STAT_CONN_CURSOR_CACHE 1157
+#define WT_STAT_CONN_CURSOR_CACHE 1159
/*! cursor: cursor create calls */
-#define WT_STAT_CONN_CURSOR_CREATE 1158
+#define WT_STAT_CONN_CURSOR_CREATE 1160
/*! cursor: cursor insert calls */
-#define WT_STAT_CONN_CURSOR_INSERT 1159
+#define WT_STAT_CONN_CURSOR_INSERT 1161
/*! cursor: cursor modify calls */
-#define WT_STAT_CONN_CURSOR_MODIFY 1160
+#define WT_STAT_CONN_CURSOR_MODIFY 1162
/*! cursor: cursor next calls */
-#define WT_STAT_CONN_CURSOR_NEXT 1161
+#define WT_STAT_CONN_CURSOR_NEXT 1163
/*! cursor: cursor operation restarted */
-#define WT_STAT_CONN_CURSOR_RESTART 1162
+#define WT_STAT_CONN_CURSOR_RESTART 1164
/*! cursor: cursor prev calls */
-#define WT_STAT_CONN_CURSOR_PREV 1163
+#define WT_STAT_CONN_CURSOR_PREV 1165
/*! cursor: cursor remove calls */
-#define WT_STAT_CONN_CURSOR_REMOVE 1164
+#define WT_STAT_CONN_CURSOR_REMOVE 1166
/*! cursor: cursor reserve calls */
-#define WT_STAT_CONN_CURSOR_RESERVE 1165
+#define WT_STAT_CONN_CURSOR_RESERVE 1167
/*! cursor: cursor reset calls */
-#define WT_STAT_CONN_CURSOR_RESET 1166
+#define WT_STAT_CONN_CURSOR_RESET 1168
/*! cursor: cursor search calls */
-#define WT_STAT_CONN_CURSOR_SEARCH 1167
+#define WT_STAT_CONN_CURSOR_SEARCH 1169
/*! cursor: cursor search near calls */
-#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1168
+#define WT_STAT_CONN_CURSOR_SEARCH_NEAR 1170
/*! cursor: cursor sweep buckets */
-#define WT_STAT_CONN_CURSOR_SWEEP_BUCKETS 1169
+#define WT_STAT_CONN_CURSOR_SWEEP_BUCKETS 1171
/*! cursor: cursor sweep cursors closed */
-#define WT_STAT_CONN_CURSOR_SWEEP_CLOSED 1170
+#define WT_STAT_CONN_CURSOR_SWEEP_CLOSED 1172
/*! cursor: cursor sweep cursors examined */
-#define WT_STAT_CONN_CURSOR_SWEEP_EXAMINED 1171
+#define WT_STAT_CONN_CURSOR_SWEEP_EXAMINED 1173
/*! cursor: cursor sweeps */
-#define WT_STAT_CONN_CURSOR_SWEEP 1172
+#define WT_STAT_CONN_CURSOR_SWEEP 1174
/*! cursor: cursor update calls */
-#define WT_STAT_CONN_CURSOR_UPDATE 1173
+#define WT_STAT_CONN_CURSOR_UPDATE 1175
/*! cursor: cursors reused from cache */
-#define WT_STAT_CONN_CURSOR_REOPEN 1174
+#define WT_STAT_CONN_CURSOR_REOPEN 1176
/*! cursor: open cursor count */
-#define WT_STAT_CONN_CURSOR_OPEN_COUNT 1175
+#define WT_STAT_CONN_CURSOR_OPEN_COUNT 1177
/*! cursor: truncate calls */
-#define WT_STAT_CONN_CURSOR_TRUNCATE 1176
+#define WT_STAT_CONN_CURSOR_TRUNCATE 1178
/*! data-handle: connection data handles currently active */
-#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1177
+#define WT_STAT_CONN_DH_CONN_HANDLE_COUNT 1179
/*! data-handle: connection sweep candidate became referenced */
-#define WT_STAT_CONN_DH_SWEEP_REF 1178
+#define WT_STAT_CONN_DH_SWEEP_REF 1180
/*! data-handle: connection sweep dhandles closed */
-#define WT_STAT_CONN_DH_SWEEP_CLOSE 1179
+#define WT_STAT_CONN_DH_SWEEP_CLOSE 1181
/*! data-handle: connection sweep dhandles removed from hash list */
-#define WT_STAT_CONN_DH_SWEEP_REMOVE 1180
+#define WT_STAT_CONN_DH_SWEEP_REMOVE 1182
/*! data-handle: connection sweep time-of-death sets */
-#define WT_STAT_CONN_DH_SWEEP_TOD 1181
+#define WT_STAT_CONN_DH_SWEEP_TOD 1183
/*! data-handle: connection sweeps */
-#define WT_STAT_CONN_DH_SWEEPS 1182
+#define WT_STAT_CONN_DH_SWEEPS 1184
/*! data-handle: session dhandles swept */
-#define WT_STAT_CONN_DH_SESSION_HANDLES 1183
+#define WT_STAT_CONN_DH_SESSION_HANDLES 1185
/*! data-handle: session sweep attempts */
-#define WT_STAT_CONN_DH_SESSION_SWEEPS 1184
+#define WT_STAT_CONN_DH_SESSION_SWEEPS 1186
/*! lock: checkpoint lock acquisitions */
-#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1185
+#define WT_STAT_CONN_LOCK_CHECKPOINT_COUNT 1187
/*! lock: checkpoint lock application thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1186
+#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION 1188
/*! lock: checkpoint lock internal thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1187
+#define WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL 1189
/*!
* lock: commit timestamp queue lock application thread time waiting
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WAIT_APPLICATION 1188
+#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WAIT_APPLICATION 1190
/*! lock: commit timestamp queue lock internal thread time waiting (usecs) */
-#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WAIT_INTERNAL 1189
+#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WAIT_INTERNAL 1191
/*! lock: commit timestamp queue read lock acquisitions */
-#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_READ_COUNT 1190
+#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_READ_COUNT 1192
/*! lock: commit timestamp queue write lock acquisitions */
-#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WRITE_COUNT 1191
+#define WT_STAT_CONN_LOCK_COMMIT_TIMESTAMP_WRITE_COUNT 1193
/*! lock: dhandle lock application thread time waiting (usecs) */
-#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_APPLICATION 1192
+#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_APPLICATION 1194
/*! lock: dhandle lock internal thread time waiting (usecs) */
-#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_INTERNAL 1193
+#define WT_STAT_CONN_LOCK_DHANDLE_WAIT_INTERNAL 1195
/*! lock: dhandle read lock acquisitions */
-#define WT_STAT_CONN_LOCK_DHANDLE_READ_COUNT 1194
+#define WT_STAT_CONN_LOCK_DHANDLE_READ_COUNT 1196
/*! lock: dhandle write lock acquisitions */
-#define WT_STAT_CONN_LOCK_DHANDLE_WRITE_COUNT 1195
+#define WT_STAT_CONN_LOCK_DHANDLE_WRITE_COUNT 1197
/*! lock: metadata lock acquisitions */
-#define WT_STAT_CONN_LOCK_METADATA_COUNT 1196
+#define WT_STAT_CONN_LOCK_METADATA_COUNT 1198
/*! lock: metadata lock application thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1197
+#define WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION 1199
/*! lock: metadata lock internal thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1198
+#define WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL 1200
/*!
* lock: read timestamp queue lock application thread time waiting
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_APPLICATION 1199
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_APPLICATION 1201
/*! lock: read timestamp queue lock internal thread time waiting (usecs) */
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_INTERNAL 1200
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WAIT_INTERNAL 1202
/*! lock: read timestamp queue read lock acquisitions */
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_READ_COUNT 1201
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_READ_COUNT 1203
/*! lock: read timestamp queue write lock acquisitions */
-#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WRITE_COUNT 1202
+#define WT_STAT_CONN_LOCK_READ_TIMESTAMP_WRITE_COUNT 1204
/*! lock: schema lock acquisitions */
-#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1203
+#define WT_STAT_CONN_LOCK_SCHEMA_COUNT 1205
/*! lock: schema lock application thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1204
+#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION 1206
/*! lock: schema lock internal thread wait time (usecs) */
-#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1205
+#define WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL 1207
/*!
* lock: table lock application thread time waiting for the table lock
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1206
+#define WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION 1208
/*!
* lock: table lock internal thread time waiting for the table lock
* (usecs)
*/
-#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1207
+#define WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL 1209
/*! lock: table read lock acquisitions */
-#define WT_STAT_CONN_LOCK_TABLE_READ_COUNT 1208
+#define WT_STAT_CONN_LOCK_TABLE_READ_COUNT 1210
/*! lock: table write lock acquisitions */
-#define WT_STAT_CONN_LOCK_TABLE_WRITE_COUNT 1209
+#define WT_STAT_CONN_LOCK_TABLE_WRITE_COUNT 1211
/*! lock: txn global lock application thread time waiting (usecs) */
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_APPLICATION 1210
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_APPLICATION 1212
/*! lock: txn global lock internal thread time waiting (usecs) */
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_INTERNAL 1211
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WAIT_INTERNAL 1213
/*! lock: txn global read lock acquisitions */
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_READ_COUNT 1212
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_READ_COUNT 1214
/*! lock: txn global write lock acquisitions */
-#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WRITE_COUNT 1213
+#define WT_STAT_CONN_LOCK_TXN_GLOBAL_WRITE_COUNT 1215
/*! log: busy returns attempting to switch slots */
-#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1214
+#define WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY 1216
/*! log: force archive time sleeping (usecs) */
-#define WT_STAT_CONN_LOG_FORCE_ARCHIVE_SLEEP 1215
+#define WT_STAT_CONN_LOG_FORCE_ARCHIVE_SLEEP 1217
/*! log: log bytes of payload data */
-#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1216
+#define WT_STAT_CONN_LOG_BYTES_PAYLOAD 1218
/*! log: log bytes written */
-#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1217
+#define WT_STAT_CONN_LOG_BYTES_WRITTEN 1219
/*! log: log files manually zero-filled */
-#define WT_STAT_CONN_LOG_ZERO_FILLS 1218
+#define WT_STAT_CONN_LOG_ZERO_FILLS 1220
/*! log: log flush operations */
-#define WT_STAT_CONN_LOG_FLUSH 1219
+#define WT_STAT_CONN_LOG_FLUSH 1221
/*! log: log force write operations */
-#define WT_STAT_CONN_LOG_FORCE_WRITE 1220
+#define WT_STAT_CONN_LOG_FORCE_WRITE 1222
/*! log: log force write operations skipped */
-#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1221
+#define WT_STAT_CONN_LOG_FORCE_WRITE_SKIP 1223
/*! log: log records compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1222
+#define WT_STAT_CONN_LOG_COMPRESS_WRITES 1224
/*! log: log records not compressed */
-#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1223
+#define WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS 1225
/*! log: log records too small to compress */
-#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1224
+#define WT_STAT_CONN_LOG_COMPRESS_SMALL 1226
/*! log: log release advances write LSN */
-#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1225
+#define WT_STAT_CONN_LOG_RELEASE_WRITE_LSN 1227
/*! log: log scan operations */
-#define WT_STAT_CONN_LOG_SCANS 1226
+#define WT_STAT_CONN_LOG_SCANS 1228
/*! log: log scan records requiring two reads */
-#define WT_STAT_CONN_LOG_SCAN_REREADS 1227
+#define WT_STAT_CONN_LOG_SCAN_REREADS 1229
/*! log: log server thread advances write LSN */
-#define WT_STAT_CONN_LOG_WRITE_LSN 1228
+#define WT_STAT_CONN_LOG_WRITE_LSN 1230
/*! log: log server thread write LSN walk skipped */
-#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1229
+#define WT_STAT_CONN_LOG_WRITE_LSN_SKIP 1231
/*! log: log sync operations */
-#define WT_STAT_CONN_LOG_SYNC 1230
+#define WT_STAT_CONN_LOG_SYNC 1232
/*! log: log sync time duration (usecs) */
-#define WT_STAT_CONN_LOG_SYNC_DURATION 1231
+#define WT_STAT_CONN_LOG_SYNC_DURATION 1233
/*! log: log sync_dir operations */
-#define WT_STAT_CONN_LOG_SYNC_DIR 1232
+#define WT_STAT_CONN_LOG_SYNC_DIR 1234
/*! log: log sync_dir time duration (usecs) */
-#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1233
+#define WT_STAT_CONN_LOG_SYNC_DIR_DURATION 1235
/*! log: log write operations */
-#define WT_STAT_CONN_LOG_WRITES 1234
+#define WT_STAT_CONN_LOG_WRITES 1236
/*! log: logging bytes consolidated */
-#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1235
+#define WT_STAT_CONN_LOG_SLOT_CONSOLIDATED 1237
/*! log: maximum log file size */
-#define WT_STAT_CONN_LOG_MAX_FILESIZE 1236
+#define WT_STAT_CONN_LOG_MAX_FILESIZE 1238
/*! log: number of pre-allocated log files to create */
-#define WT_STAT_CONN_LOG_PREALLOC_MAX 1237
+#define WT_STAT_CONN_LOG_PREALLOC_MAX 1239
/*! log: pre-allocated log files not ready and missed */
-#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1238
+#define WT_STAT_CONN_LOG_PREALLOC_MISSED 1240
/*! log: pre-allocated log files prepared */
-#define WT_STAT_CONN_LOG_PREALLOC_FILES 1239
+#define WT_STAT_CONN_LOG_PREALLOC_FILES 1241
/*! log: pre-allocated log files used */
-#define WT_STAT_CONN_LOG_PREALLOC_USED 1240
+#define WT_STAT_CONN_LOG_PREALLOC_USED 1242
/*! log: records processed by log scan */
-#define WT_STAT_CONN_LOG_SCAN_RECORDS 1241
+#define WT_STAT_CONN_LOG_SCAN_RECORDS 1243
/*! log: slot close lost race */
-#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1242
+#define WT_STAT_CONN_LOG_SLOT_CLOSE_RACE 1244
/*! log: slot close unbuffered waits */
-#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1243
+#define WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF 1245
/*! log: slot closures */
-#define WT_STAT_CONN_LOG_SLOT_CLOSES 1244
+#define WT_STAT_CONN_LOG_SLOT_CLOSES 1246
/*! log: slot join atomic update races */
-#define WT_STAT_CONN_LOG_SLOT_RACES 1245
+#define WT_STAT_CONN_LOG_SLOT_RACES 1247
/*! log: slot join calls atomic updates raced */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1246
+#define WT_STAT_CONN_LOG_SLOT_YIELD_RACE 1248
/*! log: slot join calls did not yield */
-#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1247
+#define WT_STAT_CONN_LOG_SLOT_IMMEDIATE 1249
/*! log: slot join calls found active slot closed */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1248
+#define WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE 1250
/*! log: slot join calls slept */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1249
+#define WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP 1251
/*! log: slot join calls yielded */
-#define WT_STAT_CONN_LOG_SLOT_YIELD 1250
+#define WT_STAT_CONN_LOG_SLOT_YIELD 1252
/*! log: slot join found active slot closed */
-#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1251
+#define WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED 1253
/*! log: slot joins yield time (usecs) */
-#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1252
+#define WT_STAT_CONN_LOG_SLOT_YIELD_DURATION 1254
/*! log: slot transitions unable to find free slot */
-#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1253
+#define WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS 1255
/*! log: slot unbuffered writes */
-#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1254
+#define WT_STAT_CONN_LOG_SLOT_UNBUFFERED 1256
/*! log: total in-memory size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_MEM 1255
+#define WT_STAT_CONN_LOG_COMPRESS_MEM 1257
/*! log: total log buffer size */
-#define WT_STAT_CONN_LOG_BUFFER_SIZE 1256
+#define WT_STAT_CONN_LOG_BUFFER_SIZE 1258
/*! log: total size of compressed records */
-#define WT_STAT_CONN_LOG_COMPRESS_LEN 1257
+#define WT_STAT_CONN_LOG_COMPRESS_LEN 1259
/*! log: written slots coalesced */
-#define WT_STAT_CONN_LOG_SLOT_COALESCED 1258
+#define WT_STAT_CONN_LOG_SLOT_COALESCED 1260
/*! log: yields waiting for previous log file close */
-#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1259
+#define WT_STAT_CONN_LOG_CLOSE_YIELDS 1261
/*! perf: file system read latency histogram (bucket 1) - 10-49ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT50 1260
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT50 1262
/*! perf: file system read latency histogram (bucket 2) - 50-99ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT100 1261
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT100 1263
/*! perf: file system read latency histogram (bucket 3) - 100-249ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT250 1262
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT250 1264
/*! perf: file system read latency histogram (bucket 4) - 250-499ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT500 1263
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT500 1265
/*! perf: file system read latency histogram (bucket 5) - 500-999ms */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT1000 1264
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_LT1000 1266
/*! perf: file system read latency histogram (bucket 6) - 1000ms+ */
-#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_GT1000 1265
+#define WT_STAT_CONN_PERF_HIST_FSREAD_LATENCY_GT1000 1267
/*! perf: file system write latency histogram (bucket 1) - 10-49ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT50 1266
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT50 1268
/*! perf: file system write latency histogram (bucket 2) - 50-99ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT100 1267
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT100 1269
/*! perf: file system write latency histogram (bucket 3) - 100-249ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT250 1268
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT250 1270
/*! perf: file system write latency histogram (bucket 4) - 250-499ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT500 1269
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT500 1271
/*! perf: file system write latency histogram (bucket 5) - 500-999ms */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT1000 1270
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_LT1000 1272
/*! perf: file system write latency histogram (bucket 6) - 1000ms+ */
-#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_GT1000 1271
+#define WT_STAT_CONN_PERF_HIST_FSWRITE_LATENCY_GT1000 1273
/*! perf: operation read latency histogram (bucket 1) - 100-249us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT250 1272
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT250 1274
/*! perf: operation read latency histogram (bucket 2) - 250-499us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT500 1273
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT500 1275
/*! perf: operation read latency histogram (bucket 3) - 500-999us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT1000 1274
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT1000 1276
/*! perf: operation read latency histogram (bucket 4) - 1000-9999us */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT10000 1275
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_LT10000 1277
/*! perf: operation read latency histogram (bucket 5) - 10000us+ */
-#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_GT10000 1276
+#define WT_STAT_CONN_PERF_HIST_OPREAD_LATENCY_GT10000 1278
/*! perf: operation write latency histogram (bucket 1) - 100-249us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT250 1277
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT250 1279
/*! perf: operation write latency histogram (bucket 2) - 250-499us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT500 1278
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT500 1280
/*! perf: operation write latency histogram (bucket 3) - 500-999us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT1000 1279
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT1000 1281
/*! perf: operation write latency histogram (bucket 4) - 1000-9999us */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT10000 1280
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_LT10000 1282
/*! perf: operation write latency histogram (bucket 5) - 10000us+ */
-#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_GT10000 1281
+#define WT_STAT_CONN_PERF_HIST_OPWRITE_LATENCY_GT10000 1283
/*! reconciliation: fast-path pages deleted */
-#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1282
+#define WT_STAT_CONN_REC_PAGE_DELETE_FAST 1284
/*! reconciliation: page reconciliation calls */
-#define WT_STAT_CONN_REC_PAGES 1283
+#define WT_STAT_CONN_REC_PAGES 1285
/*! reconciliation: page reconciliation calls for eviction */
-#define WT_STAT_CONN_REC_PAGES_EVICTION 1284
+#define WT_STAT_CONN_REC_PAGES_EVICTION 1286
/*! reconciliation: pages deleted */
-#define WT_STAT_CONN_REC_PAGE_DELETE 1285
+#define WT_STAT_CONN_REC_PAGE_DELETE 1287
/*! reconciliation: split bytes currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1286
+#define WT_STAT_CONN_REC_SPLIT_STASHED_BYTES 1288
/*! reconciliation: split objects currently awaiting free */
-#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1287
+#define WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS 1289
/*! session: open session count */
-#define WT_STAT_CONN_SESSION_OPEN 1288
+#define WT_STAT_CONN_SESSION_OPEN 1290
/*! session: session query timestamp calls */
-#define WT_STAT_CONN_SESSION_QUERY_TS 1289
+#define WT_STAT_CONN_SESSION_QUERY_TS 1291
/*! session: table alter failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1290
+#define WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL 1292
/*! session: table alter successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1291
+#define WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS 1293
/*! session: table alter unchanged and skipped */
-#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1292
+#define WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP 1294
/*! session: table compact failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1293
+#define WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL 1295
/*! session: table compact successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1294
+#define WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS 1296
/*! session: table create failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1295
+#define WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL 1297
/*! session: table create successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1296
+#define WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS 1298
/*! session: table drop failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1297
+#define WT_STAT_CONN_SESSION_TABLE_DROP_FAIL 1299
/*! session: table drop successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1298
+#define WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS 1300
/*! session: table rebalance failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1299
+#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL 1301
/*! session: table rebalance successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1300
+#define WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS 1302
/*! session: table rename failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1301
+#define WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL 1303
/*! session: table rename successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1302
+#define WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS 1304
/*! session: table salvage failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1303
+#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL 1305
/*! session: table salvage successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1304
+#define WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS 1306
/*! session: table truncate failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1305
+#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL 1307
/*! session: table truncate successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1306
+#define WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS 1308
/*! session: table verify failed calls */
-#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1307
+#define WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL 1309
/*! session: table verify successful calls */
-#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1308
+#define WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS 1310
/*! thread-state: active filesystem fsync calls */
-#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1309
+#define WT_STAT_CONN_THREAD_FSYNC_ACTIVE 1311
/*! thread-state: active filesystem read calls */
-#define WT_STAT_CONN_THREAD_READ_ACTIVE 1310
+#define WT_STAT_CONN_THREAD_READ_ACTIVE 1312
/*! thread-state: active filesystem write calls */
-#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1311
+#define WT_STAT_CONN_THREAD_WRITE_ACTIVE 1313
/*! thread-yield: application thread time evicting (usecs) */
-#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1312
+#define WT_STAT_CONN_APPLICATION_EVICT_TIME 1314
/*! thread-yield: application thread time waiting for cache (usecs) */
-#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1313
+#define WT_STAT_CONN_APPLICATION_CACHE_TIME 1315
/*!
* thread-yield: connection close blocked waiting for transaction state
* stabilization
*/
-#define WT_STAT_CONN_TXN_RELEASE_BLOCKED 1314
+#define WT_STAT_CONN_TXN_RELEASE_BLOCKED 1316
/*! thread-yield: connection close yielded for lsm manager shutdown */
-#define WT_STAT_CONN_CONN_CLOSE_BLOCKED_LSM 1315
+#define WT_STAT_CONN_CONN_CLOSE_BLOCKED_LSM 1317
/*! thread-yield: data handle lock yielded */
-#define WT_STAT_CONN_DHANDLE_LOCK_BLOCKED 1316
+#define WT_STAT_CONN_DHANDLE_LOCK_BLOCKED 1318
/*!
* thread-yield: get reference for page index and slot time sleeping
* (usecs)
*/
-#define WT_STAT_CONN_PAGE_INDEX_SLOT_REF_BLOCKED 1317
+#define WT_STAT_CONN_PAGE_INDEX_SLOT_REF_BLOCKED 1319
/*! thread-yield: log server sync yielded for log write */
-#define WT_STAT_CONN_LOG_SERVER_SYNC_BLOCKED 1318
+#define WT_STAT_CONN_LOG_SERVER_SYNC_BLOCKED 1320
/*! thread-yield: page access yielded due to prepare state change */
-#define WT_STAT_CONN_PREPARED_TRANSITION_BLOCKED_PAGE 1319
+#define WT_STAT_CONN_PREPARED_TRANSITION_BLOCKED_PAGE 1321
/*! thread-yield: page acquire busy blocked */
-#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1320
+#define WT_STAT_CONN_PAGE_BUSY_BLOCKED 1322
/*! thread-yield: page acquire eviction blocked */
-#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1321
+#define WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED 1323
/*! thread-yield: page acquire locked blocked */
-#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1322
+#define WT_STAT_CONN_PAGE_LOCKED_BLOCKED 1324
/*! thread-yield: page acquire read blocked */
-#define WT_STAT_CONN_PAGE_READ_BLOCKED 1323
+#define WT_STAT_CONN_PAGE_READ_BLOCKED 1325
/*! thread-yield: page acquire time sleeping (usecs) */
-#define WT_STAT_CONN_PAGE_SLEEP 1324
+#define WT_STAT_CONN_PAGE_SLEEP 1326
/*!
* thread-yield: page delete rollback time sleeping for state change
* (usecs)
*/
-#define WT_STAT_CONN_PAGE_DEL_ROLLBACK_BLOCKED 1325
+#define WT_STAT_CONN_PAGE_DEL_ROLLBACK_BLOCKED 1327
/*! thread-yield: page reconciliation yielded due to child modification */
-#define WT_STAT_CONN_CHILD_MODIFY_BLOCKED_PAGE 1326
+#define WT_STAT_CONN_CHILD_MODIFY_BLOCKED_PAGE 1328
/*! transaction: Number of prepared updates */
-#define WT_STAT_CONN_TXN_PREPARED_UPDATES_COUNT 1327
+#define WT_STAT_CONN_TXN_PREPARED_UPDATES_COUNT 1329
/*! transaction: Number of prepared updates added to cache overflow */
-#define WT_STAT_CONN_TXN_PREPARED_UPDATES_LOOKASIDE_INSERTS 1328
+#define WT_STAT_CONN_TXN_PREPARED_UPDATES_LOOKASIDE_INSERTS 1330
/*! transaction: Number of prepared updates resolved */
-#define WT_STAT_CONN_TXN_PREPARED_UPDATES_RESOLVED 1329
+#define WT_STAT_CONN_TXN_PREPARED_UPDATES_RESOLVED 1331
/*! transaction: commit timestamp queue entries walked */
-#define WT_STAT_CONN_TXN_COMMIT_QUEUE_WALKED 1330
+#define WT_STAT_CONN_TXN_COMMIT_QUEUE_WALKED 1332
/*! transaction: commit timestamp queue insert to empty */
-#define WT_STAT_CONN_TXN_COMMIT_QUEUE_EMPTY 1331
+#define WT_STAT_CONN_TXN_COMMIT_QUEUE_EMPTY 1333
/*! transaction: commit timestamp queue inserts to head */
-#define WT_STAT_CONN_TXN_COMMIT_QUEUE_HEAD 1332
+#define WT_STAT_CONN_TXN_COMMIT_QUEUE_HEAD 1334
/*! transaction: commit timestamp queue inserts total */
-#define WT_STAT_CONN_TXN_COMMIT_QUEUE_INSERTS 1333
+#define WT_STAT_CONN_TXN_COMMIT_QUEUE_INSERTS 1335
/*! transaction: commit timestamp queue length */
-#define WT_STAT_CONN_TXN_COMMIT_QUEUE_LEN 1334
+#define WT_STAT_CONN_TXN_COMMIT_QUEUE_LEN 1336
/*! transaction: number of named snapshots created */
-#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1335
+#define WT_STAT_CONN_TXN_SNAPSHOTS_CREATED 1337
/*! transaction: number of named snapshots dropped */
-#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1336
+#define WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED 1338
/*! transaction: prepared transactions */
-#define WT_STAT_CONN_TXN_PREPARE 1337
+#define WT_STAT_CONN_TXN_PREPARE 1339
/*! transaction: prepared transactions committed */
-#define WT_STAT_CONN_TXN_PREPARE_COMMIT 1338
+#define WT_STAT_CONN_TXN_PREPARE_COMMIT 1340
/*! transaction: prepared transactions currently active */
-#define WT_STAT_CONN_TXN_PREPARE_ACTIVE 1339
+#define WT_STAT_CONN_TXN_PREPARE_ACTIVE 1341
/*! transaction: prepared transactions rolled back */
-#define WT_STAT_CONN_TXN_PREPARE_ROLLBACK 1340
+#define WT_STAT_CONN_TXN_PREPARE_ROLLBACK 1342
/*! transaction: query timestamp calls */
-#define WT_STAT_CONN_TXN_QUERY_TS 1341
+#define WT_STAT_CONN_TXN_QUERY_TS 1343
/*! transaction: read timestamp queue entries walked */
-#define WT_STAT_CONN_TXN_READ_QUEUE_WALKED 1342
+#define WT_STAT_CONN_TXN_READ_QUEUE_WALKED 1344
/*! transaction: read timestamp queue insert to empty */
-#define WT_STAT_CONN_TXN_READ_QUEUE_EMPTY 1343
+#define WT_STAT_CONN_TXN_READ_QUEUE_EMPTY 1345
/*! transaction: read timestamp queue inserts to head */
-#define WT_STAT_CONN_TXN_READ_QUEUE_HEAD 1344
+#define WT_STAT_CONN_TXN_READ_QUEUE_HEAD 1346
/*! transaction: read timestamp queue inserts total */
-#define WT_STAT_CONN_TXN_READ_QUEUE_INSERTS 1345
+#define WT_STAT_CONN_TXN_READ_QUEUE_INSERTS 1347
/*! transaction: read timestamp queue length */
-#define WT_STAT_CONN_TXN_READ_QUEUE_LEN 1346
+#define WT_STAT_CONN_TXN_READ_QUEUE_LEN 1348
/*! transaction: rollback to stable calls */
-#define WT_STAT_CONN_TXN_ROLLBACK_TO_STABLE 1347
+#define WT_STAT_CONN_TXN_ROLLBACK_TO_STABLE 1349
/*! transaction: rollback to stable updates aborted */
-#define WT_STAT_CONN_TXN_ROLLBACK_UPD_ABORTED 1348
+#define WT_STAT_CONN_TXN_ROLLBACK_UPD_ABORTED 1350
/*! transaction: rollback to stable updates removed from cache overflow */
-#define WT_STAT_CONN_TXN_ROLLBACK_LAS_REMOVED 1349
+#define WT_STAT_CONN_TXN_ROLLBACK_LAS_REMOVED 1351
/*! transaction: set timestamp calls */
-#define WT_STAT_CONN_TXN_SET_TS 1350
+#define WT_STAT_CONN_TXN_SET_TS 1352
/*! transaction: set timestamp commit calls */
-#define WT_STAT_CONN_TXN_SET_TS_COMMIT 1351
+#define WT_STAT_CONN_TXN_SET_TS_COMMIT 1353
/*! transaction: set timestamp commit updates */
-#define WT_STAT_CONN_TXN_SET_TS_COMMIT_UPD 1352
+#define WT_STAT_CONN_TXN_SET_TS_COMMIT_UPD 1354
/*! transaction: set timestamp oldest calls */
-#define WT_STAT_CONN_TXN_SET_TS_OLDEST 1353
+#define WT_STAT_CONN_TXN_SET_TS_OLDEST 1355
/*! transaction: set timestamp oldest updates */
-#define WT_STAT_CONN_TXN_SET_TS_OLDEST_UPD 1354
+#define WT_STAT_CONN_TXN_SET_TS_OLDEST_UPD 1356
/*! transaction: set timestamp stable calls */
-#define WT_STAT_CONN_TXN_SET_TS_STABLE 1355
+#define WT_STAT_CONN_TXN_SET_TS_STABLE 1357
/*! transaction: set timestamp stable updates */
-#define WT_STAT_CONN_TXN_SET_TS_STABLE_UPD 1356
+#define WT_STAT_CONN_TXN_SET_TS_STABLE_UPD 1358
/*! transaction: transaction begins */
-#define WT_STAT_CONN_TXN_BEGIN 1357
+#define WT_STAT_CONN_TXN_BEGIN 1359
/*! transaction: transaction checkpoint currently running */
-#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1358
+#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1360
/*! transaction: transaction checkpoint generation */
-#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1359
+#define WT_STAT_CONN_TXN_CHECKPOINT_GENERATION 1361
/*! transaction: transaction checkpoint max time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1360
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX 1362
/*! transaction: transaction checkpoint min time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1361
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN 1363
/*! transaction: transaction checkpoint most recent time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1362
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT 1364
/*! transaction: transaction checkpoint scrub dirty target */
-#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1363
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET 1365
/*! transaction: transaction checkpoint scrub time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1364
+#define WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME 1366
/*! transaction: transaction checkpoint total time (msecs) */
-#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1365
+#define WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL 1367
/*! transaction: transaction checkpoints */
-#define WT_STAT_CONN_TXN_CHECKPOINT 1366
+#define WT_STAT_CONN_TXN_CHECKPOINT 1368
/*!
* transaction: transaction checkpoints skipped because database was
* clean
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1367
+#define WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED 1369
/*! transaction: transaction failures due to cache overflow */
-#define WT_STAT_CONN_TXN_FAIL_CACHE 1368
+#define WT_STAT_CONN_TXN_FAIL_CACHE 1370
/*!
* transaction: transaction fsync calls for checkpoint after allocating
* the transaction ID
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1369
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST 1371
/*!
* transaction: transaction fsync duration for checkpoint after
* allocating the transaction ID (usecs)
*/
-#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1370
+#define WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION 1372
/*! transaction: transaction range of IDs currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_RANGE 1371
+#define WT_STAT_CONN_TXN_PINNED_RANGE 1373
/*! transaction: transaction range of IDs currently pinned by a checkpoint */
-#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1372
+#define WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE 1374
/*!
* transaction: transaction range of IDs currently pinned by named
* snapshots
*/
-#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1373
+#define WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE 1375
/*! transaction: transaction range of timestamps currently pinned */
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1374
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP 1376
/*! transaction: transaction range of timestamps pinned by a checkpoint */
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_CHECKPOINT 1375
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_CHECKPOINT 1377
/*!
* transaction: transaction range of timestamps pinned by the oldest
* timestamp
*/
-#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1376
+#define WT_STAT_CONN_TXN_PINNED_TIMESTAMP_OLDEST 1378
/*! transaction: transaction sync calls */
-#define WT_STAT_CONN_TXN_SYNC 1377
+#define WT_STAT_CONN_TXN_SYNC 1379
/*! transaction: transactions committed */
-#define WT_STAT_CONN_TXN_COMMIT 1378
+#define WT_STAT_CONN_TXN_COMMIT 1380
/*! transaction: transactions rolled back */
-#define WT_STAT_CONN_TXN_ROLLBACK 1379
+#define WT_STAT_CONN_TXN_ROLLBACK 1381
/*! transaction: update conflicts */
-#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1380
+#define WT_STAT_CONN_TXN_UPDATE_CONFLICT 1382
/*!
* @}
diff --git a/src/third_party/wiredtiger/src/include/wt_internal.h b/src/third_party/wiredtiger/src/include/wt_internal.h
index d93f6a3be7f..9e31180dbb1 100644
--- a/src/third_party/wiredtiger/src/include/wt_internal.h
+++ b/src/third_party/wiredtiger/src/include/wt_internal.h
@@ -271,6 +271,12 @@ struct __wt_page_modify;
typedef struct __wt_page_modify WT_PAGE_MODIFY;
struct __wt_process;
typedef struct __wt_process WT_PROCESS;
+struct __wt_rec_chunk;
+ typedef struct __wt_rec_chunk WT_REC_CHUNK;
+struct __wt_rec_dictionary;
+ typedef struct __wt_rec_dictionary WT_REC_DICTIONARY;
+struct __wt_rec_kv;
+ typedef struct __wt_rec_kv WT_REC_KV;
struct __wt_ref;
typedef struct __wt_ref WT_REF;
struct __wt_ref_hist;
@@ -362,6 +368,7 @@ typedef uint64_t wt_timestamp_t;
#include "btree.h"
#include "cache.h"
#include "capacity.h"
+#include "cell.h"
#include "compact.h"
#include "config.h"
#include "cursor.h"
@@ -372,6 +379,7 @@ typedef uint64_t wt_timestamp_t;
#include "meta.h"
#include "optrack.h"
#include "os.h"
+#include "reconcile.h"
#include "schema.h"
#include "thread_group.h"
#include "txn.h"
@@ -407,6 +415,7 @@ typedef uint64_t wt_timestamp_t;
#include "os_fs.i"
#include "os_fstream.i"
#include "packing.i"
+#include "reconcile.i"
#include "serial.i"
#if defined(__cplusplus)
diff --git a/src/third_party/wiredtiger/src/log/log.c b/src/third_party/wiredtiger/src/log/log.c
index 9e27a996251..1963a3770fc 100644
--- a/src/third_party/wiredtiger/src/log/log.c
+++ b/src/third_party/wiredtiger/src/log/log.c
@@ -236,16 +236,26 @@ __log_fs_write(WT_SESSION_IMPL *session,
* thread as needed.
*/
void
-__wt_log_ckpt(WT_SESSION_IMPL *session, WT_LSN *ckp_lsn)
+__wt_log_ckpt(WT_SESSION_IMPL *session, WT_LSN *ckpt_lsn)
{
WT_CONNECTION_IMPL *conn;
WT_LOG *log;
+ int i;
conn = S2C(session);
log = conn->log;
- log->ckpt_lsn = *ckp_lsn;
+ log->ckpt_lsn = *ckpt_lsn;
if (conn->log_cond != NULL)
__wt_cond_signal(session, conn->log_cond);
+ /*
+	 * If we are storing debugging LSNs to hold additional log files
+	 * back from archiving, then rotate the newest LSN into the array.
+ */
+ if (conn->debug_ckpt_cnt != 0) {
+ for (i = (int)conn->debug_ckpt_cnt - 1; i > 0; --i)
+ conn->debug_ckpt[i] = conn->debug_ckpt[i - 1];
+ conn->debug_ckpt[0] = *ckpt_lsn;
+ }
}
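To make the retention behavior concrete, here is a minimal standalone sketch of the same shift-and-insert rotation, with hypothetical names and a fixed retention count (illustration only, not WiredTiger code):

#include <stdio.h>

#define CKPT_KEEP 3			/* Hypothetical retention count. */

/* Shift older entries down one slot and store the newest at index 0. */
static void
rotate_newest(unsigned long *arr, int n, unsigned long newest)
{
	int i;

	for (i = n - 1; i > 0; --i)
		arr[i] = arr[i - 1];
	arr[0] = newest;
}

int
main(void)
{
	unsigned long lsns[CKPT_KEEP] = {0, 0, 0};
	unsigned long ckpt;

	for (ckpt = 100; ckpt <= 300; ckpt += 100)
		rotate_newest(lsns, CKPT_KEEP, ckpt);
	printf("%lu %lu %lu\n", lsns[0], lsns[1], lsns[2]);	/* 300 200 100 */
	return (0);
}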
/*
diff --git a/src/third_party/wiredtiger/src/log/log_auto.c b/src/third_party/wiredtiger/src/log/log_auto.c
index f6d7afed0c2..d7f59fd920e 100644
--- a/src/third_party/wiredtiger/src/log/log_auto.c
+++ b/src/third_party/wiredtiger/src/log/log_auto.c
@@ -149,7 +149,7 @@ __wt_logop_col_modify_print(WT_SESSION_IMPL *session,
WT_RET(__wt_fprintf(session, args->fs,
" \"optype\": \"col_modify\",\n"));
WT_ERR(__wt_fprintf(session, args->fs,
- " \"fileid\": %" PRIu32 ",\n", fileid));
+ " \"fileid\": %" PRIu32 " 0x%" PRIx32 ",\n", fileid, fileid));
WT_ERR(__wt_fprintf(session, args->fs,
" \"recno\": %" PRIu64 ",\n", recno));
WT_ERR(__logrec_make_json_str(session, &escaped, &value));
@@ -224,7 +224,7 @@ __wt_logop_col_put_print(WT_SESSION_IMPL *session,
WT_RET(__wt_fprintf(session, args->fs,
" \"optype\": \"col_put\",\n"));
WT_ERR(__wt_fprintf(session, args->fs,
- " \"fileid\": %" PRIu32 ",\n", fileid));
+ " \"fileid\": %" PRIu32 " 0x%" PRIx32 ",\n", fileid, fileid));
WT_ERR(__wt_fprintf(session, args->fs,
" \"recno\": %" PRIu64 ",\n", recno));
WT_ERR(__logrec_make_json_str(session, &escaped, &value));
@@ -295,7 +295,7 @@ __wt_logop_col_remove_print(WT_SESSION_IMPL *session,
WT_RET(__wt_fprintf(session, args->fs,
" \"optype\": \"col_remove\",\n"));
WT_RET(__wt_fprintf(session, args->fs,
- " \"fileid\": %" PRIu32 ",\n", fileid));
+ " \"fileid\": %" PRIu32 " 0x%" PRIx32 ",\n", fileid, fileid));
WT_RET(__wt_fprintf(session, args->fs,
" \"recno\": %" PRIu64 "", recno));
return (0);
@@ -357,7 +357,7 @@ __wt_logop_col_truncate_print(WT_SESSION_IMPL *session,
WT_RET(__wt_fprintf(session, args->fs,
" \"optype\": \"col_truncate\",\n"));
WT_RET(__wt_fprintf(session, args->fs,
- " \"fileid\": %" PRIu32 ",\n", fileid));
+ " \"fileid\": %" PRIu32 " 0x%" PRIx32 ",\n", fileid, fileid));
WT_RET(__wt_fprintf(session, args->fs,
" \"start\": %" PRIu64 ",\n", start));
WT_RET(__wt_fprintf(session, args->fs,
@@ -424,7 +424,7 @@ __wt_logop_row_modify_print(WT_SESSION_IMPL *session,
WT_RET(__wt_fprintf(session, args->fs,
" \"optype\": \"row_modify\",\n"));
WT_ERR(__wt_fprintf(session, args->fs,
- " \"fileid\": %" PRIu32 ",\n", fileid));
+ " \"fileid\": %" PRIu32 " 0x%" PRIx32 ",\n", fileid, fileid));
WT_ERR(__logrec_make_json_str(session, &escaped, &key));
WT_ERR(__wt_fprintf(session, args->fs,
" \"key\": \"%s\",\n", escaped));
@@ -505,7 +505,7 @@ __wt_logop_row_put_print(WT_SESSION_IMPL *session,
WT_RET(__wt_fprintf(session, args->fs,
" \"optype\": \"row_put\",\n"));
WT_ERR(__wt_fprintf(session, args->fs,
- " \"fileid\": %" PRIu32 ",\n", fileid));
+ " \"fileid\": %" PRIu32 " 0x%" PRIx32 ",\n", fileid, fileid));
WT_ERR(__logrec_make_json_str(session, &escaped, &key));
WT_ERR(__wt_fprintf(session, args->fs,
" \"key\": \"%s\",\n", escaped));
@@ -585,7 +585,7 @@ __wt_logop_row_remove_print(WT_SESSION_IMPL *session,
WT_RET(__wt_fprintf(session, args->fs,
" \"optype\": \"row_remove\",\n"));
WT_ERR(__wt_fprintf(session, args->fs,
- " \"fileid\": %" PRIu32 ",\n", fileid));
+ " \"fileid\": %" PRIu32 " 0x%" PRIx32 ",\n", fileid, fileid));
WT_ERR(__logrec_make_json_str(session, &escaped, &key));
WT_ERR(__wt_fprintf(session, args->fs,
" \"key\": \"%s\"", escaped));
@@ -659,7 +659,7 @@ __wt_logop_row_truncate_print(WT_SESSION_IMPL *session,
WT_RET(__wt_fprintf(session, args->fs,
" \"optype\": \"row_truncate\",\n"));
WT_ERR(__wt_fprintf(session, args->fs,
- " \"fileid\": %" PRIu32 ",\n", fileid));
+ " \"fileid\": %" PRIu32 " 0x%" PRIx32 ",\n", fileid, fileid));
WT_ERR(__logrec_make_json_str(session, &escaped, &start));
WT_ERR(__wt_fprintf(session, args->fs,
" \"start\": \"%s\",\n", escaped));
@@ -798,6 +798,82 @@ __wt_logop_prev_lsn_print(WT_SESSION_IMPL *session,
}
int
+__wt_logop_txn_timestamp_pack(
+ WT_SESSION_IMPL *session, WT_ITEM *logrec,
+ uint64_t time_sec, uint64_t time_nsec, uint64_t commit_ts, uint64_t durable_ts, uint64_t first_ts, uint64_t prepare_ts, uint64_t read_ts)
+{
+ const char *fmt = WT_UNCHECKED_STRING(IIQQQQQQQ);
+ size_t size;
+ uint32_t optype, recsize;
+
+ optype = WT_LOGOP_TXN_TIMESTAMP;
+ WT_RET(__wt_struct_size(session, &size, fmt,
+ optype, 0, time_sec, time_nsec, commit_ts, durable_ts, first_ts, prepare_ts, read_ts));
+
+ __wt_struct_size_adjust(session, &size);
+ WT_RET(__wt_buf_extend(session, logrec, logrec->size + size));
+ recsize = (uint32_t)size;
+ WT_RET(__wt_struct_pack(session,
+ (uint8_t *)logrec->data + logrec->size, size, fmt,
+ optype, recsize, time_sec, time_nsec, commit_ts, durable_ts, first_ts, prepare_ts, read_ts));
+
+ logrec->size += (uint32_t)size;
+ return (0);
+}
+
+int
+__wt_logop_txn_timestamp_unpack(
+ WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end,
+ uint64_t *time_secp, uint64_t *time_nsecp, uint64_t *commit_tsp, uint64_t *durable_tsp, uint64_t *first_tsp, uint64_t *prepare_tsp, uint64_t *read_tsp)
+{
+ WT_DECL_RET;
+ const char *fmt = WT_UNCHECKED_STRING(IIQQQQQQQ);
+ uint32_t optype, size;
+
+ if ((ret = __wt_struct_unpack(session, *pp, WT_PTRDIFF(end, *pp), fmt,
+ &optype, &size, time_secp, time_nsecp, commit_tsp, durable_tsp, first_tsp, prepare_tsp, read_tsp)) != 0)
+ WT_RET_MSG(session, ret, "logop_txn_timestamp: unpack failure");
+ WT_ASSERT(session, optype == WT_LOGOP_TXN_TIMESTAMP);
+
+ *pp += size;
+ return (0);
+}
+
+int
+__wt_logop_txn_timestamp_print(WT_SESSION_IMPL *session,
+ const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args)
+{
+ uint64_t time_sec;
+ uint64_t time_nsec;
+ uint64_t commit_ts;
+ uint64_t durable_ts;
+ uint64_t first_ts;
+ uint64_t prepare_ts;
+ uint64_t read_ts;
+
+ WT_RET(__wt_logop_txn_timestamp_unpack(
+ session, pp, end, &time_sec, &time_nsec, &commit_ts, &durable_ts, &first_ts, &prepare_ts, &read_ts));
+
+ WT_RET(__wt_fprintf(session, args->fs,
+ " \"optype\": \"txn_timestamp\",\n"));
+ WT_RET(__wt_fprintf(session, args->fs,
+ " \"time_sec\": %" PRIu64 ",\n", time_sec));
+ WT_RET(__wt_fprintf(session, args->fs,
+ " \"time_nsec\": %" PRIu64 ",\n", time_nsec));
+ WT_RET(__wt_fprintf(session, args->fs,
+ " \"commit_ts\": %" PRIu64 ",\n", commit_ts));
+ WT_RET(__wt_fprintf(session, args->fs,
+ " \"durable_ts\": %" PRIu64 ",\n", durable_ts));
+ WT_RET(__wt_fprintf(session, args->fs,
+ " \"first_ts\": %" PRIu64 ",\n", first_ts));
+ WT_RET(__wt_fprintf(session, args->fs,
+ " \"prepare_ts\": %" PRIu64 ",\n", prepare_ts));
+ WT_RET(__wt_fprintf(session, args->fs,
+ " \"read_ts\": %" PRIu64 "", read_ts));
+ return (0);
+}
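A hypothetical round-trip of the new timestamp operation record, using only the pack and unpack entry points added above. It assumes "session" is a valid internal session, "logrec" is a WT_ITEM already set up as a log record buffer, and "offset" is logrec->size as it was before the pack call; none of that setup is shown, so this is a sketch rather than working code:

/* Sketch only: pack a timestamp operation, then unpack it back out. */
static int
timestamp_logop_roundtrip(
    WT_SESSION_IMPL *session, WT_ITEM *logrec, size_t offset)
{
	const uint8_t *p, *end;
	uint64_t sec, nsec, commit_ts, durable_ts, first_ts, prepare_ts, read_ts;

	WT_RET(__wt_logop_txn_timestamp_pack(session, logrec,
	    (uint64_t)123, (uint64_t)456, 10, 10, 10, 0, 10));

	p = (const uint8_t *)logrec->data + offset;
	end = (const uint8_t *)logrec->data + logrec->size;
	WT_RET(__wt_logop_txn_timestamp_unpack(session, &p, end,
	    &sec, &nsec, &commit_ts, &durable_ts,
	    &first_ts, &prepare_ts, &read_ts));
	return (0);
}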
+
+int
__wt_txn_op_printlog(WT_SESSION_IMPL *session,
const uint8_t **pp, const uint8_t *end, WT_TXN_PRINTLOG_ARGS *args)
{
@@ -848,6 +924,10 @@ __wt_txn_op_printlog(WT_SESSION_IMPL *session,
WT_RET(__wt_logop_prev_lsn_print(session, pp, end, args));
break;
+ case WT_LOGOP_TXN_TIMESTAMP:
+ WT_RET(__wt_logop_txn_timestamp_print(session, pp, end, args));
+ break;
+
WT_ILLEGAL_VALUE(session, optype);
}
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_child.c b/src/third_party/wiredtiger/src/reconcile/rec_child.c
new file mode 100644
index 00000000000..f1d261c8f42
--- /dev/null
+++ b/src/third_party/wiredtiger/src/reconcile/rec_child.c
@@ -0,0 +1,329 @@
+/*-
+ * Copyright (c) 2014-2019 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * __rec_child_deleted --
+ * Handle pages with leaf pages in the WT_REF_DELETED state.
+ */
+static int
+__rec_child_deleted(WT_SESSION_IMPL *session,
+ WT_RECONCILE *r, WT_REF *ref, WT_CHILD_STATE *statep)
+{
+ WT_PAGE_DELETED *page_del;
+
+ page_del = ref->page_del;
+
+ /*
+ * Internal pages with child leaf pages in the WT_REF_DELETED state are
+ * a special case during reconciliation. First, if the deletion was a
+ * result of a session truncate call, the deletion may not be visible to
+ * us. In that case, we proceed as with any change not visible during
+ * reconciliation by ignoring the change for the purposes of writing the
+ * internal page.
+ *
+ * In this case, there must be an associated page-deleted structure, and
+ * it holds the transaction ID we care about.
+ *
+ * In some cases, there had better not be any updates we can't see.
+ *
+	 * A visible update must be in the READY state (i.e. not in the
+	 * LOCKED or PREPARED state) to be truly visible to others.
+ */
+ if (F_ISSET(r, WT_REC_VISIBILITY_ERR) && page_del != NULL &&
+ __wt_page_del_active(session, ref, false))
+ WT_PANIC_RET(session, EINVAL,
+ "reconciliation illegally skipped an update");
+
+ /*
+ * Deal with any underlying disk blocks.
+ *
+ * First, check to see if there is an address associated with this leaf:
+ * if there isn't, we're done, the underlying page is already gone. If
+ * the page still exists, check for any transactions in the system that
+ * might want to see the page's state before it's deleted.
+ *
+ * If any such transactions exist, we cannot discard the underlying leaf
+ * page to the block manager because the transaction may eventually read
+ * it. However, this write might be part of a checkpoint, and should we
+ * recover to that checkpoint, we'll need to delete the leaf page, else
+ * we'd leak it. The solution is to write a proxy cell on the internal
+ * page ensuring the leaf page is eventually discarded.
+ *
+ * If no such transactions exist, we can discard the leaf page to the
+ * block manager and no cell needs to be written at all. We do this
+ * outside of the underlying tracking routines because this action is
+ * permanent and irrevocable. (Clearing the address means we've lost
+ * track of the disk address in a permanent way. This is safe because
+ * there's no path to reading the leaf page again: if there's ever a
+ * read into this part of the name space again, the cache read function
+ * instantiates an entirely new page.)
+ */
+ if (ref->addr != NULL && !__wt_page_del_active(session, ref, true)) {
+ /*
+ * Minor memory cleanup: if a truncate call deleted this page
+ * and we were ever forced to instantiate the page in memory,
+ * we would have built a list of updates in the page reference
+ * in order to be able to commit/rollback the truncate. We just
+ * passed a visibility test, discard the update list.
+ */
+ if (page_del != NULL) {
+ __wt_free(session, ref->page_del->update_list);
+ __wt_free(session, ref->page_del);
+ }
+
+ WT_RET(__wt_ref_block_free(session, ref));
+ }
+
+ /*
+ * If the original page is gone, we can skip the slot on the internal
+ * page.
+ */
+ if (ref->addr == NULL) {
+ *statep = WT_CHILD_IGNORE;
+ return (0);
+ }
+
+ /*
+	 * Internal pages with deletes that aren't stable cannot be evicted: we
+	 * don't have sufficient information to restore the page's state if it
+	 * is subsequently read (we wouldn't know which transactions should see
+	 * the original page and which should see the deleted page).
+ */
+ if (F_ISSET(r, WT_REC_EVICT))
+ return (__wt_set_return(session, EBUSY));
+
+ /*
+ * If there are deleted child pages we can't discard immediately, keep
+ * the page dirty so they are eventually freed.
+ */
+ r->leave_dirty = true;
+
+ /*
+ * If the original page cannot be freed, we need to keep a slot on the
+ * page to reference it from the parent page.
+ *
+ * If the delete is not visible in this checkpoint, write the original
+ * address normally. Otherwise, we have to write a proxy record.
+	 * If the delete state is not ready, then the delete is not visible,
+	 * as it is still in the prepared state.
+ */
+ if (!__wt_page_del_active(session, ref, false))
+ *statep = WT_CHILD_PROXY;
+
+ return (0);
+}
+
+/*
+ * __wt_rec_child_modify --
+ * Return if the internal page's child references any modifications.
+ */
+int
+__wt_rec_child_modify(WT_SESSION_IMPL *session,
+ WT_RECONCILE *r, WT_REF *ref, bool *hazardp, WT_CHILD_STATE *statep)
+{
+ WT_DECL_RET;
+ WT_PAGE_MODIFY *mod;
+
+ /* We may acquire a hazard pointer our caller must release. */
+ *hazardp = false;
+
+ /* Default to using the original child address. */
+ *statep = WT_CHILD_ORIGINAL;
+
+ /*
+ * This function is called when walking an internal page to decide how
+ * to handle child pages referenced by the internal page.
+ *
+ * Internal pages are reconciled for two reasons: first, when evicting
+ * an internal page, second by the checkpoint code when writing internal
+ * pages. During eviction, all pages should be in the WT_REF_DISK or
+ * WT_REF_DELETED state. During checkpoint, eviction that might affect
+ * review of an internal page is prohibited, however, as the subtree is
+ * not reserved for our exclusive use, there are other page states that
+ * must be considered.
+ */
+ for (;; __wt_yield()) {
+ switch (r->tested_ref_state = ref->state) {
+ case WT_REF_DISK:
+ /* On disk, not modified by definition. */
+ goto done;
+
+ case WT_REF_DELETED:
+ /*
+ * The child is in a deleted state.
+ *
+ * It's possible the state could change underneath us as
+ * the page is read in, and we can race between checking
+ * for a deleted state and looking at the transaction ID
+ * to see if the delete is visible to us. Lock down the
+ * structure.
+ */
+ if (!WT_REF_CAS_STATE(
+ session, ref, WT_REF_DELETED, WT_REF_LOCKED))
+ break;
+ ret = __rec_child_deleted(session, r, ref, statep);
+ WT_REF_SET_STATE(ref, WT_REF_DELETED);
+ goto done;
+
+ case WT_REF_LOCKED:
+ /*
+ * Locked.
+ *
+			 * We should never be here during eviction: active child
+			 * pages in an evicted page's subtree fail the eviction
+ * attempt.
+ */
+ WT_ASSERT(session, !F_ISSET(r, WT_REC_EVICT));
+ if (F_ISSET(r, WT_REC_EVICT))
+ return (__wt_set_return(session, EBUSY));
+
+ /*
+ * If called during checkpoint, the child is being
+ * considered by the eviction server or the child is a
+ * truncated page being read. The eviction may have
+ * started before the checkpoint and so we must wait
+ * for the eviction to be resolved. I suspect we could
+ * handle reads of truncated pages, but we can't
+ * distinguish between the two and reads of truncated
+ * pages aren't expected to be common.
+ */
+ break;
+
+ case WT_REF_LIMBO:
+ WT_ASSERT(session, !F_ISSET(r, WT_REC_EVICT));
+ /* FALLTHROUGH */
+ case WT_REF_LOOKASIDE:
+ /*
+ * On disk or in cache with lookaside updates.
+ *
+ * We should never be here during eviction: active
+			 * child pages in an evicted page's subtree fail the
+ * eviction attempt.
+ */
+ if (F_ISSET(r, WT_REC_EVICT) &&
+ __wt_page_las_active(session, ref)) {
+ WT_ASSERT(session, false);
+ return (__wt_set_return(session, EBUSY));
+ }
+
+ /*
+ * A page evicted with lookaside entries may not have
+ * an address, if no updates were visible to
+ * reconciliation. Any child pages in that state
+ * should be ignored.
+ */
+ if (ref->addr == NULL) {
+ *statep = WT_CHILD_IGNORE;
+ WT_CHILD_RELEASE(session, *hazardp, ref);
+ }
+ goto done;
+
+ case WT_REF_MEM:
+ /*
+ * In memory.
+ *
+			 * We should never be here during eviction: active child
+			 * pages in an evicted page's subtree fail the eviction
+ * attempt.
+ */
+ WT_ASSERT(session, !F_ISSET(r, WT_REC_EVICT));
+ if (F_ISSET(r, WT_REC_EVICT))
+ return (__wt_set_return(session, EBUSY));
+
+ /*
+ * If called during checkpoint, acquire a hazard pointer
+ * so the child isn't evicted, it's an in-memory case.
+ *
+ * This call cannot return split/restart, we have a lock
+ * on the parent which prevents a child page split.
+ *
+ * Set WT_READ_NO_WAIT because we're only interested in
+ * the WT_REF's final state. Pages in transition might
+ * change WT_REF state during our read, and then return
+ * WT_NOTFOUND to us. In that case, loop and look again.
+ */
+ ret = __wt_page_in(session, ref,
+ WT_READ_CACHE | WT_READ_NO_EVICT |
+ WT_READ_NO_GEN | WT_READ_NO_WAIT);
+ if (ret == WT_NOTFOUND) {
+ ret = 0;
+ break;
+ }
+ WT_RET(ret);
+ *hazardp = true;
+ goto in_memory;
+
+ case WT_REF_READING:
+ /*
+ * Being read, not modified by definition.
+ *
+			 * We should never be here during eviction: active child
+			 * pages in an evicted page's subtree fail the eviction
+ * attempt.
+ */
+ WT_ASSERT(session, !F_ISSET(r, WT_REC_EVICT));
+ if (F_ISSET(r, WT_REC_EVICT))
+ return (__wt_set_return(session, EBUSY));
+ goto done;
+
+ case WT_REF_SPLIT:
+ /*
+ * The page was split out from under us.
+ *
+			 * We should never be here during eviction: active child
+			 * pages in an evicted page's subtree fail the eviction
+ * attempt.
+ *
+			 * We should never be here during checkpoint: dirty page
+			 * eviction is shut out during checkpoint, and all splits
+			 * in process will have completed before we walk any
+			 * pages for checkpoint.
+ */
+ WT_ASSERT(session, WT_REF_SPLIT != WT_REF_SPLIT);
+ return (__wt_set_return(session, EBUSY));
+
+ WT_ILLEGAL_VALUE(session, r->tested_ref_state);
+ }
+ WT_STAT_CONN_INCR(session, child_modify_blocked_page);
+ }
+
+in_memory:
+ /*
+ * In-memory states: the child is potentially modified if the page's
+ * modify structure has been instantiated. If the modify structure
+ * exists and the page has actually been modified, set that state.
+ * If that's not the case, we would normally use the original cell's
+ * disk address as our reference, however there are two special cases,
+ * both flagged by a missing block address.
+ *
+ * First, if forced to instantiate a deleted child page and it's never
+ * modified, we end up here with a page that has a modify structure, no
+ * modifications, and no disk address. Ignore those pages, they're not
+ * modified and there is no reason to write the cell.
+ *
+ * Second, insert splits are permitted during checkpoint. When doing the
+ * final checkpoint pass, we first walk the internal page's page-index
+ * and write out any dirty pages we find, then we write out the internal
+ * page in post-order traversal. If we found the split page in the first
+ * step, it will have an address; if we didn't find the split page in
+ * the first step, it won't have an address and we ignore it, it's not
+ * part of the checkpoint.
+ */
+ mod = ref->page->modify;
+ if (mod != NULL && mod->rec_result != 0)
+ *statep = WT_CHILD_MODIFIED;
+ else if (ref->addr == NULL) {
+ *statep = WT_CHILD_IGNORE;
+ WT_CHILD_RELEASE(session, *hazardp, ref);
+ }
+
+done: WT_DIAGNOSTIC_YIELD;
+ return (ret);
+}
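A caller of __wt_rec_child_modify is responsible for releasing any hazard pointer acquired on its behalf. The real callers are the internal-page loops in rec_col.c and rec_row.c below; this is only a stripped-down sketch of that calling pattern:

/* Sketch of the caller contract; hypothetical helper, not in the tree. */
static int
child_example(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *ref)
{
	WT_CHILD_STATE state;
	WT_DECL_RET;
	bool hazard;

	WT_RET(__wt_rec_child_modify(session, r, ref, &hazard, &state));
	switch (state) {
	case WT_CHILD_IGNORE:		/* Skip the slot entirely. */
	case WT_CHILD_MODIFIED:		/* Use the child's new address. */
	case WT_CHILD_ORIGINAL:		/* Use the original on-page address. */
	case WT_CHILD_PROXY:		/* Write a deleted-page proxy cell. */
		break;
	}

	/* Always release any hazard pointer acquired on our behalf. */
	WT_CHILD_RELEASE(session, hazard, ref);
	return (ret);
}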
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_col.c b/src/third_party/wiredtiger/src/reconcile/rec_col.c
new file mode 100644
index 00000000000..6a57a9c26d6
--- /dev/null
+++ b/src/third_party/wiredtiger/src/reconcile/rec_col.c
@@ -0,0 +1,1077 @@
+/*-
+ * Copyright (c) 2014-2019 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * __rec_col_fix_bulk_insert_split_check --
+ * Check if a bulk-loaded fixed-length column store page needs to split.
+ */
+static inline int
+__rec_col_fix_bulk_insert_split_check(WT_CURSOR_BULK *cbulk)
+{
+ WT_BTREE *btree;
+ WT_RECONCILE *r;
+ WT_SESSION_IMPL *session;
+
+ session = (WT_SESSION_IMPL *)cbulk->cbt.iface.session;
+ r = cbulk->reconcile;
+ btree = S2BT(session);
+
+ if (cbulk->entry == cbulk->nrecs) {
+ if (cbulk->entry != 0) {
+ /*
+ * If everything didn't fit, update the counters and
+ * split.
+ *
+ * Boundary: split or write the page.
+ *
+ * No need to have a minimum split size boundary, all
+ * pages are filled 100% except the last, allowing it to
+ * grow in the future.
+ */
+ __wt_rec_incr(session, r, cbulk->entry,
+ __bitstr_size(
+ (size_t)cbulk->entry * btree->bitcnt));
+ WT_RET(__wt_rec_split(session, r, 0));
+ }
+ cbulk->entry = 0;
+ cbulk->nrecs = WT_FIX_BYTES_TO_ENTRIES(btree, r->space_avail);
+ }
+ return (0);
+}
+
+/*
+ * __wt_bulk_insert_fix --
+ * Fixed-length column-store bulk insert.
+ */
+int
+__wt_bulk_insert_fix(
+ WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool deleted)
+{
+ WT_BTREE *btree;
+ WT_CURSOR *cursor;
+ WT_RECONCILE *r;
+
+ r = cbulk->reconcile;
+ btree = S2BT(session);
+ cursor = &cbulk->cbt.iface;
+
+ WT_RET(__rec_col_fix_bulk_insert_split_check(cbulk));
+ __bit_setv(r->first_free, cbulk->entry,
+ btree->bitcnt, deleted ? 0 : ((uint8_t *)cursor->value.data)[0]);
+ ++cbulk->entry;
+ ++r->recno;
+
+ return (0);
+}
+
+/*
+ * __wt_bulk_insert_fix_bitmap --
+ * Fixed-length column-store bulk insert.
+ */
+int
+__wt_bulk_insert_fix_bitmap(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
+{
+ WT_BTREE *btree;
+ WT_CURSOR *cursor;
+ WT_RECONCILE *r;
+ uint32_t entries, offset, page_entries, page_size;
+ const uint8_t *data;
+
+ r = cbulk->reconcile;
+ btree = S2BT(session);
+ cursor = &cbulk->cbt.iface;
+
+ if (((r->recno - 1) * btree->bitcnt) & 0x7)
+ WT_RET_MSG(session, EINVAL,
+ "Bulk bitmap load not aligned on a byte boundary");
+ for (data = cursor->value.data,
+ entries = (uint32_t)cursor->value.size;
+ entries > 0;
+ entries -= page_entries, data += page_size) {
+ WT_RET(__rec_col_fix_bulk_insert_split_check(cbulk));
+
+ page_entries = WT_MIN(entries, cbulk->nrecs - cbulk->entry);
+ page_size = __bitstr_size(page_entries * btree->bitcnt);
+ offset = __bitstr_size(cbulk->entry * btree->bitcnt);
+ memcpy(r->first_free + offset, data, page_size);
+ cbulk->entry += page_entries;
+ r->recno += page_entries;
+ }
+ return (0);
+}
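Fixed-length column-store entries are btree->bitcnt bits wide and packed into a byte array, which is why sizes are computed by rounding bits up to bytes, capacities by dividing the available bits by the entry width, and why bulk bitmap loads must start on a byte boundary. A small standalone sketch of that arithmetic (the macros mirror, but are not, __bitstr_size and WT_FIX_BYTES_TO_ENTRIES):

#include <stdint.h>
#include <stdio.h>

/* Bytes needed to hold "nbits" bits, rounding up. */
#define BITS_TO_BYTES(nbits)	(((nbits) + 7) >> 3)

/* Entries of "bitcnt" bits each that fit in "bytes" bytes. */
#define BYTES_TO_ENTRIES(bytes, bitcnt)	(((bytes) * 8) / (bitcnt))

int
main(void)
{
	uint64_t recno = 9;		/* Next record to be loaded. */
	uint32_t bitcnt = 8;		/* 8-bit fixed-length values. */

	/* 1000 3-bit entries need 375 bytes. */
	printf("%u\n", (unsigned)BITS_TO_BYTES(1000 * 3));

	/* A 4KB page holds 10922 3-bit entries. */
	printf("%u\n", (unsigned)BYTES_TO_ENTRIES(4096, 3));

	/* Byte-alignment check used by the bulk bitmap load. */
	printf("aligned: %s\n",
	    (((recno - 1) * bitcnt) & 0x7) == 0 ? "yes" : "no");
	return (0);
}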
+
+/*
+ * __wt_bulk_insert_var --
+ * Variable-length column-store bulk insert.
+ */
+int
+__wt_bulk_insert_var(
+ WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool deleted)
+{
+ WT_BTREE *btree;
+ WT_RECONCILE *r;
+ WT_REC_KV *val;
+
+ r = cbulk->reconcile;
+ btree = S2BT(session);
+
+ val = &r->v;
+ if (deleted) {
+ val->cell_len = __wt_cell_pack_del(&val->cell, cbulk->rle);
+ val->buf.data = NULL;
+ val->buf.size = 0;
+ val->len = val->cell_len;
+ } else
+ /*
+ * Store the bulk cursor's last buffer, not the current value,
+ * we're tracking duplicates, which means we want the previous
+ * value seen, not the current value.
+ */
+ WT_RET(__wt_rec_cell_build_val(session,
+ r, cbulk->last.data, cbulk->last.size, cbulk->rle));
+
+ /* Boundary: split or write the page. */
+ if (WT_CROSSING_SPLIT_BND(r, val->len))
+ WT_RET(__wt_rec_split_crossing_bnd(session, r, val->len));
+
+ /* Copy the value onto the page. */
+ if (btree->dictionary)
+ WT_RET(__wt_rec_dict_replace(session, r, cbulk->rle, val));
+ __wt_rec_copy_incr(session, r, val);
+
+ /* Update the starting record number in case we split. */
+ r->recno += cbulk->rle;
+
+ return (0);
+}
+
+/*
+ * __rec_col_merge --
+ * Merge in a split page.
+ */
+static int
+__rec_col_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
+{
+ WT_ADDR *addr;
+ WT_MULTI *multi;
+ WT_PAGE_MODIFY *mod;
+ WT_REC_KV *val;
+ uint32_t i;
+
+ mod = page->modify;
+
+ val = &r->v;
+
+ /* For each entry in the split array... */
+ for (multi = mod->mod_multi,
+ i = 0; i < mod->mod_multi_entries; ++multi, ++i) {
+ /* Update the starting record number in case we split. */
+ r->recno = multi->key.recno;
+
+ /* Build the value cell. */
+ addr = &multi->addr;
+ __wt_rec_cell_build_addr(session, r,
+ addr->addr, addr->size, __wt_rec_vtype(addr), r->recno);
+
+ /* Boundary: split or write the page. */
+ if (__wt_rec_need_split(r, val->len))
+ WT_RET(__wt_rec_split_crossing_bnd(
+ session, r, val->len));
+
+ /* Copy the value onto the page. */
+ __wt_rec_copy_incr(session, r, val);
+ }
+ return (0);
+}
+
+/*
+ * __wt_rec_col_int --
+ * Reconcile a column-store internal page.
+ */
+int
+__wt_rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref)
+{
+ WT_ADDR *addr;
+ WT_BTREE *btree;
+ WT_CELL_UNPACK *vpack, _vpack;
+ WT_CHILD_STATE state;
+ WT_DECL_RET;
+ WT_PAGE *child, *page;
+ WT_REC_KV *val;
+ WT_REF *ref;
+ bool hazard;
+
+ btree = S2BT(session);
+ page = pageref->page;
+ child = NULL;
+ hazard = false;
+
+ val = &r->v;
+ vpack = &_vpack;
+
+ WT_RET(__wt_rec_split_init(session,
+ r, page, pageref->ref_recno, btree->maxintlpage_precomp));
+
+ /* For each entry in the in-memory page... */
+ WT_INTL_FOREACH_BEGIN(session, page, ref) {
+ /* Update the starting record number in case we split. */
+ r->recno = ref->ref_recno;
+
+ /*
+ * Modified child.
+ * The page may be emptied or internally created during a split.
+ * Deleted/split pages are merged into the parent and discarded.
+ */
+ WT_ERR(__wt_rec_child_modify(session, r, ref, &hazard, &state));
+ addr = NULL;
+ child = ref->page;
+
+ switch (state) {
+ case WT_CHILD_IGNORE:
+ /* Ignored child. */
+ WT_CHILD_RELEASE_ERR(session, hazard, ref);
+ continue;
+
+ case WT_CHILD_MODIFIED:
+ /*
+ * Modified child. Empty pages are merged into the
+ * parent and discarded.
+ */
+ switch (child->modify->rec_result) {
+ case WT_PM_REC_EMPTY:
+ /*
+ * Column-store pages are almost never empty, as
+ * discarding a page would remove a chunk of the
+ * name space. The exceptions are pages created
+ * when the tree is created, and never filled.
+ */
+ WT_CHILD_RELEASE_ERR(session, hazard, ref);
+ continue;
+ case WT_PM_REC_MULTIBLOCK:
+ WT_ERR(__rec_col_merge(session, r, child));
+ WT_CHILD_RELEASE_ERR(session, hazard, ref);
+ continue;
+ case WT_PM_REC_REPLACE:
+ addr = &child->modify->mod_replace;
+ break;
+ WT_ILLEGAL_VALUE_ERR(
+ session, child->modify->rec_result);
+ }
+ break;
+ case WT_CHILD_ORIGINAL:
+ /* Original child. */
+ break;
+ case WT_CHILD_PROXY:
+ /*
+ * Deleted child where we write a proxy cell, not yet
+ * supported for column-store.
+ */
+ WT_ERR(__wt_illegal_value(session, state));
+ }
+
+ /*
+ * Build the value cell. The child page address is in one of 3
+ * places: if the page was replaced, the page's modify structure
+ * references it and we built the value cell just above in the
+ * switch statement. Else, the WT_REF->addr reference points to
+ * an on-page cell or an off-page WT_ADDR structure: if it's an
+ * on-page cell and we copy it from the page, else build a new
+		 * on-page cell, we copy it from the page, else build a new
+ */
+ if (addr == NULL && __wt_off_page(page, ref->addr))
+ addr = ref->addr;
+ if (addr == NULL) {
+ __wt_cell_unpack(ref->addr, vpack);
+ val->buf.data = ref->addr;
+ val->buf.size = __wt_cell_total_len(vpack);
+ val->cell_len = 0;
+ val->len = val->buf.size;
+ } else
+ __wt_rec_cell_build_addr(session, r,
+ addr->addr, addr->size,
+ __wt_rec_vtype(addr), ref->ref_recno);
+ WT_CHILD_RELEASE_ERR(session, hazard, ref);
+
+ /* Boundary: split or write the page. */
+ if (__wt_rec_need_split(r, val->len))
+ WT_ERR(__wt_rec_split_crossing_bnd(
+ session, r, val->len));
+
+ /* Copy the value onto the page. */
+ __wt_rec_copy_incr(session, r, val);
+ } WT_INTL_FOREACH_END;
+
+ /* Write the remnant page. */
+ return (__wt_rec_split_finish(session, r));
+
+err: WT_CHILD_RELEASE(session, hazard, ref);
+ return (ret);
+}
+
+/*
+ * __wt_rec_col_fix --
+ * Reconcile a fixed-width, column-store leaf page.
+ */
+int
+__wt_rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref)
+{
+ WT_BTREE *btree;
+ WT_INSERT *ins;
+ WT_PAGE *page;
+ WT_UPDATE *upd;
+ uint64_t recno;
+ uint32_t entry, nrecs;
+
+ btree = S2BT(session);
+ page = pageref->page;
+
+ WT_RET(__wt_rec_split_init(
+ session, r, page, pageref->ref_recno, btree->maxleafpage));
+
+ /* Copy the original, disk-image bytes into place. */
+ memcpy(r->first_free, page->pg_fix_bitf,
+ __bitstr_size((size_t)page->entries * btree->bitcnt));
+
+ /* Update any changes to the original on-page data items. */
+ WT_SKIP_FOREACH(ins, WT_COL_UPDATE_SINGLE(page)) {
+ WT_RET(__wt_rec_txn_read(
+ session, r, ins, NULL, NULL, NULL, &upd));
+ if (upd != NULL)
+ __bit_setv(r->first_free,
+ WT_INSERT_RECNO(ins) - pageref->ref_recno,
+ btree->bitcnt, *upd->data);
+ }
+
+ /* Calculate the number of entries per page remainder. */
+ entry = page->entries;
+ nrecs = WT_FIX_BYTES_TO_ENTRIES(btree, r->space_avail) - page->entries;
+ r->recno += entry;
+
+ /* Walk any append list. */
+ for (ins =
+ WT_SKIP_FIRST(WT_COL_APPEND(page));; ins = WT_SKIP_NEXT(ins)) {
+ if (ins == NULL) {
+ /*
+ * If the page split, instantiate any missing records in
+ * the page's name space. (Imagine record 98 is
+ * transactionally visible, 99 wasn't created or is not
+ * yet visible, 100 is visible. Then the page splits and
+ * record 100 moves to another page. When we reconcile
+ * the original page, we write record 98, then we don't
+ * see record 99 for whatever reason. If we've moved
+ * record 100, we don't know to write a deleted record
+ * 99 on the page.)
+ *
+ * The record number recorded during the split is the
+ * first key on the split page, that is, one larger than
+ * the last key on this page, we have to decrement it.
+ */
+ if ((recno =
+ page->modify->mod_col_split_recno) == WT_RECNO_OOB)
+ break;
+ recno -= 1;
+
+ /*
+ * The following loop assumes records to write, and the
+ * previous key might have been visible.
+ */
+ if (r->recno > recno)
+ break;
+ upd = NULL;
+ } else {
+ WT_RET(__wt_rec_txn_read(
+ session, r, ins, NULL, NULL, NULL, &upd));
+ recno = WT_INSERT_RECNO(ins);
+ }
+ for (;;) {
+ /*
+ * The application may have inserted records which left
+ * gaps in the name space.
+ */
+ for (;
+ nrecs > 0 && r->recno < recno;
+ --nrecs, ++entry, ++r->recno)
+ __bit_setv(
+ r->first_free, entry, btree->bitcnt, 0);
+
+ if (nrecs > 0) {
+ __bit_setv(r->first_free, entry, btree->bitcnt,
+ upd == NULL ? 0 : *upd->data);
+ --nrecs;
+ ++entry;
+ ++r->recno;
+ break;
+ }
+
+ /*
+ * If everything didn't fit, update the counters and
+ * split.
+ *
+ * Boundary: split or write the page.
+ *
+ * No need to have a minimum split size boundary, all
+ * pages are filled 100% except the last, allowing it to
+ * grow in the future.
+ */
+ __wt_rec_incr(session, r, entry,
+ __bitstr_size((size_t)entry * btree->bitcnt));
+ WT_RET(__wt_rec_split(session, r, 0));
+
+ /* Calculate the number of entries per page. */
+ entry = 0;
+ nrecs = WT_FIX_BYTES_TO_ENTRIES(btree, r->space_avail);
+ }
+
+ /*
+ * Execute this loop once without an insert item to catch any
+ * missing records due to a split, then quit.
+ */
+ if (ins == NULL)
+ break;
+ }
+
+ /* Update the counters. */
+ __wt_rec_incr(
+ session, r, entry, __bitstr_size((size_t)entry * btree->bitcnt));
+
+ /* Write the remnant page. */
+ return (__wt_rec_split_finish(session, r));
+}
+
+/*
+ * __wt_rec_col_fix_slvg --
+ * Reconcile a fixed-width, column-store leaf page created during salvage.
+ */
+int
+__wt_rec_col_fix_slvg(WT_SESSION_IMPL *session,
+ WT_RECONCILE *r, WT_REF *pageref, WT_SALVAGE_COOKIE *salvage)
+{
+ WT_BTREE *btree;
+ WT_PAGE *page;
+ uint64_t page_start, page_take;
+ uint32_t entry, nrecs;
+
+ btree = S2BT(session);
+ page = pageref->page;
+
+ /*
+ * !!!
+ * It's vanishingly unlikely and probably impossible for fixed-length
+ * column-store files to have overlapping key ranges. It's possible
+ * for an entire key range to go missing (if a page is corrupted and
+ * lost), but because pages can't split, it shouldn't be possible to
+ * find pages where the key ranges overlap. That said, we check for
+ * it during salvage and clean up after it here because it doesn't
+ * cost much and future column-store formats or operations might allow
+ * for fixed-length format ranges to overlap during salvage, and I
+ * don't want to have to retrofit the code later.
+ */
+ WT_RET(__wt_rec_split_init(
+ session, r, page, pageref->ref_recno, btree->maxleafpage));
+
+ /* We may not be taking all of the entries on the original page. */
+ page_take = salvage->take == 0 ? page->entries : salvage->take;
+ page_start = salvage->skip == 0 ? 0 : salvage->skip;
+
+ /* Calculate the number of entries per page. */
+ entry = 0;
+ nrecs = WT_FIX_BYTES_TO_ENTRIES(btree, r->space_avail);
+
+ for (; nrecs > 0 && salvage->missing > 0;
+ --nrecs, --salvage->missing, ++entry)
+ __bit_setv(r->first_free, entry, btree->bitcnt, 0);
+
+ for (; nrecs > 0 && page_take > 0;
+ --nrecs, --page_take, ++page_start, ++entry)
+ __bit_setv(r->first_free, entry, btree->bitcnt,
+ __bit_getv(page->pg_fix_bitf,
+ (uint32_t)page_start, btree->bitcnt));
+
+ r->recno += entry;
+ __wt_rec_incr(session, r, entry,
+ __bitstr_size((size_t)entry * btree->bitcnt));
+
+ /*
+ * We can't split during salvage -- if everything didn't fit, it's
+ * all gone wrong.
+ */
+ if (salvage->missing != 0 || page_take != 0)
+ WT_PANIC_RET(session, WT_PANIC,
+ "%s page too large, attempted split during salvage",
+ __wt_page_type_string(page->type));
+
+ /* Write the page. */
+ return (__wt_rec_split_finish(session, r));
+}
+
+/*
+ * __rec_col_var_helper --
+ * Create a column-store variable length record cell and write it onto a
+ * page.
+ */
+static int
+__rec_col_var_helper(WT_SESSION_IMPL *session, WT_RECONCILE *r,
+ WT_SALVAGE_COOKIE *salvage,
+ WT_ITEM *value, bool deleted, uint8_t overflow_type, uint64_t rle)
+{
+ WT_BTREE *btree;
+ WT_REC_KV *val;
+
+ btree = S2BT(session);
+
+ val = &r->v;
+
+ /*
+ * Occasionally, salvage needs to discard records from the beginning or
+ * end of the page, and because the items may be part of a RLE cell, do
+	 * the adjustments here. It's not a mistake that we don't bother
+	 * telling our caller when we've handled all the records from the page
+	 * it cares about and it could quit processing the page: salvage is a
+	 * rare operation and I don't want to complicate our caller's loop.
+ */
+ if (salvage != NULL) {
+ if (salvage->done)
+ return (0);
+ if (salvage->skip != 0) {
+ if (rle <= salvage->skip) {
+ salvage->skip -= rle;
+ return (0);
+ }
+ rle -= salvage->skip;
+ salvage->skip = 0;
+ }
+ if (salvage->take != 0) {
+ if (rle <= salvage->take)
+ salvage->take -= rle;
+ else {
+ rle = salvage->take;
+ salvage->take = 0;
+ }
+ if (salvage->take == 0)
+ salvage->done = true;
+ }
+ }
+
+ if (deleted) {
+ val->cell_len = __wt_cell_pack_del(&val->cell, rle);
+ val->buf.data = NULL;
+ val->buf.size = 0;
+ val->len = val->cell_len;
+ } else if (overflow_type) {
+ val->cell_len = __wt_cell_pack_ovfl(
+ &val->cell, overflow_type, rle, value->size);
+ val->buf.data = value->data;
+ val->buf.size = value->size;
+ val->len = val->cell_len + value->size;
+ } else
+ WT_RET(__wt_rec_cell_build_val(
+ session, r, value->data, value->size, rle));
+
+ /* Boundary: split or write the page. */
+ if (__wt_rec_need_split(r, val->len))
+ WT_RET(__wt_rec_split_crossing_bnd(session, r, val->len));
+
+ /* Copy the value onto the page. */
+ if (!deleted && !overflow_type && btree->dictionary)
+ WT_RET(__wt_rec_dict_replace(session, r, rle, val));
+ __wt_rec_copy_incr(session, r, val);
+
+ /* Update the starting record number in case we split. */
+ r->recno += rle;
+
+ return (0);
+}
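The salvage adjustment above trims an RLE run so that records salvage wants skipped are dropped from the front of the run, and the run is truncated once the "take" budget is exhausted. A small standalone sketch of that bookkeeping, with plain integers in place of the reconciliation and salvage-cookie structures:

#include <stdio.h>

struct salvage {
	unsigned long long skip;	/* Records to drop from the front. */
	unsigned long long take;	/* Records still wanted. */
	int done;
};

/* Return the number of records from an RLE run of "rle" records to keep. */
static unsigned long long
trim_run(struct salvage *s, unsigned long long rle)
{
	if (s->done)
		return (0);
	if (s->skip != 0) {
		if (rle <= s->skip) {		/* Entire run skipped. */
			s->skip -= rle;
			return (0);
		}
		rle -= s->skip;
		s->skip = 0;
	}
	if (s->take != 0) {
		if (rle <= s->take)
			s->take -= rle;
		else {
			rle = s->take;		/* Truncate the run. */
			s->take = 0;
		}
		if (s->take == 0)
			s->done = 1;
	}
	return (rle);
}

int
main(void)
{
	struct salvage s = {5, 12, 0};
	unsigned long long a, b, c;

	a = trim_run(&s, 4);	/* Run of 4: all skipped, keep 0. */
	b = trim_run(&s, 6);	/* Skip 1 more, keep 5, 7 still wanted. */
	c = trim_run(&s, 10);	/* Only 7 still wanted: keep 7, done. */
	printf("%llu %llu %llu\n", a, b, c);
	return (0);
}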
+
+/*
+ * __wt_rec_col_var --
+ * Reconcile a variable-width column-store leaf page.
+ */
+int
+__wt_rec_col_var(WT_SESSION_IMPL *session,
+ WT_RECONCILE *r, WT_REF *pageref, WT_SALVAGE_COOKIE *salvage)
+{
+ enum { OVFL_IGNORE, OVFL_UNUSED, OVFL_USED } ovfl_state;
+ WT_BTREE *btree;
+ WT_CELL *cell;
+ WT_CELL_UNPACK *vpack, _vpack;
+ WT_COL *cip;
+ WT_CURSOR_BTREE *cbt;
+ WT_DECL_ITEM(orig);
+ WT_DECL_RET;
+ WT_INSERT *ins;
+ WT_ITEM *last;
+ WT_PAGE *page;
+ WT_UPDATE *upd;
+ uint64_t n, nrepeat, repeat_count, rle, skip, src_recno;
+ uint32_t i, size;
+ bool deleted, last_deleted, orig_deleted, update_no_copy;
+ const void *data;
+
+ btree = S2BT(session);
+ page = pageref->page;
+ last = r->last;
+ vpack = &_vpack;
+ cbt = &r->update_modify_cbt;
+
+ WT_RET(__wt_rec_split_init(session,
+ r, page, pageref->ref_recno, btree->maxleafpage_precomp));
+
+ WT_RET(__wt_scr_alloc(session, 0, &orig));
+ data = NULL;
+ size = 0;
+ upd = NULL;
+
+ /*
+ * The salvage code may be calling us to reconcile a page where there
+ * were missing records in the column-store name space. If taking the
+ * first record from on the page, it might be a deleted record, so we
+	 * first record from the page, it might be a deleted record, so we
+ * not taking the first record from the page, write a single element
+ * representing the missing records onto a new page. (Don't pass the
+ * salvage cookie to our helper function in this case, we're handling
+ * one of the salvage cookie fields on our own, and we don't need the
+ * helper function's assistance.)
+ */
+ rle = 0;
+ last_deleted = false;
+ if (salvage != NULL && salvage->missing != 0) {
+ if (salvage->skip == 0) {
+ rle = salvage->missing;
+ last_deleted = true;
+
+ /*
+ * Correct the number of records we're going to "take",
+ * pretending the missing records were on the page.
+ */
+ salvage->take += salvage->missing;
+ } else
+ WT_ERR(__rec_col_var_helper(session,
+ r, NULL, NULL, true, false, salvage->missing));
+ }
+
+ /*
+ * We track two data items through this loop: the previous (last) item
+ * and the current item: if the last item is the same as the current
+ * item, we increment the RLE count for the last item; if the last item
+ * is different from the current item, we write the last item onto the
+ * page, and replace it with the current item. The r->recno counter
+ * tracks records written to the page, and is incremented by the helper
+ * function immediately after writing records to the page. The record
+ * number of our source record, that is, the current item, is maintained
+ * in src_recno.
+ */
+ src_recno = r->recno + rle;
+
+ /* For each entry in the in-memory page... */
+ WT_COL_FOREACH(page, cip, i) {
+ ovfl_state = OVFL_IGNORE;
+ if ((cell = WT_COL_PTR(page, cip)) == NULL) {
+ nrepeat = 1;
+ ins = NULL;
+ orig_deleted = true;
+ } else {
+ __wt_cell_unpack(cell, vpack);
+ nrepeat = __wt_cell_rle(vpack);
+ ins = WT_SKIP_FIRST(WT_COL_UPDATE(page, cip));
+
+ /*
+ * If the original value is "deleted", there's no value
+ * to compare, we're done.
+ */
+ orig_deleted = vpack->type == WT_CELL_DEL;
+ if (orig_deleted)
+ goto record_loop;
+
+ /*
+ * Overflow items are tricky: we don't know until we're
+ * finished processing the set of values if we need the
+ * overflow value or not. If we don't use the overflow
+ * item at all, we have to discard it from the backing
+ * file, otherwise we'll leak blocks on the checkpoint.
+ * That's safe because if the backing overflow value is
+ * still needed by any running transaction, we'll cache
+ * a copy in the update list.
+ *
+ * Regardless, we avoid copying in overflow records: if
+ * there's a WT_INSERT entry that modifies a reference
+ * counted overflow record, we may have to write copies
+ * of the overflow record, and in that case we'll do the
+ * comparisons, but we don't read overflow items just to
+ * see if they match records on either side.
+ */
+ if (vpack->ovfl) {
+ ovfl_state = OVFL_UNUSED;
+ goto record_loop;
+ }
+
+ /*
+ * If data is Huffman encoded, we have to decode it in
+ * order to compare it with the last item we saw, which
+ * may have been an update string. This guarantees we
+ * find every single pair of objects we can RLE encode,
+ * including applications updating an existing record
+ * where the new value happens (?) to match a Huffman-
+ * encoded value in a previous or next record.
+ */
+ WT_ERR(__wt_dsk_cell_data_ref(
+ session, WT_PAGE_COL_VAR, vpack, orig));
+ }
+
+record_loop: /*
+ * Generate on-page entries: loop repeat records, looking for
+ * WT_INSERT entries matching the record number. The WT_INSERT
+	 * lists are in sorted order, so we only need to check the next one.
+ */
+ for (n = 0;
+ n < nrepeat; n += repeat_count, src_recno += repeat_count) {
+ upd = NULL;
+ if (ins != NULL && WT_INSERT_RECNO(ins) == src_recno) {
+ WT_ERR(__wt_rec_txn_read(
+ session, r, ins, cip, vpack, NULL, &upd));
+ ins = WT_SKIP_NEXT(ins);
+ }
+
+ update_no_copy = true; /* No data copy */
+ repeat_count = 1; /* Single record */
+ deleted = false;
+
+ if (upd != NULL) {
+ switch (upd->type) {
+ case WT_UPDATE_MODIFY:
+ cbt->slot = WT_COL_SLOT(page, cip);
+ WT_ERR(__wt_value_return_upd(
+ session, cbt, upd,
+ F_ISSET(r, WT_REC_VISIBLE_ALL)));
+ data = cbt->iface.value.data;
+ size = (uint32_t)cbt->iface.value.size;
+ update_no_copy = false;
+ break;
+ case WT_UPDATE_STANDARD:
+ data = upd->data;
+ size = upd->size;
+ break;
+ case WT_UPDATE_TOMBSTONE:
+ deleted = true;
+ break;
+ WT_ILLEGAL_VALUE_ERR(session, upd->type);
+ }
+ } else if (vpack->raw == WT_CELL_VALUE_OVFL_RM) {
+ /*
+ * If doing an update save and restore, and the
+ * underlying value is a removed overflow value,
+ * we end up here.
+ *
+ * If necessary, when the overflow value was
+ * originally removed, reconciliation appended
+ * a globally visible copy of the value to the
+ * key's update list, meaning the on-page item
+ * isn't accessed after page re-instantiation.
+ *
+ * Assert the case.
+ */
+ WT_ASSERT(session,
+ F_ISSET(r, WT_REC_UPDATE_RESTORE));
+
+ /*
+ * The on-page value will never be accessed,
+ * write a placeholder record.
+ */
+ data = "ovfl-unused";
+ size = WT_STORE_SIZE(strlen("ovfl-unused"));
+ } else {
+ update_no_copy = false; /* Maybe data copy */
+
+ /*
+ * The repeat count is the number of records up
+ * to the next WT_INSERT record, or up to the
+ * end of the entry if we have no more WT_INSERT
+ * records.
+ */
+ if (ins == NULL)
+ repeat_count = nrepeat - n;
+ else
+ repeat_count =
+ WT_INSERT_RECNO(ins) - src_recno;
+
+ deleted = orig_deleted;
+ if (deleted)
+ goto compare;
+
+ /*
+ * If we are handling overflow items, use the
+ * overflow item itself exactly once, after
+ * which we have to copy it into a buffer and
+ * from then on use a complete copy because we
+ * are re-creating a new overflow record each
+ * time.
+ */
+ switch (ovfl_state) {
+ case OVFL_UNUSED:
+ /*
+ * An as-yet-unused overflow item.
+ *
+ * We're going to copy the on-page cell,
+ * write out any record we're tracking.
+ */
+ if (rle != 0) {
+ WT_ERR(__rec_col_var_helper(
+ session, r, salvage, last,
+ last_deleted, 0, rle));
+ rle = 0;
+ }
+
+ last->data = vpack->data;
+ last->size = vpack->size;
+ WT_ERR(__rec_col_var_helper(
+ session, r, salvage, last, false,
+ WT_CELL_VALUE_OVFL, repeat_count));
+
+ /* Track if page has overflow items. */
+ r->ovfl_items = true;
+
+ ovfl_state = OVFL_USED;
+ continue;
+ case OVFL_USED:
+ /*
+ * Original is an overflow item; we used
+ * it for a key and now we need another
+ * copy; read it into memory.
+ */
+ WT_ERR(__wt_dsk_cell_data_ref(session,
+ WT_PAGE_COL_VAR, vpack, orig));
+
+ ovfl_state = OVFL_IGNORE;
+ /* FALLTHROUGH */
+ case OVFL_IGNORE:
+ /*
+ * Original is an overflow item and we
+ * were forced to copy it into memory,
+ * or the original wasn't an overflow
+ * item; use the data copied into orig.
+ */
+ data = orig->data;
+ size = (uint32_t)orig->size;
+ break;
+ }
+ }
+
+compare: /*
+ * If we have a record against which to compare, and
+ * the records compare equal, increment the rle counter
+ * and continue. If the records don't compare equal,
+ * output the last record and swap the last and current
+ * buffers: do NOT update the starting record number,
+ * we've been doing that all along.
+ */
+ if (rle != 0) {
+ if ((deleted && last_deleted) ||
+ (!last_deleted && !deleted &&
+ last->size == size &&
+ memcmp(last->data, data, size) == 0)) {
+ rle += repeat_count;
+ continue;
+ }
+ WT_ERR(__rec_col_var_helper(session, r,
+ salvage, last, last_deleted, 0, rle));
+ }
+
+ /*
+ * Swap the current/last state.
+ *
+ * Reset RLE counter and turn on comparisons.
+ */
+ if (!deleted) {
+ /*
+ * We can't simply assign the data values into
+ * the last buffer because they may have come
+ * from a copy built from an encoded/overflow
+ * cell and creating the next record is going
+ * to overwrite that memory. Check, because
+ * encoded/overflow cells aren't that common
+ * and we'd like to avoid the copy. If data
+ * was taken from the current unpack structure
+ * (which points into the page), or was taken
+ * from an update structure, we can just use
+ * the pointers, they're not moving.
+ */
+ if (data == vpack->data || update_no_copy) {
+ last->data = data;
+ last->size = size;
+ } else
+ WT_ERR(__wt_buf_set(
+ session, last, data, size));
+ }
+ last_deleted = deleted;
+ rle = repeat_count;
+ }
+
+ /*
+ * The first time we find an overflow record we never used,
+ * discard the underlying blocks, they're no longer useful.
+ */
+ if (ovfl_state == OVFL_UNUSED &&
+ vpack->raw != WT_CELL_VALUE_OVFL_RM)
+ WT_ERR(__wt_ovfl_remove(
+ session, page, vpack, F_ISSET(r, WT_REC_EVICT)));
+ }
+
+ /* Walk any append list. */
+ for (ins =
+ WT_SKIP_FIRST(WT_COL_APPEND(page));; ins = WT_SKIP_NEXT(ins)) {
+ if (ins == NULL) {
+ /*
+ * If the page split, instantiate any missing records in
+ * the page's name space. (Imagine record 98 is
+ * transactionally visible, 99 wasn't created or is not
+ * yet visible, 100 is visible. Then the page splits and
+ * record 100 moves to another page. When we reconcile
+ * the original page, we write record 98, then we don't
+ * see record 99 for whatever reason. If we've moved
+ * record 100, we don't know to write a deleted record
+ * 99 on the page.)
+ *
+ * Assert the recorded record number is past the end of
+ * the page.
+ *
+ * The record number recorded during the split is the
+ * first key on the split page, that is, one larger than
+ * the last key on this page, we have to decrement it.
+ */
+ if ((n = page->
+ modify->mod_col_split_recno) == WT_RECNO_OOB)
+ break;
+ WT_ASSERT(session, n >= src_recno);
+ n -= 1;
+
+ upd = NULL;
+ } else {
+ WT_ERR(__wt_rec_txn_read(
+ session, r, ins, NULL, NULL, NULL, &upd));
+ n = WT_INSERT_RECNO(ins);
+ }
+ while (src_recno <= n) {
+ deleted = false;
+ update_no_copy = true;
+
+ /*
+ * The application may have inserted records which left
+ * gaps in the name space, and these gaps can be huge.
+ * If we're in a set of deleted records, skip the boring
+ * part.
+ */
+ if (src_recno < n) {
+ deleted = true;
+ if (last_deleted) {
+ /*
+ * The record adjustment is decremented
+ * by one so we can naturally fall into
+ * the RLE accounting below, where we
+ * increment rle by one, then continue
+ * in the outer loop, where we increment
+ * src_recno by one.
+ */
+ skip = (n - src_recno) - 1;
+ rle += skip;
+ src_recno += skip;
+ }
+ } else if (upd == NULL)
+ deleted = true;
+ else
+ switch (upd->type) {
+ case WT_UPDATE_MODIFY:
+ /*
+ * Impossible slot, there's no backing
+ * on-page item.
+ */
+ cbt->slot = UINT32_MAX;
+ WT_ERR(__wt_value_return_upd(
+ session, cbt, upd,
+ F_ISSET(r, WT_REC_VISIBLE_ALL)));
+ data = cbt->iface.value.data;
+ size = (uint32_t)cbt->iface.value.size;
+ update_no_copy = false;
+ break;
+ case WT_UPDATE_STANDARD:
+ data = upd->data;
+ size = upd->size;
+ break;
+ case WT_UPDATE_TOMBSTONE:
+ deleted = true;
+ break;
+ WT_ILLEGAL_VALUE_ERR(session, upd->type);
+ }
+
+ /*
+ * Handle RLE accounting and comparisons -- see comment
+ * above, this code fragment does the same thing.
+ */
+ if (rle != 0) {
+ if ((deleted && last_deleted) ||
+ (!last_deleted && !deleted &&
+ last->size == size &&
+ memcmp(last->data, data, size) == 0)) {
+ ++rle;
+ goto next;
+ }
+ WT_ERR(__rec_col_var_helper(session, r,
+ salvage, last, last_deleted, 0, rle));
+ }
+
+ /*
+ * Swap the current/last state. We can't simply assign
+ * the data values into the last buffer because they may
+ * be a temporary copy built from a chain of modified
+ * updates and creating the next record will overwrite
+ * that memory. Check, we'd like to avoid the copy. If
+ * data was taken from an update structure, we can just
+ * use the pointers, they're not moving.
+ */
+ if (!deleted) {
+ if (update_no_copy) {
+ last->data = data;
+ last->size = size;
+ } else
+ WT_ERR(__wt_buf_set(
+ session, last, data, size));
+ }
+
+ /* Ready for the next loop, reset the RLE counter. */
+ last_deleted = deleted;
+ rle = 1;
+
+ /*
+ * Move to the next record. It's not a simple increment
+ * because if it's the maximum record, incrementing it
+ * wraps to 0 and this turns into an infinite loop.
+ */
+next: if (src_recno == UINT64_MAX)
+ break;
+ ++src_recno;
+ }
+
+ /*
+ * Execute this loop once without an insert item to catch any
+ * missing records due to a split, then quit.
+ */
+ if (ins == NULL)
+ break;
+ }
+
+ /* If we were tracking a record, write it. */
+ if (rle != 0)
+ WT_ERR(__rec_col_var_helper(
+ session, r, salvage, last, last_deleted, 0, rle));
+
+ /* Write the remnant page. */
+ ret = __wt_rec_split_finish(session, r);
+
+err: __wt_scr_free(session, &orig);
+ return (ret);
+}
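Variable-length column-store reconciliation above builds RLE cells by carrying a "last" item and a run-length counter, emitting a cell only when the next item differs. A stripped-down standalone sketch of that last/current tracking, with none of the visibility, overflow or salvage handling:

#include <stdio.h>
#include <string.h>

/* Stand-in for writing one RLE cell to the page. */
static void
emit(const char *value, unsigned long long rle)
{
	printf("cell: %s x %llu\n", value, rle);
}

int
main(void)
{
	static const char *items[] = {"a", "a", "a", "b", "b", "a", NULL};
	const char *last;
	unsigned long long rle;
	int i;

	last = items[0];
	rle = 1;
	for (i = 1; items[i] != NULL; ++i) {
		/* Same as the previous item: extend the current run. */
		if (strcmp(items[i], last) == 0) {
			++rle;
			continue;
		}
		/* Different item: write the pending run, start a new one. */
		emit(last, rle);
		last = items[i];
		rle = 1;
	}
	/* If we were tracking a run, write it. */
	emit(last, rle);
	return (0);
}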
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_dictionary.c b/src/third_party/wiredtiger/src/reconcile/rec_dictionary.c
new file mode 100644
index 00000000000..11707f77620
--- /dev/null
+++ b/src/third_party/wiredtiger/src/reconcile/rec_dictionary.c
@@ -0,0 +1,200 @@
+/*-
+ * Copyright (c) 2014-2019 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * __rec_dictionary_skip_search --
+ * Search a dictionary skiplist.
+ */
+static WT_REC_DICTIONARY *
+__rec_dictionary_skip_search(WT_REC_DICTIONARY **head, uint64_t hash)
+{
+ WT_REC_DICTIONARY **e;
+ int i;
+
+ /*
+ * Start at the highest skip level, then go as far as possible at each
+ * level before stepping down to the next.
+ */
+ for (i = WT_SKIP_MAXDEPTH - 1, e = &head[i]; i >= 0;) {
+ if (*e == NULL) { /* Empty levels */
+ --i;
+ --e;
+ continue;
+ }
+
+ /*
+ * Return any exact matches: we don't care in what search level
+ * we found a match.
+ */
+ if ((*e)->hash == hash) /* Exact match */
+ return (*e);
+ if ((*e)->hash > hash) { /* Drop down a level */
+ --i;
+ --e;
+ } else /* Keep going at this level */
+ e = &(*e)->next[i];
+ }
+ return (NULL);
+}
+
+/*
+ * __rec_dictionary_skip_search_stack --
+ * Search a dictionary skiplist, returning an insert/remove stack.
+ */
+static void
+__rec_dictionary_skip_search_stack(
+ WT_REC_DICTIONARY **head, WT_REC_DICTIONARY ***stack, uint64_t hash)
+{
+ WT_REC_DICTIONARY **e;
+ int i;
+
+ /*
+ * Start at the highest skip level, then go as far as possible at each
+ * level before stepping down to the next.
+ */
+ for (i = WT_SKIP_MAXDEPTH - 1, e = &head[i]; i >= 0;)
+ if (*e == NULL || (*e)->hash > hash)
+ stack[i--] = e--; /* Drop down a level */
+ else
+ e = &(*e)->next[i]; /* Keep going at this level */
+}
+
+/*
+ * __rec_dictionary_skip_insert --
+ * Insert an entry into the dictionary skip-list.
+ */
+static void
+__rec_dictionary_skip_insert(
+ WT_REC_DICTIONARY **head, WT_REC_DICTIONARY *e, uint64_t hash)
+{
+ WT_REC_DICTIONARY **stack[WT_SKIP_MAXDEPTH];
+ u_int i;
+
+ /* Insert the new entry into the skiplist. */
+ __rec_dictionary_skip_search_stack(head, stack, hash);
+ for (i = 0; i < e->depth; ++i) {
+ e->next[i] = *stack[i];
+ *stack[i] = e;
+ }
+}
+
+/*
+ * __wt_rec_dictionary_init --
+ * Allocate and initialize the dictionary.
+ */
+int
+__wt_rec_dictionary_init(WT_SESSION_IMPL *session, WT_RECONCILE *r, u_int slots)
+{
+ u_int depth, i;
+
+ /* Free any previous dictionary. */
+ __wt_rec_dictionary_free(session, r);
+
+ r->dictionary_slots = slots;
+ WT_RET(__wt_calloc(session,
+ r->dictionary_slots, sizeof(WT_REC_DICTIONARY *), &r->dictionary));
+ for (i = 0; i < r->dictionary_slots; ++i) {
+ depth = __wt_skip_choose_depth(session);
+ WT_RET(__wt_calloc(session, 1,
+ sizeof(WT_REC_DICTIONARY) +
+ depth * sizeof(WT_REC_DICTIONARY *), &r->dictionary[i]));
+ r->dictionary[i]->depth = depth;
+ }
+ return (0);
+}
+
+/*
+ * __wt_rec_dictionary_free --
+ * Free the dictionary.
+ */
+void
+__wt_rec_dictionary_free(WT_SESSION_IMPL *session, WT_RECONCILE *r)
+{
+ u_int i;
+
+ if (r->dictionary == NULL)
+ return;
+
+ /*
+ * We don't correct dictionary_slots when we fail during allocation,
+ * but that's OK, the value is either NULL or a memory reference to
+ * be free'd.
+ */
+ for (i = 0; i < r->dictionary_slots; ++i)
+ __wt_free(session, r->dictionary[i]);
+ __wt_free(session, r->dictionary);
+}
+
+/*
+ * __wt_rec_dictionary_reset --
+ * Reset the dictionary when reconciliation restarts and when crossing a
+ * page boundary (a potential split).
+ */
+void
+__wt_rec_dictionary_reset(WT_RECONCILE *r)
+{
+ if (r->dictionary_slots) {
+ r->dictionary_next = 0;
+ memset(r->dictionary_head, 0, sizeof(r->dictionary_head));
+ }
+}
+
+/*
+ * __wt_rec_dictionary_lookup --
+ * Check the dictionary for a matching value on this page.
+ */
+int
+__wt_rec_dictionary_lookup(WT_SESSION_IMPL *session,
+ WT_RECONCILE *r, WT_REC_KV *val, WT_REC_DICTIONARY **dpp)
+{
+ WT_REC_DICTIONARY *dp, *next;
+ uint64_t hash;
+ bool match;
+
+ *dpp = NULL;
+
+ /* Search the dictionary, and return any match we find. */
+ hash = __wt_hash_fnv64(val->buf.data, val->buf.size);
+ for (dp = __rec_dictionary_skip_search(r->dictionary_head, hash);
+ dp != NULL && dp->hash == hash; dp = dp->next[0]) {
+ WT_RET(__wt_cell_pack_data_match(
+ (WT_CELL *)((uint8_t *)r->cur_ptr->image.mem + dp->offset),
+ &val->cell, val->buf.data, &match));
+ if (match) {
+ WT_STAT_DATA_INCR(session, rec_dictionary);
+ *dpp = dp;
+ return (0);
+ }
+ }
+
+ /*
+ * We're not doing value replacement in the dictionary. We stop adding
+ * new entries if we run out of empty dictionary slots (but continue to
+ * use the existing entries). I can't think of any reason a leaf page
+ * value is more likely to be seen because it was seen more recently
+ * than some other value: if we find working sets where that's not the
+ * case, it shouldn't be too difficult to maintain a pointer which is
+ * the next dictionary slot to re-use.
+ */
+ if (r->dictionary_next >= r->dictionary_slots)
+ return (0);
+
+ /*
+ * Set the hash value, we'll add this entry into the dictionary when we
+ * write it into the page's disk image buffer (because that's when we
+ * know where on the page it will be written).
+ */
+ next = r->dictionary[r->dictionary_next++];
+ next->offset = 0; /* Not necessary, just cautious. */
+ next->hash = hash;
+ __rec_dictionary_skip_insert(r->dictionary_head, next, hash);
+ *dpp = next;
+ return (0);
+}
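The dictionary avoids writing duplicate values by hashing each candidate value, walking entries with the same hash, and verifying a match byte-for-byte against what was already written to the disk image. A simplified standalone sketch of the same hash-then-verify idea, using a flat array instead of a skiplist and the commonly published FNV-1a constants (not the WiredTiger hash or types):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define DICT_SLOTS 8

struct dict_entry {
	uint64_t hash;
	const char *value;	/* In WiredTiger this is a page offset. */
};

/* FNV-1a 64-bit hash over a byte string. */
static uint64_t
fnv1a(const void *data, size_t len)
{
	const uint8_t *p = data;
	uint64_t h = 14695981039346656037ULL;
	size_t i;

	for (i = 0; i < len; ++i) {
		h ^= p[i];
		h *= 1099511628211ULL;
	}
	return (h);
}

/* Return a matching entry, or add the value if there's an empty slot. */
static const struct dict_entry *
dict_lookup(struct dict_entry *dict, int *nextp, const char *value)
{
	uint64_t hash = fnv1a(value, strlen(value));
	int i;

	for (i = 0; i < *nextp; ++i)
		/* The hash match is a hint only: verify the bytes. */
		if (dict[i].hash == hash && strcmp(dict[i].value, value) == 0)
			return (&dict[i]);
	if (*nextp >= DICT_SLOTS)
		return (NULL);		/* Full: stop adding, keep using. */
	dict[*nextp].hash = hash;
	dict[*nextp].value = value;
	return (&dict[(*nextp)++]);
}

int
main(void)
{
	struct dict_entry dict[DICT_SLOTS];
	int next = 0;

	dict_lookup(dict, &next, "hello");
	dict_lookup(dict, &next, "world");
	dict_lookup(dict, &next, "hello");	/* Reuses the first entry. */
	printf("entries: %d\n", next);		/* Prints 2. */
	return (0);
}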
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_row.c b/src/third_party/wiredtiger/src/reconcile/rec_row.c
new file mode 100644
index 00000000000..dc249f6a22f
--- /dev/null
+++ b/src/third_party/wiredtiger/src/reconcile/rec_row.c
@@ -0,0 +1,1025 @@
+/*-
+ * Copyright (c) 2014-2019 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * __rec_key_state_update --
+ * Update prefix and suffix compression based on the last key.
+ */
+static inline void
+__rec_key_state_update(WT_RECONCILE *r, bool ovfl_key)
+{
+ WT_ITEM *a;
+
+ /*
+ * If writing an overflow key onto the page, don't update the "last key"
+ * value, and leave the state of prefix compression alone. (If we are
+ * currently doing prefix compression, we have a key state which will
+ * continue to work, we're just skipping the key just created because
+ * it's an overflow key and doesn't participate in prefix compression.
+ * If we are not currently doing prefix compression, we can't start, an
+ * overflow key doesn't give us any state.)
+ *
+ * Additionally, if we wrote an overflow key onto the page, turn off the
+ * suffix compression of row-store internal node keys. (When we split,
+ * "last key" is the largest key on the previous page, and "cur key" is
+ * the first key on the next page, which is being promoted. In some
+ * cases we can discard bytes from the "cur key" that are not needed to
+ * distinguish between the "last key" and "cur key", compressing the
+ * size of keys on internal nodes. If we just built an overflow key,
+ * we're not going to update the "last key", making suffix compression
+ * impossible for the next key. Alternatively, we could remember where
+ * the last key was on the page, detect it's an overflow key, read it
+ * from disk and do suffix compression, but that's too much work for an
+ * unlikely event.)
+ *
+ * If we're not writing an overflow key on the page, update the last-key
+ * value and turn on both prefix and suffix compression.
+ */
+ if (ovfl_key)
+ r->key_sfx_compress = false;
+ else {
+ a = r->cur;
+ r->cur = r->last;
+ r->last = a;
+
+ r->key_pfx_compress = r->key_pfx_compress_conf;
+ r->key_sfx_compress = r->key_sfx_compress_conf;
+ }
+}
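
The suffix-compression case described in the comment above promotes only as many bytes of the new page's first key as are needed to sort it after the previous page's last key. A minimal sketch of that calculation, under the assumption that the last key sorts strictly before the promoted key (suffix_compress_len is an invented helper, not a WiredTiger function):

#include <stdio.h>
#include <string.h>

/*
 * Return how many leading bytes of "cur" must be kept so the truncated key
 * still sorts strictly after "last": one byte past the first mismatch.
 */
static size_t
suffix_compress_len(const char *last, size_t last_len,
    const char *cur, size_t cur_len)
{
    size_t i, min_len = last_len < cur_len ? last_len : cur_len;

    for (i = 0; i < min_len; ++i)
        if (last[i] != cur[i])
            return (i + 1);
    return (cur_len);   /* cur extends last: the whole key is needed */
}

int
main(void)
{
    /* "jam" vs. "juice": only "ju" needs to go on the internal page. */
    printf("%zu\n", suffix_compress_len("jam", 3, "juice", 5)); /* 2 */
    return (0);
}
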
+
+/*
+ * __rec_cell_build_int_key --
+ * Process a key and return a WT_CELL structure and byte string to be
+ * stored on a row-store internal page.
+ */
+static int
+__rec_cell_build_int_key(WT_SESSION_IMPL *session,
+ WT_RECONCILE *r, const void *data, size_t size, bool *is_ovflp)
+{
+ WT_BTREE *btree;
+ WT_REC_KV *key;
+
+ *is_ovflp = false;
+
+ btree = S2BT(session);
+
+ key = &r->k;
+
+ /* Copy the bytes into the "current" and key buffers. */
+ WT_RET(__wt_buf_set(session, r->cur, data, size));
+ WT_RET(__wt_buf_set(session, &key->buf, data, size));
+
+ /* Create an overflow object if the data won't fit. */
+ if (size > btree->maxintlkey) {
+ WT_STAT_DATA_INCR(session, rec_overflow_key_internal);
+
+ *is_ovflp = true;
+ return (__wt_rec_cell_build_ovfl(
+ session, r, key, WT_CELL_KEY_OVFL, (uint64_t)0));
+ }
+
+ key->cell_len = __wt_cell_pack_int_key(&key->cell, key->buf.size);
+ key->len = key->cell_len + key->buf.size;
+
+ return (0);
+}
+
+/*
+ * __rec_cell_build_leaf_key --
+ * Process a key and return a WT_CELL structure and byte string to be
+ * stored on a row-store leaf page.
+ */
+static int
+__rec_cell_build_leaf_key(WT_SESSION_IMPL *session,
+ WT_RECONCILE *r, const void *data, size_t size, bool *is_ovflp)
+{
+ WT_BTREE *btree;
+ WT_REC_KV *key;
+ size_t pfx_max;
+ const uint8_t *a, *b;
+ uint8_t pfx;
+
+ *is_ovflp = false;
+
+ btree = S2BT(session);
+
+ key = &r->k;
+
+ pfx = 0;
+ if (data == NULL)
+ /*
+ * When data is NULL, our caller has a prefix compressed key
+ * they can't use (probably because they just crossed a split
+ * point). Use the full key saved when last called, instead.
+ */
+ WT_RET(__wt_buf_set(
+ session, &key->buf, r->cur->data, r->cur->size));
+ else {
+ /*
+ * Save a copy of the key for later reference: we use the full
+ * key for prefix-compression comparisons, and if we are, for
+ * any reason, unable to use the compressed key we generate.
+ */
+ WT_RET(__wt_buf_set(session, r->cur, data, size));
+
+ /*
+ * Do prefix compression on the key. We know by definition the
+ * previous key sorts before the current key, which means the
+ * keys must differ and we just need to compare up to the
+ * shorter of the two keys.
+ */
+ if (r->key_pfx_compress) {
+ /*
+ * We can't compress out more than 256 bytes, limit the
+ * comparison to that.
+ */
+ pfx_max = UINT8_MAX;
+ if (size < pfx_max)
+ pfx_max = size;
+ if (r->last->size < pfx_max)
+ pfx_max = r->last->size;
+ for (a = data, b = r->last->data; pfx < pfx_max; ++pfx)
+ if (*a++ != *b++)
+ break;
+
+ /*
+ * Prefix compression may cost us CPU and memory when
+ * the page is re-loaded, so don't do it unless there's
+ * reasonable gain.
+ */
+ if (pfx < btree->prefix_compression_min)
+ pfx = 0;
+ else
+ WT_STAT_DATA_INCRV(
+ session, rec_prefix_compression, pfx);
+ }
+
+ /* Copy the non-prefix bytes into the key buffer. */
+ WT_RET(__wt_buf_set(
+ session, &key->buf, (uint8_t *)data + pfx, size - pfx));
+ }
+
+ /* Optionally compress the key using the Huffman engine. */
+ if (btree->huffman_key != NULL)
+ WT_RET(__wt_huffman_encode(session, btree->huffman_key,
+ key->buf.data, (uint32_t)key->buf.size, &key->buf));
+
+ /* Create an overflow object if the data won't fit. */
+ if (key->buf.size > btree->maxleafkey) {
+ /*
+ * Overflow objects aren't prefix compressed -- rebuild any
+ * object that was prefix compressed.
+ */
+ if (pfx == 0) {
+ WT_STAT_DATA_INCR(session, rec_overflow_key_leaf);
+
+ *is_ovflp = true;
+ return (__wt_rec_cell_build_ovfl(
+ session, r, key, WT_CELL_KEY_OVFL, (uint64_t)0));
+ }
+ return (
+ __rec_cell_build_leaf_key(session, r, NULL, 0, is_ovflp));
+ }
+
+ key->cell_len = __wt_cell_pack_leaf_key(&key->cell, pfx, key->buf.size);
+ key->len = key->cell_len + key->buf.size;
+
+ return (0);
+}
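
The prefix-compression loop above strips the bytes a key shares with the previous key, capped at 255 because the count is stored in one byte, and only when the saving clears the configured minimum. The same calculation in a standalone sketch, not taken from the WiredTiger tree (shared_prefix is an invented name; the min_gain parameter models prefix_compression_min):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Count the leading bytes "key" shares with "prev", capped at UINT8_MAX. */
static uint8_t
shared_prefix(const uint8_t *prev, size_t prev_len,
    const uint8_t *key, size_t key_len, size_t min_gain)
{
    size_t max = prev_len < key_len ? prev_len : key_len;
    uint8_t pfx = 0;

    if (max > UINT8_MAX)
        max = UINT8_MAX;
    while (pfx < max && prev[pfx] == key[pfx])
        ++pfx;

    /* Not worth the decode cost unless we save at least min_gain bytes. */
    return (pfx < min_gain ? 0 : pfx);
}

int
main(void)
{
    const char *prev = "customer/000117", *key = "customer/000118";
    uint8_t pfx = shared_prefix((const uint8_t *)prev, strlen(prev),
        (const uint8_t *)key, strlen(key), 4);

    /* Only "8", the non-shared suffix, is stored for the second key. */
    printf("prefix=%u, stored bytes=%zu\n", (unsigned)pfx, strlen(key) - pfx);
    return (0);
}
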
+
+/*
+ * __wt_bulk_insert_row --
+ * Row-store bulk insert.
+ */
+int
+__wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
+{
+ WT_BTREE *btree;
+ WT_CURSOR *cursor;
+ WT_RECONCILE *r;
+ WT_REC_KV *key, *val;
+ bool ovfl_key;
+
+ r = cbulk->reconcile;
+ btree = S2BT(session);
+ cursor = &cbulk->cbt.iface;
+
+ key = &r->k;
+ val = &r->v;
+ WT_RET(__rec_cell_build_leaf_key(session, r, /* Build key cell */
+ cursor->key.data, cursor->key.size, &ovfl_key));
+ WT_RET(__wt_rec_cell_build_val(session, r, /* Build value cell */
+ cursor->value.data, cursor->value.size, (uint64_t)0));
+
+ /* Boundary: split or write the page. */
+ if (WT_CROSSING_SPLIT_BND(r, key->len + val->len)) {
+ /*
+ * Turn off prefix compression until a full key is written to the

+ * new page, and (unless already working with an overflow key),
+ * rebuild the key without compression.
+ */
+ if (r->key_pfx_compress_conf) {
+ r->key_pfx_compress = false;
+ if (!ovfl_key)
+ WT_RET(__rec_cell_build_leaf_key(
+ session, r, NULL, 0, &ovfl_key));
+ }
+ WT_RET(__wt_rec_split_crossing_bnd(
+ session, r, key->len + val->len));
+ }
+
+ /* Copy the key/value pair onto the page. */
+ __wt_rec_copy_incr(session, r, key);
+ if (val->len == 0)
+ r->any_empty_value = true;
+ else {
+ r->all_empty_value = false;
+ if (btree->dictionary)
+ WT_RET(__wt_rec_dict_replace(session, r, 0, val));
+ __wt_rec_copy_incr(session, r, val);
+ }
+
+ /* Update compression state. */
+ __rec_key_state_update(r, ovfl_key);
+
+ return (0);
+}
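
The WT_CROSSING_SPLIT_BND check above reduces to asking whether the next key/value pair still fits in the space remaining in the current disk-image chunk. A toy version of that accounting, ignoring the minimum-split boundary and page headers the real code also tracks (struct chunk and append_entry are invented for the example):

#include <stddef.h>
#include <stdio.h>

struct chunk {
    size_t split_size;      /* Target chunk size */
    size_t space_avail;     /* Bytes still free in the chunk */
    int chunks_written;
};

/* Append an entry, closing out the current chunk first if it would not fit. */
static void
append_entry(struct chunk *c, size_t entry_len)
{
    if (entry_len > c->space_avail) {       /* Crossing the split boundary */
        ++c->chunks_written;                /* "Write" the current chunk */
        c->space_avail = c->split_size;     /* Start the next one */
    }
    c->space_avail -= entry_len;
}

int
main(void)
{
    struct chunk c = { 4096, 4096, 0 };
    int i;

    for (i = 0; i < 100; ++i)
        append_entry(&c, 100);  /* 100 entries of 100 bytes each */

    /* 40 entries fit per 4KB chunk, so two chunks are closed out. */
    printf("chunks written: %d, bytes in open chunk: %zu\n",
        c.chunks_written, c.split_size - c.space_avail);
    return (0);
}
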
+
+/*
+ * __rec_row_merge --
+ * Merge in a split page.
+ */
+static int
+__rec_row_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
+{
+ WT_ADDR *addr;
+ WT_MULTI *multi;
+ WT_PAGE_MODIFY *mod;
+ WT_REC_KV *key, *val;
+ uint32_t i;
+ bool ovfl_key;
+
+ mod = page->modify;
+
+ key = &r->k;
+ val = &r->v;
+
+ /* For each entry in the split array... */
+ for (multi = mod->mod_multi,
+ i = 0; i < mod->mod_multi_entries; ++multi, ++i) {
+ /* Build the key and value cells. */
+ WT_RET(__rec_cell_build_int_key(session, r,
+ WT_IKEY_DATA(multi->key.ikey),
+ r->cell_zero ? 1 : multi->key.ikey->size, &ovfl_key));
+ r->cell_zero = false;
+
+ addr = &multi->addr;
+ __wt_rec_cell_build_addr(session, r,
+ addr->addr, addr->size, __wt_rec_vtype(addr), WT_RECNO_OOB);
+
+ /* Boundary: split or write the page. */
+ if (__wt_rec_need_split(r, key->len + val->len))
+ WT_RET(__wt_rec_split_crossing_bnd(
+ session, r, key->len + val->len));
+
+ /* Copy the key and value onto the page. */
+ __wt_rec_copy_incr(session, r, key);
+ __wt_rec_copy_incr(session, r, val);
+
+ /* Update compression state. */
+ __rec_key_state_update(r, ovfl_key);
+ }
+ return (0);
+}
+
+/*
+ * __wt_rec_row_int --
+ * Reconcile a row-store internal page.
+ */
+int
+__wt_rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
+{
+ WT_ADDR *addr;
+ WT_BTREE *btree;
+ WT_CELL *cell;
+ WT_CELL_UNPACK *kpack, _kpack, *vpack, _vpack;
+ WT_CHILD_STATE state;
+ WT_DECL_RET;
+ WT_IKEY *ikey;
+ WT_PAGE *child;
+ WT_REC_KV *key, *val;
+ WT_REF *ref;
+ size_t size;
+ u_int vtype;
+ bool hazard, key_onpage_ovfl, ovfl_key;
+ const void *p;
+
+ btree = S2BT(session);
+ child = NULL;
+ hazard = false;
+
+ key = &r->k;
+ kpack = &_kpack;
+ WT_CLEAR(*kpack); /* -Wuninitialized */
+ val = &r->v;
+ vpack = &_vpack;
+ WT_CLEAR(*vpack); /* -Wuninitialized */
+
+ ikey = NULL; /* -Wuninitialized */
+ cell = NULL;
+ key_onpage_ovfl = false;
+
+ WT_RET(__wt_rec_split_init(
+ session, r, page, 0, btree->maxintlpage_precomp));
+
+ /*
+ * Ideally, we'd never store the 0th key on row-store internal pages
+ * because it's never used during tree search and there's no reason
+ * to waste the space. The problem is how we do splits: when we split,
+ * we've potentially picked out several "split points" in the buffer
+ * which is overflowing the maximum page size, and when the overflow
+ * happens, we go back and physically split the buffer, at those split
+ * points, into new pages. It would be both difficult and expensive
+ * to re-process the 0th key at each split point to be an empty key,
+ * so we don't do that. However, we are reconciling an internal page
+ * for whatever reason, and the 0th key is known to be useless. We
+ * truncate the key to a single byte, instead of removing it entirely;
+ * it simplifies various things in other parts of the code (we don't
+ * have to special case transforming the page from its disk image to
+ * its in-memory version, for example).
+ */
+ r->cell_zero = true;
+
+ /* For each entry in the in-memory page... */
+ WT_INTL_FOREACH_BEGIN(session, page, ref) {
+ /*
+ * There are different paths if the key is an overflow item vs.
+ * a straight-forward on-page value. If an overflow item, we
+ * would have instantiated it, and we can use that fact to set
+ * things up.
+ *
+ * Note the cell reference and unpacked key cell are available
+ * only in the case of an instantiated, off-page key, we don't
+ * bother setting them if that's not possible.
+ */
+ if (F_ISSET_ATOMIC(page, WT_PAGE_OVERFLOW_KEYS)) {
+ cell = NULL;
+ key_onpage_ovfl = false;
+ ikey = __wt_ref_key_instantiated(ref);
+ if (ikey != NULL && ikey->cell_offset != 0) {
+ cell =
+ WT_PAGE_REF_OFFSET(page, ikey->cell_offset);
+ __wt_cell_unpack(cell, kpack);
+ key_onpage_ovfl = kpack->ovfl &&
+ kpack->raw != WT_CELL_KEY_OVFL_RM;
+ }
+ }
+
+ WT_ERR(__wt_rec_child_modify(session, r, ref, &hazard, &state));
+ addr = ref->addr;
+ child = ref->page;
+
+ switch (state) {
+ case WT_CHILD_IGNORE:
+ /*
+ * Ignored child.
+ *
+ * Overflow keys referencing pages we're not writing are
+ * no longer useful, schedule them for discard. Don't
+ * worry about instantiation, internal page keys are
+ * always instantiated. Don't worry about reuse,
+ * reusing this key in this reconciliation is unlikely.
+ */
+ if (key_onpage_ovfl)
+ WT_ERR(__wt_ovfl_discard_add(
+ session, page, kpack->cell));
+ WT_CHILD_RELEASE_ERR(session, hazard, ref);
+ continue;
+
+ case WT_CHILD_MODIFIED:
+ /*
+ * Modified child. Empty pages are merged into the
+ * parent and discarded.
+ */
+ switch (child->modify->rec_result) {
+ case WT_PM_REC_EMPTY:
+ /*
+ * Overflow keys referencing empty pages are no
+ * longer useful, schedule them for discard.
+ * Don't worry about instantiation, internal
+ * page keys are always instantiated. Don't
+ * worry about reuse, reusing this key in this
+ * reconciliation is unlikely.
+ */
+ if (key_onpage_ovfl)
+ WT_ERR(__wt_ovfl_discard_add(
+ session, page, kpack->cell));
+ WT_CHILD_RELEASE_ERR(session, hazard, ref);
+ continue;
+ case WT_PM_REC_MULTIBLOCK:
+ /*
+ * Overflow keys referencing split pages are no
+ * longer useful (the split page's key is the
+ * interesting key); schedule them for discard.
+ * Don't worry about instantiation, internal
+ * page keys are always instantiated. Don't
+ * worry about reuse, reusing this key in this
+ * reconciliation is unlikely.
+ */
+ if (key_onpage_ovfl)
+ WT_ERR(__wt_ovfl_discard_add(
+ session, page, kpack->cell));
+
+ WT_ERR(__rec_row_merge(session, r, child));
+ WT_CHILD_RELEASE_ERR(session, hazard, ref);
+ continue;
+ case WT_PM_REC_REPLACE:
+ /*
+ * If the page is replaced, the page's modify
+ * structure has the page's address.
+ */
+ addr = &child->modify->mod_replace;
+ break;
+ WT_ILLEGAL_VALUE_ERR(
+ session, child->modify->rec_result);
+ }
+ break;
+ case WT_CHILD_ORIGINAL:
+ /* Original child. */
+ break;
+ case WT_CHILD_PROXY:
+ /* Deleted child where we write a proxy cell. */
+ break;
+ }
+
+ /*
+ * Build the value cell, the child page's address. Addr points
+ * to an on-page cell or an off-page WT_ADDR structure. There's
+ * a special cell type in the case of page deletion requiring
+ * a proxy cell, otherwise use the information from the addr or
+ * original cell.
+ */
+ if (__wt_off_page(page, addr)) {
+ p = addr->addr;
+ size = addr->size;
+ vtype = state == WT_CHILD_PROXY ?
+ WT_CELL_ADDR_DEL : __wt_rec_vtype(addr);
+ } else {
+ __wt_cell_unpack(ref->addr, vpack);
+ p = vpack->data;
+ size = vpack->size;
+ vtype = state == WT_CHILD_PROXY ?
+ WT_CELL_ADDR_DEL : (u_int)vpack->raw;
+ }
+ __wt_rec_cell_build_addr(
+ session, r, p, size, vtype, WT_RECNO_OOB);
+ WT_CHILD_RELEASE_ERR(session, hazard, ref);
+
+ /*
+ * Build key cell.
+ * Truncate any 0th key, internal pages don't need 0th keys.
+ */
+ if (key_onpage_ovfl) {
+ key->buf.data = cell;
+ key->buf.size = __wt_cell_total_len(kpack);
+ key->cell_len = 0;
+ key->len = key->buf.size;
+ ovfl_key = true;
+ } else {
+ __wt_ref_key(page, ref, &p, &size);
+ WT_ERR(__rec_cell_build_int_key(
+ session, r, p, r->cell_zero ? 1 : size, &ovfl_key));
+ }
+ r->cell_zero = false;
+
+ /* Boundary: split or write the page. */
+ if (__wt_rec_need_split(r, key->len + val->len)) {
+ /*
+ * In one path above, we copied address blocks from the
+ * page rather than building the actual key. In that
+ * case, we have to build the key now because we are
+ * about to promote it.
+ */
+ if (key_onpage_ovfl) {
+ WT_ERR(__wt_buf_set(session, r->cur,
+ WT_IKEY_DATA(ikey), ikey->size));
+ key_onpage_ovfl = false;
+ }
+
+ WT_ERR(__wt_rec_split_crossing_bnd(
+ session, r, key->len + val->len));
+ }
+
+ /* Copy the key and value onto the page. */
+ __wt_rec_copy_incr(session, r, key);
+ __wt_rec_copy_incr(session, r, val);
+
+ /* Update compression state. */
+ __rec_key_state_update(r, ovfl_key);
+ } WT_INTL_FOREACH_END;
+
+ /* Write the remnant page. */
+ return (__wt_rec_split_finish(session, r));
+
+err: WT_CHILD_RELEASE(session, hazard, ref);
+ return (ret);
+}
+
+/*
+ * __rec_row_leaf_insert --
+ * Walk an insert chain, writing K/V pairs.
+ */
+static int
+__rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins)
+{
+ WT_BTREE *btree;
+ WT_CURSOR_BTREE *cbt;
+ WT_REC_KV *key, *val;
+ WT_UPDATE *upd;
+ bool ovfl_key, upd_saved;
+
+ btree = S2BT(session);
+ cbt = &r->update_modify_cbt;
+
+ key = &r->k;
+ val = &r->v;
+
+ for (; ins != NULL; ins = WT_SKIP_NEXT(ins)) {
+ WT_RET(__wt_rec_txn_read(
+ session, r, ins, NULL, NULL, &upd_saved, &upd));
+
+ if (upd == NULL) {
+ /*
+ * If no update is visible but some were saved, check
+ * for splits.
+ */
+ if (!upd_saved)
+ continue;
+ if (!__wt_rec_need_split(r, WT_INSERT_KEY_SIZE(ins)))
+ continue;
+
+ /* Copy the current key into place and then split. */
+ WT_RET(__wt_buf_set(session, r->cur,
+ WT_INSERT_KEY(ins), WT_INSERT_KEY_SIZE(ins)));
+ WT_RET(__wt_rec_split_crossing_bnd(
+ session, r, WT_INSERT_KEY_SIZE(ins)));
+
+ /*
+ * Turn off prefix and suffix compression until a full
+ * key is written into the new page.
+ */
+ r->key_pfx_compress = r->key_sfx_compress = false;
+ continue;
+ }
+
+ switch (upd->type) {
+ case WT_UPDATE_MODIFY:
+ /*
+ * Impossible slot, there's no backing on-page
+ * item.
+ */
+ cbt->slot = UINT32_MAX;
+ WT_RET(__wt_value_return_upd(
+ session, cbt, upd, F_ISSET(r, WT_REC_VISIBLE_ALL)));
+ WT_RET(__wt_rec_cell_build_val(session, r,
+ cbt->iface.value.data,
+ cbt->iface.value.size, (uint64_t)0));
+ break;
+ case WT_UPDATE_STANDARD:
+ if (upd->size == 0)
+ val->len = 0;
+ else
+ WT_RET(__wt_rec_cell_build_val(session,
+ r, upd->data, upd->size,
+ (uint64_t)0));
+ break;
+ case WT_UPDATE_TOMBSTONE:
+ continue;
+ WT_ILLEGAL_VALUE(session, upd->type);
+ }
+
+ /* Build key cell. */
+ WT_RET(__rec_cell_build_leaf_key(session, r,
+ WT_INSERT_KEY(ins), WT_INSERT_KEY_SIZE(ins), &ovfl_key));
+
+ /* Boundary: split or write the page. */
+ if (__wt_rec_need_split(r, key->len + val->len)) {
+ /*
+ * Turn off prefix compression until a full key is written
+ * to the new page, and (unless already working with an
+ * overflow key), rebuild the key without compression.
+ */
+ if (r->key_pfx_compress_conf) {
+ r->key_pfx_compress = false;
+ if (!ovfl_key)
+ WT_RET(__rec_cell_build_leaf_key(
+ session, r, NULL, 0, &ovfl_key));
+ }
+
+ WT_RET(__wt_rec_split_crossing_bnd(
+ session, r, key->len + val->len));
+ }
+
+ /* Copy the key/value pair onto the page. */
+ __wt_rec_copy_incr(session, r, key);
+ if (val->len == 0)
+ r->any_empty_value = true;
+ else {
+ r->all_empty_value = false;
+ if (btree->dictionary)
+ WT_RET(__wt_rec_dict_replace(
+ session, r, 0, val));
+ __wt_rec_copy_incr(session, r, val);
+ }
+
+ /* Update compression state. */
+ __rec_key_state_update(r, ovfl_key);
+ }
+
+ return (0);
+}
+
+/*
+ * __wt_rec_row_leaf --
+ * Reconcile a row-store leaf page.
+ */
+int
+__wt_rec_row_leaf(WT_SESSION_IMPL *session,
+ WT_RECONCILE *r, WT_PAGE *page, WT_SALVAGE_COOKIE *salvage)
+{
+ WT_BTREE *btree;
+ WT_CELL *cell;
+ WT_CELL_UNPACK *kpack, _kpack, *vpack, _vpack;
+ WT_CURSOR_BTREE *cbt;
+ WT_DECL_ITEM(tmpkey);
+ WT_DECL_ITEM(tmpval);
+ WT_DECL_RET;
+ WT_IKEY *ikey;
+ WT_INSERT *ins;
+ WT_REC_KV *key, *val;
+ WT_ROW *rip;
+ WT_UPDATE *upd;
+ size_t size;
+ uint64_t slvg_skip;
+ uint32_t i;
+ bool dictionary, key_onpage_ovfl, ovfl_key;
+ void *copy;
+ const void *p;
+
+ btree = S2BT(session);
+ cbt = &r->update_modify_cbt;
+ slvg_skip = salvage == NULL ? 0 : salvage->skip;
+
+ key = &r->k;
+ val = &r->v;
+ vpack = &_vpack;
+
+ WT_RET(__wt_rec_split_init(
+ session, r, page, 0, btree->maxleafpage_precomp));
+
+ /*
+ * Write any K/V pairs inserted into the page before the first from-disk
+ * key on the page.
+ */
+ if ((ins = WT_SKIP_FIRST(WT_ROW_INSERT_SMALLEST(page))) != NULL)
+ WT_RET(__rec_row_leaf_insert(session, r, ins));
+
+ /*
+ * Temporary buffers in which to instantiate any uninstantiated keys
+ * or value items we need.
+ */
+ WT_ERR(__wt_scr_alloc(session, 0, &tmpkey));
+ WT_ERR(__wt_scr_alloc(session, 0, &tmpval));
+
+ /* For each entry in the page... */
+ WT_ROW_FOREACH(page, rip, i) {
+ /*
+ * The salvage code, on some rare occasions, wants to reconcile
+ * a page but skip some leading records on the page. Because
+ * the row-store leaf reconciliation function copies keys from
+ * the original disk page, this is non-trivial -- just changing
+ * the in-memory pointers isn't sufficient, we have to change
+ * the WT_CELL structures on the disk page, too. It's ugly, but
+ * we pass in a value that tells us how many records to skip in
+ * this case.
+ */
+ if (slvg_skip != 0) {
+ --slvg_skip;
+ continue;
+ }
+
+ /*
+ * Figure out the key: set any cell reference (and unpack it),
+ * set any instantiated key reference.
+ */
+ copy = WT_ROW_KEY_COPY(rip);
+ (void)__wt_row_leaf_key_info(
+ page, copy, &ikey, &cell, NULL, NULL);
+ if (cell == NULL)
+ kpack = NULL;
+ else {
+ kpack = &_kpack;
+ __wt_cell_unpack(cell, kpack);
+ }
+
+ /* Unpack the on-page value cell, and look for an update. */
+ __wt_row_leaf_value_cell(page, rip, NULL, vpack);
+ WT_ERR(__wt_rec_txn_read(
+ session, r, NULL, rip, vpack, NULL, &upd));
+
+ /* Build value cell. */
+ dictionary = false;
+ if (upd == NULL) {
+ /*
+ * When the page was read into memory, there may not
+ * have been a value item.
+ *
+ * If there was a value item, check if it's a dictionary
+ * cell (a copy of another item on the page). If it's a
+ * copy, we have to create a new value item as the old
+ * item might have been discarded from the page.
+ */
+ if (vpack->raw == WT_CELL_VALUE_COPY) {
+ /* If the item is Huffman encoded, decode it. */
+ if (btree->huffman_value == NULL) {
+ p = vpack->data;
+ size = vpack->size;
+ } else {
+ WT_ERR(__wt_huffman_decode(session,
+ btree->huffman_value,
+ vpack->data, vpack->size,
+ tmpval));
+ p = tmpval->data;
+ size = tmpval->size;
+ }
+ WT_ERR(__wt_rec_cell_build_val(
+ session, r, p, size, (uint64_t)0));
+ dictionary = true;
+ } else if (vpack->raw == WT_CELL_VALUE_OVFL_RM) {
+ /*
+ * If doing an update save and restore, and the
+ * underlying value is a removed overflow value,
+ * we end up here.
+ *
+ * If necessary, when the overflow value was
+ * originally removed, reconciliation appended
+ * a globally visible copy of the value to the
+ * key's update list, meaning the on-page item
+ * isn't accessed after page re-instantiation.
+ *
+ * Assert the case.
+ */
+ WT_ASSERT(session,
+ F_ISSET(r, WT_REC_UPDATE_RESTORE));
+
+ /*
+ * If the key is also a removed overflow item,
+ * don't write anything at all.
+ *
+ * We don't have to write anything because the
+ * code re-instantiating the page gets the key
+ * to match the saved list of updates from the
+ * original page. By not putting the key on
+ * the page, we'll move the key/value set from
+ * a row-store leaf page slot to an insert list,
+ * but that shouldn't matter.
+ *
+ * The reason we bother with the test is because
+ * overflows are expensive to write. It's hard
+ * to imagine a real workload where this test is
+ * worth the effort, but it's a simple test.
+ */
+ if (kpack != NULL &&
+ kpack->raw == WT_CELL_KEY_OVFL_RM)
+ goto leaf_insert;
+
+ /*
+ * The on-page value will never be accessed,
+ * write a placeholder record.
+ */
+ WT_ERR(__wt_rec_cell_build_val(session, r,
+ "ovfl-unused", strlen("ovfl-unused"),
+ (uint64_t)0));
+ } else {
+ val->buf.data = vpack->cell;
+ val->buf.size = __wt_cell_total_len(vpack);
+ val->cell_len = 0;
+ val->len = val->buf.size;
+
+ /* Track if page has overflow items. */
+ if (vpack->ovfl)
+ r->ovfl_items = true;
+ }
+ } else {
+ /*
+ * The first time we find an overflow record we're not
+ * going to use, discard the underlying blocks.
+ */
+ if (vpack->ovfl && vpack->raw != WT_CELL_VALUE_OVFL_RM)
+ WT_ERR(__wt_ovfl_remove(session,
+ page, vpack, F_ISSET(r, WT_REC_EVICT)));
+
+ switch (upd->type) {
+ case WT_UPDATE_MODIFY:
+ cbt->slot = WT_ROW_SLOT(page, rip);
+ WT_ERR(__wt_value_return_upd(session, cbt, upd,
+ F_ISSET(r, WT_REC_VISIBLE_ALL)));
+ WT_ERR(__wt_rec_cell_build_val(session, r,
+ cbt->iface.value.data,
+ cbt->iface.value.size, (uint64_t)0));
+ dictionary = true;
+ break;
+ case WT_UPDATE_STANDARD:
+ /*
+ * If no value, nothing needs to be copied.
+ * Otherwise, build the value's chunk from the
+ * update value.
+ */
+ if (upd->size == 0) {
+ val->buf.data = NULL;
+ val->cell_len =
+ val->len = val->buf.size = 0;
+ } else {
+ WT_ERR(__wt_rec_cell_build_val(
+ session, r,
+ upd->data, upd->size, (uint64_t)0));
+ dictionary = true;
+ }
+ break;
+ case WT_UPDATE_TOMBSTONE:
+ /*
+ * If this key/value pair was deleted, we're
+ * done.
+ *
+ * Overflow keys referencing discarded values
+ * are no longer useful, discard the backing
+ * blocks. Don't worry about reuse, reusing
+ * keys from a row-store page reconciliation
+ * seems unlikely enough to ignore.
+ */
+ if (kpack != NULL && kpack->ovfl &&
+ kpack->raw != WT_CELL_KEY_OVFL_RM) {
+ /*
+ * Keys are part of the name-space, we
+ * can't remove them from the in-memory
+ * tree; if an overflow key was deleted
+ * without being instantiated (for
+ * example, cursor-based truncation), do
+ * it now.
+ */
+ if (ikey == NULL)
+ WT_ERR(__wt_row_leaf_key(
+ session,
+ page, rip, tmpkey, true));
+
+ WT_ERR(__wt_ovfl_discard_add(
+ session, page, kpack->cell));
+ }
+
+ /*
+ * We aren't actually creating the key so we
+ * can't use bytes from this key to provide
+ * prefix information for a subsequent key.
+ */
+ tmpkey->size = 0;
+
+ /* Proceed with appended key/value pairs. */
+ goto leaf_insert;
+ WT_ILLEGAL_VALUE_ERR(session, upd->type);
+ }
+ }
+
+ /*
+ * Build key cell.
+ *
+ * If the key is an overflow key that hasn't been removed, use
+ * the original backing blocks.
+ */
+ key_onpage_ovfl = kpack != NULL &&
+ kpack->ovfl && kpack->raw != WT_CELL_KEY_OVFL_RM;
+ if (key_onpage_ovfl) {
+ key->buf.data = cell;
+ key->buf.size = __wt_cell_total_len(kpack);
+ key->cell_len = 0;
+ key->len = key->buf.size;
+ ovfl_key = true;
+
+ /*
+ * We aren't creating a key so we can't use this key as
+ * a prefix for a subsequent key.
+ */
+ tmpkey->size = 0;
+
+ /* Track if page has overflow items. */
+ r->ovfl_items = true;
+ } else {
+ /*
+ * Get the key from the page or an instantiated key, or
+ * inline building the key from a previous key (it's a
+ * fast path for simple, prefix-compressed keys), or by
+ * building the key from scratch.
+ */
+ if (__wt_row_leaf_key_info(page, copy,
+ NULL, &cell, &tmpkey->data, &tmpkey->size))
+ goto build;
+
+ kpack = &_kpack;
+ __wt_cell_unpack(cell, kpack);
+ if (btree->huffman_key == NULL &&
+ kpack->type == WT_CELL_KEY &&
+ tmpkey->size >= kpack->prefix) {
+ /*
+ * The previous clause checked for a prefix of
+ * zero, which means the temporary buffer must
+ * have a non-zero size, and it references a
+ * valid key.
+ */
+ WT_ASSERT(session, tmpkey->size != 0);
+
+ /*
+ * Grow the buffer as necessary, ensuring the data
+ * has been copied into local buffer space,
+ * then append the suffix to the prefix already
+ * in the buffer.
+ *
+ * Don't grow the buffer unnecessarily or copy
+ * data we don't need, truncate the item's data
+ * length to the prefix bytes.
+ */
+ tmpkey->size = kpack->prefix;
+ WT_ERR(__wt_buf_grow(session,
+ tmpkey, tmpkey->size + kpack->size));
+ memcpy((uint8_t *)tmpkey->mem + tmpkey->size,
+ kpack->data, kpack->size);
+ tmpkey->size += kpack->size;
+ } else
+ WT_ERR(__wt_row_leaf_key_copy(
+ session, page, rip, tmpkey));
+build:
+ WT_ERR(__rec_cell_build_leaf_key(session, r,
+ tmpkey->data, tmpkey->size, &ovfl_key));
+ }
+
+ /* Boundary: split or write the page. */
+ if (__wt_rec_need_split(r, key->len + val->len)) {
+ /*
+ * If we copied address blocks from the page rather than
+ * building the actual key, we have to build the key now
+ * because we are about to promote it.
+ */
+ if (key_onpage_ovfl) {
+ WT_ERR(__wt_dsk_cell_data_ref(session,
+ WT_PAGE_ROW_LEAF, kpack, r->cur));
+ WT_NOT_READ(key_onpage_ovfl, false);
+ }
+
+ /*
+ * Turn off prefix compression until a full key is written
+ * to the new page, and (unless already working with an
+ * overflow key), rebuild the key without compression.
+ */
+ if (r->key_pfx_compress_conf) {
+ r->key_pfx_compress = false;
+ if (!ovfl_key)
+ WT_ERR(__rec_cell_build_leaf_key(
+ session, r, NULL, 0, &ovfl_key));
+ }
+
+ WT_ERR(__wt_rec_split_crossing_bnd(
+ session, r, key->len + val->len));
+ }
+
+ /* Copy the key/value pair onto the page. */
+ __wt_rec_copy_incr(session, r, key);
+ if (val->len == 0)
+ r->any_empty_value = true;
+ else {
+ r->all_empty_value = false;
+ if (dictionary && btree->dictionary)
+ WT_ERR(__wt_rec_dict_replace(
+ session, r, 0, val));
+ __wt_rec_copy_incr(session, r, val);
+ }
+
+ /* Update compression state. */
+ __rec_key_state_update(r, ovfl_key);
+
+leaf_insert: /* Write any K/V pairs inserted into the page after this key. */
+ if ((ins = WT_SKIP_FIRST(WT_ROW_INSERT(page, rip))) != NULL)
+ WT_ERR(__rec_row_leaf_insert(session, r, ins));
+ }
+
+ /* Write the remnant page. */
+ ret = __wt_rec_split_finish(session, r);
+
+err: __wt_scr_free(session, &tmpkey);
+ __wt_scr_free(session, &tmpval);
+ return (ret);
+}
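
The fast path above rebuilds each key by keeping kpack->prefix bytes of the previously materialized key and appending the suffix bytes stored in the cell. A self-contained sketch of that reconstruction, not part of the patch (rebuild_key is an invented helper; the real code grows a scratch buffer instead of using a fixed array):

#include <stdio.h>
#include <string.h>

/*
 * Rebuild a key in "buf" from the previous key already in "buf" plus the
 * (prefix, suffix) pair stored in a row-store key cell.
 */
static size_t
rebuild_key(char *buf, size_t prev_len,
    size_t prefix, const char *suffix, size_t suffix_len)
{
    /* The prefix count never exceeds the previous key's length. */
    if (prefix > prev_len)
        prefix = prev_len;
    memcpy(buf + prefix, suffix, suffix_len);
    return (prefix + suffix_len);
}

int
main(void)
{
    char buf[64];
    size_t len;

    /* The first key on the page is stored in full. */
    memcpy(buf, "customer/000117", 15);
    len = 15;

    /* Next cell: prefix=14, suffix="8" rebuilds "customer/000118". */
    len = rebuild_key(buf, len, 14, "8", 1);
    printf("%.*s\n", (int)len, buf);
    return (0);
}
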
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_visibility.c b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
new file mode 100644
index 00000000000..97903db9e9e
--- /dev/null
+++ b/src/third_party/wiredtiger/src/reconcile/rec_visibility.c
@@ -0,0 +1,405 @@
+/*-
+ * Copyright (c) 2014-2019 MongoDB, Inc.
+ * Copyright (c) 2008-2014 WiredTiger, Inc.
+ * All rights reserved.
+ *
+ * See the file LICENSE for redistribution information.
+ */
+
+#include "wt_internal.h"
+
+/*
+ * __rec_update_save --
+ * Save a WT_UPDATE list for later restoration.
+ */
+static int
+__rec_update_save(WT_SESSION_IMPL *session, WT_RECONCILE *r,
+ WT_INSERT *ins, void *ripcip, WT_UPDATE *onpage_upd, size_t upd_memsize)
+{
+ WT_RET(__wt_realloc_def(
+ session, &r->supd_allocated, r->supd_next + 1, &r->supd));
+ r->supd[r->supd_next].ins = ins;
+ r->supd[r->supd_next].ripcip = ripcip;
+ r->supd[r->supd_next].onpage_upd = onpage_upd;
+ ++r->supd_next;
+ r->supd_memsize += upd_memsize;
+ return (0);
+}
+
+/*
+ * __rec_append_orig_value --
+ * Append the key's original value to its update list.
+ */
+static int
+__rec_append_orig_value(WT_SESSION_IMPL *session,
+ WT_PAGE *page, WT_UPDATE *upd, WT_CELL_UNPACK *unpack)
+{
+ WT_DECL_ITEM(tmp);
+ WT_DECL_RET;
+ WT_UPDATE *append;
+ size_t size;
+
+ /* Done if at least one self-contained update is globally visible. */
+ for (;; upd = upd->next) {
+ if (WT_UPDATE_DATA_VALUE(upd) &&
+ __wt_txn_upd_visible_all(session, upd))
+ return (0);
+
+ /* Add the original value after birthmarks. */
+ if (upd->type == WT_UPDATE_BIRTHMARK) {
+ WT_ASSERT(session, unpack != NULL &&
+ unpack->type != WT_CELL_DEL);
+ break;
+ }
+
+ /* Leave reference at the last item in the chain. */
+ if (upd->next == NULL)
+ break;
+ }
+
+ /*
+ * We need the original on-page value for some reader: get a copy and
+ * append it to the end of the update list with a transaction ID that
+ * guarantees its visibility.
+ *
+ * If we don't have a value cell, it's an insert/append list key/value
+ * pair which simply doesn't exist for some reader; place a deleted
+ * record at the end of the update list.
+ */
+ append = NULL; /* -Wconditional-uninitialized */
+ size = 0; /* -Wconditional-uninitialized */
+ if (unpack == NULL || unpack->type == WT_CELL_DEL)
+ WT_RET(__wt_update_alloc(session,
+ NULL, &append, &size, WT_UPDATE_TOMBSTONE));
+ else {
+ WT_RET(__wt_scr_alloc(session, 0, &tmp));
+ WT_ERR(__wt_page_cell_data_ref(session, page, unpack, tmp));
+ WT_ERR(__wt_update_alloc(
+ session, tmp, &append, &size, WT_UPDATE_STANDARD));
+ }
+
+ /*
+ * If we're saving the original value for a birthmark, transfer over
+ * the transaction ID and clear out the birthmark update.
+ *
+ * Else, set the entry's transaction information to the lowest possible
+ * value. Cleared memory matches the lowest possible transaction ID and
+ * timestamp, do nothing.
+ */
+ if (upd->type == WT_UPDATE_BIRTHMARK) {
+ append->txnid = upd->txnid;
+ append->timestamp = upd->timestamp;
+ append->next = upd->next;
+ }
+
+ /* Append the new entry into the update list. */
+ WT_PUBLISH(upd->next, append);
+ __wt_cache_page_inmem_incr(session, page, size);
+
+ if (upd->type == WT_UPDATE_BIRTHMARK) {
+ upd->type = WT_UPDATE_STANDARD;
+ upd->txnid = WT_TXN_ABORTED;
+ }
+
+err: __wt_scr_free(session, &tmp);
+ return (ret);
+}
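
__rec_append_orig_value keeps the pre-image alive for readers by appending a copy of the on-page value (or a tombstone) at the tail of the key's update chain, with transaction information that makes it visible to everyone. A simplified, single-threaded sketch of that list manipulation, omitting the WT_PUBLISH barrier and the birthmark transfer (struct update here is a stand-in, not WiredTiger's WT_UPDATE):

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct update {
    struct update *next;
    uint64_t txnid;
    char value[32];
};

/* Append a copy of the on-page value at the end of the update chain. */
static int
append_orig_value(struct update *head, const char *onpage_value)
{
    struct update *upd, *append;

    /* Walk to the last entry in the chain. */
    for (upd = head; upd->next != NULL; upd = upd->next)
        ;

    if ((append = calloc(1, sizeof(*append))) == NULL)
        return (-1);
    append->txnid = 0;      /* "None": visible to every reader */
    snprintf(append->value, sizeof(append->value), "%s", onpage_value);

    upd->next = append;     /* The real code publishes this with a barrier */
    return (0);
}

int
main(void)
{
    struct update newer = { NULL, 20, "new-value" };

    if (append_orig_value(&newer, "original-on-page-value") != 0)
        return (1);
    printf("%s -> %s\n", newer.value, newer.next->value);
    free(newer.next);
    return (0);
}
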
+
+/*
+ * __wt_rec_txn_read --
+ * Return the update in a list that should be written (or NULL if none can
+ * be written).
+ */
+int
+__wt_rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
+ WT_INSERT *ins, void *ripcip, WT_CELL_UNPACK *vpack,
+ bool *upd_savedp, WT_UPDATE **updp)
+{
+ WT_PAGE *page;
+ WT_UPDATE *first_ts_upd, *first_txn_upd, *first_upd, *upd;
+ wt_timestamp_t timestamp;
+ size_t upd_memsize;
+ uint64_t max_txn, txnid;
+ bool all_visible, prepared, skipped_birthmark, uncommitted, upd_saved;
+
+ if (upd_savedp != NULL)
+ *upd_savedp = false;
+ *updp = NULL;
+
+ page = r->page;
+ first_ts_upd = first_txn_upd = NULL;
+ upd_memsize = 0;
+ max_txn = WT_TXN_NONE;
+ prepared = skipped_birthmark = uncommitted = upd_saved = false;
+
+ /*
+ * If called with a WT_INSERT item, use its WT_UPDATE list (which must
+ * exist), otherwise check for an on-page row-store WT_UPDATE list
+ * (which may not exist). Return immediately if the item has no updates.
+ */
+ if (ins != NULL)
+ first_upd = ins->upd;
+ else if ((first_upd = WT_ROW_UPDATE(page, ripcip)) == NULL)
+ return (0);
+
+ for (upd = first_upd; upd != NULL; upd = upd->next) {
+ if ((txnid = upd->txnid) == WT_TXN_ABORTED)
+ continue;
+
+ ++r->updates_seen;
+ upd_memsize += WT_UPDATE_MEMSIZE(upd);
+
+ /*
+ * Track the first update in the chain that is not aborted and
+ * the maximum transaction ID.
+ */
+ if (first_txn_upd == NULL)
+ first_txn_upd = upd;
+
+ /* Track the largest transaction ID seen. */
+ if (WT_TXNID_LT(max_txn, txnid))
+ max_txn = txnid;
+
+ /*
+ * Check whether the update was committed before reconciliation
+ * started. The global commit point can move forward during
+ * reconciliation so we use a cached copy to avoid races when a
+ * concurrent transaction commits or rolls back while we are
+ * examining its updates. As prepared transaction IDs are
+ * globally visible, we need to check the update state as well.
+ */
+ if (F_ISSET(r, WT_REC_EVICT)) {
+ if (upd->prepare_state == WT_PREPARE_LOCKED ||
+ upd->prepare_state == WT_PREPARE_INPROGRESS)
+ prepared = true;
+
+ if (F_ISSET(r, WT_REC_VISIBLE_ALL) ?
+ WT_TXNID_LE(r->last_running, txnid) :
+ !__txn_visible_id(session, txnid))
+ uncommitted = r->update_uncommitted = true;
+
+ if (prepared || uncommitted)
+ continue;
+ }
+
+ /* Track the first update with non-zero timestamp. */
+ if (first_ts_upd == NULL && upd->timestamp != 0)
+ first_ts_upd = upd;
+
+ /*
+ * Find the first update we can use.
+ *
+ * Update/restore eviction can handle any update (including
+ * uncommitted updates). Lookaside eviction can save any
+ * committed update. Regular eviction checks that the maximum
+ * transaction ID and timestamp seen are stable.
+ *
+ * Lookaside and update/restore eviction try to choose the same
+ * version as a subsequent checkpoint, so that checkpoint can
+ * skip over pages with lookaside entries. If the application
+ * has supplied a stable timestamp, we assume (a) that it is
+ * old, and (b) that the next checkpoint will use it, so we wait
+ * to see a stable update. If there is no stable timestamp, we
+ * assume the next checkpoint will write the most recent version
+ * (but we save enough information that checkpoint can fix
+ * things up if we choose an update that is too new).
+ */
+ if (*updp == NULL && r->las_skew_newest)
+ *updp = upd;
+
+ if (F_ISSET(r, WT_REC_VISIBLE_ALL) ?
+ !__wt_txn_upd_visible_all(session, upd) :
+ !__wt_txn_upd_visible(session, upd)) {
+ if (F_ISSET(r, WT_REC_EVICT))
+ ++r->updates_unstable;
+
+ /*
+ * Rare case: when applications run at low isolation
+ * levels, update/restore eviction may see a stable
+ * update followed by an uncommitted update. Give up
+ * in that case: we need to discard updates from the
+ * stable update and older for correctness and we can't
+ * discard an uncommitted update.
+ */
+ if (F_ISSET(r, WT_REC_UPDATE_RESTORE) &&
+ *updp != NULL && (uncommitted || prepared)) {
+ r->leave_dirty = true;
+ return (__wt_set_return(session, EBUSY));
+ }
+
+ if (upd->type == WT_UPDATE_BIRTHMARK)
+ skipped_birthmark = true;
+
+ continue;
+ }
+
+ /*
+ * Lookaside without stable timestamp was taken care of above
+ * (set to the first uncommitted transaction). Lookaside with
+ * stable timestamp always takes the first stable update.
+ */
+ if (*updp == NULL)
+ *updp = upd;
+
+ if (!F_ISSET(r, WT_REC_EVICT))
+ break;
+ }
+
+ /* Keep track of the selected update. */
+ upd = *updp;
+
+ /* Reconciliation should never see an aborted or reserved update. */
+ WT_ASSERT(session, upd == NULL ||
+ (upd->txnid != WT_TXN_ABORTED && upd->type != WT_UPDATE_RESERVE));
+
+ /* If all of the updates were aborted, quit. */
+ if (first_txn_upd == NULL) {
+ WT_ASSERT(session, upd == NULL);
+ return (0);
+ }
+
+ /* If no updates were skipped, record that we're making progress. */
+ if (upd == first_txn_upd)
+ r->update_used = true;
+
+ /*
+ * The checkpoint transaction is special. Make sure we never write
+ * metadata updates from a checkpoint in a concurrent session.
+ */
+ WT_ASSERT(session, !WT_IS_METADATA(session->dhandle) ||
+ upd == NULL || upd->txnid == WT_TXN_NONE ||
+ upd->txnid != S2C(session)->txn_global.checkpoint_state.id ||
+ WT_SESSION_IS_CHECKPOINT(session));
+
+ /*
+ * Track the most recent transaction in the page. We store this in the
+ * tree at the end of reconciliation in the service of checkpoints, it
+ * is used to avoid discarding trees from memory when they have changes
+ * required to satisfy a snapshot read.
+ */
+ if (WT_TXNID_LT(r->max_txn, max_txn))
+ r->max_txn = max_txn;
+
+ /* Update the maximum timestamp. */
+ if (first_ts_upd != NULL && r->max_timestamp < first_ts_upd->timestamp)
+ r->max_timestamp = first_ts_upd->timestamp;
+
+ /*
+ * If the update we chose was a birthmark, or we are doing
+ * update-restore and we skipped a birthmark, the original on-page
+ * value must be retained.
+ */
+ if (upd != NULL &&
+ (upd->type == WT_UPDATE_BIRTHMARK ||
+ (F_ISSET(r, WT_REC_UPDATE_RESTORE) && skipped_birthmark)))
+ *updp = NULL;
+
+ /*
+ * Check if all updates on the page are visible. If not, it must stay
+ * dirty unless we are saving updates to the lookaside table.
+ *
+ * Updates can be out of transaction ID order (but not out of timestamp
+ * order), so we track the maximum transaction ID and the newest update
+ * with a timestamp (if any).
+ */
+ timestamp = first_ts_upd == NULL ? 0 : first_ts_upd->timestamp;
+ all_visible = upd == first_txn_upd && !(uncommitted || prepared) &&
+ (F_ISSET(r, WT_REC_VISIBLE_ALL) ?
+ __wt_txn_visible_all(session, max_txn, timestamp) :
+ __wt_txn_visible(session, max_txn, timestamp));
+
+ if (all_visible)
+ goto check_original_value;
+
+ r->leave_dirty = true;
+
+ if (F_ISSET(r, WT_REC_VISIBILITY_ERR))
+ WT_PANIC_RET(session, EINVAL,
+ "reconciliation error, update not visible");
+
+ /*
+ * If not trying to evict the page, we know what we'll write and we're
+ * done.
+ */
+ if (!F_ISSET(r, WT_REC_EVICT))
+ goto check_original_value;
+
+ /*
+ * We are attempting eviction with changes that are not yet stable
+ * (i.e. globally visible). There are two ways to continue, the
+ * save/restore eviction path or the lookaside table eviction path.
+ * Both cannot be configured because the paths track different
+ * information. The update/restore path can handle uncommitted changes,
+ * by evicting most of the page and then creating a new, smaller page
+ * to which we re-attach those changes. Lookaside eviction writes
+ * changes into the lookaside table and restores them on demand if and
+ * when the page is read back into memory.
+ *
+ * Both paths are configured outside of reconciliation: the save/restore
+ * path is the WT_REC_UPDATE_RESTORE flag, the lookaside table path is
+ * the WT_REC_LOOKASIDE flag.
+ */
+ if (!F_ISSET(r, WT_REC_LOOKASIDE | WT_REC_UPDATE_RESTORE))
+ return (__wt_set_return(session, EBUSY));
+ if (uncommitted && !F_ISSET(r, WT_REC_UPDATE_RESTORE))
+ return (__wt_set_return(session, EBUSY));
+
+ WT_ASSERT(session, r->max_txn != WT_TXN_NONE);
+
+ /*
+ * The order of the updates on the list matters, so we can't move only
+ * the unresolved updates; move the entire update list.
+ */
+ WT_RET(__rec_update_save(session, r, ins, ripcip, *updp, upd_memsize));
+ upd_saved = true;
+ if (upd_savedp != NULL)
+ *upd_savedp = true;
+
+ /*
+ * Track the first off-page update when saving history in the lookaside
+ * table. When skewing newest, we want the first (non-aborted) update
+ * after the one stored on the page. Otherwise, we want the update
+ * before the on-page update.
+ */
+ if (F_ISSET(r, WT_REC_LOOKASIDE) && r->las_skew_newest) {
+ if (WT_TXNID_LT(r->unstable_txn, first_upd->txnid))
+ r->unstable_txn = first_upd->txnid;
+ if (first_ts_upd != NULL &&
+ r->unstable_timestamp < first_ts_upd->timestamp)
+ r->unstable_timestamp = first_ts_upd->timestamp;
+ } else if (F_ISSET(r, WT_REC_LOOKASIDE)) {
+ for (upd = first_upd; upd != *updp; upd = upd->next) {
+ if (upd->txnid == WT_TXN_ABORTED)
+ continue;
+
+ if (upd->txnid != WT_TXN_NONE &&
+ WT_TXNID_LT(upd->txnid, r->unstable_txn))
+ r->unstable_txn = upd->txnid;
+ if (upd->timestamp < r->unstable_timestamp)
+ r->unstable_timestamp = upd->timestamp;
+ }
+ }
+
+check_original_value:
+ /*
+ * Paranoia: check that we didn't choose an update that has since been
+ * rolled back.
+ */
+ WT_ASSERT(session, *updp == NULL || (*updp)->txnid != WT_TXN_ABORTED);
+
+ /*
+ * Returning an update means the original on-page value might be lost,
+ * and that's a problem if there's a reader that needs it. This call
+ * makes a copy of the on-page value and if there is a birthmark in the
+ * update list, replaces it. We do that any time there are saved
+ * updates and during reconciliation of a backing overflow record that
+ * will be physically removed once it's no longer needed.
+ */
+ if (*updp != NULL && (upd_saved ||
+ (vpack != NULL && vpack->ovfl &&
+ vpack->raw != WT_CELL_VALUE_OVFL_RM)))
+ WT_RET(
+ __rec_append_orig_value(session, page, first_upd, vpack));
+
+ return (0);
+}
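
At its core, __wt_rec_txn_read walks the newest-first update chain, skips aborted entries and returns the first update visible under the reconciliation's visibility rule. A stripped-down sketch where "visible to all" is modeled as "committed before the oldest running transaction"; this is an illustrative assumption, the real checks (__wt_txn_upd_visible_all and friends) also consider timestamps and prepare state:

#include <stdint.h>
#include <stdio.h>

#define TXN_ABORTED UINT64_MAX

struct update {
    struct update *next;    /* Next (older) update */
    uint64_t txnid;
    const char *value;
};

/* Return the newest update every reader could see, or NULL if none. */
static const struct update *
pick_visible_all(const struct update *upd, uint64_t oldest_running_id)
{
    for (; upd != NULL; upd = upd->next) {
        if (upd->txnid == TXN_ABORTED)
            continue;           /* Skip rolled-back updates */
        if (upd->txnid < oldest_running_id)
            return (upd);       /* Stable: safe to write out */
    }
    return (NULL);      /* Nothing stable: the page must stay dirty */
}

int
main(void)
{
    struct update v1 = { NULL, 10, "v1" };
    struct update v2 = { &v1, TXN_ABORTED, "v2-rolled-back" };
    struct update v3 = { &v2, 42, "v3-not-yet-stable" };
    const struct update *upd;

    /* Oldest running transaction is 40: txn 42 is not yet stable. */
    upd = pick_visible_all(&v3, 40);
    printf("%s\n", upd != NULL ? upd->value : "(none)");    /* v1 */
    return (0);
}
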
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index 87ce7ca1cc3..1c873fc3d8a 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -8,307 +8,18 @@
#include "wt_internal.h"
-struct __rec_chunk; typedef struct __rec_chunk WT_CHUNK;
-struct __rec_dictionary; typedef struct __rec_dictionary WT_DICTIONARY;
-struct __rec_kv; typedef struct __rec_kv WT_KV;
-
-/*
- * Reconciliation is the process of taking an in-memory page, walking each entry
- * in the page, building a backing disk image in a temporary buffer representing
- * that information, and writing that buffer to disk. What could be simpler?
- *
- * WT_RECONCILE --
- * Information tracking a single page reconciliation.
- */
-typedef struct {
- WT_REF *ref; /* Page being reconciled */
- WT_PAGE *page;
- uint32_t flags; /* Caller's configuration */
-
- /*
- * Track start/stop write generation to decide if all changes to the
- * page are written.
- */
- uint32_t orig_write_gen;
-
- /*
- * Track start/stop checkpoint generations to decide if lookaside table
- * records are correct.
- */
- uint64_t orig_btree_checkpoint_gen;
- uint64_t orig_txn_checkpoint_gen;
-
- /*
- * Track the oldest running transaction and whether to skew lookaside
- * to the newest update.
- */
- bool las_skew_newest;
- uint64_t last_running;
-
- /* Track the page's min/maximum transactions. */
- uint64_t max_txn;
- wt_timestamp_t max_timestamp;
-
- /* Lookaside boundary tracking. */
- uint64_t unstable_txn;
- wt_timestamp_t unstable_timestamp;
-
- u_int updates_seen; /* Count of updates seen. */
- u_int updates_unstable; /* Count of updates not visible_all. */
-
- bool update_uncommitted; /* An update was uncommitted */
- bool update_used; /* An update could be used */
-
- /*
- * When we can't mark the page clean (for example, checkpoint found some
- * uncommitted updates), there's a leave-dirty flag.
- */
- bool leave_dirty;
-
- /*
- * Track if reconciliation has seen any overflow items. If a leaf page
- * with no overflow items is written, the parent page's address cell is
- * set to the leaf-no-overflow type. This means we can delete the leaf
- * page without reading it because we don't have to discard any overflow
- * items it might reference.
- *
- * The test test is per-page reconciliation, that is, once we see an
- * overflow item on the page, all subsequent leaf pages written for the
- * page will not be leaf-no-overflow type, regardless of whether or not
- * they contain overflow items. In other words, leaf-no-overflow is not
- * guaranteed to be set on every page that doesn't contain an overflow
- * item, only that if it is set, the page contains no overflow items.
- * XXX
- * This was originally done because raw compression couldn't do better,
- * now that raw compression has been removed, we should do better.
- */
- bool ovfl_items;
-
- /*
- * Track if reconciliation of a row-store leaf page has seen empty (zero
- * length) values. We don't write out anything for empty values, so if
- * there are empty values on a page, we have to make two passes over the
- * page when it's read to figure out how many keys it has, expensive in
- * the common case of no empty values and (entries / 2) keys. Likewise,
- * a page with only empty values is another common data set, and keys on
- * that page will be equal to the number of entries. In both cases, set
- * a flag in the page's on-disk header.
- *
- * The test is per-page reconciliation as described above for the
- * overflow-item test.
- */
- bool all_empty_value, any_empty_value;
-
- /*
- * Reconciliation gets tricky if we have to split a page, which happens
- * when the disk image we create exceeds the page type's maximum disk
- * image size.
- *
- * First, the target size of the page we're building.
- */
- uint32_t page_size; /* Page size */
-
- /*
- * Second, the split size: if we're doing the page layout, split to a
- * smaller-than-maximum page size when a split is required so we don't
- * repeatedly split a packed page.
- */
- uint32_t split_size; /* Split page size */
- uint32_t min_split_size; /* Minimum split page size */
-
- /*
- * We maintain two split chunks in the memory during reconciliation to
- * be written out as pages. As we get to the end of the data, if the
- * last one turns out to be smaller than the minimum split size, we go
- * back into the penultimate chunk and split at this minimum split size
- * boundary. This moves some data from the penultimate chunk to the last
- * chunk, hence increasing the size of the last page written without
- * decreasing the penultimate page size beyond the minimum split size.
- * For this reason, we maintain an expected split percentage boundary
- * and a minimum split percentage boundary.
- *
- * Chunks are referenced by current and previous pointers. In case of a
- * split, previous references the first chunk and current switches to
- * the second chunk. If reconciliation generates more split chunks, the
- * the previous chunk is written to the disk and current and previous
- * swap.
- */
- struct __rec_chunk {
- /*
- * The recno and entries fields are the starting record number
- * of the split chunk (for column-store splits), and the number
- * of entries in the split chunk.
- *
- * The key for a row-store page; no column-store key is needed
- * because the page's recno, stored in the recno field, is the
- * column-store key.
- */
- uint32_t entries;
- uint64_t recno;
- WT_ITEM key;
-
- uint32_t min_entries;
- uint64_t min_recno;
- WT_ITEM min_key;
-
- /* Minimum split-size boundary buffer offset. */
- size_t min_offset;
-
- WT_ITEM image; /* disk-image */
- } chunkA, chunkB, *cur_ptr, *prev_ptr;
-
- /*
- * We track current information about the current record number, the
- * number of entries copied into the disk image buffer, where we are
- * in the buffer, and how much memory remains. Those values are
- * packaged here rather than passing pointers to stack locations
- * around the code.
- */
- uint64_t recno; /* Current record number */
- uint32_t entries; /* Current number of entries */
- uint8_t *first_free; /* Current first free byte */
- size_t space_avail; /* Remaining space in this chunk */
- /* Remaining space in this chunk to put a minimum size boundary */
- size_t min_space_avail;
-
- /*
- * Saved update list, supporting the WT_REC_UPDATE_RESTORE and
- * WT_REC_LOOKASIDE configurations. While reviewing updates for each
- * page, we save WT_UPDATE lists here, and then move them to per-block
- * areas as the blocks are defined.
- */
- WT_SAVE_UPD *supd; /* Saved updates */
- uint32_t supd_next;
- size_t supd_allocated;
- size_t supd_memsize; /* Size of saved update structures */
-
- /* List of pages we've written so far. */
- WT_MULTI *multi;
- uint32_t multi_next;
- size_t multi_allocated;
-
- /*
- * Root pages are written when wrapping up the reconciliation, remember
- * the image we're going to write.
- */
- WT_ITEM *wrapup_checkpoint;
- bool wrapup_checkpoint_compressed;
-
- /*
- * We don't need to keep the 0th key around on internal pages, the
- * search code ignores them as nothing can sort less by definition.
- * There's some trickiness here, see the code for comments on how
- * these fields work.
- */
- bool cell_zero; /* Row-store internal page 0th key */
-
- /*
- * We calculate checksums to find previously written identical blocks,
- * but once a match fails during an eviction, there's no point trying
- * again.
- */
- bool evict_matching_checksum_failed;
-
- /*
- * WT_DICTIONARY --
- * We optionally build a dictionary of values for leaf pages. Where
- * two value cells are identical, only write the value once, the second
- * and subsequent copies point to the original cell. The dictionary is
- * fixed size, but organized in a skip-list to make searches faster.
- */
- struct __rec_dictionary {
- uint64_t hash; /* Hash value */
- uint32_t offset; /* Matching cell */
-
- u_int depth; /* Skiplist */
- WT_DICTIONARY *next[0];
- } **dictionary; /* Dictionary */
- u_int dictionary_next, dictionary_slots; /* Next, max entries */
- /* Skiplist head. */
- WT_DICTIONARY *dictionary_head[WT_SKIP_MAXDEPTH];
-
- /*
- * WT_KV--
- * An on-page key/value item we're building.
- */
- struct __rec_kv {
- WT_ITEM buf; /* Data */
- WT_CELL cell; /* Cell and cell's length */
- size_t cell_len;
- size_t len; /* Total length of cell + data */
- } k, v; /* Key/Value being built */
-
- WT_ITEM *cur, _cur; /* Key/Value being built */
- WT_ITEM *last, _last; /* Last key/value built */
-
- bool key_pfx_compress; /* If can prefix-compress next key */
- bool key_pfx_compress_conf; /* If prefix compression configured */
- bool key_sfx_compress; /* If can suffix-compress next key */
- bool key_sfx_compress_conf; /* If suffix compression configured */
-
- bool is_bulk_load; /* If it's a bulk load */
-
- WT_SALVAGE_COOKIE *salvage; /* If it's a salvage operation */
-
- bool cache_write_lookaside; /* Used the lookaside table */
- bool cache_write_restore; /* Used update/restoration */
-
- uint32_t tested_ref_state; /* Debugging information */
-
- /*
- * XXX
- * In the case of a modified update, we may need a copy of the current
- * value as a set of bytes. We call back into the btree code using a
- * fake cursor to do that work. This a layering violation and fragile,
- * we need a better solution.
- */
- WT_CURSOR_BTREE update_modify_cbt;
-} WT_RECONCILE;
-
-#define WT_CROSSING_MIN_BND(r, next_len) \
- ((r)->cur_ptr->min_offset == 0 && \
- (next_len) > (r)->min_space_avail)
-#define WT_CROSSING_SPLIT_BND(r, next_len) ((next_len) > (r)->space_avail)
-#define WT_CHECK_CROSSING_BND(r, next_len) \
- (WT_CROSSING_MIN_BND(r, next_len) || WT_CROSSING_SPLIT_BND(r, next_len))
-
-static void __rec_cell_build_addr(WT_SESSION_IMPL *,
- WT_RECONCILE *, const void *, size_t, u_int, uint64_t);
-static int __rec_cell_build_int_key(WT_SESSION_IMPL *,
- WT_RECONCILE *, const void *, size_t, bool *);
-static int __rec_cell_build_leaf_key(WT_SESSION_IMPL *,
- WT_RECONCILE *, const void *, size_t, bool *);
-static int __rec_cell_build_ovfl(WT_SESSION_IMPL *,
- WT_RECONCILE *, WT_KV *, uint8_t, uint64_t);
-static int __rec_cell_build_val(WT_SESSION_IMPL *,
- WT_RECONCILE *, const void *, size_t, uint64_t);
static void __rec_cleanup(WT_SESSION_IMPL *, WT_RECONCILE *);
-static int __rec_col_fix(WT_SESSION_IMPL *, WT_RECONCILE *, WT_REF *);
-static int __rec_col_fix_slvg(WT_SESSION_IMPL *,
- WT_RECONCILE *, WT_REF *, WT_SALVAGE_COOKIE *);
-static int __rec_col_int(WT_SESSION_IMPL *, WT_RECONCILE *, WT_REF *);
-static int __rec_col_merge(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *);
-static int __rec_col_var(WT_SESSION_IMPL *,
- WT_RECONCILE *, WT_REF *, WT_SALVAGE_COOKIE *);
-static int __rec_col_var_helper(WT_SESSION_IMPL *, WT_RECONCILE *,
- WT_SALVAGE_COOKIE *, WT_ITEM *, bool, uint8_t, uint64_t);
static int __rec_destroy_session(WT_SESSION_IMPL *);
static int __rec_init(WT_SESSION_IMPL *,
WT_REF *, uint32_t, WT_SALVAGE_COOKIE *, void *);
static int __rec_las_wrapup(WT_SESSION_IMPL *, WT_RECONCILE *);
static int __rec_las_wrapup_err(WT_SESSION_IMPL *, WT_RECONCILE *);
static int __rec_root_write(WT_SESSION_IMPL *, WT_PAGE *, uint32_t);
-static int __rec_row_int(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *);
-static int __rec_row_leaf(WT_SESSION_IMPL *,
- WT_RECONCILE *, WT_PAGE *, WT_SALVAGE_COOKIE *);
-static int __rec_row_leaf_insert(
- WT_SESSION_IMPL *, WT_RECONCILE *, WT_INSERT *);
-static int __rec_row_merge(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *);
static int __rec_split_discard(WT_SESSION_IMPL *, WT_PAGE *);
static int __rec_split_row_promote(
WT_SESSION_IMPL *, WT_RECONCILE *, WT_ITEM *, uint8_t);
-static int __rec_split_write(
- WT_SESSION_IMPL *, WT_RECONCILE *, WT_CHUNK *, WT_ITEM *, bool);
+static int __rec_split_write(WT_SESSION_IMPL *,
+ WT_RECONCILE *, WT_REC_CHUNK *, WT_ITEM *, bool);
static int __rec_write_check_complete(
WT_SESSION_IMPL *, WT_RECONCILE *, int, bool *);
static void __rec_write_page_status(WT_SESSION_IMPL *, WT_RECONCILE *);
@@ -316,12 +27,6 @@ static int __rec_write_wrapup(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *);
static int __rec_write_wrapup_err(
WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *);
-static void __rec_dictionary_free(WT_SESSION_IMPL *, WT_RECONCILE *);
-static int __rec_dictionary_init(WT_SESSION_IMPL *, WT_RECONCILE *, u_int);
-static int __rec_dictionary_lookup(
- WT_SESSION_IMPL *, WT_RECONCILE *, WT_KV *, WT_DICTIONARY **);
-static void __rec_dictionary_reset(WT_RECONCILE *);
-
/*
* __wt_reconcile --
* Reconcile an in-memory page into its on-disk format, and write it.
@@ -435,23 +140,23 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref,
switch (page->type) {
case WT_PAGE_COL_FIX:
if (salvage != NULL)
- ret = __rec_col_fix_slvg(session, r, ref, salvage);
+ ret = __wt_rec_col_fix_slvg(session, r, ref, salvage);
else
- ret = __rec_col_fix(session, r, ref);
+ ret = __wt_rec_col_fix(session, r, ref);
break;
case WT_PAGE_COL_INT:
WT_WITH_PAGE_INDEX(session,
- ret = __rec_col_int(session, r, ref));
+ ret = __wt_rec_col_int(session, r, ref));
break;
case WT_PAGE_COL_VAR:
- ret = __rec_col_var(session, r, ref, salvage);
+ ret = __wt_rec_col_var(session, r, ref, salvage);
break;
case WT_PAGE_ROW_INT:
WT_WITH_PAGE_INDEX(session,
- ret = __rec_row_int(session, r, page));
+ ret = __wt_rec_row_int(session, r, page));
break;
case WT_PAGE_ROW_LEAF:
- ret = __rec_row_leaf(session, r, page, salvage);
+ ret = __wt_rec_row_leaf(session, r, page, salvage);
break;
default:
ret = __wt_illegal_value(session, page->type);
@@ -870,6 +575,12 @@ __rec_init(WT_SESSION_IMPL *session,
* history, or the stable timestamp hasn't changed since last time this
* page was successfully reconciled, skew oldest instead.
*/
+ if (F_ISSET(S2C(session)->cache, WT_CACHE_EVICT_DEBUG_MODE) &&
+ __wt_random(&session->rnd) % 3 == 0)
+ r->las_skew_newest = false;
+ else
+ r->las_skew_newest =
+ LF_ISSET(WT_REC_LOOKASIDE) && LF_ISSET(WT_REC_VISIBLE_ALL);
r->las_skew_newest =
LF_ISSET(WT_REC_LOOKASIDE) && LF_ISSET(WT_REC_VISIBLE_ALL);
if (r->las_skew_newest &&
@@ -965,9 +676,9 @@ __rec_init(WT_SESSION_IMPL *session,
* Sanity check the size: 100 slots is the smallest dictionary we use.
*/
if (btree->dictionary != 0 && btree->dictionary > r->dictionary_slots)
- WT_RET(__rec_dictionary_init(session,
+ WT_RET(__wt_rec_dictionary_init(session,
r, btree->dictionary < 100 ? 100 : btree->dictionary));
- __rec_dictionary_reset(r);
+ __wt_rec_dictionary_reset(r);
/*
* Prefix compression discards repeated prefix bytes from row-store leaf
@@ -1059,7 +770,7 @@ __rec_destroy(WT_SESSION_IMPL *session, void *reconcilep)
__wt_free(session, r->supd);
- __rec_dictionary_free(session, r);
+ __wt_rec_dictionary_free(session, r);
__wt_buf_free(session, &r->k.buf);
__wt_buf_free(session, &r->v.buf);
@@ -1083,914 +794,6 @@ __rec_destroy_session(WT_SESSION_IMPL *session)
}
/*
- * __rec_update_save --
- * Save a WT_UPDATE list for later restoration.
- */
-static int
-__rec_update_save(WT_SESSION_IMPL *session, WT_RECONCILE *r,
- WT_INSERT *ins, void *ripcip, WT_UPDATE *onpage_upd, size_t upd_memsize)
-{
- WT_RET(__wt_realloc_def(
- session, &r->supd_allocated, r->supd_next + 1, &r->supd));
- r->supd[r->supd_next].ins = ins;
- r->supd[r->supd_next].ripcip = ripcip;
- r->supd[r->supd_next].onpage_upd = onpage_upd;
- ++r->supd_next;
- r->supd_memsize += upd_memsize;
- return (0);
-}
-
-/*
- * __rec_append_orig_value --
- * Append the key's original value to its update list.
- */
-static int
-__rec_append_orig_value(WT_SESSION_IMPL *session,
- WT_PAGE *page, WT_UPDATE *upd, WT_CELL_UNPACK *unpack)
-{
- WT_DECL_ITEM(tmp);
- WT_DECL_RET;
- WT_UPDATE *append;
- size_t size;
-
- /* Done if at least one self-contained update is globally visible. */
- for (;; upd = upd->next) {
- if (WT_UPDATE_DATA_VALUE(upd) &&
- __wt_txn_upd_visible_all(session, upd))
- return (0);
-
- /* Add the original value after birthmarks. */
- if (upd->type == WT_UPDATE_BIRTHMARK) {
- WT_ASSERT(session, unpack != NULL &&
- unpack->type != WT_CELL_DEL);
- break;
- }
-
- /* Leave reference at the last item in the chain. */
- if (upd->next == NULL)
- break;
- }
-
- /*
- * We need the original on-page value for some reader: get a copy and
- * append it to the end of the update list with a transaction ID that
- * guarantees its visibility.
- *
- * If we don't have a value cell, it's an insert/append list key/value
- * pair which simply doesn't exist for some reader; place a deleted
- * record at the end of the update list.
- */
- append = NULL; /* -Wconditional-uninitialized */
- size = 0; /* -Wconditional-uninitialized */
- if (unpack == NULL || unpack->type == WT_CELL_DEL)
- WT_RET(__wt_update_alloc(session,
- NULL, &append, &size, WT_UPDATE_TOMBSTONE));
- else {
- WT_RET(__wt_scr_alloc(session, 0, &tmp));
- WT_ERR(__wt_page_cell_data_ref(session, page, unpack, tmp));
- WT_ERR(__wt_update_alloc(
- session, tmp, &append, &size, WT_UPDATE_STANDARD));
- }
-
- /*
- * If we're saving the original value for a birthmark, transfer over
- * the transaction ID and clear out the birthmark update.
- *
- * Else, set the entry's transaction information to the lowest possible
- * value. Cleared memory matches the lowest possible transaction ID and
- * timestamp, do nothing.
- */
- if (upd->type == WT_UPDATE_BIRTHMARK) {
- append->txnid = upd->txnid;
- append->timestamp = upd->timestamp;
- append->next = upd->next;
- }
-
- /* Append the new entry into the update list. */
- WT_PUBLISH(upd->next, append);
- __wt_cache_page_inmem_incr(session, page, size);
-
- if (upd->type == WT_UPDATE_BIRTHMARK) {
- upd->type = WT_UPDATE_STANDARD;
- upd->txnid = WT_TXN_ABORTED;
- }
-
-err: __wt_scr_free(session, &tmp);
- return (ret);
-}
-
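The routine above keeps the key's original on-page value reachable by hanging a copy (or a tombstone) off the tail of the update chain. A minimal standalone sketch of that append-at-tail step, in plain C with made-up types (not WiredTiger's WT_UPDATE or its allocation/publish helpers):

/*
 * Illustrative sketch only: append a "restored original value" node to the
 * tail of an update chain so a reader walking past the newer updates still
 * finds the old value.
 */
#include <stdio.h>
#include <stdlib.h>

struct upd {
    char value[16];
    struct upd *next;
};

/* Walk to the tail and attach a copy of the original on-page value. */
static void
append_orig_value(struct upd *head, const char *orig)
{
    struct upd *last, *append;

    for (last = head; last->next != NULL; last = last->next)
        ;
    append = calloc(1, sizeof(*append));
    if (append == NULL)
        return;
    snprintf(append->value, sizeof(append->value), "%s", orig);
    last->next = append;    /* The real code "publishes" this store. */
}

int
main(void)
{
    struct upd newer = { "new-value", NULL };

    append_orig_value(&newer, "old-on-page");
    if (newer.next != NULL)
        printf("%s -> %s\n", newer.value, newer.next->value);
    free(newer.next);
    return (0);
}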
-/*
- * __rec_txn_read --
- * Return the update in a list that should be written (or NULL if none can
- * be written).
- */
-static int
-__rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
- WT_INSERT *ins, void *ripcip, WT_CELL_UNPACK *vpack,
- bool *upd_savedp, WT_UPDATE **updp)
-{
- WT_PAGE *page;
- WT_UPDATE *first_ts_upd, *first_txn_upd, *first_upd, *upd;
- wt_timestamp_t timestamp;
- size_t upd_memsize;
- uint64_t max_txn, txnid;
- bool all_visible, prepared, skipped_birthmark, uncommitted, upd_saved;
-
- if (upd_savedp != NULL)
- *upd_savedp = false;
- *updp = NULL;
-
- page = r->page;
- first_ts_upd = first_txn_upd = NULL;
- upd_memsize = 0;
- max_txn = WT_TXN_NONE;
- prepared = skipped_birthmark = uncommitted = upd_saved = false;
-
- /*
- * If called with a WT_INSERT item, use its WT_UPDATE list (which must
- * exist), otherwise check for an on-page row-store WT_UPDATE list
- * (which may not exist). Return immediately if the item has no updates.
- */
- if (ins != NULL)
- first_upd = ins->upd;
- else if ((first_upd = WT_ROW_UPDATE(page, ripcip)) == NULL)
- return (0);
-
- for (upd = first_upd; upd != NULL; upd = upd->next) {
- if ((txnid = upd->txnid) == WT_TXN_ABORTED)
- continue;
-
- ++r->updates_seen;
- upd_memsize += WT_UPDATE_MEMSIZE(upd);
-
- /*
- * Track the first update in the chain that is not aborted and
- * the maximum transaction ID.
- */
- if (first_txn_upd == NULL)
- first_txn_upd = upd;
-
- /* Track the largest transaction ID seen. */
- if (WT_TXNID_LT(max_txn, txnid))
- max_txn = txnid;
-
- /*
- * Check whether the update was committed before reconciliation
- * started. The global commit point can move forward during
- * reconciliation so we use a cached copy to avoid races when a
- * concurrent transaction commits or rolls back while we are
-	 * examining its updates. As prepared transaction IDs are
-	 * globally visible, we need to check the update state as well.
- */
- if (F_ISSET(r, WT_REC_EVICT)) {
- if (upd->prepare_state == WT_PREPARE_LOCKED ||
- upd->prepare_state == WT_PREPARE_INPROGRESS)
- prepared = true;
-
- if (F_ISSET(r, WT_REC_VISIBLE_ALL) ?
- WT_TXNID_LE(r->last_running, txnid) :
- !__txn_visible_id(session, txnid))
- uncommitted = r->update_uncommitted = true;
-
- if (prepared || uncommitted)
- continue;
- }
-
- /* Track the first update with non-zero timestamp. */
- if (first_ts_upd == NULL && upd->timestamp != 0)
- first_ts_upd = upd;
-
- /*
- * Find the first update we can use.
- *
- * Update/restore eviction can handle any update (including
- * uncommitted updates). Lookaside eviction can save any
- * committed update. Regular eviction checks that the maximum
- * transaction ID and timestamp seen are stable.
- *
- * Lookaside and update/restore eviction try to choose the same
- * version as a subsequent checkpoint, so that checkpoint can
- * skip over pages with lookaside entries. If the application
- * has supplied a stable timestamp, we assume (a) that it is
- * old, and (b) that the next checkpoint will use it, so we wait
- * to see a stable update. If there is no stable timestamp, we
- * assume the next checkpoint will write the most recent version
- * (but we save enough information that checkpoint can fix
- * things up if we choose an update that is too new).
- */
- if (*updp == NULL && r->las_skew_newest)
- *updp = upd;
-
- if (F_ISSET(r, WT_REC_VISIBLE_ALL) ?
- !__wt_txn_upd_visible_all(session, upd) :
- !__wt_txn_upd_visible(session, upd)) {
- if (F_ISSET(r, WT_REC_EVICT))
- ++r->updates_unstable;
-
- /*
- * Rare case: when applications run at low isolation
- * levels, update/restore eviction may see a stable
- * update followed by an uncommitted update. Give up
- * in that case: we need to discard updates from the
- * stable update and older for correctness and we can't
- * discard an uncommitted update.
- */
- if (F_ISSET(r, WT_REC_UPDATE_RESTORE) &&
- *updp != NULL && (uncommitted || prepared)) {
- r->leave_dirty = true;
- return (__wt_set_return(session, EBUSY));
- }
-
- if (upd->type == WT_UPDATE_BIRTHMARK)
- skipped_birthmark = true;
-
- continue;
- }
-
- /*
- * Lookaside without stable timestamp was taken care of above
- * (set to the first uncommitted transaction). Lookaside with
- * stable timestamp always takes the first stable update.
- */
- if (*updp == NULL)
- *updp = upd;
- }
-
- /* Keep track of the selected update. */
- upd = *updp;
-
- /* Reconciliation should never see an aborted or reserved update. */
- WT_ASSERT(session, upd == NULL ||
- (upd->txnid != WT_TXN_ABORTED && upd->type != WT_UPDATE_RESERVE));
-
- /* If all of the updates were aborted, quit. */
- if (first_txn_upd == NULL) {
- WT_ASSERT(session, upd == NULL);
- return (0);
- }
-
- /* If no updates were skipped, record that we're making progress. */
- if (upd == first_txn_upd)
- r->update_used = true;
-
- /*
- * The checkpoint transaction is special. Make sure we never write
- * metadata updates from a checkpoint in a concurrent session.
- */
- WT_ASSERT(session, !WT_IS_METADATA(session->dhandle) ||
- upd == NULL || upd->txnid == WT_TXN_NONE ||
- upd->txnid != S2C(session)->txn_global.checkpoint_state.id ||
- WT_SESSION_IS_CHECKPOINT(session));
-
- /*
- * Track the most recent transaction in the page. We store this in the
- * tree at the end of reconciliation in the service of checkpoints, it
- * is used to avoid discarding trees from memory when they have changes
- * required to satisfy a snapshot read.
- */
- if (WT_TXNID_LT(r->max_txn, max_txn))
- r->max_txn = max_txn;
-
- /* Update the maximum timestamp. */
- if (first_ts_upd != NULL && r->max_timestamp < first_ts_upd->timestamp)
- r->max_timestamp = first_ts_upd->timestamp;
-
- /*
- * If the update we chose was a birthmark, or we are doing
- * update-restore and we skipped a birthmark, the original on-page
- * value must be retained.
- */
- if (upd != NULL &&
- (upd->type == WT_UPDATE_BIRTHMARK ||
- (F_ISSET(r, WT_REC_UPDATE_RESTORE) && skipped_birthmark)))
- *updp = NULL;
-
- /*
- * Check if all updates on the page are visible. If not, it must stay
- * dirty unless we are saving updates to the lookaside table.
- *
- * Updates can be out of transaction ID order (but not out of timestamp
- * order), so we track the maximum transaction ID and the newest update
- * with a timestamp (if any).
- */
- timestamp = first_ts_upd == NULL ? 0 : first_ts_upd->timestamp;
- all_visible = upd == first_txn_upd && !(uncommitted || prepared) &&
- (F_ISSET(r, WT_REC_VISIBLE_ALL) ?
- __wt_txn_visible_all(session, max_txn, timestamp) :
- __wt_txn_visible(session, max_txn, timestamp));
-
- if (all_visible)
- goto check_original_value;
-
- r->leave_dirty = true;
-
- if (F_ISSET(r, WT_REC_VISIBILITY_ERR))
- WT_PANIC_RET(session, EINVAL,
- "reconciliation error, update not visible");
-
- /*
- * If not trying to evict the page, we know what we'll write and we're
- * done.
- */
- if (!F_ISSET(r, WT_REC_EVICT))
- goto check_original_value;
-
- /*
- * We are attempting eviction with changes that are not yet stable
- * (i.e. globally visible). There are two ways to continue, the
- * save/restore eviction path or the lookaside table eviction path.
- * Both cannot be configured because the paths track different
- * information. The update/restore path can handle uncommitted changes,
- * by evicting most of the page and then creating a new, smaller page
- * to which we re-attach those changes. Lookaside eviction writes
- * changes into the lookaside table and restores them on demand if and
- * when the page is read back into memory.
- *
- * Both paths are configured outside of reconciliation: the save/restore
- * path is the WT_REC_UPDATE_RESTORE flag, the lookaside table path is
- * the WT_REC_LOOKASIDE flag.
- */
- if (!F_ISSET(r, WT_REC_LOOKASIDE | WT_REC_UPDATE_RESTORE))
- return (__wt_set_return(session, EBUSY));
- if (uncommitted && !F_ISSET(r, WT_REC_UPDATE_RESTORE))
- return (__wt_set_return(session, EBUSY));
-
- WT_ASSERT(session, r->max_txn != WT_TXN_NONE);
-
- /*
- * The order of the updates on the list matters, we can't move only the
- * unresolved updates, move the entire update list.
- */
- WT_RET(__rec_update_save(session, r, ins, ripcip, *updp, upd_memsize));
- upd_saved = true;
- if (upd_savedp != NULL)
- *upd_savedp = true;
-
- /*
- * Track the first off-page update when saving history in the lookaside
- * table. When skewing newest, we want the first (non-aborted) update
- * after the one stored on the page. Otherwise, we want the update
- * before the on-page update.
- */
- if (F_ISSET(r, WT_REC_LOOKASIDE) && r->las_skew_newest) {
- if (WT_TXNID_LT(r->unstable_txn, first_upd->txnid))
- r->unstable_txn = first_upd->txnid;
- if (first_ts_upd != NULL &&
- r->unstable_timestamp < first_ts_upd->timestamp)
- r->unstable_timestamp = first_ts_upd->timestamp;
- } else if (F_ISSET(r, WT_REC_LOOKASIDE)) {
- for (upd = first_upd; upd != *updp; upd = upd->next) {
- if (upd->txnid == WT_TXN_ABORTED)
- continue;
-
- if (upd->txnid != WT_TXN_NONE &&
- WT_TXNID_LT(upd->txnid, r->unstable_txn))
- r->unstable_txn = upd->txnid;
- if (upd->timestamp < r->unstable_timestamp)
- r->unstable_timestamp = upd->timestamp;
- }
- }
-
-check_original_value:
- /*
- * Paranoia: check that we didn't choose an update that has since been
- * rolled back.
- */
- WT_ASSERT(session, *updp == NULL || (*updp)->txnid != WT_TXN_ABORTED);
-
- /*
- * Returning an update means the original on-page value might be lost,
- * and that's a problem if there's a reader that needs it. This call
- * makes a copy of the on-page value and if there is a birthmark in the
- * update list, replaces it. We do that any time there are saved
- * updates and during reconciliation of a backing overflow record that
- * will be physically removed once it's no longer needed.
- */
- if (*updp != NULL && (upd_saved ||
- (vpack != NULL && vpack->ovfl &&
- vpack->raw != WT_CELL_VALUE_OVFL_RM)))
- WT_RET(
- __rec_append_orig_value(session, page, first_upd, vpack));
-
- return (0);
-}
-
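The selection loop above walks the update chain newest-to-oldest, skipping aborted entries, tracking the largest transaction ID and choosing the first update the reconciliation can legitimately write. A rough standalone sketch of that shape, in plain C with a simplified "snapshot cutoff" standing in for the real visibility rules (timestamps, prepared state and lookaside skew are omitted):

/* Illustrative sketch only, not WiredTiger's visibility logic. */
#include <stdint.h>
#include <stdio.h>

#define TXN_ABORTED UINT64_MAX

struct upd {
    uint64_t txnid;
    const char *value;
    struct upd *next;
};

static const struct upd *
pick_visible(const struct upd *head, uint64_t snapshot, uint64_t *max_txnp)
{
    const struct upd *upd, *chosen;

    chosen = NULL;
    *max_txnp = 0;
    for (upd = head; upd != NULL; upd = upd->next) {
        if (upd->txnid == TXN_ABORTED)      /* Skip aborted updates. */
            continue;
        if (upd->txnid > *max_txnp)         /* Track the largest ID. */
            *max_txnp = upd->txnid;
        if (chosen == NULL && upd->txnid <= snapshot)
            chosen = upd;                   /* First visible update. */
    }
    return (chosen);
}

int
main(void)
{
    struct upd u3 = { 10, "oldest", NULL };
    struct upd u2 = { TXN_ABORTED, "aborted", &u3 };
    struct upd u1 = { 42, "newest", &u2 };
    uint64_t max_txn;
    const struct upd *upd = pick_visible(&u1, 20, &max_txn);

    printf("chose %s, max txn %llu\n",
        upd == NULL ? "(none)" : upd->value, (unsigned long long)max_txn);
    return (0);
}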
-/*
- * WT_CHILD_RELEASE, WT_CHILD_RELEASE_ERR --
- * Macros to clean up during internal-page reconciliation, releasing the
- * hazard pointer we're holding on child pages.
- */
-#define WT_CHILD_RELEASE(session, hazard, ref) do { \
- if (hazard) { \
- (hazard) = false; \
- WT_TRET( \
- __wt_page_release(session, ref, WT_READ_NO_EVICT)); \
- } \
-} while (0)
-#define WT_CHILD_RELEASE_ERR(session, hazard, ref) do { \
- WT_CHILD_RELEASE(session, hazard, ref); \
- WT_ERR(ret); \
-} while (0)
-
-typedef enum {
- WT_CHILD_IGNORE, /* Ignored child */
- WT_CHILD_MODIFIED, /* Modified child */
- WT_CHILD_ORIGINAL, /* Original child */
- WT_CHILD_PROXY /* Deleted child: proxy */
-} WT_CHILD_STATE;
-
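The release macros above rely on the do { ... } while (0) idiom so a multi-statement cleanup reads as a single statement and can sit safely under an if/else. A tiny standalone sketch of the idiom (hypothetical names, not the WT_CHILD_RELEASE macro itself):

#include <stdbool.h>
#include <stdio.h>

/* Illustrative sketch only: a single-statement cleanup macro. */
#define CHILD_RELEASE(held, name) do {                        \
    if (held) {                                               \
        (held) = false;                                       \
        printf("released %s\n", name);                        \
    }                                                         \
} while (0)

int
main(void)
{
    bool hazard = true;

    if (hazard)
        CHILD_RELEASE(hazard, "child-page");  /* Behaves as one statement. */
    else
        printf("nothing to release\n");
    return (0);
}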
-/*
- * __rec_child_deleted --
- * Handle pages with leaf pages in the WT_REF_DELETED state.
- */
-static int
-__rec_child_deleted(WT_SESSION_IMPL *session,
- WT_RECONCILE *r, WT_REF *ref, WT_CHILD_STATE *statep)
-{
- WT_PAGE_DELETED *page_del;
-
- page_del = ref->page_del;
-
- /*
- * Internal pages with child leaf pages in the WT_REF_DELETED state are
- * a special case during reconciliation. First, if the deletion was a
- * result of a session truncate call, the deletion may not be visible to
- * us. In that case, we proceed as with any change not visible during
- * reconciliation by ignoring the change for the purposes of writing the
- * internal page.
- *
- * In this case, there must be an associated page-deleted structure, and
- * it holds the transaction ID we care about.
- *
- * In some cases, there had better not be any updates we can't see.
- *
-	 * A visible update must be in the READY state (i.e. not in the LOCKED
-	 * or PREPARED state) to be truly visible to others.
- */
- if (F_ISSET(r, WT_REC_VISIBILITY_ERR) && page_del != NULL &&
- __wt_page_del_active(session, ref, false))
- WT_PANIC_RET(session, EINVAL,
- "reconciliation illegally skipped an update");
-
- /*
- * Deal with any underlying disk blocks.
- *
- * First, check to see if there is an address associated with this leaf:
- * if there isn't, we're done, the underlying page is already gone. If
- * the page still exists, check for any transactions in the system that
- * might want to see the page's state before it's deleted.
- *
- * If any such transactions exist, we cannot discard the underlying leaf
- * page to the block manager because the transaction may eventually read
- * it. However, this write might be part of a checkpoint, and should we
- * recover to that checkpoint, we'll need to delete the leaf page, else
- * we'd leak it. The solution is to write a proxy cell on the internal
- * page ensuring the leaf page is eventually discarded.
- *
- * If no such transactions exist, we can discard the leaf page to the
- * block manager and no cell needs to be written at all. We do this
- * outside of the underlying tracking routines because this action is
- * permanent and irrevocable. (Clearing the address means we've lost
- * track of the disk address in a permanent way. This is safe because
- * there's no path to reading the leaf page again: if there's ever a
- * read into this part of the name space again, the cache read function
- * instantiates an entirely new page.)
- */
- if (ref->addr != NULL && !__wt_page_del_active(session, ref, true)) {
- /*
- * Minor memory cleanup: if a truncate call deleted this page
- * and we were ever forced to instantiate the page in memory,
- * we would have built a list of updates in the page reference
- * in order to be able to commit/rollback the truncate. We just
- * passed a visibility test, discard the update list.
- */
- if (page_del != NULL) {
- __wt_free(session, ref->page_del->update_list);
- __wt_free(session, ref->page_del);
- }
-
- WT_RET(__wt_ref_block_free(session, ref));
- }
-
- /*
- * If the original page is gone, we can skip the slot on the internal
- * page.
- */
- if (ref->addr == NULL) {
- *statep = WT_CHILD_IGNORE;
- return (0);
- }
-
- /*
- * Internal pages with deletes that aren't stable cannot be evicted, we
- * don't have sufficient information to restore the page's information
- * if subsequently read (we wouldn't know which transactions should see
- * the original page and which should see the deleted page).
- */
- if (F_ISSET(r, WT_REC_EVICT))
- return (__wt_set_return(session, EBUSY));
-
- /*
- * If there are deleted child pages we can't discard immediately, keep
- * the page dirty so they are eventually freed.
- */
- r->leave_dirty = true;
-
- /*
- * If the original page cannot be freed, we need to keep a slot on the
- * page to reference it from the parent page.
- *
- * If the delete is not visible in this checkpoint, write the original
- * address normally. Otherwise, we have to write a proxy record.
- * If the delete state is not ready, then delete is not visible as it
- * is in prepared state.
- */
- if (!__wt_page_del_active(session, ref, false))
- *statep = WT_CHILD_PROXY;
-
- return (0);
-}
-
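The decision above reduces to a few inputs: whether the leaf still has an on-disk address, whether the deletion is visible to everyone, and whether we are evicting. A rough standalone sketch of that decision table in plain C (made-up names, stripped of the block-free and update-list cleanup):

#include <stdbool.h>
#include <stdio.h>

/* Illustrative sketch only, not WiredTiger's WT_CHILD_STATE handling. */
enum child_state { CHILD_IGNORE, CHILD_ORIGINAL, CHILD_PROXY };

static int
child_deleted(bool have_addr, bool delete_visible_to_all, bool evicting,
    enum child_state *statep)
{
    if (!have_addr) {               /* Underlying page already gone. */
        *statep = CHILD_IGNORE;
        return (0);
    }
    if (evicting)                   /* Unstable deletes block eviction. */
        return (-1);                /* Stand-in for EBUSY. */
    *statep = delete_visible_to_all ? CHILD_PROXY : CHILD_ORIGINAL;
    return (0);
}

int
main(void)
{
    enum child_state state;

    if (child_deleted(true, false, false, &state) == 0)
        printf("checkpoint writes %s cell\n",
            state == CHILD_PROXY ? "a proxy" : "the original address");
    return (0);
}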
-/*
- * __rec_child_modify --
- * Return if the internal page's child references any modifications.
- */
-static int
-__rec_child_modify(WT_SESSION_IMPL *session,
- WT_RECONCILE *r, WT_REF *ref, bool *hazardp, WT_CHILD_STATE *statep)
-{
- WT_DECL_RET;
- WT_PAGE_MODIFY *mod;
-
- /* We may acquire a hazard pointer our caller must release. */
- *hazardp = false;
-
- /* Default to using the original child address. */
- *statep = WT_CHILD_ORIGINAL;
-
- /*
- * This function is called when walking an internal page to decide how
- * to handle child pages referenced by the internal page.
- *
- * Internal pages are reconciled for two reasons: first, when evicting
- * an internal page, second by the checkpoint code when writing internal
- * pages. During eviction, all pages should be in the WT_REF_DISK or
- * WT_REF_DELETED state. During checkpoint, eviction that might affect
- * review of an internal page is prohibited, however, as the subtree is
- * not reserved for our exclusive use, there are other page states that
- * must be considered.
- */
- for (;; __wt_yield()) {
- switch (r->tested_ref_state = ref->state) {
- case WT_REF_DISK:
- /* On disk, not modified by definition. */
- goto done;
-
- case WT_REF_DELETED:
- /*
- * The child is in a deleted state.
- *
- * It's possible the state could change underneath us as
- * the page is read in, and we can race between checking
- * for a deleted state and looking at the transaction ID
- * to see if the delete is visible to us. Lock down the
- * structure.
- */
- if (!WT_REF_CAS_STATE(
- session, ref, WT_REF_DELETED, WT_REF_LOCKED))
- break;
- ret = __rec_child_deleted(session, r, ref, statep);
- WT_REF_SET_STATE(ref, WT_REF_DELETED);
- goto done;
-
- case WT_REF_LOCKED:
- /*
- * Locked.
- *
- * We should never be here during eviction, active child
- * pages in an evicted page's subtree fails the eviction
- * attempt.
- */
- WT_ASSERT(session, !F_ISSET(r, WT_REC_EVICT));
- if (F_ISSET(r, WT_REC_EVICT))
- return (__wt_set_return(session, EBUSY));
-
- /*
- * If called during checkpoint, the child is being
- * considered by the eviction server or the child is a
- * truncated page being read. The eviction may have
- * started before the checkpoint and so we must wait
- * for the eviction to be resolved. I suspect we could
- * handle reads of truncated pages, but we can't
- * distinguish between the two and reads of truncated
- * pages aren't expected to be common.
- */
- break;
-
- case WT_REF_LIMBO:
- WT_ASSERT(session, !F_ISSET(r, WT_REC_EVICT));
- /* FALLTHROUGH */
- case WT_REF_LOOKASIDE:
- /*
- * On disk or in cache with lookaside updates.
- *
- * We should never be here during eviction: active
- * child pages in an evicted page's subtree fails the
- * eviction attempt.
- */
- if (F_ISSET(r, WT_REC_EVICT) &&
- __wt_page_las_active(session, ref)) {
- WT_ASSERT(session, false);
- return (__wt_set_return(session, EBUSY));
- }
-
- /*
- * A page evicted with lookaside entries may not have
- * an address, if no updates were visible to
- * reconciliation. Any child pages in that state
- * should be ignored.
- */
- if (ref->addr == NULL) {
- *statep = WT_CHILD_IGNORE;
- WT_CHILD_RELEASE(session, *hazardp, ref);
- }
- goto done;
-
- case WT_REF_MEM:
- /*
- * In memory.
- *
- * We should never be here during eviction, active child
- * pages in an evicted page's subtree fails the eviction
- * attempt.
- */
- WT_ASSERT(session, !F_ISSET(r, WT_REC_EVICT));
- if (F_ISSET(r, WT_REC_EVICT))
- return (__wt_set_return(session, EBUSY));
-
- /*
- * If called during checkpoint, acquire a hazard pointer
- * so the child isn't evicted, it's an in-memory case.
- *
- * This call cannot return split/restart, we have a lock
- * on the parent which prevents a child page split.
- *
- * Set WT_READ_NO_WAIT because we're only interested in
- * the WT_REF's final state. Pages in transition might
- * change WT_REF state during our read, and then return
- * WT_NOTFOUND to us. In that case, loop and look again.
- */
- ret = __wt_page_in(session, ref,
- WT_READ_CACHE | WT_READ_NO_EVICT |
- WT_READ_NO_GEN | WT_READ_NO_WAIT);
- if (ret == WT_NOTFOUND) {
- ret = 0;
- break;
- }
- WT_RET(ret);
- *hazardp = true;
- goto in_memory;
-
- case WT_REF_READING:
- /*
- * Being read, not modified by definition.
- *
- * We should never be here during eviction, active child
- * pages in an evicted page's subtree fails the eviction
- * attempt.
- */
- WT_ASSERT(session, !F_ISSET(r, WT_REC_EVICT));
- if (F_ISSET(r, WT_REC_EVICT))
- return (__wt_set_return(session, EBUSY));
- goto done;
-
- case WT_REF_SPLIT:
- /*
- * The page was split out from under us.
- *
- * We should never be here during eviction, active child
- * pages in an evicted page's subtree fails the eviction
- * attempt.
- *
- * We should never be here during checkpoint, dirty page
- * eviction is shutout during checkpoint, all splits in
- * process will have completed before we walk any pages
- * for checkpoint.
- */
- WT_ASSERT(session, WT_REF_SPLIT != WT_REF_SPLIT);
- return (__wt_set_return(session, EBUSY));
-
- WT_ILLEGAL_VALUE(session, r->tested_ref_state);
- }
- WT_STAT_CONN_INCR(session, child_modify_blocked_page);
- }
-
-in_memory:
- /*
- * In-memory states: the child is potentially modified if the page's
- * modify structure has been instantiated. If the modify structure
- * exists and the page has actually been modified, set that state.
- * If that's not the case, we would normally use the original cell's
- * disk address as our reference, however there are two special cases,
- * both flagged by a missing block address.
- *
- * First, if forced to instantiate a deleted child page and it's never
- * modified, we end up here with a page that has a modify structure, no
- * modifications, and no disk address. Ignore those pages, they're not
- * modified and there is no reason to write the cell.
- *
- * Second, insert splits are permitted during checkpoint. When doing the
- * final checkpoint pass, we first walk the internal page's page-index
- * and write out any dirty pages we find, then we write out the internal
- * page in post-order traversal. If we found the split page in the first
- * step, it will have an address; if we didn't find the split page in
- * the first step, it won't have an address and we ignore it, it's not
- * part of the checkpoint.
- */
- mod = ref->page->modify;
- if (mod != NULL && mod->rec_result != 0)
- *statep = WT_CHILD_MODIFIED;
- else if (ref->addr == NULL) {
- *statep = WT_CHILD_IGNORE;
- WT_CHILD_RELEASE(session, *hazardp, ref);
- }
-
-done: WT_DIAGNOSTIC_YIELD;
- return (ret);
-}
-
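The WT_REF_DELETED branch above locks the reference's state with a compare-and-swap before inspecting it, then restores the state. A minimal standalone sketch of that pattern using C11 atomics (the enum and helper are invented stand-ins for WT_REF_CAS_STATE/WT_REF_SET_STATE):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Illustrative sketch only. */
enum ref_state { REF_DISK, REF_DELETED, REF_LOCKED, REF_MEM };

static _Atomic int state = REF_DELETED;

static bool
lock_if_deleted(void)
{
    int expected = REF_DELETED;

    /* Succeeds only if the state is still REF_DELETED. */
    return (atomic_compare_exchange_strong(&state, &expected, REF_LOCKED));
}

int
main(void)
{
    if (lock_if_deleted()) {
        printf("examining deleted child under lock\n");
        atomic_store(&state, REF_DELETED);   /* Restore the state. */
    } else
        printf("state changed underneath us, retry\n");
    return (0);
}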
-/*
- * __rec_incr --
- * Update the memory tracking structure for a set of new entries.
- */
-static inline void
-__rec_incr(WT_SESSION_IMPL *session, WT_RECONCILE *r, uint32_t v, size_t size)
-{
- /*
- * The buffer code is fragile and prone to off-by-one errors -- check
- * for overflow in diagnostic mode.
- */
- WT_ASSERT(session, r->space_avail >= size);
- WT_ASSERT(session, WT_BLOCK_FITS(r->first_free, size,
- r->cur_ptr->image.mem, r->cur_ptr->image.memsize));
-
- r->entries += v;
- r->space_avail -= size;
- r->first_free += size;
-
- /*
- * If offset for the minimum split size boundary is not set, we have not
- * yet reached the minimum boundary, reduce the space available for it.
- */
- if (r->cur_ptr->min_offset == 0) {
- if (r->min_space_avail >= size)
- r->min_space_avail -= size;
- else
- r->min_space_avail = 0;
- }
-}
-
-/*
- * __rec_copy_incr --
- * Copy a key/value cell and buffer pair into the new image.
- */
-static inline void
-__rec_copy_incr(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_KV *kv)
-{
- size_t len;
- uint8_t *p, *t;
-
- /*
- * If there's only one chunk of data to copy (because the cell and data
- * are being copied from the original disk page), the cell length won't
- * be set, the WT_ITEM data/length will reference the data to be copied.
- *
- * WT_CELLs are typically small, 1 or 2 bytes -- don't call memcpy, do
- * the copy in-line.
- */
- for (p = r->first_free,
- t = (uint8_t *)&kv->cell, len = kv->cell_len; len > 0; --len)
- *p++ = *t++;
-
- /* The data can be quite large -- call memcpy. */
- if (kv->buf.size != 0)
- memcpy(p, kv->buf.data, kv->buf.size);
-
- WT_ASSERT(session, kv->len == kv->cell_len + kv->buf.size);
- __rec_incr(session, r, 1, kv->len);
-}
-
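Together, the two helpers above copy a tiny cell header inline, memcpy() the larger payload, and then advance the free-space cursor while shrinking the remaining space. A rough standalone sketch of that copy-and-account step (plain C, invented struct, no split handling):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Illustrative sketch only, not WiredTiger's WT_RECONCILE accounting. */
struct image {
    uint8_t buf[64];
    uint8_t *first_free;
    size_t space_avail;
    uint32_t entries;
};

static void
copy_incr(struct image *r, const uint8_t *cell, size_t cell_len,
    const void *data, size_t data_len)
{
    size_t i;

    if (r->space_avail < cell_len + data_len)
        return;                             /* Caller would split here. */
    for (i = 0; i < cell_len; ++i)          /* Tiny copy: do it inline. */
        *r->first_free++ = cell[i];
    memcpy(r->first_free, data, data_len);  /* Bulk copy: memcpy. */
    r->first_free += data_len;
    r->space_avail -= cell_len + data_len;
    ++r->entries;
}

int
main(void)
{
    struct image r;
    uint8_t cell[2] = { 0x01, 0x05 };

    memset(&r, 0, sizeof(r));
    r.first_free = r.buf;
    r.space_avail = sizeof(r.buf);
    copy_incr(&r, cell, sizeof(cell), "hello", 5);
    printf("%u entries, %zu bytes free\n", r.entries, r.space_avail);
    return (0);
}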
-/*
- * __rec_dict_replace --
- * Check for a dictionary match.
- */
-static int
-__rec_dict_replace(
- WT_SESSION_IMPL *session, WT_RECONCILE *r, uint64_t rle, WT_KV *val)
-{
- WT_DICTIONARY *dp;
- uint64_t offset;
-
- /*
- * We optionally create a dictionary of values and only write a unique
- * value once per page, using a special "copy" cell for all subsequent
- * copies of the value. We have to do the cell build and resolution at
- * this low level because we need physical cell offsets for the page.
- *
- * Sanity check: short-data cells can be smaller than dictionary-copy
- * cells. If the data is already small, don't bother doing the work.
- * This isn't just work avoidance: on-page cells can't grow as a result
- * of writing a dictionary-copy cell, the reconciliation functions do a
- * split-boundary test based on the size required by the value's cell;
- * if we grow the cell after that test we'll potentially write off the
- * end of the buffer's memory.
- */
- if (val->buf.size <= WT_INTPACK32_MAXSIZE)
- return (0);
- WT_RET(__rec_dictionary_lookup(session, r, val, &dp));
- if (dp == NULL)
- return (0);
-
- /*
- * If the dictionary offset isn't set, we're creating a new entry in the
- * dictionary, set its location.
- *
- * If the dictionary offset is set, we have a matching value. Create a
- * copy cell instead.
- */
- if (dp->offset == 0)
- dp->offset = WT_PTRDIFF32(r->first_free, r->cur_ptr->image.mem);
- else {
- /*
- * The offset is the byte offset from this cell to the previous,
- * matching cell, NOT the byte offset from the beginning of the
- * page.
- */
- offset = (uint64_t)WT_PTRDIFF(r->first_free,
- (uint8_t *)r->cur_ptr->image.mem + dp->offset);
- val->len = val->cell_len =
- __wt_cell_pack_copy(&val->cell, rle, offset);
- val->buf.data = NULL;
- val->buf.size = 0;
- }
- return (0);
-}
-
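The dictionary above writes each distinct value once and turns later occurrences into a small "copy" cell that points back at the earlier copy (the real cell stores the backward distance between cells, not a page offset, and uses a hash rather than a scan). A minimal standalone sketch of the idea in plain C:

#include <stdio.h>
#include <string.h>

/* Illustrative sketch only: deduplicate values with back-references. */
int
main(void)
{
    const char *values[] = { "red", "blue", "red", "red", "blue" };
    char page[256];
    size_t offsets[5];      /* Offset of each value's first occurrence. */
    const char *seen[5];
    size_t i, j, used = 0;
    int nunique = 0;

    for (i = 0; i < 5; ++i) {
        for (j = 0; j < (size_t)nunique; ++j)
            if (strcmp(seen[j], values[i]) == 0)
                break;
        if (j < (size_t)nunique)        /* Match: emit a copy cell. */
            printf("copy cell -> offset %zu\n", offsets[j]);
        else {                          /* New value: write it out. */
            seen[nunique] = values[i];
            offsets[nunique++] = used;
            printf("value cell \"%s\" at offset %zu\n", values[i], used);
            memcpy(page + used, values[i], strlen(values[i]));
            used += strlen(values[i]);
        }
    }
    return (0);
}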
-/*
- * __rec_key_state_update --
- * Update prefix and suffix compression based on the last key.
- */
-static inline void
-__rec_key_state_update(WT_RECONCILE *r, bool ovfl_key)
-{
- WT_ITEM *a;
-
- /*
- * If writing an overflow key onto the page, don't update the "last key"
- * value, and leave the state of prefix compression alone. (If we are
- * currently doing prefix compression, we have a key state which will
- * continue to work, we're just skipping the key just created because
- * it's an overflow key and doesn't participate in prefix compression.
- * If we are not currently doing prefix compression, we can't start, an
- * overflow key doesn't give us any state.)
- *
- * Additionally, if we wrote an overflow key onto the page, turn off the
- * suffix compression of row-store internal node keys. (When we split,
- * "last key" is the largest key on the previous page, and "cur key" is
- * the first key on the next page, which is being promoted. In some
- * cases we can discard bytes from the "cur key" that are not needed to
- * distinguish between the "last key" and "cur key", compressing the
- * size of keys on internal nodes. If we just built an overflow key,
- * we're not going to update the "last key", making suffix compression
- * impossible for the next key. Alternatively, we could remember where
- * the last key was on the page, detect it's an overflow key, read it
- * from disk and do suffix compression, but that's too much work for an
- * unlikely event.)
- *
- * If we're not writing an overflow key on the page, update the last-key
- * value and turn on both prefix and suffix compression.
- */
- if (ovfl_key)
- r->key_sfx_compress = false;
- else {
- a = r->cur;
- r->cur = r->last;
- r->last = a;
-
- r->key_pfx_compress = r->key_pfx_compress_conf;
- r->key_sfx_compress = r->key_sfx_compress_conf;
- }
-}
-
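The state tracked above exists to support prefix compression: with sorted keys, each key can be stored as "reuse N leading bytes of the previous key, then these suffix bytes". A minimal standalone sketch of computing that prefix/suffix split in plain C (the overflow-key and suffix-compression rules above are not modeled):

#include <stdio.h>
#include <string.h>

/* Illustrative sketch only. */
static size_t
common_prefix(const char *a, const char *b)
{
    size_t i;

    for (i = 0; a[i] != '\0' && a[i] == b[i]; ++i)
        ;
    return (i);
}

int
main(void)
{
    const char *keys[] = { "applaud", "apple", "application" };
    const char *last = "";
    size_t i, pfx;

    for (i = 0; i < 3; ++i) {
        pfx = common_prefix(last, keys[i]);
        printf("key \"%s\": prefix %zu, suffix \"%s\"\n",
            keys[i], pfx, keys[i] + pfx);
        last = keys[i];
    }
    return (0);
}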
-/*
- * Macros from fixed-length entries to/from bytes.
- */
-#define WT_FIX_BYTES_TO_ENTRIES(btree, bytes) \
- ((uint32_t)((((bytes) * 8) / (btree)->bitcnt)))
-#define WT_FIX_ENTRIES_TO_BYTES(btree, entries) \
- ((uint32_t)WT_ALIGN((entries) * (btree)->bitcnt, 8))
-
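The two macros above are bit-packing arithmetic for fixed-length column stores, converting between a byte count and the number of fixed-width entries it can hold. A small standalone sketch of the same arithmetic for a hypothetical 3-bit column (plain C, not the WT_ALIGN/__bitstr_size helpers):

#include <stdint.h>
#include <stdio.h>

/* Illustrative sketch only: round bits up to whole bytes. */
#define ALIGN_BITS_TO_BYTES(bits)   (((bits) + 7) / 8)

int
main(void)
{
    uint32_t bitcnt = 3, bytes = 100, entries;

    entries = (bytes * 8) / bitcnt;         /* Bytes -> entries. */
    printf("%u bytes hold %u %u-bit entries\n", bytes, entries, bitcnt);
    printf("%u entries need %u bytes\n",
        entries, (uint32_t)ALIGN_BITS_TO_BYTES(entries * bitcnt));
    return (0);
}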
-/*
* __rec_leaf_page_max --
* Figure out the maximum leaf page size for the reconciliation.
*/
@@ -2057,35 +860,6 @@ __rec_leaf_page_max(WT_SESSION_IMPL *session, WT_RECONCILE *r)
}
/*
- * __rec_need_split --
- * Check whether adding some bytes to the page requires a split.
- */
-static bool
-__rec_need_split(WT_RECONCILE *r, size_t len)
-{
- /*
- * In the case of a row-store leaf page, trigger a split if a threshold
- * number of saved updates is reached. This allows pages to split for
- * update/restore and lookaside eviction when there is no visible data
- * causing the disk image to grow.
- *
- * In the case of small pages or large keys, we might try to split when
- * a page has no updates or entries, which isn't possible. To consider
- * update/restore or lookaside information, require either page entries
- * or updates that will be attached to the image. The limit is one of
- * either, but it doesn't make sense to create pages or images with few
- * entries or updates, even where page sizes are small (especially as
- * updates that will eventually become overflow items can throw off our
- * calculations). Bound the combination at something reasonable.
- */
- if (r->page->type == WT_PAGE_ROW_LEAF && r->entries + r->supd_next > 10)
- len += r->supd_memsize;
-
- /* Check for the disk image crossing a boundary. */
- return (WT_CHECK_CROSSING_BND(r, len));
-}
-
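The check above decides whether appending more bytes would cross a boundary, counting saved-update memory for row-store leaf pages once they hold enough content. A rough standalone sketch of that arithmetic (invented parameter names, simplified to a single "space remaining" boundary):

#include <stdbool.h>
#include <stdio.h>

/* Illustrative sketch only. */
static bool
need_split(size_t len, size_t space_remaining,
    bool row_leaf, unsigned entries, unsigned saved_updates, size_t supd_mem)
{
    if (row_leaf && entries + saved_updates > 10)
        len += supd_mem;            /* Count saved-update memory too. */
    return (len > space_remaining);
}

int
main(void)
{
    printf("split? %s\n",
        need_split(512, 4096, true, 20, 5, 8192) ? "yes" : "no");
    return (0);
}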
-/*
* __wt_split_page_size --
* Given a split percentage, calculate split page size in bytes.
*/
@@ -2123,8 +897,8 @@ __wt_split_page_size(int split_pct, uint32_t maxpagesize, uint32_t allocsize)
* Initialize a single chunk structure.
*/
static int
-__rec_split_chunk_init(
- WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_CHUNK *chunk, size_t memsize)
+__rec_split_chunk_init(WT_SESSION_IMPL *session,
+ WT_RECONCILE *r, WT_REC_CHUNK *chunk, size_t memsize)
{
chunk->min_recno = WT_RECNO_OOB;
chunk->min_entries = 0;
@@ -2158,16 +932,16 @@ __rec_split_chunk_init(
}
/*
- * __rec_split_init --
+ * __wt_rec_split_init --
* Initialization for the reconciliation split functions.
*/
-static int
-__rec_split_init(WT_SESSION_IMPL *session,
+int
+__wt_rec_split_init(WT_SESSION_IMPL *session,
WT_RECONCILE *r, WT_PAGE *page, uint64_t recno, uint64_t max)
{
WT_BM *bm;
WT_BTREE *btree;
- WT_CHUNK *chunk;
+ WT_REC_CHUNK *chunk;
WT_REF *ref;
size_t corrected_page_size, disk_img_buf_size;
@@ -2463,21 +1237,21 @@ __rec_split_grow(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t add_len)
}
/*
- * __rec_split --
+ * __wt_rec_split --
* Handle the page reconciliation bookkeeping. (Did you know "bookkeeper"
* has 3 doubled letters in a row? Sweet-tooth does, too.)
*/
-static int
-__rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len)
+int
+__wt_rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len)
{
WT_BTREE *btree;
- WT_CHUNK *tmp;
+ WT_REC_CHUNK *tmp;
size_t inuse;
btree = S2BT(session);
/* Fixed length col store can call with next_len 0 */
- WT_ASSERT(session, next_len == 0 || __rec_need_split(r, next_len));
+ WT_ASSERT(session, next_len == 0 || __wt_rec_need_split(r, next_len));
/*
* We should never split during salvage, and we're about to drop core
@@ -2495,11 +1269,11 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len)
* Additionally, grow the buffer to contain the current item if we
* haven't already consumed a reasonable portion of a split chunk.
*/
- if (inuse < r->split_size / 2 && !__rec_need_split(r, 0))
+ if (inuse < r->split_size / 2 && !__wt_rec_need_split(r, 0))
goto done;
/* All page boundaries reset the dictionary. */
- __rec_dictionary_reset(r);
+ __wt_rec_dictionary_reset(r);
/* Set the number of entries and size for the just finished chunk. */
r->cur_ptr->entries = r->entries;
@@ -2567,18 +1341,18 @@ done: /*
}
/*
- * __rec_split_crossing_bnd --
+ * __wt_rec_split_crossing_bnd --
* Save the details for the minimum split size boundary or call for a
* split.
*/
-static inline int
-__rec_split_crossing_bnd(
+int
+__wt_rec_split_crossing_bnd(
WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len)
{
WT_BTREE *btree;
size_t min_offset;
- WT_ASSERT(session, __rec_need_split(r, next_len));
+ WT_ASSERT(session, __wt_rec_need_split(r, next_len));
/*
* If crossing the minimum split size boundary, store the boundary
@@ -2587,7 +1361,7 @@ __rec_split_crossing_bnd(
* large enough, just split at this point.
*/
if (WT_CROSSING_MIN_BND(r, next_len) &&
- !WT_CROSSING_SPLIT_BND(r, next_len) && !__rec_need_split(r, 0)) {
+ !WT_CROSSING_SPLIT_BND(r, next_len) && !__wt_rec_need_split(r, 0)) {
btree = S2BT(session);
WT_ASSERT(session, r->cur_ptr->min_offset == 0);
@@ -2609,13 +1383,13 @@ __rec_split_crossing_bnd(
session, r, &r->cur_ptr->min_key, r->page->type));
/* All page boundaries reset the dictionary. */
- __rec_dictionary_reset(r);
+ __wt_rec_dictionary_reset(r);
return (0);
}
/* We are crossing a split boundary */
- return (__rec_split(session, r, next_len));
+ return (__wt_rec_split(session, r, next_len));
}
/*
@@ -2632,8 +1406,8 @@ static int
__rec_split_finish_process_prev(WT_SESSION_IMPL *session, WT_RECONCILE *r)
{
WT_BTREE *btree;
- WT_CHUNK *cur_ptr, *prev_ptr, *tmp;
WT_PAGE_HEADER *dsk;
+ WT_REC_CHUNK *cur_ptr, *prev_ptr, *tmp;
size_t combined_size, len_to_move;
uint8_t *cur_dsk_start;
@@ -2714,11 +1488,11 @@ __rec_split_finish_process_prev(WT_SESSION_IMPL *session, WT_RECONCILE *r)
}
/*
- * __rec_split_finish --
+ * __wt_rec_split_finish --
* Finish processing a page.
*/
-static int
-__rec_split_finish(WT_SESSION_IMPL *session, WT_RECONCILE *r)
+int
+__wt_rec_split_finish(WT_SESSION_IMPL *session, WT_RECONCILE *r)
{
/*
* We're done reconciling, write the final page. We may arrive here with
@@ -2771,13 +1545,13 @@ __rec_supd_move(
*/
static int
__rec_split_write_supd(WT_SESSION_IMPL *session,
- WT_RECONCILE *r, WT_CHUNK *chunk, WT_MULTI *multi, bool last_block)
+ WT_RECONCILE *r, WT_REC_CHUNK *chunk, WT_MULTI *multi, bool last_block)
{
WT_BTREE *btree;
- WT_CHUNK *next;
WT_DECL_ITEM(key);
WT_DECL_RET;
WT_PAGE *page;
+ WT_REC_CHUNK *next;
WT_SAVE_UPD *supd;
WT_UPDATE *upd;
uint32_t i, j;
@@ -2876,7 +1650,7 @@ err: __wt_scr_free(session, &key);
*/
static void
__rec_split_write_header(WT_SESSION_IMPL *session,
- WT_RECONCILE *r, WT_CHUNK *chunk, WT_MULTI *multi, WT_PAGE_HEADER *dsk)
+ WT_RECONCILE *r, WT_REC_CHUNK *chunk, WT_MULTI *multi, WT_PAGE_HEADER *dsk)
{
WT_BTREE *btree;
WT_PAGE *page;
@@ -3088,7 +1862,7 @@ __rec_compression_adjust(WT_SESSION_IMPL *session,
*/
static int
__rec_split_write(WT_SESSION_IMPL *session, WT_RECONCILE *r,
- WT_CHUNK *chunk, WT_ITEM *compressed_image, bool last_block)
+ WT_REC_CHUNK *chunk, WT_ITEM *compressed_image, bool last_block)
{
WT_BTREE *btree;
WT_MULTI *multi;
@@ -3304,7 +2078,7 @@ __wt_bulk_init(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
recno = btree->type == BTREE_ROW ? WT_RECNO_OOB : 1;
- return (__rec_split_init(session,
+ return (__wt_rec_split_init(session,
r, cbulk->leaf, recno, btree->maxleafpage_precomp));
}
@@ -3326,7 +2100,7 @@ __wt_bulk_wrapup(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
switch (btree->type) {
case BTREE_COL_FIX:
if (cbulk->entry != 0)
- __rec_incr(session, r, cbulk->entry,
+ __wt_rec_incr(session, r, cbulk->entry,
__bitstr_size(
(size_t)cbulk->entry * btree->bitcnt));
break;
@@ -3338,7 +2112,7 @@ __wt_bulk_wrapup(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
break;
}
- WT_RET(__rec_split_finish(session, r));
+ WT_RET(__wt_rec_split_finish(session, r));
WT_RET(__rec_write_wrapup(session, r, r->page));
__rec_write_page_status(session, r);
@@ -3354,1912 +2128,6 @@ __wt_bulk_wrapup(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
}
/*
- * __wt_bulk_insert_row --
- * Row-store bulk insert.
- */
-int
-__wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
-{
- WT_BTREE *btree;
- WT_CURSOR *cursor;
- WT_KV *key, *val;
- WT_RECONCILE *r;
- bool ovfl_key;
-
- r = cbulk->reconcile;
- btree = S2BT(session);
- cursor = &cbulk->cbt.iface;
-
- key = &r->k;
- val = &r->v;
- WT_RET(__rec_cell_build_leaf_key(session, r, /* Build key cell */
- cursor->key.data, cursor->key.size, &ovfl_key));
- WT_RET(__rec_cell_build_val(session, r, /* Build value cell */
- cursor->value.data, cursor->value.size, (uint64_t)0));
-
- /* Boundary: split or write the page. */
- if (WT_CROSSING_SPLIT_BND(r, key->len + val->len)) {
- /*
- * Turn off prefix compression until a full key written to the
- * new page, and (unless already working with an overflow key),
- * rebuild the key without compression.
- */
- if (r->key_pfx_compress_conf) {
- r->key_pfx_compress = false;
- if (!ovfl_key)
- WT_RET(__rec_cell_build_leaf_key(
- session, r, NULL, 0, &ovfl_key));
- }
- WT_RET(__rec_split_crossing_bnd(
- session, r, key->len + val->len));
- }
-
- /* Copy the key/value pair onto the page. */
- __rec_copy_incr(session, r, key);
- if (val->len == 0)
- r->any_empty_value = true;
- else {
- r->all_empty_value = false;
- if (btree->dictionary)
- WT_RET(__rec_dict_replace(session, r, 0, val));
- __rec_copy_incr(session, r, val);
- }
-
- /* Update compression state. */
- __rec_key_state_update(r, ovfl_key);
-
- return (0);
-}
-
-/*
- * __rec_col_fix_bulk_insert_split_check --
- * Check if a bulk-loaded fixed-length column store page needs to split.
- */
-static inline int
-__rec_col_fix_bulk_insert_split_check(WT_CURSOR_BULK *cbulk)
-{
- WT_BTREE *btree;
- WT_RECONCILE *r;
- WT_SESSION_IMPL *session;
-
- session = (WT_SESSION_IMPL *)cbulk->cbt.iface.session;
- r = cbulk->reconcile;
- btree = S2BT(session);
-
- if (cbulk->entry == cbulk->nrecs) {
- if (cbulk->entry != 0) {
- /*
- * If everything didn't fit, update the counters and
- * split.
- *
- * Boundary: split or write the page.
- *
- * No need to have a minimum split size boundary, all
- * pages are filled 100% except the last, allowing it to
- * grow in the future.
- */
- __rec_incr(session, r, cbulk->entry,
- __bitstr_size(
- (size_t)cbulk->entry * btree->bitcnt));
- WT_RET(__rec_split(session, r, 0));
- }
- cbulk->entry = 0;
- cbulk->nrecs = WT_FIX_BYTES_TO_ENTRIES(btree, r->space_avail);
- }
- return (0);
-}
-
-/*
- * __wt_bulk_insert_fix --
- * Fixed-length column-store bulk insert.
- */
-int
-__wt_bulk_insert_fix(
- WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool deleted)
-{
- WT_BTREE *btree;
- WT_CURSOR *cursor;
- WT_RECONCILE *r;
-
- r = cbulk->reconcile;
- btree = S2BT(session);
- cursor = &cbulk->cbt.iface;
-
- WT_RET(__rec_col_fix_bulk_insert_split_check(cbulk));
- __bit_setv(r->first_free, cbulk->entry,
- btree->bitcnt, deleted ? 0 : ((uint8_t *)cursor->value.data)[0]);
- ++cbulk->entry;
- ++r->recno;
-
- return (0);
-}
-
-/*
- * __wt_bulk_insert_fix_bitmap --
- * Fixed-length column-store bulk insert.
- */
-int
-__wt_bulk_insert_fix_bitmap(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
-{
- WT_BTREE *btree;
- WT_CURSOR *cursor;
- WT_RECONCILE *r;
- uint32_t entries, offset, page_entries, page_size;
- const uint8_t *data;
-
- r = cbulk->reconcile;
- btree = S2BT(session);
- cursor = &cbulk->cbt.iface;
-
- if (((r->recno - 1) * btree->bitcnt) & 0x7)
- WT_RET_MSG(session, EINVAL,
- "Bulk bitmap load not aligned on a byte boundary");
- for (data = cursor->value.data,
- entries = (uint32_t)cursor->value.size;
- entries > 0;
- entries -= page_entries, data += page_size) {
- WT_RET(__rec_col_fix_bulk_insert_split_check(cbulk));
-
- page_entries = WT_MIN(entries, cbulk->nrecs - cbulk->entry);
- page_size = __bitstr_size(page_entries * btree->bitcnt);
- offset = __bitstr_size(cbulk->entry * btree->bitcnt);
- memcpy(r->first_free + offset, data, page_size);
- cbulk->entry += page_entries;
- r->recno += page_entries;
- }
- return (0);
-}
-
-/*
- * __wt_bulk_insert_var --
- * Variable-length column-store bulk insert.
- */
-int
-__wt_bulk_insert_var(
- WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk, bool deleted)
-{
- WT_BTREE *btree;
- WT_KV *val;
- WT_RECONCILE *r;
-
- r = cbulk->reconcile;
- btree = S2BT(session);
-
- val = &r->v;
- if (deleted) {
- val->cell_len = __wt_cell_pack_del(&val->cell, cbulk->rle);
- val->buf.data = NULL;
- val->buf.size = 0;
- val->len = val->cell_len;
- } else
- /*
- * Store the bulk cursor's last buffer, not the current value,
- * we're tracking duplicates, which means we want the previous
- * value seen, not the current value.
- */
- WT_RET(__rec_cell_build_val(session,
- r, cbulk->last.data, cbulk->last.size, cbulk->rle));
-
- /* Boundary: split or write the page. */
- if (WT_CROSSING_SPLIT_BND(r, val->len))
- WT_RET(__rec_split_crossing_bnd(session, r, val->len));
-
- /* Copy the value onto the page. */
- if (btree->dictionary)
- WT_RET(__rec_dict_replace(session, r, cbulk->rle, val));
- __rec_copy_incr(session, r, val);
-
- /* Update the starting record number in case we split. */
- r->recno += cbulk->rle;
-
- return (0);
-}
-
-/*
- * __rec_vtype --
- * Return a value cell's address type.
- */
-static inline u_int
-__rec_vtype(WT_ADDR *addr)
-{
- if (addr->type == WT_ADDR_INT)
- return (WT_CELL_ADDR_INT);
- if (addr->type == WT_ADDR_LEAF)
- return (WT_CELL_ADDR_LEAF);
- return (WT_CELL_ADDR_LEAF_NO);
-}
-
-/*
- * __rec_col_int --
- * Reconcile a column-store internal page.
- */
-static int
-__rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref)
-{
- WT_ADDR *addr;
- WT_BTREE *btree;
- WT_CELL_UNPACK *vpack, _vpack;
- WT_CHILD_STATE state;
- WT_DECL_RET;
- WT_KV *val;
- WT_PAGE *child, *page;
- WT_REF *ref;
- bool hazard;
-
- btree = S2BT(session);
- page = pageref->page;
- child = NULL;
- hazard = false;
-
- val = &r->v;
- vpack = &_vpack;
-
- WT_RET(__rec_split_init(session,
- r, page, pageref->ref_recno, btree->maxintlpage_precomp));
-
- /* For each entry in the in-memory page... */
- WT_INTL_FOREACH_BEGIN(session, page, ref) {
- /* Update the starting record number in case we split. */
- r->recno = ref->ref_recno;
-
- /*
- * Modified child.
- * The page may be emptied or internally created during a split.
- * Deleted/split pages are merged into the parent and discarded.
- */
- WT_ERR(__rec_child_modify(session, r, ref, &hazard, &state));
- addr = NULL;
- child = ref->page;
-
- switch (state) {
- case WT_CHILD_IGNORE:
- /* Ignored child. */
- WT_CHILD_RELEASE_ERR(session, hazard, ref);
- continue;
-
- case WT_CHILD_MODIFIED:
- /*
- * Modified child. Empty pages are merged into the
- * parent and discarded.
- */
- switch (child->modify->rec_result) {
- case WT_PM_REC_EMPTY:
- /*
- * Column-store pages are almost never empty, as
- * discarding a page would remove a chunk of the
- * name space. The exceptions are pages created
- * when the tree is created, and never filled.
- */
- WT_CHILD_RELEASE_ERR(session, hazard, ref);
- continue;
- case WT_PM_REC_MULTIBLOCK:
- WT_ERR(__rec_col_merge(session, r, child));
- WT_CHILD_RELEASE_ERR(session, hazard, ref);
- continue;
- case WT_PM_REC_REPLACE:
- addr = &child->modify->mod_replace;
- break;
- WT_ILLEGAL_VALUE_ERR(
- session, child->modify->rec_result);
- }
- break;
- case WT_CHILD_ORIGINAL:
- /* Original child. */
- break;
- case WT_CHILD_PROXY:
- /*
- * Deleted child where we write a proxy cell, not yet
- * supported for column-store.
- */
- WT_ERR(__wt_illegal_value(session, state));
- }
-
- /*
- * Build the value cell. The child page address is in one of 3
- * places: if the page was replaced, the page's modify structure
- * references it and we built the value cell just above in the
- * switch statement. Else, the WT_REF->addr reference points to
- * an on-page cell or an off-page WT_ADDR structure: if it's an
- * on-page cell and we copy it from the page, else build a new
- * cell.
- */
- if (addr == NULL && __wt_off_page(page, ref->addr))
- addr = ref->addr;
- if (addr == NULL) {
- __wt_cell_unpack(ref->addr, vpack);
- val->buf.data = ref->addr;
- val->buf.size = __wt_cell_total_len(vpack);
- val->cell_len = 0;
- val->len = val->buf.size;
- } else
- __rec_cell_build_addr(session, r,
- addr->addr, addr->size,
- __rec_vtype(addr), ref->ref_recno);
- WT_CHILD_RELEASE_ERR(session, hazard, ref);
-
- /* Boundary: split or write the page. */
- if (__rec_need_split(r, val->len))
- WT_ERR(__rec_split_crossing_bnd(session, r, val->len));
-
- /* Copy the value onto the page. */
- __rec_copy_incr(session, r, val);
- } WT_INTL_FOREACH_END;
-
- /* Write the remnant page. */
- return (__rec_split_finish(session, r));
-
-err: WT_CHILD_RELEASE(session, hazard, ref);
- return (ret);
-}
-
-/*
- * __rec_col_merge --
- * Merge in a split page.
- */
-static int
-__rec_col_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
-{
- WT_ADDR *addr;
- WT_KV *val;
- WT_MULTI *multi;
- WT_PAGE_MODIFY *mod;
- uint32_t i;
-
- mod = page->modify;
-
- val = &r->v;
-
- /* For each entry in the split array... */
- for (multi = mod->mod_multi,
- i = 0; i < mod->mod_multi_entries; ++multi, ++i) {
- /* Update the starting record number in case we split. */
- r->recno = multi->key.recno;
-
- /* Build the value cell. */
- addr = &multi->addr;
- __rec_cell_build_addr(session, r,
- addr->addr, addr->size, __rec_vtype(addr), r->recno);
-
- /* Boundary: split or write the page. */
- if (__rec_need_split(r, val->len))
- WT_RET(__rec_split_crossing_bnd(session, r, val->len));
-
- /* Copy the value onto the page. */
- __rec_copy_incr(session, r, val);
- }
- return (0);
-}
-
-/*
- * __rec_col_fix --
- * Reconcile a fixed-width, column-store leaf page.
- */
-static int
-__rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_REF *pageref)
-{
- WT_BTREE *btree;
- WT_INSERT *ins;
- WT_PAGE *page;
- WT_UPDATE *upd;
- uint64_t recno;
- uint32_t entry, nrecs;
-
- btree = S2BT(session);
- page = pageref->page;
-
- WT_RET(__rec_split_init(
- session, r, page, pageref->ref_recno, btree->maxleafpage));
-
- /* Copy the original, disk-image bytes into place. */
- memcpy(r->first_free, page->pg_fix_bitf,
- __bitstr_size((size_t)page->entries * btree->bitcnt));
-
- /* Update any changes to the original on-page data items. */
- WT_SKIP_FOREACH(ins, WT_COL_UPDATE_SINGLE(page)) {
- WT_RET(__rec_txn_read(session, r, ins, NULL, NULL, NULL, &upd));
- if (upd != NULL)
- __bit_setv(r->first_free,
- WT_INSERT_RECNO(ins) - pageref->ref_recno,
- btree->bitcnt, *upd->data);
- }
-
- /* Calculate the number of entries per page remainder. */
- entry = page->entries;
- nrecs = WT_FIX_BYTES_TO_ENTRIES(btree, r->space_avail) - page->entries;
- r->recno += entry;
-
- /* Walk any append list. */
- for (ins =
- WT_SKIP_FIRST(WT_COL_APPEND(page));; ins = WT_SKIP_NEXT(ins)) {
- if (ins == NULL) {
- /*
- * If the page split, instantiate any missing records in
- * the page's name space. (Imagine record 98 is
- * transactionally visible, 99 wasn't created or is not
- * yet visible, 100 is visible. Then the page splits and
- * record 100 moves to another page. When we reconcile
- * the original page, we write record 98, then we don't
- * see record 99 for whatever reason. If we've moved
- * record 100, we don't know to write a deleted record
- * 99 on the page.)
- *
- * The record number recorded during the split is the
- * first key on the split page, that is, one larger than
- * the last key on this page, we have to decrement it.
- */
- if ((recno =
- page->modify->mod_col_split_recno) == WT_RECNO_OOB)
- break;
- recno -= 1;
-
- /*
- * The following loop assumes records to write, and the
- * previous key might have been visible.
- */
- if (r->recno > recno)
- break;
- upd = NULL;
- } else {
- WT_RET(__rec_txn_read(
- session, r, ins, NULL, NULL, NULL, &upd));
- recno = WT_INSERT_RECNO(ins);
- }
- for (;;) {
- /*
- * The application may have inserted records which left
- * gaps in the name space.
- */
- for (;
- nrecs > 0 && r->recno < recno;
- --nrecs, ++entry, ++r->recno)
- __bit_setv(
- r->first_free, entry, btree->bitcnt, 0);
-
- if (nrecs > 0) {
- __bit_setv(r->first_free, entry, btree->bitcnt,
- upd == NULL ? 0 : *upd->data);
- --nrecs;
- ++entry;
- ++r->recno;
- break;
- }
-
- /*
- * If everything didn't fit, update the counters and
- * split.
- *
- * Boundary: split or write the page.
- *
- * No need to have a minimum split size boundary, all
- * pages are filled 100% except the last, allowing it to
- * grow in the future.
- */
- __rec_incr(session, r, entry,
- __bitstr_size((size_t)entry * btree->bitcnt));
- WT_RET(__rec_split(session, r, 0));
-
- /* Calculate the number of entries per page. */
- entry = 0;
- nrecs = WT_FIX_BYTES_TO_ENTRIES(btree, r->space_avail);
- }
-
- /*
- * Execute this loop once without an insert item to catch any
- * missing records due to a split, then quit.
- */
- if (ins == NULL)
- break;
- }
-
- /* Update the counters. */
- __rec_incr(
- session, r, entry, __bitstr_size((size_t)entry * btree->bitcnt));
-
- /* Write the remnant page. */
- return (__rec_split_finish(session, r));
-}
-
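Fixed-length column-store reconciliation above packs each record into a fixed number of bits and writes individual values with __bit_setv(). A minimal standalone sketch of setting and reading back 3-bit values in a byte array (the bit ordering here is illustrative only, not WiredTiger's):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Illustrative sketch only. */
static void
bit_setv(uint8_t *bitf, uint32_t entry, uint32_t bitcnt, uint8_t value)
{
    uint32_t bit, i;

    for (i = 0; i < bitcnt; ++i) {
        bit = entry * bitcnt + i;
        if (value & (1u << i))
            bitf[bit / 8] |= (uint8_t)(1u << (bit % 8));
        else
            bitf[bit / 8] &= (uint8_t)~(1u << (bit % 8));
    }
}

static uint8_t
bit_getv(const uint8_t *bitf, uint32_t entry, uint32_t bitcnt)
{
    uint32_t bit, i;
    uint8_t value = 0;

    for (i = 0; i < bitcnt; ++i) {
        bit = entry * bitcnt + i;
        if (bitf[bit / 8] & (1u << (bit % 8)))
            value |= (uint8_t)(1u << i);
    }
    return (value);
}

int
main(void)
{
    uint8_t bitf[8];

    memset(bitf, 0, sizeof(bitf));
    bit_setv(bitf, 5, 3, 6);    /* Record 5 gets the 3-bit value 6. */
    printf("record 5 = %u\n", bit_getv(bitf, 5, 3));
    return (0);
}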
-/*
- * __rec_col_fix_slvg --
- * Reconcile a fixed-width, column-store leaf page created during salvage.
- */
-static int
-__rec_col_fix_slvg(WT_SESSION_IMPL *session,
- WT_RECONCILE *r, WT_REF *pageref, WT_SALVAGE_COOKIE *salvage)
-{
- WT_BTREE *btree;
- WT_PAGE *page;
- uint64_t page_start, page_take;
- uint32_t entry, nrecs;
-
- btree = S2BT(session);
- page = pageref->page;
-
- /*
- * !!!
- * It's vanishingly unlikely and probably impossible for fixed-length
- * column-store files to have overlapping key ranges. It's possible
- * for an entire key range to go missing (if a page is corrupted and
- * lost), but because pages can't split, it shouldn't be possible to
- * find pages where the key ranges overlap. That said, we check for
- * it during salvage and clean up after it here because it doesn't
- * cost much and future column-store formats or operations might allow
- * for fixed-length format ranges to overlap during salvage, and I
- * don't want to have to retrofit the code later.
- */
- WT_RET(__rec_split_init(
- session, r, page, pageref->ref_recno, btree->maxleafpage));
-
- /* We may not be taking all of the entries on the original page. */
- page_take = salvage->take == 0 ? page->entries : salvage->take;
- page_start = salvage->skip == 0 ? 0 : salvage->skip;
-
- /* Calculate the number of entries per page. */
- entry = 0;
- nrecs = WT_FIX_BYTES_TO_ENTRIES(btree, r->space_avail);
-
- for (; nrecs > 0 && salvage->missing > 0;
- --nrecs, --salvage->missing, ++entry)
- __bit_setv(r->first_free, entry, btree->bitcnt, 0);
-
- for (; nrecs > 0 && page_take > 0;
- --nrecs, --page_take, ++page_start, ++entry)
- __bit_setv(r->first_free, entry, btree->bitcnt,
- __bit_getv(page->pg_fix_bitf,
- (uint32_t)page_start, btree->bitcnt));
-
- r->recno += entry;
- __rec_incr(session, r, entry,
- __bitstr_size((size_t)entry * btree->bitcnt));
-
- /*
- * We can't split during salvage -- if everything didn't fit, it's
- * all gone wrong.
- */
- if (salvage->missing != 0 || page_take != 0)
- WT_PANIC_RET(session, WT_PANIC,
- "%s page too large, attempted split during salvage",
- __wt_page_type_string(page->type));
-
- /* Write the page. */
- return (__rec_split_finish(session, r));
-}
-
-/*
- * __rec_col_var_helper --
- * Create a column-store variable length record cell and write it onto a
- * page.
- */
-static int
-__rec_col_var_helper(WT_SESSION_IMPL *session, WT_RECONCILE *r,
- WT_SALVAGE_COOKIE *salvage,
- WT_ITEM *value, bool deleted, uint8_t overflow_type, uint64_t rle)
-{
- WT_BTREE *btree;
- WT_KV *val;
-
- btree = S2BT(session);
-
- val = &r->v;
-
- /*
- * Occasionally, salvage needs to discard records from the beginning or
- * end of the page, and because the items may be part of a RLE cell, do
-	 * the adjustments here. It's not a mistake that we don't bother
-	 * telling our caller when we've handled all the records from the page
-	 * it cares about and could quit processing the page: salvage is a
-	 * rare operation and I don't want to complicate our caller's loop.
- */
- if (salvage != NULL) {
- if (salvage->done)
- return (0);
- if (salvage->skip != 0) {
- if (rle <= salvage->skip) {
- salvage->skip -= rle;
- return (0);
- }
- rle -= salvage->skip;
- salvage->skip = 0;
- }
- if (salvage->take != 0) {
- if (rle <= salvage->take)
- salvage->take -= rle;
- else {
- rle = salvage->take;
- salvage->take = 0;
- }
- if (salvage->take == 0)
- salvage->done = true;
- }
- }
-
- if (deleted) {
- val->cell_len = __wt_cell_pack_del(&val->cell, rle);
- val->buf.data = NULL;
- val->buf.size = 0;
- val->len = val->cell_len;
- } else if (overflow_type) {
- val->cell_len = __wt_cell_pack_ovfl(
- &val->cell, overflow_type, rle, value->size);
- val->buf.data = value->data;
- val->buf.size = value->size;
- val->len = val->cell_len + value->size;
- } else
- WT_RET(__rec_cell_build_val(
- session, r, value->data, value->size, rle));
-
- /* Boundary: split or write the page. */
- if (__rec_need_split(r, val->len))
- WT_RET(__rec_split_crossing_bnd(session, r, val->len));
-
- /* Copy the value onto the page. */
- if (!deleted && !overflow_type && btree->dictionary)
- WT_RET(__rec_dict_replace(session, r, rle, val));
- __rec_copy_incr(session, r, val);
-
- /* Update the starting record number in case we split. */
- r->recno += rle;
-
- return (0);
-}
-
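The helper above is handed runs of identical values and writes a single cell plus a repeat count (RLE). A minimal standalone sketch of collapsing a record sequence into the (value, count) pairs that would become cells (plain C, no salvage skip/take handling):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Illustrative sketch only. */
int
main(void)
{
    const char *recs[] = { "a", "a", "a", "b", "c", "c" };
    uint64_t rle = 1;
    size_t i, n = sizeof(recs) / sizeof(recs[0]);

    for (i = 1; i <= n; ++i) {
        if (i < n && strcmp(recs[i], recs[i - 1]) == 0)
            ++rle;          /* Same as the last value: extend the run. */
        else {
            printf("cell \"%s\" rle %llu\n",
                recs[i - 1], (unsigned long long)rle);
            rle = 1;        /* Different value: flush and restart. */
        }
    }
    return (0);
}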
-/*
- * __rec_col_var --
- * Reconcile a variable-width column-store leaf page.
- */
-static int
-__rec_col_var(WT_SESSION_IMPL *session,
- WT_RECONCILE *r, WT_REF *pageref, WT_SALVAGE_COOKIE *salvage)
-{
- enum { OVFL_IGNORE, OVFL_UNUSED, OVFL_USED } ovfl_state;
- WT_BTREE *btree;
- WT_CELL *cell;
- WT_CELL_UNPACK *vpack, _vpack;
- WT_COL *cip;
- WT_CURSOR_BTREE *cbt;
- WT_DECL_ITEM(orig);
- WT_DECL_RET;
- WT_INSERT *ins;
- WT_ITEM *last;
- WT_PAGE *page;
- WT_UPDATE *upd;
- uint64_t n, nrepeat, repeat_count, rle, skip, src_recno;
- uint32_t i, size;
- bool deleted, last_deleted, orig_deleted, update_no_copy;
- const void *data;
-
- btree = S2BT(session);
- page = pageref->page;
- last = r->last;
- vpack = &_vpack;
- cbt = &r->update_modify_cbt;
-
- WT_RET(__rec_split_init(session,
- r, page, pageref->ref_recno, btree->maxleafpage_precomp));
-
- WT_RET(__wt_scr_alloc(session, 0, &orig));
- data = NULL;
- size = 0;
- upd = NULL;
-
- /*
- * The salvage code may be calling us to reconcile a page where there
- * were missing records in the column-store name space. If taking the
- * first record from on the page, it might be a deleted record, so we
- * have to give the RLE code a chance to figure that out. Else, if
- * not taking the first record from the page, write a single element
- * representing the missing records onto a new page. (Don't pass the
- * salvage cookie to our helper function in this case, we're handling
- * one of the salvage cookie fields on our own, and we don't need the
- * helper function's assistance.)
- */
- rle = 0;
- last_deleted = false;
- if (salvage != NULL && salvage->missing != 0) {
- if (salvage->skip == 0) {
- rle = salvage->missing;
- last_deleted = true;
-
- /*
- * Correct the number of records we're going to "take",
- * pretending the missing records were on the page.
- */
- salvage->take += salvage->missing;
- } else
- WT_ERR(__rec_col_var_helper(session,
- r, NULL, NULL, true, false, salvage->missing));
- }
-
- /*
- * We track two data items through this loop: the previous (last) item
- * and the current item: if the last item is the same as the current
- * item, we increment the RLE count for the last item; if the last item
- * is different from the current item, we write the last item onto the
- * page, and replace it with the current item. The r->recno counter
- * tracks records written to the page, and is incremented by the helper
- * function immediately after writing records to the page. The record
- * number of our source record, that is, the current item, is maintained
- * in src_recno.
- */
- src_recno = r->recno + rle;
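/*
 * Illustration (not in the original source): for the value sequence
 * A, A, A, B the first A becomes the tracked "last" item with rle = 1,
 * the next two A's compare equal and only bump rle to 3, and B compares
 * unequal, so the helper writes A with an RLE count of 3 and B replaces
 * it as the tracked item with rle reset to 1.
 */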
-
- /* For each entry in the in-memory page... */
- WT_COL_FOREACH(page, cip, i) {
- ovfl_state = OVFL_IGNORE;
- if ((cell = WT_COL_PTR(page, cip)) == NULL) {
- nrepeat = 1;
- ins = NULL;
- orig_deleted = true;
- } else {
- __wt_cell_unpack(cell, vpack);
- nrepeat = __wt_cell_rle(vpack);
- ins = WT_SKIP_FIRST(WT_COL_UPDATE(page, cip));
-
- /*
- * If the original value is "deleted", there's no value
- * to compare and we're done.
- */
- orig_deleted = vpack->type == WT_CELL_DEL;
- if (orig_deleted)
- goto record_loop;
-
- /*
- * Overflow items are tricky: we don't know until we're
- * finished processing the set of values if we need the
- * overflow value or not. If we don't use the overflow
- * item at all, we have to discard it from the backing
- * file, otherwise we'll leak blocks on the checkpoint.
- * That's safe because if the backing overflow value is
- * still needed by any running transaction, we'll cache
- * a copy in the update list.
- *
- * Regardless, we avoid copying in overflow records: if
- * there's a WT_INSERT entry that modifies a reference
- * counted overflow record, we may have to write copies
- * of the overflow record, and in that case we'll do the
- * comparisons, but we don't read overflow items just to
- * see if they match records on either side.
- */
- if (vpack->ovfl) {
- ovfl_state = OVFL_UNUSED;
- goto record_loop;
- }
-
- /*
- * If data is Huffman encoded, we have to decode it in
- * order to compare it with the last item we saw, which
- * may have been an update string. This guarantees we
- * find every single pair of objects we can RLE encode,
- * including applications updating an existing record
- * where the new value happens to match a Huffman-
- * encoded value in a previous or next record.
- */
- WT_ERR(__wt_dsk_cell_data_ref(
- session, WT_PAGE_COL_VAR, vpack, orig));
- }
-
-record_loop: /*
- * Generate on-page entries: loop through repeat records, looking for
- * WT_INSERT entries matching the record number. The WT_INSERT
- * lists are in sorted order, so we only need to check the next one.
- */
- for (n = 0;
- n < nrepeat; n += repeat_count, src_recno += repeat_count) {
- upd = NULL;
- if (ins != NULL && WT_INSERT_RECNO(ins) == src_recno) {
- WT_ERR(__rec_txn_read(
- session, r, ins, cip, vpack, NULL, &upd));
- ins = WT_SKIP_NEXT(ins);
- }
-
- update_no_copy = true; /* No data copy */
- repeat_count = 1; /* Single record */
- deleted = false;
-
- if (upd != NULL) {
- switch (upd->type) {
- case WT_UPDATE_MODIFY:
- cbt->slot = WT_COL_SLOT(page, cip);
- WT_ERR(__wt_value_return_upd(
- session, cbt, upd,
- F_ISSET(r, WT_REC_VISIBLE_ALL)));
- data = cbt->iface.value.data;
- size = (uint32_t)cbt->iface.value.size;
- update_no_copy = false;
- break;
- case WT_UPDATE_STANDARD:
- data = upd->data;
- size = upd->size;
- break;
- case WT_UPDATE_TOMBSTONE:
- deleted = true;
- break;
- WT_ILLEGAL_VALUE_ERR(session, upd->type);
- }
- } else if (vpack->raw == WT_CELL_VALUE_OVFL_RM) {
- /*
- * If doing an update save and restore, and the
- * underlying value is a removed overflow value,
- * we end up here.
- *
- * If necessary, when the overflow value was
- * originally removed, reconciliation appended
- * a globally visible copy of the value to the
- * key's update list, meaning the on-page item
- * isn't accessed after page re-instantiation.
- *
- * Assert the case.
- */
- WT_ASSERT(session,
- F_ISSET(r, WT_REC_UPDATE_RESTORE));
-
- /*
- * The on-page value will never be accessed,
- * write a placeholder record.
- */
- data = "ovfl-unused";
- size = WT_STORE_SIZE(strlen("ovfl-unused"));
- } else {
- update_no_copy = false; /* Maybe data copy */
-
- /*
- * The repeat count is the number of records up
- * to the next WT_INSERT record, or up to the
- * end of the entry if we have no more WT_INSERT
- * records.
- */
- if (ins == NULL)
- repeat_count = nrepeat - n;
- else
- repeat_count =
- WT_INSERT_RECNO(ins) - src_recno;
-
- deleted = orig_deleted;
- if (deleted)
- goto compare;
-
- /*
- * If we are handling overflow items, use the
- * overflow item itself exactly once, after
- * which we have to copy it into a buffer and
- * from then on use a complete copy because we
- * are re-creating a new overflow record each
- * time.
- */
- switch (ovfl_state) {
- case OVFL_UNUSED:
- /*
- * An as-yet-unused overflow item.
- *
- * We're going to copy the on-page cell,
- * write out any record we're tracking.
- */
- if (rle != 0) {
- WT_ERR(__rec_col_var_helper(
- session, r, salvage, last,
- last_deleted, 0, rle));
- rle = 0;
- }
-
- last->data = vpack->data;
- last->size = vpack->size;
- WT_ERR(__rec_col_var_helper(
- session, r, salvage, last, false,
- WT_CELL_VALUE_OVFL, repeat_count));
-
- /* Track if page has overflow items. */
- r->ovfl_items = true;
-
- ovfl_state = OVFL_USED;
- continue;
- case OVFL_USED:
- /*
- * Original is an overflow item; we used
- * it for a key and now we need another
- * copy; read it into memory.
- */
- WT_ERR(__wt_dsk_cell_data_ref(session,
- WT_PAGE_COL_VAR, vpack, orig));
-
- ovfl_state = OVFL_IGNORE;
- /* FALLTHROUGH */
- case OVFL_IGNORE:
- /*
- * Original is an overflow item and we
- * were forced to copy it into memory,
- * or the original wasn't an overflow
- * item; use the data copied into orig.
- */
- data = orig->data;
- size = (uint32_t)orig->size;
- break;
- }
- }
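/*
 * Illustration (not in the original source): the overflow state machine
 * above in brief. The first record backed by the overflow cell writes the
 * on-page cell as-is (OVFL_UNUSED -> OVFL_USED); if a later record needs
 * the value again, it is read into the "orig" buffer and every subsequent
 * use copies from that buffer (OVFL_USED -> OVFL_IGNORE), because each
 * further use would otherwise re-create a new overflow record.
 */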
-
-compare: /*
- * If we have a record against which to compare, and
- * the records compare equal, increment the rle counter
- * and continue. If the records don't compare equal,
- * output the last record and swap the last and current
- * buffers: do NOT update the starting record number,
- * we've been doing that all along.
- */
- if (rle != 0) {
- if ((deleted && last_deleted) ||
- (!last_deleted && !deleted &&
- last->size == size &&
- memcmp(last->data, data, size) == 0)) {
- rle += repeat_count;
- continue;
- }
- WT_ERR(__rec_col_var_helper(session, r,
- salvage, last, last_deleted, 0, rle));
- }
-
- /*
- * Swap the current/last state.
- *
- * Reset RLE counter and turn on comparisons.
- */
- if (!deleted) {
- /*
- * We can't simply assign the data values into
- * the last buffer because they may have come
- * from a copy built from an encoded/overflow
- * cell and creating the next record is going
- * to overwrite that memory. Check, because
- * encoded/overflow cells aren't that common
- * and we'd like to avoid the copy. If data
- * was taken from the current unpack structure
- * (which points into the page), or was taken
- * from an update structure, we can just use
- * the pointers, they're not moving.
- */
- if (data == vpack->data || update_no_copy) {
- last->data = data;
- last->size = size;
- } else
- WT_ERR(__wt_buf_set(
- session, last, data, size));
- }
- last_deleted = deleted;
- rle = repeat_count;
- }
-
- /*
- * The first time we find an overflow record we never used,
- * discard the underlying blocks, they're no longer useful.
- */
- if (ovfl_state == OVFL_UNUSED &&
- vpack->raw != WT_CELL_VALUE_OVFL_RM)
- WT_ERR(__wt_ovfl_remove(
- session, page, vpack, F_ISSET(r, WT_REC_EVICT)));
- }
-
- /* Walk any append list. */
- for (ins =
- WT_SKIP_FIRST(WT_COL_APPEND(page));; ins = WT_SKIP_NEXT(ins)) {
- if (ins == NULL) {
- /*
- * If the page split, instantiate any missing records in
- * the page's name space. (Imagine record 98 is
- * transactionally visible, 99 wasn't created or is not
- * yet visible, 100 is visible. Then the page splits and
- * record 100 moves to another page. When we reconcile
- * the original page, we write record 98, then we don't
- * see record 99 for whatever reason. If we've moved
- * record 100, we don't know to write a deleted record
- * 99 on the page.)
- *
- * Assert the recorded record number is past the end of
- * the page.
- *
- * The record number recorded during the split is the
- * first key on the split page, that is, one larger than
- * the last key on this page, so we have to decrement it.
- */
- if ((n = page->
- modify->mod_col_split_recno) == WT_RECNO_OOB)
- break;
- WT_ASSERT(session, n >= src_recno);
- n -= 1;
-
- upd = NULL;
- } else {
- WT_ERR(__rec_txn_read(
- session, r, ins, NULL, NULL, NULL, &upd));
- n = WT_INSERT_RECNO(ins);
- }
- while (src_recno <= n) {
- deleted = false;
- update_no_copy = true;
-
- /*
- * The application may have inserted records which left
- * gaps in the name space, and these gaps can be huge.
- * If we're in a set of deleted records, skip the boring
- * part.
- */
- if (src_recno < n) {
- deleted = true;
- if (last_deleted) {
- /*
- * The record adjustment is decremented
- * by one so we can naturally fall into
- * the RLE accounting below, where we
- * increment rle by one, then continue
- * in the outer loop, where we increment
- * src_recno by one.
- */
- skip = (n - src_recno) - 1;
- rle += skip;
- src_recno += skip;
- }
- } else if (upd == NULL)
- deleted = true;
- else
- switch (upd->type) {
- case WT_UPDATE_MODIFY:
- /*
- * Impossible slot, there's no backing
- * on-page item.
- */
- cbt->slot = UINT32_MAX;
- WT_ERR(__wt_value_return_upd(
- session, cbt, upd,
- F_ISSET(r, WT_REC_VISIBLE_ALL)));
- data = cbt->iface.value.data;
- size = (uint32_t)cbt->iface.value.size;
- update_no_copy = false;
- break;
- case WT_UPDATE_STANDARD:
- data = upd->data;
- size = upd->size;
- break;
- case WT_UPDATE_TOMBSTONE:
- deleted = true;
- break;
- WT_ILLEGAL_VALUE_ERR(session, upd->type);
- }
-
- /*
- * Handle RLE accounting and comparisons -- see comment
- * above, this code fragment does the same thing.
- */
- if (rle != 0) {
- if ((deleted && last_deleted) ||
- (!last_deleted && !deleted &&
- last->size == size &&
- memcmp(last->data, data, size) == 0)) {
- ++rle;
- goto next;
- }
- WT_ERR(__rec_col_var_helper(session, r,
- salvage, last, last_deleted, 0, rle));
- }
-
- /*
- * Swap the current/last state. We can't simply assign
- * the data values into the last buffer because they may
- * be a temporary copy built from a chain of modified
- * updates and creating the next record will overwrite
- * that memory. Check, we'd like to avoid the copy. If
- * data was taken from an update structure, we can just
- * use the pointers, they're not moving.
- */
- if (!deleted) {
- if (update_no_copy) {
- last->data = data;
- last->size = size;
- } else
- WT_ERR(__wt_buf_set(
- session, last, data, size));
- }
-
- /* Ready for the next loop, reset the RLE counter. */
- last_deleted = deleted;
- rle = 1;
-
- /*
- * Move to the next record. It's not a simple increment
- * because if it's the maximum record, incrementing it
- * wraps to 0 and this turns into an infinite loop.
- */
-next: if (src_recno == UINT64_MAX)
- break;
- ++src_recno;
- }
-
- /*
- * Execute this loop once without an insert item to catch any
- * missing records due to a split, then quit.
- */
- if (ins == NULL)
- break;
- }
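/*
 * Illustration (not in the original source): a worked example of the
 * deleted-gap skip in the append loop above. If the last item written was
 * deleted, src_recno is 101 and the next insert is at record 1000, then
 * skip = (1000 - 101) - 1 = 898, rle and src_recno jump by 898, the normal
 * RLE accounting adds one more, and the loop lands exactly on record 1000
 * to process the insert itself.
 */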
-
- /* If we were tracking a record, write it. */
- if (rle != 0)
- WT_ERR(__rec_col_var_helper(
- session, r, salvage, last, last_deleted, 0, rle));
-
- /* Write the remnant page. */
- ret = __rec_split_finish(session, r);
-
-err: __wt_scr_free(session, &orig);
- return (ret);
-}
-
-/*
- * __rec_row_int --
- * Reconcile a row-store internal page.
- */
-static int
-__rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
-{
- WT_ADDR *addr;
- WT_BTREE *btree;
- WT_CELL *cell;
- WT_CELL_UNPACK *kpack, _kpack, *vpack, _vpack;
- WT_CHILD_STATE state;
- WT_DECL_RET;
- WT_IKEY *ikey;
- WT_KV *key, *val;
- WT_PAGE *child;
- WT_REF *ref;
- size_t size;
- u_int vtype;
- bool hazard, key_onpage_ovfl, ovfl_key;
- const void *p;
-
- btree = S2BT(session);
- child = NULL;
- hazard = false;
-
- key = &r->k;
- kpack = &_kpack;
- WT_CLEAR(*kpack); /* -Wuninitialized */
- val = &r->v;
- vpack = &_vpack;
- WT_CLEAR(*vpack); /* -Wuninitialized */
-
- ikey = NULL; /* -Wuninitialized */
- cell = NULL;
- key_onpage_ovfl = false;
-
- WT_RET(__rec_split_init(
- session, r, page, 0, btree->maxintlpage_precomp));
-
- /*
- * Ideally, we'd never store the 0th key on row-store internal pages
- * because it's never used during tree search and there's no reason
- * to waste the space. The problem is how we do splits: when we split,
- * we've potentially picked out several "split points" in the buffer
- * which is overflowing the maximum page size, and when the overflow
- * happens, we go back and physically split the buffer, at those split
- * points, into new pages. It would be both difficult and expensive
- * to re-process the 0th key at each split point to be an empty key,
- * so we don't do that. However, we are reconciling an internal page
- * for whatever reason, and the 0th key is known to be useless. We
- * truncate the key to a single byte instead of removing it entirely;
- * it simplifies various things in other parts of the code (we don't
- * have to special case transforming the page from its disk image to
- * its in-memory version, for example).
- */
- r->cell_zero = true;
-
- /* For each entry in the in-memory page... */
- WT_INTL_FOREACH_BEGIN(session, page, ref) {
- /*
- * There are different paths if the key is an overflow item vs.
- * a straight-forward on-page value. If an overflow item, we
- * would have instantiated it, and we can use that fact to set
- * things up.
- *
- * Note the cell reference and unpacked key cell are available
- * only in the case of an instantiated, off-page key, we don't
- * bother setting them if that's not possible.
- */
- if (F_ISSET_ATOMIC(page, WT_PAGE_OVERFLOW_KEYS)) {
- cell = NULL;
- key_onpage_ovfl = false;
- ikey = __wt_ref_key_instantiated(ref);
- if (ikey != NULL && ikey->cell_offset != 0) {
- cell =
- WT_PAGE_REF_OFFSET(page, ikey->cell_offset);
- __wt_cell_unpack(cell, kpack);
- key_onpage_ovfl = kpack->ovfl &&
- kpack->raw != WT_CELL_KEY_OVFL_RM;
- }
- }
-
- WT_ERR(__rec_child_modify(session, r, ref, &hazard, &state));
- addr = ref->addr;
- child = ref->page;
-
- switch (state) {
- case WT_CHILD_IGNORE:
- /*
- * Ignored child.
- *
- * Overflow keys referencing pages we're not writing are
- * no longer useful, schedule them for discard. Don't
- * worry about instantiation, internal page keys are
- * always instantiated. Don't worry about reuse,
- * reusing this key in this reconciliation is unlikely.
- */
- if (key_onpage_ovfl)
- WT_ERR(__wt_ovfl_discard_add(
- session, page, kpack->cell));
- WT_CHILD_RELEASE_ERR(session, hazard, ref);
- continue;
-
- case WT_CHILD_MODIFIED:
- /*
- * Modified child. Empty pages are merged into the
- * parent and discarded.
- */
- switch (child->modify->rec_result) {
- case WT_PM_REC_EMPTY:
- /*
- * Overflow keys referencing empty pages are no
- * longer useful, schedule them for discard.
- * Don't worry about instantiation, internal
- * page keys are always instantiated. Don't
- * worry about reuse, reusing this key in this
- * reconciliation is unlikely.
- */
- if (key_onpage_ovfl)
- WT_ERR(__wt_ovfl_discard_add(
- session, page, kpack->cell));
- WT_CHILD_RELEASE_ERR(session, hazard, ref);
- continue;
- case WT_PM_REC_MULTIBLOCK:
- /*
- * Overflow keys referencing split pages are no
- * longer useful (the split page's key is the
- * interesting key); schedule them for discard.
- * Don't worry about instantiation, internal
- * page keys are always instantiated. Don't
- * worry about reuse, reusing this key in this
- * reconciliation is unlikely.
- */
- if (key_onpage_ovfl)
- WT_ERR(__wt_ovfl_discard_add(
- session, page, kpack->cell));
-
- WT_ERR(__rec_row_merge(session, r, child));
- WT_CHILD_RELEASE_ERR(session, hazard, ref);
- continue;
- case WT_PM_REC_REPLACE:
- /*
- * If the page is replaced, the page's modify
- * structure has the page's address.
- */
- addr = &child->modify->mod_replace;
- break;
- WT_ILLEGAL_VALUE_ERR(
- session, child->modify->rec_result);
- }
- break;
- case WT_CHILD_ORIGINAL:
- /* Original child. */
- break;
- case WT_CHILD_PROXY:
- /* Deleted child where we write a proxy cell. */
- break;
- }
-
- /*
- * Build the value cell, the child page's address. Addr points
- * to an on-page cell or an off-page WT_ADDR structure. There's
- * a special cell type in the case of page deletion requiring
- * a proxy cell, otherwise use the information from the addr or
- * original cell.
- */
- if (__wt_off_page(page, addr)) {
- p = addr->addr;
- size = addr->size;
- vtype = state == WT_CHILD_PROXY ?
- WT_CELL_ADDR_DEL : __rec_vtype(addr);
- } else {
- __wt_cell_unpack(ref->addr, vpack);
- p = vpack->data;
- size = vpack->size;
- vtype = state == WT_CHILD_PROXY ?
- WT_CELL_ADDR_DEL : (u_int)vpack->raw;
- }
- __rec_cell_build_addr(session, r, p, size, vtype, WT_RECNO_OOB);
- WT_CHILD_RELEASE_ERR(session, hazard, ref);
-
- /*
- * Build key cell.
- * Truncate any 0th key, internal pages don't need 0th keys.
- */
- if (key_onpage_ovfl) {
- key->buf.data = cell;
- key->buf.size = __wt_cell_total_len(kpack);
- key->cell_len = 0;
- key->len = key->buf.size;
- ovfl_key = true;
- } else {
- __wt_ref_key(page, ref, &p, &size);
- WT_ERR(__rec_cell_build_int_key(
- session, r, p, r->cell_zero ? 1 : size, &ovfl_key));
- }
- r->cell_zero = false;
-
- /* Boundary: split or write the page. */
- if (__rec_need_split(r, key->len + val->len)) {
- /*
- * In one path above, we copied address blocks from the
- * page rather than building the actual key. In that
- * case, we have to build the key now because we are
- * about to promote it.
- */
- if (key_onpage_ovfl) {
- WT_ERR(__wt_buf_set(session, r->cur,
- WT_IKEY_DATA(ikey), ikey->size));
- key_onpage_ovfl = false;
- }
-
- WT_ERR(__rec_split_crossing_bnd(
- session, r, key->len + val->len));
- }
-
- /* Copy the key and value onto the page. */
- __rec_copy_incr(session, r, key);
- __rec_copy_incr(session, r, val);
-
- /* Update compression state. */
- __rec_key_state_update(r, ovfl_key);
- } WT_INTL_FOREACH_END;
-
- /* Write the remnant page. */
- return (__rec_split_finish(session, r));
-
-err: WT_CHILD_RELEASE(session, hazard, ref);
- return (ret);
-}
-
-/*
- * __rec_row_merge --
- * Merge in a split page.
- */
-static int
-__rec_row_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
-{
- WT_ADDR *addr;
- WT_KV *key, *val;
- WT_MULTI *multi;
- WT_PAGE_MODIFY *mod;
- uint32_t i;
- bool ovfl_key;
-
- mod = page->modify;
-
- key = &r->k;
- val = &r->v;
-
- /* For each entry in the split array... */
- for (multi = mod->mod_multi,
- i = 0; i < mod->mod_multi_entries; ++multi, ++i) {
- /* Build the key and value cells. */
- WT_RET(__rec_cell_build_int_key(session, r,
- WT_IKEY_DATA(multi->key.ikey),
- r->cell_zero ? 1 : multi->key.ikey->size, &ovfl_key));
- r->cell_zero = false;
-
- addr = &multi->addr;
- __rec_cell_build_addr(session, r,
- addr->addr, addr->size, __rec_vtype(addr), WT_RECNO_OOB);
-
- /* Boundary: split or write the page. */
- if (__rec_need_split(r, key->len + val->len))
- WT_RET(__rec_split_crossing_bnd(
- session, r, key->len + val->len));
-
- /* Copy the key and value onto the page. */
- __rec_copy_incr(session, r, key);
- __rec_copy_incr(session, r, val);
-
- /* Update compression state. */
- __rec_key_state_update(r, ovfl_key);
- }
- return (0);
-}
-
-/*
- * __rec_row_leaf --
- * Reconcile a row-store leaf page.
- */
-static int
-__rec_row_leaf(WT_SESSION_IMPL *session,
- WT_RECONCILE *r, WT_PAGE *page, WT_SALVAGE_COOKIE *salvage)
-{
- WT_BTREE *btree;
- WT_CELL *cell;
- WT_CELL_UNPACK *kpack, _kpack, *vpack, _vpack;
- WT_CURSOR_BTREE *cbt;
- WT_DECL_ITEM(tmpkey);
- WT_DECL_ITEM(tmpval);
- WT_DECL_RET;
- WT_IKEY *ikey;
- WT_INSERT *ins;
- WT_KV *key, *val;
- WT_ROW *rip;
- WT_UPDATE *upd;
- size_t size;
- uint64_t slvg_skip;
- uint32_t i;
- bool dictionary, key_onpage_ovfl, ovfl_key;
- void *copy;
- const void *p;
-
- btree = S2BT(session);
- cbt = &r->update_modify_cbt;
- slvg_skip = salvage == NULL ? 0 : salvage->skip;
-
- key = &r->k;
- val = &r->v;
- vpack = &_vpack;
-
- WT_RET(__rec_split_init(
- session, r, page, 0, btree->maxleafpage_precomp));
-
- /*
- * Write any K/V pairs inserted into the page before the first from-disk
- * key on the page.
- */
- if ((ins = WT_SKIP_FIRST(WT_ROW_INSERT_SMALLEST(page))) != NULL)
- WT_RET(__rec_row_leaf_insert(session, r, ins));
-
- /*
- * Temporary buffers in which to instantiate any uninstantiated keys
- * or value items we need.
- */
- WT_ERR(__wt_scr_alloc(session, 0, &tmpkey));
- WT_ERR(__wt_scr_alloc(session, 0, &tmpval));
-
- /* For each entry in the page... */
- WT_ROW_FOREACH(page, rip, i) {
- /*
- * The salvage code, on some rare occasions, wants to reconcile
- * a page but skip some leading records on the page. Because
- * the row-store leaf reconciliation function copies keys from
- * the original disk page, this is non-trivial -- just changing
- * the in-memory pointers isn't sufficient, we have to change
- * the WT_CELL structures on the disk page, too. It's ugly, but
- * we pass in a value that tells us how many records to skip in
- * this case.
- */
- if (slvg_skip != 0) {
- --slvg_skip;
- continue;
- }
-
- /*
- * Figure out the key: set any cell reference (and unpack it),
- * set any instantiated key reference.
- */
- copy = WT_ROW_KEY_COPY(rip);
- (void)__wt_row_leaf_key_info(
- page, copy, &ikey, &cell, NULL, NULL);
- if (cell == NULL)
- kpack = NULL;
- else {
- kpack = &_kpack;
- __wt_cell_unpack(cell, kpack);
- }
-
- /* Unpack the on-page value cell, and look for an update. */
- __wt_row_leaf_value_cell(page, rip, NULL, vpack);
- WT_ERR(__rec_txn_read(
- session, r, NULL, rip, vpack, NULL, &upd));
-
- /* Build value cell. */
- dictionary = false;
- if (upd == NULL) {
- /*
- * When the page was read into memory, there may not
- * have been a value item.
- *
- * If there was a value item, check if it's a dictionary
- * cell (a copy of another item on the page). If it's a
- * copy, we have to create a new value item as the old
- * item might have been discarded from the page.
- */
- if (vpack->raw == WT_CELL_VALUE_COPY) {
- /* If the item is Huffman encoded, decode it. */
- if (btree->huffman_value == NULL) {
- p = vpack->data;
- size = vpack->size;
- } else {
- WT_ERR(__wt_huffman_decode(session,
- btree->huffman_value,
- vpack->data, vpack->size,
- tmpval));
- p = tmpval->data;
- size = tmpval->size;
- }
- WT_ERR(__rec_cell_build_val(
- session, r, p, size, (uint64_t)0));
- dictionary = true;
- } else if (vpack->raw == WT_CELL_VALUE_OVFL_RM) {
- /*
- * If doing an update save and restore, and the
- * underlying value is a removed overflow value,
- * we end up here.
- *
- * If necessary, when the overflow value was
- * originally removed, reconciliation appended
- * a globally visible copy of the value to the
- * key's update list, meaning the on-page item
- * isn't accessed after page re-instantiation.
- *
- * Assert the case.
- */
- WT_ASSERT(session,
- F_ISSET(r, WT_REC_UPDATE_RESTORE));
-
- /*
- * If the key is also a removed overflow item,
- * don't write anything at all.
- *
- * We don't have to write anything because the
- * code re-instantiating the page gets the key
- * to match the saved list of updates from the
- * original page. By not putting the key on
- * the page, we'll move the key/value set from
- * a row-store leaf page slot to an insert list,
- * but that shouldn't matter.
- *
- * The reason we bother with the test is because
- * overflows are expensive to write. It's hard
- * to imagine a real workload where this test is
- * worth the effort, but it's a simple test.
- */
- if (kpack != NULL &&
- kpack->raw == WT_CELL_KEY_OVFL_RM)
- goto leaf_insert;
-
- /*
- * The on-page value will never be accessed,
- * write a placeholder record.
- */
- WT_ERR(__rec_cell_build_val(session, r,
- "ovfl-unused", strlen("ovfl-unused"),
- (uint64_t)0));
- } else {
- val->buf.data = vpack->cell;
- val->buf.size = __wt_cell_total_len(vpack);
- val->cell_len = 0;
- val->len = val->buf.size;
-
- /* Track if page has overflow items. */
- if (vpack->ovfl)
- r->ovfl_items = true;
- }
- } else {
- /*
- * The first time we find an overflow record we're not
- * going to use, discard the underlying blocks.
- */
- if (vpack->ovfl && vpack->raw != WT_CELL_VALUE_OVFL_RM)
- WT_ERR(__wt_ovfl_remove(session,
- page, vpack, F_ISSET(r, WT_REC_EVICT)));
-
- switch (upd->type) {
- case WT_UPDATE_MODIFY:
- cbt->slot = WT_ROW_SLOT(page, rip);
- WT_ERR(__wt_value_return_upd(session, cbt, upd,
- F_ISSET(r, WT_REC_VISIBLE_ALL)));
- WT_ERR(__rec_cell_build_val(session, r,
- cbt->iface.value.data,
- cbt->iface.value.size, (uint64_t)0));
- dictionary = true;
- break;
- case WT_UPDATE_STANDARD:
- /*
- * If no value, nothing needs to be copied.
- * Otherwise, build the value's chunk from the
- * update value.
- */
- if (upd->size == 0) {
- val->buf.data = NULL;
- val->cell_len =
- val->len = val->buf.size = 0;
- } else {
- WT_ERR(__rec_cell_build_val(session, r,
- upd->data, upd->size,
- (uint64_t)0));
- dictionary = true;
- }
- break;
- case WT_UPDATE_TOMBSTONE:
- /*
- * If this key/value pair was deleted, we're
- * done.
- *
- * Overflow keys referencing discarded values
- * are no longer useful, discard the backing
- * blocks. Don't worry about reuse, reusing
- * keys from a row-store page reconciliation
- * seems unlikely enough to ignore.
- */
- if (kpack != NULL && kpack->ovfl &&
- kpack->raw != WT_CELL_KEY_OVFL_RM) {
- /*
- * Keys are part of the name-space, we
- * can't remove them from the in-memory
- * tree; if an overflow key was deleted
- * without being instantiated (for
- * example, cursor-based truncation), do
- * it now.
- */
- if (ikey == NULL)
- WT_ERR(__wt_row_leaf_key(
- session,
- page, rip, tmpkey, true));
-
- WT_ERR(__wt_ovfl_discard_add(
- session, page, kpack->cell));
- }
-
- /*
- * We aren't actually creating the key so we
- * can't use bytes from this key to provide
- * prefix information for a subsequent key.
- */
- tmpkey->size = 0;
-
- /* Proceed with appended key/value pairs. */
- goto leaf_insert;
- WT_ILLEGAL_VALUE_ERR(session, upd->type);
- }
- }
-
- /*
- * Build key cell.
- *
- * If the key is an overflow key that hasn't been removed, use
- * the original backing blocks.
- */
- key_onpage_ovfl = kpack != NULL &&
- kpack->ovfl && kpack->raw != WT_CELL_KEY_OVFL_RM;
- if (key_onpage_ovfl) {
- key->buf.data = cell;
- key->buf.size = __wt_cell_total_len(kpack);
- key->cell_len = 0;
- key->len = key->buf.size;
- ovfl_key = true;
-
- /*
- * We aren't creating a key so we can't use this key as
- * a prefix for a subsequent key.
- */
- tmpkey->size = 0;
-
- /* Track if page has overflow items. */
- r->ovfl_items = true;
- } else {
- /*
- * Get the key from the page or an instantiated key, or
- * inline building the key from a previous key (it's a
- * fast path for simple, prefix-compressed keys), or by
- * building the key from scratch.
- */
- if (__wt_row_leaf_key_info(page, copy,
- NULL, &cell, &tmpkey->data, &tmpkey->size))
- goto build;
-
- kpack = &_kpack;
- __wt_cell_unpack(cell, kpack);
- if (btree->huffman_key == NULL &&
- kpack->type == WT_CELL_KEY &&
- tmpkey->size >= kpack->prefix) {
- /*
- * The previous clause checked for a prefix of
- * zero, which means the temporary buffer must
- * have a non-zero size, and it references a
- * valid key.
- */
- WT_ASSERT(session, tmpkey->size != 0);
-
- /*
- * Grow the buffer as necessary, ensuring the
- * data has been copied into local buffer space,
- * then append the suffix to the prefix already
- * in the buffer.
- *
- * Don't grow the buffer unnecessarily or copy
- * data we don't need, truncate the item's data
- * length to the prefix bytes.
- */
- tmpkey->size = kpack->prefix;
- WT_ERR(__wt_buf_grow(session,
- tmpkey, tmpkey->size + kpack->size));
- memcpy((uint8_t *)tmpkey->mem + tmpkey->size,
- kpack->data, kpack->size);
- tmpkey->size += kpack->size;
- } else
- WT_ERR(__wt_row_leaf_key_copy(
- session, page, rip, tmpkey));
-build:
- WT_ERR(__rec_cell_build_leaf_key(session, r,
- tmpkey->data, tmpkey->size, &ovfl_key));
- }
-
- /* Boundary: split or write the page. */
- if (__rec_need_split(r, key->len + val->len)) {
- /*
- * If we copied address blocks from the page rather than
- * building the actual key, we have to build the key now
- * because we are about to promote it.
- */
- if (key_onpage_ovfl) {
- WT_ERR(__wt_dsk_cell_data_ref(session,
- WT_PAGE_ROW_LEAF, kpack, r->cur));
- WT_NOT_READ(key_onpage_ovfl, false);
- }
-
- /*
- * Turn off prefix compression until a full key is written
- * to the new page, and (unless already working with an
- * overflow key) rebuild the key without compression.
- */
- if (r->key_pfx_compress_conf) {
- r->key_pfx_compress = false;
- if (!ovfl_key)
- WT_ERR(__rec_cell_build_leaf_key(
- session, r, NULL, 0, &ovfl_key));
- }
-
- WT_ERR(__rec_split_crossing_bnd(
- session, r, key->len + val->len));
- }
-
- /* Copy the key/value pair onto the page. */
- __rec_copy_incr(session, r, key);
- if (val->len == 0)
- r->any_empty_value = true;
- else {
- r->all_empty_value = false;
- if (dictionary && btree->dictionary)
- WT_ERR(__rec_dict_replace(session, r, 0, val));
- __rec_copy_incr(session, r, val);
- }
-
- /* Update compression state. */
- __rec_key_state_update(r, ovfl_key);
-
-leaf_insert: /* Write any K/V pairs inserted into the page after this key. */
- if ((ins = WT_SKIP_FIRST(WT_ROW_INSERT(page, rip))) != NULL)
- WT_ERR(__rec_row_leaf_insert(session, r, ins));
- }
-
- /* Write the remnant page. */
- ret = __rec_split_finish(session, r);
-
-err: __wt_scr_free(session, &tmpkey);
- __wt_scr_free(session, &tmpval);
- return (ret);
-}
-
-/*
- * __rec_row_leaf_insert --
- * Walk an insert chain, writing K/V pairs.
- */
-static int
-__rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins)
-{
- WT_BTREE *btree;
- WT_CURSOR_BTREE *cbt;
- WT_KV *key, *val;
- WT_UPDATE *upd;
- bool ovfl_key, upd_saved;
-
- btree = S2BT(session);
- cbt = &r->update_modify_cbt;
-
- key = &r->k;
- val = &r->v;
-
- for (; ins != NULL; ins = WT_SKIP_NEXT(ins)) {
- WT_RET(__rec_txn_read(
- session, r, ins, NULL, NULL, &upd_saved, &upd));
-
- if (upd == NULL) {
- /*
- * If no update is visible but some were saved, check
- * for splits.
- */
- if (!upd_saved)
- continue;
- if (!__rec_need_split(r, WT_INSERT_KEY_SIZE(ins)))
- continue;
-
- /* Copy the current key into place and then split. */
- WT_RET(__wt_buf_set(session, r->cur,
- WT_INSERT_KEY(ins), WT_INSERT_KEY_SIZE(ins)));
- WT_RET(__rec_split_crossing_bnd(
- session, r, WT_INSERT_KEY_SIZE(ins)));
-
- /*
- * Turn off prefix and suffix compression until a full
- * key is written into the new page.
- */
- r->key_pfx_compress = r->key_sfx_compress = false;
- continue;
- }
-
- switch (upd->type) {
- case WT_UPDATE_MODIFY:
- /*
- * Impossible slot, there's no backing on-page
- * item.
- */
- cbt->slot = UINT32_MAX;
- WT_RET(__wt_value_return_upd(
- session, cbt, upd, F_ISSET(r, WT_REC_VISIBLE_ALL)));
- WT_RET(__rec_cell_build_val(session, r,
- cbt->iface.value.data,
- cbt->iface.value.size, (uint64_t)0));
- break;
- case WT_UPDATE_STANDARD:
- if (upd->size == 0)
- val->len = 0;
- else
- WT_RET(__rec_cell_build_val(session,
- r, upd->data, upd->size,
- (uint64_t)0));
- break;
- case WT_UPDATE_TOMBSTONE:
- continue;
- WT_ILLEGAL_VALUE(session, upd->type);
- }
-
- /* Build key cell. */
- WT_RET(__rec_cell_build_leaf_key(session, r,
- WT_INSERT_KEY(ins), WT_INSERT_KEY_SIZE(ins), &ovfl_key));
-
- /* Boundary: split or write the page. */
- if (__rec_need_split(r, key->len + val->len)) {
- /*
- * Turn off prefix compression until a full key is written
- * to the new page, and (unless already working with an
- * overflow key) rebuild the key without compression.
- */
- if (r->key_pfx_compress_conf) {
- r->key_pfx_compress = false;
- if (!ovfl_key)
- WT_RET(__rec_cell_build_leaf_key(
- session, r, NULL, 0, &ovfl_key));
- }
-
- WT_RET(__rec_split_crossing_bnd(
- session, r, key->len + val->len));
- }
-
- /* Copy the key/value pair onto the page. */
- __rec_copy_incr(session, r, key);
- if (val->len == 0)
- r->any_empty_value = true;
- else {
- r->all_empty_value = false;
- if (btree->dictionary)
- WT_RET(__rec_dict_replace(session, r, 0, val));
- __rec_copy_incr(session, r, val);
- }
-
- /* Update compression state. */
- __rec_key_state_update(r, ovfl_key);
- }
-
- return (0);
-}
-
-/*
* __rec_split_discard --
* Discard the pages resulting from a previous split.
*/
@@ -5649,232 +2517,12 @@ __rec_las_wrapup_err(WT_SESSION_IMPL *session, WT_RECONCILE *r)
}
/*
- * __rec_cell_build_int_key --
- * Process a key and return a WT_CELL structure and byte string to be
- * stored on a row-store internal page.
- */
-static int
-__rec_cell_build_int_key(WT_SESSION_IMPL *session,
- WT_RECONCILE *r, const void *data, size_t size, bool *is_ovflp)
-{
- WT_BTREE *btree;
- WT_KV *key;
-
- *is_ovflp = false;
-
- btree = S2BT(session);
-
- key = &r->k;
-
- /* Copy the bytes into the "current" and key buffers. */
- WT_RET(__wt_buf_set(session, r->cur, data, size));
- WT_RET(__wt_buf_set(session, &key->buf, data, size));
-
- /* Create an overflow object if the data won't fit. */
- if (size > btree->maxintlkey) {
- WT_STAT_DATA_INCR(session, rec_overflow_key_internal);
-
- *is_ovflp = true;
- return (__rec_cell_build_ovfl(
- session, r, key, WT_CELL_KEY_OVFL, (uint64_t)0));
- }
-
- key->cell_len = __wt_cell_pack_int_key(&key->cell, key->buf.size);
- key->len = key->cell_len + key->buf.size;
-
- return (0);
-}
-
-/*
- * __rec_cell_build_leaf_key --
- * Process a key and return a WT_CELL structure and byte string to be
- * stored on a row-store leaf page.
- */
-static int
-__rec_cell_build_leaf_key(WT_SESSION_IMPL *session,
- WT_RECONCILE *r, const void *data, size_t size, bool *is_ovflp)
-{
- WT_BTREE *btree;
- WT_KV *key;
- size_t pfx_max;
- const uint8_t *a, *b;
- uint8_t pfx;
-
- *is_ovflp = false;
-
- btree = S2BT(session);
-
- key = &r->k;
-
- pfx = 0;
- if (data == NULL)
- /*
- * When data is NULL, our caller has a prefix compressed key
- * they can't use (probably because they just crossed a split
- * point). Use the full key saved when last called, instead.
- */
- WT_RET(__wt_buf_set(
- session, &key->buf, r->cur->data, r->cur->size));
- else {
- /*
- * Save a copy of the key for later reference: we use the full
- * key for prefix-compression comparisons, and if we are, for
- * any reason, unable to use the compressed key we generate.
- */
- WT_RET(__wt_buf_set(session, r->cur, data, size));
-
- /*
- * Do prefix compression on the key. We know by definition the
- * previous key sorts before the current key, which means the
- * keys must differ and we just need to compare up to the
- * shorter of the two keys.
- */
- if (r->key_pfx_compress) {
- /*
- * We can't compress out more than 256 bytes, limit the
- * comparison to that.
- */
- pfx_max = UINT8_MAX;
- if (size < pfx_max)
- pfx_max = size;
- if (r->last->size < pfx_max)
- pfx_max = r->last->size;
- for (a = data, b = r->last->data; pfx < pfx_max; ++pfx)
- if (*a++ != *b++)
- break;
-
- /*
- * Prefix compression may cost us CPU and memory when
- * the page is re-loaded, don't do it unless there's
- * reasonable gain.
- */
- if (pfx < btree->prefix_compression_min)
- pfx = 0;
- else
- WT_STAT_DATA_INCRV(
- session, rec_prefix_compression, pfx);
- }
-
- /* Copy the non-prefix bytes into the key buffer. */
- WT_RET(__wt_buf_set(
- session, &key->buf, (uint8_t *)data + pfx, size - pfx));
- }
-
- /* Optionally compress the key using the Huffman engine. */
- if (btree->huffman_key != NULL)
- WT_RET(__wt_huffman_encode(session, btree->huffman_key,
- key->buf.data, (uint32_t)key->buf.size, &key->buf));
-
- /* Create an overflow object if the data won't fit. */
- if (key->buf.size > btree->maxleafkey) {
- /*
- * Overflow objects aren't prefix compressed -- rebuild any
- * object that was prefix compressed.
- */
- if (pfx == 0) {
- WT_STAT_DATA_INCR(session, rec_overflow_key_leaf);
-
- *is_ovflp = true;
- return (__rec_cell_build_ovfl(
- session, r, key, WT_CELL_KEY_OVFL, (uint64_t)0));
- }
- return (
- __rec_cell_build_leaf_key(session, r, NULL, 0, is_ovflp));
- }
-
- key->cell_len = __wt_cell_pack_leaf_key(&key->cell, pfx, key->buf.size);
- key->len = key->cell_len + key->buf.size;
-
- return (0);
-}
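/*
 * Illustration (not in the original source): a minimal, standalone sketch
 * of the prefix-compression calculation above. The function name and the
 * pfx_min parameter are hypothetical stand-ins for r->key_pfx_compress
 * and btree->prefix_compression_min; only the comparison logic mirrors
 * the code.
 */
#include <stddef.h>
#include <stdint.h>

static size_t
prefix_compress_len(const uint8_t *prev, size_t prev_size,
    const uint8_t *cur, size_t cur_size, size_t pfx_min)
{
	size_t pfx, pfx_max;

	/* Compare no further than the shorter key or 256 bytes. */
	pfx_max = UINT8_MAX;
	if (cur_size < pfx_max)
		pfx_max = cur_size;
	if (prev_size < pfx_max)
		pfx_max = prev_size;

	for (pfx = 0; pfx < pfx_max; ++pfx)
		if (prev[pfx] != cur[pfx])
			break;

	/* A tiny shared prefix isn't worth the reload cost, drop it. */
	return (pfx < pfx_min ? 0 : pfx);
}
/*
 * For example, prev = "application" (11 bytes), cur = "applied" (7 bytes)
 * and pfx_min = 4 share the prefix "appli", so the function returns 5 and
 * the current key could be stored as a prefix count of 5 plus "ed".
 */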
-
-/*
- * __rec_cell_build_addr --
- * Process an address reference and return a cell structure to be stored
- * on the page.
- */
-static void
-__rec_cell_build_addr(WT_SESSION_IMPL *session, WT_RECONCILE *r,
- const void *addr, size_t size, u_int cell_type, uint64_t recno)
-{
- WT_KV *val;
-
- val = &r->v;
-
- WT_ASSERT(session, size != 0 || cell_type == WT_CELL_ADDR_DEL);
-
- /*
- * We don't check the address size because we can't store an address on
- * an overflow page: if the address won't fit, the overflow page's
- * address won't fit either. This possibility must be handled by Btree
- * configuration, we have to disallow internal page sizes that are too
- * small with respect to the largest address cookie the underlying block
- * manager might return.
- */
-
- /*
- * We don't copy the data into the buffer, it's not necessary; just
- * re-point the buffer's data/length fields.
- */
- val->buf.data = addr;
- val->buf.size = size;
- val->cell_len =
- __wt_cell_pack_addr(&val->cell, cell_type, recno, val->buf.size);
- val->len = val->cell_len + val->buf.size;
-}
-
-/*
- * __rec_cell_build_val --
- * Process a data item and return a WT_CELL structure and byte string to
- * be stored on the page.
- */
-static int
-__rec_cell_build_val(WT_SESSION_IMPL *session,
- WT_RECONCILE *r, const void *data, size_t size, uint64_t rle)
-{
- WT_BTREE *btree;
- WT_KV *val;
-
- btree = S2BT(session);
-
- val = &r->v;
-
- /*
- * We don't copy the data into the buffer, it's not necessary; just
- * re-point the buffer's data/length fields.
- */
- val->buf.data = data;
- val->buf.size = size;
-
- /* Handle zero-length cells quickly. */
- if (size != 0) {
- /* Optionally compress the data using the Huffman engine. */
- if (btree->huffman_value != NULL)
- WT_RET(__wt_huffman_encode(
- session, btree->huffman_value,
- val->buf.data, (uint32_t)val->buf.size, &val->buf));
-
- /* Create an overflow object if the data won't fit. */
- if (val->buf.size > btree->maxleafvalue) {
- WT_STAT_DATA_INCR(session, rec_overflow_value);
-
- return (__rec_cell_build_ovfl(
- session, r, val, WT_CELL_VALUE_OVFL, rle));
- }
- }
- val->cell_len = __wt_cell_pack_data(&val->cell, rle, val->buf.size);
- val->len = val->cell_len + val->buf.size;
-
- return (0);
-}
-
-/*
- * __rec_cell_build_ovfl --
+ * __wt_rec_cell_build_ovfl --
* Store overflow items in the file, returning the address cookie.
*/
-static int
-__rec_cell_build_ovfl(WT_SESSION_IMPL *session,
- WT_RECONCILE *r, WT_KV *kv, uint8_t type, uint64_t rle)
+int
+__wt_rec_cell_build_ovfl(WT_SESSION_IMPL *session,
+ WT_RECONCILE *r, WT_REC_KV *kv, uint8_t type, uint64_t rle)
{
WT_BM *bm;
WT_BTREE *btree;
@@ -5939,194 +2587,3 @@ __rec_cell_build_ovfl(WT_SESSION_IMPL *session,
err: __wt_scr_free(session, &tmp);
return (ret);
}
-
-/*
- * __rec_dictionary_skip_search --
- * Search a dictionary skiplist.
- */
-static WT_DICTIONARY *
-__rec_dictionary_skip_search(WT_DICTIONARY **head, uint64_t hash)
-{
- WT_DICTIONARY **e;
- int i;
-
- /*
- * Start at the highest skip level, then go as far as possible at each
- * level before stepping down to the next.
- */
- for (i = WT_SKIP_MAXDEPTH - 1, e = &head[i]; i >= 0;) {
- if (*e == NULL) { /* Empty levels */
- --i;
- --e;
- continue;
- }
-
- /*
- * Return any exact matches: we don't care in what search level
- * we found a match.
- */
- if ((*e)->hash == hash) /* Exact match */
- return (*e);
- if ((*e)->hash > hash) { /* Drop down a level */
- --i;
- --e;
- } else /* Keep going at this level */
- e = &(*e)->next[i];
- }
- return (NULL);
-}
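/*
 * Illustration (not in the original source): the search above in brief.
 * Starting at the highest level, the cursor keeps moving right while the
 * next entry's hash is smaller than the target, returns on an exact hash
 * match at any level, and otherwise drops a level; falling off the bottom
 * level without a match returns NULL.
 */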
-
-/*
- * __rec_dictionary_skip_search_stack --
- * Search a dictionary skiplist, returning an insert/remove stack.
- */
-static void
-__rec_dictionary_skip_search_stack(
- WT_DICTIONARY **head, WT_DICTIONARY ***stack, uint64_t hash)
-{
- WT_DICTIONARY **e;
- int i;
-
- /*
- * Start at the highest skip level, then go as far as possible at each
- * level before stepping down to the next.
- */
- for (i = WT_SKIP_MAXDEPTH - 1, e = &head[i]; i >= 0;)
- if (*e == NULL || (*e)->hash > hash)
- stack[i--] = e--; /* Drop down a level */
- else
- e = &(*e)->next[i]; /* Keep going at this level */
-}
-
-/*
- * __rec_dictionary_skip_insert --
- * Insert an entry into the dictionary skip-list.
- */
-static void
-__rec_dictionary_skip_insert(
- WT_DICTIONARY **head, WT_DICTIONARY *e, uint64_t hash)
-{
- WT_DICTIONARY **stack[WT_SKIP_MAXDEPTH];
- u_int i;
-
- /* Insert the new entry into the skiplist. */
- __rec_dictionary_skip_search_stack(head, stack, hash);
- for (i = 0; i < e->depth; ++i) {
- e->next[i] = *stack[i];
- *stack[i] = e;
- }
-}
-
-/*
- * __rec_dictionary_init --
- * Allocate and initialize the dictionary.
- */
-static int
-__rec_dictionary_init(WT_SESSION_IMPL *session, WT_RECONCILE *r, u_int slots)
-{
- u_int depth, i;
-
- /* Free any previous dictionary. */
- __rec_dictionary_free(session, r);
-
- r->dictionary_slots = slots;
- WT_RET(__wt_calloc(session,
- r->dictionary_slots, sizeof(WT_DICTIONARY *), &r->dictionary));
- for (i = 0; i < r->dictionary_slots; ++i) {
- depth = __wt_skip_choose_depth(session);
- WT_RET(__wt_calloc(session, 1,
- sizeof(WT_DICTIONARY) + depth * sizeof(WT_DICTIONARY *),
- &r->dictionary[i]));
- r->dictionary[i]->depth = depth;
- }
- return (0);
-}
-
-/*
- * __rec_dictionary_free --
- * Free the dictionary.
- */
-static void
-__rec_dictionary_free(WT_SESSION_IMPL *session, WT_RECONCILE *r)
-{
- u_int i;
-
- if (r->dictionary == NULL)
- return;
-
- /*
- * We don't correct dictionary_slots when we fail during allocation,
- * but that's OK, the value is either NULL or a memory reference to
- * be free'd.
- */
- for (i = 0; i < r->dictionary_slots; ++i)
- __wt_free(session, r->dictionary[i]);
- __wt_free(session, r->dictionary);
-}
-
-/*
- * __rec_dictionary_reset --
- * Reset the dictionary when reconciliation restarts and when crossing a
- * page boundary (a potential split).
- */
-static void
-__rec_dictionary_reset(WT_RECONCILE *r)
-{
- if (r->dictionary_slots) {
- r->dictionary_next = 0;
- memset(r->dictionary_head, 0, sizeof(r->dictionary_head));
- }
-}
-
-/*
- * __rec_dictionary_lookup --
- * Check the dictionary for a matching value on this page.
- */
-static int
-__rec_dictionary_lookup(
- WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_KV *val, WT_DICTIONARY **dpp)
-{
- WT_DICTIONARY *dp, *next;
- uint64_t hash;
- bool match;
-
- *dpp = NULL;
-
- /* Search the dictionary, and return any match we find. */
- hash = __wt_hash_fnv64(val->buf.data, val->buf.size);
- for (dp = __rec_dictionary_skip_search(r->dictionary_head, hash);
- dp != NULL && dp->hash == hash; dp = dp->next[0]) {
- WT_RET(__wt_cell_pack_data_match(
- (WT_CELL *)((uint8_t *)r->cur_ptr->image.mem + dp->offset),
- &val->cell, val->buf.data, &match));
- if (match) {
- WT_STAT_DATA_INCR(session, rec_dictionary);
- *dpp = dp;
- return (0);
- }
- }
-
- /*
- * We're not doing value replacement in the dictionary. We stop adding
- * new entries if we run out of empty dictionary slots (but continue to
- * use the existing entries). I can't think of any reason a leaf page
- * value is more likely to be seen because it was seen more recently
- * than some other value: if we find working sets where that's not the
- * case, it shouldn't be too difficult to maintain a pointer which is
- * the next dictionary slot to re-use.
- */
- if (r->dictionary_next >= r->dictionary_slots)
- return (0);
-
- /*
- * Set the hash value, we'll add this entry into the dictionary when we
- * write it into the page's disk image buffer (because that's when we
- * know where on the page it will be written).
- */
- next = r->dictionary[r->dictionary_next++];
- next->offset = 0; /* Not necessary, just cautious. */
- next->hash = hash;
- __rec_dictionary_skip_insert(r->dictionary_head, next, hash);
- *dpp = next;
- return (0);
-}
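/*
 * Illustration (not in the original source): when two identical values
 * land on the same page, the first lookup misses, claims an empty
 * dictionary slot and records the value's FNV-64 hash; the slot's offset
 * is filled in once the value is written into the page image. The second
 * lookup finds that slot by hash, verifies the bytes with
 * __wt_cell_pack_data_match(), and hands the entry back to
 * __rec_dict_replace() in the leaf-page paths above, which presumably
 * lets it reference the already-written value (the dictionary-cell case,
 * WT_CELL_VALUE_COPY, handled in __rec_row_leaf) rather than storing the
 * bytes twice.
 */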
diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c
index 534d598b3f3..8160ef92bbd 100644
--- a/src/third_party/wiredtiger/src/support/stat.c
+++ b/src/third_party/wiredtiger/src/support/stat.c
@@ -781,6 +781,8 @@ static const char * const __stats_connection_desc[] = {
"cache: cache overflow score",
"cache: cache overflow table entries",
"cache: cache overflow table insert calls",
+ "cache: cache overflow table max on-disk size",
+ "cache: cache overflow table on-disk size",
"cache: cache overflow table remove calls",
"cache: checkpoint blocked page eviction",
"cache: eviction calls to get a page",
@@ -1204,6 +1206,8 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
/* not clearing cache_lookaside_score */
/* not clearing cache_lookaside_entries */
stats->cache_lookaside_insert = 0;
+ /* not clearing cache_lookaside_ondisk_max */
+ /* not clearing cache_lookaside_ondisk */
stats->cache_lookaside_remove = 0;
stats->cache_eviction_checkpoint = 0;
stats->cache_eviction_get_ref = 0;
@@ -1616,6 +1620,10 @@ __wt_stat_connection_aggregate(
WT_STAT_READ(from, cache_lookaside_entries);
to->cache_lookaside_insert +=
WT_STAT_READ(from, cache_lookaside_insert);
+ to->cache_lookaside_ondisk_max +=
+ WT_STAT_READ(from, cache_lookaside_ondisk_max);
+ to->cache_lookaside_ondisk +=
+ WT_STAT_READ(from, cache_lookaside_ondisk);
to->cache_lookaside_remove +=
WT_STAT_READ(from, cache_lookaside_remove);
to->cache_eviction_checkpoint +=
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index 81bf2bdea4f..b21ccd355ce 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -485,7 +485,7 @@ __wt_txn_config(WT_SESSION_IMPL *session, const char *cfg[])
if (cval.val)
F_SET(txn, WT_TXN_IGNORE_PREPARE);
- WT_RET(__wt_txn_parse_read_timestamp(session, cfg));
+ WT_RET(__wt_txn_parse_read_timestamp(session, cfg, NULL));
return (0);
}
@@ -933,8 +933,12 @@ __wt_txn_prepare(WT_SESSION_IMPL *session, const char *cfg[])
WT_ASSERT(session, F_ISSET(txn, WT_TXN_RUNNING));
WT_ASSERT(session, !F_ISSET(txn, WT_TXN_ERROR) || txn->mod_count == 0);
- /* Transaction should not have updated any of the logged tables. */
- WT_ASSERT(session, txn->logrec == NULL);
+ /*
+ * A transaction should not have updated any of the logged tables
+ * if debug mode logging is not turned on.
+ */
+ if (!FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_DEBUG_MODE))
+ WT_ASSERT(session, txn->logrec == NULL);
WT_RET(__wt_txn_context_check(session, true));
diff --git a/src/third_party/wiredtiger/src/txn/txn_log.c b/src/third_party/wiredtiger/src/txn/txn_log.c
index f55715eb91b..1b5beff581f 100644
--- a/src/third_party/wiredtiger/src/txn/txn_log.c
+++ b/src/third_party/wiredtiger/src/txn/txn_log.c
@@ -38,6 +38,7 @@ __txn_op_log_row_key_check(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
if (cbt->ins == NULL) {
session = (WT_SESSION_IMPL *)cbt->iface.session;
page = cbt->ref->page;
+ WT_ASSERT(session, cbt->slot < page->entries);
rip = &page->pg_row[cbt->slot];
WT_ASSERT(session,
__wt_row_leaf_key(session, page, rip, &key, false) == 0);
@@ -59,19 +60,15 @@ __txn_op_log_row_key_check(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
* Log an operation for the current transaction.
*/
static int
-__txn_op_log(WT_SESSION_IMPL *session,
- WT_ITEM *logrec, WT_TXN_OP *op, WT_CURSOR_BTREE *cbt)
+__txn_op_log(WT_SESSION_IMPL *session, WT_ITEM *logrec,
+ WT_TXN_OP *op, WT_CURSOR_BTREE *cbt, uint32_t fileid)
{
WT_CURSOR *cursor;
WT_ITEM value;
WT_UPDATE *upd;
uint64_t recno;
- uint32_t fileid;
cursor = &cbt->iface;
-
- fileid = op->btree->id;
-
upd = op->u.op_upd;
value.data = upd->data;
value.size = upd->size;
@@ -210,7 +207,16 @@ __txn_logrec_init(WT_SESSION_IMPL *session)
if (txn->logrec != NULL)
return (0);
- WT_ASSERT(session, txn->id != WT_TXN_NONE);
+ /*
+ * The only way we should ever get in here without a txn id is if we
+ * are recording diagnostic information. In that case, allocate an id.
+ */
+ if (FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_DEBUG_MODE) &&
+ txn->id == WT_TXN_NONE)
+ WT_RET(__wt_txn_id_check(session));
+ else
+ WT_ASSERT(session, txn->id != WT_TXN_NONE);
+
WT_RET(__wt_struct_size(session, &header_size, fmt, rectype, txn->id));
WT_RET(__wt_logrec_alloc(session, header_size, &logrec));
@@ -233,6 +239,7 @@ err: __wt_logrec_free(session, &logrec);
int
__wt_txn_log_op(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
{
+ WT_CONNECTION_IMPL *conn;
WT_DECL_RET;
WT_ITEM *logrec;
WT_TXN *txn;
@@ -240,11 +247,13 @@ __wt_txn_log_op(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
uint32_t fileid;
+ conn = S2C(session);
txn = &session->txn;
- if (!FLD_ISSET(S2C(session)->log_flags, WT_CONN_LOG_ENABLED) ||
+ if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) ||
F_ISSET(session, WT_SESSION_NO_LOGGING) ||
- F_ISSET(S2BT(session), WT_BTREE_NO_LOGGING))
+ (F_ISSET(S2BT(session), WT_BTREE_NO_LOGGING) &&
+ !FLD_ISSET(conn->log_flags, WT_CONN_LOG_DEBUG_MODE)))
return (0);
/* We'd better have a transaction. */
@@ -255,6 +264,14 @@ __wt_txn_log_op(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
op = txn->mod + txn->mod_count - 1;
fileid = op->btree->id;
+ /*
+ * If this operation is diagnostic only, set the ignore bit on the
+ * fileid so that recovery can skip it.
+ */
+ if (F_ISSET(S2BT(session), WT_BTREE_NO_LOGGING) &&
+ FLD_ISSET(conn->log_flags, WT_CONN_LOG_DEBUG_MODE))
+ FLD_SET(fileid, WT_LOGOP_IGNORE);
+
WT_RET(__txn_logrec_init(session));
logrec = txn->logrec;
@@ -267,7 +284,7 @@ __wt_txn_log_op(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt)
break;
case WT_TXN_OP_BASIC_COL:
case WT_TXN_OP_BASIC_ROW:
- ret = __txn_op_log(session, logrec, op, cbt);
+ ret = __txn_op_log(session, logrec, op, cbt, fileid);
break;
case WT_TXN_OP_TRUNCATE_COL:
ret = __wt_logop_col_truncate_pack(session, logrec, fileid,
@@ -366,6 +383,47 @@ __wt_txn_checkpoint_logread(WT_SESSION_IMPL *session,
}
/*
+ * __wt_txn_ts_log --
+ * Write a log record recording timestamps in the transaction.
+ */
+int
+__wt_txn_ts_log(WT_SESSION_IMPL *session)
+{
+ struct timespec t;
+ WT_CONNECTION_IMPL *conn;
+ WT_ITEM *logrec;
+ WT_TXN *txn;
+ wt_timestamp_t commit, durable, first, prepare, read;
+
+ conn = S2C(session);
+ txn = &session->txn;
+
+ if (!FLD_ISSET(conn->log_flags, WT_CONN_LOG_ENABLED) ||
+ F_ISSET(session, WT_SESSION_NO_LOGGING) ||
+ !FLD_ISSET(conn->log_flags, WT_CONN_LOG_DEBUG_MODE))
+ return (0);
+
+ /* We'd better have a transaction running. */
+ WT_ASSERT(session, F_ISSET(txn, WT_TXN_RUNNING));
+
+ WT_RET(__txn_logrec_init(session));
+ logrec = txn->logrec;
+ commit = durable = first = prepare = read = WT_TS_NONE;
+ if (F_ISSET(txn, WT_TXN_HAS_TS_COMMIT)) {
+ commit = txn->commit_timestamp;
+ first = txn->first_commit_timestamp;
+ }
+ prepare = txn->prepare_timestamp;
+ if (F_ISSET(txn, WT_TXN_HAS_TS_READ))
+ read = txn->read_timestamp;
+
+ __wt_epoch(session, &t);
+ return (__wt_logop_txn_timestamp_pack(session, logrec,
+ (uint64_t)t.tv_sec, (uint64_t)t.tv_nsec,
+ commit, durable, first, prepare, read));
+}
+
+/*
* __wt_txn_checkpoint_log --
* Write a log record for a checkpoint operation.
*/
diff --git a/src/third_party/wiredtiger/src/txn/txn_recover.c b/src/third_party/wiredtiger/src/txn/txn_recover.c
index 702196d17ee..b0960deb9c3 100644
--- a/src/third_party/wiredtiger/src/txn/txn_recover.c
+++ b/src/third_party/wiredtiger/src/txn/txn_recover.c
@@ -51,6 +51,11 @@ __recovery_cursor(WT_SESSION_IMPL *session, WT_RECOVERY *r,
c = NULL;
/*
+ * Operations on file ids that have the ignore bit set are skipped.
+ */
+ if (WT_LOGOP_IS_IGNORED(id))
+ return (0);
+ /*
* Metadata operations have an id of 0. Match operations based
* on the id and the current pass of recovery for metadata.
*
@@ -115,7 +120,8 @@ __txn_op_apply(
WT_DECL_RET;
WT_ITEM key, start_key, stop_key, value;
WT_SESSION_IMPL *session;
- uint64_t recno, start_recno, stop_recno;
+ wt_timestamp_t commit, durable, first, prepare, read;
+ uint64_t recno, start_recno, stop_recno, t_nsec, t_sec;
uint32_t fileid, mode, optype, opsize;
session = r->session;
@@ -125,6 +131,16 @@ __txn_op_apply(
WT_ERR(__wt_logop_read(session, pp, end, &optype, &opsize));
end = *pp + opsize;
+ /*
+ * If it is an operation type that should be ignored, we're done.
+ * Note that file ids within known operations also use the same
+ * macros to indicate that the operation should be ignored.
+ */
+ if (WT_LOGOP_IS_IGNORED(optype)) {
+ *pp += opsize;
+ goto done;
+ }
+
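/*
 * Illustration (a reading of this change, not part of the source): on the
 * logging side, __wt_txn_log_op sets WT_LOGOP_IGNORE on the file id of
 * records written only because debug-mode table logging is enabled; on
 * recovery, __recovery_cursor skips such file ids and the check above
 * skips whole operation types carrying the same flag, so debug-only
 * records are stepped over and never applied during recovery.
 */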
switch (optype) {
case WT_LOGOP_COL_MODIFY:
WT_ERR(__wt_logop_col_modify_unpack(session, pp, end,
@@ -266,10 +282,20 @@ __txn_op_apply(
WT_TRET(stop->close(stop));
WT_ERR(ret);
break;
+ case WT_LOGOP_TXN_TIMESTAMP:
+ /*
+ * Timestamp records are informational only. We have to
+ * unpack the record to move forward in the log to the
+ * next operation, but we otherwise ignore it.
+ */
+ WT_ERR(__wt_logop_txn_timestamp_unpack(session, pp, end, &t_sec,
+ &t_nsec, &commit, &durable, &first, &prepare, &read));
+ break;
WT_ILLEGAL_VALUE_ERR(session, optype);
}
+done:
/* Reset the cursor so it doesn't block eviction. */
if (cursor != NULL)
WT_ERR(cursor->reset(cursor));
diff --git a/src/third_party/wiredtiger/src/txn/txn_timestamp.c b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
index b50da548f71..5ae391127b5 100644
--- a/src/third_party/wiredtiger/src/txn/txn_timestamp.c
+++ b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
@@ -667,8 +667,10 @@ __wt_txn_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
WT_DECL_RET;
WT_TXN *txn;
wt_timestamp_t ts;
+ bool set_ts;
txn = &session->txn;
+ set_ts = false;
/* Look for a commit timestamp. */
ret = __wt_config_gets_def(session, cfg, "commit_timestamp", 0, &cval);
@@ -678,6 +680,7 @@ __wt_txn_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
WT_RET(__wt_txn_parse_timestamp(session, "commit", &ts, &cval));
WT_RET(__wt_timestamp_validate(session, "commit", ts, &cval));
txn->commit_timestamp = ts;
+ set_ts = true;
__wt_txn_set_commit_timestamp(session);
} else
/*
@@ -687,7 +690,10 @@ __wt_txn_set_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
WT_RET(__wt_txn_context_prepare_check(session));
/* Look for a read timestamp. */
- WT_RET(__wt_txn_parse_read_timestamp(session, cfg));
+ WT_RET(__wt_txn_parse_read_timestamp(session, cfg, &set_ts));
+
+ if (set_ts)
+ WT_RET(__wt_txn_ts_log(session));
return (0);
}
@@ -775,7 +781,8 @@ __wt_txn_parse_prepare_timestamp(
* Parse a request to set a transaction's read_timestamp.
*/
int
-__wt_txn_parse_read_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
+__wt_txn_parse_read_timestamp(WT_SESSION_IMPL *session,
+ const char *cfg[], bool *set_tsp)
{
WT_CONFIG_ITEM cval;
WT_TXN *txn;
@@ -844,6 +851,8 @@ __wt_txn_parse_read_timestamp(WT_SESSION_IMPL *session, const char *cfg[])
round_to_oldest = false;
}
+ if (set_tsp != NULL)
+ *set_tsp = true;
__wt_txn_set_read_timestamp(session);
__wt_readunlock(session, &txn_global->rwlock);
if (round_to_oldest) {
diff --git a/src/third_party/wiredtiger/test/csuite/Makefile.am b/src/third_party/wiredtiger/test/csuite/Makefile.am
index 362d0775a88..cb3e24b5177 100644
--- a/src/third_party/wiredtiger/test/csuite/Makefile.am
+++ b/src/third_party/wiredtiger/test/csuite/Makefile.am
@@ -127,6 +127,10 @@ test_wt4333_handle_locks_SOURCES = wt4333_handle_locks/main.c
noinst_PROGRAMS += test_wt4333_handle_locks
all_TESTS += test_wt4333_handle_locks
+test_wt4803_cache_overflow_abort_SOURCES = wt4803_cache_overflow_abort/main.c
+noinst_PROGRAMS += test_wt4803_cache_overflow_abort
+all_TESTS += test_wt4803_cache_overflow_abort
+
# Run this during a "make check" smoke test.
TESTS = $(all_TESTS)
LOG_COMPILER = $(TEST_WRAPPER)
diff --git a/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c b/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c
index bf59fe674a7..23e2ca3ebd9 100644
--- a/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c
+++ b/src/third_party/wiredtiger/test/csuite/timestamp_abort/main.c
@@ -71,9 +71,10 @@ static char home[1024]; /* Program working dir */
#define SESSION_MAX (MAX_TH + 3 + MAX_TH * PREPARE_PCT)
static const char * table_pfx = "table";
+static const char * const uri_collection = "collection";
static const char * const uri_local = "local";
static const char * const uri_oplog = "oplog";
-static const char * const uri_collection = "collection";
+static const char * const uri_shadow = "shadow";
static const char * const ckpt_file = "checkpoint_done";
@@ -82,9 +83,13 @@ static volatile uint64_t global_ts = 1;
#define ENV_CONFIG_COMPAT ",compatibility=(release=\"2.9\")"
#define ENV_CONFIG_DEF \
- "create,log=(archive=false,file_max=10M,enabled),session_max=%" PRIu32
+ "cache_size=20M,create,log=(archive=true,file_max=10M,enabled)," \
+ "debug_mode=(table_logging=true,checkpoint_retention=5)," \
+ "statistics=(fast),statistics_log=(wait=1,json=true),session_max=%" PRIu32
#define ENV_CONFIG_TXNSYNC \
- "create,log=(archive=false,file_max=10M,enabled)," \
+ "cache_size=20M,create,log=(archive=true,file_max=10M,enabled)," \
+ "debug_mode=(table_logging=true,checkpoint_retention=5)," \
+ "statistics=(fast),statistics_log=(wait=1,json=true)," \
"transaction_sync=(enabled,method=none),session_max=%" PRIu32
#define ENV_CONFIG_REC "log=(archive=false,recover=on)"
@@ -225,7 +230,7 @@ static WT_THREAD_RET
thread_run(void *arg)
{
FILE *fp;
- WT_CURSOR *cur_coll, *cur_local, *cur_oplog;
+ WT_CURSOR *cur_coll, *cur_local, *cur_oplog, *cur_shadow;
WT_ITEM data;
WT_RAND_STATE rnd;
WT_SESSION *prepared_session, *session;
@@ -286,6 +291,15 @@ thread_run(void *arg)
testutil_check(session->open_cursor(session,
uri, NULL, NULL, &cur_coll));
testutil_check(__wt_snprintf(
+ uri, sizeof(uri), "%s:%s", table_pfx, uri_shadow));
+ if (use_prep)
+ testutil_check(prepared_session->open_cursor(prepared_session,
+ uri, NULL, NULL, &cur_shadow));
+ else
+ testutil_check(session->open_cursor(session,
+ uri, NULL, NULL, &cur_shadow));
+
+ testutil_check(__wt_snprintf(
uri, sizeof(uri), "%s:%s", table_pfx, uri_local));
if (use_prep)
testutil_check(prepared_session->open_cursor(prepared_session,
@@ -315,7 +329,7 @@ thread_run(void *arg)
if (use_ts) {
testutil_check(pthread_rwlock_rdlock(&ts_lock));
- active_ts = __wt_atomic_addv64(&global_ts, 1);
+ active_ts = __wt_atomic_addv64(&global_ts, 2);
testutil_check(__wt_snprintf(tscfg,
sizeof(tscfg), "commit_timestamp=%" PRIx64,
active_ts));
@@ -334,6 +348,7 @@ thread_run(void *arg)
cur_coll->set_key(cur_coll, kname);
cur_local->set_key(cur_local, kname);
cur_oplog->set_key(cur_oplog, kname);
+ cur_shadow->set_key(cur_shadow, kname);
/*
* Put an informative string into the value so that it
* can be viewed well in a binary dump.
@@ -351,6 +366,20 @@ thread_run(void *arg)
data.data = cbuf;
cur_coll->set_value(cur_coll, &data);
testutil_check(cur_coll->insert(cur_coll));
+ cur_shadow->set_value(cur_shadow, &data);
+ if (use_ts) {
+ /*
+ * Change the timestamp in the middle of the
+ * transaction so that we simulate a secondary.
+ */
+ ++active_ts;
+ testutil_check(__wt_snprintf(tscfg,
+ sizeof(tscfg), "commit_timestamp=%" PRIx64,
+ active_ts));
+ testutil_check(session->timestamp_transaction(
+ session, tscfg));
+ }
+ testutil_check(cur_shadow->insert(cur_shadow));
data.size = __wt_random(&rnd) % MAX_VAL;
data.data = obuf;
cur_oplog->set_value(cur_oplog, &data);
@@ -437,6 +466,10 @@ run_workload(uint32_t nth)
testutil_check(session->create(session, uri,
"key_format=S,value_format=u,log=(enabled=false)"));
testutil_check(__wt_snprintf(
+ uri, sizeof(uri), "%s:%s", table_pfx, uri_shadow));
+ testutil_check(session->create(session, uri,
+ "key_format=S,value_format=u,log=(enabled=false)"));
+ testutil_check(__wt_snprintf(
uri, sizeof(uri), "%s:%s", table_pfx, uri_local));
testutil_check(session->create(session,
uri, "key_format=S,value_format=u"));
@@ -548,7 +581,7 @@ main(int argc, char *argv[])
FILE *fp;
REPORT c_rep[MAX_TH], l_rep[MAX_TH], o_rep[MAX_TH];
WT_CONNECTION *conn;
- WT_CURSOR *cur_coll, *cur_local, *cur_oplog;
+ WT_CURSOR *cur_coll, *cur_local, *cur_oplog, *cur_shadow;
WT_RAND_STATE rnd;
WT_SESSION *session;
pid_t pid;
@@ -725,6 +758,10 @@ main(int argc, char *argv[])
testutil_check(session->open_cursor(session,
buf, NULL, NULL, &cur_coll));
testutil_check(__wt_snprintf(
+ buf, sizeof(buf), "%s:%s", table_pfx, uri_shadow));
+ testutil_check(session->open_cursor(session,
+ buf, NULL, NULL, &cur_shadow));
+ testutil_check(__wt_snprintf(
buf, sizeof(buf), "%s:%s", table_pfx, uri_local));
testutil_check(session->open_cursor(session,
buf, NULL, NULL, &cur_local));
@@ -798,13 +835,20 @@ main(int argc, char *argv[])
cur_coll->set_key(cur_coll, kname);
cur_local->set_key(cur_local, kname);
cur_oplog->set_key(cur_oplog, kname);
+ cur_shadow->set_key(cur_shadow, kname);
/*
* The collection table should always only have the
- * data as of the checkpoint.
+ * data as of the checkpoint. The shadow table should
+ * always have the exact same data (or not) as the
+ * collection table.
*/
if ((ret = cur_coll->search(cur_coll)) != 0) {
if (ret != WT_NOTFOUND)
testutil_die(ret, "search");
+ if ((ret = cur_shadow->search(cur_shadow)) == 0)
+ testutil_die(ret,
+ "shadow search success");
+
/*
* If we don't find a record, the stable
* timestamp written to our file better be
@@ -841,7 +885,10 @@ main(int argc, char *argv[])
" > stable ts %" PRIu64 "\n",
fname, key, stable_fp, stable_val);
fatal = true;
- }
+ } else if ((ret = cur_shadow->search(cur_shadow)) != 0)
+ /* Collection and shadow both have the data. */
+ testutil_die(ret, "shadow search failure");
+
/*
* The local table should always have all data.
*/
diff --git a/src/third_party/wiredtiger/test/csuite/wt4803_cache_overflow_abort/main.c b/src/third_party/wiredtiger/test/csuite/wt4803_cache_overflow_abort/main.c
new file mode 100644
index 00000000000..7d9b0baf132
--- /dev/null
+++ b/src/third_party/wiredtiger/test/csuite/wt4803_cache_overflow_abort/main.c
@@ -0,0 +1,239 @@
+/*-
+ * Public Domain 2014-2019 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "test_util.h"
+
+#include <signal.h>
+#include <sys/wait.h>
+
+/*
+ * JIRA ticket reference: WT-4803
+ * Test case description: This test checks the functionality of the
+ * lookaside file_max configuration. When the size of the lookaside file
+ * exceeds this value, we expect to panic.
+ * Failure mode: The test fails if we receive a panic in a case where we
+ * weren't expecting one, or don't receive one where we were.
+ */
+
+#define NUM_KEYS 2000
+
+/*
+ * This is a global flag that should be set before running test_las_workload.
+ * It lets the child process know whether it should be expecting a panic or not
+ * so that it can adjust its exit code as needed.
+ */
+static bool expect_panic;
+
+static int
+handle_message(WT_EVENT_HANDLER *handler,
+ WT_SESSION *session, int error, const char *message)
+{
+ WT_UNUSED(handler);
+ WT_UNUSED(session);
+
+ (void)fprintf(
+ stderr, "%s: %s\n", message, session->strerror(session, error));
+
+ if (error == WT_PANIC &&
+ strstr(message, "exceeds maximum size") != NULL) {
+ fprintf(stderr, "Got cache overflow error (expect_panic=%s)\n",
+ expect_panic ? "true" : "false");
+
+ /*
+ * If we're expecting a panic, exit with zero to indicate to the
+ * parent that this test was successful.
+ *
+ * If not, don't intercept. We'll naturally exit with non-zero
+ * if we're terminating due to panic.
+ */
+ if (expect_panic)
+ exit(EXIT_SUCCESS);
+ }
+
+ return (0);
+}
+
+static WT_EVENT_HANDLER event_handler = {
+ handle_message,
+ NULL,
+ NULL,
+ NULL
+};
+
+static void
+las_workload(TEST_OPTS *opts, const char *las_file_max)
+{
+ WT_CURSOR *cursor;
+ WT_SESSION *other_session, *session;
+ int i;
+ char buf[WT_MEGABYTE], open_config[128];
+
+ testutil_check(__wt_snprintf(open_config, sizeof(open_config),
+ "create,cache_size=50MB,cache_overflow=(file_max=%s)",
+ las_file_max));
+
+ testutil_check(wiredtiger_open(
+ opts->home, &event_handler, open_config, &opts->conn));
+ testutil_check(
+ opts->conn->open_session(opts->conn, NULL, NULL, &session));
+ testutil_check(
+ session->create(session, opts->uri, "key_format=i,value_format=S"));
+ testutil_check(
+ session->open_cursor(session, opts->uri, NULL, NULL, &cursor));
+
+ memset(buf, 0xA, WT_MEGABYTE);
+ buf[WT_MEGABYTE - 1] = '\0';
+
+ /* Populate the table. */
+ for (i = 0; i < NUM_KEYS; ++i) {
+ cursor->set_key(cursor, i);
+ cursor->set_value(cursor, buf);
+ testutil_check(cursor->insert(cursor));
+ }
+
+ /*
+ * Open a snapshot isolation transaction in another session. This forces
+ * the cache to retain all previous values. Then update all keys with a
+ * new value in the original session while keeping that snapshot
+ * transaction open. With the large value buffer, small cache and lots
+ * of keys, this will force a lot of lookaside usage.
+ *
+ * When the file_max setting is small, the maximum size should easily be
+ * reached and we should panic. When the maximum size is large or not
+ * set, then we should succeed.
+ */
+ testutil_check(
+ opts->conn->open_session(opts->conn, NULL, NULL, &other_session));
+ testutil_check(other_session->begin_transaction(
+ other_session, "isolation=snapshot"));
+
+ memset(buf, 0xB, WT_MEGABYTE);
+ buf[WT_MEGABYTE - 1] = '\0';
+
+ for (i = 0; i < NUM_KEYS; ++i) {
+ cursor->set_key(cursor, i);
+ cursor->set_value(cursor, buf);
+ testutil_check(cursor->update(cursor));
+ }
+
+ /*
+ * Cleanup.
+ * We do not get here when the file_max size is small because we will
+ * have already hit the maximum and exited. This code only executes on
+ * the successful path.
+ */
+ testutil_check(
+ other_session->rollback_transaction(other_session, NULL));
+ testutil_check(other_session->close(other_session, NULL));
+
+ testutil_check(cursor->close(cursor));
+ testutil_check(session->close(session, NULL));
+}
+
+static int
+test_las_workload(TEST_OPTS *opts, const char *las_file_max)
+{
+ pid_t pid;
+ int status;
+
+ /*
+ * We're going to run this workload for different configurations of
+ * file_max. So clean out the work directory each time.
+ */
+ testutil_make_work_dir(opts->home);
+
+ /*
+ * Since it's possible that the workload will panic and abort, we will
+ * fork the process and execute the workload in the child process.
+ *
+ * This way, we can safely check the exit code of the child process and
+ * confirm that it is what we expected.
+ */
+ pid = fork();
+ if (pid < 0)
+ /* Failed fork. */
+ testutil_die(errno, "fork");
+ else if (pid == 0) {
+ /* Child process from here. */
+ las_workload(opts, las_file_max);
+
+ /*
+ * If we're expecting a panic during the workload, we shouldn't
+ * get to this point. Exit with non-zero to indicate to parent
+ * that we should fail this test.
+ */
+ fprintf(stderr,
+ "Successfully completed workload (expect_panic=%s)\n",
+ expect_panic ? "true" : "false");
+
+ if (expect_panic)
+ exit(EXIT_FAILURE);
+ else
+ exit(EXIT_SUCCESS);
+ }
+
+ /* Parent process from here. */
+ if (waitpid(pid, &status, 0) == -1)
+ testutil_die(errno, "waitpid");
+
+ return (status);
+}
+
+int
+main(int argc, char **argv)
+{
+ TEST_OPTS opts;
+
+ memset(&opts, 0x0, sizeof(opts));
+ testutil_check(testutil_parse_opts(argc, argv, &opts));
+
+ /*
+ * The lookaside is unbounded.
+ * We don't expect any failure since we can use as much as needed.
+ */
+ expect_panic = false;
+ testutil_check(test_las_workload(&opts, "0"));
+
+ /*
+ * The lookaside is limited to 5GB.
+ * This is more than enough for this workload so we don't expect any
+ * failure.
+ */
+ expect_panic = false;
+ testutil_check(test_las_workload(&opts, "5GB"));
+
+ /*
+ * The lookaside is limited to 100MB.
+ * This is insufficient for this workload so we're expecting a failure.
+ */
+ expect_panic = true;
+ testutil_check(test_las_workload(&opts, "100MB"));
+
+ testutil_cleanup(&opts);
+
+ return (0);
+}
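The new csuite test exercises the WT-4803 cache_overflow=(file_max=...) setting: the lookaside file may grow to the configured size, after which the connection panics with an "exceeds maximum size" message. A compact Python sketch of the same knob; the home directory and sizes are placeholders:

import wiredtiger

# Assumed: 'WT_HOME' exists. A file_max of 0 leaves the lookaside file unbounded.
conn = wiredtiger.wiredtiger_open('WT_HOME',
    'create,cache_size=50MB,cache_overflow=(file_max=100MB)')
# The limit can also be raised or removed at runtime.
conn.reconfigure('cache_overflow=(file_max=0)')
conn.close()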
diff --git a/src/third_party/wiredtiger/test/evergreen.yml b/src/third_party/wiredtiger/test/evergreen.yml
index be111c6432c..01aff272320 100644
--- a/src/third_party/wiredtiger/test/evergreen.yml
+++ b/src/third_party/wiredtiger/test/evergreen.yml
@@ -8,7 +8,7 @@ functions:
command: git.get_project
params:
directory: wiredtiger
- "fetch artifacts" :
+ "fetch artifacts" :
- command: s3.get
params:
aws_key: ${aws_key}
@@ -19,7 +19,7 @@ functions:
"fetch mongo-tests repo" :
command: shell.exec
params:
- script: |
+ script: |
git clone https://github.com/wiredtiger/mongo-tests
"compile wiredtiger":
command: shell.exec
@@ -35,7 +35,7 @@ functions:
./build_posix/reconf
${configure_env_vars|} ./configure --enable-diagnostic --enable-python --enable-zlib --enable-strict --enable-static --prefix=$(pwd)/LOCAL_INSTALL
${make_command|make} ${smp_command|} 2>&1
-
+
# On macOS, change the binary location with install_name_tool since DYLD_LIBRARY_PATH
# appears not to work for dynamic modules loaded by python. For wt, the libtool generated
# script has the wrong path for running on test machines.
@@ -609,6 +609,20 @@ tasks:
${test_env_vars|} $(pwd)/test_wt4156_metadata_salvage 2>&1
+ - name: csuite-wt4803-cache-overflow-abort-test
+ depends_on:
+ - name: compile
+ commands:
+ - func: "fetch artifacts"
+ - command: shell.exec
+ params:
+ working_dir: "wiredtiger/build_posix/test/csuite"
+ script: |
+ set -o errexit
+ set -o verbose
+
+ ${test_env_vars|} $(pwd)/test_wt4803_cache_overflow_abort 2>&1
+
- name: csuite-rwlock-test
depends_on:
- name: compile
@@ -760,9 +774,9 @@ tasks:
# Break out Python unit tests into multiple buckets/tasks based on test name and runtime
# The test/suite/run.py script can work out test names by casting each command argument
- # with "test_" prefix and "*.py" postfix.
+ # with "test_" prefix and "*.py" postfix.
#
- # One example:
+ # One example:
# "test/suite/run.py [ab]" will be translated to testing "test_a*.py" and "test_b*.py"
- name: unit-test-bucket00
@@ -934,18 +948,18 @@ tasks:
- name: million-collection-test
depends_on: []
- run_on:
+ run_on:
- rhel62-large
- commands:
+ commands:
- func: "fetch mongo-tests repo"
- command: shell.exec
params:
working_dir: mongo-tests
- script: |
+ script: |
set -o errexit
set -o verbose
ulimit -n 1000000
- ulimit -c unlimited
+ ulimit -c unlimited
largescale/run-million-collection-test.sh .
buildvariants:
@@ -1051,4 +1065,3 @@ buildvariants:
- name: make-check-test
- name: unit-test
- name: fops
-
diff --git a/src/third_party/wiredtiger/test/suite/test_debug_mode01.py b/src/third_party/wiredtiger/test/suite/test_debug_mode01.py
new file mode 100644
index 00000000000..88ba81f9c1c
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_debug_mode01.py
@@ -0,0 +1,94 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2019 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wiredtiger, wttest
+
+# test_debug_mode01.py
+# Test the debug mode settings. Test rollback_error in this one.
+class test_debug_mode01(wttest.WiredTigerTestCase):
+ conn_config = 'log=(enabled=true),debug_mode=(rollback_error=5)'
+ session_config = 'isolation=snapshot'
+ uri = 'file:test_debug'
+
+ entries = 22
+ min_error = entries // 5
+
+ def rollback_error(self, val, insert=True):
+ keys = range(1, self.entries)
+ c = self.session.open_cursor(self.uri, None)
+ # We expect some operations to return an exception so we cannot
+ # use the simple 'c[k] = 1' form. Instead we must explicitly set
+ # the key and value and then use the insert or update primitives.
+ #
+ # Look for a generic 'WT_ROLLBACK' string not the specific
+ # simulated reason string.
+ msg = '/WT_ROLLBACK/'
+ rollback = 0
+ for k in keys:
+ self.session.begin_transaction()
+ c.set_key(k)
+ c.set_value(val)
+ # Execute the insert or update. It will return true if the simulated
+ # conflict exception is raised, false if no exception occurred.
+ if insert:
+ conflict = self.assertRaisesException(wiredtiger.WiredTigerError, \
+ lambda:c.insert(), msg, True)
+ else:
+ conflict = self.assertRaisesException(wiredtiger.WiredTigerError, \
+ lambda:c.update(), msg, True)
+
+ if conflict:
+ rollback += 1
+ self.pr("Key: " + str(k) + " Rolled back")
+ self.session.rollback_transaction()
+ else:
+ self.session.commit_transaction()
+ c.close()
+ return rollback
+
+ def test_rollback_error(self):
+ self.session.create(self.uri, 'key_format=i,value_format=i')
+ rollback = self.rollback_error(1)
+ rollback += self.rollback_error(2, False)
+ self.pr("Rollback: " + str(rollback))
+ self.pr("Minimum: " + str(self.min_error))
+ self.assertTrue(rollback >= self.min_error)
+
+ def test_rollback_error_off(self):
+ # The setting is added in to wiredtiger_open via the config above.
+ # Test that we can properly turn the setting off via reconfigure.
+ # There should then be no rollback errors.
+ self.conn.reconfigure("debug_mode=(rollback_error=0)")
+
+ self.session.create(self.uri, 'key_format=i,value_format=i')
+ rollback = self.rollback_error(1)
+ rollback += self.rollback_error(2)
+ self.assertTrue(rollback == 0)
+
+if __name__ == '__main__':
+ wttest.run()
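test_debug_mode01 relies on debug_mode=(rollback_error=N) making roughly every Nth update fail with a simulated WT_ROLLBACK. A sketch of how calling code might retry around that (or around a genuine conflict); the helper name is invented and the session and cursor are assumed to exist:

def insert_with_retry(session, cursor, key, value):
    # Retry whenever WiredTiger reports WT_ROLLBACK, whether simulated by
    # debug_mode=(rollback_error=N) or caused by a real conflict.
    while True:
        session.begin_transaction()
        try:
            cursor[key] = value
        except wiredtiger.WiredTigerError as e:
            session.rollback_transaction()
            if 'WT_ROLLBACK' in str(e):
                continue
            raise
        session.commit_transaction()
        return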
diff --git a/src/third_party/wiredtiger/test/suite/test_debug_mode02.py b/src/third_party/wiredtiger/test/suite/test_debug_mode02.py
new file mode 100644
index 00000000000..0452e60fbd1
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_debug_mode02.py
@@ -0,0 +1,108 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2019 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import fnmatch, os, time, wiredtiger, wttest
+
+# test_debug_mode02.py
+# Test the debug mode settings. Test checkpoint_retention use.
+class test_debug_mode02(wttest.WiredTigerTestCase):
+ uri = 'file:test_debug'
+
+ entries = 100
+ loop = 0
+ retain = 5
+ log1 = 'WiredTigerLog.0000000001'
+ log2 = 'WiredTigerLog.0000000002'
+
+ def conn_config(self):
+ return 'log=(enabled=true,file_max=100K),debug_mode=(checkpoint_retention=%d)' % self.retain
+
+ def log_set(self):
+ logs = fnmatch.filter(os.listdir(self.home), "*gerLog*")
+ return set(logs)
+
+ def check_archive(self, logfile):
+ archived = False
+ for i in range(1,90):
+ # Sleep and then see if archive ran. We do this in a loop
+ # for slow machines. Max out at 90 seconds.
+ time.sleep(1.0)
+ if not os.path.exists(logfile):
+ archived = True
+ break
+ self.assertTrue(archived)
+
+ def advance_log_checkpoint(self):
+ # Advance the log file to the next file and write a checkpoint.
+ keys = range(1, self.entries)
+ cur_set = self.log_set()
+ c = self.session.open_cursor(self.uri, None)
+ new_set = cur_set
+ # Write data in small chunks until we switch log files.
+ while cur_set == new_set:
+ for k in keys:
+ c[k + (self.loop * self.entries)] = 1
+ self.loop += 1
+ new_set = self.log_set()
+ c.close()
+ # Write a checkpoint into the new log file.
+ self.session.checkpoint()
+
+ def test_checkpoint_retain(self):
+ self.session.create(self.uri, 'key_format=i,value_format=i')
+ # No log files should be archived while we have fewer than the
+ # retention number of logs. Make sure that on each iteration the
+ # new set of log files is a superset of the previous set.
+ for i in range(1, self.retain):
+ cur_set = self.log_set()
+ self.advance_log_checkpoint()
+ # We don't accommodate slow machines here because we don't expect
+ # the files to change, and there is no other way to know whether
+ # archive ran.
+ time.sleep(1.0)
+ new_set = self.log_set()
+ self.assertTrue(new_set.issuperset(cur_set))
+
+ self.assertTrue(os.path.exists(self.log1))
+ self.advance_log_checkpoint()
+ self.check_archive(self.log1)
+
+ # Test that both zero and one archive as usual. And test reconfigure.
+ def test_checkpoint_retain_off(self):
+ self.conn.reconfigure("debug_mode=(checkpoint_retention=0)")
+ self.session.create(self.uri, 'key_format=i,value_format=i')
+
+ self.advance_log_checkpoint()
+ self.check_archive(self.log1)
+
+ self.conn.reconfigure("debug_mode=(checkpoint_retention=1)")
+ self.advance_log_checkpoint()
+ self.check_archive(self.log2)
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_debug_mode03.py b/src/third_party/wiredtiger/test/suite/test_debug_mode03.py
new file mode 100644
index 00000000000..feb5c0d904a
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_debug_mode03.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2019 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wiredtiger, wttest
+
+# test_debug_mode03.py
+# Test the debug mode settings. Test table_logging use.
+class test_debug_mode03(wttest.WiredTigerTestCase):
+ conn_config = 'log=(enabled=true,file_max=100K),debug_mode=(table_logging=true)'
+ uri = 'file:test_debug'
+ entries = 100
+ value = b'\x01\x02abcd\x03\x04'
+
+ def add_data(self):
+ # Add a binary value we can search for in the log.
+ keys = range(0, self.entries)
+ c = self.session.open_cursor(self.uri, None)
+ for k in keys:
+ c[k] = self.value
+ c.close()
+
+ def find_log_recs(self):
+ # Open a log cursor. We should find log records that have
+ # the value we inserted.
+ c = self.session.open_cursor("log:", None)
+ count = 0
+ while c.next() == 0:
+ # lsn.file, lsn.offset, opcount
+ keys = c.get_key()
+ # txnid, rectype, optype, fileid, logrec_key, logrec_value
+ values = c.get_value()
+ # Look for log records that have a key/value pair.
+ if values[4] != b'':
+ if self.value in values[5]: # logrec_value
+ count += 1
+ c.close()
+ return count
+
+ def test_table_logging(self):
+ self.session.create(self.uri, 'key_format=i,value_format=u,log=(enabled=false)')
+ self.add_data()
+ count = self.find_log_recs()
+ self.assertEqual(count, self.entries)
+
+ # Test that table logging can be turned off via reconfigure.
+ def test_table_logging_off(self):
+ self.conn.reconfigure("debug_mode=(table_logging=false)")
+ self.session.create(self.uri, 'key_format=i,value_format=u,log=(enabled=false)')
+ self.add_data()
+ count = self.find_log_recs()
+ self.assertEqual(count, 0)
+
+if __name__ == '__main__':
+ wttest.run()
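test_debug_mode03 verifies debug_mode=(table_logging=true) by finding the inserted values in the log even though the table was created with log=(enabled=false). The 'log:' cursor layout used in find_log_recs(), shown as a small standalone dump loop (illustrative only, assuming an open session):

logc = session.open_cursor('log:')
while logc.next() == 0:
    lsn_file, lsn_offset, opcount = logc.get_key()
    txnid, rectype, optype, fileid, logrec_key, logrec_value = logc.get_value()
    # Only operations that touch a table carry a key/value pair.
    if logrec_key != b'':
        print((lsn_file, lsn_offset, opcount, fileid, logrec_value))
logc.close()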
diff --git a/src/third_party/wiredtiger/test/suite/test_debug_mode04.py b/src/third_party/wiredtiger/test/suite/test_debug_mode04.py
new file mode 100644
index 00000000000..1f5429495e8
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_debug_mode04.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2019 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wiredtiger, wttest
+
+# test_debug_mode04.py
+# Test the debug mode settings. Test eviction use.
+class test_debug_mode04(wttest.WiredTigerTestCase):
+ conn_config = 'log=(enabled=true,file_max=100K),debug_mode=(eviction=true)'
+ uri = 'file:test_debug'
+ entries = 100
+ value = b'\x01\x02abcd\x03\x04'
+
+ def add_data(self):
+ keys = range(0, self.entries)
+ c = self.session.open_cursor(self.uri, None)
+ for k in keys:
+ c[k] = self.value
+ c.close()
+
+ # Just test turning it on and off. There really isn't anything
+ # specific to verify.
+ def test_table_logging(self):
+ self.session.create(self.uri, 'key_format=i,value_format=u')
+ self.add_data()
+
+ def test_table_logging_off(self):
+ self.conn.reconfigure("debug_mode=(eviction=false)")
+ self.session.create(self.uri, 'key_format=i,value_format=u')
+ self.add_data()
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_las04.py b/src/third_party/wiredtiger/test/suite/test_las04.py
new file mode 100644
index 00000000000..9d35d3c17f3
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_las04.py
@@ -0,0 +1,110 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2019 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# test_las04.py
+# Test file_max configuration and reconfiguration for the lookaside table.
+#
+
+import wiredtiger, wttest
+from wtscenario import make_scenarios
+
+# Taken from src/include/misc.h.
+WT_MB = 1048576
+
+class test_las04(wttest.WiredTigerTestCase):
+ uri = 'table:las_04'
+ in_memory_values = [
+ ('false', dict(in_memory=False)),
+ ('none', dict(in_memory=None)),
+ ('true', dict(in_memory=True))
+ ]
+ init_file_max_values = [
+ ('default', dict(init_file_max=None, init_stat_val=0)),
+ ('non-zero', dict(init_file_max='100MB', init_stat_val=(WT_MB * 100))),
+ ('zero', dict(init_file_max='0', init_stat_val=0))
+ ]
+ reconfig_file_max_values = [
+ ('non-zero', dict(reconfig_file_max='100MB',
+ reconfig_stat_val=(WT_MB * 100))),
+ ('too-low', dict(reconfig_file_max='99MB', reconfig_stat_val=None)),
+ ('zero', dict(reconfig_file_max='0', reconfig_stat_val=0))
+ ]
+ scenarios = make_scenarios(init_file_max_values, reconfig_file_max_values,
+ in_memory_values)
+
+ def conn_config(self):
+ config = 'statistics=(fast)'
+ if self.init_file_max is not None:
+ config += ',cache_overflow=(file_max={})'.format(self.init_file_max)
+ if self.in_memory is not None:
+ config += ',in_memory=' + ('true' if self.in_memory else 'false')
+ return config
+
+ def get_stat(self, stat):
+ stat_cursor = self.session.open_cursor('statistics:')
+ val = stat_cursor[stat][2]
+ stat_cursor.close()
+ return val
+
+ def test_las(self):
+ self.session.create(self.uri, 'key_format=S,value_format=S')
+
+ if self.in_memory:
+ # For in-memory configurations, we simply ignore any lookaside
+ # related configuration.
+ self.assertEqual(
+ self.get_stat(wiredtiger.stat.conn.cache_lookaside_ondisk_max),
+ 0)
+ else:
+ self.assertEqual(
+ self.get_stat(wiredtiger.stat.conn.cache_lookaside_ondisk_max),
+ self.init_stat_val)
+
+ reconfigure = lambda: self.conn.reconfigure(
+ 'cache_overflow=(file_max={})'.format(self.reconfig_file_max))
+
+ # We expect an error when the statistic value is None because the value
+ # is out of range.
+ if self.reconfig_stat_val is None:
+ self.assertRaisesWithMessage(
+ wiredtiger.WiredTigerError, reconfigure, '/below minimum/')
+ return
+
+ reconfigure()
+
+ if self.in_memory:
+ self.assertEqual(
+ self.get_stat(wiredtiger.stat.conn.cache_lookaside_ondisk_max),
+ 0)
+ else:
+ self.assertEqual(
+ self.get_stat(wiredtiger.stat.conn.cache_lookaside_ondisk_max),
+ self.reconfig_stat_val)
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp17.py b/src/third_party/wiredtiger/test/suite/test_timestamp17.py
new file mode 100644
index 00000000000..f03b002c0ed
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp17.py
@@ -0,0 +1,173 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2019 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# test_timestamp17.py
+# Test unintended timestamp usage on an update and ensure behavior
+# matches expectations. Additionally, move the timestamp to ensure
+# that values read are still consistent after those timestamps are
+# moved.
+#
+
+import random
+from suite_subprocess import suite_subprocess
+import wiredtiger, wttest
+from wtscenario import make_scenarios
+
+def timestamp_str(t):
+ return '%x' % t
+
+class test_timestamp17(wttest.WiredTigerTestCase, suite_subprocess):
+ tablename = 'test_timestamp17'
+ uri = 'table:' + tablename
+ session_config = 'isolation=snapshot'
+
+ def test_inconsistent_timestamping(self):
+ self.session.create(self.uri, 'key_format=i,value_format=i')
+ self.session.begin_transaction()
+ cur1 = self.session.open_cursor(self.uri)
+ cur1[1] = 1
+ self.session.commit_transaction('commit_timestamp=25')
+
+ self.session.begin_transaction()
+ cur1[1] = 2
+ self.session.commit_transaction('commit_timestamp=50')
+
+ self.session.begin_transaction()
+ cur1[1] = 3
+ self.session.commit_transaction('commit_timestamp=200')
+
+ self.session.begin_transaction()
+ cur1.set_key(1)
+ cur1.remove()
+ self.session.commit_transaction('commit_timestamp=100')
+
+ # Read before any updates and ensure we cannot find the key or value.
+ self.session.begin_transaction('read_timestamp=20')
+ cur1.set_key(1)
+ search_success = cur1.search()
+ self.assertEqual(search_success, wiredtiger.WT_NOTFOUND)
+ self.session.commit_transaction()
+
+ # Read at 25 and we should see 1.
+ self.session.begin_transaction('read_timestamp=25')
+ cur1.set_key(1)
+ search_success = cur1.search()
+ self.assertEqual(search_success, 0)
+ value1 = cur1.get_value()
+ self.session.commit_transaction()
+ self.assertEqual(1, value1)
+
+ # Read at 50 and we should see 2.
+ self.session.begin_transaction('read_timestamp=50')
+ cur1.set_key(1)
+ search_success = cur1.search()
+ self.assertEqual(search_success, 0)
+ value1 = cur1.get_value()
+ self.session.commit_transaction()
+ self.assertEqual(2, value1)
+
+ # Read at 100 and we should not find anything.
+ self.session.begin_transaction('read_timestamp=100')
+ cur1.set_key(1)
+ search_success = cur1.search()
+ self.assertEqual(search_success, wiredtiger.WT_NOTFOUND)
+ self.session.commit_transaction()
+
+ # Read at 200 and we should still not find anything.
+ self.session.begin_transaction('read_timestamp=200')
+ cur1.set_key(1)
+ search_success = cur1.search()
+ self.assertEqual(search_success, wiredtiger.WT_NOTFOUND)
+ self.session.commit_transaction()
+
+ # Read at 300 for further validation.
+ self.session.begin_transaction('read_timestamp=300')
+ cur1.set_key(1)
+ search_success = cur1.search()
+ self.assertEqual(search_success, wiredtiger.WT_NOTFOUND)
+ self.session.commit_transaction()
+
+ # Move oldest timestamp forward and
+ # confirm we see the correct numbers.
+ self.conn.set_timestamp('oldest_timestamp=49')
+
+ # Read at 49 and we should see 1.
+ self.session.begin_transaction('read_timestamp=49')
+ cur1.set_key(1)
+ search_success = cur1.search()
+ self.assertEqual(search_success, 0)
+ value1 = cur1.get_value()
+ self.session.commit_transaction()
+ self.assertEqual(1, value1)
+
+ self.conn.set_timestamp('oldest_timestamp=99')
+
+ # Read at 99 and we should see 2.
+ self.session.begin_transaction('read_timestamp=99')
+ cur1.set_key(1)
+ search_success = cur1.search()
+ self.assertEqual(search_success, 0)
+ value1 = cur1.get_value()
+ self.session.commit_transaction()
+ self.assertEqual(2, value1)
+
+ # Move oldest to the point at which we deleted.
+ self.conn.set_timestamp('oldest_timestamp=100')
+
+ # Read at 100 and we should not find anything.
+ self.session.begin_transaction('read_timestamp=100')
+ cur1.set_key(1)
+ search_success = cur1.search()
+ self.assertEqual(search_success, wiredtiger.WT_NOTFOUND)
+ self.session.commit_transaction()
+
+ # Read at 200 and we should not find anything.
+ self.session.begin_transaction('read_timestamp=200')
+ cur1.set_key(1)
+ search_success = cur1.search()
+ self.assertEqual(search_success, wiredtiger.WT_NOTFOUND)
+ self.session.commit_transaction()
+
+ # Move oldest timestamp to 200 to ensure history
+ # works as expected and we do not see the value 3.
+ self.conn.set_timestamp('oldest_timestamp=200')
+
+ self.session.begin_transaction('read_timestamp=200')
+ cur1.set_key(1)
+ search_success = cur1.search()
+ self.assertEqual(search_success, wiredtiger.WT_NOTFOUND)
+ self.session.commit_transaction()
+
+ self.session.begin_transaction('read_timestamp=250')
+ cur1.set_key(1)
+ search_success = cur1.search()
+ self.assertEqual(search_success, wiredtiger.WT_NOTFOUND)
+ self.session.commit_transaction()
+
+if __name__ == '__main__':
+ wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/wttest.py b/src/third_party/wiredtiger/test/suite/wttest.py
index c0b755d2230..045cac26552 100644
--- a/src/third_party/wiredtiger/test/suite/wttest.py
+++ b/src/third_party/wiredtiger/test/suite/wttest.py
@@ -508,8 +508,9 @@ class WiredTigerTestCase(unittest.TestCase):
"""
Like TestCase.assertRaises(), with some additional options.
If the exceptionString argument is used, the exception's string
- must match it. If optional is set, then no assertion occurs
- if the exception doesn't occur.
+ must match it, or its pattern if the string starts and ends with
+ a slash. If optional is set, then no assertion occurs if the
+ exception doesn't occur.
Returns true if the assertion is raised.
"""
raised = False
@@ -519,9 +520,19 @@ class WiredTigerTestCase(unittest.TestCase):
if not isinstance(err, exceptionType):
self.fail('Exception of incorrect type raised, got type: ' + \
str(type(err)))
- if exceptionString != None and exceptionString != str(err):
- self.fail('Exception with incorrect string raised, got: "' + \
- str(err) + '"')
+ if exceptionString != None:
+ # Match either a pattern or an exact string.
+ fail = False
+ self.pr('Expecting string msg: ' + exceptionString)
+ if len(exceptionString) > 2 and \
+ exceptionString[0] == '/' and exceptionString[-1] == '/' :
+ if re.search(exceptionString[1:-1], str(err)) == None:
+ fail = True
+ elif exceptionString != str(err):
+ fail = True
+ if fail:
+ self.fail('Exception with incorrect string raised, got: "' + \
+ str(err) + '" Expected: ' + exceptionString)
raised = True
if not raised and not optional:
self.fail('no assertion raised')
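With this change, an exceptionString wrapped in slashes is treated as a regular expression to search for rather than an exact message, which is what the new debug-mode tests use. A usage sketch from inside a WiredTigerTestCase method, assuming an open cursor c with a key and value already set:

# Pattern form: '/WT_ROLLBACK/' is searched as a regex; passing
# optional=True means the return value reports whether it actually raised.
conflicted = self.assertRaisesException(
    wiredtiger.WiredTigerError, lambda: c.insert(), '/WT_ROLLBACK/', True)
if conflicted:
    self.session.rollback_transaction()
else:
    self.session.commit_transaction()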