34 files changed, 844 insertions, 399 deletions
diff --git a/src/third_party/wiredtiger/build_posix/aclocal/strict.m4 b/src/third_party/wiredtiger/build_posix/aclocal/strict.m4
index 3600a39fe43..b912335fd16 100644
--- a/src/third_party/wiredtiger/build_posix/aclocal/strict.m4
+++ b/src/third_party/wiredtiger/build_posix/aclocal/strict.m4
@@ -134,6 +134,10 @@ AC_DEFUN([AM_CLANG_WARNINGS], [
 		w="$w -Wno-unused-command-line-argument";;
 	esac
 
+	# We occasionally use an extra semicolon to indicate an empty loop or
+	# conditional body.
+	w="$w -Wno-extra-semi-stmt"
+
 	# Ignore unrecognized options.
 	w="$w -Wno-unknown-warning-option"
 
diff --git a/src/third_party/wiredtiger/dist/s_string.ok b/src/third_party/wiredtiger/dist/s_string.ok
index 6ce1ad16a5f..563236661aa 100644
--- a/src/third_party/wiredtiger/dist/s_string.ok
+++ b/src/third_party/wiredtiger/dist/s_string.ok
@@ -1072,6 +1072,7 @@ ownp
 pR
 pS
 packv
+pagedump
 pagesize
 parens
 pareto
@@ -1325,6 +1326,7 @@ unmodify
 unordered
 unpackv
 unpadded
+unreconciled
 unreferenced
 unregister
 unsized
diff --git a/src/third_party/wiredtiger/examples/c/ex_async.c b/src/third_party/wiredtiger/examples/c/ex_async.c
index e9ffad4807c..85f783092fa 100644
--- a/src/third_party/wiredtiger/examples/c/ex_async.c
+++ b/src/third_party/wiredtiger/examples/c/ex_async.c
@@ -37,7 +37,7 @@ static const char *home;
 #elif defined(_WIN32)
 #define	ATOMIC_ADD(v, val)      (_InterlockedExchangeAdd(&(v), val) + val)
 #else
-#define	ATOMIC_ADD(v, val)      __sync_add_and_fetch(&(v), val)
+#define	ATOMIC_ADD(v, val)      __atomic_add_fetch(&(v), val, __ATOMIC_SEQ_CST)
 #endif
 
 static int global_error = 0;
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index b50a7f09165..2673308c46e 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -1,5 +1,5 @@
 {
-    "commit": "1a1197ef3c891458cd73290ad9b01c1e969f7e86", 
+    "commit": "280c572c8097a322e429a349f73135266f3faacf", 
     "github": "wiredtiger/wiredtiger.git", 
     "vendor": "wiredtiger", 
     "branch": "mongodb-4.2"
diff --git a/src/third_party/wiredtiger/src/async/async_api.c b/src/third_party/wiredtiger/src/async/async_api.c
index 9f9aa979139..0ef85b8cd28 100644
--- a/src/third_party/wiredtiger/src/async/async_api.c
+++ b/src/third_party/wiredtiger/src/async/async_api.c
@@ -160,8 +160,7 @@ retry:
 	WT_RET(__async_get_format(conn, uri, config, op));
 	op->unique_id = __wt_atomic_add64(&async->op_id, 1);
 	op->optype = WT_AOP_NONE;
-	(void)__wt_atomic_store32(
-	    &async->ops_index, (i + 1) % conn->async_size);
+	async->ops_index = (i + 1) % conn->async_size;
 	*opp = op;
 	return (0);
 }
diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c
index 4d94bcdb23e..d045405f85a 100644
--- a/src/third_party/wiredtiger/src/btree/bt_cursor.c
+++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c
@@ -1506,8 +1506,11 @@ __wt_btcur_modify(WT_CURSOR_BTREE *cbt, WT_MODIFY *entries, int nentries)
 	if (!F_ISSET(cursor, WT_CURSTD_KEY_INT) ||
 	    !F_ISSET(cursor, WT_CURSTD_VALUE_INT))
 		WT_ERR(__wt_btcur_search(cbt));
+
+	WT_ERR(__wt_modify_pack(cursor, &modify, entries, nentries));
+
 	orig = cursor->value.size;
-	WT_ERR(__wt_modify_apply_api(session, cursor, entries, nentries));
+	WT_ERR(__wt_modify_apply(cursor, modify->data));
 	new = cursor->value.size;
 	WT_ERR(__cursor_size_chk(session, &cursor->value));
 
@@ -1527,8 +1530,7 @@ __wt_btcur_modify(WT_CURSOR_BTREE *cbt, WT_MODIFY *entries, int nentries)
 	F_CLR(cursor, WT_CURSTD_OVERWRITE);
 	if (cursor->value.size <= 64 || __cursor_chain_exceeded(cbt))
 		ret = __btcur_update(cbt, &cursor->value, WT_UPDATE_STANDARD);
-	else if ((ret =
-	    __wt_modify_pack(session, &modify, entries, nentries)) == 0)
+	else
 		ret = __btcur_update(cbt, modify, WT_UPDATE_MODIFY);
 	if (overwrite)
 	       F_SET(cursor, WT_CURSTD_OVERWRITE);
diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c
index 685fb983718..9f5cadfecd0 100644
--- a/src/third_party/wiredtiger/src/btree/bt_debug.c
+++ b/src/third_party/wiredtiger/src/btree/bt_debug.c
@@ -718,6 +718,7 @@ __wt_debug_page(
  */
 int
 __wt_debug_cursor_page(void *cursor_arg, const char *ofile)
+    WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
 {
 	WT_CURSOR *cursor;
 	WT_CURSOR_BTREE *cbt;
@@ -889,7 +890,7 @@ __debug_page_metadata(WT_DBG *ds, WT_REF *ref)
 	if (split_gen != 0)
 		WT_RET(ds->f(ds, ", split-gen=%" PRIu64, split_gen));
 	if (mod != NULL)
-		WT_RET(ds->f(ds, ", write-gen=%" PRIu32, mod->write_gen));
+		WT_RET(ds->f(ds, ", page-state=%" PRIu32, mod->page_state));
 	WT_RET(ds->f(ds,
 	    ", memory-size %" WT_SIZET_FMT, page->memory_footprint));
 	WT_RET(ds->f(ds, "\n"));
diff --git a/src/third_party/wiredtiger/src/btree/bt_ret.c b/src/third_party/wiredtiger/src/btree/bt_ret.c
index 52277efb85d..d41f76c6442 100644
--- a/src/third_party/wiredtiger/src/btree/bt_ret.c
+++ b/src/third_party/wiredtiger/src/btree/bt_ret.c
@@ -250,7 +250,7 @@ __wt_value_return_upd(WT_SESSION_IMPL *session,
 	 * updates.
 	 */
 	while (i > 0)
-		WT_ERR(__wt_modify_apply(session, cursor, listp[--i]->data));
+		WT_ERR(__wt_modify_apply(cursor, listp[--i]->data));
 
 err:	if (allocated_bytes != 0)
 		__wt_free(session, listp);
diff --git a/src/third_party/wiredtiger/src/btree/bt_sync.c b/src/third_party/wiredtiger/src/btree/bt_sync.c
index a7d34d49f84..d2ac866bc59 100644
--- a/src/third_party/wiredtiger/src/btree/bt_sync.c
+++ b/src/third_party/wiredtiger/src/btree/bt_sync.c
@@ -322,6 +322,10 @@ __wt_sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
 			 * discarded), that is not wasted effort because
 			 * checkpoint doesn't need to write the page again.
 			 *
+			 * Once the transaction has given up it's snapshot it
+			 * is no longer safe to reconcile pages. That happens
+			 * prior to the final metadata checkpoint.
+			 *
 			 * XXX Only attempt this eviction when there are no
 			 * readers older than the checkpoint.  Otherwise, a bug
 			 * in eviction can mark the page clean and discard
@@ -331,6 +335,7 @@ __wt_sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
 			if (!WT_PAGE_IS_INTERNAL(page) &&
 			    page->read_gen == WT_READGEN_WONT_NEED &&
 			    !tried_eviction &&
+			    F_ISSET(&session->txn, WT_TXN_HAS_SNAPSHOT) &&
 			    (!F_ISSET(txn, WT_TXN_HAS_TS_READ) ||
 			    txn->read_timestamp ==
 			    conn->txn_global.pinned_timestamp)) {
diff --git a/src/third_party/wiredtiger/src/btree/row_srch.c b/src/third_party/wiredtiger/src/btree/row_srch.c
index 8603d329c15..a01ef5a49a7 100644
--- a/src/third_party/wiredtiger/src/btree/row_srch.c
+++ b/src/third_party/wiredtiger/src/btree/row_srch.c
@@ -30,6 +30,15 @@ __search_insert_append(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt,
 
 	if ((ins = WT_SKIP_LAST(ins_head)) == NULL)
 		return (0);
+	/*
+	 * Since the head of the skip list doesn't get mutated within this
+	 * function, the compiler may move this assignment above within the
+	 * loop below if it needs to (and may read a different value on each
+	 * loop due to other threads mutating the skip list).
+	 *
+	 * Place a read barrier here to avoid this issue.
+	 */
+	WT_READ_BARRIER();
 	key.data = WT_INSERT_KEY(ins);
 	key.size = WT_INSERT_KEY_SIZE(ins);
 
diff --git a/src/third_party/wiredtiger/src/conn/conn_capacity.c b/src/third_party/wiredtiger/src/conn/conn_capacity.c
index a75bdd259c4..38052a8e412 100644
--- a/src/third_party/wiredtiger/src/conn/conn_capacity.c
+++ b/src/third_party/wiredtiger/src/conn/conn_capacity.c
@@ -270,8 +270,7 @@ __capacity_reserve(uint64_t *reservation, uint64_t bytes, uint64_t capacity,
 			 * If the reservation clock is out of date, bring it
 			 * to within a second of a current time.
 			 */
-			(void)__wt_atomic_store64(reservation,
-			    (now_ns - WT_BILLION) + res_len);
+			*reservation = (now_ns - WT_BILLION) + res_len;
 	} else
 		res_value = now_ns;
 
diff --git a/src/third_party/wiredtiger/src/cursor/cur_join.c b/src/third_party/wiredtiger/src/cursor/cur_join.c
index 07bfe02a142..12be6929022 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_join.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_join.c
@@ -508,10 +508,8 @@ __curjoin_entry_in_range(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry,
 		if (!passed) {
 			if (iter != NULL &&
 			    (iter->is_equal ||
-			    F_ISSET(end, WT_CURJOIN_END_LT))) {
-				WT_RET(__curjoin_iter_bump(iter));
+			    F_ISSET(end, WT_CURJOIN_END_LT)))
 				return (WT_NOTFOUND);
-			}
 			if (!disjunction)
 				return (WT_NOTFOUND);
 			iter = NULL;
@@ -606,6 +604,9 @@ __curjoin_entry_member(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry,
 	WT_ITEM v;
 	bool bloom_found;
 
+	/* We cannot have a bloom filter on a join entry with subordinates. */
+	WT_ASSERT(session, entry->bloom == NULL || entry->subjoin == NULL);
+
 	if (entry->subjoin == NULL && iter != NULL &&
 	    (iter->end_pos + iter->end_skip >= entry->ends_next ||
 	    (iter->end_skip > 0 &&
@@ -633,16 +634,19 @@ __curjoin_entry_member(WT_SESSION_IMPL *session, WT_CURSOR_JOIN_ENTRY *entry,
 		bloom_found = true;
 	}
 	if (entry->subjoin != NULL) {
+		/*
+		 * If we have a subordinate join, the membership
+		 * check is delegated to it.
+		 */
 		WT_ASSERT(session,
 		    iter == NULL || entry->subjoin == iter->child->cjoin);
-		ret = __curjoin_entries_in_range(session, entry->subjoin,
-		    key, iter == NULL ? NULL : iter->child);
+		WT_ERR(__curjoin_entries_in_range(session, entry->subjoin,
+		    key, iter == NULL ? NULL : iter->child));
 		if (iter != NULL &&
-		    WT_CURJOIN_ITER_CONSUMED(iter->child)) {
-			WT_ERR(__curjoin_iter_bump(iter));
-			ret = WT_NOTFOUND;
-		}
-		return (ret);
+		    WT_CURJOIN_ITER_CONSUMED(iter->child))
+			return (WT_NOTFOUND);
+		/* There's nothing more to do for this node. */
+		return (0);
 	}
 	if (entry->index != NULL) {
 		/*
diff --git a/src/third_party/wiredtiger/src/cursor/cur_std.c b/src/third_party/wiredtiger/src/cursor/cur_std.c
index 073df6eaaf6..22d067ef90e 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_std.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_std.c
@@ -931,7 +931,7 @@ __cursor_modify(WT_CURSOR *cursor, WT_MODIFY *entries, int nentries)
 
 	/* Get the current value, apply the modifications. */
 	WT_ERR(cursor->search(cursor));
-	WT_ERR(__wt_modify_apply_api(session, cursor, entries, nentries));
+	WT_ERR(__wt_modify_apply_api(cursor, entries, nentries));
 
 	/* We know both key and value are set, "overwrite" doesn't matter. */
 	ret = cursor->update(cursor);
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index a7c289a7b7f..03643f473e1 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -489,10 +489,21 @@ struct __wt_page_modify {
 	WT_SPINLOCK page_lock;		/* Page's spinlock */
 
 	/*
-	 * The write generation is incremented when a page is modified, a page
-	 * is clean if the write generation is 0.
+	 * The page state is incremented when a page is modified.
+	 *
+	 * WT_PAGE_CLEAN --
+	 *	The page is clean.
+	 * WT_PAGE_DIRTY_FIRST --
+	 *	The page is in this state after the first operation that marks a
+	 *	page dirty, or when reconciliation is checking to see if it has
+	 *	done enough work to be able to mark the page clean.
+	 * WT_PAGE_DIRTY --
+	 *	Two or more updates have been added to the page.
 	 */
-	uint32_t write_gen;
+#define	WT_PAGE_CLEAN		0
+#define	WT_PAGE_DIRTY_FIRST	1
+#define	WT_PAGE_DIRTY		2
+	uint32_t page_state;
 
 #define	WT_PM_REC_EMPTY		1	/* Reconciliation: no replacement */
 #define	WT_PM_REC_MULTIBLOCK	2	/* Reconciliation: multiple blocks */
diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i
index d0679a9fb38..3fa5d60f1f1 100644
--- a/src/third_party/wiredtiger/src/include/btree.i
+++ b/src/third_party/wiredtiger/src/include/btree.i
@@ -34,7 +34,8 @@ __wt_page_is_empty(WT_PAGE *page)
 static inline bool
 __wt_page_evict_clean(WT_PAGE *page)
 {
-	return (page->modify == NULL || (page->modify->write_gen == 0 &&
+	return (page->modify == NULL ||
+	    (page->modify->page_state == WT_PAGE_CLEAN &&
 	    page->modify->rec_result == 0));
 }
 
@@ -45,7 +46,8 @@ __wt_page_evict_clean(WT_PAGE *page)
 static inline bool
 __wt_page_is_modified(WT_PAGE *page)
 {
-	return (page->modify != NULL && page->modify->write_gen != 0);
+	return (page->modify != NULL &&
+	    page->modify->page_state != WT_PAGE_CLEAN);
 }
 
 /*
@@ -496,19 +498,25 @@ __wt_page_only_modify_set(WT_SESSION_IMPL *session, WT_PAGE *page)
 	WT_ASSERT(session, !F_ISSET(session->dhandle, WT_DHANDLE_DEAD));
 
 	last_running = 0;
-	if (page->modify->write_gen == 0)
+	if (page->modify->page_state == WT_PAGE_CLEAN)
 		last_running = S2C(session)->txn_global.last_running;
 
 	/*
-	 * We depend on atomic-add being a write barrier, that is, a barrier to
-	 * ensure all changes to the page are flushed before updating the page
-	 * write generation and/or marking the tree dirty, otherwise checkpoints
+	 * We depend on the atomic operation being a write barrier, that is, a
+	 * barrier to ensure all changes to the page are flushed before updating
+	 * the page state and/or marking the tree dirty, otherwise checkpoints
 	 * and/or page reconciliation might be looking at a clean page/tree.
 	 *
 	 * Every time the page transitions from clean to dirty, update the cache
 	 * and transactional information.
+	 *
+	 * The page state can only ever be incremented above dirty by the number
+	 * of concurrently running threads, so the counter will never approach
+	 * the point where it would wrap.
 	 */
-	if (__wt_atomic_add32(&page->modify->write_gen, 1) == 1) {
+	if (page->modify->page_state < WT_PAGE_DIRTY &&
+	    __wt_atomic_add32(&page->modify->page_state, 1) ==
+	    WT_PAGE_DIRTY_FIRST) {
 		__wt_cache_dirty_incr(session, page);
 
 		/*
@@ -579,7 +587,17 @@ __wt_page_modify_clear(WT_SESSION_IMPL *session, WT_PAGE *page)
 	 * Allow the call to be made on clean pages.
 	 */
 	if (__wt_page_is_modified(page)) {
-		page->modify->write_gen = 0;
+		/*
+		 * The only part where ordering matters is during
+		 * reconciliation where updates on other threads are performing
+		 * writes to the page state that need to be visible to the
+		 * reconciliation thread.
+		 *
+		 * Since clearing of the page state is not going to be happening
+		 * during reconciliation on a separate thread, there's no write
+		 * barrier needed here.
+		 */
+		page->modify->page_state = WT_PAGE_CLEAN;
 		__wt_cache_dirty_decr(session, page);
 	}
 }
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index 80046127d3f..5dbd7115684 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -277,7 +277,7 @@ extern int __wt_curtable_get_value(WT_CURSOR *cursor, ...) WT_GCC_FUNC_DECL_ATTR
 extern int __wt_curtable_open(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR *owner, const char *cfg[], WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_debug_addr(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_debug_addr_print( WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_debug_cursor_page(void *cursor_arg, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_debug_cursor_page(void *cursor_arg, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_debug_disk( WT_SESSION_IMPL *session, const WT_PAGE_HEADER *dsk, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_debug_mode_config(WT_SESSION_IMPL *session, const char *cfg[]) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_debug_offset(WT_SESSION_IMPL *session, wt_off_t offset, uint32_t size, uint32_t checksum, const char *ofile) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -503,9 +503,9 @@ extern int __wt_metadata_search(WT_SESSION_IMPL *session, const char *key, char
 extern int __wt_metadata_set_base_write_gen(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_metadata_turtle_rewrite(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_metadata_update( WT_SESSION_IMPL *session, const char *key, const char *value) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_modify_apply( WT_SESSION_IMPL *session, WT_CURSOR *cursor, const void *modify) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_modify_apply_api(WT_SESSION_IMPL *session, WT_CURSOR *cursor, WT_MODIFY *entries, int nentries) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_modify_pack(WT_SESSION_IMPL *session, WT_ITEM **modifyp, WT_MODIFY *entries, int nentries) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_modify_apply(WT_CURSOR *cursor, const void *modify) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_modify_apply_api(WT_CURSOR *cursor, WT_MODIFY *entries, int nentries) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default"))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_modify_pack(WT_CURSOR *cursor, WT_ITEM **modifyp, WT_MODIFY *entries, int nentries) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_msg(WT_SESSION_IMPL *session, const char *fmt, ...) WT_GCC_FUNC_DECL_ATTRIBUTE((cold)) WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 2, 3))) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_multi_to_ref(WT_SESSION_IMPL *session, WT_PAGE *page, WT_MULTI *multi, WT_REF **refp, size_t *incrp, bool closing) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_name_check(WT_SESSION_IMPL *session, const char *str, size_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
diff --git a/src/third_party/wiredtiger/src/include/gcc.h b/src/third_party/wiredtiger/src/include/gcc.h
index a9d271ed0bd..7ee64cb663f 100644
--- a/src/third_party/wiredtiger/src/include/gcc.h
+++ b/src/third_party/wiredtiger/src/include/gcc.h
@@ -89,38 +89,40 @@
  * swap) operations.
  */
 
-#ifdef __clang__
 /*
- * We avoid __sync_bool_compare_and_swap with due to problems with optimization
- * with some versions of clang. See http://llvm.org/bugs/show_bug.cgi?id=21499
- * for details.
+ * We've hit optimization bugs with Clang 3.5 in the past when using the atomic
+ * builtins. See http://llvm.org/bugs/show_bug.cgi?id=21499 for details.
  */
-#define	WT_ATOMIC_CAS(ptr, old, new)					\
-	(__sync_val_compare_and_swap(ptr, old, new) == (old))
-#else
-#define	WT_ATOMIC_CAS(ptr, old, new)					\
-	__sync_bool_compare_and_swap(ptr, old, new)
+#if defined(__clang__) && \
+	defined(__clang_major__) && defined(__clang_minor__) && \
+	(((__clang_major__ == 3) && (__clang_minor__ <= 5)) || \
+	 (__clang_major__ < 3))
+#error "Clang versions 3.5 and earlier are unsupported by WiredTiger"
 #endif
+
+#define	WT_ATOMIC_CAS(ptr, oldp, new)					\
+	__atomic_compare_exchange_n(					\
+	    ptr, oldp, new, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)
 #define	WT_ATOMIC_CAS_FUNC(name, vp_arg, old_arg, new_arg)		\
 static inline bool							\
 __wt_atomic_cas##name(vp_arg, old_arg, new_arg)				\
 {									\
-	return (WT_ATOMIC_CAS(vp, old, new));				\
+	return (WT_ATOMIC_CAS(vp, &old, new));				\
 }
 WT_ATOMIC_CAS_FUNC(8, uint8_t *vp, uint8_t old, uint8_t new)
 WT_ATOMIC_CAS_FUNC(16, uint16_t *vp, uint16_t old, uint16_t new)
 WT_ATOMIC_CAS_FUNC(32, uint32_t *vp, uint32_t old, uint32_t new)
 WT_ATOMIC_CAS_FUNC(v32,							\
-    volatile uint32_t *vp, volatile uint32_t old, volatile uint32_t new)
+    volatile uint32_t *vp, uint32_t old, volatile uint32_t new)
 WT_ATOMIC_CAS_FUNC(i32, int32_t *vp, int32_t old, int32_t new)
 WT_ATOMIC_CAS_FUNC(iv32,						\
-    volatile int32_t *vp, volatile int32_t old, volatile int32_t new)
+    volatile int32_t *vp, int32_t old, volatile int32_t new)
 WT_ATOMIC_CAS_FUNC(64, uint64_t *vp, uint64_t old, uint64_t new)
 WT_ATOMIC_CAS_FUNC(v64,							\
-    volatile uint64_t *vp, volatile uint64_t old, volatile uint64_t new)
+    volatile uint64_t *vp, uint64_t old, volatile uint64_t new)
 WT_ATOMIC_CAS_FUNC(i64, int64_t *vp, int64_t old, int64_t new)
 WT_ATOMIC_CAS_FUNC(iv64,						\
-    volatile int64_t *vp, volatile int64_t old, volatile int64_t new)
+    volatile int64_t *vp, int64_t old, volatile int64_t new)
 WT_ATOMIC_CAS_FUNC(size, size_t *vp, size_t old, size_t new)
 
 /*
@@ -130,29 +132,24 @@ WT_ATOMIC_CAS_FUNC(size, size_t *vp, size_t old, size_t new)
 static inline bool
 __wt_atomic_cas_ptr(void *vp, void *old, void *new)
 {
-	return (WT_ATOMIC_CAS((void **)vp, old, new));
+	return (WT_ATOMIC_CAS((void **)vp, &old, new));
 }
 
 #define	WT_ATOMIC_FUNC(name, ret, vp_arg, v_arg)			\
 static inline ret							\
 __wt_atomic_add##name(vp_arg, v_arg)					\
 {									\
-	return (__sync_add_and_fetch(vp, v));				\
+	return (__atomic_add_fetch(vp, v, __ATOMIC_SEQ_CST));		\
 }									\
 static inline ret							\
 __wt_atomic_fetch_add##name(vp_arg, v_arg)				\
 {									\
-	return (__sync_fetch_and_add(vp, v));				\
-}									\
-static inline ret							\
-__wt_atomic_store##name(vp_arg, v_arg)					\
-{									\
-	return (__sync_lock_test_and_set(vp, v));			\
+	return (__atomic_fetch_add(vp, v, __ATOMIC_SEQ_CST));		\
 }									\
 static inline ret							\
 __wt_atomic_sub##name(vp_arg, v_arg)					\
 {									\
-	return (__sync_sub_and_fetch(vp, v));				\
+	return (__atomic_sub_fetch(vp, v, __ATOMIC_SEQ_CST));		\
 }
 WT_ATOMIC_FUNC(8, uint8_t, uint8_t *vp, uint8_t v)
 WT_ATOMIC_FUNC(16, uint16_t, uint16_t *vp, uint16_t v)
diff --git a/src/third_party/wiredtiger/src/include/lint.h b/src/third_party/wiredtiger/src/include/lint.h
index 903b0238b37..5d7cee531c2 100644
--- a/src/third_party/wiredtiger/src/include/lint.h
+++ b/src/third_party/wiredtiger/src/include/lint.h
@@ -35,15 +35,6 @@ __wt_atomic_fetch_add##name(type *vp, type v)				\
 	return (orig);							\
 }									\
 static inline ret							\
-__wt_atomic_store##name(type *vp, type v)				\
-{									\
-	type orig;							\
-									\
-	orig = *vp;							\
-	*vp = v;							\
-	return (orig);							\
-}									\
-static inline ret							\
 __wt_atomic_sub##name(type *vp, type v)					\
 {									\
 	*vp -= v;							\
diff --git a/src/third_party/wiredtiger/src/include/msvc.h b/src/third_party/wiredtiger/src/include/msvc.h
index 1586dae22b8..f4d8dc942f6 100644
--- a/src/third_party/wiredtiger/src/include/msvc.h
+++ b/src/third_party/wiredtiger/src/include/msvc.h
@@ -45,11 +45,6 @@ __wt_atomic_fetch_add##name(type *vp, type v)				\
 	return (_InterlockedExchangeAdd ## s((t *)(vp), (t)(v)));	\
 }									\
 static inline ret							\
-__wt_atomic_store##name(type *vp, type v)				\
-{									\
-	return (_InterlockedExchange ## s((t *)(vp), (t)(v)));		\
-}									\
-static inline ret							\
 __wt_atomic_sub##name(type *vp, type v)					\
 {									\
 	return (_InterlockedExchangeAdd ## s((t *)(vp), - (t)v) - (v));	\
diff --git a/src/third_party/wiredtiger/src/include/mutex.i b/src/third_party/wiredtiger/src/include/mutex.i
index 15e7218dd28..660ee22ed96 100644
--- a/src/third_party/wiredtiger/src/include/mutex.i
+++ b/src/third_party/wiredtiger/src/include/mutex.i
@@ -68,7 +68,7 @@ __wt_spin_trylock(WT_SESSION_IMPL *session, WT_SPINLOCK *t)
 {
 	WT_UNUSED(session);
 
-	return (__sync_lock_test_and_set(&t->lock, 1) == 0 ? 0 : EBUSY);
+	return (__atomic_test_and_set(&t->lock, __ATOMIC_ACQUIRE) ? 0 : EBUSY);
 }
 
 /*
@@ -82,7 +82,7 @@ __wt_spin_lock(WT_SESSION_IMPL *session, WT_SPINLOCK *t)
 
 	WT_UNUSED(session);
 
-	while (__sync_lock_test_and_set(&t->lock, 1)) {
+	while (__atomic_test_and_set(&t->lock, __ATOMIC_ACQUIRE)) {
 		for (i = 0; t->lock && i < WT_SPIN_COUNT; i++)
 			WT_PAUSE();
 		if (t->lock)
@@ -99,7 +99,7 @@ __wt_spin_unlock(WT_SESSION_IMPL *session, WT_SPINLOCK *t)
 {
 	WT_UNUSED(session);
 
-	__sync_lock_release(&t->lock);
+	__atomic_clear(&t->lock, __ATOMIC_RELEASE);
 }
 
 #elif SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX ||			\
diff --git a/src/third_party/wiredtiger/src/include/reconcile.h b/src/third_party/wiredtiger/src/include/reconcile.h
index be6440c27bc..c3c46ec11c5 100644
--- a/src/third_party/wiredtiger/src/include/reconcile.h
+++ b/src/third_party/wiredtiger/src/include/reconcile.h
@@ -20,12 +20,6 @@ struct __wt_reconcile {
 	uint32_t flags;			/* Caller's configuration */
 
 	/*
-	 * Track start/stop write generation to decide if all changes to the
-	 * page are written.
-	 */
-	uint32_t orig_write_gen;
-
-	/*
 	 * Track start/stop checkpoint generations to decide if lookaside table
 	 * records are correct.
 	 */
diff --git a/src/third_party/wiredtiger/src/include/serial.i b/src/third_party/wiredtiger/src/include/serial.i
index 1c67a84adbf..701f73df84f 100644
--- a/src/third_party/wiredtiger/src/include/serial.i
+++ b/src/third_party/wiredtiger/src/include/serial.i
@@ -7,29 +7,6 @@
  */
 
 /*
- * __page_write_gen_wrapped_check --
- *	Confirm the page's write generation number won't wrap.
- */
-static inline int
-__page_write_gen_wrapped_check(WT_PAGE *page)
-{
-	/*
-	 * Check to see if the page's write generation is about to wrap (wildly
-	 * unlikely as it implies 4B updates between clean page reconciliations,
-	 * but technically possible), and fail the update.
-	 *
-	 * The check is outside of the serialization mutex because the page's
-	 * write generation is going to be a hot cache line, so technically it's
-	 * possible for the page's write generation to wrap between the test and
-	 * our subsequent modification of it.  However, the test is (4B-1M), and
-	 * there cannot be a million threads that have done the test but not yet
-	 * completed their modification.
-	 */
-	return (page->modify->write_gen >
-	    UINT32_MAX - WT_MILLION ? WT_RESTART : 0);
-}
-
-/*
  * __insert_simple_func --
  *	Worker function to add a WT_INSERT entry to the middle of a skiplist.
  */
@@ -163,9 +140,6 @@ __wt_col_append_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
 	new_ins = *new_insp;
 	*new_insp = NULL;
 
-	/* Check for page write generation wrap. */
-	WT_RET(__page_write_gen_wrapped_check(page));
-
 	/*
 	 * Acquire the page's spinlock unless we already have exclusive access.
 	 * Then call the worker function.
@@ -215,9 +189,6 @@ __wt_insert_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
 	new_ins = *new_insp;
 	*new_insp = NULL;
 
-	/* Check for page write generation wrap. */
-	WT_RET(__page_write_gen_wrapped_check(page));
-
 	simple = true;
 	for (i = 0; i < skipdepth; i++)
 		if (new_ins->next[i] == NULL)
@@ -272,9 +243,6 @@ __wt_update_serial(WT_SESSION_IMPL *session, WT_PAGE *page,
 	upd = *updp;
 	*updp = NULL;
 
-	/* Check for page write generation wrap. */
-	WT_RET(__page_write_gen_wrapped_check(page));
-
 	/*
 	 * All structure setup must be flushed before the structure is entered
 	 * into the list. We need a write barrier here, our callers depend on
diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i
index ce58f9f7301..e9c6f7f8e9d 100644
--- a/src/third_party/wiredtiger/src/include/txn.i
+++ b/src/third_party/wiredtiger/src/include/txn.i
@@ -232,8 +232,6 @@ __wt_txn_resolve_prepared_op(
 			continue;
 		if (upd->txnid != txn->id)
 			break;
-		if (op->u.op_upd == NULL)
-			op->u.op_upd = upd;
 
 		++(*resolved_update_countp);
 
@@ -844,6 +842,7 @@ __wt_txn_upd_visible_type(WT_SESSION_IMPL *session, WT_UPDATE *upd)
 
 	return (WT_VISIBLE_TRUE);
 }
+
 /*
  * __wt_txn_upd_durable --
  *	Can the current transaction make the given update durable.
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index f7eeada4c8e..477894bcf14 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -456,14 +456,20 @@ __rec_write_page_status(WT_SESSION_IMPL *session, WT_RECONCILE *r)
 		}
 
 		/*
-		 * The page only might be clean; if the write generation is
-		 * unchanged since reconciliation started, it's clean.
+		 * We set the page state to mark it as having been dirtied for
+		 * the first time prior to reconciliation. A failed atomic cas
+		 * indicates that an update has taken place during
+		 * reconciliation.
 		 *
-		 * If the write generation changed, the page has been written
-		 * since reconciliation started and remains dirty (that can't
-		 * happen when evicting, the page is exclusively locked).
+		 * The page only might be clean; if the page state is unchanged
+		 * since reconciliation started, it's clean.
+		 *
+		 * If the page state changed, the page has been written since
+		 * reconciliation started and remains dirty (that can't happen
+		 * when evicting, the page is exclusively locked).
 		 */
-		if (__wt_atomic_cas32(&mod->write_gen, r->orig_write_gen, 0))
+		if (__wt_atomic_cas32(
+		    &mod->page_state, WT_PAGE_DIRTY_FIRST, WT_PAGE_CLEAN))
 			__wt_cache_dirty_decr(session, page);
 		else
 			WT_ASSERT(session, !F_ISSET(r, WT_REC_EVICT));
@@ -602,13 +608,22 @@ __rec_init(WT_SESSION_IMPL *session,
 	r->page = page;
 
 	/*
-	 * Save the page's write generation before reading the page.
 	 * Save the transaction generations before reading the page.
 	 * These are all ordered reads, but we only need one.
 	 */
 	r->orig_btree_checkpoint_gen = btree->checkpoint_gen;
 	r->orig_txn_checkpoint_gen = __wt_gen(session, WT_GEN_CHECKPOINT);
-	WT_ORDERED_READ(r->orig_write_gen, page->modify->write_gen);
+
+	/*
+	 * Update the page state to indicate that all currently installed
+	 * updates will be included in this reconciliation if it would mark the
+	 * page clean.
+	 *
+	 * Add a write barrier to make it more likely that a thread adding an
+	 * update will see this state change.
+	 */
+	page->modify->page_state = WT_PAGE_DIRTY_FIRST;
+	WT_FULL_BARRIER();
 
 	/*
 	 * Cache the oldest running transaction ID.  This is used to check
diff --git a/src/third_party/wiredtiger/src/support/modify.c b/src/third_party/wiredtiger/src/support/modify.c
index be1b1970da6..e8260cb41b6 100644
--- a/src/third_party/wiredtiger/src/support/modify.c
+++ b/src/third_party/wiredtiger/src/support/modify.c
@@ -8,28 +8,60 @@
 
 #include "wt_internal.h"
 
+#define	WT_MODIFY_FOREACH_BEGIN(mod, p, nentries, napplied) do {	\
+	const size_t *__p = p;						\
+	const uint8_t *__data =						\
+	    (const uint8_t *)(__p + (size_t)(nentries) * 3);		\
+	int __i;							\
+	for (__i = 0; __i < (nentries); ++__i) {			\
+		memcpy(&(mod).data.size, __p++, sizeof(size_t));	\
+		memcpy(&(mod).offset, __p++, sizeof(size_t));		\
+		memcpy(&(mod).size, __p++, sizeof(size_t));		\
+		(mod).data.data = __data;				\
+		__data += (mod).data.size;				\
+		if (__i < (napplied))					\
+			continue;
+
+#define	WT_MODIFY_FOREACH_REVERSE(mod, p, nentries, napplied, datasz) do {\
+	const size_t *__p = (p) + (size_t)(nentries) * 3;		\
+	const uint8_t *__data = (const uint8_t *)__p + datasz;		\
+	int __i;							\
+	for (__i = (napplied); __i < (nentries); ++__i) {		\
+		memcpy(&(mod).size, --__p, sizeof(size_t));		\
+		memcpy(&(mod).offset, --__p, sizeof(size_t));		\
+		memcpy(&(mod).data.size, --__p, sizeof(size_t));	\
+		(mod).data.data = (__data -= (mod).data.size);
+
+#define	WT_MODIFY_FOREACH_END						\
+	}								\
+} while (0)
+
 /*
  * __wt_modify_pack --
  *	Pack a modify structure into a buffer.
  */
 int
-__wt_modify_pack(WT_SESSION_IMPL *session,
+__wt_modify_pack(WT_CURSOR *cursor,
     WT_ITEM **modifyp, WT_MODIFY *entries, int nentries)
 {
 	WT_ITEM *modify;
-	size_t len, *p;
+	WT_SESSION_IMPL *session;
+	size_t diffsz, len, *p;
 	uint8_t *data;
 	int i;
 
+	session = (WT_SESSION_IMPL *)cursor->session;
+
 	/*
 	 * Build the in-memory modify value. It's the entries count, followed
 	 * by the modify structure offsets written in order, followed by the
 	 * data (data at the end to minimize unaligned reads/writes).
 	 */
 	len = sizeof(size_t);                           /* nentries */
-	for (i = 0; i < nentries; ++i) {
+	for (i = 0, diffsz = 0; i < nentries; ++i) {
 		len += 3 * sizeof(size_t);              /* WT_MODIFY fields */
 		len += entries[i].data.size;            /* data */
+		diffsz += entries[i].size;		/* bytes touched */
 	}
 
 	WT_RET(__wt_scr_alloc(session, len, &modify));
@@ -48,6 +80,18 @@ __wt_modify_pack(WT_SESSION_IMPL *session,
 	}
 	modify->size = WT_PTRDIFF(data, modify->data);
 	*modifyp = modify;
+
+	/*
+	 * Update statistics. This is the common path called by
+	 * WT_CURSOR::modify implementations.
+	 */
+	WT_STAT_CONN_INCR(session, cursor_modify);
+	WT_STAT_DATA_INCR(session, cursor_modify);
+	WT_STAT_CONN_INCRV(session, cursor_modify_bytes, cursor->value.size);
+	WT_STAT_DATA_INCRV(session, cursor_modify_bytes, cursor->value.size);
+	WT_STAT_CONN_INCRV(session, cursor_modify_bytes_touch, diffsz);
+	WT_STAT_DATA_INCRV(session, cursor_modify_bytes_touch, diffsz);
+
 	return (0);
 }
 
@@ -56,51 +100,46 @@ __wt_modify_pack(WT_SESSION_IMPL *session,
  *	Apply a single modify structure change to the buffer.
  */
 static int
-__modify_apply_one(WT_SESSION_IMPL *session, WT_CURSOR *cursor,
-    size_t data_size, size_t offset, size_t size, const uint8_t *data)
+__modify_apply_one(
+    WT_SESSION_IMPL *session, WT_ITEM *value, WT_MODIFY *modify, bool sformat)
 {
-	WT_ITEM *value;
-	size_t len;
-	uint8_t *from, *to;
-	bool sformat;
+	size_t data_size, item_offset, offset, size;
+	const uint8_t *data, *from;
+	uint8_t *to;
 
-	value = &cursor->value;
-	sformat = cursor->value_format[0] == 'S';
+	data = modify->data.data;
+	data_size = modify->data.size;
+	offset = modify->offset;
+	size = modify->size;
 
 	/*
 	 * Grow the buffer to the maximum size we'll need. This is pessimistic
 	 * because it ignores replacement bytes, but it's a simpler calculation.
 	 *
-	 * Grow the buffer before we fast-path the expected case. This function
-	 * is often called using a cursor buffer referencing on-page memory and
-	 * it's easy to overwrite a page. A side-effect of growing the buffer is
-	 * to ensure the buffer's value is in buffer-local memory.
+	 * Grow the buffer first. This function is often called using a cursor
+	 * buffer referencing on-page memory and it's easy to overwrite a page.
+	 * A side-effect of growing the buffer is to ensure the buffer's value
+	 * is in buffer-local memory.
 	 *
 	 * Because the buffer may reference an overflow item, the data may not
 	 * start at the start of the buffer's memory and we have to correct for
 	 * that.
 	 */
-	len = WT_DATA_IN_ITEM(value) ? WT_PTRDIFF(value->data, value->mem) : 0;
-	WT_RET(__wt_buf_grow(session, value,
-	    len + WT_MAX(value->size, offset) + data_size + (sformat ? 1 : 0)));
+	item_offset =
+	    WT_DATA_IN_ITEM(value) ? WT_PTRDIFF(value->data, value->mem) : 0;
+	WT_RET(__wt_buf_grow(session, value, item_offset +
+	    WT_MAX(value->size, offset) + data_size + (sformat ? 1 : 0)));
 
 	/*
-	 * Fast-path the expected case, where we're overwriting a set of bytes
+	 * Fast-path the common case, where we're overwriting a set of bytes
 	 * that already exist in the buffer.
 	 */
 	if (value->size > offset + data_size && data_size == size) {
-		memmove((uint8_t *)value->data + offset, data, data_size);
+		memcpy((uint8_t *)value->data + offset, data, data_size);
 		return (0);
 	}
 
 	/*
-	 * Decrement the size to discard the trailing nul (done after growing
-	 * the buffer to ensure it can be restored without further checking).
-	 */
-	if (sformat)
-		--value->size;
-
-	/*
 	 * If appending bytes past the end of the value, initialize gap bytes
 	 * and copy the new bytes into place.
 	 */
@@ -108,12 +147,8 @@ __modify_apply_one(WT_SESSION_IMPL *session, WT_CURSOR *cursor,
 		if (value->size < offset)
 			memset((uint8_t *)value->data + value->size,
 			    sformat ? ' ' : 0, offset - value->size);
-		memmove((uint8_t *)value->data + offset, data, data_size);
+		memcpy((uint8_t *)value->data + offset, data, data_size);
 		value->size = offset + data_size;
-
-		/* Restore the trailing nul. */
-		if (sformat)
-			((char *)value->data)[value->size++] = '\0';
 		return (0);
 	}
 
@@ -125,9 +160,12 @@ __modify_apply_one(WT_SESSION_IMPL *session, WT_CURSOR *cursor,
 	if (value->size < offset + size)
 		size = value->size - offset;
 
+	WT_ASSERT(session, value->size + (data_size - size) +
+	    (sformat ? 1 : 0) <= value->memsize);
+
 	if (data_size == size) {			/* Overwrite */
 		/* Copy in the new data. */
-		memmove((uint8_t *)value->data + offset, data, data_size);
+		memcpy((uint8_t *)value->data + offset, data, data_size);
 
 		/*
 		 * The new data must overlap the buffer's end (else, we'd use
@@ -137,7 +175,7 @@ __modify_apply_one(WT_SESSION_IMPL *session, WT_CURSOR *cursor,
 		value->size = offset + data_size;
 	} else {					/* Shrink or grow */
 		/* Move trailing data forward/backward to its new location. */
-		from = (uint8_t *)value->data + (offset + size);
+		from = (const uint8_t *)value->data + (offset + size);
 		WT_ASSERT(session, WT_DATA_IN_ITEM(value) &&
 		    from + (value->size - (offset + size)) <=
 		    (uint8_t *)value->mem + value->memsize);
@@ -148,7 +186,7 @@ __modify_apply_one(WT_SESSION_IMPL *session, WT_CURSOR *cursor,
 		memmove(to, from, value->size - (offset + size));
 
 		/* Copy in the new data. */
-		memmove((uint8_t *)value->data + offset, data, data_size);
+		memcpy((uint8_t *)value->data + offset, data, data_size);
 
 		/*
 		 * Correct the size. This works because of how the C standard
@@ -165,49 +203,134 @@ __modify_apply_one(WT_SESSION_IMPL *session, WT_CURSOR *cursor,
 		 value->size += (data_size - size);
 	}
 
-	/* Restore the trailing nul. */
-	if (sformat)
-		((char *)value->data)[value->size++] = '\0';
-
 	return (0);
 }
 
 /*
- * __wt_modify_apply_api --
- *	Apply a single set of WT_MODIFY changes to a buffer, the cursor API
- * interface.
+ * __modify_fast_path --
+ *	Process a set of modifications, applying any that can be made in place,
+ *	and check if the remaining ones are sorted and non-overlapping.
  */
-int
-__wt_modify_apply_api(WT_SESSION_IMPL *session,
-    WT_CURSOR *cursor, WT_MODIFY *entries, int nentries)
-    WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+static void
+__modify_fast_path(
+    WT_ITEM *value, const size_t *p, int nentries,
+    int *nappliedp, bool *overlapp, size_t *dataszp, size_t *destszp)
 {
-	size_t modified;
-	int i;
+	WT_MODIFY current, prev;
+	size_t datasz, destoff;
+	bool fastpath, first;
+
+	*overlapp = true;
+
+	datasz = destoff = 0;
+	WT_CLEAR(current);
+	WT_CLEAR(prev);			/* [-Werror=maybe-uninitialized] */
 
-	for (modified = 0, i = 0; i < nentries; ++i) {
-		modified += entries[i].size;
-		WT_RET(__modify_apply_one(session, cursor, entries[i].data.size,
-		    entries[i].offset, entries[i].size, entries[i].data.data));
-	}
 	/*
-	 * This API is used by some external test functions with a NULL
-	 * session pointer - they don't expect statistics to be incremented.
+	 * If the modifications are sorted and don't overlap in the old or new
+	 * values, we can do a fast application of all the modifications
+	 * modifications in a single pass.
+	 *
+	 * The requirement for ordering is unfortunate, but modifications are
+	 * performed in order, and applications specify byte offsets based on
+	 * that. In other words, byte offsets are cumulative, modifications
+	 * that shrink or grow the data affect subsequent modification's byte
+	 * offsets.
 	 */
-	if (session != NULL) {
-		WT_STAT_CONN_INCR(session, cursor_modify);
-		WT_STAT_DATA_INCR(session, cursor_modify);
-		WT_STAT_CONN_INCRV(session,
-		    cursor_modify_bytes, cursor->value.size);
-		WT_STAT_DATA_INCRV(session,
-		    cursor_modify_bytes, cursor->value.size);
-		WT_STAT_CONN_INCRV(session,
-		    cursor_modify_bytes_touch, modified);
-		WT_STAT_DATA_INCRV(session,
-		    cursor_modify_bytes_touch, modified);
-	}
+	fastpath = first = true;
+	*nappliedp = 0;
+	WT_MODIFY_FOREACH_BEGIN(current, p, nentries, 0) {
+		datasz += current.data.size;
 
-	return (0);
+		if (fastpath && current.data.size == current.size &&
+		    current.offset + current.size <= value->size) {
+			memcpy((uint8_t *)value->data + current.offset,
+			    current.data.data, current.data.size);
+			++(*nappliedp);
+			continue;
+		}
+		fastpath = false;
+
+		/* Step over the bytes before the current block. */
+		if (first)
+			destoff = current.offset;
+		else {
+			/* Check that entries are sorted and non-overlapping. */
+			if (current.offset < prev.offset + prev.size ||
+			    current.offset < prev.offset + prev.data.size)
+				return;
+			destoff += current.offset - (prev.offset + prev.size);
+		}
+
+		/*
+		 * If the source is past the end of the current value, we have
+		 * to deal with padding bytes.  Don't try to fast-path padding
+		 * bytes; it's not common and adds branches to the loop
+		 * applying the changes.
+		 */
+		if (current.offset + current.size > value->size)
+			return;
+
+		/*
+		 * If copying this block overlaps with the next one, we can't
+		 * build the value in reverse order.
+		 */
+		if (current.size != current.data.size &&
+		    current.offset + current.size > destoff)
+			return;
+
+		/* Step over the current modification. */
+		destoff += current.data.size;
+
+		prev = current;
+		first = false;
+	} WT_MODIFY_FOREACH_END;
+
+	/* Step over the final unmodified block. */
+	destoff += value->size - (current.offset + current.size);
+
+	*overlapp = false;
+	*dataszp = datasz;
+	*destszp = destoff;
+	return;
+}
+
+/*
+ * __modify_apply_no_overlap --
+ *	Apply a single set of WT_MODIFY changes to a buffer, where the changes
+ *	are in sorted order and none of the changes overlap.
+ */
+static void
+__modify_apply_no_overlap(WT_SESSION_IMPL *session, WT_ITEM *value,
+    const size_t *p, int nentries, int napplied, size_t datasz, size_t destsz)
+{
+	WT_MODIFY current;
+	size_t sz;
+	const uint8_t *from;
+	uint8_t *to;
+
+	from = (const uint8_t *)value->data + value->size;
+	to = (uint8_t *)value->data + destsz;
+	WT_MODIFY_FOREACH_REVERSE(current, p, nentries, napplied, datasz) {
+		/* Move the current unmodified block into place if necessary. */
+		sz = WT_PTRDIFF(to, value->data) -
+		    (current.offset + current.data.size);
+		from -= sz;
+		to -= sz;
+		WT_ASSERT(session, from >= (const uint8_t *)value->data &&
+		    to >= (uint8_t *)value->data);
+		WT_ASSERT(session,
+		    from + sz <= (const uint8_t *)value->data + value->size);
+
+		if (to != from)
+			memmove(to, from, sz);
+
+		from -= current.size;
+		to -= current.data.size;
+		memcpy(to, current.data.data, current.data.size);
+	} WT_MODIFY_FOREACH_END;
+
+	value->size = destsz;
 }
 
 /*
@@ -215,31 +338,91 @@ __wt_modify_apply_api(WT_SESSION_IMPL *session,
  *	Apply a single set of WT_MODIFY changes to a buffer.
  */
 int
-__wt_modify_apply(
-    WT_SESSION_IMPL *session, WT_CURSOR *cursor, const void *modify)
+__wt_modify_apply(WT_CURSOR *cursor, const void *modify)
 {
-	size_t data_size, nentries, offset, size;
+	WT_ITEM *value;
+	WT_MODIFY mod;
+	WT_SESSION_IMPL *session;
+	size_t datasz, destsz, item_offset, tmp;
 	const size_t *p;
-	const uint8_t *data;
+	int napplied, nentries;
+	bool overlap, sformat;
+
+	session = (WT_SESSION_IMPL *)cursor->session;
+	sformat = cursor->value_format[0] == 'S';
+	value = &cursor->value;
 
 	/*
-	 * Get the number of entries, and set a second pointer to reference the
-	 * change data. The modify string isn't necessarily aligned for size_t
-	 * access, copy to be sure.
+	 * Get the number of modify entries and set a second pointer to
+	 * reference the replacement data.
 	 */
 	p = modify;
-	memcpy(&nentries, p++, sizeof(size_t));
-	data = (uint8_t *)modify +
-	    sizeof(size_t) + (nentries * 3 * sizeof(size_t));
-
-	/* Step through the list of entries, applying them in order. */
-	for (; nentries-- > 0; data += data_size) {
-		memcpy(&data_size, p++, sizeof(size_t));
-		memcpy(&offset, p++, sizeof(size_t));
-		memcpy(&size, p++, sizeof(size_t));
-		WT_RET(__modify_apply_one(
-		    session, cursor, data_size, offset, size, data));
+	memcpy(&tmp, p++, sizeof(size_t));
+	nentries = (int)tmp;
+
+	/*
+	 * Grow the buffer first. This function is often called using a cursor
+	 * buffer referencing on-page memory and it's easy to overwrite a page.
+	 * A side-effect of growing the buffer is to ensure the buffer's value
+	 * is in buffer-local memory.
+	 *
+	 * Because the buffer may reference an overflow item, the data may not
+	 * start at the start of the buffer's memory and we have to correct for
+	 * that.
+	 */
+	item_offset = WT_DATA_IN_ITEM(value) ?
+	    WT_PTRDIFF(value->data, value->mem) : 0;
+	WT_RET(__wt_buf_grow(session, value, item_offset + value->size));
+
+	/*
+	 * Decrement the size to discard the trailing nul (done after growing
+	 * the buffer to ensure it can be restored without further checking).
+	 */
+	if (sformat)
+		--value->size;
+
+	__modify_fast_path(
+	    value, p, nentries, &napplied, &overlap, &datasz, &destsz);
+
+	if (napplied == nentries)
+		goto done;
+
+	if (!overlap) {
+		/* Grow the buffer first, correcting for the data offset. */
+		WT_RET(__wt_buf_grow(session, value, item_offset +
+		    WT_MAX(destsz, value->size) + (sformat ? 1 : 0)));
+
+		__modify_apply_no_overlap(
+		    session, value, p, nentries, napplied, datasz, destsz);
+		goto done;
 	}
 
+	WT_MODIFY_FOREACH_BEGIN(mod, p, nentries, napplied) {
+		WT_RET(__modify_apply_one(session, value, &mod, sformat));
+	} WT_MODIFY_FOREACH_END;
+
+done:	/* Restore the trailing nul. */
+	if (sformat)
+		((char *)value->data)[value->size++] = '\0';
+
 	return (0);
 }
+
+/*
+ * __wt_modify_apply_api --
+ *	Apply a single set of WT_MODIFY changes to a buffer, the cursor API
+ *	interface.
+ */
+int
+__wt_modify_apply_api(WT_CURSOR *cursor, WT_MODIFY *entries, int nentries)
+    WT_GCC_FUNC_ATTRIBUTE((visibility("default")))
+{
+	WT_DECL_ITEM(modify);
+	WT_DECL_RET;
+
+	WT_ERR(__wt_modify_pack(cursor, &modify, entries, nentries));
+	WT_ERR(__wt_modify_apply(cursor, modify->data));
+
+err:	__wt_scr_free((WT_SESSION_IMPL *)cursor->session, &modify);
+	return (ret);
+}
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index f888a470c8c..b3085080956 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -664,13 +664,15 @@ __txn_commit_timestamps_assert(WT_SESSION_IMPL *session)
 	WT_TXN *txn;
 	WT_TXN_OP *op;
 	WT_UPDATE *upd;
-	wt_timestamp_t op_timestamp;
+	wt_timestamp_t durable_op_timestamp, op_timestamp, prev_op_timestamp;
 	u_int i;
 	const char *open_cursor_cfg[] = {
 	    WT_CONFIG_BASE(session, WT_SESSION_open_cursor), NULL };
 	bool op_zero_ts, upd_zero_ts;
 
 	txn = &session->txn;
+	cursor = NULL;
+	durable_op_timestamp = prev_op_timestamp = WT_TS_NONE;
 
 	/*
 	 * Debugging checks on timestamps, if user requested them.
@@ -728,13 +730,15 @@ __txn_commit_timestamps_assert(WT_SESSION_IMPL *session)
 				WT_WITH_BTREE(session, op->btree,
 				    ret = __wt_btcur_search_uncommitted(
 				    (WT_CURSOR_BTREE *)cursor, &upd));
-					WT_TRET(cursor->close(cursor));
 				if (ret != 0)
 					WT_RET_MSG(session, EINVAL,
 					    "prepared update restore failed");
-				op->u.op_upd = upd;
 			} else
-				upd = op->u.op_upd->next;
+				upd = op->u.op_upd;
+
+			WT_ASSERT(session, upd != NULL);
+			op_timestamp = upd->start_ts;
+
 			/*
 			 * Skip over any aborted update structures, internally
 			 * created update structures or ones from our own
@@ -749,6 +753,22 @@ __txn_commit_timestamps_assert(WT_SESSION_IMPL *session)
 			 * first valid update in the chain. They're in
 			 * most recent order.
 			 */
+			if (upd != NULL) {
+				prev_op_timestamp = upd->start_ts;
+				durable_op_timestamp = upd->durable_ts;
+			}
+
+			/*
+			 * We no longer need to access the update structure so
+			 * it's safe to release our reference to the page.
+			 */
+			if (cursor != NULL) {
+				WT_ASSERT(
+				    session, F_ISSET(txn, WT_TXN_PREPARE));
+				WT_RET(cursor->close(cursor));
+				cursor = NULL;
+			}
+
 			if (upd == NULL)
 				continue;
 			/*
@@ -760,7 +780,7 @@ __txn_commit_timestamps_assert(WT_SESSION_IMPL *session)
 			 * Check timestamps are used in order.
 			 */
 			op_zero_ts = !F_ISSET(txn, WT_TXN_HAS_TS_COMMIT);
-			upd_zero_ts = upd->start_ts == WT_TS_NONE;
+			upd_zero_ts = prev_op_timestamp == WT_TS_NONE;
 			if (op_zero_ts != upd_zero_ts)
 				WT_RET_MSG(session, EINVAL,
 				    "per-key timestamps used inconsistently");
@@ -772,7 +792,6 @@ __txn_commit_timestamps_assert(WT_SESSION_IMPL *session)
 			if (op_zero_ts)
 				continue;
 
-			op_timestamp = op->u.op_upd->start_ts;
 			/*
 			 * Only if the update structure doesn't have a timestamp
 			 * then use the one in the transaction structure.
@@ -780,11 +799,11 @@ __txn_commit_timestamps_assert(WT_SESSION_IMPL *session)
 			if (op_timestamp == WT_TS_NONE)
 				op_timestamp = txn->commit_timestamp;
 			if (F_ISSET(txn, WT_TXN_TS_COMMIT_KEYS) &&
-				op_timestamp < upd->start_ts)
+				op_timestamp < prev_op_timestamp)
 				WT_RET_MSG(session, EINVAL,
 				    "out of order commit timestamps");
 			if (F_ISSET(txn, WT_TXN_TS_DURABLE_KEYS) &&
-			    txn->durable_timestamp < upd->durable_ts)
+			    txn->durable_timestamp < durable_op_timestamp)
 				WT_RET_MSG(session, EINVAL,
 				    "out of order durable timestamps");
 		}
diff --git a/src/third_party/wiredtiger/src/txn/txn_recover.c b/src/third_party/wiredtiger/src/txn/txn_recover.c
index a5e3e139178..504b2c0e8b4 100644
--- a/src/third_party/wiredtiger/src/txn/txn_recover.c
+++ b/src/third_party/wiredtiger/src/txn/txn_recover.c
@@ -155,7 +155,7 @@ __txn_op_apply(
 			 * than using cursor modify to create a partial update
 			 * (for no particular reason than simplicity).
 			 */
-			WT_ERR(__wt_modify_apply(session, cursor, value.data));
+			WT_ERR(__wt_modify_apply(cursor, value.data));
 			WT_ERR(cursor->insert(cursor));
 		}
 		break;
@@ -222,7 +222,7 @@ __txn_op_apply(
 			 * than using cursor modify to create a partial update
 			 * (for no particular reason than simplicity).
 			 */
-			WT_ERR(__wt_modify_apply(session, cursor, value.data));
+			WT_ERR(__wt_modify_apply(cursor, value.data));
 			WT_ERR(cursor->insert(cursor));
 		}
 		break;
diff --git a/src/third_party/wiredtiger/test/csuite/wt3338_partial_update/main.c b/src/third_party/wiredtiger/test/csuite/wt3338_partial_update/main.c
index 879a8e96c6a..5a413c0df3b 100644
--- a/src/third_party/wiredtiger/test/csuite/wt3338_partial_update/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt3338_partial_update/main.c
@@ -37,7 +37,7 @@
 #define	DATASIZE		1024
 #define	MAX_MODIFY_ENTRIES	37		/* Maximum modify vectors */
 
-static WT_MODIFY entries[1000];			/* Entries vector */
+static WT_MODIFY entries[MAX_MODIFY_ENTRIES];	/* Entries vector */
 static int nentries;				/* Entries count */
 
 /*
@@ -50,7 +50,6 @@ static char modify_repl[MAX_REPL_BYTES * 2];		/* Replacement bytes */
 
 static WT_RAND_STATE rnd;				/* RNG state */
 
-#if DEBUG
 /*
  * show --
  *	Dump out a buffer.
@@ -62,15 +61,10 @@ show(WT_ITEM *buf, const char *tag)
 	const uint8_t *a;
 
 	fprintf(stderr, "%s: %" WT_SIZET_FMT " bytes\n\t", tag, buf->size);
-	for (a = buf->data, i = 0; i < buf->size; ++i, ++a) {
-		if (isprint(*a))
-			fprintf(stderr, " %c", *a);
-		else
-			fprintf(stderr, " %#x", *a);
-	}
+	for (a = buf->data, i = 0; i < buf->size; ++i, ++a)
+		fprintf(stderr, " %c", isprint(*a) ? *a : '.');
 	fprintf(stderr, "\n");
 }
-#endif
 
 /*
  * modify_repl_init --
@@ -82,7 +76,7 @@ modify_repl_init(void)
 	size_t i;
 
 	for (i = 0; i < sizeof(modify_repl); ++i)
-		modify_repl[i] = "zyxwvutsrqponmlkjihgfedcba"[i % 26];
+		modify_repl[i] = 'Z' - (i % 26);
 }
 
 /*
@@ -95,13 +89,13 @@ modify_build(void)
 	int i;
 
 	/* Mess up the entries. */
-	memset(entries, 0xff, MAX_MODIFY_ENTRIES * sizeof(entries[0]));
+	memset(entries, 0xff, sizeof(entries));
 
 	/*
 	 * Randomly select a number of byte changes, offsets and lengths.
 	 * Allow a value of 0, the API should accept it.
 	 */
-	nentries = (int)(__wt_random(&rnd) % MAX_MODIFY_ENTRIES);
+	nentries = (int)(__wt_random(&rnd) % (MAX_MODIFY_ENTRIES + 1));
 	for (i = 0; i < nentries; ++i) {
 		entries[i].data.data =
 		    modify_repl + __wt_random(&rnd) % MAX_REPL_BYTES;
@@ -115,7 +109,7 @@ modify_build(void)
 		printf(
 		    "%d: {%.*s} %" WT_SIZET_FMT " bytes replacing %"
 		    WT_SIZET_FMT " bytes @ %" WT_SIZET_FMT "\n",
-		    i, (int)entries[i].data.size, entries[i].data.data,
+		    i, (int)entries[i].data.size, (char *)entries[i].data.data,
 		    entries[i].data.size, entries[i].size, entries[i].offset);
 #endif
 }
@@ -217,16 +211,29 @@ slow_apply_api(WT_ITEM *orig)
  *	Compare two results.
  */
 static void
-compare(WT_ITEM *local, WT_ITEM *library)
+compare(WT_ITEM *orig, WT_ITEM *local, WT_ITEM *library)
 {
-#if DEBUG
+	size_t i, max;
+	const uint8_t *p, *t;
+
+	max = WT_MIN(local->size, library->size);
 	if (local->size != library->size ||
 	    memcmp(local->data, library->data, local->size) != 0) {
-		fprintf(stderr, "results differ\n");
+		for (i = 0,
+		    p = local->data, t = library->data; i < max; ++i, ++p, ++t)
+			if (*p != *t)
+				break;
+		fprintf(stderr, "results differ: ");
+		if (max == 0)
+			fprintf(stderr,
+			    "identical up to %" WT_SIZET_FMT " bytes\n", max);
+		else
+			fprintf(stderr,
+			    "first mismatch at offset %" WT_SIZET_FMT "\n", i);
+		show(orig, "original");
 		show(local, "local results");
 		show(library, "library results");
 	}
-#endif
 	testutil_assert(
 	    local->size == library->size && memcmp(
 	    local->data, library->data, local->size) == 0);
@@ -250,16 +257,22 @@ compare(WT_ITEM *local, WT_ITEM *library)
  *	calculate-modify API.
  */
 static void
-modify_run(bool verbose)
+modify_run(TEST_OPTS *opts)
 {
 	WT_CURSOR *cursor, _cursor;
 	WT_DECL_RET;
 	WT_ITEM *localA, _localA, *localB, _localB;
+	WT_SESSION_IMPL *session;
 	size_t len;
 	int i, j;
+	u_char *p;
+	bool verbose;
+
+	session = (WT_SESSION_IMPL *)opts->session;
+	verbose = opts->verbose;
 
 	/* Initialize the RNG. */
-	__wt_random_init_seed(NULL, &rnd);
+	__wt_random_init_seed(session, &rnd);
 
 	/* Set up replacement information. */
 	modify_repl_init();
@@ -271,18 +284,24 @@ modify_run(bool verbose)
 	memset(&_localB, 0, sizeof(_localB));
 	cursor = &_cursor;
 	memset(&_cursor, 0, sizeof(_cursor));
+	cursor->session = (WT_SESSION *)session;
 	cursor->value_format = "u";
 
 #define	NRUNS	10000
 	for (i = 0; i < NRUNS; ++i) {
 		/* Create an initial value. */
 		len = (size_t)(__wt_random(&rnd) % MAX_REPL_BYTES);
-		testutil_check(__wt_buf_set(NULL, localA, modify_repl, len));
+		testutil_check(__wt_buf_set(session, localA, modify_repl, len));
 
 		for (j = 0; j < 1000; ++j) {
+			/* Make lower case so modifications are easy to see. */
+			for (p = localA->mem;
+			    WT_PTRDIFF(p, localA->mem) < localA->size; p++)
+				*p = __wt_tolower(*p);
+
 			/* Copy the current value into the second item. */
 			testutil_check(__wt_buf_set(
-			    NULL, localB, localA->data, localA->size));
+			    session, localB, localA->data, localA->size));
 
 			/*
 			 * Create a random set of modify vectors, run the
@@ -291,12 +310,12 @@ modify_run(bool verbose)
 			 * of modify.
 			 */
 			modify_build();
-			testutil_check(__wt_buf_set(
-			    NULL, &cursor->value, localA->data, localA->size));
+			testutil_check(__wt_buf_set(session,
+			    &cursor->value, localA->data, localA->size));
 			testutil_check(__wt_modify_apply_api(
-			    NULL, cursor, entries, nentries));
+			    cursor, entries, nentries));
 			slow_apply_api(localA);
-			compare(localA, &cursor->value);
+			compare(localB, localA, &cursor->value);
 
 			/*
 			 * Call the WiredTiger function to build a modification
@@ -305,18 +324,18 @@ modify_run(bool verbose)
 			 * against our implementation of modify.
 			 */
 			nentries = WT_ELEMENTS(entries);
-			ret = wiredtiger_calc_modify(NULL,
+			ret = wiredtiger_calc_modify(opts->session,
 			    localB, localA,
 			    WT_MAX(localB->size, localA->size) + 100,
 			    entries, &nentries);
 			if (ret == WT_NOTFOUND)
 				continue;
 			testutil_check(ret);
-			testutil_check(__wt_buf_set(
-			    NULL, &cursor->value, localB->data, localB->size));
+			testutil_check(__wt_buf_set(session,
+			    &cursor->value, localB->data, localB->size));
 			testutil_check(__wt_modify_apply_api(
-			    NULL, cursor, entries, nentries));
-			compare(localA, &cursor->value);
+			    cursor, entries, nentries));
+			compare(localB, localA, &cursor->value);
 		}
 		if (verbose) {
 			printf("%d (%d%%)\r", i, (i * 100) / NRUNS);
@@ -326,9 +345,9 @@ modify_run(bool verbose)
 	if (verbose)
 		printf("%d (100%%)\n", i);
 
-	__wt_buf_free(NULL, localA);
-	__wt_buf_free(NULL, localB);
-	__wt_buf_free(NULL, &cursor->value);
+	__wt_buf_free(session, localA);
+	__wt_buf_free(session, localB);
+	__wt_buf_free(session, &cursor->value);
 }
 
 int
@@ -342,9 +361,11 @@ main(int argc, char *argv[])
 	testutil_make_work_dir(opts->home);
 	testutil_check(
 	    wiredtiger_open(opts->home, NULL, "create", &opts->conn));
+	testutil_check(
+	    opts->conn->open_session(opts->conn, NULL, NULL, &opts->session));
 
 	/* Run the test. */
-	modify_run(opts->verbose);
+	modify_run(opts);
 
 	testutil_cleanup(opts);
 	return (EXIT_SUCCESS);
diff --git a/src/third_party/wiredtiger/test/format/format.h b/src/third_party/wiredtiger/test/format/format.h
index 7cba583b2b4..9d97a2d0428 100644
--- a/src/third_party/wiredtiger/test/format/format.h
+++ b/src/third_party/wiredtiger/test/format/format.h
@@ -70,6 +70,7 @@ typedef struct {
 	char *home_config;			/* Run CONFIG file path */
 	char *home_init;			/* Initialize home command */
 	char *home_log;				/* Operation log file path */
+	char *home_pagedump;			/* Page dump filename */
 	char *home_rand;			/* RNG log file path */
 	char *home_salvage_copy;		/* Salvage copy command */
 	char *home_stats;			/* Statistics file path */
@@ -266,6 +267,8 @@ typedef enum { NEXT, PREV, SEARCH, SEARCH_NEAR } read_operation;
 
 typedef struct {
 	thread_op op;				/* Operation */
+	uint64_t  opid;				/* Operation ID */
+
 	uint64_t  keyno;			/* Row number */
 
 	uint64_t  ts;				/* Read/commit timestamp */
@@ -311,6 +314,8 @@ typedef struct {
 	WT_ITEM	 *lastkey, _lastkey;
 
 	bool repeatable_reads;			/* if read ops repeatable */
+	bool repeatable_wrap;			/* if circular buffer wrapped */
+	uint64_t opid;				/* Operation ID */
 	uint64_t read_ts;			/* read timestamp */
 	uint64_t commit_ts;			/* commit timestamp */
 	SNAP_OPS *snap, *snap_first, snap_list[512];
@@ -348,10 +353,9 @@ void	 key_gen_teardown(WT_ITEM *);
 void	 key_init(void);
 WT_THREAD_RET lrt(void *);
 void	 path_setup(const char *);
-void	 print_item(const char *, WT_ITEM *);
-void	 print_item_data(const char *, const uint8_t *, size_t);
 int	 read_row_worker(WT_CURSOR *, uint64_t, WT_ITEM *, WT_ITEM *, bool);
 uint32_t rng(WT_RAND_STATE *);
+void	 snap_init(TINFO *, uint64_t, bool);
 void	 snap_repeat_single(WT_CURSOR *, TINFO *);
 int	 snap_repeat_txn(WT_CURSOR *, TINFO *);
 void	 snap_repeat_update(TINFO *, bool);
diff --git a/src/third_party/wiredtiger/test/format/ops.c b/src/third_party/wiredtiger/test/format/ops.c
index a27dec3dd0c..7adfb795694 100644
--- a/src/third_party/wiredtiger/test/format/ops.c
+++ b/src/third_party/wiredtiger/test/format/ops.c
@@ -338,11 +338,10 @@ begin_transaction_ts(TINFO *tinfo, u_int *iso_configp)
 		    buf, sizeof(buf), "read_timestamp=%" PRIx64, ts));
 		ret = session->timestamp_transaction(session, buf);
 		if (ret == 0) {
-			tinfo->read_ts = ts;
-			tinfo->repeatable_reads = true;
+			snap_init(tinfo, ts, true);
 			logop(session,
 			    "begin snapshot read-ts=%" PRIu64 " (repeatable)",
-			    tinfo->read_ts);
+			    ts);
 			return;
 		}
 		if (ret != EINVAL)
@@ -371,11 +370,9 @@ begin_transaction_ts(TINFO *tinfo, u_int *iso_configp)
 
 	testutil_check(pthread_rwlock_unlock(&g.ts_lock));
 
-	tinfo->read_ts = ts;
-	tinfo->repeatable_reads = false;
-
-	logop(session, "begin snapshot read-ts=%" PRIu64 " (not repeatable)",
-	    tinfo->read_ts);
+	snap_init(tinfo, ts, false);
+	logop(session,
+	    "begin snapshot read-ts=%" PRIu64 " (not repeatable)", ts);
 }
 
 /*
@@ -415,9 +412,7 @@ begin_transaction(TINFO *tinfo, u_int *iso_configp)
 
 	wiredtiger_begin_transaction(session, config);
 
-	tinfo->read_ts = WT_TS_NONE;
-	tinfo->repeatable_reads = false;
-
+	snap_init(tinfo, WT_TS_NONE, false);
 	logop(session, "begin %s", log);
 }
 
@@ -719,8 +714,6 @@ ops(void *arg)
 				begin_transaction_ts(tinfo, &iso_config);
 			else
 				begin_transaction(tinfo, &iso_config);
-
-			tinfo->snap_first = tinfo->snap;
 			intxn = true;
 		}
 
@@ -899,7 +892,7 @@ remove_instead_of_truncate:
 			 */
 			greater_than = mmrand(&tinfo->rnd, 0, 1) == 1;
 			range = g.rows < 20 ?
-			    1 : mmrand(&tinfo->rnd, 1, (u_int)g.rows / 20);
+			    0 : mmrand(&tinfo->rnd, 0, (u_int)g.rows / 20);
 			tinfo->last = tinfo->keyno;
 			if (greater_than) {
 				if (g.c_reverse) {
diff --git a/src/third_party/wiredtiger/test/format/snap.c b/src/third_party/wiredtiger/test/format/snap.c
index a1853c56db9..b38f6958f1c 100644
--- a/src/third_party/wiredtiger/test/format/snap.c
+++ b/src/third_party/wiredtiger/test/format/snap.c
@@ -29,6 +29,22 @@
 #include "format.h"
 
 /*
+ * snap_init --
+ *	Initialize the repeatable operation tracking.
+ */
+void
+snap_init(TINFO *tinfo, uint64_t read_ts, bool repeatable_reads)
+{
+	++tinfo->opid;
+
+	tinfo->snap_first = tinfo->snap;
+
+	tinfo->read_ts = read_ts;
+	tinfo->repeatable_reads = repeatable_reads;
+	tinfo->repeatable_wrap = false;
+}
+
+/*
  * snap_track --
  *     Add a single snapshot isolation returned value to the list.
  */
@@ -40,10 +56,12 @@ snap_track(TINFO *tinfo, thread_op op)
 
 	snap = tinfo->snap;
 	snap->op = op;
+	snap->opid = tinfo->opid;
 	snap->keyno = tinfo->keyno;
 	snap->ts = WT_TS_NONE;
 	snap->repeatable = false;
 	snap->last = op == TRUNCATE ? tinfo->last : 0;
+	snap->ksize = snap->vsize = 0;
 
 	if (op == INSERT && g.type == ROW) {
 		ip = tinfo->key;
@@ -63,15 +81,43 @@ snap_track(TINFO *tinfo, thread_op op)
 		memcpy(snap->vdata, ip->data, snap->vsize = ip->size);
 	}
 
+	/* Move to the next slot, wrap at the end of the circular buffer. */
+	if (++tinfo->snap >= &tinfo->snap_list[WT_ELEMENTS(tinfo->snap_list)])
+		tinfo->snap = tinfo->snap_list;
+
 	/*
-	 * Move to the next slot, wrap at the end of the circular buffer.
-	 *
 	 * It's possible to pass this transaction's buffer starting point and
-	 * start replacing our own entries. That's OK, we just skip earlier
-	 * operations when we check.
+	 * start replacing our own entries. If that happens, we can't repeat
+	 * operations because we don't know which ones were previously modified.
 	 */
-	if (++tinfo->snap >= &tinfo->snap_list[WT_ELEMENTS(tinfo->snap_list)])
-		tinfo->snap = tinfo->snap_list;
+	if (tinfo->snap->opid == tinfo->opid)
+		tinfo->repeatable_wrap = true;
+}
+
+/*
+ * print_item_data --
+ *	Display a single data/size pair, with a tag.
+ */
+static void
+print_item_data(const char *tag, const uint8_t *data, size_t size)
+{
+	static const char hex[] = "0123456789abcdef";
+	u_char ch;
+
+	fprintf(stderr, "%s {", tag);
+	if (g.type == FIX)
+		fprintf(stderr, "0x%02x", data[0]);
+	else
+		for (; size > 0; --size, ++data) {
+			ch = data[0];
+			if (__wt_isprint(ch))
+				fprintf(stderr, "%c", (int)ch);
+			else
+				fprintf(stderr, "%x%x",
+				    (u_int)hex[(data[0] & 0xf0) >> 4],
+				    (u_int)hex[data[0] & 0x0f]);
+		}
+	fprintf(stderr, "}\n");
 }
 
 /*
@@ -83,10 +129,14 @@ snap_verify(WT_CURSOR *cursor, TINFO *tinfo, SNAP_OPS *snap)
 {
 	WT_DECL_RET;
 	WT_ITEM *key, *value;
+	uint64_t keyno;
 	uint8_t bitfield;
 
+	testutil_assert(snap->op != TRUNCATE);
+
 	key = tinfo->key;
 	value = tinfo->value;
+	keyno = snap->keyno;
 
 	/*
 	 * Retrieve the key/value pair by key. Row-store inserts have a unique
@@ -100,10 +150,10 @@ snap_verify(WT_CURSOR *cursor, TINFO *tinfo, SNAP_OPS *snap)
 		switch (g.type) {
 		case FIX:
 		case VAR:
-			cursor->set_key(cursor, snap->keyno);
+			cursor->set_key(cursor, keyno);
 			break;
 		case ROW:
-			key_gen(key, snap->keyno);
+			key_gen(key, keyno);
 			cursor->set_key(cursor, key);
 			break;
 		}
@@ -125,12 +175,11 @@ snap_verify(WT_CURSOR *cursor, TINFO *tinfo, SNAP_OPS *snap)
 	}
 
 	/* Check for simple matches. */
-	if (ret == 0 &&
-	    snap->op != REMOVE && snap->op != TRUNCATE &&
+	if (ret == 0 && snap->op != REMOVE &&
 	    value->size == snap->vsize &&
 	    memcmp(value->data, snap->vdata, value->size) == 0)
 		return (0);
-	if (ret == WT_NOTFOUND && (snap->op == REMOVE || snap->op == TRUNCATE))
+	if (ret == WT_NOTFOUND && snap->op == REMOVE)
 		return (0);
 
 	/*
@@ -142,18 +191,23 @@ snap_verify(WT_CURSOR *cursor, TINFO *tinfo, SNAP_OPS *snap)
 		if (ret == WT_NOTFOUND &&
 		    snap->vsize == 1 && *(uint8_t *)snap->vdata == 0)
 			return (0);
-		if ((snap->op == REMOVE || snap->op == TRUNCATE) &&
+		if (snap->op == REMOVE &&
 		    value->size == 1 && *(uint8_t *)value->data == 0)
 			return (0);
 	}
 
 	/* Things went pear-shaped. */
+#ifdef HAVE_DIAGNOSTIC
+	fprintf(stderr,
+	    "snapshot-isolation error: Dumping page to %s\n", g.home_pagedump);
+	testutil_check(__wt_debug_cursor_page(cursor, g.home_pagedump));
+#endif
 	switch (g.type) {
 	case FIX:
 		testutil_die(ret,
 		    "snapshot-isolation: %" PRIu64 " search: "
 		    "expected {0x%02x}, found {0x%02x}",
-		    snap->keyno,
+		    keyno,
 		    snap->op == REMOVE ? 0 : *(uint8_t *)snap->vdata,
 		    ret == WT_NOTFOUND ? 0 : *(uint8_t *)value->data);
 		/* NOTREACHED */
@@ -177,8 +231,7 @@ snap_verify(WT_CURSOR *cursor, TINFO *tinfo, SNAP_OPS *snap)
 		/* NOTREACHED */
 	case VAR:
 		fprintf(stderr,
-		    "snapshot-isolation %" PRIu64 " search mismatch\n",
-		    snap->keyno);
+		    "snapshot-isolation %" PRIu64 " search mismatch\n", keyno);
 
 		if (snap->op == REMOVE)
 			fprintf(stderr, "expected {deleted}\n");
@@ -190,8 +243,7 @@ snap_verify(WT_CURSOR *cursor, TINFO *tinfo, SNAP_OPS *snap)
 			print_item_data("   found", value->data, value->size);
 
 		testutil_die(ret,
-		    "snapshot-isolation: %" PRIu64 " search mismatch",
-		    snap->keyno);
+		    "snapshot-isolation: %" PRIu64 " search mismatch", keyno);
 		/* NOTREACHED */
 	}
 
@@ -209,7 +261,7 @@ snap_ts_clear(TINFO *tinfo, uint64_t ts)
 	SNAP_OPS *snap;
 	int count;
 
-	/* Check from the first operation to the last. */
+	/* Check from the first slot to the last. */
 	for (snap = tinfo->snap_list,
 	    count = WT_ELEMENTS(tinfo->snap_list); count > 0; --count, ++snap)
 		if (snap->repeatable && snap->ts <= ts)
@@ -253,12 +305,18 @@ snap_repeat_ok_match(SNAP_OPS *current, SNAP_OPS *a)
  * committed successfully.
  */
 static bool
-snap_repeat_ok_commit(
-    TINFO *tinfo, SNAP_OPS *current, SNAP_OPS *first, SNAP_OPS *last)
+snap_repeat_ok_commit(TINFO *tinfo, SNAP_OPS *current)
 {
 	SNAP_OPS *p;
 
 	/*
+	 * Truncates can't be repeated, we don't know the exact range of records
+	 * that were removed (if any).
+	 */
+	if (current->op == TRUNCATE)
+		return (false);
+
+	/*
 	 * For updates, check for subsequent changes to the record and don't
 	 * repeat the read. For reads, check for either subsequent or previous
 	 * changes to the record and don't repeat the read. (The reads are
@@ -266,13 +324,10 @@ snap_repeat_ok_commit(
 	 * do the repeatable read in that case.)
 	 */
 	for (p = current;;) {
-		/*
-		 * Wrap at the end of the circular buffer; "last" is the element
-		 * after the last element we want to test.
-		 */
+		/* Wrap at the end of the circular buffer. */
 		if (++p >= &tinfo->snap_list[WT_ELEMENTS(tinfo->snap_list)])
 			p = tinfo->snap_list;
-		if (p == last)
+		if (p->opid != tinfo->opid)
 			break;
 
 		if (!snap_repeat_ok_match(current, p))
@@ -282,21 +337,18 @@ snap_repeat_ok_commit(
 	if (current->op != READ)
 		return (true);
 	for (p = current;;) {
-		/*
-		 * Wrap at the beginning of the circular buffer; "first" is the
-		 * last element we want to test.
-		 */
-		if (p == first)
-			return (true);
+		/* Wrap at the beginning of the circular buffer. */
 		if (--p < tinfo->snap_list)
 			p = &tinfo->snap_list[
 			    WT_ELEMENTS(tinfo->snap_list) - 1];
+		if (p->opid != tinfo->opid)
+			break;
 
 		if (!snap_repeat_ok_match(current, p))
 			return (false);
 
 	}
-	/* NOTREACHED */
+	return (true);
 }
 
 /*
@@ -305,7 +357,7 @@ snap_repeat_ok_commit(
  * transaction has rolled back.
  */
 static bool
-snap_repeat_ok_rollback(TINFO *tinfo, SNAP_OPS *current, SNAP_OPS *first)
+snap_repeat_ok_rollback(TINFO *tinfo, SNAP_OPS *current)
 {
 	SNAP_OPS *p;
 
@@ -318,21 +370,18 @@ snap_repeat_ok_rollback(TINFO *tinfo, SNAP_OPS *current, SNAP_OPS *first)
 	 * the read in that case.
 	 */
 	for (p = current;;) {
-		/*
-		 * Wrap at the beginning of the circular buffer; "first" is the
-		 * last element we want to test.
-		 */
-		if (p == first)
-			return (true);
+		/* Wrap at the beginning of the circular buffer. */
 		if (--p < tinfo->snap_list)
 			p = &tinfo->snap_list[
 			    WT_ELEMENTS(tinfo->snap_list) - 1];
+		if (p->opid != tinfo->opid)
+			break;
 
 		if (!snap_repeat_ok_match(current, p))
 			return (false);
 
 	}
-	/* NOTREACHED */
+	return (true);
 }
 
 /*
@@ -342,31 +391,21 @@ snap_repeat_ok_rollback(TINFO *tinfo, SNAP_OPS *current, SNAP_OPS *first)
 int
 snap_repeat_txn(WT_CURSOR *cursor, TINFO *tinfo)
 {
-	SNAP_OPS *current, *stop;
+	SNAP_OPS *current;
+
+	/* If we wrapped the buffer, we can't repeat operations. */
+	if (tinfo->repeatable_wrap)
+		return (0);
 
 	/* Check from the first operation we saved to the last. */
-	for (current = tinfo->snap_first, stop = tinfo->snap;; ++current) {
+	for (current = tinfo->snap_first;; ++current) {
 		/* Wrap at the end of the circular buffer. */
 		if (current >= &tinfo->snap_list[WT_ELEMENTS(tinfo->snap_list)])
 			current = tinfo->snap_list;
-		if (current == stop)
+		if (current->opid != tinfo->opid)
 			break;
 
-		/*
-		 * We don't test all of the records in a truncate range, only
-		 * the first because that matches the rest of the isolation
-		 * checks. If a truncate range was from the start of the table,
-		 * switch to the record at the end. This is done in the first
-		 * routine that considers if operations are repeatable, and the
-		 * rest of those functions depend on it already being done.
-		 */
-		if (current->op == TRUNCATE && current->keyno == 0) {
-			current->keyno = current->last;
-			testutil_assert(current->keyno != 0);
-		}
-
-		if (snap_repeat_ok_commit(
-		    tinfo, current, tinfo->snap_first, stop))
+		if (snap_repeat_ok_commit(tinfo, current))
 			WT_RET(snap_verify(cursor, tinfo, current));
 	}
 
@@ -381,19 +420,18 @@ snap_repeat_txn(WT_CURSOR *cursor, TINFO *tinfo)
 void
 snap_repeat_update(TINFO *tinfo, bool committed)
 {
-	SNAP_OPS *start, *stop;
+	SNAP_OPS *current;
 
-	/*
-	 * Check from the first operation we saved to the last. It's possible
-	 * to update none at all if we did exactly the number of operations
-	 * in the circular buffer, it will look like we didn't do any. That's
-	 * OK, it's a big enough buffer that it's not going to matter.
-	 */
-	for (start = tinfo->snap_first, stop = tinfo->snap;; ++start) {
+	/* If we wrapped the buffer, we can't repeat operations. */
+	if (tinfo->repeatable_wrap)
+		return;
+
+	/* Check from the first operation we saved to the last. */
+	for (current = tinfo->snap_first;; ++current) {
 		/* Wrap at the end of the circular buffer. */
-		if (start >= &tinfo->snap_list[WT_ELEMENTS(tinfo->snap_list)])
-			start = tinfo->snap_list;
-		if (start == stop)
+		if (current >= &tinfo->snap_list[WT_ELEMENTS(tinfo->snap_list)])
+			current = tinfo->snap_list;
+		if (current->opid != tinfo->opid)
 			break;
 
 		/*
@@ -401,23 +439,23 @@ snap_repeat_update(TINFO *tinfo, bool committed)
 		 * timestamp chosen wasn't older than all concurrently running
 		 * uncommitted updates.
 		 */
-		if (!tinfo->repeatable_reads && start->op == READ)
+		if (!tinfo->repeatable_reads && current->op == READ)
 			continue;
 
 		/*
 		 * Second, check based on the transaction resolution (the rules
 		 * are different if the transaction committed or rolled back).
 		 */
-		start->repeatable = committed ? snap_repeat_ok_commit(
-		    tinfo, start, tinfo->snap_first, stop) :
-		    snap_repeat_ok_rollback(tinfo, start, tinfo->snap_first);
+		current->repeatable = committed ?
+		    snap_repeat_ok_commit(tinfo, current) :
+		    snap_repeat_ok_rollback(tinfo, current);
 
 		/*
 		 * Repeat reads at the transaction's read timestamp and updates
 		 * at the commit timestamp.
 		 */
-		if (start->repeatable)
-			start->ts = start->op == READ ?
+		if (current->repeatable)
+			current->ts = current->op == READ ?
 			    tinfo->read_ts : tinfo->commit_ts;
 	}
 }
diff --git a/src/third_party/wiredtiger/test/format/util.c b/src/third_party/wiredtiger/test/format/util.c
index 6f7783a3a32..91d9bf51697 100644
--- a/src/third_party/wiredtiger/test/format/util.c
+++ b/src/third_party/wiredtiger/test/format/util.c
@@ -330,6 +330,12 @@ path_setup(const char *home)
 	g.home_log = dmalloc(len);
 	testutil_check(__wt_snprintf(g.home_log, len, "%s/%s", g.home, "log"));
 
+	/* Page dump file. */
+	len = strlen(g.home) + strlen("pagedump") + 2;
+	g.home_pagedump = dmalloc(len);
+	testutil_check(__wt_snprintf(
+	    g.home_pagedump, len, "%s/%s", g.home, "pagedump"));
+
 	/* RNG log file. */
 	len = strlen(g.home) + strlen("rand") + 2;
 	g.home_rand = dmalloc(len);
@@ -675,39 +681,3 @@ alter(void *arg)
 	testutil_check(session->close(session, NULL));
 	return (WT_THREAD_RET_VALUE);
 }
-
-/*
- * print_item_data --
- *	Display a single data/size pair, with a tag.
- */
-void
-print_item_data(const char *tag, const uint8_t *data, size_t size)
-{
-	static const char hex[] = "0123456789abcdef";
-	u_char ch;
-
-	fprintf(stderr, "\t%s {", tag);
-	if (g.type == FIX)
-		fprintf(stderr, "0x%02x", data[0]);
-	else
-		for (; size > 0; --size, ++data) {
-			ch = data[0];
-			if (__wt_isprint(ch))
-				fprintf(stderr, "%c", (int)ch);
-			else
-				fprintf(stderr, "%x%x",
-				    (u_int)hex[(data[0] & 0xf0) >> 4],
-				    (u_int)hex[data[0] & 0x0f]);
-		}
-	fprintf(stderr, "}\n");
-}
-
-/*
- * print_item --
- *	Display a single data/size pair, with a tag.
- */
-void
-print_item(const char *tag, WT_ITEM *item)
-{
-	print_item_data(tag, item->data, item->size);
-}
diff --git a/src/third_party/wiredtiger/test/suite/test_compat02.py b/src/third_party/wiredtiger/test/suite/test_compat02.py
index de5862513a8..a92c3f54300 100644
--- a/src/third_party/wiredtiger/test/suite/test_compat02.py
+++ b/src/third_party/wiredtiger/test/suite/test_compat02.py
@@ -133,6 +133,13 @@ class test_compat02(wttest.WiredTigerTestCase, suite_subprocess):
         # version. That configuration needs an existing database for it to be
         # useful. Test for success or failure based on the relative versions
         # configured.
+
+        # Turn on checkpoint verbose to debug a rare occurence of a test
+        # hanging, most likely during the checkpoint of conn.close.
+        self.pr("Closing connection")
+        self.conn.reconfigure('verbose=(checkpoint)')
+        with self.expectedStdoutPattern('.'):
+            self.conn.close()
         compat_str = ''
         if (self.max_req != 'none'):
             compat_str += 'compatibility=(require_max="%s"),' % self.max_req
@@ -140,7 +147,6 @@ class test_compat02(wttest.WiredTigerTestCase, suite_subprocess):
             compat_str += 'compatibility=(require_min="%s"),' % self.min_req
         if (self.rel != 'none'):
             compat_str += 'compatibility=(release="%s"),' % self.rel
-        self.conn.close()
         log_str = 'log=(enabled,file_max=%s,archive=false),' % self.logmax
         restart_config = log_str + compat_str
         self.pr("Restart conn " + restart_config)
diff --git a/src/third_party/wiredtiger/test/suite/test_txn19.py b/src/third_party/wiredtiger/test/suite/test_txn19.py
index 604d8bed8bb..c63e9f9c6e9 100755
--- a/src/third_party/wiredtiger/test/suite/test_txn19.py
+++ b/src/third_party/wiredtiger/test/suite/test_txn19.py
@@ -53,6 +53,20 @@ def corrupt(fname, truncate, offset, writeit):
         if writeit:
             log.write(writeit)
 
+def copy_for_crash_restart(olddir, newdir):
+    ''' Simulate a crash from olddir and restart in newdir. '''
+    # with the connection still open, copy files to new directory
+    shutil.rmtree(newdir, ignore_errors=True)
+    os.mkdir(newdir)
+    for fname in os.listdir(olddir):
+        fullname = os.path.join(olddir, fname)
+        # Skip lock file on Windows since it is locked
+        if os.path.isfile(fullname) and \
+            "WiredTiger.lock" not in fullname and \
+            "Tmplog" not in fullname and \
+            "Preplog" not in fullname:
+            shutil.copy(fullname, newdir)
+
 class test_txn19(wttest.WiredTigerTestCase, suite_subprocess):
     base_config = 'log=(archive=false,enabled,file_max=100K),' + \
                   'transaction_sync=(enabled,method=none)'
@@ -158,20 +172,6 @@ class test_txn19(wttest.WiredTigerTestCase, suite_subprocess):
                     self.tty('LOGS ' + msg + ': ' + str(i) + ' is empty')
         self.tty('LOGS ' + msg + ': ' + str(loglist))
 
-    def copy_for_crash_restart(self, olddir, newdir):
-        ''' Simulate a crash from olddir and restart in newdir. '''
-        # with the connection still open, copy files to new directory
-        shutil.rmtree(newdir, ignore_errors=True)
-        os.mkdir(newdir)
-        for fname in os.listdir(olddir):
-            fullname = os.path.join(olddir, fname)
-            # Skip lock file on Windows since it is locked
-            if os.path.isfile(fullname) and \
-                "WiredTiger.lock" not in fullname and \
-                "Tmplog" not in fullname and \
-                "Preplog" not in fullname:
-                shutil.copy(fullname, newdir)
-
     # Generate a value that is a bit over half the size of the log file.
     def valuegen(self, i):
         return str(i) + 'A' * (1024 * 60)   # ~60K
@@ -280,7 +280,7 @@ class test_txn19(wttest.WiredTigerTestCase, suite_subprocess):
         self.session.create(self.uri, self.create_params)
         self.inserts([x for x in range(0, self.nrecords)])
         newdir = "RESTART"
-        self.copy_for_crash_restart(self.home, newdir)
+        copy_for_crash_restart(self.home, newdir)
         self.close_conn()
         #self.show_logs(newdir, 'before corruption')
         self.corrupt_log(newdir)
@@ -346,12 +346,210 @@ class test_txn19(wttest.WiredTigerTestCase, suite_subprocess):
         newdir2 = "RESTART2"
         self.inserts([self.nrecords, self.nrecords + 1])
         expect.extend([self.nrecords, self.nrecords + 1])
-        self.copy_for_crash_restart(newdir, newdir2)
+        copy_for_crash_restart(newdir, newdir2)
         self.checks(expect)
         self.reopen_conn(newdir)
         self.checks(expect)
         self.reopen_conn(newdir2, self.conn_config)
         self.checks(expect)
 
+class test_txn19_meta(wttest.WiredTigerTestCase, suite_subprocess):
+    base_config = 'log=(archive=false,enabled,file_max=100K),' + \
+                  'transaction_sync=(enabled,method=none)'
+    conn_config = base_config
+
+    # The type of corruption to be applied
+    corruption_scenarios = [
+        ('removal', dict(kind='removal', f=lambda fname:
+            os.remove(fname))),
+        ('truncate', dict(kind='truncate', f=lambda fname:
+            corrupt(fname, True, 0, None))),
+        ('truncate-middle', dict(kind='truncate-middle', f=lambda fname:
+            corrupt(fname, True, 1024 * 25, None))),
+        ('zero-begin', dict(kind='zero', f=lambda fname:
+            corrupt(fname, False, 0, '\0' * 4096))),
+        ('zero-trunc', dict(kind='zero', f=lambda fname:
+            corrupt(fname, True, 0, '\0' * 4096))),
+        ('zero-end', dict(kind='zero-end', f=lambda fname:
+            corrupt(fname, False, -1, '\0' * 4096))),
+        ('garbage-begin', dict(kind='garbage-begin', f=lambda fname:
+            corrupt(fname, False, 0, 'Bad!' * 1024))),
+        ('garbage-middle', dict(kind='garbage-middle', f=lambda fname:
+            corrupt(fname, False, 1024 * 25, 'Bad!' * 1024))),
+        ('garbage-end', dict(kind='garbage-end', f=lambda fname:
+            corrupt(fname, False, -1, 'Bad!' * 1024))),
+    ]
+    # File to be corrupted
+    filename_scenarios = [
+        ('WiredTiger', dict(filename='WiredTiger')),
+        ('WiredTiger.basecfg', dict(filename='WiredTiger.basecfg')),
+        ('WiredTiger.turtle', dict(filename='WiredTiger.turtle')),
+        ('WiredTiger.wt', dict(filename='WiredTiger.wt')),
+        ('WiredTigerLAS.wt', dict(filename='WiredTigerLAS.wt')),
+    ]
+
+    # In many cases, wiredtiger_open without any salvage options will
+    # just work.  We list those cases here.
+    openable = [
+        "removal:WiredTiger.basecfg",
+        "removal:WiredTiger.turtle",
+        "removal:WiredTigerLAS.wt",
+        "truncate:WiredTiger",
+        "truncate:WiredTiger.basecfg",
+        "truncate:WiredTigerLAS.wt",
+        "truncate-middle:WiredTiger",
+        "truncate-middle:WiredTiger.basecfg",
+        "truncate-middle:WiredTiger.turtle",
+        "truncate-middle:WiredTiger.wt",
+        "truncate-middle:WiredTigerLAS.wt",
+        "zero:WiredTiger",
+        "zero:WiredTiger.basecfg",
+        "zero:WiredTigerLAS.wt",
+        "zero-end:WiredTiger",
+        "zero-end:WiredTiger.basecfg",
+        "zero-end:WiredTiger.turtle",
+        "zero-end:WiredTiger.wt",
+        "zero-end:WiredTigerLAS.wt",
+        "garbage-begin:WiredTiger",
+        "garbage-begin:WiredTigerLAS.wt",
+        "garbage-middle:WiredTiger",
+        "garbage-middle:WiredTiger.basecfg",
+        "garbage-middle:WiredTiger.turtle",
+        "garbage-middle:WiredTiger.wt",
+        "garbage-middle:WiredTigerLAS.wt",
+        "garbage-end:WiredTiger",
+        "garbage-end:WiredTiger.turtle",
+        "garbage-end:WiredTiger.wt",
+        "garbage-end:WiredTigerLAS.wt",
+    ]
+
+    # The cases for which salvage will not work, represented in the
+    # form (self.kind + ':' + self.filename)
+    not_salvageable = [
+        "removal:WiredTiger.turtle",
+        "removal:WiredTiger.wt",
+        "truncate:WiredTiger.wt",
+        "zero:WiredTiger.wt",
+        "garbage-begin:WiredTiger.basecfg",
+        "garbage-begin:WiredTiger.wt",
+        "garbage-end:WiredTiger.basecfg",
+    ]
+
+    scenarios = make_scenarios(corruption_scenarios, filename_scenarios)
+    uri = 'table:test_txn19_meta_'
+    ntables = 5
+    create_params = 'key_format=i,value_format=S'
+    nrecords = 1000                                  # records per table.
+    suffixes = [ str(x) for x in range(0, ntables)]  # [ '0', '1', ... ]
+
+    def valuegen(self, i):
+        return str(i) + 'A' * 1024
+
+    # Insert a list of keys
+    def inserts(self, keylist):
+        for suffix in self.suffixes:
+            c = self.session.open_cursor(self.uri + suffix)
+            for i in keylist:
+                c[i] = self.valuegen(i)
+            c.close()
+
+    def checks(self, expectlist):
+        for suffix in self.suffixes:
+            c = self.session.open_cursor(self.uri + suffix, None, None)
+            gotlist = []
+            for key, value in c:
+                gotlist.append(key)
+                self.assertEqual(self.valuegen(key), value)
+            self.assertEqual(expectlist, gotlist)
+            c.close()
+
+    def corrupt_meta(self, homedir):
+        # Mark this test has having corrupted files
+        self.databaseCorrupted()
+        filename = os.path.join(homedir, self.filename)
+        self.f(filename)
+
+    def is_openable(self):
+        key = self.kind + ':' + self.filename
+        return key in self.openable
+
+    def is_salvageable(self):
+        key = self.kind + ':' + self.filename
+        return key not in self.not_salvageable
+
+    def test_corrupt_meta(self):
+        errfile = 'list.err'
+        outfile = 'list.out'
+        newdir = "RESTART"
+        newdir2 = "RESTART2"
+        expect = list(range(0, self.nrecords))
+        salvage_config = self.base_config + ',salvage=true'
+
+        for suffix in self.suffixes:
+            self.session.create(self.uri + suffix, self.create_params)
+        self.inserts(expect)
+
+        # Simulate a crash by copying the contents of the directory
+        # before closing.  After we corrupt the copy, make another
+        # copy of the corrupted directory.
+        #
+        # The first corrupted copy will be used to run:
+        #    wiredtiger_open without salvage flag, followed by:
+        #    wiredtiger_open with salvage flag.
+        # The second directory will be used to run:
+        #    wiredtiger_open with salvage flag first.
+
+        copy_for_crash_restart(self.home, newdir)
+        self.close_conn()
+        self.corrupt_meta(newdir)
+        copy_for_crash_restart(newdir, newdir2)
+
+        # In cases of corruption, we cannot always call wiredtiger_open
+        # directly, because there may be a panic, and abort() is called
+        # in diagnostic mode which terminates the Python interpreter.
+        #
+        # Running any wt command externally to Python allows
+        # us to observe the failure or success safely.
+        # Use -R to force recover=on, which is the default for
+        # wiredtiger_open, (wt utilities normally have recover=error)
+
+        expect_fail = not self.is_openable()
+        self.runWt(['-h', newdir, '-C', self.base_config, '-R', 'list'],
+            errfilename=errfile, outfilename=outfile, failure=expect_fail,
+            closeconn=False)
+
+        if expect_fail:
+            self.check_file_contains_one_of(errfile,
+                ['/unknown configuration key/',
+                '/handle-open:/',
+                '/turtle file read error: WT_NOTFOUND: item not found/',
+                'WT_ERROR: non-specific WiredTiger error',
+                'WT_TRY_SALVAGE: database corruption detected'])
+
+        for salvagedir in [ newdir, newdir2 ]:
+            # Removing the 'WiredTiger.turtle' file has weird behavior:
+            #  Immediately doing wiredtiger_open (without salvage) succeeds.
+            #  Following that, wiredtiger_open w/ salvage also succeeds.
+            #
+            #  But, immediately after the corruption, if we run
+            #  wiredtiger_open with salvage, it will fail.
+            # This anomoly should be fixed or explained.
+            if salvagedir == newdir and self.kind == 'removal' and \
+               self.filename == 'WiredTiger.turtle':
+                continue
+
+            if self.is_salvageable():
+                self.reopen_conn(salvagedir, salvage_config)
+                self.checks(expect)
+            else:
+                # Certain cases are not currently salvageable, they result in
+                # an error during the wiredtiger_open.  But the nature of the
+                # messages produced during the error is variable by which case
+                # it is, and even variable from system to system.
+                with self.expectedStdoutPattern('.'):
+                    self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+                        lambda: self.reopen_conn(salvagedir, salvage_config),
+                        '/.*/')
+
 if __name__ == '__main__':
     wttest.run()