Import wiredtiger: 0cd3d5bbd8a5c8779f1129c6754b4463403e788f from branch mongodb-3.6

ref: 6f561957cb..0cd3d5bbd8 for: 3.5.14 WT-3200 LSM bug: Failed lookup in bloom filter. WT-3435 Lookaside eviction should be able to save unstable updates WT-3453 Enhance lookaside table test coverage in Python suite WT-3559 Detect when a checkpoint races with metadata changes WT-3579 Enhance support for running wtperf workloads with workgen WT-3582 Cache stuck full of internal pages WT-3593 Add an API to enforce consistent use of timestamps (#3667) WT-3599 reconciliation calculates block matching checksums too frequently. WT-3600 timestamp API lets you set timestamps with invalid characters WT-3612 Improve documentation of durability with backup cursors WT-3613 test/format cache full with LSM WT-3618 WT remove solaris from evergreen builds WT-3620 POSIX thread attribute structures must be destroyed WT-3621 Add test for full backups with concurrent table creation WT-3622 Allow upper case hexadecimal timestamps WT-3627 test_txn14.test_txn14.test_log_flush timeout WT-3631 Convert timestamps to integers in Python tests before comparing WT-3636 Account for page image sizes in cache consistently WT-3638 format failure, update list without complete visible record WT-3639 Test/format tried to drop named checkpoints during a hot backup WT-3641 Track maximum timestamp used in each btree WT-3642 Avoid lookaside reads for dead trees
author: Alex Gorrod <alexander.gorrod@mongodb.com> 2017-10-10 16:29:49 +1100
committer: Alex Gorrod <alexander.gorrod@mongodb.com> 2017-10-10 16:37:55 +1100
commit: 39998ac6928c4e7f3acd2f7ee2fc5fb4df056c18 (patch)
tree: c075233cd32c6ec0205af77db475836c0fba60e9 /src/third_party
parent: dd094ce1bc1fb424ccc6dd71939e5c7a30159e2e (diff)
download: mongo-39998ac6928c4e7f3acd2f7ee2fc5fb4df056c18.tar.gz
95 files changed, 3140 insertions, 1644 deletions
diff --git a/src/third_party/wiredtiger/bench/workgen/runner/runner/__init__.py b/src/third_party/wiredtiger/bench/workgen/runner/runner/__init__.py
index ed21fffe8dc..2d60e1522f5 100644
--- a/src/third_party/wiredtiger/bench/workgen/runner/runner/__init__.py
+++ b/src/third_party/wiredtiger/bench/workgen/runner/runner/__init__.py
@@ -88,5 +88,5 @@ except:
 shutil.rmtree('WT_TEST', True)
 os.mkdir('WT_TEST')
 
-from .core import txn, extensions_config, op_group_transaction, op_log_like, op_multi_table
+from .core import txn, extensions_config, op_append, op_group_transaction, op_log_like, op_multi_table, op_populate_with_range
 from .latency import workload_latency
diff --git a/src/third_party/wiredtiger/bench/workgen/runner/runner/core.py b/src/third_party/wiredtiger/bench/workgen/runner/runner/core.py
index 2c8311c4ca7..a8977d9593e 100644
--- a/src/third_party/wiredtiger/bench/workgen/runner/runner/core.py
+++ b/src/third_party/wiredtiger/bench/workgen/runner/runner/core.py
@@ -28,7 +28,7 @@
 #
 # runner/core.py
 #   Core functions available to all runners
-import glob, os
+import glob, os, random
 from workgen import Key, Operation, OpList, Table, Transaction, Value
 
 # txn --
@@ -100,14 +100,73 @@ def extensions_config(exts):
         result = ',extensions=[' + ','.join(extfiles.values()) + ']'
     return result
 
-def _op_multi_table_as_list(ops_arg, tables):
+_PARETO_SHAPE = 1.5
+_BILLION = 1000000000
+
+# Choose an int in [0, nrange) weighted by pareto.param, which is interpreted
+# as in wtperf: a number between 0 and 100.  Out of range draws map to 0.
+def _choose_pareto(nrange, pareto):
+    # randrange excludes _BILLION, so U below is never 0 (0 ** S1 would raise).
+    rval = random.randrange(_BILLION)
+    # Use Pareto distribution to give 80/20 hot/cold values.
+    S1 = -1 / _PARETO_SHAPE
+    S2 = nrange * (pareto.param / 100.0) * (_PARETO_SHAPE - 1)
+    U = 1 - rval / (_BILLION * 1.0)
+    rval = (pow(U, S1) - 1) * S2
+    if rval >= nrange:
+        rval = 0
+    return int(rval)
+
+# Get the list of subordinate operations that are listed in the group.
+# Generally, op._optype == Operation.OP_NONE indicates that
+# the operation contains a group of subordinates.
+#
+# XXX
+# Note that this function should be used to iterate a group, rather than:
+#    for o in op._group
+# because a bug in SWIG versions <= 2.0.11 would cause the above fragment
+# to produce a segmentation violation as described here:
+#    https://sourceforge.net/p/swig/mailman/message/32838320/
+def _op_get_group_list(op):
+    grouplist = op._group
+    result = []
+    if grouplist != None:
+        result.extend(grouplist)
+    return result
+
+def _op_multi_table_as_list(ops_arg, tables, pareto_tables, multiplier):
     result = []
     if ops_arg._optype != Operation.OP_NONE:
-        for table in tables:
-            result.append(Operation(ops_arg._optype, table, ops_arg._key, ops_arg._value))
+        if pareto_tables <= 0:
+            for table in tables:
+                for i in range(0, multiplier):
+                    result.append(Operation(ops_arg._optype, table, ops_arg._key, ops_arg._value))
+        else:
+            # Use the multiplier unless the length of the list will be large.
+            # In any case, make sure there's at least a multiplier of 3, to
+            # give a chance to hit all/most of the tables.
+            ntables = len(tables)
+            count = ntables * multiplier
+            if count > 1000:
+                count = 1000
+                mincount = ntables * 3
+                if mincount > count:
+                    count = mincount
+            for i in range(0, count):
+                tnum = _choose_pareto(ntables, pareto_tables)
+                # Modify the pareto value to make it more flat
+                # as tnum gets higher.  Workgen knows how to handle
+                # a portion of a pareto range.
+                table = tables[tnum]
+                key = Key(ops_arg._key)
+                key._pareto.range_low = (1.0 * i)/count
+                key._pareto.range_high = (1.0 * (i + 1))/count
+                result.append(Operation(ops_arg._optype, table, key, ops_arg._value))
     else:
-        for op in ops._group:
-            result.extend(_op_multi_table_as_list(op, tables))
+        for op in _op_get_group_list(ops_arg):
+            for o in _op_multi_table_as_list(op, tables, pareto_tables, \
+                                             multiplier):
+                result.append(Operation(o))
     return result
 
 # A convenient way to build a list of operations
@@ -118,11 +177,52 @@ def op_append(op1, op2):
         op1 += op2
     return op1
 
+# Require consistent use of pareto across the set of operations;
+# that keeps our algorithm reasonably simple.
+def _check_pareto(ops_arg, cur = 0):
+    if ops_arg._key != None and ops_arg._key._keytype == Key.KEYGEN_PARETO:
+        p = ops_arg._key._pareto
+        if cur != 0 and p != cur:
+            raise Exception('mixed pareto values for ops within a ' + \
+                            'single thread not supported')
+        cur = p
+    if ops_arg._group != None:
+        for op in _op_get_group_list(ops_arg):
+            cur = _check_pareto(op, cur)
+    return cur
+
+_primes = [83, 89, 97, 101, 103, 107, 109, 113]
+
 # Emulate wtperf's table_count option.  Spread the given operations over
-# a set of tables.
-def op_multi_table(ops_arg, tables):
+# a set of tables.  For example, given 5 operations and 4 tables, we return
+# a set of 20 operations for all possibilities.
+#
+# When we detect that pareto is used with a range partition, things get
+# trickier, because we'll want a higher proportion of operations channelled
+# to the first tables.  Workgen only supports individual operations on a
+# single table, so to get good Pareto distribution, we first expand the
+# number in the total set of operations, and then choose a higher proportion
+# of the tables.  We need to expand the number of operations to make sure
+# that the lower tables get some hits.  While it's not perfect (without
+# creating a huge multiplier) it's a reasonable approximation for most
+# cases.  Within each table's access, the pareto parameters have to be
+# adjusted to account for each table's position in the total
+# distribution.  For example, the lowest priority table will have a much
+# more even distribution.
+def op_multi_table(ops_arg, tables, range_partition = False):
     ops = None
-    for op in _op_multi_table_as_list(ops_arg, tables):
+    multiplier = 1
+    if range_partition:
+        pareto_tables = _check_pareto(ops_arg)
+    else:
+        pareto_tables = 0
+    if pareto_tables != 0:
+        multiplier = _primes[random.randint(0, len(_primes) - 1)]
+    ops_list = _op_multi_table_as_list(ops_arg, tables, pareto_tables, \
+                                       multiplier)
+    if pareto_tables != 0:
+        random.shuffle(ops_list)
+    for op in ops_list:
         ops = op_append(ops, op)
     return ops
 
@@ -152,7 +252,7 @@ def op_log_like(op, log_table, ops_per_txn):
                 op = txn(op)       # txn for each action.
     else:
         oplist = []
-        for op2 in op._group:
+        for op2 in _op_get_group_list(op):
             if op2._optype == Operation.OP_NONE:
                 oplist.append(op_log_like(op2, log_table))
             elif ops_per_txn == 0 and _optype_is_write(op2._optype):
@@ -182,10 +282,8 @@ def op_group_transaction(ops_arg, ops_per_txn, txn_config):
         raise Exception('grouping transactions with multipliers not supported')
 
     oplist = []
-    ops = None
-    nops = 0
     txgroup = []
-    for op in ops_arg._group:
+    for op in _op_get_group_list(ops_arg):
         if op.optype == Operation.OP_NONE:
             oplist.append(_op_transaction_list(txgroup, txn_config))
             txgroup = []
@@ -199,3 +297,39 @@ def op_group_transaction(ops_arg, ops_per_txn, txn_config):
         oplist.append(_op_transaction_list(txgroup, txn_config))
     ops_arg._group = OpList(oplist)
     return ops_arg
+
+# Populate using range partition with the random range.  Each table holds
+# (icount + random_range) / table_count entries.  We will totally fill 0 or
+# more tables (fill_tables), and 0 or 1 table will be partially filled.
+# The rest (if any) will be completely unfilled, to be filled/accessed
+# during the regular part of the run.  Returns the combined ops (or None).
+def op_populate_with_range(ops_arg, tables, icount, random_range, pop_threads):
+    table_count = len(tables)
+    entries_per_table = (icount + random_range) // table_count  # stays an int
+    if entries_per_table == 0:
+        # This can happen if table_count is huge relative to
+        # icount/random_range.  Not really worth handling.
+        raise Exception('table_count > (icount + random_range), seems absurd')
+    if (icount + random_range) % table_count != 0:
+        # This situation is not handled well by our simple algorithm,
+        # we won't get exactly icount entries added during the populate.
+        raise Exception('(icount + random_range) is not evenly divisible by ' +
+                        'table_count')
+    if entries_per_table % pop_threads != 0:
+        # Another situation that is not handled exactly.
+        raise Exception('entries per table is not evenly divisible by ' +
+                        'populate_threads')
+    fill_tables = icount // entries_per_table    # used as range()/index below
+    fill_per_thread = entries_per_table // pop_threads
+    ops = None
+    for i in range(0, fill_tables):
+        op = Operation(ops_arg)
+        op._table = tables[i]
+        ops = op_append(ops, op * fill_per_thread)
+    partial_fill = icount % entries_per_table
+    if partial_fill > 0:
+        fill_per_thread = partial_fill // pop_threads   # remainder is dropped
+        op = Operation(ops_arg)
+        op._table = tables[fill_tables]
+        ops = op_append(ops, op * fill_per_thread)
+    return ops
diff --git a/src/third_party/wiredtiger/bench/workgen/workgen.cxx b/src/third_party/wiredtiger/bench/workgen/workgen.cxx
index ce9debcca2f..31e21e6f6c9 100644
--- a/src/third_party/wiredtiger/bench/workgen/workgen.cxx
+++ b/src/third_party/wiredtiger/bench/workgen/workgen.cxx
@@ -240,7 +240,8 @@ Context& Context::operator=(const Context &other) {
 }
 
 ContextInternal::ContextInternal() : _tint(), _table_names(),
-    _recno(NULL), _recno_alloced(0), _tint_last(0), _context_count(0) {
+    _table_runtime(NULL), _runtime_alloced(0), _tint_last(0),
+    _context_count(0) {
     uint32_t count;
     if ((count = workgen_atomic_add32(&context_count, 1)) != 1)
         THROW("multiple Contexts not supported");
@@ -248,20 +249,20 @@ ContextInternal::ContextInternal() : _tint(), _table_names(),
 }
 
 ContextInternal::~ContextInternal() {
-    if (_recno != NULL)
-        delete _recno;
+    // _table_runtime is allocated with new[]; delete[] of NULL is a no-op.
+    delete[] _table_runtime;
 }
 
 int ContextInternal::create_all() {
-    if (_recno_alloced != _tint_last) {
+    if (_runtime_alloced != _tint_last) {
         // The array references are 1-based, we'll waste one entry.
-        uint64_t *new_recno = new uint64_t[_tint_last + 1];
-        memcpy(new_recno, _recno, sizeof(uint64_t) * _recno_alloced);
-        memset(&new_recno[_recno_alloced], 0,
-          sizeof(uint64_t) * (_tint_last - _recno_alloced + 1));
-        delete _recno;
-        _recno = new_recno;
-        _recno_alloced = _tint_last;
+        TableRuntime *new_table_runtime = new TableRuntime[_tint_last + 1];
+        if (_table_runtime != NULL)  // old array has _runtime_alloced + 1 slots
+            for (uint32_t i = 0; i <= _runtime_alloced; i++)
+                new_table_runtime[i] = _table_runtime[i];
+        delete[] _table_runtime;     // allocated with new[]
+        _table_runtime = new_table_runtime;
+        _runtime_alloced = _tint_last;
     }
     return (0);
 }
@@ -301,7 +302,9 @@ int Monitor::run() {
     workgen_version(version, sizeof(version));
     Stats prev_interval;
     while (!_stop) {
-        for (int i = 0; i < options->sample_interval && !_stop; i++)
+        int waitsecs = (first && options->warmup > 0) ? options->warmup :
+          options->sample_interval;
+        for (int i = 0; i < waitsecs && !_stop; i++)
             sleep(1);
         if (_stop)
             break;
@@ -387,6 +390,22 @@ int Monitor::run() {
     return (0);
 }
 
+ParetoOptions ParetoOptions::DEFAULT;
+ParetoOptions::ParetoOptions(int param_arg) : param(param_arg), range_low(0.0),
+    range_high(1.0), _options() {
+    _options.add_int("param", param,
+      "0 is disabled, otherwise a range from 1 (most aggressive) to "
+      "100 (least aggressive)");
+    _options.add_double("range_low", range_low,
+      "between 0.0 and 1.0, starting range of the pareto distribution");
+    _options.add_double("range_high", range_high,
+      "between 0.0 and 1.0, ending range of the pareto distribution");
+}
+ParetoOptions::ParetoOptions(const ParetoOptions &other) :
+    param(other.param), range_low(other.range_low),
+    range_high(other.range_high), _options(other._options) {}
+ParetoOptions::~ParetoOptions() {}
+
 ThreadRunner::ThreadRunner() :
     _errno(0), _exception(), _thread(NULL), _context(NULL), _icontext(NULL),
     _workload(NULL), _wrunner(NULL), _rand_state(NULL),
@@ -536,9 +555,12 @@ void ThreadRunner::op_create_all(Operation *op, size_t &keysize,
 
     op->create_all();
     if (op->_optype != Operation::OP_NONE) {
-        op->kv_compute_max(true);
+        op->kv_compute_max(true, false);
         if (OP_HAS_VALUE(op))
-            op->kv_compute_max(false);
+            op->kv_compute_max(false, op->_table.options.random_value);
+        if (op->_key._keytype == Key::KEYGEN_PARETO &&
+          op->_key._pareto.param == 0)
+            THROW("Key._pareto value must be set if KEYGEN_PARETO specified");
         op->kv_size_buffer(true, keysize);
         op->kv_size_buffer(false, valuesize);
 
@@ -575,17 +597,66 @@ void ThreadRunner::op_create_all(Operation *op, size_t &keysize,
             op_create_all(&*i, keysize, valuesize);
 }
 
-uint64_t ThreadRunner::op_get_key_recno(Operation *op, tint_t tint) {
+
+#define	PARETO_SHAPE	1.5
+
+// Return a value within the interval [ 0, recno_max )
+// that is weighted toward lower numbers with a low pareto.param, and more
+// evenly distributed with pareto.param at 100 (the maximum).  Throws if the
+// configured range_low/range_high pair is not a valid slice of [0.0, 1.0].
+static uint64_t
+pareto_calculation(uint32_t randint, uint64_t recno_max,
+  ParetoOptions &pareto) {
+    double S1, S2, U, r, v;
+    uint64_t result;            // 64 bits wide, matching the return type
+
+    // r is the random draw, narrowed below when a sub-range is configured.
+    r = (double)randint;
+    if (pareto.range_high != 1.0 || pareto.range_low != 0.0) {
+        if (pareto.range_high <= pareto.range_low ||
+          pareto.range_high > 1.0 || pareto.range_low < 0.0)
+            THROW("Pareto illegal range");
+        r = (pareto.range_low * (double)UINT32_MAX) +
+          r * (pareto.range_high - pareto.range_low);
+    }
+    S1 = (-1 / PARETO_SHAPE);
+    S2 = recno_max * (pareto.param / 100.0) * (PARETO_SHAPE - 1);
+    U = 1 - r / (double)UINT32_MAX;    // interval [0, 1]; 0 when r is max
+    v = (pow(U, S1) - 1) * S2;         // infinite when U is 0
+
+    // This Pareto calculation chooses out of range values less than 20%
+    // of the time, depending on pareto_param.  For param of 0, it is
+    // never out of range, for param of 100, 19.2%. For the default
+    // pareto_param of 20, it will be out of range 2.7% of the time.
+    // Out of range values are channelled into the first key,
+    // making it "hot".  Unfortunately, that means that using a higher
+    // param can get a lot lumped into the first bucket.
+    //
+    // XXX This matches the behavior of wtperf, we may consider instead
+    // retrying (modifying the random number) until we get a good value.
+    //
+    // Range-check as a double: v may be infinite or too big to convert.
+    result = (v >= 0 && v < (double)recno_max) ? (uint64_t)v : 0;
+    return (result);
+}
+
+uint64_t ThreadRunner::op_get_key_recno(Operation *op, uint64_t range,
+  tint_t tint) {
     uint64_t recno_count;
-    uint32_t rand;
+    uint32_t rval;
 
     (void)op;
-    recno_count = _icontext->_recno[tint];
+    if (range > 0)
+        recno_count = range;
+    else
+        recno_count = _icontext->_table_runtime[tint]._max_recno;
     if (recno_count == 0)
         // The file has no entries, returning 0 forces a WT_NOTFOUND return.
         return (0);
-    rand = workgen_random(_rand_state);
-    return (rand % recno_count + 1);  // recnos are one-based.
+    rval = workgen_random(_rand_state);
+    if (op->_key._keytype == Key::KEYGEN_PARETO)
+        rval = pareto_calculation(rval, recno_count, op->_key._pareto);
+    return (rval % recno_count + 1);  // recnos are one-based.
 }
 
 int ThreadRunner::op_run(Operation *op) {
@@ -594,12 +665,14 @@ int ThreadRunner::op_run(Operation *op) {
     WT_CURSOR *cursor;
     WT_DECL_RET;
     uint64_t recno;
+    uint64_t range;
     bool measure_latency, own_cursor;
 
     track = NULL;
     cursor = NULL;
     recno = 0;
     own_cursor = false;
+    range = op->_table.options.range;
     if (_throttle != NULL) {
         if (_throttle_ops >= _throttle_limit && !_in_transaction) {
             WT_ERR(_throttle->throttle(_throttle_ops,
@@ -621,19 +694,24 @@ int ThreadRunner::op_run(Operation *op) {
     switch (op->_optype) {
     case Operation::OP_INSERT:
         track = &_stats.insert;
-        recno = workgen_atomic_add64(&_icontext->_recno[tint], 1);
+        if (op->_key._keytype == Key::KEYGEN_APPEND ||
+          op->_key._keytype == Key::KEYGEN_AUTO)
+            recno = workgen_atomic_add64(
+              &_icontext->_table_runtime[tint]._max_recno, 1);
+        else
+            recno = op_get_key_recno(op, range, tint);
         break;
     case Operation::OP_REMOVE:
         track = &_stats.remove;
-        recno = op_get_key_recno(op, tint);
+        recno = op_get_key_recno(op, range, tint);
         break;
     case Operation::OP_SEARCH:
         track = &_stats.read;
-        recno = op_get_key_recno(op, tint);
+        recno = op_get_key_recno(op, range, tint);
         break;
     case Operation::OP_UPDATE:
         track = &_stats.update;
-        recno = op_get_key_recno(op, tint);
+        recno = op_get_key_recno(op, range, tint);
         break;
     case Operation::OP_NONE:
         recno = 0;
@@ -651,6 +729,7 @@ int ThreadRunner::op_run(Operation *op) {
       track->track_latency() &&
       (track->ops % _workload->options.sample_rate == 0);
 
+    VERBOSE(*this, "OP " << op->_optype << " " << op->_table._uri.c_str() << ", recno=" << recno);
     timespec start;
     if (measure_latency)
         workgen_epoch(&start);
@@ -663,10 +742,13 @@ int ThreadRunner::op_run(Operation *op) {
         _in_transaction = true;
     }
     if (op->_optype != Operation::OP_NONE) {
-        op->kv_gen(true, recno, _keybuf);
+        op->kv_gen(true, 0, recno, _keybuf);
         cursor->set_key(cursor, _keybuf);
         if (OP_HAS_VALUE(op)) {
-            op->kv_gen(false, recno, _valuebuf);
+            uint32_t r = 0;
+            if (op->_table.options.random_value)
+                r = workgen_random(_rand_state);
+            op->kv_gen(false, r, recno, _valuebuf);
             cursor->set_value(cursor, _valuebuf);
         }
         switch (op->_optype) {
@@ -969,7 +1051,7 @@ void Operation::get_static_counts(Stats &stats, int multiplier) {
             i->get_static_counts(stats, multiplier * _repeatgroup);
 }
 
-void Operation::kv_compute_max(bool iskey) {
+void Operation::kv_compute_max(bool iskey, bool has_random) {
     uint64_t max;
     int size;
 
@@ -981,6 +1063,14 @@ void Operation::kv_compute_max(bool iskey) {
         THROW("Key.size too small for table '" << _table._uri << "'");
     if (!iskey && size < 1)
         THROW("Value.size too small for table '" << _table._uri << "'");
+    if (has_random) {
+        if (iskey)
+            THROW("Random keys not allowed");
+        size -= RANDOMIZER_SIZE;
+        if (size < 1)
+            THROW("Value.size with random values too small for table '"
+              << _table._uri << "'");
+    }
 
     if (size > 1)
         max = power64(10, (size - 1)) - 1;
@@ -1006,7 +1096,8 @@ void Operation::kv_size_buffer(bool iskey, size_t &maxsize) const {
     }
 }
 
-void Operation::kv_gen(bool iskey, uint64_t n, char *result) const {
+void Operation::kv_gen(bool iskey, uint32_t randomizer, uint64_t n,
+  char *result) const {
     uint64_t max;
     int size;
 
@@ -1015,6 +1106,12 @@ void Operation::kv_gen(bool iskey, uint64_t n, char *result) const {
     if (n > max)
         THROW((iskey ? "Key" : "Value") << " (" << n
           << ") too large for size (" << size << ")");
+    if (randomizer != 0) {
+        randomizer %= 1000;
+        snprintf(result, 6, ":%3.3d:", randomizer);
+        n -= RANDOMIZER_SIZE;
+        result += RANDOMIZER_SIZE;
+    }
     workgen_u64_to_string_zf(n, result, size);
 }
 
@@ -1338,14 +1435,20 @@ void Stats::track_latency(bool latency) {
     truncate.track_latency(latency);
 }
 
-TableOptions::TableOptions() : key_size(0), value_size(0), _options() {
+TableOptions::TableOptions() : key_size(0), value_size(0),
+    random_value(false), range(0), _options() {
     _options.add_int("key_size", key_size,
       "default size of the key, unless overridden by Key.size");
     _options.add_int("value_size", value_size,
       "default size of the value, unless overridden by Value.size");
+    _options.add_bool("random_value", random_value,
+      "generate random content for the value");
+    _options.add_int("range", range,
+      "if zero, keys are inserted at the end and reads/updates are in the current range, if non-zero, inserts/reads/updates are at a random key between 0 and the given range");
 }
 TableOptions::TableOptions(const TableOptions &other) :
     key_size(other.key_size), value_size(other.value_size),
+    random_value(other.random_value), range(other.range),
     _options(other._options) {}
 TableOptions::~TableOptions() {}
 
@@ -1376,7 +1479,7 @@ TableInternal::~TableInternal() {}
 
 WorkloadOptions::WorkloadOptions() : max_latency(0),
     report_file("workload.stat"), report_interval(0), run_time(0),
-    sample_file("sample.json"), sample_interval(0), sample_rate(1),
+    sample_file("sample.json"), sample_interval(0), sample_rate(1), warmup(0),
     _options() {
     _options.add_int("max_latency", max_latency,
       "prints warning if any latency measured exceeds this number of "
@@ -1399,6 +1502,8 @@ WorkloadOptions::WorkloadOptions() : max_latency(0),
     _options.add_int("sample_rate", sample_rate,
       "how often the latency of operations is measured. 1 for every operation, "
       "2 for every second operation, 3 for every third operation etc.");
+    _options.add_int("warmup", warmup,
+      "how long to run the workload phase before starting measurements");
 }
 
 WorkloadOptions::WorkloadOptions(const WorkloadOptions &other) :
@@ -1569,7 +1674,8 @@ int WorkloadRunner::run_all() {
 
     workgen_epoch(&_start);
     timespec end = _start + options->run_time;
-    timespec next_report = _start + options->report_interval;
+    timespec next_report = _start +
+      ((options->warmup > 0) ? options->warmup : options->report_interval);
 
     // Start all threads
     if (options->sample_interval > 0) {
@@ -1653,6 +1759,8 @@ int WorkloadRunner::run_all() {
         if (exception == NULL && !_trunners[i]._exception._str.empty())
             exception = &_trunners[i]._exception;
     }
+
+    workgen_epoch(&now);
     if (options->sample_interval > 0) {
         WT_TRET(pthread_join(monitor._handle, &status));
         if (monitor._errno != 0)
diff --git a/src/third_party/wiredtiger/bench/workgen/workgen.h b/src/third_party/wiredtiger/bench/workgen/workgen.h
index a12e4dc4c89..2a116e1c89e 100644
--- a/src/third_party/wiredtiger/bench/workgen/workgen.h
+++ b/src/third_party/wiredtiger/bench/workgen/workgen.h
@@ -171,6 +171,8 @@ struct Context {
 struct TableOptions {
     int key_size;
     int value_size;
+    bool random_value;
+    int range;
 
     TableOptions();
     TableOptions(const TableOptions &other);
@@ -179,6 +181,8 @@ struct TableOptions {
     void describe(std::ostream &os) const {
 	os << "key_size " << key_size;
 	os << ", value_size " << value_size;
+	os << ", random_value " << random_value;
+	os << ", range " << range;
     }
 
     std::string help() const { return _options.help(); }
@@ -210,16 +214,46 @@ struct Table {
 #endif
 };
 
+struct ParetoOptions {
+    int param;            // 0 is disabled, else 1 (most aggressive) to 100
+    double range_low;     // start of the pareto range, between 0.0 and 1.0
+    double range_high;    // end of the pareto range, between 0.0 and 1.0
+    ParetoOptions(int param = 0);
+    ParetoOptions(const ParetoOptions &other);
+    ~ParetoOptions();
+
+    void describe(std::ostream &os) const {
+	os << "parameter " << param;
+	if (range_low != 0.0 || range_high != 1.0) {
+	    os << ", range [" << range_low << "-" << range_high << "]";
+	}
+    }
+
+    std::string help() const { return _options.help(); }
+    std::string help_description(const char *option_name) const {
+	return _options.help_description(option_name); }
+    std::string help_type(const char *option_name) const {
+	return _options.help_type(option_name); }
+
+    static ParetoOptions DEFAULT;
+private:
+    OptionsList _options;
+};
+
 struct Key {
     typedef enum {
 	KEYGEN_AUTO, KEYGEN_APPEND, KEYGEN_PARETO, KEYGEN_UNIFORM } KeyType;
     KeyType _keytype;
     int _size;
+    ParetoOptions _pareto;
 
     /* XXX specify more about key distribution */
-    Key() : _keytype(KEYGEN_AUTO), _size(0) {}
-    Key(KeyType keytype, int size) : _keytype(keytype), _size(size) {}
-    Key(const Key &other) : _keytype(other._keytype), _size(other._size) {}
+    Key() : _keytype(KEYGEN_AUTO), _size(0), _pareto(ParetoOptions::DEFAULT) {}
+    Key(KeyType keytype, int size=0,
+      const ParetoOptions &pareto=ParetoOptions::DEFAULT) :
+	_keytype(keytype), _size(size), _pareto(pareto) {}
+    Key(const Key &other) : _keytype(other._keytype), _size(other._size),
+	_pareto(other._pareto) {}
     ~Key() {}
 
     void describe(std::ostream &os) const {
@@ -273,8 +307,9 @@ struct Operation {
     Operation& operator=(const Operation &other);
     void create_all();
     void get_static_counts(Stats &stats, int multiplier);
-    void kv_compute_max(bool);
-    void kv_gen(bool, uint64_t, char *) const;
+    void kv_compute_max(bool iskey, bool has_random);
+    void kv_gen(bool iskey, uint32_t randomizer, uint64_t n,
+      char *result) const;
     void kv_size_buffer(bool iskey, size_t &size) const;
     void size_check() const;
 #endif
@@ -365,6 +400,7 @@ struct WorkloadOptions {
     int sample_interval;
     int sample_rate;
     std::string sample_file;
+    int warmup;
 
     WorkloadOptions();
     WorkloadOptions(const WorkloadOptions &other);
diff --git a/src/third_party/wiredtiger/bench/workgen/workgen_int.h b/src/third_party/wiredtiger/bench/workgen/workgen_int.h
index a8d008a3bc5..c7a5a7121e9 100644
--- a/src/third_party/wiredtiger/bench/workgen/workgen_int.h
+++ b/src/third_party/wiredtiger/bench/workgen/workgen_int.h
@@ -36,6 +36,8 @@ extern "C" {
 }
 #endif
 
+#define	RANDOMIZER_SIZE  5    /* ":000:" prefix */
+
 namespace workgen {
 
 // A 'tint' or ('table integer') is a unique small value integer
@@ -126,7 +128,7 @@ struct ThreadRunner {
     int run();
 
     void op_create_all(Operation *, size_t &keysize, size_t &valuesize);
-    uint64_t op_get_key_recno(Operation *, tint_t tint);
+    uint64_t op_get_key_recno(Operation *, uint64_t range, tint_t tint);
     void op_get_static_counts(Operation *, Stats &, int);
     int op_run(Operation *);
 
@@ -153,11 +155,18 @@ struct Monitor {
     int run();
 };
 
+struct TableRuntime {
+    uint64_t _max_recno;                           // highest recno allocated
+    bool _disjoint;                                // does key space have holes?
+
+    TableRuntime() : _max_recno(0), _disjoint(0) {}
+};
+
 struct ContextInternal {
     std::map<std::string, tint_t> _tint;           // maps uri -> tint_t
     std::map<tint_t, std::string> _table_names;    // reverse mapping
-    uint64_t *_recno;                              // # entries per tint_t
-    uint32_t _recno_alloced;                       // length of allocated _recno
+    TableRuntime *_table_runtime;                  // # entries per tint_t
+    uint32_t _runtime_alloced;                     // length of _table_runtime
     tint_t _tint_last;                             // last tint allocated
     // unique id per context, to work with multiple contexts, starts at 1.
     uint32_t _context_count;
diff --git a/src/third_party/wiredtiger/bench/workgen/wtperf.py b/src/third_party/wiredtiger/bench/workgen/wtperf.py
index 3a196fe7b57..2837be6d064 100644
--- a/src/third_party/wiredtiger/bench/workgen/wtperf.py
+++ b/src/third_party/wiredtiger/bench/workgen/wtperf.py
@@ -34,7 +34,7 @@
 # See also the usage() function.
 #
 from __future__ import print_function
-import os, sys, tempfile
+import os, shutil, sys, tempfile
 
 def eprint(*args, **kwargs):
     print(*args, file=sys.stderr, **kwargs)
@@ -52,13 +52,15 @@ class Options(object):
     pass
 
 class Translator:
-    def __init__(self, filename, prefix, verbose):
+    def __init__(self, filename, prefix, verbose, homedir):
         self.filename = filename
         self.prefix = prefix
         self.verbose = verbose
+        self.homedir = homedir
         self.linenum = 0
-        self.opts = {}
-        self.used_opts = {}
+        self.opts_map = {}
+        self.opts_used = {}
+        self.options = lambda: None   # options behaves as an attribute dict
         self.has_error = False
 
     def error_file_line(self, fname, linenum, msg):
@@ -70,15 +72,17 @@ class Translator:
         self.error_file_line(self.filename, self.linenum, msg)
 
     # Report an error and unwind the stack
-    def fatal_error(self, msg, errtype):
+    def fatal_error(self, msg, errtype = 'configuration error'):
         self.error(msg)
         raise TranslateException(errtype)
 
-    supported_opt_list = [ 'compression', 'conn_config', 'icount',
-                           'key_sz', 'log_like_table',
+    supported_opt_list = [ 'close_conn', 'compression', 'compact',
+                           'conn_config', 'create', 'icount',
+                           'key_sz', 'log_like_table', 'pareto',
                            'populate_ops_per_txn', 'populate_threads',
-                           'reopen_connection',
-                           'table_config', 'table_count',
+                           'random_range', 'random_value', 'range_partition',
+                           'readonly', 'reopen_connection', 'run_ops',
+                           'sess_config', 'table_config', 'table_count',
                            'threads', 'transaction_config', 'value_sz' ]
 
     def set_opt(self, optname, val):
@@ -98,23 +102,32 @@ class Translator:
                 v = int(val)   # it might be an integer
             except ValueError:
                 v = val        # it's a string after all
-        self.opts[optname] = OptionValue(v, self.filename, self.linenum)
+        self.opts_map[optname] = OptionValue(v, self.filename, self.linenum)
 
-    def get_opt(self, optname, dfault):
-        if optname in self.opts:
-            ret = self.opts[optname]
+    def _get_opt(self, optname, dfault):
+        if optname in self.opts_map:
+            ret = self.opts_map[optname]
             self.filename = ret.filename
             self.linenum = ret.linenum
-            self.used_opts[optname] = 1
+            self.opts_used[optname] = 1
             return ret.value
         else:
             return dfault
 
+    def get_string_opt(self, optname, dfault):
+        v = self._get_opt(optname, dfault)
+        setattr(self.options, optname, v)
+        return v
+
     def get_int_opt(self, optname, dfault):
-        return self.get_opt(optname, dfault) + 0
+        v = self._get_opt(optname, dfault) + 0
+        setattr(self.options, optname, v)
+        return v
 
     def get_boolean_opt(self, optname, dfault):
-        return not not self.get_opt(optname, dfault)
+        v = not not self._get_opt(optname, dfault)
+        setattr(self.options, optname, v)
+        return v
 
     # Split a string 'left_side=right_side' into two parts
     def split_assign(self, s):
@@ -159,17 +172,33 @@ class Translator:
     def assign_str(self, left, right):
         return left + '=' + str(right) + '\n'
 
-    def add_operation_str(self, count, opname, multi):
+    def add_operation_str(self, count, opname, multi, pareto):
         result = ''
         tablename = 'tables[0]' if multi else 'table'
         if count > 1:
             result += str(count) + ' * '
         if count > 0:
-            result += 'Operation(Operation.' + opname + ', ' + \
-                      tablename + ') + \\\n'
+            result += 'Operation(Operation.' + opname + ', ' + tablename
+            if pareto > 0:
+                result += ', Key(Key.KEYGEN_PARETO, 0, ParetoOptions(' + \
+                          str(pareto) + '))'
+            elif opname == 'OP_INSERT' and self.options.random_range != 0:
+                result += ', Key(Key.KEYGEN_UNIFORM)'
+            result += ') + \\\n'
             result += '      '
         return result
 
+    def copy_config(self):
+        # Note: If we add the capability of setting options on the command
+        # line, we won't be able to do a simple copy.
+        config_save = os.path.join(self.homedir, 'CONFIG.wtperf')
+        suffix = 0
+        while os.path.exists(config_save):
+            suffix += 1
+            config_save = os.path.join(self.homedir, \
+                                       'CONFIG.wtperf.' + str(suffix))
+        shutil.copyfile(self.filename, config_save)
+
     # Wtperf's throttle is based on the number of regular operations,
     # not including log_like operations.  Workgen counts all operations,
     # it doesn't treat log operations any differently.  Adjust the throttle
@@ -191,11 +220,13 @@ class Translator:
         return (new_throttle, comment)
 
     def parse_threads(self, threads_config):
+        opts = self.options
         tdecls = ''
         tlist = self.split_config_parens(threads_config)
         table_count = self.get_int_opt('table_count', 1)
         log_like_table = self.get_boolean_opt('log_like_table', False)
-        txn_config = self.get_opt('transaction_config', '')
+        txn_config = self.get_string_opt('transaction_config', '')
+        run_ops = self.get_int_opt('run_ops', -1)
         if log_like_table:
             tdecls += 'log_name = "table:log"\n'
             tdecls += 's.create(log_name, "key_format=S,value_format=S," +' + \
@@ -219,6 +250,7 @@ class Translator:
             topts.throttle = 0
             topts.update = 0
             topts.updates = 0
+            topts.random_range = 0
 
             for o in self.split_config_parens(t):
                 (k, v) = self.split_assign(o)
@@ -239,19 +271,41 @@ class Translator:
             if topts.inserts + topts.reads + topts.updates == 0:
                 self.fatal_error('need read/insert/update/...',
                                  'thread config error')
+
             tdecls += 'ops = '
-            tdecls += self.add_operation_str(topts.inserts, 'OP_INSERT', multi)
-            tdecls += self.add_operation_str(topts.reads, 'OP_SEARCH', multi)
-            tdecls += self.add_operation_str(topts.updates, 'OP_UPDATE', multi)
+            tdecls += self.add_operation_str(topts.inserts, 'OP_INSERT',
+                multi, opts.pareto)
+            tdecls += self.add_operation_str(topts.reads, 'OP_SEARCH',
+                multi, opts.pareto)
+            tdecls += self.add_operation_str(topts.updates, 'OP_UPDATE',
+                multi, opts.pareto)
             tdecls = tdecls.rstrip(' \n\\+') + '\n'
+            range_partition = opts.range_partition
+
+            # Pareto with multiple tables is handled in op_multi_table.
             if multi:
-                tdecls += 'ops = op_multi_table(ops, tables)\n'
+                tdecls += 'ops = op_multi_table(ops, tables, ' + \
+                          str(range_partition) + ')\n'
             if topts.ops_per_txn > 0:
                 tdecls += 'ops = op_group_transaction(ops, ' + \
                           str(topts.ops_per_txn) + ', "' + txn_config + '")\n'
             if log_like_table:
                 tdecls += 'ops = op_log_like(ops, log_table, ' + \
                           str(topts.ops_per_txn) + ')\n'
+            if run_ops != -1:
+                if len(tlist) > 1:
+                    self.fatal_error('run_ops currently supported with a '
+                                     'single type of thread')
+                tdecls += '\n'
+                if multi:
+                    tdecls += \
+                        '# Note that op_multi_table has already multiplied\n' +\
+                        '# the number of operations by the number of tables.\n'
+                tdecls += 'ops = ops * (' + \
+                          str(run_ops) + ' / (' + str(topts.count) + \
+                          ' * table_count))' + \
+                          '     # run_ops = ' + str(run_ops) + \
+                          ', thread.count = ' + str(topts.count) + '\n'
             tdecls += thread_name + ' = Thread(ops)\n'
             if topts.throttle > 0:
                 (throttle, comment) = self.calc_throttle(topts, log_like_table)
@@ -273,6 +327,134 @@ class Translator:
             # An error has already been reported
             return None
 
+    def check_divisibility(self, icount, random_range, divisor_name, divisor):
+        # Fatal error unless (icount + random_range) divides evenly by divisor.
+        if (icount + random_range) % divisor != 0:
+            # Name the dividend to match what was actually configured.
+            dividend = 'icount' if random_range == 0 else \
+                       '(icount + random_range)'
+            self.fatal_error(dividend + ' is not evenly divisible by ' +
+                             divisor_name + ', this is not handled ' +
+                             'precisely by wtperf.py')
+
+    def translate_table_create(self):
+        opts = self.options
+        s = ''
+        s += 'wtperf_table_config = "key_format=S,value_format=S,type=lsm," +\\\n'
+        s += '    "exclusive=true,allocation_size=4kb," +\\\n'
+        s += '    "internal_page_max=64kb,leaf_page_max=4kb,split_pct=100,"\n'
+        if opts.compression != '':
+            s += 'compress_table_config = "block_compressor=' + opts.compression + ',"\n'
+        else:
+            s += 'compress_table_config = ""\n'
+        s += 'table_config = "' + opts.table_config + '"\n'
+        s += 'tables = []\n'
+        s += 'table_count = ' + str(opts.table_count) + '\n'
+        if opts.table_count == 1:
+            s += 'tname = "table:test.wt"\n'
+            indent = ''
+        else:
+            s += 'for i in range(0, table_count):\n'
+            s += '    tname = "table:test" + str(i) + ".wt"\n'
+            indent = '    '
+
+        s += indent + 'table = Table(tname)\n'
+        s += indent + 's.create(tname, wtperf_table_config +\\\n'
+        s += indent + '         compress_table_config + table_config)\n'
+        s += indent + 'table.options.key_size = ' + str(opts.key_sz) + '\n'
+        s += indent + 'table.options.value_size = ' + str(opts.value_sz) + '\n'
+        if opts.random_value:
+            s += indent + 'table.options.random_value = True\n'
+        if opts.random_range != 0:
+            # In wtperf, the icount plus random_range is the key range
+            table_range = (opts.random_range + opts.icount) / opts.table_count
+            s += indent + 'table.options.range = ' + str(table_range) + '\n'
+        s += indent + 'tables.append(table)\n'
+        return s
+
+    def translate_populate(self):
+        opts = self.options
+        s = '\n'
+        if opts.icount == 0:
+            if opts.populate_threads != 0:
+                self.error("populate_threads > 0, icount == 0")
+            return ''
+        if opts.populate_threads == 0:
+            self.fatal_error('icount != 0 and populate_threads == 0: ' +\
+                             'cannot populate entries with no threads')
+        s += 'populate_threads = ' + str(opts.populate_threads) + '\n'
+        s += 'icount = ' + str(opts.icount) + '\n'
+        need_ops_per_thread = True
+
+        # Since we're separating the populating by table, and also
+        # into multiple threads, we currently require that
+        # (icount + random_range) is evenly divisible by table count
+        # and by number of populating threads.  It's possible to handle
+        # the cases when this is not true, but it hardly seems worth
+        # the extra complexity.  Also, these could be made into warnings,
+        # and actually create fewer entries than icount, but that could be
+        # confusing.
+        self.check_divisibility(opts.icount, opts.random_range,
+                                'table_count', opts.table_count)
+        self.check_divisibility(opts.icount, opts.random_range,
+                                '(populate_threads * table_count)',
+                                opts.populate_threads * opts.table_count)
+
+        if opts.table_count == 1:
+            s += 'pop_ops = Operation(Operation.OP_INSERT, table)\n'
+        elif opts.range_partition and opts.random_range > 0:
+            # Populating using a range partition is complex enough
+            # to handle in its own function.  It does all the operations
+            # for the thread, so we don't need a multiplier at the end.
+            need_ops_per_thread = False
+
+            s += 'random_range = ' + str(opts.random_range) + '\n'
+            s += 'pop_ops = Operation(Operation.OP_INSERT, tables[0])\n'
+            s += 'pop_ops = op_populate_with_range(pop_ops, tables, ' + \
+                 'icount, random_range, populate_threads)\n'
+        else:
+            s += '# There are multiple tables to be filled during populate,\n'
+            s += '# the icount is split between them all.\n'
+            s += 'pop_ops = Operation(Operation.OP_INSERT, tables[0])\n'
+            s += 'pop_ops = op_multi_table(pop_ops, tables)\n'
+
+        if need_ops_per_thread:
+            s += 'nops_per_thread = icount / (populate_threads * table_count)\n'
+            op_mult = ' * nops_per_thread'
+        else:
+            op_mult = ''
+
+        pop_per_txn = opts.populate_ops_per_txn
+        if pop_per_txn > 0:
+            s += 'pop_ops = op_group_transaction(pop_ops, ' + \
+                 str(pop_per_txn) + ', "' + opts.transaction_config + '")\n'
+        s += 'pop_thread = Thread(pop_ops' + op_mult + ')\n'
+        s += 'pop_workload = Workload(context, populate_threads * pop_thread)\n'
+        if self.verbose > 0:
+            s += 'print("populate:")\n'
+        s += 'pop_workload.run(conn)\n'
+
+        # If configured, compact to allow LSM merging to complete.  We
+        # set an unlimited timeout because if we close the connection
+        # then any in-progress compact/merge is aborted.
+        if opts.compact:
+            if opts.async_threads == 0:
+                self.fatal_error('unexpected value for async_threads')
+            s += '\n'
+            if self.verbose > 0:
+                s += 'print("compact after populate:")\n'
+            s += 'import time\n'
+            s += 'start_time = time.time()\n'
+            s += 'async_callback = WtperfAsyncCallback()\n'
+            s += 'for i in range(0, table_count):\n'
+            s += '    op = conn.async_new_op(tables[i]._uri, "timeout=0", async_callback)\n'
+            s += '    op.compact()\n'
+            s += 'conn.async_flush()\n'
+            s += 'print("compact completed in {} seconds".format(' + \
+                'time.time() - start_time))\n'
+
+        return s
+
     def translate_inner(self):
         workloadopts = ''
         with open(self.filename) as fin:
@@ -286,19 +468,40 @@ class Translator:
                     continue
                 (key, val) = self.split_assign(line)
                 if key in [ 'max_latency', 'report_file', 'report_interval',
-                            'run_time', 'sample_interval', 'sample_rate' ]:
+                            'run_time', 'sample_interval', 'sample_rate',
+                            'warmup' ]:
                     workloadopts += 'workload.options.' + key + '=' + val + '\n'
                 else:
                     self.set_opt(key, val)
 
-        table_count = self.get_int_opt('table_count', 1)
-        conn_config = self.get_opt('conn_config', '')
-        table_config = self.get_opt('table_config', '')
-        key_sz = self.get_int_opt('key_sz', 20)
-        value_sz = self.get_int_opt('value_sz', 100)
-        reopen = self.get_boolean_opt('reopen_connection', False)
-        compression = self.get_opt('compression', '')
-        txn_config = self.get_opt('transaction_config', '')
+        conn_config = self.get_string_opt('conn_config', '')
+        sess_config = self.get_string_opt('sess_config', '')
+        create = self.get_boolean_opt('create', True)
+        reopen_connection = self.get_boolean_opt('reopen_connection', False)
+        readonly = self.get_boolean_opt('readonly', False)
+        close_conn = self.get_boolean_opt('close_conn', True)
+        compression = self.get_string_opt('compression', '')
+        self.get_int_opt('table_count', 1)
+        self.get_string_opt('table_config', '')
+        self.get_int_opt('key_sz', 20)
+        self.get_int_opt('value_sz', 100)
+        self.get_int_opt('icount', 0)
+        self.get_int_opt('populate_threads', 1)
+        self.get_int_opt('populate_ops_per_txn', 0)
+        self.get_boolean_opt('range_partition', False)
+        self.get_int_opt('random_range', 0)
+        self.get_boolean_opt('random_value', False)
+        self.get_string_opt('transaction_config', '')
+        self.get_boolean_opt('compact', False)
+        self.get_int_opt('async_threads', 0)
+        self.get_int_opt('pareto', 0)
+        opts = self.options
+        if opts.range_partition and opts.random_range == 0:
+            self.fatal_error('range_partition requires random_range to be set')
+        if opts.random_range > 0 and not opts.range_partition and \
+           opts.table_count != 1:
+            self.fatal_error('random_range and multiple tables without ' + \
+                             'range_partition is not supported')
 
         s = '#/usr/bin/env python\n'
         s += '# generated from ' + self.filename + '\n'
@@ -307,93 +510,75 @@ class Translator:
         s += 'from wiredtiger import *\n'
         s += 'from workgen import *\n'
         s += '\n'
+        async_config = ''
+        if opts.compact and opts.async_threads == 0:
+            opts.async_threads = 2;
+        if opts.async_threads > 0:
+            # Assume the default of 1024 for the max ops, although we
+            # could bump that up to 4096 if needed.
+            async_config = ',async=(enabled=true,threads=' + \
+                str(opts.async_threads) + ')'
+            s += '# this can be further customized\n'
+            s += 'class WtperfAsyncCallback(AsyncCallback):\n'
+            s += '    def __init__(self):\n'
+            s += '        pass\n'
+            s += '    def notify_error(self, key, value, optype, desc):\n'
+            s += '        print("ERROR: async notify(" + str(key) + "," + \\\n'
+            s += '             str(value) + "," + str(optype) + "): " + desc)\n'
+            s += '    def notify(self, op, op_ret, flags):\n'
+            s += '        if op_ret != 0:\n'
+            s += '            self.notify_error(op._key, op._value,\\\n'
+            s += '                op._optype, wiredtiger_strerror(op_ret))\n'
+            s += '        return op_ret\n'
+            s += '\n'
         s += 'context = Context()\n'
-        s += 'conn_config = "' + conn_config + '"\n'
+        extra_config = ''
+        s += 'conn_config = ""\n'
+
+        if async_config != '':
+            s += 'conn_config += ",' + async_config + '"  # async config\n'
+        if conn_config != '':
+            s += 'conn_config += ",' + conn_config + '"   # explicitly added\n'
         if compression != '':
             s += 'conn_config += extensions_config(["compressors/' + \
-                 compression + '"])\n'
+                compression + '"])\n'
             compression = 'block_compressor=' + compression + ','
-        s += 'conn = wiredtiger_open("WT_TEST", "create," + conn_config)\n'
-        s += 's = conn.open_session()\n'
+        s += 'conn = wiredtiger_open("' + self.homedir + \
+             '", "create," + conn_config)\n'
+        s += 's = conn.open_session("' + sess_config + '")\n'
         s += '\n'
-        s += 'wtperf_table_config = "key_format=S,value_format=S,type=lsm," +\\\n'
-        s += '    "exclusive=true,allocation_size=4kb," +\\\n'
-        s += '    "internal_page_max=64kb,leaf_page_max=4kb,split_pct=100,"\n'
-        s += 'compress_table_config = "' + compression + '"\n'
-        s += 'table_config = "' + table_config + '"\n'
-        if table_count == 1:
-            s += 'tname = "file:test.wt"\n'
-            s += 's.create(tname, wtperf_table_config +\\\n'
-            s += '         compress_table_config + table_config)\n'
-            s += 'table = Table(tname)\n'
-            s += 'table.options.key_size = ' + str(key_sz) + '\n'
-            s += 'table.options.value_size = ' + str(value_sz) + '\n'
-        else:
-            s += 'table_count = ' + str(table_count) + '\n'
-            s += 'tables = []\n'
-            s += 'for i in range(0, table_count):\n'
-            s += '    tname = "file:test" + str(i) + ".wt"\n'
-            s += '    s.create(tname, ' + \
-                 'wtperf_table_config + ' + \
-                 'compress_table_config + table_config)\n'
-            s += '    t = Table(tname)\n'
-            s += '    t.options.key_size = ' + str(key_sz) + '\n'
-            s += '    t.options.value_size = ' + str(value_sz) + '\n'
-            s += '    tables.append(t)\n'
-            s += '\n'
-
-        icount = self.get_int_opt('icount', 0)
-        pop_thread = self.get_int_opt('populate_threads', 1)
-        pop_per_txn = self.get_int_opt('populate_ops_per_txn', 0)
-        if icount != 0:
-            if pop_thread == 0:
-                self.fatal_error('icount != 0 and populate_threads == 0: ' +\
-                                 'cannot populate entries with no threads')
-            elif pop_thread == 1:
-                mult = ''
-            else:
-                mult = str(pop_thread) + ' * '
-
-            # if there are multiple tables to be filled during populate,
-            # the icount is split between them all.
-            nops_per_thread = icount / (pop_thread * table_count)
-            if table_count == 1:
-                s += 'pop_ops = Operation(Operation.OP_INSERT, table)\n'
-            else:
-                s += 'pop_ops = Operation(Operation.OP_INSERT, tables[0])\n'
-                s += 'pop_ops = op_multi_table(pop_ops, tables)\n'
-            if pop_per_txn > 0:
-                s += 'pop_ops = op_group_transaction(pop_ops, ' + \
-                          str(pop_per_txn) + ', "' + txn_config + '")\n'
-            s += 'pop_thread = Thread(pop_ops * ' + str(nops_per_thread) + ')\n'
-            s += 'pop_workload = Workload(context, ' + mult + 'pop_thread)\n'
-            if self.verbose > 0:
-                s += 'print("populate:")\n'
-            s += 'pop_workload.run(conn)\n'
-        else:
-            if self.get_int_opt('populate_threads', 0) != 0:
-                self.error("populate_threads > 0, icount == 0")
+        s += self.translate_table_create()
+        if create:
+            s += self.translate_populate()
 
-        thread_config = self.get_opt('threads', '')
+        thread_config = self.get_string_opt('threads', '')
         if thread_config != '':
             (t_create, t_var) = self.parse_threads(thread_config)
             s += '\n' + t_create
-            if reopen:
+            if reopen_connection:
                 s += '\n# reopen the connection\n'
                 s += 'conn.close()\n'
+                if readonly:   # emit the setting into the generated script
+                    s += 'conn_config += ",readonly=true"\n'
                 s += 'conn = wiredtiger_open(' + \
-                     '"WT_TEST", "create," + conn_config)\n'
+                     '"' + self.homedir + '", "create," + conn_config)\n'
                 s += '\n'
             s += 'workload = Workload(context, ' + t_var + ')\n'
             s += workloadopts
             if self.verbose > 0:
                 s += 'print("workload:")\n'
-            s += 'workload.run(conn)\n'
-
-        for o in self.used_opts:
-            del self.opts[o]
-        if len(self.opts) != 0:
-            self.error('internal error, options not handled: ' + str(self.opts))
+            s += 'workload.run(conn)\n\n'
+            s += 'latency_filename = "' + self.homedir + '/latency.out"\n'
+            s += 'latency.workload_latency(workload, latency_filename)\n'
+
+        if close_conn:
+            s += 'conn.close()\n'
+
+        for o in self.opts_used:
+            del self.opts_map[o]
+        if len(self.opts_map) != 0:
+            self.error('internal error, options not handled: ' +
+                       str(self.opts_map))
         return s
 
 def usage():
@@ -416,13 +601,17 @@ prefix = (
   'sys.path.append("' + runner_dir + '")\n\n')
 
 exit_status = 0
+homedir = 'WT_TEST'
 for arg in sys.argv[1:]:
-    if arg == '--python':
+    if arg == '--pydebug':
+        import pdb
+        pdb.set_trace()
+    elif arg == '--python':
         py_out = True
     elif arg == '--verbose' or arg == '-v':
         verbose += 1
     elif arg.endswith('.wtperf'):
-        translator = Translator(arg, prefix, verbose)
+        translator = Translator(arg, prefix, verbose, homedir)
         pysrc = translator.translate()
         if translator.has_error:
             exit_status = 1
@@ -432,8 +621,20 @@ for arg in sys.argv[1:]:
             (outfd, tmpfile) = tempfile.mkstemp(suffix='.py')
             os.write(outfd, pysrc)
             os.close(outfd)
-            execfile(tmpfile)
+            # We make a copy of the configuration file in the home
+            # directory after the run, because the wiredtiger_open
+            # in the generated code will clean out the directory first.
+            raised = None
+            try:
+                execfile(tmpfile)
+            except Exception, exception:
+                raised = exception
+            if not os.path.isdir(homedir):
+                os.makedirs(homedir)
+            translator.copy_config()
             os.remove(tmpfile)
+            if raised != None:
+                raise raised
     else:
         usage()
         sys.exit(1)
diff --git a/src/third_party/wiredtiger/bench/wtperf/runners/500m-btree-80r20u.wtperf b/src/third_party/wiredtiger/bench/wtperf/runners/500m-btree-80r20u.wtperf
index de5299bbac1..8b56a86e022 100644
--- a/src/third_party/wiredtiger/bench/wtperf/runners/500m-btree-80r20u.wtperf
+++ b/src/third_party/wiredtiger/bench/wtperf/runners/500m-btree-80r20u.wtperf
@@ -11,7 +11,7 @@ compression="snappy"
 # close_conn as false allows this test to close/finish faster, but if running
 # as the set, the next test will need to run recovery.
 close_conn=false
-sess_config="isolation=snapshot
+sess_config="isolation=snapshot"
 table_count=2
 key_sz=40
 value_sz=120
diff --git a/src/third_party/wiredtiger/dist/api_data.py b/src/third_party/wiredtiger/dist/api_data.py
index dbd3dcbb233..32faec8709d 100644
--- a/src/third_party/wiredtiger/dist/api_data.py
+++ b/src/third_party/wiredtiger/dist/api_data.py
@@ -131,6 +131,20 @@ file_runtime_config = [
         do not ever evict the object's pages from cache. Not compatible with
         LSM tables; see @ref tuning_cache_resident for more information''',
         type='boolean'),
+    Config('assert', '', r'''
+        enable enhanced checking. ''',
+        type='category', subconfig= [
+        Config('commit_timestamp', 'none', r'''
+            verify that timestamps should 'always' or 'never' be used
+            on modifications with this table.  Verification is 'none'
+            if mixed update use is allowed.''',
+            choices=['always','never','none']),
+        Config('read_timestamp', 'none', r'''
+            verify that timestamps should 'always' or 'never' be used
+            on reads with this table.  Verification is 'none'
+            if mixed read use is allowed.''',
+            choices=['always','never','none'])
+        ], undoc=True),
     Config('log', '', r'''
         the transaction log configuration for this object.  Only valid if
         log is enabled in ::wiredtiger_open''',
diff --git a/src/third_party/wiredtiger/dist/flags.py b/src/third_party/wiredtiger/dist/flags.py
index 8c0448b27c1..df897bcb91e 100644
--- a/src/third_party/wiredtiger/dist/flags.py
+++ b/src/third_party/wiredtiger/dist/flags.py
@@ -23,6 +23,7 @@ flags = {
     ],
     'page_read' : [
         'READ_CACHE',
+        'READ_LOOKASIDE',
         'READ_NOTFOUND_OK',
         'READ_NO_EMPTY',
         'READ_NO_EVICT',
@@ -35,14 +36,15 @@ flags = {
         'READ_WONT_NEED',
     ],
     'rec_write' : [
-        'CHECKPOINTING',
-        'EVICTING',
-        'EVICT_IN_MEMORY',
-        'EVICT_INMEM_SPLIT',
-        'EVICT_LOOKASIDE',
-        'EVICT_SCRUB',
-        'EVICT_UPDATE_RESTORE',
-        'VISIBILITY_ERR',
+        'REC_CHECKPOINT',
+        'REC_EVICT',
+        'REC_INMEM_SPLIT',
+        'REC_IN_MEMORY',
+        'REC_LOOKASIDE',
+        'REC_SCRUB',
+        'REC_UPDATE_RESTORE',
+        'REC_VISIBILITY_ERR',
+        'REC_VISIBLE_ALL',
     ],
     'timing_stress_for_test' : [
         'TIMING_STRESS_CHECKPOINT_SLOW',
@@ -102,6 +104,7 @@ flags = {
         'CONN_CKPT_SYNC',
         'CONN_CLOSING',
         'CONN_CLOSING_NO_MORE_OPENS',
+        'CONN_EVICTION_NO_LOOKASIDE',
         'CONN_EVICTION_RUN',
         'CONN_IN_MEMORY',
         'CONN_LAS_OPEN',
diff --git a/src/third_party/wiredtiger/dist/s_define.list b/src/third_party/wiredtiger/dist/s_define.list
index dcaf975434f..b2f6cbec43e 100644
--- a/src/third_party/wiredtiger/dist/s_define.list
+++ b/src/third_party/wiredtiger/dist/s_define.list
@@ -58,6 +58,7 @@ WT_STAT_INCRV_BASE
 WT_STAT_WRITE
 WT_TIMEDIFF_US
 WT_TRET_ERROR_OK
+WT_TXN_TIMESTAMP_FLAG_CHECK
 WT_UPDATE_SIZE
 WT_WITH_LOCK_NOWAIT
 WT_WITH_LOCK_WAIT
diff --git a/src/third_party/wiredtiger/dist/stat_data.py b/src/third_party/wiredtiger/dist/stat_data.py
index 06e7dccd943..24610b9ab14 100644
--- a/src/third_party/wiredtiger/dist/stat_data.py
+++ b/src/third_party/wiredtiger/dist/stat_data.py
@@ -254,6 +254,7 @@ connection_stats = [
     CacheStat('cache_hazard_walks', 'hazard pointer check entries walked'),
     CacheStat('cache_inmem_split', 'in-memory page splits'),
     CacheStat('cache_inmem_splittable', 'in-memory page passed criteria to be split'),
+    CacheStat('cache_lookaside_entries', 'lookaside table entries', 'no_clear,no_scale'),
     CacheStat('cache_lookaside_insert', 'lookaside table insert calls'),
     CacheStat('cache_lookaside_remove', 'lookaside table remove calls'),
     CacheStat('cache_overhead', 'percentage overhead', 'no_clear,no_scale'),
diff --git a/src/third_party/wiredtiger/examples/c/ex_all.c b/src/third_party/wiredtiger/examples/c/ex_all.c
index a0c6f87ceda..dcd9dd406df 100644
--- a/src/third_party/wiredtiger/examples/c/ex_all.c
+++ b/src/third_party/wiredtiger/examples/c/ex_all.c
@@ -209,9 +209,9 @@ cursor_ops(WT_SESSION *session)
 	value.size = strlen("another value");
 	cursor->set_value(cursor, &value);
 	/*! [Set the cursor's raw value] */
-	}
 
 	error_check(cursor->insert(cursor));
+	}
 
 	/*! [Return the next record] */
 	error_check(cursor->next(cursor));
diff --git a/src/third_party/wiredtiger/import.data b/src/third_party/wiredtiger/import.data
index 3ed326b1854..6c4f2ee7138 100644
--- a/src/third_party/wiredtiger/import.data
+++ b/src/third_party/wiredtiger/import.data
@@ -1,5 +1,5 @@
 {
-    "commit": "6f561957cb5606f504f9fe5a124c80386b210b1a", 
+    "commit": "0cd3d5bbd8a5c8779f1129c6754b4463403e788f", 
     "github": "wiredtiger/wiredtiger.git", 
     "vendor": "wiredtiger", 
     "branch": "mongodb-3.6"
diff --git a/src/third_party/wiredtiger/src/bloom/bloom.c b/src/third_party/wiredtiger/src/bloom/bloom.c
index a39d50e68c1..6f4050b3eb6 100644
--- a/src/third_party/wiredtiger/src/bloom/bloom.c
+++ b/src/third_party/wiredtiger/src/bloom/bloom.c
@@ -274,6 +274,7 @@ __wt_bloom_hash_get(WT_BLOOM *bloom, WT_BLOOM_HASH *bhash)
 	WT_ASSERT(bloom->session, bloom->bitstring == NULL);
 
 	/* Create a cursor on the first time through. */
+	c = NULL;
 	WT_ERR(__bloom_open_cursor(bloom, NULL));
 	c = bloom->c;
 
@@ -301,6 +302,8 @@ __wt_bloom_hash_get(WT_BLOOM *bloom, WT_BLOOM_HASH *bhash)
 err:	/* Don't return WT_NOTFOUND from a failed search. */
 	if (ret == WT_NOTFOUND)
 		ret = WT_ERROR;
+	if (c != NULL)
+		(void)c->reset(c);
 	__wt_err(bloom->session, ret, "Failed lookup in bloom filter");
 	return (ret);
 }
diff --git a/src/third_party/wiredtiger/src/btree/bt_cursor.c b/src/third_party/wiredtiger/src/btree/bt_cursor.c
index 51882a7e466..ee800ca80ee 100644
--- a/src/third_party/wiredtiger/src/btree/bt_cursor.c
+++ b/src/third_party/wiredtiger/src/btree/bt_cursor.c
@@ -334,7 +334,7 @@ __cursor_col_search(
 	WT_DECL_RET;
 
 	WT_WITH_PAGE_INDEX(session,
-	    ret = __wt_col_search(session, cbt->iface.recno, leaf, cbt));
+	    ret = __wt_col_search(session, cbt->iface.recno, leaf, cbt, false));
 	return (ret);
 }
 
@@ -348,8 +348,8 @@ __cursor_row_search(
 {
 	WT_DECL_RET;
 
-	WT_WITH_PAGE_INDEX(session,
-	    ret = __wt_row_search(session, &cbt->iface.key, leaf, cbt, insert));
+	WT_WITH_PAGE_INDEX(session, ret = __wt_row_search(
+	    session, &cbt->iface.key, leaf, cbt, insert, false));
 	return (ret);
 }
 
@@ -445,6 +445,7 @@ __wt_btcur_search(WT_CURSOR_BTREE *cbt)
 	WT_STAT_CONN_INCR(session, cursor_search);
 	WT_STAT_DATA_INCR(session, cursor_search);
 
+	WT_RET(__wt_txn_search_check(session));
 	__cursor_state_save(cursor, &state);
 
 	/*
@@ -534,6 +535,7 @@ __wt_btcur_search_near(WT_CURSOR_BTREE *cbt, int *exactp)
 	WT_STAT_CONN_INCR(session, cursor_search_near);
 	WT_STAT_DATA_INCR(session, cursor_search_near);
 
+	WT_RET(__wt_txn_search_check(session));
 	__cursor_state_save(cursor, &state);
 
 	/*
diff --git a/src/third_party/wiredtiger/src/btree/bt_debug.c b/src/third_party/wiredtiger/src/btree/bt_debug.c
index 778adcc3dfd..f0388bd1f07 100644
--- a/src/third_party/wiredtiger/src/btree/bt_debug.c
+++ b/src/third_party/wiredtiger/src/btree/bt_debug.c
@@ -1124,6 +1124,9 @@ __debug_ref(WT_DBG *ds, WT_REF *ref)
 	case WT_REF_LOCKED:
 		state = "locked";
 		break;
+	case WT_REF_LOOKASIDE:
+		state = "lookaside";
+		break;
 	case WT_REF_MEM:
 		state = "memory";
 		break;
diff --git a/src/third_party/wiredtiger/src/btree/bt_delete.c b/src/third_party/wiredtiger/src/btree/bt_delete.c
index 093192dbaa0..20e592d12bc 100644
--- a/src/third_party/wiredtiger/src/btree/bt_delete.c
+++ b/src/third_party/wiredtiger/src/btree/bt_delete.c
@@ -85,12 +85,6 @@ __wt_delete_page(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp)
 	/*
 	 * Atomically switch the page's state to lock it.  If the page is not
 	 * on-disk, other threads may be using it, no fast delete.
-	 *
-	 * Possible optimization: if the page is already deleted and the delete
-	 * is visible to us (the delete has been committed), we could skip the
-	 * page instead of instantiating it and figuring out there are no rows
-	 * in the page.  While that's a huge amount of work to no purpose, it's
-	 * unclear optimizing for overlapping range deletes is worth the effort.
 	 */
 	if (ref->state != WT_REF_DISK ||
 	    !__wt_atomic_casv32(&ref->state, WT_REF_DISK, WT_REF_LOCKED))
@@ -164,6 +158,7 @@ __wt_delete_page_rollback(WT_SESSION_IMPL *session, WT_REF *ref)
 	for (sleep_count = yield_count = 0;;) {
 		switch (ref->state) {
 		case WT_REF_DISK:
+		case WT_REF_LOOKASIDE:
 		case WT_REF_READING:
 			WT_ASSERT(session, 0);		/* Impossible, assert */
 			break;
diff --git a/src/third_party/wiredtiger/src/btree/bt_discard.c b/src/third_party/wiredtiger/src/btree/bt_discard.c
index 806a9770057..1aae991a407 100644
--- a/src/third_party/wiredtiger/src/btree/bt_discard.c
+++ b/src/third_party/wiredtiger/src/btree/bt_discard.c
@@ -316,8 +316,14 @@ __wt_free_ref(
 	 */
 	__wt_ref_addr_free(session, ref);
 
-	/* Free any page-deleted information. */
-	if (ref->page_del != NULL) {
+	/*
+	 * Free any lookaside or page-deleted information.  We only expect a
+	 * lookaside structure for lookaside references, but can see
+	 * page-deleted information in other cases (such as WT_REF_MEM).
+	 */
+	if (ref->state == WT_REF_LOOKASIDE)
+		__wt_free(session, ref->page_las);
+	else if (ref->page_del != NULL) {
 		__wt_free(session, ref->page_del->update_list);
 		__wt_free(session, ref->page_del);
 	}
diff --git a/src/third_party/wiredtiger/src/btree/bt_handle.c b/src/third_party/wiredtiger/src/btree/bt_handle.c
index fd52c53861a..4ab88cea01e 100644
--- a/src/third_party/wiredtiger/src/btree/bt_handle.c
+++ b/src/third_party/wiredtiger/src/btree/bt_handle.c
@@ -398,6 +398,29 @@ __btree_conf(WT_SESSION_IMPL *session, WT_CKPT *ckpt)
 	else
 		btree->checksum = CKSUM_UNCOMPRESSED;
 
+	/* Debugging information */
+	WT_RET(__wt_config_gets(session,
+	    cfg, "assert.commit_timestamp", &cval));
+	if (WT_STRING_MATCH("always", cval.str, cval.len)) {
+		FLD_SET(btree->assert_flags, WT_ASSERT_COMMIT_TS_ALWAYS);
+		FLD_CLR(btree->assert_flags, WT_ASSERT_COMMIT_TS_NEVER);
+	} else if (WT_STRING_MATCH("never", cval.str, cval.len)) {
+		FLD_SET(btree->assert_flags, WT_ASSERT_COMMIT_TS_NEVER);
+		FLD_CLR(btree->assert_flags, WT_ASSERT_COMMIT_TS_ALWAYS);
+	} else
+		FLD_CLR(btree->assert_flags,
+		    WT_ASSERT_COMMIT_TS_ALWAYS | WT_ASSERT_COMMIT_TS_NEVER);
+	WT_RET(__wt_config_gets(session, cfg, "assert.read_timestamp", &cval));
+	if (WT_STRING_MATCH("always", cval.str, cval.len)) {
+		FLD_SET(btree->assert_flags, WT_ASSERT_READ_TS_ALWAYS);
+		FLD_CLR(btree->assert_flags, WT_ASSERT_READ_TS_NEVER);
+	} else if (WT_STRING_MATCH("never", cval.str, cval.len)) {
+		FLD_SET(btree->assert_flags, WT_ASSERT_READ_TS_NEVER);
+		FLD_CLR(btree->assert_flags, WT_ASSERT_READ_TS_ALWAYS);
+	} else
+		FLD_CLR(btree->assert_flags,
+		    WT_ASSERT_READ_TS_ALWAYS | WT_ASSERT_READ_TS_NEVER);
+
 	/* Huffman encoding */
 	WT_RET(__wt_btree_huffman_open(session));
 
@@ -549,7 +572,7 @@ __wt_btree_tree_open(
 	 * the allocated copy of the disk image on return, the in-memory object
 	 * steals it.
 	 */
-	WT_ERR(__wt_page_inmem(session, NULL, dsk.data, dsk.memsize,
+	WT_ERR(__wt_page_inmem(session, NULL, dsk.data,
 	    WT_DATA_IN_ITEM(&dsk) ?
 	    WT_PAGE_DISK_ALLOC : WT_PAGE_DISK_MAPPED, &page));
 	dsk.mem = NULL;
diff --git a/src/third_party/wiredtiger/src/btree/bt_ovfl.c b/src/third_party/wiredtiger/src/btree/bt_ovfl.c
index ebd0eb0cb71..d65073a398f 100644
--- a/src/third_party/wiredtiger/src/btree/bt_ovfl.c
+++ b/src/third_party/wiredtiger/src/btree/bt_ovfl.c
@@ -147,7 +147,7 @@ err:	__wt_scr_free(session, &tmp);
  */
 int
 __wt_ovfl_remove(WT_SESSION_IMPL *session,
-    WT_PAGE *page, WT_CELL_UNPACK *unpack, bool checkpoint)
+    WT_PAGE *page, WT_CELL_UNPACK *unpack, bool evicting)
 {
 	/*
 	 * This function solves two problems in reconciliation.
@@ -188,7 +188,7 @@ __wt_ovfl_remove(WT_SESSION_IMPL *session,
 	 * We only have to do this for checkpoints: in any eviction mode, there
 	 * can't be threads sitting in our update lists.
 	 */
-	if (checkpoint)
+	if (!evicting)
 		WT_RET(__ovfl_cache(session, page, unpack));
 
 	/*
diff --git a/src/third_party/wiredtiger/src/btree/bt_page.c b/src/third_party/wiredtiger/src/btree/bt_page.c
index 5316b19a41e..d3df9f6bf78 100644
--- a/src/third_party/wiredtiger/src/btree/bt_page.c
+++ b/src/third_party/wiredtiger/src/btree/bt_page.c
@@ -127,8 +127,8 @@ err:			if ((pindex = WT_INTL_INDEX_GET_SAFE(page)) != NULL) {
  *	Build in-memory page information.
  */
 int
-__wt_page_inmem(WT_SESSION_IMPL *session, WT_REF *ref,
-    const void *image, size_t memsize, uint32_t flags, WT_PAGE **pagep)
+__wt_page_inmem(WT_SESSION_IMPL *session,
+    WT_REF *ref, const void *image, uint32_t flags, WT_PAGE **pagep)
 {
 	WT_DECL_RET;
 	WT_PAGE *page;
@@ -196,8 +196,13 @@ __wt_page_inmem(WT_SESSION_IMPL *session, WT_REF *ref,
 	 * Track the memory allocated to build this page so we can update the
 	 * cache statistics in a single call. If the disk image is in allocated
 	 * memory, start with that.
+	 *
+	 * Accounting is based on the page-header's in-memory disk size instead
+	 * of the buffer memory used to instantiate the page image even though
+	 * the values might not match exactly, because that's the only value we
+	 * have when discarding the page image and accounting needs to match.
 	 */
-	size = LF_ISSET(WT_PAGE_DISK_ALLOC) ? memsize : 0;
+	size = LF_ISSET(WT_PAGE_DISK_ALLOC) ? dsk->mem_size : 0;
 
 	switch (page->type) {
 	case WT_PAGE_COL_FIX:
@@ -218,9 +223,10 @@ __wt_page_inmem(WT_SESSION_IMPL *session, WT_REF *ref,
 	WT_ILLEGAL_VALUE_ERR(session);
 	}
 
-	/* Update the page's in-memory size and the cache statistics. */
+	/* Update the page's cache statistics. */
 	__wt_cache_page_inmem_incr(session, page, size);
-	__wt_cache_page_image_incr(session, dsk->mem_size);
+	if (LF_ISSET(WT_PAGE_DISK_ALLOC))
+		__wt_cache_page_image_incr(session, dsk->mem_size);
 
 	/* Link the new internal page to the parent. */
 	if (ref != NULL) {
diff --git a/src/third_party/wiredtiger/src/btree/bt_random.c b/src/third_party/wiredtiger/src/btree/bt_random.c
index f28c4e10594..268b040bd6e 100644
--- a/src/third_party/wiredtiger/src/btree/bt_random.c
+++ b/src/third_party/wiredtiger/src/btree/bt_random.c
@@ -231,15 +231,17 @@ restart:	/*
 		for (i = 0; i < entries; ++i) {
 			descent =
 			    pindex->index[__wt_random(&session->rnd) % entries];
-			if (descent->state == WT_REF_MEM ||
-			    descent->state == WT_REF_DISK)
+			if (descent->state == WT_REF_DISK ||
+			    descent->state == WT_REF_LOOKASIDE ||
+			    descent->state == WT_REF_MEM)
 				break;
 		}
 		if (i == entries)
 			for (i = 0; i < entries; ++i) {
 				descent = pindex->index[i];
-				if (descent->state == WT_REF_MEM ||
-				    descent->state == WT_REF_DISK)
+				if (descent->state == WT_REF_DISK ||
+				    descent->state == WT_REF_LOOKASIDE ||
+				    descent->state == WT_REF_MEM)
 					break;
 			}
 		if (i == entries || descent == NULL) {
diff --git a/src/third_party/wiredtiger/src/btree/bt_read.c b/src/third_party/wiredtiger/src/btree/bt_read.c
index edab3c8c217..ab8a8d7916b 100644
--- a/src/third_party/wiredtiger/src/btree/bt_read.c
+++ b/src/third_party/wiredtiger/src/btree/bt_read.c
@@ -8,72 +8,8 @@
 
 #include "wt_internal.h"
 
-static void __btree_verbose_lookaside_read(WT_SESSION_IMPL *);
-
-/*
- * __wt_las_remove_block --
- *	Remove all records matching a key prefix from the lookaside store.
- */
-int
-__wt_las_remove_block(WT_SESSION_IMPL *session,
-    WT_CURSOR *cursor, uint32_t btree_id, const uint8_t *addr, size_t addr_size)
-{
-	WT_DECL_RET;
-	WT_ITEM las_addr, las_key, las_timestamp;
-	uint64_t las_counter, las_txnid, remove_cnt;
-	uint32_t las_id;
-	int exact;
-
-	remove_cnt = 0;
-
-	/*
-	 * Search for the block's unique prefix and step through all matching
-	 * records, removing them.
-	 */
-	las_addr.data = addr;
-	las_addr.size = addr_size;
-	las_key.size = 0;
-	las_timestamp.size = 0;
-	cursor->set_key(cursor, btree_id, &las_addr,
-	    (uint64_t)0, (uint32_t)0, &las_timestamp, &las_key);
-	if ((ret = cursor->search_near(cursor, &exact)) == 0 && exact < 0)
-		ret = cursor->next(cursor);
-	for (; ret == 0; ret = cursor->next(cursor)) {
-		WT_ERR(cursor->get_key(cursor, &las_id, &las_addr, &las_counter,
-		    &las_txnid, &las_timestamp, &las_key));
-
-		/*
-		 * Confirm the search using the unique prefix; if not a match,
-		 * we're done searching for records for this page.
-		 */
-		 if (las_id != btree_id ||
-		     las_addr.size != addr_size ||
-		     memcmp(las_addr.data, addr, addr_size) != 0)
-			break;
-
-		/*
-		 * Cursor opened overwrite=true: won't return WT_NOTFOUND should
-		 * another thread remove the record before we do, and the cursor
-		 * remains positioned in that case.
-		 */
-		WT_ERR(cursor->remove(cursor));
-		++remove_cnt;
-	}
-	WT_ERR_NOTFOUND_OK(ret);
-
-err:	/*
-	 * If there were races to remove records, we can over-count.  All
-	 * arithmetic is signed, so underflow isn't fatal, but check anyway so
-	 * we don't skew low over time.
-	 */
-	if (remove_cnt > S2C(session)->las_record_cnt)
-		S2C(session)->las_record_cnt = 0;
-	else if (remove_cnt > 0)
-		(void)__wt_atomic_sub64(
-		    &S2C(session)->las_record_cnt, remove_cnt);
-
-	return (ret);
-}
+static void __btree_verbose_lookaside_read(
+		WT_SESSION_IMPL *, uint32_t, uint64_t);
 
 /*
  * __col_instantiate --
@@ -88,13 +24,17 @@ __col_instantiate(WT_SESSION_IMPL *session,
 
 	page = ref->page;
 
-	/* Discard any of the updates we don't need. */
+	/*
+	 * Discard any of the updates we don't need.
+	 *
+	 * Just free the memory: it hasn't been accounted for on the page yet.
+	 */
 	if (updlist->next != NULL &&
 	    (upd = __wt_update_obsolete_check(session, page, updlist)) != NULL)
-		__wt_update_obsolete_free(session, page, upd);
+		__wt_free_update_list(session, upd);
 
 	/* Search the page and add updates. */
-	WT_RET(__wt_col_search(session, recno, ref, cbt));
+	WT_RET(__wt_col_search(session, recno, ref, cbt, true));
 	WT_RET(__wt_col_modify(
 	    session, cbt, recno, NULL, updlist, WT_UPDATE_INVALID, false));
 	return (0);
@@ -113,13 +53,17 @@ __row_instantiate(WT_SESSION_IMPL *session,
 
 	page = ref->page;
 
-	/* Discard any of the updates we don't need. */
+	/*
+	 * Discard any of the updates we don't need.
+	 *
+	 * Just free the memory: it hasn't been accounted for on the page yet.
+	 */
 	if (updlist->next != NULL &&
 	    (upd = __wt_update_obsolete_check(session, page, updlist)) != NULL)
-		__wt_update_obsolete_free(session, page, upd);
+		__wt_free_update_list(session, upd);
 
 	/* Search the page and add updates. */
-	WT_RET(__wt_row_search(session, key, ref, cbt, true));
+	WT_RET(__wt_row_search(session, key, ref, cbt, true, true));
 	WT_RET(__wt_row_modify(
 	    session, cbt, key, NULL, updlist, WT_UPDATE_INVALID, false));
 	return (0);
@@ -130,23 +74,21 @@ __row_instantiate(WT_SESSION_IMPL *session,
  *	Instantiate lookaside update records in a recently read page.
  */
 static int
-__las_page_instantiate(WT_SESSION_IMPL *session,
-    WT_REF *ref, uint32_t read_id, const uint8_t *addr, size_t addr_size)
+__las_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t btree_id)
 {
 	WT_CURSOR *cursor;
 	WT_CURSOR_BTREE cbt;
 	WT_DECL_ITEM(current_key);
 	WT_DECL_RET;
-	WT_DECL_TIMESTAMP(timestamp)
-	WT_ITEM las_addr, las_key, las_timestamp, las_value;
+	WT_ITEM las_key, las_timestamp, las_value;
 	WT_PAGE *page;
 	WT_UPDATE *first_upd, *last_upd, *upd;
 	size_t incr, total_incr;
-	uint64_t current_recno, las_counter, las_txnid, recno, upd_txnid;
+	uint64_t current_recno, las_counter, las_pageid, las_txnid, recno;
 	uint32_t las_id, session_flags;
+	const uint8_t *p;
 	uint8_t upd_type;
 	int exact;
-	const uint8_t *p;
 
 	cursor = NULL;
 	page = ref->page;
@@ -174,47 +116,29 @@ __las_page_instantiate(WT_SESSION_IMPL *session,
 	 * Search for the block's unique prefix, stepping through any matching
 	 * records.
 	 */
-	las_addr.data = addr;
-	las_addr.size = addr_size;
-	las_timestamp.size = 0;
-	cursor->set_key(cursor, read_id, &las_addr,
-	    (uint64_t)0, (uint32_t)0, &las_timestamp, &las_key);
+	cursor->set_key(cursor,
+	    btree_id, ref->page_las->las_pageid, (uint64_t)0, &las_key);
 	if ((ret = cursor->search_near(cursor, &exact)) == 0 && exact < 0)
 		ret = cursor->next(cursor);
 	for (; ret == 0; ret = cursor->next(cursor)) {
-		WT_ERR(cursor->get_key(cursor, &las_id, &las_addr, &las_counter,
-		    &las_txnid, &las_timestamp, &las_key));
+		WT_ERR(cursor->get_key(cursor,
+		    &las_id, &las_pageid, &las_counter, &las_key));
 
 		/*
 		 * Confirm the search using the unique prefix; if not a match,
 		 * we're done searching for records for this page.
 		 */
-		if (las_id != read_id ||
-		    las_addr.size != addr_size ||
-		    memcmp(las_addr.data, addr, addr_size) != 0)
+		if (las_id != btree_id ||
+		    las_pageid != ref->page_las->las_pageid)
 			break;
 
-		/*
-		 * If the on-page value has become globally visible, this record
-		 * is no longer needed.
-		 *
-		 * Copy the timestamp from the cursor to avoid unaligned reads.
-		 */
-#ifdef HAVE_TIMESTAMPS
-		WT_ASSERT(session, las_timestamp.size == WT_TIMESTAMP_SIZE);
-		memcpy(&timestamp, las_timestamp.data, las_timestamp.size);
-#endif
-		if (__wt_txn_visible_all(
-		    session, las_txnid, WT_TIMESTAMP_NULL(&timestamp)))
-			continue;
-
 		/* Allocate the WT_UPDATE structure. */
 		WT_ERR(cursor->get_value(cursor,
-		    &upd_txnid, &las_timestamp, &upd_type, &las_value));
+		    &las_txnid, &las_timestamp, &upd_type, &las_value));
 		WT_ERR(__wt_update_alloc(
 		    session, &las_value, &upd, &incr, upd_type));
 		total_incr += incr;
-		upd->txnid = upd_txnid;
+		upd->txnid = las_txnid;
 #ifdef HAVE_TIMESTAMPS
 		WT_ASSERT(session, las_timestamp.size == WT_TIMESTAMP_SIZE);
 		memcpy(&upd->timestamp, las_timestamp.data, las_timestamp.size);
@@ -287,16 +211,8 @@ __las_page_instantiate(WT_SESSION_IMPL *session,
 	if (total_incr != 0) {
 		__wt_cache_page_inmem_incr(session, page, total_incr);
 
-		/*
-		 * We've modified/dirtied the page, but that's not necessary and
-		 * if we keep the page clean, it's easier to evict. We leave the
-		 * lookaside table updates in place, so if we evict this page
-		 * without dirtying it, any future instantiation of it will find
-		 * the records it needs. If the page is dirtied before eviction,
-		 * then we'll write any needed lookaside table records for the
-		 * new location of the page.
-		 */
-		__wt_page_modify_clear(session, page);
+		/* Make sure the page is included in the next checkpoint. */
+		page->modify->first_dirty_txn = WT_TXN_FIRST;
 	}
 
 err:	WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));
@@ -384,12 +300,12 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref)
 {
 	struct timespec start, stop;
 	WT_BTREE *btree;
+	WT_CURSOR *las_cursor;
 	WT_DECL_RET;
 	WT_ITEM tmp;
 	WT_PAGE *page;
-	const WT_PAGE_HEADER *dsk;
 	size_t addr_size;
-	uint32_t previous_state;
+	uint32_t new_state, previous_state, session_flags;
 	const uint8_t *addr;
 	bool timer;
 
@@ -404,26 +320,36 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref)
 
 	/*
 	 * Attempt to set the state to WT_REF_READING for normal reads, or
-	 * WT_REF_LOCKED, for deleted pages.  If successful, we've won the
-	 * race, read the page.
+	 * WT_REF_LOCKED, for deleted pages or pages with lookaside entries.
+	 * If successful, we've won the race, read the page.
 	 */
-	if (__wt_atomic_casv32(&ref->state, WT_REF_DISK, WT_REF_READING))
-		previous_state = WT_REF_DISK;
-	else if (__wt_atomic_casv32(&ref->state, WT_REF_DELETED, WT_REF_LOCKED))
-		previous_state = WT_REF_DELETED;
-	else
+	switch (previous_state = ref->state) {
+	case WT_REF_DISK:
+		new_state = WT_REF_READING;
+		break;
+	case WT_REF_DELETED:
+	case WT_REF_LOOKASIDE:
+		new_state = WT_REF_LOCKED;
+		break;
+	default:
+		return (0);
+	}
+	if (!__wt_atomic_casv32(&ref->state, previous_state, new_state))
 		return (0);
 
 	/*
-	 * Get the address: if there is no address, the page was deleted, but a
-	 * subsequent search or insert is forcing re-creation of the name space.
+	 * Get the address: if there is no address, the page was deleted or had
+	 * only lookaside entries, and a subsequent search or insert is forcing
+	 * re-creation of the name space.
 	 */
 	__wt_ref_info(ref, &addr, &addr_size, NULL);
 	if (addr == NULL) {
-		WT_ASSERT(session, previous_state == WT_REF_DELETED);
+		WT_ASSERT(session, previous_state != WT_REF_DISK);
 
 		WT_ERR(__wt_btree_new_leaf_page(session, &page));
 		ref->page = page;
+		if (previous_state == WT_REF_LOOKASIDE)
+			goto skip_read;
 		goto done;
 	}
 
@@ -441,16 +367,18 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref)
 		WT_STAT_CONN_INCRV(session, cache_read_app_time,
 		    WT_TIMEDIFF_US(stop, start));
 	}
-	WT_ERR(__wt_page_inmem(session, ref, tmp.data, tmp.memsize,
-	    WT_DATA_IN_ITEM(&tmp) ?
-	    WT_PAGE_DISK_ALLOC : WT_PAGE_DISK_MAPPED, &page));
 
 	/*
-	 * Clear the local reference to an allocated copy of the disk image on
-	 * return; the page steals it, errors in this code should not free it.
+	 * Build the in-memory version of the page. Clear our local reference to
+	 * the allocated copy of the disk image on return, the in-memory object
+	 * steals it.
 	 */
+	WT_ERR(__wt_page_inmem(session, ref, tmp.data,
+	    WT_DATA_IN_ITEM(&tmp) ?
+	    WT_PAGE_DISK_ALLOC : WT_PAGE_DISK_MAPPED, &page));
 	tmp.mem = NULL;
 
+skip_read:
 	/*
 	 * If reading for a checkpoint, there's no additional work to do, the
 	 * page on disk is correct as written.
@@ -468,18 +396,31 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref)
 	 * We only care if the lookaside table is currently active, check that
 	 * before doing any work.
 	 */
-	dsk = tmp.data;
-	if (F_ISSET(dsk, WT_PAGE_LAS_UPDATE) && __wt_las_is_written(session)) {
-		__btree_verbose_lookaside_read(session);
+	if (previous_state == WT_REF_LOOKASIDE) {
+		WT_ASSERT(session, (ref->page->dsk == NULL ||
+		    F_ISSET(ref->page->dsk, WT_PAGE_LAS_UPDATE)));
+
+		__btree_verbose_lookaside_read(
+		    session, btree->id, ref->page_las->las_pageid);
 		WT_STAT_CONN_INCR(session, cache_read_lookaside);
 		WT_STAT_DATA_INCR(session, cache_read_lookaside);
+		WT_ERR(__las_page_instantiate(session, ref, btree->id));
 
-		WT_ERR(__las_page_instantiate(
-		    session, ref, btree->id, addr, addr_size));
+		/*
+		 * The page is instantiated so we no longer need the lookaside
+		 * entries.  Note that we are discarding updates so the page
+		 * must be marked available even if these operations fail.
+		 */
+		__wt_las_cursor(session, &las_cursor, &session_flags);
+		WT_TRET(__wt_las_remove_block(
+		    session, las_cursor, btree->id, ref->page_las->las_pageid));
+		__wt_free(session, ref->page_las);
+		WT_TRET(__wt_las_cursor_close(
+		    session, &las_cursor, session_flags));
 	}
 
 done:	WT_PUBLISH(ref->state, WT_REF_MEM);
-	return (0);
+	return (ret);
 
 err:	/*
 	 * If the function building an in-memory version of the page failed,
@@ -512,7 +453,7 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
 	WT_PAGE *page;
 	uint64_t sleep_cnt, wait_cnt;
 	int force_attempts;
-	bool busy, cache_work, evict_soon, stalled;
+	bool busy, cache_work, did_read, evict_soon, stalled;
 
 	btree = S2BT(session);
 
@@ -525,7 +466,7 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
 		WT_STAT_DATA_INCR(session, cache_pages_requested);
 	}
 
-	for (evict_soon = stalled = false,
+	for (did_read = evict_soon = stalled = false,
 	    force_attempts = 0, sleep_cnt = wait_cnt = 0;;) {
 		switch (ref->state) {
 		case WT_REF_DELETED:
@@ -534,8 +475,26 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
 				return (WT_NOTFOUND);
 			/* FALLTHROUGH */
 		case WT_REF_DISK:
-			if (LF_ISSET(WT_READ_CACHE))
-				return (WT_NOTFOUND);
+		case WT_REF_LOOKASIDE:
+			if (LF_ISSET(WT_READ_CACHE)) {
+				if (ref->state != WT_REF_LOOKASIDE)
+					return (WT_NOTFOUND);
+				if (!LF_ISSET(WT_READ_LOOKASIDE))
+					return (WT_NOTFOUND);
+#ifdef HAVE_TIMESTAMPS
+				/*
+				 * Skip lookaside pages if reading as of a
+				 * timestamp and all the updates are in the
+				 * future.
+				 */
+				if (F_ISSET(
+				    &session->txn, WT_TXN_HAS_TS_READ) &&
+				    __wt_timestamp_cmp(
+				    &ref->page_las->min_timestamp,
+				    &session->txn.read_timestamp) > 0)
+					return (WT_NOTFOUND);
+#endif
+			}
 
 			/*
 			 * The page isn't in memory, read it. If this thread is
@@ -548,6 +507,12 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
 			WT_RET(__page_read(session, ref));
 
 			/*
+			 * We just read a page, don't evict it before we have a
+			 * chance to use it.
+			 */
+			did_read = true;
+
+			/*
 			 * If configured to not trash the cache, leave the page
 			 * generation unset, we'll set it before returning to
 			 * the oldest read generation, so the page is forcibly
@@ -610,7 +575,7 @@ __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
 			 * the page's generation number. If eviction isn't being
 			 * done on this file, we're done.
 			 */
-			if (LF_ISSET(WT_READ_NO_EVICT) ||
+			if (did_read || LF_ISSET(WT_READ_NO_EVICT) ||
 			    F_ISSET(session, WT_SESSION_NO_EVICTION) ||
 			    btree->evict_disabled > 0 || btree->lsm_primary)
 				goto skip_evict;
@@ -706,7 +671,8 @@ skip_evict:
  *	performing a lookaside table read.
  */
 static void
-__btree_verbose_lookaside_read(WT_SESSION_IMPL *session)
+__btree_verbose_lookaside_read(
+    WT_SESSION_IMPL *session, uint32_t las_id, uint64_t las_pageid)
 {
 #ifdef HAVE_VERBOSE
 	WT_CONNECTION_IMPL *conn;
@@ -733,10 +699,14 @@ __btree_verbose_lookaside_read(WT_SESSION_IMPL *session)
 		if (__wt_atomic_casv64(&conn->las_verb_gen_read,
 			ckpt_gen_last, ckpt_gen_current)) {
 			__wt_verbose(session, WT_VERB_LOOKASIDE,
-			    "%s", "Read from lookaside file triggered.");
+			    "Read from lookaside file triggered for "
+			    "file ID %" PRIu32 ", page ID %" PRIu64,
+			    las_id, las_pageid);
 		}
 	}
 #else
 	WT_UNUSED(session);
+	WT_UNUSED(las_id);
+	WT_UNUSED(las_pageid);
 #endif
 }
diff --git a/src/third_party/wiredtiger/src/btree/bt_rebalance.c b/src/third_party/wiredtiger/src/btree/bt_rebalance.c
index 7f9693f22c0..c6d9253b2d3 100644
--- a/src/third_party/wiredtiger/src/btree/bt_rebalance.c
+++ b/src/third_party/wiredtiger/src/btree/bt_rebalance.c
@@ -262,9 +262,12 @@ __rebalance_row_leaf_key(WT_SESSION_IMPL *session,
 	 * We need the first key from a leaf page. Leaf pages are relatively
 	 * complex (Huffman encoding, prefix compression, and so on), do the
 	 * work to instantiate the page and copy the first key to the buffer.
+	 *
+	 * Page flags are 0 because we aren't releasing the memory used to read
+	 * the page into memory and we don't want page discard to free it.
 	 */
 	WT_RET(__wt_bt_read(session, rs->tmp1, addr, addr_len));
-	WT_RET(__wt_page_inmem(session, NULL, rs->tmp1->data, 0, 0, &page));
+	WT_RET(__wt_page_inmem(session, NULL, rs->tmp1->data, 0, &page));
 	ret = __wt_row_leaf_key_copy(session, page, &page->pg_row[0], key);
 	__wt_page_out(session, &page);
 	return (ret);
diff --git a/src/third_party/wiredtiger/src/btree/bt_slvg.c b/src/third_party/wiredtiger/src/btree/bt_slvg.c
index 4a43dd67ff6..e2da77348f0 100644
--- a/src/third_party/wiredtiger/src/btree/bt_slvg.c
+++ b/src/third_party/wiredtiger/src/btree/bt_slvg.c
@@ -588,8 +588,12 @@ __slvg_trk_leaf(WT_SESSION_IMPL *session,
 		 * and copy the full keys, then free the page. We do this on
 		 * every leaf page, and if you need to speed up the salvage,
 		 * it's probably a great place to start.
+		 *
+		 * Page flags are 0 because we aren't releasing the memory used
+		 * to read the page into memory and we don't want page discard
+		 * to free it.
 		 */
-		WT_ERR(__wt_page_inmem(session, NULL, dsk, 0, 0, &page));
+		WT_ERR(__wt_page_inmem(session, NULL, dsk, 0, &page));
 		WT_ERR(__wt_row_leaf_key_copy(session,
 		    page, &page->pg_row[0], &trk->row_start));
 		WT_ERR(__wt_row_leaf_key_copy(session,
@@ -1285,7 +1289,8 @@ __slvg_col_build_leaf(WT_SESSION_IMPL *session, WT_TRACK *trk, WT_REF *ref)
 
 	/* Write the new version of the leaf page to disk. */
 	WT_ERR(__slvg_modify_init(session, page));
-	WT_ERR(__wt_reconcile(session, ref, cookie, WT_VISIBILITY_ERR, NULL));
+	WT_ERR(__wt_reconcile(
+	    session, ref, cookie, WT_REC_VISIBILITY_ERR, NULL));
 
 	/* Reset the page. */
 	page->pg_var = save_col_var;
@@ -1735,10 +1740,13 @@ __slvg_row_trk_update_start(
 	 * Read and instantiate the WT_TRACK page (we don't have to verify the
 	 * page, nor do we have to be quiet on error, we've already read this
 	 * page successfully).
+	 *
+	 * Page flags are 0 because we aren't releasing the memory used to read
+	 * the page into memory and we don't want page discard to free it.
 	 */
 	WT_RET(__wt_scr_alloc(session, trk->trk_size, &dsk));
 	WT_ERR(__wt_bt_read(session, dsk, trk->trk_addr, trk->trk_addr_size));
-	WT_ERR(__wt_page_inmem(session, NULL, dsk->mem, 0, 0, &page));
+	WT_ERR(__wt_page_inmem(session, NULL, dsk->data, 0, &page));
 
 	/*
 	 * Walk the page, looking for a key sorting greater than the specified
@@ -1998,7 +2006,8 @@ __slvg_row_build_leaf(
 
 	/* Write the new version of the leaf page to disk. */
 	WT_ERR(__slvg_modify_init(session, page));
-	WT_ERR(__wt_reconcile(session, ref, cookie, WT_VISIBILITY_ERR, NULL));
+	WT_ERR(__wt_reconcile(
+	    session, ref, cookie, WT_REC_VISIBILITY_ERR, NULL));
 
 	/* Reset the page. */
 	page->entries += skip_stop;
diff --git a/src/third_party/wiredtiger/src/btree/bt_split.c b/src/third_party/wiredtiger/src/btree/bt_split.c
index 1e76deb66d7..884ee9b5c8b 100644
--- a/src/third_party/wiredtiger/src/btree/bt_split.c
+++ b/src/third_party/wiredtiger/src/btree/bt_split.c
@@ -1385,10 +1385,12 @@ __split_multi_inmem(
 	WT_DECL_RET;
 	WT_PAGE *page;
 	WT_SAVE_UPD *supd;
-	WT_UPDATE *upd;
+	WT_UPDATE *prev_upd, *upd;
 	uint64_t recno;
 	uint32_t i, slot;
 
+	WT_ASSERT(session, multi->las_pageid == 0);
+
 	/*
 	 * In 04/2016, we removed column-store record numbers from the WT_PAGE
 	 * structure, leading to hard-to-debug problems because we corrupt the
@@ -1409,9 +1411,8 @@ __split_multi_inmem(
 	 * when discarding the original page, and our caller will discard the
 	 * allocated page on error, when discarding the allocated WT_REF.
 	 */
-	WT_RET(__wt_page_inmem(session, ref,
-	    multi->disk_image, ((WT_PAGE_HEADER *)multi->disk_image)->mem_size,
-	    WT_PAGE_DISK_ALLOC, &page));
+	WT_RET(__wt_page_inmem(
+	    session, ref, multi->disk_image, WT_PAGE_DISK_ALLOC, &page));
 	multi->disk_image = NULL;
 
 	/*
@@ -1434,7 +1435,7 @@ __split_multi_inmem(
 	__wt_btcur_open(&cbt);
 
 	/* Re-create each modification we couldn't write. */
-	for (i = 0, supd = multi->supd; i < multi->supd_entries; ++i, ++supd)
+	for (i = 0, supd = multi->supd; i < multi->supd_entries; ++i, ++supd) {
 		switch (orig->type) {
 		case WT_PAGE_COL_FIX:
 		case WT_PAGE_COL_VAR:
@@ -1443,7 +1444,8 @@ __split_multi_inmem(
 			recno = WT_INSERT_RECNO(supd->ins);
 
 			/* Search the page. */
-			WT_ERR(__wt_col_search(session, recno, ref, &cbt));
+			WT_ERR(__wt_col_search(
+			    session, recno, ref, &cbt, true));
 
 			/* Apply the modification. */
 			WT_ERR(__wt_col_modify(session, &cbt,
@@ -1465,7 +1467,8 @@ __split_multi_inmem(
 			}
 
 			/* Search the page. */
-			WT_ERR(__wt_row_search(session, key, ref, &cbt, true));
+			WT_ERR(__wt_row_search(
+			    session, key, ref, &cbt, true, true));
 
 			/* Apply the modification. */
 			WT_ERR(__wt_row_modify(session,
@@ -1474,6 +1477,37 @@ __split_multi_inmem(
 		WT_ILLEGAL_VALUE_ERR(session);
 		}
 
+		/*
+		 * Discard the update used to create the on-page disk image.
+		 * This is not just a performance issue: if the update used to
+		 * create the value for this on-page disk image was a modify,
+		 * and it was applied to the previous on-page value to
+		 * determine a value to write to this disk image, that update
+		 * cannot be applied to the new on-page value without risking
+		 * corruption.
+		 */
+		if (supd->onpage_upd != NULL) {
+			for (prev_upd = upd; prev_upd != NULL &&
+			    prev_upd->next != supd->onpage_upd;
+			    prev_upd = prev_upd->next)
+				;
+			/*
+			 * If the on-page update was in fact a tombstone, there
+			 * will be no value on the page.  Don't throw the
+			 * tombstone away: we may need it to correctly resolve
+			 * modifications.
+			 */
+			if (supd->onpage_upd->type == WT_UPDATE_DELETED &&
+			   prev_upd != NULL)
+				prev_upd = prev_upd->next;
+			if (prev_upd != NULL) {
+				__wt_update_obsolete_free(
+				    session, page, prev_upd->next);
+				prev_upd->next = NULL;
+			}
+		}
+	}
+
 	/*
 	 * When modifying the page we set the first dirty transaction to the
 	 * last transaction currently running.  However, the updates we made
@@ -1620,7 +1654,16 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session,
 		addr->type = multi->addr.type;
 		WT_RET(__wt_memdup(session,
 		    multi->addr.addr, addr->size, &addr->addr));
-		ref->state = WT_REF_DISK;
+		if (multi->las_pageid != 0) {
+			WT_RET(__wt_calloc_one(session, &ref->page_las));
+			ref->page_las->las_pageid = multi->las_pageid;
+#ifdef HAVE_TIMESTAMPS
+			__wt_timestamp_set(&ref->page_las->min_timestamp,
+			    &multi->las_min_timestamp);
+#endif
+			ref->state = WT_REF_LOOKASIDE;
+		} else
+			ref->state = WT_REF_DISK;
 	}
 
 	/*
diff --git a/src/third_party/wiredtiger/src/btree/bt_sync.c b/src/third_party/wiredtiger/src/btree/bt_sync.c
index 75f1c6ef930..02ff0a1a4be 100644
--- a/src/third_party/wiredtiger/src/btree/bt_sync.c
+++ b/src/third_party/wiredtiger/src/btree/bt_sync.c
@@ -62,6 +62,81 @@ __sync_checkpoint_can_skip(WT_SESSION_IMPL *session, WT_PAGE *page)
 }
 
 /*
+ * __sync_dup_walk --
+ *	Duplicate a tree walk point.
+ */
+static inline int
+__sync_dup_walk(
+    WT_SESSION_IMPL *session, WT_REF *walk, uint32_t flags, WT_REF **dupp)
+{
+	WT_REF *old;
+	bool busy;
+
+	if ((old = *dupp) != NULL) {
+		*dupp = NULL;
+		WT_RET(__wt_page_release(session, old, flags));
+	}
+
+	/* It is okay to duplicate a walk before it starts. */
+	if (walk == NULL || __wt_ref_is_root(walk)) {
+		*dupp = walk;
+		return (0);
+	}
+
+	/* Get a duplicate hazard pointer. */
+	for (;;) {
+#ifdef HAVE_DIAGNOSTIC
+		WT_RET(
+		    __wt_hazard_set(session, walk, &busy, __func__, __LINE__));
+#else
+		WT_RET(__wt_hazard_set(session, walk, &busy));
+#endif
+		/*
+		 * We already have a hazard pointer, we should generally be able
+		 * to get another one. We can get spurious busy errors (e.g., if
+		 * eviction is attempting to lock the page), so keep trying: we
+		 * have one hazard pointer and should be able to get another.
+		 */
+		if (!busy)
+			break;
+		__wt_yield();
+	}
+
+	*dupp = walk;
+	return (0);
+}
+
+/*
+ * __sync_evict_page --
+ *	Attempt to evict a page during a checkpoint walk.
+ */
+static int
+__sync_evict_page(WT_SESSION_IMPL *session, WT_REF **walkp, uint32_t flags)
+{
+	WT_DECL_RET;
+	WT_REF *next, *to_evict;
+
+	to_evict = *walkp;
+	next = NULL;
+
+	/*
+	 * Get the ref after the page we're trying to evict.  If the
+	 * eviction is successful, the walk will continue from here.
+	 */
+	WT_RET(__sync_dup_walk(session, to_evict, flags, &next));
+	WT_ERR(__wt_tree_walk(session, &next, flags));
+
+	WT_ERR(__wt_page_release_evict(session, to_evict));
+
+	/* Success: continue the walk at the next page. */
+	*walkp = next;
+	return (0);
+
+err:	WT_TRET(__wt_page_release(session, next, flags));
+	return (ret);
+}
+
+/*
  * __sync_file --
  *	Flush pages for a specific file.
  */
@@ -73,22 +148,23 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
 	WT_CONNECTION_IMPL *conn;
 	WT_DECL_RET;
 	WT_PAGE *page;
-	WT_REF *walk;
+	WT_REF *prev, *walk;
 	WT_TXN *txn;
 	uint64_t internal_bytes, internal_pages, leaf_bytes, leaf_pages;
 	uint64_t oldest_id, saved_pinned_id;
 	uint32_t flags;
-	bool timer;
+	bool evict_failed, skip_walk, timer;
 
 	conn = S2C(session);
 	btree = S2BT(session);
-	walk = NULL;
+	prev = walk = NULL;
 	txn = &session->txn;
-	saved_pinned_id = WT_SESSION_TXN_STATE(session)->pinned_id;
-	flags = WT_READ_CACHE | WT_READ_NO_GEN;
+	evict_failed = skip_walk = false;
 
+	flags = WT_READ_CACHE | WT_READ_NO_GEN;
 	internal_bytes = leaf_bytes = 0;
 	internal_pages = leaf_pages = 0;
+	saved_pinned_id = WT_SESSION_TXN_STATE(session)->pinned_id;
 	timer = WT_VERBOSE_ISSET(session, WT_VERB_CHECKPOINT);
 	if (timer)
 		__wt_epoch(session, &start);
@@ -119,8 +195,8 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
 		 */
 		oldest_id = __wt_txn_oldest_id(session);
 
-		flags |= WT_READ_NO_WAIT | WT_READ_SKIP_INTL;
-		for (walk = NULL;;) {
+		LF_SET(WT_READ_NO_WAIT | WT_READ_SKIP_INTL);
+		for (;;) {
 			WT_ERR(__wt_tree_walk(session, &walk, flags));
 			if (walk == NULL)
 				break;
@@ -139,7 +215,7 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
 				leaf_bytes += page->memory_footprint;
 				++leaf_pages;
 				WT_ERR(__wt_reconcile(session,
-				    walk, NULL, WT_CHECKPOINTING, NULL));
+				    walk, NULL, WT_REC_CHECKPOINT, NULL));
 			}
 		}
 		break;
@@ -184,9 +260,19 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
 		btree->checkpointing = WT_CKPT_RUNNING;
 
 		/* Write all dirty in-cache pages. */
-		flags |= WT_READ_NO_EVICT;
-		for (walk = NULL;;) {
-			WT_ERR(__wt_tree_walk(session, &walk, flags));
+		LF_SET(WT_READ_NO_EVICT);
+
+		/* Read pages with lookaside entries and evict them asap. */
+		LF_SET(WT_READ_LOOKASIDE | WT_READ_WONT_NEED);
+
+		for (;;) {
+			if (!skip_walk) {
+				WT_ERR(__sync_dup_walk(
+				    session, walk, flags, &prev));
+				WT_ERR(__wt_tree_walk(session, &walk, flags));
+			}
+			skip_walk = false;
+
 			if (walk == NULL)
 				break;
 
@@ -221,8 +307,39 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
 				leaf_bytes += page->memory_footprint;
 				++leaf_pages;
 			}
+
+			/*
+			 * If the page needs forced eviction, try to do that
+			 * now.
+			 *
+			 * For eviction to have a chance, we first need to move
+			 * the walk point to the next page checkpoint will
+			 * visit.  We want to avoid this code being too special
+			 * purpose, so try to reuse the ordinary eviction path.
+			 *
+			 * If eviction succeeded, it steps to the next ref, so
+			 * we have to skip the next walk.  If eviction fails,
+			 * remember so we don't retry it.
+			 */
+			if (!WT_PAGE_IS_INTERNAL(page) &&
+			    page->read_gen == WT_READGEN_OLDEST &&
+			    !evict_failed) {
+				if ((ret = __sync_evict_page(
+				    session, &walk, flags)) == 0) {
+					evict_failed = false;
+					skip_walk = true;
+				} else {
+					walk = prev;
+					prev = NULL;
+					evict_failed = true;
+				}
+				WT_ERR_BUSY_OK(ret);
+				continue;
+			}
+
+			evict_failed = false;
 			WT_ERR(__wt_reconcile(
-			    session, walk, NULL, WT_CHECKPOINTING, NULL));
+			    session, walk, NULL, WT_REC_CHECKPOINT, NULL));
 		}
 		break;
 	case WT_SYNC_CLOSE:
@@ -244,8 +361,8 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
 	}
 
 err:	/* On error, clear any left-over tree walk. */
-	if (walk != NULL)
-		WT_TRET(__wt_page_release(session, walk, flags));
+	WT_TRET(__wt_page_release(session, walk, flags));
+	WT_TRET(__wt_page_release(session, prev, flags));
 
 	/*
 	 * If we got a snapshot in order to write pages, and there was no
diff --git a/src/third_party/wiredtiger/src/btree/bt_walk.c b/src/third_party/wiredtiger/src/btree/bt_walk.c
index fcc2336a3e5..b68c6b9c5c6 100644
--- a/src/third_party/wiredtiger/src/btree/bt_walk.c
+++ b/src/third_party/wiredtiger/src/btree/bt_walk.c
@@ -472,6 +472,11 @@ restart:	/*
 				if (LF_ISSET(WT_READ_NO_WAIT) &&
 				    ref->state != WT_REF_MEM)
 					break;
+
+				/* Skip lookaside pages if not requested. */
+				if (ref->state == WT_REF_LOOKASIDE &&
+				    !LF_ISSET(WT_READ_LOOKASIDE))
+					break;
 			} else if (LF_ISSET(WT_READ_TRUNCATE)) {
 				/*
 				 * Avoid pulling a deleted page back in to try
diff --git a/src/third_party/wiredtiger/src/btree/col_srch.c b/src/third_party/wiredtiger/src/btree/col_srch.c
index 78ee367dc69..10bc3894a0d 100644
--- a/src/third_party/wiredtiger/src/btree/col_srch.c
+++ b/src/third_party/wiredtiger/src/btree/col_srch.c
@@ -62,7 +62,7 @@ __check_leaf_key_range(WT_SESSION_IMPL *session,
  */
 int
 __wt_col_search(WT_SESSION_IMPL *session,
-    uint64_t search_recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt)
+    uint64_t search_recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt, bool restore)
 {
 	WT_BTREE *btree;
 	WT_COL *cip;
@@ -90,16 +90,15 @@ __wt_col_search(WT_SESSION_IMPL *session,
 
 	/*
 	 * We may be searching only a single leaf page, not the full tree. In
-	 * the normal case where the page links to a parent, check the page's
+	 * the normal case where we are searching a tree, check the page's
 	 * parent keys before doing the full search, it's faster when the
-	 * cursor is being re-positioned. (One case where the page doesn't
-	 * have a parent is if it is being re-instantiated in memory as part
-	 * of a split).
+	 * cursor is being re-positioned.  Skip this if the page is being
+	 * re-instantiated in memory.
 	 */
 	if (leaf != NULL) {
 		WT_ASSERT(session, search_recno != WT_RECNO_OOB);
 
-		if (leaf->home != NULL) {
+		if (!restore) {
 			WT_RET(__check_leaf_key_range(
 			    session, recno, leaf, cbt));
 			if (cbt->compare != 0) {
diff --git a/src/third_party/wiredtiger/src/btree/row_srch.c b/src/third_party/wiredtiger/src/btree/row_srch.c
index 3a9a6eb0f9b..16081e841dc 100644
--- a/src/third_party/wiredtiger/src/btree/row_srch.c
+++ b/src/third_party/wiredtiger/src/btree/row_srch.c
@@ -205,7 +205,8 @@ __check_leaf_key_range(WT_SESSION_IMPL *session,
  */
 int
 __wt_row_search(WT_SESSION_IMPL *session,
-    WT_ITEM *srch_key, WT_REF *leaf, WT_CURSOR_BTREE *cbt, bool insert)
+    WT_ITEM *srch_key, WT_REF *leaf, WT_CURSOR_BTREE *cbt,
+    bool insert, bool restore)
 {
 	WT_BTREE *btree;
 	WT_COLLATOR *collator;
@@ -250,14 +251,13 @@ __wt_row_search(WT_SESSION_IMPL *session,
 
 	/*
 	 * We may be searching only a single leaf page, not the full tree. In
-	 * the normal case where the page links to a parent, check the page's
+	 * the normal case where we are searching a tree, check the page's
 	 * parent keys before doing the full search, it's faster when the
-	 * cursor is being re-positioned. (One case where the page doesn't
-	 * have a parent is if it is being re-instantiated in memory as part
-	 * of a split).
+	 * cursor is being re-positioned.  Skip this if the page is being
+	 * re-instantiated in memory.
 	 */
 	if (leaf != NULL) {
-		if (leaf->home != NULL) {
+		if (!restore) {
 			WT_RET(__check_leaf_key_range(
 			    session, srch_key, leaf, cbt));
 			if (cbt->compare != 0) {
diff --git a/src/third_party/wiredtiger/src/cache/cache_las.c b/src/third_party/wiredtiger/src/cache/cache_las.c
index e1e47b9eecb..d9a5dbc2096 100644
--- a/src/third_party/wiredtiger/src/cache/cache_las.c
+++ b/src/third_party/wiredtiger/src/cache/cache_las.c
@@ -89,17 +89,24 @@ __wt_las_create(WT_SESSION_IMPL *session)
 	WT_RET(__wt_session_create(session, WT_LAS_URI, WT_LAS_FORMAT));
 
 	/*
+	 * Flag that the lookaside table has been created (before creating the
+	 * connection's lookaside table session: it checks the flag before
+	 * creating a lookaside table cursor).
+	 */
+	F_SET(conn, WT_CONN_LAS_OPEN);
+
+	/*
 	 * Open a shared internal session used to access the lookaside table.
 	 * This session should never be tapped for eviction.
 	 */
 	session_flags = WT_SESSION_LOOKASIDE_CURSOR | WT_SESSION_NO_EVICTION;
-	WT_RET(__wt_open_internal_session(
+	WT_ERR(__wt_open_internal_session(
 	    conn, "lookaside table", true, session_flags, &conn->las_session));
 
-	/* Flag that the lookaside table has been created. */
-	F_SET(conn, WT_CONN_LAS_OPEN);
-
 	return (0);
+
+err:	F_CLR(conn, WT_CONN_LAS_OPEN);
+	return (ret);
 }
 
 /*
@@ -127,38 +134,6 @@ __wt_las_destroy(WT_SESSION_IMPL *session)
 }
 
 /*
- * __wt_las_set_written --
- *	Flag that the lookaside table has been written.
- */
-void
-__wt_las_set_written(WT_SESSION_IMPL *session)
-{
-	WT_CONNECTION_IMPL *conn;
-
-	conn = S2C(session);
-	if (!conn->las_written) {
-		conn->las_written = true;
-
-		/*
-		 * Future page reads must deal with lookaside table records.
-		 * No write could be cached until a future read might matter,
-		 * the barrier is more documentation than requirement.
-		 */
-		WT_FULL_BARRIER();
-	}
-}
-
-/*
- * __wt_las_is_written --
- *	Return if the lookaside table has been written.
- */
-bool
-__wt_las_is_written(WT_SESSION_IMPL *session)
-{
-	return (S2C(session)->las_written);
-}
-
-/*
  * __wt_las_cursor_open --
  *	Open a new lookaside table cursor.
  */
@@ -280,129 +255,48 @@ __wt_las_cursor_close(
 }
 
 /*
- * __wt_las_sweep --
- *	Sweep the lookaside table.
+ * __wt_las_remove_block --
+ *	Remove all records matching a key prefix from the lookaside store.
  */
 int
-__wt_las_sweep(WT_SESSION_IMPL *session)
+__wt_las_remove_block(WT_SESSION_IMPL *session,
+    WT_CURSOR *cursor, uint32_t btree_id, uint64_t pageid)
 {
-	WT_CONNECTION_IMPL *conn;
-	WT_CURSOR *cursor;
 	WT_DECL_RET;
-	WT_DECL_TIMESTAMP(timestamp)
-	WT_ITEM *key;
-	WT_ITEM las_addr, las_key, las_timestamp;
-	uint64_t cnt, las_counter, las_txnid, remove_cnt;
-	uint32_t las_id, session_flags;
-	int notused;
+	WT_ITEM las_key;
+	uint64_t las_counter, las_pageid, remove_cnt;
+	uint32_t las_id;
+	int exact;
 
-	conn = S2C(session);
-	cursor = NULL;
-	key = &conn->las_sweep_key;
 	remove_cnt = 0;
-	session_flags = 0;		/* [-Werror=maybe-uninitialized] */
-
-	__wt_las_cursor(session, &cursor, &session_flags);
 
 	/*
-	 * If we're not starting a new sweep, position the cursor using the key
-	 * from the last call (we don't care if we're before or after the key,
-	 * just roughly in the same spot is fine).
+	 * Search for the block's unique prefix and step through all matching
+	 * records, removing them.
 	 */
-	if (key->size != 0) {
-		__wt_cursor_set_raw_key(cursor, key);
-		ret = cursor->search_near(cursor, &notused);
-
-		/*
-		 * Don't search for the same key twice; if we don't set a new
-		 * key below, it's because we've reached the end of the table
-		 * and we want the next pass to start at the beginning of the
-		 * table. Searching for the same key could leave us stuck at
-		 * the end of the table, repeatedly checking the same rows.
-		 */
-		key->size = 0;
-		if (ret != 0)
-			goto srch_notfound;
-	}
-
-	/*
-	 * The sweep server wakes up every 10 seconds (by default), it's a slow
-	 * moving thread. Try to review the entire lookaside table once every 5
-	 * minutes, or every 30 calls.
-	 *
-	 * The reason is because the lookaside table exists because we're seeing
-	 * cache/eviction pressure (it allows us to trade performance and disk
-	 * space for cache space), and it's likely lookaside blocks are being
-	 * evicted, and reading them back in doesn't help things. A trickier,
-	 * but possibly better, alternative might be to review all lookaside
-	 * blocks in the cache in order to get rid of them, and slowly review
-	 * lookaside blocks that have already been evicted.
-	 */
-	cnt = WT_MAX(100, conn->las_record_cnt / 30);
-
-	/* Discard pages we read as soon as we're done with them. */
-	F_SET(session, WT_SESSION_NO_CACHE);
+	las_key.size = 0;
+	cursor->set_key(cursor, btree_id, pageid, (uint64_t)0, &las_key);
+	if ((ret = cursor->search_near(cursor, &exact)) == 0 && exact < 0)
+		ret = cursor->next(cursor);
+	for (; ret == 0; ret = cursor->next(cursor)) {
+		WT_ERR(cursor->get_key(cursor,
+		    &las_id, &las_pageid, &las_counter, &las_key));
 
-	/* Walk the file. */
-	for (; cnt > 0 && (ret = cursor->next(cursor)) == 0; --cnt) {
 		/*
-		 * If the loop terminates after completing a work unit, we will
-		 * continue the table sweep next time. Get a local copy of the
-		 * sweep key, we're going to reset the cursor; do so before
-		 * calling cursor.remove, cursor.remove can discard our hazard
-		 * pointer and the page could be evicted from underneath us.
+		 * Confirm the search using the unique prefix; if not a match,
+		 * we're done searching for records for this page.  Note that
+		 * page ID zero is special: it is a wild card indicating that
+		 * records for all pages in the tree should be removed.
 		 */
-		if (cnt == 1) {
-			WT_ERR(__wt_cursor_get_raw_key(cursor, key));
-			if (!WT_DATA_IN_ITEM(key))
-				WT_ERR(__wt_buf_set(
-				    session, key, key->data, key->size));
-		}
+		 if (las_id != btree_id ||
+		    (pageid != 0 && las_pageid != pageid))
+			break;
 
-		/*
-		 * Cursor opened overwrite=true: won't return WT_NOTFOUND should
-		 * another thread remove the record before we do, and the cursor
-		 * remains positioned in that case.
-		 */
-		WT_ERR(cursor->get_key(cursor, &las_id, &las_addr, &las_counter,
-		    &las_txnid, &las_timestamp, &las_key));
-
-		/*
-		 * If the on-page record transaction ID associated with the
-		 * record is globally visible, the record can be discarded.
-		 *
-		 * Copy the timestamp from the cursor to avoid unaligned reads.
-		 */
-#ifdef HAVE_TIMESTAMPS
-		WT_ASSERT(session, las_timestamp.size == WT_TIMESTAMP_SIZE);
-		memcpy(&timestamp, las_timestamp.data, las_timestamp.size);
-#endif
-		if (__wt_txn_visible_all(
-		    session, las_txnid, WT_TIMESTAMP_NULL(&timestamp))) {
-			WT_ERR(cursor->remove(cursor));
-			++remove_cnt;
-		}
+		WT_ERR(cursor->remove(cursor));
+		++remove_cnt;
 	}
-
-srch_notfound:
 	WT_ERR_NOTFOUND_OK(ret);
 
-	if (0) {
-err:		__wt_buf_free(session, key);
-	}
-
-	WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));
-
-	/*
-	 * If there were races to remove records, we can over-count. Underflow
-	 * isn't fatal, but check anyway so we don't skew low over time.
-	 */
-	if (remove_cnt > conn->las_record_cnt)
-		conn->las_record_cnt = 0;
-	else if (remove_cnt > 0)
-		(void)__wt_atomic_sub64(&conn->las_record_cnt, remove_cnt);
-
-	F_CLR(session, WT_SESSION_NO_CACHE);
-
+err:	WT_STAT_CONN_DECRV(session, cache_lookaside_entries, remove_cnt);
 	return (ret);
 }
diff --git a/src/third_party/wiredtiger/src/config/config_def.c b/src/third_party/wiredtiger/src/config/config_def.c
index a16ba6ba28c..d7f4f6fe148 100644
--- a/src/third_party/wiredtiger/src/config/config_def.c
+++ b/src/third_party/wiredtiger/src/config/config_def.c
@@ -202,6 +202,16 @@ static const WT_CONFIG_CHECK confchk_WT_CURSOR_reconfigure[] = {
 	{ NULL, NULL, NULL, NULL, NULL, 0 }
 };
 
+static const WT_CONFIG_CHECK confchk_assert_subconfigs[] = {
+	{ "commit_timestamp", "string",
+	    NULL, "choices=[\"always\",\"never\",\"none\"]",
+	    NULL, 0 },
+	{ "read_timestamp", "string",
+	    NULL, "choices=[\"always\",\"never\",\"none\"]",
+	    NULL, 0 },
+	{ NULL, NULL, NULL, NULL, NULL, 0 }
+};
+
 static const WT_CONFIG_CHECK
     confchk_WT_SESSION_create_log_subconfigs[] = {
 	{ "enabled", "boolean", NULL, NULL, NULL, 0 },
@@ -212,6 +222,9 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_alter[] = {
 	{ "access_pattern_hint", "string",
 	    NULL, "choices=[\"none\",\"random\",\"sequential\"]",
 	    NULL, 0 },
+	{ "assert", "category",
+	    NULL, NULL,
+	    confchk_assert_subconfigs, 2 },
 	{ "cache_resident", "boolean", NULL, NULL, NULL, 0 },
 	{ "log", "category",
 	    NULL, NULL,
@@ -285,6 +298,9 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_create[] = {
 	    NULL, "min=512B,max=128MB",
 	    NULL, 0 },
 	{ "app_metadata", "string", NULL, NULL, NULL, 0 },
+	{ "assert", "category",
+	    NULL, NULL,
+	    confchk_assert_subconfigs, 2 },
 	{ "block_allocation", "string",
 	    NULL, "choices=[\"first\",\"best\"]",
 	    NULL, 0 },
@@ -470,6 +486,9 @@ static const WT_CONFIG_CHECK confchk_file_config[] = {
 	    NULL, "min=512B,max=128MB",
 	    NULL, 0 },
 	{ "app_metadata", "string", NULL, NULL, NULL, 0 },
+	{ "assert", "category",
+	    NULL, NULL,
+	    confchk_assert_subconfigs, 2 },
 	{ "block_allocation", "string",
 	    NULL, "choices=[\"first\",\"best\"]",
 	    NULL, 0 },
@@ -531,6 +550,9 @@ static const WT_CONFIG_CHECK confchk_file_meta[] = {
 	    NULL, "min=512B,max=128MB",
 	    NULL, 0 },
 	{ "app_metadata", "string", NULL, NULL, NULL, 0 },
+	{ "assert", "category",
+	    NULL, NULL,
+	    confchk_assert_subconfigs, 2 },
 	{ "block_allocation", "string",
 	    NULL, "choices=[\"first\",\"best\"]",
 	    NULL, 0 },
@@ -612,6 +634,9 @@ static const WT_CONFIG_CHECK confchk_lsm_meta[] = {
 	    NULL, "min=512B,max=128MB",
 	    NULL, 0 },
 	{ "app_metadata", "string", NULL, NULL, NULL, 0 },
+	{ "assert", "category",
+	    NULL, NULL,
+	    confchk_assert_subconfigs, 2 },
 	{ "block_allocation", "string",
 	    NULL, "choices=[\"first\",\"best\"]",
 	    NULL, 0 },
@@ -1180,8 +1205,9 @@ static const WT_CONFIG_ENTRY config_entries[] = {
 	  confchk_WT_CURSOR_reconfigure, 2
 	},
 	{ "WT_SESSION.alter",
-	  "access_pattern_hint=none,cache_resident=false,log=(enabled=true)",
-	  confchk_WT_SESSION_alter, 3
+	  "access_pattern_hint=none,assert=(commit_timestamp=none,"
+	  "read_timestamp=none),cache_resident=false,log=(enabled=true)",
+	  confchk_WT_SESSION_alter, 4
 	},
 	{ "WT_SESSION.begin_transaction",
 	  "isolation=,name=,priority=0,read_timestamp=,snapshot=,sync=",
@@ -1205,6 +1231,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {
 	},
 	{ "WT_SESSION.create",
 	  "access_pattern_hint=none,allocation_size=4KB,app_metadata=,"
+	  "assert=(commit_timestamp=none,read_timestamp=none),"
 	  "block_allocation=best,block_compressor=,cache_resident=false,"
 	  "checksum=uncompressed,colgroups=,collator=,columns=,dictionary=0"
 	  ",encryption=(keyid=,name=),exclusive=false,extractor=,"
@@ -1220,7 +1247,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {
 	  "os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false,"
 	  "prefix_compression_min=4,source=,split_deepen_min_child=0,"
 	  "split_deepen_per_child=0,split_pct=90,type=file,value_format=u",
-	  confchk_WT_SESSION_create, 42
+	  confchk_WT_SESSION_create, 43
 	},
 	{ "WT_SESSION.drop",
 	  "checkpoint_wait=true,force=false,lock_wait=true,"
@@ -1307,6 +1334,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {
 	},
 	{ "file.config",
 	  "access_pattern_hint=none,allocation_size=4KB,app_metadata=,"
+	  "assert=(commit_timestamp=none,read_timestamp=none),"
 	  "block_allocation=best,block_compressor=,cache_resident=false,"
 	  "checksum=uncompressed,collator=,columns=,dictionary=0,"
 	  "encryption=(keyid=,name=),format=btree,huffman_key=,"
@@ -1318,10 +1346,11 @@ static const WT_CONFIG_ENTRY config_entries[] = {
 	  "os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false,"
 	  "prefix_compression_min=4,split_deepen_min_child=0,"
 	  "split_deepen_per_child=0,split_pct=90,value_format=u",
-	  confchk_file_config, 35
+	  confchk_file_config, 36
 	},
 	{ "file.meta",
 	  "access_pattern_hint=none,allocation_size=4KB,app_metadata=,"
+	  "assert=(commit_timestamp=none,read_timestamp=none),"
 	  "block_allocation=best,block_compressor=,cache_resident=false,"
 	  "checkpoint=,checkpoint_lsn=,checksum=uncompressed,collator=,"
 	  "columns=,dictionary=0,encryption=(keyid=,name=),format=btree,"
@@ -1334,7 +1363,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {
 	  "os_cache_max=0,prefix_compression=false,prefix_compression_min=4"
 	  ",split_deepen_min_child=0,split_deepen_per_child=0,split_pct=90,"
 	  "value_format=u,version=(major=0,minor=0)",
-	  confchk_file_meta, 39
+	  confchk_file_meta, 40
 	},
 	{ "index.meta",
 	  "app_metadata=,collator=,columns=,extractor=,immutable=false,"
@@ -1343,6 +1372,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {
 	},
 	{ "lsm.meta",
 	  "access_pattern_hint=none,allocation_size=4KB,app_metadata=,"
+	  "assert=(commit_timestamp=none,read_timestamp=none),"
 	  "block_allocation=best,block_compressor=,cache_resident=false,"
 	  "checksum=uncompressed,chunks=,collator=,columns=,dictionary=0,"
 	  "encryption=(keyid=,name=),format=btree,huffman_key=,"
@@ -1358,7 +1388,7 @@ static const WT_CONFIG_ENTRY config_entries[] = {
 	  "os_cache_dirty_max=0,os_cache_max=0,prefix_compression=false,"
 	  "prefix_compression_min=4,split_deepen_min_child=0,"
 	  "split_deepen_per_child=0,split_pct=90,value_format=u",
-	  confchk_lsm_meta, 39
+	  confchk_lsm_meta, 40
 	},
 	{ "table.meta",
 	  "app_metadata=,colgroups=,collator=,columns=,key_format=u,"
diff --git a/src/third_party/wiredtiger/src/conn/conn_api.c b/src/third_party/wiredtiger/src/conn/conn_api.c
index 284e7e9883b..55251491129 100644
--- a/src/third_party/wiredtiger/src/conn/conn_api.c
+++ b/src/third_party/wiredtiger/src/conn/conn_api.c
@@ -1084,6 +1084,9 @@ err:	/*
 			WT_TRET(wt_session->close(wt_session, config));
 		}
 
+	/* Shut down transactions: wait for in-flight operations to complete. */
+	WT_TRET(__wt_txn_global_shutdown(session));
+
 	/*
 	 * Perform a system-wide checkpoint so that all tables are consistent
 	 * with each other.  All transactions are resolved but ignore
diff --git a/src/third_party/wiredtiger/src/conn/conn_cache.c b/src/third_party/wiredtiger/src/conn/conn_cache.c
index 5515eb026ca..625350cf3e6 100644
--- a/src/third_party/wiredtiger/src/conn/conn_cache.c
+++ b/src/third_party/wiredtiger/src/conn/conn_cache.c
@@ -300,6 +300,11 @@ __wt_cache_destroy(WT_SESSION_IMPL *session)
 		    "cache server: exiting with %" PRIu64 " pages in "
 		    "memory and %" PRIu64 " pages evicted",
 		    cache->pages_inmem, cache->pages_evict);
+	if (cache->bytes_image != 0)
+		__wt_errx(session,
+		    "cache server: exiting with %" PRIu64 " image bytes in "
+		    "memory",
+		    cache->bytes_image);
 	if (cache->bytes_inmem != 0)
 		__wt_errx(session,
 		    "cache server: exiting with %" PRIu64 " bytes in memory",
diff --git a/src/third_party/wiredtiger/src/conn/conn_ckpt.c b/src/third_party/wiredtiger/src/conn/conn_ckpt.c
index a47524af2d7..d968d4e4b2b 100644
--- a/src/third_party/wiredtiger/src/conn/conn_ckpt.c
+++ b/src/third_party/wiredtiger/src/conn/conn_ckpt.c
@@ -161,8 +161,11 @@ __ckpt_server_start(WT_CONNECTION_IMPL *conn)
 	 *
 	 * Checkpoint does enough I/O it may be called upon to perform slow
 	 * operations for the block manager.
+	 *
+	 * The checkpoint thread reads the lookaside table for outdated records,
+	 * it gets its own cursor for that purpose.
 	 */
-	session_flags = WT_SESSION_CAN_WAIT;
+	session_flags = WT_SESSION_CAN_WAIT | WT_SESSION_LOOKASIDE_CURSOR;
 	WT_RET(__wt_open_internal_session(conn,
 	    "checkpoint-server", true, session_flags, &conn->ckpt_session));
 	session = conn->ckpt_session;
diff --git a/src/third_party/wiredtiger/src/conn/conn_dhandle.c b/src/third_party/wiredtiger/src/conn/conn_dhandle.c
index 56a37cf16eb..2606c9d083b 100644
--- a/src/third_party/wiredtiger/src/conn/conn_dhandle.c
+++ b/src/third_party/wiredtiger/src/conn/conn_dhandle.c
@@ -774,13 +774,14 @@ __wt_conn_dhandle_discard(WT_SESSION_IMPL *session)
 	__wt_session_close_cache(session);
 
 	/*
-	 * Close open data handles: first, everything but the metadata file (as
-	 * closing a normal file may open and write the metadata file), then
-	 * the metadata file.
+	 * Close open data handles: first, everything apart from metadata and
+	 * lookaside (as closing a normal file may write metadata and read
+	 * lookaside entries).  Then close whatever is left open.
 	 */
 restart:
 	TAILQ_FOREACH(dhandle, &conn->dhqh, q) {
-		if (WT_IS_METADATA(dhandle))
+		if (WT_IS_METADATA(dhandle) ||
+		    strcmp(dhandle->name, WT_LAS_URI) == 0)
 			continue;
 
 		WT_WITH_DHANDLE(session, dhandle,
@@ -789,6 +790,9 @@ restart:
 		goto restart;
 	}
 
+	/* Shut down the lookaside table after all eviction is complete. */
+	WT_TRET(__wt_las_destroy(session));
+
 	/*
 	 * Closing the files may have resulted in entries on our default
 	 * session's list of open data handles, specifically, we added the
@@ -807,7 +811,7 @@ restart:
 	if (session->meta_cursor != NULL)
 		WT_TRET(session->meta_cursor->close(session->meta_cursor));
 
-	/* Close the metadata file handle. */
+	/* Close the remaining handles. */
 	WT_TAILQ_SAFE_REMOVE_BEGIN(dhandle, &conn->dhqh, q, dhandle_tmp) {
 		WT_WITH_DHANDLE(session, dhandle,
 		    WT_TRET(__wt_conn_dhandle_discard_single(
diff --git a/src/third_party/wiredtiger/src/conn/conn_open.c b/src/third_party/wiredtiger/src/conn/conn_open.c
index 2865dc9e2fa..e72fa5c00a4 100644
--- a/src/third_party/wiredtiger/src/conn/conn_open.c
+++ b/src/third_party/wiredtiger/src/conn/conn_open.c
@@ -75,9 +75,6 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn)
 	wt_conn = &conn->iface;
 	session = conn->default_session;
 
-	/* Shut down transactions (wait for in-flight operations to complete. */
-	WT_TRET(__wt_txn_global_shutdown(session));
-
 	/* Shut down the subsystems, ensuring workers see the state change. */
 	F_SET(conn, WT_CONN_CLOSING);
 	WT_FULL_BARRIER();
@@ -111,9 +108,6 @@ __wt_connection_close(WT_CONNECTION_IMPL *conn)
 	/* The eviction server is shut down last. */
 	WT_TRET(__wt_evict_destroy(session));
 
-	/* Shut down the lookaside table, after all eviction is complete. */
-	WT_TRET(__wt_las_destroy(session));
-
 	/* Close open data handles. */
 	WT_TRET(__wt_conn_dhandle_discard(session));
 
diff --git a/src/third_party/wiredtiger/src/conn/conn_sweep.c b/src/third_party/wiredtiger/src/conn/conn_sweep.c
index 008aa6c08d8..a164e34fe33 100644
--- a/src/third_party/wiredtiger/src/conn/conn_sweep.c
+++ b/src/third_party/wiredtiger/src/conn/conn_sweep.c
@@ -278,12 +278,10 @@ __sweep_server(void *arg)
 	WT_DECL_RET;
 	WT_SESSION_IMPL *session;
 	time_t now;
-	uint64_t last_las_sweep_id, oldest_id;
 	u_int dead_handles;
 
 	session = arg;
 	conn = S2C(session);
-	last_las_sweep_id = WT_TXN_NONE;
 
 	/*
 	 * Sweep for dead and excess handles.
@@ -302,26 +300,6 @@ __sweep_server(void *arg)
 		WT_STAT_CONN_INCR(session, dh_sweeps);
 
 		/*
-		 * Sweep the lookaside table. If the lookaside table hasn't yet
-		 * been written, there's no work to do.
-		 *
-		 * Don't sweep the lookaside table if the cache is stuck full.
-		 * The sweep uses the cache and can exacerbate the problem.
-		 * If we try to sweep when the cache is full or we aren't
-		 * making progress in eviction, sweeping can wind up constantly
-		 * bringing in and evicting pages from the lookaside table,
-		 * which will stop the cache from moving into the stuck state.
-		 */
-		if (__wt_las_is_written(session) &&
-		    !__wt_cache_stuck(session)) {
-			oldest_id = __wt_txn_oldest_id(session);
-			if (WT_TXNID_LT(last_las_sweep_id, oldest_id)) {
-				WT_ERR(__wt_las_sweep(session));
-				last_las_sweep_id = oldest_id;
-			}
-		}
-
-		/*
 		 * Mark handles with a time of death, and report whether any
 		 * handles are marked dead.  If sweep_idle_time is 0, handles
 		 * never become idle.
@@ -403,14 +381,9 @@ __wt_sweep_create(WT_SESSION_IMPL *session)
 	 * Handle sweep does enough I/O it may be called upon to perform slow
 	 * operations for the block manager.
 	 *
-	 * The sweep thread sweeps the lookaside table for outdated records,
-	 * it gets its own cursor for that purpose.
-	 *
 	 * Don't tap the sweep thread for eviction.
 	 */
 	session_flags = WT_SESSION_CAN_WAIT | WT_SESSION_NO_EVICTION;
-	if (F_ISSET(conn, WT_CONN_LAS_OPEN))
-		session_flags |= WT_SESSION_LOOKASIDE_CURSOR;
 	WT_RET(__wt_open_internal_session(
 	    conn, "sweep-server", true, session_flags, &conn->sweep_session));
 	session = conn->sweep_session;
@@ -453,8 +426,5 @@ __wt_sweep_destroy(WT_SESSION_IMPL *session)
 		conn->sweep_session = NULL;
 	}
 
-	/* Discard any saved lookaside key. */
-	__wt_buf_free(session, &conn->las_sweep_key);
-
 	return (ret);
 }
diff --git a/src/third_party/wiredtiger/src/cursor/cur_file.c b/src/third_party/wiredtiger/src/cursor/cur_file.c
index 9aa93ade372..22ba6d1dee1 100644
--- a/src/third_party/wiredtiger/src/cursor/cur_file.c
+++ b/src/third_party/wiredtiger/src/cursor/cur_file.c
@@ -263,14 +263,20 @@ __wt_curfile_insert_check(WT_CURSOR *cursor)
 	WT_CURSOR_BTREE *cbt;
 	WT_DECL_RET;
 	WT_SESSION_IMPL *session;
+	int tret;
 
 	cbt = (WT_CURSOR_BTREE *)cursor;
+	tret = 0;
 	CURSOR_UPDATE_API_CALL_BTREE(cursor, session, update, cbt->btree);
 	WT_ERR(__cursor_checkkey(cursor));
 
-	ret = __wt_btcur_insert_check(cbt);
+	tret = __wt_btcur_insert_check(cbt);
 
+	/*
+	 * Detecting a conflict should not cause a transaction error.
+	 */
 err:	CURSOR_UPDATE_API_END(session, ret);
+	WT_TRET(tret);
 	return (ret);
 }
 
diff --git a/src/third_party/wiredtiger/src/docs/backup.dox b/src/third_party/wiredtiger/src/docs/backup.dox
index 91b15da9275..b952a975788 100644
--- a/src/third_party/wiredtiger/src/docs/backup.dox
+++ b/src/third_party/wiredtiger/src/docs/backup.dox
@@ -59,10 +59,12 @@ During the period the backup cursor is open, database checkpoints can
 be created, but no checkpoints can be deleted.  This may result in
 significant file growth.
 
-Additionally, if a crash occurs during the period the backup cursor is open and
-logging is disabled, then the system will be restored to the most recent
-checkpoint prior to the opening of the backup cursor, even if later database
-checkpoints were created.
+Additionally, if a crash occurs during the period the backup cursor is
+open and logging is disabled (in other words, when depending on
+checkpoints for durability), then the system will be restored to the
+most recent checkpoint prior to the opening of the backup cursor, even
+if later database checkpoints were completed. <b>Note this exception to
+WiredTiger's checkpoint durability guarantees.</b>
 
 The following is a programmatic example of creating a backup:
 
diff --git a/src/third_party/wiredtiger/src/docs/checkpoint.dox b/src/third_party/wiredtiger/src/docs/checkpoint.dox
index ec28fea13c3..3d636cd17b6 100644
--- a/src/third_party/wiredtiger/src/docs/checkpoint.dox
+++ b/src/third_party/wiredtiger/src/docs/checkpoint.dox
@@ -22,6 +22,10 @@ configuration to ::wiredtiger_open.
 All transactional updates committed before a checkpoint are made durable
 by the checkpoint, therefore the frequency of checkpoints limits the
 volume of data that may be lost due to application or system failure.
+<b>This guarantee has an exception:</b> If a crash occurs when a backup
+cursor is open, then the system will be restored to the most recent
+checkpoint prior to the opening of the backup cursor, even if later
+database checkpoints were completed.
 
 Data sources that are involved in an exclusive operation when the
 checkpoint starts, including bulk load, verify or salvage, will be skipped
diff --git a/src/third_party/wiredtiger/src/docs/transactions.dox b/src/third_party/wiredtiger/src/docs/transactions.dox
index d9cc72dcf24..4ba6d5d2526 100644
--- a/src/third_party/wiredtiger/src/docs/transactions.dox
+++ b/src/third_party/wiredtiger/src/docs/transactions.dox
@@ -165,8 +165,8 @@ transaction timestamp functionality.
 
 Applications can assign explicit commit timestamps to transactions, then read
 "as of" a timestamp.  Timestamps are communicated to WiredTiger using a
-lower case hexadecimal encoding, so the encoded value can be twice as long as
-the raw timestamp value.
+hexadecimal encoding, so the encoded value can be twice as long as the raw
+timestamp value.
 
 Setting a read timestamp in WT_SESSION::begin_transaction forces a transaction
 to run at snapshot isolation and ignore any commits with a newer timestamp.
diff --git a/src/third_party/wiredtiger/src/evict/evict_file.c b/src/third_party/wiredtiger/src/evict/evict_file.c
index 56638934305..f2a09a0a769 100644
--- a/src/third_party/wiredtiger/src/evict/evict_file.c
+++ b/src/third_party/wiredtiger/src/evict/evict_file.c
@@ -16,11 +16,15 @@ int
 __wt_evict_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
 {
 	WT_BTREE *btree;
+	WT_CURSOR *las_cursor;
+	WT_DATA_HANDLE *dhandle;
 	WT_DECL_RET;
 	WT_PAGE *page;
 	WT_REF *next_ref, *ref;
+	uint32_t session_flags, walk_flags;
 
-	btree = S2BT(session);
+	dhandle = session->dhandle;
+	btree = dhandle->handle;
 
 	/*
 	 * We need exclusive access to the file, we're about to discard the root
@@ -28,7 +32,7 @@ __wt_evict_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
 	 */
 	WT_ASSERT(session,
 	    btree->evict_disabled > 0 ||
-	    !F_ISSET(session->dhandle, WT_DHANDLE_OPEN));
+	    !F_ISSET(dhandle, WT_DHANDLE_OPEN));
 
 	/*
 	 * We do discard objects without pages in memory. If that's the case,
@@ -37,14 +41,39 @@ __wt_evict_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
 	if (btree->root.page == NULL)
 		return (0);
 
+	walk_flags = WT_READ_CACHE | WT_READ_NO_EVICT;
+
+	/*
+	 * If discarding a dead tree, remove any lookaside entries.  This deals
+	 * with the case where a tree is dropped with "force=true".  It happens
+	 * that we also force-drop the lookaside table itself: it can never
+	 * participate in lookaside eviction, and we can't open a cursor on it
+	 * as we are discarding it.
+	 *
+	 * We use the special page ID zero so that all lookaside entries for
+	 * the tree are removed.
+	 */
+	if (F_ISSET(dhandle, WT_DHANDLE_DEAD) &&
+	    F_ISSET(S2C(session), WT_CONN_LAS_OPEN) &&
+	    !F_ISSET(btree, WT_BTREE_LOOKASIDE)) {
+		WT_ASSERT(session, !WT_IS_METADATA(dhandle));
+
+		__wt_las_cursor(session, &las_cursor, &session_flags);
+		WT_TRET(__wt_las_remove_block(
+		    session, las_cursor, btree->id, 0));
+		WT_TRET(__wt_las_cursor_close(
+		    session, &las_cursor, session_flags));
+		WT_RET(ret);
+	} else
+		FLD_SET(walk_flags, WT_READ_LOOKASIDE);
+
 	/* Make sure the oldest transaction ID is up-to-date. */
 	WT_RET(__wt_txn_update_oldest(
 	    session, WT_TXN_OLDEST_STRICT | WT_TXN_OLDEST_WAIT));
 
 	/* Walk the tree, discarding pages. */
 	next_ref = NULL;
-	WT_ERR(__wt_tree_walk(
-	    session, &next_ref, WT_READ_CACHE | WT_READ_NO_EVICT));
+	WT_ERR(__wt_tree_walk(session, &next_ref, walk_flags));
 	while ((ref = next_ref) != NULL) {
 		page = ref->page;
 
@@ -69,8 +98,8 @@ __wt_evict_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
 		 * error, retrying later.
 		 */
 		if (syncop == WT_SYNC_CLOSE && __wt_page_is_modified(page))
-			WT_ERR(__wt_reconcile(
-			    session, ref, NULL, WT_EVICTING, NULL));
+			WT_ERR(__wt_reconcile(session, ref, NULL,
+			    WT_REC_EVICT | WT_REC_VISIBLE_ALL, NULL));
 
 		/*
 		 * We can't evict the page just returned to us (it marks our
@@ -81,8 +110,7 @@ __wt_evict_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
 		 * the reconciliation, the next walk call could miss a page in
 		 * the tree.
 		 */
-		WT_ERR(__wt_tree_walk(session,
-		    &next_ref, WT_READ_CACHE | WT_READ_NO_EVICT));
+		WT_ERR(__wt_tree_walk(session, &next_ref, walk_flags));
 
 		switch (syncop) {
 		case WT_SYNC_CLOSE:
@@ -96,7 +124,7 @@ __wt_evict_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
 			 * Discard the page regardless of whether it is dirty.
 			 */
 			WT_ASSERT(session,
-			    F_ISSET(session->dhandle, WT_DHANDLE_DEAD) ||
+			    F_ISSET(dhandle, WT_DHANDLE_DEAD) ||
 			    __wt_page_can_evict(session, ref, NULL));
 			__wt_ref_out(session, ref);
 			break;
@@ -111,7 +139,7 @@ __wt_evict_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop)
 err:		/* On error, clear any left-over tree walk. */
 		if (next_ref != NULL)
 			WT_TRET(__wt_page_release(
-			    session, next_ref, WT_READ_NO_EVICT));
+			    session, next_ref, walk_flags));
 	}
 
 	return (ret);
diff --git a/src/third_party/wiredtiger/src/evict/evict_lru.c b/src/third_party/wiredtiger/src/evict/evict_lru.c
index 28d7bd2f1fa..8dd48738735 100644
--- a/src/third_party/wiredtiger/src/evict/evict_lru.c
+++ b/src/third_party/wiredtiger/src/evict/evict_lru.c
@@ -460,6 +460,7 @@ int
 __wt_evict_create(WT_SESSION_IMPL *session)
 {
 	WT_CONNECTION_IMPL *conn;
+	uint32_t session_flags;
 
 	conn = S2C(session);
 
@@ -471,10 +472,12 @@ __wt_evict_create(WT_SESSION_IMPL *session)
 	 * Create the eviction thread group.
 	 * Set the group size to the maximum allowed sessions.
 	 */
+	session_flags = WT_THREAD_CAN_WAIT |
+	    WT_THREAD_LOOKASIDE | WT_THREAD_PANIC_FAIL;
 	WT_RET(__wt_thread_group_create(session, &conn->evict_threads,
 	    "eviction-server", conn->evict_threads_min, conn->evict_threads_max,
-	     WT_THREAD_CAN_WAIT | WT_THREAD_PANIC_FAIL, __wt_evict_thread_chk,
-	     __wt_evict_thread_run, __wt_evict_thread_stop));
+	    session_flags, __wt_evict_thread_chk, __wt_evict_thread_run,
+	    __wt_evict_thread_stop));
 
 #if defined(HAVE_DIAGNOSTIC) || defined(HAVE_VERBOSE)
 	/*
@@ -1874,6 +1877,24 @@ __evict_walk_file(WT_SESSION_IMPL *session,
 		    F_ISSET(session->dhandle, WT_DHANDLE_DEAD))
 			goto fast;
 
+		/*
+		 * If application threads are blocked waiting for eviction (so
+		 * we are going to consider lookaside), and the only thing
+		 * preventing a clean page from being evicted is that it
+		 * contains historical data, mark it dirty so we can do
+		 * lookaside eviction.
+		 */
+		if (F_ISSET(cache, WT_CACHE_EVICT_CLEAN_HARD |
+		    WT_CACHE_EVICT_DIRTY_HARD) &&
+		    !F_ISSET(conn, WT_CONN_EVICTION_NO_LOOKASIDE) &&
+		    !modified && page->modify != NULL &&
+		    !__wt_txn_visible_all(session, page->modify->rec_max_txn,
+		    WT_TIMESTAMP_NULL(&page->modify->rec_max_timestamp))) {
+			__wt_page_only_modify_set(session, page);
+			modified = true;
+			goto fast;
+		}
+
 		/* Skip clean pages if appropriate. */
 		if (!modified && !F_ISSET(cache, WT_CACHE_EVICT_CLEAN))
 			continue;
@@ -1905,14 +1926,19 @@ __evict_walk_file(WT_SESSION_IMPL *session,
 			goto fast;
 
 		/*
-		 * If the oldest transaction hasn't changed since the last time
-		 * this page was written, it's unlikely we can make progress.
-		 * Similarly, if the most recent update on the page is not yet
-		 * globally visible, eviction will fail.  These heuristics
-		 * attempt to avoid repeated attempts to evict the same page.
+		 * If there are active transactions and the oldest transaction
+		 * hasn't changed since the last time this page was written,
+		 * it's unlikely we can make progress.  Similarly, if the most
+		 * recent update on the page is not yet globally visible,
+		 * eviction will fail.  This heuristic avoids repeated attempts
+		 * to evict the same page.
+		 *
+		 * We skip this for the lookaside table because updates there
+		 * can be evicted as soon as they are committed.
 		 */
 		mod = page->modify;
-		if (modified && txn_global->current != txn_global->oldest_id &&
+		if (modified && !F_ISSET(btree, WT_BTREE_LOOKASIDE) &&
+		    txn_global->current != txn_global->oldest_id &&
 		    (mod->last_eviction_id == __wt_txn_oldest_id(session) ||
 		    !__wt_txn_visible_all(session, mod->update_txn, NULL)))
 			continue;
@@ -2424,6 +2450,7 @@ static int
 __verbose_dump_cache_single(WT_SESSION_IMPL *session,
     uint64_t *total_bytesp, uint64_t *total_dirty_bytesp)
 {
+	WT_BTREE *btree;
 	WT_DATA_HANDLE *dhandle;
 	WT_PAGE *page;
 	WT_REF *next_walk;
@@ -2469,11 +2496,12 @@ __verbose_dump_cache_single(WT_SESSION_IMPL *session,
 	}
 
 	dhandle = session->dhandle;
-	if (dhandle->checkpoint == NULL)
-		WT_RET(__wt_msg(session, "%s(<live>):", dhandle->name));
-	else
-		WT_RET(__wt_msg(session, "%s(checkpoint=%s):",
-		    dhandle->name, dhandle->checkpoint));
+	btree = dhandle->handle;
+	WT_RET(__wt_msg(session, "%s(%s%s)%s%s:",
+	    dhandle->name, dhandle->checkpoint != NULL ? "checkpoint=" : "",
+	    dhandle->checkpoint != NULL ? dhandle->checkpoint : "<live>",
+	    btree->evict_disabled != 0 ?  "eviction disabled" : "",
+	    btree->evict_disabled_open ? " at open" : ""));
 	if (intl_pages != 0)
 		WT_RET(__wt_msg(session,
 		    "internal: "
diff --git a/src/third_party/wiredtiger/src/evict/evict_page.c b/src/third_party/wiredtiger/src/evict/evict_page.c
index ada1c39ddcf..7536e3593e8 100644
--- a/src/third_party/wiredtiger/src/evict/evict_page.c
+++ b/src/third_party/wiredtiger/src/evict/evict_page.c
@@ -10,7 +10,7 @@
 
 static int __evict_page_clean_update(WT_SESSION_IMPL *, WT_REF *, bool);
 static int __evict_page_dirty_update(WT_SESSION_IMPL *, WT_REF *, bool);
-static int __evict_review(WT_SESSION_IMPL *, WT_REF *, uint32_t *, bool);
+static int __evict_review(WT_SESSION_IMPL *, WT_REF *, bool, uint32_t *);
 
 /*
  * __evict_exclusive_clear --
@@ -127,9 +127,6 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
 
 	conn = S2C(session);
 
-	/* Checkpoints should never do eviction. */
-	WT_ASSERT(session, !WT_SESSION_IS_CHECKPOINT(session));
-
 	/* Enter the eviction generation. */
 	__wt_session_gen_enter(session, WT_GEN_EVICT);
 
@@ -146,13 +143,13 @@ __wt_evict(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
 	 * to make this check for clean pages, too: while unlikely eviction
 	 * would choose an internal page with children, it's not disallowed.
 	 */
-	WT_ERR(__evict_review(session, ref, &flags, closing));
+	WT_ERR(__evict_review(session, ref, closing, &flags));
 
 	/*
 	 * If there was an in-memory split, the tree has been left in the state
 	 * we want: there is nothing more to do.
 	 */
-	if (LF_ISSET(WT_EVICT_INMEM_SPLIT))
+	if (LF_ISSET(WT_REC_INMEM_SPLIT))
 		goto done;
 
 	/* Count evictions of internal pages during normal operation. */
@@ -312,9 +309,6 @@ __evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
 		 * of the page, if we're forced to "read" into that namespace,
 		 * we'll instantiate a new page instead of trying to read from
 		 * the backing store.
-		 *
-		 * Publish: a barrier to ensure the structure fields are set
-		 * before the state change makes the page available to readers.
 		 */
 		__wt_ref_out(session, ref);
 		ref->addr = NULL;
@@ -353,19 +347,37 @@ __evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, bool closing)
 		 * Publish: a barrier to ensure the structure fields are set
 		 * before the state change makes the page available to readers.
 		 */
-		WT_RET(__wt_calloc_one(session, &addr));
-		*addr = mod->mod_replace;
-		mod->mod_replace.addr = NULL;
-		mod->mod_replace.size = 0;
-		ref->addr = addr;
+		if (mod->mod_replace.addr == NULL)
+			ref->addr = NULL;
+		else {
+			WT_RET(__wt_calloc_one(session, &addr));
+			*addr = mod->mod_replace;
+			mod->mod_replace.addr = NULL;
+			mod->mod_replace.size = 0;
+			ref->addr = addr;
+		}
 
 		/*
 		 * Eviction wants to keep this page if we have a disk image,
 		 * re-instantiate the page in memory, else discard the page.
 		 */
 		if (mod->mod_disk_image == NULL) {
-			__wt_ref_out(session, ref);
-			WT_PUBLISH(ref->state, WT_REF_DISK);
+			if (mod->mod_replace_las_pageid != 0) {
+				WT_RET(
+				    __wt_calloc_one(session, &ref->page_las));
+				ref->page_las->las_pageid =
+				    mod->mod_replace_las_pageid;
+#ifdef HAVE_TIMESTAMPS
+				__wt_timestamp_set(
+				    &ref->page_las->min_timestamp,
+				    &mod->mod_replace_las_min_timestamp);
+#endif
+				__wt_ref_out(session, ref);
+				WT_PUBLISH(ref->state, WT_REF_LOOKASIDE);
+			} else {
+				__wt_ref_out(session, ref);
+				WT_PUBLISH(ref->state, WT_REF_DISK);
+			}
 		} else {
 			/*
 			 * The split code works with WT_MULTI structures, build
@@ -413,7 +425,7 @@ __evict_child_check(WT_SESSION_IMPL *session, WT_REF *parent)
  */
 static int
 __evict_review(
-    WT_SESSION_IMPL *session, WT_REF *ref, uint32_t *flagsp, bool closing)
+    WT_SESSION_IMPL *session, WT_REF *ref, bool closing, uint32_t *flagsp)
 {
 	WT_CACHE *cache;
 	WT_CONNECTION_IMPL *conn;
@@ -423,7 +435,9 @@ __evict_review(
 	bool lookaside_retry, *lookaside_retryp, modified;
 
 	conn = S2C(session);
-	flags = WT_EVICTING;
+	flags = WT_REC_EVICT;
+	if (!WT_SESSION_IS_CHECKPOINT(session))
+		LF_SET(WT_REC_VISIBLE_ALL);
 	*flagsp = flags;
 
 	/*
@@ -502,7 +516,7 @@ __evict_review(
 		 * the page stays in memory and the tree is left in the desired
 		 * state: avoid the usual cleanup.
 		 */
-		if (LF_ISSET(WT_EVICT_INMEM_SPLIT))
+		if (LF_ISSET(WT_REC_INMEM_SPLIT))
 			return (__wt_split_insert(session, ref));
 	}
 
@@ -545,22 +559,27 @@ __evict_review(
 	lookaside_retryp = NULL;
 
 	if (closing)
-		LF_SET(WT_VISIBILITY_ERR);
-	else if (!WT_PAGE_IS_INTERNAL(page)) {
+		LF_SET(WT_REC_VISIBILITY_ERR);
+	else if (!WT_PAGE_IS_INTERNAL(page) &&
+	    !F_ISSET(S2BT(session), WT_BTREE_LOOKASIDE)) {
 		if (F_ISSET(conn, WT_CONN_IN_MEMORY))
-			LF_SET(WT_EVICT_IN_MEMORY |
-			    WT_EVICT_SCRUB | WT_EVICT_UPDATE_RESTORE);
+			LF_SET(WT_REC_IN_MEMORY |
+			    WT_REC_SCRUB | WT_REC_UPDATE_RESTORE);
 		else {
-			LF_SET(WT_EVICT_UPDATE_RESTORE);
+			if (!WT_SESSION_IS_CHECKPOINT(session)) {
+				LF_SET(WT_REC_UPDATE_RESTORE);
 
-			if (F_ISSET(cache, WT_CACHE_EVICT_SCRUB))
-				LF_SET(WT_EVICT_SCRUB);
+				if (F_ISSET(cache, WT_CACHE_EVICT_SCRUB))
+					LF_SET(WT_REC_SCRUB);
+			}
 
 			/*
 			 * Check if reconciliation suggests trying the
 			 * lookaside table.
 			 */
-			lookaside_retryp = &lookaside_retry;
+			if (__wt_cache_aggressive(session) &&
+			    !F_ISSET(conn, WT_CONN_EVICTION_NO_LOOKASIDE))
+				lookaside_retryp = &lookaside_retry;
 		}
 	}
 
@@ -574,9 +593,9 @@ __evict_review(
 	 * table, allowing the eviction of pages we'd otherwise have to retain
 	 * in cache to support older readers.
 	 */
-	if (ret == EBUSY && lookaside_retry && __wt_cache_stuck(session)) {
-		LF_CLR(WT_EVICT_SCRUB | WT_EVICT_UPDATE_RESTORE);
-		LF_SET(WT_EVICT_LOOKASIDE);
+	if (ret == EBUSY && lookaside_retry) {
+		LF_CLR(WT_REC_SCRUB | WT_REC_UPDATE_RESTORE);
+		LF_SET(WT_REC_LOOKASIDE);
 		ret = __wt_reconcile(session, ref, NULL, flags, NULL);
 	}
 
@@ -584,6 +603,18 @@ __evict_review(
 	WT_RET(ret);
 
 	/*
+	 * If attempting eviction in service of a checkpoint, we may
+	 * successfully reconcile but then find that there are updates on the
+	 * page too new to evict.  Give up in that case: checkpoint will
+	 * reconcile the page normally.
+	 */
+	if (WT_SESSION_IS_CHECKPOINT(session) && !__wt_page_is_modified(page) &&
+	    !LF_ISSET(WT_REC_LOOKASIDE) &&
+	    !__wt_txn_visible_all(session, page->modify->rec_max_txn,
+	    WT_TIMESTAMP_NULL(&page->modify->rec_max_timestamp)))
+		return (EBUSY);
+
+	/*
 	 * Success: assert the page is clean or reconciliation was configured
 	 * for update/restore. If the page is clean, assert that reconciliation
 	 * was configured for a lookaside table, or it's not a durable object
@@ -591,10 +622,10 @@ __evict_review(
 	 * visible.
 	 */
 	WT_ASSERT(session,
-	    !__wt_page_is_modified(page) || LF_ISSET(WT_EVICT_UPDATE_RESTORE));
+	    !__wt_page_is_modified(page) || LF_ISSET(WT_REC_UPDATE_RESTORE));
 	WT_ASSERT(session,
 	    __wt_page_is_modified(page) ||
-	    LF_ISSET(WT_EVICT_LOOKASIDE) ||
+	    LF_ISSET(WT_REC_LOOKASIDE) ||
 	    F_ISSET(S2BT(session), WT_BTREE_LOOKASIDE) ||
 	    __wt_txn_visible_all(session, page->modify->rec_max_txn,
 	    WT_TIMESTAMP_NULL(&page->modify->rec_max_timestamp)));
diff --git a/src/third_party/wiredtiger/src/include/api.h b/src/third_party/wiredtiger/src/include/api.h
index 60ed31b64e8..3eb951f81ac 100644
--- a/src/third_party/wiredtiger/src/include/api.h
+++ b/src/third_party/wiredtiger/src/include/api.h
@@ -39,9 +39,15 @@
 } while (0)
 
 /* An API call wrapped in a transaction if necessary. */
+#ifdef HAVE_TIMESTAMPS
+#define	WT_TXN_TIMESTAMP_FLAG_CHECK(s) __wt_txn_timestamp_flags((s))
+#else
+#define	WT_TXN_TIMESTAMP_FLAG_CHECK(s)
+#endif
 #define	TXN_API_CALL(s, h, n, bt, config, cfg) do {			\
 	bool __autotxn = false;						\
 	API_CALL(s, h, n, bt, config, cfg);				\
+	WT_TXN_TIMESTAMP_FLAG_CHECK(s);					\
 	__autotxn = !F_ISSET(&(s)->txn, WT_TXN_AUTOCOMMIT | WT_TXN_RUNNING);\
 	if (__autotxn)							\
 		F_SET(&(s)->txn, WT_TXN_AUTOCOMMIT)
@@ -50,6 +56,7 @@
 #define	TXN_API_CALL_NOCONF(s, h, n, dh) do {				\
 	bool __autotxn = false;						\
 	API_CALL_NOCONF(s, h, n, dh);					\
+	WT_TXN_TIMESTAMP_FLAG_CHECK(s);					\
 	__autotxn = !F_ISSET(&(s)->txn, WT_TXN_AUTOCOMMIT | WT_TXN_RUNNING);\
 	if (__autotxn)							\
 		F_SET(&(s)->txn, WT_TXN_AUTOCOMMIT)
diff --git a/src/third_party/wiredtiger/src/include/btmem.h b/src/third_party/wiredtiger/src/include/btmem.h
index f0d810281c2..486ab7562a1 100644
--- a/src/third_party/wiredtiger/src/include/btmem.h
+++ b/src/third_party/wiredtiger/src/include/btmem.h
@@ -165,13 +165,13 @@ struct __wt_ovfl_reuse {
  * Lookaside table support: when a page is being reconciled for eviction and has
  * updates that might be required by earlier readers in the system, the updates
  * are written into a lookaside table, and restored as necessary if the page is
- * read. The key is a unique marker for the page (a file ID plus an address),
- * a counter (used to ensure the update records remain in the original order),
- * the on-page item's transaction ID and timestamp (so we can discard any
- * update records from the lookaside table once the on-page item's transaction
- * is globally visible), and the page key (byte-string for row-store, record
- * number for column-store).  The value is the WT_UPDATE structure's
- * transaction ID, update size and value.
+ * read.
+ *
+ * The key is a unique marker for the page (a file ID plus a page ID), a
+ * counter (used to ensure the update records remain in the original order),
+ * and the record's key (byte-string for row-store, record number for
+ * column-store).  The value is the WT_UPDATE structure's transaction ID,
+ * timestamp, update type and value.
  *
  * As the key for the lookaside table is different for row- and column-store, we
  * store both key types in a WT_ITEM, building/parsing them in the code, because
@@ -182,7 +182,7 @@ struct __wt_ovfl_reuse {
  * the row-store key is relatively large.
  */
 #define	WT_LAS_FORMAT							\
-    "key_format=" WT_UNCHECKED_STRING(IuQQuu)				\
+    "key_format=" WT_UNCHECKED_STRING(IQQu)				\
     ",value_format=" WT_UNCHECKED_STRING(QuBu)
 
 /*
@@ -239,11 +239,19 @@ struct __wt_page_modify {
 		 * re-instantiate the page in memory.
 		 */
 		void	*disk_image;
+
+		/* The page has lookaside entries. */
+		uint64_t las_pageid;
+		WT_DECL_TIMESTAMP(las_min_timestamp)
 	} r;
 #undef	mod_replace
 #define	mod_replace	u1.r.replace
 #undef	mod_disk_image
 #define	mod_disk_image	u1.r.disk_image
+#undef	mod_replace_las_pageid
+#define	mod_replace_las_pageid	u1.r.las_pageid
+#undef	mod_replace_las_min_timestamp
+#define	mod_replace_las_min_timestamp	u1.r.las_min_timestamp
 
 	struct {			/* Multiple replacement blocks */
 	struct __wt_multi {
@@ -274,8 +282,7 @@ struct __wt_page_modify {
 		struct __wt_save_upd {
 			WT_INSERT *ins;		/* Insert list reference */
 			WT_ROW	  *ripcip;	/* Original on-page reference */
-			uint64_t   onpage_txn;
-			WT_DECL_TIMESTAMP(onpage_timestamp)
+			WT_UPDATE *onpage_upd;
 		} *supd;
 		uint32_t supd_entries;
 
@@ -289,6 +296,9 @@ struct __wt_page_modify {
 		WT_ADDR	 addr;
 		uint32_t size;
 		uint32_t checksum;
+
+		uint64_t las_pageid;
+		WT_DECL_TIMESTAMP(las_min_timestamp)
 	} *multi;
 	uint32_t multi_entries;		/* Multiple blocks element count */
 	} m;
@@ -659,6 +669,10 @@ struct __wt_page {
  *	thread that set the page to WT_REF_LOCKED has exclusive access, no
  *	other thread may use the WT_REF until the state is changed.
  *
+ * WT_REF_LOOKASIDE:
+ *	The page is on disk (as per WT_REF_DISK) and has entries in the
+ *	lookaside table that must be applied before the page can be read.
+ *
  * WT_REF_MEM:
  *	Set by a reading thread once the page has been read from disk; the page
  *	is in the cache and the page reference is OK.
@@ -696,10 +710,20 @@ struct __wt_page {
  *	Related information for fast-delete, on-disk pages.
  */
 struct __wt_page_deleted {
-	volatile uint64_t txnid;			/* Transaction ID */
+	volatile uint64_t txnid;		/* Transaction ID */
 	WT_DECL_TIMESTAMP(timestamp)
 
-	WT_UPDATE **update_list;	/* List of updates for abort */
+	WT_UPDATE **update_list;		/* List of updates for abort */
+};
+
+/*
+ * WT_PAGE_LOOKASIDE --
+ *	Related information for on-disk pages with lookaside entries.
+ */
+struct __wt_page_lookaside {
+	uint64_t las_pageid;			/* Page ID in lookaside */
+	WT_DECL_TIMESTAMP(min_timestamp)	/* Oldest timestamp in
+						   lookaside for the page */
 };
 
 /*
@@ -718,12 +742,13 @@ struct __wt_ref {
 	WT_PAGE * volatile home;	/* Reference page */
 	volatile uint32_t pindex_hint;	/* Reference page index hint */
 
-#define	WT_REF_DISK	0		/* Page is on disk */
-#define	WT_REF_DELETED	1		/* Page is on disk, but deleted */
-#define	WT_REF_LOCKED	2		/* Page locked for exclusive access */
-#define	WT_REF_MEM	3		/* Page is in cache and valid */
-#define	WT_REF_READING	4		/* Page being read */
-#define	WT_REF_SPLIT	5		/* Parent page split (WT_REF dead) */
+#define	WT_REF_DISK	 0		/* Page is on disk */
+#define	WT_REF_DELETED	 1		/* Page is on disk, but deleted */
+#define	WT_REF_LOCKED	 2		/* Page locked for exclusive access */
+#define	WT_REF_LOOKASIDE 3		/* Page is on disk with lookaside */
+#define	WT_REF_MEM	 4		/* Page is in cache and valid */
+#define	WT_REF_READING	 5		/* Page being read */
+#define	WT_REF_SPLIT	 6		/* Parent page split (WT_REF dead) */
 	volatile uint32_t state;	/* Page state */
 
 	/*
@@ -745,7 +770,10 @@ struct __wt_ref {
 #undef	ref_ikey
 #define	ref_ikey	key.ikey
 
-	WT_PAGE_DELETED	*page_del;	/* Deleted on-disk page information */
+	union {
+		WT_PAGE_DELETED	*page_del;	/* Deleted page information */
+		WT_PAGE_LOOKASIDE *page_las;	/* Lookaside information */
+	};
 };
 /*
  * WT_REF_SIZE is the expected structure size -- we verify the build to ensure
diff --git a/src/third_party/wiredtiger/src/include/btree.h b/src/third_party/wiredtiger/src/include/btree.h
index 8184d606022..7dc9b4a11a7 100644
--- a/src/third_party/wiredtiger/src/include/btree.h
+++ b/src/third_party/wiredtiger/src/include/btree.h
@@ -97,6 +97,12 @@ struct __wt_btree {
 	uint64_t maxmempage;		/* In-memory page max size */
 	uint64_t splitmempage;		/* In-memory split trigger size */
 
+#define	WT_ASSERT_COMMIT_TS_ALWAYS	0x0001
+#define	WT_ASSERT_COMMIT_TS_NEVER	0x0002
+#define	WT_ASSERT_READ_TS_ALWAYS	0x0004
+#define	WT_ASSERT_READ_TS_NEVER		0x0008
+	uint32_t assert_flags;		/* Debugging assertion information */
+
 	void *huffman_key;		/* Key huffman encoding */
 	void *huffman_value;		/* Value huffman encoding */
 
@@ -128,6 +134,7 @@ struct __wt_btree {
 	u_int	rec_multiblock_max;	/* Maximum blocks written for a page */
 
 	uint64_t last_recno;		/* Column-store last record number */
+	uint64_t las_pageid;		/* Lookaside table page ID counter */
 
 	WT_REF	root;			/* Root page reference */
 	bool	modified;		/* If the tree ever modified */
diff --git a/src/third_party/wiredtiger/src/include/btree.i b/src/third_party/wiredtiger/src/include/btree.i
index 4d6844e10cc..3b196dca673 100644
--- a/src/third_party/wiredtiger/src/include/btree.i
+++ b/src/third_party/wiredtiger/src/include/btree.i
@@ -1303,19 +1303,20 @@ __wt_page_can_evict(
 	 */
 	if (__wt_leaf_page_can_split(session, page)) {
 		if (evict_flagsp != NULL)
-			FLD_SET(*evict_flagsp, WT_EVICT_INMEM_SPLIT);
+			FLD_SET(*evict_flagsp, WT_REC_INMEM_SPLIT);
 		return (true);
 	}
 
 	modified = __wt_page_is_modified(page);
 
 	/*
-	 * If the file is being checkpointed, we can't evict dirty pages:
-	 * if we write a page and free the previous version of the page, that
+	 * If the file is being checkpointed, other threads can't evict dirty
+	 * pages: if a page is written and the previous version freed, that
 	 * previous version might be referenced by an internal page already
-	 * been written in the checkpoint, leaving the checkpoint inconsistent.
+	 * written in the checkpoint, leaving the checkpoint inconsistent.
 	 */
-	if (modified && btree->checkpointing != WT_CKPT_OFF) {
+	if (modified && btree->checkpointing != WT_CKPT_OFF &&
+	    !WT_SESSION_IS_CHECKPOINT(session)) {
 		WT_STAT_CONN_INCR(session, cache_eviction_checkpoint);
 		WT_STAT_DATA_INCR(session, cache_eviction_checkpoint);
 		return (false);
diff --git a/src/third_party/wiredtiger/src/include/connection.h b/src/third_party/wiredtiger/src/include/connection.h
index e5593357347..1d7b6142685 100644
--- a/src/third_party/wiredtiger/src/include/connection.h
+++ b/src/third_party/wiredtiger/src/include/connection.h
@@ -365,10 +365,6 @@ struct __wt_connection_impl {
 	 */
 	WT_SPINLOCK	 las_lock;	/* Lookaside table spinlock */
 	WT_SESSION_IMPL *las_session;	/* Lookaside table session */
-	bool		 las_written;	/* Lookaside table has been written */
-
-	WT_ITEM		 las_sweep_key;	/* Sweep server's saved key */
-	uint64_t	 las_record_cnt;/* Count of lookaside records */
 
 	/*
 	 * The "lookaside_activity" verbose messages are throttled to once per
diff --git a/src/third_party/wiredtiger/src/include/extern.h b/src/third_party/wiredtiger/src/include/extern.h
index 362acc71c0f..23897a05dfb 100644
--- a/src/third_party/wiredtiger/src/include/extern.h
+++ b/src/third_party/wiredtiger/src/include/extern.h
@@ -152,14 +152,13 @@ extern const char *__wt_page_addr_string(WT_SESSION_IMPL *session, WT_REF *ref,
 extern const char *__wt_addr_string(WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size, WT_ITEM *buf);
 extern int __wt_ovfl_read(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack, WT_ITEM *store, bool *decoded) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern void __wt_ovfl_discard_remove(WT_SESSION_IMPL *session, WT_PAGE *page);
-extern int __wt_ovfl_remove(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack, bool checkpoint) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_ovfl_remove(WT_SESSION_IMPL *session, WT_PAGE *page, WT_CELL_UNPACK *unpack, bool evicting) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_ovfl_discard(WT_SESSION_IMPL *session, WT_CELL *cell) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_page_alloc(WT_SESSION_IMPL *session, uint8_t type, uint32_t alloc_entries, bool alloc_refs, WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_page_inmem(WT_SESSION_IMPL *session, WT_REF *ref, const void *image, size_t memsize, uint32_t flags, WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_page_inmem(WT_SESSION_IMPL *session, WT_REF *ref, const void *image, uint32_t flags, WT_PAGE **pagep) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_random_descent(WT_SESSION_IMPL *session, WT_REF **refp, bool eviction) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_las_remove_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor, uint32_t btree_id, const uint8_t *addr, size_t addr_size) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int
 __wt_page_in_func(WT_SESSION_IMPL *session, WT_REF *ref, uint32_t flags
 #ifdef HAVE_DIAGNOSTIC
@@ -186,7 +185,7 @@ extern int __wt_tree_walk_count(WT_SESSION_IMPL *session, WT_REF **refp, uint64_
 extern int __wt_tree_walk_custom_skip( WT_SESSION_IMPL *session, WT_REF **refp, int (*skip_func)(WT_SESSION_IMPL *, WT_REF *, void *, bool *), void *func_cookie, uint32_t flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_tree_walk_skip( WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *skipleafcntp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno, const WT_ITEM *value, WT_UPDATE *upd_arg, u_int modify_type, bool exclusive) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_col_search(WT_SESSION_IMPL *session, uint64_t search_recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_col_search(WT_SESSION_IMPL *session, uint64_t search_recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt, bool restore) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_row_leaf_keys(WT_SESSION_IMPL *session, WT_PAGE *page) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_row_leaf_key_copy( WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip, WT_ITEM *key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_row_leaf_key_work(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip_arg, WT_ITEM *keyb, bool instantiate) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
@@ -200,16 +199,14 @@ extern int __wt_update_alloc(WT_SESSION_IMPL *session, const WT_ITEM *value, WT_
 extern WT_UPDATE *__wt_update_obsolete_check( WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd);
 extern void __wt_update_obsolete_free( WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd);
 extern int __wt_search_insert(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_INSERT_HEAD *ins_head, WT_ITEM *srch_key) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_row_search(WT_SESSION_IMPL *session, WT_ITEM *srch_key, WT_REF *leaf, WT_CURSOR_BTREE *cbt, bool insert) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_row_search(WT_SESSION_IMPL *session, WT_ITEM *srch_key, WT_REF *leaf, WT_CURSOR_BTREE *cbt, bool insert, bool restore) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern void __wt_las_stats_update(WT_SESSION_IMPL *session);
 extern int __wt_las_create(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern int __wt_las_destroy(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern void __wt_las_set_written(WT_SESSION_IMPL *session);
-extern bool __wt_las_is_written(WT_SESSION_IMPL *session);
 extern int __wt_las_cursor_open(WT_SESSION_IMPL *session, WT_CURSOR **cursorp) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern void __wt_las_cursor( WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t *session_flags);
 extern int __wt_las_cursor_close( 	WT_SESSION_IMPL *session, WT_CURSOR **cursorp, uint32_t session_flags) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
-extern int __wt_las_sweep(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
+extern int __wt_las_remove_block(WT_SESSION_IMPL *session, WT_CURSOR *cursor, uint32_t btree_id, uint64_t pageid) WT_GCC_FUNC_DECL_ATTRIBUTE((warn_unused_result));
 extern uint32_t __wt_checksum_sw(const void *chunk, size_t len) WT_GCC_FUNC_DECL_ATTRIBUTE((visibility("default")));
 extern void __wt_checksum_init(void);
 extern void __wt_config_initn( WT_SESSION_IMPL *session, WT_CONFIG *conf, const char *str, size_t len);
diff --git a/src/third_party/wiredtiger/src/include/flags.h b/src/third_party/wiredtiger/src/include/flags.h
index ccb32900dc4..65b4ce34752 100644
--- a/src/third_party/wiredtiger/src/include/flags.h
+++ b/src/third_party/wiredtiger/src/include/flags.h
@@ -2,32 +2,26 @@
  * DO NOT EDIT: automatically built by dist/flags.py.
  * flags section: BEGIN
  */
-#define	WT_CHECKPOINTING				0x00000001
 #define	WT_CONN_CACHE_POOL				0x00000001
 #define	WT_CONN_CKPT_SYNC				0x00000002
 #define	WT_CONN_CLOSING					0x00000004
 #define	WT_CONN_CLOSING_NO_MORE_OPENS			0x00000008
-#define	WT_CONN_EVICTION_RUN				0x00000010
-#define	WT_CONN_IN_MEMORY				0x00000020
-#define	WT_CONN_LAS_OPEN				0x00000040
-#define	WT_CONN_LEAK_MEMORY				0x00000080
-#define	WT_CONN_LSM_MERGE				0x00000100
-#define	WT_CONN_PANIC					0x00000200
-#define	WT_CONN_READONLY				0x00000400
-#define	WT_CONN_RECOVERING				0x00000800
-#define	WT_CONN_SERVER_ASYNC				0x00001000
-#define	WT_CONN_SERVER_CHECKPOINT			0x00002000
-#define	WT_CONN_SERVER_LOG				0x00004000
-#define	WT_CONN_SERVER_LSM				0x00008000
-#define	WT_CONN_SERVER_STATISTICS			0x00010000
-#define	WT_CONN_SERVER_SWEEP				0x00020000
-#define	WT_CONN_WAS_BACKUP				0x00040000
-#define	WT_EVICTING					0x00000002
-#define	WT_EVICT_INMEM_SPLIT				0x00000004
-#define	WT_EVICT_IN_MEMORY				0x00000008
-#define	WT_EVICT_LOOKASIDE				0x00000010
-#define	WT_EVICT_SCRUB					0x00000020
-#define	WT_EVICT_UPDATE_RESTORE				0x00000040
+#define	WT_CONN_EVICTION_NO_LOOKASIDE			0x00000010
+#define	WT_CONN_EVICTION_RUN				0x00000020
+#define	WT_CONN_IN_MEMORY				0x00000040
+#define	WT_CONN_LAS_OPEN				0x00000080
+#define	WT_CONN_LEAK_MEMORY				0x00000100
+#define	WT_CONN_LSM_MERGE				0x00000200
+#define	WT_CONN_PANIC					0x00000400
+#define	WT_CONN_READONLY				0x00000800
+#define	WT_CONN_RECOVERING				0x00001000
+#define	WT_CONN_SERVER_ASYNC				0x00002000
+#define	WT_CONN_SERVER_CHECKPOINT			0x00004000
+#define	WT_CONN_SERVER_LOG				0x00008000
+#define	WT_CONN_SERVER_LSM				0x00010000
+#define	WT_CONN_SERVER_STATISTICS			0x00020000
+#define	WT_CONN_SERVER_SWEEP				0x00040000
+#define	WT_CONN_WAS_BACKUP				0x00080000
 #define	WT_LOGSCAN_FIRST				0x00000001
 #define	WT_LOGSCAN_FROM_CKP				0x00000002
 #define	WT_LOGSCAN_ONE					0x00000004
@@ -38,16 +32,26 @@
 #define	WT_LOG_FSYNC					0x00000008
 #define	WT_LOG_SYNC_ENABLED				0x00000010
 #define	WT_READ_CACHE					0x00000001
-#define	WT_READ_NOTFOUND_OK				0x00000002
-#define	WT_READ_NO_EMPTY				0x00000004
-#define	WT_READ_NO_EVICT				0x00000008
-#define	WT_READ_NO_GEN					0x00000010
-#define	WT_READ_NO_WAIT					0x00000020
-#define	WT_READ_PREV					0x00000040
-#define	WT_READ_RESTART_OK				0x00000080
-#define	WT_READ_SKIP_INTL				0x00000100
-#define	WT_READ_TRUNCATE				0x00000200
-#define	WT_READ_WONT_NEED				0x00000400
+#define	WT_READ_LOOKASIDE				0x00000002
+#define	WT_READ_NOTFOUND_OK				0x00000004
+#define	WT_READ_NO_EMPTY				0x00000008
+#define	WT_READ_NO_EVICT				0x00000010
+#define	WT_READ_NO_GEN					0x00000020
+#define	WT_READ_NO_WAIT					0x00000040
+#define	WT_READ_PREV					0x00000080
+#define	WT_READ_RESTART_OK				0x00000100
+#define	WT_READ_SKIP_INTL				0x00000200
+#define	WT_READ_TRUNCATE				0x00000400
+#define	WT_READ_WONT_NEED				0x00000800
+#define	WT_REC_CHECKPOINT				0x00000001
+#define	WT_REC_EVICT					0x00000002
+#define	WT_REC_INMEM_SPLIT				0x00000004
+#define	WT_REC_IN_MEMORY				0x00000008
+#define	WT_REC_LOOKASIDE				0x00000010
+#define	WT_REC_SCRUB					0x00000020
+#define	WT_REC_UPDATE_RESTORE				0x00000040
+#define	WT_REC_VISIBILITY_ERR				0x00000080
+#define	WT_REC_VISIBLE_ALL				0x00000100
 #define	WT_SESSION_CAN_WAIT				0x00000001
 #define	WT_SESSION_INTERNAL				0x00000002
 #define	WT_SESSION_LOCKED_CHECKPOINT			0x00000004
@@ -118,7 +122,6 @@
 #define	WT_VERB_VERIFY					0x10000000
 #define	WT_VERB_VERSION					0x20000000
 #define	WT_VERB_WRITE					0x40000000
-#define	WT_VISIBILITY_ERR				0x00000080
 /*
  * flags section: END
  * DO NOT EDIT: automatically built by dist/flags.py.
diff --git a/src/third_party/wiredtiger/src/include/mutex.i b/src/third_party/wiredtiger/src/include/mutex.i
index 5b14bb24730..871ccf63be8 100644
--- a/src/third_party/wiredtiger/src/include/mutex.i
+++ b/src/third_party/wiredtiger/src/include/mutex.i
@@ -113,11 +113,15 @@ static inline int
 __wt_spin_init(WT_SESSION_IMPL *session, WT_SPINLOCK *t, const char *name)
 {
 #if SPINLOCK_TYPE == SPINLOCK_PTHREAD_MUTEX_ADAPTIVE
+	WT_DECL_RET;
 	pthread_mutexattr_t attr;
 
 	WT_RET(pthread_mutexattr_init(&attr));
-	WT_RET(pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP));
-	WT_RET(pthread_mutex_init(&t->lock, &attr));
+	ret = pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ADAPTIVE_NP);
+	if (ret == 0)
+		ret = pthread_mutex_init(&t->lock, &attr);
+	WT_TRET(pthread_mutexattr_destroy(&attr));
+	WT_RET(ret);
 #else
 	WT_RET(pthread_mutex_init(&t->lock, NULL));
 #endif
diff --git a/src/third_party/wiredtiger/src/include/schema.h b/src/third_party/wiredtiger/src/include/schema.h
index 9ab4c12f0d0..bae5fc8cc04 100644
--- a/src/third_party/wiredtiger/src/include/schema.h
+++ b/src/third_party/wiredtiger/src/include/schema.h
@@ -296,7 +296,9 @@ struct __wt_table {
 		F_CLR(session, WT_SESSION_LOCKED_CHECKPOINT);		\
 		__wt_spin_unlock(session, &__conn->checkpoint_lock);	\
 	}								\
+	__wt_yield();							\
 	op;								\
+	__wt_yield();							\
 	if (__checkpoint_locked) {					\
 		__wt_spin_lock(session, &__conn->checkpoint_lock);	\
 		F_SET(session, WT_SESSION_LOCKED_CHECKPOINT);		\
diff --git a/src/third_party/wiredtiger/src/include/session.h b/src/third_party/wiredtiger/src/include/session.h
index bd69cc36405..bea436e05e2 100644
--- a/src/third_party/wiredtiger/src/include/session.h
+++ b/src/third_party/wiredtiger/src/include/session.h
@@ -77,9 +77,6 @@ struct __wt_session_impl {
 	enum { WT_COMPACT_NONE=0,
 	    WT_COMPACT_RUNNING, WT_COMPACT_SUCCESS } compact_state;
 
-	/*
-	 * Lookaside table cursor, sweep and eviction worker threads only.
-	 */
 	WT_CURSOR	*las_cursor;	/* Lookaside table cursor */
 
 	WT_CURSOR *meta_cursor;		/* Metadata file */
diff --git a/src/third_party/wiredtiger/src/include/stat.h b/src/third_party/wiredtiger/src/include/stat.h
index c7110c245c7..922b211bec4 100644
--- a/src/third_party/wiredtiger/src/include/stat.h
+++ b/src/third_party/wiredtiger/src/include/stat.h
@@ -341,6 +341,7 @@ struct __wt_connection_stats {
 	int64_t cache_eviction_internal;
 	int64_t cache_eviction_split_internal;
 	int64_t cache_eviction_split_leaf;
+	int64_t cache_lookaside_entries;
 	int64_t cache_lookaside_insert;
 	int64_t cache_lookaside_remove;
 	int64_t cache_bytes_max;
diff --git a/src/third_party/wiredtiger/src/include/thread_group.h b/src/third_party/wiredtiger/src/include/thread_group.h
index 7375f9dfd87..97eda6ab674 100644
--- a/src/third_party/wiredtiger/src/include/thread_group.h
+++ b/src/third_party/wiredtiger/src/include/thread_group.h
@@ -23,8 +23,9 @@ struct __wt_thread {
 	 */
 #define	WT_THREAD_ACTIVE	0x01	/* thread is active or paused */
 #define	WT_THREAD_CAN_WAIT	0x02	/* WT_SESSION_CAN_WAIT */
-#define	WT_THREAD_PANIC_FAIL	0x04	/* panic if the thread fails */
-#define	WT_THREAD_RUN		0x08	/* thread is running */
+#define	WT_THREAD_LOOKASIDE	0x04	/* open lookaside cursor */
+#define	WT_THREAD_PANIC_FAIL	0x08	/* panic if the thread fails */
+#define	WT_THREAD_RUN		0x10	/* thread is running */
 	uint32_t flags;
 
 	/*
diff --git a/src/third_party/wiredtiger/src/include/txn.h b/src/third_party/wiredtiger/src/include/txn.h
index 69481409aaf..6b78c78a5cd 100644
--- a/src/third_party/wiredtiger/src/include/txn.h
+++ b/src/third_party/wiredtiger/src/include/txn.h
@@ -246,17 +246,19 @@ struct __wt_txn {
 	WT_ITEM		*ckpt_snapshot;
 	bool		full_ckpt;
 
-#define	WT_TXN_AUTOCOMMIT	0x001
-#define	WT_TXN_ERROR		0x002
-#define	WT_TXN_HAS_ID		0x004
-#define	WT_TXN_HAS_SNAPSHOT	0x008
-#define	WT_TXN_HAS_TS_COMMIT	0x010
-#define	WT_TXN_HAS_TS_READ	0x020
-#define	WT_TXN_NAMED_SNAPSHOT	0x040
-#define	WT_TXN_PUBLIC_TS_COMMIT	0x080
-#define	WT_TXN_PUBLIC_TS_READ	0x100
-#define	WT_TXN_READONLY		0x200
-#define	WT_TXN_RUNNING		0x400
-#define	WT_TXN_SYNC_SET		0x800
+#define	WT_TXN_AUTOCOMMIT	0x00001
+#define	WT_TXN_ERROR		0x00002
+#define	WT_TXN_HAS_ID		0x00004
+#define	WT_TXN_HAS_SNAPSHOT	0x00008
+#define	WT_TXN_HAS_TS_COMMIT	0x00010
+#define	WT_TXN_HAS_TS_READ	0x00020
+#define	WT_TXN_NAMED_SNAPSHOT	0x00040
+#define	WT_TXN_PUBLIC_TS_COMMIT	0x00080
+#define	WT_TXN_PUBLIC_TS_READ	0x00100
+#define	WT_TXN_READONLY		0x00200
+#define	WT_TXN_RUNNING		0x00400
+#define	WT_TXN_SYNC_SET		0x00800
+#define	WT_TXN_TS_COMMIT_ALWAYS	0x01000
+#define	WT_TXN_TS_COMMIT_NEVER	0x02000
 	uint32_t flags;
 };
diff --git a/src/third_party/wiredtiger/src/include/txn.i b/src/third_party/wiredtiger/src/include/txn.i
index e53ab6a69ee..26dcd01fe5e 100644
--- a/src/third_party/wiredtiger/src/include/txn.i
+++ b/src/third_party/wiredtiger/src/include/txn.i
@@ -10,6 +10,26 @@ static inline int __wt_txn_id_check(WT_SESSION_IMPL *session);
 static inline void __wt_txn_read_last(WT_SESSION_IMPL *session);
 
 #ifdef HAVE_TIMESTAMPS
+/*
+ * __wt_txn_timestamp_flags --
+ *	Set txn related timestamp flags.
+ */
+static inline void
+__wt_txn_timestamp_flags(WT_SESSION_IMPL *session)
+{
+	WT_BTREE *btree;
+
+	if (session->dhandle == NULL)
+		return;
+	btree = S2BT(session);
+	if (btree == NULL)
+		return;
+	if (FLD_ISSET(btree->assert_flags, WT_ASSERT_COMMIT_TS_ALWAYS))
+		F_SET(&session->txn, WT_TXN_TS_COMMIT_ALWAYS);
+	if (FLD_ISSET(btree->assert_flags, WT_ASSERT_COMMIT_TS_NEVER))
+		F_SET(&session->txn, WT_TXN_TS_COMMIT_NEVER);
+}
+
 #if WT_TIMESTAMP_SIZE == 8
 #define	WT_WITH_TIMESTAMP_READLOCK(session, l, e)       e
 
@@ -635,6 +655,37 @@ __wt_txn_id_check(WT_SESSION_IMPL *session)
 }
 
 /*
+ * __wt_txn_search_check --
+ *	Check if the current transaction can search.
+ */
+static inline int
+__wt_txn_search_check(WT_SESSION_IMPL *session)
+{
+#ifdef  HAVE_TIMESTAMPS
+	WT_BTREE *btree;
+	WT_TXN *txn;
+
+	txn = &session->txn;
+	btree = S2BT(session);
+	/*
+	 * If the user says a table should always use a read timestamp,
+	 * verify this transaction has one.  Same if it should never have
+	 * a read timestamp.
+	 */
+	if (FLD_ISSET(btree->assert_flags, WT_ASSERT_READ_TS_ALWAYS) &&
+	    !F_ISSET(txn, WT_TXN_PUBLIC_TS_READ))
+		WT_RET_MSG(session, EINVAL, "read_timestamp required and "
+		    "none set on this transaction");
+	if (FLD_ISSET(btree->assert_flags, WT_ASSERT_READ_TS_NEVER) &&
+	    F_ISSET(txn, WT_TXN_PUBLIC_TS_READ))
+		WT_RET_MSG(session, EINVAL, "no read_timestamp required and "
+		    "timestamp set on this transaction");
+#endif
+	WT_UNUSED(session);
+	return (0);
+}
+
+/*
  * __wt_txn_update_check --
  *	Check if the current transaction can update an item.
  */
diff --git a/src/third_party/wiredtiger/src/include/wiredtiger.in b/src/third_party/wiredtiger/src/include/wiredtiger.in
index 1e526edaedc..830850f102b 100644
--- a/src/third_party/wiredtiger/src/include/wiredtiger.in
+++ b/src/third_party/wiredtiger/src/include/wiredtiger.in
@@ -4841,454 +4841,456 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
 #define	WT_STAT_CONN_CACHE_EVICTION_SPLIT_INTERNAL	1082
 /*! cache: leaf pages split during eviction */
 #define	WT_STAT_CONN_CACHE_EVICTION_SPLIT_LEAF		1083
+/*! cache: lookaside table entries */
+#define	WT_STAT_CONN_CACHE_LOOKASIDE_ENTRIES		1084
 /*! cache: lookaside table insert calls */
-#define	WT_STAT_CONN_CACHE_LOOKASIDE_INSERT		1084
+#define	WT_STAT_CONN_CACHE_LOOKASIDE_INSERT		1085
 /*! cache: lookaside table remove calls */
-#define	WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE		1085
+#define	WT_STAT_CONN_CACHE_LOOKASIDE_REMOVE		1086
 /*! cache: maximum bytes configured */
-#define	WT_STAT_CONN_CACHE_BYTES_MAX			1086
+#define	WT_STAT_CONN_CACHE_BYTES_MAX			1087
 /*! cache: maximum page size at eviction */
-#define	WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE	1087
+#define	WT_STAT_CONN_CACHE_EVICTION_MAXIMUM_PAGE_SIZE	1088
 /*! cache: modified pages evicted */
-#define	WT_STAT_CONN_CACHE_EVICTION_DIRTY		1088
+#define	WT_STAT_CONN_CACHE_EVICTION_DIRTY		1089
 /*! cache: modified pages evicted by application threads */
-#define	WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY		1089
+#define	WT_STAT_CONN_CACHE_EVICTION_APP_DIRTY		1090
 /*! cache: overflow pages read into cache */
-#define	WT_STAT_CONN_CACHE_READ_OVERFLOW		1090
+#define	WT_STAT_CONN_CACHE_READ_OVERFLOW		1091
 /*! cache: page split during eviction deepened the tree */
-#define	WT_STAT_CONN_CACHE_EVICTION_DEEPEN		1091
+#define	WT_STAT_CONN_CACHE_EVICTION_DEEPEN		1092
 /*! cache: page written requiring lookaside records */
-#define	WT_STAT_CONN_CACHE_WRITE_LOOKASIDE		1092
+#define	WT_STAT_CONN_CACHE_WRITE_LOOKASIDE		1093
 /*! cache: pages currently held in the cache */
-#define	WT_STAT_CONN_CACHE_PAGES_INUSE			1093
+#define	WT_STAT_CONN_CACHE_PAGES_INUSE			1094
 /*! cache: pages evicted because they exceeded the in-memory maximum count */
-#define	WT_STAT_CONN_CACHE_EVICTION_FORCE		1094
+#define	WT_STAT_CONN_CACHE_EVICTION_FORCE		1095
 /*!
  * cache: pages evicted because they exceeded the in-memory maximum time
  * (usecs)
  */
-#define	WT_STAT_CONN_CACHE_EVICTION_FORCE_TIME		1095
+#define	WT_STAT_CONN_CACHE_EVICTION_FORCE_TIME		1096
 /*! cache: pages evicted because they had chains of deleted items count */
-#define	WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE	1096
+#define	WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE	1097
 /*!
  * cache: pages evicted because they had chains of deleted items time
  * (usecs)
  */
-#define	WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE_TIME	1097
+#define	WT_STAT_CONN_CACHE_EVICTION_FORCE_DELETE_TIME	1098
 /*! cache: pages evicted by application threads */
-#define	WT_STAT_CONN_CACHE_EVICTION_APP			1098
+#define	WT_STAT_CONN_CACHE_EVICTION_APP			1099
 /*! cache: pages queued for eviction */
-#define	WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED	1099
+#define	WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED	1100
 /*! cache: pages queued for urgent eviction */
-#define	WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT	1100
+#define	WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_URGENT	1101
 /*! cache: pages queued for urgent eviction during walk */
-#define	WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST	1101
+#define	WT_STAT_CONN_CACHE_EVICTION_PAGES_QUEUED_OLDEST	1102
 /*! cache: pages read into cache */
-#define	WT_STAT_CONN_CACHE_READ				1102
+#define	WT_STAT_CONN_CACHE_READ				1103
 /*! cache: pages read into cache requiring lookaside entries */
-#define	WT_STAT_CONN_CACHE_READ_LOOKASIDE		1103
+#define	WT_STAT_CONN_CACHE_READ_LOOKASIDE		1104
 /*! cache: pages requested from the cache */
-#define	WT_STAT_CONN_CACHE_PAGES_REQUESTED		1104
+#define	WT_STAT_CONN_CACHE_PAGES_REQUESTED		1105
 /*! cache: pages seen by eviction walk */
-#define	WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN		1105
+#define	WT_STAT_CONN_CACHE_EVICTION_PAGES_SEEN		1106
 /*! cache: pages selected for eviction unable to be evicted */
-#define	WT_STAT_CONN_CACHE_EVICTION_FAIL		1106
+#define	WT_STAT_CONN_CACHE_EVICTION_FAIL		1107
 /*! cache: pages walked for eviction */
-#define	WT_STAT_CONN_CACHE_EVICTION_WALK		1107
+#define	WT_STAT_CONN_CACHE_EVICTION_WALK		1108
 /*! cache: pages written from cache */
-#define	WT_STAT_CONN_CACHE_WRITE			1108
+#define	WT_STAT_CONN_CACHE_WRITE			1109
 /*! cache: pages written requiring in-memory restoration */
-#define	WT_STAT_CONN_CACHE_WRITE_RESTORE		1109
+#define	WT_STAT_CONN_CACHE_WRITE_RESTORE		1110
 /*! cache: percentage overhead */
-#define	WT_STAT_CONN_CACHE_OVERHEAD			1110
+#define	WT_STAT_CONN_CACHE_OVERHEAD			1111
 /*! cache: tracked bytes belonging to internal pages in the cache */
-#define	WT_STAT_CONN_CACHE_BYTES_INTERNAL		1111
+#define	WT_STAT_CONN_CACHE_BYTES_INTERNAL		1112
 /*! cache: tracked bytes belonging to leaf pages in the cache */
-#define	WT_STAT_CONN_CACHE_BYTES_LEAF			1112
+#define	WT_STAT_CONN_CACHE_BYTES_LEAF			1113
 /*! cache: tracked dirty bytes in the cache */
-#define	WT_STAT_CONN_CACHE_BYTES_DIRTY			1113
+#define	WT_STAT_CONN_CACHE_BYTES_DIRTY			1114
 /*! cache: tracked dirty pages in the cache */
-#define	WT_STAT_CONN_CACHE_PAGES_DIRTY			1114
+#define	WT_STAT_CONN_CACHE_PAGES_DIRTY			1115
 /*! cache: unmodified pages evicted */
-#define	WT_STAT_CONN_CACHE_EVICTION_CLEAN		1115
+#define	WT_STAT_CONN_CACHE_EVICTION_CLEAN		1116
 /*! connection: auto adjusting condition resets */
-#define	WT_STAT_CONN_COND_AUTO_WAIT_RESET		1116
+#define	WT_STAT_CONN_COND_AUTO_WAIT_RESET		1117
 /*! connection: auto adjusting condition wait calls */
-#define	WT_STAT_CONN_COND_AUTO_WAIT			1117
+#define	WT_STAT_CONN_COND_AUTO_WAIT			1118
 /*! connection: detected system time went backwards */
-#define	WT_STAT_CONN_TIME_TRAVEL			1118
+#define	WT_STAT_CONN_TIME_TRAVEL			1119
 /*! connection: files currently open */
-#define	WT_STAT_CONN_FILE_OPEN				1119
+#define	WT_STAT_CONN_FILE_OPEN				1120
 /*! connection: memory allocations */
-#define	WT_STAT_CONN_MEMORY_ALLOCATION			1120
+#define	WT_STAT_CONN_MEMORY_ALLOCATION			1121
 /*! connection: memory frees */
-#define	WT_STAT_CONN_MEMORY_FREE			1121
+#define	WT_STAT_CONN_MEMORY_FREE			1122
 /*! connection: memory re-allocations */
-#define	WT_STAT_CONN_MEMORY_GROW			1122
+#define	WT_STAT_CONN_MEMORY_GROW			1123
 /*! connection: pthread mutex condition wait calls */
-#define	WT_STAT_CONN_COND_WAIT				1123
+#define	WT_STAT_CONN_COND_WAIT				1124
 /*! connection: pthread mutex shared lock read-lock calls */
-#define	WT_STAT_CONN_RWLOCK_READ			1124
+#define	WT_STAT_CONN_RWLOCK_READ			1125
 /*! connection: pthread mutex shared lock write-lock calls */
-#define	WT_STAT_CONN_RWLOCK_WRITE			1125
+#define	WT_STAT_CONN_RWLOCK_WRITE			1126
 /*! connection: total fsync I/Os */
-#define	WT_STAT_CONN_FSYNC_IO				1126
+#define	WT_STAT_CONN_FSYNC_IO				1127
 /*! connection: total read I/Os */
-#define	WT_STAT_CONN_READ_IO				1127
+#define	WT_STAT_CONN_READ_IO				1128
 /*! connection: total write I/Os */
-#define	WT_STAT_CONN_WRITE_IO				1128
+#define	WT_STAT_CONN_WRITE_IO				1129
 /*! cursor: cursor create calls */
-#define	WT_STAT_CONN_CURSOR_CREATE			1129
+#define	WT_STAT_CONN_CURSOR_CREATE			1130
 /*! cursor: cursor insert calls */
-#define	WT_STAT_CONN_CURSOR_INSERT			1130
+#define	WT_STAT_CONN_CURSOR_INSERT			1131
 /*! cursor: cursor modify calls */
-#define	WT_STAT_CONN_CURSOR_MODIFY			1131
+#define	WT_STAT_CONN_CURSOR_MODIFY			1132
 /*! cursor: cursor next calls */
-#define	WT_STAT_CONN_CURSOR_NEXT			1132
+#define	WT_STAT_CONN_CURSOR_NEXT			1133
 /*! cursor: cursor prev calls */
-#define	WT_STAT_CONN_CURSOR_PREV			1133
+#define	WT_STAT_CONN_CURSOR_PREV			1134
 /*! cursor: cursor remove calls */
-#define	WT_STAT_CONN_CURSOR_REMOVE			1134
+#define	WT_STAT_CONN_CURSOR_REMOVE			1135
 /*! cursor: cursor reserve calls */
-#define	WT_STAT_CONN_CURSOR_RESERVE			1135
+#define	WT_STAT_CONN_CURSOR_RESERVE			1136
 /*! cursor: cursor reset calls */
-#define	WT_STAT_CONN_CURSOR_RESET			1136
+#define	WT_STAT_CONN_CURSOR_RESET			1137
 /*! cursor: cursor restarted searches */
-#define	WT_STAT_CONN_CURSOR_RESTART			1137
+#define	WT_STAT_CONN_CURSOR_RESTART			1138
 /*! cursor: cursor search calls */
-#define	WT_STAT_CONN_CURSOR_SEARCH			1138
+#define	WT_STAT_CONN_CURSOR_SEARCH			1139
 /*! cursor: cursor search near calls */
-#define	WT_STAT_CONN_CURSOR_SEARCH_NEAR			1139
+#define	WT_STAT_CONN_CURSOR_SEARCH_NEAR			1140
 /*! cursor: cursor update calls */
-#define	WT_STAT_CONN_CURSOR_UPDATE			1140
+#define	WT_STAT_CONN_CURSOR_UPDATE			1141
 /*! cursor: truncate calls */
-#define	WT_STAT_CONN_CURSOR_TRUNCATE			1141
+#define	WT_STAT_CONN_CURSOR_TRUNCATE			1142
 /*! data-handle: connection data handles currently active */
-#define	WT_STAT_CONN_DH_CONN_HANDLE_COUNT		1142
+#define	WT_STAT_CONN_DH_CONN_HANDLE_COUNT		1143
 /*! data-handle: connection sweep candidate became referenced */
-#define	WT_STAT_CONN_DH_SWEEP_REF			1143
+#define	WT_STAT_CONN_DH_SWEEP_REF			1144
 /*! data-handle: connection sweep dhandles closed */
-#define	WT_STAT_CONN_DH_SWEEP_CLOSE			1144
+#define	WT_STAT_CONN_DH_SWEEP_CLOSE			1145
 /*! data-handle: connection sweep dhandles removed from hash list */
-#define	WT_STAT_CONN_DH_SWEEP_REMOVE			1145
+#define	WT_STAT_CONN_DH_SWEEP_REMOVE			1146
 /*! data-handle: connection sweep time-of-death sets */
-#define	WT_STAT_CONN_DH_SWEEP_TOD			1146
+#define	WT_STAT_CONN_DH_SWEEP_TOD			1147
 /*! data-handle: connection sweeps */
-#define	WT_STAT_CONN_DH_SWEEPS				1147
+#define	WT_STAT_CONN_DH_SWEEPS				1148
 /*! data-handle: session dhandles swept */
-#define	WT_STAT_CONN_DH_SESSION_HANDLES			1148
+#define	WT_STAT_CONN_DH_SESSION_HANDLES			1149
 /*! data-handle: session sweep attempts */
-#define	WT_STAT_CONN_DH_SESSION_SWEEPS			1149
+#define	WT_STAT_CONN_DH_SESSION_SWEEPS			1150
 /*! lock: checkpoint lock acquisitions */
-#define	WT_STAT_CONN_LOCK_CHECKPOINT_COUNT		1150
+#define	WT_STAT_CONN_LOCK_CHECKPOINT_COUNT		1151
 /*! lock: checkpoint lock application thread wait time (usecs) */
-#define	WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION	1151
+#define	WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_APPLICATION	1152
 /*! lock: checkpoint lock internal thread wait time (usecs) */
-#define	WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL	1152
+#define	WT_STAT_CONN_LOCK_CHECKPOINT_WAIT_INTERNAL	1153
 /*!
  * lock: dhandle lock application thread time waiting for the dhandle
  * lock (usecs)
  */
-#define	WT_STAT_CONN_LOCK_DHANDLE_WAIT_APPLICATION	1153
+#define	WT_STAT_CONN_LOCK_DHANDLE_WAIT_APPLICATION	1154
 /*!
  * lock: dhandle lock internal thread time waiting for the dhandle lock
  * (usecs)
  */
-#define	WT_STAT_CONN_LOCK_DHANDLE_WAIT_INTERNAL		1154
+#define	WT_STAT_CONN_LOCK_DHANDLE_WAIT_INTERNAL		1155
 /*! lock: dhandle read lock acquisitions */
-#define	WT_STAT_CONN_LOCK_DHANDLE_READ_COUNT		1155
+#define	WT_STAT_CONN_LOCK_DHANDLE_READ_COUNT		1156
 /*! lock: dhandle write lock acquisitions */
-#define	WT_STAT_CONN_LOCK_DHANDLE_WRITE_COUNT		1156
+#define	WT_STAT_CONN_LOCK_DHANDLE_WRITE_COUNT		1157
 /*! lock: metadata lock acquisitions */
-#define	WT_STAT_CONN_LOCK_METADATA_COUNT		1157
+#define	WT_STAT_CONN_LOCK_METADATA_COUNT		1158
 /*! lock: metadata lock application thread wait time (usecs) */
-#define	WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION	1158
+#define	WT_STAT_CONN_LOCK_METADATA_WAIT_APPLICATION	1159
 /*! lock: metadata lock internal thread wait time (usecs) */
-#define	WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL	1159
+#define	WT_STAT_CONN_LOCK_METADATA_WAIT_INTERNAL	1160
 /*! lock: schema lock acquisitions */
-#define	WT_STAT_CONN_LOCK_SCHEMA_COUNT			1160
+#define	WT_STAT_CONN_LOCK_SCHEMA_COUNT			1161
 /*! lock: schema lock application thread wait time (usecs) */
-#define	WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION	1161
+#define	WT_STAT_CONN_LOCK_SCHEMA_WAIT_APPLICATION	1162
 /*! lock: schema lock internal thread wait time (usecs) */
-#define	WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL		1162
+#define	WT_STAT_CONN_LOCK_SCHEMA_WAIT_INTERNAL		1163
 /*!
  * lock: table lock application thread time waiting for the table lock
  * (usecs)
  */
-#define	WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION	1163
+#define	WT_STAT_CONN_LOCK_TABLE_WAIT_APPLICATION	1164
 /*!
  * lock: table lock internal thread time waiting for the table lock
  * (usecs)
  */
-#define	WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL		1164
+#define	WT_STAT_CONN_LOCK_TABLE_WAIT_INTERNAL		1165
 /*! lock: table read lock acquisitions */
-#define	WT_STAT_CONN_LOCK_TABLE_READ_COUNT		1165
+#define	WT_STAT_CONN_LOCK_TABLE_READ_COUNT		1166
 /*! lock: table write lock acquisitions */
-#define	WT_STAT_CONN_LOCK_TABLE_WRITE_COUNT		1166
+#define	WT_STAT_CONN_LOCK_TABLE_WRITE_COUNT		1167
 /*! log: busy returns attempting to switch slots */
-#define	WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY		1167
+#define	WT_STAT_CONN_LOG_SLOT_SWITCH_BUSY		1168
 /*! log: force checkpoint calls slept */
-#define	WT_STAT_CONN_LOG_FORCE_CKPT_SLEEP		1168
+#define	WT_STAT_CONN_LOG_FORCE_CKPT_SLEEP		1169
 /*! log: log bytes of payload data */
-#define	WT_STAT_CONN_LOG_BYTES_PAYLOAD			1169
+#define	WT_STAT_CONN_LOG_BYTES_PAYLOAD			1170
 /*! log: log bytes written */
-#define	WT_STAT_CONN_LOG_BYTES_WRITTEN			1170
+#define	WT_STAT_CONN_LOG_BYTES_WRITTEN			1171
 /*! log: log files manually zero-filled */
-#define	WT_STAT_CONN_LOG_ZERO_FILLS			1171
+#define	WT_STAT_CONN_LOG_ZERO_FILLS			1172
 /*! log: log flush operations */
-#define	WT_STAT_CONN_LOG_FLUSH				1172
+#define	WT_STAT_CONN_LOG_FLUSH				1173
 /*! log: log force write operations */
-#define	WT_STAT_CONN_LOG_FORCE_WRITE			1173
+#define	WT_STAT_CONN_LOG_FORCE_WRITE			1174
 /*! log: log force write operations skipped */
-#define	WT_STAT_CONN_LOG_FORCE_WRITE_SKIP		1174
+#define	WT_STAT_CONN_LOG_FORCE_WRITE_SKIP		1175
 /*! log: log records compressed */
-#define	WT_STAT_CONN_LOG_COMPRESS_WRITES		1175
+#define	WT_STAT_CONN_LOG_COMPRESS_WRITES		1176
 /*! log: log records not compressed */
-#define	WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS		1176
+#define	WT_STAT_CONN_LOG_COMPRESS_WRITE_FAILS		1177
 /*! log: log records too small to compress */
-#define	WT_STAT_CONN_LOG_COMPRESS_SMALL			1177
+#define	WT_STAT_CONN_LOG_COMPRESS_SMALL			1178
 /*! log: log release advances write LSN */
-#define	WT_STAT_CONN_LOG_RELEASE_WRITE_LSN		1178
+#define	WT_STAT_CONN_LOG_RELEASE_WRITE_LSN		1179
 /*! log: log scan operations */
-#define	WT_STAT_CONN_LOG_SCANS				1179
+#define	WT_STAT_CONN_LOG_SCANS				1180
 /*! log: log scan records requiring two reads */
-#define	WT_STAT_CONN_LOG_SCAN_REREADS			1180
+#define	WT_STAT_CONN_LOG_SCAN_REREADS			1181
 /*! log: log server thread advances write LSN */
-#define	WT_STAT_CONN_LOG_WRITE_LSN			1181
+#define	WT_STAT_CONN_LOG_WRITE_LSN			1182
 /*! log: log server thread write LSN walk skipped */
-#define	WT_STAT_CONN_LOG_WRITE_LSN_SKIP			1182
+#define	WT_STAT_CONN_LOG_WRITE_LSN_SKIP			1183
 /*! log: log sync operations */
-#define	WT_STAT_CONN_LOG_SYNC				1183
+#define	WT_STAT_CONN_LOG_SYNC				1184
 /*! log: log sync time duration (usecs) */
-#define	WT_STAT_CONN_LOG_SYNC_DURATION			1184
+#define	WT_STAT_CONN_LOG_SYNC_DURATION			1185
 /*! log: log sync_dir operations */
-#define	WT_STAT_CONN_LOG_SYNC_DIR			1185
+#define	WT_STAT_CONN_LOG_SYNC_DIR			1186
 /*! log: log sync_dir time duration (usecs) */
-#define	WT_STAT_CONN_LOG_SYNC_DIR_DURATION		1186
+#define	WT_STAT_CONN_LOG_SYNC_DIR_DURATION		1187
 /*! log: log write operations */
-#define	WT_STAT_CONN_LOG_WRITES				1187
+#define	WT_STAT_CONN_LOG_WRITES				1188
 /*! log: logging bytes consolidated */
-#define	WT_STAT_CONN_LOG_SLOT_CONSOLIDATED		1188
+#define	WT_STAT_CONN_LOG_SLOT_CONSOLIDATED		1189
 /*! log: maximum log file size */
-#define	WT_STAT_CONN_LOG_MAX_FILESIZE			1189
+#define	WT_STAT_CONN_LOG_MAX_FILESIZE			1190
 /*! log: number of pre-allocated log files to create */
-#define	WT_STAT_CONN_LOG_PREALLOC_MAX			1190
+#define	WT_STAT_CONN_LOG_PREALLOC_MAX			1191
 /*! log: pre-allocated log files not ready and missed */
-#define	WT_STAT_CONN_LOG_PREALLOC_MISSED		1191
+#define	WT_STAT_CONN_LOG_PREALLOC_MISSED		1192
 /*! log: pre-allocated log files prepared */
-#define	WT_STAT_CONN_LOG_PREALLOC_FILES			1192
+#define	WT_STAT_CONN_LOG_PREALLOC_FILES			1193
 /*! log: pre-allocated log files used */
-#define	WT_STAT_CONN_LOG_PREALLOC_USED			1193
+#define	WT_STAT_CONN_LOG_PREALLOC_USED			1194
 /*! log: records processed by log scan */
-#define	WT_STAT_CONN_LOG_SCAN_RECORDS			1194
+#define	WT_STAT_CONN_LOG_SCAN_RECORDS			1195
 /*! log: slot close lost race */
-#define	WT_STAT_CONN_LOG_SLOT_CLOSE_RACE		1195
+#define	WT_STAT_CONN_LOG_SLOT_CLOSE_RACE		1196
 /*! log: slot close unbuffered waits */
-#define	WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF		1196
+#define	WT_STAT_CONN_LOG_SLOT_CLOSE_UNBUF		1197
 /*! log: slot closures */
-#define	WT_STAT_CONN_LOG_SLOT_CLOSES			1197
+#define	WT_STAT_CONN_LOG_SLOT_CLOSES			1198
 /*! log: slot join atomic update races */
-#define	WT_STAT_CONN_LOG_SLOT_RACES			1198
+#define	WT_STAT_CONN_LOG_SLOT_RACES			1199
 /*! log: slot join calls atomic updates raced */
-#define	WT_STAT_CONN_LOG_SLOT_YIELD_RACE		1199
+#define	WT_STAT_CONN_LOG_SLOT_YIELD_RACE		1200
 /*! log: slot join calls did not yield */
-#define	WT_STAT_CONN_LOG_SLOT_IMMEDIATE			1200
+#define	WT_STAT_CONN_LOG_SLOT_IMMEDIATE			1201
 /*! log: slot join calls found active slot closed */
-#define	WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE		1201
+#define	WT_STAT_CONN_LOG_SLOT_YIELD_CLOSE		1202
 /*! log: slot join calls slept */
-#define	WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP		1202
+#define	WT_STAT_CONN_LOG_SLOT_YIELD_SLEEP		1203
 /*! log: slot join calls yielded */
-#define	WT_STAT_CONN_LOG_SLOT_YIELD			1203
+#define	WT_STAT_CONN_LOG_SLOT_YIELD			1204
 /*! log: slot join found active slot closed */
-#define	WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED		1204
+#define	WT_STAT_CONN_LOG_SLOT_ACTIVE_CLOSED		1205
 /*! log: slot joins yield time (usecs) */
-#define	WT_STAT_CONN_LOG_SLOT_YIELD_DURATION		1205
+#define	WT_STAT_CONN_LOG_SLOT_YIELD_DURATION		1206
 /*! log: slot transitions unable to find free slot */
-#define	WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS		1206
+#define	WT_STAT_CONN_LOG_SLOT_NO_FREE_SLOTS		1207
 /*! log: slot unbuffered writes */
-#define	WT_STAT_CONN_LOG_SLOT_UNBUFFERED		1207
+#define	WT_STAT_CONN_LOG_SLOT_UNBUFFERED		1208
 /*! log: total in-memory size of compressed records */
-#define	WT_STAT_CONN_LOG_COMPRESS_MEM			1208
+#define	WT_STAT_CONN_LOG_COMPRESS_MEM			1209
 /*! log: total log buffer size */
-#define	WT_STAT_CONN_LOG_BUFFER_SIZE			1209
+#define	WT_STAT_CONN_LOG_BUFFER_SIZE			1210
 /*! log: total size of compressed records */
-#define	WT_STAT_CONN_LOG_COMPRESS_LEN			1210
+#define	WT_STAT_CONN_LOG_COMPRESS_LEN			1211
 /*! log: written slots coalesced */
-#define	WT_STAT_CONN_LOG_SLOT_COALESCED			1211
+#define	WT_STAT_CONN_LOG_SLOT_COALESCED			1212
 /*! log: yields waiting for previous log file close */
-#define	WT_STAT_CONN_LOG_CLOSE_YIELDS			1212
+#define	WT_STAT_CONN_LOG_CLOSE_YIELDS			1213
 /*! reconciliation: fast-path pages deleted */
-#define	WT_STAT_CONN_REC_PAGE_DELETE_FAST		1213
+#define	WT_STAT_CONN_REC_PAGE_DELETE_FAST		1214
 /*! reconciliation: page reconciliation calls */
-#define	WT_STAT_CONN_REC_PAGES				1214
+#define	WT_STAT_CONN_REC_PAGES				1215
 /*! reconciliation: page reconciliation calls for eviction */
-#define	WT_STAT_CONN_REC_PAGES_EVICTION			1215
+#define	WT_STAT_CONN_REC_PAGES_EVICTION			1216
 /*! reconciliation: pages deleted */
-#define	WT_STAT_CONN_REC_PAGE_DELETE			1216
+#define	WT_STAT_CONN_REC_PAGE_DELETE			1217
 /*! reconciliation: split bytes currently awaiting free */
-#define	WT_STAT_CONN_REC_SPLIT_STASHED_BYTES		1217
+#define	WT_STAT_CONN_REC_SPLIT_STASHED_BYTES		1218
 /*! reconciliation: split objects currently awaiting free */
-#define	WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS		1218
+#define	WT_STAT_CONN_REC_SPLIT_STASHED_OBJECTS		1219
 /*! session: open cursor count */
-#define	WT_STAT_CONN_SESSION_CURSOR_OPEN		1219
+#define	WT_STAT_CONN_SESSION_CURSOR_OPEN		1220
 /*! session: open session count */
-#define	WT_STAT_CONN_SESSION_OPEN			1220
+#define	WT_STAT_CONN_SESSION_OPEN			1221
 /*! session: table alter failed calls */
-#define	WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL		1221
+#define	WT_STAT_CONN_SESSION_TABLE_ALTER_FAIL		1222
 /*! session: table alter successful calls */
-#define	WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS	1222
+#define	WT_STAT_CONN_SESSION_TABLE_ALTER_SUCCESS	1223
 /*! session: table alter unchanged and skipped */
-#define	WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP		1223
+#define	WT_STAT_CONN_SESSION_TABLE_ALTER_SKIP		1224
 /*! session: table compact failed calls */
-#define	WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL		1224
+#define	WT_STAT_CONN_SESSION_TABLE_COMPACT_FAIL		1225
 /*! session: table compact successful calls */
-#define	WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS	1225
+#define	WT_STAT_CONN_SESSION_TABLE_COMPACT_SUCCESS	1226
 /*! session: table create failed calls */
-#define	WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL		1226
+#define	WT_STAT_CONN_SESSION_TABLE_CREATE_FAIL		1227
 /*! session: table create successful calls */
-#define	WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS	1227
+#define	WT_STAT_CONN_SESSION_TABLE_CREATE_SUCCESS	1228
 /*! session: table drop failed calls */
-#define	WT_STAT_CONN_SESSION_TABLE_DROP_FAIL		1228
+#define	WT_STAT_CONN_SESSION_TABLE_DROP_FAIL		1229
 /*! session: table drop successful calls */
-#define	WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS		1229
+#define	WT_STAT_CONN_SESSION_TABLE_DROP_SUCCESS		1230
 /*! session: table rebalance failed calls */
-#define	WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL	1230
+#define	WT_STAT_CONN_SESSION_TABLE_REBALANCE_FAIL	1231
 /*! session: table rebalance successful calls */
-#define	WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS	1231
+#define	WT_STAT_CONN_SESSION_TABLE_REBALANCE_SUCCESS	1232
 /*! session: table rename failed calls */
-#define	WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL		1232
+#define	WT_STAT_CONN_SESSION_TABLE_RENAME_FAIL		1233
 /*! session: table rename successful calls */
-#define	WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS	1233
+#define	WT_STAT_CONN_SESSION_TABLE_RENAME_SUCCESS	1234
 /*! session: table salvage failed calls */
-#define	WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL		1234
+#define	WT_STAT_CONN_SESSION_TABLE_SALVAGE_FAIL		1235
 /*! session: table salvage successful calls */
-#define	WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS	1235
+#define	WT_STAT_CONN_SESSION_TABLE_SALVAGE_SUCCESS	1236
 /*! session: table truncate failed calls */
-#define	WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL	1236
+#define	WT_STAT_CONN_SESSION_TABLE_TRUNCATE_FAIL	1237
 /*! session: table truncate successful calls */
-#define	WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS	1237
+#define	WT_STAT_CONN_SESSION_TABLE_TRUNCATE_SUCCESS	1238
 /*! session: table verify failed calls */
-#define	WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL		1238
+#define	WT_STAT_CONN_SESSION_TABLE_VERIFY_FAIL		1239
 /*! session: table verify successful calls */
-#define	WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS	1239
+#define	WT_STAT_CONN_SESSION_TABLE_VERIFY_SUCCESS	1240
 /*! thread-state: active filesystem fsync calls */
-#define	WT_STAT_CONN_THREAD_FSYNC_ACTIVE		1240
+#define	WT_STAT_CONN_THREAD_FSYNC_ACTIVE		1241
 /*! thread-state: active filesystem read calls */
-#define	WT_STAT_CONN_THREAD_READ_ACTIVE			1241
+#define	WT_STAT_CONN_THREAD_READ_ACTIVE			1242
 /*! thread-state: active filesystem write calls */
-#define	WT_STAT_CONN_THREAD_WRITE_ACTIVE		1242
+#define	WT_STAT_CONN_THREAD_WRITE_ACTIVE		1243
 /*! thread-yield: application thread time evicting (usecs) */
-#define	WT_STAT_CONN_APPLICATION_EVICT_TIME		1243
+#define	WT_STAT_CONN_APPLICATION_EVICT_TIME		1244
 /*! thread-yield: application thread time waiting for cache (usecs) */
-#define	WT_STAT_CONN_APPLICATION_CACHE_TIME		1244
+#define	WT_STAT_CONN_APPLICATION_CACHE_TIME		1245
 /*!
  * thread-yield: connection close blocked waiting for transaction state
  * stabilization
  */
-#define	WT_STAT_CONN_TXN_RELEASE_BLOCKED		1245
+#define	WT_STAT_CONN_TXN_RELEASE_BLOCKED		1246
 /*! thread-yield: connection close yielded for lsm manager shutdown */
-#define	WT_STAT_CONN_CONN_CLOSE_BLOCKED_LSM		1246
+#define	WT_STAT_CONN_CONN_CLOSE_BLOCKED_LSM		1247
 /*! thread-yield: data handle lock yielded */
-#define	WT_STAT_CONN_DHANDLE_LOCK_BLOCKED		1247
+#define	WT_STAT_CONN_DHANDLE_LOCK_BLOCKED		1248
 /*!
  * thread-yield: get reference for page index and slot time sleeping
  * (usecs)
  */
-#define	WT_STAT_CONN_PAGE_INDEX_SLOT_REF_BLOCKED	1248
+#define	WT_STAT_CONN_PAGE_INDEX_SLOT_REF_BLOCKED	1249
 /*! thread-yield: log server sync yielded for log write */
-#define	WT_STAT_CONN_LOG_SERVER_SYNC_BLOCKED		1249
+#define	WT_STAT_CONN_LOG_SERVER_SYNC_BLOCKED		1250
 /*! thread-yield: page acquire busy blocked */
-#define	WT_STAT_CONN_PAGE_BUSY_BLOCKED			1250
+#define	WT_STAT_CONN_PAGE_BUSY_BLOCKED			1251
 /*! thread-yield: page acquire eviction blocked */
-#define	WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED	1251
+#define	WT_STAT_CONN_PAGE_FORCIBLE_EVICT_BLOCKED	1252
 /*! thread-yield: page acquire locked blocked */
-#define	WT_STAT_CONN_PAGE_LOCKED_BLOCKED		1252
+#define	WT_STAT_CONN_PAGE_LOCKED_BLOCKED		1253
 /*! thread-yield: page acquire read blocked */
-#define	WT_STAT_CONN_PAGE_READ_BLOCKED			1253
+#define	WT_STAT_CONN_PAGE_READ_BLOCKED			1254
 /*! thread-yield: page acquire time sleeping (usecs) */
-#define	WT_STAT_CONN_PAGE_SLEEP				1254
+#define	WT_STAT_CONN_PAGE_SLEEP				1255
 /*!
  * thread-yield: page delete rollback time sleeping for state change
  * (usecs)
  */
-#define	WT_STAT_CONN_PAGE_DEL_ROLLBACK_BLOCKED		1255
+#define	WT_STAT_CONN_PAGE_DEL_ROLLBACK_BLOCKED		1256
 /*! thread-yield: page reconciliation yielded due to child modification */
-#define	WT_STAT_CONN_CHILD_MODIFY_BLOCKED_PAGE		1256
+#define	WT_STAT_CONN_CHILD_MODIFY_BLOCKED_PAGE		1257
 /*!
  * thread-yield: tree descend one level yielded for split page index
  * update
  */
-#define	WT_STAT_CONN_TREE_DESCEND_BLOCKED		1257
+#define	WT_STAT_CONN_TREE_DESCEND_BLOCKED		1258
 /*! transaction: number of named snapshots created */
-#define	WT_STAT_CONN_TXN_SNAPSHOTS_CREATED		1258
+#define	WT_STAT_CONN_TXN_SNAPSHOTS_CREATED		1259
 /*! transaction: number of named snapshots dropped */
-#define	WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED		1259
+#define	WT_STAT_CONN_TXN_SNAPSHOTS_DROPPED		1260
 /*! transaction: transaction begins */
-#define	WT_STAT_CONN_TXN_BEGIN				1260
+#define	WT_STAT_CONN_TXN_BEGIN				1261
 /*! transaction: transaction checkpoint currently running */
-#define	WT_STAT_CONN_TXN_CHECKPOINT_RUNNING		1261
+#define	WT_STAT_CONN_TXN_CHECKPOINT_RUNNING		1262
 /*! transaction: transaction checkpoint generation */
-#define	WT_STAT_CONN_TXN_CHECKPOINT_GENERATION		1262
+#define	WT_STAT_CONN_TXN_CHECKPOINT_GENERATION		1263
 /*! transaction: transaction checkpoint max time (msecs) */
-#define	WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX		1263
+#define	WT_STAT_CONN_TXN_CHECKPOINT_TIME_MAX		1264
 /*! transaction: transaction checkpoint min time (msecs) */
-#define	WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN		1264
+#define	WT_STAT_CONN_TXN_CHECKPOINT_TIME_MIN		1265
 /*! transaction: transaction checkpoint most recent time (msecs) */
-#define	WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT		1265
+#define	WT_STAT_CONN_TXN_CHECKPOINT_TIME_RECENT		1266
 /*! transaction: transaction checkpoint scrub dirty target */
-#define	WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET	1266
+#define	WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TARGET	1267
 /*! transaction: transaction checkpoint scrub time (msecs) */
-#define	WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME		1267
+#define	WT_STAT_CONN_TXN_CHECKPOINT_SCRUB_TIME		1268
 /*! transaction: transaction checkpoint total time (msecs) */
-#define	WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL		1268
+#define	WT_STAT_CONN_TXN_CHECKPOINT_TIME_TOTAL		1269
 /*! transaction: transaction checkpoints */
-#define	WT_STAT_CONN_TXN_CHECKPOINT			1269
+#define	WT_STAT_CONN_TXN_CHECKPOINT			1270
 /*!
  * transaction: transaction checkpoints skipped because database was
  * clean
  */
-#define	WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED		1270
+#define	WT_STAT_CONN_TXN_CHECKPOINT_SKIPPED		1271
 /*! transaction: transaction failures due to cache overflow */
-#define	WT_STAT_CONN_TXN_FAIL_CACHE			1271
+#define	WT_STAT_CONN_TXN_FAIL_CACHE			1272
 /*!
  * transaction: transaction fsync calls for checkpoint after allocating
  * the transaction ID
  */
-#define	WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST		1272
+#define	WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST		1273
 /*!
  * transaction: transaction fsync duration for checkpoint after
  * allocating the transaction ID (usecs)
  */
-#define	WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION	1273
+#define	WT_STAT_CONN_TXN_CHECKPOINT_FSYNC_POST_DURATION	1274
 /*! transaction: transaction range of IDs currently pinned */
-#define	WT_STAT_CONN_TXN_PINNED_RANGE			1274
+#define	WT_STAT_CONN_TXN_PINNED_RANGE			1275
 /*! transaction: transaction range of IDs currently pinned by a checkpoint */
-#define	WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE	1275
+#define	WT_STAT_CONN_TXN_PINNED_CHECKPOINT_RANGE	1276
 /*!
  * transaction: transaction range of IDs currently pinned by named
  * snapshots
  */
-#define	WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE		1276
+#define	WT_STAT_CONN_TXN_PINNED_SNAPSHOT_RANGE		1277
 /*! transaction: transaction sync calls */
-#define	WT_STAT_CONN_TXN_SYNC				1277
+#define	WT_STAT_CONN_TXN_SYNC				1278
 /*! transaction: transactions commit timestamp queue inserts to head */
-#define	WT_STAT_CONN_TXN_COMMIT_QUEUE_HEAD		1278
+#define	WT_STAT_CONN_TXN_COMMIT_QUEUE_HEAD		1279
 /*! transaction: transactions commit timestamp queue inserts total */
-#define	WT_STAT_CONN_TXN_COMMIT_QUEUE_INSERTS		1279
+#define	WT_STAT_CONN_TXN_COMMIT_QUEUE_INSERTS		1280
 /*! transaction: transactions commit timestamp queue length */
-#define	WT_STAT_CONN_TXN_COMMIT_QUEUE_LEN		1280
+#define	WT_STAT_CONN_TXN_COMMIT_QUEUE_LEN		1281
 /*! transaction: transactions committed */
-#define	WT_STAT_CONN_TXN_COMMIT				1281
+#define	WT_STAT_CONN_TXN_COMMIT				1282
 /*! transaction: transactions read timestamp queue inserts to head */
-#define	WT_STAT_CONN_TXN_READ_QUEUE_HEAD		1282
+#define	WT_STAT_CONN_TXN_READ_QUEUE_HEAD		1283
 /*! transaction: transactions read timestamp queue inserts total */
-#define	WT_STAT_CONN_TXN_READ_QUEUE_INSERTS		1283
+#define	WT_STAT_CONN_TXN_READ_QUEUE_INSERTS		1284
 /*! transaction: transactions read timestamp queue length */
-#define	WT_STAT_CONN_TXN_READ_QUEUE_LEN			1284
+#define	WT_STAT_CONN_TXN_READ_QUEUE_LEN			1285
 /*! transaction: transactions rolled back */
-#define	WT_STAT_CONN_TXN_ROLLBACK			1285
+#define	WT_STAT_CONN_TXN_ROLLBACK			1286
 /*! transaction: update conflicts */
-#define	WT_STAT_CONN_TXN_UPDATE_CONFLICT		1286
+#define	WT_STAT_CONN_TXN_UPDATE_CONFLICT		1287
 
 /*!
  * @}
diff --git a/src/third_party/wiredtiger/src/include/wt_internal.h b/src/third_party/wiredtiger/src/include/wt_internal.h
index 84617dfcab8..b25ed08e30f 100644
--- a/src/third_party/wiredtiger/src/include/wt_internal.h
+++ b/src/third_party/wiredtiger/src/include/wt_internal.h
@@ -258,6 +258,8 @@ struct __wt_page_header;
     typedef struct __wt_page_header WT_PAGE_HEADER;
 struct __wt_page_index;
     typedef struct __wt_page_index WT_PAGE_INDEX;
+struct __wt_page_lookaside;
+    typedef struct __wt_page_lookaside WT_PAGE_LOOKASIDE;
 struct __wt_page_modify;
     typedef struct __wt_page_modify WT_PAGE_MODIFY;
 struct __wt_process;
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_tree.c b/src/third_party/wiredtiger/src/lsm/lsm_tree.c
index a42fbbe511b..95d025247a6 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_tree.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_tree.c
@@ -102,8 +102,6 @@ __lsm_tree_discard(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool final)
 static void
 __lsm_tree_close(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool final)
 {
-	int i;
-
 	/*
 	 * Stop any new work units being added. The barrier is necessary
 	 * because we rely on the state change being visible before checking
@@ -118,8 +116,7 @@ __lsm_tree_close(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool final)
 	 * we know a user is holding a reference to the tree, so exclusive
 	 * access is not available.
 	 */
-	for (i = 0;
-	    lsm_tree->queue_ref > 0 || (final && lsm_tree->refcnt > 1); ++i) {
+	while (lsm_tree->queue_ref > 0 || (final && lsm_tree->refcnt > 1)) {
 		/*
 		 * Remove any work units from the manager queues. Do this step
 		 * repeatedly in case a work unit was in the process of being
@@ -133,10 +130,8 @@ __lsm_tree_close(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, bool final)
 		 * other schema level operations will return EBUSY, even though
 		 * we're dropping the schema lock here.
 		 */
-		if (i % WT_THOUSAND == 0)
-			WT_WITHOUT_LOCKS(session,
-			    __wt_lsm_manager_clear_tree(session, lsm_tree));
-		__wt_yield();
+		WT_WITHOUT_LOCKS(session,
+		    __wt_lsm_manager_clear_tree(session, lsm_tree));
 	}
 }
 
diff --git a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
index 879913bccec..05e5fe5b07e 100644
--- a/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
+++ b/src/third_party/wiredtiger/src/lsm/lsm_work_unit.c
@@ -320,11 +320,12 @@ int
 __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session,
     WT_LSM_TREE *lsm_tree, WT_LSM_CHUNK *chunk)
 {
+	WT_BTREE *btree;
 	WT_DECL_RET;
 	WT_TXN_ISOLATION saved_isolation;
-	bool flush_set, release_btree;
+	bool flush_set, release_dhandle;
 
-	flush_set = release_btree = false;
+	flush_set = release_dhandle = false;
 
 	/*
 	 * If the chunk is already checkpointed, make sure it is also evicted.
@@ -374,7 +375,7 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session,
 	 * take a long time.
 	 */
 	WT_ERR(__wt_session_get_dhandle(session, chunk->uri, NULL, NULL, 0));
-	release_btree = true;
+	release_dhandle = true;
 
 	/*
 	 * Set read-uncommitted: we have already checked that all of the updates
@@ -407,9 +408,6 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session,
 	if (ret != 0)
 		WT_ERR_MSG(session, ret, "LSM checkpoint");
 
-	release_btree = false;
-	WT_ERR(__wt_session_release_dhandle(session));
-
 	/* Now the file is written, get the chunk size. */
 	WT_ERR(__wt_lsm_tree_set_chunk_size(session, chunk));
 
@@ -429,6 +427,19 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session,
 	if (ret != 0)
 		WT_ERR_MSG(session, ret, "LSM metadata write");
 
+	/*
+	 * Enable eviction on the live chunk so it doesn't block the cache.
+	 * Future reads should direct to the on-disk chunk anyway.
+	 */
+	btree = session->dhandle->handle;
+	if (btree->evict_disabled_open) {
+		btree->evict_disabled_open = false;
+		__wt_evict_file_exclusive_off(session);
+	}
+
+	release_dhandle = false;
+	WT_ERR(__wt_session_release_dhandle(session));
+
 	WT_PUBLISH(chunk->flushing, 0);
 	flush_set = false;
 
@@ -448,7 +459,7 @@ __wt_lsm_checkpoint_chunk(WT_SESSION_IMPL *session,
 
 err:	if (flush_set)
 		WT_PUBLISH(chunk->flushing, 0);
-	if (release_btree)
+	if (release_dhandle)
 		WT_TRET(__wt_session_release_dhandle(session));
 
 	return (ret);
diff --git a/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c b/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c
index 5d0295d94ce..533d2a0ab08 100644
--- a/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c
+++ b/src/third_party/wiredtiger/src/os_posix/os_mtx_cond.c
@@ -26,8 +26,11 @@ __wt_cond_alloc(WT_SESSION_IMPL *session, const char *name, WT_CONDVAR **condp)
 	pthread_condattr_t condattr;
 
 	WT_ERR(pthread_condattr_init(&condattr));
-	WT_ERR(pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC));
-	WT_ERR(pthread_cond_init(&cond->cond, &condattr));
+	ret = pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC);
+	if (ret == 0)
+		ret = pthread_cond_init(&cond->cond, &condattr);
+	WT_TRET(pthread_condattr_destroy(&condattr));
+	WT_ERR(ret);
 	}
 #else
 	WT_ERR(pthread_cond_init(&cond->cond, NULL));
diff --git a/src/third_party/wiredtiger/src/reconcile/rec_write.c b/src/third_party/wiredtiger/src/reconcile/rec_write.c
index 4cb5ae12e5b..af43a56f877 100644
--- a/src/third_party/wiredtiger/src/reconcile/rec_write.c
+++ b/src/third_party/wiredtiger/src/reconcile/rec_write.c
@@ -45,13 +45,13 @@ typedef struct {
 	uint64_t last_running;
 	WT_DECL_TIMESTAMP(stable_timestamp)
 
-	/* Track the page's maximum transaction. */
+	/* Track the page's min/maximum transactions. */
 	uint64_t max_txn;
 	WT_DECL_TIMESTAMP(max_timestamp)
+	WT_DECL_TIMESTAMP(min_saved_timestamp)
 
-	uint64_t update_mem_all;	/* Total update memory size */
-	uint64_t update_mem_saved;	/* Saved update memory size */
-	uint64_t update_mem_uncommitted;/* Uncommitted update memory size */
+	bool update_uncommitted;	/* An update was uncommitted */
+	bool update_used;		/* An update could be used */
 
 	/*
 	 * When we can't mark the page clean (for example, checkpoint found some
@@ -154,8 +154,6 @@ typedef struct {
 	 */
 	struct __rec_chunk {
 		/*
-		 * Current and minimum boundaries.
-		 *
 		 * The recno and entries fields are the starting record number
 		 * of the split chunk (for column-store splits), and the number
 		 * of entries in the split chunk.
@@ -193,8 +191,8 @@ typedef struct {
 	size_t	 min_space_avail;
 
 	/*
-	 * Saved update list, supporting the WT_EVICT_UPDATE_RESTORE and
-	 * WT_EVICT_LOOKASIDE configurations. While reviewing updates for each
+	 * Saved update list, supporting the WT_REC_UPDATE_RESTORE and
+	 * WT_REC_LOOKASIDE configurations. While reviewing updates for each
 	 * page, we save WT_UPDATE lists here, and then move them to per-block
 	 * areas as the blocks are defined.
 	 */
@@ -220,7 +218,14 @@ typedef struct {
 	 * There's some trickiness here, see the code for comments on how
 	 * these fields work.
 	 */
-	bool	 cell_zero;		/* Row-store internal page 0th key */
+	bool	cell_zero;		/* Row-store internal page 0th key */
+
+	/*
+	 * We calculate checksums to find previously written identical blocks,
+	 * but once a match fails during an eviction, there's no point trying
+	 * again.
+	 */
+	bool	evict_matching_checksum_failed;
 
 	/*
 	 * WT_DICTIONARY --
@@ -324,7 +329,7 @@ static int  __rec_split_write(
 static int  __rec_update_las(
 		WT_SESSION_IMPL *, WT_RECONCILE *, uint32_t, WT_MULTI *);
 static int  __rec_write_check_complete(
-		WT_SESSION_IMPL *, WT_RECONCILE *, bool *);
+		WT_SESSION_IMPL *, WT_RECONCILE *, int, bool *);
 static void __rec_write_page_status(WT_SESSION_IMPL *, WT_RECONCILE *);
 static int  __rec_write_wrapup(WT_SESSION_IMPL *, WT_RECONCILE *, WT_PAGE *);
 static int  __rec_write_wrapup_err(
@@ -335,7 +340,8 @@ static int  __rec_dictionary_init(WT_SESSION_IMPL *, WT_RECONCILE *, u_int);
 static int  __rec_dictionary_lookup(
 		WT_SESSION_IMPL *, WT_RECONCILE *, WT_KV *, WT_DICTIONARY **);
 static void __rec_dictionary_reset(WT_RECONCILE *);
-static void __rec_verbose_lookaside_write(WT_SESSION_IMPL *);
+static void __rec_verbose_lookaside_write(
+		WT_SESSION_IMPL *, uint32_t, uint64_t);
 
 /*
  * __wt_reconcile --
@@ -361,9 +367,21 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref,
 	__wt_verbose(session, WT_VERB_RECONCILE,
 	    "%p reconcile %s (%s%s%s)",
 	    (void *)ref, __wt_page_type_string(page->type),
-	    LF_ISSET(WT_EVICTING) ? "evict" : "checkpoint",
-	    LF_ISSET(WT_EVICT_LOOKASIDE) ? ", lookaside" : "",
-	    LF_ISSET(WT_EVICT_UPDATE_RESTORE) ? ", update/restore" : "");
+	    LF_ISSET(WT_REC_EVICT) ? "evict" : "checkpoint",
+	    LF_ISSET(WT_REC_LOOKASIDE) ? ", lookaside" : "",
+	    LF_ISSET(WT_REC_UPDATE_RESTORE) ? ", update/restore" : "");
+
+	/*
+	 * Sanity check flags.
+	 *
+	 * We can only do update/restore eviction when the version that ends up
+	 * in the page image is the oldest one any reader could need.
+	 * Otherwise we would need to keep updates in memory that go back older
+	 * than the version in the disk image, and since modify operations
+	 * aren't idempotent, that is problematic.
+	 */
+	WT_ASSERT(session, !LF_ISSET(WT_REC_UPDATE_RESTORE) ||
+	    LF_ISSET(WT_REC_VISIBLE_ALL));
 
 	/* We shouldn't get called with a clean page, that's an error. */
 	WT_ASSERT(session, __wt_page_is_modified(page));
@@ -380,7 +398,7 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref,
 	WT_PAGE_LOCK(session, page);
 
 	oldest_id = __wt_txn_oldest_id(session);
-	if (LF_ISSET(WT_EVICTING))
+	if (LF_ISSET(WT_REC_EVICT))
 		mod->last_eviction_id = oldest_id;
 
 #ifdef HAVE_DIAGNOSTIC
@@ -426,9 +444,8 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref,
 	WT_ILLEGAL_VALUE_SET(session);
 	}
 
-	/* Checks for a successful reconciliation. */
-	if (ret == 0)
-		ret = __rec_write_check_complete(session, r, lookaside_retryp);
+	/* Check for a successful reconciliation. */
+	WT_TRET(__rec_write_check_complete(session, r, ret, lookaside_retryp));
 
 	/* Wrap up the page reconciliation. */
 	if (ret == 0 && (ret = __rec_write_wrapup(session, r, page)) == 0)
@@ -442,7 +459,7 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref,
 	/* Update statistics. */
 	WT_STAT_CONN_INCR(session, rec_pages);
 	WT_STAT_DATA_INCR(session, rec_pages);
-	if (LF_ISSET(WT_EVICTING)) {
+	if (LF_ISSET(WT_REC_EVICT)) {
 		WT_STAT_CONN_INCR(session, rec_pages_eviction);
 		WT_STAT_DATA_INCR(session, rec_pages_eviction);
 	}
@@ -478,14 +495,16 @@ __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref,
 			WT_TRET(session->block_manager_cleanup(session));
 
 		WT_TRET(__rec_destroy_session(session));
+	}
 
-		/*
-		 * We track removed overflow objects in case there's a reader
-		 * in transit when they're removed. Any form of eviction locks
-		 * out readers, we can discard them all.
-		 */
+	/*
+	 * We track removed overflow objects in case there's a reader in
+	 * transit when they're removed. Any form of eviction locks out
+	 * readers, we can discard them all.
+	 */
+	if (LF_ISSET(WT_REC_EVICT))
 		__wt_ovfl_discard_remove(session, page);
-	}
+
 	WT_RET(ret);
 
 	/*
@@ -531,7 +550,7 @@ __rec_las_checkpoint_test(WT_SESSION_IMPL *session, WT_RECONCILE *r)
 	 * drain lookaside table reconciliations, and this isn't a problem for
 	 * most workloads.
 	 */
-	if (!F_ISSET(r, WT_EVICT_LOOKASIDE))
+	if (!F_ISSET(r, WT_REC_LOOKASIDE))
 		return (false);
 	if (F_ISSET(btree, WT_BTREE_NO_CHECKPOINT))
 		return (false);
@@ -549,7 +568,7 @@ __rec_las_checkpoint_test(WT_SESSION_IMPL *session, WT_RECONCILE *r)
  */
 static int
 __rec_write_check_complete(
-    WT_SESSION_IMPL *session, WT_RECONCILE *r, bool *lookaside_retryp)
+    WT_SESSION_IMPL *session, WT_RECONCILE *r, int tret, bool *lookaside_retryp)
 {
 	/*
 	 * Tests in this function are lookaside tests and tests to decide if
@@ -558,7 +577,7 @@ __rec_write_check_complete(
 	 * checks for in-memory eviction because a small cache can force us to
 	 * rewrite every possible page.
 	 */
-	if (F_ISSET(r, WT_EVICT_IN_MEMORY))
+	if (F_ISSET(r, WT_REC_IN_MEMORY))
 		return (0);
 
 	/*
@@ -569,17 +588,26 @@ __rec_write_check_complete(
 		return (EBUSY);
 
 	/*
-	 * Eviction can configure lookaside table reconciliation, consider if
-	 * it's worth giving up this reconciliation attempt and falling back to
-	 * using the lookaside table.  We continue with evict/restore if
-	 * switching to the lookaside doesn't make sense for any reason: we
-	 * won't retry an evict/restore reconciliation until/unless the
-	 * transactional system moves forward, so at worst it's a single wasted
-	 * effort.
+	 * Fall back to lookaside eviction during checkpoints if a page can't
+	 * be evicted.
+	 */
+	if (tret == EBUSY && lookaside_retryp != NULL &&
+	    !F_ISSET(r, WT_REC_UPDATE_RESTORE) && !r->update_uncommitted)
+		*lookaside_retryp = true;
+
+	/* Don't continue if we have already given up. */
+	WT_RET(tret);
+
+	/*
+	 * Check if this reconciliation attempt is making progress.  If there's
+	 * any sign of progress, don't fall back to the lookaside table.
 	 *
-	 * First, check if the lookaside table is a possible alternative.
+	 * Check if the current reconciliation split, in which case we'll
+	 * likely get to write at least one of the blocks.  If we've created a
+	 * page image for a page that previously didn't have one, or we had a
+	 * page image and it is now empty, that's also progress.
 	 */
-	if (lookaside_retryp == NULL)
+	if (r->multi_next > 1)
 		return (0);
 
 	/*
@@ -590,38 +618,20 @@ __rec_write_check_complete(
 	 * If no updates were saved, eviction will succeed without needing to
 	 * restore anything.
 	 */
-	if (!F_ISSET(r, WT_EVICT_UPDATE_RESTORE) || r->supd == NULL)
-		return (0);
-
-	/*
-	 * Check if this reconciliation attempt is making progress.  If there's
-	 * any sign of progress, don't fall back to the lookaside table.
-	 *
-	 * Check if the current reconciliation split, in which case we'll likely
-	 * get to write at least one of the blocks.
-	 */
-	if (r->multi_next > 1)
+	if (!F_ISSET(r, WT_REC_UPDATE_RESTORE) || lookaside_retryp == NULL ||
+	    (r->multi_next == 1 && r->multi->supd_entries == 0))
 		return (0);
 
 	/*
 	 * Check if the current reconciliation applied some updates, in which
 	 * case evict/restore should gain us some space.
-	 */
-	if (r->update_mem_saved != r->update_mem_all)
-		return (0);
-
-	/*
+	 *
 	 * Check if lookaside eviction is possible.  If any of the updates we
-	 * saw were uncommitted, the lookaside table cannot be used: it only
-	 * helps with older readers preventing eviction.
+	 * saw were uncommitted, the lookaside table cannot be used.
 	 */
-	if (r->update_mem_uncommitted != 0)
+	if (r->update_used || r->update_uncommitted)
 		return (0);
 
-	/*
-	 * The current evict/restore approach shows no signs of being useful,
-	 * lookaside is possible, suggest the lookaside table.
-	 */
 	*lookaside_retryp = true;
 	return (EBUSY);
 }
@@ -665,8 +675,8 @@ __rec_write_page_status(WT_SESSION_IMPL *session, WT_RECONCILE *r)
 		 * eviction path.
 		 */
 		WT_ASSERT(session,
-		    !F_ISSET(r, WT_EVICTING) ||
-		    F_ISSET(r, WT_EVICT_UPDATE_RESTORE));
+		    !F_ISSET(r, WT_REC_EVICT) ||
+		    F_ISSET(r, WT_REC_UPDATE_RESTORE));
 	} else {
 		/*
 		 * Track the page's maximum transaction ID (used to decide if
@@ -685,7 +695,7 @@ __rec_write_page_status(WT_SESSION_IMPL *session, WT_RECONCILE *r)
 		 * about the maximum transaction ID of current updates in the
 		 * tree, and checkpoint visits every dirty page in the tree.
 		 */
-		if (F_ISSET(r, WT_EVICTING)) {
+		if (!F_ISSET(r, WT_REC_EVICT)) {
 			if (WT_TXNID_LT(btree->rec_max_txn, r->max_txn))
 				btree->rec_max_txn = r->max_txn;
 #ifdef HAVE_TIMESTAMPS
@@ -707,7 +717,7 @@ __rec_write_page_status(WT_SESSION_IMPL *session, WT_RECONCILE *r)
 		if (__wt_atomic_cas32(&mod->write_gen, r->orig_write_gen, 0))
 			__wt_cache_dirty_decr(session, page);
 		else
-			WT_ASSERT(session, !F_ISSET(r, WT_EVICTING));
+			WT_ASSERT(session, !F_ISSET(r, WT_REC_EVICT));
 	}
 }
 
@@ -903,49 +913,50 @@ __rec_init(WT_SESSION_IMPL *session,
 #endif
 
 	/*
+	 * When operating on the lookaside table, we should never try
+	 * update/restore or lookaside eviction.
+	 */
+	WT_ASSERT(session, !F_ISSET(btree, WT_BTREE_LOOKASIDE) ||
+	    !LF_ISSET(WT_REC_LOOKASIDE | WT_REC_UPDATE_RESTORE));
+
+	/*
 	 * Lookaside table eviction is configured when eviction gets aggressive,
 	 * adjust the flags for cases we don't support.
+	 *
+	 * We don't yet support fixed-length column-store combined with the
+	 * lookaside table. It's not hard to do, but the underlying function
+	 * that reviews which updates can be written to the evicted page and
+	 * which updates need to be written to the lookaside table needs access
+	 * to the original value from the page being evicted, and there's no
+	 * code path for that in the case of fixed-length column-store objects.
+	 * (Row-store and variable-width column-store objects provide a
+	 * reference to the unpacked on-page cell for this purpose, but there
+	 * isn't an on-page cell for fixed-length column-store objects.) For
+	 * now, turn it off.
 	 */
-	if (LF_ISSET(WT_EVICT_LOOKASIDE)) {
-		/*
-		 * Saving lookaside table updates into the lookaside table won't
-		 * work.
-		 */
-		if (F_ISSET(btree, WT_BTREE_LOOKASIDE))
-			LF_CLR(WT_EVICT_LOOKASIDE);
+	if (page->type == WT_PAGE_COL_FIX)
+		LF_CLR(WT_REC_LOOKASIDE);
 
-		/*
-		 * We don't yet support fixed-length column-store combined with
-		 * the lookaside table. It's not hard to do, but the underlying
-		 * function that reviews which updates can be written to the
-		 * evicted page and which updates need to be written to the
-		 * lookaside table needs access to the original value from the
-		 * page being evicted, and there's no code path for that in the
-		 * case of fixed-length column-store objects. (Row-store and
-		 * variable-width column-store objects provide a reference to
-		 * the unpacked on-page cell for this purpose, but there isn't
-		 * an on-page cell for fixed-length column-store objects.) For
-		 * now, turn it off.
-		 */
-		if (page->type == WT_PAGE_COL_FIX)
-			LF_CLR(WT_EVICT_LOOKASIDE);
+	/*
+	 * Check for a lookaside table and checkpoint collision, and if we find
+	 * one, turn off the lookaside file (we've gone to all the effort of
+	 * getting exclusive access to the page, might as well try and evict
+	 * it).
+	 */
+	if (LF_ISSET(WT_REC_LOOKASIDE) && __rec_las_checkpoint_test(session, r))
+		LF_CLR(WT_REC_LOOKASIDE);
 
-		/*
-		 * Check for a lookaside table and checkpoint collision, and if
-		 * we find one, turn off the lookaside file (we've gone to all
-		 * the effort of getting exclusive access to the page, might as
-		 * well try and evict it).
-		 */
-		if (__rec_las_checkpoint_test(session, r))
-			LF_CLR(WT_EVICT_LOOKASIDE);
-	}
 	r->flags = flags;
 
-	/* Track the page's maximum transaction ID. */
+	/* Track the page's min/maximum transaction */
 	r->max_txn = WT_TXN_NONE;
+#ifdef HAVE_TIMESTAMPS
+	__wt_timestamp_set_zero(&r->max_timestamp);
+	__wt_timestamp_set_inf(&r->min_saved_timestamp);
+#endif
 
-	/* Track if all updates were skipped. */
-	r->update_mem_all = r->update_mem_saved = r->update_mem_uncommitted = 0;
+	/* Track if updates were used and/or uncommitted. */
+	r->update_used = r->update_uncommitted = false;
 
 	/* Track if the page can be marked clean. */
 	r->leave_dirty = false;
@@ -973,6 +984,8 @@ __rec_init(WT_SESSION_IMPL *session,
 	r->wrapup_checkpoint = NULL;
 	r->wrapup_checkpoint_compressed = false;
 
+	r->evict_matching_checksum_failed = false;
+
 	/*
 	 * Dictionary compression only writes repeated values once.  We grow
 	 * the dictionary as necessary, always using the largest size we've
@@ -1032,7 +1045,7 @@ __rec_init(WT_SESSION_IMPL *session,
 /*
  * __rec_cleanup --
  *	Clean up after a reconciliation run, except for structures cached
- * across runs.
+ *	across runs.
  */
 static void
 __rec_cleanup(WT_SESSION_IMPL *session, WT_RECONCILE *r)
@@ -1113,19 +1126,13 @@ __rec_destroy_session(WT_SESSION_IMPL *session)
  */
 static int
 __rec_update_save(WT_SESSION_IMPL *session,
-    WT_RECONCILE *r, WT_INSERT *ins, void *ripcip, WT_UPDATE *upd)
+    WT_RECONCILE *r, WT_INSERT *ins, void *ripcip, WT_UPDATE *onpage_upd)
 {
 	WT_RET(__wt_realloc_def(
 	    session, &r->supd_allocated, r->supd_next + 1, &r->supd));
 	r->supd[r->supd_next].ins = ins;
 	r->supd[r->supd_next].ripcip = ripcip;
-	r->supd[r->supd_next].onpage_txn =
-	    upd == NULL ? WT_TXN_NONE : upd->txnid;
-#ifdef HAVE_TIMESTAMPS
-	if (upd != NULL)
-		__wt_timestamp_set(
-		    &r->supd[r->supd_next].onpage_timestamp, &upd->timestamp);
-#endif
+	r->supd[r->supd_next].onpage_upd = onpage_upd;
 	++r->supd_next;
 	return (0);
 }
@@ -1136,7 +1143,7 @@ __rec_update_save(WT_SESSION_IMPL *session,
  */
 static int
 __rec_append_orig_value(WT_SESSION_IMPL *session,
-    WT_PAGE *page, WT_UPDATE *upd_list, WT_CELL_UNPACK *unpack)
+    WT_PAGE *page, WT_UPDATE *first_upd, WT_CELL_UNPACK *unpack)
 {
 	WT_DECL_ITEM(tmp);
 	WT_DECL_RET;
@@ -1147,7 +1154,7 @@ __rec_append_orig_value(WT_SESSION_IMPL *session,
 	 * If at least one self-contained update is globally visible, we're
 	 * done.
 	 */
-	for (upd = upd_list; upd != NULL; upd = upd->next)
+	for (upd = first_upd; upd != NULL; upd = upd->next)
 		if (WT_UPDATE_DATA_VALUE(upd) &&
 		    __wt_txn_upd_visible_all(session, upd))
 			return (0);
@@ -1180,7 +1187,7 @@ __rec_append_orig_value(WT_SESSION_IMPL *session,
 	 *
 	 * Append the new entry to the update list.
 	 */
-	for (upd = upd_list; upd->next != NULL; upd = upd->next)
+	for (upd = first_upd; upd->next != NULL; upd = upd->next)
 		;
 	WT_PUBLISH(upd->next, append);
 	__wt_cache_page_inmem_incr(session, page, size);
@@ -1192,138 +1199,114 @@ err:	__wt_scr_free(session, &tmp);
 /*
  * __rec_txn_read --
  *	Return the update in a list that should be written (or NULL if none can
- * be written).
+ *	be written).
  */
 static int
 __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
     WT_INSERT *ins, void *ripcip, WT_CELL_UNPACK *vpack, WT_UPDATE **updp)
 {
 	WT_BTREE *btree;
-	WT_DECL_TIMESTAMP(max_timestamp)
 	WT_PAGE *page;
-	WT_UPDATE *upd, *upd_list;
-	size_t update_mem;
+	WT_UPDATE *first_ts_upd, *first_txn_upd, *first_upd, *upd;
+	wt_timestamp_t *timestampp;
 	uint64_t max_txn, txnid;
-	bool skipped;
+	bool all_visible, uncommitted;
 
 	*updp = NULL;
 
 	btree = S2BT(session);
 	page = r->page;
+	first_ts_upd = first_txn_upd = NULL;
+	max_txn = WT_TXN_NONE;
+	uncommitted = false;
 
 	/*
 	 * If called with a WT_INSERT item, use its WT_UPDATE list (which must
 	 * exist), otherwise check for an on-page row-store WT_UPDATE list
 	 * (which may not exist). Return immediately if the item has no updates.
 	 */
-	if (ins == NULL) {
-		if ((upd_list = WT_ROW_UPDATE(page, ripcip)) == NULL)
-			return (0);
-	} else
-		upd_list = ins->upd;
+	if (ins != NULL)
+		first_upd = ins->upd;
+	else if ((first_upd = WT_ROW_UPDATE(page, ripcip)) == NULL)
+		return (0);
 
-	skipped = false;
-	update_mem = 0;
-	max_txn = WT_TXN_NONE;
-#ifdef HAVE_TIMESTAMPS
-	__wt_timestamp_set_zero(&max_timestamp);
-#endif
+	for (upd = first_upd; upd != NULL; upd = upd->next) {
+		if ((txnid = upd->txnid) == WT_TXN_ABORTED)
+			continue;
 
-	if (F_ISSET(r, WT_EVICTING)) {
-		/* Discard obsolete updates. */
-		if ((upd = __wt_update_obsolete_check(
-		    session, page, upd_list->next)) != NULL)
-			__wt_update_obsolete_free(session, page, upd);
+		/*
+		 * Track the first update in the chain that is not aborted and
+		 * the maximum transaction ID.
+		 */
+		if (first_txn_upd == NULL)
+			first_txn_upd = upd;
 
-		for (upd = upd_list; upd != NULL; upd = upd->next) {
-			/* Track the total memory in the update chain. */
-			update_mem += WT_UPDATE_MEMSIZE(upd);
+		/* Track the largest transaction ID seen. */
+		if (WT_TXNID_LT(max_txn, txnid))
+			max_txn = txnid;
 
-			if ((txnid = upd->txnid) == WT_TXN_ABORTED)
-				continue;
+		/*
+		 * Check whether the update was committed before reconciliation
+		 * started.  The global commit point can move forward during
+		 * reconciliation so we use a cached copy to avoid races when a
+		 * concurrent transaction commits or rolls back while we are
+		 * examining its updates.
+		 */
+		if (WT_TXNID_LE(r->last_running, txnid))
+			uncommitted = r->update_uncommitted = true;
 
-			/*
-			 * Track the largest/smallest transaction IDs on the
-			 * list.
-			 */
-			if (WT_TXNID_LT(max_txn, txnid))
-				max_txn = txnid;
+		/*
+		 * Find the first update we can use.
+		 *
+		 * Update/restore eviction can handle any update (including
+		 * uncommitted updates).  Lookaside eviction can save any
+		 * committed update.  Regular eviction checks that the maximum
+		 * transaction ID and timestamp seen are stable.
+		 *
+		 * When reconciling the lookaside table itself, use the first
+		 * committed entry we find.
+		 */
+		if (F_ISSET(btree, WT_BTREE_LOOKASIDE) && !uncommitted) {
+			*updp = upd;
+			break;
+		}
 
+		if (F_ISSET(r, WT_REC_VISIBLE_ALL) ?
+		    !__wt_txn_upd_visible_all(session, upd) :
+		    !__wt_txn_upd_visible(session, upd)) {
 			/*
-			 * Find the first update we can use.
-			 *
-			 * Check whether the update was committed before
-			 * reconciliation started.  The global commit point can
-			 * move forward during reconciliation so we use a
-			 * cached copy to avoid races when a concurrent
-			 * transaction commits or rolls back while we are
-			 * examining its updates.
-			 *
-			 * Lookaside eviction can cope with any committed
-			 * update.  Other eviction modes check that the maximum
-			 * transaction ID and timestamp seen are stable.
-			 *
-			 * When reconciling for eviction, track whether any
-			 * uncommitted updates are found.
+			 * Rare case: when applications run at low isolation
+			 * levels, update/restore eviction may see a stable
+			 * update followed by an uncommitted update.  Give up
+			 * in that case: we need to discard updates from the
+			 * stable update and older for correctness and we can't
+			 * discard an uncommitted update.
 			 */
-			if (WT_TXNID_LE(r->last_running, txnid)) {
-				skipped = true;
-				continue;
-			}
-
-			if (*updp == NULL)
-				*updp = upd;
+			if (F_ISSET(r, WT_REC_UPDATE_RESTORE) &&
+			    *updp != NULL && uncommitted)
+				return (EBUSY);
 
-#ifdef HAVE_TIMESTAMPS
-			/* Track min/max timestamps. */
-			if (__wt_timestamp_cmp(
-			    &upd->timestamp, &max_timestamp) > 0)
-				__wt_timestamp_set(
-				    &max_timestamp, &upd->timestamp);
-#endif
+			continue;
 		}
-	} else
-		for (upd = upd_list; upd != NULL; upd = upd->next) {
-			if ((txnid = upd->txnid) == WT_TXN_ABORTED)
-				continue;
 
-			/* Track the largest transaction ID on the list. */
-			if (WT_TXNID_LT(max_txn, txnid))
-				max_txn = txnid;
+		if (*updp == NULL)
+			*updp = upd;
 
-			/*
-			 * Find the first update we can use.
-			 *
-			 * Checkpoint can only write updates visible as of its
-			 * snapshot.
-			 *
-			 * When reconciling for a checkpoint, track whether any
-			 * updates were skipped on the way to finding the first
-			 * visible update.
-			 */
-			if (*updp == NULL) {
-				if (__wt_txn_upd_visible(session, upd))
-					*updp = upd;
-				else
-					skipped = true;
-			}
-		}
+#ifdef HAVE_TIMESTAMPS
+		/* Track the first update with non-zero timestamp. */
+		if (first_ts_upd == NULL &&
+		    !__wt_timestamp_iszero(&upd->timestamp))
+			first_ts_upd = upd;
+#endif
+	}
 
 	/* Reconciliation should never see an aborted or reserved update. */
 	WT_ASSERT(session, *updp == NULL ||
 	    ((*updp)->txnid != WT_TXN_ABORTED &&
 	    (*updp)->type != WT_UPDATE_RESERVED));
 
-	r->update_mem_all += update_mem;
-
-	/*
-	 * If all of the updates were aborted, quit. This test is not strictly
-	 * necessary because the above loop exits with skipped not set and the
-	 * maximum transaction left at its initial value of WT_TXN_NONE, so
-	 * the test below will be branch true and return, but it's cheap and a
-	 * little more explicit, and makes Coverity happy.
-	 */
-	if (max_txn == WT_TXN_NONE)
+	/* If all of the updates were aborted, quit. */
+	if (first_txn_upd == NULL)
 		return (0);
 
 	/*
@@ -1334,140 +1317,104 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r,
 	 */
 	if (WT_TXNID_LT(r->max_txn, max_txn))
 		r->max_txn = max_txn;
+
 #ifdef HAVE_TIMESTAMPS
-	if (__wt_timestamp_cmp(&r->max_timestamp, &max_timestamp) < 0)
-		__wt_timestamp_set(&r->max_timestamp, &max_timestamp);
+	if (first_ts_upd != NULL &&
+	    __wt_timestamp_cmp(&r->max_timestamp, &first_ts_upd->timestamp) < 0)
+		__wt_timestamp_set(&r->max_timestamp, &first_ts_upd->timestamp);
 #endif
 
 	/*
-	 * If there are no skipped updates and all updates are globally visible,
-	 * the page can be marked clean and we're done, regardless if evicting
-	 * or checkpointing.
-	 *
-	 * We have to check both: the oldest transaction ID may have moved while
-	 * we were scanning the update list, so it is possible to find a skipped
-	 * update, but then find all updates are stable at the end of the scan.
-	 *
-	 * Skip the visibility check for the lookaside table as a special-case,
-	 * we know there are no older readers of that table.
+	 * The checkpoint transaction is special.  Make sure we never write
+	 * (metadata) updates from a checkpoint in a concurrent session.
 	 */
-	if (!skipped && (F_ISSET(btree, WT_BTREE_LOOKASIDE) ||
-	    __wt_txn_visible_all(session,
-	    max_txn, WT_TIMESTAMP_NULL(&max_timestamp)))) {
-		/*
-		 * The checkpoint transaction is special.  Make sure we never
-		 * write (metadata) updates from a checkpoint in a concurrent
-		 * session.
-		 */
-		WT_ASSERT(session, *updp == NULL ||
-		    (*updp)->txnid !=
-		    S2C(session)->txn_global.checkpoint_state.id ||
-		    WT_SESSION_IS_CHECKPOINT(session));
+	WT_ASSERT(session, *updp == NULL || (*updp)->txnid == WT_TXN_NONE ||
+	    (*updp)->txnid != S2C(session)->txn_global.checkpoint_state.id ||
+	    WT_SESSION_IS_CHECKPOINT(session));
 
-		goto check_original_value;
-	}
+	/*
+	 * If there are no skipped updates, record that we're making progress.
+	 */
+	if (*updp == first_txn_upd)
+		r->update_used = true;
 
 	/*
-	 * In some cases, there had better not be skipped updates or updates not
-	 * yet globally visible.
+	 * Check if all updates on the page are visible.  If not, it must stay
+	 * dirty unless we are saving updates to the lookaside table.
+	 *
+	 * Updates can be out of transaction ID order (but not out of timestamp
+	 * order), so we track the maximum transaction ID and the newest update
+	 * with a timestamp (if any).
 	 */
-	if (F_ISSET(r, WT_VISIBILITY_ERR))
+#ifdef HAVE_TIMESTAMPS
+	timestampp = first_ts_upd == NULL ? NULL : &first_ts_upd->timestamp;
+#else
+	WT_UNUSED(first_ts_upd);
+	timestampp = NULL;
+#endif
+	if (F_ISSET(btree, WT_BTREE_LOOKASIDE))
+		all_visible = !uncommitted;
+	else
+		all_visible = *updp == first_txn_upd &&
+		    (F_ISSET(r, WT_REC_VISIBLE_ALL) ?
+		    __wt_txn_visible_all(session, max_txn, timestampp) :
+		    __wt_txn_visible(session, max_txn, timestampp));
+
+	if (all_visible)
+		goto check_original_value;
+
+	if (F_ISSET(r, WT_REC_VISIBILITY_ERR))
 		WT_PANIC_RET(session, EINVAL,
-		    "reconciliation error, uncommitted update or update not "
-		    "globally visible");
+		    "reconciliation error, update not visible");
+	if (!F_ISSET(r, WT_REC_LOOKASIDE))
+		r->leave_dirty = true;
 
 	/*
 	 * If not trying to evict the page, we know what we'll write and we're
-	 * done. Because some updates were skipped or are not globally visible,
-	 * the page can't be marked clean.
+	 * done.
 	 */
-	if (!F_ISSET(r, WT_EVICTING)) {
-		r->leave_dirty = true;
+	if (!F_ISSET(r, WT_REC_EVICT))
 		goto check_original_value;
-	}
 
 	/*
-	 * Evicting with either uncommitted changes or not-yet-globally-visible
-	 * changes. There are two ways to continue, the save/restore eviction
-	 * path or the lookaside table eviction path. Both cannot be configured
-	 * because the paths track different information. The save/restore path
-	 * can handle both uncommitted and not-yet-globally-visible changes, by
-	 * evicting most of the page and then creating a new, smaller page into
-	 * which we re-instantiate those changes. The lookaside table path can
-	 * only handle not-yet-globally-visible changes by writing those changes
-	 * into the lookaside table and restoring them on demand if and when the
-	 * page is read back into memory.
+	 * We are attempting eviction with changes that are not yet stable
+	 * (i.e. globally visible).  There are two ways to continue, the
+	 * save/restore eviction path or the lookaside table eviction path.
+	 * Both cannot be configured because the paths track different
+	 * information. The update/restore path can handle uncommitted changes,
+	 * by evicting most of the page and then creating a new, smaller page
+	 * to which we re-attach those changes. Lookaside eviction writes
+	 * changes into the lookaside table and restores them on demand if and
+	 * when the page is read back into memory.
 	 *
 	 * Both paths are configured outside of reconciliation: the save/restore
-	 * path is the WT_EVICT_UPDATE_RESTORE flag, the lookaside table path is
-	 * the WT_EVICT_LOOKASIDE flag.
+	 * path is the WT_REC_UPDATE_RESTORE flag, the lookaside table path is
+	 * the WT_REC_LOOKASIDE flag.
 	 */
-	if (!F_ISSET(r, WT_EVICT_LOOKASIDE | WT_EVICT_UPDATE_RESTORE))
+	if (!F_ISSET(r, WT_REC_LOOKASIDE | WT_REC_UPDATE_RESTORE) &&
+	    !F_ISSET(btree, WT_BTREE_LOOKASIDE))
 		return (EBUSY);
-	if (skipped && !F_ISSET(r, WT_EVICT_UPDATE_RESTORE))
+	if (uncommitted && !F_ISSET(r, WT_REC_UPDATE_RESTORE))
 		return (EBUSY);
 
 	/*
-	 * Track the memory required by the update chain.
-	 *
-	 * A page with no uncommitted (skipped) updates, that can't be evicted
-	 * because some updates aren't yet globally visible, can be evicted by
-	 * writing previous versions of the updates to the lookaside file. That
-	 * test is just checking if the skipped updates memory is zero.
-	 *
-	 * If that's not possible (there are skipped updates), we can rewrite
-	 * the pages in-memory, but we don't want to unless there's memory to
-	 * recover. That test is comparing the memory we'd recover to the memory
-	 * we'd have to re-instantiate as part of the rewrite.
+	 * The order of the updates on the list matters, we can't move only the
+	 * unresolved updates, move the entire update list.
 	 */
-	r->update_mem_saved += update_mem;
-	if (skipped)
-		r->update_mem_uncommitted += update_mem;
+	WT_RET(__rec_update_save(session, r, ins, ripcip, *updp));
 
 #ifdef HAVE_TIMESTAMPS
-	/*
-	 * Don't allow lookaside eviction with updates newer than the stable
-	 * timestamp.  Also don't recommend lookaside eviction in that case.
-	 */
-	if (__wt_timestamp_cmp(&max_timestamp, &r->stable_timestamp) > 0) {
-		if (!F_ISSET(r, WT_EVICT_UPDATE_RESTORE))
-			return (EBUSY);
-
-		if (!skipped)
-			r->update_mem_uncommitted += update_mem;
+	/* Track the oldest saved timestamp for lookaside. */
+	if (F_ISSET(r, WT_REC_LOOKASIDE)) {
+		for (upd = first_upd; upd->next != NULL; upd = upd->next)
+			;
+		if (__wt_timestamp_cmp(
+		    &r->min_saved_timestamp, &upd->timestamp) > 0)
+			__wt_timestamp_set(
+			    &r->min_saved_timestamp, &upd->timestamp);
 	}
 #endif
 
-	if (F_ISSET(r, WT_EVICT_UPDATE_RESTORE)) {
-		/*
-		 * The save/restore eviction path.
-		 *
-		 * Clear the returned update, it's not needed. If there's an
-		 * on-page key/value pair to which the update list applies, our
-		 * caller writes it to the disk image. If an insert/append list,
-		 * our caller can ignore the key/value pair (everything needed
-		 * is in the update list), or in the case of row-store, write
-		 * the key to the disk image to split up the insert/append list.
-		 */
-		*updp = NULL;
-
-		/* The page can't be marked clean. */
-		r->leave_dirty = true;
-	}
-
-	/*
-	 * The order of the updates on the list matters, we can't move only the
-	 * unresolved updates, move the entire update list.
-	 *
-	 * If we skipped updates, the transaction value is never used.  If we
-	 * didn't skip updates, the list of updates are eventually written to
-	 * the lookaside table, and associated with each update record is the
-	 * transaction ID of the update we wrote in the reconciled page; once
-	 * that transaction ID is globally visible, we know we no longer need
-	 * the lookaside table records, allowing them to be discarded.
-	 */
-	WT_RET(__rec_update_save(session, r, ins, ripcip, *updp));
-
 check_original_value:
 	/*
 	 * Returning an update means the original on-page value might be lost,
@@ -1477,10 +1424,11 @@ check_original_value:
 	 * record that will be physically removed once it's no longer needed.
 	 */
 	if (*updp != NULL &&
-	    (F_ISSET(r, WT_EVICT_LOOKASIDE) ||
-	    (vpack != NULL &&
+	    (F_ISSET(r, WT_REC_LOOKASIDE) ||
+	    (*updp != NULL && vpack != NULL &&
 	    vpack->ovfl && vpack->raw != WT_CELL_VALUE_OVFL_RM)))
-		WT_RET(__rec_append_orig_value(session, page, *updp, vpack));
+		WT_RET(
+		    __rec_append_orig_value(session, page, first_upd, vpack));
 
 	return (0);
 }
@@ -1488,7 +1436,7 @@ check_original_value:
 /*
  * WT_CHILD_RELEASE, WT_CHILD_RELEASE_ERR --
  *	Macros to clean up during internal-page reconciliation, releasing the
- * hazard pointer we're holding on child pages.
+ *	hazard pointer we're holding on child pages.
  */
 #define	WT_CHILD_RELEASE(session, hazard, ref) do {			\
 	if (hazard) {							\
@@ -1534,7 +1482,7 @@ __rec_child_deleted(WT_SESSION_IMPL *session,
 	 *
 	 * In some cases, there had better not be any updates we can't see.
 	 */
-	if (F_ISSET(r, WT_VISIBILITY_ERR) && page_del != NULL &&
+	if (F_ISSET(r, WT_REC_VISIBILITY_ERR) && page_del != NULL &&
 	    !__wt_txn_visible(session,
 	    page_del->txnid, WT_TIMESTAMP_NULL(&page_del->timestamp)))
 		WT_PANIC_RET(session, EINVAL,
@@ -1600,7 +1548,7 @@ __rec_child_deleted(WT_SESSION_IMPL *session,
 	 * if subsequently read (we wouldn't know which transactions should see
 	 * the original page and which should see the deleted page).
 	 */
-	if (F_ISSET(r, WT_EVICTING))
+	if (F_ISSET(r, WT_REC_EVICT))
 		return (EBUSY);
 
 	/*
@@ -1683,10 +1631,9 @@ __rec_child_modify(WT_SESSION_IMPL *session,
 			 * pages in an evicted page's subtree fails the eviction
 			 * attempt.
 			 */
-			if (F_ISSET(r, WT_EVICTING)) {
-				WT_ASSERT(session, !F_ISSET(r, WT_EVICTING));
+			WT_ASSERT(session, !F_ISSET(r, WT_REC_EVICT));
+			if (F_ISSET(r, WT_REC_EVICT))
 				return (EBUSY);
-			}
 
 			/*
 			 * If called during checkpoint, the child is being
@@ -1700,6 +1647,20 @@ __rec_child_modify(WT_SESSION_IMPL *session,
 			 */
 			break;
 
+		case WT_REF_LOOKASIDE:
+			/*
+			 * On disk, with lookaside updates.
+			 *
+			 * We should never be here during eviction: active
+			 * child pages in an evicted page's subtree fail the
+			 * eviction attempt.
+			 */
+			WT_ASSERT(session, !F_ISSET(r, WT_REC_EVICT));
+			if (F_ISSET(r, WT_REC_EVICT))
+				return (EBUSY);
+
+			goto done;
+
 		case WT_REF_MEM:
 			/*
 			 * In memory.
@@ -1708,10 +1669,9 @@ __rec_child_modify(WT_SESSION_IMPL *session,
 			 * pages in an evicted page's subtree fails the eviction
 			 * attempt.
 			 */
-			if (F_ISSET(r, WT_EVICTING)) {
-				WT_ASSERT(session, !F_ISSET(r, WT_EVICTING));
+			WT_ASSERT(session, !F_ISSET(r, WT_REC_EVICT));
+			if (F_ISSET(r, WT_REC_EVICT))
 				return (EBUSY);
-			}
 
 			/*
 			 * If called during checkpoint, acquire a hazard pointer
@@ -1739,10 +1699,9 @@ __rec_child_modify(WT_SESSION_IMPL *session,
 			 * pages in an evicted page's subtree fails the eviction
 			 * attempt.
 			 */
-			if (F_ISSET(r, WT_EVICTING)) {
-				WT_ASSERT(session, !F_ISSET(r, WT_EVICTING));
+			WT_ASSERT(session, !F_ISSET(r, WT_REC_EVICT));
+			if (F_ISSET(r, WT_REC_EVICT))
 				return (EBUSY);
-			}
 			goto done;
 
 		case WT_REF_SPLIT:
@@ -2073,7 +2032,8 @@ __rec_split_page_size_from_pct(
 /*
  * __wt_split_page_size --
  *	Split page size calculation: we don't want to repeatedly split every
- * time a new entry is added, so we split to a smaller-than-maximum page size.
+ *	time a new entry is added, so we split to a smaller-than-maximum page
+ *	size.
  */
 uint32_t
 __wt_split_page_size(WT_BTREE *btree, uint32_t maxpagesize)
@@ -2396,7 +2356,7 @@ __rec_split_row_promote(
 	 * the last key and smaller than the current key.
 	 */
 	max = r->last;
-	if (F_ISSET(r, WT_EVICT_UPDATE_RESTORE))
+	if (F_ISSET(r, WT_REC_UPDATE_RESTORE))
 		for (i = r->supd_next; i > 0; --i) {
 			supd = &r->supd[i - 1];
 			if (supd->ins == NULL)
@@ -2484,7 +2444,7 @@ __rec_split_grow(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t add_len)
 /*
  * __rec_split --
  *	Handle the page reconciliation bookkeeping.  (Did you know "bookkeeper"
- * has 3 doubled letters in a row?  Sweet-tooth does, too.)
+ *	has 3 doubled letters in a row?  Sweet-tooth does, too.)
  */
 static int
 __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len)
@@ -3157,27 +3117,13 @@ __rec_split_finish(WT_SESSION_IMPL *session, WT_RECONCILE *r)
 	/*
 	 * We may arrive here with no entries to write if the page was entirely
 	 * empty or if nothing on the page was visible to us.
+	 *
+	 * Pages with skipped or not-yet-globally visible updates aren't really
+	 * empty; otherwise, the page is truly empty and we will merge it into
+	 * its parent during the parent's reconciliation.
 	 */
-	if (r->entries == 0) {
-		/*
-		 * Pages with skipped or not-yet-globally visible updates aren't
-		 * really empty; otherwise, the page is truly empty and we will
-		 * merge it into its parent during the parent's reconciliation.
-		 */
-		if (r->supd_next == 0)
-			return (0);
-
-		/*
-		 * If using the save/restore eviction path, continue with the
-		 * write, the page will be restored after we finish.
-		 *
-		 * If using the lookaside table eviction path, we can't continue
-		 * (we need a page to be written, otherwise we won't ever find
-		 * the updates for future reads).
-		 */
-		if (F_ISSET(r, WT_EVICT_LOOKASIDE))
-			return (EBUSY);
-	}
+	if (r->entries == 0 && r->supd_next == 0)
+		return (0);
 
 	/* Set the number of entries and size for the just finished chunk. */
 	r->cur_ptr->image.size =
@@ -3195,7 +3141,7 @@ __rec_split_finish(WT_SESSION_IMPL *session, WT_RECONCILE *r)
 /*
  * __rec_supd_move --
  *	Move a saved WT_UPDATE list from the per-page cache to a specific
- * block's list.
+ *	block's list.
  */
 static int
 __rec_supd_move(
@@ -3214,7 +3160,7 @@ __rec_supd_move(
 /*
  * __rec_split_write_supd --
  *	Check if we've saved updates that belong to this block, and move any
- * to the per-block structure.
+ *	to the per-block structure.
  */
 static int
 __rec_split_write_supd(WT_SESSION_IMPL *session,
@@ -3329,7 +3275,7 @@ __rec_split_write_header(WT_SESSION_IMPL *session,
 	 * and we found updates that weren't globally visible when reconciling
 	 * this page.
 	 */
-	if (F_ISSET(r, WT_EVICT_LOOKASIDE) && multi->supd != NULL) {
+	if (F_ISSET(r, WT_REC_LOOKASIDE) && multi->supd != NULL) {
 		F_SET(dsk, WT_PAGE_LAS_UPDATE);
 		r->cache_write_lookaside = true;
 	}
@@ -3345,6 +3291,91 @@ __rec_split_write_header(WT_SESSION_IMPL *session,
 }
 
 /*
+ * __rec_split_write_reuse --
+ *	Check if a previously written block can be reused.
+ */
+static bool
+__rec_split_write_reuse(WT_SESSION_IMPL *session,
+    WT_RECONCILE *r, WT_MULTI *multi, WT_ITEM *image, bool last_block)
+{
+	WT_MULTI *multi_match;
+	WT_PAGE_MODIFY *mod;
+
+	mod = r->page->modify;
+
+	/*
+	 * Don't bother calculating checksums for bulk loads, there's no reason
+	 * to believe they'll be useful. Check because LSM does bulk-loads as
+	 * part of normal operations and the check is cheap.
+	 */
+	if (r->is_bulk_load)
+		return (false);
+
+	/*
+	 * Calculating the checksum is the expensive part, try to avoid it.
+	 *
+	 * Ignore the last block of any reconciliation. Pages are written in the
+	 * same block order every time, so the last block written for a page is
+	 * unlikely to match any previously written block or block written in
+	 * the future, (absent a point-update earlier in the page which didn't
+	 * change the size of the on-page object in any way).
+	 */
+	if (last_block)
+		return (false);
+
+	/*
+	 * Quit if evicting with no previously written block to compare against.
+	 * (In other words, if there's eviction pressure and the page was never
+	 * written by a checkpoint, calculating a checksum is worthless.)
+	 *
+	 * Quit if evicting and a previous check failed, once there's a miss no
+	 * future block will match.
+	 */
+	if (F_ISSET(r, WT_REC_EVICT)) {
+		if (mod->rec_result != WT_PM_REC_MULTIBLOCK ||
+		    mod->mod_multi_entries < r->multi_next)
+			return (false);
+		if (r->evict_matching_checksum_failed)
+			return (false);
+	}
+
+	/* Calculate the checksum for this block. */
+	multi->checksum = __wt_checksum(image->data, image->size);
+
+	/*
+	 * Don't check for a block match when writing blocks during compaction,
+	 * the whole idea is to move those blocks. Check after calculating the
+	 * checksum, we don't distinguish between pages written solely as part
+	 * of the compaction and pages written at around the same time, and so
+	 * there's a possibility the calculated checksum will be useful in the
+	 * future.
+	 */
+	if (session->compact_state != WT_COMPACT_NONE)
+		return (false);
+
+	/*
+	 * Pages are written in the same block order every time, only check the
+	 * appropriate slot.
+	 */
+	if (mod->rec_result != WT_PM_REC_MULTIBLOCK ||
+	    mod->mod_multi_entries < r->multi_next)
+		return (false);
+
+	multi_match = &mod->mod_multi[r->multi_next - 1];
+	if (multi_match->size != multi->size ||
+	    multi_match->checksum != multi->checksum) {
+		r->evict_matching_checksum_failed = true;
+		return (false);
+	}
+
+	multi_match->addr.reuse = 1;
+	multi->addr = multi_match->addr;
+
+	WT_STAT_DATA_INCR(session, rec_page_match);
+	return (true);
+}
+
+/*
  * __rec_split_write --
  *	Write a disk block out for the split helper functions.
  */
@@ -3353,9 +3384,8 @@ __rec_split_write(WT_SESSION_IMPL *session, WT_RECONCILE *r,
     WT_CHUNK *chunk, WT_ITEM *compressed_image, bool last_block)
 {
 	WT_BTREE *btree;
-	WT_MULTI *multi, *multi_mod;
+	WT_MULTI *multi;
 	WT_PAGE *page;
-	WT_PAGE_MODIFY *mod;
 	size_t addr_size;
 	uint8_t addr[WT_BTREE_MAX_ADDR_COOKIE];
 #ifdef HAVE_DIAGNOSTIC
@@ -3364,7 +3394,6 @@ __rec_split_write(WT_SESSION_IMPL *session, WT_RECONCILE *r,
 
 	btree = S2BT(session);
 	page = r->page;
-	mod = page->modify;
 #ifdef HAVE_DIAGNOSTIC
 	verify_image = true;
 #endif
@@ -3422,7 +3451,7 @@ __rec_split_write(WT_SESSION_IMPL *session, WT_RECONCILE *r,
 	 */
 	if (last_block &&
 	    r->multi_next == 1 && __rec_is_checkpoint(session, r)) {
-		WT_ASSERT(session, r->supd == NULL);
+		WT_ASSERT(session, r->supd_next == 0);
 
 		if (compressed_image == NULL)
 			r->wrapup_checkpoint = &chunk->image;
@@ -3434,71 +3463,64 @@ __rec_split_write(WT_SESSION_IMPL *session, WT_RECONCILE *r,
 	}
 
 	/*
-	 * If configured for an in-memory database, or using the save/restore
-	 * eviction path and we had to skip updates in order to build this disk
-	 * image, we can't actually write it. Instead, we will re-instantiate
-	 * the page using the disk image and any list of updates we skipped.
+	 * If configured for an in-memory database, we can't actually write it.
+	 * Instead, we will re-instantiate the page using the disk image and
+	 * any list of updates we skipped.
 	 */
-	if (F_ISSET(r, WT_EVICT_IN_MEMORY))
-		goto copy_image;
-	if (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && multi->supd != NULL) {
-		r->cache_write_restore = true;
+	if (F_ISSET(r, WT_REC_IN_MEMORY))
 		goto copy_image;
-	}
 
 	/*
-	 * If we wrote this block before, re-use it.  Pages get written in the
-	 * same block order every time, only check the appropriate slot.  The
-	 * expensive part of this test is the checksum, only do that work when
-	 * there has been or will be a reconciliation of this page involving
-	 * split pages.  This test isn't perfect: we're doing a checksum if a
-	 * previous reconciliation of the page split or if we will split this
-	 * time, but that test won't calculate a checksum on the first block
-	 * the first time the page splits.
+	 * If there are saved updates, we are either doing update/restore
+	 * eviction or lookaside eviction.  Update/restore never writes the
+	 * disk image.
+	 *
+	 * Lookaside does write disk images, but also needs to cope with the
+	 * case where no updates could be written, which means there are no
+	 * entries in the page image to write.
 	 */
-	if (r->multi_next > 1 ||
-	    (mod->rec_result == WT_PM_REC_MULTIBLOCK &&
-	    mod->mod_multi != NULL)) {
-		multi->checksum =
-		    __wt_checksum(chunk->image.data, chunk->image.size);
-
+	if (multi->supd != NULL &&
+	    (F_ISSET(r, WT_REC_UPDATE_RESTORE) || chunk->entries == 0)) {
 		/*
-		 * One last check: don't reuse blocks if compacting, the reason
-		 * for compaction is to move blocks to different locations. We
-		 * do this check after calculating the checksums, hopefully the
-		 * next write can be skipped.
+		 * If no entries were used, the page is empty and we can only
+		 * restore updates against an empty row store leaf page.
+		 * (Column store modify will attempt to allocate a zero-length
+		 * array).
 		 */
-		if (session->compact_state == WT_COMPACT_NONE &&
-		    mod->rec_result == WT_PM_REC_MULTIBLOCK &&
-		    mod->mod_multi_entries > r->multi_next) {
-			multi_mod = &mod->mod_multi[r->multi_next - 1];
-			if (multi_mod->size == multi->size &&
-			    multi_mod->checksum == multi->checksum) {
-				multi_mod->addr.reuse = 1;
-				multi->addr = multi_mod->addr;
-
-				WT_STAT_DATA_INCR(session, rec_page_match);
-				goto copy_image;
-			}
-		}
+		if (r->page->type != WT_PAGE_ROW_LEAF &&
+		    chunk->entries == 0 && multi->supd != NULL)
+			return (EBUSY);
+
+		r->cache_write_restore = true;
+		goto update_las;
 	}
 
+	/*
+	 * If we wrote this block before, re-use it. Prefer a checksum of the
+	 * compressed image. It's an identical test and should be faster.
+	 */
+	if (__rec_split_write_reuse(session, r, multi,
+	    compressed_image == NULL ? &chunk->image : compressed_image,
+	    last_block))
+		goto copy_image;
+
 	WT_RET(__wt_bt_write(session,
 	    compressed_image == NULL ? &chunk->image : compressed_image,
-	    addr, &addr_size,
-	    false, F_ISSET(r, WT_CHECKPOINTING), compressed_image != NULL));
+	    addr, &addr_size, false, F_ISSET(r, WT_REC_CHECKPOINT),
+	    compressed_image != NULL));
 #ifdef HAVE_DIAGNOSTIC
 	verify_image = false;
 #endif
 	WT_RET(__wt_memdup(session, addr, addr_size, &multi->addr.addr));
 	multi->addr.size = (uint8_t)addr_size;
 
+update_las:
 	/*
 	 * If using the lookaside table eviction path and we found updates that
 	 * weren't globally visible when reconciling this page, copy them into
 	 * the database's lookaside store.
 	 */
-	if (F_ISSET(r, WT_EVICT_LOOKASIDE) && multi->supd != NULL)
+	if (F_ISSET(r, WT_REC_LOOKASIDE) && multi->supd != NULL)
 		WT_RET(__rec_update_las(session, r, btree->id, multi));
 
 copy_image:
@@ -3511,13 +3533,14 @@ copy_image:
 	    __wt_verify_dsk_image(session,
 	    "[reconcile-image]", chunk->image.data, 0, true) == 0);
 #endif
+
 	/*
 	 * If re-instantiating this page in memory (either because eviction
 	 * wants to, or because we skipped updates to build the disk image),
 	 * save a copy of the disk image.
 	 */
-	if (F_ISSET(r, WT_EVICT_SCRUB) ||
-	    (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) && multi->supd != NULL))
+	if (F_ISSET(r, WT_REC_SCRUB) ||
+	    (F_ISSET(r, WT_REC_UPDATE_RESTORE) && multi->supd != NULL))
 		WT_RET(__wt_memdup(session,
 		    chunk->image.data, chunk->image.size, &multi->disk_image));
 
@@ -3535,26 +3558,19 @@ __rec_update_las(WT_SESSION_IMPL *session,
 	WT_CURSOR *cursor;
 	WT_DECL_ITEM(key);
 	WT_DECL_RET;
-	WT_ITEM las_addr, las_timestamp, las_value;
+	WT_ITEM las_timestamp, las_value;
 	WT_PAGE *page;
 	WT_SAVE_UPD *list;
 	WT_UPDATE *upd;
-	uint64_t insert_cnt, las_counter;
+	uint64_t insert_cnt, las_counter, las_pageid;
 	uint32_t i, session_flags, slot;
 	uint8_t *p;
 
 	cursor = NULL;
-	WT_CLEAR(las_addr);
 	WT_CLEAR(las_timestamp);
 	WT_CLEAR(las_value);
 	page = r->page;
-	insert_cnt = 0;
-
-	/*
-	 * We're writing lookaside records: start instantiating them on pages
-	 * we read (with the right flag set), and start sweeping the file.
-	 */
-	__wt_las_set_written(session);
+	insert_cnt = las_pageid = 0;
 
 	__wt_las_cursor(session, &cursor, &session_flags);
 
@@ -3562,29 +3578,20 @@ __rec_update_las(WT_SESSION_IMPL *session,
 	WT_ERR(__wt_scr_alloc(session, WT_INTPACK64_MAXSIZE, &key));
 
 	/*
-	 * Each key in the lookaside table is associated with a block, and those
-	 * blocks are freed and reallocated to other pages as pages in the tree
-	 * are modified and reconciled. We want to be sure we don't add records
-	 * to the lookaside table, then discard the block to which they apply,
-	 * then write a new block to the same address, and then apply the old
-	 * records to the new block when it's read. We don't want to clean old
-	 * records out of the lookaside table every time we free a block because
-	 * that happens a lot and would be costly; instead, we clean out the old
-	 * records when adding new records into the lookaside table. This works
-	 * because we only read from the lookaside table for pages marked with
-	 * the WT_PAGE_LAS_UPDATE flag: that flag won't be set if we rewrite a
-	 * block with no lookaside records, so the lookaside table won't be
-	 * checked when the block is read, even if there are lookaside table
-	 * records matching that block. If we rewrite a block that has lookaside
-	 * records, we'll run this code, discarding any old records that might
-	 * exist.
-	 */
-	WT_ERR(__wt_las_remove_block(
-	    session, cursor, btree_id, multi->addr.addr, multi->addr.size));
-
-	/* Lookaside table key component: block address. */
-	las_addr.data = multi->addr.addr;
-	las_addr.size = multi->addr.size;
+	 * Each key in the lookaside table is associated with a unique
+	 * identifier, allocated sequentially per tree.
+	 */
+	las_pageid = multi->las_pageid =
+	    __wt_atomic_add64(&S2BT(session)->las_pageid, 1);
+
+	/* The zero page ID is reserved, check we don't see it. */
+	WT_ASSERT(session, las_pageid != 0);
+
+	/*
+	 * Make sure there are no left over entries (e.g., from a handle
+	 * reopen).
+	 */
+	WT_ERR(__wt_las_remove_block(session, cursor, btree_id, las_pageid));
 
 	/* Enter each update in the boundary's list into the lookaside store. */
 	for (las_counter = 0, i = 0,
@@ -3654,13 +3661,8 @@ __rec_update_las(WT_SESSION_IMPL *session,
 				continue;
 			}
 
-#ifdef HAVE_TIMESTAMPS
-			las_timestamp.data = &list->onpage_timestamp;
-			las_timestamp.size = WT_TIMESTAMP_SIZE;
-#endif
 			cursor->set_key(cursor,
-			    btree_id, &las_addr, ++las_counter,
-			    list->onpage_txn, &las_timestamp, key);
+			    btree_id, las_pageid, ++las_counter, key);
 
 #ifdef HAVE_TIMESTAMPS
 			las_timestamp.data = &upd->timestamp;
@@ -3680,9 +3682,9 @@ __rec_update_las(WT_SESSION_IMPL *session,
 err:	WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));
 
 	if (insert_cnt > 0) {
-		(void)__wt_atomic_add64(
-		    &S2C(session)->las_record_cnt, insert_cnt);
-		__rec_verbose_lookaside_write(session);
+		WT_STAT_CONN_INCRV(
+		    session, cache_lookaside_entries, insert_cnt);
+		__rec_verbose_lookaside_write(session, btree_id, las_pageid);
 	}
 
 	__wt_scr_free(session, &key);
@@ -4368,7 +4370,7 @@ __rec_col_fix_slvg(WT_SESSION_IMPL *session,
 /*
  * __rec_col_var_helper --
  *	Create a column-store variable length record cell and write it onto a
- * page.
+ *	page.
  */
 static int
 __rec_col_var_helper(WT_SESSION_IMPL *session, WT_RECONCILE *r,
@@ -4634,7 +4636,7 @@ record_loop:	/*
 				 * Assert the case.
 				 */
 				WT_ASSERT(session,
-				    F_ISSET(r, WT_EVICT_UPDATE_RESTORE));
+				    F_ISSET(r, WT_REC_UPDATE_RESTORE));
 
 				/*
 				 * The on-page value will never be accessed,
@@ -4776,7 +4778,7 @@ compare:		/*
 		if (ovfl_state == OVFL_UNUSED &&
 		    vpack->raw != WT_CELL_VALUE_OVFL_RM)
 			WT_ERR(__wt_ovfl_remove(
-			    session, page, vpack, !F_ISSET(r, WT_EVICTING)));
+			    session, page, vpack, F_ISSET(r, WT_REC_EVICT)));
 	}
 
 	/* Walk any append list. */
@@ -5356,7 +5358,7 @@ __rec_row_leaf(WT_SESSION_IMPL *session,
 				 * Assert the case.
 				 */
 				WT_ASSERT(session,
-				    F_ISSET(r, WT_EVICT_UPDATE_RESTORE));
+				    F_ISSET(r, WT_REC_UPDATE_RESTORE));
 
 				/*
 				 * If the key is also a removed overflow item,
@@ -5404,7 +5406,7 @@ __rec_row_leaf(WT_SESSION_IMPL *session,
 			if (vpack != NULL &&
 			    vpack->ovfl && vpack->raw != WT_CELL_VALUE_OVFL_RM)
 				WT_ERR(__wt_ovfl_remove(session,
-				    page, vpack, !F_ISSET(r, WT_EVICTING)));
+				    page, vpack, F_ISSET(r, WT_REC_EVICT)));
 
 			switch (upd->type) {
 			case WT_UPDATE_DELETED:
@@ -5632,12 +5634,13 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins)
 
 	for (; ins != NULL; ins = WT_SKIP_NEXT(ins)) {
 		WT_RET(__rec_txn_read(session, r, ins, NULL, NULL, &upd));
+
 		if (upd == NULL) {
 			/*
 			 * Look for an update. If nothing is visible and not in
 			 * evict/restore, there's no work to do.
 			 */
-			if (!F_ISSET(r, WT_EVICT_UPDATE_RESTORE))
+			if (!F_ISSET(r, WT_REC_UPDATE_RESTORE))
 				continue;
 
 			/*
@@ -5679,8 +5682,8 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins)
 				if (upd->size == 0)
 					val->len = 0;
 				else
-					WT_RET(__rec_cell_build_val(
-					    session, r, upd->data, upd->size,
+					WT_RET(__rec_cell_build_val(session,
+					    r, upd->data, upd->size,
 					    (uint64_t)0));
 				break;
 			WT_ILLEGAL_VALUE(session);
@@ -5945,9 +5948,9 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
 		 * in memory because the latter can't handle update lists and
 		 * splits can.
 		 */
-		if (F_ISSET(r, WT_EVICT_IN_MEMORY) ||
-		    (F_ISSET(r, WT_EVICT_UPDATE_RESTORE) &&
-		    r->multi->supd != NULL))
+		if (F_ISSET(r, WT_REC_IN_MEMORY) ||
+		    (F_ISSET(r, WT_REC_UPDATE_RESTORE) &&
+		    r->multi->supd_entries != 0))
 			goto split;
 
 		/*
@@ -5959,9 +5962,15 @@ __rec_write_wrapup(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
 			r->multi->addr.addr = NULL;
 			mod->mod_disk_image = r->multi->disk_image;
 			r->multi->disk_image = NULL;
+			mod->mod_replace_las_pageid = r->multi->las_pageid;
+#ifdef HAVE_TIMESTAMPS
+			__wt_timestamp_set(&mod->mod_replace_las_min_timestamp,
+			     &r->min_saved_timestamp);
+#endif
+			r->multi->las_pageid = 0;
 		} else
 			WT_RET(__wt_bt_write(session, r->wrapup_checkpoint,
-			    NULL, NULL, true, F_ISSET(r, WT_CHECKPOINTING),
+			    NULL, NULL, true, F_ISSET(r, WT_REC_CHECKPOINT),
 			    r->wrapup_checkpoint_compressed));
 
 		mod->rec_result = WT_PM_REC_REPLACE;
@@ -6037,14 +6046,13 @@ __rec_write_wrapup_err(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
 		}
 
 	WT_TRET(__wt_ovfl_track_wrapup_err(session, page));
-
 	return (ret);
 }
 
 /*
  * __rec_cell_build_int_key --
  *	Process a key and return a WT_CELL structure and byte string to be
- * stored on a row-store internal page.
+ *	stored on a row-store internal page.
  */
 static int
 __rec_cell_build_int_key(WT_SESSION_IMPL *session,
@@ -6081,7 +6089,7 @@ __rec_cell_build_int_key(WT_SESSION_IMPL *session,
 /*
  * __rec_cell_build_leaf_key --
  *	Process a key and return a WT_CELL structure and byte string to be
- * stored on a row-store leaf page.
+ *	stored on a row-store leaf page.
  */
 static int
 __rec_cell_build_leaf_key(WT_SESSION_IMPL *session,
@@ -6184,7 +6192,7 @@ __rec_cell_build_leaf_key(WT_SESSION_IMPL *session,
 /*
  * __rec_cell_build_addr --
  *	Process an address reference and return a cell structure to be stored
- * on the page.
+ *	on the page.
  */
 static void
 __rec_cell_build_addr(WT_SESSION_IMPL *session, WT_RECONCILE *r,
@@ -6219,7 +6227,7 @@ __rec_cell_build_addr(WT_SESSION_IMPL *session, WT_RECONCILE *r,
 /*
  * __rec_cell_build_val --
  *	Process a data item and return a WT_CELL structure and byte string to
- * be stored on the page.
+ *	be stored on the page.
  */
 static int
 __rec_cell_build_val(WT_SESSION_IMPL *session,
@@ -6311,7 +6319,7 @@ __rec_cell_build_ovfl(WT_SESSION_IMPL *session,
 		/* Write the buffer. */
 		addr = buf;
 		WT_ERR(__wt_bt_write(session, tmp,
-		    addr, &size, false, F_ISSET(r, WT_CHECKPOINTING), false));
+		    addr, &size, false, F_ISSET(r, WT_REC_CHECKPOINT), false));
 
 		/*
 		 * Track the overflow record (unless it's a bulk load, which
@@ -6460,7 +6468,7 @@ __rec_dictionary_free(WT_SESSION_IMPL *session, WT_RECONCILE *r)
 /*
  * __rec_dictionary_reset --
  *	Reset the dictionary when reconciliation restarts and when crossing a
- * page boundary (a potential split).
+ *	page boundary (a potential split).
  */
 static void
 __rec_dictionary_reset(WT_RECONCILE *r)
@@ -6527,10 +6535,11 @@ __rec_dictionary_lookup(
 /*
  * __rec_verbose_lookaside_write --
  *	Create a verbose message to display once per checkpoint with details
- * about the cache state when performing a lookaside table write.
+ *	about the cache state when performing a lookaside table write.
  */
 static void
-__rec_verbose_lookaside_write(WT_SESSION_IMPL *session)
+__rec_verbose_lookaside_write(
+    WT_SESSION_IMPL *session, uint32_t las_id, uint64_t las_pageid)
 {
 #ifdef HAVE_VERBOSE
 	WT_CONNECTION_IMPL *conn;
@@ -6560,14 +6569,19 @@ __rec_verbose_lookaside_write(WT_SESSION_IMPL *session)
 			(void)__wt_eviction_dirty_needed(session, &pct_dirty);
 
 			__wt_verbose(session, WT_VERB_LOOKASIDE,
-			    "Page reconciliation triggered lookaside write. "
-			    "Entries now in lookaside file: %" PRIu64 ", "
+			    "Page reconciliation triggered lookaside write"
+			    "file ID %" PRIu32 ", page ID %" PRIu64 ". "
+			    "Entries now in lookaside file: %" PRId64 ", "
 			    "cache dirty: %" PRIu32 "%% , "
 			    "cache use: %" PRIu32 "%%",
-			    conn->las_record_cnt, pct_dirty, pct_full);
+			    las_id, las_pageid,
+			    WT_STAT_READ(conn->stats, cache_lookaside_entries),
+			    pct_dirty, pct_full);
 		}
 	}
 #else
 	WT_UNUSED(session);
+	WT_UNUSED(las_id);
+	WT_UNUSED(las_pageid);
 #endif
 }
diff --git a/src/third_party/wiredtiger/src/session/session_api.c b/src/third_party/wiredtiger/src/session/session_api.c
index d3540cb1dab..cc32766c9dc 100644
--- a/src/third_party/wiredtiger/src/session/session_api.c
+++ b/src/third_party/wiredtiger/src/session/session_api.c
@@ -1992,11 +1992,14 @@ __wt_open_internal_session(WT_CONNECTION_IMPL *conn, const char *name,
 	F_SET(session, session_flags | WT_SESSION_INTERNAL);
 
 	/*
+	 * Optionally acquire a lookaside table cursor (or clear caller's flag).
 	 * Acquiring the lookaside table cursor requires various locks; we've
 	 * seen problems in the past where deadlocks happened because sessions
 	 * deadlocked getting the cursor late in the process.  Be defensive,
 	 * get it now.
 	 */
+	if (!F_ISSET(conn, WT_CONN_LAS_OPEN))
+		F_CLR(session, WT_SESSION_LOOKASIDE_CURSOR);
 	if (F_ISSET(session, WT_SESSION_LOOKASIDE_CURSOR) &&
 	    (ret = __wt_las_cursor_open(session, &session->las_cursor)) != 0) {
 		wt_session = &session->iface;
diff --git a/src/third_party/wiredtiger/src/support/hex.c b/src/third_party/wiredtiger/src/support/hex.c
index e0b1b6de1ea..58730b1505b 100644
--- a/src/third_party/wiredtiger/src/support/hex.c
+++ b/src/third_party/wiredtiger/src/support/hex.c
@@ -116,6 +116,12 @@ __wt_hex2byte(const u_char *from, u_char *to)
 	case '7': byte = 7 << 4; break;
 	case '8': byte = 8 << 4; break;
 	case '9': byte = 9 << 4; break;
+	case 'A': byte = 10 << 4; break;
+	case 'B': byte = 11 << 4; break;
+	case 'C': byte = 12 << 4; break;
+	case 'D': byte = 13 << 4; break;
+	case 'E': byte = 14 << 4; break;
+	case 'F': byte = 15 << 4; break;
 	case 'a': byte = 10 << 4; break;
 	case 'b': byte = 11 << 4; break;
 	case 'c': byte = 12 << 4; break;
@@ -137,6 +143,12 @@ __wt_hex2byte(const u_char *from, u_char *to)
 	case '7': byte |= 7; break;
 	case '8': byte |= 8; break;
 	case '9': byte |= 9; break;
+	case 'A': byte |= 10; break;
+	case 'B': byte |= 11; break;
+	case 'C': byte |= 12; break;
+	case 'D': byte |= 13; break;
+	case 'E': byte |= 14; break;
+	case 'F': byte |= 15; break;
 	case 'a': byte |= 10; break;
 	case 'b': byte |= 11; break;
 	case 'c': byte |= 12; break;
diff --git a/src/third_party/wiredtiger/src/support/stat.c b/src/third_party/wiredtiger/src/support/stat.c
index 05b653a8c77..57dcd33c7f1 100644
--- a/src/third_party/wiredtiger/src/support/stat.c
+++ b/src/third_party/wiredtiger/src/support/stat.c
@@ -809,6 +809,7 @@ static const char * const __stats_connection_desc[] = {
 	"cache: internal pages evicted",
 	"cache: internal pages split during eviction",
 	"cache: leaf pages split during eviction",
+	"cache: lookaside table entries",
 	"cache: lookaside table insert calls",
 	"cache: lookaside table remove calls",
 	"cache: maximum bytes configured",
@@ -1138,6 +1139,7 @@ __wt_stat_connection_clear_single(WT_CONNECTION_STATS *stats)
 	stats->cache_eviction_internal = 0;
 	stats->cache_eviction_split_internal = 0;
 	stats->cache_eviction_split_leaf = 0;
+		/* not clearing cache_lookaside_entries */
 	stats->cache_lookaside_insert = 0;
 	stats->cache_lookaside_remove = 0;
 		/* not clearing cache_bytes_max */
@@ -1488,6 +1490,8 @@ __wt_stat_connection_aggregate(
 	    WT_STAT_READ(from, cache_eviction_split_internal);
 	to->cache_eviction_split_leaf +=
 	    WT_STAT_READ(from, cache_eviction_split_leaf);
+	to->cache_lookaside_entries +=
+	    WT_STAT_READ(from, cache_lookaside_entries);
 	to->cache_lookaside_insert +=
 	    WT_STAT_READ(from, cache_lookaside_insert);
 	to->cache_lookaside_remove +=
diff --git a/src/third_party/wiredtiger/src/support/thread_group.c b/src/third_party/wiredtiger/src/support/thread_group.c
index 59caaedf5cf..f5842bea572 100644
--- a/src/third_party/wiredtiger/src/support/thread_group.c
+++ b/src/third_party/wiredtiger/src/support/thread_group.c
@@ -141,7 +141,6 @@ __thread_group_resize(
 
 	conn = S2C(session);
 	thread = NULL;
-	session_flags = 0;
 
 	__wt_verbose(session, WT_VERB_THREAD_GROUP,
 	    "Resize thread group: %p, from min: %" PRIu32 " -> %" PRIu32
@@ -187,9 +186,10 @@ __thread_group_resize(
 		 * started during recovery, before the lookaside table is
 		 * created.
 		 */
+		session_flags = 0;
 		if (LF_ISSET(WT_THREAD_CAN_WAIT))
-			session_flags = WT_SESSION_CAN_WAIT;
-		if (F_ISSET(conn, WT_CONN_LAS_OPEN))
+			FLD_SET(session_flags, WT_SESSION_CAN_WAIT);
+		if (LF_ISSET(WT_THREAD_LOOKASIDE))
 			FLD_SET(session_flags, WT_SESSION_LOOKASIDE_CURSOR);
 		WT_ERR(__wt_open_internal_session(conn, group->name,
 		    false, session_flags, &thread->session));
diff --git a/src/third_party/wiredtiger/src/txn/txn.c b/src/third_party/wiredtiger/src/txn/txn.c
index ea5cd3390e2..c5c514c008b 100644
--- a/src/third_party/wiredtiger/src/txn/txn.c
+++ b/src/third_party/wiredtiger/src/txn/txn.c
@@ -593,6 +593,21 @@ __wt_txn_commit(WT_SESSION_IMPL *session, const char *cfg[])
 #endif
 	}
 
+#ifdef HAVE_TIMESTAMPS
+	/*
+	 * Debugging checks on timestamps, if user requested them.
+	 */
+	if (F_ISSET(txn, WT_TXN_TS_COMMIT_ALWAYS) &&
+	    !F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) &&
+	    txn->mod_count != 0)
+		WT_ERR_MSG(session, EINVAL, "commit_timestamp required and "
+		    "none set on this transaction");
+	if (F_ISSET(txn, WT_TXN_TS_COMMIT_NEVER) &&
+	    F_ISSET(txn, WT_TXN_HAS_TS_COMMIT) &&
+	    txn->mod_count != 0)
+		WT_ERR_MSG(session, EINVAL, "no commit_timestamp required and "
+		    "timestamp set on this transaction");
+#endif
 	/*
 	 * The default sync setting is inherited from the connection, but can
 	 * be overridden by an explicit "sync" setting for this transaction.
diff --git a/src/third_party/wiredtiger/src/txn/txn_ckpt.c b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
index 9d5f0c1adc0..7d2bb62cdd1 100644
--- a/src/third_party/wiredtiger/src/txn/txn_ckpt.c
+++ b/src/third_party/wiredtiger/src/txn/txn_ckpt.c
@@ -289,7 +289,6 @@ __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[])
 	if (F_ISSET(btree, WT_BTREE_NO_CHECKPOINT))
 		return (0);
 
-#ifdef HAVE_DIAGNOSTIC
 	/*
 	 * We may have raced between starting the checkpoint transaction and
 	 * some operation completing on the handle that updated the metadata
@@ -301,32 +300,26 @@ __wt_checkpoint_get_handles(WT_SESSION_IMPL *session, const char *cfg[])
 	 */
 	if (!WT_IS_METADATA(session->dhandle)) {
 		WT_CURSOR *meta_cursor;
-		bool metadata_race;
 
 		WT_ASSERT(session, !F_ISSET(&session->txn, WT_TXN_ERROR));
 		WT_RET(__wt_metadata_cursor(session, &meta_cursor));
 		meta_cursor->set_key(meta_cursor, session->dhandle->name);
 		ret = __wt_curfile_insert_check(meta_cursor);
 		if (ret == WT_ROLLBACK) {
-			metadata_race = true;
 			/*
-			 * Disable this check and assertion for now - it is
-			 * possible that a schema operation with a timestamp in
-			 * the future is in the metadata, but not part of the
-			 * the checkpoint now that checkpoints can be created
-			 * at the stable timestamp.
-			 * See WT-3559 for context on re-adding this assertion.
+			 * If create or drop or any schema operation of a table
+			 * is within a user transaction then checkpoint can
+			 * see the dhandle before the commit, which will lead
+			 * to a rollback error. We will ignore this dhandle as
+			 * part of this checkpoint by returning from here.
 			 */
-#if 0
-			ret = 0;
-#endif
-		} else
-			metadata_race = false;
+			WT_TRET(__wt_metadata_cursor_release(session,
+			    &meta_cursor));
+			return (0);
+		}
 		WT_TRET(__wt_metadata_cursor_release(session, &meta_cursor));
 		WT_RET(ret);
-		WT_ASSERT(session, !metadata_race);
 	}
-#endif
 
 	/*
 	 * Decide whether the tree needs to be included in the checkpoint and
diff --git a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
index 54634c03dfb..929aba30155 100644
--- a/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
+++ b/src/third_party/wiredtiger/src/txn/txn_rollback_to_stable.c
@@ -20,14 +20,15 @@ __txn_rollback_to_stable_lookaside_fixup(WT_SESSION_IMPL *session)
 	WT_CURSOR *cursor;
 	WT_DECL_RET;
 	WT_DECL_TIMESTAMP(rollback_timestamp)
-	WT_ITEM las_addr, las_key, las_timestamp;
+	WT_ITEM las_key, las_timestamp, las_value;
 	WT_TXN_GLOBAL *txn_global;
-	uint64_t las_counter, las_txnid, remove_cnt;
+	uint64_t las_counter, las_pageid, las_total, las_txnid;
 	uint32_t las_id, session_flags;
+	uint8_t upd_type;
 
 	conn = S2C(session);
 	cursor = NULL;
-	remove_cnt = 0;
+	las_total = 0;
 	session_flags = 0;		/* [-Werror=maybe-uninitialized] */
 	WT_CLEAR(las_timestamp);
 
@@ -40,7 +41,7 @@ __txn_rollback_to_stable_lookaside_fixup(WT_SESSION_IMPL *session)
 	txn_global = &conn->txn_global;
 	WT_WITH_TIMESTAMP_READLOCK(session, &txn_global->rwlock,
 	    __wt_timestamp_set(
-		&rollback_timestamp, &txn_global->stable_timestamp));
+	    &rollback_timestamp, &txn_global->stable_timestamp));
 
 	__wt_las_cursor(session, &cursor, &session_flags);
 
@@ -49,8 +50,8 @@ __txn_rollback_to_stable_lookaside_fixup(WT_SESSION_IMPL *session)
 
 	/* Walk the file. */
 	for (; (ret = cursor->next(cursor)) == 0; ) {
-		WT_ERR(cursor->get_key(cursor, &las_id, &las_addr, &las_counter,
-		    &las_txnid, &las_timestamp, &las_key));
+		WT_ERR(cursor->get_key(cursor,
+		    &las_id, &las_pageid, &las_counter, &las_key));
 
 		/* Check the file ID so we can skip durable tables */
 		if (las_id >= conn->stable_rollback_maxfile)
@@ -60,27 +61,23 @@ __txn_rollback_to_stable_lookaside_fixup(WT_SESSION_IMPL *session)
 		if (__bit_test(conn->stable_rollback_bitstring, las_id))
 			continue;
 
+		WT_ERR(cursor->get_value(cursor,
+		    &las_txnid, &las_timestamp, &upd_type, &las_value));
+
 		/*
 		 * Entries with no timestamp will have a timestamp of zero,
 		 * which will fail the following check and cause them to never
 		 * be removed.
 		 */
 		if (__wt_timestamp_cmp(
-		    &rollback_timestamp, las_timestamp.data) < 0) {
+		    &rollback_timestamp, las_timestamp.data) < 0)
 			WT_ERR(cursor->remove(cursor));
-			++remove_cnt;
-		}
+		else
+			++las_total;
 	}
 	WT_ERR_NOTFOUND_OK(ret);
 err:	WT_TRET(__wt_las_cursor_close(session, &cursor, session_flags));
-	/*
-	 * If there were races to remove records, we can over-count. Underflow
-	 * isn't fatal, but check anyway so we don't skew low over time.
-	 */
-	if (remove_cnt > conn->las_record_cnt)
-		conn->las_record_cnt = 0;
-	else if (remove_cnt > 0)
-		(void)__wt_atomic_sub64(&conn->las_record_cnt, remove_cnt);
+	WT_STAT_CONN_SET(session, cache_lookaside_entries, las_total);
 
 	F_CLR(session, WT_SESSION_NO_CACHE);
 
@@ -303,6 +300,20 @@ __txn_rollback_to_stable_btree_walk(
 }
 
 /*
+ * __txn_rollback_eviction_drain --
+ *	Wait for eviction to drain from a tree.
+ */
+static int
+__txn_rollback_eviction_drain(WT_SESSION_IMPL *session, const char *cfg[])
+{
+	WT_UNUSED(cfg);
+
+	WT_RET(__wt_evict_file_exclusive_on(session));
+	__wt_evict_file_exclusive_off(session);
+	return (0);
+}
+
+/*
  * __txn_rollback_to_stable_btree --
  *	Called for each open handle - choose to either skip or wipe the commits
  */
@@ -422,7 +433,19 @@ __wt_txn_rollback_to_stable(WT_SESSION_IMPL *session, const char *cfg[])
 	WT_DECL_RET;
 
 	conn = S2C(session);
-	WT_RET(__txn_rollback_to_stable_check(session));
+
+	/*
+	 * Mark that a rollback operation is in progress and wait for eviction
+	 * to drain.  This is necessary because lookaside eviction uses
+	 * transactions and causes the check for a quiescent system to fail.
+	 */
+	F_SET(conn, WT_CONN_EVICTION_NO_LOOKASIDE);
+	WT_ERR(__wt_conn_btree_apply(session,
+	    NULL, __txn_rollback_eviction_drain, NULL, cfg));
+
+	WT_ERR(__txn_rollback_to_stable_check(session));
+
+	F_CLR(conn, WT_CONN_EVICTION_NO_LOOKASIDE);
 
 	/*
 	 * Allocate a non-durable btree bitstring.  We increment the global
@@ -430,7 +453,7 @@ __wt_txn_rollback_to_stable(WT_SESSION_IMPL *session, const char *cfg[])
 	 * hence we need to add one here.
 	 */
 	conn->stable_rollback_maxfile = conn->next_file_id + 1;
-	WT_RET(__bit_alloc(session,
+	WT_ERR(__bit_alloc(session,
 	    conn->stable_rollback_maxfile, &conn->stable_rollback_bitstring));
 	WT_ERR(__wt_conn_btree_apply(session,
 	    NULL, __txn_rollback_to_stable_btree, NULL, cfg));
@@ -442,7 +465,9 @@ __wt_txn_rollback_to_stable(WT_SESSION_IMPL *session, const char *cfg[])
 	 * lookaside records should be removed.
 	 */
 	WT_ERR(__txn_rollback_to_stable_lookaside_fixup(session));
-err:	__wt_free(session, conn->stable_rollback_bitstring);
+
+err:	F_CLR(conn, WT_CONN_EVICTION_NO_LOOKASIDE);
+	__wt_free(session, conn->stable_rollback_bitstring);
 	return (ret);
 #endif
 }
diff --git a/src/third_party/wiredtiger/src/txn/txn_timestamp.c b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
index 2182a3924a5..8f90afeb8b4 100644
--- a/src/third_party/wiredtiger/src/txn/txn_timestamp.c
+++ b/src/third_party/wiredtiger/src/txn/txn_timestamp.c
@@ -107,27 +107,37 @@ __wt_txn_parse_timestamp(WT_SESSION_IMPL *session,
 
 #if WT_TIMESTAMP_SIZE == 8
 	{
-	static const u_char hextable[] = {
-	    0,  0,  0,  0,  0,  0,  0,  0,
-	    0,  0,  0,  0,  0,  0,  0,  0,
-	    0,  0,  0,  0,  0,  0,  0,  0,
-	    0,  0,  0,  0,  0,  0,  0,  0,
-	    0,  0,  0,  0,  0,  0,  0,  0,
-	    0,  0,  0,  0,  0,  0,  0,  0,
-	    0,  1,  2,  3,  4,  5,  6,  7,
-	    8,  9,  0,  0,  0,  0,  0,  0,
-	    0, 10, 11, 12, 13, 14, 15,  0,
-	    0,  0,  0,  0,  0,  0,  0,  0,
-	    0,  0,  0,  0,  0,  0,  0,  0,
-	    0,  0,  0,  0,  0,  0,  0,  0,
-	    0, 10, 11, 12, 13, 14, 15
+	static const int8_t hextable[] = {
+	    -1, -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	    -1, -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	    -1, -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	    -1, -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	    -1, -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	    -1, -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	     0,  1,   2,   3,   4,   5,   6,   7,
+	     8,  9,  -1,  -1,  -1,  -1,  -1,  -1,
+	    -1, 10,  11,  12,  13,  14,  15,  -1,
+	    -1, -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	    -1, -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	    -1, -1,  -1,  -1,  -1,  -1,  -1,  -1,
+	    -1, 10,  11,  12,  13,  14,  15,  -1
 	};
 	wt_timestamp_t ts;
 	size_t len;
-	const char *hex;
-
-	for (ts.val = 0, hex = cval->str, len = cval->len; len > 0; --len)
-		ts.val = (ts.val << 4) | hextable[(int)*hex++];
+	int hex_val;
+	const char *hex_itr;
+
+	for (ts.val = 0, hex_itr = cval->str, len = cval->len; len > 0; --len) {
+		if ((size_t)*hex_itr < WT_ELEMENTS(hextable))
+			hex_val = hextable[(size_t)*hex_itr++];
+		else
+			hex_val = -1;
+		if (hex_val < 0)
+			WT_RET_MSG(session, EINVAL,
+			    "Failed to parse %s timestamp '%.*s'",
+			    name, (int)cval->len, cval->str);
+		ts.val = (ts.val << 4) | (uint64_t)hex_val;
+	}
 	__wt_timestamp_set(timestamp, &ts);
 	}
 #else
diff --git a/src/third_party/wiredtiger/test/fops/file.c b/src/third_party/wiredtiger/test/fops/file.c
index 60320ae3a38..118845ab805 100644
--- a/src/third_party/wiredtiger/test/fops/file.c
+++ b/src/third_party/wiredtiger/test/fops/file.c
@@ -39,6 +39,8 @@ obj_bulk(void)
 
 	testutil_check(conn->open_session(conn, NULL, NULL, &session));
 
+	if (use_txn)
+		testutil_check(session->begin_transaction(session, NULL));
 	if ((ret = session->create(session, uri, config)) != 0)
 		if (ret != EEXIST && ret != EBUSY)
 			testutil_die(ret, "session.create");
@@ -51,6 +53,21 @@ obj_bulk(void)
 		} else if (ret != ENOENT && ret != EBUSY && ret != EINVAL)
 			testutil_die(ret, "session.open_cursor bulk");
 	}
+
+	if (use_txn) {
+		/*
+		 * As the operations are being performed concurrently,
+		 * the return value can be ENOENT, EBUSY or EINVAL, which
+		 * sets an error on the transaction opened by the session.
+		 * In these cases the transaction has to be aborted.
+		 */
+		if (ret != ENOENT && ret != EBUSY && ret != EINVAL)
+			ret = session->commit_transaction(session, NULL);
+		else
+			ret = session->rollback_transaction(session, NULL);
+		if (ret == EINVAL)
+			testutil_die(ret, "session.commit bulk");
+	}
 	testutil_check(session->close(session, NULL));
 }
 
@@ -70,6 +87,8 @@ obj_bulk_unique(int force)
 	    new_uri, sizeof(new_uri), "%s.%u", uri, ++uid));
 	testutil_check(pthread_rwlock_unlock(&single));
 
+	if (use_txn)
+		testutil_check(session->begin_transaction(session, NULL));
 	testutil_check(session->create(session, new_uri, config));
 
 	__wt_yield();
@@ -89,6 +108,10 @@ obj_bulk_unique(int force)
 		if (ret != EBUSY)
 			testutil_die(ret, "session.drop: %s", new_uri);
 
+	if (use_txn &&
+	    (ret = session->commit_transaction(session, NULL)) != 0 &&
+	    ret != EINVAL)
+		testutil_die(ret, "session.commit bulk unique");
 	testutil_check(session->close(session, NULL));
 }
 
@@ -101,12 +124,19 @@ obj_cursor(void)
 
 	testutil_check(conn->open_session(conn, NULL, NULL, &session));
 
+	if (use_txn)
+		testutil_check(session->begin_transaction(session, NULL));
 	if ((ret =
 	    session->open_cursor(session, uri, NULL, NULL, &cursor)) != 0) {
 		if (ret != ENOENT && ret != EBUSY)
 			testutil_die(ret, "session.open_cursor");
 	} else
 		testutil_check(cursor->close(cursor));
+
+	if (use_txn &&
+	    (ret = session->commit_transaction(session, NULL)) != 0 &&
+	    ret != EINVAL)
+		testutil_die(ret, "session.commit cursor");
 	testutil_check(session->close(session, NULL));
 }
 
@@ -118,10 +148,16 @@ obj_create(void)
 
 	testutil_check(conn->open_session(conn, NULL, NULL, &session));
 
+	if (use_txn)
+		testutil_check(session->begin_transaction(session, NULL));
 	if ((ret = session->create(session, uri, config)) != 0)
 		if (ret != EEXIST && ret != EBUSY)
 			testutil_die(ret, "session.create");
 
+	if (use_txn &&
+	    (ret = session->commit_transaction(session, NULL)) != 0 &&
+	    ret != EINVAL)
+		testutil_die(ret, "session.commit create");
 	testutil_check(session->close(session, NULL));
 }
 
@@ -140,13 +176,25 @@ obj_create_unique(int force)
 	    new_uri, sizeof(new_uri), "%s.%u", uri, ++uid));
 	testutil_check(pthread_rwlock_unlock(&single));
 
+	if (use_txn)
+		testutil_check(session->begin_transaction(session, NULL));
 	testutil_check(session->create(session, new_uri, config));
+	if (use_txn &&
+	    (ret = session->commit_transaction(session, NULL)) != 0 &&
+	    ret != EINVAL)
+		testutil_die(ret, "session.commit create unique");
 
 	__wt_yield();
+	if (use_txn)
+		testutil_check(session->begin_transaction(session, NULL));
 	while ((ret = session->drop(
 	    session, new_uri, force ? "force" : NULL)) != 0)
 		if (ret != EBUSY)
 			testutil_die(ret, "session.drop: %s", new_uri);
+	if (use_txn &&
+	    (ret = session->commit_transaction(session, NULL)) != 0 &&
+	    ret != EINVAL)
+		testutil_die(ret, "session.commit create unique");
 
 	testutil_check(session->close(session, NULL));
 }
@@ -159,10 +207,26 @@ obj_drop(int force)
 
 	testutil_check(conn->open_session(conn, NULL, NULL, &session));
 
+	if (use_txn)
+		testutil_check(session->begin_transaction(session, NULL));
 	if ((ret = session->drop(session, uri, force ? "force" : NULL)) != 0)
 		if (ret != ENOENT && ret != EBUSY)
 			testutil_die(ret, "session.drop");
 
+	if (use_txn) {
+		/*
+		 * As the operations are being performed concurrently,
+		 * the return value can be ENOENT or EBUSY, which sets
+		 * an error on the transaction opened by the session.
+		 * In these cases the transaction has to be aborted.
+		 */
+		if (ret != ENOENT && ret != EBUSY)
+			ret = session->commit_transaction(session, NULL);
+		else
+			ret = session->rollback_transaction(session, NULL);
+		if (ret == EINVAL)
+			testutil_die(ret, "session.commit drop");
+	}
 	testutil_check(session->close(session, NULL));
 }
 
diff --git a/src/third_party/wiredtiger/test/fops/t.c b/src/third_party/wiredtiger/test/fops/t.c
index b6b80ba5db8..fcbbdcabd73 100644
--- a/src/third_party/wiredtiger/test/fops/t.c
+++ b/src/third_party/wiredtiger/test/fops/t.c
@@ -28,6 +28,7 @@
 
 #include "thread.h"
 
+bool use_txn;					/* Operations with user txn */
 WT_CONNECTION *conn;				/* WiredTiger connection */
 pthread_rwlock_t single;			/* Single thread */
 u_int nops;					/* Operations */
@@ -77,8 +78,9 @@ main(int argc, char *argv[])
 	nops = 1000;
 	nthreads = 10;
 	runs = 1;
+	use_txn = false;
 	config_open = working_dir = NULL;
-	while ((ch = __wt_getopt(progname, argc, argv, "C:h:l:n:r:t:")) != EOF)
+	while ((ch = __wt_getopt(progname, argc, argv, "C:h:l:n:r:t:x")) != EOF)
 		switch (ch) {
 		case 'C':			/* wiredtiger_open config */
 			config_open = __wt_optarg;
@@ -102,6 +104,9 @@ main(int argc, char *argv[])
 		case 't':
 			nthreads = (u_int)atoi(__wt_optarg);
 			break;
+		case 'x':
+			use_txn = true;
+			break;
 		default:
 			return (usage());
 		}
@@ -245,7 +250,8 @@ usage(void)
 {
 	fprintf(stderr,
 	    "usage: %s "
-	    "[-C wiredtiger-config] [-l log] [-n ops] [-r runs] [-t threads]\n",
+	    "[-C wiredtiger-config] [-l log] [-n ops] [-r runs] [-t threads] "
+	    "[-x] \n",
 	    progname);
 	fprintf(stderr, "%s",
 	    "\t-C specify wiredtiger_open configuration arguments\n"
@@ -253,6 +259,7 @@ usage(void)
 	    "\t-l specify a log file\n"
 	    "\t-n set number of operations each thread does\n"
 	    "\t-r set number of runs\n"
-	    "\t-t set number of threads\n");
+	    "\t-t set number of threads\n"
+	    "\t-x operations within user transaction \n");
 	return (EXIT_FAILURE);
 }
diff --git a/src/third_party/wiredtiger/test/fops/thread.h b/src/third_party/wiredtiger/test/fops/thread.h
index f6b6bdffd63..0df36025be0 100644
--- a/src/third_party/wiredtiger/test/fops/thread.h
+++ b/src/third_party/wiredtiger/test/fops/thread.h
@@ -30,6 +30,7 @@
 
 #include <signal.h>
 
+extern bool use_txn;				/* Operations with user txn */
 extern WT_CONNECTION *conn;			/* WiredTiger connection */
 
 extern u_int nops;				/* Operations per thread */
diff --git a/src/third_party/wiredtiger/test/format/format.h b/src/third_party/wiredtiger/test/format/format.h
index 81b7fa27f79..f35e71f58aa 100644
--- a/src/third_party/wiredtiger/test/format/format.h
+++ b/src/third_party/wiredtiger/test/format/format.h
@@ -287,6 +287,7 @@ void	 bdb_update(const void *, size_t, const void *, size_t);
 
 WT_THREAD_RET alter(void *);
 WT_THREAD_RET backup(void *);
+WT_THREAD_RET checkpoint(void *);
 WT_THREAD_RET compact(void *);
 void	 config_clear(void);
 void	 config_error(void);
diff --git a/src/third_party/wiredtiger/test/format/ops.c b/src/third_party/wiredtiger/test/format/ops.c
index f4770465628..4fed18d12b4 100644
--- a/src/third_party/wiredtiger/test/format/ops.c
+++ b/src/third_party/wiredtiger/test/format/ops.c
@@ -76,7 +76,8 @@ wts_ops(int lastrun)
 	TINFO **tinfo_list, *tinfo, total;
 	WT_CONNECTION *conn;
 	WT_SESSION *session;
-	wt_thread_t alter_tid, backup_tid, compact_tid, lrt_tid, timestamp_tid;
+	wt_thread_t alter_tid, backup_tid, checkpoint_tid, compact_tid, lrt_tid;
+	wt_thread_t timestamp_tid;
 	int64_t fourths, quit_fourths, thread_ops;
 	uint32_t i;
 	bool running;
@@ -86,6 +87,7 @@ wts_ops(int lastrun)
 	session = NULL;			/* -Wconditional-uninitialized */
 	memset(&alter_tid, 0, sizeof(alter_tid));
 	memset(&backup_tid, 0, sizeof(backup_tid));
+	memset(&checkpoint_tid, 0, sizeof(checkpoint_tid));
 	memset(&compact_tid, 0, sizeof(compact_tid));
 	memset(&lrt_tid, 0, sizeof(lrt_tid));
 	memset(&timestamp_tid, 0, sizeof(timestamp_tid));
@@ -173,6 +175,9 @@ wts_ops(int lastrun)
 	if (g.c_backups)
 		testutil_check(
 		    __wt_thread_create(NULL, &backup_tid, backup, NULL));
+	if (g.c_checkpoints)
+		testutil_check(__wt_thread_create(
+		    NULL, &checkpoint_tid, checkpoint, NULL));
 	if (g.c_compact)
 		testutil_check(
 		    __wt_thread_create(NULL, &compact_tid, compact, NULL));
@@ -247,6 +252,8 @@ wts_ops(int lastrun)
 		testutil_check(__wt_thread_join(NULL, alter_tid));
 	if (g.c_backups)
 		testutil_check(__wt_thread_join(NULL, backup_tid));
+	if (g.c_checkpoints)
+		testutil_check(__wt_thread_join(NULL, checkpoint_tid));
 	if (g.c_compact)
 		testutil_check(__wt_thread_join(NULL, compact_tid));
 	if (!SINGLETHREADED && g.c_long_running_txn)
@@ -514,12 +521,11 @@ ops(void *arg)
 	WT_DECL_RET;
 	WT_ITEM *key, _key, *value, _value;
 	WT_SESSION *session;
-	uint64_t ckpt_op, keyno, reset_op, session_op;
+	uint64_t keyno, reset_op, session_op;
 	uint32_t rnd;
 	u_int i, iso_config;
 	int dir;
-	char *ckpt_config, ckpt_name[64];
-	bool ckpt_available, intxn, positioned, readonly;
+	bool intxn, positioned, readonly;
 
 	tinfo = arg;
 
@@ -542,58 +548,61 @@ ops(void *arg)
 	session = NULL;
 	session_op = 0;
 
-	/* Set the first operation where we'll perform checkpoint operations. */
-	ckpt_op = g.c_checkpoints ? mmrand(&tinfo->rnd, 100, 10000) : 0;
-	ckpt_available = false;
-
 	/* Set the first operation where we'll reset the session. */
 	reset_op = mmrand(&tinfo->rnd, 100, 10000);
 
 	for (intxn = false; !tinfo->quit; ++tinfo->ops) {
-		/*
-		 * We can't checkpoint or swap sessions/cursors while in a
-		 * transaction, resolve any running transaction.
-		 */
-		if (intxn &&
-		    (tinfo->ops == ckpt_op || tinfo->ops == session_op)) {
-			commit_transaction(tinfo, session);
-			intxn = false;
-		}
-
-		/* Open up a new session and cursors. */
-		if (tinfo->ops == session_op ||
+		/* Periodically open up a new session and cursors. */
+		if (tinfo->ops > session_op ||
 		    session == NULL || cursor == NULL) {
+			/*
+			 * We can't swap sessions/cursors if in a transaction,
+			 * resolve any running transaction.
+			 */
+			if (intxn) {
+				commit_transaction(tinfo, session);
+				intxn = false;
+			}
+
 			if (session != NULL)
 				testutil_check(session->close(session, NULL));
-
 			testutil_check(
 			    conn->open_session(conn, NULL, NULL, &session));
 
+			/* Pick the next session/cursor close/open. */
+			session_op += mmrand(&tinfo->rnd, 100, 5000);
+
 			/*
 			 * 10% of the time, perform some read-only operations
 			 * from a checkpoint.
 			 *
-			 * Skip that if we are single-threaded and doing checks
-			 * against a Berkeley DB database, because that won't
-			 * work because the Berkeley DB database records won't
-			 * match the checkpoint.  Also skip if we are using
-			 * LSM, because it doesn't support reads from
-			 * checkpoints.
+			 * Skip if single-threaded and doing checks against a
+			 * Berkeley DB database, that won't work because the
+			 * Berkeley DB database won't match the checkpoint.
+			 *
+			 * Skip if we are using data-sources or LSM, they don't
+			 * support reading from checkpoints.
 			 */
-			if (!SINGLETHREADED && !DATASOURCE("lsm") &&
-			    ckpt_available && mmrand(&tinfo->rnd, 1, 10) == 1) {
+			if (!SINGLETHREADED && !DATASOURCE("helium") &&
+			    !DATASOURCE("kvsbdb") && !DATASOURCE("lsm") &&
+			    mmrand(&tinfo->rnd, 1, 10) == 1) {
 				/*
 				 * open_cursor can return EBUSY if concurrent
 				 * with a metadata operation, retry.
 				 */
 				while ((ret = session->open_cursor(session,
-				    g.uri, NULL, ckpt_name, &cursor)) == EBUSY)
+				    g.uri, NULL,
+				    "checkpoint=WiredTigerCheckpoint",
+				    &cursor)) == EBUSY)
 					__wt_yield();
+				/*
+				 * If the checkpoint hasn't been created yet,
+				 * ignore the error.
+				 */
+				if (ret == ENOENT)
+					continue;
 				testutil_check(ret);
 
-				/* Pick the next session/cursor close/open. */
-				session_op += 250;
-
 				/* Checkpoints are read-only. */
 				readonly = true;
 			} else {
@@ -608,75 +617,11 @@ ops(void *arg)
 					__wt_yield();
 				testutil_check(ret);
 
-				/* Pick the next session/cursor close/open. */
-				session_op += mmrand(&tinfo->rnd, 100, 5000);
-
 				/* Updates supported. */
 				readonly = false;
 			}
 		}
 
-		/* Checkpoint the database. */
-		if (tinfo->ops == ckpt_op && g.c_checkpoints) {
-			/*
-			 * Checkpoints are single-threaded inside WiredTiger,
-			 * skip our checkpoint if another thread is already
-			 * doing one.
-			 */
-			ret = pthread_rwlock_trywrlock(&g.checkpoint_lock);
-			if (ret == EBUSY)
-				goto skip_checkpoint;
-			testutil_check(ret);
-
-			/*
-			 * LSM and data-sources don't support named checkpoints
-			 * and we can't drop a named checkpoint while there's a
-			 * backup in progress, otherwise name the checkpoint 5%
-			 * of the time.
-			 */
-			if (mmrand(&tinfo->rnd, 1, 20) != 1 ||
-			    DATASOURCE("helium") ||
-			    DATASOURCE("kvsbdb") || DATASOURCE("lsm") ||
-			    pthread_rwlock_trywrlock(&g.backup_lock) == EBUSY)
-				ckpt_config = NULL;
-			else {
-				testutil_check(__wt_snprintf(
-				    ckpt_name, sizeof(ckpt_name),
-				    "name=thread-%d", tinfo->id));
-				ckpt_config = ckpt_name;
-			}
-
-			ret = session->checkpoint(session, ckpt_config);
-			/*
-			 * We may be trying to create a named checkpoint while
-			 * we hold a cursor open to the previous checkpoint.
-			 * Tolerate EBUSY.
-			 */
-			if (ret != 0 && ret != EBUSY)
-				testutil_die(ret, "%s",
-				    ckpt_config == NULL ? "" : ckpt_config);
-			ret = 0;
-
-			if (ckpt_config != NULL)
-				testutil_check(
-				    pthread_rwlock_unlock(&g.backup_lock));
-			testutil_check(
-			    pthread_rwlock_unlock(&g.checkpoint_lock));
-
-			/* Rephrase the checkpoint name for cursor open. */
-			if (ckpt_config == NULL)
-				strcpy(ckpt_name,
-				    "checkpoint=WiredTigerCheckpoint");
-			else
-				testutil_check(__wt_snprintf(
-				    ckpt_name, sizeof(ckpt_name),
-				    "checkpoint=thread-%d", tinfo->id));
-			ckpt_available = true;
-
-skip_checkpoint:	/* Pick the next checkpoint operation. */
-			ckpt_op += mmrand(&tinfo->rnd, 5000, 20000);
-		}
-
 		/*
 		 * Reset the session every now and then, just to make sure that
 		 * operation gets tested. Note the test is not for equality, we
diff --git a/src/third_party/wiredtiger/test/format/t.c b/src/third_party/wiredtiger/test/format/t.c
index dc288ba4bc2..02ed0a2da60 100644
--- a/src/third_party/wiredtiger/test/format/t.c
+++ b/src/third_party/wiredtiger/test/format/t.c
@@ -169,7 +169,6 @@ main(int argc, char *argv[])
 	 */
 	testutil_check(pthread_rwlock_init(&g.append_lock, NULL));
 	testutil_check(pthread_rwlock_init(&g.backup_lock, NULL));
-	testutil_check(pthread_rwlock_init(&g.checkpoint_lock, NULL));
 	testutil_check(pthread_rwlock_init(&g.death_lock, NULL));
 
 	printf("%s: process %" PRIdMAX "\n", progname, (intmax_t)getpid());
@@ -267,7 +266,6 @@ main(int argc, char *argv[])
 
 	testutil_check(pthread_rwlock_destroy(&g.append_lock));
 	testutil_check(pthread_rwlock_destroy(&g.backup_lock));
-	testutil_check(pthread_rwlock_destroy(&g.checkpoint_lock));
 	testutil_check(pthread_rwlock_destroy(&g.death_lock));
 
 	config_clear();
diff --git a/src/third_party/wiredtiger/test/format/util.c b/src/third_party/wiredtiger/test/format/util.c
index 98af8e766f1..9ea44a29801 100644
--- a/src/third_party/wiredtiger/test/format/util.c
+++ b/src/third_party/wiredtiger/test/format/util.c
@@ -501,6 +501,86 @@ fclose_and_clear(FILE **fpp)
 }
 
 /*
+ * checkpoint --
+ *	Periodically take a checkpoint
+ */
+WT_THREAD_RET
+checkpoint(void *arg)
+{
+	WT_CONNECTION *conn;
+	WT_DECL_RET;
+	WT_SESSION *session;
+	u_int secs;
+	const char *ckpt_config;
+	char config_buf[64];
+	bool backup_locked;
+
+	(void)arg;
+	conn = g.wts_conn;
+	testutil_check(conn->open_session(conn, NULL, NULL, &session));
+
+	for (secs = mmrand(NULL, 1, 10); !g.workers_finished;) {
+		if (secs > 0) {
+			__wt_sleep(1, 0);
+			--secs;
+			continue;
+		}
+
+		/*
+		 * LSM and data-sources don't support named checkpoints. Also,
+		 * don't attempt named checkpoints during a hot backup. It's
+		 * OK to create named checkpoints during a hot backup, but we
+		 * can't delete them, so repeating an already existing named
+		 * checkpoint will fail when we can't drop the previous one.
+		 */
+		ckpt_config = NULL;
+		backup_locked = false;
+		if (!DATASOURCE("helium") && !DATASOURCE("kvsbdb") &&
+		    !DATASOURCE("lsm"))
+			switch (mmrand(NULL, 1, 20)) {
+			case 1:
+				/*
+				 * 5% create a named checkpoint. Rotate between
+				 * a few names to test multiple named
+				 * checkpoints in the system.
+				 */
+				ret = pthread_rwlock_trywrlock(&g.backup_lock);
+				if (ret == 0) {
+					backup_locked = true;
+					testutil_check(__wt_snprintf(
+					    config_buf, sizeof(config_buf),
+					    "name=mine.%" PRIu32,
+					    mmrand(NULL, 1, 4)));
+					ckpt_config = config_buf;
+				} else if (ret != EBUSY)
+					testutil_check(ret);
+				break;
+			case 2:
+				/*
+				 * 5% drop all named checkpoints.
+				 */
+				ret = pthread_rwlock_trywrlock(&g.backup_lock);
+				if (ret == 0) {
+					backup_locked = true;
+					ckpt_config = "drop=(all)";
+				} else if (ret != EBUSY)
+					testutil_check(ret);
+				break;
+			}
+
+		testutil_check(session->checkpoint(session, ckpt_config));
+
+		if (backup_locked)
+			testutil_check(pthread_rwlock_unlock(&g.backup_lock));
+
+		secs = mmrand(NULL, 5, 40);
+	}
+
+	testutil_check(session->close(session, NULL));
+	return (WT_THREAD_RET_VALUE);
+}
+
+/*
  * timestamp --
  *	Periodically update the oldest timestamp.
  */
diff --git a/src/third_party/wiredtiger/test/mciproject.yml b/src/third_party/wiredtiger/test/mciproject.yml
index 72022fe46ec..4b67299d14c 100644
--- a/src/third_party/wiredtiger/test/mciproject.yml
+++ b/src/third_party/wiredtiger/test/mciproject.yml
@@ -167,20 +167,6 @@ buildvariants:
     - name: unit-test
     - name: fops
 
-- name: solaris
-  display_name: Solaris
-  run_on:
-  - solaris
-  expansions:
-    make_command: PATH=/opt/mongodbtoolchain/bin:$PATH gmake
-    test_env_vars: LD_LIBRARY_PATH=`pwd`/.libs
-    smp_command: -j $(kstat cpu | sort -u | grep -c "^module")
-    configure_env_vars: PATH=/opt/mongodbtoolchain/bin:$PATH CFLAGS="-m64"
-  tasks:
-    - name: compile
-    - name: unit-test
-    - name: fops
-
 - name: windows-64
   display_name: Windows 64-bit
   run_on:
diff --git a/src/third_party/wiredtiger/test/suite/test_assert01.py b/src/third_party/wiredtiger/test/suite/test_assert01.py
new file mode 100644
index 00000000000..3a4f8e4127a
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_assert01.py
@@ -0,0 +1,114 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2017 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# test_assert01.py
+#   Timestamps: assert commit settings
+#
+
+from suite_subprocess import suite_subprocess
+import wiredtiger, wttest
+
+def timestamp_str(t):
+    return '%x' % t
+
+class test_assert01(wttest.WiredTigerTestCase, suite_subprocess):
+    base = 'assert01'
+    base_uri = 'file:' + base
+    uri_always = base_uri + '.always.wt'
+    uri_def = base_uri + '.def.wt'
+    uri_never = base_uri + '.never.wt'
+    uri_none = base_uri + '.none.wt'
+    cfg = 'key_format=S,value_format=S,'
+    cfg_always = 'assert=(commit_timestamp=always)'
+    cfg_def = ''
+    cfg_never = 'assert=(commit_timestamp=never)'
+    cfg_none = 'assert=(commit_timestamp=none)'
+
+    count = 1
+    #
+    # Commit a k/v pair making sure that it detects an error if needed, when
+    # used with and without a commit timestamp.
+    #
+    def insert_check(self, uri, use_ts):
+        c = self.session.open_cursor(uri)
+        key = 'key' + str(self.count)
+        val = 'value' + str(self.count)
+
+        # Commit with a timestamp
+        self.session.begin_transaction()
+        self.session.timestamp_transaction(
+            'commit_timestamp=' + timestamp_str(self.count))
+        c[key] = val
+        # All settings other than never should commit successfully
+        if (use_ts != 'never'):
+            self.session.commit_transaction()
+        else:
+            msg = "/timestamp set on this transaction/"
+            self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+                lambda:self.assertEquals(self.session.commit_transaction(),
+                0), msg)
+        c.close()
+        self.count += 1
+
+        # Commit without a timestamp
+        key = 'key' + str(self.count)
+        val = 'value' + str(self.count)
+        c = self.session.open_cursor(uri)
+        self.session.begin_transaction()
+        c[key] = val
+        # All settings other than always should commit successfully
+        if (use_ts != 'always'):
+            self.session.commit_transaction()
+        else:
+            msg = "/none set on this transaction/"
+            self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+                lambda:self.assertEquals(self.session.commit_transaction(),
+                0), msg)
+        self.count += 1
+        c.close()
+
+    def test_commit_timestamp(self):
+        #if not wiredtiger.timestamp_build() or not wiredtiger.diagnostic_build():
+        #    self.skipTest('requires a timestamp and diagnostic build')
+        if not wiredtiger.timestamp_build():
+            self.skipTest('requires a timestamp build')
+
+        # Create one table for each commit_timestamp assert setting
+        self.session.create(self.uri_always, self.cfg + self.cfg_always)
+        self.session.create(self.uri_def, self.cfg + self.cfg_def)
+        self.session.create(self.uri_never, self.cfg + self.cfg_never)
+        self.session.create(self.uri_none, self.cfg + self.cfg_none)
+
+        # Check inserting into each table
+        self.insert_check(self.uri_always, 'always')
+        self.insert_check(self.uri_def, 'none')
+        self.insert_check(self.uri_never, 'never')
+        self.insert_check(self.uri_none, 'none')
+
+if __name__ == '__main__':
+    wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_assert02.py b/src/third_party/wiredtiger/test/suite/test_assert02.py
new file mode 100644
index 00000000000..d264273c3a0
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_assert02.py
@@ -0,0 +1,141 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2017 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# test_assert02.py
+#   Timestamps: assert read timestamp settings
+#
+
+from suite_subprocess import suite_subprocess
+import wiredtiger, wttest
+
+def timestamp_str(t):
+    return '%x' % t
+
+class test_assert02(wttest.WiredTigerTestCase, suite_subprocess):
+    def test_read_timestamp(self):
+        #if not wiredtiger.timestamp_build() or not wiredtiger.diagnostic_build():
+        #    self.skipTest('requires a timestamp and diagnostic build')
+        if not wiredtiger.timestamp_build():
+            self.skipTest('requires a timestamp build')
+
+        base = 'assert02.'
+        base_uri = 'file:' + base
+        uri_always = base_uri + '.always.wt'
+        uri_def = base_uri + '.def.wt'
+        uri_never = base_uri + '.never.wt'
+        uri_none = base_uri + '.none.wt'
+
+        cfg = 'key_format=S,value_format=S'
+        cfg_always = cfg + ',assert=(read_timestamp=always)'
+        cfg_def = cfg
+        cfg_never = cfg + ',assert=(read_timestamp=never)'
+        cfg_none = cfg + ',assert=(read_timestamp=none)'
+
+        # Create one table for each read_timestamp assert setting
+        self.session.create(uri_always, cfg_always)
+        self.session.create(uri_def, cfg_def)
+        self.session.create(uri_never, cfg_never)
+        self.session.create(uri_none, cfg_none)
+
+        # Insert a data item at timestamp 1.  This should work for all.
+        c_always = self.session.open_cursor(uri_always)
+        c_def = self.session.open_cursor(uri_def)
+        c_never = self.session.open_cursor(uri_never)
+        c_none = self.session.open_cursor(uri_none)
+        self.session.begin_transaction()
+        self.session.timestamp_transaction(
+            'commit_timestamp=' + timestamp_str(1))
+        c_always['key1'] = 'value1'
+        c_def['key1'] = 'value1'
+        c_never['key1'] = 'value1'
+        c_none['key1'] = 'value1'
+        self.session.commit_transaction()
+        c_always.close()
+        c_def.close()
+        c_never.close()
+        c_none.close()
+
+        # Now that we have timestamped data, try reading with and without
+        # the timestamp.
+        c_always = self.session.open_cursor(uri_always)
+        c_def = self.session.open_cursor(uri_def)
+        c_never = self.session.open_cursor(uri_never)
+        c_none = self.session.open_cursor(uri_none)
+
+        c_always.set_key('key1')
+        c_def.set_key('key1')
+        c_never.set_key('key1')
+        c_none.set_key('key1')
+
+        self.session.begin_transaction('read_timestamp=' + timestamp_str(1))
+        c_always.search()
+        c_def.search()
+        c_none.search()
+        self.assertEqual(c_always.get_value(), 'value1')
+        self.assertEqual(c_def.get_value(), 'value1')
+        self.assertEqual(c_none.get_value(), 'value1')
+
+        msg = "/timestamp set on this transaction/"
+        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+            lambda:self.assertEquals(c_never.search(), 0), msg)
+        self.session.commit_transaction()
+        c_always.close()
+        c_def.close()
+        c_never.close()
+        c_none.close()
+
+        # Read in a transaction without a timestamp.
+        c_always = self.session.open_cursor(uri_always)
+        c_def = self.session.open_cursor(uri_def)
+        c_never = self.session.open_cursor(uri_never)
+        c_none = self.session.open_cursor(uri_none)
+
+        c_always.set_key('key1')
+        c_def.set_key('key1')
+        c_never.set_key('key1')
+        c_none.set_key('key1')
+
+        self.session.begin_transaction()
+        c_never.search()
+        c_def.search()
+        c_none.search()
+        self.assertEqual(c_never.get_value(), 'value1')
+        self.assertEqual(c_def.get_value(), 'value1')
+        self.assertEqual(c_none.get_value(), 'value1')
+
+        msg = "/none set on this transaction/"
+        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+            lambda:self.assertEquals(c_always.search(), 0), msg)
+        self.session.commit_transaction()
+        c_always.close()
+        c_def.close()
+        c_never.close()
+        c_none.close()
+
+if __name__ == '__main__':
+    wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_assert03.py b/src/third_party/wiredtiger/test/suite/test_assert03.py
new file mode 100644
index 00000000000..36d4936a82e
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_assert03.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2017 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# test_assert03.py
+# Test changing assert setting via alter.
+#
+
+from suite_subprocess import suite_subprocess
+import wiredtiger, wttest
+
+class test_assert03(wttest.WiredTigerTestCase, suite_subprocess):
+    conn_config = 'log=(enabled)'
+    base_uri = 'file:assert03.wt'
+    cfg = 'key_format=S,value_format=S'
+    always = 'assert=(commit_timestamp=always)'
+    never = 'assert=(commit_timestamp=never)'
+    none = 'assert=(commit_timestamp=none)'
+
+    def test_assert03(self):
+
+        #if not wiredtiger.timestamp_build() or not wiredtiger.diagnostic_build():
+        #    self.skipTest('requires a timestamp and diagnostic build')
+        if not wiredtiger.timestamp_build():
+            self.skipTest('requires a timestamp build')
+
+        # Create a data item at the default setting
+        self.session.create(self.base_uri, self.cfg)
+        c = self.session.open_cursor(self.base_uri)
+        self.session.begin_transaction()
+        c['key0'] = 'value0'
+        self.session.commit_transaction()
+        c.close()
+
+        # Now rotate through the alter settings and verify the data.
+        # The always setting should fail.
+        self.session.alter(self.base_uri, self.always)
+        c = self.session.open_cursor(self.base_uri)
+        self.session.begin_transaction()
+        c['key1'] = 'value1'
+        msg = "/none set on this transaction/"
+        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+            lambda:self.assertEquals(self.session.commit_transaction(), 0), msg)
+        c.close()
+
+        # The never and none settings should succeed.
+        self.session.alter(self.base_uri, self.never)
+        c = self.session.open_cursor(self.base_uri)
+        self.session.begin_transaction()
+        c['key2'] = 'value2'
+        self.session.commit_transaction()
+        c.close()
+
+        self.session.alter(self.base_uri, self.none)
+        c = self.session.open_cursor(self.base_uri)
+        self.session.begin_transaction()
+        c['key3'] = 'value3'
+        self.session.commit_transaction()
+        c.close()
+
+if __name__ == '__main__':
+    wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_backup07.py b/src/third_party/wiredtiger/test/suite/test_backup07.py
new file mode 100644
index 00000000000..8332815b0ca
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_backup07.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2017 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+
+import wiredtiger, wttest
+import os, shutil
+from helper import compare_files
+from suite_subprocess import suite_subprocess
+from wtdataset import simple_key
+from wtscenario import make_scenarios
+
+# test_backup07.py
+# Test cursor backup with target URIs, logging and create during backup
+
+class test_backup07(wttest.WiredTigerTestCase, suite_subprocess):
+    dir='backup.dir'                    # Backup directory name
+    logmax="100K"
+    newuri="table:newtable"
+
+    pfx = 'test_backup'
+    scenarios = make_scenarios([
+        ('table', dict(uri='table:test',dsize=100,nops=100,nthreads=1)),
+    ])
+
+    # Create a large cache, otherwise this test runs quite slowly.
+    def conn_config(self):
+        return 'cache_size=1G,log=(archive=false,enabled,file_max=%s)' % \
+            self.logmax
+
+    # Create and populate a new table while a full backup cursor is open, then
+    # check that the copied backup directory can be opened and recovered.
+    def test_backup07(self):
+        log2 = "WiredTigerLog.0000000002"
+
+        self.session.create(self.uri, "key_format=S,value_format=S")
+
+        # Insert small amounts of data at a time stopping just after we
+        # cross into log file 2.  That way we can add more operations into
+        # log file 2 during the full backup.
+        loop = 0
+        c = self.session.open_cursor(self.uri)
+        while not os.path.exists(log2):
+            for i in range(0, self.nops):
+                num = i + (loop * self.nops)
+                key = 'key' + str(num)
+                val = 'value' + str(num)
+                c[key] = val
+            loop += 1
+
+        # Test a potential bug in full backups and creates.
+        # We allow creates during backup because the file doesn't exist
+        # when the backup metadata is created on cursor open and the newly
+        # created file is not in the cursor list.  However, if using logging
+        # and the create and inserts/updates appear in a log file copied,
+        # then currently there will be an error opening the backup directory.
+
+        # Open up the backup cursor, create and add data to a new table
+        # and then copy the files.
+        os.mkdir(self.dir)
+        bkup_c = self.session.open_cursor('backup:', None, None)
+
+        # Now create and populate the new table. Make sure the log records
+        # are on disk and will be copied to the backup.
+        self.session.create(self.newuri, "key_format=S,value_format=S")
+        c = self.session.open_cursor(self.newuri)
+        for i in range(0, self.nops):
+            key = 'key' + str(i)
+            val = 'value' + str(i)
+            c[key] = val
+        c.close()
+        self.session.log_flush('sync=on')
+
+        # Now copy the files returned by the backup cursor.  This will
+        # include the log file that has updates for the newly created table.
+        while True:
+            ret = bkup_c.next()
+            if ret != 0:
+                break
+            newfile = bkup_c.get_key()
+            sz = os.path.getsize(newfile)
+            self.pr('Copy from: ' + newfile + ' (' + str(sz) + ') to ' + self.dir)
+            shutil.copy(newfile, self.dir)
+        self.assertEqual(ret, wiredtiger.WT_NOTFOUND)
+        bkup_c.close()
+
+        # After the full backup, open and recover the backup database.
+        # Make sure we properly recover even though the log file will have
+        # records for the newly created table file id.
+        backup_conn = self.wiredtiger_open(self.dir)
+        backup_conn.close()
+
+if __name__ == '__main__':
+    wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_las.py b/src/third_party/wiredtiger/test/suite/test_las.py
index d0bd1d108fa..52a0b2d7300 100644
--- a/src/third_party/wiredtiger/test/suite/test_las.py
+++ b/src/third_party/wiredtiger/test/suite/test_las.py
@@ -26,16 +26,53 @@
 # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 # OTHER DEALINGS IN THE SOFTWARE.
 
+from helper import copy_wiredtiger_home
 import wiredtiger, wttest
 from wtdataset import SimpleDataSet
 
+def timestamp_str(t):
+    return '%x' % t    # lower case hexadecimal, no '0x' prefix
+
 # test_las.py
-#       Smoke tests to ensure lookaside tables are working.
+# Smoke tests to ensure lookaside tables are working.
 class test_las(wttest.WiredTigerTestCase):
     # Force a small cache.
     def conn_config(self):
         return 'cache_size=1GB'
 
+    def large_updates(self, session, uri, value, ds, nrows, timestamp=False):
+        # Update a large number of records, we'll hang if the lookaside table
+        # isn't doing its thing. If timestamp is set, commit update i at i + 1.
+        cursor = session.open_cursor(uri)
+        for i in range(1, 1000000):
+            if timestamp:
+                session.begin_transaction()
+            cursor.set_key(ds.key(nrows + i))
+            cursor.set_value(value)
+            self.assertEqual(cursor.update(), 0)
+            if timestamp:
+                session.commit_transaction('commit_timestamp=' + timestamp_str(i + 1))
+        cursor.close()
+
+    def durable_check(self, check_value, uri, ds, nrows):
+        # Checkpoint and backup so as to simulate recovery
+        self.session.checkpoint()
+        newdir = "BACKUP"
+        copy_wiredtiger_home('.', newdir, True)
+
+        conn = self.setUpConnectionOpen(newdir)
+        session = self.setUpSessionOpen(conn)
+        cursor = session.open_cursor(uri, None)
+        # Skip the initial rows, which were not updated
+        for i in range(0, nrows+1):
+            self.assertEqual(cursor.next(), 0)
+        # The cursor is now on record nrows + 1 of the copied database; use
+        # assertEqual so a failure reports both the expected and actual value.
+        self.assertEqual(check_value, cursor.get_value())
+        cursor.close()
+        session.close()
+        conn.close()
+
     @wttest.longtest('lookaside table smoke test')
     def test_las(self):
         # Create a small table.
@@ -43,18 +80,49 @@ class test_las(wttest.WiredTigerTestCase):
         nrows = 100
         ds = SimpleDataSet(self, uri, nrows, key_format="S")
         ds.populate()
+        bigvalue = "aaaaa" * 100
 
-        # Take a snapshot.
+        # Initially load huge data
+        cursor = self.session.open_cursor(uri)
+        for i in range(1, 1000000):
+            cursor.set_key(ds.key(nrows + i))
+            cursor.set_value(bigvalue)
+            self.assertEquals(cursor.insert(), 0)
+        cursor.close()
+        self.session.checkpoint()
+
+        # Scenario: 1
+        # Check to see LAS working with old snapshot
+        bigvalue1 = "bbbbb" * 100
         self.session.snapshot("name=xxx")
+        # Update the values after taking the named snapshot
+        self.large_updates(self.session, uri, bigvalue1, ds, nrows)
+        # Check to see the value after recovery
+        self.durable_check(bigvalue1, uri, ds, nrows)
+        self.session.snapshot("drop=(all)")
 
-        # Insert a large number of records, we'll hang if the lookaside table
-        # isn't doing its thing.
-        c = self.session.open_cursor(uri)
-        bigvalue = "abcde" * 100
-        for i in range(1, 1000000):
-            c.set_key(ds.key(nrows + i))
-            c.set_value(bigvalue)
-            self.assertEquals(c.insert(), 0)
+        # Scenario: 2
+        # Check to see LAS working with old reader
+        bigvalue2 = "ccccc" * 100
+        session2 = self.conn.open_session()
+        session2.begin_transaction('isolation=snapshot')
+        self.large_updates(self.session, uri, bigvalue2, ds, nrows)
+        # Check to see the value after recovery
+        self.durable_check(bigvalue2, uri, ds, nrows)
+        session2.rollback_transaction()
+        session2.close()
+
+        # Scenario: 3
+        # Check to see LAS working with old timestamp
+        bigvalue3 = "ddddd" * 100
+        self.conn.set_timestamp('stable_timestamp=' + timestamp_str(1))
+        self.large_updates(self.session, uri, bigvalue3, ds, nrows, timestamp=True)
+        # Check to see data can be seen only up to the stable_timestamp
+        self.durable_check(bigvalue2, uri, ds, nrows)
+
+        self.conn.set_timestamp('stable_timestamp=' + timestamp_str(i + 1))
+        # Latest data must now be seen ('i' above leaks from the load loop)
+        self.durable_check(bigvalue3, uri, ds, nrows)
 
 if __name__ == '__main__':
     wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp01.py b/src/third_party/wiredtiger/test/suite/test_timestamp01.py
index c7a5df66ae0..09a264e2afd 100644
--- a/src/third_party/wiredtiger/test/suite/test_timestamp01.py
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp01.py
@@ -61,12 +61,40 @@ class test_timestamp01(wttest.WiredTigerTestCase, suite_subprocess):
                 'commit_timestamp=' + timestamp_str(1 << 5000)),
                 '/too long/')
 
-        # One is okay, as is 2**64 - 1
+        # Anything other than hexadecimal characters is not permitted
+        self.session.begin_transaction()
+        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+            lambda: self.session.commit_transaction(
+                'commit_timestamp=' + timestamp_str(-1)),
+                '/Failed to parse commit timestamp/')
+
+        self.session.begin_transaction()
+        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+            lambda: self.session.commit_transaction(
+                'commit_timestamp=' + 'a/78f'),
+                '/Failed to parse commit timestamp/')
+
+        self.session.begin_transaction()
+        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+            lambda: self.session.commit_transaction(
+                'commit_timestamp=' + 'a`78f'),
+                '/Failed to parse commit timestamp/')
+
+        self.session.begin_transaction()
+        self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
+            lambda: self.session.commit_transaction(
+                'commit_timestamp=' + 'a{78f'),
+                '/Failed to parse commit timestamp/')
+
+        # One is okay, as is upper case hex and 2**63 (1 << 64 - 1 is 1 << 63)
         self.session.begin_transaction()
         self.session.commit_transaction(
             'commit_timestamp=' + timestamp_str(1))
         self.session.begin_transaction()
         self.session.commit_transaction(
+            'commit_timestamp=0A78F')
+        self.session.begin_transaction()
+        self.session.commit_transaction(
             'commit_timestamp=' + timestamp_str(1 << 64 - 1))
 
 if __name__ == '__main__':
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp02.py b/src/third_party/wiredtiger/test/suite/test_timestamp02.py
index 31bea22ec66..f928dbc184f 100644
--- a/src/third_party/wiredtiger/test/suite/test_timestamp02.py
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp02.py
@@ -38,12 +38,6 @@ from wtscenario import make_scenarios
 def timestamp_str(t):
     return '%x' % t
 
-def timestamp_ret_str(t):
-    s = timestamp_str(t)
-    if len(s) % 2 == 1:
-        s = '0' + s
-    return s
-
 class test_timestamp02(wttest.WiredTigerTestCase, suite_subprocess):
     tablename = 'test_timestamp02'
     uri = 'table:' + tablename
@@ -98,7 +92,7 @@ class test_timestamp02(wttest.WiredTigerTestCase, suite_subprocess):
                 dict((k, 1) for k in orig_keys[:i+1]))
 
         # Everything up to and including timestamp 100 has been committed.
-        self.assertEqual(self.conn.query_timestamp(), timestamp_ret_str(100))
+        self.assertTimestampsEqual(self.conn.query_timestamp(), timestamp_str(100))
 
         # Bump the oldest timestamp, we're not going back...
         self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(100))
@@ -111,11 +105,11 @@ class test_timestamp02(wttest.WiredTigerTestCase, suite_subprocess):
             self.session.commit_transaction('commit_timestamp=' + timestamp_str(k + 100))
 
         # Everything up to and including timestamp 200 has been committed.
-        self.assertEqual(self.conn.query_timestamp(), timestamp_ret_str(200))
+        self.assertTimestampsEqual(self.conn.query_timestamp(), timestamp_str(200))
 
         # Test that we can manually move the commit timestamp back
         self.conn.set_timestamp('commit_timestamp=' + timestamp_str(150))
-        self.assertEqual(self.conn.query_timestamp(), timestamp_ret_str(150))
+        self.assertTimestampsEqual(self.conn.query_timestamp(), timestamp_str(150))
         self.conn.set_timestamp('commit_timestamp=' + timestamp_str(200))
 
         # Now the stable timestamp before we read.
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp03.py b/src/third_party/wiredtiger/test/suite/test_timestamp03.py
index 9caf597e6ed..1a2511ea6ee 100644
--- a/src/third_party/wiredtiger/test/suite/test_timestamp03.py
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp03.py
@@ -39,12 +39,6 @@ from wtscenario import make_scenarios
 def timestamp_str(t):
     return '%x' % t
 
-def timestamp_ret_str(t):
-    s = timestamp_str(t)
-    if len(s) % 2 == 1:
-        s = '0' + s
-    return s
-
 class test_timestamp03(wttest.WiredTigerTestCase, suite_subprocess):
     table_ts_log     = 'ts03_ts_logged'
     table_ts_nolog   = 'ts03_ts_nologged'
@@ -226,7 +220,7 @@ class test_timestamp03(wttest.WiredTigerTestCase, suite_subprocess):
                 self.table_nots_nolog, dict((k, self.value) for k in orig_keys))
 
         # Bump the oldest_timestamp, we're not going back...
-        self.assertEqual(self.conn.query_timestamp(), timestamp_ret_str(100))
+        self.assertTimestampsEqual(self.conn.query_timestamp(), timestamp_str(100))
         old_ts = timestamp_str(100)
         self.conn.set_timestamp('oldest_timestamp=' + old_ts)
         self.conn.set_timestamp('stable_timestamp=' + old_ts)
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp04.py b/src/third_party/wiredtiger/test/suite/test_timestamp04.py
index a52675daf8b..f7052448208 100644
--- a/src/third_party/wiredtiger/test/suite/test_timestamp04.py
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp04.py
@@ -37,12 +37,6 @@ from wtscenario import make_scenarios
 def timestamp_str(t):
     return '%x' % t
 
-def timestamp_ret_str(t):
-    s = timestamp_str(t)
-    if len(s) % 2 == 1:
-        s = '0' + s
-    return s
-
 class test_timestamp04(wttest.WiredTigerTestCase, suite_subprocess):
     table_ts_log     = 'table:ts04_ts_logged'
     table_ts_nolog   = 'table:ts04_ts_nologged'
@@ -61,6 +55,7 @@ class test_timestamp04(wttest.WiredTigerTestCase, suite_subprocess):
         ('col_var', dict(empty=0, cacheSize='cache_size=20MB', extra_config=',key_format=r')),
         ('lsm', dict(empty=0, cacheSize='cache_size=31MB', extra_config=',type=lsm')),
         ('row', dict(empty=0, cacheSize='cache_size=20MB', extra_config='',)),
+        ('row-smallcache', dict(empty=0, cacheSize='cache_size=2MB', extra_config='',)),
     ]
 
     scenarios = make_scenarios(conncfg, types)
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp05.py b/src/third_party/wiredtiger/test/suite/test_timestamp05.py
index d7131cb2004..f145184146c 100644
--- a/src/third_party/wiredtiger/test/suite/test_timestamp05.py
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp05.py
@@ -39,12 +39,6 @@ from wtscenario import make_scenarios
 def timestamp_str(t):
     return '%x' % t
 
-def timestamp_ret_str(t):
-    s = timestamp_str(t)
-    if len(s) % 2 == 1:
-        s = '0' + s
-    return s
-
 class test_timestamp05(wttest.WiredTigerTestCase, suite_subprocess):
     uri = 'table:ts05'
 
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp07.py b/src/third_party/wiredtiger/test/suite/test_timestamp07.py
index 12b36bdc2f8..09547dba3a7 100644
--- a/src/third_party/wiredtiger/test/suite/test_timestamp07.py
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp07.py
@@ -56,8 +56,8 @@ class test_timestamp07(wttest.WiredTigerTestCase, suite_subprocess):
 
     nkeys = [
         ('100keys', dict(nkeys=100)),
-#        ('500keys', dict(nkeys=500)),
-#        ('1000keys', dict(nkeys=1000)),
+        ('500keys', dict(nkeys=500)),
+        ('1000keys', dict(nkeys=1000)),
     ]
 
     scenarios = make_scenarios(types, conncfg, nkeys)
@@ -68,19 +68,20 @@ class test_timestamp07(wttest.WiredTigerTestCase, suite_subprocess):
     value3 = u'\u0001\u0002cdef\u0007\u0004'
 
     # Check that a cursor (optionally started in a new transaction), sees the
-    # expected values.
-    def check(self, session, txn_config, expected):
+    # expected value for a key
+    def check(self, session, txn_config, k, expected):
         if txn_config:
             session.begin_transaction(txn_config)
         c = session.open_cursor(self.uri + self.tablename, None)
-        actual = dict((k, v) for k, v in c if v != 0)
-        self.assertTrue(actual == expected)
-        # Search for the expected items as well as iterating
-        for k, v in expected.iteritems():
-            self.assertEqual(c[k], v, "for key " + str(k))
+        if not expected:
+            c.set_key(k)
+            self.assertEqual(c.search(), wiredtiger.WT_NOTFOUND)
+        else:
+            self.assertEqual(c[k], expected)
         c.close()
         if txn_config:
             session.commit_transaction()
+
     #
     # Take a backup of the database and verify that the value we want to
     # check exists in the tables the expected number of times.
@@ -168,12 +169,14 @@ class test_timestamp07(wttest.WiredTigerTestCase, suite_subprocess):
 
         # Now check that we see the expected state when reading at each
         # timestamp.
-        for i, t in enumerate(orig_keys):
-            self.check(self.session, 'read_timestamp=' + timestamp_str(t),
-                dict((k, self.value) for k in orig_keys[:i+1]))
+        for k in orig_keys:
+            self.check(self.session, 'read_timestamp=' + timestamp_str(k),
+                k, self.value)
+            self.check(self.session, 'read_timestamp=' + timestamp_str(k),
+                k + 1, None)
 
         # Bump the oldest timestamp, we're not going back...
-        self.assertEqual(self.conn.query_timestamp(), timestamp_str(self.nkeys))
+        self.assertTimestampsEqual(self.conn.query_timestamp(), timestamp_str(self.nkeys))
         self.oldts = timestamp_str(self.nkeys)
         self.conn.set_timestamp('oldest_timestamp=' + self.oldts)
         self.conn.set_timestamp('stable_timestamp=' + self.oldts)
@@ -201,12 +204,8 @@ class test_timestamp07(wttest.WiredTigerTestCase, suite_subprocess):
 
         # Take a checkpoint using the given configuration.  Then verify
         # whether value2 appears in a copy of that data or not.
-        valcnt2 = valcnt3 = self.nkeys
-        valcnt = 0
-        # If logging is disabled then value2 should not appear in logged table.
-        if self.using_log == False:
-            valcnt3 = 0
-        self.ckpt_backup(self.value2, valcnt, valcnt2, valcnt3)
+        self.ckpt_backup(self.value2, 0, self.nkeys, self.nkeys if self.using_log else 0)
+
         # Update the stable timestamp to the latest, but not the oldest
         # timestamp and make sure we can see the data.  Once the stable
         # timestamp is moved we should see all keys with value2.
@@ -245,9 +244,7 @@ class test_timestamp07(wttest.WiredTigerTestCase, suite_subprocess):
         # of that data or not.  Both tables that are logged should see
         # all the data regardless of timestamps.  The table that is not
         # logged should not see any of it.
-        valcnt = 0
-        valcnt2 = valcnt3 = self.nkeys
-        self.backup_check(self.value3, valcnt, valcnt2, valcnt3)
+        self.backup_check(self.value3, 0, self.nkeys, self.nkeys)
 
 if __name__ == '__main__':
     wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp09.py b/src/third_party/wiredtiger/test/suite/test_timestamp09.py
index 41a6909cbef..b79521329e7 100644
--- a/src/third_party/wiredtiger/test/suite/test_timestamp09.py
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp09.py
@@ -114,8 +114,7 @@ class test_timestamp09(wttest.WiredTigerTestCase, suite_subprocess):
         # Oldest timestamp is 3 at the moment, trying to set it to an earlier
         # timestamp is a no-op.
         self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(1))
-        self.assertEqual(int(self.conn.query_timestamp('get=oldest')),
-            int(timestamp_str(3)))
+        self.assertTimestampsEqual(self.conn.query_timestamp('get=oldest'), timestamp_str(3))
 
         self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(3) +
             ',stable_timestamp=' + timestamp_str(3))
@@ -123,8 +122,7 @@ class test_timestamp09(wttest.WiredTigerTestCase, suite_subprocess):
         # Stable timestamp is 5 at the moment, trying to set it to an earlier
         # timestamp is a no-op.
         self.conn.set_timestamp('stable_timestamp=' + timestamp_str(4))
-        self.assertEqual(int(self.conn.query_timestamp('get=stable')),
-            int(timestamp_str(5)))
+        self.assertTimestampsEqual(self.conn.query_timestamp('get=stable'), timestamp_str(5))
 
         self.conn.set_timestamp('oldest_timestamp=' + timestamp_str(5))
         self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
diff --git a/src/third_party/wiredtiger/test/suite/test_txn14.py b/src/third_party/wiredtiger/test/suite/test_txn14.py
index 7579bbc8e54..2245f49ae85 100644
--- a/src/third_party/wiredtiger/test/suite/test_txn14.py
+++ b/src/third_party/wiredtiger/test/suite/test_txn14.py
@@ -93,10 +93,11 @@ class test_txn14(wttest.WiredTigerTestCase, suite_subprocess):
         c.close()
         self.session.log_flush(cfgarg)
         if self.sync == 'background':
-            # If doing a background flush, wait a few seconds.  I have
-            # seen an individual log file's fsync take more than a second
-            # on some systems.  So give it time to flush perhaps a few files.
-            self.session.transaction_sync('timeout_ms=4000')
+            # If doing a background flush, wait 10 seconds. I have seen an
+            # individual log file's fsync take more than a second on some
+            # systems, and we've seen timeouts at lower levels on systems
+            # with slow I/O. So give it time to flush perhaps a few files.
+            self.session.transaction_sync('timeout_ms=10000')
         self.simulate_crash_restart(".", "RESTART")
         c = self.session.open_cursor(self.t1, None, None)
         i = 0
diff --git a/src/third_party/wiredtiger/test/suite/wttest.py b/src/third_party/wiredtiger/test/suite/wttest.py
index 1c95eb355ae..c654370718c 100644
--- a/src/third_party/wiredtiger/test/suite/wttest.py
+++ b/src/third_party/wiredtiger/test/suite/wttest.py
@@ -490,6 +490,12 @@ class WiredTigerTestCase(unittest.TestCase):
             with self.expectedStderr(message):
                 self.assertRaises(exceptionType, expr)
 
+    def assertTimestampsEqual(self, ts1, ts2):
+        """
+        assertEqual() for hex timestamp strings, compared by integer value
+        """
+        self.assertEqual(int(ts1, 16), int(ts2, 16))
+
     def exceptionToStderr(self, expr):
         """
         Used by assertRaisesHavingMessage to convert an expression
author	Alex Gorrod <alexander.gorrod@mongodb.com>	2017-10-10 16:29:49 +1100
committer	Alex Gorrod <alexander.gorrod@mongodb.com>	2017-10-10 16:37:55 +1100
commit	39998ac6928c4e7f3acd2f7ee2fc5fb4df056c18 (patch)
tree	c075233cd32c6ec0205af77db475836c0fba60e9 /src/third_party
parent	dd094ce1bc1fb424ccc6dd71939e5c7a30159e2e (diff)
download	mongo-39998ac6928c4e7f3acd2f7ee2fc5fb4df056c18.tar.gz