13 files changed, 1283 insertions, 21 deletions
diff --git a/src/third_party/wiredtiger/test/csuite/time_shift_test.sh b/src/third_party/wiredtiger/test/csuite/time_shift_test.sh
new file mode 100755
index 00000000000..ae06fd03f36
--- /dev/null
+++ b/src/third_party/wiredtiger/test/csuite/time_shift_test.sh
@@ -0,0 +1,116 @@
+#! /bin/sh
+
+set -e
+
+# The purpose of this test is to ensure we use the monotonic clock instead of
+# the realtime clock in our code. We have seen WT hang when the system clock
+# shifts (e.g. due to NTP servers). This test measures the execution time of
+# a test (test_rwlock), shifts the clock backwards by that time period and
+# re-executes the test. If the second run is at most 20% slower than the
+# first, the test passes. 20% was selected on the assumption that other
+# factors of the environment will influence the execution time by less
+# than 20%.
+
+
+# need to enable long tests to run test_rwlock 
+export TESTUTIL_ENABLE_LONG_TESTS=1
+
+# We will run only when long tests are enabled.
+test "$TESTUTIL_ENABLE_LONG_TESTS" = "1" || exit 0
+
+EXIT_SUCCESS=0
+EXIT_FAILURE=1
+
+export DONT_FAKE_MONOTONIC=1
+RUN_OS=$(uname -s)
+
+# On Linux we run with CPU affinity to control the execution time;
+# if we don't control the execution time this test is not effective.
+CPU_SET=0-1
+echo "test read write lock for time shifting using libfaketime"
+
+
+# check for program arguments, if not present, print usage
+if [ -z "$1" ]
+then
+    echo "fail : this test needs libfaketime library with path"
+    echo "Usage :"
+    echo "       " "$0" " <libpath> [cpuset] "
+    echo "         libpath : path to libfaketime library"
+    echo "         cpuset  : set of cpu's to be used for taskset on linux"
+    echo "                 : default is 0-1 "
+    exit $EXIT_FAILURE
+fi
+
+# check for the existence of dependent library (quoted: path may have spaces)
+if [ ! -r "$1" ]
+then
+    echo "fail : $1 , libfaketime library is not readable"
+    exit $EXIT_FAILURE
+fi
+
+SEC1=$(date +%s)
+if [ "$RUN_OS" = "Darwin" ]
+then
+    ./test_rwlock
+elif [ "$RUN_OS" = "Linux" ]
+then
+    if [ -z "$2" ]
+    then
+        echo "default taskset value is 0-1"
+    else
+        CPU_SET="$2"
+    fi
+    taskset -c "$CPU_SET" ./test_rwlock
+else
+    echo "not able to decide running OS, so exiting"
+    exit $EXIT_FAILURE
+fi
+
+SEC2=$(date +%s)
+DIFF1=$((SEC2 - SEC1))
+
+# preload libfaketime
+if [ "$RUN_OS" = "Darwin" ]
+then
+    export DYLD_FORCE_FLAT_NAMESPACE=y
+    export DYLD_INSERT_LIBRARIES="$1"
+    ./test_rwlock &
+else
+    LD_PRELOAD="$1" taskset -c "$CPU_SET" ./test_rwlock &
+fi
+
+# get pid of test run in background
+PID=$!
+
+sleep 5
+echo "-$DIFF1""s" >| ~/.faketimerc
+
+# collect the status by hand so "set -e" cannot skip the cleanup below
+RC=0; wait "$PID" || RC=$?
+echo "after sleeping for 5 seconds set ~/.faketimerc value as -ve $DIFF1 seconds"
+rm ~/.faketimerc
+[ "$RC" -eq 0 ] || exit "$RC"
+
+if [ "$RUN_OS" = "Darwin" ]
+then
+    export DYLD_FORCE_FLAT_NAMESPACE=
+    export DYLD_INSERT_LIBRARIES=
+fi
+SEC3=$(date +%s)
+DIFF2=$((SEC3 - SEC2))
+# a baseline shorter than one second gives DIFF1=0: divide by 1 instead of 0
+PERC=$((((DIFF2 - DIFF1)*100)/(DIFF1 > 0 ? DIFF1 : 1)))
+echo "execution time difference : $PERC %, less than 20% is ok"
+echo "normal execution time : $DIFF1 seconds"
+echo "fake time reduction by : $DIFF1 seconds"
+echo "execution time with -ve time shift : $DIFF2 seconds"
+
+if [ "$PERC" -le 20 ]
+then
+   echo "pass : execution time is affected $PERC % by -ve time shift"
+   exit $EXIT_SUCCESS
+else
+   echo "fail : execution time is affected $PERC % by -ve time shift"
+   exit $EXIT_FAILURE
+fi
diff --git a/src/third_party/wiredtiger/test/csuite/wt3363_checkpoint_op_races/main.c b/src/third_party/wiredtiger/test/csuite/wt3363_checkpoint_op_races/main.c
index d007eb65382..9cb1ab0f4c6 100644
--- a/src/third_party/wiredtiger/test/csuite/wt3363_checkpoint_op_races/main.c
+++ b/src/third_party/wiredtiger/test/csuite/wt3363_checkpoint_op_races/main.c
@@ -81,7 +81,7 @@ main(int argc, char *argv[])
 	 * This test should not run unless long tests flag is set. The test
 	 * runs for 15 minutes.
 	 */
-	if (!testutil_is_flag_set("WT3363_CHECKPOINT_OP_RACES"))
+	if (!testutil_is_flag_set("TESTUTIL_ENABLE_TIMING_TESTS"))
 		return (EXIT_SUCCESS);
 
 	opts = &_opts;
diff --git a/src/third_party/wiredtiger/test/format/backup.c b/src/third_party/wiredtiger/test/format/backup.c
index 47f3c54325f..0dc7402e181 100644
--- a/src/third_party/wiredtiger/test/format/backup.c
+++ b/src/third_party/wiredtiger/test/format/backup.c
@@ -36,6 +36,7 @@ static void
 check_copy(void)
 {
 	WT_CONNECTION *conn;
+	WT_DECL_RET;
 	WT_SESSION *session;
 
 	wts_open(g.home_backup, false, &conn);
@@ -44,9 +45,14 @@ check_copy(void)
 	    conn->open_session(conn, NULL, NULL, &session),
 	    "%s", g.home_backup);
 
-	testutil_checkfmt(
-	    session->verify(session, g.uri, NULL),
-	    "%s: %s", g.home_backup, g.uri);
+	/*
+	 * Verify can return EBUSY if the handle isn't available. Don't yield
+	 * and retry, in the case of LSM, the handle may not be available for
+	 * a long time.
+	 */
+	ret = session->verify(session, g.uri, NULL);
+	testutil_assertfmt(ret == 0 || ret == EBUSY,
+	    "WT_SESSION.verify: %s: %s", g.home_backup, g.uri);
 
 	testutil_checkfmt(conn->close(conn, NULL), "%s", g.home_backup);
 }
diff --git a/src/third_party/wiredtiger/test/format/wts.c b/src/third_party/wiredtiger/test/format/wts.c
index 6a58cad5403..031e3bb25af 100644
--- a/src/third_party/wiredtiger/test/format/wts.c
+++ b/src/third_party/wiredtiger/test/format/wts.c
@@ -584,10 +584,14 @@ wts_verify(const char *tag)
 		testutil_check(conn->set_timestamp(conn, config_buf));
 	}
 
-	/* Session operations for LSM can return EBUSY. */
+	/*
+	 * Verify can return EBUSY if the handle isn't available. Don't yield
+	 * and retry, in the case of LSM, the handle may not be available for
+	 * a long time.
+	 */
 	ret = session->verify(session, g.uri, "strict");
-	if (ret != 0 && !(ret == EBUSY && DATASOURCE("lsm")))
-		testutil_die(ret, "session.verify: %s: %s", g.uri, tag);
+	testutil_assertfmt(
+	    ret == 0 || ret == EBUSY, "session.verify: %s: %s", g.uri, tag);
 
 	if (g.logging != 0)
 		(void)g.wt_api->msg_printf(g.wt_api, session,
diff --git a/src/third_party/wiredtiger/test/recovery/Makefile.am b/src/third_party/wiredtiger/test/recovery/Makefile.am
index 3e7fce17d0e..298b9a995b8 100644
--- a/src/third_party/wiredtiger/test/recovery/Makefile.am
+++ b/src/third_party/wiredtiger/test/recovery/Makefile.am
@@ -2,12 +2,17 @@ AM_CPPFLAGS = -I$(top_builddir)
 AM_CPPFLAGS +=-I$(top_srcdir)/src/include
 AM_CPPFLAGS +=-I$(top_srcdir)/test/utility
 
-noinst_PROGRAMS = random-abort truncated-log
+noinst_PROGRAMS = random-abort timestamp-abort truncated-log
 random_abort_SOURCES = random-abort.c
 random_abort_LDADD = $(top_builddir)/test/utility/libtest_util.la
 random_abort_LDADD +=$(top_builddir)/libwiredtiger.la
 random_abort_LDFLAGS = -static
 
+timestamp_abort_SOURCES = timestamp-abort.c
+timestamp_abort_LDADD = $(top_builddir)/test/utility/libtest_util.la
+timestamp_abort_LDADD +=$(top_builddir)/libwiredtiger.la
+timestamp_abort_LDFLAGS = -static
+
 truncated_log_SOURCES = truncated-log.c
 truncated_log_LDADD = $(top_builddir)/test/utility/libtest_util.la
 truncated_log_LDADD +=$(top_builddir)/libwiredtiger.la
diff --git a/src/third_party/wiredtiger/test/recovery/smoke.sh b/src/third_party/wiredtiger/test/recovery/smoke.sh
index ba4d77c642b..6587c7c9f98 100755
--- a/src/third_party/wiredtiger/test/recovery/smoke.sh
+++ b/src/third_party/wiredtiger/test/recovery/smoke.sh
@@ -8,4 +8,8 @@ $TEST_WRAPPER ./random-abort -t 10 -T 5
 $TEST_WRAPPER ./random-abort -m -t 10 -T 5
 $TEST_WRAPPER ./random-abort -C -t 10 -T 5
 $TEST_WRAPPER ./random-abort -C -m -t 10 -T 5
+$TEST_WRAPPER ./timestamp-abort -t 10 -T 5
+$TEST_WRAPPER ./timestamp-abort -m -t 10 -T 5
+$TEST_WRAPPER ./timestamp-abort -C -t 10 -T 5
+$TEST_WRAPPER ./timestamp-abort -C -m -t 10 -T 5
 $TEST_WRAPPER ./truncated-log
diff --git a/src/third_party/wiredtiger/test/recovery/timestamp-abort.c b/src/third_party/wiredtiger/test/recovery/timestamp-abort.c
new file mode 100644
index 00000000000..7e912b1fe26
--- /dev/null
+++ b/src/third_party/wiredtiger/test/recovery/timestamp-abort.c
@@ -0,0 +1,722 @@
+/*-
+ * Public Domain 2014-2017 MongoDB, Inc.
+ * Public Domain 2008-2014 WiredTiger, Inc.
+ *
+ * This is free and unencumbered software released into the public domain.
+ *
+ * Anyone is free to copy, modify, publish, use, compile, sell, or
+ * distribute this software, either in source code form or as a compiled
+ * binary, for any purpose, commercial or non-commercial, and by any
+ * means.
+ *
+ * In jurisdictions that recognize copyright laws, the author or authors
+ * of this software dedicate any and all copyright interest in the
+ * software to the public domain. We make this dedication for the benefit
+ * of the public at large and to the detriment of our heirs and
+ * successors. We intend this dedication to be an overt act of
+ * relinquishment in perpetuity of all present and future rights to this
+ * software under copyright law.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "test_util.h"
+
+#include <sys/wait.h>
+#include <signal.h>
+
+static char home[1024];			/* Program working dir */
+
+/*
+ * Create three tables that we will write the same data to and verify that
+ * all the types of usage have the expected data in them after a crash and
+ * recovery.  We want:
+ * 1. A table that is logged and is not involved in timestamps.  This table
+ * simulates a user local table.
+ * 2. A table that is logged and involved in timestamps.  This simulates
+ * the oplog.
+ * 3. A table that is not logged and involved in timestamps.  This simulates
+ * a typical collection file.
+ *
+ * We also create a fourth table that is not logged and not involved directly
+ * in timestamps to store the stable timestamp.  That way we can know what the
+ * latest stable timestamp is on checkpoint.
+ *
+ * We also create several files that are not WiredTiger tables.  The checkpoint
+ * thread creates a file indicating that a checkpoint has completed.  The parent
+ * process uses this to know when at least one checkpoint is done and it can
+ * start the timer to abort.
+ *
+ * Each worker thread creates its own records file that records the data it
+ * inserted and it records the timestamp that was used for that insertion.
+ */
+static const char * const uri_local = "table:local";
+static const char * const uri_oplog = "table:oplog";
+static const char * const uri_collection = "table:collection";
+
+static const char * const stable_store = "table:stable";
+static const char * const ckpt_file = "checkpoint_done";
+static bool compat, inmem, use_ts;
+static uint64_t global_ts = 1;
+
+#define	MAX_TH		12	/* Random thread count is below this */
+#define	MAX_TIME	40	/* Random run time (secs) is below this */
+#define	MIN_TH		5	/* Minimum/default worker threads */
+#define	MIN_TIME	10	/* Minimum/default run time (secs) */
+#define	RECORDS_FILE	"records-%" PRIu32	/* Per-thread records file */
+#define	STABLE_PERIOD	100	/* Records between stable updates */
+
+#define	ENV_CONFIG_COMPAT	",compatibility=(release=\"2.9\")"
+#define	ENV_CONFIG_DEF						\
+    "create,log=(archive=false,file_max=10M,enabled)"
+#define	ENV_CONFIG_TXNSYNC					\
+    "create,log=(archive=false,file_max=10M,enabled),"			\
+    "transaction_sync=(enabled,method=none)"
+#define	ENV_CONFIG_REC "log=(archive=false,recover=on)"
+
+#define	MAX_CKPT_INTERVAL 5	/* Maximum interval between checkpoints */
+#define	MAX_VAL	1024
+
+static void usage(void)
+    WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
+static void
+usage(void)
+{
+	fprintf(stderr,
+	    "usage: %s [-h dir] [-T threads] [-t time] [-Cmvz]\n", progname);
+	exit(EXIT_FAILURE);		/* Invalid invocation always fails */
+}
+
+typedef struct {
+	WT_CONNECTION *conn;	/* Connection shared by all threads */
+	uint64_t start;		/* First key written by a worker thread */
+	uint32_t id;		/* Thread ID; 0 is the checkpoint thread */
+} WT_THREAD_DATA;
+
+/*
+ * thread_ckpt_run --
+ *	Checkpoint thread: checkpoint at random intervals until killed.
+ */
+static WT_THREAD_RET
+thread_ckpt_run(void *arg)
+{
+	FILE *fp;
+	WT_RAND_STATE rnd;
+	WT_SESSION *session;
+	WT_THREAD_DATA *td;
+	uint64_t ts;
+	uint32_t sleep_time;
+	int i, ret;
+	bool first_ckpt;
+
+	__wt_random_init(&rnd);
+
+	td = (WT_THREAD_DATA *)arg;
+	/*
+	 * Remove any existing checkpoint file before checkpointing starts.
+	 */
+	(void)unlink(ckpt_file);
+	if ((ret = td->conn->open_session(td->conn, NULL, NULL, &session)) != 0)
+		testutil_die(ret, "WT_CONNECTION:open_session");
+	first_ckpt = true;
+	ts = 0;
+	for (i = 0; ;++i) {
+		sleep_time = __wt_random(&rnd) % MAX_CKPT_INTERVAL;
+		sleep(sleep_time);
+		if (use_ts)
+			ts = global_ts;
+		/*
+		 * Since this is the default, send in this string even if
+		 * running without timestamps.
+		 */
+		testutil_check(session->checkpoint(
+		    session, "use_timestamp=true"));
+		printf("Checkpoint %d complete.  Minimum ts %" PRIu64 "\n",
+		    i, ts);
+		fflush(stdout);
+		/*
+		 * Create the checkpoint file so that the parent process knows
+		 * at least one checkpoint has finished and can start its
+		 * timer.
+		 */
+		if (first_ckpt) {
+			testutil_checksys((fp = fopen(ckpt_file, "w")) == NULL);
+			first_ckpt = false;
+			testutil_checksys(fclose(fp) != 0);
+		}
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * thread_run --
+ *	Worker thread: insert records from td->start onward until killed.
+ */
+static WT_THREAD_RET
+thread_run(void *arg)
+{
+	FILE *fp;
+	WT_CURSOR *cur_coll, *cur_local, *cur_oplog, *cur_stable;
+	WT_ITEM data;
+	WT_RAND_STATE rnd;
+	WT_SESSION *session;
+	WT_THREAD_DATA *td;
+	uint64_t i, stable_ts;
+	int ret;
+	char cbuf[MAX_VAL], lbuf[MAX_VAL], obuf[MAX_VAL];
+	char kname[64], tscfg[64];
+
+	__wt_random_init(&rnd);
+	memset(cbuf, 0, sizeof(cbuf));
+	memset(lbuf, 0, sizeof(lbuf));
+	memset(obuf, 0, sizeof(obuf));
+	memset(kname, 0, sizeof(kname));
+
+	td = (WT_THREAD_DATA *)arg;
+	/*
+	 * Set up this thread's records file, read back later for checking.
+	 */
+	testutil_check(__wt_snprintf(cbuf, sizeof(cbuf), RECORDS_FILE, td->id));
+	(void)unlink(cbuf);
+	testutil_checksys((fp = fopen(cbuf, "w")) == NULL);
+	/*
+	 * Set to line buffering.  But that is advisory only.  We've seen
+	 * cases where the result files end up with partial lines.
+	 */
+	__wt_stream_set_line_buffer(fp);
+	if ((ret = td->conn->open_session(td->conn, NULL, NULL, &session)) != 0)
+		testutil_die(ret, "WT_CONNECTION:open_session");
+	/*
+	 * Open a cursor to each table.
+	 */
+	if ((ret = session->open_cursor(session,
+	    uri_collection, NULL, NULL, &cur_coll)) != 0)
+		testutil_die(ret, "WT_SESSION.open_cursor: %s", uri_collection);
+	if ((ret = session->open_cursor(session,
+	    uri_local, NULL, NULL, &cur_local)) != 0)
+		testutil_die(ret, "WT_SESSION.open_cursor: %s", uri_local);
+	if ((ret = session->open_cursor(session,
+	    uri_oplog, NULL, NULL, &cur_oplog)) != 0)
+		testutil_die(ret, "WT_SESSION.open_cursor: %s", uri_oplog);
+
+	if ((ret = session->open_cursor(
+	    session, stable_store, NULL, NULL, &cur_stable)) != 0)
+		testutil_die(ret, "WT_SESSION.open_cursor: %s", stable_store);
+
+	/*
+	 * Write our portion of the key space until we're killed.
+	 */
+	printf("Thread %" PRIu32 " starts at %" PRIu64 "\n", td->id, td->start);
+	for (i = td->start; ; ++i) {
+		if (use_ts)
+			stable_ts = global_ts++; /* NOTE(review): not atomic */
+		else
+			stable_ts = 0;
+		testutil_check(__wt_snprintf(
+		    kname, sizeof(kname), "%" PRIu64, i));
+
+		testutil_check(session->begin_transaction(session, NULL));
+		cur_coll->set_key(cur_coll, kname);
+		cur_local->set_key(cur_local, kname);
+		cur_oplog->set_key(cur_oplog, kname);
+		/*
+		 * Put an informative string into the value so that it
+		 * can be viewed well in a binary dump.
+		 */
+		testutil_check(__wt_snprintf(cbuf, sizeof(cbuf),
+		    "COLL: thread:%" PRIu32 " ts:%" PRIu64 " key: %" PRIu64,
+		    td->id, stable_ts, i));
+		testutil_check(__wt_snprintf(lbuf, sizeof(lbuf),
+		    "LOCAL: thread:%" PRIu32 " ts:%" PRIu64 " key: %" PRIu64,
+		    td->id, stable_ts, i));
+		testutil_check(__wt_snprintf(obuf, sizeof(obuf),
+		    "OPLOG: thread:%" PRIu32 " ts:%" PRIu64 " key: %" PRIu64,
+		    td->id, stable_ts, i));
+		data.size = __wt_random(&rnd) % MAX_VAL;
+		data.data = cbuf;
+		cur_coll->set_value(cur_coll, &data);
+		if ((ret = cur_coll->insert(cur_coll)) != 0)
+			testutil_die(ret, "WT_CURSOR.insert");
+		data.size = __wt_random(&rnd) % MAX_VAL;
+		data.data = obuf;
+		cur_oplog->set_value(cur_oplog, &data);
+		if ((ret = cur_oplog->insert(cur_oplog)) != 0)
+			testutil_die(ret, "WT_CURSOR.insert");
+		if (use_ts) {
+			testutil_check(__wt_snprintf(tscfg, sizeof(tscfg),
+			    "commit_timestamp=%" PRIx64, stable_ts));
+			testutil_check(
+			    session->commit_transaction(session, tscfg));
+		} else
+			testutil_check(
+			    session->commit_transaction(session, NULL));
+		/*
+		 * Insert into the local table outside the timestamp txn.
+		 */
+		data.size = __wt_random(&rnd) % MAX_VAL;
+		data.data = lbuf;
+		cur_local->set_value(cur_local, &data);
+		if ((ret = cur_local->insert(cur_local)) != 0)
+			testutil_die(ret, "WT_CURSOR.insert");
+
+		/*
+		 * Every N records save our stable timestamp in the stable
+		 * table (its key is 64-bit, hence the cast).  That defines the
+		 * threshold where we expect to find records after recovery.
+		 */
+		if (i % STABLE_PERIOD == 0) {
+			if (use_ts) {
+				/*
+				 * Set both the oldest and stable timestamp
+				 * so that we don't need to maintain read
+				 * availability at older timestamps.
+				 */
+				testutil_check(__wt_snprintf(
+				    tscfg, sizeof(tscfg),
+				    "oldest_timestamp=%" PRIx64
+				    ",stable_timestamp=%" PRIx64,
+				    stable_ts, stable_ts));
+				testutil_check(
+				    td->conn->set_timestamp(td->conn, tscfg));
+			}
+			cur_stable->set_key(cur_stable, (uint64_t)td->id);
+			cur_stable->set_value(cur_stable, stable_ts);
+			testutil_check(cur_stable->insert(cur_stable));
+		}
+		/*
+		 * Save the timestamp and key separately for checking later.
+		 */
+		if (fprintf(fp,
+		    "%" PRIu64 " %" PRIu64 "\n", stable_ts, i) < 0)
+			testutil_die(EIO, "fprintf");
+	}
+	/* NOTREACHED */
+}
+
+/*
+ * run_workload --
+ *	Child: create the database and tables, then run threads until killed.
+ */
+static void run_workload(uint32_t)
+    WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn));
+static void
+run_workload(uint32_t nth)
+{
+	WT_CONNECTION *conn;
+	WT_SESSION *session;
+	WT_THREAD_DATA *td;
+	wt_thread_t *thr;
+	uint32_t i;
+	int ret;
+	char envconf[512];
+
+	thr = dcalloc(nth+1, sizeof(*thr));
+	td = dcalloc(nth+1, sizeof(WT_THREAD_DATA));
+	if (chdir(home) != 0)
+		testutil_die(errno, "Child chdir: %s", home);
+	if (inmem)
+		strcpy(envconf, ENV_CONFIG_DEF);
+	else
+		strcpy(envconf, ENV_CONFIG_TXNSYNC);
+	if (compat)
+		strcat(envconf, ENV_CONFIG_COMPAT);
+
+	if ((ret = wiredtiger_open(NULL, NULL, envconf, &conn)) != 0)
+		testutil_die(ret, "wiredtiger_open");
+	if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
+		testutil_die(ret, "WT_CONNECTION:open_session");
+	/*
+	 * Create all the tables.
+	 */
+	if ((ret = session->create(session, uri_collection,
+		"key_format=S,value_format=u,log=(enabled=false)")) != 0)
+		testutil_die(ret, "WT_SESSION.create: %s", uri_collection);
+	if ((ret = session->create(session,
+	    uri_local, "key_format=S,value_format=u")) != 0)
+		testutil_die(ret, "WT_SESSION.create: %s", uri_local);
+	if ((ret = session->create(session,
+	    uri_oplog, "key_format=S,value_format=u")) != 0)
+		testutil_die(ret, "WT_SESSION.create: %s", uri_oplog);
+	/*
+	 * Don't log the stable timestamp table so that we know what timestamp
+	 * was stored at the checkpoint.
+	 */
+	if ((ret = session->create(session, stable_store,
+	    "key_format=Q,value_format=Q,log=(enabled=false)")) != 0)
+		testutil_die(ret, "WT_SESSION.create: %s", stable_store);
+	if ((ret = session->close(session, NULL)) != 0)
+		testutil_die(ret, "WT_SESSION:close");
+
+	/*
+	 * Thread 0 is the checkpoint thread.
+	 */
+	td[0].conn = conn;
+	td[0].id = 0;
+	printf("Create checkpoint thread\n");
+	testutil_check(__wt_thread_create(
+	    NULL, &thr[0], thread_ckpt_run, &td[0]));
+	for (i = 1; i <= nth; ++i) {
+		td[i].conn = conn;
+		td[i].start = (UINT64_MAX / nth) * (i - 1);
+		td[i].id = i;
+		testutil_check(__wt_thread_create(
+		    NULL, &thr[i], thread_run, &td[i]));
+	}
+	/*
+	 * The threads never exit, so the child will just wait here until
+	 * it is killed.
+	 */
+	printf("Create %" PRIu32 " writer threads\n", nth);
+	fflush(stdout);
+	for (i = 0; i <= nth; ++i)
+		testutil_check(__wt_thread_join(NULL, thr[i]));
+	/*
+	 * NOTREACHED
+	 */
+	free(thr);
+	free(td);
+	exit(EXIT_SUCCESS);
+}
+
+extern int __wt_optind;
+extern char *__wt_optarg;
+
+/*
+ * main --
+ *	Fork a workload child, kill it, then run recovery and verify the data.
+ */
+int
+main(int argc, char *argv[])
+{
+	struct stat sb;
+	FILE *fp;
+	WT_CONNECTION *conn;
+	WT_CURSOR *cur_coll, *cur_local, *cur_oplog, *cur_stable;
+	WT_RAND_STATE rnd;
+	WT_SESSION *session;
+	pid_t pid;
+	uint64_t absent_coll, absent_local, absent_oplog, count, key, last_key;
+	uint64_t first_miss, middle_coll, middle_local, middle_oplog;
+	uint64_t stable_fp, stable_val, val[MAX_TH+1];
+	uint32_t i, nth, timeout;
+	int ch, status, ret;
+	const char *working_dir;
+	char buf[3 * sizeof(home) + 64], fname[64], kname[64], statname[1024];
+	bool fatal, rand_th, rand_time, verify_only;
+
+	(void)testutil_set_progname(argv);
+
+	compat = inmem = false;
+	use_ts = true;
+	nth = MIN_TH;
+	rand_th = rand_time = true;
+	timeout = MIN_TIME;
+	verify_only = false;
+	working_dir = "WT_TEST.timestamp-abort";
+
+	while ((ch = __wt_getopt(progname, argc, argv, "Ch:mT:t:vz")) != EOF)
+		switch (ch) {
+		case 'C':
+			compat = true;
+			break;
+		case 'h':
+			working_dir = __wt_optarg;
+			break;
+		case 'm':
+			inmem = true;
+			break;
+		case 'T':
+			rand_th = false;
+			nth = (uint32_t)atoi(__wt_optarg);
+			break;
+		case 't':
+			rand_time = false;
+			timeout = (uint32_t)atoi(__wt_optarg);
+			break;
+		case 'v':
+			verify_only = true;
+			break;
+		case 'z':
+			use_ts = false;
+			break;
+		default:
+			usage();
+		}
+	argc -= __wt_optind;
+	argv += __wt_optind;
+	if (argc != 0)
+		usage();
+	if (nth > MAX_TH)
+		testutil_die(EINVAL, "-T: at most %d threads allowed", MAX_TH);
+
+	testutil_work_dir_from_path(home, sizeof(home), working_dir);
+	/*
+	 * If the user wants to verify they need to tell us how many threads
+	 * there were so we can find the old record files.
+	 */
+	if (verify_only && rand_th) {
+		fprintf(stderr,
+		    "Verify option requires specifying number of threads\n");
+		exit (EXIT_FAILURE);
+	}
+	if (!verify_only) {
+		testutil_make_work_dir(home);
+
+		__wt_random_init_seed(NULL, &rnd);
+		if (rand_time) {
+			timeout = __wt_random(&rnd) % MAX_TIME;
+			if (timeout < MIN_TIME)
+				timeout = MIN_TIME;
+		}
+		if (rand_th) {
+			nth = __wt_random(&rnd) % MAX_TH;
+			if (nth < MIN_TH)
+				nth = MIN_TH;
+		}
+		printf("Parent: compatibility: %s, "
+		    "in-mem log sync: %s, timestamp in use: %s\n",
+		    compat ? "true" : "false",
+		    inmem ? "true" : "false",
+		    use_ts ? "true" : "false");
+		printf("Parent: Create %" PRIu32
+		    " threads; sleep %" PRIu32 " seconds\n", nth, timeout);
+		/*
+		 * Fork a child to insert items.  We then kill the child, run
+		 * recovery and make sure the items we wrote exist afterwards.
+		 */
+		testutil_checksys((pid = fork()) < 0);
+
+		if (pid == 0) { /* child */
+			run_workload(nth);
+			return (EXIT_SUCCESS);
+		}
+
+		/*
+		 * Parent: sleep for the configured amount of time before
+		 * killing the child.  Start the timeout from the time we
+		 * notice the checkpoint file has been created.  That allows
+		 * the test to run correctly on really slow machines.  Verify
+		 * the process ID still exists so that if the child aborts we
+		 * don't stay in this loop forever.
+		 */
+		testutil_check(__wt_snprintf(
+		    statname, sizeof(statname), "%s/%s", home, ckpt_file));
+		while (stat(statname, &sb) != 0 && kill(pid, 0) == 0)
+			sleep(1);
+		sleep(timeout);
+
+		/*
+		 * !!! This should be plenty long enough that more than one log
+		 * file exists.  If wanted, that check would be added here.
+		 */
+		printf("Kill child\n");
+		testutil_checksys(kill(pid, SIGKILL) != 0);
+		testutil_checksys(waitpid(pid, &status, 0) == -1);
+	}
+	/* Take a copy of the directory before recovery runs. */
+	if (chdir(home) != 0)
+		testutil_die(errno, "parent chdir: %s", home);
+	testutil_check(__wt_snprintf(buf, sizeof(buf),
+	    "rm -rf ../%s.SAVE && mkdir ../%s.SAVE && cp -rp * ../%s.SAVE",
+	     home, home, home));
+	(void)system(buf);
+	printf("Open database, run recovery and verify content\n");
+
+	/*
+	 * Open the connection which forces recovery to be run.
+	 */
+	if ((ret = wiredtiger_open(NULL, NULL, ENV_CONFIG_REC, &conn)) != 0)
+		testutil_die(ret, "wiredtiger_open");
+	if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
+		testutil_die(ret, "WT_CONNECTION:open_session");
+	/*
+	 * Open a cursor on all the tables.
+	 */
+	if ((ret = session->open_cursor(session,
+	    uri_collection, NULL, NULL, &cur_coll)) != 0)
+		testutil_die(ret, "WT_SESSION.open_cursor: %s", uri_collection);
+	if ((ret = session->open_cursor(session,
+	    uri_local, NULL, NULL, &cur_local)) != 0)
+		testutil_die(ret, "WT_SESSION.open_cursor: %s", uri_local);
+	if ((ret = session->open_cursor(session,
+	    uri_oplog, NULL, NULL, &cur_oplog)) != 0)
+		testutil_die(ret, "WT_SESSION.open_cursor: %s", uri_oplog);
+	if ((ret = session->open_cursor(session,
+	    stable_store, NULL, NULL, &cur_stable)) != 0)
+		testutil_die(ret, "WT_SESSION.open_cursor: %s", stable_store);
+
+	/*
+	 * Find the biggest stable timestamp value that was saved.
+	 */
+	stable_val = 0;
+	memset(val, 0, sizeof(val));
+	while (cur_stable->next(cur_stable) == 0) {
+		cur_stable->get_key(cur_stable, &key);
+		testutil_assertfmt(key <= MAX_TH, "stable key %" PRIu64, key);
+		cur_stable->get_value(cur_stable, &val[key]);
+		if (val[key] > stable_val)
+			stable_val = val[key];
+		if (use_ts)
+			printf("Stable: key %" PRIu64 " value %" PRIu64 "\n",
+			    key, val[key]);
+	}
+	if (use_ts)
+		printf("Got stable_val %" PRIu64 "\n", stable_val);
+
+	count = 0;
+	absent_coll = absent_local = absent_oplog = 0;
+	fatal = false;
+	for (i = 1; i <= nth; ++i) {
+		first_miss = middle_coll = middle_local = middle_oplog = 0;
+		testutil_check(__wt_snprintf(
+		    fname, sizeof(fname), RECORDS_FILE, i));
+		if ((fp = fopen(fname, "r")) == NULL)
+			testutil_die(errno, "fopen: %s", fname);
+
+		/*
+		 * For every key in the saved file, verify that the key exists
+		 * in the table after recovery.  If we're doing in-memory
+		 * log buffering we never expect a record missing in the middle,
+		 * but records may be missing at the end.  If we did
+		 * write-no-sync, we expect every key to have been recovered.
+		 */
+		for (last_key = UINT64_MAX;; ++count, last_key = key) {
+			ret = fscanf(fp, "%" SCNu64 "%" SCNu64 "\n",
+			    &stable_fp, &key);
+			if (ret != EOF && ret != 2) {
+				/*
+				 * If we find a partial line, consider it
+				 * like an EOF.
+				 */
+				if (ret == 1 || ret == 0)
+					break;
+				testutil_die(errno, "fscanf");
+			}
+			if (ret == EOF)
+				break;
+			/*
+			 * If we're unlucky, the last line may be a partially
+			 * written key at the end that can result in a false
+			 * negative error for a missing record.  Detect it.
+			 */
+			if (last_key != UINT64_MAX && key != last_key + 1) {
+				printf("%s: Ignore partial record %" PRIu64
+				    " last valid key %" PRIu64 "\n",
+				    fname, key, last_key);
+				break;
+			}
+			testutil_check(__wt_snprintf(
+			    kname, sizeof(kname), "%" PRIu64, key));
+			cur_coll->set_key(cur_coll, kname);
+			cur_local->set_key(cur_local, kname);
+			cur_oplog->set_key(cur_oplog, kname);
+			/*
+			 * The collection table should always only have the
+			 * data as of the checkpoint.
+			 */
+			if ((ret = cur_coll->search(cur_coll)) != 0) {
+				if (ret != WT_NOTFOUND)
+					testutil_die(ret, "search");
+				/*
+				 * If we don't find a record, the stable
+				 * timestamp written to our file better be
+				 * larger than the saved one.
+				 */
+				if (!inmem &&
+				    stable_fp != 0 && stable_fp <= val[i]) {
+					printf("%s: COLLECTION no record with "
+					    "key %" PRIu64 " record ts %" PRIu64
+					    " <= stable ts %" PRIu64 "\n",
+					    fname, key, stable_fp, val[i]);
+					absent_coll++;
+				}
+				if (middle_coll == 0)
+					first_miss = key;
+				middle_coll = key;
+			} else if (middle_coll != 0) {
+				/*
+				 * We should never find an existing key after
+				 * we have detected one missing.
+				 */
+				printf("%s: COLLECTION after absent records %"
+				    PRIu64 "-%" PRIu64 " key %" PRIu64
+				    " exists\n",
+				    fname, first_miss, middle_coll, key);
+				fatal = true;
+			}
+			/*
+			 * The local table should always have all data.
+			 */
+			if ((ret = cur_local->search(cur_local)) != 0) {
+				if (ret != WT_NOTFOUND)
+					testutil_die(ret, "search");
+				if (!inmem)
+					printf("%s: LOCAL no record with key %"
+					    PRIu64 "\n", fname, key);
+				absent_local++;
+				middle_local = key;
+			} else if (middle_local != 0) {
+				/*
+				 * We should never find an existing key after
+				 * we have detected one missing.
+				 */
+				printf("%s: LOCAL after absent record at %"
+				    PRIu64 " key %" PRIu64 " exists\n",
+				    fname, middle_local, key);
+				fatal = true;
+			}
+			/*
+			 * The oplog table should always have all data.
+			 */
+			if ((ret = cur_oplog->search(cur_oplog)) != 0) {
+				if (ret != WT_NOTFOUND)
+					testutil_die(ret, "search");
+				if (!inmem)
+					printf("%s: OPLOG no record with key %"
+					    PRIu64 "\n", fname, key);
+				absent_oplog++;
+				middle_oplog = key;
+			} else if (middle_oplog != 0) {
+				/*
+				 * We should never find an existing key after
+				 * we have detected one missing.
+				 */
+				printf("%s: OPLOG after absent record at %"
+				    PRIu64 " key %" PRIu64 " exists\n",
+				    fname, middle_oplog, key);
+				fatal = true;
+			}
+		}
+		testutil_checksys(fclose(fp) != 0);
+	}
+	if ((ret = conn->close(conn, NULL)) != 0)
+		testutil_die(ret, "WT_CONNECTION:close");
+	if (fatal)
+		return (EXIT_FAILURE);
+	if (!inmem && absent_coll) {
+		printf("COLLECTION: %" PRIu64
+		    " record(s) absent from %" PRIu64 "\n",
+		    absent_coll, count);
+		fatal = true;
+	}
+	if (!inmem && absent_local) {
+		printf("LOCAL: %" PRIu64 " record(s) absent from %" PRIu64 "\n",
+		    absent_local, count);
+		fatal = true;
+	}
+	if (!inmem && absent_oplog) {
+		printf("OPLOG: %" PRIu64 " record(s) absent from %" PRIu64 "\n",
+		    absent_oplog, count);
+		fatal = true;
+	}
+	if (fatal)
+		return (EXIT_FAILURE);
+	printf("%" PRIu64 " records verified\n", count);
+	return (EXIT_SUCCESS);
+}
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp01.py b/src/third_party/wiredtiger/test/suite/test_timestamp01.py
index a934753488d..c8938296908 100644
--- a/src/third_party/wiredtiger/test/suite/test_timestamp01.py
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp01.py
@@ -52,7 +52,7 @@ class test_timestamp01(wttest.WiredTigerTestCase, suite_subprocess):
         self.session.begin_transaction()
         self.assertRaisesWithMessage(wiredtiger.WiredTigerError,
             lambda: self.session.commit_transaction(
-                'commit_timestamp=' + timestamp_str(1 << 100)),
+                'commit_timestamp=' + timestamp_str(1 << 5000)),
                 '/too long/')
 
         # One is okay, as is 2**64 - 1
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp03.py b/src/third_party/wiredtiger/test/suite/test_timestamp03.py
index 734961e9e98..728200e528a 100644
--- a/src/third_party/wiredtiger/test/suite/test_timestamp03.py
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp03.py
@@ -62,7 +62,6 @@ class test_timestamp03(wttest.WiredTigerTestCase, suite_subprocess):
         ('use_ts_def', dict(ckptcfg='', val='none')),
         ('use_ts_false', dict(ckptcfg='use_timestamp=false', val='all')),
         ('use_ts_true', dict(ckptcfg='use_timestamp=true', val='none')),
-        ('read_ts', dict(ckptcfg='read_timestamp', val='none')),
     ]
 
     conncfg = [
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp04.py b/src/third_party/wiredtiger/test/suite/test_timestamp04.py
index 146326834db..3af0feed31b 100644
--- a/src/third_party/wiredtiger/test/suite/test_timestamp04.py
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp04.py
@@ -30,8 +30,6 @@
 #   Timestamps: Test that rollback_to_stable obeys expected visibility rules
 #
 
-import datetime
-import random
 from suite_subprocess import suite_subprocess
 import wiredtiger, wttest
 from wtscenario import make_scenarios
@@ -50,9 +48,10 @@ class test_timestamp04(wttest.WiredTigerTestCase, suite_subprocess):
     uri = 'table:' + tablename
 
     scenarios = make_scenarios([
-        #('col', dict(extra_config=',key_format=r')),
-        #('lsm', dict(extra_config=',type=lsm')),
-        ('row', dict(extra_config=',memory_page_max=32k,leaf_page_max=8k,internal_page_max=8k')),
+        ('col_fix', dict(empty=1, extra_config=',key_format=r, value_format=8t')),
+        ('col_var', dict(empty=0, extra_config=',key_format=r')),
+        #('lsm', dict(empty=0, extra_config=',type=lsm')),
+        ('row', dict(empty=0, extra_config='')),
     ])
 
     # Rollback only works for non-durable tables
@@ -65,17 +64,21 @@ class test_timestamp04(wttest.WiredTigerTestCase, suite_subprocess):
             session.begin_transaction(txn_config)
         c = session.open_cursor(self.uri, None)
         if missing == False:
-            actual = dict((k, v) for k, v, pad in c if v != 0)
+            actual = dict((k, v) for k, v in c if v != 0)
             #print expected
             #print actual
             self.assertEqual(actual, expected)
         # Search for the expected items as well as iterating
         for k, v in expected.iteritems():
             if missing == False:
-                self.assertEqual(c[k][0], v, "for key " + str(k))
+                self.assertEqual(c[k], v, "for key " + str(k))
             else:
                 c.set_key(k)
-                self.assertEqual(c.search(), wiredtiger.WT_NOTFOUND)
+                if self.empty:
+                    # Fixed-length column-store rows always exist.
+                    self.assertEqual(c.search(), 0)
+                else:
+                    self.assertEqual(c.search(), wiredtiger.WT_NOTFOUND)
         c.close()
         if txn_config:
             session.commit_transaction()
@@ -87,7 +90,8 @@ class test_timestamp04(wttest.WiredTigerTestCase, suite_subprocess):
         # Configure small page sizes to ensure eviction comes through and we have a
        #  somewhat complex tree
         self.session.create(self.uri,
-            'key_format=i,value_format=iS,memory_page_max=16k,leaf_page_max=8k' + self.extra_config)
+            'key_format=i,value_format=i,memory_page_max=32k,leaf_page_max=8k,internal_page_max=8k'
+                + self.extra_config)
         c = self.session.open_cursor(self.uri)
 
         # Insert keys each with timestamp=key, in some order
@@ -96,7 +100,7 @@ class test_timestamp04(wttest.WiredTigerTestCase, suite_subprocess):
 
         for k in keys:
             self.session.begin_transaction()
-            c[k] = (1, 'the quick brown fox')
+            c[k] = 1
             self.session.commit_transaction('commit_timestamp=' + timestamp_str(k))
             # Setup an oldest timestamp to ensure state remains in cache.
             if k == 1:
@@ -119,7 +123,7 @@ class test_timestamp04(wttest.WiredTigerTestCase, suite_subprocess):
         # Update the values again in preparation for rolling back more
         for k in keys:
             self.session.begin_transaction()
-            c[k] = (2, 'jumped over the lazy dog')
+            c[k] = 2
             self.session.commit_transaction('commit_timestamp=' + timestamp_str(k + key_range))
 
         # Now we should have: keys 1-100 with value 2
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp05.py b/src/third_party/wiredtiger/test/suite/test_timestamp05.py
new file mode 100644
index 00000000000..d7131cb2004
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp05.py
@@ -0,0 +1,106 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2017 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# test_timestamp05.py
+#   Timestamps: make sure they don't end up in metadata
+#
+
+from helper import copy_wiredtiger_home
+import random
+from suite_subprocess import suite_subprocess
+import wiredtiger, wttest
+from wtscenario import make_scenarios
+
+def timestamp_str(t):
+    return '%x' % t
+
+def timestamp_ret_str(t):
+    s = timestamp_str(t)
+    if len(s) % 2 == 1:
+        s = '0' + s
+    return s
+
+class test_timestamp05(wttest.WiredTigerTestCase, suite_subprocess):
+    uri = 'table:ts05'
+
+    def test_create(self):
+        if not wiredtiger.timestamp_build():
+            self.skipTest('requires a timestamp build')
+
+        s = self.session
+        conn = self.conn
+
+        # Start timestamps at 50
+        conn.set_timestamp('oldest_timestamp=50,stable_timestamp=50')
+
+        # Commit at 100
+        s.begin_transaction()
+        s.create(self.uri, 'key_format=i,value_format=S')
+        s.commit_transaction('commit_timestamp=' + timestamp_str(100))
+
+        # Make sure the tree is dirty
+        c = s.open_cursor(self.uri)
+        c[200] = 'new value'
+
+        # Checkpoint at 50
+        s.checkpoint('use_timestamp=true')
+
+    def test_bulk(self):
+        if not wiredtiger.timestamp_build():
+            self.skipTest('requires a timestamp build')
+
+        s = self.session
+        conn = self.conn
+
+        s.create(self.uri, 'key_format=i,value_format=S')
+        c = s.open_cursor(self.uri, None, 'bulk')
+
+        # Bulk-load keys 1..100 in ascending order, without per-key timestamps
+        nkeys = 100
+        keys = range(1, nkeys+1)
+
+        for k in keys:
+            c[k] = 'some value'
+
+        # Start timestamps at 50
+        conn.set_timestamp('oldest_timestamp=50,stable_timestamp=50')
+
+        # Commit at 100
+        s.begin_transaction()
+        c.close()
+        s.commit_transaction('commit_timestamp=' + timestamp_str(100))
+
+        # Make sure the tree is dirty
+        c = s.open_cursor(self.uri)
+        c[200] = 'new value'
+
+        # Checkpoint at 50
+        s.checkpoint('use_timestamp=true')
+
+if __name__ == '__main__':
+    wttest.run()
diff --git a/src/third_party/wiredtiger/test/suite/test_timestamp07.py b/src/third_party/wiredtiger/test/suite/test_timestamp07.py
new file mode 100644
index 00000000000..c1f70e0cb1a
--- /dev/null
+++ b/src/third_party/wiredtiger/test/suite/test_timestamp07.py
@@ -0,0 +1,284 @@
+#!/usr/bin/env python
+#
+# Public Domain 2014-2017 MongoDB, Inc.
+# Public Domain 2008-2014 WiredTiger, Inc.
+#
+# This is free and unencumbered software released into the public domain.
+#
+# Anyone is free to copy, modify, publish, use, compile, sell, or
+# distribute this software, either in source code form or as a compiled
+# binary, for any purpose, commercial or non-commercial, and by any
+# means.
+#
+# In jurisdictions that recognize copyright laws, the author or authors
+# of this software dedicate any and all copyright interest in the
+# software to the public domain. We make this dedication for the benefit
+# of the public at large and to the detriment of our heirs and
+# successors. We intend this dedication to be an overt act of
+# relinquishment in perpetuity of all present and future rights to this
+# software under copyright law.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+# OTHER DEALINGS IN THE SOFTWARE.
+#
+# test_timestamp07.py
+#   Timestamps: checkpoints and eviction
+#
+
+from helper import copy_wiredtiger_home
+import random
+from suite_subprocess import suite_subprocess
+import wiredtiger, wttest
+from wiredtiger import stat
+from wtscenario import make_scenarios
+
+def timestamp_str(t):
+    return '%x' % t
+
+class test_timestamp07(wttest.WiredTigerTestCase, suite_subprocess):
+    tablename = 'ts07_ts_nologged'
+    tablename2 = 'ts07_nots_logged'
+    tablename3 = 'ts07_ts_logged'
+
+    types = [
+        ('file', dict(uri='file:', use_cg=False, use_index=False)),
+        ('table-cg', dict(uri='table:', use_cg=True, use_index=False)),
+    ]
+
+    conncfg = [
+        ('nolog', dict(conn_config='create,cache_size=1M,statistics=(fast)', using_log=False)),
+        ('log', dict(conn_config='create,log=(enabled),cache_size=1M,statistics=(fast)', using_log=True)),
+    ]
+
+    nkeys = [
+        ('100keys', dict(nkeys=100,evicts=False)),
+        ('500keys', dict(nkeys=500,evicts=True)),
+#        ('1000keys', dict(nkeys=1000,evicts=True)),
+    ]
+
+    scenarios = make_scenarios(types, conncfg, nkeys)
+
+    modified_evicted = 0
+
+    # Binary values.
+    value = u'\u0001\u0002abcd\u0007\u0004'
+    value2 = u'\u0001\u0002dcba\u0007\u0004'
+    value3 = u'\u0001\u0002cdef\u0007\u0004'
+
+    # Check that a cursor (optionally started in a new transaction), sees the
+    # expected values.
+    def check(self, session, txn_config, expected):
+        if txn_config:
+            #print "Check: txn_config:"
+            #print txn_config
+            session.begin_transaction(txn_config)
+        c = session.open_cursor(self.uri + self.tablename, None)
+        actual = dict((k, v) for k, v in c if v != 0)
+        self.maxDiff = None
+        #print "Expected:"
+        #print expected
+        #print "Actual:"
+        #print actual
+        self.assertEqual(actual, expected)
+        # Search for the expected items as well as iterating
+        for k, v in expected.iteritems():
+            self.assertEqual(c[k], v, "for key " + str(k))
+        c.close()
+        if txn_config:
+            session.commit_transaction()
+    #
+    # Take a backup of the database and verify that the value we want to
+    # check exists in the tables the expected number of times.
+    #
+    def backup_check(self, check_value, valcnt, valcnt2, valcnt3):
+        newdir = "BACKUP"
+        copy_wiredtiger_home('.', newdir, True)
+
+        conn = self.setUpConnectionOpen(newdir)
+        session = self.setUpSessionOpen(conn)
+        c = session.open_cursor(self.uri + self.tablename, None)
+        c2 = session.open_cursor(self.uri + self.tablename2, None)
+        c3 = session.open_cursor(self.uri + self.tablename3, None)
+        # Count how many times check_value is present in the unlogged table
+        count = 0
+        for k, v in c:
+            if check_value in str(v):
+                # print "check_value found in key " + str(k)
+                count += 1
+        c.close()
+        # Count how many times check_value is present in the logged,
+        # non-timestamp table.
+        count2 = 0
+        for k, v in c2:
+            if check_value in str(v):
+                # print "check_value found in key " + str(k)
+                count2 += 1
+        c2.close()
+        # Count how many times check_value is present in the
+        # logged timestamp table.
+        count3 = 0
+        for k, v in c3:
+            if check_value in str(v):
+                count3 += 1
+        c3.close()
+        conn.close()
+        # print "CHECK BACKUP: Count " + str(count) + " Count2 " + str(count2) + " Count3 " + str(count3)
+        # print "CHECK BACKUP: Expect value2 count " + str(valcnt)
+        # print "CHECK BACKUP: 2nd table Expect value2 count " + str(valcnt2)
+        # print "CHECK BACKUP: 3rd table Expect value2 count " + str(valcnt3)
+        self.assertEqual(count, valcnt)
+        self.assertEqual(count2, valcnt2)
+        self.assertEqual(count3, valcnt3)
+
+    # Assert that dirty eviction since the last call happened only if expected.
+    def check_eviction(self):
+        # Get a statistics cursor and look at the number of dirty pages
+        # evicted.  Keep track of the last read value so we can determine
+        # if the value changed since the last call to this function.
+        stat_cursor = self.session.open_cursor('statistics:', None, None)
+        evict_dirty = stat_cursor[stat.conn.cache_eviction_dirty][2]
+
+        # Eviction happened if the counter grew since the previous call.
+        #print "Old: " + str(self.modified_evicted)
+        # print "New: " + str(evict_dirty)
+        did_eviction = self.modified_evicted < evict_dirty
+        stat_cursor.close()
+        self.modified_evicted = evict_dirty
+        # print "Evict ret: " + str(did_eviction)
+
+        # XXX we can't guarantee that eviction will always happen, but make
+        # sure it doesn't happen if not expected.
+        self.assertTrue(not did_eviction or self.evicts)
+
+    # Checkpoint, then check a backup copy for the expected value counts.
+    def ckpt_backup(self, check_value, valcnt, valcnt2, valcnt3):
+
+        # Take a checkpoint.  Make a copy of the database.  Open the
+        # copy and verify whether or not the expected data is in there.
+        ckptcfg = 'use_timestamp=true'
+        self.session.checkpoint(ckptcfg)
+        self.backup_check(check_value, valcnt, valcnt2, valcnt3)
+
+    def test_timestamp07(self):
+        if not wiredtiger.timestamp_build():
+            self.skipTest('requires a timestamp build')
+
+        uri = self.uri + self.tablename
+        uri2 = self.uri + self.tablename2
+        uri3 = self.uri + self.tablename3
+        #
+        # Open three tables:
+        # 1. Table is not logged and uses timestamps.
+        # 2. Table is logged and does not use timestamps.
+        # 3. Table is logged and uses timestamps.
+        #
+        self.session.create(uri, 'key_format=i,value_format=S,log=(enabled=false)')
+        c = self.session.open_cursor(uri)
+        self.session.create(uri2, 'key_format=i,value_format=S')
+        c2 = self.session.open_cursor(uri2)
+        self.session.create(uri3, 'key_format=i,value_format=S')
+        c3 = self.session.open_cursor(uri3)
+
+        # Insert keys 1..nkeys each with timestamp=key, in some order.
+        orig_keys = range(1, self.nkeys+1)
+        keys = orig_keys[:]
+        random.shuffle(keys)
+
+        for k in keys:
+            c2[k] = self.value
+            self.session.begin_transaction()
+            c[k] = self.value
+            c3[k] = self.value
+            self.session.commit_transaction('commit_timestamp=' + timestamp_str(k))
+
+        self.check_eviction()
+        # Now check that we see the expected state when reading at each
+        # timestamp.
+        for i, t in enumerate(orig_keys):
+            self.check(self.session, 'read_timestamp=' + timestamp_str(t),
+                dict((k, self.value) for k in orig_keys[:i+1]))
+
+        # Bump the oldest timestamp, we're not going back...
+        self.assertEqual(self.conn.query_timestamp(), timestamp_str(self.nkeys))
+        self.oldts = timestamp_str(self.nkeys)
+        self.conn.set_timestamp('oldest_timestamp=' + self.oldts)
+        self.conn.set_timestamp('stable_timestamp=' + self.oldts)
+        # print "Oldest " + self.oldts
+
+        # Update them and retry.
+        random.shuffle(keys)
+        count = 0
+        for k in keys:
+            # Make sure a timestamp cursor is the last one to update.  This
+            # tests the scenario for a bug we found where recovery replayed
+            # the last record written into the log.
+            #
+            # print "Key " + str(k) + " to value2"
+            c2[k] = self.value2
+            self.session.begin_transaction()
+            c[k] = self.value2
+            c3[k] = self.value2
+            ts = timestamp_str(k + self.nkeys)
+            self.session.commit_transaction('commit_timestamp=' + ts)
+            # print "Commit key " + str(k) + " ts " + ts
+            count += 1
+
+        self.check_eviction()
+
+        # print "Updated " + str(count) + " keys to value2"
+
+        # Take a checkpoint using the given configuration.  Then verify
+        # whether value2 appears in a copy of that data or not.
+        valcnt2 = valcnt3 = self.nkeys
+        valcnt = 0
+        self.ckpt_backup(self.value2, valcnt, valcnt2, valcnt3)
+        # Update the stable timestamp to the latest, but not the oldest
+        # timestamp and make sure we can see the data.  Once the stable
+        # timestamp is moved we should see all keys with value2.
+        self.conn.set_timestamp('stable_timestamp=' + \
+            timestamp_str(self.nkeys*2))
+        self.ckpt_backup(self.value2, self.nkeys, self.nkeys, self.nkeys)
+
+        # If we're not using the log we're done.
+        if not self.using_log:
+            return
+
+        # Update the key and retry.  This time take a backup and recover.
+        random.shuffle(keys)
+        count = 0
+        for k in keys:
+            # Make sure a timestamp cursor is the last one to update.  This
+            # tests the scenario for a bug we found where recovery replayed
+            # the last record written into the log.
+            #
+            # print "Key " + str(k) + " to value3"
+            c2[k] = self.value3
+            self.session.begin_transaction()
+            c[k] = self.value3
+            c3[k] = self.value3
+            ts = timestamp_str(k + self.nkeys*2)
+            self.session.commit_transaction('commit_timestamp=' + ts)
+            # print "Commit key " + str(k) + " ts " + ts
+            count += 1
+
+        self.check_eviction()
+        # print "Updated " + str(count) + " keys to value3"
+
+        # Flush the log but don't checkpoint
+        self.session.log_flush('sync=on')
+
+        # Take a backup and then verify whether value3 appears in a copy
+        # of that data or not.  Both tables that are logged should see
+        # all the data regardless of timestamps.  The table that is not
+        # logged should not see any of it.
+        valcnt = 0
+        valcnt2 = valcnt3 = self.nkeys
+        self.backup_check(self.value3, valcnt, valcnt2, valcnt3)
+
+if __name__ == '__main__':
+    wttest.run()
diff --git a/src/third_party/wiredtiger/test/utility/test_util.h b/src/third_party/wiredtiger/test/utility/test_util.h
index e53018ad4ea..7500df8d5e5 100644
--- a/src/third_party/wiredtiger/test/utility/test_util.h
+++ b/src/third_party/wiredtiger/test/utility/test_util.h
@@ -117,6 +117,18 @@ typedef struct {
 } while (0)
 
 /*
+ * testutil_checksys --
+ *	Complain and quit if a system call fails, reporting errno. The error
+ * test must be specified, not just the call, because system calls fail in a
+ * variety of ways.
+ */
+#define	testutil_checksys(call) do {					\
+	if (call)							\
+		testutil_die(						\
+		    errno, "%s/%d: %s", __func__, __LINE__, #call);	\
+} while (0)
+
+/*
  * testutil_checkfmt --
  *	Complain and quit if a function call fails, with additional arguments.
  */