diff options
95 files changed, 2548 insertions, 957 deletions
diff --git a/bench/wtperf/config.c b/bench/wtperf/config.c index 1238c25502c..808e85eedae 100644 --- a/bench/wtperf/config.c +++ b/bench/wtperf/config.c @@ -54,6 +54,7 @@ static void config_opt_usage(void); int config_assign(CONFIG *dest, const CONFIG *src) { + CONFIG_QUEUE_ENTRY *conf_line, *tmp_line; size_t i, len; char *newstr, **pstr; @@ -96,6 +97,18 @@ config_assign(CONFIG *dest, const CONFIG *src) } TAILQ_INIT(&dest->stone_head); + TAILQ_INIT(&dest->config_head); + + /* Clone the config string information into the new cfg object */ + TAILQ_FOREACH(conf_line, &src->config_head, c) { + len = strlen(conf_line->string); + if ((tmp_line = calloc(sizeof(CONFIG_QUEUE_ENTRY), 1)) == NULL) + return (enomem(src)); + if ((tmp_line->string = calloc(len + 1, 1)) == NULL) + return (enomem(src)); + strncpy(tmp_line->string, conf_line->string, len); + TAILQ_INSERT_TAIL(&dest->config_head, tmp_line, c); + } return (0); } @@ -106,9 +119,17 @@ config_assign(CONFIG *dest, const CONFIG *src) void config_free(CONFIG *cfg) { + CONFIG_QUEUE_ENTRY *config_line; size_t i; char **pstr; + while (!TAILQ_EMPTY(&cfg->config_head)) { + config_line = TAILQ_FIRST(&cfg->config_head); + TAILQ_REMOVE(&cfg->config_head, config_line, c); + free(config_line->string); + free(config_line); + } + for (i = 0; i < sizeof(config_opts) / sizeof(config_opts[0]); i++) if (config_opts[i].type == STRING_TYPE || config_opts[i].type == CONFIG_STRING_TYPE) { @@ -569,16 +590,34 @@ err: if (fd != -1) int config_opt_line(CONFIG *cfg, const char *optstr) { + CONFIG_QUEUE_ENTRY *config_line; WT_CONFIG_ITEM k, v; WT_CONFIG_PARSER *scan; + size_t len; int ret, t_ret; + char *string_copy; + len = strlen(optstr); if ((ret = wiredtiger_config_parser_open( - NULL, optstr, strlen(optstr), &scan)) != 0) { + NULL, optstr, len, &scan)) != 0) { lprintf(cfg, ret, 0, "Error in config_scan_begin"); return (ret); } + /* + * Append the current line to our copy of the config. 
The config is + * stored in the order it is processed, so added options will be after + * any parsed from the original config. We allocate len + 1 to allow for + * a null byte to be added. + */ + if ((string_copy = calloc(len + 1, 1)) == NULL) + return (enomem(cfg)); + + strncpy(string_copy, optstr, len); + config_line = calloc(sizeof(CONFIG_QUEUE_ENTRY), 1); + config_line->string = string_copy; + TAILQ_INSERT_TAIL(&cfg->config_head, config_line, c); + while (ret == 0) { if ((ret = scan->next(scan, &k, &v)) != 0) { /* Any parse error has already been reported. */ @@ -653,6 +692,90 @@ config_sanity(CONFIG *cfg) } /* + * config_consolidate -- + * Consolidate repeated configuration settings so that it only appears + * once in the configuration output file. + */ +void +config_consolidate(CONFIG *cfg) +{ + CONFIG_QUEUE_ENTRY *conf_line, *test_line, *tmp; + char *string_key; + + /* + * This loop iterates over the config queue and for entry checks if an + * entry later in the queue has the same key. If a match is found then + * the current queue entry is removed and we continue. + */ + conf_line = TAILQ_FIRST(&cfg->config_head); + while (conf_line != NULL) { + string_key = strchr(conf_line->string, '='); + tmp = test_line = TAILQ_NEXT(conf_line, c); + while (test_line != NULL) { + /* + * The + 1 here forces the '=' sign to be matched + * ensuring we don't match keys that have a common + * prefix such as "table_count" and "table_count_idle" + * as being the same key. + */ + if (strncmp(conf_line->string, test_line->string, + (size_t)(string_key - conf_line->string + 1)) + == 0) { + TAILQ_REMOVE(&cfg->config_head, conf_line, c); + free(conf_line->string); + free(conf_line); + break; + } + test_line = TAILQ_NEXT(test_line, c); + } + conf_line = tmp; + } +} + +/* + * config_to_file -- + * Write the final config used in this execution to a file. 
+ */ +void +config_to_file(CONFIG *cfg) +{ + CONFIG_QUEUE_ENTRY *config_line; + FILE *fp; + size_t req_len; + char *path; + + fp = NULL; + + /* Backup the config */ + req_len = strlen(cfg->home) + 100; + if ((path = calloc(req_len, 1)) == NULL) { + (void)enomem(cfg); + goto err; + } + + snprintf(path, req_len + 14, "%s/CONFIG.wtperf", cfg->home); + if ((fp = fopen(path, "w")) == NULL) { + lprintf(cfg, errno, 0, "%s", path); + goto err; + } + + /* Print the config dump */ + fprintf(fp,"# Warning. This config includes " + "unwritten, implicit configuration defaults.\n" + "# Changes to those values may cause differences in behavior.\n"); + config_consolidate(cfg); + config_line = TAILQ_FIRST(&cfg->config_head); + while (config_line != NULL) { + fprintf(fp, "%s\n", config_line->string); + config_line = TAILQ_NEXT(config_line, c); + } + +err: free(path); + if (fp != NULL) + (void)fclose(fp); +} + +/* * config_print -- * Print out the configuration in verbose mode. */ diff --git a/bench/wtperf/runners/btree-split-stress.wtperf b/bench/wtperf/runners/btree-split-stress.wtperf new file mode 100644 index 00000000000..deb8c70d12f --- /dev/null +++ b/bench/wtperf/runners/btree-split-stress.wtperf @@ -0,0 +1,10 @@ +conn_config="cache_size=2GB,statistics=[fast,clear],statistics_log=(wait=10),eviction=(threads_max=4,threads_min=4)" +table_config="type=file,leaf_page_max=8k,internal_page_max=8k,memory_page_max=2MB,split_deepen_min_child=250" +icount=200000 +report_interval=5 +run_time=300 +reopen_connection=false +populate_threads=2 +value_sz=256 +read_range=100 +threads=((count=4,inserts=1,throttle=100000),(count=8,reads=1)) diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c index 955f605c0b3..5386096d9b7 100644 --- a/bench/wtperf/wtperf.c +++ b/bench/wtperf/wtperf.c @@ -60,6 +60,7 @@ static const CONFIG default_cfg = { 0, /* total seconds running */ 0, /* has truncate */ {NULL, NULL}, /* the truncate queue */ + {NULL, NULL}, /* the config queue */ #define 
OPT_DEFINE_DEFAULT #include "wtperf_opt.i" @@ -371,6 +372,53 @@ err: cfg->error = cfg->stop = 1; return (NULL); } +/* + * do_range_reads -- + * If configured to execute a sequence of next operations after each + * search do them. Ensuring the keys we see are always in order. + */ +static int +do_range_reads(CONFIG *cfg, WT_CURSOR *cursor) +{ + size_t range; + uint64_t next_val, prev_val; + char *range_key_buf; + char buf[512]; + int ret; + + ret = 0; + + if (cfg->read_range == 0) + return (0); + + memset(&buf[0], 0, 512 * sizeof(char)); + range_key_buf = &buf[0]; + + /* Save where the first key is for comparisons. */ + cursor->get_key(cursor, &range_key_buf); + extract_key(range_key_buf, &next_val); + + for (range = 0; range < cfg->read_range; ++range) { + prev_val = next_val; + ret = cursor->next(cursor); + /* We are done if we reach the end. */ + if (ret != 0) + break; + + /* Retrieve and decode the key */ + cursor->get_key(cursor, &range_key_buf); + extract_key(range_key_buf, &next_val); + if (next_val < prev_val) { + lprintf(cfg, EINVAL, 0, + "Out of order keys %" PRIu64 + " came before %" PRIu64, + prev_val, next_val); + return (EINVAL); + } + } + return (0); +} + static void * worker(void *arg) { @@ -381,8 +429,8 @@ worker(void *arg) WT_CONNECTION *conn; WT_CURSOR **cursors, *cursor, *tmp_cursor; WT_SESSION *session; - int64_t ops, ops_per_txn, throttle_ops; size_t i; + int64_t ops, ops_per_txn, throttle_ops; uint64_t next_val, usecs; uint8_t *op, *op_end; int measure_latency, ret, truncated; @@ -533,7 +581,14 @@ worker(void *arg) "get_value in read."); goto err; } + /* + * If we want to read a range, then call next + * for several operations, confirming that the + * next key is in the correct order. 
+ */ + ret = do_range_reads(cfg, cursor); } + if (ret == 0 || ret == WT_NOTFOUND) break; goto op_err; @@ -2103,6 +2158,8 @@ main(int argc, char *argv[]) if (config_assign(cfg, &default_cfg)) goto err; + TAILQ_INIT(&cfg->config_head); + /* Do a basic validation of options, and home is needed before open. */ while ((ch = __wt_getopt("wtperf", argc, argv, opts)) != EOF) switch (ch) { @@ -2308,6 +2365,9 @@ main(int argc, char *argv[]) if ((ret = config_sanity(cfg)) != 0) goto err; + /* Write a copy of the config. */ + config_to_file(cfg); + /* Display the configuration. */ if (cfg->verbose > 1) config_print(cfg); @@ -2333,7 +2393,7 @@ start_threads(CONFIG *cfg, WORKLOAD *workp, CONFIG_THREAD *base, u_int num, void *(*func)(void *)) { CONFIG_THREAD *thread; - u_int i, j; + u_int i; int ret; /* Initialize the threads. */ @@ -2342,15 +2402,13 @@ start_threads(CONFIG *cfg, thread->workload = workp; /* - * We don't want the threads executing in lock-step, move each - * new RNG state further along in the sequence. + * We don't want the threads executing in lock-step, seed each + * one differently. 
*/ - if (i == 0) - __wt_random_init(&thread->rnd); - else - thread->rnd = (thread - 1)->rnd; - for (j = 0; j < 1000; ++j) - (void)__wt_random(&thread->rnd); + if ((ret = __wt_random_init_seed(NULL, &thread->rnd)) != 0) { + lprintf(cfg, ret, 0, "Error initializing RNG"); + return (ret); + } /* * Every thread gets a key/data buffer because we don't bother diff --git a/bench/wtperf/wtperf.h b/bench/wtperf/wtperf.h index b26e978c13b..361b135ced7 100644 --- a/bench/wtperf/wtperf.h +++ b/bench/wtperf/wtperf.h @@ -127,6 +127,12 @@ struct __truncate_queue_entry { }; typedef struct __truncate_queue_entry TRUNCATE_QUEUE_ENTRY; +struct __config_queue_entry { + char *string; + TAILQ_ENTRY(__config_queue_entry) c; +}; +typedef struct __config_queue_entry CONFIG_QUEUE_ENTRY; + #define LOG_PARTIAL_CONFIG ",log=(enabled=false)" /* * NOTE: If you add any fields to this structure here, you must also add @@ -181,6 +187,9 @@ struct __config { /* Configuration structure */ /* Queue head for use with the Truncate Logic */ TAILQ_HEAD(__truncate_qh, __truncate_queue_entry) stone_head; + /* Queue head to save a copy of the config to be output */ + TAILQ_HEAD(__config_qh, __config_queue_entry) config_head; + /* Fields changeable on command line are listed in wtperf_opt.i */ #define OPT_DECLARE_STRUCT #include "wtperf_opt.i" @@ -189,6 +198,7 @@ struct __config { /* Configuration structure */ #define ELEMENTS(a) (sizeof(a) / sizeof(a[0])) +#define READ_RANGE_OPS 10 #define THROTTLE_OPS 100 #define THOUSAND (1000ULL) @@ -271,6 +281,8 @@ void config_free(CONFIG *); int config_opt_file(CONFIG *, const char *); int config_opt_line(CONFIG *, const char *); int config_opt_str(CONFIG *, const char *, const char *); +void config_to_file(CONFIG *); +void config_consolidate(CONFIG *); void config_print(CONFIG *); int config_sanity(CONFIG *); void latency_insert(CONFIG *, uint32_t *, uint32_t *, uint32_t *); @@ -305,4 +317,10 @@ generate_key(CONFIG *cfg, char *key_buf, uint64_t keyno) sprintf(key_buf, 
"%0*" PRIu64, cfg->key_sz - 1, keyno); } +static inline void +extract_key(char *key_buf, uint64_t *keynop) +{ + sscanf(key_buf, "%" SCNu64, keynop); +} + #endif diff --git a/bench/wtperf/wtperf_opt.i b/bench/wtperf/wtperf_opt.i index a9d3c033b74..3c122e4d186 100644 --- a/bench/wtperf/wtperf_opt.i +++ b/bench/wtperf/wtperf_opt.i @@ -140,6 +140,7 @@ DEF_OPT_AS_UINT32(random_range, 0, "if non zero choose a value from within this range as the key for " "insert operations") DEF_OPT_AS_BOOL(random_value, 0, "generate random content for the value") +DEF_OPT_AS_UINT32(read_range, 0, "scan a range of keys after each search") DEF_OPT_AS_BOOL(reopen_connection, 1, "close and reopen the connection between populate and workload phases") DEF_OPT_AS_UINT32(report_interval, 2, diff --git a/build_posix/configure.ac.in b/build_posix/configure.ac.in index de2f8963629..5949fb0509c 100644 --- a/build_posix/configure.ac.in +++ b/build_posix/configure.ac.in @@ -103,7 +103,7 @@ esac # Linux requires buffers aligned to 4KB boundaries for O_DIRECT to work. BUFFER_ALIGNMENT=0 -if test "$ac_cv_func_posix_memalign" = "yes" ; then +if test "$ax_cv_func_posix_memalign_works" = "yes" ; then case "$host_os" in linux*) BUFFER_ALIGNMENT=4096 ;; esac diff --git a/build_win/filelist.win b/build_win/filelist.win index af6ddf98da9..b845c45823e 100644 --- a/build_win/filelist.win +++ b/build_win/filelist.win @@ -121,6 +121,7 @@ src/os_win/os_map.c src/os_win/os_mtx_cond.c src/os_win/os_once.c src/os_win/os_open.c +src/os_win/os_pagesize.c src/os_win/os_path.c src/os_win/os_priv.c src/os_win/os_remove.c diff --git a/dist/api_data.py b/dist/api_data.py index f58a48b4a0b..ff6d3f3ccb5 100644 --- a/dist/api_data.py +++ b/dist/api_data.py @@ -814,21 +814,19 @@ methods = { 'WT_SESSION.open_cursor' : Method(cursor_runtime_config + [ Config('bulk', 'false', r''' - configure the cursor for bulk-loading, a fast, initial load - path (see @ref tune_bulk_load for more information). 
Bulk-load - may only be used for newly created objects and cursors - configured for bulk-load only support the WT_CURSOR::insert - and WT_CURSOR::close methods. When bulk-loading row-store - objects, keys must be loaded in sorted order. The value is - usually a true/false flag; when bulk-loading fixed-length - column store objects, the special value \c bitmap allows - chunks of a memory resident bitmap to be loaded directly into - a file by passing a \c WT_ITEM to WT_CURSOR::set_value where - the \c size field indicates the number of records in the - bitmap (as specified by the object's \c value_format - configuration). Bulk-loaded bitmap values must end on a byte - boundary relative to the bit count (except for the last set - of values loaded)'''), + configure the cursor for bulk-loading, a fast, initial load path + (see @ref tune_bulk_load for more information). Bulk-load may + only be used for newly created objects and applications should + use the WT_CURSOR::insert method to insert rows. When + bulk-loading, rows must be loaded in sorted order. The value + is usually a true/false flag; when bulk-loading fixed-length + column store objects, the special value \c bitmap allows chunks + of a memory resident bitmap to be loaded directly into a file + by passing a \c WT_ITEM to WT_CURSOR::set_value where the \c + size field indicates the number of records in the bitmap (as + specified by the object's \c value_format configuration). + Bulk-loaded bitmap values must end on a byte boundary relative + to the bit count (except for the last set of values loaded)'''), Config('checkpoint', '', r''' the name of a checkpoint to open (the reserved name "WiredTigerCheckpoint" opens the most recent internal @@ -843,12 +841,20 @@ methods = { with the @ref util_dump and @ref util_load commands''', choices=['hex', 'json', 'print']), Config('next_random', 'false', r''' - configure the cursor to return a pseudo-random record from - the object; valid only for row-store cursors. 
Cursors - configured with \c next_random=true only support the - WT_CURSOR::next and WT_CURSOR::close methods. See @ref - cursor_random for details''', + configure the cursor to return a pseudo-random record from the + object when the WT_CURSOR::next method is called; valid only for + row-store cursors. See @ref cursor_random for details''', type='boolean'), + Config('next_random_sample_size', '0', r''' + cursors configured by \c next_random to return pseudo-random + records from the object randomly select from the entire object, + by default. Setting \c next_random_sample_size to a non-zero + value sets the number of samples the application expects to take + using the \c next_random cursor. A cursor configured with both + \c next_random and \c next_random_sample_size attempts to divide + the object into \c next_random_sample_size equal-sized pieces, + and each retrieval returns a record from one of those pieces. See + @ref cursor_random for details'''), Config('raw', 'false', r''' ignore the encodings for the key and value, manage data as if the formats were \c "u". 
See @ref cursor_raw for details''', diff --git a/dist/filelist b/dist/filelist index 52af87c2a68..dde090e5a85 100644 --- a/dist/filelist +++ b/dist/filelist @@ -119,6 +119,7 @@ src/os_posix/os_mtx_cond.c src/os_posix/os_mtx_rw.c src/os_posix/os_once.c src/os_posix/os_open.c +src/os_posix/os_pagesize.c src/os_posix/os_path.c src/os_posix/os_priv.c src/os_posix/os_remove.c diff --git a/dist/flags.py b/dist/flags.py index 1965dfb7dbe..7d237dd39a4 100644 --- a/dist/flags.py +++ b/dist/flags.py @@ -37,6 +37,7 @@ flags = { 'READ_NO_WAIT', 'READ_PREV', 'READ_SKIP_INTL', + 'READ_SKIP_LEAF', 'READ_TRUNCATE', 'READ_WONT_NEED', ], diff --git a/dist/log.py b/dist/log.py index feeb053db3e..6d35bf2e718 100644 --- a/dist/log.py +++ b/dist/log.py @@ -8,14 +8,15 @@ import log_data tmp_file = '__tmp' # Map log record types to: -# (C type, pack type, printf format, printf arg(s), printf setup) +# (C type, pack type, printf format, printf arg(s), list of setup functions) field_types = { - 'string' : ('const char *', 'S', '%s', 'arg', ''), + 'string' : ('const char *', 'S', '%s', 'arg', [ '' ]), 'item' : ('WT_ITEM *', 'u', '%s', 'escaped', - 'WT_ERR(__logrec_jsonify_str(session, &escaped, &arg));'), - 'recno' : ('uint64_t', 'r', '%" PRIu64 "', 'arg', ''), - 'uint32' : ('uint32_t', 'I', '%" PRIu32 "', 'arg', ''), - 'uint64' : ('uint64_t', 'Q', '%" PRIu64 "', 'arg', ''), + [ 'WT_ERR(__logrec_make_json_str(session, &escaped, &arg));', + 'WT_ERR(__logrec_make_hex_str(session, &escaped, &arg));']), + 'recno' : ('uint64_t', 'r', '%" PRIu64 "', 'arg', [ '' ]), + 'uint32' : ('uint32_t', 'I', '%" PRIu32 "', 'arg', [ '' ]), + 'uint64' : ('uint64_t', 'Q', '%" PRIu64 "', 'arg', [ '' ]), } def cintype(f): @@ -38,15 +39,13 @@ def clocaltype(f): return type def escape_decl(fields): - for f in fields: - if 'escaped' in field_types[f[0]][4]: - return '\n\tchar *escaped;' - return '' + return '\n\tchar *escaped;' if has_escape(fields) else '' def has_escape(fields): for f in fields: - if 'escaped' in 
field_types[f[0]][4]: - return True + for setup in field_types[f[0]][4]: + if 'escaped' in setup: + return True return False def pack_fmt(fields): @@ -65,10 +64,38 @@ def printf_arg(f): arg = field_types[f[0]][3].replace('arg', f[1]) return ' ' + arg -def printf_setup(f): - stmt = field_types[f[0]][4].replace('arg', f[1]) - return '' if stmt == '' else stmt + '\n\t' - +def printf_setup(f, i, nl_indent): + stmt = field_types[f[0]][4][i].replace('arg', f[1]) + return '' if stmt == '' else stmt + nl_indent + +def n_setup(f): + return len(field_types[f[0]][4]) + +# Create a printf line, with an optional setup function. +# ishex indicates that the the field name in the output is modified +# (to add "-hex"), and that the setup and printf are conditional +# in the generated code. +def printf_line(f, optype, i, ishex): + ifbegin = '' + ifend = '' + nl_indent = '\n\t' + name = f[1] + postcomma = '' if i + 1 == len(optype.fields) else ',\\n' + precomma = '' + if ishex > 0: + name += '-hex' + ifend = nl_indent + '}' + nl_indent += '\t' + ifbegin = 'if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) {' + nl_indent + if postcomma == '': + precomma = ',\\n' + body = '%s%s(__wt_fprintf(out,' % ( + printf_setup(f, ishex, nl_indent), + 'WT_ERR' if has_escape(optype.fields) else 'WT_RET') + \ + '%s "%s \\"%s\\": \\"%s\\"%s",%s));' % ( + nl_indent, precomma, name, printf_fmt(f), postcomma, + printf_arg(f)) + return ifbegin + body + ifend ##################################################################### # Update log.h with #defines for types @@ -176,7 +203,7 @@ __logrec_json_unpack_str(char *dest, size_t destlen, const char *src, } static int -__logrec_jsonify_str(WT_SESSION_IMPL *session, char **destp, WT_ITEM *item) +__logrec_make_json_str(WT_SESSION_IMPL *session, char **destp, WT_ITEM *item) { \tsize_t needed; @@ -185,6 +212,17 @@ __logrec_jsonify_str(WT_SESSION_IMPL *session, char **destp, WT_ITEM *item) \t(void)__logrec_json_unpack_str(*destp, needed, item->data, item->size); \treturn (0); 
} + +static int +__logrec_make_hex_str(WT_SESSION_IMPL *session, char **destp, WT_ITEM *item) +{ +\tsize_t needed; + +\tneeded = item->size * 2 + 1; +\tWT_RET(__wt_realloc(session, NULL, needed, destp)); +\t__wt_fill_hex(item->data, item->size, (uint8_t *)*destp, needed, NULL); +\treturn (0); +} ''') # Emit code to read, write and print log operations (within a log record) @@ -255,11 +293,12 @@ __wt_logop_%(name)s_unpack( tfile.write(''' int __wt_logop_%(name)s_print( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out) + WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, + FILE *out, uint32_t flags) { %(arg_ret)s\t%(arg_decls)s -\t%(arg_init)sWT_RET(__wt_logop_%(name)s_unpack( +\t%(arg_unused)s%(arg_init)sWT_RET(__wt_logop_%(name)s_unpack( \t session, pp, end%(arg_addrs)s)); \tWT_RET(__wt_fprintf(out, " \\"optype\\": \\"%(name)s\\",\\n")); @@ -272,27 +311,22 @@ __wt_logop_%(name)s_print( 'arg_decls' : ('\n\t'.join('%s%s%s;' % (clocaltype(f), '' if clocaltype(f)[-1] == '*' else ' ', f[1]) for f in optype.fields)) + escape_decl(optype.fields), + 'arg_unused' : ('' if has_escape(optype.fields) + else 'WT_UNUSED(flags);\n\t'), 'arg_init' : ('escaped = NULL;\n\t' if has_escape(optype.fields) else ''), 'arg_fini' : ('\nerr:\t__wt_free(session, escaped);\n\treturn (ret);' if has_escape(optype.fields) else '\treturn (0);'), 'arg_addrs' : ''.join(', &%s' % f[1] for f in optype.fields), - 'print_args' : '\n\t'.join( - '%s%s(__wt_fprintf(out,\n\t " \\"%s\\": \\"%s\\",\\n",%s));' % - (printf_setup(f), - 'WT_ERR' if has_escape(optype.fields) else 'WT_RET', - f[1], printf_fmt(f), printf_arg(f)) - for f in optype.fields[:-1]) + str( - '\n\t%s%s(__wt_fprintf(out,\n\t " \\"%s\\": \\"%s\\"",%s));' % - (printf_setup(last_field), - 'WT_ERR' if has_escape(optype.fields) else 'WT_RET', - last_field[1], printf_fmt(last_field), printf_arg(last_field))), + 'print_args' : '\n\t'.join(printf_line(f, optype, i, s) + for i,f in enumerate(optype.fields) 
for s in range(0, n_setup(f))) }) # Emit the printlog entry point tfile.write(''' int __wt_txn_op_printlog( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out) + WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, + FILE *out, uint32_t flags) { \tuint32_t optype, opsize; @@ -308,7 +342,8 @@ for optype in log_data.optypes: tfile.write(''' \tcase %(macro)s: -\t\tWT_RET(%(print_func)s(session, pp, end, out)); +\t\tWT_RET(%(print_func)s(session, pp, end, out, +\t\t flags)); \t\tbreak; ''' % { 'macro' : optype.macro_name(), diff --git a/dist/s_copyright b/dist/s_copyright index 020be6ae33d..0816274a367 100755 --- a/dist/s_copyright +++ b/dist/s_copyright @@ -6,6 +6,7 @@ c1=__wt.copyright.1 c2=__wt.copyright.2 c3=__wt.copyright.3 c4=__wt.copyright.4 +c5=__wt.copyright.5 check() { @@ -34,6 +35,9 @@ check() if `sed -e 1,3p -e 4q -e d $1 | diff - dist/$c4 > /dev/null` ; then return; fi + if `sed -e 2,7p -e 8q -e d $1 | diff - dist/$c5 > /dev/null` ; then + return; + fi echo "$1: copyright information is incorrect" exit 1 @@ -81,6 +85,16 @@ cat > $c4 <<ENDOFTEXT # This is free and unencumbered software released into the public domain. ENDOFTEXT +cat > $c5 <<ENDOFTEXT + * Copyright (c) 2014-$year MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: +ENDOFTEXT + # Search for files, skipping some well-known 3rd party directories. (cd .. && find [a-z]* -name '*.[chi]' \ -o -name '*.cxx' \ diff --git a/dist/s_funcs b/dist/s_funcs index 3769ccc4aa7..5fee03b5615 100755 --- a/dist/s_funcs +++ b/dist/s_funcs @@ -6,7 +6,7 @@ trap 'rm -f $t; exit 0' 0 1 2 3 13 15 # List of files to search. 
l=`sed -e 's,#.*,,' -e '/^$/d' -e 's,^,../,' filelist` -l="$l `echo ../src/*/*.i ../src/utilities/*.c`" +l="$l `echo ../src/*/*.i ../src/utilities/*.c ../bench/wtperf/*.c`" ( # Copy out the functions we don't use, but it's OK. diff --git a/dist/s_longlines b/dist/s_longlines index 15ca5603385..decedb58f44 100755 --- a/dist/s_longlines +++ b/dist/s_longlines @@ -8,10 +8,11 @@ l=`(cd .. && find bench/wtperf examples ext src test -name '*.[chisy]' && find dist -name '*.py' && find src -name '*.in') | - sed -e '/include\/extern\.h/d'\ - -e '/support\/stat\.c/d'` + sed -e '/dist\/stat_data\.py/d' \ + -e '/support\/stat\.c/d' \ + -e '/include\/extern\.h/d'` for f in $l ; do expand -t8 < ../$f | awk -- \ - "{if(length(\$0) > 80) printf(\"%s:%d\\n\", \"$f\", NR)}" + "{if(length(\$0) > 80) printf(\"%s:%d\\n\", \"$f\", NR)}" done diff --git a/dist/s_string.ok b/dist/s_string.ok index b408888970b..27583402259 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -144,6 +144,7 @@ INIT INITIALIZER INMEM INTL +ISA ITEMs Inline Intra @@ -180,6 +181,7 @@ LevelDB Levyx Llqr Llqrt +LoadLoad LockFile Lookaside Lookup @@ -187,6 +189,7 @@ MALLOC MEM MEMALIGN MERCHANTABILITY +MONGODB MSVC MULTIBLOCK MUTEX @@ -283,10 +286,13 @@ Spinlock Spinlocks Split's Stoica +StoreLoad +StoreStore TAILQ TCMalloc TODO TORTIOUS +TSO TXN TXNC Timespec @@ -301,6 +307,7 @@ ULINE URI URIs UTF +UltraSparc Unbuffered UnixLib Unmap @@ -371,6 +378,7 @@ automake bInheritHandle basecfg basho +bcr bdb beginthreadex bigram @@ -412,6 +420,7 @@ bzip calloc cas catfmt +ccr cd centric cfg @@ -423,6 +432,7 @@ checkpointer checkpointing checksum checksums +children's chk chongo cip @@ -711,6 +721,7 @@ lookaside lookup lookups lossy +lr lrt lru lseek @@ -719,6 +730,7 @@ lsn lsnappy lt lu +lwsync lz lzo madvise @@ -726,6 +738,8 @@ majorp malloc marshall marshalled +mbll +mbss mem memalign membar @@ -802,6 +816,7 @@ os ovfl ownp packv +pagesize parens pareto parserp @@ -1022,6 +1037,7 @@ variable's vectorized versa 
vfprintf +vm vpack vprintf vrfy diff --git a/dist/s_win b/dist/s_win index cdfc71a8a1e..1eb4702d517 100755 --- a/dist/s_win +++ b/dist/s_win @@ -62,6 +62,7 @@ win_filelist() -e 's;os_posix/os_mtx_cond.c;os_win/os_mtx_cond.c;' \ -e 's;os_posix/os_once.c;os_win/os_once.c;' \ -e 's;os_posix/os_open.c;os_win/os_open.c;' \ + -e 's;os_posix/os_pagesize.c;os_win/os_pagesize.c;' \ -e 's;os_posix/os_path.c;os_win/os_path.c;' \ -e 's;os_posix/os_priv.c;os_win/os_priv.c;' \ -e 's;os_posix/os_remove.c;os_win/os_remove.c;' \ diff --git a/dist/stat.py b/dist/stat.py index d62fda3fcb9..6dcfccfeab5 100644 --- a/dist/stat.py +++ b/dist/stat.py @@ -171,9 +171,7 @@ __wt_stat_''' + name + '''_aggregate_single( { ''') for l in sorted(list): - if 'no_aggregate' in l.flags: - o = '\tto->' + l.name + ' = from->' + l.name + ';\n' - elif 'max_aggregate' in l.flags: + if 'max_aggregate' in l.flags: o = '\tif (from->' + l.name + ' > to->' + l.name + ')\n' +\ '\t\tto->' + l.name + ' = from->' + l.name + ';\n' else: @@ -197,12 +195,12 @@ __wt_stat_''' + name + '''_aggregate( f.write('\tint64_t v;\n\n') break; for l in sorted(list): - if 'no_aggregate' in l.flags: - o = '\tto->' + l.name + ' = from[0]->' + l.name + ';\n' - elif 'max_aggregate' in l.flags: - o = '\tif ((v = WT_STAT_READ(from, ' + l.name + ')) >\n' +\ - '\t to->' + l.name + ')\n' +\ - '\t\tto->' + l.name + ' = v;\n' + if 'max_aggregate' in l.flags: + o = '\tif ((v = WT_STAT_READ(from, ' + l.name + ')) > ' +\ + 'to->' + l.name + ')\n' + if len(o) > 72: # Account for the leading tab. + o = o.replace(' > ', ' >\n\t ') + o +='\t\tto->' + l.name + ' = v;\n' else: o = '\tto->' + l.name + ' += WT_STAT_READ(from, ' + l.name + ');\n' if len(o) > 72: # Account for the leading tab. diff --git a/dist/stat_data.py b/dist/stat_data.py index 3a23071a3f2..41a93961079 100644 --- a/dist/stat_data.py +++ b/dist/stat_data.py @@ -8,20 +8,13 @@ # NOTE: All statistics descriptions must have a prefix string followed by ':'. 
# # Data-source statistics are normally aggregated across the set of underlying -# objects. Additional optionaly configuration flags are available: -# no_aggregate Ignore the value when aggregating statistics +# objects. Additional optional configuration flags are available: # max_aggregate Take the maximum value when aggregating statistics -# -# Optional configuration flags: # no_clear Value not cleared when statistics cleared # no_scale Don't scale value per second in the logging tool script # -# The no_clear flag is a little complicated: it means we don't clear the values -# when resetting statistics after each run (necessary when the WiredTiger engine -# is updating values that persist over multiple runs, for example the count of -# cursors), but it also causes the underlying display routines to not treat the -# change between displays as relative to the number of seconds, that is, it's an -# absolute value. The no_clear flag should be set in either case. +# The no_clear and no_scale flags are normally always set together (values that +# are maintained over time are normally not scaled per second). 
from operator import attrgetter import sys @@ -129,13 +122,11 @@ connection_stats = [ # Async API statistics ########################################## AsyncStat('async_alloc_race', 'number of allocation state races'), - AsyncStat('async_alloc_view', - 'number of operation slots viewed for allocation'), + AsyncStat('async_alloc_view', 'number of operation slots viewed for allocation'), AsyncStat('async_cur_queue', 'current work queue length'), AsyncStat('async_flush', 'number of flush calls'), AsyncStat('async_full', 'number of times operation allocation failed'), - AsyncStat('async_max_queue', - 'maximum work queue length', 'no_clear,no_scale'), + AsyncStat('async_max_queue', 'maximum work queue length', 'no_clear,no_scale'), AsyncStat('async_nowork', 'number of times worker found no work'), AsyncStat('async_op_alloc', 'total allocations'), AsyncStat('async_op_compact', 'total compact calls'), @@ -158,89 +149,59 @@ connection_stats = [ ########################################## # Cache and eviction statistics ########################################## - CacheStat('cache_bytes_dirty', - 'tracked dirty bytes in the cache', 'no_clear,no_scale'), - CacheStat('cache_bytes_internal', - 'tracked bytes belonging to internal pages in the cache', - 'no_clear,no_scale'), - CacheStat('cache_bytes_inuse', - 'bytes currently in the cache', 'no_clear,no_scale'), - CacheStat('cache_bytes_leaf', - 'tracked bytes belonging to leaf pages in the cache', - 'no_clear,no_scale'), - CacheStat('cache_bytes_max', - 'maximum bytes configured', 'no_clear,no_scale'), - CacheStat('cache_bytes_overflow', - 'tracked bytes belonging to overflow pages in the cache', - 'no_clear,no_scale'), + CacheStat('cache_bytes_dirty', 'tracked dirty bytes in the cache', 'no_clear,no_scale'), + CacheStat('cache_bytes_internal', 'tracked bytes belonging to internal pages in the cache', 'no_clear,no_scale'), + CacheStat('cache_bytes_inuse', 'bytes currently in the cache', 'no_clear,no_scale'), + 
CacheStat('cache_bytes_leaf', 'tracked bytes belonging to leaf pages in the cache', 'no_clear,no_scale'), + CacheStat('cache_bytes_max', 'maximum bytes configured', 'no_clear,no_scale'), + CacheStat('cache_bytes_overflow', 'tracked bytes belonging to overflow pages in the cache', 'no_clear,no_scale'), CacheStat('cache_bytes_read', 'bytes read into cache'), CacheStat('cache_bytes_write', 'bytes written from cache'), CacheStat('cache_eviction_app', 'pages evicted by application threads'), CacheStat('cache_eviction_checkpoint', 'checkpoint blocked page eviction'), CacheStat('cache_eviction_clean', 'unmodified pages evicted'), - CacheStat('cache_eviction_deepen', - 'page split during eviction deepened the tree'), + CacheStat('cache_eviction_deepen', 'page split during eviction deepened the tree'), CacheStat('cache_eviction_dirty', 'modified pages evicted'), - CacheStat('cache_eviction_fail', - 'pages selected for eviction unable to be evicted'), - CacheStat('cache_eviction_force', - 'pages evicted because they exceeded the in-memory maximum'), - CacheStat('cache_eviction_force_delete', - 'pages evicted because they had chains of deleted items'), - CacheStat('cache_eviction_force_fail', - 'failed eviction of pages that exceeded the in-memory maximum'), + CacheStat('cache_eviction_fail', 'pages selected for eviction unable to be evicted'), + CacheStat('cache_eviction_force', 'pages evicted because they exceeded the in-memory maximum'), + CacheStat('cache_eviction_force_delete', 'pages evicted because they had chains of deleted items'), + CacheStat('cache_eviction_force_fail', 'failed eviction of pages that exceeded the in-memory maximum'), CacheStat('cache_eviction_hazard', 'hazard pointer blocked page eviction'), CacheStat('cache_eviction_internal', 'internal pages evicted'), - CacheStat('cache_eviction_maximum_page_size', - 'maximum page size at eviction', 'no_clear,no_scale'), - CacheStat('cache_eviction_queue_empty', - 'eviction server candidate queue empty when 
topping up'), - CacheStat('cache_eviction_queue_not_empty', - 'eviction server candidate queue not empty when topping up'), - CacheStat('cache_eviction_server_evicting', - 'eviction server evicting pages'), - CacheStat('cache_eviction_server_not_evicting', - 'eviction server populating queue, but not evicting pages'), - CacheStat('cache_eviction_slow', - 'eviction server unable to reach eviction goal'), - CacheStat('cache_eviction_split_internal', - 'internal pages split during eviction'), + CacheStat('cache_eviction_maximum_page_size', 'maximum page size at eviction', 'no_clear,no_scale'), + CacheStat('cache_eviction_queue_empty', 'eviction server candidate queue empty when topping up'), + CacheStat('cache_eviction_queue_not_empty', 'eviction server candidate queue not empty when topping up'), + CacheStat('cache_eviction_server_evicting', 'eviction server evicting pages'), + CacheStat('cache_eviction_server_not_evicting', 'eviction server populating queue, but not evicting pages'), + CacheStat('cache_eviction_slow', 'eviction server unable to reach eviction goal'), + CacheStat('cache_eviction_split_internal', 'internal pages split during eviction'), CacheStat('cache_eviction_split_leaf', 'leaf pages split during eviction'), CacheStat('cache_eviction_walk', 'pages walked for eviction'), - CacheStat('cache_eviction_worker_evicting', - 'eviction worker thread evicting pages'), + CacheStat('cache_eviction_worker_evicting', 'eviction worker thread evicting pages'), CacheStat('cache_inmem_split', 'in-memory page splits'), - CacheStat('cache_inmem_splittable', - 'in-memory page passed criteria to be split'), + CacheStat('cache_inmem_splittable', 'in-memory page passed criteria to be split'), CacheStat('cache_lookaside_insert', 'lookaside table insert calls'), CacheStat('cache_lookaside_remove', 'lookaside table remove calls'), CacheStat('cache_overhead', 'percentage overhead', 'no_clear,no_scale'), - CacheStat('cache_pages_dirty', - 'tracked dirty pages in the cache', 
'no_clear,no_scale'), - CacheStat('cache_pages_inuse', - 'pages currently held in the cache', 'no_clear,no_scale'), + CacheStat('cache_pages_dirty', 'tracked dirty pages in the cache', 'no_clear,no_scale'), + CacheStat('cache_pages_inuse', 'pages currently held in the cache', 'no_clear,no_scale'), CacheStat('cache_read', 'pages read into cache'), - CacheStat('cache_read_lookaside', - 'pages read into cache requiring lookaside entries'), + CacheStat('cache_read_lookaside', 'pages read into cache requiring lookaside entries'), CacheStat('cache_write', 'pages written from cache'), - CacheStat('cache_write_lookaside', - 'page written requiring lookaside records'), - CacheStat('cache_write_restore', - 'pages written requiring in-memory restoration'), + CacheStat('cache_write_lookaside', 'page written requiring lookaside records'), + CacheStat('cache_write_restore', 'pages written requiring in-memory restoration'), ########################################## # Dhandle statistics ########################################## - DhandleStat('dh_conn_handle_count', - 'connection data handles currently active', 'no_clear,no_scale'), + DhandleStat('dh_conn_handle_count', 'connection data handles currently active', 'no_clear,no_scale'), + DhandleStat('dh_session_handles', 'session dhandles swept'), + DhandleStat('dh_session_sweeps', 'session sweep attempts'), DhandleStat('dh_sweep_close', 'connection sweep dhandles closed'), - DhandleStat('dh_sweep_remove', - 'connection sweep dhandles removed from hash list'), DhandleStat('dh_sweep_ref', 'connection sweep candidate became referenced'), + DhandleStat('dh_sweep_remove', 'connection sweep dhandles removed from hash list'), DhandleStat('dh_sweep_tod', 'connection sweep time-of-death sets'), DhandleStat('dh_sweeps', 'connection sweeps'), - DhandleStat('dh_session_handles', 'session dhandles swept'), - DhandleStat('dh_session_sweeps', 'session sweep attempts'), ########################################## # Logging statistics @@ -257,10 
+218,8 @@ connection_stats = [ LogStat('log_flush', 'log flush operations'), LogStat('log_max_filesize', 'maximum log file size', 'no_clear,no_scale'), LogStat('log_prealloc_files', 'pre-allocated log files prepared'), - LogStat('log_prealloc_max', - 'number of pre-allocated log files to create', 'no_clear,no_scale'), - LogStat('log_prealloc_missed', - 'pre-allocated log files not ready and missed'), + LogStat('log_prealloc_max', 'number of pre-allocated log files to create', 'no_clear,no_scale'), + LogStat('log_prealloc_missed', 'pre-allocated log files not ready and missed'), LogStat('log_prealloc_used', 'pre-allocated log files used'), LogStat('log_release_write_lsn', 'log release advances write LSN'), LogStat('log_scan_records', 'records processed by log scan'), @@ -283,46 +242,32 @@ connection_stats = [ ########################################## # Reconciliation statistics ########################################## - RecStat('rec_pages', 'page reconciliation calls'), RecStat('rec_page_delete', 'pages deleted'), RecStat('rec_page_delete_fast', 'fast-path pages deleted'), + RecStat('rec_pages', 'page reconciliation calls'), RecStat('rec_pages_eviction', 'page reconciliation calls for eviction'), - RecStat('rec_split_stashed_bytes', - 'split bytes currently awaiting free', 'no_clear,no_scale'), - RecStat('rec_split_stashed_objects', - 'split objects currently awaiting free', 'no_clear,no_scale'), + RecStat('rec_split_stashed_bytes', 'split bytes currently awaiting free', 'no_clear,no_scale'), + RecStat('rec_split_stashed_objects', 'split objects currently awaiting free', 'no_clear,no_scale'), ########################################## # Transaction statistics ########################################## TxnStat('txn_begin', 'transaction begins'), TxnStat('txn_checkpoint', 'transaction checkpoints'), - TxnStat('txn_checkpoint_generation', - 'transaction checkpoint generation', 'no_clear,no_scale'), - TxnStat('txn_checkpoint_running', - 'transaction checkpoint 
currently running', 'no_clear,no_scale'), - TxnStat('txn_checkpoint_time_max', - 'transaction checkpoint max time (msecs)', 'no_clear,no_scale'), - TxnStat('txn_checkpoint_time_min', - 'transaction checkpoint min time (msecs)', 'no_clear,no_scale'), - TxnStat('txn_checkpoint_time_recent', - 'transaction checkpoint most recent time (msecs)', 'no_clear,no_scale'), - TxnStat('txn_checkpoint_time_total', - 'transaction checkpoint total time (msecs)', 'no_clear,no_scale'), + TxnStat('txn_checkpoint_generation', 'transaction checkpoint generation', 'no_clear,no_scale'), + TxnStat('txn_checkpoint_running', 'transaction checkpoint currently running', 'no_clear,no_scale'), + TxnStat('txn_checkpoint_time_max', 'transaction checkpoint max time (msecs)', 'no_clear,no_scale'), + TxnStat('txn_checkpoint_time_min', 'transaction checkpoint min time (msecs)', 'no_clear,no_scale'), + TxnStat('txn_checkpoint_time_recent', 'transaction checkpoint most recent time (msecs)', 'no_clear,no_scale'), + TxnStat('txn_checkpoint_time_total', 'transaction checkpoint total time (msecs)', 'no_clear,no_scale'), TxnStat('txn_commit', 'transactions committed'), - TxnStat('txn_fail_cache', - 'transaction failures due to cache overflow'), - TxnStat('txn_pinned_checkpoint_range', - 'transaction range of IDs currently pinned by a checkpoint', - 'no_clear,no_scale'), - TxnStat('txn_pinned_range', - 'transaction range of IDs currently pinned', 'no_clear,no_scale'), - TxnStat('txn_pinned_snapshot_range', - 'transaction range of IDs currently pinned by named snapshots', - 'no_clear,no_scale'), + TxnStat('txn_fail_cache', 'transaction failures due to cache overflow'), + TxnStat('txn_pinned_checkpoint_range', 'transaction range of IDs currently pinned by a checkpoint', 'no_clear,no_scale'), + TxnStat('txn_pinned_range', 'transaction range of IDs currently pinned', 'no_clear,no_scale'), + TxnStat('txn_pinned_snapshot_range', 'transaction range of IDs currently pinned by named snapshots', 'no_clear,no_scale'), 
+ TxnStat('txn_rollback', 'transactions rolled back'), TxnStat('txn_snapshots_created', 'number of named snapshots created'), TxnStat('txn_snapshots_dropped', 'number of named snapshots dropped'), - TxnStat('txn_rollback', 'transactions rolled back'), TxnStat('txn_sync', 'transaction sync calls'), ########################################## @@ -331,23 +276,18 @@ connection_stats = [ LSMStat('lsm_checkpoint_throttle', 'sleep for LSM checkpoint throttle'), LSMStat('lsm_merge_throttle', 'sleep for LSM merge throttle'), LSMStat('lsm_rows_merged', 'rows merged in an LSM tree'), - LSMStat('lsm_work_queue_app', - 'application work units currently queued', 'no_clear,no_scale'), - LSMStat('lsm_work_queue_manager', - 'merge work units currently queued', 'no_clear,no_scale'), + LSMStat('lsm_work_queue_app', 'application work units currently queued', 'no_clear,no_scale'), + LSMStat('lsm_work_queue_manager', 'merge work units currently queued', 'no_clear,no_scale'), LSMStat('lsm_work_queue_max', 'tree queue hit maximum'), - LSMStat('lsm_work_queue_switch', - 'switch work units currently queued', 'no_clear,no_scale'), + LSMStat('lsm_work_queue_switch', 'switch work units currently queued', 'no_clear,no_scale'), LSMStat('lsm_work_units_created', 'tree maintenance operations scheduled'), - LSMStat('lsm_work_units_discarded', - 'tree maintenance operations discarded'), + LSMStat('lsm_work_units_discarded', 'tree maintenance operations discarded'), LSMStat('lsm_work_units_done', 'tree maintenance operations executed'), ########################################## # Session operations ########################################## - SessionStat('session_cursor_open', - 'open cursor count', 'no_clear,no_scale'), + SessionStat('session_cursor_open', 'open cursor count', 'no_clear,no_scale'), SessionStat('session_open', 'open session count', 'no_clear,no_scale'), ########################################## @@ -385,8 +325,7 @@ dsrc_stats = [ # Session operations 
########################################## SessionStat('session_compact', 'object compaction'), - SessionStat('session_cursor_open', - 'open cursor count', 'no_clear,no_scale'), + SessionStat('session_cursor_open', 'open cursor count', 'no_clear,no_scale'), ########################################## # Cursor operations @@ -394,8 +333,7 @@ dsrc_stats = [ CursorStat('cursor_create', 'create calls'), CursorStat('cursor_insert', 'insert calls'), CursorStat('cursor_insert_bulk', 'bulk-loaded cursor-insert calls'), - CursorStat('cursor_insert_bytes', - 'cursor-insert key and value bytes inserted'), + CursorStat('cursor_insert_bytes', 'cursor-insert key and value bytes inserted'), CursorStat('cursor_next', 'next calls'), CursorStat('cursor_prev', 'prev calls'), CursorStat('cursor_remove', 'remove calls'), @@ -411,33 +349,21 @@ dsrc_stats = [ ########################################## # Btree statistics ########################################## - BtreeStat('btree_checkpoint_generation', - 'btree checkpoint generation', 'no_clear,no_scale'), - BtreeStat('btree_column_deleted', - 'column-store variable-size deleted values', 'no_scale'), - BtreeStat('btree_column_fix', - 'column-store fixed-size leaf pages', 'no_scale'), - BtreeStat('btree_column_internal', - 'column-store internal pages', 'no_scale'), - BtreeStat('btree_column_rle', - 'column-store variable-size RLE encoded values', 'no_scale'), - BtreeStat('btree_column_variable', - 'column-store variable-size leaf pages', 'no_scale'), + BtreeStat('btree_checkpoint_generation', 'btree checkpoint generation', 'no_clear,no_scale'), + BtreeStat('btree_column_deleted', 'column-store variable-size deleted values', 'no_scale'), + BtreeStat('btree_column_fix', 'column-store fixed-size leaf pages', 'no_scale'), + BtreeStat('btree_column_internal', 'column-store internal pages', 'no_scale'), + BtreeStat('btree_column_rle', 'column-store variable-size RLE encoded values', 'no_scale'), + BtreeStat('btree_column_variable', 
'column-store variable-size leaf pages', 'no_scale'), BtreeStat('btree_compact_rewrite', 'pages rewritten by compaction'), BtreeStat('btree_entries', 'number of key/value pairs', 'no_scale'), - BtreeStat('btree_fixed_len', 'fixed-record size', 'no_aggregate,no_scale'), - BtreeStat('btree_maximum_depth', - 'maximum tree depth', 'max_aggregate,no_scale'), - BtreeStat('btree_maxintlkey', - 'maximum internal page key size', 'max_aggregate,no_scale'), - BtreeStat('btree_maxintlpage', - 'maximum internal page size', 'max_aggregate,no_scale'), - BtreeStat('btree_maxleafkey', - 'maximum leaf page key size', 'max_aggregate,no_scale'), - BtreeStat('btree_maxleafpage', - 'maximum leaf page size', 'max_aggregate,no_scale'), - BtreeStat('btree_maxleafvalue', - 'maximum leaf page value size', 'max_aggregate,no_scale'), + BtreeStat('btree_fixed_len', 'fixed-record size', 'max_aggregate,no_scale'), + BtreeStat('btree_maximum_depth', 'maximum tree depth', 'max_aggregate,no_scale'), + BtreeStat('btree_maxintlkey', 'maximum internal page key size', 'max_aggregate,no_scale'), + BtreeStat('btree_maxintlpage', 'maximum internal page size', 'max_aggregate,no_scale'), + BtreeStat('btree_maxleafkey', 'maximum leaf page key size', 'max_aggregate,no_scale'), + BtreeStat('btree_maxleafpage', 'maximum leaf page size', 'max_aggregate,no_scale'), + BtreeStat('btree_maxleafvalue', 'maximum leaf page value size', 'max_aggregate,no_scale'), BtreeStat('btree_overflow', 'overflow pages', 'no_scale'), BtreeStat('btree_row_internal', 'row-store internal pages', 'no_scale'), BtreeStat('btree_row_leaf', 'row-store leaf pages', 'no_scale'), @@ -454,26 +380,21 @@ dsrc_stats = [ LSMStat('bloom_size', 'total size of bloom filters', 'no_scale'), LSMStat('lsm_checkpoint_throttle', 'sleep for LSM checkpoint throttle'), LSMStat('lsm_chunk_count', 'chunks in the LSM tree', 'no_scale'), - LSMStat('lsm_generation_max', - 'highest merge generation in the LSM tree', 'max_aggregate,no_scale'), - 
LSMStat('lsm_lookup_no_bloom', - 'queries that could have benefited ' + - 'from a Bloom filter that did not exist'), + LSMStat('lsm_generation_max', 'highest merge generation in the LSM tree', 'max_aggregate,no_scale'), + LSMStat('lsm_lookup_no_bloom', 'queries that could have benefited from a Bloom filter that did not exist'), LSMStat('lsm_merge_throttle', 'sleep for LSM merge throttle'), ########################################## # Block manager statistics ########################################## - BlockStat('allocation_size', - 'file allocation unit size', 'no_aggregate,no_scale'), + BlockStat('allocation_size', 'file allocation unit size', 'max_aggregate,no_scale'), BlockStat('block_alloc', 'blocks allocated'), BlockStat('block_checkpoint_size', 'checkpoint size', 'no_scale'), BlockStat('block_extension', 'allocations requiring file extension'), BlockStat('block_free', 'blocks freed'), - BlockStat('block_magic', 'file magic number', 'no_aggregate,no_scale'), - BlockStat('block_major', - 'file major version number', 'no_aggregate,no_scale'), - BlockStat('block_minor', 'minor version number', 'no_aggregate,no_scale'), + BlockStat('block_magic', 'file magic number', 'max_aggregate,no_scale'), + BlockStat('block_major', 'file major version number', 'max_aggregate,no_scale'), + BlockStat('block_minor', 'minor version number', 'max_aggregate,no_scale'), BlockStat('block_reuse_bytes', 'file bytes available for reuse'), BlockStat('block_size', 'file size in bytes', 'no_scale'), @@ -484,44 +405,33 @@ dsrc_stats = [ CacheStat('cache_bytes_write', 'bytes written from cache'), CacheStat('cache_eviction_checkpoint', 'checkpoint blocked page eviction'), CacheStat('cache_eviction_clean', 'unmodified pages evicted'), - CacheStat('cache_eviction_deepen', - 'page split during eviction deepened the tree'), + CacheStat('cache_eviction_deepen', 'page split during eviction deepened the tree'), CacheStat('cache_eviction_dirty', 'modified pages evicted'), - 
CacheStat('cache_eviction_fail', - 'data source pages selected for eviction unable to be evicted'), + CacheStat('cache_eviction_fail', 'data source pages selected for eviction unable to be evicted'), CacheStat('cache_eviction_hazard', 'hazard pointer blocked page eviction'), CacheStat('cache_eviction_internal', 'internal pages evicted'), - CacheStat('cache_eviction_split_internal', - 'internal pages split during eviction'), + CacheStat('cache_eviction_split_internal', 'internal pages split during eviction'), CacheStat('cache_eviction_split_leaf', 'leaf pages split during eviction'), CacheStat('cache_inmem_split', 'in-memory page splits'), - CacheStat('cache_inmem_splittable', - 'in-memory page passed criteria to be split'), - CacheStat('cache_overflow_value', - 'overflow values cached in memory', 'no_scale'), + CacheStat('cache_inmem_splittable', 'in-memory page passed criteria to be split'), + CacheStat('cache_overflow_value', 'overflow values cached in memory', 'no_scale'), CacheStat('cache_read', 'pages read into cache'), - CacheStat('cache_read_lookaside', - 'pages read into cache requiring lookaside entries'), + CacheStat('cache_read_lookaside', 'pages read into cache requiring lookaside entries'), CacheStat('cache_read_overflow', 'overflow pages read into cache'), CacheStat('cache_write', 'pages written from cache'), - CacheStat('cache_write_lookaside', - 'page written requiring lookaside records'), - CacheStat('cache_write_restore', - 'pages written requiring in-memory restoration'), + CacheStat('cache_write_lookaside', 'page written requiring lookaside records'), + CacheStat('cache_write_restore', 'pages written requiring in-memory restoration'), ########################################## # Compression statistics ########################################## - CompressStat('compress_raw_fail', - 'raw compression call failed, no additional data available'), - CompressStat('compress_raw_fail_temporary', - 'raw compression call failed, additional data 
available'), + CompressStat('compress_raw_fail', 'raw compression call failed, no additional data available'), + CompressStat('compress_raw_fail_temporary', 'raw compression call failed, additional data available'), CompressStat('compress_raw_ok', 'raw compression call succeeded'), CompressStat('compress_read', 'compressed pages read'), CompressStat('compress_write', 'compressed pages written'), CompressStat('compress_write_fail', 'page written failed to compress'), - CompressStat('compress_write_too_small', - 'page written was too small to compress'), + CompressStat('compress_write_too_small', 'page written was too small to compress'), ########################################## # Reconciliation statistics @@ -529,8 +439,7 @@ dsrc_stats = [ RecStat('rec_dictionary', 'dictionary matches'), RecStat('rec_multiblock_internal', 'internal page multi-block writes'), RecStat('rec_multiblock_leaf', 'leaf page multi-block writes'), - RecStat('rec_multiblock_max', - 'maximum blocks required for a page', 'max_aggregate,no_scale'), + RecStat('rec_multiblock_max', 'maximum blocks required for a page', 'max_aggregate,no_scale'), RecStat('rec_overflow_key_internal', 'internal-page overflow keys'), RecStat('rec_overflow_key_leaf', 'leaf-page overflow keys'), RecStat('rec_overflow_value', 'overflow values written'), @@ -539,10 +448,8 @@ dsrc_stats = [ RecStat('rec_page_match', 'page checksum matches'), RecStat('rec_pages', 'page reconciliation calls'), RecStat('rec_pages_eviction', 'page reconciliation calls for eviction'), - RecStat('rec_prefix_compression', - 'leaf page key bytes discarded using prefix compression'), - RecStat('rec_suffix_compression', - 'internal page key bytes discarded using suffix compression'), + RecStat('rec_prefix_compression', 'leaf page key bytes discarded using prefix compression'), + RecStat('rec_suffix_compression', 'internal page key bytes discarded using suffix compression'), ########################################## # Transaction statistics diff 
--git a/src/block/block_compact.c b/src/block/block_compact.c index d45d0a96da7..cd304b848d4 100644 --- a/src/block/block_compact.c +++ b/src/block/block_compact.c @@ -8,7 +8,7 @@ #include "wt_internal.h" -static int __block_dump_avail(WT_SESSION_IMPL *, WT_BLOCK *); +static int __block_dump_avail(WT_SESSION_IMPL *, WT_BLOCK *, bool); /* * __wt_block_compact_start -- @@ -22,8 +22,6 @@ __wt_block_compact_start(WT_SESSION_IMPL *session, WT_BLOCK *block) /* Switch to first-fit allocation. */ __wt_block_configure_first_fit(block, true); - block->compact_pct_tenths = 0; - return (0); } @@ -34,14 +32,21 @@ __wt_block_compact_start(WT_SESSION_IMPL *session, WT_BLOCK *block) int __wt_block_compact_end(WT_SESSION_IMPL *session, WT_BLOCK *block) { + WT_DECL_RET; + WT_UNUSED(session); /* Restore the original allocation plan. */ __wt_block_configure_first_fit(block, false); - block->compact_pct_tenths = 0; + /* Dump the results of the compaction pass. */ + if (WT_VERBOSE_ISSET(session, WT_VERB_COMPACT)) { + __wt_spin_lock(session, &block->live_lock); + ret = __block_dump_avail(session, block, false); + __wt_spin_unlock(session, &block->live_lock); + } - return (0); + return (ret); } /* @@ -70,12 +75,23 @@ __wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, bool *skipp) if (fh->size <= WT_MEGABYTE) return (0); + /* + * Reset the compaction state information. This is done here, not in the + * compaction "start" routine, because this function is called first to + * determine if compaction is useful. + */ + block->compact_pct_tenths = 0; + block->compact_pages_reviewed = 0; + block->compact_pages_skipped = 0; + block->compact_pages_written = 0; + __wt_spin_lock(session, &block->live_lock); + /* Dump the current state of the file. */ if (WT_VERBOSE_ISSET(session, WT_VERB_COMPACT)) - WT_ERR(__block_dump_avail(session, block)); + WT_ERR(__block_dump_avail(session, block, true)); - /* Sum the available bytes in the first 80% and 90% of the file. 
*/ + /* Sum the available bytes in the initial 80% and 90% of the file. */ avail_eighty = avail_ninety = 0; ninety = fh->size - fh->size / 10; eighty = fh->size - ((fh->size / 10) * 2); @@ -88,23 +104,6 @@ __wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, bool *skipp) avail_eighty += ext->size; } - WT_ERR(__wt_verbose(session, WT_VERB_COMPACT, - "%s: %" PRIuMAX "MB (%" PRIuMAX ") available space in the first " - "80%% of the file", - block->name, - (uintmax_t)avail_eighty / WT_MEGABYTE, (uintmax_t)avail_eighty)); - WT_ERR(__wt_verbose(session, WT_VERB_COMPACT, - "%s: %" PRIuMAX "MB (%" PRIuMAX ") available space in the first " - "90%% of the file", - block->name, - (uintmax_t)avail_ninety / WT_MEGABYTE, (uintmax_t)avail_ninety)); - WT_ERR(__wt_verbose(session, WT_VERB_COMPACT, - "%s: require 10%% or %" PRIuMAX "MB (%" PRIuMAX ") in the first " - "90%% of the file to perform compaction, compaction %s", - block->name, - (uintmax_t)(fh->size / 10) / WT_MEGABYTE, (uintmax_t)fh->size / 10, - *skipp ? "skipped" : "proceeding")); - /* * Skip files where we can't recover at least 1MB. * @@ -127,6 +126,23 @@ __wt_block_compact_skip(WT_SESSION_IMPL *session, WT_BLOCK *block, bool *skipp) block->compact_pct_tenths = 1; } + WT_ERR(__wt_verbose(session, WT_VERB_COMPACT, + "%s: %" PRIuMAX "MB (%" PRIuMAX ") available space in the first " + "80%% of the file", + block->name, + (uintmax_t)avail_eighty / WT_MEGABYTE, (uintmax_t)avail_eighty)); + WT_ERR(__wt_verbose(session, WT_VERB_COMPACT, + "%s: %" PRIuMAX "MB (%" PRIuMAX ") available space in the first " + "90%% of the file", + block->name, + (uintmax_t)avail_ninety / WT_MEGABYTE, (uintmax_t)avail_ninety)); + WT_ERR(__wt_verbose(session, WT_VERB_COMPACT, + "%s: require 10%% or %" PRIuMAX "MB (%" PRIuMAX ") in the first " + "90%% of the file to perform compaction, compaction %s", + block->name, + (uintmax_t)(fh->size / 10) / WT_MEGABYTE, (uintmax_t)fh->size / 10, + *skipp ? 
"skipped" : "proceeding")); + err: __wt_spin_unlock(session, &block->live_lock); return (ret); @@ -177,6 +193,14 @@ __wt_block_compact_page_skip(WT_SESSION_IMPL *session, } __wt_spin_unlock(session, &block->live_lock); + if (WT_VERBOSE_ISSET(session, WT_VERB_COMPACT)) { + ++block->compact_pages_reviewed; + if (*skipp) + ++block->compact_pages_skipped; + else + ++block->compact_pages_written; + } + return (ret); } @@ -185,7 +209,7 @@ __wt_block_compact_page_skip(WT_SESSION_IMPL *session, * Dump out the avail list so we can see what compaction will look like. */ static int -__block_dump_avail(WT_SESSION_IMPL *session, WT_BLOCK *block) +__block_dump_avail(WT_SESSION_IMPL *session, WT_BLOCK *block, bool start) { WT_EXTLIST *el; WT_EXT *ext; @@ -196,6 +220,20 @@ __block_dump_avail(WT_SESSION_IMPL *session, WT_BLOCK *block) size = block->fh->size; WT_RET(__wt_verbose(session, WT_VERB_COMPACT, + "============ %s", + start ? "testing for compaction" : "ending compaction pass")); + + if (!start) { + WT_RET(__wt_verbose(session, WT_VERB_COMPACT, + "pages reviewed: %" PRIuMAX, + block->compact_pages_reviewed)); + WT_RET(__wt_verbose(session, WT_VERB_COMPACT, + "pages skipped: %" PRIuMAX, block->compact_pages_skipped)); + WT_RET(__wt_verbose(session, WT_VERB_COMPACT, + "pages written: %" PRIuMAX, block->compact_pages_written)); + } + + WT_RET(__wt_verbose(session, WT_VERB_COMPACT, "file size %" PRIuMAX "MB (%" PRIuMAX ") with %" PRIuMAX "%% space available %" PRIuMAX "MB (%" PRIuMAX ")", (uintmax_t)size / WT_MEGABYTE, (uintmax_t)size, @@ -219,6 +257,10 @@ __block_dump_avail(WT_SESSION_IMPL *session, WT_BLOCK *block) } #ifdef __VERBOSE_OUTPUT_PERCENTILE + /* + * The verbose output always displays 10% buckets, running this code + * as well also displays 1% buckets. 
+ */ for (i = 0; i < WT_ELEMENTS(percentile); ++i) { v = percentile[i] * 512; WT_RET(__wt_verbose(session, WT_VERB_COMPACT, diff --git a/src/block/block_mgr.c b/src/block/block_mgr.c index 7260cab75d9..f9f66e05d7f 100644 --- a/src/block/block_mgr.c +++ b/src/block/block_mgr.c @@ -221,6 +221,18 @@ __bm_free(WT_BM *bm, } /* + * __bm_is_mapped -- + * Return if the file is mapped into memory. + */ +static bool +__bm_is_mapped(WT_BM *bm, WT_SESSION_IMPL *session) +{ + WT_UNUSED(session); + + return (bm->map == NULL ? false : true); +} + +/* * __bm_stat -- * Block-manager statistics. */ @@ -357,6 +369,7 @@ __bm_method_set(WT_BM *bm, bool readonly) (int (*)(WT_BM *, WT_SESSION_IMPL *))__bm_readonly; bm->free = (int (*)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t))__bm_readonly; + bm->is_mapped = __bm_is_mapped; bm->preload = __wt_bm_preload; bm->read = __wt_bm_read; bm->salvage_end = (int (*) @@ -367,6 +380,7 @@ __bm_method_set(WT_BM *bm, bool readonly) (WT_BM *, WT_SESSION_IMPL *))__bm_readonly; bm->salvage_valid = (int (*)(WT_BM *, WT_SESSION_IMPL *, uint8_t *, size_t, bool))__bm_readonly; + bm->size = __wt_block_manager_size; bm->stat = __bm_stat; bm->sync = (int (*)(WT_BM *, WT_SESSION_IMPL *, bool))__bm_readonly; @@ -391,12 +405,14 @@ __bm_method_set(WT_BM *bm, bool readonly) bm->compact_skip = __bm_compact_skip; bm->compact_start = __bm_compact_start; bm->free = __bm_free; + bm->is_mapped = __bm_is_mapped; bm->preload = __wt_bm_preload; bm->read = __wt_bm_read; bm->salvage_end = __bm_salvage_end; bm->salvage_next = __bm_salvage_next; bm->salvage_start = __bm_salvage_start; bm->salvage_valid = __bm_salvage_valid; + bm->size = __wt_block_manager_size; bm->stat = __bm_stat; bm->sync = __bm_sync; bm->verify_addr = __bm_verify_addr; diff --git a/src/block/block_open.c b/src/block/block_open.c index 7cf12d36066..ff70b765d1f 100644 --- a/src/block/block_open.c +++ b/src/block/block_open.c @@ -405,27 +405,37 @@ __wt_block_stat(WT_SESSION_IMPL *session, WT_BLOCK 
*block, WT_DSRC_STATS *stats) * Reading from the live system's structure normally requires locking, * but it's an 8B statistics read, there's no need. */ - stats->allocation_size = block->allocsize; - stats->block_checkpoint_size = (int64_t)block->live.ckpt_size; - stats->block_magic = WT_BLOCK_MAGIC; - stats->block_major = WT_BLOCK_MAJOR_VERSION; - stats->block_minor = WT_BLOCK_MINOR_VERSION; - stats->block_reuse_bytes = (int64_t)block->live.avail.bytes; - stats->block_size = block->fh->size; + WT_STAT_WRITE(stats, allocation_size, block->allocsize); + WT_STAT_WRITE( + stats, block_checkpoint_size, (int64_t)block->live.ckpt_size); + WT_STAT_WRITE(stats, block_magic, WT_BLOCK_MAGIC); + WT_STAT_WRITE(stats, block_major, WT_BLOCK_MAJOR_VERSION); + WT_STAT_WRITE(stats, block_minor, WT_BLOCK_MINOR_VERSION); + WT_STAT_WRITE( + stats, block_reuse_bytes, (int64_t)block->live.avail.bytes); + WT_STAT_WRITE(stats, block_size, block->fh->size); } /* * __wt_block_manager_size -- - * Set the size statistic for a file. + * Return the size of a live block handle. */ int -__wt_block_manager_size( - WT_SESSION_IMPL *session, const char *filename, WT_DSRC_STATS *stats) +__wt_block_manager_size(WT_BM *bm, WT_SESSION_IMPL *session, wt_off_t *sizep) { - wt_off_t filesize; - - WT_RET(__wt_filesize_name(session, filename, false, &filesize)); - stats->block_size = filesize; + WT_UNUSED(session); + *sizep = bm->block->fh == NULL ? 0 : bm->block->fh->size; return (0); } + +/* + * __wt_block_manager_named_size -- + * Return the size of a named file. 
+ */ +int +__wt_block_manager_named_size( + WT_SESSION_IMPL *session, const char *name, wt_off_t *sizep) +{ + return (__wt_filesize_name(session, name, false, sizep)); +} diff --git a/src/btree/bt_compact.c b/src/btree/bt_compact.c index b2c9e4b67f8..8935d39b696 100644 --- a/src/btree/bt_compact.c +++ b/src/btree/bt_compact.c @@ -17,9 +17,11 @@ __compact_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp) { WT_BM *bm; WT_DECL_RET; + WT_MULTI *multi; WT_PAGE *page; WT_PAGE_MODIFY *mod; size_t addr_size; + uint32_t i; const uint8_t *addr; *skipp = true; /* Default skip. */ @@ -41,29 +43,46 @@ __compact_rewrite(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp) /* * If the page is clean, test the original addresses. - * If the page is a 1-to-1 replacement, test the replacement addresses. + * If the page is a replacement, test the replacement addresses. * Ignore empty pages, they get merged into the parent. */ if (mod == NULL || mod->rec_result == 0) { - WT_RET(__wt_ref_info(session, ref, &addr, &addr_size, NULL)); + __wt_ref_info(ref, &addr, &addr_size, NULL); if (addr == NULL) return (0); - WT_RET( + return ( bm->compact_page_skip(bm, session, addr, addr_size, skipp)); - } else if (mod->rec_result == WT_PM_REC_REPLACE) { - /* - * The page's modification information can change underfoot if - * the page is being reconciled, serialize with reconciliation. - */ + } + + /* + * The page's modification information can change underfoot if the page + * is being reconciled, serialize with reconciliation. 
+ */ + if (mod->rec_result == WT_PM_REC_REPLACE || + mod->rec_result == WT_PM_REC_MULTIBLOCK) WT_RET(__wt_fair_lock(session, &page->page_lock)); + if (mod->rec_result == WT_PM_REC_REPLACE) ret = bm->compact_page_skip(bm, session, mod->mod_replace.addr, mod->mod_replace.size, skipp); + if (mod->rec_result == WT_PM_REC_MULTIBLOCK) + for (multi = mod->mod_multi, + i = 0; i < mod->mod_multi_entries; ++multi, ++i) { + if (multi->disk_image != NULL) + continue; + if ((ret = bm->compact_page_skip(bm, session, + multi->addr.addr, multi->addr.size, skipp)) != 0) + break; + if (!*skipp) + break; + } + + if (mod->rec_result == WT_PM_REC_REPLACE || + mod->rec_result == WT_PM_REC_MULTIBLOCK) WT_TRET(__wt_fair_unlock(session, &page->page_lock)); - WT_RET(ret); - } - return (0); + + return (ret); } /* @@ -130,7 +149,7 @@ __wt_compact(WT_SESSION_IMPL *session, const char *cfg[]) * read, set its generation to a low value so it is evicted * quickly. */ - WT_ERR(__wt_tree_walk(session, &ref, NULL, + WT_ERR(__wt_tree_walk(session, &ref, WT_READ_COMPACT | WT_READ_NO_GEN | WT_READ_WONT_NEED)); if (ref == NULL) break; @@ -139,7 +158,8 @@ __wt_compact(WT_SESSION_IMPL *session, const char *cfg[]) if (skip) continue; - session->compaction = true; + session->compact_state = WT_COMPACT_SUCCESS; + /* Rewrite the page: mark the page and tree dirty. */ WT_ERR(__wt_page_modify_init(session, ref->page)); __wt_page_modify_set(session, ref->page); @@ -182,7 +202,7 @@ __wt_compact_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp) * address, the page isn't on disk, but we have to read internal pages * to walk the tree regardless; throw up our hands and read it. 
*/ - WT_RET(__wt_ref_info(session, ref, &addr, &addr_size, &type)); + __wt_ref_info(ref, &addr, &addr_size, &type); if (addr == NULL) return (0); diff --git a/src/btree/bt_curnext.c b/src/btree/bt_curnext.c index 3c96bad39d7..6573bc60165 100644 --- a/src/btree/bt_curnext.c +++ b/src/btree/bt_curnext.c @@ -389,6 +389,14 @@ __wt_btcur_iterate_setup(WT_CURSOR_BTREE *cbt) */ cbt->page_deleted_count = 0; +#ifdef HAVE_DIAGNOSTIC + /* + * If starting a new iteration, clear the last-key returned, it doesn't + * apply. + */ + cbt->lastkey->size = 0; + cbt->lastrecno = WT_RECNO_OOB; +#endif /* * If we don't have a search page, then we're done, we're starting at * the beginning or end of the tree, not as a result of a search. @@ -430,6 +438,104 @@ __wt_btcur_iterate_setup(WT_CURSOR_BTREE *cbt) } } +#ifdef HAVE_DIAGNOSTIC +/* + * __cursor_key_order_check_col -- + * Check key ordering for column-store cursor movements. + */ +static int +__cursor_key_order_check_col( + WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, bool next) +{ + int cmp; + + cmp = 0; /* -Werror=maybe-uninitialized */ + + if (cbt->lastrecno != WT_RECNO_OOB) { + if (cbt->lastrecno < cbt->recno) + cmp = -1; + if (cbt->lastrecno > cbt->recno) + cmp = 1; + } + + if (cbt->lastrecno == WT_RECNO_OOB || + (next && cmp < 0) || (!next && cmp > 0)) { + cbt->lastrecno = cbt->recno; + return (0); + } + + WT_PANIC_RET(session, EINVAL, + "WT_CURSOR.%s out-of-order returns: returned key %" PRIu64 " then " + "key %" PRIu64, + next ? "next" : "prev", cbt->lastrecno, cbt->recno); +} + +/* + * __cursor_key_order_check_row -- + * Check key ordering for row-store cursor movements. 
+ */ +static int +__cursor_key_order_check_row( + WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, bool next) +{ + WT_BTREE *btree; + WT_ITEM *key; + WT_DECL_RET; + WT_DECL_ITEM(a); + WT_DECL_ITEM(b); + int cmp; + + btree = S2BT(session); + key = &cbt->iface.key; + cmp = 0; /* -Werror=maybe-uninitialized */ + + if (cbt->lastkey->size != 0) + WT_RET(__wt_compare( + session, btree->collator, cbt->lastkey, key, &cmp)); + + if (cbt->lastkey->size == 0 || (next && cmp < 0) || (!next && cmp > 0)) + return (__wt_buf_set(session, cbt->lastkey, + cbt->iface.key.data, cbt->iface.key.size)); + + WT_ERR(__wt_scr_alloc(session, 512, &a)); + WT_ERR(__wt_buf_set_printable( + session, a, cbt->lastkey->data, cbt->lastkey->size)); + + WT_ERR(__wt_scr_alloc(session, 512, &b)); + WT_ERR(__wt_buf_set_printable(session, b, key->data, key->size)); + + WT_PANIC_ERR(session, EINVAL, + "WT_CURSOR.%s out-of-order returns: returned key %.*s then " + "key %.*s", + next ? "next" : "prev", + (int)a->size, (const char *)a->data, + (int)b->size, (const char *)b->data); + +err: __wt_scr_free(session, &a); + __wt_scr_free(session, &b); + + return (ret); +} + +/* + * __wt_cursor_key_order_check -- + * Check key ordering for cursor movements. + */ +int +__wt_cursor_key_order_check( + WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, bool next) +{ + switch (cbt->ref->page->type) { + case WT_PAGE_COL_FIX: + case WT_PAGE_COL_VAR: + return (__cursor_key_order_check_col(session, cbt, next)); + case WT_PAGE_ROW_LEAF: + return (__cursor_key_order_check_row(session, cbt, next)); + WT_ILLEGAL_VALUE(session); + } +} +#endif + /* * __wt_btcur_next -- * Move to the next record in the tree. 
@@ -527,10 +633,15 @@ __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating) __wt_page_evict_soon(page); cbt->page_deleted_count = 0; - WT_ERR(__wt_tree_walk(session, &cbt->ref, NULL, flags)); + WT_ERR(__wt_tree_walk(session, &cbt->ref, flags)); WT_ERR_TEST(cbt->ref == NULL, WT_NOTFOUND); } +#ifdef HAVE_DIAGNOSTIC + if (ret == 0) + WT_ERR(__wt_cursor_key_order_check(session, cbt, true)); +#endif + err: if (ret != 0) WT_TRET(__cursor_reset(cbt)); return (ret); diff --git a/src/btree/bt_curprev.c b/src/btree/bt_curprev.c index b7cea561b48..1e4b1daa090 100644 --- a/src/btree/bt_curprev.c +++ b/src/btree/bt_curprev.c @@ -615,9 +615,13 @@ __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating) __wt_page_evict_soon(page); cbt->page_deleted_count = 0; - WT_ERR(__wt_tree_walk(session, &cbt->ref, NULL, flags)); + WT_ERR(__wt_tree_walk(session, &cbt->ref, flags)); WT_ERR_TEST(cbt->ref == NULL, WT_NOTFOUND); } +#ifdef HAVE_DIAGNOSTIC + if (ret == 0) + WT_ERR(__wt_cursor_key_order_check(session, cbt, false)); +#endif err: if (ret != 0) WT_TRET(__cursor_reset(cbt)); diff --git a/src/btree/bt_cursor.c b/src/btree/bt_cursor.c index 69512f45933..28b51fd2865 100644 --- a/src/btree/bt_cursor.c +++ b/src/btree/bt_cursor.c @@ -62,8 +62,18 @@ __cursor_size_chk(WT_SESSION_IMPL *session, WT_ITEM *kv) static inline int __cursor_fix_implicit(WT_BTREE *btree, WT_CURSOR_BTREE *cbt) { - return (btree->type == BTREE_COL_FIX && - !F_ISSET(cbt, WT_CBT_MAX_RECORD)); + /* + * When there's no exact match, column-store search returns the key + * nearest the searched-for key (continuing past keys smaller than the + * searched-for key to return the next-largest key). Therefore, if the + * returned comparison is -1, the searched-for key was larger than any + * row on the page's standard information or column-store insert list. + * + * If the returned comparison is NOT -1, there was a row equal to or + * larger than the searched-for key, and we implicitly create missing + * rows. 
+ */ + return (btree->type == BTREE_COL_FIX && cbt->compare != -1); } /* @@ -502,19 +512,14 @@ retry: WT_RET(__cursor_func_init(cbt, true)); case BTREE_COL_VAR: /* * If WT_CURSTD_APPEND is set, insert a new record (ignoring - * the application's record number). First we search for the - * maximum possible record number so the search ends on the - * last page. The real record number is assigned by the - * serialized append operation. + * the application's record number). The real record number + * is assigned by the serialized append operation. */ if (F_ISSET(cursor, WT_CURSTD_APPEND)) - cbt->iface.recno = UINT64_MAX; + cbt->iface.recno = WT_RECNO_OOB; WT_ERR(__cursor_col_search(session, cbt, NULL)); - if (F_ISSET(cursor, WT_CURSTD_APPEND)) - cbt->iface.recno = WT_RECNO_OOB; - /* * If not overwriting, fail if the key exists. Creating a * record past the end of the tree in a fixed-length @@ -816,7 +821,12 @@ err: if (ret == WT_RESTART) { /* * __wt_btcur_next_random -- - * Move to a random record in the tree. + * Move to a random record in the tree. There are two algorithms, one + * where we select a record at random from the whole tree on each + * retrieval and one where we first select a record at random from the + * whole tree, and then subsequently sample forward from that location. + * The sampling approach allows us to select reasonably uniform random + * points from unbalanced trees. 
*/ int __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) @@ -825,6 +835,8 @@ __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) WT_DECL_RET; WT_SESSION_IMPL *session; WT_UPDATE *upd; + wt_off_t size; + uint64_t skip; session = (WT_SESSION_IMPL *)cbt->iface.session; btree = cbt->btree; @@ -839,11 +851,65 @@ __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) WT_STAT_FAST_CONN_INCR(session, cursor_next); WT_STAT_FAST_DATA_INCR(session, cursor_next); - WT_RET(__cursor_func_init(cbt, true)); + /* + * If retrieving random values without sampling, or we don't have a + * page reference, pick a roughly random leaf page in the tree. + */ + if (cbt->ref == NULL || cbt->next_random_sample_size == 0) { + /* + * Skip past the sample size of the leaf pages in the tree + * between each random key return to compensate for unbalanced + * trees. + * + * Use the underlying file size divided by its block allocation + * size as our guess of leaf pages in the file (this can be + * entirely wrong, as it depends on how many pages are in this + * particular checkpoint, how large the leaf and internal pages + * really are, and other factors). Then, divide that value by + * the configured sample size and increment the final result to + * make sure tiny files don't leave us with a skip value of 0. + * + * !!! + * Ideally, the number would be prime to avoid restart issues. + */ + if (cbt->next_random_sample_size != 0) { + WT_ERR(btree->bm->size(btree->bm, session, &size)); + cbt->next_random_leaf_skip = (uint64_t) + ((size / btree->allocsize) / + cbt->next_random_sample_size) + 1; + } + + /* + * Choose a leaf page from the tree. + */ + WT_ERR(__cursor_func_init(cbt, true)); + WT_WITH_PAGE_INDEX( + session, ret = __wt_row_random_descent(session, cbt)); + WT_ERR(ret); + } else { + /* + * Read through the tree, skipping leaf pages. Be cautious about + * the skip count: if the last leaf page skipped was also the + * last leaf page in the tree, it may be set to zero on return + * with the end-of-walk condition. 
+ * + * Pages read for data sampling aren't "useful"; don't update + * the read generation of pages already in memory, and if a page + * is read, set its generation to a low value so it is evicted + * quickly. + */ + for (skip = + cbt->next_random_leaf_skip; cbt->ref == NULL || skip > 0;) + WT_ERR(__wt_tree_walk_skip(session, &cbt->ref, &skip, + WT_READ_NO_GEN | + WT_READ_SKIP_INTL | WT_READ_WONT_NEED)); + } - WT_WITH_PAGE_INDEX(session, - ret = __wt_row_random(session, cbt)); - WT_ERR(ret); + /* + * Select a random entry from the leaf page. If it's not valid, move to + * the next entry, if that doesn't work, move to the previous entry. + */ + WT_ERR(__wt_row_random_leaf(session, cbt)); if (__cursor_valid(cbt, &upd)) WT_ERR(__wt_kv_return(session, cbt, upd)); else { @@ -851,9 +917,9 @@ __wt_btcur_next_random(WT_CURSOR_BTREE *cbt) ret = __wt_btcur_prev(cbt, false); WT_ERR(ret); } + return (0); -err: if (ret != 0) - WT_TRET(__cursor_reset(cbt)); +err: WT_TRET(__cursor_reset(cbt)); return (ret); } @@ -1167,6 +1233,11 @@ __wt_btcur_open(WT_CURSOR_BTREE *cbt) { cbt->row_key = &cbt->_row_key; cbt->tmp = &cbt->_tmp; + +#ifdef HAVE_DIAGNOSTIC + cbt->lastkey = &cbt->_lastkey; + cbt->lastrecno = WT_RECNO_OOB; +#endif } /* @@ -1192,6 +1263,9 @@ __wt_btcur_close(WT_CURSOR_BTREE *cbt, bool lowlevel) __wt_buf_free(session, &cbt->_row_key); __wt_buf_free(session, &cbt->_tmp); +#ifdef HAVE_DIAGNOSTIC + __wt_buf_free(session, &cbt->_lastkey); +#endif return (ret); } diff --git a/src/btree/bt_debug.c b/src/btree/bt_debug.c index 0f47c060daf..393f869ece9 100644 --- a/src/btree/bt_debug.c +++ b/src/btree/bt_debug.c @@ -43,7 +43,7 @@ static int __debug_page_col_var(WT_DBG *, WT_PAGE *); static int __debug_page_metadata(WT_DBG *, WT_PAGE *); static int __debug_page_row_int(WT_DBG *, WT_PAGE *, uint32_t); static int __debug_page_row_leaf(WT_DBG *, WT_PAGE *); -static int __debug_ref(WT_DBG *, WT_REF *); +static void __debug_ref(WT_DBG *, WT_REF *); static void __debug_row_skip(WT_DBG *, 
WT_INSERT_HEAD *); static int __debug_tree( WT_SESSION_IMPL *, WT_BTREE *, WT_PAGE *, const char *, uint32_t); @@ -74,9 +74,7 @@ __wt_debug_set_verbose(WT_SESSION_IMPL *session, const char *v) static inline void __debug_hex_byte(WT_DBG *ds, uint8_t v) { - static const char hex[] = "0123456789abcdef"; - - __dmsg(ds, "#%c%c", hex[(v & 0xf0) >> 4], hex[v & 0x0f]); + __dmsg(ds, "#%c%c", __wt_hex[(v & 0xf0) >> 4], __wt_hex[v & 0x0f]); } /* @@ -769,7 +767,7 @@ __debug_page_col_int(WT_DBG *ds, WT_PAGE *page, uint32_t flags) WT_INTL_FOREACH_BEGIN(session, page, ref) { __dmsg(ds, "\trecno %" PRIu64 "\n", ref->key.recno); - WT_RET(__debug_ref(ds, ref)); + __debug_ref(ds, ref); } WT_INTL_FOREACH_END; if (LF_ISSET(WT_DEBUG_TREE_WALK)) @@ -843,7 +841,7 @@ __debug_page_row_int(WT_DBG *ds, WT_PAGE *page, uint32_t flags) WT_INTL_FOREACH_BEGIN(session, page, ref) { __wt_ref_key(page, ref, &p, &len); __debug_item(ds, "K", p, len); - WT_RET(__debug_ref(ds, ref)); + __debug_ref(ds, ref); } WT_INTL_FOREACH_END; if (LF_ISSET(WT_DEBUG_TREE_WALK)) @@ -965,7 +963,7 @@ __debug_update(WT_DBG *ds, WT_UPDATE *upd, bool hexbyte) * __debug_ref -- * Dump a WT_REF structure. */ -static int +static void __debug_ref(WT_DBG *ds, WT_REF *ref) { WT_SESSION_IMPL *session; @@ -994,14 +992,14 @@ __debug_ref(WT_DBG *ds, WT_REF *ref) case WT_REF_SPLIT: __dmsg(ds, "split"); break; - WT_ILLEGAL_VALUE(session); + default: + __dmsg(ds, "INVALID"); + break; } - WT_RET(__wt_ref_info(session, ref, &addr, &addr_size, NULL)); + __wt_ref_info(ref, &addr, &addr_size, NULL); __dmsg(ds, " %s\n", __wt_addr_string(session, addr, addr_size, ds->tmp)); - - return (0); } /* diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c index 294cc399d65..a6330326954 100644 --- a/src/btree/bt_handle.c +++ b/src/btree/bt_handle.c @@ -601,7 +601,7 @@ __btree_preload(WT_SESSION_IMPL *session) /* Pre-load the second-level internal pages. 
*/ WT_INTL_FOREACH_BEGIN(session, btree->root.page, ref) { - WT_RET(__wt_ref_info(session, ref, &addr, &addr_size, NULL)); + __wt_ref_info(ref, &addr, &addr_size, NULL); if (addr != NULL) WT_RET(bm->preload(bm, session, addr, addr_size)); } WT_INTL_FOREACH_END; @@ -622,7 +622,7 @@ __btree_get_last_recno(WT_SESSION_IMPL *session) btree = S2BT(session); next_walk = NULL; - WT_RET(__wt_tree_walk(session, &next_walk, NULL, WT_READ_PREV)); + WT_RET(__wt_tree_walk(session, &next_walk, WT_READ_PREV)); if (next_walk == NULL) return (WT_NOTFOUND); diff --git a/src/btree/bt_huffman.c b/src/btree/bt_huffman.c index d9ff9616072..a34e57796a8 100644 --- a/src/btree/bt_huffman.c +++ b/src/btree/bt_huffman.c @@ -332,11 +332,17 @@ __wt_huffman_read(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *ip, for (tp = table, lineno = 1; (ret = fscanf(fp, "%" SCNi64 " %" SCNi64, &symbol, &frequency)) != EOF; ++tp, ++lineno) { - if (lineno > entries) + /* + * Entries is 0-based, that is, there are (entries +1) possible + * values that can be configured. The line number is 1-based, so + * adjust the test for too many entries, and report (entries +1) + * in the error as the maximum possible number of entries. 
+ */ + if (lineno > entries + 1) WT_ERR_MSG(session, EINVAL, "Huffman table file %.*s is corrupted, " "more than %" PRIu32 " entries", - (int)ip->len, ip->str, entries); + (int)ip->len, ip->str, entries + 1); if (ret != 2) WT_ERR_MSG(session, EINVAL, "line %u of Huffman table file %.*s is corrupted: " diff --git a/src/btree/bt_misc.c b/src/btree/bt_misc.c index d2b16bb5d21..a60499ef8b7 100644 --- a/src/btree/bt_misc.c +++ b/src/btree/bt_misc.c @@ -101,7 +101,7 @@ __wt_page_addr_string(WT_SESSION_IMPL *session, WT_REF *ref, WT_ITEM *buf) return (buf->data); } - (void)__wt_ref_info(session, ref, &addr, &addr_size, NULL); + __wt_ref_info(ref, &addr, &addr_size, NULL); return (__wt_addr_string(session, addr, addr_size, buf)); } diff --git a/src/btree/bt_page.c b/src/btree/bt_page.c index 8808f0b1a85..fdccf033828 100644 --- a/src/btree/bt_page.c +++ b/src/btree/bt_page.c @@ -272,7 +272,7 @@ __inmem_col_int(WT_SESSION_IMPL *session, WT_PAGE *page) const WT_PAGE_HEADER *dsk; WT_PAGE_INDEX *pindex; WT_REF **refp, *ref; - uint32_t i; + uint32_t hint, i; btree = S2BT(session); dsk = page->dsk; @@ -284,9 +284,11 @@ __inmem_col_int(WT_SESSION_IMPL *session, WT_PAGE *page) */ pindex = WT_INTL_INDEX_GET_SAFE(page); refp = pindex->index; + hint = 0; WT_CELL_FOREACH(btree, dsk, cell, unpack, i) { ref = *refp++; ref->home = page; + ref->pindex_hint = hint++; __wt_cell_unpack(cell, unpack); ref->addr = cell; @@ -404,7 +406,7 @@ __inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *sizep) const WT_PAGE_HEADER *dsk; WT_PAGE_INDEX *pindex; WT_REF *ref, **refp; - uint32_t i; + uint32_t hint, i; bool overflow_keys; btree = S2BT(session); @@ -421,9 +423,11 @@ __inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *sizep) pindex = WT_INTL_INDEX_GET_SAFE(page); refp = pindex->index; overflow_keys = false; + hint = 0; WT_CELL_FOREACH(btree, dsk, cell, unpack, i) { ref = *refp; ref->home = page; + ref->pindex_hint = hint++; __wt_cell_unpack(cell, unpack); switch 
(unpack->type) { diff --git a/src/btree/bt_read.c b/src/btree/bt_read.c index 77215474359..c50f97bbe14 100644 --- a/src/btree/bt_read.c +++ b/src/btree/bt_read.c @@ -375,7 +375,7 @@ __page_read(WT_SESSION_IMPL *session, WT_REF *ref) * Get the address: if there is no address, the page was deleted, but a * subsequent search or insert is forcing re-creation of the name space. */ - WT_ERR(__wt_ref_info(session, ref, &addr, &addr_size, NULL)); + __wt_ref_info(ref, &addr, &addr_size, NULL); if (addr == NULL) { WT_ASSERT(session, previous_state == WT_REF_DELETED); diff --git a/src/btree/bt_slvg.c b/src/btree/bt_slvg.c index 756ffd98f3a..b5c299b9ea9 100644 --- a/src/btree/bt_slvg.c +++ b/src/btree/bt_slvg.c @@ -1807,7 +1807,7 @@ err: if (page != NULL) */ static int __slvg_row_build_internal( - WT_SESSION_IMPL *session, uint32_t leaf_cnt, WT_STUFF *ss) + WT_SESSION_IMPL *session, uint32_t leaf_cnt, WT_STUFF *ss) { WT_ADDR *addr; WT_DECL_RET; diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c index 631aca0d5c0..69c787c9385 100644 --- a/src/btree/bt_split.c +++ b/src/btree/bt_split.c @@ -15,6 +15,22 @@ } while (0) /* + * A note on error handling: main split functions first allocate/initialize new + * structures; failures during that period are handled by discarding the memory + * and returning an error code, the caller knows the split didn't happen and + * proceeds accordingly. Second, split functions update the tree, and a failure + * in that period is catastrophic, any partial update to the tree requires a + * panic, we can't recover. Third, once the split is complete and the tree has + * been fully updated, we have to ignore most errors, the split is complete and + * correct, callers have to proceed accordingly. + */ +typedef enum { + WT_ERR_IGNORE, /* Ignore minor errors */ + WT_ERR_PANIC, /* Panic on all errors */ + WT_ERR_RETURN /* Clean up and return error */ +} WT_SPLIT_ERROR_PHASE; + +/* * __split_oldest_gen -- * Calculate the oldest active split generation. 
*/ @@ -190,6 +206,8 @@ __split_verify_intl_key_order(WT_SESSION_IMPL *session, WT_PAGE *page) case WT_PAGE_COL_INT: recno = 0; /* Less than any valid record number. */ WT_INTL_FOREACH_BEGIN(session, page, ref) { + WT_ASSERT(session, ref->home == page); + WT_ASSERT(session, ref->key.recno > recno); recno = ref->key.recno; } WT_INTL_FOREACH_END; @@ -202,6 +220,8 @@ __split_verify_intl_key_order(WT_SESSION_IMPL *session, WT_PAGE *page) first = true; WT_INTL_FOREACH_BEGIN(session, page, ref) { + WT_ASSERT(session, ref->home == page); + __wt_ref_key(page, ref, &next->data, &next->size); if (last->size == 0) { if (first) @@ -328,7 +348,7 @@ __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home, /* * If there's no address (the page has never been written), or the * address has been instantiated, there's no work to do. Otherwise, - * get the address from the on-page cell. + * instantiate the address in-memory, from the on-page cell. */ addr = ref->addr; if (addr != NULL && !__wt_off_page(from_home, addr)) { @@ -363,65 +383,101 @@ __split_ref_move(WT_SESSION_IMPL *session, WT_PAGE *from_home, } /* - * __split_child_block_evict_and_split -- - * Ensure the newly created child isn't evicted or split for now. + * __split_ref_step1 -- + * Prepare a set of WT_REFs for a move. */ static void -__split_child_block_evict_and_split(WT_PAGE *child) +__split_ref_step1( + WT_SESSION_IMPL *session, WT_PAGE_INDEX *pindex, bool skip_first) { + WT_PAGE *child; + WT_REF *child_ref, *ref; + uint32_t i, j; + + /* The newly created subtree is complete. */ + WT_WRITE_BARRIER(); + /* - * Once the split is live, newly created internal pages might be evicted - * and their WT_REF structures freed. If that happens before all threads - * exit the index of the page which previously "owned" the WT_REF, a - * thread might see a freed WT_REF. 
To ensure that doesn't happen, the - * newly created page's modify structure has a field with a transaction - * ID that's checked before any internal page is evicted. Unfortunately, - * we don't know the correct value until we update the original page's - * index (we need a transaction ID from after that update), but the act - * of updating the original page's index is what allows the eviction to - * happen. - * - * Once the split is live, newly created internal pages might themselves - * split. The split itself is not the problem: if a page splits before - * we fix up its WT_REF (in other words, a WT_REF we move is then moved - * again, before we reset the underlying page's parent reference), it's - * OK because the test we use to find a WT_REF and WT_PAGE that require - * fixing up is only that the WT_REF points to the wrong parent, not it - * points to a specific wrong parent. The problem is our fix up of the - * WT_REFs in the created page could race with the subsequent fix of the - * same WT_REFs (in a different created page), we'd have to acquire some - * lock to prevent that race, and that's going to be difficult at best. - * - * For now, block eviction and splits in newly created pages until they - * have been fixed up. + * Update the moved WT_REFs so threads moving through them start looking + * at the created children's page index information. Because we've not + * yet updated the page index of the parent page into which we are going + * to split this subtree, a cursor moving through these WT_REFs will + * ascend into the created children, but eventually fail as that parent + * page won't yet know about the created children pages. That's OK, we + * spin there until the parent's page index is updated. */ - F_SET_ATOMIC(child, WT_PAGE_SPLIT_BLOCK); + for (i = skip_first ? 1 : 0; i < pindex->entries; ++i) { + ref = pindex->index[i]; + child = ref->page; + + /* + * Block eviction and splits in newly created pages. 
+ * + * Once the split is live, newly created internal pages might be + * evicted and their WT_REF structures freed. If that happened + * before all threads exit the index of the page that previously + * "owned" the WT_REF, a thread might see a freed WT_REF. To + * ensure that doesn't happen, the newly created page's modify + * structure has a field with a transaction ID that's checked + * before any internal page is evicted. Unfortunately, we don't + * know the correct value until we update the original page's + * index (we need a transaction ID from after that update), but + * the act of updating the original page's index is what allows + * the eviction to happen. + * + * Split blocking was because historic versions of the split + * code didn't update the WT_REF.home field until after the + * split was live, so the WT_REF.home fields being updated could + * split again before the update, there's a race between splits + * as to which would update them first. The current code updates + * the WT_REF.home fields before going live (in this function), + * this shouldn't be an issue, but for now splits remain turned + * off. + */ + F_SET_ATOMIC(child, WT_PAGE_SPLIT_BLOCK); + + /* + * We use a page flag to prevent the child from splitting from + * underneath us, but the split-generation error checks don't + * know about that flag; use the standard macros to ensure that + * reading the child's page index structure is safe. + */ + j = 0; + WT_ENTER_PAGE_INDEX(session); + WT_INTL_FOREACH_BEGIN(session, child, child_ref) { + child_ref->home = child; + child_ref->pindex_hint = j++; + } WT_INTL_FOREACH_END; + WT_LEAVE_PAGE_INDEX(session); + +#ifdef HAVE_DIAGNOSTIC + WT_WITH_PAGE_INDEX(session, + __split_verify_intl_key_order(session, child)); +#endif + } } /* - * __split_ref_move_final -- - * Finalize the moved WT_REF structures after the split succeeds. + * __split_ref_step2 -- + * Allow the newly created children to be evicted or split. 
*/ static int -__split_ref_move_final( - WT_SESSION_IMPL *session, WT_REF **refp, uint32_t entries) +__split_ref_step2( + WT_SESSION_IMPL *session, WT_PAGE_INDEX *pindex, bool skip_first) { WT_DECL_RET; WT_PAGE *child; - WT_REF *ref, *child_ref; + WT_REF *ref; uint32_t i; /* - * The WT_REF structures moved to newly allocated child pages reference - * the wrong parent page and we have to fix that up. The problem is - * revealed when a thread of control searches for the child page's - * reference structure slot, and fails to find it because the parent - * page being searched no longer references the child. When that failure - * happens the thread waits for the reference's home page to be updated, - * which we do here: walk the children and fix them up. + * The split has gone live, enable eviction and splits on the newly + * created internal pages. */ - for (i = 0; i < entries; ++i, ++refp) { - ref = *refp; + WT_WRITE_BARRIER(); + + for (i = skip_first ? 1 : 0; i < pindex->entries; ++i) { + ref = pindex->index[i]; /* * We don't hold hazard pointers on created pages, they cannot @@ -441,42 +497,18 @@ __split_ref_move_final( WT_ERR(ret); child = ref->page; + + /* The child can now be evicted or split. */ + F_CLR_ATOMIC(child, WT_PAGE_SPLIT_BLOCK); + #ifdef HAVE_DIAGNOSTIC WT_WITH_PAGE_INDEX(session, __split_verify_intl_key_order(session, child)); #endif - /* - * We use a page flag to prevent the child from splitting from - * underneath us, but the split-generation error checks don't - * know about that flag; use the standard macros to ensure that - * reading the child's page index structure is safe. - */ - WT_ENTER_PAGE_INDEX(session); - WT_INTL_FOREACH_BEGIN(session, child, child_ref) { - /* - * The page's home reference may not be wrong, as we - * opened up access from the top of the tree already, - * disk pages may have been read in since then, and - * those pages would have correct parent references. 
- */ - if (child_ref->home != child) { - child_ref->home = child; - child_ref->pindex_hint = 0; - } - } WT_INTL_FOREACH_END; - WT_LEAVE_PAGE_INDEX(session); - - /* The child can now be evicted or split. */ - F_CLR_ATOMIC(child, WT_PAGE_SPLIT_BLOCK); WT_ERR(__wt_hazard_clear(session, child)); } - /* - * Push out the changes: not required for correctness, but don't let - * threads spin on incorrect page references longer than necessary. - */ - WT_FULL_BARRIER(); return (0); err: /* Something really bad just happened. */ @@ -496,11 +528,11 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root) WT_PAGE_INDEX *alloc_index, *child_pindex, *pindex; WT_REF **alloc_refp; WT_REF **child_refp, *ref, **root_refp; + WT_SPLIT_ERROR_PHASE complete; size_t child_incr, root_decr, root_incr, size; uint64_t split_gen; uint32_t children, chunk, i, j, remain; uint32_t slots; - bool complete; void *p; WT_STAT_FAST_CONN_INCR(session, cache_eviction_deepen); @@ -511,7 +543,7 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root) btree = S2BT(session); alloc_index = NULL; root_decr = root_incr = 0; - complete = false; + complete = WT_ERR_RETURN; /* The root page will be marked dirty, make sure that will succeed. */ WT_RET(__wt_page_modify_init(session, root)); @@ -589,16 +621,13 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root) WT_ERR(__wt_page_modify_init(session, child)); __wt_page_modify_set(session, child); - /* Ensure the page isn't evicted or split for now. */ - __split_child_block_evict_and_split(child); - /* * The newly allocated child's page index references the same * structures as the root. (We cannot move WT_REF structures, * threads may be underneath us right now changing the structure * state.) However, if the WT_REF structures reference on-page * information, we have to fix that, because the disk image for - * the page that has an page index entry for the WT_REF is about + * the page that has a page index entry for the WT_REF is about * to change. 
*/ child_pindex = WT_INTL_INDEX_GET_SAFE(child); @@ -615,31 +644,28 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root) WT_ASSERT(session, root_refp - pindex->index == (ptrdiff_t)pindex->entries); + /* Start making real changes to the tree, errors are fatal. */ + complete = WT_ERR_PANIC; + + /* Prepare the WT_REFs for the move. */ + __split_ref_step1(session, alloc_index, false); + /* * Confirm the root page's index hasn't moved, then update it, which - * makes the split visible to threads descending the tree. From this - * point on, we're committed to the split. - * - * A note on error handling: until this point, there's no problem with - * unwinding on error. We allocated a new page index, a new set of - * WT_REFs and a new set of child pages -- if an error occurred, the - * root remained unchanged, although it may have an incorrect memory - * footprint. From now on we've modified the root page, attention - * needs to be paid. However, subsequent failures are relatively benign, - * the split is OK and complete. For that reason, we ignore errors past - * this point unless there's a panic. + * makes the split visible to threads descending the tree. */ WT_ASSERT(session, WT_INTL_INDEX_GET_SAFE(root) == pindex); WT_INTL_INDEX_SET(root, alloc_index); - complete = true; #ifdef HAVE_DIAGNOSTIC WT_WITH_PAGE_INDEX(session, __split_verify_intl_key_order(session, root)); #endif - /* Fix up the moved WT_REF structures. */ - WT_ERR(__split_ref_move_final( - session, alloc_index->index, alloc_index->entries)); + /* Finalize the WT_REFs we moved. */ + WT_ERR(__split_ref_step2(session, alloc_index, false)); + + /* The split is complete and correct, ignore benign errors. */ + complete = WT_ERR_IGNORE; /* We've installed the allocated page-index, ensure error handling. 
*/ alloc_index = NULL; @@ -664,24 +690,25 @@ __split_root(WT_SESSION_IMPL *session, WT_PAGE *root) __wt_cache_page_inmem_decr(session, root, root_decr); __wt_page_modify_set(session, root); -err: /* - * If complete is true, we saw an error after opening up the tree to - * descent through the root page's new index. There is nothing we - * can do, there are threads potentially active in both versions of - * the tree. - * - * A note on error handling: if we completed the split, return success, - * nothing really bad can have happened, and our caller has to proceed - * with the split. - */ - if (!complete) +err: switch (complete) { + case WT_ERR_RETURN: __wt_free_ref_index(session, root, alloc_index, true); - - if (ret != 0 && ret != WT_PANIC) + break; + case WT_ERR_PANIC: __wt_err(session, ret, - "ignoring not-fatal error during root page split to " - "deepen the tree"); - return (ret == WT_PANIC || !complete ? ret : 0); + "fatal error during root page split to deepen the tree"); + ret = WT_PANIC; + break; + case WT_ERR_IGNORE: + if (ret != 0 && ret != WT_PANIC) { + __wt_err(session, ret, + "ignoring not-fatal error during root page split " + "to deepen the tree"); + ret = 0; + } + break; + } + return (ret); } /* @@ -698,19 +725,21 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, WT_PAGE *parent; WT_PAGE_INDEX *alloc_index, *pindex; WT_REF **alloc_refp, *next_ref; + WT_SPLIT_ERROR_PHASE complete; size_t parent_decr, size; uint64_t split_gen; - uint32_t i, j; + uint32_t hint, i, j; uint32_t deleted_entries, parent_entries, result_entries; uint32_t *deleted_refs; - bool complete, empty_parent; + bool empty_parent; parent = ref->home; alloc_index = pindex = NULL; parent_decr = 0; parent_entries = 0; - complete = empty_parent = false; + empty_parent = false; + complete = WT_ERR_RETURN; /* The parent page will be marked dirty, make sure that will succeed. 
*/ WT_RET(__wt_page_modify_init(session, parent)); @@ -728,7 +757,7 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, * array anyway. Switch them to the special split state, so that any * reading thread will restart. */ - WT_RET(__wt_scr_alloc(session, 10 * sizeof(uint32_t), &scr)); + WT_ERR(__wt_scr_alloc(session, 10 * sizeof(uint32_t), &scr)); for (deleted_entries = 0, i = 0; i < parent_entries; ++i) { next_ref = pindex->index[i]; WT_ASSERT(session, next_ref->state != WT_REF_SPLIT); @@ -768,28 +797,40 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, * Allocate and initialize a new page index array for the parent, then * copy references from the original index array, plus references from * the newly created split array, into place. + * + * Update the WT_REF's page-index hint as we go. This can race with a + * thread setting the hint based on an older page-index, and the change + * isn't backed out in the case of an error, so there ways for the hint + * to be wrong; OK because it's just a hint. */ size = sizeof(WT_PAGE_INDEX) + result_entries * sizeof(WT_REF *); WT_ERR(__wt_calloc(session, 1, size, &alloc_index)); parent_incr += size; alloc_index->index = (WT_REF **)(alloc_index + 1); alloc_index->entries = result_entries; - for (alloc_refp = alloc_index->index, i = 0; i < parent_entries; ++i) { + for (alloc_refp = alloc_index->index, + hint = i = 0; i < parent_entries; ++i) { next_ref = pindex->index[i]; if (next_ref == ref) for (j = 0; j < new_entries; ++j) { ref_new[j]->home = parent; + ref_new[j]->pindex_hint = hint++; *alloc_refp++ = ref_new[j]; } - else if (next_ref->state != WT_REF_SPLIT) + else if (next_ref->state != WT_REF_SPLIT) { /* Skip refs we have marked for deletion. */ + next_ref->pindex_hint = hint++; *alloc_refp++ = next_ref; + } } /* Check that we filled in all the entries. 
*/ WT_ASSERT(session, alloc_refp - alloc_index->index == (ptrdiff_t)result_entries); + /* Start making real changes to the tree, errors are fatal. */ + complete = WT_ERR_PANIC; + /* * Confirm the parent page's index hasn't moved then update it, which * makes the split visible to threads descending the tree. @@ -830,16 +871,8 @@ __split_parent(WT_SESSION_IMPL *session, WT_REF *ref, WT_REF **ref_new, */ WT_FULL_BARRIER(); - /* - * A note on error handling: failures before we swapped the new page - * index into the parent can be resolved by freeing allocated memory - * because the original page is unchanged, we can continue to use it - * and we have not yet modified the parent. Failures after we swap - * the new page index into the parent are also relatively benign, the - * split is OK and complete. For those reasons, we ignore errors past - * this point unless there's a panic. - */ - complete = true; + /* The split is complete and correct, ignore benign errors. */ + complete = WT_ERR_IGNORE; WT_ERR(__wt_verbose(session, WT_VERB_SPLIT, "%p: %s %s" "split into parent %p, %" PRIu32 " -> %" PRIu32 @@ -923,7 +956,8 @@ err: __wt_scr_free(session, &scr); * nothing really bad can have happened, and our caller has to proceed * with the split. */ - if (!complete) { + switch (complete) { + case WT_ERR_RETURN: for (i = 0; i < parent_entries; ++i) { next_ref = pindex->index[i]; if (next_ref->state == WT_REF_SPLIT) @@ -931,20 +965,28 @@ err: __wt_scr_free(session, &scr); } __wt_free_ref_index(session, NULL, alloc_index, false); - /* * The split couldn't proceed because the parent would be empty, * return EBUSY so our caller knows to unlock the WT_REF that's * being deleted, but don't be noisy, there's nothing wrong. 
*/ if (empty_parent) - return (EBUSY); + ret = EBUSY; + break; + case WT_ERR_PANIC: + __wt_err(session, ret, "fatal error during parent page split"); + ret = WT_PANIC; + break; + case WT_ERR_IGNORE: + if (ret != 0 && ret != WT_PANIC) { + __wt_err(session, ret, + "ignoring not-fatal error during parent page " + "split"); + ret = 0; + } + break; } - - if (ret != 0 && ret != WT_PANIC) - __wt_err(session, ret, - "ignoring not-fatal error during parent page split"); - return (ret == WT_PANIC || !complete ? ret : 0); + return (ret); } /* @@ -960,11 +1002,11 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page) WT_PAGE_INDEX *alloc_index, *child_pindex, *pindex, *replace_index; WT_REF **alloc_refp; WT_REF **child_refp, *page_ref, **page_refp, *ref; + WT_SPLIT_ERROR_PHASE complete; size_t child_incr, page_decr, page_incr, parent_incr, size; uint64_t split_gen; uint32_t children, chunk, i, j, remain; uint32_t slots; - bool complete; void *p; WT_STAT_FAST_CONN_INCR(session, cache_eviction_split_internal); @@ -977,7 +1019,7 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page) alloc_index = replace_index = NULL; page_ref = page->pg_intl_parent_ref; page_decr = page_incr = parent_incr = 0; - complete = false; + complete = WT_ERR_RETURN; /* * Our caller is holding the page locked to single-thread splits, which @@ -1074,9 +1116,6 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page) WT_ERR(__wt_page_modify_init(session, child)); __wt_page_modify_set(session, child); - /* Ensure the page isn't evicted or split for now. */ - __split_child_block_evict_and_split(child); - /* * The newly allocated child's page index references the same * structures as the parent. 
(We cannot move WT_REF structures, @@ -1100,22 +1139,16 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page) WT_ASSERT(session, page_refp - pindex->index == (ptrdiff_t)pindex->entries); + /* Start making real changes to the tree, errors are fatal. */ + complete = WT_ERR_PANIC; + + /* Prepare the WT_REFs for the move. */ + __split_ref_step1(session, alloc_index, true); + /* Split into the parent. */ WT_ERR(__split_parent(session, page_ref, alloc_index->index, alloc_index->entries, parent_incr, false, false)); - /* - * A note on error handling: until this point, there's no problem with - * unwinding on error. We allocated a new page index, a new set of - * WT_REFs and a new set of child pages -- if an error occurred, the - * page remained unchanged, although it may have an incorrect memory - * footprint. From now on we've modified the parent page, attention - * needs to be paid. However, subsequent failures are relatively benign, - * the split is OK and complete. For that reason, we ignore errors past - * this point unless there's a panic. - */ - complete = true; - /* Confirm the page's index hasn't moved, then update it. */ WT_ASSERT(session, WT_INTL_INDEX_GET_SAFE(page) == pindex); WT_INTL_INDEX_SET(page, replace_index); @@ -1127,9 +1160,17 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page) __split_verify_intl_key_order(session, page)); #endif - /* Fix up the moved WT_REF structures. */ - WT_ERR(__split_ref_move_final( - session, alloc_index->index + 1, alloc_index->entries - 1)); + /* Finalize the WT_REFs we moved. */ + WT_ERR(__split_ref_step2(session, alloc_index, true)); + + /* The split is complete and correct, ignore benign errors. */ + complete = WT_ERR_IGNORE; + + /* + * Push out the changes: not required for correctness, but no reason + * to wait. 
+ */ + WT_FULL_BARRIER(); /* * We don't care about the page-index we allocated, all we needed was @@ -1158,24 +1199,26 @@ __split_internal(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_PAGE *page) __wt_cache_page_inmem_decr(session, page, page_decr); __wt_page_modify_set(session, page); -err: /* - * If complete is true, we saw an error after opening up the tree to - * descent through the page's new index. There is nothing we can do, - * there are threads potentially active in both versions of the tree. - * - * A note on error handling: if we completed the split, return success, - * nothing really bad can have happened, and our caller has to proceed - * with the split. - */ - if (!complete) { +err: switch (complete) { + case WT_ERR_RETURN: __wt_free_ref_index(session, page, alloc_index, true); __wt_free_ref_index(session, page, replace_index, false); - } - - if (ret != 0 && ret != WT_PANIC) + break; + case WT_ERR_PANIC: __wt_err(session, ret, - "ignoring not-fatal error during internal page split"); - return (ret == WT_PANIC || !complete ? 
ret : 0); + "fatal error during internal page split"); + ret = WT_PANIC; + break; + case WT_ERR_IGNORE: + if (ret != 0 && ret != WT_PANIC) { + __wt_err(session, ret, + "ignoring not-fatal error during internal page " + "split"); + ret = 0; + } + break; + } + return (ret); } /* diff --git a/src/btree/bt_stat.c b/src/btree/bt_stat.c index 2f8759b9d82..ef70160aa72 100644 --- a/src/btree/bt_stat.c +++ b/src/btree/bt_stat.c @@ -35,10 +35,10 @@ __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst) WT_STAT_SET(session, stats, btree_fixed_len, btree->bitcnt); WT_STAT_SET(session, stats, btree_maximum_depth, btree->maximum_depth); - WT_STAT_SET(session, stats, btree_maxintlpage, btree->maxintlpage); WT_STAT_SET(session, stats, btree_maxintlkey, btree->maxintlkey); - WT_STAT_SET(session, stats, btree_maxleafpage, btree->maxleafpage); + WT_STAT_SET(session, stats, btree_maxintlpage, btree->maxintlpage); WT_STAT_SET(session, stats, btree_maxleafkey, btree->maxleafkey); + WT_STAT_SET(session, stats, btree_maxleafpage, btree->maxleafpage); WT_STAT_SET(session, stats, btree_maxleafvalue, btree->maxleafvalue); /* Everything else is really, really expensive. 
*/ @@ -59,8 +59,8 @@ __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst) WT_STAT_SET(session, stats, btree_row_leaf, 0); next_walk = NULL; - while ((ret = __wt_tree_walk(session, &next_walk, NULL, 0)) == 0 && - next_walk != NULL) { + while ((ret = __wt_tree_walk( + session, &next_walk, 0)) == 0 && next_walk != NULL) { WT_WITH_PAGE_INDEX(session, ret = __stat_page(session, next_walk->page, stats)); WT_RET(ret); diff --git a/src/btree/bt_sync.c b/src/btree/bt_sync.c index 07bb2eb3a01..86607d8f187 100644 --- a/src/btree/bt_sync.c +++ b/src/btree/bt_sync.c @@ -58,7 +58,7 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) flags |= WT_READ_NO_WAIT | WT_READ_SKIP_INTL; for (walk = NULL;;) { - WT_ERR(__wt_tree_walk(session, &walk, NULL, flags)); + WT_ERR(__wt_tree_walk(session, &walk, flags)); if (walk == NULL) break; @@ -124,7 +124,7 @@ __sync_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) /* Write all dirty in-cache pages. */ flags |= WT_READ_NO_EVICT; for (walk = NULL;;) { - WT_ERR(__wt_tree_walk(session, &walk, NULL, flags)); + WT_ERR(__wt_tree_walk(session, &walk, flags)); if (walk == NULL) break; diff --git a/src/btree/bt_walk.c b/src/btree/bt_walk.c index c7d83d8dfff..abb18529041 100644 --- a/src/btree/bt_walk.c +++ b/src/btree/bt_walk.c @@ -69,16 +69,78 @@ retry: WT_INTL_INDEX_GET(session, ref->home, pindex); } /* - * __wt_tree_walk -- + * __ref_is_leaf -- + * Check if a reference is for a leaf page. + */ +static inline bool +__ref_is_leaf(WT_REF *ref) +{ + size_t addr_size; + u_int type; + const uint8_t *addr; + + /* + * If the page has a disk address, we can crack it to figure out if + * this page is a leaf page or not. If there's no address, the page + * isn't on disk and we don't know the page type. + */ + __wt_ref_info(ref, &addr, &addr_size, &type); + return (addr == NULL ? + false : type == WT_CELL_ADDR_LEAF || type == WT_CELL_ADDR_LEAF_NO); +} + +/* + * __page_ascend -- + * Ascend the tree one level. 
+ */ +static void +__page_ascend(WT_SESSION_IMPL *session, + WT_REF **refp, WT_PAGE_INDEX **pindexp, uint32_t *slotp) +{ + WT_REF *parent_ref, *ref; + + /* + * Ref points to the first/last slot on an internal page from which we + * are ascending the tree, moving to the parent page. This is tricky + * because the internal page we're on may be splitting into its parent. + * Find a stable configuration where the page we start from and the + * page we're moving to are connected. The tree eventually stabilizes + * into that configuration, keep trying until we succeed. + */ + for (ref = *refp;;) { + /* + * Find our parent slot on the next higher internal page, the + * slot from which we move to a next/prev slot, checking that + * we haven't reached the root. + */ + parent_ref = ref->home->pg_intl_parent_ref; + if (__wt_ref_is_root(parent_ref)) + break; + __page_refp(session, parent_ref, pindexp, slotp); + + /* + * When internal pages split, the WT_REF structures being moved + * are updated first. If the WT_REF we started with references + * the same page as we found on our search of the parent, there + * is a consistent view. + */ + if (ref->home == parent_ref->page) + break; + } + + *refp = parent_ref; +} + +/* + * __tree_walk_internal -- * Move to the next/previous page in the tree. */ -int -__wt_tree_walk(WT_SESSION_IMPL *session, - WT_REF **refp, uint64_t *walkcntp, uint32_t flags) +static inline int +__tree_walk_internal(WT_SESSION_IMPL *session, + WT_REF **refp, uint64_t *walkcntp, uint64_t *skipleafcntp, uint32_t flags) { WT_BTREE *btree; WT_DECL_RET; - WT_PAGE *page; WT_PAGE_INDEX *pindex; WT_REF *couple, *couple_orig, *ref; bool empty_internal, prev, skip; @@ -153,7 +215,7 @@ __wt_tree_walk(WT_SESSION_IMPL *session, goto descend; } -ascend: /* + /* * If the active page was the root, we've reached the walk's end. * Release any hazard-pointer we're holding. 
*/ @@ -167,13 +229,14 @@ ascend: /* for (;;) { /* - * If we're at the last/first slot on the page, return this page - * in post-order traversal. Otherwise we move to the next/prev - * slot and left/right-most element in its subtree. + * If we're at the last/first slot on the internal page, return + * it in post-order traversal. Otherwise move to the next/prev + * slot and left/right-most element in that subtree. */ - if ((prev && slot == 0) || + while ((prev && slot == 0) || (!prev && slot == pindex->entries - 1)) { - ref = ref->home->pg_intl_parent_ref; + /* Ascend to the parent. */ + __page_ascend(session, &ref, &pindex, &slot); /* * If we got all the way through an internal page and @@ -185,40 +248,37 @@ ascend: /* empty_internal = false; } - /* Optionally skip internal pages. */ - if (LF_ISSET(WT_READ_SKIP_INTL)) - goto ascend; - /* - * We've ascended the tree and are returning an internal - * page. If it's the root, discard our hazard pointer, - * otherwise, swap our hazard pointer for the page we'll - * return. + * If at the root and returning internal pages, return + * the root page, otherwise we're done. Regardless, no + * hazard pointer is required, release the one we hold. */ - if (__wt_ref_is_root(ref)) + if (__wt_ref_is_root(ref)) { WT_ERR(__wt_page_release( session, couple, flags)); - else { - /* - * Locate the reference to our parent page then - * swap our child hazard pointer for the parent. - * We don't handle restart or not-found returns. - * It would require additional complexity and is - * not a possible return: we're moving to the - * parent of the current child page, our parent - * reference can't have split or been evicted. - */ - __page_refp(session, ref, &pindex, &slot); + if (!LF_ISSET(WT_READ_SKIP_INTL)) + *refp = ref; + goto done; + } + + /* + * Optionally return internal pages. Swap our previous + * hazard pointer for the page we'll return. 
We don't + * handle restart or not-found returns, it would require + * additional complexity and is not a possible return: + * we're moving to the parent of the current child page, + * the parent can't have been evicted. + */ + if (!LF_ISSET(WT_READ_SKIP_INTL)) { if ((ret = __wt_page_swap( session, couple, ref, flags)) != 0) { WT_TRET(__wt_page_release( session, couple, flags)); WT_ERR(ret); } + *refp = ref; + goto done; } - - *refp = ref; - goto done; } if (prev) @@ -304,6 +364,31 @@ ascend: /* break; } + /* + * Optionally skip leaf pages: skip all leaf pages if + * WT_READ_SKIP_LEAF is set, when the skip-leaf-count + * variable is non-zero, skip some count of leaf pages. + * If this page is disk-based, crack the cell to figure + * out it's a leaf page without reading it. + * + * If skipping some number of leaf pages, decrement the + * count of pages to zero, and then take the next leaf + * page we can. Be cautious around the page decrement, + * if for some reason don't take this particular page, + * we can take the next one, and, there are additional + * tests/decrements when we're about to return a leaf + * page. + */ + if (skipleafcntp != NULL || LF_ISSET(WT_READ_SKIP_LEAF)) + if (__ref_is_leaf(ref)) { + if (LF_ISSET(WT_READ_SKIP_LEAF)) + break; + if (*skipleafcntp > 0) { + --*skipleafcntp; + break; + } + } + ret = __wt_page_swap(session, couple, ref, flags); /* @@ -359,13 +444,29 @@ ascend: /* * A new page: configure for traversal of any internal * page's children, else return the leaf page. */ -descend: couple = ref; - page = ref->page; - if (WT_PAGE_IS_INTERNAL(page)) { - WT_INTL_INDEX_GET(session, page, pindex); + if (WT_PAGE_IS_INTERNAL(ref->page)) { +descend: couple = ref; + WT_INTL_INDEX_GET(session, ref->page, pindex); slot = prev ? pindex->entries - 1 : 0; empty_internal = true; } else { + /* + * Optionally skip leaf pages, the second half. 
+ * We didn't have an on-page cell to figure out + * if it was a leaf page, we had to acquire the + * hazard pointer and look at the page. + */ + if (skipleafcntp != NULL || + LF_ISSET(WT_READ_SKIP_LEAF)) { + couple = ref; + if (LF_ISSET(WT_READ_SKIP_LEAF)) + break; + if (*skipleafcntp > 0) { + --*skipleafcntp; + break; + } + } + *refp = ref; goto done; } @@ -376,3 +477,37 @@ done: err: WT_LEAVE_PAGE_INDEX(session); return (ret); } + +/* + * __wt_tree_walk -- + * Move to the next/previous page in the tree. + */ +int +__wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags) +{ + return (__tree_walk_internal(session, refp, NULL, NULL, flags)); +} + +/* + * __wt_tree_walk_count -- + * Move to the next/previous page in the tree, tracking how many + * references were visited to get there. + */ +int +__wt_tree_walk_count(WT_SESSION_IMPL *session, + WT_REF **refp, uint64_t *walkcntp, uint32_t flags) +{ + return (__tree_walk_internal(session, refp, walkcntp, NULL, flags)); +} + +/* + * __wt_tree_walk_skip -- + * Move to the next/previous page in the tree, skipping a certain number + * of leaf pages before returning. + */ +int +__wt_tree_walk_skip(WT_SESSION_IMPL *session, + WT_REF **refp, uint64_t *skipleafcntp, uint32_t flags) +{ + return (__tree_walk_internal(session, refp, NULL, skipleafcntp, flags)); +} diff --git a/src/btree/col_srch.c b/src/btree/col_srch.c index e9fa570f97b..c5e2abbe440 100644 --- a/src/btree/col_srch.c +++ b/src/btree/col_srch.c @@ -9,12 +9,60 @@ #include "wt_internal.h" /* + * __check_leaf_key_range -- + * Check the search key is in the leaf page's key range. + */ +static inline int +__check_leaf_key_range(WT_SESSION_IMPL *session, + uint64_t recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt) +{ + WT_PAGE_INDEX *pindex; + uint32_t indx; + + /* + * There are reasons we can't do the fast checks, and we continue with + * the leaf page search in those cases, only skipping the complete leaf + * page search if we know it's not going to work. 
+ */ + cbt->compare = 0; + + /* + * Check if the search key is smaller than the parent's starting key for + * this page. + */ + if (recno < leaf->key.recno) { + cbt->compare = 1; /* page keys > search key */ + return (0); + } + + /* + * Check if the search key is greater than or equal to the starting key + * for the parent's next page. + * + * !!! + * Check that "indx + 1" is a valid page-index entry first, because it + * also checks that "indx" is a valid page-index entry, and we have to + * do that latter check before looking at the indx slot of the array + * for a match to leaf (in other words, our page hint might be wrong). + */ + WT_INTL_INDEX_GET(session, leaf->home, pindex); + indx = leaf->pindex_hint; + if (indx + 1 < pindex->entries && pindex->index[indx] == leaf) + if (recno >= pindex->index[indx + 1]->key.recno) { + cbt->compare = -1; /* page keys < search key */ + return (0); + } + + return (0); +} + +/* * __wt_col_search -- * Search a column-store tree for a specific record-based key. */ int __wt_col_search(WT_SESSION_IMPL *session, - uint64_t recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt) + uint64_t search_recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt) { WT_BTREE *btree; WT_COL *cip; @@ -24,6 +72,7 @@ __wt_col_search(WT_SESSION_IMPL *session, WT_PAGE *page; WT_PAGE_INDEX *pindex, *parent_pindex; WT_REF *current, *descent; + uint64_t recno; uint32_t base, indx, limit; int depth; @@ -31,8 +80,38 @@ __wt_col_search(WT_SESSION_IMPL *session, __cursor_pos_clear(cbt); - /* We may only be searching a single leaf page, not the full tree. */ + /* + * When appending a new record, the search record number will be an + * out-of-band value, search for the largest key in the table instead. + */ + if ((recno = search_recno) == WT_RECNO_OOB) + recno = UINT64_MAX; + + /* + * We may be searching only a single leaf page, not the full tree. 
In + * the normal case where the page links to a parent, check the page's + * parent keys before doing the full search, it's faster when the + * cursor is being re-positioned. (One case where the page doesn't + * have a parent is if it is being re-instantiated in memory as part + * of a split). + */ if (leaf != NULL) { + WT_ASSERT(session, search_recno != WT_RECNO_OOB); + + if (leaf->home != NULL) { + WT_RET(__check_leaf_key_range( + session, recno, leaf, cbt)); + if (cbt->compare != 0) { + /* + * !!! + * WT_CURSOR.search_near uses the slot value to + * decide if there was an on-page match. + */ + cbt->slot = 0; + return (0); + } + } + current = leaf; goto leaf_only; } @@ -120,7 +199,17 @@ leaf_only: page = current->page; cbt->ref = current; cbt->recno = recno; - cbt->compare = 0; + + /* + * Don't bother searching if the caller is appending a new record where + * we'll allocate the record number; we're not going to find a match by + * definition, and we figure out the record number and position when we + * do the work. + */ + if (search_recno == WT_RECNO_OOB) { + cbt->compare = -1; + return (0); + } /* * Set the on-page slot to an impossible value larger than any possible @@ -142,6 +231,7 @@ leaf_only: * that's impossibly large for the page. We do have additional setup to * do in that case, the record may be appended to the page. */ + cbt->compare = 0; if (page->type == WT_PAGE_COL_FIX) { if (recno < page->pg_fix_recno) { cbt->compare = 1; @@ -190,18 +280,10 @@ past_end: * This is a rarely used path: we normally find exact matches, because * column-store files are dense, but in this case the caller searched * past the end of the table. - * - * Don't bother searching if the caller is appending a new record where - * we'll allocate the record number; we're not going to find a match by - * definition, and we figure out the position when we do the work. 
*/ cbt->ins_head = WT_COL_APPEND(page); - if (recno == UINT64_MAX) - cbt->ins = NULL; - else - cbt->ins = __col_insert_search( - cbt->ins_head, cbt->ins_stack, cbt->next_stack, recno); - if (cbt->ins == NULL) + if ((cbt->ins = __col_insert_search( + cbt->ins_head, cbt->ins_stack, cbt->next_stack, recno)) == NULL) cbt->compare = -1; else { cbt->recno = WT_INSERT_RECNO(cbt->ins); @@ -212,14 +294,5 @@ past_end: else cbt->compare = -1; } - - /* - * Note if the record is past the maximum record in the tree, the cursor - * search functions need to know for fixed-length column-stores because - * appended records implicitly create any skipped records, and cursor - * search functions have to handle that case. - */ - if (cbt->compare == -1) - F_SET(cbt, WT_CBT_MAX_RECORD); return (0); } diff --git a/src/btree/row_srch.c b/src/btree/row_srch.c index d2d8a4640ca..e98d30152ab 100644 --- a/src/btree/row_srch.c +++ b/src/btree/row_srch.c @@ -132,6 +132,76 @@ __wt_search_insert( } /* + * __check_leaf_key_range -- + * Check the search key is in the leaf page's key range. + */ +static inline int +__check_leaf_key_range(WT_SESSION_IMPL *session, + WT_ITEM *srch_key, WT_REF *leaf, WT_CURSOR_BTREE *cbt) +{ + WT_BTREE *btree; + WT_COLLATOR *collator; + WT_ITEM *item; + WT_PAGE_INDEX *pindex; + uint32_t indx; + int cmp; + + btree = S2BT(session); + collator = btree->collator; + item = cbt->tmp; + + /* + * There are reasons we can't do the fast checks, and we continue with + * the leaf page search in those cases, only skipping the complete leaf + * page search if we know it's not going to work. + */ + cbt->compare = 0; + + /* + * First, confirm we have the right parent page-index slot, and quit if + * we don't. We don't search for the correct slot, that would make this + * cheap test expensive. 
+ */ + WT_INTL_INDEX_GET(session, leaf->home, pindex); + indx = leaf->pindex_hint; + if (indx >= pindex->entries || pindex->index[indx] != leaf) + return (0); + + /* + * Check if the search key is smaller than the parent's starting key for + * this page. + * + * We can't compare against slot 0 on a row-store internal page because + * reconciliation doesn't build it, it may not be a valid key. + */ + if (indx != 0) { + __wt_ref_key(leaf->home, leaf, &item->data, &item->size); + WT_RET(__wt_compare(session, collator, srch_key, item, &cmp)); + if (cmp < 0) { + cbt->compare = 1; /* page keys > search key */ + return (0); + } + } + + /* + * Check if the search key is greater than or equal to the starting key + * for the parent's next page. + */ + ++indx; + if (indx < pindex->entries) { + __wt_ref_key( + leaf->home, pindex->index[indx], &item->data, &item->size); + WT_RET(__wt_compare(session, collator, srch_key, item, &cmp)); + if (cmp >= 0) { + cbt->compare = -1; /* page keys < search key */ + return (0); + } + } + + return (0); +} + +/* * __wt_row_search -- * Search a row-store tree for a specific key. */ @@ -179,8 +249,29 @@ __wt_row_search(WT_SESSION_IMPL *session, append_check = insert && cbt->append_tree; descend_right = true; - /* We may only be searching a single leaf page, not the full tree. */ + /* + * We may be searching only a single leaf page, not the full tree. In + * the normal case where the page links to a parent, check the page's + * parent keys before doing the full search, it's faster when the + * cursor is being re-positioned. (One case where the page doesn't + * have a parent is if it is being re-instantiated in memory as part + * of a split). + */ if (leaf != NULL) { + if (leaf->home != NULL) { + WT_RET(__check_leaf_key_range( + session, srch_key, leaf, cbt)); + if (cbt->compare != 0) { + /* + * !!! + * WT_CURSOR.search_near uses the slot value to + * decide if there was an on-page match. 
+ */ + cbt->slot = 0; + return (0); + } + } + current = leaf; goto leaf_only; } @@ -196,15 +287,6 @@ restart_page: page = current->page; WT_INTL_INDEX_GET(session, page, pindex); - /* - * Fast-path internal pages with one child, a common case for - * the root page in new trees. - */ - if (pindex->entries == 1) { - descent = pindex->index[0]; - goto descend; - } - /* Fast-path appends. */ if (append_check) { descent = pindex->index[pindex->entries - 1]; @@ -536,19 +618,163 @@ err: /* } /* - * __wt_row_random -- - * Return a random key from a row-store tree. + * __wt_row_random_leaf -- + * Return a random key from a row-store leaf page. */ int -__wt_row_random(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) +__wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) +{ + WT_INSERT *ins, **start, **stop; + WT_INSERT_HEAD *ins_head; + WT_PAGE *page; + uint32_t choice, entries, i; + int level; + + page = cbt->ref->page; + + start = stop = NULL; /* [-Wconditional-uninitialized] */ + entries = 0; /* [-Wconditional-uninitialized] */ + + /* If the page has disk-based entries, select from them. */ + if (page->pg_row_entries != 0) { + cbt->compare = 0; + cbt->slot = __wt_random(&session->rnd) % page->pg_row_entries; + + /* + * The real row-store search function builds the key, so we + * have to as well. + */ + return (__wt_row_leaf_key(session, + page, page->pg_row_d + cbt->slot, cbt->tmp, false)); + } + + /* + * If the tree is new (and not empty), it might have a large insert + * list. + */ + F_SET(cbt, WT_CBT_SEARCH_SMALLEST); + if ((cbt->ins_head = WT_ROW_INSERT_SMALLEST(page)) == NULL) + return (WT_NOTFOUND); + + /* + * Walk down the list until we find a level with at least 50 entries, + * that's where we'll start rolling random numbers. The value 50 is + * used to ignore levels with only a few entries, that is, levels which + * are potentially badly skewed. 
+ */ + for (ins_head = cbt->ins_head, + level = WT_SKIP_MAXDEPTH - 1; level >= 0; --level) { + start = &ins_head->head[level]; + for (entries = 0, stop = start; + *stop != NULL; stop = &(*stop)->next[level]) + ++entries; + + if (entries > 50) + break; + } + + /* + * If it's a tiny list and we went all the way to level 0, correct the + * level; entries is correctly set. + */ + if (level < 0) + level = 0; + + /* + * Step down the skip list levels, selecting a random chunk of the name + * space at each level. + */ + while (level > 0) { + /* + * There are (entries) or (entries + 1) chunks of the name space + * considered at each level. They are: between start and the 1st + * element, between the 1st and 2nd elements, and so on to the + * last chunk which is the name space after the stop element on + * the current level. This last chunk of name space may or may + * not be there: as we descend the levels of the skip list, this + * chunk may appear, depending if the next level down has + * entries logically after the stop point in the current level. + * We can't ignore those entries: because of the algorithm used + * to determine the depth of a skiplist, there may be a large + * number of entries "revealed" by descending a level. + * + * If the next level down has more items after the current stop + * point, there are (entries + 1) chunks to consider, else there + * are (entries) chunks. + */ + if (*(stop - 1) == NULL) + choice = __wt_random(&session->rnd) % entries; + else + choice = __wt_random(&session->rnd) % (entries + 1); + + if (choice == entries) { + /* + * We selected the name space after the stop element on + * this level. Set the start point to the current stop + * point, descend a level and move the stop element to + * the end of the list, that is, the end of the newly + * discovered name space, counting entries as we go. 
+ */ + start = stop; + --start; + --level; + for (entries = 0, stop = start; + *stop != NULL; stop = &(*stop)->next[level]) + ++entries; + } else { + /* + * We selected another name space on the level. Move the + * start pointer the selected number of entries forward + * to the start of the selected chunk (if the selected + * number is 0, start won't move). Set the stop pointer + * to the next element in the list and drop both start + * and stop down a level. + */ + for (i = 0; i < choice; ++i) + start = &(*start)->next[level]; + stop = &(*start)->next[level]; + + --start; + --stop; + --level; + + /* Count the entries in the selected name space. */ + for (entries = 0, + ins = *start; ins != *stop; ins = ins->next[level]) + ++entries; + } + } + + /* + * When we reach the bottom level, entries will already be set. Select + * a random entry from the name space and return it. + * + * It should be impossible for the entries count to be 0 at this point, + * but check for it out of paranoia and to quiet static testing tools. + */ + if (entries > 0) + entries = __wt_random(&session->rnd) % entries; + for (ins = *start; entries > 0; --entries) + ins = ins->next[0]; + + cbt->ins = ins; + cbt->compare = 0; + + return (0); +} + +/* + * __wt_row_random_descent -- + * Find a random leaf page in a row-store tree. + */ +int +__wt_row_random_descent(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt) { WT_BTREE *btree; WT_DECL_RET; - WT_INSERT *p, *t; WT_PAGE *page; WT_PAGE_INDEX *pindex; WT_REF *current, *descent; - uint32_t cnt; btree = S2BT(session); @@ -585,43 +811,6 @@ restart_root: return (ret); } - if (page->pg_row_entries != 0) { - cbt->ref = current; - cbt->compare = 0; - cbt->slot = __wt_random(&session->rnd) % page->pg_row_entries; - - /* - * The real row-store search function builds the key, so we - * have to as well. 
- */ - return (__wt_row_leaf_key(session, - page, page->pg_row_d + cbt->slot, cbt->tmp, false)); - } - - /* - * If the tree is new (and not empty), it might have a large insert - * list. Count how many records are in the list. - */ - F_SET(cbt, WT_CBT_SEARCH_SMALLEST); - if ((cbt->ins_head = WT_ROW_INSERT_SMALLEST(page)) == NULL) - WT_ERR(WT_NOTFOUND); - for (cnt = 1, p = WT_SKIP_FIRST(cbt->ins_head);; ++cnt) - if ((p = WT_SKIP_NEXT(p)) == NULL) - break; - - /* - * Select a random number from 0 to (N - 1), return that record. - */ - cnt = __wt_random(&session->rnd) % cnt; - for (p = t = WT_SKIP_FIRST(cbt->ins_head);; t = p) - if (cnt-- == 0 || (p = WT_SKIP_NEXT(p)) == NULL) - break; cbt->ref = current; - cbt->compare = 0; - cbt->ins = t; - return (0); - -err: WT_TRET(__wt_page_release(session, current, 0)); - return (ret); } diff --git a/src/cache/cache_las.c b/src/cache/cache_las.c index d3a0265c13a..e943f01236e 100644 --- a/src/cache/cache_las.c +++ b/src/cache/cache_las.c @@ -18,6 +18,7 @@ __wt_las_stats_update(WT_SESSION_IMPL *session) WT_CONNECTION_IMPL *conn; WT_CONNECTION_STATS **cstats; WT_DSRC_STATS **dstats; + int64_t v; conn = S2C(session); @@ -37,10 +38,10 @@ __wt_las_stats_update(WT_SESSION_IMPL *session) dstats = ((WT_CURSOR_BTREE *) conn->las_session->las_cursor)->btree->dhandle->stats; - WT_STAT_SET(session, cstats, - cache_lookaside_insert, WT_STAT_READ(dstats, cursor_insert)); - WT_STAT_SET(session, cstats, - cache_lookaside_remove, WT_STAT_READ(dstats, cursor_remove)); + v = WT_STAT_READ(dstats, cursor_insert); + WT_STAT_SET(session, cstats, cache_lookaside_insert, v); + v = WT_STAT_READ(dstats, cursor_remove); + WT_STAT_SET(session, cstats, cache_lookaside_remove, v); } /* diff --git a/src/config/config_def.c b/src/config/config_def.c index d79ce6853e6..9d12e953498 100644 --- a/src/config/config_def.c +++ b/src/config/config_def.c @@ -323,6 +323,7 @@ static const WT_CONFIG_CHECK confchk_WT_SESSION_open_cursor[] = { NULL, 
"choices=[\"hex\",\"json\",\"print\"]", NULL, 0 }, { "next_random", "boolean", NULL, NULL, NULL, 0 }, + { "next_random_sample_size", "string", NULL, NULL, NULL, 0 }, { "overwrite", "boolean", NULL, NULL, NULL, 0 }, { "raw", "boolean", NULL, NULL, NULL, 0 }, { "readonly", "boolean", NULL, NULL, NULL, 0 }, @@ -920,9 +921,10 @@ static const WT_CONFIG_ENTRY config_entries[] = { NULL, 0 }, { "WT_SESSION.open_cursor", - "append=0,bulk=0,checkpoint=,dump=,next_random=0,overwrite=,raw=0" - ",readonly=0,skip_sort_check=0,statistics=,target=", - confchk_WT_SESSION_open_cursor, 11 + "append=0,bulk=0,checkpoint=,dump=,next_random=0," + "next_random_sample_size=0,overwrite=,raw=0,readonly=0," + "skip_sort_check=0,statistics=,target=", + confchk_WT_SESSION_open_cursor, 12 }, { "WT_SESSION.reconfigure", "isolation=read-committed", diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index bd14e1bf4fd..ee9935828e2 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -2003,6 +2003,9 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, WT_ERR(__wt_sweep_config(session, cfg)); WT_ERR(__wt_verbose_config(session, cfg)); + /* Initialize the OS page size for mmap */ + conn->page_size = __wt_get_vm_pagesize(); + /* Now that we know if verbose is configured, output the version. 
*/ WT_ERR(__wt_verbose( session, WT_VERB_VERSION, "%s", WIREDTIGER_VERSION_STRING)); diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c index c6d5b535b86..0821238fbd7 100644 --- a/src/conn/conn_dhandle.c +++ b/src/conn/conn_dhandle.c @@ -147,12 +147,14 @@ __conn_dhandle_mark_dead(WT_SESSION_IMPL *session) int __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, bool final, bool force) { + WT_BM *bm; WT_BTREE *btree; WT_DATA_HANDLE *dhandle; WT_DECL_RET; bool marked_dead, no_schema_lock; btree = S2BT(session); + bm = btree->bm; dhandle = session->dhandle; marked_dead = false; @@ -191,7 +193,7 @@ __wt_conn_btree_sync_and_close(WT_SESSION_IMPL *session, bool final, bool force) */ if (!F_ISSET(btree, WT_BTREE_SALVAGE | WT_BTREE_UPGRADE | WT_BTREE_VERIFY)) { - if (force && (btree->bm == NULL || btree->bm->map == NULL)) { + if (force && (bm == NULL || !bm->is_mapped(bm, session))) { WT_ERR(__conn_dhandle_mark_dead(session)); marked_dead = true; } diff --git a/src/cursor/cur_file.c b/src/cursor/cur_file.c index 63f77248ca8..b955b292292 100644 --- a/src/cursor/cur_file.c +++ b/src/cursor/cur_file.c @@ -455,14 +455,24 @@ __wt_curfile_create(WT_SESSION_IMPL *session, } /* - * random_retrieval - * Random retrieval cursors only support next, reset and close. + * Random retrieval, row-store only. + * Random retrieval cursors support a limited set of methods. */ WT_ERR(__wt_config_gets_def(session, cfg, "next_random", 0, &cval)); if (cval.val != 0) { + if (WT_CURSOR_RECNO(cursor)) + WT_ERR_MSG(session, ENOTSUP, + "next_random configuration not supported for " + "column-store objects"); + __wt_cursor_set_notsup(cursor); cursor->next = __curfile_next_random; cursor->reset = __curfile_reset; + + WT_ERR(__wt_config_gets_def( + session, cfg, "next_random_sample_size", 0, &cval)); + if (cval.val != 0) + cbt->next_random_sample_size = (u_int)cval.val; } /* Underlying btree initialization. 
*/ diff --git a/src/cursor/cur_json.c b/src/cursor/cur_json.c index 8f858a5012f..3270be07de4 100644 --- a/src/cursor/cur_json.c +++ b/src/cursor/cur_json.c @@ -313,7 +313,6 @@ size_t __wt_json_unpack_char(char ch, u_char *buf, size_t bufsz, bool force_unicode) { char abbrev; - u_char h; if (!force_unicode) { if (isprint(ch) && ch != '\\' && ch != '"') { @@ -354,16 +353,8 @@ __wt_json_unpack_char(char ch, u_char *buf, size_t bufsz, bool force_unicode) *buf++ = 'u'; *buf++ = '0'; *buf++ = '0'; - h = (((u_char)ch) >> 4) & 0xF; - if (h >= 10) - *buf++ = 'A' + (h - 10); - else - *buf++ = '0' + h; - h = ((u_char)ch) & 0xF; - if (h >= 10) - *buf++ = 'A' + (h - 10); - else - *buf++ = '0' + h; + *buf++ = __wt_hex[(ch & 0xf0) >> 4]; + *buf++ = __wt_hex[ch & 0x0f]; } return (6); } diff --git a/src/cursor/cur_stat.c b/src/cursor/cur_stat.c index e1d5b8eb91a..652dec364fb 100644 --- a/src/cursor/cur_stat.c +++ b/src/cursor/cur_stat.c @@ -384,6 +384,7 @@ __curstat_file_init(WT_SESSION_IMPL *session, { WT_DATA_HANDLE *dhandle; WT_DECL_RET; + wt_off_t size; const char *filename; /* @@ -395,8 +396,8 @@ __curstat_file_init(WT_SESSION_IMPL *session, if (!WT_PREFIX_SKIP(filename, "file:")) return (EINVAL); __wt_stat_dsrc_init_single(&cst->u.dsrc_stats); - WT_RET(__wt_block_manager_size( - session, filename, &cst->u.dsrc_stats)); + WT_RET(__wt_block_manager_named_size(session, filename, &size)); + cst->u.dsrc_stats.block_size = size; __wt_curstat_dsrc_final(cst); return (0); } @@ -662,7 +663,7 @@ __wt_curstat_open(WT_SESSION_IMPL *session, /* * We return the statistics field's offset as the key, and a string - * description, a string value, and a uint64_t value as the value + * description, a string value, and a uint64_t value as the value * columns. 
*/ cursor->key_format = "i"; diff --git a/src/cursor/cur_std.c b/src/cursor/cur_std.c index f92426355ef..da38988b6c2 100644 --- a/src/cursor/cur_std.c +++ b/src/cursor/cur_std.c @@ -40,11 +40,11 @@ void __wt_cursor_set_notsup(WT_CURSOR *cursor) { /* - * Set all of the cursor methods (except for close and reset), to fail. - * Close is unchanged so the cursor can be discarded, reset defaults to + * Set cursor methods other than close, reconfigure and reset, to fail. + * Close is unchanged so the cursor can be discarded; reset is set to * a no-op because session transactional operations reset all of the - * cursors in a session, and random cursors shouldn't block transactions - * or checkpoints. + * cursors in a session. Reconfigure is left open in case it's possible + * in the future to change these configurations. */ cursor->compare = (int (*)(WT_CURSOR *, WT_CURSOR *, int *))__wt_cursor_notsup; diff --git a/src/cursor/cur_table.c b/src/cursor/cur_table.c index dca72a16ee5..e746ccd5871 100644 --- a/src/cursor/cur_table.c +++ b/src/cursor/cur_table.c @@ -968,8 +968,11 @@ __wt_curtable_open(WT_SESSION_IMPL *session, WT_ERR(__wt_strdup(session, tmp->data, &ctable->cfg[1])); if (0) { -err: WT_TRET(__curtable_close(cursor)); - *cursorp = NULL; +err: if (*cursorp != NULL) { + WT_TRET(__wt_cursor_close(*cursorp)); + *cursorp = NULL; + } + WT_TRET(__curtable_close(cursor)); } __wt_scr_free(session, &tmp); diff --git a/src/docs/command-line.dox b/src/docs/command-line.dox index 745c5051be3..e2b376d5e3f 100644 --- a/src/docs/command-line.dox +++ b/src/docs/command-line.dox @@ -32,7 +32,7 @@ on success and non-zero on error. The \c wt tool supports several commands. If configured in the underlying database, some commands will run recovery when opening the database. If -the user wants to force recovery on any command, use the \c -r option. +the user wants to force recovery on any command, use the \c -R option. 
In general, commands that modify the database or tables will run recovery by default and commands that only read data will not run recovery. @@ -46,7 +46,7 @@ opened as a WiredTiger database. See @ref backup for more information, and @ref file_permissions for specifics on the copied file permissions. @subsection util_backup_synopsis Synopsis -<code>wt [-rVv] [-C config] [-E secretkey ] [-h directory] backup [-t uri] directory</code> +<code>wt [-RVv] [-C config] [-E secretkey ] [-h directory] backup [-t uri] directory</code> @subsection util_backup_options Options The following are command-specific options for the \c backup command: @@ -64,7 +64,7 @@ The \c compact command attempts to rewrite the specified table or file to consume less disk space. @subsection util_compact_synopsis Synopsis -<code>wt [-rVv] [-C config] [-E secretkey ] [-h directory] compact uri</code> +<code>wt [-RVv] [-C config] [-E secretkey ] [-h directory] compact uri</code> @subsection util_compact_options Options The \c compact command has no command-specific options. @@ -78,7 +78,7 @@ configuration. It is equivalent to a call to WT_SESSION::create with the specified string arguments. @subsection util_create_synopsis Synopsis -<code>wt [-rVv] [-C config] [-E secretkey ] [-h directory] create [-c config] uri</code> +<code>wt [-RVv] [-C config] [-E secretkey ] [-h directory] create [-c config] uri</code> @subsection util_create_options Options The following are command-specific options for the \c create command: @@ -94,7 +94,7 @@ The \c drop command drops the specified \c uri. It is equivalent to a call to WT_SESSION::drop with the "force" configuration argument. @subsection util_drop_synopsis Synopsis -<code>wt [-rVv] [-C config] [-E secretkey ] [-h directory] drop uri</code> +<code>wt [-RVv] [-C config] [-E secretkey ] [-h directory] drop uri</code> @subsection util_drop_options Options The \c drop command has no command-specific options. 
@@ -109,7 +109,7 @@ which can be re-loaded into a new table using the \c load command. See @subpage dump_formats for details of the dump file formats. @subsection util_dump_synopsis Synopsis -<code>wt [-rVv] [-C config] [-E secretkey ] [-h directory] dump [-jrx] [-c checkpoint] [-f output] uri</code> +<code>wt [-RVv] [-C config] [-E secretkey ] [-h directory] dump [-jrx] [-c checkpoint] [-f output] uri</code> @subsection util_dump_options Options The following are command-specific options for the \c dump command: @@ -143,7 +143,7 @@ the database. If a URI is specified as an argument, only information about that data source is printed. @subsection util_list_synopsis Synopsis -<code>wt [-rVv] [-C config] [-E secretkey ] [-h directory] list [-cv] [uri]</code> +<code>wt [-RVv] [-C config] [-E secretkey ] [-h directory] list [-cv] [uri]</code> @subsection util_list_options Options The following are command-specific options for the \c list command: @@ -170,7 +170,7 @@ table will be overwritten by the new data (use the \c -n option to make an attempt to overwrite existing data return an error). @subsection util_load_synopsis Synopsis -<code>wt [-rVv] [-C config] [-E secretkey ] [-h directory] load [-ajn] [-f input] [-r name] [uri configuration ...]</code> +<code>wt [-RVv] [-C config] [-E secretkey ] [-h directory] load [-ajn] [-f input] [-r name] [uri configuration ...]</code> @subsection util_load_options Options The following are command-specific options for the \c load command: @@ -244,7 +244,7 @@ row-store table or file already exists, data in the table or file will be overwritten by the new data. @subsection util_loadtext_synopsis Synopsis -<code>wt [-rVv] [-C config] [-E secretkey ] [-h directory] loadtext [-f input]</code> +<code>wt [-RVv] [-C config] [-E secretkey ] [-h directory] loadtext [-f input]</code> @subsection util_loadtext_options Options The following are command-specific options for the \c loadtext command: @@ -260,7 +260,7 @@ Display the database log. 
The \c printlog command outputs the database log. @subsection util_printlog_synopsis Synopsis -<code>wt [-rVv] [-C config] [-E secretkey ] [-h directory] printlog [-p] [-f output]</code> +<code>wt [-RVv] [-C config] [-E secretkey ] [-h directory] printlog [-x] [-f output]</code> @subsection util_printlog_options Options The following are command-specific options for the \c printlog command: @@ -269,8 +269,9 @@ The following are command-specific options for the \c printlog command: By default, the \c printlog command output is written to the standard output; the \c -f option re-directs the output to the specified file. -@par <code>-p</code> -Display the log in a printable format. +@par <code>-x</code> +Keys and value items in the log are printed in hex format in addition +to the default string format. <hr> @section util_read wt read @@ -283,7 +284,7 @@ with string or record number keys and string values. The \c read command exits non-zero if a specified record is not found. @subsection util_read_synopsis Synopsis -<code>wt [-rVv] [-C config] [-E secretkey ] [-h directory] read uri key ...</code> +<code>wt [-RVv] [-C config] [-E secretkey ] [-h directory] read uri key ...</code> @subsection util_read_options Options The \c read command has no command-specific options. @@ -295,7 +296,7 @@ Rename a table or file. The \c rename command renames the specified table or file. @subsection util_rename_synopsis Synopsis -<code>wt [-rVv] [-C config] [-E secretkey ] [-h directory] rename uri name</code> +<code>wt [-RVv] [-C config] [-E secretkey ] [-h directory] rename uri name</code> @subsection util_rename_options Options The \c rename command has no command-specific options. @@ -309,7 +310,7 @@ data that cannot be recovered. Underlying files are re-written in place, overwriting the original file contents. 
@subsection util_salvage_synopsis Synopsis -<code>wt [-rVv] [-C config] [-E secretkey ] [-h directory] salvage [-F force] uri</code> +<code>wt [-RVv] [-C config] [-E secretkey ] [-h directory] salvage [-F force] uri</code> @subsection util_salvage_options Options The following are command-specific options for the \c salvage command: @@ -327,7 +328,7 @@ The \c stat command outputs run-time statistics for the WiredTiger engine, or, if specified, for the URI on the command-line. @subsection util_stat_synopsis Synopsis -<code>wt [-rVv] [-C config] [-E secretkey ] [-h directory] stat [-f] [uri]</code> +<code>wt [-RVv] [-C config] [-E secretkey ] [-h directory] stat [-f] [uri]</code> @subsection util_stat_options Options The following are command-specific options for the \c stat command: @@ -345,7 +346,7 @@ success if the data source is up-to-date, and failure if the data source cannot be upgraded. @subsection util_upgrade_synopsis Synopsis -<code>wt [-rVv] [-C config] [-E secretkey ] [-h directory] upgrade uri</code> +<code>wt [-RVv] [-C config] [-E secretkey ] [-h directory] upgrade uri</code> @subsection util_upgrade_options Options The \c upgrade command has no command-specific options. @@ -359,7 +360,7 @@ success if the data source is correct, and failure if the data source is corrupted. @subsection util_verify_synopsis Synopsis -<code>wt [-rVv] [-C config] [-E secretkey ] [-h directory] verify uri</code> +<code>wt [-RVv] [-C config] [-E secretkey ] [-h directory] verify uri</code> @subsection util_verify_options Options The \c verify command has no command-specific options. @@ -381,9 +382,9 @@ Attempting to overwrite an already existing record will fail. @subsection util_write_synopsis Synopsis <code> -wt [-rVv] [-C config] [-E secretkey ] [-h directory] write -a uri value ... +wt [-RVv] [-C config] [-E secretkey ] [-h directory] write -a uri value ... <br> -wt [-rVv] [-C config] [-E secretkey ] [-h directory] write [-o] uri key value ... 
+wt [-RVv] [-C config] [-E secretkey ] [-h directory] write [-o] uri key value ...
</code>

@subsection util_write_options Options
diff --git a/src/docs/cursor-random.dox b/src/docs/cursor-random.dox
index 446981e3192..a0a3212be6d 100644
--- a/src/docs/cursor-random.dox
+++ b/src/docs/cursor-random.dox
@@ -2,6 +2,27 @@
 The \c next_random configuration to the WT_SESSION::open_cursor method
 configures the cursor to return a pseudo-random record from a row-store
-object (the configuration is not supported on other types of objects).
+object (the \c next_random configuration is not supported on other types
+of objects).
+
+Applications should use the WT_CURSOR::next method to retrieve records
+from the object, most other cursor methods are not supported. For
+example, it's not possible to update using a cursor configured for
+random retrieval.
+
+By default, each returned record is pseudo-randomly selected from the
+underlying object as a whole. That can lead to skewed results when the
+underlying tree structure is unbalanced or records are not uniformly
+distributed. In such cases, the \c next_random_sample_size configuration
+can also be specified. Setting \c next_random_sample_size configures the
+number of samples the application expects to take using the cursor. A
+cursor configured using \c next_random_sample_size divides the object
+into \c next_random_sample_size pieces, and each subsequent retrieval
+returns a record from the next one of those pieces.
+
+For example, setting \c next_random_sample_size to \c 10 would cause
+the cursor to sequentially return records from each tenth part of the
+object. Setting \c next_random_sample_size to \c 1000 would cause the
+cursor to sequentially return records from each .1% of the object. 
*/ diff --git a/src/docs/license.dox b/src/docs/license.dox index f34ebad19a7..febced2c6af 100644 --- a/src/docs/license.dox +++ b/src/docs/license.dox @@ -13,6 +13,19 @@ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the <b>GNU General Public License</b></a> for details. +Additionally, portions of the WiredTiger distribution are distributed +under the terms of the +<a href="http://www.opensource.org/licenses/BSD-3-Clause"> +BSD-3-Clause License</a>. These files have +<a href="http://www.opensource.org/licenses/BSD-3-Clause"> +BSD-3-Clause License</a> +copyright notices, and may be freely used and redistributed under the +terms of that notice. + +Additionally, portions of the WiredTiger distribution are public domain +software. Public domain files have notices releasing the software into +the public domain and may be freely used and redistributed. + For a license to use the WiredTiger software under conditions other than those described above, or for technical support for this software, please contact MongoDB, Inc. at @@ -28,7 +41,7 @@ of the WiredTiger library should comply with these copyrights. 
@hrow{Distribution Files, Copyright Holder, License} @row{\c src/include/bitstring.i, University of California\, Berkeley, <a href="http://www.opensource.org/licenses/BSD-3-Clause">BSD-3-Clause License</a>} @row{\c src/include/queue.h, University of California\, Berkeley, <a href="http://www.opensource.org/licenses/BSD-3-Clause">BSD-3-Clause License</a>} -@row{\c src/os_posix/getopt.c, University of California\, Berkeley, <a href="http://www.opensource.org/licenses/BSD-3-Clause">BSD-3-Clause License</a>} +@row{\c src/os_posix/os_getopt.c, University of California\, Berkeley, <a href="http://www.opensource.org/licenses/BSD-3-Clause">BSD-3-Clause License</a>} @row{\c src/support/hash_city.c, Google\, Inc., <a href="http://www.opensource.org/licenses/MIT">The MIT License</a>} @row{\c src/support/hash_fnv.c, Authors, Public Domain} </table> @@ -63,10 +76,4 @@ selected portions of the WiredTiger sources, please review the copyright notices and LICENSE files included in the WiredTiger distribution for the terms and conditions of such redistribution. -@section license_public_domain Public domain software - -Many portions of the WiredTiger distribution are public domain software. -Public domain files have notices releasing the software into the public -domain and may be freely used and redistributed. 
- */ diff --git a/src/docs/wtperf.dox b/src/docs/wtperf.dox index f3bdd64cfda..339bf740265 100644 --- a/src/docs/wtperf.dox +++ b/src/docs/wtperf.dox @@ -206,6 +206,8 @@ if non zero choose a value from within this range as the key for insert operations @par random_value (boolean, default=false) generate random content for the value +@par read_range (unsigned int, default=0) +scan a range of keys after each search @par reopen_connection (boolean, default=true) close and reopen the connection between populate and workload phases @par report_interval (unsigned int, default=2) diff --git a/src/evict/evict_file.c b/src/evict/evict_file.c index 2b2117ad9fd..c5f6ae3d4d1 100644 --- a/src/evict/evict_file.c +++ b/src/evict/evict_file.c @@ -31,8 +31,8 @@ __wt_evict_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) /* Walk the tree, discarding pages. */ next_ref = NULL; - WT_ERR(__wt_tree_walk(session, &next_ref, NULL, - WT_READ_CACHE | WT_READ_NO_EVICT)); + WT_ERR(__wt_tree_walk( + session, &next_ref, WT_READ_CACHE | WT_READ_NO_EVICT)); while ((ref = next_ref) != NULL) { page = ref->page; @@ -68,8 +68,8 @@ __wt_evict_file(WT_SESSION_IMPL *session, WT_CACHE_OP syncop) * the reconciliation, the next walk call could miss a page in * the tree. 
*/ - WT_ERR(__wt_tree_walk(session, &next_ref, NULL, - WT_READ_CACHE | WT_READ_NO_EVICT)); + WT_ERR(__wt_tree_walk(session, + &next_ref, WT_READ_CACHE | WT_READ_NO_EVICT)); switch (syncop) { case WT_SYNC_CLOSE: diff --git a/src/evict/evict_lru.c b/src/evict/evict_lru.c index a8979fa6231..0e2b33c35ec 100644 --- a/src/evict/evict_lru.c +++ b/src/evict/evict_lru.c @@ -1229,7 +1229,7 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp) */ for (evict = start, pages_walked = 0; evict < end && !enough && (ret == 0 || ret == WT_NOTFOUND); - ret = __wt_tree_walk( + ret = __wt_tree_walk_count( session, &btree->evict_ref, &pages_walked, walk_flags)) { enough = pages_walked > cache->evict_max_refs_per_file; if ((ref = btree->evict_ref) == NULL) { @@ -1336,8 +1336,9 @@ fast: /* If the page can't be evicted, give up. */ if (__wt_ref_is_root(ref)) WT_RET(__evict_clear_walk(session)); else if (ref->page->read_gen == WT_READGEN_OLDEST) - WT_RET_NOTFOUND_OK(__wt_tree_walk(session, - &btree->evict_ref, &pages_walked, walk_flags)); + WT_RET_NOTFOUND_OK(__wt_tree_walk_count( + session, &btree->evict_ref, + &pages_walked, walk_flags)); } WT_STAT_FAST_CONN_INCRV(session, cache_eviction_walk, pages_walked); @@ -1617,7 +1618,7 @@ __wt_cache_dump(WT_SESSION_IMPL *session, const char *ofile) next_walk = NULL; session->dhandle = dhandle; - while (__wt_tree_walk(session, &next_walk, NULL, + while (__wt_tree_walk(session, &next_walk, WT_READ_CACHE | WT_READ_NO_EVICT | WT_READ_NO_WAIT) == 0 && next_walk != NULL) { page = next_walk->page; diff --git a/src/include/block.h b/src/include/block.h index 4bff6c82783..804eec24874 100644 --- a/src/include/block.h +++ b/src/include/block.h @@ -173,6 +173,7 @@ struct __wt_bm { int (*compact_skip)(WT_BM *, WT_SESSION_IMPL *, bool *); int (*compact_start)(WT_BM *, WT_SESSION_IMPL *); int (*free)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t); + bool (*is_mapped)(WT_BM *, WT_SESSION_IMPL *); int (*preload)(WT_BM *, WT_SESSION_IMPL *, const 
uint8_t *, size_t); int (*read) (WT_BM *, WT_SESSION_IMPL *, WT_ITEM *, const uint8_t *, size_t); @@ -182,6 +183,7 @@ struct __wt_bm { int (*salvage_start)(WT_BM *, WT_SESSION_IMPL *); int (*salvage_valid) (WT_BM *, WT_SESSION_IMPL *, uint8_t *, size_t, bool); + int (*size)(WT_BM *, WT_SESSION_IMPL *, wt_off_t *); int (*stat)(WT_BM *, WT_SESSION_IMPL *, WT_DSRC_STATS *stats); int (*sync)(WT_BM *, WT_SESSION_IMPL *, bool); int (*verify_addr)(WT_BM *, WT_SESSION_IMPL *, const uint8_t *, size_t); @@ -244,7 +246,10 @@ struct __wt_block { bool ckpt_inprogress;/* Live checkpoint in progress */ /* Compaction support */ - int compact_pct_tenths; /* Percent to compact */ + int compact_pct_tenths; /* Percent to compact */ + uint64_t compact_pages_reviewed;/* Pages reviewed */ + uint64_t compact_pages_skipped; /* Pages skipped */ + uint64_t compact_pages_written; /* Pages rewritten */ /* Salvage support */ wt_off_t slvg_off; /* Salvage file offset */ diff --git a/src/include/btmem.h b/src/include/btmem.h index 6ee74c61a38..12a736c56a2 100644 --- a/src/include/btmem.h +++ b/src/include/btmem.h @@ -478,7 +478,7 @@ struct __wt_page { #define pg_row_ins u.row.ins #undef pg_row_upd #define pg_row_upd u.row.upd -#define pg_row_entries u.row.entries +#undef pg_row_entries #define pg_row_entries u.row.entries /* Fixed-length column-store leaf page. */ @@ -1049,7 +1049,7 @@ struct __wt_insert_head { uint64_t __prev_split_gen = (session)->split_gen; \ if (__prev_split_gen == 0) \ do { \ - WT_PUBLISH((session)->split_gen, \ + WT_PUBLISH((session)->split_gen, \ S2C(session)->split_gen); \ } while ((session)->split_gen != S2C(session)->split_gen) diff --git a/src/include/btree.i b/src/include/btree.i index 3e2e7158e04..23e0dfea2cd 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -948,9 +948,8 @@ __wt_row_leaf_value(WT_PAGE *page, WT_ROW *rip, WT_ITEM *value) * __wt_ref_info -- * Return the addr/size and type triplet for a reference. 
*/ -static inline int -__wt_ref_info(WT_SESSION_IMPL *session, - WT_REF *ref, const uint8_t **addrp, size_t *sizep, u_int *typep) +static inline void +__wt_ref_info(WT_REF *ref, const uint8_t **addrp, size_t *sizep, u_int *typep) { WT_ADDR *addr; WT_CELL_UNPACK *unpack, _unpack; @@ -984,7 +983,9 @@ __wt_ref_info(WT_SESSION_IMPL *session, case WT_ADDR_LEAF_NO: *typep = WT_CELL_ADDR_LEAF_NO; break; - WT_ILLEGAL_VALUE(session); + default: + *typep = 0; + break; } } else { __wt_cell_unpack((WT_CELL *)addr, unpack); @@ -993,7 +994,6 @@ __wt_ref_info(WT_SESSION_IMPL *session, if (typep != NULL) *typep = unpack->type; } - return (0); } /* @@ -1009,7 +1009,7 @@ __wt_ref_block_free(WT_SESSION_IMPL *session, WT_REF *ref) if (ref->addr == NULL) return (0); - WT_RET(__wt_ref_info(session, ref, &addr, &addr_size, NULL)); + __wt_ref_info(ref, &addr, &addr_size, NULL); WT_RET(__wt_btree_block_free(session, addr, addr_size)); /* Clear the address (so we don't free it twice). */ diff --git a/src/include/column.i b/src/include/column.i index fc1f372b2a9..9388e07d0d8 100644 --- a/src/include/column.i +++ b/src/include/column.i @@ -176,6 +176,16 @@ __col_insert_search(WT_INSERT_HEAD *inshead, continue; } + /* + * When no exact match is found, the search returns the smallest + * key larger than the searched-for key, or the largest key + * smaller than the searched-for key, if there is no larger key. + * Our callers depend on that: specifically, the fixed-length + * column store cursor code interprets returning a key smaller + * than the searched-for key to mean the searched-for key is + * larger than any key on the page. Don't change that behavior, + * things will break. + */ ins_recno = WT_INSERT_RECNO(ret_ins); cmp = (recno == ins_recno) ? 0 : (recno < ins_recno) ? 
-1 : 1; @@ -282,7 +292,17 @@ __col_var_search(WT_PAGE *page, uint64_t recno, uint64_t *start_recnop) start_recno = repeat->recno + repeat->rle; } - if (recno >= start_recno + (page->pg_var_entries - start_indx)) + /* + * !!! + * The test could be written more simply as: + * + * (recno >= start_recno + (page->pg_var_entries - start_indx)) + * + * It's split into two parts because the simpler test will overflow if + * searching for large record numbers. + */ + if (recno >= start_recno && + recno - start_recno >= page->pg_var_entries - start_indx) return (NULL); return (page->pg_var_d + start_indx + (uint32_t)(recno - start_recno)); diff --git a/src/include/connection.h b/src/include/connection.h index 2367f5a0035..1c1cb9b8987 100644 --- a/src/include/connection.h +++ b/src/include/connection.h @@ -415,6 +415,7 @@ struct __wt_connection_impl { uint32_t direct_io; uint32_t write_through; /* FILE_FLAG_WRITE_THROUGH type flags */ bool mmap; /* mmap configuration */ + int page_size; /* OS page size for mmap alignment */ uint32_t verbose; uint32_t flags; diff --git a/src/include/cursor.h b/src/include/cursor.h index 43bbfcf5b05..4f232ce4fd0 100644 --- a/src/include/cursor.h +++ b/src/include/cursor.h @@ -104,6 +104,14 @@ struct __wt_cursor_btree { uint64_t recno; /* Record number */ /* + * Next-random cursors can optionally be configured to step through a + * percentage of the total leaf pages to their next value. Note the + * configured value and the calculated number of leaf pages to skip. + */ + uint64_t next_random_leaf_skip; + u_int next_random_sample_size; + + /* * The search function sets compare to: * < 1 if the found key is less than the specified key * 0 if the found key matches the specified key @@ -192,18 +200,23 @@ struct __wt_cursor_btree { uint8_t append_tree; /* Cursor appended to the tree */ +#ifdef HAVE_DIAGNOSTIC + /* Check that cursor next/prev never returns keys out-of-order. 
*/ + WT_ITEM *lastkey, _lastkey; + uint64_t lastrecno; +#endif + #define WT_CBT_ACTIVE 0x01 /* Active in the tree */ #define WT_CBT_ITERATE_APPEND 0x02 /* Col-store: iterating append list */ #define WT_CBT_ITERATE_NEXT 0x04 /* Next iteration configuration */ #define WT_CBT_ITERATE_PREV 0x08 /* Prev iteration configuration */ -#define WT_CBT_MAX_RECORD 0x10 /* Col-store: past end-of-table */ -#define WT_CBT_NO_TXN 0x20 /* Non-transactional cursor +#define WT_CBT_NO_TXN 0x10 /* Non-transactional cursor (e.g. on a checkpoint) */ -#define WT_CBT_SEARCH_SMALLEST 0x40 /* Row-store: small-key insert list */ +#define WT_CBT_SEARCH_SMALLEST 0x20 /* Row-store: small-key insert list */ #define WT_CBT_POSITION_MASK /* Flags associated with position */ \ (WT_CBT_ITERATE_APPEND | WT_CBT_ITERATE_NEXT | WT_CBT_ITERATE_PREV | \ - WT_CBT_MAX_RECORD | WT_CBT_SEARCH_SMALLEST) + WT_CBT_SEARCH_SMALLEST) uint8_t flags; }; diff --git a/src/include/extern.h b/src/include/extern.h index bd32e067a58..7338f8dae3b 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -51,7 +51,8 @@ extern int __wt_block_open(WT_SESSION_IMPL *session, const char *filename, const extern int __wt_block_close(WT_SESSION_IMPL *session, WT_BLOCK *block); extern int __wt_desc_init(WT_SESSION_IMPL *session, WT_FH *fh, uint32_t allocsize); extern void __wt_block_stat(WT_SESSION_IMPL *session, WT_BLOCK *block, WT_DSRC_STATS *stats); -extern int __wt_block_manager_size( WT_SESSION_IMPL *session, const char *filename, WT_DSRC_STATS *stats); +extern int __wt_block_manager_size(WT_BM *bm, WT_SESSION_IMPL *session, wt_off_t *sizep); +extern int __wt_block_manager_named_size( WT_SESSION_IMPL *session, const char *name, wt_off_t *sizep); extern int __wt_bm_preload( WT_BM *bm, WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size); extern int __wt_bm_read(WT_BM *bm, WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, size_t addr_size); extern int __wt_block_read_off_blind( WT_SESSION_IMPL *session, 
WT_BLOCK *block, WT_ITEM *buf, wt_off_t offset); @@ -91,6 +92,7 @@ extern int __wt_bloom_drop(WT_BLOOM *bloom, const char *config); extern int __wt_compact(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_compact_page_skip(WT_SESSION_IMPL *session, WT_REF *ref, bool *skipp); extern void __wt_btcur_iterate_setup(WT_CURSOR_BTREE *cbt); +extern int __wt_cursor_key_order_check( WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, bool next); extern int __wt_btcur_next(WT_CURSOR_BTREE *cbt, bool truncating); extern int __wt_btcur_prev(WT_CURSOR_BTREE *cbt, bool truncating); extern int __wt_btcur_reset(WT_CURSOR_BTREE *cbt); @@ -167,9 +169,11 @@ extern int __wt_upgrade(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_verify(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_verify_dsk_image(WT_SESSION_IMPL *session, const char *tag, const WT_PAGE_HEADER *dsk, size_t size, bool empty_page_ok); extern int __wt_verify_dsk(WT_SESSION_IMPL *session, const char *tag, WT_ITEM *buf); -extern int __wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *walkcntp, uint32_t flags); +extern int __wt_tree_walk(WT_SESSION_IMPL *session, WT_REF **refp, uint32_t flags); +extern int __wt_tree_walk_count(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *walkcntp, uint32_t flags); +extern int __wt_tree_walk_skip(WT_SESSION_IMPL *session, WT_REF **refp, uint64_t *skipleafcntp, uint32_t flags); extern int __wt_col_modify(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, uint64_t recno, WT_ITEM *value, WT_UPDATE *upd_arg, bool is_remove); -extern int __wt_col_search(WT_SESSION_IMPL *session, uint64_t recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt); +extern int __wt_col_search(WT_SESSION_IMPL *session, uint64_t search_recno, WT_REF *leaf, WT_CURSOR_BTREE *cbt); extern int __wt_row_leaf_keys(WT_SESSION_IMPL *session, WT_PAGE *page); extern int __wt_row_leaf_key_copy( WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip, WT_ITEM *key); extern int 
__wt_row_leaf_key_work(WT_SESSION_IMPL *session, WT_PAGE *page, WT_ROW *rip_arg, WT_ITEM *keyb, bool instantiate); @@ -184,7 +188,8 @@ extern WT_UPDATE *__wt_update_obsolete_check( WT_SESSION_IMPL *session, WT_PAGE extern void __wt_update_obsolete_free( WT_SESSION_IMPL *session, WT_PAGE *page, WT_UPDATE *upd); extern int __wt_search_insert( WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt, WT_ITEM *srch_key); extern int __wt_row_search(WT_SESSION_IMPL *session, WT_ITEM *srch_key, WT_REF *leaf, WT_CURSOR_BTREE *cbt, bool insert); -extern int __wt_row_random(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt); +extern int __wt_row_random_leaf(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt); +extern int __wt_row_random_descent(WT_SESSION_IMPL *session, WT_CURSOR_BTREE *cbt); extern void __wt_las_stats_update(WT_SESSION_IMPL *session); extern int __wt_las_create(WT_SESSION_IMPL *session); extern int __wt_las_destroy(WT_SESSION_IMPL *session); @@ -360,23 +365,23 @@ extern int __wt_logrec_read(WT_SESSION_IMPL *session, const uint8_t **pp, const extern int __wt_logop_read(WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *optypep, uint32_t *opsizep); extern int __wt_logop_col_put_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t recno, WT_ITEM *value); extern int __wt_logop_col_put_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, uint64_t *recnop, WT_ITEM *valuep); -extern int __wt_logop_col_put_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out); +extern int __wt_logop_col_put_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags); extern int __wt_logop_col_remove_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t recno); extern int __wt_logop_col_remove_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, uint64_t *recnop); -extern int 
__wt_logop_col_remove_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out); +extern int __wt_logop_col_remove_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags); extern int __wt_logop_col_truncate_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, uint64_t start, uint64_t stop); extern int __wt_logop_col_truncate_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, uint64_t *startp, uint64_t *stopp); -extern int __wt_logop_col_truncate_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out); +extern int __wt_logop_col_truncate_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags); extern int __wt_logop_row_put_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *key, WT_ITEM *value); extern int __wt_logop_row_put_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, WT_ITEM *keyp, WT_ITEM *valuep); -extern int __wt_logop_row_put_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out); +extern int __wt_logop_row_put_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags); extern int __wt_logop_row_remove_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *key); extern int __wt_logop_row_remove_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, WT_ITEM *keyp); -extern int __wt_logop_row_remove_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out); +extern int __wt_logop_row_remove_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags); extern int __wt_logop_row_truncate_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, uint32_t fileid, WT_ITEM *start, WT_ITEM *stop, uint32_t mode); extern int 
__wt_logop_row_truncate_unpack( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, uint32_t *fileidp, WT_ITEM *startp, WT_ITEM *stopp, uint32_t *modep); -extern int __wt_logop_row_truncate_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out); -extern int __wt_txn_op_printlog( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out); +extern int __wt_logop_row_truncate_print( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags); +extern int __wt_txn_op_printlog( WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, uint32_t flags); extern void __wt_log_slot_activate(WT_SESSION_IMPL *session, WT_LOGSLOT *slot); extern int __wt_log_slot_switch( WT_SESSION_IMPL *session, WT_MYSLOT *myslot, bool retry, bool forced); extern int __wt_log_slot_new(WT_SESSION_IMPL *session); @@ -466,7 +471,7 @@ extern int __wt_meta_track_init(WT_SESSION_IMPL *session); extern int __wt_meta_track_destroy(WT_SESSION_IMPL *session); extern int __wt_turtle_init(WT_SESSION_IMPL *session); extern int __wt_turtle_read(WT_SESSION_IMPL *session, const char *key, char **valuep); -extern int __wt_turtle_update( WT_SESSION_IMPL *session, const char *key, const char *value); +extern int __wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value); extern void __wt_abort(WT_SESSION_IMPL *session) WT_GCC_FUNC_DECL_ATTRIBUTE((noreturn)); extern int __wt_calloc(WT_SESSION_IMPL *session, size_t number, size_t size, void *retp); extern int __wt_realloc(WT_SESSION_IMPL *session, size_t *bytes_allocated_ret, size_t bytes_to_allocate, void *retp); @@ -512,6 +517,7 @@ extern int __wt_rwlock_destroy(WT_SESSION_IMPL *session, WT_RWLOCK **rwlockp); extern int __wt_once(void (*init_routine)(void)); extern int __wt_open(WT_SESSION_IMPL *session, const char *name, bool ok_create, bool exclusive, int dio_type, WT_FH **fhp); extern int __wt_close(WT_SESSION_IMPL *session, 
WT_FH **fhp); +extern int __wt_get_vm_pagesize(void); extern bool __wt_absolute_path(const char *path); extern const char *__wt_path_separator(void); extern bool __wt_has_priv(void); @@ -653,6 +659,7 @@ __wt_hazard_set(WT_SESSION_IMPL *session, WT_REF *ref, bool *busyp ); extern int __wt_hazard_clear(WT_SESSION_IMPL *session, WT_PAGE *page); extern void __wt_hazard_close(WT_SESSION_IMPL *session); +extern void __wt_fill_hex(const uint8_t *src, size_t src_max, uint8_t *dest, size_t dest_max, size_t *lenp); extern int __wt_raw_to_hex( WT_SESSION_IMPL *session, const uint8_t *from, size_t size, WT_ITEM *to); extern int __wt_raw_to_esc_hex( WT_SESSION_IMPL *session, const uint8_t *from, size_t size, WT_ITEM *to); extern int __wt_hex2byte(const u_char *from, u_char *to); @@ -670,6 +677,7 @@ extern uint32_t __wt_log2_int(uint32_t n); extern bool __wt_ispo2(uint32_t v); extern uint32_t __wt_rduppo2(uint32_t n, uint32_t po2); extern void __wt_random_init(WT_RAND_STATE volatile *rnd_state); +extern int __wt_random_init_seed( WT_SESSION_IMPL *session, WT_RAND_STATE volatile *rnd_state); extern uint32_t __wt_random(WT_RAND_STATE volatile *rnd_state); extern int __wt_buf_grow_worker(WT_SESSION_IMPL *session, WT_ITEM *buf, size_t size); extern int __wt_buf_fmt(WT_SESSION_IMPL *session, WT_ITEM *buf, const char *fmt, ...) 
WT_GCC_FUNC_DECL_ATTRIBUTE((format (printf, 3, 4))); @@ -731,7 +739,7 @@ extern int __wt_txn_checkpoint_logread( WT_SESSION_IMPL *session, const uint8_t extern int __wt_txn_checkpoint_log( WT_SESSION_IMPL *session, bool full, uint32_t flags, WT_LSN *lsnp); extern int __wt_txn_truncate_log( WT_SESSION_IMPL *session, WT_CURSOR_BTREE *start, WT_CURSOR_BTREE *stop); extern int __wt_txn_truncate_end(WT_SESSION_IMPL *session); -extern int __wt_txn_printlog(WT_SESSION *wt_session, FILE *out); +extern int __wt_txn_printlog(WT_SESSION *wt_session, FILE *out, uint32_t flags); extern int __wt_txn_named_snapshot_begin(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_txn_named_snapshot_drop(WT_SESSION_IMPL *session, const char *cfg[]); extern int __wt_txn_named_snapshot_get(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *nameval); diff --git a/src/include/flags.h b/src/include/flags.h index 064349125cc..bafff92fbc0 100644 --- a/src/include/flags.h +++ b/src/include/flags.h @@ -45,8 +45,9 @@ #define WT_READ_NO_WAIT 0x00000020 #define WT_READ_PREV 0x00000040 #define WT_READ_SKIP_INTL 0x00000080 -#define WT_READ_TRUNCATE 0x00000100 -#define WT_READ_WONT_NEED 0x00000200 +#define WT_READ_SKIP_LEAF 0x00000100 +#define WT_READ_TRUNCATE 0x00000200 +#define WT_READ_WONT_NEED 0x00000400 #define WT_SESSION_CAN_WAIT 0x00000001 #define WT_SESSION_CLEAR_EVICT_WALK 0x00000002 #define WT_SESSION_INTERNAL 0x00000004 diff --git a/src/include/gcc.h b/src/include/gcc.h index 01e33792d73..bb80f8b738b 100644 --- a/src/include/gcc.h +++ b/src/include/gcc.h @@ -156,8 +156,7 @@ __wt_atomic_cas_ptr(void *vp, void *old, void *new) #if defined(x86_64) || defined(__x86_64__) /* Pause instruction to prevent excess processor bus usage */ -#define WT_PAUSE() __asm__ volatile("pause\n" ::: "memory") - +#define WT_PAUSE() __asm__ volatile("pause\n" ::: "memory") #define WT_FULL_BARRIER() do { \ __asm__ volatile ("mfence" ::: "memory"); \ } while (0) @@ -169,7 +168,7 @@ __wt_atomic_cas_ptr(void *vp, void 
*old, void *new) } while (0) #elif defined(i386) || defined(__i386__) -#define WT_PAUSE() __asm__ volatile("pause\n" ::: "memory") +#define WT_PAUSE() __asm__ volatile("pause\n" ::: "memory") #define WT_FULL_BARRIER() do { \ __asm__ volatile ("lock; addl $0, 0(%%esp)" ::: "memory"); \ } while (0) @@ -177,23 +176,58 @@ __wt_atomic_cas_ptr(void *vp, void *old, void *new) #define WT_WRITE_BARRIER() WT_FULL_BARRIER() #elif defined(__PPC64__) || defined(PPC64) +/* ori 0,0,0 is the PPC64 noop instruction */ #define WT_PAUSE() __asm__ volatile("ori 0,0,0" ::: "memory") -#define WT_FULL_BARRIER() do { +#define WT_FULL_BARRIER() do { \ __asm__ volatile ("sync" ::: "memory"); \ } while (0) -#define WT_READ_BARRIER() WT_FULL_BARRIER() -#define WT_WRITE_BARRIER() WT_FULL_BARRIER() + +/* TODO: ISA 2.07 Elemental Memory Barriers would be better, + specifically mbll, and mbss, but they are not supported by POWER 8 */ +#define WT_READ_BARRIER() do { \ + __asm__ volatile ("lwsync" ::: "memory"); \ +} while (0) +#define WT_WRITE_BARRIER() do { \ + __asm__ volatile ("lwsync" ::: "memory"); \ +} while (0) #elif defined(__aarch64__) #define WT_PAUSE() __asm__ volatile("yield" ::: "memory") #define WT_FULL_BARRIER() do { \ - __asm__ volatile ("dsb sy" ::: "memory"); \ + __asm__ volatile ("dsb sy" ::: "memory"); \ +} while (0) +#define WT_READ_BARRIER() do { \ + __asm__ volatile ("dsb ld" ::: "memory"); \ +} while (0) +#define WT_WRITE_BARRIER() do { \ + __asm__ volatile ("dsb st" ::: "memory"); \ +} while (0) + +#elif defined(__s390x__) +#define WT_PAUSE() __asm__ volatile("lr 0,0" ::: "memory") +#define WT_FULL_BARRIER() do { \ + __asm__ volatile ("bcr 15,0\n" ::: "memory"); \ } while (0) +#define WT_READ_BARRIER() WT_FULL_BARRIER() +#define WT_WRITE_BARRIER() WT_FULL_BARRIER() + +#elif defined(__sparc__) +#define WT_PAUSE() __asm__ volatile("rd %%ccr, %%g0" ::: "memory") + +#define WT_FULL_BARRIER() do { \ + __asm__ volatile ("membar #StoreLoad" ::: "memory"); \ +} while (0) + +/* + * 
On UltraSparc machines, TSO is used, and so there is no need for membar. + * READ_BARRIER = #LoadLoad, and WRITE_BARRIER = #StoreStore are noop. + */ #define WT_READ_BARRIER() do { \ - __asm__ volatile ("dsb ld" ::: "memory"); \ + __asm__ volatile ("" ::: "memory"); \ } while (0) + #define WT_WRITE_BARRIER() do { \ - __asm__ volatile ("dsb st" ::: "memory"); \ + __asm__ volatile ("" ::: "memory"); \ } while (0) #else diff --git a/src/include/log.h b/src/include/log.h index 521de567fc0..e7737e12663 100644 --- a/src/include/log.h +++ b/src/include/log.h @@ -267,6 +267,11 @@ struct __wt_log_desc { }; /* + * Flags for __wt_txn_op_printlog. + */ +#define WT_TXN_PRINTLOG_HEX 0x0001 /* Add hex output */ + +/* * WT_LOG_REC_DESC -- * A descriptor for a log record type. */ diff --git a/src/include/misc.h b/src/include/misc.h index e542baec642..898e44eb8e0 100644 --- a/src/include/misc.h +++ b/src/include/misc.h @@ -268,3 +268,6 @@ union __wt_rand_state { uint32_t w, z; } x; }; + +/* Shared array for converting to hex */ +extern const u_char __wt_hex[]; diff --git a/src/include/session.h b/src/include/session.h index 5c3bcfb8ed0..1eca49f2c40 100644 --- a/src/include/session.h +++ b/src/include/session.h @@ -74,7 +74,10 @@ struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_session_impl { TAILQ_HEAD(__cursors, __wt_cursor) cursors; WT_CURSOR_BACKUP *bkp_cursor; /* Hot backup cursor */ - WT_COMPACT *compact; /* Compact state */ + + WT_COMPACT *compact; /* Compaction information */ + enum { WT_COMPACT_NONE=0, + WT_COMPACT_RUNNING, WT_COMPACT_SUCCESS } compact_state; /* * Lookaside table cursor, sweep and eviction worker threads only. 
@@ -134,8 +137,6 @@ struct WT_COMPILER_TYPE_ALIGN(WT_CACHE_LINE_ALIGNMENT) __wt_session_impl { void *reconcile; /* Reconciliation support */ int (*reconcile_cleanup)(WT_SESSION_IMPL *); - bool compaction; /* Compaction did some work */ - uint32_t flags; /* diff --git a/src/include/stat.h b/src/include/stat.h index dfe7ee5c6cd..a554607b7d5 100644 --- a/src/include/stat.h +++ b/src/include/stat.h @@ -139,8 +139,8 @@ __wt_stats_clear(void *stats_arg, int slot) */ #define WT_STAT_READ(stats, fld) \ __wt_stats_aggregate(stats, WT_STATS_FIELD_TO_SLOT(stats, fld)) -#define WT_STAT_WRITE(session, stats, fld) \ - ((stats)[WT_STATS_SLOT_ID(session)]->fld); +#define WT_STAT_WRITE(stats, fld, v) \ + (stats)->fld = (int64_t)(v) #define WT_STAT_DECRV(session, stats, fld, value) \ (stats)[WT_STATS_SLOT_ID(session)]->fld -= (int64_t)(value) diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index 08f73386090..bdd8bb65910 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -896,18 +896,17 @@ struct __wt_session { * boolean flag; default \c false.} * @config{bulk, configure the cursor for bulk-loading\, a fast\, * initial load path (see @ref tune_bulk_load for more information). - * Bulk-load may only be used for newly created objects and cursors - * configured for bulk-load only support the WT_CURSOR::insert and - * WT_CURSOR::close methods. When bulk-loading row-store objects\, keys - * must be loaded in sorted order. The value is usually a true/false - * flag; when bulk-loading fixed-length column store objects\, the - * special value \c bitmap allows chunks of a memory resident bitmap to - * be loaded directly into a file by passing a \c WT_ITEM to - * WT_CURSOR::set_value where the \c size field indicates the number of - * records in the bitmap (as specified by the object's \c value_format - * configuration). 
Bulk-loaded bitmap values must end on a byte boundary - * relative to the bit count (except for the last set of values - * loaded)., a string; default \c false.} + * Bulk-load may only be used for newly created objects and applications + * should use the WT_CURSOR::insert method to insert rows. When + * bulk-loading\, rows must be loaded in sorted order. The value is + * usually a true/false flag; when bulk-loading fixed-length column + * store objects\, the special value \c bitmap allows chunks of a memory + * resident bitmap to be loaded directly into a file by passing a \c + * WT_ITEM to WT_CURSOR::set_value where the \c size field indicates the + * number of records in the bitmap (as specified by the object's \c + * value_format configuration). Bulk-loaded bitmap values must end on a + * byte boundary relative to the bit count (except for the last set of + * values loaded)., a string; default \c false.} * @config{checkpoint, the name of a checkpoint to open (the reserved * name "WiredTigerCheckpoint" opens the most recent internal checkpoint * taken for the object). The cursor does not support data @@ -921,10 +920,19 @@ struct __wt_session { * string\, chosen from the following options: \c "hex"\, \c "json"\, \c * "print"; default empty.} * @config{next_random, configure the cursor to return a pseudo-random - * record from the object; valid only for row-store cursors. Cursors - * configured with \c next_random=true only support the WT_CURSOR::next - * and WT_CURSOR::close methods. See @ref cursor_random for details., a - * boolean flag; default \c false.} + * record from the object when the WT_CURSOR::next method is called; + * valid only for row-store cursors. See @ref cursor_random for + * details., a boolean flag; default \c false.} + * @config{next_random_sample_size, cursors configured by \c next_random + * to return pseudo-random records from the object randomly select from + * the entire object\, by default. 
Setting \c next_random_sample_size + * to a non-zero value sets the number of samples the application + * expects to take using the \c next_random cursor. A cursor configured + * with both \c next_random and \c next_random_sample_size attempts to + * divide the object into \c next_random_sample_size equal-sized + * pieces\, and each retrieval returns a record from one of those + * pieces. See @ref cursor_random for details., a string; default \c + * 0.} * @config{overwrite, configures whether the cursor's insert\, update * and remove methods check the existing state of the record. If \c * overwrite is \c false\, WT_CURSOR::insert fails with diff --git a/src/log/log_auto.c b/src/log/log_auto.c index 5a1d03b1976..54df01d01ab 100644 --- a/src/log/log_auto.c +++ b/src/log/log_auto.c @@ -69,7 +69,7 @@ __logrec_json_unpack_str(char *dest, size_t destlen, const char *src, } static int -__logrec_jsonify_str(WT_SESSION_IMPL *session, char **destp, WT_ITEM *item) +__logrec_make_json_str(WT_SESSION_IMPL *session, char **destp, WT_ITEM *item) { size_t needed; @@ -79,6 +79,17 @@ __logrec_jsonify_str(WT_SESSION_IMPL *session, char **destp, WT_ITEM *item) return (0); } +static int +__logrec_make_hex_str(WT_SESSION_IMPL *session, char **destp, WT_ITEM *item) +{ + size_t needed; + + needed = item->size * 2 + 1; + WT_RET(__wt_realloc(session, NULL, needed, destp)); + __wt_fill_hex(item->data, item->size, (uint8_t *)*destp, needed, NULL); + return (0); +} + int __wt_logop_col_put_pack( WT_SESSION_IMPL *session, WT_ITEM *logrec, @@ -121,7 +132,8 @@ __wt_logop_col_put_unpack( int __wt_logop_col_put_print( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out) + WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, + FILE *out, uint32_t flags) { WT_DECL_RET; uint32_t fileid; @@ -138,9 +150,14 @@ __wt_logop_col_put_print( " \"fileid\": \"%" PRIu32 "\",\n", fileid)); WT_ERR(__wt_fprintf(out, " \"recno\": \"%" PRIu64 "\",\n", recno)); - 
WT_ERR(__logrec_jsonify_str(session, &escaped, &value)); + WT_ERR(__logrec_make_json_str(session, &escaped, &value)); WT_ERR(__wt_fprintf(out, " \"value\": \"%s\"", escaped)); + if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) { + WT_ERR(__logrec_make_hex_str(session, &escaped, &value)); + WT_ERR(__wt_fprintf(out, + ",\n \"value-hex\": \"%s\"", escaped)); + } err: __wt_free(session, escaped); return (ret); @@ -188,11 +205,13 @@ __wt_logop_col_remove_unpack( int __wt_logop_col_remove_print( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out) + WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, + FILE *out, uint32_t flags) { uint32_t fileid; uint64_t recno; + WT_UNUSED(flags); WT_RET(__wt_logop_col_remove_unpack( session, pp, end, &fileid, &recno)); @@ -246,12 +265,14 @@ __wt_logop_col_truncate_unpack( int __wt_logop_col_truncate_print( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out) + WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, + FILE *out, uint32_t flags) { uint32_t fileid; uint64_t start; uint64_t stop; + WT_UNUSED(flags); WT_RET(__wt_logop_col_truncate_unpack( session, pp, end, &fileid, &start, &stop)); @@ -307,7 +328,8 @@ __wt_logop_row_put_unpack( int __wt_logop_row_put_print( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out) + WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, + FILE *out, uint32_t flags) { WT_DECL_RET; uint32_t fileid; @@ -322,12 +344,22 @@ __wt_logop_row_put_print( WT_RET(__wt_fprintf(out, " \"optype\": \"row_put\",\n")); WT_ERR(__wt_fprintf(out, " \"fileid\": \"%" PRIu32 "\",\n", fileid)); - WT_ERR(__logrec_jsonify_str(session, &escaped, &key)); + WT_ERR(__logrec_make_json_str(session, &escaped, &key)); WT_ERR(__wt_fprintf(out, " \"key\": \"%s\",\n", escaped)); - WT_ERR(__logrec_jsonify_str(session, &escaped, &value)); + if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) { + WT_ERR(__logrec_make_hex_str(session, &escaped, &key)); + 
WT_ERR(__wt_fprintf(out, + " \"key-hex\": \"%s\",\n", escaped)); + } + WT_ERR(__logrec_make_json_str(session, &escaped, &value)); WT_ERR(__wt_fprintf(out, " \"value\": \"%s\"", escaped)); + if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) { + WT_ERR(__logrec_make_hex_str(session, &escaped, &value)); + WT_ERR(__wt_fprintf(out, + ",\n \"value-hex\": \"%s\"", escaped)); + } err: __wt_free(session, escaped); return (ret); @@ -375,7 +407,8 @@ __wt_logop_row_remove_unpack( int __wt_logop_row_remove_print( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out) + WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, + FILE *out, uint32_t flags) { WT_DECL_RET; uint32_t fileid; @@ -389,9 +422,14 @@ __wt_logop_row_remove_print( WT_RET(__wt_fprintf(out, " \"optype\": \"row_remove\",\n")); WT_ERR(__wt_fprintf(out, " \"fileid\": \"%" PRIu32 "\",\n", fileid)); - WT_ERR(__logrec_jsonify_str(session, &escaped, &key)); + WT_ERR(__logrec_make_json_str(session, &escaped, &key)); WT_ERR(__wt_fprintf(out, " \"key\": \"%s\"", escaped)); + if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) { + WT_ERR(__logrec_make_hex_str(session, &escaped, &key)); + WT_ERR(__wt_fprintf(out, + ",\n \"key-hex\": \"%s\"", escaped)); + } err: __wt_free(session, escaped); return (ret); @@ -439,7 +477,8 @@ __wt_logop_row_truncate_unpack( int __wt_logop_row_truncate_print( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out) + WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, + FILE *out, uint32_t flags) { WT_DECL_RET; uint32_t fileid; @@ -455,12 +494,22 @@ __wt_logop_row_truncate_print( WT_RET(__wt_fprintf(out, " \"optype\": \"row_truncate\",\n")); WT_ERR(__wt_fprintf(out, " \"fileid\": \"%" PRIu32 "\",\n", fileid)); - WT_ERR(__logrec_jsonify_str(session, &escaped, &start)); + WT_ERR(__logrec_make_json_str(session, &escaped, &start)); WT_ERR(__wt_fprintf(out, " \"start\": \"%s\",\n", escaped)); - WT_ERR(__logrec_jsonify_str(session, &escaped, &stop)); + if 
(LF_ISSET(WT_TXN_PRINTLOG_HEX)) { + WT_ERR(__logrec_make_hex_str(session, &escaped, &start)); + WT_ERR(__wt_fprintf(out, + " \"start-hex\": \"%s\",\n", escaped)); + } + WT_ERR(__logrec_make_json_str(session, &escaped, &stop)); WT_ERR(__wt_fprintf(out, " \"stop\": \"%s\",\n", escaped)); + if (LF_ISSET(WT_TXN_PRINTLOG_HEX)) { + WT_ERR(__logrec_make_hex_str(session, &escaped, &stop)); + WT_ERR(__wt_fprintf(out, + " \"stop-hex\": \"%s\",\n", escaped)); + } WT_ERR(__wt_fprintf(out, " \"mode\": \"%" PRIu32 "\"", mode)); @@ -470,7 +519,8 @@ err: __wt_free(session, escaped); int __wt_txn_op_printlog( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out) + WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, + FILE *out, uint32_t flags) { uint32_t optype, opsize; @@ -480,27 +530,33 @@ __wt_txn_op_printlog( switch (optype) { case WT_LOGOP_COL_PUT: - WT_RET(__wt_logop_col_put_print(session, pp, end, out)); + WT_RET(__wt_logop_col_put_print(session, pp, end, out, + flags)); break; case WT_LOGOP_COL_REMOVE: - WT_RET(__wt_logop_col_remove_print(session, pp, end, out)); + WT_RET(__wt_logop_col_remove_print(session, pp, end, out, + flags)); break; case WT_LOGOP_COL_TRUNCATE: - WT_RET(__wt_logop_col_truncate_print(session, pp, end, out)); + WT_RET(__wt_logop_col_truncate_print(session, pp, end, out, + flags)); break; case WT_LOGOP_ROW_PUT: - WT_RET(__wt_logop_row_put_print(session, pp, end, out)); + WT_RET(__wt_logop_row_put_print(session, pp, end, out, + flags)); break; case WT_LOGOP_ROW_REMOVE: - WT_RET(__wt_logop_row_remove_print(session, pp, end, out)); + WT_RET(__wt_logop_row_remove_print(session, pp, end, out, + flags)); break; case WT_LOGOP_ROW_TRUNCATE: - WT_RET(__wt_logop_row_truncate_print(session, pp, end, out)); + WT_RET(__wt_logop_row_truncate_print(session, pp, end, out, + flags)); break; WT_ILLEGAL_VALUE(session); diff --git a/src/lsm/lsm_stat.c b/src/lsm/lsm_stat.c index c1eb7a2a389..7c53990a2a2 100644 --- a/src/lsm/lsm_stat.c 
+++ b/src/lsm/lsm_stat.c @@ -91,7 +91,7 @@ __curstat_lsm_init( * top-level. */ new = (WT_DSRC_STATS *)WT_CURSOR_STATS(stat_cursor); - new->lsm_generation_max = chunk->generation; + WT_STAT_WRITE(new, lsm_generation_max, chunk->generation); /* Aggregate statistics from each new chunk. */ __wt_stat_dsrc_aggregate_single(new, stats); @@ -115,37 +115,40 @@ __curstat_lsm_init( * into the top-level. */ new = (WT_DSRC_STATS *)WT_CURSOR_STATS(stat_cursor); - new->bloom_size = - (int64_t)((chunk->count * lsm_tree->bloom_bit_count) / 8); - new->bloom_page_evict = - new->cache_eviction_clean + new->cache_eviction_dirty; - new->bloom_page_read = new->cache_read; + WT_STAT_WRITE(new, bloom_size, + (int64_t)((chunk->count * lsm_tree->bloom_bit_count) / 8)); + WT_STAT_WRITE(new, bloom_page_evict, + new->cache_eviction_clean + new->cache_eviction_dirty); + WT_STAT_WRITE(new, bloom_page_read, new->cache_read); __wt_stat_dsrc_aggregate_single(new, stats); WT_ERR(stat_cursor->close(stat_cursor)); } /* Set statistics that aren't aggregated directly into the cursor */ - stats->bloom_count = bloom_count; - stats->lsm_chunk_count = lsm_tree->nchunks; + WT_STAT_WRITE(stats, bloom_count, bloom_count); + WT_STAT_WRITE(stats, lsm_chunk_count, lsm_tree->nchunks); /* Include, and optionally clear, LSM-level specific information. 
*/ - stats->bloom_miss = lsm_tree->bloom_miss; + WT_STAT_WRITE(stats, bloom_miss, lsm_tree->bloom_miss); if (F_ISSET(cst, WT_CONN_STAT_CLEAR)) lsm_tree->bloom_miss = 0; - stats->bloom_hit = lsm_tree->bloom_hit; + WT_STAT_WRITE(stats, bloom_hit, lsm_tree->bloom_hit); if (F_ISSET(cst, WT_CONN_STAT_CLEAR)) lsm_tree->bloom_hit = 0; - stats->bloom_false_positive = lsm_tree->bloom_false_positive; + WT_STAT_WRITE( + stats, bloom_false_positive, lsm_tree->bloom_false_positive); if (F_ISSET(cst, WT_CONN_STAT_CLEAR)) lsm_tree->bloom_false_positive = 0; - stats->lsm_lookup_no_bloom = lsm_tree->lsm_lookup_no_bloom; + WT_STAT_WRITE( + stats, lsm_lookup_no_bloom, lsm_tree->lsm_lookup_no_bloom); if (F_ISSET(cst, WT_CONN_STAT_CLEAR)) lsm_tree->lsm_lookup_no_bloom = 0; - stats->lsm_checkpoint_throttle = lsm_tree->lsm_checkpoint_throttle; + WT_STAT_WRITE( + stats, lsm_checkpoint_throttle, lsm_tree->lsm_checkpoint_throttle); if (F_ISSET(cst, WT_CONN_STAT_CLEAR)) lsm_tree->lsm_checkpoint_throttle = 0; - stats->lsm_merge_throttle = lsm_tree->lsm_merge_throttle; + WT_STAT_WRITE(stats, lsm_merge_throttle, lsm_tree->lsm_merge_throttle); if (F_ISSET(cst, WT_CONN_STAT_CLEAR)) lsm_tree->lsm_merge_throttle = 0; diff --git a/src/meta/meta_turtle.c b/src/meta/meta_turtle.c index 13e8b31916f..3bd57846862 100644 --- a/src/meta/meta_turtle.c +++ b/src/meta/meta_turtle.c @@ -271,8 +271,7 @@ err: WT_TRET(__wt_fclose(&fp, WT_FHANDLE_READ)); * Update the turtle file. */ int -__wt_turtle_update( - WT_SESSION_IMPL *session, const char *key, const char *value) +__wt_turtle_update(WT_SESSION_IMPL *session, const char *key, const char *value) { WT_FH *fh; WT_DECL_ITEM(buf); diff --git a/src/os_posix/os_map.c b/src/os_posix/os_map.c index e95ccb0ade2..4276c89dbcf 100644 --- a/src/os_posix/os_map.c +++ b/src/os_posix/os_map.c @@ -48,8 +48,6 @@ __wt_mmap(WT_SESSION_IMPL *session, return (0); } -#define WT_VM_PAGESIZE 4096 - /* * __wt_mmap_preload -- * Cause a section of a memory map to be faulted in. 
@@ -59,9 +57,10 @@ __wt_mmap_preload(WT_SESSION_IMPL *session, const void *p, size_t size) { #ifdef HAVE_POSIX_MADVISE /* Linux requires the address be aligned to a 4KB boundary. */ + WT_CONNECTION_IMPL *conn = S2C(session); WT_BM *bm = S2BT(session)->bm; WT_DECL_RET; - void *blk = (void *)((uintptr_t)p & ~(uintptr_t)(WT_VM_PAGESIZE - 1)); + void *blk = (void *)((uintptr_t)p & ~(uintptr_t)(conn->page_size - 1)); size += WT_PTRDIFF(p, blk); /* XXX proxy for "am I doing a scan?" -- manual read-ahead */ @@ -78,9 +77,9 @@ __wt_mmap_preload(WT_SESSION_IMPL *session, const void *p, size_t size) * Manual pages aren't clear on whether alignment is required for the * size, so we will be conservative. */ - size &= ~(size_t)(WT_VM_PAGESIZE - 1); + size &= ~(size_t)(conn->page_size - 1); - if (size > WT_VM_PAGESIZE && + if (size > (size_t)conn->page_size && (ret = posix_madvise(blk, size, POSIX_MADV_WILLNEED)) != 0) WT_RET_MSG(session, ret, "posix_madvise will need"); #else @@ -101,8 +100,9 @@ __wt_mmap_discard(WT_SESSION_IMPL *session, void *p, size_t size) { #ifdef HAVE_POSIX_MADVISE /* Linux requires the address be aligned to a 4KB boundary. */ + WT_CONNECTION_IMPL *conn = S2C(session); WT_DECL_RET; - void *blk = (void *)((uintptr_t)p & ~(uintptr_t)(WT_VM_PAGESIZE - 1)); + void *blk = (void *)((uintptr_t)p & ~(uintptr_t)(conn->page_size - 1)); size += WT_PTRDIFF(p, blk); if ((ret = posix_madvise(blk, size, POSIX_MADV_DONTNEED)) != 0) diff --git a/src/os_posix/os_pagesize.c b/src/os_posix/os_pagesize.c new file mode 100644 index 00000000000..e7c7b4fdf15 --- /dev/null +++ b/src/os_posix/os_pagesize.c @@ -0,0 +1,19 @@ +/*- + * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __wt_get_vm_pagesize -- + * Return the default page size of a virtual memory page. 
+ */ +int +__wt_get_vm_pagesize(void) +{ + return (getpagesize()); +} diff --git a/src/os_win/os_pagesize.c b/src/os_win/os_pagesize.c new file mode 100644 index 00000000000..55cd6a694ec --- /dev/null +++ b/src/os_win/os_pagesize.c @@ -0,0 +1,23 @@ +/*- + * Copyright (c) 2014-2015 MongoDB, Inc. + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __wt_get_vm_pagesize -- + * Return the default page size of a virtual memory page. + */ +int +__wt_get_vm_pagesize(void) +{ + SYSTEM_INFO system_info; + + GetSystemInfo(&system_info); + + return (system_info.dwPageSize); +} diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c index fd2aec45115..2b07117f9d5 100644 --- a/src/reconcile/rec_write.c +++ b/src/reconcile/rec_write.c @@ -1276,6 +1276,8 @@ __rec_txn_read(WT_SESSION_IMPL *session, WT_RECONCILE *r, for (upd = upd_list; upd->next != NULL; upd = upd->next) ; upd->next = append; + __wt_cache_page_inmem_incr( + session, page, WT_UPDATE_MEMSIZE(append)); } /* @@ -1756,7 +1758,7 @@ __rec_key_state_update(WT_RECONCILE *r, bool ovfl_key) * Figure out the maximum leaf page size for the reconciliation. */ static inline uint32_t -__rec_leaf_page_max(WT_SESSION_IMPL *session, WT_RECONCILE *r) +__rec_leaf_page_max(WT_SESSION_IMPL *session, WT_RECONCILE *r) { WT_BTREE *btree; WT_PAGE *page; @@ -3263,7 +3265,14 @@ supd_check_complete: memset(WT_BLOCK_HEADER_REF(dsk), 0, btree->block_header); bnd->cksum = __wt_cksum(buf->data, buf->size); - if (mod->rec_result == WT_PM_REC_MULTIBLOCK && + /* + * One last check: don't reuse blocks if compacting, the reason + * for compaction is to move blocks to different locations. We + * do this check after calculating the checksums, hopefully the + * next write can be skipped. 
+ */ + if (session->compact_state == WT_COMPACT_NONE && + mod->rec_result == WT_PM_REC_MULTIBLOCK && mod->mod_multi_entries > bnd_slot) { multi = &mod->mod_multi[bnd_slot]; if (multi->size == bnd->size && @@ -4465,7 +4474,7 @@ compare: /* WT_ERR(__rec_txn_read(session, r, ins, NULL, NULL, &upd)); if (upd == NULL) continue; - for (n = WT_INSERT_RECNO(ins); src_recno <= n; ++src_recno) { + for (n = WT_INSERT_RECNO(ins); src_recno <= n;) { /* * The application may have inserted records which left * gaps in the name space, and these gaps can be huge. @@ -4505,7 +4514,7 @@ compare: /* last->size == size && memcmp(last->data, data, size) == 0)) { ++rle; - continue; + goto next; } WT_ERR(__rec_col_var_helper(session, r, salvage, last, last_deleted, 0, rle)); @@ -4524,6 +4533,15 @@ compare: /* } last_deleted = deleted; rle = 1; + + /* + * Move to the next record. It's not a simple increment + * because if it's the maximum record, incrementing it + * wraps to 0 and this turns into an infinite loop. + */ +next: if (src_recno == UINT64_MAX) + break; + ++src_recno; } } diff --git a/src/session/session_api.c b/src/session/session_api.c index 053f69ee7f8..f0d0f26db54 100644 --- a/src/session/session_api.c +++ b/src/session/session_api.c @@ -148,7 +148,7 @@ __session_close(WT_SESSION *wt_session, const char *config) * via the registered close callback. 
*/ if (session->event_handler->handle_close != NULL && - !WT_STREQ(cursor->uri, WT_LAS_URI)) + !WT_STREQ(cursor->internal_uri, WT_LAS_URI)) WT_TRET(session->event_handler->handle_close( session->event_handler, wt_session, cursor)); WT_TRET(cursor->close(cursor)); diff --git a/src/session/session_compact.c b/src/session/session_compact.c index 456fcd3ce03..8a5b741c0c5 100644 --- a/src/session/session_compact.c +++ b/src/session/session_compact.c @@ -172,12 +172,12 @@ __compact_file(WT_SESSION_IMPL *session, const char *uri, const char *cfg[]) for (i = 0; i < 100; ++i) { WT_ERR(__wt_txn_checkpoint(session, checkpoint_cfg)); - session->compaction = false; + session->compact_state = WT_COMPACT_RUNNING; WT_WITH_SCHEMA_LOCK(session, ret = __wt_schema_worker( session, uri, __wt_compact, NULL, cfg, 0)); WT_ERR(ret); - if (!session->compaction) + if (session->compact_state != WT_COMPACT_SUCCESS) break; WT_ERR(__wt_txn_checkpoint(session, checkpoint_cfg)); @@ -185,7 +185,9 @@ __compact_file(WT_SESSION_IMPL *session, const char *uri, const char *cfg[]) WT_ERR(__session_compact_check_timeout(session, start_time)); } -err: __wt_scr_free(session, &t); +err: session->compact_state = WT_COMPACT_NONE; + + __wt_scr_free(session, &t); return (ret); } diff --git a/src/support/global.c b/src/support/global.c index 1e32f5b4453..2330a65a707 100644 --- a/src/support/global.c +++ b/src/support/global.c @@ -12,28 +12,6 @@ WT_PROCESS __wt_process; /* Per-process structure */ static int __wt_pthread_once_failed; /* If initialization failed */ /* - * __system_is_little_endian -- - * Check if the system is little endian. 
- */ -static int -__system_is_little_endian(void) -{ - uint64_t v; - bool little; - - v = 1; - little = *((uint8_t *)&v) != 0; - - if (little) - return (0); - - fprintf(stderr, - "This release of the WiredTiger data engine does not support " - "big-endian systems; contact WiredTiger for more information.\n"); - return (EINVAL); -} - -/* * __wt_global_once -- * Global initialization, run once. */ @@ -42,11 +20,6 @@ __wt_global_once(void) { WT_DECL_RET; - if ((ret = __system_is_little_endian()) != 0) { - __wt_pthread_once_failed = ret; - return; - } - if ((ret = __wt_spin_init(NULL, &__wt_process.spinlock, "global")) != 0) { __wt_pthread_once_failed = ret; @@ -115,7 +88,7 @@ __wt_attach(WT_SESSION_IMPL *session) /* Sleep forever, the debugger will interrupt us when it attaches. */ for (;;) - __wt_sleep(100, 0); + __wt_sleep(10, 0); #else WT_UNUSED(session); #endif diff --git a/src/support/hash_city.c b/src/support/hash_city.c index 9a4a6464f40..33f4113c004 100644 --- a/src/support/hash_city.c +++ b/src/support/hash_city.c @@ -99,6 +99,12 @@ static uint32_t UNALIGNED_LOAD32(const char *p) { #define bswap_32(x) OSSwapInt32(x) #define bswap_64(x) OSSwapInt64(x) +#elif defined(__sun) + +#include <sys/byteorder.h> +#define bswap_32 BSWAP_32 +#define bswap_64 BSWAP_64 + #else #include <byteswap.h> #endif diff --git a/src/support/hex.c b/src/support/hex.c index eb9f420911a..5fb8d4bc190 100644 --- a/src/support/hex.c +++ b/src/support/hex.c @@ -8,7 +8,7 @@ #include "wt_internal.h" -static const u_char hex[] = "0123456789abcdef"; +const u_char __wt_hex[] = "0123456789abcdef"; /* * __fill_hex -- @@ -25,8 +25,8 @@ __fill_hex(const uint8_t *src, size_t src_max, --dest_max; for (; src_max > 0 && dest_max > 1; src_max -= 1, dest_max -= 2, ++src) { - *dest++ = hex[(*src & 0xf0) >> 4]; - *dest++ = hex[*src & 0x0f]; + *dest++ = __wt_hex[(*src & 0xf0) >> 4]; + *dest++ = __wt_hex[*src & 0x0f]; } *dest++ = '\0'; if (lenp != NULL) @@ -34,6 +34,17 @@ __fill_hex(const uint8_t *src, size_t 
src_max, } /* + * __wt_fill_hex -- + * In-memory conversion of raw bytes to a hexadecimal representation. + */ +void +__wt_fill_hex(const uint8_t *src, size_t src_max, + uint8_t *dest, size_t dest_max, size_t *lenp) +{ + __fill_hex(src, src_max, dest, dest_max, lenp); +} + +/* * __wt_raw_to_hex -- * Convert a chunk of data to a nul-terminated printable hex string. */ @@ -83,8 +94,8 @@ __wt_raw_to_esc_hex( *t++ = *p; } else { *t++ = '\\'; - *t++ = hex[(*p & 0xf0) >> 4]; - *t++ = hex[*p & 0x0f]; + *t++ = __wt_hex[(*p & 0xf0) >> 4]; + *t++ = __wt_hex[*p & 0x0f]; } *t++ = '\0'; to->size = WT_PTRDIFF(t, to->mem); diff --git a/src/support/huffman.c b/src/support/huffman.c index 4bda365cb10..9488dbf14fe 100644 --- a/src/support/huffman.c +++ b/src/support/huffman.c @@ -1,9 +1,31 @@ -/*- +/* * Copyright (c) 2014-2015 MongoDB, Inc. * Copyright (c) 2008-2014 WiredTiger, Inc. * All rights reserved. * - * See the file LICENSE for redistribution information. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name MongoDB or the name WiredTiger + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY MONGODB INC. ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #include "wt_internal.h" diff --git a/src/support/rand.c b/src/support/rand.c index f5ecb12633e..3adcb801f03 100644 --- a/src/support/rand.c +++ b/src/support/rand.c @@ -60,6 +60,29 @@ __wt_random_init(WT_RAND_STATE volatile * rnd_state) } /* + * __wt_random_init_seed -- + * Initialize the state of a 32-bit pseudo-random number. + * Use this, instead of __wt_random_init if we are running with multiple + * threads and we want each thread to initialize its own random state based + * on a different random seed. + */ +int +__wt_random_init_seed( + WT_SESSION_IMPL *session, WT_RAND_STATE volatile * rnd_state) +{ + struct timespec ts; + WT_RAND_STATE rnd; + + WT_RET(__wt_epoch(session, &ts)); + M_W(rnd) = (uint32_t)(ts.tv_nsec + 521288629); + M_Z(rnd) = (uint32_t)(ts.tv_nsec + 362436069); + + *rnd_state = rnd; + + return (0); +} + +/* * __wt_random -- * Return a 32-bit pseudo-random number. 
*/ diff --git a/src/support/stat.c b/src/support/stat.c index 4d7cd65fd18..7a615131628 100644 --- a/src/support/stat.c +++ b/src/support/stat.c @@ -250,19 +250,24 @@ __wt_stat_dsrc_aggregate_single( to->block_alloc += from->block_alloc; to->block_free += from->block_free; to->block_checkpoint_size += from->block_checkpoint_size; - to->allocation_size = from->allocation_size; + if (from->allocation_size > to->allocation_size) + to->allocation_size = from->allocation_size; to->block_reuse_bytes += from->block_reuse_bytes; - to->block_magic = from->block_magic; - to->block_major = from->block_major; + if (from->block_magic > to->block_magic) + to->block_magic = from->block_magic; + if (from->block_major > to->block_major) + to->block_major = from->block_major; to->block_size += from->block_size; - to->block_minor = from->block_minor; + if (from->block_minor > to->block_minor) + to->block_minor = from->block_minor; to->btree_checkpoint_generation += from->btree_checkpoint_generation; to->btree_column_fix += from->btree_column_fix; to->btree_column_internal += from->btree_column_internal; to->btree_column_deleted += from->btree_column_deleted; to->btree_column_variable += from->btree_column_variable; to->btree_column_rle += from->btree_column_rle; - to->btree_fixed_len = from->btree_fixed_len; + if (from->btree_fixed_len > to->btree_fixed_len) + to->btree_fixed_len = from->btree_fixed_len; if (from->btree_maxintlkey > to->btree_maxintlkey) to->btree_maxintlkey = from->btree_maxintlkey; if (from->btree_maxintlpage > to->btree_maxintlpage) @@ -367,12 +372,16 @@ __wt_stat_dsrc_aggregate( to->block_free += WT_STAT_READ(from, block_free); to->block_checkpoint_size += WT_STAT_READ(from, block_checkpoint_size); - to->allocation_size = from[0]->allocation_size; + if ((v = WT_STAT_READ(from, allocation_size)) > to->allocation_size) + to->allocation_size = v; to->block_reuse_bytes += WT_STAT_READ(from, block_reuse_bytes); - to->block_magic = from[0]->block_magic; - 
to->block_major = from[0]->block_major; + if ((v = WT_STAT_READ(from, block_magic)) > to->block_magic) + to->block_magic = v; + if ((v = WT_STAT_READ(from, block_major)) > to->block_major) + to->block_major = v; to->block_size += WT_STAT_READ(from, block_size); - to->block_minor = from[0]->block_minor; + if ((v = WT_STAT_READ(from, block_minor)) > to->block_minor) + to->block_minor = v; to->btree_checkpoint_generation += WT_STAT_READ(from, btree_checkpoint_generation); to->btree_column_fix += WT_STAT_READ(from, btree_column_fix); @@ -382,15 +391,14 @@ __wt_stat_dsrc_aggregate( to->btree_column_variable += WT_STAT_READ(from, btree_column_variable); to->btree_column_rle += WT_STAT_READ(from, btree_column_rle); - to->btree_fixed_len = from[0]->btree_fixed_len; - if ((v = WT_STAT_READ(from, btree_maxintlkey)) > - to->btree_maxintlkey) + if ((v = WT_STAT_READ(from, btree_fixed_len)) > to->btree_fixed_len) + to->btree_fixed_len = v; + if ((v = WT_STAT_READ(from, btree_maxintlkey)) > to->btree_maxintlkey) to->btree_maxintlkey = v; if ((v = WT_STAT_READ(from, btree_maxintlpage)) > to->btree_maxintlpage) to->btree_maxintlpage = v; - if ((v = WT_STAT_READ(from, btree_maxleafkey)) > - to->btree_maxleafkey) + if ((v = WT_STAT_READ(from, btree_maxleafkey)) > to->btree_maxleafkey) to->btree_maxleafkey = v; if ((v = WT_STAT_READ(from, btree_maxleafpage)) > to->btree_maxleafpage) diff --git a/src/txn/txn.c b/src/txn/txn.c index f835fea8f67..0a3e4a7a7db 100644 --- a/src/txn/txn.c +++ b/src/txn/txn.c @@ -216,6 +216,7 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, bool force) conn = S2C(session); txn_global = &conn->txn_global; +retry: current_id = last_running = txn_global->current; oldest_session = NULL; prev_oldest_id = txn_global->oldest_id; @@ -287,43 +288,60 @@ __wt_txn_update_oldest(WT_SESSION_IMPL *session, bool force) WT_TXNID_LT(txn_global->last_running, last_running); /* Update the oldest ID. 
*/ - if ((WT_TXNID_LT(prev_oldest_id, oldest_id) || last_running_moved) && - __wt_atomic_casiv32(&txn_global->scan_count, 1, -1)) { - WT_ORDERED_READ(session_cnt, conn->session_cnt); - for (i = 0, s = txn_global->states; i < session_cnt; i++, s++) { - if ((id = s->id) != WT_TXN_NONE && - WT_TXNID_LT(id, last_running)) - last_running = id; - if ((id = s->snap_min) != WT_TXN_NONE && - WT_TXNID_LT(id, oldest_id)) - oldest_id = id; - } - - if (WT_TXNID_LT(last_running, oldest_id)) - oldest_id = last_running; - -#ifdef HAVE_DIAGNOSTIC + if (WT_TXNID_LT(prev_oldest_id, oldest_id) || last_running_moved) { /* - * Make sure the ID doesn't move past any named snapshots. - * - * Don't include the read/assignment in the assert statement. - * Coverity complains if there are assignments only done in - * diagnostic builds, and when the read is from a volatile. + * We know we want to update. Check if we're racing. */ - id = txn_global->nsnap_oldest_id; - WT_ASSERT(session, - id == WT_TXN_NONE || !WT_TXNID_LT(id, oldest_id)); + if (__wt_atomic_casiv32(&txn_global->scan_count, 1, -1)) { + WT_ORDERED_READ(session_cnt, conn->session_cnt); + for (i = 0, s = txn_global->states; + i < session_cnt; i++, s++) { + if ((id = s->id) != WT_TXN_NONE && + WT_TXNID_LT(id, last_running)) + last_running = id; + if ((id = s->snap_min) != WT_TXN_NONE && + WT_TXNID_LT(id, oldest_id)) + oldest_id = id; + } + + if (WT_TXNID_LT(last_running, oldest_id)) + oldest_id = last_running; + +#ifdef HAVE_DIAGNOSTIC + /* + * Make sure the ID doesn't move past any named + * snapshots. + * + * Don't include the read/assignment in the assert + * statement. Coverity complains if there are + * assignments only done in diagnostic builds, and + * when the read is from a volatile. 
+ */ + id = txn_global->nsnap_oldest_id; + WT_ASSERT(session, + id == WT_TXN_NONE || !WT_TXNID_LT(id, oldest_id)); #endif - if (WT_TXNID_LT(txn_global->last_running, last_running)) - txn_global->last_running = last_running; - if (WT_TXNID_LT(txn_global->oldest_id, oldest_id)) - txn_global->oldest_id = oldest_id; - WT_ASSERT(session, txn_global->scan_count == -1); - txn_global->scan_count = 0; + if (WT_TXNID_LT(txn_global->last_running, last_running)) + txn_global->last_running = last_running; + if (WT_TXNID_LT(txn_global->oldest_id, oldest_id)) + txn_global->oldest_id = oldest_id; + WT_ASSERT(session, txn_global->scan_count == -1); + txn_global->scan_count = 0; + } else { + /* + * We wanted to update the oldest ID but we're racing + * another thread. Retry if this is a forced update. + */ + WT_ASSERT(session, txn_global->scan_count > 0); + (void)__wt_atomic_subiv32(&txn_global->scan_count, 1); + if (force) { + __wt_yield(); + goto retry; + } + } } else { if (WT_VERBOSE_ISSET(session, WT_VERB_TRANSACTION) && - current_id - oldest_id > 10000 && last_running_moved && - oldest_session != NULL) { + current_id - oldest_id > 10000 && oldest_session != NULL) { (void)__wt_verbose(session, WT_VERB_TRANSACTION, "old snapshot %" PRIu64 " pinned in session %d [%s]" diff --git a/src/txn/txn_log.c b/src/txn/txn_log.c index c5fa52dea6a..148ed868792 100644 --- a/src/txn/txn_log.c +++ b/src/txn/txn_log.c @@ -8,6 +8,12 @@ #include "wt_internal.h" +/* Cookie passed to __txn_printlog. */ +typedef struct { + FILE *out; + uint32_t flags; +} WT_TXN_PRINTLOG_ARGS; + /* * __txn_op_log -- * Log an operation for the current transaction. 
@@ -64,7 +70,8 @@ err: __wt_buf_free(session, &key); */ static int __txn_commit_printlog( - WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out) + WT_SESSION_IMPL *session, const uint8_t **pp, const uint8_t *end, FILE *out, + uint32_t flags) { bool firstrecord; @@ -79,7 +86,7 @@ __txn_commit_printlog( firstrecord = false; - WT_RET(__wt_txn_op_printlog(session, pp, end, out)); + WT_RET(__wt_txn_op_printlog(session, pp, end, out, flags)); WT_RET(__wt_fprintf(out, "\n }")); } @@ -459,6 +466,7 @@ __txn_printlog(WT_SESSION_IMPL *session, FILE *out; WT_LOG_RECORD *logrec; WT_LSN ckpt_lsn; + WT_TXN_PRINTLOG_ARGS *args; const uint8_t *end, *p; const char *msg; uint64_t txnid; @@ -467,7 +475,8 @@ __txn_printlog(WT_SESSION_IMPL *session, bool compressed; WT_UNUSED(next_lsnp); - out = cookie; + args = cookie; + out = args->out; p = WT_LOG_SKIP_HEADER(rawrec->data); end = (const uint8_t *)rawrec->data + rawrec->size; @@ -506,7 +515,8 @@ __txn_printlog(WT_SESSION_IMPL *session, WT_RET(__wt_fprintf(out, " \"type\" : \"commit\",\n")); WT_RET(__wt_fprintf(out, " \"txnid\" : %" PRIu64 ",\n", txnid)); - WT_RET(__txn_commit_printlog(session, &p, end, out)); + WT_RET(__txn_commit_printlog(session, &p, end, out, + args->flags)); break; case WT_LOGREC_FILE_SYNC: @@ -537,15 +547,18 @@ __txn_printlog(WT_SESSION_IMPL *session, * Print the log in a human-readable format. 
*/ int -__wt_txn_printlog(WT_SESSION *wt_session, FILE *out) +__wt_txn_printlog(WT_SESSION *wt_session, FILE *out, uint32_t flags) { WT_SESSION_IMPL *session; + WT_TXN_PRINTLOG_ARGS args; session = (WT_SESSION_IMPL *)wt_session; + args.out = out; + args.flags = flags; WT_RET(__wt_fprintf(out, "[\n")); WT_RET(__wt_log_scan( - session, NULL, WT_LOGSCAN_FIRST, __txn_printlog, out)); + session, NULL, WT_LOGSCAN_FIRST, __txn_printlog, &args)); WT_RET(__wt_fprintf(out, "\n]\n")); return (0); diff --git a/src/utilities/util_main.c b/src/utilities/util_main.c index 9cbda08690e..3b7187bd0de 100644 --- a/src/utilities/util_main.c +++ b/src/utilities/util_main.c @@ -226,7 +226,6 @@ main(int argc, char *argv[]) ret = func(session, argc, argv); /* Close the database. */ - err: if (conn != NULL && (tret = conn->close(conn, NULL)) != 0 && ret == 0) ret = tret; diff --git a/src/utilities/util_printlog.c b/src/utilities/util_printlog.c index d202b09b228..3a665c1c657 100644 --- a/src/utilities/util_printlog.c +++ b/src/utilities/util_printlog.c @@ -15,10 +15,10 @@ util_printlog(WT_SESSION *session, int argc, char *argv[]) { WT_DECL_RET; int ch; - bool printable; + uint32_t flags; - printable = false; - while ((ch = __wt_getopt(progname, argc, argv, "f:p")) != EOF) + flags = 0; + while ((ch = __wt_getopt(progname, argc, argv, "f:x")) != EOF) switch (ch) { case 'f': /* output file */ if (freopen(__wt_optarg, "w", stdout) == NULL) { @@ -27,8 +27,8 @@ util_printlog(WT_SESSION *session, int argc, char *argv[]) return (1); } break; - case 'p': - printable = true; + case 'x': /* hex output */ + LF_SET(WT_TXN_PRINTLOG_HEX); break; case '?': default: @@ -41,8 +41,7 @@ util_printlog(WT_SESSION *session, int argc, char *argv[]) if (argc != 0) return (usage()); - WT_UNUSED(printable); - ret = __wt_txn_printlog(session, stdout); + ret = __wt_txn_printlog(session, stdout, flags); if (ret != 0) { fprintf(stderr, "%s: printlog failed: %s\n", @@ -61,7 +60,7 @@ usage(void) { (void)fprintf(stderr, 
"usage: %s %s " - "printlog [-p] [-f output-file]\n", + "printlog [-x] [-f output-file]\n", progname, usage_prefix); return (1); } diff --git a/test/format/ops.c b/test/format/ops.c index c705d362fe8..7e299b7d975 100644 --- a/test/format/ops.c +++ b/test/format/ops.c @@ -504,7 +504,7 @@ skip_insert: if (col_update(tinfo, */ if (!insert) { dir = (int)mmrand(&tinfo->rnd, 0, 1); - for (np = 0; np < mmrand(&tinfo->rnd, 1, 8); ++np) { + for (np = 0; np < mmrand(&tinfo->rnd, 1, 30); ++np) { if (notfound) break; if (nextprev(cursor, dir, ¬found)) diff --git a/test/suite/test_bulk01.py b/test/suite/test_bulk01.py index 80b420c9392..df027df0ddd 100644 --- a/test/suite/test_bulk01.py +++ b/test/suite/test_bulk01.py @@ -130,7 +130,7 @@ class test_bulk_load(wttest.WiredTigerTestCase): # Test that variable-length column-store bulk-load efficiently creates big # records. - def test_bulk_load_col_delete_big(self): + def test_bulk_load_col_big(self): if self.keyfmt != 'r' or self.valfmt == '8t': return diff --git a/test/suite/test_colgap.py b/test/suite/test_colgap.py index 4192f14c5e6..924d622a024 100644 --- a/test/suite/test_colgap.py +++ b/test/suite/test_colgap.py @@ -28,6 +28,7 @@ import wiredtiger, wttest from helper import simple_populate, key_populate, value_populate +from wtscenario import check_scenarios, multiply_scenarios, number_scenarios # test_colgap.py # Test variable-length column-store gap performance. @@ -119,5 +120,90 @@ class test_column_store_gap(wttest.WiredTigerTestCase): self.backward(cursor, list(reversed(v))) +# Basic testing of variable-length column-store with big records. 
+class test_colmax(wttest.WiredTigerTestCase): + name = 'test_colmax' + + types = [ + ('file', dict(type='file:')), + ('table', dict(type='table:')) + ] + valfmt = [ + ('integer', dict(valfmt='i')), + ('string', dict(valfmt='S')), + ] + record_number = [ + ('big', dict(recno=18446744073709551606)), + ('max', dict(recno=18446744073709551615)), + ] + bulk = [ + ('bulk', dict(bulk=1)), + ('not-bulk', dict(bulk=0)), + ] + reopen = [ + ('reopen', dict(reopen=1)), + ('not-reopen', dict(reopen=0)), + ] + single = [ + ('single', dict(single=1)), + ('not-single', dict(single=0)), + ] + + scenarios = number_scenarios(multiply_scenarios(\ + '.', types, valfmt, record_number, bulk, reopen, single)) + + # Test that variable-length column-store correctly/efficiently handles big + # records (if it's not efficient, we'll just hang). + def test_colmax_op(self): + recno = self.recno + + uri = self.type + self.name + self.session.create(uri, 'key_format=r' +',value_format=' + self.valfmt) + + # Insert a big record with/without a bulk cursor. + bulk_config = "" + if self.bulk: + bulk_config = "bulk" + cursor = self.session.open_cursor(uri, None, bulk_config) + + # Optionaly make the big record the only record in the table. + if not self.single: + for i in range(1, 723): + cursor[key_populate(cursor, i)] = value_populate(cursor, i) + + # Confirm searching past the end of the table works. + if not self.bulk: + cursor.set_key(recno) + self.assertEqual(cursor.search(), wiredtiger.WT_NOTFOUND) + + # Insert the big record. + cursor[key_populate(cursor, recno)] = value_populate(cursor, recno) + + # Optionally flush to disk; re-open the cursor as necessary. + if self.bulk or self.reopen: + cursor.close() + if self.reopen == 1: + self.reopen_conn() + if self.bulk or self.reopen: + cursor = self.session.open_cursor(uri, None, None) + + # Search for the large record. 
+ cursor.set_key(recno) + self.assertEqual(cursor.search(), 0) + self.assertEqual(cursor.get_value(), value_populate(cursor, recno)) + + # Update it. + cursor[key_populate(cursor, recno)] = value_populate(cursor, 37) + cursor.set_key(recno) + self.assertEqual(cursor.search(), 0) + self.assertEqual(cursor.get_value(), value_populate(cursor, 37)) + + # Remove it. + cursor.set_key(recno) + self.assertEqual(cursor.remove(), 0) + cursor.set_key(key_populate(cursor, recno)) + self.assertEqual(cursor.search(), wiredtiger.WT_NOTFOUND) + + if __name__ == '__main__': wttest.run() diff --git a/test/suite/test_compact.py b/test/suite/test_compact01.py index c7269785115..c7269785115 100644 --- a/test/suite/test_compact.py +++ b/test/suite/test_compact01.py diff --git a/test/suite/test_compact02.py b/test/suite/test_compact02.py new file mode 100644 index 00000000000..f2d5c1fa283 --- /dev/null +++ b/test/suite/test_compact02.py @@ -0,0 +1,152 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2015 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +# test_compact02.py +# Test that compact reduces the file size. +# + +import wiredtiger, wttest +from wiredtiger import stat +from wtscenario import multiply_scenarios, number_scenarios + +# Test basic compression +class test_compact02(wttest.WiredTigerTestCase): + + types = [ + ('file', dict(uri='file:test_compact02')), + ] + cacheSize = [ + ('default', dict(cacheSize='')), + ('1mb', dict(cacheSize='cache_size=1MB')), + ('10gb', dict(cacheSize='cache_size=10GB')), + ] + + # There's a balance between the pages we create and the size of the records + # being stored: compaction doesn't work on tables with many overflow items + # because we don't rewrite them. Experimentally, 8KB is as small as the test + # can go. Additionally, we can't set the maximum page size too large because + # there won't be enough pages to rewrite. Experimentally, 32KB (the default) + # is as large as the test can go. + fileConfig = [ + ('default', dict(fileConfig='')), + ('8KB', dict(fileConfig='leaf_page_max=8kb')), + ] + scenarios = \ + number_scenarios(multiply_scenarios('.', types, cacheSize, fileConfig)) + + # We want about 22K records that total about 130Mb. That is an average + # of 6196 bytes per record. Half the records should be smaller, about + # 2700 bytes (about 30Mb) and the other half should be larger, 9666 bytes + # per record (about 100Mb). + # + # Test flow is as follows. + # + # 1. Create a table with the data, alternating record size. + # 2. Checkpoint and get stats on the table to confirm the size. + # 3. 
Delete the half of the records with the larger record size. + # 4. Call compact. + # 5. Get stats on compacted table. + # + nrecords = 22000 + bigvalue = "abcdefghi" * 1074 # 9*1074 == 9666 + smallvalue = "ihgfedcba" * 303 # 9*303 == 2727 + + fullsize = nrecords / 2 * len(bigvalue) + nrecords / 2 * len(smallvalue) + + # Return the size of the file + def getSize(self): + cstat = self.session.open_cursor( + 'statistics:' + self.uri, None, 'statistics=(size)') + sz = cstat[stat.dsrc.block_size][2] + cstat.close() + return sz + + # This test varies the cache size and so needs to set up its own connection. + # Override the standard methods. + def setUpConnectionOpen(self, dir): + return None + def setUpSessionOpen(self, conn): + return None + def ConnectionOpen(self, cacheSize): + self.home = '.' + conn_params = 'create,' + \ + cacheSize + ',error_prefix="%s: ",' % self.shortid() + \ + 'statistics=(fast)' + try: + self.conn = wiredtiger.wiredtiger_open(self.home, conn_params) + except wiredtiger.WiredTigerError as e: + print "Failed conn at '%s' with config '%s'" % (dir, conn_params) + self.session = self.conn.open_session(None) + + # Create a table, add keys with both big and small values. + def test_compact02(self): + self.ConnectionOpen(self.cacheSize) + + mb = 1024 * 1024 + params = 'key_format=i,value_format=S,' + self.fileConfig + + # 1. Create a table with the data, alternating record size. + self.session.create(self.uri, params) + c = self.session.open_cursor(self.uri, None) + for i in range(self.nrecords): + if i % 2 == 0: + c[i] = str(i) + self.bigvalue + else: + c[i] = str(i) + self.smallvalue + c.close() + + # 2. Checkpoint and get stats on the table to confirm the size. + self.session.checkpoint() + sz = self.getSize() + self.pr('After populate ' + str(sz / mb) + 'MB') + self.assertGreater(sz, self.fullsize) + + # 3. Delete the half of the records with the larger record size. 
+ c = self.session.open_cursor(self.uri, None) + count = 0 + for i in range(self.nrecords): + if i % 2 == 0: + count += 1 + c.set_key(i) + c.remove() + c.close() + self.pr('Removed total ' + str((count * 9666) / mb) + 'MB') + + # 4. Call compact. + self.session.compact(self.uri, None) + + # 5. Get stats on compacted table. + sz = self.getSize() + self.pr('After compact ' + str(sz / mb) + 'MB') + + # After compact, the file size should be less than half the full size. + self.assertLess(sz, self.fullsize / 2) + + +if __name__ == '__main__': + wttest.run() diff --git a/test/suite/test_cursor_random.py b/test/suite/test_cursor_random.py index 10a3140a2fd..b424dbbc7e3 100644 --- a/test/suite/test_cursor_random.py +++ b/test/suite/test_cursor_random.py @@ -29,90 +29,93 @@ import wiredtiger, wttest from helper import complex_populate, simple_populate from helper import key_populate, value_populate -from wtscenario import check_scenarios +from wtscenario import check_scenarios, multiply_scenarios, number_scenarios # test_cursor_random.py # Cursor next_random operations class test_cursor_random(wttest.WiredTigerTestCase): - scenarios = check_scenarios([ - ('file', dict(type='file:',fmt='S')), - ('table', dict(type='table:',fmt='S')) - ]) + types = [ + ('file', dict(type='file:random')), + ('table', dict(type='table:random')) + ] + config = [ + ('sample', dict(config='next_random=true,next_random_sample_size=35')), + ('not-sample', dict(config='next_random=true')) + ] + scenarios =number_scenarios(multiply_scenarios('.', types, config)) # Check that opening a random cursor on a row-store returns not-supported - # for every method except for next and reset, and next returns not-found. - def test_cursor_random_column(self): - uri = self.type + 'random' - self.session.create(uri, 'key_format=' + self.fmt + ',value_format=S') - cursor = self.session.open_cursor(uri, None, "next_random=true") + # for methods other than next, reconfigure and reset, and next returns + # not-found. 
+ def test_cursor_random(self): + uri = self.type + self.session.create(uri, 'key_format=S,value_format=S') + cursor = self.session.open_cursor(uri, None, self.config) self.assertRaises( wiredtiger.WiredTigerError, lambda: cursor.compare(cursor)) + self.assertRaises(wiredtiger.WiredTigerError, lambda: cursor.insert()) self.assertRaises(wiredtiger.WiredTigerError, lambda: cursor.prev()) + self.assertRaises(wiredtiger.WiredTigerError, lambda: cursor.remove()) self.assertRaises(wiredtiger.WiredTigerError, lambda: cursor.search()) self.assertRaises( wiredtiger.WiredTigerError, lambda: cursor.search_near()) - self.assertRaises(wiredtiger.WiredTigerError, lambda: cursor.insert()) self.assertRaises(wiredtiger.WiredTigerError, lambda: cursor.update()) - self.assertRaises(wiredtiger.WiredTigerError, lambda: cursor.remove()) - cursor.reset() self.assertTrue(cursor.next(), wiredtiger.WT_NOTFOUND) + self.assertEquals(cursor.reconfigure(), 0) + self.assertEquals(cursor.reset(), 0) cursor.close() # Check that next_random works with a single value, repeatedly. def test_cursor_random_single_record(self): - uri = self.type + 'random' - self.session.create(uri, 'key_format=' + self.fmt + ',value_format=S') + uri = self.type + self.session.create(uri, 'key_format=S,value_format=S') cursor = self.session.open_cursor(uri, None) cursor['AAA'] = 'BBB' cursor.close() - cursor = self.session.open_cursor(uri, None, "next_random=true") + cursor = self.session.open_cursor(uri, None, self.config) for i in range(1,5): - cursor.next() + self.assertEquals(cursor.next(), 0) self.assertEquals(cursor.get_key(), 'AAA') cursor.close # Check that next_random works in the presence of a larger set of values, # where the values are in an insert list. 
def test_cursor_random_multiple_insert_records(self): - uri = self.type + 'random' - if self.type == 'file:': + uri = self.type + if uri.startswith('file:'): simple_populate(self, uri, - 'allocation_size=512,leaf_page_max=512,key_format=' +\ - self.fmt, 100) + 'allocation_size=512,leaf_page_max=512,key_format=S', 100) else: complex_populate(self, uri, - 'allocation_size=512,leaf_page_max=512,key_format=' +\ - self.fmt, 100) + 'allocation_size=512,leaf_page_max=512,key_format=S', 100) # In a insert list, next_random always selects the middle key/value # pair, all we can do is confirm cursor.next works. - cursor = self.session.open_cursor(uri, None, "next_random=true") + cursor = self.session.open_cursor(uri, None, self.config) self.assertEqual(cursor.next(), 0) # Check that next_random works in the presence of a larger set of values, # where the values are in a disk format page. def cursor_random_multiple_page_records(self, reopen): - uri = self.type + 'random' - if self.type == 'file:': + uri = self.type + if uri.startswith('file:'): simple_populate(self, uri, - 'allocation_size=512,leaf_page_max=512,key_format=' +\ - self.fmt, 10000) + 'allocation_size=512,leaf_page_max=512,key_format=S', 10000) else: complex_populate(self, uri, - 'allocation_size=512,leaf_page_max=512,key_format=' +\ - self.fmt, 10000) + 'allocation_size=512,leaf_page_max=512,key_format=S', 10000) # Optionally close the connection so everything is forced to disk, # insert lists are an entirely different path in the code. if reopen: self.reopen_conn() - cursor = self.session.open_cursor(uri, None, "next_random=true") + cursor = self.session.open_cursor(uri, None, self.config) last = '' match = 0 for i in range(1,10): - cursor.next() + self.assertEqual(cursor.next(), 0) current = cursor.get_key() if current == last: match += 1 @@ -128,23 +131,32 @@ class test_cursor_random(wttest.WiredTigerTestCase): # Check that opening a random cursor on column-store returns not-supported. 
class test_cursor_random_column(wttest.WiredTigerTestCase): scenarios = check_scenarios([ - ('file', dict(uri='file:random',fmt='r')), - ('table', dict(uri='table:random',fmt='r')), + ('file', dict(uri='file:random')), + ('table', dict(uri='table:random')) ]) def test_cursor_random_column(self): - self.session.create( - self.uri, 'key_format=' + self.fmt + ',value_format=S') - cursor = self.session.open_cursor(self.uri, None, "next_random=true") - self.assertRaises(wiredtiger.WiredTigerError, lambda: cursor.next()) - cursor.close() + self.session.create(self.uri, 'key_format=r,value_format=S') + msg = '/Operation not supported/' + self.assertRaisesWithMessage(wiredtiger.WiredTigerError, lambda: + self.session.open_cursor(self.uri, None, "next_random=true"), msg) # Check next_random works in the presence a set of updates, some or all of # which are invisible to the cursor. class test_cursor_random_invisible(wttest.WiredTigerTestCase): + types = [ + ('file', dict(type='file:random')), + ('table', dict(type='table:random')) + ] + config = [ + ('sample', dict(config='next_random=true,next_random_sample_size=35')), + ('not-sample', dict(config='next_random=true')) + ] + scenarios =number_scenarios(multiply_scenarios('.', types, config)) + def test_cursor_random_invisible_all(self): - uri = 'file:random' + uri = self.type self.session.create(uri, 'key_format=S,value_format=S') cursor = self.session.open_cursor(uri, None) @@ -156,11 +168,11 @@ class test_cursor_random_invisible(wttest.WiredTigerTestCase): # Open another session, the updates won't yet be visible, we shouldn't # find anything at all. 
s = self.conn.open_session() - cursor = s.open_cursor(uri, None, "next_random=true") + cursor = s.open_cursor(uri, None, self.config) self.assertEqual(cursor.next(), wiredtiger.WT_NOTFOUND) def test_cursor_random_invisible_after(self): - uri = 'file:random' + uri = self.type self.session.create(uri, 'key_format=S,value_format=S') cursor = self.session.open_cursor(uri, None) @@ -175,12 +187,12 @@ class test_cursor_random_invisible(wttest.WiredTigerTestCase): # Open another session, the updates won't yet be visible, we should # return the only possible record. s = self.conn.open_session() - cursor = s.open_cursor(uri, None, "next_random=true") - cursor.next() + cursor = s.open_cursor(uri, None, self.config) + self.assertEquals(cursor.next(), 0) self.assertEqual(cursor.get_key(), key_populate(cursor, 1)) def test_cursor_random_invisible_before(self): - uri = 'file:random' + uri = self.type self.session.create(uri, 'key_format=S,value_format=S') cursor = self.session.open_cursor(uri, None) @@ -195,8 +207,8 @@ class test_cursor_random_invisible(wttest.WiredTigerTestCase): # Open another session, the updates won't yet be visible, we should # return the only possible record. s = self.conn.open_session() - cursor = s.open_cursor(uri, None, "next_random=true") - cursor.next() + cursor = s.open_cursor(uri, None, self.config) + self.assertEquals(cursor.next(), 0) self.assertEqual(cursor.get_key(), key_populate(cursor, 99)) diff --git a/test/suite/test_cursor_random02.py b/test/suite/test_cursor_random02.py new file mode 100644 index 00000000000..7c9e0e38cb9 --- /dev/null +++ b/test/suite/test_cursor_random02.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python +# +# Public Domain 2014-2015 MongoDB, Inc. +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. 
+# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +import wiredtiger, wttest +from helper import complex_populate, simple_populate +from helper import key_populate, value_populate +from wtscenario import check_scenarios, multiply_scenarios, number_scenarios + +# test_cursor_random02.py +# Cursor next_random operations +class test_cursor_random02(wttest.WiredTigerTestCase): + type = 'table:random' + config = [ + ('not-sample', dict(config='next_random=true')) + ] + records = [ + ('1', dict(records=1)), + ('250', dict(records=250)), + ('500', dict(records=500)), + ('5000', dict(records=5000)), + ('10000', dict(records=10000)), + ('50000', dict(records=50000)), + ] + scenarios = number_scenarios(multiply_scenarios('.', config, records)) + + # Check that next_random works in the presence of a larger set of values, + # where the values are in an insert list. 
+ def test_cursor_random_reasonable_distribution(self): + uri = self.type + num_entries = self.records + + # Set the leaf-page-max value, otherwise the page might split. + simple_populate(self, uri, + 'leaf_page_max=100MB,key_format=S', num_entries) + # Setup an array to track which keys are seen + visitedKeys = [0] * (num_entries + 1) + + cursor = self.session.open_cursor(uri, None, 'next_random=true') + for i in range(0, num_entries): + self.assertEqual(cursor.next(), 0) + current = cursor.get_key() + current = int(current) + visitedKeys[current] = visitedKeys[current] + 1 + + differentKeys = sum(x > 0 for x in visitedKeys) + + #print visitedKeys + #print differentKeys + ''' + self.tty('differentKeys: ' + str(differentKeys) + ' of ' + \ + str(num_entries) + ', ' + \ + str((int)((differentKeys * 100) / num_entries)) + '%') + ''' + + self.assertGreater(differentKeys, num_entries / 4, + 'next_random random distribution not adequate') + +if __name__ == '__main__': + wttest.run() diff --git a/test/suite/test_jsondump02.py b/test/suite/test_jsondump02.py index 790f651fd2f..ac81e0729e5 100644 --- a/test/suite/test_jsondump02.py +++ b/test/suite/test_jsondump02.py @@ -209,7 +209,7 @@ class test_jsondump02(wttest.WiredTigerTestCase): self.check_json(self.table_uri3, ( ('"key0" : 1', '"value0" : "\\u0001\\u0002\\u0003"'), ('"key0" : 2', - '"value0" : "\\u0077\\u0088\\u0099\\u0000\\u00FF\\u00FE"'))) + '"value0" : "\\u0077\\u0088\\u0099\\u0000\\u00ff\\u00fe"'))) self.check_json(self.table_uri4, ( ('"ikey" : 1,\n"Skey" : "key1"', '"S1" : "val1",\n"i2" : 1,\n"S3" : "val1",\n"i4" : 1'), diff --git a/test/suite/test_txn08.py b/test/suite/test_txn08.py index d35a0c70b3b..8ee48104231 100644 --- a/test/suite/test_txn08.py +++ b/test/suite/test_txn08.py @@ -82,6 +82,11 @@ class test_txn08(wttest.WiredTigerTestCase, suite_subprocess): self.runWt(['printlog'], outfilename='printlog.out') self.check_file_contains('printlog.out', '\\u0001\\u0002abcd\\u0003\\u0004') + 
self.runWt(['printlog', '-x'], outfilename='printlog-hex.out') + self.check_file_contains('printlog-hex.out', + '\\u0001\\u0002abcd\\u0003\\u0004') + self.check_file_contains('printlog-hex.out', + '0102616263640304') if __name__ == '__main__': wttest.run() |