diff options
author | Alex Gorrod <alexg@wiredtiger.com> | 2014-03-04 16:04:48 +1100 |
---|---|---|
committer | Alex Gorrod <alexg@wiredtiger.com> | 2014-03-04 16:04:48 +1100 |
commit | be98ccec7299820ab9cbfa78581d1400b5f5ec65 (patch) | |
tree | 73f63490811959378dd453de3a3d00d4e1ca334c | |
parent | 006f6b7afec5d29351c6c1bf79268cb0d1c011fc (diff) | |
parent | 4deb7b35e421150a6d92ba3c197eab438b63cef7 (diff) | |
download | mongo-be98ccec7299820ab9cbfa78581d1400b5f5ec65.tar.gz |
Merge branch 'develop'
89 files changed, 2114 insertions, 1048 deletions
@@ -1,3 +1,29 @@ +WiredTiger release 2.1.1, 2014-03-04 +------------------------------------ + +The WiredTiger 2.1.1 release contains new features, performance enhancements +and bug fixes. Significant changes include: + +Fix a bug where a page could be marked clean when it contained uncommitted +changes. This bug could cause undefined behavior in transaction rollback +under load. + +Fix a bug with shared caches when rebalancing between connections. + +Add a new public API to WiredTiger that provides the ability to parse +WiredTiger compatible configuration strings. See the upgrading documentation +for further information. [#873] + +A number of performance enhancements to the LSM implementation, particularly +for long running workloads. + +A number of performance enhancements and bug fixes to cache eviction code. + +Add an option to use direct I/O when reading from checkpoints. To enable +the functionality add "direct_io=[checkpoint]" to your wiredtiger_open +configuration string. [#847] + + WiredTiger release 2.1.0, 2014-02-04 ------------------------------------ @@ -1,6 +1,6 @@ -WiredTiger 2.1.0: (February 4, 2014) +WiredTiger 2.1.1: (March 4, 2014) -This is version 2.1.0 of WiredTiger. +This is version 2.1.1 of WiredTiger. 
WiredTiger release packages and documentation can be found at: @@ -9,7 +9,7 @@ WiredTiger release packages and documentation can be found at: Information on configuring, building and installing WiredTiger can be found at: - http://source.wiredtiger.com/2.1.0/install.html + http://source.wiredtiger.com/2.1.1/install.html WiredTiger licensing information can be found at: @@ -1,6 +1,6 @@ WIREDTIGER_VERSION_MAJOR=2 WIREDTIGER_VERSION_MINOR=1 -WIREDTIGER_VERSION_PATCH=0 +WIREDTIGER_VERSION_PATCH=1 WIREDTIGER_VERSION="$WIREDTIGER_VERSION_MAJOR.$WIREDTIGER_VERSION_MINOR.$WIREDTIGER_VERSION_PATCH" WIREDTIGER_RELEASE_DATE=`date "+%B %e, %Y"` diff --git a/bench/wtperf/config.c b/bench/wtperf/config.c index 21275f69768..c87c3ac75a1 100644 --- a/bench/wtperf/config.c +++ b/bench/wtperf/config.c @@ -60,6 +60,23 @@ config_assign(CONFIG *dest, const CONFIG *src) config_free(dest); memcpy(dest, src, sizeof(CONFIG)); + if (src->uris != NULL) { + dest->uris = (char **)calloc(src->table_count, sizeof(char *)); + for (i = 0; i < src->table_count; i++) + dest->uris[i] = strdup(src->uris[i]); + } + dest->ckptthreads = NULL; + dest->popthreads = NULL; + dest->workers = NULL; + + if (src->base_uri != NULL) + dest->base_uri = strdup(src->base_uri); + if (src->workload != NULL) { + dest->workload = calloc(WORKLOAD_MAX, sizeof(WORKLOAD)); + memcpy(dest->workload, + src->workload, WORKLOAD_MAX * sizeof(WORKLOAD)); + } + for (i = 0; i < sizeof(config_opts) / sizeof(config_opts[0]); i++) if (config_opts[i].type == STRING_TYPE || config_opts[i].type == CONFIG_STRING_TYPE) { @@ -96,14 +113,53 @@ config_free(CONFIG *cfg) *pstr = NULL; } } + if (cfg->uris != NULL) { + for (i = 0; i < cfg->table_count; i++) + free(cfg->uris[i]); + free(cfg->uris); + } + free(cfg->ckptthreads); free(cfg->popthreads); - free(cfg->uri); + free(cfg->base_uri); free(cfg->workers); free(cfg->workload); } /* + * config_compress -- + * Parse the compression configuration. 
+ */ +int +config_compress(CONFIG *cfg) +{ + int ret; + const char *s; + + ret = 0; + s = cfg->compression; + if (strcmp(s, "none") == 0) { + cfg->compress_ext = NULL; + cfg->compress_table = NULL; + } else if (strcmp(s, "bzip") == 0) { + cfg->compress_ext = BZIP_EXT; + cfg->compress_table = BZIP_BLK; + } else if (strcmp(s, "snappy") == 0) { + cfg->compress_ext = SNAPPY_EXT; + cfg->compress_table = SNAPPY_BLK; + } else if (strcmp(s, "zlib") == 0) { + cfg->compress_ext = ZLIB_EXT; + cfg->compress_table = ZLIB_BLK; + } else { + fprintf(stderr, + "invalid compression configuration: %s\n", s); + ret = EINVAL; + } + return (ret); + +} + +/* * config_threads -- * Parse the thread configuration. */ @@ -112,12 +168,10 @@ config_threads(CONFIG *cfg, const char *config, size_t len) { WORKLOAD *workp; WT_CONFIG_ITEM groupk, groupv, k, v; - WT_CONFIG_SCAN *group, *scan; - WT_EXTENSION_API *wt_api; + WT_CONFIG_PARSER *group, *scan; int ret; - wt_api = cfg->conn->get_extension_api(cfg->conn); - + group = scan = NULL; /* Allocate the workload array. */ if ((cfg->workload = calloc(WORKLOAD_MAX, sizeof(WORKLOAD))) == NULL) return (enomem(cfg)); @@ -132,12 +186,11 @@ config_threads(CONFIG *cfg, const char *config, size_t len) * returned from the original string. */ if ((ret = - wt_api->config_scan_begin(wt_api, NULL, config, len, &group)) != 0) + wiredtiger_config_parser_open(NULL, config, len, &group)) != 0) goto err; - while ((ret = - wt_api->config_scan_next(wt_api, group, &groupk, &groupv)) == 0) { - if ((ret = wt_api-> config_scan_begin( - wt_api, NULL, groupk.str, groupk.len, &scan)) != 0) + while ((ret = group->next(group, &groupk, &groupv)) == 0) { + if ((ret = wiredtiger_config_parser_open( + NULL, groupk.str, groupk.len, &scan)) != 0) goto err; /* Move to the next workload slot. 
*/ @@ -150,8 +203,7 @@ config_threads(CONFIG *cfg, const char *config, size_t len) } workp = &cfg->workload[cfg->workload_cnt++]; - while ((ret = - wt_api->config_scan_next(wt_api, scan, &k, &v)) == 0) { + while ((ret = scan->next(scan, &k, &v)) == 0) { if (STRING_MATCH("count", k.str, k.len)) { if ((workp->threads = v.val) <= 0) goto err; @@ -181,7 +233,9 @@ config_threads(CONFIG *cfg, const char *config, size_t len) ret = 0; if (ret != 0 ) goto err; - if ((ret = wt_api->config_scan_end(wt_api, scan)) != 0) + ret = scan->close(scan); + scan = NULL; + if (ret != 0) goto err; if (workp->insert == 0 && @@ -190,12 +244,19 @@ config_threads(CONFIG *cfg, const char *config, size_t len) cfg->workers_cnt += (u_int)workp->threads; } - if ((ret = wt_api->config_scan_end(wt_api, group)) != 0) + ret = group->close(group); + group = NULL; + if (ret != 0) goto err; return (0); -err: fprintf(stderr, +err: if (group != NULL) + (void)group->close(group); + if (scan != NULL) + (void)scan->close(scan); + + fprintf(stderr, "invalid thread configuration or scan error: %.*s\n", (int)len, config); return (EINVAL); @@ -408,21 +469,17 @@ int config_opt_line(CONFIG *cfg, const char *optstr) { WT_CONFIG_ITEM k, v; - WT_CONFIG_SCAN *scan; - WT_EXTENSION_API *wt_api; + WT_CONFIG_PARSER *scan; int ret, t_ret; - wt_api = cfg->conn->get_extension_api(cfg->conn); - - if ((ret = wt_api->config_scan_begin( - wt_api, NULL, optstr, strlen(optstr), &scan)) != 0) { + if ((ret = wiredtiger_config_parser_open( + NULL, optstr, strlen(optstr), &scan)) != 0) { lprintf(cfg, ret, 0, "Error in config_scan_begin"); return (ret); } while (ret == 0) { - if ((ret = - wt_api->config_scan_next(wt_api, scan, &k, &v)) != 0) { + if ((ret = scan->next(scan, &k, &v)) != 0) { /* Any parse error has already been reported. 
*/ if (ret == WT_NOTFOUND) ret = 0; @@ -430,7 +487,7 @@ config_opt_line(CONFIG *cfg, const char *optstr) } ret = config_opt(cfg, &k, &v); } - if ((t_ret = wt_api->config_scan_end(wt_api, scan)) != 0) { + if ((t_ret = scan->close(scan)) != 0) { lprintf(cfg, ret, 0, "Error in config_scan_end"); if (ret == 0) ret = t_ret; @@ -474,6 +531,14 @@ config_sanity(CONFIG *cfg) fprintf(stderr, "interval value longer than the run-time\n"); return (1); } + if (cfg->table_count > 99) { + fprintf(stderr, "table count greater than 99\n"); + return (1); + } + if (cfg->database_count > 99) { + fprintf(stderr, "database count greater than 99\n"); + return (1); + } return (0); } @@ -600,11 +665,8 @@ config_opt_usage(void) void usage(void) { - printf("wtperf [-LMS] [-C config] " + printf("wtperf [-C config] " "[-H mount] [-h home] [-O file] [-o option] [-T config]\n"); - printf("\t-L Use a large default configuration\n"); - printf("\t-M Use a medium default configuration\n"); - printf("\t-S Use a small default configuration\n"); printf("\t-C <string> additional connection configuration\n"); printf("\t (added to option conn_config)\n"); printf("\t-H <mount> configure Helium volume mount point\n"); diff --git a/bench/wtperf/misc.c b/bench/wtperf/misc.c index 4dac172a0c5..4946a260bec 100644 --- a/bench/wtperf/misc.c +++ b/bench/wtperf/misc.c @@ -44,24 +44,27 @@ enomem(const CONFIG *cfg) int setup_log_file(CONFIG *cfg) { + int ret; char *fname; + ret = 0; + if (cfg->verbose < 1) return (0); - if ((fname = calloc(strlen(cfg->home) + + if ((fname = calloc(strlen(cfg->monitor_dir) + strlen(cfg->table_name) + strlen(".stat") + 2, 1)) == NULL) return (enomem(cfg)); - sprintf(fname, "%s/%s.stat", cfg->home, cfg->table_name); - cfg->logf = fopen(fname, "a"); - free(fname); - + sprintf(fname, "%s/%s.stat", cfg->monitor_dir, cfg->table_name); + cfg->logf = fopen(fname, "w"); if (cfg->logf == NULL) { - fprintf(stderr, - "Failed to open log file: %s\n", strerror(errno)); - return (EINVAL); + ret = 
errno; + fprintf(stderr, "%s: %s\n", fname, strerror(ret)); } + free(fname); + if (cfg->logf == NULL) + return (ret); /* Use line buffering for the log file. */ (void)setvbuf(cfg->logf, NULL, _IOLBF, 0); diff --git a/bench/wtperf/runners/fruit-lsm.wtperf b/bench/wtperf/runners/fruit-lsm.wtperf index 193b29d38bf..a06cc37c33d 100644 --- a/bench/wtperf/runners/fruit-lsm.wtperf +++ b/bench/wtperf/runners/fruit-lsm.wtperf @@ -6,7 +6,7 @@ conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024" compact=true sess_config="isolation=snapshot" -table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB,merge_threads=2),type=lsm" +table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB,merge_threads=2),type=lsm,leaf_page_max=16K" icount=25000000 key_sz=40 value_sz=800 diff --git a/bench/wtperf/runners/fruit-short.wtperf b/bench/wtperf/runners/fruit-short.wtperf index 9061e231bbe..f546cbc29b5 100644 --- a/bench/wtperf/runners/fruit-short.wtperf +++ b/bench/wtperf/runners/fruit-short.wtperf @@ -6,7 +6,7 @@ conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024" compact=true sess_config="isolation=snapshot" -table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB,merge_threads=2),type=lsm" +table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB,merge_threads=2),type=lsm,leaf_page_max=16K" icount=25000000 key_sz=40 value_sz=800 diff --git a/bench/wtperf/runners/medium-lsm-compact.wtperf b/bench/wtperf/runners/medium-lsm-compact.wtperf index 5393cdbfeba..62ae8cf86ca 100644 --- a/bench/wtperf/runners/medium-lsm-compact.wtperf +++ 
b/bench/wtperf/runners/medium-lsm-compact.wtperf @@ -1,6 +1,6 @@ # wtperf options file: medium lsm configuration conn_config="cache_size=1G" -table_config="lsm=(chunk_size=100MB,merge_threads=2),type=lsm" +table_config="lsm=(chunk_size=100MB,merge_threads=2,chunk_max=1TB),type=lsm" icount=50000000 populate_threads=1 compact=true diff --git a/bench/wtperf/runners/multi-btree.wtperf b/bench/wtperf/runners/multi-btree.wtperf new file mode 100644 index 00000000000..831aa71cdd2 --- /dev/null +++ b/bench/wtperf/runners/multi-btree.wtperf @@ -0,0 +1,13 @@ +# wtperf options file: small btree multi-database configuration +# Original cache was 500MB. Shared cache is 500MB * database_count. +conn_config="shared_cache=(enable=true,size=2500MB,chunk=10M)" +database_count=5 +table_config="leaf_page_max=4k,internal_page_max=16k,leaf_item_max=1433,internal_item_max=3100,type=file" +# Likewise, divide original icount by database_count. +icount=50000 +populate_threads=1 +random_range=100000000 +report_interval=5 +run_time=3000 +threads=((count=1,reads=1),(count=1,inserts=1)) +value_sz=1024 diff --git a/bench/wtperf/runners/test1-500m-lsm.wtperf b/bench/wtperf/runners/test1-500m-lsm.wtperf index 116641fc165..7f85b8c13e5 100644 --- a/bench/wtperf/runners/test1-500m-lsm.wtperf +++ b/bench/wtperf/runners/test1-500m-lsm.wtperf @@ -1,12 +1,13 @@ -# wtperf options file: simulate riak and its test1 and test2 configuration +# wtperf options file: simulate riak and its test1 configuration # The configuration for the connection and table are from riak and the # specification of the data (count, size, threads) is from basho_bench. 
# -#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600)" +#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=60)" conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024" compact=true +compression="snappy" sess_config="isolation=snapshot" -table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB,merge_threads=2),type=lsm" +table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_max=5GB,chunk_size=100MB,merge_threads=3),type=lsm,leaf_page_max=16K" icount=500000000 key_sz=40 value_sz=1000 diff --git a/bench/wtperf/runners/test1-50m-lsm.wtperf b/bench/wtperf/runners/test1-50m-lsm.wtperf index 6551b1565a9..548932f3aee 100644 --- a/bench/wtperf/runners/test1-50m-lsm.wtperf +++ b/bench/wtperf/runners/test1-50m-lsm.wtperf @@ -1,12 +1,12 @@ -# wtperf options file: simulate riak and its test1 and test2 configuration +# wtperf options file: simulate riak and its test1 configuration # The configuration for the connection and table are from riak and the # specification of the data (count, size, threads) is from basho_bench. 
# -#conn_config="cache_size=10G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=300)" +#conn_config="cache_size=10G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=30)" conn_config="cache_size=10G,checkpoint_sync=false,mmap=false,session_max=1024" compact=true sess_config="isolation=snapshot" -table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB,merge_threads=2),type=lsm" +table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB,merge_threads=2),type=lsm,leaf_page_max=16K" icount=50000000 key_sz=40 value_sz=1000 diff --git a/bench/wtperf/runners/test2-500m-lsm.wtperf b/bench/wtperf/runners/test2-500m-lsm.wtperf index 9b8f225f643..db1ee9d469c 100644 --- a/bench/wtperf/runners/test2-500m-lsm.wtperf +++ b/bench/wtperf/runners/test2-500m-lsm.wtperf @@ -3,11 +3,12 @@ # specification of the data (count, size, threads) is from basho_bench. # This test assumes that a test1 populate already completed and exists. 
# -#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600)" +#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=60)" conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024" create=false -sess_config="isolation=snapshot" -table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB,merge_threads=2),type=lsm" +compression="snappy" +sess_config="isolation=snapshot" +table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_max=5GB,chunk_size=100MB,merge_threads=3),type=lsm,leaf_page_max=16K" key_sz=40 value_sz=1000 report_interval=10 diff --git a/bench/wtperf/runners/test2-50m-lsm.wtperf b/bench/wtperf/runners/test2-50m-lsm.wtperf index 945faade694..c5e77e7e207 100644 --- a/bench/wtperf/runners/test2-50m-lsm.wtperf +++ b/bench/wtperf/runners/test2-50m-lsm.wtperf @@ -3,11 +3,11 @@ # specification of the data (count, size, threads) is from basho_bench. # This test assumes that a test1 populate already completed and exists. 
# -#conn_config="cache_size=10G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=300)" +#conn_config="cache_size=10G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=30)" conn_config="cache_size=10G,checkpoint_sync=false,mmap=false,session_max=1024" create=false sess_config="isolation=snapshot" -table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB,merge_threads=2),type=lsm" +table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB,merge_threads=2),type=lsm,leaf_page_max=16K" key_sz=40 value_sz=1000 report_interval=10 diff --git a/bench/wtperf/runners/test3-500m-lsm.wtperf b/bench/wtperf/runners/test3-500m-lsm.wtperf index 2da836d577c..446309c32c8 100644 --- a/bench/wtperf/runners/test3-500m-lsm.wtperf +++ b/bench/wtperf/runners/test3-500m-lsm.wtperf @@ -1,13 +1,14 @@ -# wtperf options file: simulate riak and its test2 configuration +# wtperf options file: simulate riak and its test3 configuration # The configuration for the connection and table are from riak and the # specification of the data (count, size, threads) is from basho_bench. # This test assumes that a test1 populate already completed and exists. 
# -#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600)" +#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=60)" conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024" create=false +compression="snappy" sess_config="isolation=snapshot" -table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB,merge_threads=2),type=lsm" +table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_max=5GB,chunk_size=100MB,merge_threads=3),type=lsm,leaf_page_max=16K" key_sz=40 value_sz=1000 pareto=true diff --git a/bench/wtperf/runners/test3-50m-lsm.wtperf b/bench/wtperf/runners/test3-50m-lsm.wtperf index bc54cec3496..00291737545 100644 --- a/bench/wtperf/runners/test3-50m-lsm.wtperf +++ b/bench/wtperf/runners/test3-50m-lsm.wtperf @@ -1,17 +1,18 @@ -# wtperf options file: simulate riak and its test2 configuration +# wtperf options file: simulate riak and its test3 configuration # The configuration for the connection and table are from riak and the # specification of the data (count, size, threads) is from basho_bench. # This test assumes that a test1 populate already completed and exists. 
# -#conn_config="cache_size=10G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=300)" +#conn_config="cache_size=10G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=30)" conn_config="cache_size=10G,checkpoint_sync=false,mmap=false,session_max=1024" create=false sess_config="isolation=snapshot" -table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB,merge_threads=2),type=lsm" +table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB,merge_threads=2),type=lsm,leaf_page_max=16K" key_sz=40 value_sz=1000 pareto=true report_interval=10 run_time=1440 sample_interval=10 -threads=((count=10,reads=1,updates=1)) +#threads=((count=10,reads=1,updates=1)) +threads=((count=5,reads=1),(count=5,updates=1)) diff --git a/bench/wtperf/runners/test4-500m-lsm.wtperf b/bench/wtperf/runners/test4-500m-lsm.wtperf index 1257f473a7d..a959ec9057e 100644 --- a/bench/wtperf/runners/test4-500m-lsm.wtperf +++ b/bench/wtperf/runners/test4-500m-lsm.wtperf @@ -1,13 +1,14 @@ -# wtperf options file: simulate riak and its test2 configuration +# wtperf options file: simulate riak and its test4 configuration # The configuration for the connection and table are from riak and the # specification of the data (count, size, threads) is from basho_bench. # This test assumes that a test1 populate already completed and exists. 
# -#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600)" +#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=60)" conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024" create=false +compression="snappy" sess_config="isolation=snapshot" -table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB,merge_threads=2),type=lsm" +table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_max=5GB,chunk_size=100MB,merge_threads=3),type=lsm,leaf_page_max=16K" key_sz=40 value_sz=1000 report_interval=10 diff --git a/bench/wtperf/runners/test4-50m-lsm.wtperf b/bench/wtperf/runners/test4-50m-lsm.wtperf index fc7c96c31d3..6536d8a1de7 100644 --- a/bench/wtperf/runners/test4-50m-lsm.wtperf +++ b/bench/wtperf/runners/test4-50m-lsm.wtperf @@ -1,13 +1,13 @@ -# wtperf options file: simulate riak and its test2 configuration +# wtperf options file: simulate riak and its test4 configuration # The configuration for the connection and table are from riak and the # specification of the data (count, size, threads) is from basho_bench. # This test assumes that a test1 populate already completed and exists. 
# -#conn_config="cache_size=10G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=300)" +#conn_config="cache_size=10G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=30)" conn_config="cache_size=10G,checkpoint_sync=false,mmap=false,session_max=1024" create=false sess_config="isolation=snapshot" -table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB,merge_threads=2),type=lsm" +table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB,merge_threads=2),type=lsm,leaf_page_max=16K" key_sz=40 value_sz=1000 report_interval=10 diff --git a/bench/wtperf/runners/voxer-10k.wtperf b/bench/wtperf/runners/voxer-10k.wtperf new file mode 100644 index 00000000000..17c46b4ed94 --- /dev/null +++ b/bench/wtperf/runners/voxer-10k.wtperf @@ -0,0 +1,19 @@ +# wtperf options file: simulate riak and its test1 and test2 configuration +# The configuration for the connection and table are from riak and the +# specification of the data (count, size, threads) is from basho_bench. 
+# +#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600)" +conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024" +compact=true +compression="snappy" +sess_config="isolation=snapshot" +table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB,merge_threads=2),type=lsm,leaf_page_max=16K" +icount=15000 +key_sz=40 +value_sz=10000 +populate_threads=1 +report_interval=10 +random_value=true +run_time=3600 +sample_interval=10 +threads=((count=20,read=1,update=1)) diff --git a/bench/wtperf/runners/voxer-130k.wtperf b/bench/wtperf/runners/voxer-130k.wtperf new file mode 100644 index 00000000000..e81510cf067 --- /dev/null +++ b/bench/wtperf/runners/voxer-130k.wtperf @@ -0,0 +1,19 @@ +# wtperf options file: simulate riak and its test1 and test2 configuration +# The configuration for the connection and table are from riak and the +# specification of the data (count, size, threads) is from basho_bench. +# +#conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024,statistics=(fast,clear),statistics_log=(wait=600)" +conn_config="cache_size=21G,checkpoint_sync=false,mmap=false,session_max=1024" +compact=true +compression="snappy" +sess_config="isolation=snapshot" +table_config="internal_page_max=128K,lsm=(bloom_config=(leaf_page_max=8MB),bloom_bit_count=28,bloom_hash_count=19,bloom_oldest=true,chunk_size=100MB,merge_threads=2),type=lsm,leaf_page_max=16K" +icount=15000 +key_sz=40 +value_sz=130000 +populate_threads=1 +report_interval=10 +random_value=true +run_time=3600 +sample_interval=10 +threads=((count=20,read=1,update=1)) diff --git a/bench/wtperf/smoke.sh b/bench/wtperf/smoke.sh index 192d3cd208b..062277d90dc 100755 --- a/bench/wtperf/smoke.sh +++ b/bench/wtperf/smoke.sh @@ -1,4 +1,4 @@ #! /bin/sh # Smoke-test wtperf as part of running "make check". 
-./wtperf -S +./wtperf -O `dirname $0`/runners/small-lsm.wtperf -o "run_time=20" diff --git a/bench/wtperf/track.c b/bench/wtperf/track.c index 12f50dd5411..3919d0eb1ab 100644 --- a/bench/wtperf/track.c +++ b/bench/wtperf/track.c @@ -279,10 +279,8 @@ latency_print_single(CONFIG *cfg, TRACK *total, const char *name) return; } -#ifdef __WRITE_A_HEADER fprintf(fp, - "usecs,operations,cumulative-operations,total-operations\n"); -#endif + "#usecs,operations,cumulative-operations,total-operations\n"); cumops = 0; for (i = 0; i < ELEMENTS(total->us); ++i) { if (total->us[i] == 0) diff --git a/bench/wtperf/wtperf.c b/bench/wtperf/wtperf.c index 5554f2e564e..ab64c00802e 100644 --- a/bench/wtperf/wtperf.c +++ b/bench/wtperf/wtperf.c @@ -31,68 +31,36 @@ static const CONFIG default_cfg = { "WT_TEST", /* home */ "WT_TEST", /* monitor dir */ - NULL, /* uri */ + NULL, /* base_uri */ + NULL, /* uris */ + NULL, /* helium_mount */ NULL, /* conn */ NULL, /* logf */ + NULL, NULL, /* compressor ext, blk */ NULL, NULL, /* populate, checkpoint threads */ NULL, /* worker threads */ 0, /* worker thread count */ NULL, /* workloads */ 0, /* workload count */ + 0, /* checkpoint operations */ + 0, /* insert operations */ + 0, /* read operations */ + 0, /* update operations */ + 0, /* insert key */ + 0, /* checkpoint in progress */ + 0, /* thread error */ + 0, /* notify threads to stop */ + 0, /* total seconds running */ #define OPT_DEFINE_DEFAULT #include "wtperf_opt.i" #undef OPT_DEFINE_DEFAULT }; -static const char * const small_config_str = - "conn_config=\"cache_size=500MB\"," - "table_config=\"lsm=(chunk_size=5MB)\"," - "icount=500000," - "value_sz=100," - "key_sz=20," - "report_interval=5," - "run_time=20," - "populate_threads=1," - "threads=((count=8,read=1)),"; - -static const char * const med_config_str = - "conn_config=\"cache_size=1GB\"," - "table_config=\"lsm=(chunk_size=20MB)\"," - "icount=50000000," - "value_sz=100," - "key_sz=20," - "report_interval=5," - "run_time=100," - 
"populate_threads=1," - "threads=((count=16,read=1)),"; - -static const char * const large_config_str = - "conn_config=\"cache_size=2GB\"," - "table_config=\"lsm=(chunk_size=50MB)\"," - "icount=500000000," - "value_sz=100," - "key_sz=20," - "report_interval=5," - "run_time=600," - "populate_threads=1," - "threads=((count=16,read=1)),"; - static const char * const debug_cconfig = "verbose=[lsm]"; static const char * const debug_tconfig = ""; -static uint64_t g_ckpt_ops; /* checkpoint operations */ -static uint64_t g_insert_ops; /* insert operations */ -static uint64_t g_read_ops; /* read operations */ -static uint64_t g_update_ops; /* update operations */ - -static uint64_t g_insert_key; /* insert key */ - -static volatile int g_ckpt; /* checkpoint in progress */ -static volatile int g_error; /* thread error */ -static volatile int g_stop; /* notify threads to stop */ - /* * Atomic update where needed. */ @@ -103,15 +71,20 @@ static volatile int g_stop; /* notify threads to stop */ #endif static void *checkpoint_worker(void *); +static int create_tables(CONFIG *); +static int create_uris(CONFIG *); static int execute_populate(CONFIG *); static int execute_workload(CONFIG *); static int find_table_count(CONFIG *); static void *monitor(void *); static void *populate_thread(void *); -static void randomize_value(CONFIG *, char *); +static void randomize_value(CONFIG *, char *); +static int start_all_runs(CONFIG *); +static int start_run(CONFIG *); static int start_threads(CONFIG *, WORKLOAD *, CONFIG_THREAD *, u_int, void *(*)(void *)); static int stop_threads(CONFIG *, u_int, CONFIG_THREAD *); +static void *thread_run_wtperf(void *); static void *worker(void *); static uint64_t wtperf_rand(CONFIG *); static uint64_t wtperf_value_range(CONFIG *); @@ -130,9 +103,9 @@ extern uint32_t __wt_random(void); /* Retrieve an ID for the next insert operation. 
*/ static inline uint64_t -get_next_incr(void) +get_next_incr(CONFIG *cfg) { - return (ATOMIC_ADD(g_insert_key, 1)); + return (ATOMIC_ADD(cfg->insert_key, 1)); } static void @@ -219,16 +192,18 @@ worker(void *arg) CONFIG_THREAD *thread; TRACK *trk; WT_CONNECTION *conn; - WT_CURSOR *cursor; + WT_CURSOR **cursors, *cursor; WT_SESSION *session; + size_t i; uint64_t next_val, usecs; - int measure_latency, ret; uint8_t *op, *op_end; + int measure_latency, ret; char *value_buf, *key_buf, *value; thread = (CONFIG_THREAD *)arg; cfg = thread->cfg; conn = cfg->conn; + cursors = NULL; session = NULL; trk = NULL; @@ -237,12 +212,22 @@ worker(void *arg) lprintf(cfg, ret, 0, "worker: WT_CONNECTION.open_session"); goto err; } - if ((ret = session->open_cursor( - session, cfg->uri, NULL, NULL, &cursor)) != 0) { - lprintf(cfg, - ret, 0, "worker: WT_SESSION.open_cursor: %s", cfg->uri); + cursors = (WT_CURSOR **)calloc( + cfg->table_count, sizeof(WT_CURSOR *)); + if (cursors == NULL) { + lprintf(cfg, ENOMEM, 0, + "worker: couldn't allocate cursor array"); goto err; } + for (i = 0; i < cfg->table_count; i++) { + if ((ret = session->open_cursor(session, + cfg->uris[i], NULL, NULL, &cursors[i])) != 0) { + lprintf(cfg, ret, 0, + "worker: WT_SESSION.open_cursor: %s", + cfg->uris[i]); + goto err; + } + } key_buf = thread->key_buf; value_buf = thread->value_buf; @@ -250,7 +235,7 @@ worker(void *arg) op = thread->workload->ops; op_end = op + sizeof(thread->workload->ops); - while (!g_stop) { + while (!cfg->stop) { /* * Generate the next key and setup operation specific * statistics tracking objects. 
@@ -262,7 +247,7 @@ worker(void *arg) if (cfg->random_range) next_val = wtperf_rand(cfg); else - next_val = cfg->icount + get_next_incr(); + next_val = cfg->icount + get_next_incr(cfg); break; case WORKER_READ: trk = &thread->read; @@ -285,7 +270,18 @@ worker(void *arg) } sprintf(key_buf, "%0*" PRIu64, cfg->key_sz, next_val); - measure_latency = cfg->sample_interval != 0 && ( + + /* + * Spread the data out around the multiple databases. + */ + cursor = cursors[next_val % cfg->table_count]; + + /* + * Skip the first time we do an operation, when trk->ops + * is 0, to avoid first time latency spikes. + */ + measure_latency = + cfg->sample_interval != 0 && trk->ops != 0 && ( trk->ops % cfg->sample_rate == 0); if (measure_latency && (ret = __wt_epoch(NULL, &start)) != 0) { @@ -385,8 +381,10 @@ op_err: lprintf(cfg, ret, 0, /* Notify our caller we failed and shut the system down. */ if (0) { -err: g_error = g_stop = 1; +err: cfg->error = cfg->stop = 1; } + if (cursors != NULL) + free(cursors); return (NULL); } @@ -502,17 +500,20 @@ populate_thread(void *arg) CONFIG_THREAD *thread; TRACK *trk; WT_CONNECTION *conn; - WT_CURSOR *cursor; + WT_CURSOR **cursors, *cursor; WT_SESSION *session; - uint32_t opcount; + size_t i; uint64_t op, usecs; + uint32_t opcount; int intxn, measure_latency, ret; char *value_buf, *key_buf; + const char *cursor_config; thread = (CONFIG_THREAD *)arg; cfg = thread->cfg; conn = cfg->conn; session = NULL; + cursors = NULL; ret = 0; trk = &thread->insert; @@ -525,17 +526,30 @@ populate_thread(void *arg) goto err; } - /* Do a bulk load if populate is single-threaded. */ - if ((ret = session->open_cursor(session, cfg->uri, NULL, - cfg->populate_threads == 1 ? "bulk" : NULL, &cursor)) != 0) { - lprintf(cfg, - ret, 0, "populate: WT_SESSION.open_cursor: %s", cfg->uri); + /* Do bulk loads if populate is single-threaded. */ + cursor_config = cfg->populate_threads == 1 ? "bulk" : NULL; + /* Create the cursors. 
*/ + cursors = (WT_CURSOR **)calloc( + cfg->table_count, sizeof(WT_CURSOR *)); + if (cursors == NULL) { + lprintf(cfg, ENOMEM, 0, + "worker: couldn't allocate cursor array"); goto err; } + for (i = 0; i < cfg->table_count; i++) { + if ((ret = session->open_cursor( + session, cfg->uris[i], NULL, + cursor_config, &cursors[i])) != 0) { + lprintf(cfg, ret, 0, + "populate: WT_SESSION.open_cursor: %s", + cfg->uris[i]); + goto err; + } + } - /* Populate the database. */ + /* Populate the databases. */ for (intxn = 0, opcount = 0;;) { - op = get_next_incr(); + op = get_next_incr(cfg); if (op > cfg->icount) break; @@ -548,8 +562,13 @@ populate_thread(void *arg) } intxn = 1; } + /* + * Figure out which table this op belongs to. + */ + cursor = cursors[op % cfg->table_count]; sprintf(key_buf, "%0*" PRIu64, cfg->key_sz, op); - measure_latency = cfg->sample_interval != 0 && ( + measure_latency = + cfg->sample_interval != 0 && trk->ops != 0 && ( trk->ops % cfg->sample_rate == 0); if (measure_latency && (ret = __wt_epoch(NULL, &start)) != 0) { @@ -564,7 +583,12 @@ populate_thread(void *arg) lprintf(cfg, ret, 0, "Failed inserting"); goto err; } - /* Gather statistics */ + /* + * Gather statistics. + * We measure the latency of inserting a single key. If there + * are multiple tables, it is the time for insertion into all + * of them. + */ if (measure_latency) { if ((ret = __wt_epoch(NULL, &stop)) != 0) { lprintf(cfg, ret, 0, @@ -601,8 +625,10 @@ populate_thread(void *arg) /* Notify our caller we failed and shut the system down. 
*/ if (0) { -err: g_error = g_stop = 1; +err: cfg->error = cfg->stop = 1; } + if (cursors != NULL) + free(cursors); return (NULL); } @@ -614,16 +640,16 @@ monitor(void *arg) struct tm *tm, _tm; CONFIG *cfg; FILE *fp; - char buf[64], *path; - int ret; + size_t len; uint64_t reads, inserts, updates; uint64_t cur_reads, cur_inserts, cur_updates; uint64_t last_reads, last_inserts, last_updates; uint32_t read_avg, read_min, read_max; uint32_t insert_avg, insert_min, insert_max; uint32_t update_avg, update_min, update_max; - size_t len; u_int i; + int ret; + char buf[64], *path; cfg = (CONFIG *)arg; assert(cfg->sample_interval != 0); @@ -643,28 +669,32 @@ monitor(void *arg) } /* Set line buffering for monitor file. */ (void)setvbuf(fp, NULL, _IOLBF, 0); -#ifdef __WRITE_A_HEADER fprintf(fp, "#time," - "read operations,insert operations,update operations," + "totalsec," + "read ops per second," + "insert ops per second," + "update ops per second," "checkpoints," - "read average latency(NS),read minimum latency(NS)," - "read maximum latency(NS)," - "insert average latency(NS),insert min latency(NS)," - "insert maximum latency(NS)," - "update average latency(NS),update min latency(NS)," - "update maximum latency(NS)" + "read average latency(uS)," + "read minimum latency(uS)," + "read maximum latency(uS)," + "insert average latency(uS)," + "insert min latency(uS)," + "insert maximum latency(uS)," + "update average latency(uS)," + "update min latency(uS)," + "update maximum latency(uS)" "\n"); -#endif last_reads = last_inserts = last_updates = 0; - while (!g_stop) { + while (!cfg->stop) { for (i = 0; i < cfg->sample_interval; i++) { sleep(1); - if (g_stop) + if (cfg->stop) break; } /* If the workers are done, don't bother with a final call. 
*/ - if (g_stop) + if (cfg->stop) break; if ((ret = __wt_epoch(NULL, &t)) != 0) { @@ -694,18 +724,18 @@ monitor(void *arg) cur_inserts = inserts - last_inserts; (void)fprintf(fp, - "%s" + "%s,%" PRIu32 ",%" PRIu64 ",%" PRIu64 ",%" PRIu64 ",%c" ",%" PRIu32 ",%" PRIu32 ",%" PRIu32 ",%" PRIu32 ",%" PRIu32 ",%" PRIu32 ",%" PRIu32 ",%" PRIu32 ",%" PRIu32 "\n", - buf, + buf, cfg->totalsec, cur_reads / cfg->sample_interval, cur_inserts / cfg->sample_interval, cur_updates / cfg->sample_interval, - g_ckpt ? 'Y' : 'N', + cfg->ckpt ? 'Y' : 'N', read_avg, read_min, read_max, insert_avg, insert_min, insert_max, update_avg, update_min, update_max); @@ -717,7 +747,7 @@ monitor(void *arg) /* Notify our caller we failed and shut the system down. */ if (0) { -err: g_error = g_stop = 1; +err: cfg->error = cfg->stop = 1; } if (fp != NULL) @@ -750,27 +780,27 @@ checkpoint_worker(void *arg) goto err; } - while (!g_stop) { + while (!cfg->stop) { /* Break the sleep up, so we notice interrupts faster. */ for (i = 0; i < cfg->checkpoint_interval; i++) { sleep(1); - if (g_stop) + if (cfg->stop) break; } /* If the workers are done, don't bother with a final call. */ - if (g_stop) + if (cfg->stop) break; if ((ret = __wt_epoch(NULL, &s)) != 0) { lprintf(cfg, ret, 0, "Get time failed in checkpoint."); goto err; } - g_ckpt = 1; + cfg->ckpt = 1; if ((ret = session->checkpoint(session, NULL)) != 0) { lprintf(cfg, ret, 0, "Checkpoint failed."); goto err; } - g_ckpt = 0; + cfg->ckpt = 0; ++thread->ckpt.ops; if ((ret = __wt_epoch(NULL, &e)) != 0) { @@ -788,7 +818,7 @@ checkpoint_worker(void *arg) /* Notify our caller we failed and shut the system down. 
*/ if (0) { -err: g_error = g_stop = 1; +err: cfg->error = cfg->stop = 1; } return (NULL); @@ -797,13 +827,14 @@ err: g_error = g_stop = 1; static int execute_populate(CONFIG *cfg) { + struct timespec start, stop; CONFIG_THREAD *popth; WT_SESSION *session; - struct timespec start, stop; double secs; + size_t i; uint64_t last_ops; - uint32_t interval, total; - int elapsed, ret; + uint32_t interval; + int elapsed, ret, t_ret; session = NULL; lprintf(cfg, 0, 1, @@ -816,14 +847,14 @@ execute_populate(CONFIG *cfg) cfg->popthreads, cfg->populate_threads, populate_thread)) != 0) return (ret); - g_insert_key = 0; + cfg->insert_key = 0; if ((ret = __wt_epoch(NULL, &start)) != 0) { lprintf(cfg, ret, 0, "Get time failed in populate."); return (ret); } - for (elapsed = 0, interval = 0, last_ops = 0, total = 0; - g_insert_key < cfg->icount && g_error == 0;) { + for (elapsed = 0, interval = 0, last_ops = 0; + cfg->insert_key < cfg->icount && cfg->error == 0;) { /* * Sleep for 100th of a second, report_interval is in second * granularity, each 100th increment of elapsed is a single @@ -836,14 +867,14 @@ execute_populate(CONFIG *cfg) if (++interval < cfg->report_interval) continue; interval = 0; - total += cfg->report_interval; - g_insert_ops = sum_pop_ops(cfg); + cfg->totalsec += cfg->report_interval; + cfg->insert_ops = sum_pop_ops(cfg); lprintf(cfg, 0, 1, "%" PRIu64 " populate inserts (%" PRIu64 " of %" PRIu32 ") in %" PRIu32 " secs (%" PRIu32 " total secs)", - g_insert_ops - last_ops, g_insert_ops, - cfg->icount, cfg->report_interval, total); - last_ops = g_insert_ops; + cfg->insert_ops - last_ops, cfg->insert_ops, + cfg->icount, cfg->report_interval, cfg->totalsec); + last_ops = cfg->insert_ops; } if ((ret = __wt_epoch(NULL, &stop)) != 0) { lprintf(cfg, ret, 0, "Get time failed in populate."); @@ -864,7 +895,7 @@ execute_populate(CONFIG *cfg) return (ret); /* Report if any worker threads didn't finish. 
*/ - if (g_error != 0) { + if (cfg->error != 0) { lprintf(cfg, WT_ERROR, 0, "Populate thread(s) exited without finishing."); return (WT_ERROR); @@ -879,7 +910,9 @@ execute_populate(CONFIG *cfg) "Load time: %.2f\n" "load ops/sec: %.2f", secs, cfg->icount / secs); /* - * If configured, compact to allow LSM merging to complete. + * If configured, compact to allow LSM merging to complete. We + * set an unlimited timeout because if we close the connection + * then any in-progress compact/merge is aborted. */ if (cfg->compact) { if ((ret = cfg->conn->open_session( @@ -889,16 +922,34 @@ execute_populate(CONFIG *cfg) return (ret); } lprintf(cfg, 0, 1, "Compact after populate"); - if ((ret = session->compact(session, cfg->uri, NULL)) != 0) { - lprintf(cfg, ret, 0, - "execute_populate: WT_SESSION.compact"); - return (ret); + if ((ret = __wt_epoch(NULL, &start)) != 0) { + lprintf(cfg, ret, 0, "Get time failed in populate."); + goto err; } - if ((ret = session->close(session, NULL)) != 0) { - lprintf(cfg, ret, 0, + /* + * We measure how long it takes to compact all tables for this + * workload. 
+ */ + for (i = 0; i < cfg->table_count; i++) + if ((ret = session->compact( + session, cfg->uris[i], "timeout=0")) != 0) { + lprintf(cfg, ret, 0, + "execute_populate: WT_SESSION.compact"); + goto err; + } + if ((ret = __wt_epoch(NULL, &stop)) != 0) { + lprintf(cfg, ret, 0, "Get time failed in populate."); + goto err; + } + secs = stop.tv_sec + stop.tv_nsec / (double)BILLION; + secs -= start.tv_sec + start.tv_nsec / (double)BILLION; + lprintf(cfg, 0, 1, "Compact completed in %.2f seconds", secs); +err: + if ((t_ret = session->close(session, NULL)) != 0) + lprintf(cfg, t_ret, 0, "execute_populate: WT_SESSION.close"); + if (ret != 0 || (ret = t_ret) != 0) return (ret); - } } /* @@ -926,12 +977,12 @@ execute_workload(CONFIG *cfg) CONFIG_THREAD *threads; WORKLOAD *workp; uint64_t last_ckpts, last_inserts, last_reads, last_updates; - uint32_t interval, run_ops, run_time, total; + uint32_t interval, run_ops, run_time; u_int i; int ret, t_ret; - g_insert_key = 0; - g_insert_ops = g_read_ops = g_update_ops = 0; + cfg->insert_key = 0; + cfg->insert_ops = cfg->read_ops = cfg->update_ops = 0; last_ckpts = last_inserts = last_reads = last_updates = 0; ret = 0; @@ -963,8 +1014,8 @@ execute_workload(CONFIG *cfg) threads += workp->threads; } - for (interval = cfg->report_interval, total = 0, - run_time = cfg->run_time, run_ops = cfg->run_ops; g_error == 0;) { + for (interval = cfg->report_interval, run_time = cfg->run_time, + run_ops = cfg->run_ops; cfg->error == 0;) { /* * Sleep for one second at a time. * If we are tracking run time, check to see if we're done, and @@ -979,46 +1030,46 @@ execute_workload(CONFIG *cfg) } /* Sum the operations we've done. 
*/ - g_ckpt_ops = sum_ckpt_ops(cfg); - g_insert_ops = sum_insert_ops(cfg); - g_read_ops = sum_read_ops(cfg); - g_update_ops = sum_update_ops(cfg); + cfg->ckpt_ops = sum_ckpt_ops(cfg); + cfg->insert_ops = sum_insert_ops(cfg); + cfg->read_ops = sum_read_ops(cfg); + cfg->update_ops = sum_update_ops(cfg); /* If we're checking total operations, see if we're done. */ - if (run_ops != 0 && - run_ops <= g_insert_ops + g_read_ops + g_update_ops) + if (run_ops != 0 && run_ops <= + cfg->insert_ops + cfg->read_ops + cfg->update_ops) break; /* If writing out throughput information, see if it's time. */ if (interval == 0 || --interval > 0) continue; interval = cfg->report_interval; - total += cfg->report_interval; + cfg->totalsec += cfg->report_interval; lprintf(cfg, 0, 1, "%" PRIu64 " reads, %" PRIu64 " inserts, %" PRIu64 " updates, %" PRIu64 " checkpoints in %" PRIu32 " secs (%" PRIu32 " total secs)", - g_read_ops - last_reads, - g_insert_ops - last_inserts, - g_update_ops - last_updates, - g_ckpt_ops - last_ckpts, - cfg->report_interval, total); - last_reads = g_read_ops; - last_inserts = g_insert_ops; - last_updates = g_update_ops; - last_ckpts = g_ckpt_ops; + cfg->read_ops - last_reads, + cfg->insert_ops - last_inserts, + cfg->update_ops - last_updates, + cfg->ckpt_ops - last_ckpts, + cfg->report_interval, cfg->totalsec); + last_reads = cfg->read_ops; + last_inserts = cfg->insert_ops; + last_updates = cfg->update_ops; + last_ckpts = cfg->ckpt_ops; } /* Notify the worker threads they are done. */ -err: g_stop = 1; +err: cfg->stop = 1; if ((t_ret = stop_threads( cfg, (u_int)cfg->workers_cnt, cfg->workers)) != 0 && ret == 0) ret = t_ret; /* Report if any worker threads didn't finish. 
*/ - if (g_error != 0) { + if (cfg->error != 0) { lprintf(cfg, WT_ERROR, 0, "Worker thread(s) exited without finishing."); if (ret == 0) @@ -1037,235 +1088,249 @@ find_table_count(CONFIG *cfg) WT_CONNECTION *conn; WT_CURSOR *cursor; WT_SESSION *session; - char *key; + uint32_t i, max_icount, table_icount; int ret, t_ret; + char *key; conn = cfg->conn; + max_icount = 0; if ((ret = conn->open_session( conn, NULL, cfg->sess_config, &session)) != 0) { lprintf(cfg, ret, 0, - "open_session failed finding existing table count"); - goto err; - } - if ((ret = session->open_cursor(session, cfg->uri, - NULL, NULL, &cursor)) != 0) { - lprintf(cfg, ret, 0, - "open_cursor failed finding existing table count"); - goto err; - } - if ((ret = cursor->prev(cursor)) != 0) { - lprintf(cfg, ret, 0, - "cursor prev failed finding existing table count"); - goto err; - } - if ((ret = cursor->get_key(cursor, &key)) != 0) { - lprintf(cfg, ret, 0, - "cursor get_key failed finding existing table count"); - goto err; + "find_table_count: open_session failed"); + goto out; } - cfg->icount = (uint32_t)atoi(key); + for (i = 0; i < cfg->table_count; i++) { + if ((ret = session->open_cursor(session, cfg->uris[i], + NULL, NULL, &cursor)) != 0) { + lprintf(cfg, ret, 0, + "find_table_count: open_cursor failed"); + goto err; + } + if ((ret = cursor->prev(cursor)) != 0) { + lprintf(cfg, ret, 0, + "find_table_count: cursor prev failed"); + goto err; + } + if ((ret = cursor->get_key(cursor, &key)) != 0) { + lprintf(cfg, ret, 0, + "find_table_count: cursor get_key failed"); + goto err; + } + table_icount = (uint32_t)atoi(key); + if (table_icount > max_icount) + max_icount = table_icount; + if ((ret = cursor->close(cursor)) != 0) { + lprintf(cfg, ret, 0, + "find_table_count: cursor close failed"); + goto err; + } + } err: if ((t_ret = session->close(session, NULL)) != 0) { if (ret == 0) ret = t_ret; lprintf(cfg, ret, 0, - "session close failed finding existing table count"); + "find_table_count: session close 
failed"); } - return (ret); + cfg->icount = max_icount; +out: return (ret); } -int -main(int argc, char *argv[]) +/* + * Populate the uri array if more than one table is being used. + */ +static int +create_uris(CONFIG *cfg) { - CONFIG *cfg, _cfg; - WT_SESSION *session; - pthread_t monitor_thread; - size_t len; - uint64_t req_len, total_ops; - int ch, monitor_created, monitor_set, ret, t_ret; - const char *helium_mount; - const char *opts = "C:H:h:LMm:O:o:ST:"; - const char *wtperftmp_subdir = "wtperftmp"; - const char *user_cconfig, *user_tconfig; - char *cmd, *cc_buf, *tc_buf, *tmphome; - - session = NULL; - monitor_created = monitor_set = ret = 0; - helium_mount = user_cconfig = user_tconfig = NULL; - cmd = cc_buf = tc_buf = tmphome = NULL; + size_t base_uri_len; + uint32_t i; + int ret; + char *uri; - /* Setup the default configuration values. */ - cfg = &_cfg; - memset(cfg, 0, sizeof(*cfg)); - if (config_assign(cfg, &default_cfg)) + ret = 0; + base_uri_len = strlen(cfg->base_uri); + cfg->uris = (char **)calloc(cfg->table_count, sizeof(char *)); + if (cfg->uris == NULL) { + ret = ENOMEM; goto err; - - /* Do a basic validation of options, and home is needed before open. */ - while ((ch = getopt(argc, argv, opts)) != EOF) - switch (ch) { - case 'h': - cfg->home = optarg; - break; - case 'm': - cfg->monitor_dir = optarg; - monitor_set = 1; - break; - case '?': - fprintf(stderr, "Invalid option\n"); - usage(); - goto einval; + } + for (i = 0; i < cfg->table_count; i++) { + uri = cfg->uris[i] = (char *)calloc(base_uri_len + 3, 1); + if (uri == NULL) { + ret = ENOMEM; + goto err; } + memcpy(uri, cfg->base_uri, base_uri_len); + /* + * If there is only one table, just use base name. 
+ */ + if (cfg->table_count > 1) { + uri[base_uri_len] = uri[base_uri_len + 1] = '0'; + uri[base_uri_len] = '0' + (i / 10); + uri[base_uri_len + 1] = '0' + (i % 10); + } + } +err: if (ret != 0 && cfg->uris != NULL) { + for (i = 0; i < cfg->table_count; i++) + free(cfg->uris[i]); + free(cfg->uris); + cfg->uris = NULL; + } + return (ret); +} - /* - * If the user did not specify a monitor directory - * then set the monitor directory to the home dir. - */ - if (!monitor_set) - cfg->monitor_dir = cfg->home; +static int +create_tables(CONFIG *cfg) +{ + WT_SESSION *session; + size_t i; + int ret; + char *uri; - /* - * Create a temporary directory underneath the test directory in which - * we do an initial WiredTiger open, because we need a connection in - * order to use the extension configuration parser. We will open the - * real WiredTiger database after parsing the options. - */ - len = strlen(cfg->home) + strlen(wtperftmp_subdir) + 2; - if ((tmphome = malloc(len)) == NULL) { - ret = enomem(cfg); - goto err; - } - snprintf(tmphome, len, "%s/%s", cfg->home, wtperftmp_subdir); - len = len * 2 + 100; - if ((cmd = malloc(len)) == NULL) { - ret = enomem(cfg); - goto err; + session = NULL; + if (cfg->create == 0) + return (0); + + uri = cfg->base_uri; + if ((ret = cfg->conn->open_session( + cfg->conn, NULL, cfg->sess_config, &session)) != 0) { + lprintf(cfg, ret, 0, + "Error opening a session on %s", cfg->home); + return (ret); } - snprintf(cmd, len, "rm -rf %s && mkdir %s", tmphome, tmphome); - if (system(cmd) != 0) { - fprintf(stderr, "%s: failed\n", cmd); - goto einval; + for (i = 0; i < cfg->table_count; i++) { + uri = cfg->uris[i]; + if ((ret = session->create( + session, uri, cfg->table_config)) != 0) { + lprintf(cfg, ret, 0, + "Error creating table %s", cfg->uris[i]); + return (ret); + } } - if ((ret = wiredtiger_open( - tmphome, NULL, "create", &cfg->conn)) != 0) { - lprintf(cfg, ret, 0, "wiredtiger_open: %s", tmphome); - goto err; + if ((ret = 
session->close(session, NULL)) != 0) { + lprintf(cfg, + ret, 0, "Error closing session"); + return (ret); } + return (ret); +} - /* - * Then parse different config structures - other options override - * fields within the structure. - */ - optind = 1; - while ((ch = getopt(argc, argv, opts)) != EOF) - switch (ch) { - case 'S': - if (config_opt_line(cfg, small_config_str) != 0) - goto einval; - break; - case 'M': - if (config_opt_line(cfg, med_config_str) != 0) - goto einval; - break; - case 'L': - if (config_opt_line(cfg, large_config_str) != 0) - goto einval; - break; - case 'O': - if (config_opt_file(cfg, optarg) != 0) - goto einval; - break; - default: - /* Validation done previously. */ - break; - } +static int +start_all_runs(CONFIG *cfg) +{ + CONFIG *next_cfg, **configs; + pthread_t *threads; + size_t cmd_len, home_len, i; + int ret, t_ret; + char *cmd_buf, *new_home; - /* Parse other options */ - optind = 1; - while ((ch = getopt(argc, argv, opts)) != EOF) - switch (ch) { - case 'C': - user_cconfig = optarg; - break; - case 'H': - helium_mount = optarg; - break; - case 'o': - /* Allow -o key=value */ - if (config_opt_line(cfg, optarg) != 0) - goto einval; - break; - case 'T': - user_tconfig = optarg; - break; - } + ret = 0; + configs = NULL; + cmd_buf = NULL; - /* Build the URI from the table name. */ - req_len = strlen("table:") + - strlen(HELIUM_NAME) + strlen(cfg->table_name) + 2; - if ((cfg->uri = calloc(req_len, 1)) == NULL) { - ret = enomem(cfg); + if (cfg->database_count == 1) + return (start_run(cfg)); + + /* Allocate an array to hold our config struct copies. */ + configs = calloc(cfg->database_count, sizeof(CONFIG *)); + if (configs == NULL) + return (ENOMEM); + + /* Allocate an array to hold our thread IDs. */ + threads = calloc(cfg->database_count, sizeof(pthread_t)); + if (threads == NULL) { + ret = ENOMEM; goto err; } - snprintf(cfg->uri, req_len, "table:%s%s%s", - helium_mount == NULL ? "" : HELIUM_NAME, - helium_mount == NULL ? 
"" : "/", - cfg->table_name); - - if ((ret = setup_log_file(cfg)) != 0) - goto err; - /* Make stdout line buffered, so verbose output appears quickly. */ - (void)setvbuf(stdout, NULL, _IOLBF, 0); + home_len = strlen(cfg->home); + cmd_len = (home_len * 2) + 30; /* Add some slop. */ + cmd_buf = calloc(cmd_len, 1); + if (cmd_buf == NULL) { + ret = ENOMEM; + goto err; + } + for (i = 0; i < cfg->database_count; i++) { + next_cfg = calloc(1, sizeof(CONFIG)); + if ((ret = config_assign(next_cfg, cfg)) != 0) + goto err; - /* Concatenate non-default configuration strings. */ - if (cfg->verbose > 1 || user_cconfig != NULL) { - req_len = strlen(cfg->conn_config) + strlen(debug_cconfig) + 3; - if (user_cconfig != NULL) - req_len += strlen(user_cconfig); - if ((cc_buf = calloc(req_len, 1)) == NULL) { - ret = enomem(cfg); + /* Setup a unique home directory for each database. */ + configs[i] = next_cfg; + new_home = malloc(home_len + 5); + if (new_home == NULL) { + ret = ENOMEM; goto err; } - snprintf(cc_buf, req_len, "%s%s%s%s%s", - cfg->conn_config, - cfg->verbose > 1 ? "," : "", - cfg->verbose > 1 ? debug_cconfig : "", - user_cconfig ? "," : "", user_cconfig ? user_cconfig : ""); - if ((ret = config_opt_str(cfg, "conn_config", cc_buf)) != 0) - goto err; - } - if (cfg->verbose > 1 || helium_mount != NULL || user_tconfig != NULL) { - req_len = strlen(cfg->table_config) + - strlen(HELIUM_CONFIG) + strlen(debug_tconfig) + 3; - if (user_tconfig != NULL) - req_len += strlen(user_tconfig); - if ((tc_buf = calloc(req_len, 1)) == NULL) { - ret = enomem(cfg); + sprintf(new_home, "%s/D%02d", cfg->home, (int)i); + next_cfg->home = (const char *)new_home; + + /* If the monitor dir is default, update it too. */ + if (strcmp(cfg->monitor_dir, cfg->home) == 0) + next_cfg->monitor_dir = new_home; + + /* Create clean home directories. 
*/ + snprintf(cmd_buf, cmd_len, "rm -rf %s && mkdir %s", + next_cfg->home, next_cfg->home); + if ((ret = system(cmd_buf)) != 0) { + fprintf(stderr, "%s: failed\n", cmd_buf); goto err; } - snprintf(tc_buf, req_len, "%s%s%s%s%s%s", - cfg->table_config, - cfg->verbose > 1 ? "," : "", - cfg->verbose > 1 ? debug_tconfig : "", - user_tconfig ? "," : "", user_tconfig ? user_tconfig : "", - helium_mount == NULL ? "" : HELIUM_CONFIG); - if ((ret = config_opt_str(cfg, "table_config", tc_buf)) != 0) + if ((ret = pthread_create( + &threads[i], NULL, thread_run_wtperf, next_cfg)) != 0) { + lprintf(cfg, ret, 0, "Error creating thread"); goto err; + } } - ret = cfg->conn->close(cfg->conn, NULL); - cfg->conn = NULL; - if (ret != 0) { - lprintf(cfg, ret, 0, "WT_CONNECTION.close: %s", tmphome); - goto err; + /* Wait for threads to finish. */ + for (i = 0; i < cfg->database_count; i++) { + if ((t_ret = pthread_join(threads[i], NULL)) != 0) { + lprintf(cfg, ret, 0, "Error joining thread"); + if (ret == 0) + ret = t_ret; + } } - /* Sanity-check the configuration */ - if (config_sanity(cfg) != 0) - goto err; - if (cfg->verbose > 1) /* Display the configuration. */ - config_print(cfg); +err: for (i = 0; i < cfg->database_count && configs[i] != NULL; i++) { + free((char *)configs[i]->home); + config_free(configs[i]); + free(configs[i]); + } + free(configs); + free(threads); + free(cmd_buf); + + return (ret); +} + +/* Run an instance of wtperf for a given configuration. */ +static void * +thread_run_wtperf(void *arg) +{ + CONFIG *cfg; + int ret; + + cfg = (CONFIG *)arg; + if ((ret = start_run(cfg)) != 0) + lprintf(cfg, ret, 0, "Run failed for: %s.", cfg->home); + return (NULL); +} + +static int +start_run(CONFIG *cfg) +{ + pthread_t monitor_thread; + uint64_t total_ops; + int monitor_created, ret, t_ret; + char helium_buf[256]; + + monitor_created = ret = 0; + + if ((ret = setup_log_file(cfg)) != 0) + goto err; if ((ret = wiredtiger_open( /* Open the real connection. 
*/ cfg->home, NULL, cfg->conn_config, &cfg->conn)) != 0) { @@ -1273,40 +1338,25 @@ main(int argc, char *argv[]) goto err; } - if (helium_mount != NULL) { /* Configure optional Helium volume. */ - char helium_buf[256]; + /* Configure optional Helium volume. */ + if (cfg->helium_mount != NULL) { snprintf(helium_buf, sizeof(helium_buf), "entry=wiredtiger_extension_init,config=[" "%s=[helium_devices=\"he://./%s\"," "helium_o_volume_truncate=1]]", - HELIUM_NAME, helium_mount); + HELIUM_NAME, cfg->helium_mount); if ((ret = cfg->conn->load_extension( cfg->conn, HELIUM_PATH, helium_buf)) != 0) lprintf(cfg, ret, 0, "Error loading Helium: %s", helium_buf); } - if (cfg->create != 0) { /* If creating, create the table. */ - if ((ret = cfg->conn->open_session( - cfg->conn, NULL, cfg->sess_config, &session)) != 0) { - lprintf(cfg, ret, 0, - "Error opening a session on %s", cfg->home); - goto err; - } - if ((ret = session->create( - session, cfg->uri, cfg->table_config)) != 0) { - lprintf(cfg, - ret, 0, "Error creating table %s", cfg->uri); - goto err; - } - if ((ret = session->close(session, NULL)) != 0) { - lprintf(cfg, - ret, 0, "Error closing session"); - goto err; - } - session = NULL; - } - /* Start the monitor thread. */ + if ((ret = create_uris(cfg)) != 0) + goto err; + if ((ret = create_tables(cfg)) != 0) + goto err; + + /* Start the monitor thread. */ if (cfg->sample_interval != 0) { if ((ret = pthread_create( &monitor_thread, NULL, monitor, cfg)) != 0) { @@ -1316,15 +1366,17 @@ main(int argc, char *argv[]) } monitor_created = 1; } - /* If creating, populate the table. */ + + /* If creating, populate the table. */ if (cfg->create != 0 && execute_populate(cfg) != 0) goto err; - /* Optional workload. */ + + /* Optional workload. */ if (cfg->run_time != 0 || cfg->run_ops != 0) { - /* Didn't create, set insert count. */ + /* Didn't create, set insert count. */ if (cfg->create == 0 && find_table_count(cfg) != 0) goto err; - /* Start the checkpoint thread. 
*/ + /* Start the checkpoint thread. */ if (cfg->checkpoint_threads != 0) { lprintf(cfg, 0, 1, "Starting %" PRIu32 " checkpoint thread(s)", @@ -1339,47 +1391,46 @@ main(int argc, char *argv[]) cfg->checkpoint_threads, checkpoint_worker) != 0) goto err; } - /* Execute the workload. */ + /* Execute the workload. */ if ((ret = execute_workload(cfg)) != 0) goto err; /* One final summation of the operations we've completed. */ - g_read_ops = sum_read_ops(cfg); - g_insert_ops = sum_insert_ops(cfg); - g_update_ops = sum_update_ops(cfg); - g_ckpt_ops = sum_ckpt_ops(cfg); - total_ops = g_read_ops + g_insert_ops + g_update_ops; + cfg->read_ops = sum_read_ops(cfg); + cfg->insert_ops = sum_insert_ops(cfg); + cfg->update_ops = sum_update_ops(cfg); + cfg->ckpt_ops = sum_ckpt_ops(cfg); + total_ops = cfg->read_ops + cfg->insert_ops + cfg->update_ops; lprintf(cfg, 0, 1, "Executed %" PRIu64 " read operations (%" PRIu64 "%%) %" PRIu64 " ops/sec", - g_read_ops, (g_read_ops * 100) / total_ops, - g_read_ops / cfg->run_time); + cfg->read_ops, (cfg->read_ops * 100) / total_ops, + cfg->read_ops / cfg->run_time); lprintf(cfg, 0, 1, "Executed %" PRIu64 " insert operations (%" PRIu64 "%%) %" PRIu64 " ops/sec", - g_insert_ops, (g_insert_ops * 100) / total_ops, - g_insert_ops / cfg->run_time); + cfg->insert_ops, (cfg->insert_ops * 100) / total_ops, + cfg->insert_ops / cfg->run_time); lprintf(cfg, 0, 1, "Executed %" PRIu64 " update operations (%" PRIu64 "%%) %" PRIu64 " ops/sec", - g_update_ops, (g_update_ops * 100) / total_ops, - g_update_ops / cfg->run_time); + cfg->update_ops, (cfg->update_ops * 100) / total_ops, + cfg->update_ops / cfg->run_time); lprintf(cfg, 0, 1, "Executed %" PRIu64 " checkpoint operations", - g_ckpt_ops); + cfg->ckpt_ops); latency_print(cfg); } if (0) { -einval: ret = EINVAL; err: if (ret == 0) ret = EXIT_FAILURE; } /* Notify the worker threads they are done. 
*/ - g_stop = 1; + cfg->stop = 1; if ((t_ret = stop_threads(cfg, 1, cfg->ckptthreads)) != 0) if (ret == 0) @@ -1415,12 +1466,165 @@ err: if (ret == 0) if ((t_ret = fclose(cfg->logf)) != 0 && ret == 0) ret = t_ret; } - config_free(cfg); + return (ret); +} +int +main(int argc, char *argv[]) +{ + CONFIG *cfg, _cfg; + size_t req_len; + int ch, monitor_set, ret; + char *cc_buf, *tc_buf; + const char *opts = "C:H:h:m:O:o:T:"; + const char *config_opts, *user_cconfig, *user_tconfig; + + monitor_set = ret = 0; + cc_buf = tc_buf = NULL; + config_opts = user_cconfig = user_tconfig = NULL; + + /* Setup the default configuration values. */ + cfg = &_cfg; + memset(cfg, 0, sizeof(*cfg)); + if (config_assign(cfg, &default_cfg)) + goto err; + + /* Do a basic validation of options, and home is needed before open. */ + while ((ch = getopt(argc, argv, opts)) != EOF) + switch (ch) { + case 'C': + user_cconfig = optarg; + break; + case 'H': + cfg->helium_mount = optarg; + break; + case 'O': + config_opts = optarg; + break; + case 'T': + user_tconfig = optarg; + break; + case 'h': + cfg->home = optarg; + break; + case 'm': + cfg->monitor_dir = optarg; + monitor_set = 1; + break; + case '?': + fprintf(stderr, "Invalid option\n"); + usage(); + goto einval; + } + + /* + * If the user did not specify a monitor directory then set the + * monitor directory to the home dir. + */ + if (!monitor_set) + cfg->monitor_dir = cfg->home; + + /* Parse configuration settings from configuration file. */ + if (config_opts != NULL && config_opt_file(cfg, config_opts) != 0) + goto einval; + + /* Parse options that override values set via a configuration file. */ + optind = 1; + while ((ch = getopt(argc, argv, opts)) != EOF) + switch (ch) { + case 'o': + /* Allow -o key=value */ + if (config_opt_line(cfg, optarg) != 0) + goto einval; + break; + } + + if ((ret = config_compress(cfg)) != 0) + goto err; + + /* Build the URI from the table name. 
*/ + req_len = strlen("table:") + + strlen(HELIUM_NAME) + strlen(cfg->table_name) + 2; + if ((cfg->base_uri = calloc(req_len, 1)) == NULL) { + ret = enomem(cfg); + goto err; + } + snprintf(cfg->base_uri, req_len, "table:%s%s%s", + cfg->helium_mount == NULL ? "" : HELIUM_NAME, + cfg->helium_mount == NULL ? "" : "/", + cfg->table_name); + + /* Make stdout line buffered, so verbose output appears quickly. */ + (void)setvbuf(stdout, NULL, _IOLBF, 0); + + /* Concatenate non-default configuration strings. */ + if (cfg->verbose > 1 || user_cconfig != NULL || + cfg->compress_ext != NULL) { + req_len = strlen(cfg->conn_config) + strlen(debug_cconfig) + 3; + if (user_cconfig != NULL) + req_len += strlen(user_cconfig); + if (cfg->compress_ext != NULL) + req_len += strlen(cfg->compress_ext); + if ((cc_buf = calloc(req_len, 1)) == NULL) { + ret = enomem(cfg); + goto err; + } + /* + * This is getting hard to parse. + */ + snprintf(cc_buf, req_len, "%s%s%s%s%s%s", + cfg->conn_config, + cfg->compress_ext ? cfg->compress_ext : "", + cfg->verbose > 1 ? ",": "", + cfg->verbose > 1 ? debug_cconfig : "", + user_cconfig ? ",": "", + user_cconfig ? user_cconfig : ""); + if ((ret = config_opt_str(cfg, "conn_config", cc_buf)) != 0) + goto err; + } + if (cfg->verbose > 1 || cfg->helium_mount != NULL || + user_tconfig != NULL || cfg->compress_table != NULL) { + req_len = strlen(cfg->table_config) + strlen(HELIUM_CONFIG) + + strlen(debug_tconfig) + 3; + if (user_tconfig != NULL) + req_len += strlen(user_tconfig); + if (cfg->compress_table != NULL) + req_len += strlen(cfg->compress_table); + if ((tc_buf = calloc(req_len, 1)) == NULL) { + ret = enomem(cfg); + goto err; + } + /* + * This is getting hard to parse. + */ + snprintf(tc_buf, req_len, "%s%s%s%s%s%s%s", + cfg->table_config, + cfg->compress_table ? cfg->compress_table : "", + cfg->verbose > 1 ? ",": "", + cfg->verbose > 1 ? debug_tconfig : "", + user_tconfig ? ",": "", + user_tconfig ? user_tconfig : "", + cfg->helium_mount == NULL ? 
"" : HELIUM_CONFIG); + if ((ret = config_opt_str(cfg, "table_config", tc_buf)) != 0) + goto err; + } + + /* Sanity-check the configuration. */ + if (config_sanity(cfg) != 0) + goto err; + + /* Display the configuration. */ + if (cfg->verbose > 1) + config_print(cfg); + + if ((ret = start_all_runs(cfg)) != 0) + goto err; + + if (0) +einval: ret = EINVAL; +err: config_free(cfg); free(cc_buf); - free(cmd); free(tc_buf); - free(tmphome); return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE); } @@ -1507,7 +1711,7 @@ wtperf_value_range(CONFIG *cfg) if (cfg->random_range) return (cfg->icount + cfg->random_range); - return (cfg->icount + g_insert_key - (u_int)(cfg->workers_cnt + 1)); + return (cfg->icount + cfg->insert_key - (u_int)(cfg->workers_cnt + 1)); } static uint64_t diff --git a/bench/wtperf/wtperf.h b/bench/wtperf/wtperf.h index 54c4334a2ac..c94ec10d8fd 100644 --- a/bench/wtperf/wtperf.h +++ b/bench/wtperf/wtperf.h @@ -48,6 +48,21 @@ typedef struct __config CONFIG; typedef struct __config_thread CONFIG_THREAD; +#define EXT_PFX ",extensions=(" +#define EXT_SFX ")" +#define EXTPATH "../../ext/compressors/" /* Extensions path */ +#define BLKCMP_PFX ",block_compressor=" + +#define BZIP_BLK BLKCMP_PFX "bzip2" +#define BZIP_EXT \ + EXT_PFX EXTPATH "bzip2/.libs/libwiredtiger_bzip2.so" EXT_SFX +#define SNAPPY_BLK BLKCMP_PFX "snappy" +#define SNAPPY_EXT \ + EXT_PFX EXTPATH "snappy/.libs/libwiredtiger_snappy.so" EXT_SFX +#define ZLIB_BLK BLKCMP_PFX "zlib" +#define ZLIB_EXT \ + EXT_PFX EXTPATH "zlib/.libs/libwiredtiger_zlib.so" EXT_SFX + typedef struct { int64_t threads; /* Thread count */ int64_t insert; /* Insert ratio */ @@ -61,15 +76,24 @@ typedef struct { uint8_t ops[100]; /* Operation schedule */ } WORKLOAD; +/* + * NOTE: If you add any fields to this structure here, you must also add + * an initialization in wtperf.c in the default_cfg. 
+ */ struct __config { /* Configuration struction */ const char *home; /* WiredTiger home */ const char *monitor_dir; /* Monitor output dir */ - char *uri; /* Object URI */ + char *base_uri; /* Object URI */ + char **uris; /* URIs if multiple tables */ + const char *helium_mount; /* Optional Helium mount point */ WT_CONNECTION *conn; /* Database connection */ FILE *logf; /* Logging handle */ + const char *compress_ext; /* Compression extension for conn */ + const char *compress_table; /* Compression arg to table create */ + CONFIG_THREAD *ckptthreads, *popthreads; #define WORKLOAD_MAX 50 @@ -79,6 +103,21 @@ struct __config { /* Configuration struction */ WORKLOAD *workload; /* Workloads */ u_int workload_cnt; + /* State tracking variables. */ + + uint64_t ckpt_ops; /* checkpoint operations */ + uint64_t insert_ops; /* insert operations */ + uint64_t read_ops; /* read operations */ + uint64_t update_ops; /* update operations */ + + uint64_t insert_key; /* insert key */ + + volatile int ckpt; /* checkpoint in progress */ + volatile int error; /* thread error */ + volatile int stop; /* notify threads to stop */ + + volatile uint32_t totalsec; /* total seconds running */ + /* Fields changeable on command line are listed in wtperf_opt.i */ #define OPT_DECLARE_STRUCT #include "wtperf_opt.i" @@ -141,8 +180,8 @@ typedef struct { * Minimum/maximum latency, shared with the monitor thread, that is, the * monitor thread clears it so it's recalculated again for each period. */ - uint32_t min_latency; /* Minimum latency (NS) */ - uint32_t max_latency; /* Maximum latency (NS) */ + uint32_t min_latency; /* Minimum latency (uS) */ + uint32_t max_latency; /* Maximum latency (uS) */ /* * Latency buckets. 
@@ -168,6 +207,7 @@ struct __config_thread { /* Per-thread structure */ }; int config_assign(CONFIG *, const CONFIG *); +int config_compress(CONFIG *); void config_free(CONFIG *); int config_opt_file(CONFIG *, const char *); int config_opt_line(CONFIG *, const char *); diff --git a/bench/wtperf/wtperf_opt.i b/bench/wtperf/wtperf_opt.i index 4e11799781f..9ee7072497c 100644 --- a/bench/wtperf/wtperf_opt.i +++ b/bench/wtperf/wtperf_opt.i @@ -82,9 +82,18 @@ DEF_OPT_AS_UINT32(checkpoint_threads, 0, "number of checkpoint threads") DEF_OPT_AS_CONFIG_STRING(conn_config, "create", "connection configuration string") DEF_OPT_AS_BOOL(compact, 0, "post-populate compact for LSM merging activity") +DEF_OPT_AS_STRING(compression, "none", + "compression extension. Allowed configuration values are: " + "'none' (default), 'bzip', 'snappy', 'zlib'") DEF_OPT_AS_BOOL(create, 1, "do population phase; false to use existing database") -DEF_OPT_AS_UINT32(icount, 5000, "number of records to initially populate") +DEF_OPT_AS_UINT32(database_count, 1, + "number of WiredTiger databases to use. Each database will execute the" + " workload using a separate home directory and complete set of worker" + " threads") +DEF_OPT_AS_UINT32(icount, 5000, + "number of records to initially populate. If multiple tables are " + "configured, each table has this many items inserted.") DEF_OPT_AS_BOOL(insert_rmw, 0, "execute a read prior to each insert in workload phase") DEF_OPT_AS_UINT32(key_sz, 20, "key size") @@ -114,6 +123,9 @@ DEF_OPT_AS_CONFIG_STRING(table_config, "key_format=S,value_format=S,type=lsm,exclusive=true," "leaf_page_max=4kb,internal_page_max=64kb,allocation_size=4kb,", "table configuration string") +DEF_OPT_AS_UINT32(table_count, 1, + "number of tables to run operations over. Keys are divided evenly " + "over the tables. 
Default 1, maximum 99.") DEF_OPT_AS_STRING(threads, "", "workload configuration: each 'count' " "entry is the total number of threads, and the 'insert', 'read' and " "'update' entries are the ratios of insert, read and update operations " diff --git a/build_posix/aclocal/version-set.m4 b/build_posix/aclocal/version-set.m4 index 2f2770d84cf..0c7436fad81 100644 --- a/build_posix/aclocal/version-set.m4 +++ b/build_posix/aclocal/version-set.m4 @@ -2,8 +2,8 @@ dnl build by dist/s_version VERSION_MAJOR=2 VERSION_MINOR=1 -VERSION_PATCH=0 -VERSION_STRING='"WiredTiger 2.1.0: (February 4, 2014)"' +VERSION_PATCH=1 +VERSION_STRING='"WiredTiger 2.1.1: (March 4, 2014)"' AC_SUBST(VERSION_MAJOR) AC_SUBST(VERSION_MINOR) diff --git a/build_posix/aclocal/version.m4 b/build_posix/aclocal/version.m4 index b47038041e0..80fe1bb193f 100644 --- a/build_posix/aclocal/version.m4 +++ b/build_posix/aclocal/version.m4 @@ -1,2 +1,2 @@ dnl WiredTiger product version for AC_INIT. Maintained by dist/s_version -2.1.0 +2.1.1 diff --git a/dist/api_data.py b/dist/api_data.py index 5509032d148..412f0603efd 100644 --- a/dist/api_data.py +++ b/dist/api_data.py @@ -613,8 +613,13 @@ methods = { Use \c O_DIRECT to access files. Options are given as a list, such as <code>"direct_io=[data]"</code>. Configuring \c direct_io requires care, see @ref - tuning_system_buffer_cache_direct_io for important warnings''', - type='list', choices=['data', 'log']), + tuning_system_buffer_cache_direct_io for important warnings. + Including \c "data" will cause WiredTiger data files to use + \c O_DIRECT, including \c "log" will cause WiredTiger log files + to use \c O_DIRECT, and including \c "checkpoint" will cause + WiredTiger data files opened at a checkpoint (i.e: read only) to + use \c O_DIRECT''', + type='list', choices=['checkpoint', 'data', 'log']), Config('extensions', '', r''' list of shared library extensions to load (using dlopen). 
Any values specified to an library extension are passed to diff --git a/dist/filelist b/dist/filelist index 87b75d0b7cf..58b5aa2ca92 100644 --- a/dist/filelist +++ b/dist/filelist @@ -50,6 +50,7 @@ src/btree/row_key.c src/btree/row_modify.c src/btree/row_srch.c src/config/config.c +src/config/config_api.c src/config/config_check.c src/config/config_collapse.c src/config/config_concat.c diff --git a/dist/flags.py b/dist/flags.py index 7ca1ee3144d..2a6adfccf43 100644 --- a/dist/flags.py +++ b/dist/flags.py @@ -15,6 +15,7 @@ flags = { 'SYNC_WRITE_LEAVES', ], 'file_types' : [ + 'FILE_TYPE_CHECKPOINT', 'FILE_TYPE_DATA', 'FILE_TYPE_LOG' ], diff --git a/dist/s_funcs.list b/dist/s_funcs.list index b34564adda7..6ffb956f59e 100644 --- a/dist/s_funcs.list +++ b/dist/s_funcs.list @@ -22,6 +22,7 @@ __wt_log_scan __wt_nlpo2 __wt_nlpo2_round __wt_print_huffman_code +wiredtiger_config_parser_open wiredtiger_pack_int wiredtiger_pack_item wiredtiger_pack_str diff --git a/dist/s_string.ok b/dist/s_string.ok index a71cca62a8c..5f0d331e105 100644 --- a/dist/s_string.ok +++ b/dist/s_string.ok @@ -688,6 +688,7 @@ openfile os ovfl packv +parserp patchp pathname pathnames diff --git a/dist/s_symbols.list b/dist/s_symbols.list index a66af1f8994..d3803bc3afa 100644 --- a/dist/s_symbols.list +++ b/dist/s_symbols.list @@ -1,4 +1,5 @@ # List of OK external symbols. 
+wiredtiger_config_parser_open wiredtiger_open wiredtiger_pack_close wiredtiger_pack_int diff --git a/dist/stat_data.py b/dist/stat_data.py index 1007cf71a11..72babeb881a 100644 --- a/dist/stat_data.py +++ b/dist/stat_data.py @@ -153,6 +153,9 @@ connection_stats = [ ########################################## # LSM statistics ########################################## + Stat('lsm_checkpoint_throttle', + 'sleep for LSM checkpoint throttle'), + Stat('lsm_merge_throttle', 'sleep for LSM merge throttle'), Stat('lsm_rows_merged', 'rows merged in an LSM tree'), ########################################## @@ -244,6 +247,8 @@ dsrc_stats = [ 'bloom filter pages evicted from cache'), Stat('bloom_page_read', 'bloom filter pages read into cache'), Stat('bloom_size', 'total size of bloom filters', 'no_scale'), + Stat('lsm_checkpoint_throttle', + 'sleep for LSM checkpoint throttle'), Stat('lsm_chunk_count', 'chunks in the LSM tree', 'no_aggregate,no_scale'), Stat('lsm_generation_max', @@ -252,6 +257,7 @@ dsrc_stats = [ Stat('lsm_lookup_no_bloom', 'queries that could have benefited ' + 'from a Bloom filter that did not exist'), + Stat('lsm_merge_throttle', 'sleep for LSM merge throttle'), ########################################## # Block manager statistics diff --git a/examples/c/Makefile.am b/examples/c/Makefile.am index bd281df7130..5b43dcc2285 100644 --- a/examples/c/Makefile.am +++ b/examples/c/Makefile.am @@ -6,6 +6,7 @@ noinst_PROGRAMS = \ ex_all \ ex_call_center \ ex_config \ + ex_config_parse \ ex_cursor \ ex_data_source \ ex_extending \ diff --git a/examples/c/ex_config_parse.c b/examples/c/ex_config_parse.c new file mode 100644 index 00000000000..c6adc327c78 --- /dev/null +++ b/examples/c/ex_config_parse.c @@ -0,0 +1,166 @@ +/*- + * Public Domain 2008-2014 WiredTiger, Inc. + * + * This is free and unencumbered software released into the public domain. 
+ * + * Anyone is free to copy, modify, publish, use, compile, sell, or + * distribute this software, either in source code form or as a compiled + * binary, for any purpose, commercial or non-commercial, and by any + * means. + * + * In jurisdictions that recognize copyright laws, the author or authors + * of this software dedicate any and all copyright interest in the + * software to the public domain. We make this dedication for the benefit + * of the public at large and to the detriment of our heirs and + * successors. We intend this dedication to be an overt act of + * relinquishment in perpetuity of all present and future rights to this + * software under copyright law. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * ex_config_parse.c + * This is an example demonstrating how to parse WiredTiger compatible + * configuration strings. + */ + +#include <stdio.h> +#include <string.h> + +#include <wiredtiger.h> + +const char *home = NULL; + +int main(void) +{ + int ret; + + /*! [Create a configuration parser] */ + WT_CONFIG_ITEM k, v; + WT_CONFIG_PARSER *parser; + const char *config_string = + "path=/dev/loop,page_size=1024,log=(archive=true,file_max=20MB)"; + + if ((ret = wiredtiger_config_parser_open( + NULL, config_string, strlen(config_string), &parser)) != 0) { + fprintf(stderr, "Error creating configuration parser: %s\n", + wiredtiger_strerror(ret)); + return (ret); + } + if ((ret = parser->close(parser)) != 0) { + fprintf(stderr, "Error closing configuration parser: %s\n", + wiredtiger_strerror(ret)); + return (ret); + } + /*! 
[Create a configuration parser] */ + + if ((ret = wiredtiger_config_parser_open( + NULL, config_string, strlen(config_string), &parser)) != 0) { + fprintf(stderr, "Error creating configuration parser: %s\n", + wiredtiger_strerror(ret)); + return (ret); + } + + { + /*! [get] */ + int64_t my_page_size; + /* + * Retrieve the value of the integer configuration string "page_size". + */ + if ((ret = parser->get(parser, "page_size", &v)) != 0) { + fprintf(stderr, + "page_size configuration: %s", wiredtiger_strerror(ret)); + return (ret); + } + my_page_size = v.val; + /*! [get] */ + + ret = parser->close(parser); + + (void)my_page_size; + } + + { + if ((ret = wiredtiger_config_parser_open( + NULL, config_string, strlen(config_string), &parser)) != 0) { + fprintf(stderr, "Error creating configuration parser: %s\n", + wiredtiger_strerror(ret)); + return (ret); + } + /*! [next] */ + /* + * Retrieve and print the values of the configuration strings. + */ + while ((ret = parser->next(parser, &k, &v)) == 0) { + printf("%.*s:", (int)k.len, k.str); + if (v.type == WT_CONFIG_ITEM_NUM) + printf("%d\n", (int)v.val); + else + printf("%.*s\n", (int)v.len, v.str); + } + /*! [next] */ + ret = parser->close(parser); + } + + if ((ret = wiredtiger_config_parser_open( + NULL, config_string, strlen(config_string), &parser)) != 0) { + fprintf(stderr, "Error creating configuration parser: %s\n", + wiredtiger_strerror(ret)); + return (ret); + } + + /*! [nested get] */ + /* + * Retrieve the value of the nested log file_max configuration string + * using dot shorthand. Utilize the configuration parsing automatic + * conversion of value strings into an integer. + */ + v.type = WT_CONFIG_ITEM_NUM; + if ((ret = parser->get(parser, "log.file_max", &v)) != 0) { + fprintf(stderr, + "log.file_max configuration: %s", wiredtiger_strerror(ret)); + return (ret); + } + printf("log file max: %d\n", (int)v.val); + /*! 
[nested get] */ + ret = parser->close(parser); + + if ((ret = wiredtiger_config_parser_open( + NULL, config_string, strlen(config_string), &parser)) != 0) { + fprintf(stderr, "Error creating configuration parser: %s\n", + wiredtiger_strerror(ret)); + return (ret); + } + /*! [nested traverse] */ + { + WT_CONFIG_PARSER *sub_parser; + while ((ret = parser->next(parser, &k, &v)) == 0) { + if (v.type == WT_CONFIG_ITEM_STRUCT) { + printf("Found nested configuration: %.*s\n", + (int)k.len, k.str); + if ((ret = wiredtiger_config_parser_open( + NULL, v.str, v.len, &sub_parser)) != 0) { + fprintf(stderr, + "Error creating nested configuration " + "parser: %s\n", + wiredtiger_strerror(ret)); + parser->close(parser); + return (ret); + } + while ((ret = sub_parser->next( + sub_parser, &k, &v)) == 0) + printf("\t%.*s\n", (int)k.len, k.str); + sub_parser->close(sub_parser); + } + } + /*! [nested traverse] */ + parser->close(parser); + } + + return (0); +} diff --git a/examples/c/ex_data_source.c b/examples/c/ex_data_source.c index daedce04075..953bb799340 100644 --- a/examples/c/ex_data_source.c +++ b/examples/c/ex_data_source.c @@ -339,30 +339,6 @@ my_open_cursor(WT_DATA_SOURCE *dsrc, WT_SESSION *session, } { - /*! [WT_EXTENSION config_strget] */ - WT_CONFIG_ITEM v; - int64_t my_data_source_page_size; - - /* - * Retrieve the value of the integer type configuration string - * "page_size" from a local string (as opposed to the provided - * WT_CONFIG_ARG reference). - */ - const char *config_string = "path=/dev/loop,page_size=1024"; - - if ((ret = wt_api->config_strget( - wt_api, session, config_string, "page_size", &v)) != 0) { - (void)wt_api->err_printf(wt_api, session, - "page_size configuration: %s", wiredtiger_strerror(ret)); - return (ret); - } - my_data_source_page_size = v.val; - /*! [WT_EXTENSION config_strget] */ - - (void)my_data_source_page_size; - } - - { /*! 
[WT_EXTENSION config_get] */ WT_CONFIG_ITEM v; const char *my_data_source_key; @@ -392,31 +368,6 @@ my_open_cursor(WT_DATA_SOURCE *dsrc, WT_SESSION *session, } { - /*! [WT_EXTENSION config scan] */ - WT_CONFIG_ITEM k, v; - WT_CONFIG_SCAN *scan; - - /* - * Retrieve the value of the list type configuration string "paths". - */ - if ((ret = wt_api->config_get( - wt_api, session, config, "paths", &v)) != 0) { - (void)wt_api->err_printf(wt_api, session, - "paths configuration: %s", wiredtiger_strerror(ret)); - return (ret); - } - - /* - * Step through the list of entries. - */ - ret = wt_api->config_scan_begin(wt_api, session, v.str, v.len, &scan); - while ((ret = wt_api->config_scan_next(wt_api, scan, &k, &v)) == 0) - printf("%.*s\n", (int)k.len, k.str); - ret = wt_api->config_scan_end(wt_api, scan); - /*! [WT_EXTENSION config scan] */ - } - - { /*! [WT_EXTENSION collator config] */ /* * Configure the appropriate collator. diff --git a/ext/datasources/helium/helium.c b/ext/datasources/helium/helium.c index 1239c88befa..cc420c89999 100644 --- a/ext/datasources/helium/helium.c +++ b/ext/datasources/helium/helium.c @@ -2098,19 +2098,22 @@ helium_session_open_cursor(WT_DATA_SOURCE *wtds, WT_SESSION *session, CURSOR *cursor; DATA_SOURCE *ds; WT_CONFIG_ITEM v; + WT_CONFIG_PARSER *config_parser; WT_CURSOR *wtcursor; WT_EXTENSION_API *wtext; WT_SOURCE *ws; - int locked, ret = 0; + int locked, ret, tret; const char *value; *new_cursor = NULL; + config_parser = NULL; cursor = NULL; ds = (DATA_SOURCE *)wtds; wtext = ds->wtext; ws = NULL; locked = 0; + ret = tret = 0; value = NULL; /* Allocate and initialize a cursor. 
*/ @@ -2164,23 +2167,28 @@ helium_session_open_cursor(WT_DATA_SOURCE *wtds, WT_SESSION *session, if ((ret = master_uri_get(wtds, session, uri, &value)) != 0) goto err; - if ((ret = wtext->config_strget( - wtext, session, value, "key_format", &v)) != 0) + if ((ret = wtext->config_parser_open(wtext, + session, value, strlen(value), &config_parser)) != 0) + EMSG_ERR(wtext, session, ret, + "Configuration string parser: %s", + wtext->strerror(ret)); + if ((ret = config_parser->get( + config_parser, "key_format", &v)) != 0) EMSG_ERR(wtext, session, ret, "key_format configuration: %s", wtext->strerror(ret)); ws->config_recno = v.len == 1 && v.str[0] == 'r'; - if ((ret = wtext->config_strget( - wtext, session, value, "value_format", &v)) != 0) + if ((ret = config_parser->get( + config_parser, "value_format", &v)) != 0) EMSG_ERR(wtext, session, ret, "value_format configuration: %s", wtext->strerror(ret)); ws->config_bitfield = v.len == 2 && isdigit(v.str[0]) && v.str[1] == 't'; - if ((ret = wtext->config_strget( - wtext, session, value, "helium_o_compress", &v)) != 0) + if ((ret = config_parser->get( + config_parser, "helium_o_compress", &v)) != 0) EMSG_ERR(wtext, session, ret, "helium_o_compress configuration: %s", wtext->strerror(ret)); @@ -2219,6 +2227,11 @@ err: if (ws != NULL && locked) ESET(unlock(wtext, session, &ws->lock)); cursor_destroy(cursor); } + if (config_parser != NULL && + (tret = config_parser->close(config_parser)) != 0) + EMSG(wtext, session, tret, + "WT_CONFIG_PARSER.close: %s", wtext->strerror(tret)); + free((void *)value); return (ret); } @@ -2882,19 +2895,19 @@ helium_config_read(WT_EXTENSION_API *wtext, WT_CONFIG_ITEM *config, char **devicep, HE_ENV *envp, int *env_setp, int *flagsp) { WT_CONFIG_ITEM k, v; - WT_CONFIG_SCAN *scan; + WT_CONFIG_PARSER *config_parser; int ret = 0, tret; *env_setp = 0; *flagsp = 0; - /* Set up the scan of the configuration arguments list. 
*/ - if ((ret = wtext->config_scan_begin( - wtext, NULL, config->str, config->len, &scan)) != 0) + /* Traverse the configuration arguments list. */ + if ((ret = wtext->config_parser_open( + wtext, NULL, config->str, config->len, &config_parser)) != 0) ERET(wtext, NULL, ret, - "WT_EXTENSION_API.config_scan_begin: %s", + "WT_EXTENSION_API.config_parser_open: %s", wtext->strerror(ret)); - while ((ret = wtext->config_scan_next(wtext, scan, &k, &v)) == 0) { + while ((ret = config_parser->next(config_parser, &k, &v)) == 0) { if (string_match("helium_devices", k.str, k.len)) { if ((*devicep = calloc(1, v.len + 1)) == NULL) return (os_errno()); @@ -2924,13 +2937,11 @@ helium_config_read(WT_EXTENSION_API *wtext, WT_CONFIG_ITEM *config, ret = 0; if (ret != 0) EMSG_ERR(wtext, NULL, ret, - "WT_EXTENSION_API.config_scan_next: %s", - wtext->strerror(ret)); + "WT_CONFIG_PARSER.next: %s", wtext->strerror(ret)); -err: if ((tret = wtext->config_scan_end(wtext, scan)) != 0) +err: if ((tret = config_parser->close(config_parser)) != 0) EMSG(wtext, NULL, tret, - "WT_EXTENSION_API.config_scan_end: %s", - wtext->strerror(tret)); + "WT_CONFIG_PARSER.close: %s", wtext->strerror(tret)); return (ret); } @@ -3320,11 +3331,12 @@ wiredtiger_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config) DATA_SOURCE *ds; HELIUM_SOURCE *hs; WT_CONFIG_ITEM k, v; - WT_CONFIG_SCAN *scan; + WT_CONFIG_PARSER *config_parser; WT_EXTENSION_API *wtext; int vmajor, vminor, ret = 0; const char **p; + config_parser = NULL; ds = NULL; wtext = connection->get_extension_api(connection); @@ -3357,12 +3369,12 @@ wiredtiger_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config) wtext->strerror(ret)); /* Step through the list of Helium sources, opening each one. 
*/ - if ((ret = - wtext->config_scan_begin(wtext, NULL, v.str, v.len, &scan)) != 0) + if ((ret = wtext->config_parser_open( + wtext, NULL, v.str, v.len, &config_parser)) != 0) EMSG_ERR(wtext, NULL, ret, - "WT_EXTENSION_API.config_scan_begin: config: %s", + "WT_EXTENSION_API.config_parser_open: config: %s", wtext->strerror(ret)); - while ((ret = wtext->config_scan_next(wtext, scan, &k, &v)) == 0) { + while ((ret = config_parser->next(config_parser, &k, &v)) == 0) { if (string_match("helium_verbose", k.str, k.len)) { verbose = v.val == 0 ? 0 : 1; continue; @@ -3372,12 +3384,13 @@ wiredtiger_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config) } if (ret != WT_NOTFOUND) EMSG_ERR(wtext, NULL, ret, - "WT_EXTENSION_API.config_scan_next: config: %s", + "WT_CONFIG_PARSER.next: config: %s", wtext->strerror(ret)); - if ((ret = wtext->config_scan_end(wtext, scan)) != 0) + if ((ret = config_parser->close(config_parser)) != 0) EMSG_ERR(wtext, NULL, ret, - "WT_EXTENSION_API.config_scan_end: config: %s", + "WT_CONFIG_PARSER.close: config: %s", wtext->strerror(ret)); + config_parser = NULL; /* Find and open the database transaction store. 
*/ if ((ret = helium_source_open_txn(ds)) != 0) @@ -3414,6 +3427,8 @@ wiredtiger_extension_init(WT_CONNECTION *connection, WT_CONFIG_ARG *config) err: if (ds != NULL) ESET(helium_terminate((WT_DATA_SOURCE *)ds, NULL)); + if (config_parser != NULL) + (void)config_parser->close(config_parser); return (ret); } diff --git a/lang/java/java_doc.i b/lang/java/java_doc.i index fcb580ddbd4..83404b45508 100644 --- a/lang/java/java_doc.i +++ b/lang/java/java_doc.i @@ -40,3 +40,6 @@ COPYDOC(__wt_connection, WT_CONNECTION, add_data_source) COPYDOC(__wt_connection, WT_CONNECTION, add_collator) COPYDOC(__wt_connection, WT_CONNECTION, add_compressor) COPYDOC(__wt_connection, WT_CONNECTION, add_extractor) +COPYDOC(__wt_config_parser, WT_CONFIG_PARSER, close) +COPYDOC(__wt_config_parser, WT_CONFIG_PARSER, next) +COPYDOC(__wt_config_parser, WT_CONFIG_PARSER, get) diff --git a/lang/python/wiredtiger.i b/lang/python/wiredtiger.i index 31dc159410b..e5f49fe2c63 100644 --- a/lang/python/wiredtiger.i +++ b/lang/python/wiredtiger.i @@ -582,19 +582,21 @@ typedef int int_void; /* Add event handler support. */ %{ +/* Write to and flush the stream. 
*/ static int writeToPythonStream(const char *streamname, const char *message) { - PyObject *sys, *se, *sys_stderr_write, *written, *arglist; + PyObject *sys, *se, *write_method, *flush_method, *written, + *arglist, *arglist2; char *msg; int ret; size_t msglen; sys = NULL; se = NULL; - sys_stderr_write = NULL; + write_method = flush_method = NULL; written = NULL; - arglist = NULL; + arglist = arglist2 = NULL; msglen = strlen(message); msg = malloc(msglen + 2); strcpy(msg, message); @@ -608,21 +610,30 @@ writeToPythonStream(const char *streamname, const char *message) goto err; if ((se = PyObject_GetAttrString(sys, streamname)) == NULL) goto err; - if ((sys_stderr_write = PyObject_GetAttrString(se, "write")) == NULL) + if ((write_method = PyObject_GetAttrString(se, "write")) == NULL) + goto err; + if ((flush_method = PyObject_GetAttrString(se, "flush")) == NULL) goto err; if ((arglist = Py_BuildValue("(s)", msg)) == NULL) goto err; + if ((arglist2 = Py_BuildValue("()", msg)) == NULL) + goto err; - written = PyObject_CallObject(sys_stderr_write, arglist); + written = PyObject_CallObject(write_method, arglist); + (void)PyObject_CallObject(flush_method, arglist2); ret = 0; err: /* Release python Global Interpreter Lock */ SWIG_PYTHON_THREAD_END_BLOCK; + if (arglist2) + Py_XDECREF(arglist2); if (arglist) Py_XDECREF(arglist); - if (sys_stderr_write) - Py_XDECREF(sys_stderr_write); + if (flush_method) + Py_XDECREF(flush_method); + if (write_method) + Py_XDECREF(write_method); if (se) Py_XDECREF(se); if (sys) diff --git a/src/block/block_mgr.c b/src/block/block_mgr.c index 04c24a6a3b6..35201008622 100644 --- a/src/block/block_mgr.c +++ b/src/block/block_mgr.c @@ -405,8 +405,9 @@ __bm_method_set(WT_BM *bm, int readonly) * Open a file. 
*/ int -__wt_block_manager_open(WT_SESSION_IMPL *session, const char *filename, - const char *cfg[], int forced_salvage, uint32_t allocsize, WT_BM **bmp) +__wt_block_manager_open(WT_SESSION_IMPL *session, + const char *filename, const char *cfg[], + int forced_salvage, int readonly, uint32_t allocsize, WT_BM **bmp) { WT_BM *bm; WT_DECL_RET; @@ -416,8 +417,8 @@ __wt_block_manager_open(WT_SESSION_IMPL *session, const char *filename, WT_RET(__wt_calloc_def(session, 1, &bm)); __bm_method_set(bm, 0); - WT_ERR(__wt_block_open( - session, filename, cfg, forced_salvage, allocsize, &bm->block)); + WT_ERR(__wt_block_open(session, filename, cfg, + forced_salvage, readonly, allocsize, &bm->block)); *bmp = bm; return (0); diff --git a/src/block/block_open.c b/src/block/block_open.c index 1132cb85a6c..23e69027718 100644 --- a/src/block/block_open.c +++ b/src/block/block_open.c @@ -95,7 +95,7 @@ __block_destroy(WT_SESSION_IMPL *session, WT_BLOCK *block) int __wt_block_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[], - int forced_salvage, uint32_t allocsize, WT_BLOCK **blockp) + int forced_salvage, int readonly, uint32_t allocsize, WT_BLOCK **blockp) { WT_BLOCK *block; WT_CONFIG_ITEM cval; @@ -158,8 +158,9 @@ __wt_block_open(WT_SESSION_IMPL *session, #endif /* Open the underlying file handle. */ - WT_ERR(__wt_open( - session, filename, 0, 0, WT_FILE_TYPE_DATA, &block->fh)); + WT_ERR(__wt_open(session, filename, 0, 0, + readonly ? WT_FILE_TYPE_CHECKPOINT : WT_FILE_TYPE_DATA, + &block->fh)); /* Initialize the live checkpoint's lock. */ WT_ERR(__wt_spin_init(session, &block->live_lock, "block manager")); diff --git a/src/bloom/bloom.c b/src/bloom/bloom.c index 46f74a2b997..3adaa459826 100644 --- a/src/bloom/bloom.c +++ b/src/bloom/bloom.c @@ -127,7 +127,7 @@ __bloom_open_cursor(WT_BLOOM *bloom, WT_CURSOR *owner) WT_RET(__wt_open_cursor(session, bloom->uri, owner, cfg, &c)); /* XXX Layering violation: bump the cache priority for Bloom filters. 
*/ - ((WT_CURSOR_BTREE *)c)->btree->evict_priority = (1 << 19); + ((WT_CURSOR_BTREE *)c)->btree->evict_priority = WT_EVICT_INT_SKEW; bloom->c = c; return (0); diff --git a/src/btree/bt_discard.c b/src/btree/bt_discard.c index 568e07f7fd1..c0c7a245a9f 100644 --- a/src/btree/bt_discard.c +++ b/src/btree/bt_discard.c @@ -317,6 +317,10 @@ __free_update_list(WT_SESSION_IMPL *session, WT_UPDATE *upd) do { next = upd->next; + /* Everything we free should be visible to everyone. */ + WT_ASSERT(session, + upd->txnid == WT_TXN_ABORTED || + __wt_txn_visible_all(session, upd->txnid)); __wt_free(session, upd); } while ((upd = next) != NULL); } diff --git a/src/btree/bt_evict.c b/src/btree/bt_evict.c index d57162c06a9..0b6767df613 100644 --- a/src/btree/bt_evict.c +++ b/src/btree/bt_evict.c @@ -229,7 +229,8 @@ __evict_worker(WT_SESSION_IMPL *session) /* Check to see if the eviction server should run. */ if (bytes_inuse > (cache->eviction_target * bytes_max) / 100) - flags = WT_EVICT_PASS_ALL; + flags = (loop > 10) ? + WT_EVICT_PASS_AGGRESSIVE : WT_EVICT_PASS_ALL; else if (dirty_inuse > (cache->eviction_dirty_target * bytes_max) / 100) /* Ignore clean pages unless the cache is too large */ @@ -244,7 +245,7 @@ __evict_worker(WT_SESSION_IMPL *session) F_SET(cache, WT_EVICT_ACTIVE); WT_VERBOSE_RET(session, evictserver, "Eviction pass with: Max: %" PRIu64 - " In use: %" PRIu64 " Dirty: %" PRIu64 " Internal: %s", + " In use: %" PRIu64 " Dirty: %" PRIu64 " Merge: %s", bytes_max, bytes_inuse, dirty_inuse, LF_ISSET(WT_EVICT_PASS_INTERNAL) ? "yes" : "no"); @@ -436,6 +437,9 @@ __wt_evict_file(WT_SESSION_IMPL *session, int syncop) */ __wt_evict_file_exclusive_on(session); + /* Make sure the oldest transaction ID is up-to-date. */ + __wt_txn_update_oldest(session); + /* * We can't evict the page just returned to us, it marks our place in * the tree. So, always walk one page ahead of the page being evicted. 
@@ -721,12 +725,23 @@ __evict_walk(WT_SESSION_IMPL *session, u_int *entriesp, uint32_t flags) WT_CONNECTION_IMPL *conn; WT_DATA_HANDLE *dhandle; WT_DECL_RET; - u_int slot, max_entries, retries; + u_int max_entries, old_slot, retries, slot; conn = S2C(session); cache = S2C(session)->cache; retries = 0; + /* Increment the shared read generation. */ + __wt_cache_read_gen_incr(session); + + /* + * Update the oldest ID: we use it to decide whether pages are + * candidates for eviction. Without this, if all threads are blocked + * after a long-running transaction (such as a checkpoint) completes, + * we may never start evicting again. + */ + __wt_txn_update_oldest(session); + /* * Set the starting slot in the queue and the maximum pages added * per walk. @@ -751,9 +766,7 @@ retry: SLIST_FOREACH(dhandle, &conn->dhlh, l) { /* * Each time we reenter this function, start at the next handle - * on the list. We use the address of the handle's name as the - * handle's unique identifier, that should be unique, and is - * unlikely to cause a false positive if freed and reallocated. + * on the list. */ if (cache->evict_file_next != NULL && cache->evict_file_next != dhandle) @@ -769,9 +782,29 @@ retry: SLIST_FOREACH(dhandle, &conn->dhlh, l) { */ btree = dhandle->handle; if (btree->root_page == NULL || - F_ISSET(btree, WT_BTREE_NO_EVICTION) || btree->bulk_load_ok) + F_ISSET(btree, WT_BTREE_NO_EVICTION) || + btree->bulk_load_ok) continue; + /* + * Also skip files that are configured to stick in cache until + * we get aggressive. + */ + if (btree->evict_priority != 0 && + !LF_ISSET(WT_EVICT_PASS_AGGRESSIVE)) + continue; + + /* + * If we are filling the queue, skip files that haven't been + * useful in the past. 
+ */ + if (btree->evict_walk_period != 0 && + cache->evict_entries >= WT_EVICT_WALK_BASE && + btree->evict_walk_skips++ < btree->evict_walk_period) + continue; + btree->evict_walk_skips = 0; + old_slot = slot; + __wt_spin_lock(session, &cache->evict_walk_lock); /* @@ -784,6 +817,17 @@ retry: SLIST_FOREACH(dhandle, &conn->dhlh, l) { __wt_spin_unlock(session, &cache->evict_walk_lock); + /* + * If we didn't find enough candidates in the file, skip it + * next time. + */ + if (slot >= old_slot + WT_EVICT_WALK_PER_FILE || + slot >= max_entries) + btree->evict_walk_period = 0; + else + btree->evict_walk_period = WT_MIN( + WT_MAX(1, 2 * btree->evict_walk_period), 1000); + if (ret != 0 || slot >= max_entries) break; } @@ -843,15 +887,15 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, uint32_t flags) WT_DECL_RET; WT_EVICT_ENTRY *end, *evict, *start; WT_PAGE *page; - int modified, restarts, levels; + int internal_pages, levels, modified, restarts; uint32_t walk_flags; + uint64_t pages_walked; btree = S2BT(session); cache = S2C(session)->cache; start = cache->evict + *slotp; - end = start + WT_EVICT_WALK_PER_FILE; - if (end > cache->evict + cache->evict_slots) - end = cache->evict + cache->evict_slots; + end = WT_MIN(start + WT_EVICT_WALK_PER_FILE, + cache->evict + cache->evict_slots); WT_ASSERT(session, btree->evict_page == NULL || WT_PAGE_IS_ROOT(btree->evict_page) || @@ -860,20 +904,21 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, uint32_t flags) walk_flags = WT_TREE_EVICT; if (LF_ISSET(WT_EVICT_PASS_INTERNAL)) walk_flags |= WT_TREE_SKIP_LEAF; + /* * Get some more eviction candidate pages. 
*/ - for (evict = start, restarts = 0; + for (evict = start, pages_walked = 0, internal_pages = restarts = 0; evict < end && (ret == 0 || ret == WT_NOTFOUND); - ret = __wt_tree_walk(session, &btree->evict_page, walk_flags)) { + ret = __wt_tree_walk(session, &btree->evict_page, walk_flags), + ++pages_walked) { if ((page = btree->evict_page) == NULL) { ret = 0; /* * Take care with terminating this loop. * * Don't make an extra call to __wt_tree_walk: that - * will leave a page in the WT_REF_EVICT_WALK state, - * unable to be evicted, which may prevent any work + * will leave a page pinned, which may prevent any work * from being done. */ if (++restarts == 2) @@ -881,8 +926,6 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, uint32_t flags) continue; } - WT_STAT_FAST_CONN_INCR(session, cache_eviction_walk); - /* Ignore root pages entirely. */ if (WT_PAGE_IS_ROOT(page)) continue; @@ -940,6 +983,13 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, uint32_t flags) } else if (LF_ISSET(WT_EVICT_PASS_INTERNAL)) continue; + /* Limit internal pages to 50% unless we get aggressive. */ + if ((page->type == WT_PAGE_COL_INT || + page->type == WT_PAGE_ROW_INT) && + ++internal_pages > WT_EVICT_WALK_PER_FILE / 2 && + !LF_ISSET(WT_EVICT_PASS_AGGRESSIVE)) + break; + /* * If this page has never been considered for eviction, * set its read generation to a little bit in the @@ -975,6 +1025,15 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, uint32_t flags) continue; /* + * If the page is clean but has modifications that appear too + * new to evict, skip it. + */ + if (!modified && page->modify != NULL && + !LF_ISSET(WT_EVICT_PASS_AGGRESSIVE) && + !__wt_txn_visible_all(session, page->modify->rec_max_txn)) + continue; + + /* * If the oldest transaction hasn't changed since the * last time this page was written, it's unlikely that * we can make progress. 
Similarly, if the most recent @@ -988,7 +1047,7 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp, uint32_t flags) * since rolled back, or we can help get the checkpoint * completed sooner. */ - if (modified && !F_ISSET(cache, WT_EVICT_STUCK) && + if (modified && !LF_ISSET(WT_EVICT_PASS_AGGRESSIVE) && (page->modify->disk_snap_min == S2C(session)->txn_global.oldest_id || !__wt_txn_visible_all(session, @@ -1004,6 +1063,7 @@ add: WT_ASSERT(session, evict->page == NULL); } *slotp += (u_int)(evict - start); + WT_STAT_FAST_CONN_INCRV(session, cache_eviction_walk, pages_walked); return (ret); } @@ -1160,7 +1220,8 @@ __wt_cache_dump(WT_SESSION_IMPL *session) WT_CONNECTION_IMPL *conn; WT_DATA_HANDLE *dhandle; WT_PAGE *page; - uint64_t file_bytes, file_dirty, file_pages, total_bytes; + uint64_t file_intl_pages, file_leaf_pages; + uint64_t file_bytes, file_dirty, total_bytes; conn = S2C(session); total_bytes = 0; @@ -1176,22 +1237,28 @@ __wt_cache_dump(WT_SESSION_IMPL *session) btree->bulk_load_ok) continue; - file_bytes = file_dirty = file_pages = 0; + file_bytes = file_dirty = file_intl_pages = file_leaf_pages = 0; page = NULL; session->dhandle = dhandle; while (__wt_tree_walk(session, &page, WT_TREE_CACHE) == 0 && page != NULL) { - ++file_pages; + if (page->type == WT_PAGE_COL_INT || + page->type == WT_PAGE_ROW_INT) + ++file_intl_pages; + else + ++file_leaf_pages; file_bytes += page->memory_footprint; if (__wt_page_is_modified(page)) file_dirty += page->memory_footprint; } session->dhandle = NULL; - printf("cache dump: %s [%s]: %" - PRIu64 " pages, %" PRIu64 "MB, %" PRIu64 "MB dirty\n", + printf("cache dump: %s [%s]:" + " %" PRIu64 " intl pages, %" PRIu64 " leaf pages," + " %" PRIu64 "MB, %" PRIu64 "MB dirty\n", dhandle->name, dhandle->checkpoint, - file_pages, file_bytes >> 20, file_dirty >> 20); + file_intl_pages, file_leaf_pages, + file_bytes >> 20, file_dirty >> 20); total_bytes += file_bytes; } diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c index 
a970b656cf5..f6cc4cc6fb3 100644 --- a/src/btree/bt_handle.c +++ b/src/btree/bt_handle.c @@ -11,7 +11,7 @@ static int __btree_conf(WT_SESSION_IMPL *, WT_CKPT *ckpt); static int __btree_get_last_recno(WT_SESSION_IMPL *); static int __btree_page_sizes(WT_SESSION_IMPL *); static int __btree_preload(WT_SESSION_IMPL *); -static int __btree_tree_open_empty(WT_SESSION_IMPL *, int); +static int __btree_tree_open_empty(WT_SESSION_IMPL *, int, int); static int pse1(WT_SESSION_IMPL *, const char *, uint32_t, uint32_t); static int pse2(WT_SESSION_IMPL *, const char *, uint32_t, uint32_t, int); @@ -69,8 +69,8 @@ __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[]) if (!WT_PREFIX_SKIP(filename, "file:")) WT_ERR_MSG(session, EINVAL, "expected a 'file:' URI"); - WT_ERR(__wt_block_manager_open(session, filename, - dhandle->cfg, forced_salvage, btree->allocsize, &btree->bm)); + WT_ERR(__wt_block_manager_open(session, filename, dhandle->cfg, + forced_salvage, readonly, btree->allocsize, &btree->bm)); bm = btree->bm; /* @@ -102,7 +102,8 @@ __wt_btree_open(WT_SESSION_IMPL *session, const char *op_cfg[]) ckpt.raw.data, ckpt.raw.size, root_addr, &root_addr_size, readonly)); if (creation || root_addr_size == 0) - WT_ERR(__btree_tree_open_empty(session, creation)); + WT_ERR(__btree_tree_open_empty( + session, creation, readonly)); else { WT_ERR(__wt_btree_tree_open( session, root_addr, root_addr_size)); @@ -355,7 +356,7 @@ err: __wt_buf_free(session, &dsk); * Create an empty in-memory tree. */ static int -__btree_tree_open_empty(WT_SESSION_IMPL *session, int creation) +__btree_tree_open_empty(WT_SESSION_IMPL *session, int creation, int readonly) { WT_BTREE *btree; WT_DECL_RET; @@ -423,23 +424,31 @@ __btree_tree_open_empty(WT_SESSION_IMPL *session, int creation) * the root page dirty to force a write, and without reconciling the * leaf page we won't realize there's no records to write, we'll write * a root page, which isn't correct for an empty tree. 
- * Earlier versions of this code kept the leaf page clean, but with - * the "empty" flag set in the leaf page's modification structure; in - * that case, checkpoints works (forced reconciliation of a root with - * a single "empty" page wouldn't write any blocks). That version had + * + * Earlier versions of this code kept the leaf page clean, but with the + * "empty" flag set in the leaf page's modification structure; in that + * case, checkpoints works (forced reconciliation of a root with a + * single "empty" page wouldn't write any blocks). That version had * memory leaks because the eviction code didn't correctly handle pages * that were "clean" (and so never reconciled), yet "modified" with an * "empty" flag. The goal of this code is to mimic a real tree that * simply has no records, for whatever reason, and trust reconciliation * to figure out it's empty and not write any blocks. - * We do not set the tree's modified flag because the checkpoint code - * skips unmodified files in closing checkpoints (checkpoints that don't - * require a write unless the file is actually dirty). There's no need - * to reconcile this file unless the application does a real checkpoint - * or it's actually modified. + * + * We do not set the tree's modified flag because the checkpoint code + * skips unmodified files in closing checkpoints (checkpoints that + * don't require a write unless the file is actually dirty). There's + * no need to reconcile this file unless the application does a real + * checkpoint or it's actually modified. + * + * Only do this for a live tree, not for checkpoints. If we open an + * empty checkpoint, the leaf page cannot be dirty or eviction may try + * to write it, which will fail because checkpoints are read-only. 
*/ - WT_ERR(__wt_page_modify_init(session, leaf)); - __wt_page_only_modify_set(session, leaf); + if (!readonly) { + WT_ERR(__wt_page_modify_init(session, leaf)); + __wt_page_only_modify_set(session, leaf); + } btree->root_page = root; diff --git a/src/btree/bt_page.c b/src/btree/bt_page.c index cfc7137b13a..21298ac4722 100644 --- a/src/btree/bt_page.c +++ b/src/btree/bt_page.c @@ -93,21 +93,17 @@ __wt_page_in_func( } /* - * If this page has ever been considered for eviction, - * and its generation is aging, update it. - */ - if (page->read_gen != WT_READ_GEN_NOTSET && - page->read_gen < __wt_cache_read_gen(session)) - page->read_gen = - __wt_cache_read_gen_set(session); - - /* * If we read the page and we are configured to not * trash the cache, set the oldest read generation so * the page is forcibly evicted as soon as possible. + * + * Otherwise, update the page's read generation. */ if (oldgen && page->read_gen == WT_READ_GEN_NOTSET) page->read_gen = WT_READ_GEN_OLDEST; + else if (page->read_gen < __wt_cache_read_gen(session)) + page->read_gen = + __wt_cache_read_gen_set(session); return (0); WT_ILLEGAL_VALUE(session); diff --git a/src/btree/rec_evict.c b/src/btree/rec_evict.c index 0713989af58..ce6c04d1283 100644 --- a/src/btree/rec_evict.c +++ b/src/btree/rec_evict.c @@ -498,7 +498,7 @@ ckpt: WT_STAT_FAST_CONN_INCR(session, cache_eviction_checkpoint); * cache. 
*/ if (!exclusive && mod != NULL && - !__wt_txn_visible_all(session, mod->disk_txn)) + !__wt_txn_visible_all(session, mod->rec_max_txn)) return (EBUSY); /* diff --git a/src/btree/rec_write.c b/src/btree/rec_write.c index 81a4ec7a025..dd237693465 100644 --- a/src/btree/rec_write.c +++ b/src/btree/rec_write.c @@ -668,26 +668,12 @@ static inline int __rec_txn_read( WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_UPDATE *upd, WT_UPDATE **updp) { - uint64_t txnid; int skip, retried; retried = 0; -retry: *updp = __wt_txn_read_skip(session, upd, &skip); - if (!skip) { - /* - * Track the largest transaction ID written to disk for this - * page. We store this in the page at the end of - * reconciliation if no updates are skipped. It is used to - * avoid evicting a clean page from memory with changes that - * are required to satisfy a snapshot read. - */ - if (*updp != NULL) { - txnid = (*updp)->txnid; - if (TXNID_LT(r->max_txn, txnid)) - r->max_txn = txnid; - } +retry: *updp = __wt_txn_read_skip(session, upd, &r->max_txn, &skip); + if (!skip) return (0); - } /* * If skipping this update will cause reconciliation to quit, update @@ -4093,7 +4079,7 @@ err: __wt_scr_free(&tkey); * cache's dirty statistics. 
*/ if (!r->upd_skipped) { - mod->disk_txn = r->max_txn; + mod->rec_max_txn = r->max_txn; if (WT_ATOMIC_CAS(mod->write_gen, r->orig_write_gen, 0)) __wt_cache_dirty_decr(session, page); diff --git a/src/config/config.c b/src/config/config.c index c268d4e053a..c0eb672015f 100644 --- a/src/config/config.c +++ b/src/config/config.c @@ -118,7 +118,7 @@ static const int8_t gostruct[256] = { A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_LOOP, A_BAD, A_QUP, A_BAD, A_BAD, A_BAD, A_BAD, A_BAD, A_UP, A_DOWN, A_BAD, A_BAD, - A_NEXT, A_NUMBARE, A_BAD, A_BARE, A_NUMBARE, A_NUMBARE, + A_NEXT, A_NUMBARE, A_BARE, A_BARE, A_NUMBARE, A_NUMBARE, A_NUMBARE, A_NUMBARE, A_NUMBARE, A_NUMBARE, A_NUMBARE, A_NUMBARE, A_NUMBARE, A_NUMBARE, A_VALUE, A_BAD, A_BAD, A_VALUE, A_BAD, A_BAD, A_BAD, A_BARE, A_BARE, A_BARE, A_BARE, @@ -738,10 +738,8 @@ __wt_config_subgetraw(WT_SESSION_IMPL *session, __wt_config_subgets(WT_SESSION_IMPL *session, WT_CONFIG_ITEM *cfg, const char *key, WT_CONFIG_ITEM *value) { - WT_CONFIG_ITEM key_item; - - key_item.str = key; - key_item.len = strlen(key); + WT_CONFIG_ITEM key_item = + { key, strlen(key), 0, WT_CONFIG_ITEM_STRING }; return (__wt_config_subgetraw(session, cfg, &key_item, value)); } diff --git a/src/config/config_api.c b/src/config/config_api.c new file mode 100644 index 00000000000..42f4c117b81 --- /dev/null +++ b/src/config/config_api.c @@ -0,0 +1,105 @@ +/*- + * Copyright (c) 2008-2014 WiredTiger, Inc. + * All rights reserved. + * + * See the file LICENSE for redistribution information. + */ + +#include "wt_internal.h" + +/* + * __config_parser_close -- + * WT_CONFIG_PARSER->close method. 
+ */ +static int +__config_parser_close(WT_CONFIG_PARSER *wt_config_parser) +{ + WT_CONFIG_PARSER_IMPL *config_parser; + + config_parser = (WT_CONFIG_PARSER_IMPL *)wt_config_parser; + + if (config_parser == NULL) + return (EINVAL); + + __wt_free(config_parser->session, config_parser); + return (0); +} + +/* + * __config_parser_get -- + * WT_CONFIG_PARSER->search method. + */ +static int +__config_parser_get(WT_CONFIG_PARSER *wt_config_parser, + const char *key, WT_CONFIG_ITEM *cval) +{ + WT_CONFIG_PARSER_IMPL *config_parser; + + config_parser = (WT_CONFIG_PARSER_IMPL *)wt_config_parser; + + if (config_parser == NULL) + return (EINVAL); + + return (__wt_config_subgets(config_parser->session, + &config_parser->config_item, key, cval)); +} + +/* + * __config_parser_next -- + * WT_CONFIG_PARSER->next method. + */ +static int +__config_parser_next(WT_CONFIG_PARSER *wt_config_parser, + WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *cval) +{ + WT_CONFIG_PARSER_IMPL *config_parser; + + config_parser = (WT_CONFIG_PARSER_IMPL *)wt_config_parser; + + if (config_parser == NULL) + return (EINVAL); + + return (__wt_config_next(&config_parser->config, key, cval)); +} + +/* + * wiredtiger_config_parser_open -- + * Create a configuration parser. + */ +int +wiredtiger_config_parser_open(WT_SESSION *wt_session, + const char *config, size_t len, WT_CONFIG_PARSER **config_parserp) +{ + static const WT_CONFIG_PARSER stds = { + __config_parser_close, + __config_parser_next, + __config_parser_get + }; + WT_CONFIG_ITEM config_item = + { config, len, 0, WT_CONFIG_ITEM_STRING }; + WT_CONFIG_PARSER_IMPL *config_parser; + WT_DECL_RET; + WT_SESSION_IMPL *session; + + *config_parserp = NULL; + session = (WT_SESSION_IMPL *)wt_session; + + WT_RET(__wt_calloc_def(session, 1, &config_parser)); + config_parser->iface = stds; + config_parser->session = session; + + /* + * Setup a WT_CONFIG_ITEM to be used for get calls and a WT_CONFIG + * structure for iterations through the configuration string. 
+ */ + memcpy(&config_parser->config_item, &config_item, sizeof(config_item)); + WT_ERR(__wt_config_initn( + session, &config_parser->config, config, len)); + + if (ret == 0) + *config_parserp = (WT_CONFIG_PARSER *)config_parser; + else +err: __wt_free(session, config_parser); + + return (ret); +} diff --git a/src/config/config_def.c b/src/config/config_def.c index abe6713696c..2c01cac1a85 100644 --- a/src/config/config_def.c +++ b/src/config/config_def.c @@ -250,7 +250,9 @@ static const WT_CONFIG_CHECK confchk_wiredtiger_open[] = { , { "checkpoint_sync", "boolean", NULL, NULL}, { "create", "boolean", NULL, NULL}, - { "direct_io", "list", "choices=[\"data\",\"log\"]", NULL}, + { "direct_io", "list", + "choices=[\"checkpoint\",\"data\",\"log\"]", + NULL}, { "error_prefix", "string", NULL, NULL}, { "eviction_dirty_target", "int", "min=10,max=99", NULL}, { "eviction_target", "int", "min=10,max=99", NULL}, diff --git a/src/config/config_ext.c b/src/config/config_ext.c index 7dd5445cc3c..26b3799d61c 100644 --- a/src/config/config_ext.c +++ b/src/config/config_ext.c @@ -8,6 +8,19 @@ #include "wt_internal.h" /* + * __wt_ext_config_parser_open -- + * WT_EXTENSION_API->config_parser_open implementation + */ +int +__wt_ext_config_parser_open(WT_EXTENSION_API *wt_ext, WT_SESSION *wt_session, + const char *config, size_t len, WT_CONFIG_PARSER **config_parserp) +{ + WT_UNUSED(wt_ext); + return (wiredtiger_config_parser_open( + wt_session, config, len, config_parserp)); +} + +/* * __wt_ext_config_get -- * Given a NULL-terminated list of configuration strings, find the final * value for a given string key (external API version). @@ -29,81 +42,3 @@ __wt_ext_config_get(WT_EXTENSION_API *wt_api, return (WT_NOTFOUND); return (__wt_config_gets(session, cfg, key, cval)); } - -/* - * __wt_ext_config_strget -- - * Given a single configuration string, find the final value for a given - * string key (external API version). 
- */ -int -__wt_ext_config_strget(WT_EXTENSION_API *wt_api, - WT_SESSION *wt_session, const char *config, const char *key, - WT_CONFIG_ITEM *cval) -{ - const char *cfg_arg[] = { config, NULL }; - - return (__wt_ext_config_get( - wt_api, wt_session, (WT_CONFIG_ARG *)cfg_arg, key, cval)); -} - -/* - * __wt_ext_config_scan_begin -- - * Start a scan of a config string. - * (external API only). - */ -int -__wt_ext_config_scan_begin( - WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, - const char *str, size_t len, WT_CONFIG_SCAN **scanp) -{ - WT_CONFIG config, *scan; - WT_CONNECTION_IMPL *conn; - WT_SESSION_IMPL *session; - - conn = (WT_CONNECTION_IMPL *)wt_api->conn; - if ((session = (WT_SESSION_IMPL *)wt_session) == NULL) - session = conn->default_session; - - /* Note: allocate memory last to avoid cleanup. */ - WT_CLEAR(config); - WT_RET(__wt_config_initn(session, &config, str, len)); - WT_RET(__wt_calloc_def(session, 1, &scan)); - *scan = config; - *scanp = (WT_CONFIG_SCAN *)scan; - return (0); -} - -/* - * __wt_ext_config_scan_end -- - * End a scan of a config string. - * (external API only). - */ -int -__wt_ext_config_scan_end(WT_EXTENSION_API *wt_api, WT_CONFIG_SCAN *scan) -{ - WT_CONFIG *conf; - - WT_UNUSED(wt_api); - - conf = (WT_CONFIG *)scan; - __wt_free(conf->session, scan); - return (0); -} - -/* - * __wt_ext_config_scan_next -- - * Get the next key/value pair from a config scan. - * (external API only). 
- */ -int -__wt_ext_config_scan_next( - WT_EXTENSION_API *wt_api, WT_CONFIG_SCAN *scan, - WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value) -{ - WT_CONFIG *conf; - - WT_UNUSED(wt_api); - - conf = (WT_CONFIG *)scan; - return (__wt_config_next(conf, key, value)); -} diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c index 780ae6f6be5..b1992793827 100644 --- a/src/conn/conn_api.c +++ b/src/conn/conn_api.c @@ -105,11 +105,8 @@ __conn_get_extension_api(WT_CONNECTION *wt_conn) conn->extension_api.scr_free = __wt_ext_scr_free; conn->extension_api.collator_config = ext_collator_config; conn->extension_api.collate = ext_collate; + conn->extension_api.config_parser_open = __wt_ext_config_parser_open; conn->extension_api.config_get = __wt_ext_config_get; - conn->extension_api.config_strget = __wt_ext_config_strget; - conn->extension_api.config_scan_begin = __wt_ext_config_scan_begin; - conn->extension_api.config_scan_end = __wt_ext_config_scan_end; - conn->extension_api.config_scan_next = __wt_ext_config_scan_next; conn->extension_api.metadata_insert = __wt_ext_metadata_insert; conn->extension_api.metadata_remove = __wt_ext_metadata_remove; conn->extension_api.metadata_search = __wt_ext_metadata_search; @@ -123,6 +120,7 @@ __conn_get_extension_api(WT_CONNECTION *wt_conn) conn->extension_api.transaction_notify = __wt_ext_transaction_notify; conn->extension_api.transaction_oldest = __wt_ext_transaction_oldest; conn->extension_api.transaction_visible = __wt_ext_transaction_visible; + conn->extension_api.version = wiredtiger_version; return (&conn->extension_api); } @@ -498,6 +496,20 @@ __conn_close(WT_CONNECTION *wt_conn, const char *config) CONNECTION_API_CALL(conn, session, close, config, cfg); WT_UNUSED(cfg); + /* + * Rollback all running transactions. + * We do this as a separate pass because an active transaction in one + * session could cause trouble when closing a file, even if that + * session never referenced that file. 
+ */ + for (s = conn->sessions, i = 0; i < conn->session_cnt; ++s, ++i) + if (s->active && !F_ISSET(s, WT_SESSION_INTERNAL) && + F_ISSET(&s->txn, TXN_RUNNING)) { + wt_session = &s->iface; + WT_TRET(wt_session->rollback_transaction( + wt_session, NULL)); + } + /* Close open, external sessions. */ for (s = conn->sessions, i = 0; i < conn->session_cnt; ++s, ++i) if (s->active && !F_ISSET(s, WT_SESSION_INTERNAL)) { @@ -1012,6 +1024,7 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler, const char *name; uint32_t flag; } *ft, file_types[] = { + { "checkpoint", WT_FILE_TYPE_CHECKPOINT }, { "data", WT_FILE_TYPE_DATA }, { "log", WT_FILE_TYPE_LOG }, { NULL, 0 } diff --git a/src/conn/conn_cache_pool.c b/src/conn/conn_cache_pool.c index 55faeefbde0..34b6a51570b 100644 --- a/src/conn/conn_cache_pool.c +++ b/src/conn/conn_cache_pool.c @@ -464,8 +464,10 @@ __cache_pool_adjust(uint64_t highest, uint64_t bump_threshold) * it. */ grew = 0; - adjusted = (cp->chunk > entry->cache_size - reserved) ? - cp->chunk : (entry->cache_size - reserved); + if (entry->cache_size - cp->chunk > reserved) + adjusted = cp->chunk; + else + adjusted = entry->cache_size - reserved; } else if (highest > 1 && entry->cache_size < cp->size && cache->bytes_inmem >= diff --git a/src/docs/Doxyfile b/src/docs/Doxyfile index 5f7e5016892..d8f753b269b 100644 --- a/src/docs/Doxyfile +++ b/src/docs/Doxyfile @@ -1576,6 +1576,7 @@ PREDEFINED = DOXYGEN \ __wt_compressor:=WT_COMPRESSOR \ __wt_config_arg:=WT_CONFIG_ARG \ __wt_config_item:=WT_CONFIG_ITEM \ + __wt_config_parser:=WT_CONFIG_PARSER \ __wt_config_scan:=WT_CONFIG_SCAN \ __wt_connection:=WT_CONNECTION \ __wt_cursor:=WT_CURSOR \ diff --git a/src/docs/command-line.dox b/src/docs/command-line.dox index 52be68a1074..04daa4050cd 100644 --- a/src/docs/command-line.dox +++ b/src/docs/command-line.dox @@ -291,9 +291,6 @@ engine, or, if specified, for the URI on the command-line. 
<code>wt [-Vv] [-C config] [-h directory] stat [-a] [uri]</code> @subsection util_stat_options Options -The \c stat command has no command-specific options. - -@subsection util_stat_options Options The following are command-specific options for the \c stat command: @par <code>-a</code> diff --git a/src/docs/config-strings.dox b/src/docs/config-strings.dox index 542e86f620d..295316fe039 100644 --- a/src/docs/config-strings.dox +++ b/src/docs/config-strings.dox @@ -16,7 +16,7 @@ specified directly. More precisely, keys or values that match this regular expression do not require quotes: <pre> - [-_0-9A-Za-z/][^\\t\\r\\n :=,\\])}]* + [-_0-9A-Za-z./][^\\t\\r\\n :=,\\])}]* </pre> More complex keys and values can be specified by quoting them with double diff --git a/src/docs/custom_data.dox b/src/docs/custom_data.dox index b1ee8dfbd9e..22dd75dcc26 100644 --- a/src/docs/custom_data.dox +++ b/src/docs/custom_data.dox @@ -165,11 +165,6 @@ of a configuration string as follows: @snippet ex_data_source.c WT_EXTENSION config_get -The WT_DATA_SOURCE::open_cursor method might retrieve the list value -of a configuration string as follows: - -@snippet ex_data_source.c WT_EXTENSION config scan - @subsection custom_ds_config_add Creating data-specific configuration strings Applications can add their own configuration strings to WiredTiger diff --git a/src/docs/helium.dox b/src/docs/helium.dox index cd6b47fb968..35e3886a8d6 100644 --- a/src/docs/helium.dox +++ b/src/docs/helium.dox @@ -103,7 +103,7 @@ WT_SESSION *session; /* Create and truncate the access table. 
*/ ret = session->create(session, "table:dev1/access", - "key_format=S,value_format=S,type=helium,helium_open_o_truncate=1"); + "key_format=S,value_format=S,type=helium,helium_o_truncate=1"); @endcode @section helium_notes Helium notes diff --git a/src/docs/spell.ok b/src/docs/spell.ok index 6d24c474e19..58701aaa5bf 100644 --- a/src/docs/spell.ok +++ b/src/docs/spell.ok @@ -313,6 +313,7 @@ statlog str strerror strftime +strget struct structs subdatabases diff --git a/src/docs/top/main.dox b/src/docs/top/main.dox index 5481d2deae5..f36f1887a73 100644 --- a/src/docs/top/main.dox +++ b/src/docs/top/main.dox @@ -6,9 +6,9 @@ WiredTiger is an high performance, scalable, production quality, NoSQL, @section releases Releases <table> -@row{<b>WiredTiger 2.1.0</b> (current), - <a href="releases/wiredtiger-2.1.0.tar.bz2"><b>[Release package]</b></a>, - <a href="2.1.0/index.html"><b>[Documentation]</b></a>} +@row{<b>WiredTiger 2.1.1</b> (current), + <a href="releases/wiredtiger-2.1.1.tar.bz2"><b>[Release package]</b></a>, + <a href="2.1.1/index.html"><b>[Documentation]</b></a>} @row{<b>WiredTiger 1.6.6</b> (previous), <a href="releases/wiredtiger-1.6.6.tar.bz2"><b>[Release package]</b></a>, <a href="1.6.6/index.html"><b>[Documentation]</b></a>} diff --git a/src/docs/upgrading.dox b/src/docs/upgrading.dox index e59b031a1ff..3cb0b96b0ef 100644 --- a/src/docs/upgrading.dox +++ b/src/docs/upgrading.dox @@ -1,5 +1,20 @@ /*! @page upgrading Upgrading WiredTiger applications +@section version_211 Upgrading to Version 2.1.1 +<dl> + +<dt>WT_EXTENSION_API::config methods</dt> +<dd> +In the 2.1.1 release of WiredTiger the configuration string parsing API +has been changed and added to a new public handle. The +WT_EXTENSION_API::config_strget, WT_EXTENSION_API::config_scan_begin, +WT_EXTENSION_API::config_scan_next and WT_EXTENSION_API::config_scan_end +have been removed. 
They have been replaced by a +WT_EXTENSION_API::config_parser_open method, which can be used to parse +configuration strings. See the WT_CONFIG_PARSER documentation for +examples on how to use the updated API. +</dd> + @section version_21 Upgrading to Version 2.1 <dl> diff --git a/src/include/btmem.h b/src/include/btmem.h index 7f0bf280d5c..42d7ecfa9e2 100644 --- a/src/include/btmem.h +++ b/src/include/btmem.h @@ -197,8 +197,8 @@ struct __wt_page_modify { */ uint64_t disk_snap_min; - /* The largest transaction ID written to disk for the page. */ - uint64_t disk_txn; + /* The largest transaction ID seen on the page by reconciliation. */ + uint64_t rec_max_txn; /* The largest update transaction ID (approximate). */ uint64_t update_txn; @@ -345,22 +345,17 @@ struct __wt_page { * The read generation is a 64-bit value, if incremented frequently, a * 32-bit value could overflow. * - * The read generation is a piece of shared memory potentially accessed + * The read generation is a piece of shared memory potentially read * by many threads. We don't want to update page read generations for * in-cache workloads and suffer the cache misses, so we don't simply * increment the read generation value on every access. Instead, the - * read generation is initialized to 0, then set to a real value if the - * page is ever considered for eviction. Once set to a real value, the - * read generation is potentially incremented every time the page is - * accessed. To try and avoid incrementing the page at a fast rate in - * this case, the read generation is incremented to a future point. - * - * The read generation is not declared volatile or published: the read - * generation is set a lot, and we don't want to write it that much. + * read generation is incremented by the eviction server each time it + * becomes active. To avoid incrementing a page's read generation too + * frequently, it is set to a future point. 
*/ #define WT_READ_GEN_NOTSET 0 #define WT_READ_GEN_OLDEST 1 -#define WT_READ_GEN_STEP 1000 +#define WT_READ_GEN_STEP 100 uint64_t read_gen; uint64_t memory_footprint; /* Memory attached to the page */ diff --git a/src/include/btree.h b/src/include/btree.h index 84da462417a..b40fc8a7f80 100644 --- a/src/include/btree.h +++ b/src/include/btree.h @@ -112,7 +112,9 @@ struct __wt_btree { uint64_t write_gen; /* Write generation */ WT_PAGE *evict_page; /* Eviction thread's location */ - uint64_t evict_priority; /* Relative priority of cached pages. */ + uint64_t evict_priority; /* Relative priority of cached pages */ + u_int evict_walk_period; /* Skip this many LRU walks */ + u_int evict_walk_skips; /* Number of walks skipped */ volatile uint32_t lru_count; /* Count of threads in LRU eviction */ volatile int checkpointing; /* Checkpoint in progress */ diff --git a/src/include/btree.i b/src/include/btree.i index f09d05178ab..94f8133d187 100644 --- a/src/include/btree.i +++ b/src/include/btree.i @@ -159,6 +159,12 @@ __wt_cache_read_gen(WT_SESSION_IMPL *session) return (S2C(session)->cache->read_gen); } +static inline void +__wt_cache_read_gen_incr(WT_SESSION_IMPL *session) +{ + ++S2C(session)->cache->read_gen; +} + static inline uint64_t __wt_cache_read_gen_set(WT_SESSION_IMPL *session) { @@ -171,7 +177,7 @@ __wt_cache_read_gen_set(WT_SESSION_IMPL *session) * page. In other words, the goal is to avoid some number of updates * immediately after each update we have to make. */ - return (++S2C(session)->cache->read_gen + WT_READ_GEN_STEP); + return (__wt_cache_read_gen(session) + WT_READ_GEN_STEP); } /* @@ -563,6 +569,30 @@ __wt_eviction_force_check(WT_SESSION_IMPL *session, WT_PAGE *page) } /* + * __wt_eviction_force_txn_check -- + * Check if the current transaction permits forced eviction of a page. 
+ */ +static inline int +__wt_eviction_force_txn_check(WT_SESSION_IMPL *session, WT_PAGE *page) +{ + WT_TXN_STATE *txn_state; + + /* + * Only try if there is a chance of success. If the page has already + * been split and this transaction is already pinning the oldest ID so + * that the page can't be evicted, it has to complete before eviction + * can succeed. + */ + txn_state = &S2C(session)->txn_global.states[session->id]; + if (!F_ISSET_ATOMIC(page, WT_PAGE_WAS_SPLIT) || + txn_state->snap_min == WT_TXN_NONE || + TXNID_LT(page->modify->update_txn, txn_state->snap_min)) + return (1); + + return (0); +} + +/* * __wt_page_release -- * Release a reference to a page. */ @@ -582,10 +612,8 @@ __wt_page_release(WT_SESSION_IMPL *session, WT_PAGE *page) * Try to immediately evict pages if they have the special "oldest" * read generation and we have some chance of succeeding. */ - if (!WT_TXN_ACTIVE(&session->txn) && - (page->modify == NULL || - !F_ISSET(page->modify, WT_PM_REC_SPLIT_MERGE)) && - page->read_gen == WT_READ_GEN_OLDEST && + if (page->read_gen == WT_READ_GEN_OLDEST && + __wt_eviction_force_txn_check(session, page) && WT_ATOMIC_CAS(page->ref->state, WT_REF_MEM, WT_REF_LOCKED)) { if ((ret = __wt_hazard_clear(session, page)) != 0) { page->ref->state = WT_REF_MEM; @@ -678,30 +706,6 @@ __wt_page_hazard_check(WT_SESSION_IMPL *session, WT_PAGE *page) /* * __wt_eviction_force -- - * Check if the current transaction permits forced eviction of a page. - */ -static inline int -__wt_eviction_force_txn_check(WT_SESSION_IMPL *session, WT_PAGE *page) -{ - WT_TXN_STATE *txn_state; - - /* - * Only try if there is a chance of success. If the page has already - * been split and this transaction is already pinning the oldest ID so - * that the page can't be evicted, it has to complete before eviction - * can succeed. 
- */ - txn_state = &S2C(session)->txn_global.states[session->id]; - if (!F_ISSET_ATOMIC(page, WT_PAGE_WAS_SPLIT) || - txn_state->snap_min == WT_TXN_NONE || - TXNID_LT(page->modify->update_txn, txn_state->snap_min)) - return (1); - - return (0); -} - -/* - * __wt_eviction_force -- * Forcefully evict a page, if possible. */ static inline int diff --git a/src/include/cache.h b/src/include/cache.h index 055041b7e6c..717191d19dd 100644 --- a/src/include/cache.h +++ b/src/include/cache.h @@ -9,16 +9,17 @@ * Tuning constants: I hesitate to call this tuning, but we want to review some * number of pages from each file's in-memory tree for each page we evict. */ -#define WT_EVICT_INT_SKEW (1<<12) /* Prefer leaf pages over internal +#define WT_EVICT_INT_SKEW (1<<20) /* Prefer leaf pages over internal pages by this many increments of the read generation. */ #define WT_EVICT_WALK_PER_FILE 10 /* Pages to visit per file */ #define WT_EVICT_WALK_BASE 300 /* Pages tracked across file visits */ #define WT_EVICT_WALK_INCR 100 /* Pages added each walk */ -#define WT_EVICT_PASS_ALL 0x01 -#define WT_EVICT_PASS_DIRTY 0x02 -#define WT_EVICT_PASS_INTERNAL 0x04 +#define WT_EVICT_PASS_AGGRESSIVE 0x01 +#define WT_EVICT_PASS_ALL 0x02 +#define WT_EVICT_PASS_DIRTY 0x04 +#define WT_EVICT_PASS_INTERNAL 0x08 /* * WT_EVICT_ENTRY -- diff --git a/src/include/config.h b/src/include/config.h index d8837f0f368..c83d96c8a5e 100644 --- a/src/include/config.h +++ b/src/include/config.h @@ -33,6 +33,14 @@ struct __wt_config_entry { const WT_CONFIG_CHECK *checks; /* check array */ }; +struct __wt_config_parser_impl { + WT_CONFIG_PARSER iface; + + WT_SESSION_IMPL *session; + WT_CONFIG config; + WT_CONFIG_ITEM config_item; +}; + /* * DO NOT EDIT: automatically built by dist/api_config.py. 
* configuration section: BEGIN diff --git a/src/include/extern.h b/src/include/extern.h index d1662717345..ba026253a5b 100644 --- a/src/include/extern.h +++ b/src/include/extern.h @@ -126,6 +126,7 @@ extern int __wt_block_manager_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[], int forced_salvage, + int readonly, uint32_t allocsize, WT_BM **bmp); extern int __wt_block_manager_truncate( WT_SESSION_IMPL *session, @@ -138,6 +139,7 @@ extern int __wt_block_open(WT_SESSION_IMPL *session, const char *filename, const char *cfg[], int forced_salvage, + int readonly, uint32_t allocsize, WT_BLOCK **blockp); extern int __wt_block_close(WT_SESSION_IMPL *session, WT_BLOCK *block); @@ -529,27 +531,16 @@ extern int __wt_config_concat( WT_SESSION_IMPL *session, const char **config_ret); extern int __wt_conn_config_init(WT_SESSION_IMPL *session); extern void __wt_conn_config_discard(WT_SESSION_IMPL *session); +extern int __wt_ext_config_parser_open(WT_EXTENSION_API *wt_ext, + WT_SESSION *wt_session, + const char *config, + size_t len, + WT_CONFIG_PARSER **config_parserp); extern int __wt_ext_config_get(WT_EXTENSION_API *wt_api, WT_SESSION *wt_session, WT_CONFIG_ARG *cfg_arg, const char *key, WT_CONFIG_ITEM *cval); -extern int __wt_ext_config_strget(WT_EXTENSION_API *wt_api, - WT_SESSION *wt_session, - const char *config, - const char *key, - WT_CONFIG_ITEM *cval); -extern int __wt_ext_config_scan_begin( WT_EXTENSION_API *wt_api, - WT_SESSION *wt_session, - const char *str, - size_t len, - WT_CONFIG_SCAN **scanp); -extern int __wt_ext_config_scan_end(WT_EXTENSION_API *wt_api, - WT_CONFIG_SCAN *scan); -extern int __wt_ext_config_scan_next( WT_EXTENSION_API *wt_api, - WT_CONFIG_SCAN *scan, - WT_CONFIG_ITEM *key, - WT_CONFIG_ITEM *value); extern int __wt_collator_config( WT_SESSION_IMPL *session, const char **cfg, WT_COLLATOR **collatorp); diff --git a/src/include/flags.h b/src/include/flags.h index 31eed83e351..89f2450f3af 100644 --- a/src/include/flags.h +++ 
b/src/include/flags.h @@ -10,6 +10,7 @@ #define WT_CONN_PANIC 0x00000002 #define WT_CONN_SERVER_RUN 0x00000001 #define WT_EVICTION_SERVER_LOCKED 0x00000004 +#define WT_FILE_TYPE_CHECKPOINT 0x00000004 #define WT_FILE_TYPE_DATA 0x00000002 #define WT_FILE_TYPE_LOG 0x00000001 #define WT_LOGSCAN_FIRST 0x00000008 diff --git a/src/include/lsm.h b/src/include/lsm.h index 4d3b14c5f74..b5c3859605c 100644 --- a/src/include/lsm.h +++ b/src/include/lsm.h @@ -30,6 +30,8 @@ struct __wt_cursor_lsm { uint64_t *txnid_max; /* Maximum txn for each chunk */ size_t txnid_alloc; + u_int update_count; /* Updates performed. */ + #define WT_CLSM_ACTIVE 0x01 /* Incremented the session count */ #define WT_CLSM_ITERATE_NEXT 0x02 /* Forward iteration */ #define WT_CLSM_ITERATE_PREV 0x04 /* Backward iteration */ diff --git a/src/include/stat.h b/src/include/stat.h index 6717b4d081f..ea2a4068f96 100644 --- a/src/include/stat.h +++ b/src/include/stat.h @@ -182,6 +182,8 @@ struct __wt_connection_stats { WT_STATS log_slot_transitions; WT_STATS log_sync; WT_STATS log_writes; + WT_STATS lsm_checkpoint_throttle; + WT_STATS lsm_merge_throttle; WT_STATS lsm_rows_merged; WT_STATS memory_allocation; WT_STATS memory_free; @@ -275,9 +277,11 @@ struct __wt_dsrc_stats { WT_STATS cursor_search_near; WT_STATS cursor_update; WT_STATS cursor_update_bytes; + WT_STATS lsm_checkpoint_throttle; WT_STATS lsm_chunk_count; WT_STATS lsm_generation_max; WT_STATS lsm_lookup_no_bloom; + WT_STATS lsm_merge_throttle; WT_STATS rec_dictionary; WT_STATS rec_overflow_key_internal; WT_STATS rec_overflow_key_leaf; diff --git a/src/include/txn.h b/src/include/txn.h index e71b693928b..17e2c0c632e 100644 --- a/src/include/txn.h +++ b/src/include/txn.h @@ -131,6 +131,3 @@ struct __wt_txn { #define TXN_RUNNING 0x08 uint32_t flags; }; - -#define WT_TXN_ACTIVE(txn) \ - (F_ISSET((txn), TXN_RUNNING) && (txn)->mod_count > 0) diff --git a/src/include/txn.i b/src/include/txn.i index cdfe697ee51..2543d5ff21f 100644 --- a/src/include/txn.i +++ 
b/src/include/txn.i @@ -166,20 +166,42 @@ __wt_txn_visible(WT_SESSION_IMPL *session, uint64_t id) /* * __wt_txn_read_skip -- - * Get the first visible update in a list (or NULL if none are visible), - * and report whether uncommitted changes were skipped. + * Get the first visible update in a list (or NULL if none are visible). + * Report the maximum transaction ID in the list and whether any updates + * were skipped to find the visible update. */ static inline WT_UPDATE * -__wt_txn_read_skip(WT_SESSION_IMPL *session, WT_UPDATE *upd, int *skipp) +__wt_txn_read_skip( + WT_SESSION_IMPL *session, WT_UPDATE *upd, uint64_t *max_txn, int *skipp) { + WT_UPDATE *first_upd; + + /* + * Track the largest transaction ID on this page. We store this in the + * page at the end of reconciliation if no updates are skipped. It is + * used to avoid evicting a clean page from memory with changes that + * are required to satisfy a snapshot read. + * + * Record whether any updates were skipped on the way to finding the + * first visible update. That determines whether a future read with no + * intervening modifications to the page could see a different value. + * If not, the page can safely be marked clean, and does not need to be + * reconciled until it is modified again. 
+ */ *skipp = 0; - while (upd != NULL && !__wt_txn_visible(session, upd->txnid)) { - if (upd->txnid != WT_TXN_ABORTED) - *skipp = 1; - upd = upd->next; - } + for (first_upd = NULL; upd != NULL; upd = upd->next) + if (upd->txnid != WT_TXN_ABORTED) { + if (TXNID_LT(*max_txn, upd->txnid)) + *max_txn = upd->txnid; + if (first_upd == NULL) { + if (__wt_txn_visible(session, upd->txnid)) + first_upd = upd; + else + *skipp = 1; + } + } - return (upd); + return (first_upd); } /* diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in index 4c7ee4917e9..6334bca2061 100644 --- a/src/include/wiredtiger.in +++ b/src/include/wiredtiger.in @@ -71,6 +71,9 @@ extern "C" { *******************************************/ struct __wt_collator; typedef struct __wt_collator WT_COLLATOR; struct __wt_compressor; typedef struct __wt_compressor WT_COMPRESSOR; +struct __wt_config_item; typedef struct __wt_config_item WT_CONFIG_ITEM; +struct __wt_config_parser; + typedef struct __wt_config_parser WT_CONFIG_PARSER; struct __wt_connection; typedef struct __wt_connection WT_CONNECTION; struct __wt_cursor; typedef struct __wt_cursor WT_CURSOR; struct __wt_data_source; typedef struct __wt_data_source WT_DATA_SOURCE; @@ -1490,8 +1493,12 @@ struct __wt_connection { * @config{direct_io, Use \c O_DIRECT to access files. Options are given as a * list\, such as <code>"direct_io=[data]"</code>. Configuring \c direct_io * requires care\, see @ref tuning_system_buffer_cache_direct_io for important - * warnings., a list\, with values chosen from the following options: \c - * "data"\, \c "log"; default empty.} + * warnings. 
Including \c "data" will cause WiredTiger data files to use \c + * O_DIRECT\, including \c "log" will cause WiredTiger log files to use \c + * O_DIRECT\, and including \c "checkpoint" will cause WiredTiger data files + * opened at a checkpoint (i.e: read only) to use \c O_DIRECT., a list\, with + * values chosen from the following options: \c "checkpoint"\, \c "data"\, \c + * "log"; default empty.} * @config{error_prefix, prefix string for error messages., a string; default * empty.} * @config{eviction_dirty_target, continue evicting until the cache has less @@ -1890,6 +1897,153 @@ int wiredtiger_unpack_str(WT_PACK_STREAM *ps, const char **sp); */ int wiredtiger_unpack_uint(WT_PACK_STREAM *ps, uint64_t *up); +/*! + * @name Configuration string parsing + * @{ + */ + +/*! + * The configuration information returned by the WiredTiger configuration + * parsing functions in the WT_EXTENSION_API and the public API. + */ +struct __wt_config_item { + /*! + * The value of a configuration string. + * + * Regardless of the type of the configuration string (boolean, int, + * list or string), the \c str field will reference the value of the + * configuration string. + * + * The bytes referenced by \c str are <b>not</b> nul-terminated, + * use the \c len field instead of a terminating nul byte. + */ + const char *str; + + /*! The number of bytes in the value referenced by \c str. */ + size_t len; + + /*! + * The value of a configuration boolean or integer. + * + * If the configuration string's value is "true" or "false", the + * \c val field will be set to 1 (true), or 0 (false). + * + * If the configuration string can be legally interpreted as an integer, + * using the strtoll function rules as specified in ISO/IEC 9899:1990 + * ("ISO C90"), that integer will be stored in the \c val field. + */ + int64_t val; + + /*! Permitted values of the \c type field. */ + enum { + /*! A string value with quotes stripped. */ + WT_CONFIG_ITEM_STRING, + /*! 
A boolean literal ("true" or "false"). */ + WT_CONFIG_ITEM_BOOL, + /*! An unquoted identifier: a string value without quotes. */ + WT_CONFIG_ITEM_ID, + /*! A numeric value. */ + WT_CONFIG_ITEM_NUM, + /*! A nested structure or list, including brackets. */ + WT_CONFIG_ITEM_STRUCT + } + /*! + * The type of value determined by the parser. In all cases, + * the \c str and \c len fields are set. + */ + type; +}; + +/*! + * Create a handle that can be used to parse or create configuration strings + * compatible with WiredTiger APIs. + * This API is outside the scope of a WiredTiger connection handle, since + * applications may need to generate configuration strings prior to calling + * ::wiredtiger_open. + * @param session the session handle to be used for error reporting. If NULL + * error messages will be written to stdout. + * @param config the configuration string being parsed. The string must + * remain valid for the lifetime of the parser handle. + * @param len the number of valid bytes in \c config + * @param[out] config_parserp A pointer to the newly opened handle + * @errors + */ +int wiredtiger_config_parser_open(WT_SESSION *session, + const char *config, size_t len, WT_CONFIG_PARSER **config_parserp); + +/*! + * A handle that can be used to search and traverse configuration strings + * compatible with WiredTiger APIs. + * To parse the contents of a list or nested configuration string use a new + * configuration parser handle based on the content of the ::WT_CONFIG_ITEM + * retrieved from the parent configuration string. 
+ * + * @section config_parse_examples Configuration String Parsing examples + * + * This could be used in C to create a configuration parser as follows: + * + * @snippet ex_config_parse.c Create a configuration parser + * + * Once the parser has been created the content can be queried directly: + * + * @snippet ex_config_parse.c get + * + * Or the content can be traversed linearly: + * + * @snippet ex_config_parse.c next + * + * Nested configuration values can be queried using a shorthand notation: + * + * @snippet ex_config_parse.c nested get + * + * Nested configuration values can be traversed using multiple + * ::WT_CONFIG_PARSER handles: + * + * @snippet ex_config_parse.c nested traverse + */ +struct __wt_config_parser { + + /*! + * Close the configuration scanner releasing any resources. + * + * @param config_parser the configuration parser handle + * @errors + * + */ + int __F(close)(WT_CONFIG_PARSER *config_parser); + + /*! + * Return the next key/value pair. + * + * When iteration would pass the end of the configuration string + * ::WT_NOTFOUND will be returned. + * + * If an item has no explicitly assigned value, the item will be + * returned in \c key and the \c value will be set to the boolean + * \c "true" value. + * + * @param config_parser the configuration parser handle + * @param key the returned key + * @param value the returned value + * @errors + * + */ + int __F(next)(WT_CONFIG_PARSER *config_parser, + WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value); + + /*! + * Return the value of an item in the configuration string. + * + * @param config_parser the configuration parser handle + * @param key configuration key string + * @param value the returned value + * @errors + * + */ + int __F(get)(WT_CONFIG_PARSER *config_parser, + const char *key, WT_CONFIG_ITEM *value); +}; + #endif /* !defined(SWIG) */ /*! 
* @} @@ -1985,9 +2139,7 @@ const char *wiredtiger_version(int *majorp, int *minorp, int *patchp); /******************************************* * Forward structure declarations for the extension API *******************************************/ -struct __wt_config_arg; typedef struct __wt_config_arg WT_CONFIG_ARG; -struct __wt_config_item; typedef struct __wt_config_item WT_CONFIG_ITEM; -struct __wt_config_scan; typedef struct __wt_config_scan WT_CONFIG_SCAN; +struct __wt_config_arg; typedef struct __wt_config_arg WT_CONFIG_ARG; /*! * The interface implemented by applications to provide custom ordering of @@ -2588,42 +2740,46 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_CONN_LOG_SYNC 1063 /*! log: log write operations */ #define WT_STAT_CONN_LOG_WRITES 1064 +/*! sleep for LSM checkpoint throttle */ +#define WT_STAT_CONN_LSM_CHECKPOINT_THROTTLE 1065 +/*! sleep for LSM merge throttle */ +#define WT_STAT_CONN_LSM_MERGE_THROTTLE 1066 /*! rows merged in an LSM tree */ -#define WT_STAT_CONN_LSM_ROWS_MERGED 1065 +#define WT_STAT_CONN_LSM_ROWS_MERGED 1067 /*! memory allocations */ -#define WT_STAT_CONN_MEMORY_ALLOCATION 1066 +#define WT_STAT_CONN_MEMORY_ALLOCATION 1068 /*! memory frees */ -#define WT_STAT_CONN_MEMORY_FREE 1067 +#define WT_STAT_CONN_MEMORY_FREE 1069 /*! memory re-allocations */ -#define WT_STAT_CONN_MEMORY_GROW 1068 +#define WT_STAT_CONN_MEMORY_GROW 1070 /*! total read I/Os */ -#define WT_STAT_CONN_READ_IO 1069 +#define WT_STAT_CONN_READ_IO 1071 /*! page reconciliation calls */ -#define WT_STAT_CONN_REC_PAGES 1070 +#define WT_STAT_CONN_REC_PAGES 1072 /*! page reconciliation calls for eviction */ -#define WT_STAT_CONN_REC_PAGES_EVICTION 1071 +#define WT_STAT_CONN_REC_PAGES_EVICTION 1073 /*! reconciliation failed because an update could not be included */ -#define WT_STAT_CONN_REC_SKIPPED_UPDATE 1072 +#define WT_STAT_CONN_REC_SKIPPED_UPDATE 1074 /*! 
pthread mutex shared lock read-lock calls */ -#define WT_STAT_CONN_RWLOCK_READ 1073 +#define WT_STAT_CONN_RWLOCK_READ 1075 /*! pthread mutex shared lock write-lock calls */ -#define WT_STAT_CONN_RWLOCK_WRITE 1074 +#define WT_STAT_CONN_RWLOCK_WRITE 1076 /*! open cursor count */ -#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1075 +#define WT_STAT_CONN_SESSION_CURSOR_OPEN 1077 /*! transactions */ -#define WT_STAT_CONN_TXN_BEGIN 1076 +#define WT_STAT_CONN_TXN_BEGIN 1078 /*! transaction checkpoints */ -#define WT_STAT_CONN_TXN_CHECKPOINT 1077 +#define WT_STAT_CONN_TXN_CHECKPOINT 1079 /*! transaction checkpoint currently running */ -#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1078 +#define WT_STAT_CONN_TXN_CHECKPOINT_RUNNING 1080 /*! transactions committed */ -#define WT_STAT_CONN_TXN_COMMIT 1079 +#define WT_STAT_CONN_TXN_COMMIT 1081 /*! transaction failures due to cache overflow */ -#define WT_STAT_CONN_TXN_FAIL_CACHE 1080 +#define WT_STAT_CONN_TXN_FAIL_CACHE 1082 /*! transactions rolled-back */ -#define WT_STAT_CONN_TXN_ROLLBACK 1081 +#define WT_STAT_CONN_TXN_ROLLBACK 1083 /*! total write I/Os */ -#define WT_STAT_CONN_WRITE_IO 1082 +#define WT_STAT_CONN_WRITE_IO 1084 /*! * @} @@ -2767,43 +2923,47 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection); #define WT_STAT_DSRC_CURSOR_UPDATE 2066 /*! cursor-update value bytes updated */ #define WT_STAT_DSRC_CURSOR_UPDATE_BYTES 2067 +/*! sleep for LSM checkpoint throttle */ +#define WT_STAT_DSRC_LSM_CHECKPOINT_THROTTLE 2068 /*! chunks in the LSM tree */ -#define WT_STAT_DSRC_LSM_CHUNK_COUNT 2068 +#define WT_STAT_DSRC_LSM_CHUNK_COUNT 2069 /*! highest merge generation in the LSM tree */ -#define WT_STAT_DSRC_LSM_GENERATION_MAX 2069 +#define WT_STAT_DSRC_LSM_GENERATION_MAX 2070 /*! queries that could have benefited from a Bloom filter that did not * exist */ -#define WT_STAT_DSRC_LSM_LOOKUP_NO_BLOOM 2070 +#define WT_STAT_DSRC_LSM_LOOKUP_NO_BLOOM 2071 +/*! 
sleep for LSM merge throttle */ +#define WT_STAT_DSRC_LSM_MERGE_THROTTLE 2072 /*! reconciliation dictionary matches */ -#define WT_STAT_DSRC_REC_DICTIONARY 2071 +#define WT_STAT_DSRC_REC_DICTIONARY 2073 /*! reconciliation internal-page overflow keys */ -#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2072 +#define WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL 2074 /*! reconciliation leaf-page overflow keys */ -#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2073 +#define WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF 2075 /*! reconciliation overflow values written */ -#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2074 +#define WT_STAT_DSRC_REC_OVERFLOW_VALUE 2076 /*! reconciliation pages deleted */ -#define WT_STAT_DSRC_REC_PAGE_DELETE 2075 +#define WT_STAT_DSRC_REC_PAGE_DELETE 2077 /*! reconciliation pages merged */ -#define WT_STAT_DSRC_REC_PAGE_MERGE 2076 +#define WT_STAT_DSRC_REC_PAGE_MERGE 2078 /*! page reconciliation calls */ -#define WT_STAT_DSRC_REC_PAGES 2077 +#define WT_STAT_DSRC_REC_PAGES 2079 /*! page reconciliation calls for eviction */ -#define WT_STAT_DSRC_REC_PAGES_EVICTION 2078 +#define WT_STAT_DSRC_REC_PAGES_EVICTION 2080 /*! reconciliation failed because an update could not be included */ -#define WT_STAT_DSRC_REC_SKIPPED_UPDATE 2079 +#define WT_STAT_DSRC_REC_SKIPPED_UPDATE 2081 /*! reconciliation internal pages split */ -#define WT_STAT_DSRC_REC_SPLIT_INTERNAL 2080 +#define WT_STAT_DSRC_REC_SPLIT_INTERNAL 2082 /*! reconciliation leaf pages split */ -#define WT_STAT_DSRC_REC_SPLIT_LEAF 2081 +#define WT_STAT_DSRC_REC_SPLIT_LEAF 2083 /*! reconciliation maximum splits for a page */ -#define WT_STAT_DSRC_REC_SPLIT_MAX 2082 +#define WT_STAT_DSRC_REC_SPLIT_MAX 2084 /*! object compaction */ -#define WT_STAT_DSRC_SESSION_COMPACT 2083 +#define WT_STAT_DSRC_SESSION_COMPACT 2085 /*! open cursor count */ -#define WT_STAT_DSRC_SESSION_CURSOR_OPEN 2084 +#define WT_STAT_DSRC_SESSION_CURSOR_OPEN 2086 /*! 
update conflicts */ -#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2085 +#define WT_STAT_DSRC_TXN_UPDATE_CONFLICT 2087 /*! @} */ /* * Statistics section: END diff --git a/src/include/wiredtiger_ext.h b/src/include/wiredtiger_ext.h index 88ddecdade1..acf7efad3d9 100644 --- a/src/include/wiredtiger_ext.h +++ b/src/include/wiredtiger_ext.h @@ -184,6 +184,12 @@ struct __wt_extension_api { WT_ITEM *first, WT_ITEM *second, int *cmp); /*! + * @copydoc wiredtiger_config_parser_open + */ + int (*config_parser_open)(WT_EXTENSION_API *wt_api, WT_SESSION *session, + const char *config, size_t len, WT_CONFIG_PARSER **config_parserp); + + /*! * Return the value of a configuration string. * * @param wt_api the extension handle @@ -199,69 +205,6 @@ struct __wt_extension_api { WT_CONFIG_ARG *config, const char *key, WT_CONFIG_ITEM *value); /*! - * Return the value of a configuration string. - * - * @param wt_api the extension handle - * @param session the session handle (or NULL if none available) - * @param config a configuration string - * @param key configuration key string - * @param value the returned value - * @errors - * - * @snippet ex_data_source.c WT_EXTENSION config_strget - */ - int (*config_strget)(WT_EXTENSION_API *wt_api, WT_SESSION *session, - const char *config, const char *key, WT_CONFIG_ITEM *value); - - /*! - * Return the list entries of a configuration string value. - * This method steps through the entries found in the last returned - * value from WT_EXTENSION_API::config_get. The last returned value - * should be of type "list". 
- * - * @param wt_api the extension handle - * @param session the session handle (or NULL if none available) - * @param str the configuration string to scan - * @param len the number of valid bytes in \c str - * @param[out] scanp a handle used to scan the config string - * @errors - * - * @snippet ex_data_source.c WT_EXTENSION config scan - */ - int (*config_scan_begin)(WT_EXTENSION_API *wt_api, WT_SESSION *session, - const char *str, size_t len, WT_CONFIG_SCAN **scanp); - - /*! - * Release any resources allocated by - * WT_EXTENSION_API::config_scan_begin. - * - * @param wt_api the extension handle - * @param scan the configuration scanner, invalid after this call - * @errors - * - * @snippet ex_data_source.c WT_EXTENSION config scan - */ - int (*config_scan_end)(WT_EXTENSION_API *wt_api, WT_CONFIG_SCAN *scan); - - /*! - * Return the next key/value pair from a config string scan. - * - * If the string contains a list of items with no assigned value, the - * items will be returned in \c key and the \c value will be set to the - * boolean \c "true" value. - * - * @param wt_api the extension handle - * @param scan the configuration scanner - * @param key the returned key - * @param value the returned value - * @errors - * - * @snippet ex_data_source.c WT_EXTENSION config scan - */ - int (*config_scan_next)(WT_EXTENSION_API *wt_api, - WT_CONFIG_SCAN *scan, WT_CONFIG_ITEM *key, WT_CONFIG_ITEM *value); - - /*! * Insert a row into the metadata if it does not already exist. * * @param wt_api the extension handle @@ -428,75 +371,19 @@ struct __wt_extension_api { */ int (*transaction_visible)(WT_EXTENSION_API *wt_api, WT_SESSION *session, uint64_t transaction_id); -}; - -/*! - * @typedef WT_CONFIG_ARG - * - * A configuration object passed to some extension interfaces. This is an - * opaque type: configuration values can be queried using - * WT_EXTENSION_API::config_get. - */ -/*! 
- * The configuration information returned by the WiredTiger extension function - * WT_EXTENSION_API::config_get. - */ -struct __wt_config_item { /*! - * The value of a configuration string. - * - * Regardless of the type of the configuration string (boolean, int, - * list or string), the \c str field will reference the value of the - * configuration string. - * - * The bytes referenced by \c str are <b>not</b> be nul-terminated, - * use the \c len field instead of a terminating nul byte. + * @copydoc wiredtiger_version */ - const char *str; - - /*! The number of bytes in the value referenced by \c str. */ - size_t len; - - /*! - * The value of a configuration boolean or integer. - * - * If the configuration string's value is "true" or "false", the - * \c val field will be set to 1 (true), or 0 (false). - * - * If the configuration string can be legally interpreted as an integer, - * using the strtoll function rules as specified in ISO/IEC 9899:1990 - * ("ISO C90"), that integer will be stored in the \c val field. - */ - int64_t val; - - /*! Permitted values of the \c type field. */ - enum { - /*! A string value with quotes stripped. */ - WT_CONFIG_ITEM_STRING, - /*! A boolean literal ("true" or "false"). */ - WT_CONFIG_ITEM_BOOL, - /*! An unquoted identifier: a string value without quotes. */ - WT_CONFIG_ITEM_ID, - /*! A numeric value. */ - WT_CONFIG_ITEM_NUM, - /*! A nested structure or list, including brackets. */ - WT_CONFIG_ITEM_STRUCT - } - /*! - * The type of value determined by the parser. In all cases, - * the \c str and \c len fields are set. - */ - type; + const char *(*version)(int *majorp, int *minorp, int *patchp); }; /*! - * @typedef WT_CONFIG_SCAN + * @typedef WT_CONFIG_ARG * - * A handle for a scan through a configuration string. - * This is an opaque type returned by WT_EXTENSION_API::config_scan_begin. - * Configuration values can be queried using WT_EXTENSION_API::config_scan_next. 
- * Call WT_EXTENSION_API::config_scan_end when finished to release resources. + * A configuration object passed to some extension interfaces. This is an + * opaque type: configuration values can be queried using + * WT_EXTENSION_API::config_get */ /*! @} */ diff --git a/src/include/wt_internal.h b/src/include/wt_internal.h index 3ae89fc211e..19a805aa25f 100644 --- a/src/include/wt_internal.h +++ b/src/include/wt_internal.h @@ -93,6 +93,8 @@ struct __wt_config_check; typedef struct __wt_config_check WT_CONFIG_CHECK; struct __wt_config_entry; typedef struct __wt_config_entry WT_CONFIG_ENTRY; +struct __wt_config_parser_impl; + typedef struct __wt_config_parser_impl WT_CONFIG_PARSER_IMPL; struct __wt_connection_impl; typedef struct __wt_connection_impl WT_CONNECTION_IMPL; struct __wt_connection_stats; diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c index 618257469ee..c1824df24ac 100644 --- a/src/lsm/lsm_cursor.c +++ b/src/lsm/lsm_cursor.c @@ -17,15 +17,15 @@ /* * LSM API enter: check that the cursor is in sync with the tree. */ -#define WT_LSM_ENTER(clsm, cursor, session, n) \ +#define WT_LSM_ENTER(clsm, cursor, session, n, reset) \ clsm = (WT_CURSOR_LSM *)cursor; \ CURSOR_API_CALL(cursor, session, n, NULL); \ - WT_ERR(__clsm_enter(clsm, 0)) + WT_ERR(__clsm_enter(clsm, reset, 0)) #define WT_LSM_UPDATE_ENTER(clsm, cursor, session, n) \ clsm = (WT_CURSOR_LSM *)cursor; \ CURSOR_UPDATE_API_CALL(cursor, session, n, NULL); \ - WT_ERR(__clsm_enter(clsm, 1)) + WT_ERR(__clsm_enter(clsm, 0, 1)) #define WT_LSM_LEAVE(session) \ API_END(session); \ @@ -35,16 +35,18 @@ CURSOR_UPDATE_API_END(session, ret); \ WT_TRET(__clsm_leave(clsm)) -static int __clsm_open_cursors(WT_CURSOR_LSM *, int, u_int, uint32_t); static int __clsm_lookup(WT_CURSOR_LSM *); +static int __clsm_open_cursors(WT_CURSOR_LSM *, int, u_int, uint32_t); +static int __clsm_reset_cursors(WT_CURSOR_LSM *, WT_CURSOR *); /* * __clsm_enter -- * Start an operation on an LSM cursor, update if the tree has changed. 
*/ static inline int -__clsm_enter(WT_CURSOR_LSM *clsm, int update) +__clsm_enter(WT_CURSOR_LSM *clsm, int reset, int update) { + WT_CURSOR *c; WT_DECL_RET; WT_LSM_CHUNK *chunk; WT_SESSION_IMPL *session; @@ -57,6 +59,20 @@ __clsm_enter(WT_CURSOR_LSM *clsm, int update) if (F_ISSET(clsm, WT_CLSM_MERGE)) return (0); + if (reset) { + c = &clsm->iface; + /* Copy out data before resetting chunk cursors. */ + if (F_ISSET(c, WT_CURSTD_KEY_INT) && + !WT_DATA_IN_ITEM(&c->key)) + WT_RET(__wt_buf_set( + session, &c->key, c->key.data, c->key.size)); + if (F_ISSET(c, WT_CURSTD_VALUE_INT) && + !WT_DATA_IN_ITEM(&c->value)) + WT_RET(__wt_buf_set( + session, &c->value, c->value.data, c->value.size)); + WT_RET(__clsm_reset_cursors(clsm, NULL)); + } + for (;;) { /* * If the cursor looks up-to-date, check if the cache is full. @@ -369,7 +385,7 @@ retry: if (F_ISSET(clsm, WT_CLSM_MERGE)) { * generation number and retry if it has changed under us. */ if (clsm->cursors != NULL && (ngood < clsm->nchunks || - (F_ISSET(clsm, WT_CLSM_OPEN_READ) && nupdates > 0))) { + (!F_ISSET(clsm, WT_CLSM_OPEN_READ) && nupdates > 0))) { saved_gen = lsm_tree->dsk_gen; locked = 0; WT_ERR(__wt_lsm_tree_unlock(session, lsm_tree)); @@ -607,7 +623,7 @@ __clsm_next(WT_CURSOR *cursor) u_int i; int check, cmp, deleted; - WT_LSM_ENTER(clsm, cursor, session, next); + WT_LSM_ENTER(clsm, cursor, session, next, 0); /* If we aren't positioned for a forward scan, get started. */ if (clsm->current == NULL || !F_ISSET(clsm, WT_CLSM_ITERATE_NEXT)) { @@ -689,7 +705,7 @@ __clsm_prev(WT_CURSOR *cursor) u_int i; int check, cmp, deleted; - WT_LSM_ENTER(clsm, cursor, session, prev); + WT_LSM_ENTER(clsm, cursor, session, prev, 0); /* If we aren't positioned for a reverse scan, get started. 
*/ if (clsm->current == NULL || !F_ISSET(clsm, WT_CLSM_ITERATE_PREV)) { @@ -875,13 +891,14 @@ __clsm_lookup(WT_CURSOR_LSM *clsm) } WT_ERR(WT_NOTFOUND); -done: WT_TRET(__clsm_reset_cursors(clsm, c)); +done: err: if (ret == 0) { clsm->current = c; F_CLR(cursor, WT_CURSTD_KEY_EXT | WT_CURSTD_VALUE_EXT); F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT); } else { - WT_TRET(__clsm_reset_cursors(clsm, NULL)); + if (c != NULL) + WT_TRET(c->reset(c)); F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET); } @@ -899,7 +916,7 @@ __clsm_search(WT_CURSOR *cursor) WT_DECL_RET; WT_SESSION_IMPL *session; - WT_LSM_ENTER(clsm, cursor, session, search); + WT_LSM_ENTER(clsm, cursor, session, search, 1); WT_CURSOR_NEEDKEY(cursor); ret = __clsm_lookup(clsm); @@ -925,13 +942,10 @@ __clsm_search_near(WT_CURSOR *cursor, int *exactp) larger = smaller = NULL; - WT_LSM_ENTER(clsm, cursor, session, search_near); + WT_LSM_ENTER(clsm, cursor, session, search_near, 1); WT_CURSOR_NEEDKEY(cursor); F_CLR(clsm, WT_CLSM_ITERATE_NEXT | WT_CLSM_ITERATE_PREV); - /* Reset any positioned cursor(s) to release pinned resources. */ - WT_ERR(__clsm_reset_cursors(clsm, NULL)); - /* * search_near is somewhat fiddly: we can't just return a nearby key * from the in-memory chunk because there could be a closer key on @@ -1111,13 +1125,29 @@ __clsm_put(WT_SESSION_IMPL *session, } /* - * The count is in a shared structure, but it's only approximate, so - * don't worry about protecting access. + * Update the record count. It is in a shared structure, but it's only + * approximate, so don't worry about protecting access. + * + * Throttle if necessary. Every 100 update operations on each cursor, + * check if throttling is required. Don't rely only on the shared + * counter because it can race, and because for some workloads, there + * may not be enough records per chunk to get effective throttling. 
*/ - if (++clsm->primary_chunk->count % 100 == 0 && - lsm_tree->merge_throttle + lsm_tree->ckpt_throttle > 0) + if ((++clsm->primary_chunk->count % 100 == 0 || + ++clsm->update_count >= 100) && + lsm_tree->merge_throttle + lsm_tree->ckpt_throttle > 0) { + clsm->update_count = 0; + WT_STAT_FAST_INCRV(session, &clsm->lsm_tree->stats, + lsm_checkpoint_throttle, (uint64_t)lsm_tree->ckpt_throttle); + WT_STAT_FAST_CONN_INCRV(session, + lsm_checkpoint_throttle, (uint64_t)lsm_tree->ckpt_throttle); + WT_STAT_FAST_INCRV(session, &clsm->lsm_tree->stats, + lsm_merge_throttle, (uint64_t)lsm_tree->merge_throttle); + WT_STAT_FAST_CONN_INCRV(session, + lsm_merge_throttle, (uint64_t)lsm_tree->merge_throttle); __wt_sleep(0, lsm_tree->ckpt_throttle + lsm_tree->merge_throttle); + } /* * In LSM there are multiple btrees active at one time. The tree diff --git a/src/lsm/lsm_merge.c b/src/lsm/lsm_merge.c index 792adf0773f..1f70072b41f 100644 --- a/src/lsm/lsm_merge.c +++ b/src/lsm/lsm_merge.c @@ -82,7 +82,7 @@ __wt_lsm_merge( aggressive = 10; merge_min = (aggressive > 5) ? 2 : lsm_tree->merge_min; max_gap = (aggressive + 4) / 5; - max_level = (id == 0 ? 0 : id - 1) + aggressive; + max_level = (lsm_tree->merge_throttle > 0) ? 0 : id + aggressive; /* * If there aren't any chunks to merge, or some of the chunks aren't @@ -101,24 +101,19 @@ __wt_lsm_merge( WT_RET(__wt_lsm_tree_lock(session, lsm_tree, 1)); /* - * Only include chunks that already have a Bloom filter and not - * involved in a merge. + * Only include chunks that already have a Bloom filter or are the + * result of a merge and not involved in a merge. */ - end_chunk = lsm_tree->nchunks - 1; - while (end_chunk > 0 && - ((chunk = lsm_tree->chunk[end_chunk]) == NULL || - !F_ISSET(chunk, WT_LSM_CHUNK_BLOOM) || - F_ISSET(chunk, WT_LSM_CHUNK_MERGING))) { - --end_chunk; - - /* - * If we find a chunk on disk without a Bloom filter, give up. 
- * We may have waited a while to lock the tree, and new chunks - * may have been created in the meantime. - */ - if (chunk != NULL && + for (end_chunk = lsm_tree->nchunks - 1; end_chunk > 0; --end_chunk) { + chunk = lsm_tree->chunk[end_chunk]; + WT_ASSERT(session, chunk != NULL); + if (F_ISSET(chunk, WT_LSM_CHUNK_MERGING)) + continue; + if (F_ISSET(chunk, WT_LSM_CHUNK_BLOOM) || chunk->generation > 0) + break; + else if (FLD_ISSET(lsm_tree->bloom, WT_LSM_BLOOM_OFF) && F_ISSET(chunk, WT_LSM_CHUNK_ONDISK)) - end_chunk = 0; + break; } /* @@ -150,8 +145,11 @@ __wt_lsm_merge( youngest = lsm_tree->chunk[end_chunk]; nchunks = (end_chunk + 1) - start_chunk; - /* If the chunk is already involved in a merge, stop. */ - if (F_ISSET(chunk, WT_LSM_CHUNK_MERGING)) + /* + * If the chunk is already involved in a merge or a Bloom + * filter is being built for it, stop. + */ + if (F_ISSET(chunk, WT_LSM_CHUNK_MERGING) || chunk->bloom_busy) break; /* @@ -163,10 +161,14 @@ __wt_lsm_merge( /* * If the size of the chunks selected so far exceeds the - * configured maximum chunk size, stop. + * configured maximum chunk size, stop. Keep going if we can + * slide the window further into the tree: we don't want to + * leave small chunks in the middle. */ if ((chunk_size += chunk->size) > lsm_tree->chunk_max) - break; + if (nchunks < merge_min || + chunk_size - youngest->size > lsm_tree->chunk_max) + break; /* * If we have enough chunks for a merge and the next chunk is @@ -184,7 +186,12 @@ __wt_lsm_merge( record_count += chunk->count; --start_chunk; - if (nchunks == lsm_tree->merge_max) { + /* + * If we have a full window, or the merge would be too big, + * remove the youngest chunk. 
+ */ + if (nchunks == lsm_tree->merge_max || + chunk_size > lsm_tree->chunk_max) { WT_ASSERT(session, F_ISSET(youngest, WT_LSM_CHUNK_MERGING)); F_CLR(youngest, WT_LSM_CHUNK_MERGING); diff --git a/src/lsm/lsm_stat.c b/src/lsm/lsm_stat.c index 30148de9a9f..a626032fe8a 100644 --- a/src/lsm/lsm_stat.c +++ b/src/lsm/lsm_stat.c @@ -39,7 +39,7 @@ __lsm_stat_init(WT_SESSION_IMPL *session, const char *uri, WT_CURSOR_STAT *cst) "statistics=(%s%s%s)", cst->stat_clear ? "clear," : "", cst->stat_all ? "all," : "", - cst->stat_fast ? "fast," : ""); + !cst->stat_all && cst->stat_fast ? "fast," : ""); cfg[1] = disk_cfg[1] = config; } diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c index a830295908f..eb81acecaf4 100644 --- a/src/lsm/lsm_tree.c +++ b/src/lsm/lsm_tree.c @@ -692,9 +692,9 @@ __wt_lsm_tree_throttle( else if (!decrease_only) WT_LSM_MERGE_THROTTLE_INCREASE(lsm_tree->merge_throttle); - /* Put an upper bound of 100ms on both throttle calculations. */ - lsm_tree->ckpt_throttle = WT_MIN(100000, lsm_tree->ckpt_throttle); - lsm_tree->merge_throttle = WT_MIN(100000, lsm_tree->merge_throttle); + /* Put an upper bound of 1s on both throttle calculations. */ + lsm_tree->ckpt_throttle = WT_MIN(1000000, lsm_tree->ckpt_throttle); + lsm_tree->merge_throttle = WT_MIN(1000000, lsm_tree->merge_throttle); /* * Update our estimate of how long each in-memory chunk stays active. @@ -1035,7 +1035,13 @@ __wt_lsm_compact(WT_SESSION_IMPL *session, const char *name, int *skip) WT_RET(__wt_seconds(session, &begin)); + /* + * Set the compacting flag and clear the current merge throttle + * setting, so that all merge threads look for merges at all levels of + * the tree. + */ F_SET(lsm_tree, WT_LSM_TREE_COMPACTING); + lsm_tree->merge_throttle = 0; /* Wake up the merge threads. 
*/ WT_RET(__wt_cond_signal(session, lsm_tree->work_cond)); diff --git a/src/lsm/lsm_worker.c b/src/lsm/lsm_worker.c index 8b939529b33..29f6ea31271 100644 --- a/src/lsm/lsm_worker.c +++ b/src/lsm/lsm_worker.c @@ -104,7 +104,7 @@ __wt_lsm_merge_worker(void *vargs) session = lsm_tree->worker_sessions[id]; __wt_free(session, args); - aggressive = stallms = 0; + aggressive = chunk_wait = stallms = 0; while (F_ISSET(lsm_tree, WT_LSM_TREE_WORKING)) { /* @@ -127,7 +127,7 @@ __wt_lsm_merge_worker(void *vargs) progress = 1; /* If we didn't create a Bloom filter, try to merge. */ - if (progress == 0 && + if ((id != 0 || progress == 0) && __wt_lsm_merge(session, lsm_tree, id, aggressive) == 0) progress = 1; @@ -185,7 +185,8 @@ err: __wt_err(session, ret, "LSM merge worker failed"); /* * __lsm_bloom_work -- - * Try to create a Bloom filter for the newest on-disk chunk. + * Try to create a Bloom filter for the newest on-disk chunk that doesn't + * have one. */ static int __lsm_bloom_work(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) @@ -216,10 +217,14 @@ __lsm_bloom_work(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree) chunk->count == 0) continue; - /* See if we win the race to switch on the "busy" flag. */ + /* + * See if we win the race to switch on the "busy" flag and + * recheck that the chunk still needs a Bloom filter. + */ if (WT_ATOMIC_CAS(chunk->bloom_busy, 0, 1)) { - ret = __lsm_bloom_create( - session, lsm_tree, chunk, (u_int)i); + if (!F_ISSET(chunk, WT_LSM_CHUNK_BLOOM)) + ret = __lsm_bloom_create( + session, lsm_tree, chunk, (u_int)i); chunk->bloom_busy = 0; break; } diff --git a/src/os_posix/os_open.c b/src/os_posix/os_open.c index 6ef4caadd0a..c6938dad9fd 100644 --- a/src/os_posix/os_open.c +++ b/src/os_posix/os_open.c @@ -107,7 +107,8 @@ __wt_open(WT_SESSION_IMPL *session, #endif #ifdef O_NOATIME /* Avoid updating metadata for read-only workloads. 
*/ - if (dio_type == WT_FILE_TYPE_DATA) + if (dio_type == WT_FILE_TYPE_DATA || + dio_type == WT_FILE_TYPE_CHECKPOINT) f |= O_NOATIME; #endif @@ -157,7 +158,8 @@ __wt_open(WT_SESSION_IMPL *session, #if defined(HAVE_POSIX_FADVISE) /* Disable read-ahead on trees: it slows down random read workloads. */ - if (dio_type == WT_FILE_TYPE_DATA) + if (dio_type == WT_FILE_TYPE_DATA || + dio_type == WT_FILE_TYPE_CHECKPOINT) WT_ERR(posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM)); #endif @@ -174,7 +176,8 @@ __wt_open(WT_SESSION_IMPL *session, WT_ERR(__wt_filesize(session, fh, &fh->size)); /* Configure file extension. */ - if (dio_type == WT_FILE_TYPE_DATA) + if (dio_type == WT_FILE_TYPE_DATA || + dio_type == WT_FILE_TYPE_CHECKPOINT) fh->extend_len = conn->data_extend_len; /* diff --git a/src/schema/schema_create.c b/src/schema/schema_create.c index 8d620198c09..8bd78ce0ac4 100644 --- a/src/schema/schema_create.c +++ b/src/schema/schema_create.c @@ -33,7 +33,8 @@ __wt_direct_io_size_check(WT_SESSION_IMPL *session, * if you configure direct I/O and then don't do I/O in alignments and * units of its happy place. */ - if (FLD_ISSET(conn->direct_io, WT_FILE_TYPE_DATA)) { + if (FLD_ISSET(conn->direct_io, + WT_FILE_TYPE_CHECKPOINT | WT_FILE_TYPE_DATA)) { align = (int64_t)conn->buffer_alignment; if (align != 0 && (cval.val < align || cval.val % align != 0)) WT_RET_MSG(session, EINVAL, diff --git a/src/support/scratch.c b/src/support/scratch.c index 4b342977372..1aae8649901 100644 --- a/src/support/scratch.c +++ b/src/support/scratch.c @@ -127,7 +127,8 @@ __wt_buf_set( /* Ensure the buffer is large enough. */ WT_RET(__wt_buf_initsize(session, buf, size)); - memcpy(buf->mem, data, size); + /* Copy, allowing for overlapping strings. 
*/ + memmove(buf->mem, data, size); return (0); } diff --git a/src/support/stat.c b/src/support/stat.c index 621c79220a4..c0caecbe606 100644 --- a/src/support/stat.c +++ b/src/support/stat.c @@ -93,11 +93,14 @@ __wt_stat_init_dsrc_stats(WT_DSRC_STATS *stats) stats->cursor_search_near.desc = "cursor search near calls"; stats->cursor_update.desc = "cursor update calls"; stats->cursor_update_bytes.desc = "cursor-update value bytes updated"; + stats->lsm_checkpoint_throttle.desc = + "sleep for LSM checkpoint throttle"; stats->lsm_chunk_count.desc = "chunks in the LSM tree"; stats->lsm_generation_max.desc = "highest merge generation in the LSM tree"; stats->lsm_lookup_no_bloom.desc = "queries that could have benefited from a Bloom filter that did not exist"; + stats->lsm_merge_throttle.desc = "sleep for LSM merge throttle"; stats->rec_dictionary.desc = "reconciliation dictionary matches"; stats->rec_overflow_key_internal.desc = "reconciliation internal-page overflow keys"; @@ -194,9 +197,11 @@ __wt_stat_refresh_dsrc_stats(void *stats_arg) stats->cursor_search_near.v = 0; stats->cursor_update.v = 0; stats->cursor_update_bytes.v = 0; + stats->lsm_checkpoint_throttle.v = 0; stats->lsm_chunk_count.v = 0; stats->lsm_generation_max.v = 0; stats->lsm_lookup_no_bloom.v = 0; + stats->lsm_merge_throttle.v = 0; stats->rec_dictionary.v = 0; stats->rec_overflow_key_internal.v = 0; stats->rec_overflow_key_leaf.v = 0; @@ -280,9 +285,11 @@ __wt_stat_aggregate_dsrc_stats(const void *child, const void *parent) p->cursor_search_near.v += c->cursor_search_near.v; p->cursor_update.v += c->cursor_update.v; p->cursor_update_bytes.v += c->cursor_update_bytes.v; + p->lsm_checkpoint_throttle.v += c->lsm_checkpoint_throttle.v; if (c->lsm_generation_max.v > p->lsm_generation_max.v) p->lsm_generation_max.v = c->lsm_generation_max.v; p->lsm_lookup_no_bloom.v += c->lsm_lookup_no_bloom.v; + p->lsm_merge_throttle.v += c->lsm_merge_throttle.v; p->rec_dictionary.v += c->rec_dictionary.v; 
p->rec_overflow_key_internal.v += c->rec_overflow_key_internal.v; p->rec_overflow_key_leaf.v += c->rec_overflow_key_leaf.v; @@ -389,6 +396,9 @@ __wt_stat_init_connection_stats(WT_CONNECTION_STATS *stats) "log: consolidated slot join transitions"; stats->log_sync.desc = "log: log sync operations"; stats->log_writes.desc = "log: log write operations"; + stats->lsm_checkpoint_throttle.desc = + "sleep for LSM checkpoint throttle"; + stats->lsm_merge_throttle.desc = "sleep for LSM merge throttle"; stats->lsm_rows_merged.desc = "rows merged in an LSM tree"; stats->memory_allocation.desc = "memory allocations"; stats->memory_free.desc = "memory frees"; @@ -479,6 +489,8 @@ __wt_stat_refresh_connection_stats(void *stats_arg) stats->log_slot_transitions.v = 0; stats->log_sync.v = 0; stats->log_writes.v = 0; + stats->lsm_checkpoint_throttle.v = 0; + stats->lsm_merge_throttle.v = 0; stats->lsm_rows_merged.v = 0; stats->memory_allocation.v = 0; stats->memory_free.v = 0; diff --git a/tools/wt_nvd3_util.py b/tools/wt_nvd3_util.py new file mode 100644 index 00000000000..6bf1396b0ff --- /dev/null +++ b/tools/wt_nvd3_util.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python +# +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. +# +from datetime import datetime +from nvd3 import lineChart + +# Add a multiChart type so we can overlay line graphs +class multiChart(lineChart): + def __init__(self, **kwargs): + lineChart.__init__(self, **kwargs) + + # Fix the axes + del self.axislist['yAxis'] + self.create_y_axis('yAxis1', format=kwargs.get('y_axis_format', '.02f')) + self.create_y_axis('yAxis2', format=kwargs.get('y_axis_format', '.02f')) + +TIMEFMT = "%b %d %H:%M:%S" + +thisyear = datetime.today().year +def parsetime(s): + return datetime.strptime(s, TIMEFMT).replace(year=thisyear) + diff --git a/tools/wtperf_graph.py b/tools/wtperf_graph.py index 8bd11433014..f45145cf801 100644 --- a/tools/wtperf_graph.py +++ b/tools/wtperf_graph.py @@ -33,22 +33,26 @@ from subprocess import call TIMEFMT = "%b %d %H:%M:%S" -def process_monitor(fname, ckptlist, opdict): +def process_monitor(fname, sfx, ckptlist, opdict): # Read the monitor file and figure out when a checkpoint was running. in_ckpt = 'N' ckptlist=[] + + ofname = 'monitor%s.png' % (sfx) # Monitor output format currently is: - # time,read,insert,update,ckpt,...latencies... + # time,totalsec,read,insert,update,ckpt,...latencies... ops = ('read', 'insert', 'update') - csvcol = (1, 2, 3) + csvcol = (2, 3, 4) with open(fname, 'r') as csvfile: reader = csv.reader(csvfile) for row in reader: + if row[0].lstrip().startswith('#'): + continue # Look for checkpoints and operations. 
- if row[4] != in_ckpt: + if row[5] != in_ckpt: ckptlist.append(row[0]) - in_ckpt = row[4] + in_ckpt = row[5] for op, col in zip(ops, csvcol): if row[col] != '0' and opdict[op] == 0: opdict[op] = 1 @@ -77,14 +81,14 @@ set yrange [0:]\n''' % { }) it = iter(ckptlist) for start, stop in zip(it, it): - of.write('set object rectangle from first \'' + start +\ - '\', graph 0 ' + ' to first \'' + stop +\ - '\', graph 1 fc rgb "gray" back\n') - of.write('set output "' + fname + '.png"\n') - of.write('plot "' + fname + '" using 1:($2/1000) title "Reads", "' +\ - fname + '" using 1:($3/1000) title "Inserts", "' +\ - fname + '" using 1:($4/1000) title "Updates", "' +\ - fname + '" using 1:(($2+$3+$4)/1000) title "Total"\n') + of.write("set object rectangle from first '%s',\ + graph 0 to first '%s',\ + graph 1 fc rgb \"gray\" back\n" % (start, stop)) + of.write('set output "%s"\n' % (ofname)) + of.write("""plot "{name}" using 1:($3/1000) title "Reads", \\ + "{name}" using 1:($4/1000) title "Inserts",\\ + "{name}" using 1:($5/1000) title "Updates" + """.format(name=fname)) of.close() call(["gnuplot", gcmd]) os.remove(gcmd) @@ -118,10 +122,12 @@ set yrange [1:]\n''' % { '\', graph 1 fc rgb "gray" back\n') ofname = name + sfx + '.latency1.png' of.write('set output "' + ofname + '"\n') - of.write('plot "' + fname + '" using 1:($' + repr(col_avg) +\ + of.write('plot "' +\ + fname + '" using 1:($' + repr(col_avg) +\ ') title "Average Latency", "' + fname +'" using 1:($' +\ repr(col_min) + ') title "Minimum Latency", "' +\ - fname + '" using 1:($' + repr(col_max) + ') title "Maximum Latency"\n') + fname + '" using 1:($' + repr(col_max) +\ + ') title "Maximum Latency"\n') of.close() call(["gnuplot", gcmd]) os.remove(gcmd) @@ -129,6 +135,9 @@ set yrange [1:]\n''' % { # Graph latency vs. % operations def plot_latency_percent(name, dirname, sfx, ckptlist): + lfile = os.path.join(dirname, 'latency.' + name) + if not os.path.exists(lfile): + return gcmd = "gnuplot." 
+ name + ".l2.cmd" of = open(gcmd, "w") of.write(''' @@ -147,7 +156,7 @@ set ylabel "%% operations" set yrange [0:]\n''') ofname = name + sfx + '.latency2.png' of.write('set output "' + ofname + '"\n') - of.write('plot "' + os.path.join(dirname, 'latency.' + name) + sfx +\ + of.write('plot "' + lfile + sfx +\ '" using (($2 * 100)/$4) title "' + name + '"\n') of.close() call(["gnuplot", gcmd]) @@ -156,6 +165,9 @@ set yrange [0:]\n''') # Graph latency vs. % operations (cumulative) def plot_latency_cumulative_percent(name, dirname, sfx, ckptlist): + lfile = os.path.join(dirname, 'latency.' + name) + if not os.path.exists(lfile): + return # Latency plot: cumulative operations vs. latency gcmd = "gnuplot." + name + ".l3.cmd" of = open(gcmd, "w") @@ -176,7 +188,7 @@ set yrange [0:]\n''' % { }) ofname = name + sfx + '.latency3.png' of.write('set output "' + ofname + '"\n') - of.write('plot "' + os.path.join(dirname, 'latency.' + name) + sfx +\ + of.write('plot "' + lfile + sfx +\ '" using 1:(($3 * 100)/$4) title "' + name + '"\n') of.close() call(["gnuplot", gcmd]) @@ -187,14 +199,14 @@ def process_file(fname): # NOTE: The operations below must be in this exact order to match # the operation latency output in the monitor file. opdict={'read':0, 'insert':0, 'update':0} - process_monitor(fname, ckptlist, opdict) # This assumes the monitor file has the string "monitor" # and any other (optional) characters in the filename are a suffix. 
sfx = os.path.basename(fname).replace('monitor','') dirname = os.path.dirname(fname) - column = 6 # average, minimum, maximum start in column 6 + process_monitor(fname, sfx, ckptlist, opdict) + column = 7 # average, minimum, maximum start in column 7 for k, v in opdict.items(): if v != 0: plot_latency_operation( diff --git a/tools/wtperf_stats.py b/tools/wtperf_stats.py new file mode 100644 index 00000000000..d9743055fff --- /dev/null +++ b/tools/wtperf_stats.py @@ -0,0 +1,173 @@ +#!/usr/bin/env python +# +# Public Domain 2008-2014 WiredTiger, Inc. +# +# This is free and unencumbered software released into the public domain. +# +# Anyone is free to copy, modify, publish, use, compile, sell, or +# distribute this software, either in source code form or as a compiled +# binary, for any purpose, commercial or non-commercial, and by any +# means. +# +# In jurisdictions that recognize copyright laws, the author or authors +# of this software dedicate any and all copyright interest in the +# software to the public domain. We make this dedication for the benefit +# of the public at large and to the detriment of our heirs and +# successors. We intend this dedication to be an overt act of +# relinquishment in perpetuity of all present and future rights to this +# software under copyright law. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +# IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR +# OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, +# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
+# + +import os, csv, operator +from time import mktime + +try: + from wt_nvd3_util import multiChart, parsetime +except ImportError: + print >>sys.stderr, "Could not import wt_nvd3_util.py, it should be\ + in the same directory as %s" % sys.argv[0] + sys.exit(-1) + +try: + from stat_data import no_scale_per_second_list +except ImportError: + print >>sys.stderr, "Could not import stat_data.py, it should be in\ + the same directory as %s" % sys.argv[0] + sys.exit(-1) + +# Fixup the names and values in a dictionary read in from a csv file. One +# field must be "#time" - which is used to calculate the interval. +# Input is a dictionary, output is a list of dictionaries with a single entry. +def munge_dict(values_dict, abstime): + sorted_values = sorted(values_dict, key=operator.itemgetter('#time')) + start_time = parsetime(sorted_values[0]['#time']) + + ret = [] + for v in sorted_values: + if abstime: + # Build the time series, milliseconds since the epoch + v['#time'] = int(mktime(parsetime(v['#time']).timetuple())) * 1000 + else: + # Build the time series as seconds since the start of the data + v['#time'] = (parsetime(v['#time']) - start_time).seconds + next_val = {} + for title, value in v.items(): + if title.find('uS') != -1: + title = title.replace('uS', 'ms') + value = float(value) / 1000 + if title == 'totalsec': + value = 0 + if title == 'checkpoints' and value == 'N': + value = 0 + elif title.find('time') != -1: + title = 'time' + elif title.find('latency') == -1 and \ + title.find('checkpoints') == -1: + title = title + ' (thousands)' + value = float(value) / 1000 + next_val[title] = value + ret.append(next_val) + + # After building the series, eliminate constants + d0 = ret[0] + for t0, v0 in d0.items(): + skip = True + for d in ret: + v = d[t0] + if v != v0: + skip = False + break + if skip: + for dicts in ret: + del dicts[t0] + + return ret + +def addPlotsToChart(chart, graph_data, wtstat_chart = False): + # Extract the times - they are the same for all 
lines. + times = [] + for v in graph_data: + times.append(v['time']) + + # Add a line to the graph for each field in the CSV file in alphabetical + # order, so the key is sorted. + for field in sorted(graph_data[0].keys()): + if field == 'time': + continue + # Split the latency and non-latency measurements onto different scales + axis = "1" + if not wtstat_chart and field.find('latency') == -1: + axis="2" + ydata = [] + for v in graph_data: + ydata.append(v[field]) + chart.add_serie(x=times, y=ydata, name=field, type="line", yaxis=axis) + +# Input parameters are a chart populated with WiredTiger statistics and +# the directory where the wtperf monitor file can be found. +def addPlotsToStatsChart(chart, dirname, abstime): + fname = os.path.join(dirname, 'monitor') + try: + with open(fname, 'rb') as csvfile: + reader = csv.DictReader(csvfile) + # Transform the data into something NVD3 can digest + graph_data = munge_dict(reader, abstime) + except IOError: + print >>sys.stderr, "Could not open wtperf monitor file." + sys.exit(-1) + addPlotsToChart(chart, graph_data, 1) + +def main(): + # Parse the command line + import argparse + + parser = argparse.ArgumentParser(description='Create graphs from WiredTiger statistics.') + parser.add_argument('--abstime', action='store_true', + help='use absolute time on the x axis') + parser.add_argument('--output', '-o', metavar='file', + default='wtperf_stats.html', help='HTML output file') + parser.add_argument('files', metavar='file', nargs='+', + help='input monitor file generated by WiredTiger wtperf application') + args = parser.parse_args() + + output_file = open(args.output, 'w') + + if len(args.files) != 1: + print 'Script currently only supports a single monitor file' + exit (1) + + chart_extra = {} + # Add in the x axis if the user wants time. 
+ if args.abstime: + chart_extra['x_axis_format'] = '%H:%M:%S' + + for f in args.files: + with open(f, 'rb') as csvfile: + reader = csv.DictReader(csvfile) + # Transform the data into something NVD3 can digest + graph_data = munge_dict(reader, args.abstime) + + chart = multiChart(name='wtperf', + height=450 + 10*len(graph_data[0].keys()), + resize=True, + x_is_date=args.abstime, + assets_directory='http://source.wiredtiger.com/graphs/', + **chart_extra) + + addPlotsToChart(chart, graph_data) + + chart.buildhtml() + output_file.write(chart.htmlcontent) + output_file.close() + +if __name__ == '__main__': + main() + diff --git a/tools/wtstats.py b/tools/wtstats.py index 766c49989b6..37b28e6c13a 100644 --- a/tools/wtstats.py +++ b/tools/wtstats.py @@ -28,38 +28,35 @@ import fileinput, os, re, shutil, sys, textwrap from collections import defaultdict -from datetime import datetime from time import mktime from subprocess import call try: from stat_data import no_scale_per_second_list except ImportError: - print >>sys.stderr, "Could not import stat_data.py, it should be in the same directory as %s" % sys.argv[0] + print >>sys.stderr, "Could not import stat_data.py, it should be\ + in the same directory as %s" % sys.argv[0] sys.exit(-1) try: - from nvd3 import lineChart, lineWithFocusChart + from wtperf_stats import addPlotsToStatsChart except ImportError: - print >>sys.stderr, "Could not import nvd3. Please install it *from source* (other versions may be missing features that we rely on). 
Run these commands: git clone https://github.com/areski/python-nvd3.git ; cd python-nvd3 ; sudo python setup.py install" + print >>sys.stderr, "Could not import wtperf_stats.py, it should be\ + in the same directory as %s" % sys.argv[0] sys.exit(-1) +try: + from wt_nvd3_util import multiChart, parsetime +except ImportError: + print >>sys.stderr, "Could not import wt_nvd3_util.py, it should be\ + in the same directory as %s" % sys.argv[0] + sys.exit(-1) -# Add a multiChart type so we can overlay line graphs -class multiChart(lineChart): - def __init__(self, **kwargs): - lineChart.__init__(self, **kwargs) - - # Fix the axes - del self.axislist['yAxis'] - self.create_y_axis('yAxis1', format=kwargs.get('y_axis_format', '.02f')) - self.create_y_axis('yAxis2', format=kwargs.get('y_axis_format', '.02f')) - -TIMEFMT = "%b %d %H:%M:%S" - -thisyear = datetime.today().year -def parsetime(s): - return datetime.strptime(s, TIMEFMT).replace(year=thisyear) +try: + from nvd3 import lineChart, lineWithFocusChart +except ImportError: + print >>sys.stderr, "Could not import nvd3. Please install it *from source* (other versions may be missing features that we rely on). Run these commands: git clone https://github.com/areski/python-nvd3.git ; cd python-nvd3 ; sudo python setup.py install" + sys.exit(-1) # Plot a set of entries for a title. 
def munge(title, values): @@ -93,7 +90,7 @@ def munge(title, values): # Parse the command line import argparse -parser = argparse.ArgumentParser(description='Create graphs from WiredTIger statistics.') +parser = argparse.ArgumentParser(description='Create graphs from WiredTiger statistics.') parser.add_argument('--abstime', action='store_true', help='use absolute time on the x axis') parser.add_argument('--focus', action='store_true', @@ -108,6 +105,8 @@ parser.add_argument('--output', '-o', metavar='file', default='wtstats.html', parser.add_argument('--right', '-R', metavar='regexp', type=re.compile, action='append', help='use the right axis for series with titles matching the specifed regexp') +parser.add_argument('--wtperf', '-w', action='store_true', + help='Plot wtperf statistics on the same graph') parser.add_argument('files', metavar='file', nargs='+', help='input files generated by WiredTiger statistics logging') args = parser.parse_args() @@ -211,6 +210,9 @@ for title, yaxis, ydata in results: chart.add_serie(x=xdata, y=(ydata.get(x, 0) for x in xdata), name=title, type="line", yaxis="2" if yaxis else "1") +if args.wtperf: + addPlotsToStatsChart(chart, os.path.dirname(args.files[0]), args.abstime) + chart.buildhtml() output_file.write(chart.htmlcontent) |