Merge branch 'develop' into cursor-reconfigure

Conflicts: src/cursor/cur_metadata.c
author: Keith Bostic <keith@wiredtiger.com> 2014-12-15 09:35:54 -0500
committer: Keith Bostic <keith@wiredtiger.com> 2014-12-15 09:35:54 -0500
commit: 980165614f114dbcf02344ba7209ae77369bcb80 (patch)
tree: 952a89a49aa758ec177ed9ce491524d0c1c79c1f
parent: 4c26d2324bae1d7030b0142d50dbd2ccf11ddeb6 (diff)
parent: 5cf21acf8fd66876e71334cc09deac0a09e8ea91 (diff)
download: mongo-980165614f114dbcf02344ba7209ae77369bcb80.tar.gz
81 files changed, 927 insertions, 745 deletions
diff --git a/bench/wtperf/runners/small-lsm.wtperf b/bench/wtperf/runners/small-lsm.wtperf
index 1b00d18d76b..8c7f65bb8b0 100644
--- a/bench/wtperf/runners/small-lsm.wtperf
+++ b/bench/wtperf/runners/small-lsm.wtperf
@@ -1,6 +1,6 @@
 # wtperf options file: small lsm configuration
 conn_config="cache_size=500MB"
-table_config="lsm=(chunk_size=5MB),type=lsm,os_cache_dirty_max=16MB"
+table_config="lsm=(chunk_size=10MB),type=lsm"
 icount=500000
 report_interval=5
 run_time=120
diff --git a/dist/api_data.py b/dist/api_data.py
index 2f7757dce6b..bf1346c187c 100644
--- a/dist/api_data.py
+++ b/dist/api_data.py
@@ -207,17 +207,26 @@ file_config = format_meta + [
         block compression is done''',
         min='512B', max='512MB'),
     Config('internal_item_max', '0', r'''
-        the largest key stored within an internal node, in bytes.  If
-        non-zero, any key larger than the specified size will be
-        stored as an overflow item (which may require additional I/O
-        to access).  If zero, a default size is chosen that permits at
-        least 8 keys per internal page''',
-        min=0),
+        historic term for internal_key_max''',
+        min=0, undoc=True),
+    Config('internal_key_max', '0', r'''
+        the largest key stored in an internal node, in bytes.  If set, keys
+        larger than the specified size are stored as overflow items (which
+        may require additional I/O to access).  The default and the maximum
+        allowed value are both one-tenth the size of a newly split internal
+        page''',
+        min='0'),
     Config('key_gap', '10', r'''
         the maximum gap between instantiated keys in a Btree leaf page,
         constraining the number of keys processed to instantiate a
         random Btree leaf page key''',
         min='0', undoc=True),
+    Config('leaf_key_max', '0', r'''
+        the largest key stored in a leaf node, in bytes.  If set, keys
+        larger than the specified size are stored as overflow items (which
+        may require additional I/O to access).  The default value is
+        one-tenth the size of a newly split leaf page''',
+        min='0'),
     Config('leaf_page_max', '32KB', r'''
         the maximum page size for leaf nodes, in bytes; the size must
         be a multiple of the allocation size, and is significant for
@@ -226,13 +235,17 @@ file_config = format_meta + [
         data, that is, the limit is applied before any block compression
         is done''',
         min='512B', max='512MB'),
+    Config('leaf_value_max', '0', r'''
+        the largest value stored in a leaf node, in bytes.  If set, values
+        larger than the specified size are stored as overflow items (which
+        may require additional I/O to access). If the size is larger than
+        the maximum leaf page size, the page size is temporarily ignored
+        when large values are written. The default is one-half the size of
+        a newly split leaf page''',
+        min='0'),
     Config('leaf_item_max', '0', r'''
-        the largest key or value stored within a leaf node, in bytes.
-        If non-zero, any key or value larger than the specified size
-        will be stored as an overflow item (which may require additional
-        I/O to access).  If zero, a default size is chosen that permits
-        at least 4 key and value pairs per leaf page''',
-        min=0),
+        historic term for leaf_key_max and leaf_value_max''',
+        min=0, undoc=True),
     Config('memory_page_max', '5MB', r'''
         the maximum size a page can grow to in memory before being
         reconciled to disk.  The specified size will be adjusted to a lower
diff --git a/dist/api_err.py b/dist/api_err.py
index 0c61a41ff28..cb2c8cc588e 100644
--- a/dist/api_err.py
+++ b/dist/api_err.py
@@ -42,7 +42,9 @@ errors = [
     Error('WT_PANIC', -31804,
         'WiredTiger library panic', '''
         This error indicates an underlying problem that requires the
-        application exit and restart.'''),
+        application exit and restart. The application can exit
+        immediately when \c WT_PANIC is returned from a WiredTiger
+        interface; no further WiredTiger calls are required.'''),
     Error('WT_RESTART', -31805,
         'restart the operation (internal)', undoc=True),
 ]
diff --git a/dist/stat_data.py b/dist/stat_data.py
index bd628e7418a..d1d3dd7e5ea 100644
--- a/dist/stat_data.py
+++ b/dist/stat_data.py
@@ -348,14 +348,16 @@ dsrc_stats = [
     BtreeStat('btree_fixed_len', 'fixed-record size', 'no_aggregate,no_scale'),
     BtreeStat('btree_maximum_depth',
         'maximum tree depth', 'max_aggregate,no_scale'),
-    BtreeStat('btree_maxintlitem',
-        'maximum internal page item size', 'no_aggregate,no_scale'),
+    BtreeStat('btree_maxintlkey',
+        'maximum internal page key size', 'no_aggregate,no_scale'),
     BtreeStat('btree_maxintlpage',
         'maximum internal page size', 'no_aggregate,no_scale'),
-    BtreeStat('btree_maxleafitem',
-        'maximum leaf page item size', 'no_aggregate,no_scale'),
+    BtreeStat('btree_maxleafkey',
+        'maximum leaf page key size', 'no_aggregate,no_scale'),
     BtreeStat('btree_maxleafpage',
         'maximum leaf page size', 'no_aggregate,no_scale'),
+    BtreeStat('btree_maxleafvalue',
+        'maximum leaf page value size', 'no_aggregate,no_scale'),
     BtreeStat('btree_overflow', 'overflow pages', 'no_scale'),
     BtreeStat('btree_row_internal', 'row-store internal pages', 'no_scale'),
     BtreeStat('btree_row_leaf', 'row-store leaf pages', 'no_scale'),
diff --git a/examples/c/Makefile.am b/examples/c/Makefile.am
index 17beba4a470..382c5912fef 100644
--- a/examples/c/Makefile.am
+++ b/examples/c/Makefile.am
@@ -13,7 +13,6 @@ noinst_PROGRAMS = \
 	ex_data_source \
 	ex_extending \
 	ex_extractor \
-	ex_file \
 	ex_hello \
 	ex_log \
 	ex_pack \
diff --git a/examples/c/ex_all.c b/examples/c/ex_all.c
index db418deed9d..cf5fb363c2f 100644
--- a/examples/c/ex_all.c
+++ b/examples/c/ex_all.c
@@ -524,6 +524,20 @@ session_ops(WT_SESSION *session)
 	/*! [Create a table with columns] */
 	ret = session->drop(session, "table:mytable", NULL);
 
+	/*! [Create a table and configure the page size] */
+	ret = session->create(session,
+	    "table:mytable", "key_format=S,value_format=S,"
+	    "internal_page_max=16KB,leaf_page_max=1MB,leaf_value_max=64KB");
+	/*! [Create a table and configure the page size] */
+	ret = session->drop(session, "table:mytable", NULL);
+
+	/*! [Create a table and configure a large leaf value max] */
+	ret = session->create(session,
+	    "table:mytable", "key_format=S,value_format=S,"
+	    "leaf_page_max=16KB,leaf_value_max=256KB");
+	/*! [Create a table and configure a large leaf value max] */
+	ret = session->drop(session, "table:mytable", NULL);
+
 	/*
 	 * This example code gets run, and the compression libraries might not
 	 * be loaded, causing the create to fail.  The documentation requires
diff --git a/examples/c/ex_backup.c b/examples/c/ex_backup.c
index fb5c5b9d299..ea572c8810b 100644
--- a/examples/c/ex_backup.c
+++ b/examples/c/ex_backup.c
@@ -125,7 +125,7 @@ compare_backups(int i)
  * That way we can compare the full and incremental each time through.
  */
 static int
-setup_directories()
+setup_directories(void)
 {
 	int i, ret;
 	char buf[1024];
diff --git a/examples/c/ex_file.c b/examples/c/ex_file.c
deleted file mode 100644
index 4170d1b099d..00000000000
--- a/examples/c/ex_file.c
+++ /dev/null
@@ -1,72 +0,0 @@
-/*-
- * Public Domain 2008-2014 WiredTiger, Inc.
- *
- * This is free and unencumbered software released into the public domain.
- *
- * Anyone is free to copy, modify, publish, use, compile, sell, or
- * distribute this software, either in source code form or as a compiled
- * binary, for any purpose, commercial or non-commercial, and by any
- * means.
- *
- * In jurisdictions that recognize copyright laws, the author or authors
- * of this software dedicate any and all copyright interest in the
- * software to the public domain. We make this dedication for the benefit
- * of the public at large and to the detriment of our heirs and
- * successors. We intend this dedication to be an overt act of
- * relinquishment in perpetuity of all present and future rights to this
- * software under copyright law.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * ex_file.c
- *	This is an example demonstrating how to configure an individual file.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <wiredtiger.h>
-
-static const char *home;
-
-int
-main(void)
-{
-	WT_CONNECTION *conn;
-	WT_SESSION *session;
-	int ret;
-
-	/*
-	 * Create a clean test directory for this run of the test program if the
-	 * environment variable isn't already set (as is done by make check).
-	 */
-	if (getenv("WIREDTIGER_HOME") == NULL) {
-		home = "WT_HOME";
-		ret = system("rm -rf WT_HOME && mkdir WT_HOME");
-	} else
-		home = NULL;
-
-	if ((ret = wiredtiger_open(home, NULL, "create", &conn)) != 0 ||
-	    (ret = conn->open_session(conn, NULL, NULL, &session)) != 0) {
-		fprintf(stderr, "Error connecting to %s: %s\n",
-		    home, wiredtiger_strerror(ret));
-		return (ret);
-	}
-	/* Note: further error checking omitted for clarity. */
-
-	/*! [file create] */
-	ret = session->create(session, "file:example",
-	    "key_format=u,"
-	    "internal_page_max=32KB,internal_item_max=1KB,"
-	    "leaf_page_max=1MB,leaf_item_max=32KB");
-	/*! [file create] */
-
-	return (conn->close(conn, NULL) == 0 ? ret : EXIT_FAILURE);
-}
diff --git a/src/async/async_api.c b/src/async/async_api.c
index 3cb78e80b09..6aeb404bccd 100644
--- a/src/async/async_api.c
+++ b/src/async/async_api.c
@@ -54,7 +54,7 @@ __async_get_format(WT_CONNECTION_IMPL *conn, const char *uri,
 	WT_RET(
 	    __wt_open_internal_session(conn, "async-cursor", 1, 1, &session));
 	__wt_spin_lock(session, &async->ops_lock);
-	WT_ERR(__wt_calloc_def(session, 1, &af));
+	WT_ERR(__wt_calloc_one(session, &af));
 	WT_ERR(__wt_strdup(session, uri, &af->uri));
 	WT_ERR(__wt_strdup(session, config, &af->config));
 	af->uri_hash = uri_hash;
@@ -232,7 +232,7 @@ __async_start(WT_SESSION_IMPL *session)
 	/*
 	 * Async is on, allocate the WT_ASYNC structure and initialize the ops.
 	 */
-	WT_RET(__wt_calloc(session, 1, sizeof(WT_ASYNC), &conn->async));
+	WT_RET(__wt_calloc_one(session, &conn->async));
 	async = conn->async;
 	STAILQ_INIT(&async->formatqh);
 	WT_RET(__wt_spin_init(session, &async->ops_lock, "ops"));
diff --git a/src/async/async_worker.c b/src/async/async_worker.c
index 7a88ac9dd6e..ecf052fc3bf 100644
--- a/src/async/async_worker.c
+++ b/src/async/async_worker.c
@@ -150,7 +150,7 @@ __async_worker_cursor(WT_SESSION_IMPL *session, WT_ASYNC_OP_IMPL *op,
 	 * We didn't find one in our cache.  Open one and cache it.
 	 * Insert it at the head expecting LRU usage.
 	 */
-	WT_RET(__wt_calloc_def(session, 1, &ac));
+	WT_RET(__wt_calloc_one(session, &ac));
 	WT_ERR(wt_session->open_cursor(
 	    wt_session, op->format->uri, NULL, op->format->config, &c));
 	ac->cfg_hash = op->format->cfg_hash;
diff --git a/src/block/block_mgr.c b/src/block/block_mgr.c
index 4f7f2898de5..a9b3b07904d 100644
--- a/src/block/block_mgr.c
+++ b/src/block/block_mgr.c
@@ -419,7 +419,7 @@ __wt_block_manager_open(WT_SESSION_IMPL *session,
 
 	*bmp = NULL;
 
-	WT_RET(__wt_calloc_def(session, 1, &bm));
+	WT_RET(__wt_calloc_one(session, &bm));
 	__bm_method_set(bm, 0);
 
 	WT_ERR(__wt_block_open(session, filename, cfg,
diff --git a/src/block/block_open.c b/src/block/block_open.c
index 7b68c59c766..0abe9cffc5f 100644
--- a/src/block/block_open.c
+++ b/src/block/block_open.c
@@ -128,7 +128,7 @@ __wt_block_open(WT_SESSION_IMPL *session,
 		}
 
 	/* Basic structure allocation, initialization. */
-	WT_ERR(__wt_calloc_def(session, 1, &block));
+	WT_ERR(__wt_calloc_one(session, &block));
 	block->ref = 1;
 	TAILQ_INSERT_HEAD(&conn->blockqh, block, q);
 
diff --git a/src/block/block_session.c b/src/block/block_session.c
index fa56b72f49b..90fe0af562a 100644
--- a/src/block/block_session.c
+++ b/src/block/block_session.c
@@ -152,7 +152,7 @@ __block_ext_discard(WT_SESSION_IMPL *session, u_int max)
 static int
 __block_size_alloc(WT_SESSION_IMPL *session, WT_SIZE **szp)
 {
-	return (__wt_calloc(session, 1, sizeof(WT_SIZE), szp));
+	return (__wt_calloc_one(session, szp));
 }
 
 /*
diff --git a/src/bloom/bloom.c b/src/bloom/bloom.c
index b8fecfe0efd..5f7a8f47c21 100644
--- a/src/bloom/bloom.c
+++ b/src/bloom/bloom.c
@@ -28,7 +28,7 @@ __bloom_init(WT_SESSION_IMPL *session,
 
 	*bloomp = NULL;
 
-	WT_RET(__wt_calloc_def(session, 1, &bloom));
+	WT_RET(__wt_calloc_one(session, &bloom));
 
 	WT_ERR(__wt_strdup(session, uri, &bloom->uri));
 	len = strlen(WT_BLOOM_TABLE_CONFIG) + 2;
diff --git a/src/btree/bt_delete.c b/src/btree/bt_delete.c
index 2fc1b0d5460..a58ed5d66e9 100644
--- a/src/btree/bt_delete.c
+++ b/src/btree/bt_delete.c
@@ -117,7 +117,7 @@ __wt_delete_page(WT_SESSION_IMPL *session, WT_REF *ref, int *skipp)
 	 * Record the change in the transaction structure and set the change's
 	 * transaction ID.
 	 */
-	WT_ERR(__wt_calloc_def(session, 1, &ref->page_del));
+	WT_ERR(__wt_calloc_one(session, &ref->page_del));
 	ref->page_del->txnid = session->txn.id;
 
 	WT_ERR(__wt_txn_modify_ref(session, ref));
@@ -306,7 +306,7 @@ __wt_delete_page_instantiate(WT_SESSION_IMPL *session, WT_REF *ref)
 	 * deleted items.
 	 */
 	for (i = 0; i < page->pg_row_entries; ++i) {
-		WT_ERR(__wt_calloc_def(session, 1, &upd));
+		WT_ERR(__wt_calloc_one(session, &upd));
 		WT_UPDATE_DELETED_SET(upd);
 
 		if (page_del == NULL)
diff --git a/src/btree/bt_handle.c b/src/btree/bt_handle.c
index fe2623b055b..10ea6cd019c 100644
--- a/src/btree/bt_handle.c
+++ b/src/btree/bt_handle.c
@@ -13,9 +13,6 @@ static int __btree_page_sizes(WT_SESSION_IMPL *);
 static int __btree_preload(WT_SESSION_IMPL *);
 static int __btree_tree_open_empty(WT_SESSION_IMPL *, int, int);
 
-static int pse1(WT_SESSION_IMPL *, const char *, uint32_t, uint32_t);
-static int pse2(WT_SESSION_IMPL *, const char *, uint32_t, uint32_t, int);
-
 /*
  * __wt_btree_open --
  *	Open a Btree.
@@ -623,153 +620,98 @@ __btree_page_sizes(WT_SESSION_IMPL *session)
 	btree = S2BT(session);
 	cfg = btree->dhandle->cfg;
 
+	/*
+	 * Get the allocation size.  Allocation sizes must be a power-of-two,
+	 * nothing else makes sense.
+	 */
 	WT_RET(__wt_direct_io_size_check(
 	    session, cfg, "allocation_size", &btree->allocsize));
+	if (!__wt_ispo2(btree->allocsize))
+		WT_RET_MSG(session,
+		    EINVAL, "the allocation size must be a power of two");
+
+	/*
+	 * Get the internal/leaf page sizes.
+	 * All page sizes must be in units of the allocation size.
+	 */
 	WT_RET(__wt_direct_io_size_check(
 	    session, cfg, "internal_page_max", &btree->maxintlpage));
-	WT_RET(__wt_config_gets(session, cfg, "internal_item_max", &cval));
-	btree->maxintlitem = (uint32_t)cval.val;
 	WT_RET(__wt_direct_io_size_check(
 	    session, cfg, "leaf_page_max", &btree->maxleafpage));
-	WT_RET(__wt_config_gets(session, cfg, "leaf_item_max", &cval));
-	btree->maxleafitem = (uint32_t)cval.val;
-
-	WT_RET(__wt_config_gets(session, cfg, "split_pct", &cval));
-	btree->split_pct = (int)cval.val;
+	if (btree->maxintlpage < btree->allocsize ||
+	    btree->maxintlpage % btree->allocsize != 0 ||
+	    btree->maxleafpage < btree->allocsize ||
+	    btree->maxleafpage % btree->allocsize != 0)
+		WT_RET_MSG(session, EINVAL,
+		    "page sizes must be a multiple of the page allocation "
+		    "size (%" PRIu32 "B)", btree->allocsize);
 
 	/*
 	 * When a page is forced to split, we want at least 50 entries on its
 	 * parent.
-	 */
-	WT_RET(__wt_config_gets(session, cfg, "memory_page_max", &cval));
-	btree->maxmempage = WT_MAX((uint64_t)cval.val, 50 * btree->maxleafpage);
-
-	/*
+	 *
 	 * Don't let pages grow to more than half the cache size.  Otherwise,
 	 * with very small caches, we can end up in a situation where nothing
 	 * can be evicted.  Take care getting the cache size: with a shared
 	 * cache, it may not have been set.
 	 */
+	WT_RET(__wt_config_gets(session, cfg, "memory_page_max", &cval));
+	btree->maxmempage = WT_MAX((uint64_t)cval.val, 50 * btree->maxleafpage);
 	cache_size = S2C(session)->cache_size;
 	if (cache_size > 0)
 		btree->maxmempage = WT_MIN(btree->maxmempage, cache_size / 2);
 
-	/* Allocation sizes must be a power-of-two, nothing else makes sense. */
-	if (!__wt_ispo2(btree->allocsize))
-		WT_RET_MSG(session,
-		    EINVAL, "the allocation size must be a power of two");
-
-	/* All page sizes must be in units of the allocation size. */
-	if (btree->maxintlpage < btree->allocsize ||
-	    btree->maxintlpage % btree->allocsize != 0 ||
-	    btree->maxleafpage < btree->allocsize ||
-	    btree->maxleafpage % btree->allocsize != 0)
-		WT_RET_MSG(session, EINVAL,
-		    "page sizes must be a multiple of the page allocation "
-		    "size (%" PRIu32 "B)", btree->allocsize);
-
 	/*
-	 * Set the split percentage: reconciliation splits to a smaller-than-
-	 * maximum page size so we don't split every time a new entry is added.
+	 * Get the split percentage (reconciliation splits pages into chunks
+	 * smaller than the maximum page size so we don't split every time a
+	 * new entry is added). Determine how large newly split pages will be.
 	 */
+	WT_RET(__wt_config_gets(session, cfg, "split_pct", &cval));
+	btree->split_pct = (int)cval.val;
 	intl_split_size = __wt_split_page_size(btree, btree->maxintlpage);
 	leaf_split_size = __wt_split_page_size(btree, btree->maxleafpage);
 
 	/*
-	 * Default values for internal and leaf page items: make sure at least
-	 * 8 items fit on split pages.
-	 */
-	if (btree->maxintlitem == 0)
-		    btree->maxintlitem = intl_split_size / 8;
-	if (btree->maxleafitem == 0)
-		    btree->maxleafitem = leaf_split_size / 8;
-
-	/*
-	 * If raw compression is configured, the application owns page layout,
-	 * it's not our problem.   Hopefully the application chose well.
+	 * Get the maximum internal/leaf page key/value sizes.
+	 *
+	 * In historic versions of WiredTiger, the maximum internal/leaf page
+	 * key/value sizes were set by the internal_item_max and leaf_item_max
+	 * configuration strings. Look for those strings if we don't find the
+	 * newer ones.
 	 */
-	if (btree->compressor != NULL &&
-	    btree->compressor->compress_raw != NULL)
-		return (0);
-
-	/* Check we can fit at least 2 items on a page. */
-	if (btree->maxintlitem > btree->maxintlpage / 2)
-		return (pse1(session, "internal",
-		    btree->maxintlpage, btree->maxintlitem));
-	if (btree->maxleafitem > btree->maxleafpage / 2)
-		return (pse1(session, "leaf",
-		    btree->maxleafpage, btree->maxleafitem));
+	WT_RET(__wt_config_gets(session, cfg, "internal_key_max", &cval));
+	btree->maxintlkey = (uint32_t)cval.val;
+	if (btree->maxintlkey == 0) {
+		WT_RET(
+		    __wt_config_gets(session, cfg, "internal_item_max", &cval));
+		btree->maxintlkey = (uint32_t)cval.val;
+	}
+	WT_RET(__wt_config_gets(session, cfg, "leaf_key_max", &cval));
+	btree->maxleafkey = (uint32_t)cval.val;
+	WT_RET(__wt_config_gets(session, cfg, "leaf_value_max", &cval));
+	btree->maxleafvalue = (uint32_t)cval.val;
+	if (btree->maxleafkey == 0 && btree->maxleafvalue == 0) {
+		WT_RET(__wt_config_gets(session, cfg, "leaf_item_max", &cval));
+		btree->maxleafkey = (uint32_t)cval.val;
+		btree->maxleafvalue = (uint32_t)cval.val;
+	}
 
 	/*
-	 * Take into account the size of a split page:
+	 * Default/maximum for internal and leaf page keys: split-page / 10.
+	 * Default for leaf page values: split-page / 2.
 	 *
-	 * Make it a separate error message so it's clear what went wrong.
+	 * It's difficult for applications to configure this in any exact way as
+	 * they have to duplicate our calculation of how many keys must fit on a
+	 * page, and given a split-percentage and page header, that isn't easy
+	 * to do. If the maximum internal key size is too large for the page,
+	 * reset it to the default.
 	 */
-	if (btree->maxintlitem > intl_split_size / 2)
-		return (pse2(session, "internal",
-		    btree->maxintlpage, btree->maxintlitem, btree->split_pct));
-	if (btree->maxleafitem > leaf_split_size / 2)
-		return (pse2(session, "leaf",
-		    btree->maxleafpage, btree->maxleafitem, btree->split_pct));
+	if (btree->maxintlkey == 0 || btree->maxintlkey > intl_split_size / 10)
+		    btree->maxintlkey = intl_split_size / 10;
+	if (btree->maxleafkey == 0)
+		    btree->maxleafkey = leaf_split_size / 10;
+	if (btree->maxleafvalue == 0)
+		    btree->maxleafvalue = leaf_split_size / 2;
 
 	return (0);
 }
-
-/*
- * __wt_split_page_size --
- *	Split page size calculation: we don't want to repeatedly split every
- * time a new entry is added, so we split to a smaller-than-maximum page size.
- */
-uint32_t
-__wt_split_page_size(WT_BTREE *btree, uint32_t maxpagesize)
-{
-	uintmax_t a;
-	uint32_t split_size;
-
-	/*
-	 * Ideally, the split page size is some percentage of the maximum page
-	 * size rounded to an allocation unit (round to an allocation unit so
-	 * we don't waste space when we write).
-	 */
-	a = maxpagesize;			/* Don't overflow. */
-	split_size = (uint32_t)
-	    WT_ALIGN((a * (u_int)btree->split_pct) / 100, btree->allocsize);
-
-	/*
-	 * If the result of that calculation is the same as the allocation unit
-	 * (that happens if the maximum size is the same size as an allocation
-	 * unit, use a percentage of the maximum page size).
-	 */
-	if (split_size == btree->allocsize)
-		split_size = (uint32_t)((a * (u_int)btree->split_pct) / 100);
-
-	return (split_size);
-}
-
-/*
- * pse1 --
- *	Page size error message 1.
- */
-static int
-pse1(WT_SESSION_IMPL *session, const char *type, uint32_t max, uint32_t ovfl)
-{
-	WT_RET_MSG(session, EINVAL,
-	    "%s page size (%" PRIu32 "B) too small for the maximum item size "
-	    "(%" PRIu32 "B); the page must be able to hold at least 2 items",
-	    type, max, ovfl);
-}
-
-/*
- * pse2 --
- *	Page size error message 2.
- */
-static int
-pse2(WT_SESSION_IMPL *session,
-    const char *type, uint32_t max, uint32_t ovfl, int pct)
-{
-	WT_RET_MSG(session, EINVAL,
-	    "%s page size (%" PRIu32 "B) too small for the maximum item size "
-	    "(%" PRIu32 "B), because of the split percentage (%d %%); a split "
-	    "page must be able to hold at least 2 items",
-	    type, max, ovfl, pct);
-}
diff --git a/src/btree/bt_page.c b/src/btree/bt_page.c
index b2767e74bac..799f0cca3ee 100644
--- a/src/btree/bt_page.c
+++ b/src/btree/bt_page.c
@@ -227,8 +227,8 @@ __wt_page_alloc(WT_SESSION_IMPL *session, uint8_t type,
 		WT_INTL_INDEX_SET(page, pindex);
 		if (alloc_refs)
 			for (i = 0; i < pindex->entries; ++i) {
-				WT_ERR(__wt_calloc_def(
-				    session, 1, &pindex->index[i]));
+				WT_ERR(__wt_calloc_one(
+				    session, &pindex->index[i]));
 				size += sizeof(WT_REF);
 			}
 		if (0) {
diff --git a/src/btree/bt_slvg.c b/src/btree/bt_slvg.c
index 6e70c9ea2b6..96b63f3f8f0 100644
--- a/src/btree/bt_slvg.c
+++ b/src/btree/bt_slvg.c
@@ -491,8 +491,8 @@ __slvg_trk_init(WT_SESSION_IMPL *session,
 	WT_DECL_RET;
 	WT_TRACK *trk;
 
-	WT_RET(__wt_calloc_def(session, 1, &trk));
-	WT_ERR(__wt_calloc_def(session, 1, &trk->shared));
+	WT_RET(__wt_calloc_one(session, &trk));
+	WT_ERR(__wt_calloc_one(session, &trk->shared));
 	trk->shared->ref = 1;
 
 	trk->ss = ss;
@@ -519,7 +519,7 @@ __slvg_trk_split(WT_SESSION_IMPL *session, WT_TRACK *orig, WT_TRACK **newp)
 {
 	WT_TRACK *trk;
 
-	WT_RET(__wt_calloc_def(session, 1, &trk));
+	WT_RET(__wt_calloc_one(session, &trk));
 
 	trk->shared = orig->shared;
 	trk->ss = orig->ss;
@@ -1181,7 +1181,7 @@ __slvg_col_build_internal(
 		ref->home = page;
 		ref->page = NULL;
 
-		WT_ERR(__wt_calloc(session, 1, sizeof(WT_ADDR), &addr));
+		WT_ERR(__wt_calloc_one(session, &addr));
 		WT_ERR(__wt_strndup(
 		    session, trk->trk_addr, trk->trk_addr_size, &addr->addr));
 		addr->size = trk->trk_addr_size;
@@ -1826,7 +1826,7 @@ __slvg_row_build_internal(
 		ref->home = page;
 		ref->page = NULL;
 
-		WT_ERR(__wt_calloc(session, 1, sizeof(WT_ADDR), &addr));
+		WT_ERR(__wt_calloc_one(session, &addr));
 		WT_ERR(__wt_strndup(
 		    session, trk->trk_addr, trk->trk_addr_size, &addr->addr));
 		addr->size = trk->trk_addr_size;
diff --git a/src/btree/bt_split.c b/src/btree/bt_split.c
index e25f0b73e01..c6b97733b69 100644
--- a/src/btree/bt_split.c
+++ b/src/btree/bt_split.c
@@ -310,7 +310,7 @@ __split_ref_instantiate(WT_SESSION_IMPL *session,
 		    sizeof(WT_ADDR) + addr->size);
 	else {
 		__wt_cell_unpack((WT_CELL *)ref->addr, &unpack);
-		WT_RET(__wt_calloc_def(session, 1, &addr));
+		WT_RET(__wt_calloc_one(session, &addr));
 		if ((ret = __wt_strndup(
 		    session, unpack.data, unpack.size, &addr->addr)) != 0) {
 			__wt_free(session, addr);
@@ -444,7 +444,7 @@ __split_deepen(WT_SESSION_IMPL *session, WT_PAGE *parent)
 	    pindex->index[pindex->entries - 1];
 	for (alloc_refp = alloc_index->index + SPLIT_CORRECT_1,
 	    i = 0; i < children; ++alloc_refp, ++i) {
-		WT_ERR(__wt_calloc_def(session, 1, alloc_refp));
+		WT_ERR(__wt_calloc_one(session, alloc_refp));
 		WT_MEMSIZE_ADD(parent_incr, sizeof(WT_REF));
 	}
 
@@ -747,7 +747,7 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session,
 
 	/* In some cases, the underlying WT_REF has not yet been allocated. */
 	if (*refp == NULL) {
-		WT_RET(__wt_calloc_def(session, 1, refp));
+		WT_RET(__wt_calloc_one(session, refp));
 		WT_MEMSIZE_ADD(incr, sizeof(WT_REF));
 	}
 	ref = *refp;
@@ -768,7 +768,7 @@ __wt_multi_to_ref(WT_SESSION_IMPL *session,
 		 * would have to avoid freeing the memory, and it's not worth
 		 * the confusion.
 		 */
-		WT_RET(__wt_calloc_def(session, 1, &addr));
+		WT_RET(__wt_calloc_one(session, &addr));
 		WT_MEMSIZE_ADD(incr, sizeof(WT_ADDR));
 		ref->addr = addr;
 		addr->size = multi->addr.size;
@@ -1081,7 +1081,7 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref, int *splitp)
 	 *
 	 * The new reference is visible to readers once the split completes.
 	 */
-	WT_ERR(__wt_calloc_def(session, 1, &split_ref[0]));
+	WT_ERR(__wt_calloc_one(session, &split_ref[0]));
 	child = split_ref[0];
 	*child = *ref;
 	child->state = WT_REF_MEM;
@@ -1112,12 +1112,12 @@ __wt_split_insert(WT_SESSION_IMPL *session, WT_REF *ref, int *splitp)
 	 * The second page in the split is a new WT_REF/page pair.
 	 */
 	WT_ERR(__wt_page_alloc(session, WT_PAGE_ROW_LEAF, 0, 0, 0, &right));
-	WT_ERR(__wt_calloc_def(session, 1, &right->pg_row_ins));
-	WT_ERR(__wt_calloc_def(session, 1, &right->pg_row_ins[0]));
+	WT_ERR(__wt_calloc_one(session, &right->pg_row_ins));
+	WT_ERR(__wt_calloc_one(session, &right->pg_row_ins[0]));
 	WT_MEMSIZE_ADD(right_incr, sizeof(WT_INSERT_HEAD));
 	WT_MEMSIZE_ADD(right_incr, sizeof(WT_INSERT_HEAD *));
 
-	WT_ERR(__wt_calloc_def(session, 1, &split_ref[1]));
+	WT_ERR(__wt_calloc_one(session, &split_ref[1]));
 	child = split_ref[1];
 	child->page = right;
 	child->state = WT_REF_MEM;
diff --git a/src/btree/bt_stat.c b/src/btree/bt_stat.c
index 3da0bcf346c..c08e9d9218b 100644
--- a/src/btree/bt_stat.c
+++ b/src/btree/bt_stat.c
@@ -32,10 +32,11 @@ __wt_btree_stat_init(WT_SESSION_IMPL *session, WT_CURSOR_STAT *cst)
 
 	WT_STAT_SET(stats, btree_fixed_len, btree->bitcnt);
 	WT_STAT_SET(stats, btree_maximum_depth, btree->maximum_depth);
-	WT_STAT_SET(stats, btree_maxintlitem, btree->maxintlitem);
 	WT_STAT_SET(stats, btree_maxintlpage, btree->maxintlpage);
-	WT_STAT_SET(stats, btree_maxleafitem, btree->maxleafitem);
+	WT_STAT_SET(stats, btree_maxintlkey, btree->maxintlkey);
 	WT_STAT_SET(stats, btree_maxleafpage, btree->maxleafpage);
+	WT_STAT_SET(stats, btree_maxleafkey, btree->maxleafkey);
+	WT_STAT_SET(stats, btree_maxleafvalue, btree->maxleafvalue);
 
 	/* Everything else is really, really expensive. */
 	if (!F_ISSET(cst, WT_CONN_STAT_ALL))
diff --git a/src/btree/row_modify.c b/src/btree/row_modify.c
index e0036d14cbb..e7fb75dc8cb 100644
--- a/src/btree/row_modify.c
+++ b/src/btree/row_modify.c
@@ -19,7 +19,7 @@ __wt_page_modify_alloc(WT_SESSION_IMPL *session, WT_PAGE *page)
 
 	conn = S2C(session);
 
-	WT_RET(__wt_calloc_def(session, 1, &modify));
+	WT_RET(__wt_calloc_one(session, &modify));
 
 	/*
 	 * Select a spinlock for the page; let the barrier immediately below
diff --git a/src/config/config_api.c b/src/config/config_api.c
index 42f4c117b81..0c920af0d0e 100644
--- a/src/config/config_api.c
+++ b/src/config/config_api.c
@@ -84,7 +84,7 @@ wiredtiger_config_parser_open(WT_SESSION *wt_session,
 	*config_parserp = NULL;
 	session = (WT_SESSION_IMPL *)wt_session;
 
-	WT_RET(__wt_calloc_def(session, 1, &config_parser));
+	WT_RET(__wt_calloc_one(session, &config_parser));
 	config_parser->iface = stds;
 	config_parser->session = session;
 
diff --git a/src/config/config_check.c b/src/config/config_check.c
index c6fd6bbd75b..18300da8282 100644
--- a/src/config/config_check.c
+++ b/src/config/config_check.c
@@ -122,7 +122,7 @@ __wt_configure_method(WT_SESSION_IMPL *session,
 	 * The new base value is the previous base value, a separator and the
 	 * new configuration string.
 	 */
-	WT_ERR(__wt_calloc_def(session, 1, &entry));
+	WT_ERR(__wt_calloc_one(session, &entry));
 	entry->method = (*epp)->method;
 	WT_ERR(__wt_calloc_def(session,
 	    strlen((*epp)->base) + strlen(",") + strlen(config) + 1, &p));
diff --git a/src/config/config_def.c b/src/config/config_def.c
index 23f7b27338f..750d9843279 100644
--- a/src/config/config_def.c
+++ b/src/config/config_def.c
@@ -138,12 +138,15 @@ static const WT_CONFIG_CHECK confchk_file_meta[] = {
 	{ "huffman_value", "string", NULL, NULL },
 	{ "id", "string", NULL, NULL },
 	{ "internal_item_max", "int", "min=0", NULL },
+	{ "internal_key_max", "int", "min=0", NULL },
 	{ "internal_key_truncate", "boolean", NULL, NULL },
 	{ "internal_page_max", "int", "min=512B,max=512MB", NULL },
 	{ "key_format", "format", NULL, NULL },
 	{ "key_gap", "int", "min=0", NULL },
 	{ "leaf_item_max", "int", "min=0", NULL },
+	{ "leaf_key_max", "int", "min=0", NULL },
 	{ "leaf_page_max", "int", "min=512B,max=512MB", NULL },
+	{ "leaf_value_max", "int", "min=0", NULL },
 	{ "memory_page_max", "int", "min=512B,max=10TB", NULL },
 	{ "os_cache_dirty_max", "int", "min=0", NULL },
 	{ "os_cache_max", "int", "min=0", NULL },
@@ -227,12 +230,15 @@ static const WT_CONFIG_CHECK confchk_session_create[] = {
 	{ "huffman_value", "string", NULL, NULL },
 	{ "immutable", "boolean", NULL, NULL },
 	{ "internal_item_max", "int", "min=0", NULL },
+	{ "internal_key_max", "int", "min=0", NULL },
 	{ "internal_key_truncate", "boolean", NULL, NULL },
 	{ "internal_page_max", "int", "min=512B,max=512MB", NULL },
 	{ "key_format", "format", NULL, NULL },
 	{ "key_gap", "int", "min=0", NULL },
 	{ "leaf_item_max", "int", "min=0", NULL },
+	{ "leaf_key_max", "int", "min=0", NULL },
 	{ "leaf_page_max", "int", "min=512B,max=512MB", NULL },
+	{ "leaf_value_max", "int", "min=0", NULL },
 	{ "lsm", "category", NULL, confchk_lsm_subconfigs },
 	{ "memory_page_max", "int", "min=512B,max=10TB", NULL },
 	{ "os_cache_dirty_max", "int", "min=0", NULL },
@@ -567,11 +573,12 @@ static const WT_CONFIG_ENTRY config_entries[] = {
 	  "block_compressor=,cache_resident=0,checkpoint=,checkpoint_lsn=,"
 	  "checksum=uncompressed,collator=,columns=,dictionary=0,"
 	  "format=btree,huffman_key=,huffman_value=,id=,internal_item_max=0"
-	  ",internal_key_truncate=,internal_page_max=4KB,key_format=u,"
-	  "key_gap=10,leaf_item_max=0,leaf_page_max=32KB,"
-	  "memory_page_max=5MB,os_cache_dirty_max=0,os_cache_max=0,"
-	  "prefix_compression=0,prefix_compression_min=4,split_pct=75,"
-	  "value_format=u,version=(major=0,minor=0)",
+	  ",internal_key_max=0,internal_key_truncate=,internal_page_max=4KB"
+	  ",key_format=u,key_gap=10,leaf_item_max=0,leaf_key_max=0,"
+	  "leaf_page_max=32KB,leaf_value_max=0,memory_page_max=5MB,"
+	  "os_cache_dirty_max=0,os_cache_max=0,prefix_compression=0,"
+	  "prefix_compression_min=4,split_pct=75,value_format=u,"
+	  "version=(major=0,minor=0)",
 	  confchk_file_meta
 	},
 	{ "index.meta",
@@ -604,8 +611,9 @@ static const WT_CONFIG_ENTRY config_entries[] = {
 	  "block_compressor=,cache_resident=0,checksum=uncompressed,"
 	  "colgroups=,collator=,columns=,dictionary=0,exclusive=0,"
 	  "extractor=,format=btree,huffman_key=,huffman_value=,immutable=0,"
-	  "internal_item_max=0,internal_key_truncate=,internal_page_max=4KB"
-	  ",key_format=u,key_gap=10,leaf_item_max=0,leaf_page_max=32KB,"
+	  "internal_item_max=0,internal_key_max=0,internal_key_truncate=,"
+	  "internal_page_max=4KB,key_format=u,key_gap=10,leaf_item_max=0,"
+	  "leaf_key_max=0,leaf_page_max=32KB,leaf_value_max=0,"
 	  "lsm=(auto_throttle=,bloom=,bloom_bit_count=16,bloom_config=,"
 	  "bloom_hash_count=8,bloom_oldest=0,chunk_max=5GB,chunk_size=10MB,"
 	  "merge_max=15,merge_min=0),memory_page_max=5MB,"
diff --git a/src/conn/conn_api.c b/src/conn/conn_api.c
index 8d104729733..551c3037f7b 100644
--- a/src/conn/conn_api.c
+++ b/src/conn/conn_api.c
@@ -292,7 +292,7 @@ __conn_add_collator(WT_CONNECTION *wt_conn,
 		WT_ERR_MSG(session, EINVAL,
 		    "invalid name for a collator: %s", name);
 
-	WT_ERR(__wt_calloc_def(session, 1, &ncoll));
+	WT_ERR(__wt_calloc_one(session, &ncoll));
 	WT_ERR(__wt_strdup(session, name, &ncoll->name));
 	ncoll->collator = collator;
 
@@ -363,7 +363,7 @@ __conn_add_compressor(WT_CONNECTION *wt_conn,
 		WT_ERR_MSG(session, EINVAL,
 		    "invalid name for a compressor: %s", name);
 
-	WT_ERR(__wt_calloc_def(session, 1, &ncomp));
+	WT_ERR(__wt_calloc_one(session, &ncomp));
 	WT_ERR(__wt_strdup(session, name, &ncomp->name));
 	ncomp->compressor = compressor;
 
@@ -428,7 +428,7 @@ __conn_add_data_source(WT_CONNECTION *wt_conn,
 	CONNECTION_API_CALL(conn, session, add_data_source, config, cfg);
 	WT_UNUSED(cfg);
 
-	WT_ERR(__wt_calloc_def(session, 1, &ndsrc));
+	WT_ERR(__wt_calloc_one(session, &ndsrc));
 	WT_ERR(__wt_strdup(session, prefix, &ndsrc->prefix));
 	ndsrc->dsrc = dsrc;
 
@@ -497,7 +497,7 @@ __conn_add_extractor(WT_CONNECTION *wt_conn,
 		WT_ERR_MSG(session, EINVAL,
 		    "invalid name for an extractor: %s", name);
 
-	WT_ERR(__wt_calloc_def(session, 1, &nextractor));
+	WT_ERR(__wt_calloc_one(session, &nextractor));
 	WT_ERR(__wt_strdup(session, name, &nextractor->name));
 	nextractor->extractor = extractor;
 
@@ -1490,7 +1490,7 @@ wiredtiger_open(const char *home, WT_EVENT_HANDLER *event_handler,
 
 	WT_RET(__wt_library_init());
 
-	WT_RET(__wt_calloc_def(NULL, 1, &conn));
+	WT_RET(__wt_calloc_one(NULL, &conn));
 	conn->iface = stdc;
 
 	/*
diff --git a/src/conn/conn_cache.c b/src/conn/conn_cache.c
index 079bd05ff1e..61bd4447abf 100644
--- a/src/conn/conn_cache.c
+++ b/src/conn/conn_cache.c
@@ -83,7 +83,7 @@ __wt_cache_create(WT_SESSION_IMPL *session, const char *cfg[])
 	WT_ASSERT(session, conn->cache == NULL ||
 	    (F_ISSET(conn, WT_CONN_CACHE_POOL) && conn->cache != NULL));
 
-	WT_RET(__wt_calloc_def(session, 1, &conn->cache));
+	WT_RET(__wt_calloc_one(session, &conn->cache));
 
 	cache = conn->cache;
 
diff --git a/src/conn/conn_cache_pool.c b/src/conn/conn_cache_pool.c
index c7558eea5fb..dcc37da3b3b 100644
--- a/src/conn/conn_cache_pool.c
+++ b/src/conn/conn_cache_pool.c
@@ -81,7 +81,7 @@ __wt_cache_pool_config(WT_SESSION_IMPL *session, const char **cfg)
 	if (__wt_process.cache_pool == NULL) {
 		WT_ASSERT(session, !reconfiguring);
 		/* Create a cache pool. */
-		WT_ERR(__wt_calloc_def(session, 1, &cp));
+		WT_ERR(__wt_calloc_one(session, &cp));
 		created = 1;
 		cp->name = pool_name;
 		pool_name = NULL; /* Belongs to the cache pool now. */
diff --git a/src/conn/conn_dhandle.c b/src/conn/conn_dhandle.c
index 088ff2f3d2c..cfd99ac1f8f 100644
--- a/src/conn/conn_dhandle.c
+++ b/src/conn/conn_dhandle.c
@@ -187,7 +187,7 @@ __conn_dhandle_get(WT_SESSION_IMPL *session,
 	 * then initialize the data handle.  Exclusively lock the data handle
 	 * before inserting it in the list.
 	 */
-	WT_RET(__wt_calloc_def(session, 1, &dhandle));
+	WT_RET(__wt_calloc_one(session, &dhandle));
 
 	WT_ERR(__wt_rwlock_alloc(session, &dhandle->rwlock, "data handle"));
 
@@ -196,7 +196,7 @@ __conn_dhandle_get(WT_SESSION_IMPL *session,
 	if (ckpt != NULL)
 		WT_ERR(__wt_strdup(session, ckpt, &dhandle->checkpoint));
 
-	WT_ERR(__wt_calloc_def(session, 1, &btree));
+	WT_ERR(__wt_calloc_one(session, &btree));
 	dhandle->handle = btree;
 	btree->dhandle = dhandle;
 
diff --git a/src/conn/conn_log.c b/src/conn/conn_log.c
index 618a0934ce1..6a1a63b5abe 100644
--- a/src/conn/conn_log.c
+++ b/src/conn/conn_log.c
@@ -341,7 +341,7 @@ __wt_logmgr_create(WT_SESSION_IMPL *session, const char *cfg[])
 	/*
 	 * Logging is on, allocate the WT_LOG structure and open the log file.
 	 */
-	WT_RET(__wt_calloc(session, 1, sizeof(WT_LOG), &conn->log));
+	WT_RET(__wt_calloc_one(session, &conn->log));
 	log = conn->log;
 	WT_RET(__wt_spin_init(session, &log->log_lock, "log"));
 	WT_RET(__wt_spin_init(session, &log->log_slot_lock, "log slot"));
diff --git a/src/cursor/cur_backup.c b/src/cursor/cur_backup.c
index 41bfaea7ee3..2c03fc55b85 100644
--- a/src/cursor/cur_backup.c
+++ b/src/cursor/cur_backup.c
@@ -126,7 +126,7 @@ __wt_curbackup_open(WT_SESSION_IMPL *session,
 
 	cb = NULL;
 
-	WT_RET(__wt_calloc_def(session, 1, &cb));
+	WT_RET(__wt_calloc_one(session, &cb));
 	cursor = &cb->iface;
 	*cursor = iface;
 	cursor->session = &session->iface;
diff --git a/src/cursor/cur_config.c b/src/cursor/cur_config.c
index 5e7ca487ae2..b37736d1b43 100644
--- a/src/cursor/cur_config.c
+++ b/src/cursor/cur_config.c
@@ -49,7 +49,7 @@ __wt_curconfig_open(WT_SESSION_IMPL *session,
 
 	WT_UNUSED(uri);
 
-	WT_RET(__wt_calloc_def(session, 1, &cconfig));
+	WT_RET(__wt_calloc_one(session, &cconfig));
 
 	cursor = &cconfig->iface;
 	*cursor = iface;
diff --git a/src/cursor/cur_ds.c b/src/cursor/cur_ds.c
index 096a0e27f8d..f16cc9b33f0 100644
--- a/src/cursor/cur_ds.c
+++ b/src/cursor/cur_ds.c
@@ -474,7 +474,7 @@ __wt_curds_open(
 	data_source = NULL;
 	metaconf = NULL;
 
-	WT_RET(__wt_calloc_def(session, 1, &data_source));
+	WT_RET(__wt_calloc_one(session, &data_source));
 	cursor = &data_source->iface;
 	*cursor = iface;
 	cursor->session = &session->iface;
diff --git a/src/cursor/cur_dump.c b/src/cursor/cur_dump.c
index 5760752d406..55b47d13a6d 100644
--- a/src/cursor/cur_dump.c
+++ b/src/cursor/cur_dump.c
@@ -372,7 +372,7 @@ __wt_curdump_create(WT_CURSOR *child, WT_CURSOR *owner, WT_CURSOR **cursorp)
 
 	session = (WT_SESSION_IMPL *)child->session;
 
-	WT_RET(__wt_calloc_def(session, 1, &cdump));
+	WT_RET(__wt_calloc_one(session, &cdump));
 	cursor = &cdump->iface;
 	*cursor = iface;
 	cursor->session = child->session;
@@ -385,7 +385,7 @@ __wt_curdump_create(WT_CURSOR *child, WT_CURSOR *owner, WT_CURSOR **cursorp)
 	F_SET(cursor, F_ISSET(child,
 	    WT_CURSTD_DUMP_HEX | WT_CURSTD_DUMP_JSON | WT_CURSTD_DUMP_PRINT));
 	if (F_ISSET(cursor, WT_CURSTD_DUMP_JSON)) {
-		WT_ERR(__wt_calloc_def(session, 1, &json));
+		WT_ERR(__wt_calloc_one(session, &json));
 		cursor->json_private = child->json_private = json;
 	}
 
diff --git a/src/cursor/cur_index.c b/src/cursor/cur_index.c
index b516b5c58b1..2b31f75cf08 100644
--- a/src/cursor/cur_index.c
+++ b/src/cursor/cur_index.c
@@ -383,7 +383,7 @@ __wt_curindex_open(WT_SESSION_IMPL *session,
 		namesize = (size_t)(columns - idxname);
 
 	WT_RET(__wt_schema_open_index(session, table, idxname, namesize, &idx));
-	WT_RET(__wt_calloc_def(session, 1, &cindex));
+	WT_RET(__wt_calloc_one(session, &cindex));
 
 	cursor = &cindex->iface;
 	*cursor = iface;
diff --git a/src/cursor/cur_log.c b/src/cursor/cur_log.c
index bdb19d05c01..0d375ee4a52 100644
--- a/src/cursor/cur_log.c
+++ b/src/cursor/cur_log.c
@@ -336,12 +336,12 @@ __wt_curlog_open(WT_SESSION_IMPL *session,
 
 	log = conn->log;
 	cl = NULL;
-	WT_RET(__wt_calloc_def(session, 1, &cl));
+	WT_RET(__wt_calloc_one(session, &cl));
 	cursor = &cl->iface;
 	*cursor = iface;
 	cursor->session = &session->iface;
-	WT_ERR(__wt_calloc_def(session, 1, &cl->cur_lsn));
-	WT_ERR(__wt_calloc_def(session, 1, &cl->next_lsn));
+	WT_ERR(__wt_calloc_one(session, &cl->cur_lsn));
+	WT_ERR(__wt_calloc_one(session, &cl->next_lsn));
 	WT_ERR(__wt_scr_alloc(session, 0, &cl->logrec));
 	WT_ERR(__wt_scr_alloc(session, 0, &cl->opkey));
 	WT_ERR(__wt_scr_alloc(session, 0, &cl->opvalue));
diff --git a/src/cursor/cur_metadata.c b/src/cursor/cur_metadata.c
index d6c76c48ab9..e1e08c307fc 100644
--- a/src/cursor/cur_metadata.c
+++ b/src/cursor/cur_metadata.c
@@ -423,7 +423,7 @@ __wt_curmetadata_open(WT_SESSION_IMPL *session,
 	WT_DECL_RET;
 	WT_CONFIG_ITEM cval;
 
-	WT_RET(__wt_calloc_def(session, 1, &mdc));
+	WT_RET(__wt_calloc_one(session, &mdc));
 
 	cursor = &mdc->iface;
 	*cursor = iface;
@@ -445,7 +445,9 @@ __wt_curmetadata_open(WT_SESSION_IMPL *session,
 	}
 
 	if (0) {
-err:		__wt_free(session, mdc);
+err:		if (mdc->file_cursor != NULL)
+			WT_TRET(mdc->file_cursor->close(mdc->file_cursor));
+		__wt_free(session, mdc);
 	}
 	return (ret);
 }
diff --git a/src/cursor/cur_stat.c b/src/cursor/cur_stat.c
index cc12077024f..74237c6ffdc 100644
--- a/src/cursor/cur_stat.c
+++ b/src/cursor/cur_stat.c
@@ -503,7 +503,7 @@ __wt_curstat_open(WT_SESSION_IMPL *session,
 
 	conn = S2C(session);
 
-	WT_ERR(__wt_calloc_def(session, 1, &cst));
+	WT_ERR(__wt_calloc_one(session, &cst));
 	cursor = &cst->iface;
 	*cursor = iface;
 	cursor->session = &session->iface;
diff --git a/src/cursor/cur_table.c b/src/cursor/cur_table.c
index 50d76609411..1825d641c49 100644
--- a/src/cursor/cur_table.c
+++ b/src/cursor/cur_table.c
@@ -878,7 +878,7 @@ __wt_curtable_open(WT_SESSION_IMPL *session,
 		return (ret);
 	}
 
-	WT_RET(__wt_calloc_def(session, 1, &ctable));
+	WT_RET(__wt_calloc_one(session, &ctable));
 
 	cursor = &ctable->iface;
 	*cursor = iface;
diff --git a/src/docs/error-handling.dox b/src/docs/error-handling.dox
index bced608434b..cf268f80500 100644
--- a/src/docs/error-handling.dox
+++ b/src/docs/error-handling.dox
@@ -47,7 +47,7 @@ This error is returned when an error is not covered by a specific error return.
 This error indicates an operation did not find a value to return. This includes cursor search and other operations where no record matched the cursor's search key such as WT_CURSOR::update or WT_CURSOR::remove.
 
 @par <code>WT_PANIC</code>
-This error indicates an underlying problem that requires the application exit and restart.
+This error indicates an underlying problem that requires the application exit and restart. The application can exit immediately when \c WT_PANIC is returned from a WiredTiger interface, no further WiredTiger calls are required.
 
 @if IGNORE_BUILT_BY_API_ERR_END
 @endif
diff --git a/src/docs/examples.dox b/src/docs/examples.dox
index 53bd3589362..9b86df099e3 100644
--- a/src/docs/examples.dox
+++ b/src/docs/examples.dox
@@ -22,9 +22,6 @@ extractors and cursor types.
 @example ex_extractor.c
 Shows how to extend WiredTiger with a more complex custom extractor.
 
-@example ex_file.c
-Shows how to use file objects.
-
 @example ex_hello.c
 This is an example of how to create and open a database.
 
diff --git a/src/docs/tune-page-sizes.dox b/src/docs/tune-page-sizes.dox
index b3fd20f6276..130e047a02d 100644
--- a/src/docs/tune-page-sizes.dox
+++ b/src/docs/tune-page-sizes.dox
@@ -1,42 +1,127 @@
-/*! @page tune_page_sizes  Page and overflow item sizes
-
-There are four page and item size configuration values: \c internal_page_max,
-\c internal_item_max, \c leaf_page_max and \c leaf_item_max.  All four are
-specified to the WT_SESSION::create method, that is, they are configurable
-on a per-file basis.
-
-The \c internal_page_max and \c leaf_page_max configuration values specify
-the maximum size for Btree internal and leaf pages.  That is, when an
-internal or leaf page grows past the specified size, it splits into
-multiple pages.  Generally, internal pages should be sized to fit into
-the system's on-chip caches in order to minimize cache misses when
-searching the tree, while leaf pages should be sized to maximize I/O
-performance (if reading from disk is necessary, it is usually desirable
-to read a large amount of data, assuming some locality of reference in
-the application's access pattern).
-
-The \c internal_item_max and \c leaf_item_max configuration values specify
-the maximum size at which an object will be stored on-page.  Larger items
-will be stored separately in the file from the page where the item logically
-appears.  Referencing overflow items is more expensive than referencing
-on-page items, requiring additional I/O if the object is not already cached.
-For this reason, it is important to avoid creating large numbers of overflow
-items that are repeatedly referenced, and the maximum item size should
-probably be increased if many overflow items are being created.  Because
-pages must be large enough to store any item that is not an overflow item,
-increasing the size of the overflow items may also require increasing the
-page sizes.
-
-With respect to compression, page and item sizes do not necessarily reflect
-the actual size of the page or item on disk, if block compression has been
-configured.  Block compression in WiredTiger happens within the disk I/O
-subsystem, and so a page might split even if subsequent compression would
-result in a resulting page size that would be small enough to leave as a
-single page.  In other words, page and overflow sizes are based on in-memory
-sizes, not disk sizes.
-
-There are two other, related configuration values, also settable by the
-WT_SESSION::create method.  They are \c allocation_size and \c split_pct.
+/*! @page tune_page_sizes  Page and overflow key/value sizes
+
+There are seven page and key/value size configuration strings:
+
+- allocation size (\c allocation_size),
+- page sizes (\c internal_page_max and \c leaf_page_max),
+- key and value sizes (\c internal_key_max, \c leaf_key_max and \c leaf_value_max), and
+- page-split percentage (\c split_pct).
+
+All seven are specified to the WT_SESSION::create method, in other
+words, they are configurable on a per-file basis.
+
+Applications commonly configure page sizes, based on their workload's
+typical key and value size. Once the correct page size has been chosen,
+appropriate defaults for the other configuration values are derived from
+the page sizes, and relatively few applications will need to modify the
+other page and key/value size configuration options.
+
+An example of configuring page and key/value sizes:
+
+@snippet ex_all.c Create a table and configure the page size
+
+@section tune_page_sizes_sizes Page, key and value sizes
+
+The \c internal_page_max and \c leaf_page_max configuration values
+specify a maximum size for Btree internal and leaf pages.  That is, when
+an internal or leaf page grows past that size, it splits into multiple
+pages.  Generally, internal pages should be sized to fit into on-chip
+caches in order to minimize cache misses when searching the tree, while
+leaf pages should be sized to maximize I/O performance (if reading from
+disk is necessary, it is usually desirable to read a large amount of
+data, assuming some locality of reference in the application's access
+pattern).
+
+The default page size configurations (4KB for \c internal_page_max, 32KB
+for \c leaf_page_max), are appropriate for applications with relatively
+small keys and values.
+
+- Applications doing full-table scans through out-of-memory workloads
+might increase both internal and leaf page sizes to transfer more data
+per I/O.
+- Applications focused on read/write amplification might decrease the page
+size to better match the underlying storage block size.
+
+When block compression has been configured, configured page sizes will
+not match the actual size of the page on disk. Block compression in
+WiredTiger happens within the I/O subsystem, and so a page might split
+even if subsequent compression would result in a page size
+small enough to leave as a single page.  In other words, page sizes are
+based on in-memory sizes, not on-disk sizes. Applications needing to
+write specific sized blocks may want to consider implementing a
+WT_COMPRESSOR::compress_raw function.
+
+The page sizes also determine the default size of overflow items, that
+is, keys and values too large to easily store on a page.  Overflow items
+are stored separately in the file from the page where the item logically
+appears, and so reading or writing an overflow item is more expensive
+than an on-page item, normally requiring additional I/O.  Additionally,
+overflow values are not cached in memory. This means overflow items
+won't affect the caching behavior of the application, but it also means
+that each time an overflow value is read, it is re-read from disk.
+
+For both of these reasons, applications should avoid creating large
+numbers of commonly referenced overflow items.  This is especially
+important for keys, as keys on internal pages are referenced during
+random searches, not just during data retrieval.  Generally,
+applications should make every attempt to avoid creating overflow keys.
+
+- Applications with large keys and values, and concerned with latency,
+might increase the page size to avoid creating overflow items, in order
+to avoid the additional cost of retrieving them.
+
+- Applications with large keys and values, doing random searches, might
+decrease the page size to avoid wasting cache space on overflow items
+that aren't likely to be needed.
+
+- Applications with large keys and values, doing table scans, might
+increase the page size to avoid creating overflow items, as the overflow
+items must be read into memory in all cases, anyway.
+
+The \c internal_key_max, \c leaf_key_max and \c leaf_value_max
+configuration values allow applications to change the size at which a
+key or value will be treated as an overflow item.
+
+The value of \c internal_key_max is relative to the maximum internal
+page size.  Because the number of keys on an internal page determines
+the depth of the tree, the \c internal_key_max value can only be
+adjusted within a certain range, and the configured value will be
+automatically adjusted by WiredTiger, if necessary to ensure a
+reasonable number of keys fit on an internal page.
+
+The values of \c leaf_key_max and \c leaf_value_max are not relative to
+the maximum leaf page size. If either is larger than the maximum page
+size, the page size will be ignored when the larger keys and values are
+being written, and a larger page will be created as necessary.
+
+Most applications should not need to tune the maximum key and value
+sizes.  Applications requiring a small page size, but also having
+latency concerns such that the additional work to retrieve an overflow
+item is an issue, may find them useful.
+
+An example of configuring a large leaf overflow value:
+
+@snippet ex_all.c Create a table and configure a large leaf value max
+
+@section tune_page_sizes_split_percentage Split percentage
+
+The \c split_pct configuration string configures the size of a split
+page.  When a page grows sufficiently large that it must be written as
+multiple disk blocks, the newly written block size is \c split_pct
+percent of the maximum page size.  This value should be selected to
+avoid creating a large number of tiny pages or repeatedly splitting
+whenever new entries are inserted.  For example, if the maximum page
+size is 1MB, a \c split_pct value of 10% would potentially result in
+creating a large number of 100KB pages, which may not be optimal for
+future I/O.   Or, if the maximum page size is 1MB, a \c split_pct value
+of 90% would potentially result in repeatedly splitting pages as the
+split pages grow to 1MB over and over.  The default value for \c
+split_pct is 75%, intended to keep large pages relatively large, while
+still giving split pages room to grow.
+
+Most applications should not need to tune the split percentage size.
+
+@section tune_page_sizes_allocation_size Allocation size
 
 The \c allocation_size configuration value is the underlying unit of
 allocation for the file.  As the unit of file allocation, it sets the
@@ -46,25 +131,12 @@ is set to 4KB, an overflow item of 18,000 bytes requires 5 allocation
 units and wastes about 2KB of space.  If the allocation size is 16KB,
 the same overflow item would waste more than 10KB.
 
-The default allocation size is 4KB, chosen for compatibility with virtual
-memory page sizes and direct I/O requirements on common server platforms.
-
-The last configuration value is \c split_pct, which configures the size
-of a split page.  When a page grows sufficiently large that it must be
-written as multiple disk blocks, the newly written block size is \c
-split_pct percent of the maximum page size.  This value should be
-selected to avoid creating a large number of tiny pages or repeatedly
-splitting whenever new entries are inserted.  For example, if the
-maximum page size is 1MB, a \c split_pct value of 10% would potentially
-result in creating a large number of 100KB pages, which may not be
-optimal for future I/O.   Or, if the maximum page size is 1MB, a \c
-split_pct value of 90% would potentially result in repeatedly splitting
-pages as the split pages grow to 1MB over and over.  The default value
-for \c split_pct is 75%, intended to keep large pages relatively large,
-while still giving split pages room to grow.
-
-An example of configuring page sizes:
+The default allocation size is 4KB, chosen for compatibility with
+virtual memory page sizes and direct I/O requirements on common server
+platforms.
 
-@snippet ex_file.c file create
+Most applications should not need to tune the allocation size; it is
+primarily intended for applications coping with the specific
+requirements some file systems make to support features like direct I/O.
 
- */
+*/
diff --git a/src/docs/upgrading.dox b/src/docs/upgrading.dox
index 0e750ae0ca1..0fb858643fd 100644
--- a/src/docs/upgrading.dox
+++ b/src/docs/upgrading.dox
@@ -17,6 +17,14 @@ Collators, compressors and extractors can now be disabled with an explicit
 using the name \c "none" for a collator, compressor or extractor will need to
 be updated.
 </dd>
+
+<dt>maximum key and value sizes
+<dd>
+The WT_SESSION::create \c internal_item_max and \c leaf_item_max
+configuration strings are now deprecated in favor of the
+\c internal_key_max, \c leaf_key_max, and \c leaf_value_max
+configuration strings. See @ref tune_page_sizes for more information.
+</dd>
 </dl>
 
 @section version_241 Upgrading to Version 2.4.1
diff --git a/src/evict/evict_page.c b/src/evict/evict_page.c
index fa3bfa50eb0..bc791de6d0f 100644
--- a/src/evict/evict_page.c
+++ b/src/evict/evict_page.c
@@ -206,7 +206,7 @@ __evict_page_dirty_update(WT_SESSION_IMPL *session, WT_REF *ref, int exclusive)
 		 * Publish: a barrier to ensure the structure fields are set
 		 * before the state change makes the page available to readers.
 		 */
-		WT_RET(__wt_calloc(session, 1, sizeof(WT_ADDR), &addr));
+		WT_RET(__wt_calloc_one(session, &addr));
 		*addr = mod->mod_replace;
 		mod->mod_replace.addr = NULL;
 		mod->mod_replace.size = 0;
diff --git a/src/include/btree.h b/src/include/btree.h
index 907b36c9ed4..e7c1826bda9 100644
--- a/src/include/btree.h
+++ b/src/include/btree.h
@@ -83,9 +83,10 @@ struct __wt_btree {
 
 	uint32_t allocsize;		/* Allocation size */
 	uint32_t maxintlpage;		/* Internal page max size */
-	uint32_t maxintlitem;		/* Internal page max item size */
+	uint32_t maxintlkey;		/* Internal page max key size */
 	uint32_t maxleafpage;		/* Leaf page max size */
-	uint32_t maxleafitem;		/* Leaf page max item size */
+	uint32_t maxleafkey;		/* Leaf page max key size */
+	uint32_t maxleafvalue;		/* Leaf page max value size */
 	uint64_t maxmempage;		/* In memory page max size */
 
 	void *huffman_key;		/* Key huffman encoding */
diff --git a/src/include/extern.h b/src/include/extern.h
index 5a5601160c8..00bbdaf746c 100644
--- a/src/include/extern.h
+++ b/src/include/extern.h
@@ -122,7 +122,6 @@ extern void __wt_root_ref_init(WT_REF *root_ref, WT_PAGE *root, int is_recno);
 extern int __wt_btree_tree_open( WT_SESSION_IMPL *session, const uint8_t *addr, size_t addr_size);
 extern int __wt_btree_new_leaf_page(WT_SESSION_IMPL *session, WT_PAGE **pagep);
 extern void __wt_btree_evictable(WT_SESSION_IMPL *session, int on);
-extern uint32_t __wt_split_page_size(WT_BTREE *btree, uint32_t maxpagesize);
 extern int __wt_btree_huffman_open(WT_SESSION_IMPL *session);
 extern void __wt_btree_huffman_close(WT_SESSION_IMPL *session);
 extern int __wt_bt_read(WT_SESSION_IMPL *session, WT_ITEM *buf, const uint8_t *addr, size_t addr_size);
@@ -496,6 +495,7 @@ extern void __wt_ovfl_txnc_free(WT_SESSION_IMPL *session, WT_PAGE *page);
 extern int __wt_ovfl_track_wrapup(WT_SESSION_IMPL *session, WT_PAGE *page);
 extern int __wt_ovfl_track_wrapup_err(WT_SESSION_IMPL *session, WT_PAGE *page);
 extern int __wt_reconcile(WT_SESSION_IMPL *session, WT_REF *ref, WT_SALVAGE_COOKIE *salvage, uint32_t flags);
+extern uint32_t __wt_split_page_size(WT_BTREE *btree, uint32_t maxpagesize);
 extern int __wt_bulk_init(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk);
 extern int __wt_bulk_wrapup(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk);
 extern int __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk);
diff --git a/src/include/misc.h b/src/include/misc.h
index c861dff18bc..c2abaa08057 100644
--- a/src/include/misc.h
+++ b/src/include/misc.h
@@ -65,11 +65,13 @@
 #define	WT_SKIP_PROBABILITY	(UINT32_MAX >> 2)
 
 /*
- * __wt_calloc_def --
- *	Simple calls don't need separate sizeof arguments.
+ * __wt_calloc_def, __wt_calloc_one --
+ *	Most calloc calls don't need separate count or sizeof arguments.
  */
 #define	__wt_calloc_def(session, number, addr)				\
 	__wt_calloc(session, (size_t)(number), sizeof(**(addr)), addr)
+#define	__wt_calloc_one(session, addr)					\
+	__wt_calloc(session, (size_t)1, sizeof(**(addr)), addr)
 
 /*
  * __wt_realloc_def --
diff --git a/src/include/stat.h b/src/include/stat.h
index 37df43adfee..69fa0ba8e4f 100644
--- a/src/include/stat.h
+++ b/src/include/stat.h
@@ -287,10 +287,11 @@ struct __wt_dsrc_stats {
 	WT_STATS btree_entries;
 	WT_STATS btree_fixed_len;
 	WT_STATS btree_maximum_depth;
-	WT_STATS btree_maxintlitem;
+	WT_STATS btree_maxintlkey;
 	WT_STATS btree_maxintlpage;
-	WT_STATS btree_maxleafitem;
+	WT_STATS btree_maxleafkey;
 	WT_STATS btree_maxleafpage;
+	WT_STATS btree_maxleafvalue;
 	WT_STATS btree_overflow;
 	WT_STATS btree_row_internal;
 	WT_STATS btree_row_leaf;
diff --git a/src/include/wiredtiger.in b/src/include/wiredtiger.in
index 100bc771798..9aa219eccfc 100644
--- a/src/include/wiredtiger.in
+++ b/src/include/wiredtiger.in
@@ -762,6 +762,18 @@ struct __wt_session {
 	/*! The connection for this session. */
 	WT_CONNECTION *connection;
 
+	/*
+	 * Don't expose app_private to non-C language bindings - they have
+	 * their own way to attach data to an operation.
+	 */
+#if !defined(SWIG)
+	/*!
+	 * A location for applications to store information that will be
+	 * available in callbacks taking a WT_SESSION handle.
+	 */
+	void *app_private;
+#endif
+
 	/*!
 	 * Close the session handle.
 	 *
@@ -997,12 +1009,12 @@ struct __wt_session {
 	 * @config{immutable, configure the index to be immutable - that is an
 	 * index is not changed by any update to a record in the table., a
 	 * boolean flag; default \c false.}
-	 * @config{internal_item_max, the largest key stored within an internal
-	 * node\, in bytes.  If non-zero\, any key larger than the specified
-	 * size will be stored as an overflow item (which may require additional
-	 * I/O to access). If zero\, a default size is chosen that permits at
-	 * least 8 keys per internal page., an integer greater than or equal to
-	 * 0; default \c 0.}
+	 * @config{internal_key_max, the largest key stored in an internal
+	 * node\, in bytes.  If set\, keys larger than the specified size are
+	 * stored as overflow items (which may require additional I/O to
+	 * access). The default and the maximum allowed value are both one-tenth
+	 * the size of a newly split internal page., an integer greater than or
+	 * equal to 0; default \c 0.}
 	 * @config{internal_key_truncate, configure internal key truncation\,
 	 * discarding unnecessary trailing bytes on internal keys (ignored for
 	 * custom collators)., a boolean flag; default \c true.}
@@ -1020,12 +1032,11 @@ struct __wt_session {
 	 * row-store files: keys of type \c 'r' are record numbers and records
 	 * referenced by record number are stored in column-store files., a
 	 * format string; default \c u.}
-	 * @config{leaf_item_max, the largest key or value stored within a leaf
-	 * node\, in bytes.  If non-zero\, any key or value larger than the
-	 * specified size will be stored as an overflow item (which may require
-	 * additional I/O to access). If zero\, a default size is chosen that
-	 * permits at least 4 key and value pairs per leaf page., an integer
-	 * greater than or equal to 0; default \c 0.}
+	 * @config{leaf_key_max, the largest key stored in a leaf node\, in
+	 * bytes.  If set\, keys larger than the specified size are stored as
+	 * overflow items (which may require additional I/O to access). The
+	 * default value is one-tenth the size of a newly split leaf page., an
+	 * integer greater than or equal to 0; default \c 0.}
 	 * @config{leaf_page_max, the maximum page size for leaf nodes\, in
 	 * bytes; the size must be a multiple of the allocation size\, and is
 	 * significant for applications wanting to maximize sequential data
@@ -1033,6 +1044,13 @@ struct __wt_session {
 	 * uncompressed data\, that is\, the limit is applied before any block
 	 * compression is done., an integer between 512B and 512MB; default \c
 	 * 32KB.}
+	 * @config{leaf_value_max, the largest value stored in a leaf node\, in
+	 * bytes.  If set\, values larger than the specified size are stored as
+	 * overflow items (which may require additional I/O to access). If the
+	 * size is larger than the maximum leaf page size\, the page size is
+	 * temporarily ignored when large values are written.  The default is
+	 * one-half the size of a newly split leaf page., an integer greater
+	 * than or equal to 0; default \c 0.}
 	 * @config{lsm = (, options only relevant for LSM data sources., a set
 	 * of related configuration options defined below.}
 	 * @config{&nbsp;&nbsp;&nbsp;&nbsp;auto_throttle, Throttle inserts into
@@ -2056,6 +2074,11 @@ struct __wt_event_handler {
 	 * Callback to handle error messages; by default, error messages are
 	 * written to the stderr stream.
 	 *
+	 * Errors that require the application to exit and restart will have
+	 * their \c error value set to \c WT_PANIC. The application can exit
+	 * immediately when \c WT_PANIC is passed to an error handler; there
+	 * is no reason to return into WiredTiger.
+	 *
 	 * Error handler returns are not ignored: if the handler returns
 	 * non-zero, the error may cause the WiredTiger function posting the
 	 * event to fail, and may even cause operation or library failure.
@@ -2526,7 +2549,9 @@ const char *wiredtiger_version(int *majorp, int *minorp, int *patchp);
 /*!
  * WiredTiger library panic.
  * This error indicates an underlying problem that requires the application exit
- * and restart.
+ * and restart. The application can exit immediately when \c WT_PANIC is
+ * returned from a WiredTiger interface; no further WiredTiger calls are
+ * required.
  */
 #define	WT_PANIC	-31804
 /*! @cond internal */
@@ -2642,7 +2667,7 @@ struct __wt_compressor {
 	 * of \c dst_len.  If the WT_COMPRESSOR::pre_size method is specified,
 	 * the destination buffer will be at least the size returned by that
 	 * method; otherwise, the destination buffer will be at least as large
-	 * as \c src_len.
+	 * as the length of the data to compress.
 	 *
 	 * If compression would not shrink the data or the \c dst buffer is not
 	 * large enough to hold the compressed data, the callback should set
@@ -2712,10 +2737,8 @@ struct __wt_compressor {
 	 * On entry, \c dst points to the destination buffer with a length
 	 * of \c dst_len.  If the WT_COMPRESSOR::pre_size method is specified,
 	 * the destination buffer will be at least the size returned by that
-	 * method; otherwise, the destination buffer will be at least the
-	 * maximum size for the page being written (that is, when writing a
-	 * row-store leaf page, the destination buffer will be at least as
-	 * large as the \c leaf_page_max configuration value).
+	 * method; otherwise, the destination buffer will be at least as large
+	 * as the length of the data to compress.
 	 *
 	 * After successful completion, the callback should return \c 0, and
 	 * set \c result_slotsp to the number of byte strings encoded and
@@ -3378,130 +3401,132 @@ extern int wiredtiger_extension_terminate(WT_CONNECTION *connection);
 #define	WT_STAT_DSRC_BTREE_FIXED_LEN			2023
 /*! btree: maximum tree depth */
 #define	WT_STAT_DSRC_BTREE_MAXIMUM_DEPTH		2024
-/*! btree: maximum internal page item size */
-#define	WT_STAT_DSRC_BTREE_MAXINTLITEM			2025
+/*! btree: maximum internal page key size */
+#define	WT_STAT_DSRC_BTREE_MAXINTLKEY			2025
 /*! btree: maximum internal page size */
 #define	WT_STAT_DSRC_BTREE_MAXINTLPAGE			2026
-/*! btree: maximum leaf page item size */
-#define	WT_STAT_DSRC_BTREE_MAXLEAFITEM			2027
+/*! btree: maximum leaf page key size */
+#define	WT_STAT_DSRC_BTREE_MAXLEAFKEY			2027
 /*! btree: maximum leaf page size */
 #define	WT_STAT_DSRC_BTREE_MAXLEAFPAGE			2028
+/*! btree: maximum leaf page value size */
+#define	WT_STAT_DSRC_BTREE_MAXLEAFVALUE			2029
 /*! btree: overflow pages */
-#define	WT_STAT_DSRC_BTREE_OVERFLOW			2029
+#define	WT_STAT_DSRC_BTREE_OVERFLOW			2030
 /*! btree: row-store internal pages */
-#define	WT_STAT_DSRC_BTREE_ROW_INTERNAL			2030
+#define	WT_STAT_DSRC_BTREE_ROW_INTERNAL			2031
 /*! btree: row-store leaf pages */
-#define	WT_STAT_DSRC_BTREE_ROW_LEAF			2031
+#define	WT_STAT_DSRC_BTREE_ROW_LEAF			2032
 /*! cache: bytes read into cache */
-#define	WT_STAT_DSRC_CACHE_BYTES_READ			2032
+#define	WT_STAT_DSRC_CACHE_BYTES_READ			2033
 /*! cache: bytes written from cache */
-#define	WT_STAT_DSRC_CACHE_BYTES_WRITE			2033
+#define	WT_STAT_DSRC_CACHE_BYTES_WRITE			2034
 /*! cache: checkpoint blocked page eviction */
-#define	WT_STAT_DSRC_CACHE_EVICTION_CHECKPOINT		2034
+#define	WT_STAT_DSRC_CACHE_EVICTION_CHECKPOINT		2035
 /*! cache: unmodified pages evicted */
-#define	WT_STAT_DSRC_CACHE_EVICTION_CLEAN		2035
+#define	WT_STAT_DSRC_CACHE_EVICTION_CLEAN		2036
 /*! cache: modified pages evicted */
-#define	WT_STAT_DSRC_CACHE_EVICTION_DIRTY		2036
+#define	WT_STAT_DSRC_CACHE_EVICTION_DIRTY		2037
 /*! cache: data source pages selected for eviction unable to be evicted */
-#define	WT_STAT_DSRC_CACHE_EVICTION_FAIL		2037
+#define	WT_STAT_DSRC_CACHE_EVICTION_FAIL		2038
 /*! cache: hazard pointer blocked page eviction */
-#define	WT_STAT_DSRC_CACHE_EVICTION_HAZARD		2038
+#define	WT_STAT_DSRC_CACHE_EVICTION_HAZARD		2039
 /*! cache: internal pages evicted */
-#define	WT_STAT_DSRC_CACHE_EVICTION_INTERNAL		2039
+#define	WT_STAT_DSRC_CACHE_EVICTION_INTERNAL		2040
 /*! cache: in-memory page splits */
-#define	WT_STAT_DSRC_CACHE_INMEM_SPLIT			2040
+#define	WT_STAT_DSRC_CACHE_INMEM_SPLIT			2041
 /*! cache: overflow values cached in memory */
-#define	WT_STAT_DSRC_CACHE_OVERFLOW_VALUE		2041
+#define	WT_STAT_DSRC_CACHE_OVERFLOW_VALUE		2042
 /*! cache: pages read into cache */
-#define	WT_STAT_DSRC_CACHE_READ				2042
+#define	WT_STAT_DSRC_CACHE_READ				2043
 /*! cache: overflow pages read into cache */
-#define	WT_STAT_DSRC_CACHE_READ_OVERFLOW		2043
+#define	WT_STAT_DSRC_CACHE_READ_OVERFLOW		2044
 /*! cache: pages written from cache */
-#define	WT_STAT_DSRC_CACHE_WRITE			2044
+#define	WT_STAT_DSRC_CACHE_WRITE			2045
 /*! compression: raw compression call failed, no additional data available */
-#define	WT_STAT_DSRC_COMPRESS_RAW_FAIL			2045
+#define	WT_STAT_DSRC_COMPRESS_RAW_FAIL			2046
 /*! compression: raw compression call failed, additional data available */
-#define	WT_STAT_DSRC_COMPRESS_RAW_FAIL_TEMPORARY	2046
+#define	WT_STAT_DSRC_COMPRESS_RAW_FAIL_TEMPORARY	2047
 /*! compression: raw compression call succeeded */
-#define	WT_STAT_DSRC_COMPRESS_RAW_OK			2047
+#define	WT_STAT_DSRC_COMPRESS_RAW_OK			2048
 /*! compression: compressed pages read */
-#define	WT_STAT_DSRC_COMPRESS_READ			2048
+#define	WT_STAT_DSRC_COMPRESS_READ			2049
 /*! compression: compressed pages written */
-#define	WT_STAT_DSRC_COMPRESS_WRITE			2049
+#define	WT_STAT_DSRC_COMPRESS_WRITE			2050
 /*! compression: page written failed to compress */
-#define	WT_STAT_DSRC_COMPRESS_WRITE_FAIL		2050
+#define	WT_STAT_DSRC_COMPRESS_WRITE_FAIL		2051
 /*! compression: page written was too small to compress */
-#define	WT_STAT_DSRC_COMPRESS_WRITE_TOO_SMALL		2051
+#define	WT_STAT_DSRC_COMPRESS_WRITE_TOO_SMALL		2052
 /*! cursor: create calls */
-#define	WT_STAT_DSRC_CURSOR_CREATE			2052
+#define	WT_STAT_DSRC_CURSOR_CREATE			2053
 /*! cursor: insert calls */
-#define	WT_STAT_DSRC_CURSOR_INSERT			2053
+#define	WT_STAT_DSRC_CURSOR_INSERT			2054
 /*! cursor: bulk-loaded cursor-insert calls */
-#define	WT_STAT_DSRC_CURSOR_INSERT_BULK			2054
+#define	WT_STAT_DSRC_CURSOR_INSERT_BULK			2055
 /*! cursor: cursor-insert key and value bytes inserted */
-#define	WT_STAT_DSRC_CURSOR_INSERT_BYTES		2055
+#define	WT_STAT_DSRC_CURSOR_INSERT_BYTES		2056
 /*! cursor: next calls */
-#define	WT_STAT_DSRC_CURSOR_NEXT			2056
+#define	WT_STAT_DSRC_CURSOR_NEXT			2057
 /*! cursor: prev calls */
-#define	WT_STAT_DSRC_CURSOR_PREV			2057
+#define	WT_STAT_DSRC_CURSOR_PREV			2058
 /*! cursor: remove calls */
-#define	WT_STAT_DSRC_CURSOR_REMOVE			2058
+#define	WT_STAT_DSRC_CURSOR_REMOVE			2059
 /*! cursor: cursor-remove key bytes removed */
-#define	WT_STAT_DSRC_CURSOR_REMOVE_BYTES		2059
+#define	WT_STAT_DSRC_CURSOR_REMOVE_BYTES		2060
 /*! cursor: reset calls */
-#define	WT_STAT_DSRC_CURSOR_RESET			2060
+#define	WT_STAT_DSRC_CURSOR_RESET			2061
 /*! cursor: search calls */
-#define	WT_STAT_DSRC_CURSOR_SEARCH			2061
+#define	WT_STAT_DSRC_CURSOR_SEARCH			2062
 /*! cursor: search near calls */
-#define	WT_STAT_DSRC_CURSOR_SEARCH_NEAR			2062
+#define	WT_STAT_DSRC_CURSOR_SEARCH_NEAR			2063
 /*! cursor: update calls */
-#define	WT_STAT_DSRC_CURSOR_UPDATE			2063
+#define	WT_STAT_DSRC_CURSOR_UPDATE			2064
 /*! cursor: cursor-update value bytes updated */
-#define	WT_STAT_DSRC_CURSOR_UPDATE_BYTES		2064
+#define	WT_STAT_DSRC_CURSOR_UPDATE_BYTES		2065
 /*! LSM: sleep for LSM checkpoint throttle */
-#define	WT_STAT_DSRC_LSM_CHECKPOINT_THROTTLE		2065
+#define	WT_STAT_DSRC_LSM_CHECKPOINT_THROTTLE		2066
 /*! LSM: chunks in the LSM tree */
-#define	WT_STAT_DSRC_LSM_CHUNK_COUNT			2066
+#define	WT_STAT_DSRC_LSM_CHUNK_COUNT			2067
 /*! LSM: highest merge generation in the LSM tree */
-#define	WT_STAT_DSRC_LSM_GENERATION_MAX			2067
+#define	WT_STAT_DSRC_LSM_GENERATION_MAX			2068
 /*! LSM: queries that could have benefited from a Bloom filter that did
  * not exist */
-#define	WT_STAT_DSRC_LSM_LOOKUP_NO_BLOOM		2068
+#define	WT_STAT_DSRC_LSM_LOOKUP_NO_BLOOM		2069
 /*! LSM: sleep for LSM merge throttle */
-#define	WT_STAT_DSRC_LSM_MERGE_THROTTLE			2069
+#define	WT_STAT_DSRC_LSM_MERGE_THROTTLE			2070
 /*! reconciliation: dictionary matches */
-#define	WT_STAT_DSRC_REC_DICTIONARY			2070
+#define	WT_STAT_DSRC_REC_DICTIONARY			2071
 /*! reconciliation: internal page multi-block writes */
-#define	WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL		2071
+#define	WT_STAT_DSRC_REC_MULTIBLOCK_INTERNAL		2072
 /*! reconciliation: leaf page multi-block writes */
-#define	WT_STAT_DSRC_REC_MULTIBLOCK_LEAF		2072
+#define	WT_STAT_DSRC_REC_MULTIBLOCK_LEAF		2073
 /*! reconciliation: maximum blocks required for a page */
-#define	WT_STAT_DSRC_REC_MULTIBLOCK_MAX			2073
+#define	WT_STAT_DSRC_REC_MULTIBLOCK_MAX			2074
 /*! reconciliation: internal-page overflow keys */
-#define	WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL		2074
+#define	WT_STAT_DSRC_REC_OVERFLOW_KEY_INTERNAL		2075
 /*! reconciliation: leaf-page overflow keys */
-#define	WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF		2075
+#define	WT_STAT_DSRC_REC_OVERFLOW_KEY_LEAF		2076
 /*! reconciliation: overflow values written */
-#define	WT_STAT_DSRC_REC_OVERFLOW_VALUE			2076
+#define	WT_STAT_DSRC_REC_OVERFLOW_VALUE			2077
 /*! reconciliation: pages deleted */
-#define	WT_STAT_DSRC_REC_PAGE_DELETE			2077
+#define	WT_STAT_DSRC_REC_PAGE_DELETE			2078
 /*! reconciliation: page checksum matches */
-#define	WT_STAT_DSRC_REC_PAGE_MATCH			2078
+#define	WT_STAT_DSRC_REC_PAGE_MATCH			2079
 /*! reconciliation: page reconciliation calls */
-#define	WT_STAT_DSRC_REC_PAGES				2079
+#define	WT_STAT_DSRC_REC_PAGES				2080
 /*! reconciliation: page reconciliation calls for eviction */
-#define	WT_STAT_DSRC_REC_PAGES_EVICTION			2080
+#define	WT_STAT_DSRC_REC_PAGES_EVICTION			2081
 /*! reconciliation: leaf page key bytes discarded using prefix compression */
-#define	WT_STAT_DSRC_REC_PREFIX_COMPRESSION		2081
+#define	WT_STAT_DSRC_REC_PREFIX_COMPRESSION		2082
 /*! reconciliation: internal page key bytes discarded using suffix
  * compression */
-#define	WT_STAT_DSRC_REC_SUFFIX_COMPRESSION		2082
+#define	WT_STAT_DSRC_REC_SUFFIX_COMPRESSION		2083
 /*! session: object compaction */
-#define	WT_STAT_DSRC_SESSION_COMPACT			2083
+#define	WT_STAT_DSRC_SESSION_COMPACT			2084
 /*! session: open cursor count */
-#define	WT_STAT_DSRC_SESSION_CURSOR_OPEN		2084
+#define	WT_STAT_DSRC_SESSION_CURSOR_OPEN		2085
 /*! transaction: update conflicts */
-#define	WT_STAT_DSRC_TXN_UPDATE_CONFLICT		2085
+#define	WT_STAT_DSRC_TXN_UPDATE_CONFLICT		2086
 /*! @} */
 /*
  * Statistics section: END
diff --git a/src/lsm/lsm_cursor.c b/src/lsm/lsm_cursor.c
index 103a506287d..dd60ad926d8 100644
--- a/src/lsm/lsm_cursor.c
+++ b/src/lsm/lsm_cursor.c
@@ -322,14 +322,15 @@ __clsm_deleted_encode(WT_SESSION_IMPL *session,
  *	Decode values that start with the tombstone.
  */
 static inline void
-__clsm_deleted_decode(WT_ITEM *value)
+__clsm_deleted_decode(WT_CURSOR_LSM *clsm, WT_ITEM *value)
 {
 	/*
 	 * Take care with this check: when an LSM cursor is used for a merge,
 	 * and/or to create a Bloom filter, it is valid to return the tombstone
 	 * value.
 	 */
-	if (value->size > __tombstone.size &&
+	if (!F_ISSET(clsm, WT_CLSM_MERGE) &&
+	    value->size > __tombstone.size &&
 	    memcmp(value->data, __tombstone.data, __tombstone.size) == 0)
 		--value->size;
 }
@@ -840,7 +841,7 @@ retry:		/*
 err:	WT_TRET(__clsm_leave(clsm));
 	API_END(session, ret);
 	if (ret == 0)
-		__clsm_deleted_decode(&cursor->value);
+		__clsm_deleted_decode(clsm, &cursor->value);
 	return (ret);
 }
 
@@ -928,7 +929,7 @@ retry:		/*
 err:	WT_TRET(__clsm_leave(clsm));
 	API_END(session, ret);
 	if (ret == 0)
-		__clsm_deleted_decode(&cursor->value);
+		__clsm_deleted_decode(clsm, &cursor->value);
 	return (ret);
 }
 
@@ -1087,7 +1088,7 @@ __clsm_search(WT_CURSOR *cursor)
 err:	WT_TRET(__clsm_leave(clsm));
 	API_END(session, ret);
 	if (ret == 0)
-		__clsm_deleted_decode(&cursor->value);
+		__clsm_deleted_decode(clsm, &cursor->value);
 	return (ret);
 }
 
@@ -1173,8 +1174,7 @@ __clsm_search_near(WT_CURSOR *cursor, int *exactp)
 	 * smallest cursor larger than the search key, or it is NULL if the
 	 * search key is larger than any record in the tree.
 	 */
-	if (!exact)
-		cmp = 1;
+	cmp = exact ? 0 : 1;
 
 	/*
 	 * If we land on a deleted item, try going forwards or backwards to
@@ -1189,7 +1189,9 @@ __clsm_search_near(WT_CURSOR *cursor, int *exactp)
 		clsm->current = closest;
 		closest = NULL;
 		deleted = __clsm_deleted(clsm, &cursor->value);
-		if (deleted && (ret = cursor->next(cursor)) == 0) {
+		if (!deleted)
+			__clsm_deleted_decode(clsm, &cursor->value);
+		else if ((ret = cursor->next(cursor)) == 0) {
 			cmp = 1;
 			deleted = 0;
 		}
@@ -1197,8 +1199,8 @@ __clsm_search_near(WT_CURSOR *cursor, int *exactp)
 	}
 	if (deleted) {
 		clsm->current = NULL;
-		if ((ret = cursor->prev(cursor)) == 0)
-			cmp = -1;
+		WT_ERR(cursor->prev(cursor));
+		cmp = -1;
 	}
 	*exactp = cmp;
 
@@ -1210,7 +1212,6 @@ err:	WT_TRET(__clsm_leave(clsm));
 	F_CLR(cursor, WT_CURSTD_KEY_SET | WT_CURSTD_VALUE_SET);
 	if (ret == 0) {
 		F_SET(cursor, WT_CURSTD_KEY_INT | WT_CURSTD_VALUE_INT);
-		__clsm_deleted_decode(&cursor->value);
 	} else
 		clsm->current = NULL;
 
@@ -1460,7 +1461,7 @@ __wt_clsm_open(WT_SESSION_IMPL *session,
 	    ret = __wt_lsm_tree_get(session, uri, 0, &lsm_tree));
 	WT_RET(ret);
 
-	WT_ERR(__wt_calloc_def(session, 1, &clsm));
+	WT_ERR(__wt_calloc_one(session, &clsm));
 
 	cursor = &clsm->iface;
 	*cursor = iface;
diff --git a/src/lsm/lsm_manager.c b/src/lsm/lsm_manager.c
index 1356d336f6e..248ac70c61e 100644
--- a/src/lsm/lsm_manager.c
+++ b/src/lsm/lsm_manager.c
@@ -645,7 +645,7 @@ __wt_lsm_manager_push_entry(WT_SESSION_IMPL *session,
 
 	WT_RET(__wt_epoch(session, &lsm_tree->work_push_ts));
 
-	WT_RET(__wt_calloc_def(session, 1, &entry));
+	WT_RET(__wt_calloc_one(session, &entry));
 	entry->type = type;
 	entry->flags = flags;
 	entry->lsm_tree = lsm_tree;
diff --git a/src/lsm/lsm_merge.c b/src/lsm/lsm_merge.c
index 9ed605724ce..8989e979a44 100644
--- a/src/lsm/lsm_merge.c
+++ b/src/lsm/lsm_merge.c
@@ -311,7 +311,7 @@ __wt_lsm_merge(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree, u_int id)
 			    lsm_tree->name, verb, lsm_tree->chunk[verb]->id));
 	}
 
-	WT_ERR(__wt_calloc_def(session, 1, &chunk));
+	WT_ERR(__wt_calloc_one(session, &chunk));
 	created_chunk = 1;
 	chunk->id = dest_id;
 
diff --git a/src/lsm/lsm_meta.c b/src/lsm/lsm_meta.c
index bf03588c066..7fd77b64720 100644
--- a/src/lsm/lsm_meta.c
+++ b/src/lsm/lsm_meta.c
@@ -91,8 +91,8 @@ __wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
 					WT_ERR(__wt_realloc_def(session,
 					    &lsm_tree->chunk_alloc,
 					    nchunks + 1, &lsm_tree->chunk));
-					WT_ERR(__wt_calloc_def(
-					    session, 1, &chunk));
+					WT_ERR(
+					    __wt_calloc_one(session, &chunk));
 					lsm_tree->chunk[nchunks++] = chunk;
 					chunk->id = (uint32_t)lv.val;
 					WT_ERR(__wt_lsm_tree_chunk_name(session,
@@ -136,7 +136,7 @@ __wt_lsm_meta_read(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
 				WT_ERR(__wt_realloc_def(session,
 				    &lsm_tree->old_alloc, nchunks + 1,
 				    &lsm_tree->old_chunks));
-				WT_ERR(__wt_calloc_def(session, 1, &chunk));
+				WT_ERR(__wt_calloc_one(session, &chunk));
 				lsm_tree->old_chunks[nchunks++] = chunk;
 				WT_ERR(__wt_strndup(session,
 				    lk.str, lk.len, &chunk->uri));
diff --git a/src/lsm/lsm_tree.c b/src/lsm/lsm_tree.c
index 888f12bdd94..e7b1d7f9d2c 100644
--- a/src/lsm/lsm_tree.c
+++ b/src/lsm/lsm_tree.c
@@ -332,7 +332,7 @@ __wt_lsm_tree_create(WT_SESSION_IMPL *session,
 		WT_RET_MSG(session, EINVAL,
 		    "LSM trees cannot be configured as column stores");
 
-	WT_RET(__wt_calloc_def(session, 1, &lsm_tree));
+	WT_RET(__wt_calloc_one(session, &lsm_tree));
 
 	WT_ERR(__lsm_tree_set_name(session, lsm_tree, uri));
 
@@ -551,7 +551,7 @@ __lsm_tree_open(
 		return (ret);
 
 	/* Try to open the tree. */
-	WT_RET(__wt_calloc_def(session, 1, &lsm_tree));
+	WT_RET(__wt_calloc_one(session, &lsm_tree));
 	WT_ERR(__wt_rwlock_alloc(session, &lsm_tree->rwlock, "lsm tree"));
 
 	WT_ERR(__lsm_tree_set_name(session, lsm_tree, uri));
@@ -820,7 +820,7 @@ __wt_lsm_tree_switch(WT_SESSION_IMPL *session, WT_LSM_TREE *lsm_tree)
 	    "merge throttle %ld", lsm_tree->name,
 	    new_id, lsm_tree->ckpt_throttle, lsm_tree->merge_throttle));
 
-	WT_ERR(__wt_calloc_def(session, 1, &chunk));
+	WT_ERR(__wt_calloc_one(session, &chunk));
 	chunk->id = new_id;
 	chunk->switch_txn = WT_TXN_NONE;
 	lsm_tree->chunk[lsm_tree->nchunks++] = chunk;
@@ -1011,7 +1011,7 @@ __wt_lsm_tree_truncate(
 	locked = 1;
 
 	/* Create the new chunk. */
-	WT_ERR(__wt_calloc_def(session, 1, &chunk));
+	WT_ERR(__wt_calloc_one(session, &chunk));
 	chunk->id = WT_ATOMIC_ADD4(lsm_tree->last, 1);
 	WT_ERR(__wt_lsm_tree_setup_chunk(session, lsm_tree, chunk));
 
diff --git a/src/os_posix/os_dlopen.c b/src/os_posix/os_dlopen.c
index 91410c54c04..cb9fe314beb 100644
--- a/src/os_posix/os_dlopen.c
+++ b/src/os_posix/os_dlopen.c
@@ -17,7 +17,7 @@ __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp)
 	WT_DECL_RET;
 	WT_DLH *dlh;
 
-	WT_RET(__wt_calloc_def(session, 1, &dlh));
+	WT_RET(__wt_calloc_one(session, &dlh));
 	WT_ERR(__wt_strdup(session, path, &dlh->name));
 
 	if ((dlh->handle = dlopen(path, RTLD_LAZY)) == NULL)
diff --git a/src/os_posix/os_mtx_cond.c b/src/os_posix/os_mtx_cond.c
index 3a76cceb3f0..479a61db795 100644
--- a/src/os_posix/os_mtx_cond.c
+++ b/src/os_posix/os_mtx_cond.c
@@ -22,7 +22,7 @@ __wt_cond_alloc(WT_SESSION_IMPL *session,
 	 * !!!
 	 * This function MUST handle a NULL session handle.
 	 */
-	WT_RET(__wt_calloc(session, 1, sizeof(WT_CONDVAR), &cond));
+	WT_RET(__wt_calloc_one(session, &cond));
 
 	WT_ERR(pthread_mutex_init(&cond->mtx, NULL));
 
diff --git a/src/os_posix/os_mtx_rw.c b/src/os_posix/os_mtx_rw.c
index 1a692f71dce..c6cfa9412a7 100644
--- a/src/os_posix/os_mtx_rw.c
+++ b/src/os_posix/os_mtx_rw.c
@@ -53,7 +53,7 @@ __wt_rwlock_alloc(
 
 	WT_RET(__wt_verbose(session, WT_VERB_MUTEX, "rwlock: alloc %s", name));
 
-	WT_RET(__wt_calloc_def(session, 1, &rwlock));
+	WT_RET(__wt_calloc_one(session, &rwlock));
 
 	rwlock->name = name;
 
diff --git a/src/os_posix/os_open.c b/src/os_posix/os_open.c
index 736ed2be377..a0da1952101 100644
--- a/src/os_posix/os_open.c
+++ b/src/os_posix/os_open.c
@@ -145,7 +145,7 @@ setupfh:
 		WT_ERR(posix_fadvise(fd, 0, 0, POSIX_FADV_RANDOM));
 #endif
 
-	WT_ERR(__wt_calloc(session, 1, sizeof(WT_FH), &fh));
+	WT_ERR(__wt_calloc_one(session, &fh));
 	WT_ERR(__wt_strdup(session, name, &fh->name));
 	fh->fd = fd;
 	fh->ref = 1;
diff --git a/src/os_win/os_dir.c b/src/os_win/os_dir.c
index 076c64670d4..ab332e01186 100644
--- a/src/os_win/os_dir.c
+++ b/src/os_win/os_dir.c
@@ -38,7 +38,7 @@ __wt_dirlist(WT_SESSION_IMPL *session, const char *dir, const char *prefix,
 		path[pathlen - 1] = '\0';
 	}
 
-	WT_ERR(__wt_scr_alloc(session, 0, &pathbuf));
+	WT_ERR(__wt_scr_alloc(session, pathlen + 3, &pathbuf));
 	WT_ERR(__wt_buf_fmt(session, pathbuf, "%s\\*", path));
 
 	dirallocsz = 0;
@@ -96,7 +96,7 @@ err:
 	if (findhandle != INVALID_HANDLE_VALUE)
 		(void)FindClose(findhandle);
 	__wt_free(session, path);
-	__wt_buf_free(session, pathbuf);
+	__wt_scr_free(&pathbuf);
 
 	if (ret == 0)
 		return (0);
diff --git a/src/os_win/os_dlopen.c b/src/os_win/os_dlopen.c
index ebc90edd2b2..3fdd0c74b1f 100644
--- a/src/os_win/os_dlopen.c
+++ b/src/os_win/os_dlopen.c
@@ -17,7 +17,7 @@ __wt_dlopen(WT_SESSION_IMPL *session, const char *path, WT_DLH **dlhp)
 	WT_DECL_RET;
 	WT_DLH *dlh;
 
-	WT_RET(__wt_calloc_def(session, 1, &dlh));
+	WT_RET(__wt_calloc_one(session, &dlh));
 	WT_ERR(__wt_strdup(session, path, &dlh->name));
 
 	/* NULL means load from the current binary */
diff --git a/src/os_win/os_mtx_cond.c b/src/os_win/os_mtx_cond.c
index 9c9907bd8be..a33ab4e5c37 100644
--- a/src/os_win/os_mtx_cond.c
+++ b/src/os_win/os_mtx_cond.c
@@ -21,7 +21,7 @@ __wt_cond_alloc(WT_SESSION_IMPL *session,
 	 * !!!
 	 * This function MUST handle a NULL session handle.
 	 */
-	WT_RET(__wt_calloc(session, 1, sizeof(WT_CONDVAR), &cond));
+	WT_RET(__wt_calloc_one(session, &cond));
 
 	InitializeCriticalSection(&cond->mtx);
 
diff --git a/src/os_win/os_open.c b/src/os_win/os_open.c
index 6bdbaa3f065..f9d47c5be5d 100644
--- a/src/os_win/os_open.c
+++ b/src/os_win/os_open.c
@@ -130,7 +130,7 @@ __wt_open(WT_SESSION_IMPL *session,
 		    "open failed for secondary handle: %s", path);
 
 setupfh:
-	WT_ERR(__wt_calloc(session, 1, sizeof(WT_FH), &fh));
+	WT_ERR(__wt_calloc_one(session, &fh));
 	WT_ERR(__wt_strdup(session, name, &fh->name));
 	fh->filehandle = filehandle;
 	fh->filehandle_secondary = filehandle_secondary;
diff --git a/src/packing/pack_stream.c b/src/packing/pack_stream.c
index efbbd5d9adb..a35a3555458 100644
--- a/src/packing/pack_stream.c
+++ b/src/packing/pack_stream.c
@@ -30,7 +30,7 @@ wiredtiger_pack_start(WT_SESSION *wt_session,
 	WT_SESSION_IMPL *session;
 
 	session = (WT_SESSION_IMPL *)wt_session;
-	WT_RET(__wt_calloc_def(session, 1, &ps));
+	WT_RET(__wt_calloc_one(session, &ps));
 	WT_ERR(__pack_init(session, &ps->pack, format));
 	ps->p = ps->start = buffer;
 	ps->end = ps->p + len;
diff --git a/src/reconcile/rec_track.c b/src/reconcile/rec_track.c
index 92282393a23..fdf8ee6d68b 100644
--- a/src/reconcile/rec_track.c
+++ b/src/reconcile/rec_track.c
@@ -21,7 +21,7 @@
 static int
 __ovfl_track_init(WT_SESSION_IMPL *session, WT_PAGE *page)
 {
-	return (__wt_calloc_def(session, 1, &page->modify->ovfl_track));
+	return (__wt_calloc_one(session, &page->modify->ovfl_track));
 }
 
 /*
diff --git a/src/reconcile/rec_write.c b/src/reconcile/rec_write.c
index c72447ae841..839ab028afd 100644
--- a/src/reconcile/rec_write.c
+++ b/src/reconcile/rec_write.c
@@ -96,16 +96,15 @@ typedef struct {
 	 * image size.
 	 *
 	 * First, the sizes of the page we're building.  If WiredTiger is doing
-	 * page layout, page_size is the same as page_size_max.  We accumulate
-	 * the maximum page size of raw data and when we reach that size, we
-	 * split the page into multiple chunks, eventually compressing those
-	 * chunks.  When the application is doing page layout (raw compression
-	 * is configured), page_size can continue to grow past page_size_max,
-	 * and we keep accumulating raw data until the raw compression callback
-	 * accepts it.
+	 * page layout, page_size is the same as page_size_orig. We accumulate
+	 * a "page size" of raw data and when we reach that size, we split the
+	 * page into multiple chunks, eventually compressing those chunks.  When
+	 * the application is doing page layout (raw compression is configured),
+	 * page_size can continue to grow past page_size_orig, and we keep
+	 * accumulating raw data until the raw compression callback accepts it.
 	 */
-	uint32_t page_size;		/* Current page size */
-	uint32_t page_size_max;		/* Maximum on-disk page size */
+	uint32_t page_size;		/* Set page size */
+	uint32_t page_size_orig;	/* Saved set page size */
 
 	/*
 	 * Second, the split size: if we're doing the page layout, split to a
@@ -202,9 +201,8 @@ typedef struct {
 	 * because we've already been forced to split.
 	 */
 	enum {	SPLIT_BOUNDARY=0,	/* Next: a split page boundary */
-		SPLIT_MAX=1,		/* Next: the maximum page boundary */
-		SPLIT_TRACKING_OFF=2,	/* No boundary checks */
-		SPLIT_TRACKING_RAW=3 }	/* Underlying compression decides */
+		SPLIT_TRACKING_OFF=1,	/* No boundary checks */
+		SPLIT_TRACKING_RAW=2 }	/* Underlying compression decides */
 	bnd_state;
 
 	/*
@@ -591,7 +589,7 @@ __rec_write_init(WT_SESSION_IMPL *session,
 	page = ref->page;
 
 	if ((r = *(WT_RECONCILE **)reconcilep) == NULL) {
-		WT_RET(__wt_calloc_def(session, 1, &r));
+		WT_RET(__wt_calloc_one(session, &r));
 
 		*(WT_RECONCILE **)reconcilep = r;
 		session->reconcile_cleanup = __rec_destroy_session;
@@ -1284,7 +1282,7 @@ __rec_incr(WT_SESSION_IMPL *session, WT_RECONCILE *r, uint32_t v, size_t size)
 	 */
 	WT_ASSERT(session, r->space_avail >= size);
 	WT_ASSERT(session,
-	    WT_BLOCK_FITS(r->first_free, size, r->dsk.mem, r->page_size));
+	    WT_BLOCK_FITS(r->first_free, size, r->dsk.mem, r->dsk.memsize));
 
 	r->entries += v;
 	r->space_avail -= size;
@@ -1543,6 +1541,37 @@ __rec_split_bnd_grow(WT_SESSION_IMPL *session, WT_RECONCILE *r)
 }
 
 /*
+ * __wt_split_page_size --
+ *	Split page size calculation: we don't want to repeatedly split every
+ * time a new entry is added, so we split to a smaller-than-maximum page size.
+ */
+uint32_t
+__wt_split_page_size(WT_BTREE *btree, uint32_t maxpagesize)
+{
+	uintmax_t a;
+	uint32_t split_size;
+
+	/*
+	 * Ideally, the split page size is some percentage of the maximum page
+	 * size rounded to an allocation unit (round to an allocation unit so
+	 * we don't waste space when we write).
+	 */
+	a = maxpagesize;			/* Don't overflow. */
+	split_size = (uint32_t)
+	    WT_ALIGN((a * (u_int)btree->split_pct) / 100, btree->allocsize);
+
+	/*
+	 * If the result of that calculation is the same as the allocation unit
+	 * (that happens if the maximum size is the same size as an allocation
+	 * unit), use a percentage of the maximum page size.
+	 */
+	if (split_size == btree->allocsize)
+		split_size = (uint32_t)((a * (u_int)btree->split_pct) / 100);
+
+	return (split_size);
+}
+
+/*
  * __rec_split_init --
  *	Initialization for the reconciliation split functions.
  */
@@ -1576,7 +1605,7 @@ __rec_split_init(WT_SESSION_IMPL *session,
 	 * we don't want to increment our way up to the amount of data needed by
 	 * the application to successfully compress to the target page size.
 	 */
-	r->page_size = r->page_size_max = max;
+	r->page_size = r->page_size_orig = max;
 	if (r->raw_compression)
 		r->page_size *= 10;
 
@@ -1632,11 +1661,11 @@ __rec_split_init(WT_SESSION_IMPL *session,
 		r->space_avail = r->page_size - WT_PAGE_HEADER_BYTE_SIZE(btree);
 	}
 	else if (page->type == WT_PAGE_COL_FIX) {
-		r->split_size = r->page_size_max;
+		r->split_size = r->page_size;
 		r->space_avail =
 		    r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree);
 	} else {
-		r->split_size = __wt_split_page_size(btree, r->page_size_max);
+		r->split_size = __wt_split_page_size(btree, r->page_size);
 		r->space_avail =
 		    r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree);
 	}
@@ -1853,17 +1882,45 @@ err:	__wt_scr_free(&update);
 }
 
 /*
+ * __rec_split_grow --
+ *	Grow the split buffer.
+ */
+static int
+__rec_split_grow(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t add_len)
+{
+	WT_BM *bm;
+	WT_BTREE *btree;
+	size_t corrected_page_size, len;
+
+	btree = S2BT(session);
+	bm = btree->bm;
+
+	len = WT_PTRDIFF(r->first_free, r->dsk.mem);
+	corrected_page_size = len + add_len;
+	WT_RET(bm->write_size(bm, session, &corrected_page_size));
+	WT_RET(__wt_buf_grow(session, &r->dsk, corrected_page_size));
+	r->first_free = (uint8_t *)r->dsk.mem + len;
+	WT_ASSERT(session, corrected_page_size >= len);
+	r->space_avail = corrected_page_size - len;
+	WT_ASSERT(session, r->space_avail >= add_len);
+	return (0);
+}
+
+/*
  * __rec_split --
  *	Handle the page reconciliation bookkeeping.  (Did you know "bookkeeper"
  * has 3 doubled letters in a row?  Sweet-tooth does, too.)
  */
 static int
-__rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r)
+__rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len)
 {
-	WT_BTREE *btree;
 	WT_BOUNDARY *last, *next;
+	WT_BTREE *btree;
 	WT_PAGE_HEADER *dsk;
-	uint32_t len;
+	size_t len;
+
+	btree = S2BT(session);
+	dsk = r->dsk.mem;
 
 	/*
 	 * We should never split during salvage, and we're about to drop core
@@ -1874,45 +1931,20 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r)
 		    "%s page too large, attempted split during salvage",
 		    __wt_page_type_string(r->page->type));
 
-	/*
-	 * Handle page-buffer size tracking; we have to do this work in every
-	 * reconciliation loop, and I don't want to repeat the code that many
-	 * times.
-	 */
-	btree = S2BT(session);
-	dsk = r->dsk.mem;
-
 	/* Hitting a page boundary resets the dictionary, in all cases. */
 	__rec_dictionary_reset(r);
 
-	/*
-	 * There are 3 cases we have to handle.
-	 *
-	 * #1
-	 * About to cross a split boundary: save current boundary information
-	 * and return.
-	 *
-	 * #2
-	 * About to cross the maximum boundary: use saved boundary information
-	 * to write all of the split pages.
-	 *
-	 * #3
-	 * About to cross a split boundary, but we've either already done the
-	 * split thing when we approached the maximum boundary, in which
-	 * case we write the page and keep going, or we were never tracking
-	 * split boundaries at all.
-	 *
-	 * Cases #1 and #2 are the hard ones: we're called when we're about to
-	 * cross each split boundary, and we save information away so we can
-	 * split if we have to.  We're also called when we're about to cross
-	 * the maximum page boundary: in that case, we do the actual split and
-	 * clean up all the previous boundaries, then keep going.
-	 */
 	switch (r->bnd_state) {
-	case SPLIT_BOUNDARY:				/* Case #1 */
+	case SPLIT_BOUNDARY:
+		/* We can get here if the first key/value pair won't fit. */
+		if (r->entries == 0)
+			break;
+
 		/*
-		 * Save the information about where we are when the split would
-		 * have happened.
+		 * About to cross a split boundary but not yet forced to split
+		 * into multiple pages. If we have to split, this is one of the
+		 * split points, save information about where we are when the
+		 * split would have happened.
 		 */
 		WT_RET(__rec_split_bnd_grow(session, r));
 		last = &r->bnd[r->bnd_next++];
@@ -1939,37 +1971,50 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r)
 		/*
 		 * Set the space available to another split-size chunk, if we
 		 * have one.  If we don't have room for another split chunk,
-		 * add whatever space remains in the maximum page size, and
-		 * hope it's enough.
+		 * add whatever space remains in this page.
 		 */
 		len = WT_PTRDIFF32(r->first_free, dsk);
 		if (len + r->split_size <= r->page_size)
 			r->space_avail =
 			    r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree);
 		else {
-			r->bnd_state = SPLIT_MAX;
+			WT_ASSERT(session, r->page_size >=
+			    (WT_PAGE_HEADER_BYTE_SIZE(btree) + len));
 			r->space_avail = r->page_size -
 			    (WT_PAGE_HEADER_BYTE_SIZE(btree) + len);
 		}
-		break;
-	case SPLIT_MAX:					/* Case #2 */
+
+		/* If the next object fits into this page, we're good to go. */
+		if (r->space_avail >= next_len)
+			return (0);
+
 		/*
-		 * It didn't all fit into a single page.
+		 * We're going to have to split and create multiple pages.
 		 *
 		 * Cycle through the saved split-point information, writing the
-		 * split chunks we have tracked.
+		 * split chunks we have tracked.  The underlying fixup function
+		 * sets the space available and other information, and copies
+		 * any unwritten chunk of data to the beginning of the buffer.
 		 */
 		WT_RET(__rec_split_fixup(session, r));
-
-		/* We're done saving split chunks. */
-		r->bnd_state = SPLIT_TRACKING_OFF;
 		break;
-	case SPLIT_TRACKING_OFF:			/* Case #3 */
+	case SPLIT_TRACKING_OFF:
+		/*
+		 * We can get here if the first key/value pair won't fit.
+		 * Additionally, grow the buffer to contain the current data if
+		 * we haven't already consumed a reasonable portion of the page.
+		 */
+		if (r->entries == 0)
+			break;
+		if (WT_PTRDIFF(r->first_free, r->dsk.mem) < r->page_size / 2)
+			break;
+
 		/*
-		 * It didn't all fit, but either we've already noticed it and
-		 * are now processing the rest of the page at the split-size
-		 * boundaries, or the split size was the same as the page size,
-		 * so we never bothered with saving split-point information.
+		 * The key/value pairs didn't fit into a single page, but either
+		 * we've already noticed that and are now processing the rest of
+		 * the pairs at split size boundaries, or the split size was the
+		 * same as the page size, and we never bothered with split point
+		 * information at all.
 		 */
 		WT_RET(__rec_split_bnd_grow(session, r));
 		last = &r->bnd[r->bnd_next++];
@@ -2007,6 +2052,24 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r)
 	case SPLIT_TRACKING_RAW:
 	WT_ILLEGAL_VALUE(session);
 	}
+
+	/*
+	 * Overflow values can be larger than the maximum page size but still be
+	 * "on-page". If the next key/value pair is larger than space available
+	 * after a split has happened (in other words, larger than the maximum
+	 * page size), create a page sized to hold that one key/value pair. This
+	 * generally splits the page into key/value pairs before a large object,
+	 * the object, and key/value pairs after the object. It's possible other
+	 * key/value pairs will also be aggregated onto the bigger page before
+	 * or after, if the page happens to hold them, but it won't necessarily
+	 * happen that way.
+	 */
+	if (r->space_avail < next_len)
+		WT_RET(__rec_split_grow(session, r, next_len));
+
+	/* We're done saving split chunks. */
+	r->bnd_state = SPLIT_TRACKING_OFF;
+
 	return (0);
 }
 
@@ -2015,8 +2078,8 @@ __rec_split(WT_SESSION_IMPL *session, WT_RECONCILE *r)
  *	Handle the raw compression page reconciliation bookkeeping.
  */
 static int
-__rec_split_raw_worker(
-    WT_SESSION_IMPL *session, WT_RECONCILE *r, int no_more_rows)
+__rec_split_raw_worker(WT_SESSION_IMPL *session,
+    WT_RECONCILE *r, size_t next_len, int no_more_rows)
 {
 	WT_BM *bm;
 	WT_BOUNDARY *last, *next;
@@ -2048,6 +2111,12 @@ __rec_split_raw_worker(
 	next = last + 1;
 
 	/*
+	 * We can get here if the first key/value pair won't fit.
+	 */
+	if (r->entries == 0)
+		goto split_grow;
+
+	/*
 	 * Build arrays of offsets and cumulative counts of cells and rows in
 	 * the page: the offset is the byte offset to the possible split-point
 	 * (adjusted for an initial chunk that cannot be compressed), entries
@@ -2150,27 +2219,29 @@ __rec_split_raw_worker(
 	    WT_STORE_SIZE(WT_PTRDIFF(cell, dsk) - WT_BLOCK_COMPRESS_SKIP);
 
 	/*
-	 * Allocate a destination buffer.  If there's a pre-size function, use
-	 * it to determine the destination buffer's minimum size, otherwise the
-	 * destination buffer is documented to be at least the maximum object
-	 * size.
+	 * Allocate a destination buffer. If there's a pre-size function, call
+	 * it to determine the destination buffer's size, else the destination
+	 * buffer is documented to be at least the source size. (We can't use
+	 * the target page size, any single key/value could be larger than the
+	 * page size. Don't bother figuring out a minimum, just use the source
+	 * size.)
 	 *
-	 * The destination buffer really only needs to be large enough for the
-	 * target block size, corrected for the requirements of the underlying
-	 * block manager.  If the target block size is 8KB, that's a multiple
-	 * of 512B and so the underlying block manager is fine with it.  But...
-	 * we don't control what the pre_size method returns us as a required
-	 * size, and we don't want to document the compress_raw method has to
-	 * skip bytes in the buffer because that's confusing, so do something
-	 * more complicated.  First, find out how much space the compress_raw
-	 * function might need, either the value returned from pre_size, or the
-	 * maximum object size.  Add the compress-skip bytes, and then correct
-	 * that value for the underlying block manager.   As a result, we have
-	 * a destination buffer that's the right "object" size when calling the
-	 * compress_raw method, and there are bytes in the header just for us.
+	 * The destination buffer needs to be large enough for the final block
+	 * size, corrected for the requirements of the underlying block manager.
+	 * If the final block size is 8KB, that's a multiple of 512B and so the
+	 * underlying block manager is fine with it.  But... we don't control
+	 * what the pre_size method returns us as a required size, and we don't
+	 * want to document the compress_raw method has to skip bytes in the
+	 * buffer because that's confusing, so do something more complicated.
+	 * First, find out how much space the compress_raw function might need,
+	 * either the value returned from pre_size, or the initial source size.
+	 * Add the compress-skip bytes, and then correct that value for the
+	 * underlying block manager.   As a result, we have a destination buffer
+	 * that's large enough when calling the compress_raw method, and there
+	 * are bytes in the header just for us.
 	 */
 	if (compressor->pre_size == NULL)
-		result_len = r->page_size_max;
+		result_len = (size_t)r->raw_offsets[slots];
 	else
 		WT_RET(compressor->pre_size(compressor, wt_session,
 		    (uint8_t *)dsk + WT_BLOCK_COMPRESS_SKIP,
@@ -2185,7 +2256,7 @@ __rec_split_raw_worker(
 	 */
 	memcpy(dst->mem, dsk, WT_BLOCK_COMPRESS_SKIP);
 	ret = compressor->compress_raw(compressor, wt_session,
-	    r->page_size_max, btree->split_pct,
+	    r->page_size_orig, btree->split_pct,
 	    WT_BLOCK_COMPRESS_SKIP, (uint8_t *)dsk + WT_BLOCK_COMPRESS_SKIP,
 	    r->raw_offsets, slots,
 	    (uint8_t *)dst->mem + WT_BLOCK_COMPRESS_SKIP,
@@ -2296,15 +2367,16 @@ no_slots:
 		 * Note use of memmove, the source and destination buffers can
 		 * overlap.
 		 */
-		len = WT_PTRDIFF(r->first_free, (uint8_t *)dsk +
-		    r->raw_offsets[result_slots] + WT_BLOCK_COMPRESS_SKIP);
+		len = WT_PTRDIFF(
+		    r->first_free, (uint8_t *)dsk + dsk_dst->mem_size);
 		dsk_start = WT_PAGE_HEADER_BYTE(btree, dsk);
 		(void)memmove(dsk_start, (uint8_t *)r->first_free - len, len);
 
 		r->entries -= r->raw_entries[result_slots - 1];
 		r->first_free = dsk_start + len;
-		r->space_avail =
-		    r->page_size - (WT_PAGE_HEADER_BYTE_SIZE(btree) + len);
+		r->space_avail += r->raw_offsets[result_slots];
+		WT_ASSERT(session, r->first_free + r->space_avail <=
+		    (uint8_t *)r->dsk.mem + r->dsk.memsize);
 
 		/*
 		 * Set the key for the next block (before writing the block, a
@@ -2358,15 +2430,14 @@ no_slots:
 		 */
 		WT_STAT_FAST_DATA_INCR(session, compress_raw_fail_temporary);
 
-		len = WT_PTRDIFF(r->first_free, r->dsk.mem);
-		corrected_page_size = r->page_size * 2;
-		WT_RET(bm->write_size(bm, session, &corrected_page_size));
-		WT_RET(__wt_buf_grow(session, &r->dsk, corrected_page_size));
+split_grow:	/*
+		 * Double the page size and make sure we accommodate at least
+		 * one more record. The reason for the latter is that we may
+		 * be here because there's a large key/value pair that won't
+		 * fit in our initial page buffer, even at its expanded size.
+		 */
 		r->page_size *= 2;
-		r->first_free = (uint8_t *)r->dsk.mem + len;
-		r->space_avail =
-		    r->page_size - (WT_PAGE_HEADER_BYTE_SIZE(btree) + len);
-		return (0);
+		return (__rec_split_grow(session, r, r->page_size + next_len));
 	}
 
 	/* We have a block, update the boundary counter. */
@@ -2438,9 +2509,9 @@ err:	__wt_scr_free(&tmp);
  *	Raw compression split routine.
  */
 static inline int
-__rec_split_raw(WT_SESSION_IMPL *session, WT_RECONCILE *r)
+__rec_split_raw(WT_SESSION_IMPL *session, WT_RECONCILE *r, size_t next_len)
 {
-	return (__rec_split_raw_worker(session, r, 0));
+	return (__rec_split_raw_worker(session, r, next_len, 0));
 }
 
 /*
@@ -2456,7 +2527,6 @@ __rec_split_finish_std(WT_SESSION_IMPL *session, WT_RECONCILE *r)
 	/* Adjust the boundary information based on our split status. */
 	switch (r->bnd_state) {
 	case SPLIT_BOUNDARY:
-	case SPLIT_MAX:
 		/*
 		 * We never split, the reconciled page fit into a maximum page
 		 * size.  Change the first boundary slot to represent the full
@@ -2516,7 +2586,7 @@ __rec_split_finish(WT_SESSION_IMPL *session, WT_RECONCILE *r)
 	/* We're done reconciling - write the final page */
 	if (r->raw_compression && r->entries != 0) {
 		while (r->entries != 0)
-			WT_RET(__rec_split_raw_worker(session, r, 1));
+			WT_RET(__rec_split_raw_worker(session, r, 0, 1));
 	} else
 		WT_RET(__rec_split_finish_std(session, r));
 
@@ -2553,7 +2623,7 @@ __rec_split_fixup(WT_SESSION_IMPL *session, WT_RECONCILE *r)
 	 * WT_PAGE_HEADER header onto the scratch buffer, most of the header
 	 * information remains unchanged between the pages.
 	 */
-	WT_RET(__wt_scr_alloc(session, r->page_size_max, &tmp));
+	WT_RET(__wt_scr_alloc(session, r->page_size, &tmp));
 	dsk = tmp->mem;
 	memcpy(dsk, r->dsk.mem, WT_PAGE_HEADER_SIZE);
 
@@ -2595,8 +2665,10 @@ __rec_split_fixup(WT_SESSION_IMPL *session, WT_RECONCILE *r)
 
 	r->entries -= r->total_entries;
 	r->first_free = dsk_start + len;
+	WT_ASSERT(session,
+	    r->page_size >= (WT_PAGE_HEADER_BYTE_SIZE(btree) + len));
 	r->space_avail =
-	    (r->split_size - WT_PAGE_HEADER_BYTE_SIZE(btree)) - len;
+	    r->split_size - (WT_PAGE_HEADER_BYTE_SIZE(btree) + len);
 
 err:	__wt_scr_free(&tmp);
 	return (ret);
@@ -2905,17 +2977,17 @@ __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
 	    cursor->value.data, cursor->value.size, (uint64_t)0));
 
 	/* Boundary: split or write the page. */
-	while (key->len + val->len > r->space_avail)
+	if (key->len + val->len > r->space_avail) {
 		if (r->raw_compression)
-			WT_RET(__rec_split_raw(session, r));
+			WT_RET(
+			    __rec_split_raw(session, r, key->len + val->len));
 		else {
-			WT_RET(__rec_split(session, r));
+			WT_RET(__rec_split(session, r, key->len + val->len));
 
 			/*
 			 * Turn off prefix compression until a full key written
-			 * to the new page, and (unless we're already working
-			 * with an overflow key), rebuild the key without prefix
-			 * compression.
+			 * to the new page, and (unless already working with an
+			 * overflow key), rebuild the key without compression.
 			 */
 			if (r->key_pfx_compress_conf) {
 				r->key_pfx_compress = 0;
@@ -2924,6 +2996,7 @@ __wt_bulk_insert_row(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
 					    session, r, NULL, 0, &ovfl_key));
 			}
 		}
+	}
 
 	/* Copy the key/value pair onto the page. */
 	__rec_copy_incr(session, r, key);
@@ -2968,7 +3041,7 @@ __rec_col_fix_bulk_insert_split_check(WT_CURSOR_BULK *cbulk)
 			__rec_incr(session, r, cbulk->entry,
 			    __bitstr_size(
 			    (size_t)cbulk->entry * btree->bitcnt));
-			WT_RET(__rec_split(session, r));
+			WT_RET(__rec_split(session, r, 0));
 		}
 		cbulk->entry = 0;
 		cbulk->nrecs = WT_FIX_BYTES_TO_ENTRIES(btree, r->space_avail);
@@ -3048,11 +3121,10 @@ __wt_bulk_insert_var(WT_SESSION_IMPL *session, WT_CURSOR_BULK *cbulk)
 	    session, r, cbulk->last.data, cbulk->last.size, cbulk->rle));
 
 	/* Boundary: split or write the page. */
-	while (val->len > r->space_avail)
-		if (r->raw_compression)
-			WT_RET(__rec_split_raw(session, r));
-		else
-			WT_RET(__rec_split(session, r));
+	if (val->len > r->space_avail)
+		WT_RET(r->raw_compression ?
+		    __rec_split_raw(session, r, val->len) :
+		    __rec_split(session, r, val->len));
 
 	/* Copy the value onto the page. */
 	if (btree->dictionary)
@@ -3171,11 +3243,10 @@ __rec_col_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
 		CHILD_RELEASE_ERR(session, hazard, ref);
 
 		/* Boundary: split or write the page. */
-		while (val->len > r->space_avail)
-			if (r->raw_compression)
-				WT_ERR(__rec_split_raw(session, r));
-			else
-				WT_ERR(__rec_split(session, r));
+		if (val->len > r->space_avail)
+			WT_ERR(r->raw_compression ?
+			    __rec_split_raw(session, r, val->len) :
+			    __rec_split(session, r, val->len));
 
 		/* Copy the value onto the page. */
 		__rec_copy_incr(session, r, val);
@@ -3217,11 +3288,10 @@ __rec_col_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
 		    addr->addr, addr->size, __rec_vtype(addr), r->recno);
 
 		/* Boundary: split or write the page. */
-		while (val->len > r->space_avail)
-			if (r->raw_compression)
-				WT_RET(__rec_split_raw(session, r));
-			else
-				WT_RET(__rec_split(session, r));
+		if (val->len > r->space_avail)
+			WT_RET(r->raw_compression ?
+			    __rec_split_raw(session, r, val->len) :
+			    __rec_split(session, r, val->len));
 
 		/* Copy the value onto the page. */
 		__rec_copy_incr(session, r, val);
@@ -3298,7 +3368,7 @@ __rec_col_fix(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
 			 */
 			__rec_incr(session, r, entry,
 			    __bitstr_size((size_t)entry * btree->bitcnt));
-			WT_RET(__rec_split(session, r));
+			WT_RET(__rec_split(session, r, 0));
 
 			/* Calculate the number of entries per page. */
 			entry = 0;
@@ -3442,11 +3512,10 @@ __rec_col_var_helper(WT_SESSION_IMPL *session, WT_RECONCILE *r,
 		    session, r, value->data, value->size, rle));
 
 	/* Boundary: split or write the page. */
-	while (val->len > r->space_avail)
-		if (r->raw_compression)
-			WT_RET(__rec_split_raw(session, r));
-		else
-			WT_RET(__rec_split(session, r));
+	if (val->len > r->space_avail)
+		WT_RET(r->raw_compression ?
+		    __rec_split_raw(session, r, val->len) :
+		    __rec_split(session, r, val->len));
 
 	/* Copy the value onto the page. */
 	if (!deleted && !overflow_type && btree->dictionary)
@@ -4034,24 +4103,25 @@ __rec_row_int(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
 		r->cell_zero = 0;
 
 		/* Boundary: split or write the page. */
-		while (key->len + val->len > r->space_avail) {
-			if (r->raw_compression) {
-				WT_ERR(__rec_split_raw(session, r));
-				continue;
-			}
-
-			/*
-			 * In one path above, we copied address blocks from the
-			 * page rather than building the actual key.  In that
-			 * case, we have to build the actual key now because we
-			 * are about to promote it.
-			 */
-			if (key_onpage_ovfl) {
-				WT_ERR(__wt_buf_set(session,
-				    r->cur, WT_IKEY_DATA(ikey), ikey->size));
-				key_onpage_ovfl = 0;
+		if (key->len + val->len > r->space_avail) {
+			if (r->raw_compression)
+				WT_ERR(__rec_split_raw(
+				    session, r, key->len + val->len));
+			else {
+				/*
+				 * In one path above, we copied address blocks
+				 * from the page rather than building the actual
+				 * key.  In that case, we have to build the key
+				 * now because we are about to promote it.
+				 */
+				if (key_onpage_ovfl) {
+					WT_ERR(__wt_buf_set(session, r->cur,
+					    WT_IKEY_DATA(ikey), ikey->size));
+					key_onpage_ovfl = 0;
+				}
+				WT_ERR(__rec_split(
+				    session, r, key->len + val->len));
 			}
-			WT_ERR(__rec_split(session, r));
 		}
 
 		/* Copy the key and value onto the page. */
@@ -4102,11 +4172,10 @@ __rec_row_merge(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_PAGE *page)
 		    r, addr->addr, addr->size, __rec_vtype(addr), 0);
 
 		/* Boundary: split or write the page. */
-		while (key->len + val->len > r->space_avail)
-			if (r->raw_compression)
-				WT_RET(__rec_split_raw(session, r));
-			else
-				WT_RET(__rec_split(session, r));
+		if (key->len + val->len > r->space_avail)
+			WT_RET(r->raw_compression ?
+			    __rec_split_raw(session, r, key->len + val->len) :
+			    __rec_split(session, r, key->len + val->len));
 
 		/* Copy the key and value onto the page. */
 		__rec_copy_incr(session, r, key);
@@ -4140,7 +4209,7 @@ __rec_row_leaf(WT_SESSION_IMPL *session,
 	size_t size;
 	uint64_t slvg_skip;
 	uint32_t i;
-	int dictionary, onpage_ovfl, ovfl_key;
+	int dictionary, key_onpage_ovfl, ovfl_key;
 	const void *p;
 	void *copy;
 
@@ -4369,9 +4438,9 @@ __rec_row_leaf(WT_SESSION_IMPL *session,
 		 * If the key is an overflow key that hasn't been removed, use
 		 * the original backing blocks.
 		 */
-		onpage_ovfl = kpack != NULL &&
+		key_onpage_ovfl = kpack != NULL &&
 		    kpack->ovfl && kpack->raw != WT_CELL_KEY_OVFL_RM;
-		if (onpage_ovfl) {
+		if (key_onpage_ovfl) {
 			key->buf.data = cell;
 			key->buf.size = __wt_cell_total_len(kpack);
 			key->cell_len = 0;
@@ -4435,36 +4504,39 @@ build:
 		}
 
 		/* Boundary: split or write the page. */
-		while (key->len + val->len > r->space_avail) {
-			if (r->raw_compression) {
-				WT_ERR(__rec_split_raw(session, r));
-				continue;
-			}
-
-			/*
-			 * In one path above, we copied address blocks from the
-			 * page rather than building the actual key.  In that
-			 * case, we have to build the actual key now because we
-			 * are about to promote it.
-			 */
-			if (onpage_ovfl) {
-				WT_ERR(__wt_dsk_cell_data_ref(
-				    session, WT_PAGE_ROW_LEAF, kpack, r->cur));
-				onpage_ovfl = 0;
-			}
-			WT_ERR(__rec_split(session, r));
+		if (key->len + val->len > r->space_avail) {
+			if (r->raw_compression)
+				WT_ERR(__rec_split_raw(
+				    session, r, key->len + val->len));
+			else {
+				/*
+				 * In one path above, we copied address blocks
+				 * from the page rather than building the actual
+				 * key.  In that case, we have to build the key
+				 * now because we are about to promote it.
+				 */
+				if (key_onpage_ovfl) {
+					WT_ERR(__wt_dsk_cell_data_ref(session,
+					    WT_PAGE_ROW_LEAF, kpack, r->cur));
+					key_onpage_ovfl = 0;
+				}
+				WT_ERR(__rec_split(
+				    session, r, key->len + val->len));
 
-			/*
-			 * Turn off prefix compression until a full key written
-			 * to the new page, and (unless we're already working
-			 * with an overflow key), rebuild the key without prefix
-			 * compression.
-			 */
-			if (r->key_pfx_compress_conf) {
-				r->key_pfx_compress = 0;
-				if (!ovfl_key)
-					WT_ERR(__rec_cell_build_leaf_key(
-					    session, r, NULL, 0, &ovfl_key));
+				/*
+				 * Turn off prefix compression until a full key
+				 * is written to the new page, and (unless
+				 * already working with an overflow key),
+				 * rebuild the key without compression.
+				 */
+				if (r->key_pfx_compress_conf) {
+					r->key_pfx_compress = 0;
+					if (!ovfl_key)
+						WT_ERR(
+						    __rec_cell_build_leaf_key(
+						    session,
+						    r, NULL, 0, &ovfl_key));
+				}
 			}
 		}
 
@@ -4529,24 +4601,28 @@ __rec_row_leaf_insert(WT_SESSION_IMPL *session, WT_RECONCILE *r, WT_INSERT *ins)
 		    WT_INSERT_KEY(ins), WT_INSERT_KEY_SIZE(ins), &ovfl_key));
 
 		/* Boundary: split or write the page. */
-		while (key->len + val->len > r->space_avail) {
-			if (r->raw_compression) {
-				WT_RET(__rec_split_raw(session, r));
-				continue;
-			}
-			WT_RET(__rec_split(session, r));
+		if (key->len + val->len > r->space_avail) {
+			if (r->raw_compression)
+				WT_RET(__rec_split_raw(
+				    session, r, key->len + val->len));
+			else {
+				WT_RET(__rec_split(
+				    session, r, key->len + val->len));
 
-			/*
-			 * Turn off prefix compression until a full key written
-			 * to the new page, and (unless we're already working
-			 * with an overflow key), rebuild the key without prefix
-			 * compression.
-			 */
-			if (r->key_pfx_compress_conf) {
-				r->key_pfx_compress = 0;
-				if (!ovfl_key)
-					WT_RET(__rec_cell_build_leaf_key(
-					    session, r, NULL, 0, &ovfl_key));
+				/*
+				 * Turn off prefix compression until a full key
+				 * is written to the new page, and (unless
+				 * already working with an overflow key),
+				 * rebuild the key without compression.
+				 */
+				if (r->key_pfx_compress_conf) {
+					r->key_pfx_compress = 0;
+					if (!ovfl_key)
+						WT_RET(
+						    __rec_cell_build_leaf_key(
+						    session,
+						    r, NULL, 0, &ovfl_key));
+				}
 			}
 		}
 
@@ -5064,7 +5140,7 @@ __rec_cell_build_int_key(WT_SESSION_IMPL *session,
 	WT_RET(__wt_buf_set(session, &key->buf, data, size));
 
 	/* Create an overflow object if the data won't fit. */
-	if (size > btree->maxintlitem) {
+	if (size > btree->maxintlkey) {
 		WT_STAT_FAST_DATA_INCR(session, rec_overflow_key_internal);
 
 		*is_ovflp = 1;
@@ -5159,7 +5235,7 @@ __rec_cell_build_leaf_key(WT_SESSION_IMPL *session,
 		    key->buf.data, (uint32_t)key->buf.size, &key->buf));
 
 	/* Create an overflow object if the data won't fit. */
-	if (key->buf.size > btree->maxleafitem) {
+	if (key->buf.size > btree->maxleafkey) {
 		/*
 		 * Overflow objects aren't prefix compressed -- rebuild any
 		 * object that was prefix compressed.
@@ -5246,7 +5322,7 @@ __rec_cell_build_val(WT_SESSION_IMPL *session,
 			    val->buf.data, (uint32_t)val->buf.size, &val->buf));
 
 		/* Create an overflow object if the data won't fit. */
-		if (val->buf.size > btree->maxleafitem) {
+		if (val->buf.size > btree->maxleafvalue) {
 			WT_STAT_FAST_DATA_INCR(session, rec_overflow_value);
 
 			return (__rec_cell_build_ovfl(
diff --git a/src/schema/schema_open.c b/src/schema/schema_open.c
index f5937381cbb..4699fdeee02 100644
--- a/src/schema/schema_open.c
+++ b/src/schema/schema_open.c
@@ -83,7 +83,7 @@ __wt_schema_open_colgroups(WT_SESSION_IMPL *session, WT_TABLE *table)
 			goto err;
 		}
 
-		WT_ERR(__wt_calloc_def(session, 1, &colgroup));
+		WT_ERR(__wt_calloc_one(session, &colgroup));
 		WT_ERR(__wt_strndup(
 		    session, buf->data, buf->size, &colgroup->name));
 		colgroup->config = cgconfig;
@@ -319,7 +319,7 @@ __wt_schema_open_index(WT_SESSION_IMPL *session,
 
 		if (table->indices[i] == NULL) {
 			WT_ERR(cursor->get_value(cursor, &idxconf));
-			WT_ERR(__wt_calloc_def(session, 1, &idx));
+			WT_ERR(__wt_calloc_one(session, &idx));
 			WT_ERR(__wt_strdup(session, uri, &idx->name));
 			WT_ERR(__wt_strdup(session, idxconf, &idx->config));
 			WT_ERR(__open_index(session, table, idx));
@@ -392,7 +392,7 @@ __wt_schema_open_table(WT_SESSION_IMPL *session,
 	WT_ERR(cursor->search(cursor));
 	WT_ERR(cursor->get_value(cursor, &tconfig));
 
-	WT_ERR(__wt_calloc_def(session, 1, &table));
+	WT_ERR(__wt_calloc_one(session, &table));
 	table->name = tablename;
 	tablename = NULL;
 	table->name_hash = __wt_hash_city64(name, namelen);
diff --git a/src/session/session_api.c b/src/session/session_api.c
index dc3c7d7041f..8f460dcc29f 100644
--- a/src/session/session_api.c
+++ b/src/session/session_api.c
@@ -953,6 +953,7 @@ __wt_open_session(WT_CONNECTION_IMPL *conn,
 {
 	static const WT_SESSION stds = {
 		NULL,
+		NULL,
 		__session_close,
 		__session_reconfigure,
 		__session_open_cursor,
diff --git a/src/session/session_dhandle.c b/src/session/session_dhandle.c
index e28e277d5f6..85483c7c8ae 100644
--- a/src/session/session_dhandle.c
+++ b/src/session/session_dhandle.c
@@ -20,7 +20,7 @@ __session_add_dhandle(
 	WT_DATA_HANDLE_CACHE *dhandle_cache;
 	uint64_t bucket;
 
-	WT_RET(__wt_calloc_def(session, 1, &dhandle_cache));
+	WT_RET(__wt_calloc_one(session, &dhandle_cache));
 	dhandle_cache->dhandle = session->dhandle;
 
 	bucket = dhandle_cache->dhandle->name_hash % WT_HASH_ARRAY_SIZE;
diff --git a/src/support/huffman.c b/src/support/huffman.c
index 5a06b72d33e..9625e879381 100644
--- a/src/support/huffman.c
+++ b/src/support/huffman.c
@@ -306,7 +306,7 @@ __wt_huffman_open(WT_SESSION_IMPL *session,
 	combined_nodes = leaves = NULL;
 	node = node2 = tempnode = NULL;
 
-	WT_RET(__wt_calloc_def(session, 1, &huffman));
+	WT_RET(__wt_calloc_one(session, &huffman));
 
 	/*
 	 * The frequency table is 4B pairs of symbol and frequency.  The symbol
@@ -381,8 +381,8 @@ __wt_huffman_open(WT_SESSION_IMPL *session,
 	    symcnt, sizeof(INDEXED_SYMBOL), indexed_freq_compare);
 
 	/* We need two node queues to build the tree. */
-	WT_ERR(__wt_calloc_def(session, 1, &leaves));
-	WT_ERR(__wt_calloc_def(session, 1, &combined_nodes));
+	WT_ERR(__wt_calloc_one(session, &leaves));
+	WT_ERR(__wt_calloc_one(session, &combined_nodes));
 
 	/*
 	 * Adding the leaves to the queue.
@@ -393,7 +393,7 @@ __wt_huffman_open(WT_SESSION_IMPL *session,
 	 */
 	for (i = 0; i < symcnt; ++i)
 		if (indexed_freqs[i].frequency > 0) {
-			WT_ERR(__wt_calloc_def(session, 1, &tempnode));
+			WT_ERR(__wt_calloc_one(session, &tempnode));
 			tempnode->symbol = (uint8_t)indexed_freqs[i].symbol;
 			tempnode->weight = indexed_freqs[i].frequency;
 			WT_ERR(node_queue_enqueue(session, leaves, tempnode));
@@ -431,7 +431,7 @@ __wt_huffman_open(WT_SESSION_IMPL *session,
 		 * In every second run, we have both node and node2 initialized.
 		 */
 		if (node != NULL && node2 != NULL) {
-			WT_ERR(__wt_calloc_def(session, 1, &tempnode));
+			WT_ERR(__wt_calloc_one(session, &tempnode));
 
 			/* The new weight is the sum of the two weights. */
 			tempnode->weight = node->weight + node2->weight;
@@ -845,7 +845,7 @@ node_queue_enqueue(
 	NODE_QUEUE_ELEM *elem;
 
 	/* Allocating a new linked list element */
-	WT_RET(__wt_calloc_def(session, 1, &elem));
+	WT_RET(__wt_calloc_one(session, &elem));
 
 	/* It holds the tree node, and has no next element yet */
 	elem->node = node;
diff --git a/src/support/scratch.c b/src/support/scratch.c
index ca2cdac8377..e4df04a36ed 100644
--- a/src/support/scratch.c
+++ b/src/support/scratch.c
@@ -216,7 +216,7 @@ __wt_scr_alloc_func(WT_SESSION_IMPL *session, size_t size, WT_ITEM **scratchp
 		WT_ASSERT(session, slot != NULL);
 		best = slot;
 
-		WT_ERR(__wt_calloc_def(session, 1, best));
+		WT_ERR(__wt_calloc_one(session, best));
 
 		/* Scratch buffers must be aligned. */
 		F_SET(*best, WT_ITEM_ALIGNED);
diff --git a/src/support/stat.c b/src/support/stat.c
index 21d56238f4a..19aa9170c5b 100644
--- a/src/support/stat.c
+++ b/src/support/stat.c
@@ -30,11 +30,11 @@ __wt_stat_init_dsrc_stats(WT_DSRC_STATS *stats)
 	stats->btree_column_variable.desc =
 	    "btree: column-store variable-size leaf pages";
 	stats->btree_fixed_len.desc = "btree: fixed-record size";
-	stats->btree_maxintlitem.desc =
-	    "btree: maximum internal page item size";
+	stats->btree_maxintlkey.desc = "btree: maximum internal page key size";
 	stats->btree_maxintlpage.desc = "btree: maximum internal page size";
-	stats->btree_maxleafitem.desc = "btree: maximum leaf page item size";
+	stats->btree_maxleafkey.desc = "btree: maximum leaf page key size";
 	stats->btree_maxleafpage.desc = "btree: maximum leaf page size";
+	stats->btree_maxleafvalue.desc = "btree: maximum leaf page value size";
 	stats->btree_maximum_depth.desc = "btree: maximum tree depth";
 	stats->btree_entries.desc = "btree: number of key/value pairs";
 	stats->btree_overflow.desc = "btree: overflow pages";
@@ -154,10 +154,11 @@ __wt_stat_refresh_dsrc_stats(void *stats_arg)
 	stats->btree_column_deleted.v = 0;
 	stats->btree_column_variable.v = 0;
 	stats->btree_fixed_len.v = 0;
-	stats->btree_maxintlitem.v = 0;
+	stats->btree_maxintlkey.v = 0;
 	stats->btree_maxintlpage.v = 0;
-	stats->btree_maxleafitem.v = 0;
+	stats->btree_maxleafkey.v = 0;
 	stats->btree_maxleafpage.v = 0;
+	stats->btree_maxleafvalue.v = 0;
 	stats->btree_maximum_depth.v = 0;
 	stats->btree_entries.v = 0;
 	stats->btree_overflow.v = 0;
diff --git a/test/format/format.h b/test/format/format.h
index 8020d18d716..902cea6cc5d 100644
--- a/test/format/format.h
+++ b/test/format/format.h
@@ -89,6 +89,8 @@ extern WT_EXTENSION_API *wt_api;
 
 #undef	M
 #define	M(v)		((v) * 1000000)		/* Million */
+#undef	KILOBYTE
+#define	KILOBYTE(v)	((v) * 1024)
 #undef	MEGABYTE
 #define	MEGABYTE(v)	((v) * 1048576)
 #undef	GIGABYTE
diff --git a/test/format/ops.c b/test/format/ops.c
index 28f1079b30d..bbaeabcc479 100644
--- a/test/format/ops.c
+++ b/test/format/ops.c
@@ -52,7 +52,7 @@ wts_ops(void)
 	WT_SESSION *session;
 	pthread_t backup_tid, compact_tid;
 	uint64_t thread_ops;
-	uint32_t i, tenths;
+	uint32_t i, fourths;
 	int ret, running;
 
 	conn = g.wts_conn;
@@ -72,16 +72,16 @@ wts_ops(void)
 	 * There are two mechanisms to specify the length of the run, a number
 	 * of operations or a timer.  If the former, each thread does an equal
 	 * share of the total operations (and make sure that it's not 0).  If
-	 * the latter, calculate how many tenth-of-a-second sleeps until this
+	 * the latter, calculate how many fourth-of-a-second sleeps until this
 	 * part of the run finishes.
 	 */
 	if (g.c_timer == 0) {
-		tenths = 0;
+		fourths = 0;
 		if (g.c_ops < g.c_threads)
 			g.c_ops = g.c_threads;
 		thread_ops = g.c_ops / g.c_threads;
 	} else {
-		tenths = (g.c_timer * 10 * 60) / FORMAT_OPERATION_REPS;
+		fourths = (g.c_timer * 4 * 60) / FORMAT_OPERATION_REPS;
 		thread_ops = 0;
 	}
 
@@ -141,7 +141,7 @@ wts_ops(void)
 
 			/* Tell the thread if it's done. */
 			if (thread_ops == 0) {
-				if (tenths == 0)
+				if (fourths == 0)
 					tinfo[i].quit = 1;
 			} else
 				if (tinfo[i].ops >= thread_ops)
@@ -151,8 +151,8 @@ wts_ops(void)
 		if (!running)
 			break;
 		(void)usleep(250000);		/* 1/4th of a second */
-		if (tenths != 0)
-			--tenths;
+		if (fourths != 0)
+			--fourths;
 	}
 	free(tinfo);
 
diff --git a/test/format/util.c b/test/format/util.c
index b043475842e..4880dfbbdd0 100644
--- a/test/format/util.c
+++ b/test/format/util.c
@@ -27,17 +27,24 @@
 
 #include "format.h"
 
+#ifndef MAX
+#define	MAX(a, b)	(((a) > (b)) ? (a) : (b))
+#endif
+
 static inline uint32_t
 kv_len(uint64_t keyno, uint32_t min, uint32_t max)
 {
 	/*
-	 * We want to focus on relatively small key/value items, but admitting
-	 * the possibility of larger items.  Pick a size close to the minimum
-	 * most of the time, only roll the dice for a really big item 1 in 20
-	 * times.  (The configuration can force large key/value minimum sizes,
-	 * where every key/value item will be an overflow.)
+	 * Focus on relatively small key/value items, admitting the possibility
+	 * of larger items.  Pick a size close to the minimum most of the time,
+	 * only create a larger item 1 in 20 times, and a really big item 1 in
+	 * 1000 times. (Configuration can force large key/value minimum sizes,
+	 * where every key/value item is an overflow.)
 	 */
-	if (keyno % 20 != 0 && max > min + 20)
+	if (keyno % 1000 == 0 && max < KILOBYTE(80)) {
+		min = KILOBYTE(80);
+		max = KILOBYTE(100);
+	} else if (keyno % 20 != 0 && max > min + 20)
 		max = min + 20;
 	return (MMRAND(min, max));
 }
@@ -65,13 +72,14 @@ void
 key_gen_setup(uint8_t **keyp)
 {
 	uint8_t *key;
-	size_t i;
+	size_t i, len;
 
 	*keyp = NULL;
 
-	if ((key = malloc(g.c_key_max)) == NULL)
+	len = MAX(KILOBYTE(100), g.c_key_max);
+	if ((key = malloc(len)) == NULL)
 		die(errno, "malloc");
-	for (i = 0; i < g.c_key_max; ++i)
+	for (i = 0; i < len; ++i)
 		key[i] = (uint8_t)("abcdefghijklmnopqrstuvwxyz"[i % 26]);
 	*keyp = key;
 }
@@ -118,7 +126,7 @@ val_gen_setup(uint8_t **valp)
 	 * into the buffer by a few extra bytes, used to generate different
 	 * data for column-store run-length encoded files.
 	 */
-	len = g.c_value_max + 20;
+	len = MAX(KILOBYTE(100), g.c_value_max) + 20;
 	if ((val = malloc(len)) == NULL)
 		die(errno, "malloc");
 	for (i = 0; i < len; ++i)
diff --git a/test/format/wts.c b/test/format/wts.c
index 29b40eda74d..21e7806e982 100644
--- a/test/format/wts.c
+++ b/test/format/wts.c
@@ -173,12 +173,15 @@ wts_create(void)
 {
 	WT_CONNECTION *conn;
 	WT_SESSION *session;
-	uint32_t maxintlpage, maxintlitem, maxleafpage, maxleafitem;
+	uint32_t maxintlpage, maxintlkey, maxleafpage, maxleafkey, maxleafvalue;
 	int ret;
 	char config[4096], *end, *p;
 
 	conn = g.wts_conn;
 
+	p = config;
+	end = config + sizeof(config);
+
 	/*
 	 * Ensure that we can service at least one operation per-thread
 	 * concurrently without filling the cache with pinned pages. We
@@ -197,23 +200,30 @@ wts_create(void)
 		if (maxleafpage > 512)
 			maxleafpage >>= 1;
 	}
-	maxintlitem = MMRAND(maxintlpage / 50, maxintlpage / 40);
-	if (maxintlitem < 40)
-		maxintlitem = 40;
-	maxleafitem = MMRAND(maxleafpage / 50, maxleafpage / 40);
-	if (maxleafitem < 40)
-		maxleafitem = 40;
-
-	p = config;
-	end = config + sizeof(config);
 	p += snprintf(p, (size_t)(end - p),
 	    "key_format=%s,"
 	    "allocation_size=512,%s"
-	    "internal_page_max=%d,internal_item_max=%d,"
-	    "leaf_page_max=%d,leaf_item_max=%d",
+	    "internal_page_max=%d,leaf_page_max=%d",
 	    (g.type == ROW) ? "u" : "r",
 	    g.c_firstfit ? "block_allocation=first," : "",
-	    maxintlpage, maxintlitem, maxleafpage, maxleafitem);
+	    maxintlpage, maxleafpage);
+
+	/*
+	 * Configure the maximum key/value sizes, but leave it as the default
+	 * if we come up with something crazy.
+	 */
+	maxintlkey = MMRAND(maxintlpage / 50, maxintlpage / 40);
+	if (maxintlkey > 20)
+		p += snprintf(p, (size_t)(end - p),
+		    ",internal_key_max=%d", maxintlkey);
+	maxleafkey = MMRAND(maxleafpage / 50, maxleafpage / 40);
+	if (maxleafkey > 20)
+		p += snprintf(p, (size_t)(end - p),
+		    ",leaf_key_max=%d", maxleafkey);
+	maxleafvalue = MMRAND(maxleafpage * 10, maxleafpage / 40);
+	if (maxleafvalue > 40 && maxleafvalue < 100 * 1024)
+		p += snprintf(p, (size_t)(end - p),
+		    ",leaf_value_max=%d", maxleafvalue);
 
 	switch (g.type) {
 	case FIX:
diff --git a/test/suite/run.py b/test/suite/run.py
index 32dc8835d4b..a29f7af2212 100644
--- a/test/suite/run.py
+++ b/test/suite/run.py
@@ -82,6 +82,7 @@ Options:\n\
   -g      | --gdb                all subprocesses (like calls to wt) use gdb\n\
   -h      | --help               show this message\n\
   -j N    | --parallel N         run all tests in parallel using N processes\n\
+  -l      | --long               run the entire test suite\n\
   -p      | --preserve           preserve output files in WT_TEST/<testname>\n\
   -t      | --timestamp          name WT_TEST according to timestamp\n\
   -v N    | --verbose N          set verboseness to N (0<=N<=3, default=1)\n\
@@ -219,7 +220,7 @@ if __name__ == '__main__':
     tests = unittest.TestSuite()
 
     # Turn numbers and ranges into test module names
-    preserve = timestamp = debug = gdbSub = False
+    preserve = timestamp = debug = gdbSub = longtest = False
     parallel = 0
     configfile = None
     configwrite = False
@@ -243,6 +244,15 @@ if __name__ == '__main__':
             if option == '-debug' or option == 'd':
                 debug = True
                 continue
+            if option == '-gdb' or option == 'g':
+                gdbSub = True
+                continue
+            if option == '-help' or option == 'h':
+                usage()
+                sys.exit(True)
+            if option == '-long' or option == 'l':
+                longtest = True
+                continue
             if option == '-parallel' or option == 'j':
                 if parallel != 0 or len(args) == 0:
                     usage()
@@ -255,12 +265,6 @@ if __name__ == '__main__':
             if option == '-timestamp' or option == 't':
                 timestamp = True
                 continue
-            if option == '-gdb' or option == 'g':
-                gdbSub = True
-                continue
-            if option == '-help' or option == 'h':
-                usage()
-                sys.exit(True)
             if option == '-verbose' or option == 'v':
                 if len(args) == 0:
                     usage()
@@ -292,7 +296,7 @@ if __name__ == '__main__':
     # All global variables should be set before any test classes are loaded.
     # That way, verbose printing can be done at the class definition level.
     wttest.WiredTigerTestCase.globalSetup(preserve, timestamp, gdbSub,
-                                          verbose, dirarg)
+                                          verbose, dirarg, longtest)
 
     # Without any tests listed as arguments, do discovery
     if len(testargs) == 0:
diff --git a/test/suite/test_lsm02.py b/test/suite/test_lsm02.py
index 2b3d48f8f30..41d82d8ad0d 100644
--- a/test/suite/test_lsm02.py
+++ b/test/suite/test_lsm02.py
@@ -54,9 +54,12 @@ class test_lsm02(wttest.WiredTigerTestCase):
         v = '\x14\x14'
         self.add_key(self.uri, 'k1', v)
         self.verify_key_exists(self.uri, 'k1', v)
-        v += 'a' * 1000
+        v = '\x14\x14\0\0\0\0\0\0'
         self.add_key(self.uri, 'k2', v)
         self.verify_key_exists(self.uri, 'k2', v)
+        v += 'a' * 1000
+        self.add_key(self.uri, 'k3', v)
+        self.verify_key_exists(self.uri, 'k3', v)
 
     def test_lsm_rename01(self):
         self.session.create(self.uri, 'key_format=S,value_format=S')
diff --git a/test/suite/test_sweep01.py b/test/suite/test_sweep01.py
index 8d561763091..d2ebb796d28 100644
--- a/test/suite/test_sweep01.py
+++ b/test/suite/test_sweep01.py
@@ -33,7 +33,7 @@
 import fnmatch, os, shutil, run, time
 from suite_subprocess import suite_subprocess
 from wiredtiger import wiredtiger_open, stat
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios
 import wttest
 
 class test_sweep01(wttest.WiredTigerTestCase, suite_subprocess):
@@ -55,7 +55,7 @@ class test_sweep01(wttest.WiredTigerTestCase, suite_subprocess):
                     create_params = 'key_format=r,value_format=8t')),
     ]
 
-    scenarios = number_scenarios(multiply_scenarios('.', types, ckpt_list))
+    scenarios = number_scenarios(prune_scenarios(multiply_scenarios('.', types, ckpt_list), 1, 100))
 
     # Overrides WiredTigerTestCase
     def setUpConnectionOpen(self, dir):
diff --git a/test/suite/test_txn02.py b/test/suite/test_txn02.py
index 32165c380a9..d83bf6ce5f8 100644
--- a/test/suite/test_txn02.py
+++ b/test/suite/test_txn02.py
@@ -32,7 +32,7 @@
 import fnmatch, os, shutil, time
 from suite_subprocess import suite_subprocess
 from wiredtiger import wiredtiger_open
-from wtscenario import multiply_scenarios, number_scenarios
+from wtscenario import multiply_scenarios, number_scenarios, prune_scenarios
 import wttest
 
 class test_txn02(wttest.WiredTigerTestCase, suite_subprocess):
@@ -81,8 +81,19 @@ class test_txn02(wttest.WiredTigerTestCase, suite_subprocess):
     txn3s = [('t3c', dict(txn3='commit')), ('t3r', dict(txn3='rollback'))]
     txn4s = [('t4c', dict(txn4='commit')), ('t4r', dict(txn4='rollback'))]
 
-    scenarios = number_scenarios(multiply_scenarios('.', types,
-            op1s, txn1s, op2s, txn2s, op3s, txn3s, op4s, txn4s))
+    all_scenarios = multiply_scenarios('.', types,
+        op1s, txn1s, op2s, txn2s, op3s, txn3s, op4s, txn4s)
+
+    # This test generates thousands of potential scenarios.
+    # For default runs, we'll use a small subset of them; for
+    # long runs (when --long is set) we'll set a much larger limit.
+    scenarios = number_scenarios(prune_scenarios(all_scenarios, 20, 5000))
+
+    # Each check_log() call takes a second, so we don't call it for
+    # every scenario; we'll limit it to the value of checklog_calls.
+    checklog_calls = 100 if wttest.islongtest() else 2
+    checklog_mod = (len(scenarios) / checklog_calls + 1)
+
     # scenarios = number_scenarios(multiply_scenarios('.', types,
     # op1s, txn1s, op2s, txn2s, op3s, txn3s, op4s, txn4s)) [:3]
     # Overrides WiredTigerTestCase
@@ -253,10 +264,8 @@ class test_txn02(wttest.WiredTigerTestCase, suite_subprocess):
             # Check the state after each commit/rollback.
             self.check_all(current, committed)
 
-        # Check the log state after the entire op completes and run recovery.
-        # check_log() takes over a second to run, so we don't want to run it
-        # for all scenarios, rather, we run it about 100 times overall.
-        if self.scenario_number % (len(test_txn02.scenarios) / 100 + 1) == 0:
+        # check_log() is slow, so we don't run it on every scenario.
+        if self.scenario_number % test_txn02.checklog_mod == 0:
             self.check_log(committed)
 
 if __name__ == '__main__':
diff --git a/test/suite/wtscenario.py b/test/suite/wtscenario.py
index a8fd4031ceb..70497102bb0 100644
--- a/test/suite/wtscenario.py
+++ b/test/suite/wtscenario.py
@@ -102,17 +102,29 @@ def prune_sorter_key(scene):
         p = scene[1]['P']
     return p * scene[1]['_rand']
 
-def prune_scenarios(scenes, count = -1):
+def prune_resort_key(scene):
+    """
+    Used by prune_scenarios to extract the original ordering key for sorting.
+    """
+    return scene[1]['_order']
+
+def set_long_run(islong):
+    global _is_long_run
+    _is_long_run = islong
+
+def prune_scenarios(scenes, default_count = -1, long_count = -1):
     """
     Use listed probabilities for pruning the list of scenarios.
     That is, the highest probability (value of P in the scendario)
-    are chosen more often.  With a second argument, only the
-    given number of scenarios are returned.  With no second argument,
-    only scenarios with P > .5 are returned half the time, etc.
+    are chosen more often.  With just one argument, only scenarios
+    with P > .5 are returned half the time, etc. A second argument
+    limits the number of scenarios. When a third argument is present,
+    it is a separate limit for a long run.
     """
+    global _is_long_run
     r = suite_random.suite_random()
     result = []
-    if count == -1:
+    if default_count == -1:
         # Missing second arg - return those with P == .3 at
         # 30% probability, for example.
         for scene in scenes:
@@ -123,25 +135,41 @@ def prune_scenarios(scenes, count = -1):
             result.append(scene)
         return result
     else:
-        # With second arg, we want exactly 'count' items
-        # returned.  So we'll sort them all and choose
+        # With at least a second arg present, we'll want a specific count
+        # of items returned.  So we'll sort them all and choose
         # the top number.  Not the most efficient solution,
         # but it's easy.
+        if _is_long_run and long_count != -1:
+            count = long_count
+        else:
+            count = default_count
+
+        l = len(scenes)
+        if l <= count:
+            return scenes
+        if count == 0:
+            return []
+        order = 0
         for scene in scenes:
             scene[1]['_rand'] = r.rand_float()
-        scenes = sorted(scenes, key=prune_sorter_key)
+            scene[1]['_order'] = order
+            order += 1
+        scenes = sorted(scenes, key=prune_sorter_key) # random sort driven by P
+        scenes = scenes[l-count:l]                    # truncate to get best
+        scenes = sorted(scenes, key=prune_resort_key) # original order
         for scene in scenes:
             del scene[1]['_rand']
-        l = len(scenes)
-        return scenes[l-count:l]
+            del scene[1]['_order']
+        return scenes
 
 def number_scenarios(scenes):
     """
-    Add a 'scenario_number' variable to each scenario.
+    Add a 'scenario_number' and 'scenario_name' variable to each scenario.
     The hash table for each scenario is altered!
     """
     count = 0
     for scene in scenes:
+        scene[1]['scenario_name'] = scene[0]
         scene[1]['scenario_number'] = count
         count += 1
     return scenes
diff --git a/test/suite/wttest.py b/test/suite/wttest.py
index d1705434988..4de09a143b2 100644
--- a/test/suite/wttest.py
+++ b/test/suite/wttest.py
@@ -37,7 +37,7 @@ except ImportError:
 
 from contextlib import contextmanager
 import os, re, shutil, sys, time, traceback
-
+import wtscenario
 import wiredtiger
 
 def shortenWithEllipsis(s, maxlen):
@@ -141,17 +141,20 @@ class WiredTigerTestCase(unittest.TestCase):
 
     @staticmethod
     def globalSetup(preserveFiles = False, useTimestamp = False,
-                    gdbSub = False, verbose = 1, dirarg = None):
+                    gdbSub = False, verbose = 1, dirarg = None,
+                    longtest = False):
         WiredTigerTestCase._preserveFiles = preserveFiles
         d = 'WT_TEST' if dirarg == None else dirarg
         if useTimestamp:
             d += '.' + time.strftime('%Y%m%d-%H%M%S', time.localtime())
         shutil.rmtree(d, ignore_errors=True)
         os.makedirs(d)
+        wtscenario.set_long_run(longtest)
         WiredTigerTestCase._parentTestdir = d
         WiredTigerTestCase._origcwd = os.getcwd()
         WiredTigerTestCase._resultfile = open(os.path.join(d, 'results.txt'), "w", 0)  # unbuffered
         WiredTigerTestCase._gdbSubprocess = gdbSub
+        WiredTigerTestCase._longtest = longtest
         WiredTigerTestCase._verbose = verbose
         WiredTigerTestCase._dupout = os.dup(sys.stdout.fileno())
         WiredTigerTestCase._stdout = sys.stdout
@@ -182,8 +185,9 @@ class WiredTigerTestCase(unittest.TestCase):
         # is used, then each scenario is given a number, which can
         # help distinguish tests.
         scen = ''
-        if hasattr(self, 'scenario_number'):
-            scen = '(scenario ' + str(self.scenario_number) + ')'
+        if hasattr(self, 'scenario_number') and hasattr(self, 'scenario_name'):
+            scen = '(scenario ' + str(self.scenario_number) + \
+                   ': ' + self.scenario_name + ')'
         return self.simpleName() + scen
 
     def simpleName(self):
@@ -283,7 +287,7 @@ class WiredTigerTestCase(unittest.TestCase):
             self.pr('preserving directory ' + self.testdir)
 
         if not passed and not skipped:
-            print "ERROR in " + self.testsubdir
+            print "ERROR in " + str(self)
             self.pr('FAIL')
             self.prexception(excinfo)
             self.pr('preserving directory ' + self.testdir)
@@ -431,6 +435,23 @@ class WiredTigerTestCase(unittest.TestCase):
     def className(self):
         return self.__class__.__name__
 
+
+def longtest(description):
+    """
+    Used as a function decorator, for example, @wttest.longtest("description").
+    The decorator indicates that this test function should only be included
+    when running the test suite with the --long option.
+    """
+    def runit_decorator(func):
+        return func
+    if not WiredTigerTestCase._longtest:
+        return unittest.skip(description + ' (enable with --long)')
+    else:
+        return runit_decorator
+
+def islongtest():
+    return WiredTigerTestCase._longtest
+
 def runsuite(suite, parallel):
     suite_to_run = suite
     if parallel > 1:
diff --git a/tools/stat_data.py b/tools/stat_data.py
index 50528dbd26a..3d192be7566 100644
--- a/tools/stat_data.py
+++ b/tools/stat_data.py
@@ -34,10 +34,11 @@ no_scale_per_second_list = [
     'btree: column-store variable-size deleted values',
     'btree: column-store variable-size leaf pages',
     'btree: fixed-record size',
-    'btree: maximum internal page item size',
+    'btree: maximum internal page key size',
     'btree: maximum internal page size',
-    'btree: maximum leaf page item size',
+    'btree: maximum leaf page key size',
     'btree: maximum leaf page size',
+    'btree: maximum leaf page value size',
     'btree: maximum tree depth',
     'btree: number of key/value pairs',
     'btree: overflow pages',
author	Keith Bostic <keith@wiredtiger.com>	2014-12-15 09:35:54 -0500
committer	Keith Bostic <keith@wiredtiger.com>	2014-12-15 09:35:54 -0500
commit	980165614f114dbcf02344ba7209ae77369bcb80 (patch)
tree	952a89a49aa758ec177ed9ce491524d0c1c79c1f
parent	4c26d2324bae1d7030b0142d50dbd2ccf11ddeb6 (diff)
parent	5cf21acf8fd66876e71334cc09deac0a09e8ea91 (diff)
download	mongo-980165614f114dbcf02344ba7209ae77369bcb80.tar.gz