Merge branch 'master' of https://github.com/wiredtiger/wiredtiger

author: Don Anderson <dda@ddanderson.com> 2012-03-15 14:29:22 -0400
committer: Don Anderson <dda@ddanderson.com> 2012-03-15 14:29:22 -0400
commit: b9b00694d3b7e4fcaf59fdf8e5196bcd0040b581 (patch)
tree: 0e43cb64a2d788cbec61843f1d4a739081ee1344
parent: c0f8a06d6a77ea64f24b28b59c62c866c9982ae0 (diff)
parent: 9e67b397abc3f7931aa708327011c0acf8e61f60 (diff)
download: mongo-b9b00694d3b7e4fcaf59fdf8e5196bcd0040b581.tar.gz
18 files changed, 916 insertions, 895 deletions
diff --git a/dist/s_copyright b/dist/s_copyright
index 05ea9a4e6be..afca1458c46 100644
--- a/dist/s_copyright
+++ b/dist/s_copyright
@@ -72,8 +72,8 @@ l="LICENSE COPYING"
 
 # Search for files, ignoring test/3rdparty.
 for i in `cd .. &&
-    find bench build_posix dist docs lang src test \
-        -name '*.[chi]' -o -name '*.cxx' -o -name '*.java' -o -name '*.py' |
+    find [a-z]* -name '*.[chi]' \
+        -o -name '*.cxx' -o -name '*.java' -o -name '*.py' |
     sed -e '/test\/3rdparty\//d' -e 's/^\.\///'`; do
 	check $i
 done
diff --git a/src/btree/bt_curprev.c b/src/btree/bt_curprev.c
index 865fcf2ee44..39d0aa8aed3 100644
--- a/src/btree/bt_curprev.c
+++ b/src/btree/bt_curprev.c
@@ -16,12 +16,14 @@
  * search item's next array).
  *
  * Helper macros to go from a stack pointer at level i, pointing into a next
- * array, to insert node containing that next array.
+ * array, back to the insert node containing that next array.
  */
+#undef	PREV_ITEM
 #define	PREV_ITEM(ins_head, insp, i)					\
 	(((insp) == &(ins_head)->head[i] || (insp) == NULL) ? NULL :	\
 	    (WT_INSERT *)((char *)((insp) - (i)) - offsetof(WT_INSERT, next)))
 
+#undef	PREV_INS
 #define	PREV_INS(cbt, i)						\
 	PREV_ITEM((cbt)->ins_head, (cbt)->ins_stack[(i)], (i))
 
@@ -39,11 +41,12 @@ __cursor_skip_prev(WT_CURSOR_BTREE *cbt)
 
 	session = (WT_SESSION_IMPL *)cbt->iface.session;
 
+restart:
 	/*
 	 * If the search stack does not point at the current item, fill it in
 	 * with a search.
 	 */
-	if ((current = cbt->ins) != PREV_INS(cbt, 0)) {
+	while ((current = cbt->ins) != PREV_INS(cbt, 0)) {
 		if (cbt->btree->type == BTREE_ROW) {
 			key.data = WT_INSERT_KEY(current);
 			key.size = WT_INSERT_KEY_SIZE(current);
@@ -52,10 +55,6 @@ __cursor_skip_prev(WT_CURSOR_BTREE *cbt)
 		} else
 			cbt->ins = __col_insert_search(cbt->ins_head,
 			    cbt->ins_stack, WT_INSERT_RECNO(current));
-
-		/* Check that we found the expected item. */
-		WT_ASSERT(session, cbt->ins == current);
-		WT_ASSERT(session, PREV_INS(cbt, 0) == current);
 	}
 
 	/*
@@ -91,7 +90,14 @@ __cursor_skip_prev(WT_CURSOR_BTREE *cbt)
 
 	/* Walk any remaining levels until just before the current node. */
 	while (i >= 0) {
-		WT_ASSERT(session, ins != NULL);
+		/*
+		 * If we get to the end of a list without finding the current
+		 * item, we must have raced with an insert.  Restart the search.
+		 */
+		if (ins == NULL) {
+			cbt->ins_stack[0] = NULL;
+			goto restart;
+		}
 		if (ins->next[i] != current)		/* Stay at this level */
 			ins = ins->next[i];
 		else {					/* Drop down a level */
diff --git a/src/btree/bt_evict.c b/src/btree/bt_evict.c
index b6970cd3202..fe2bd27da2f 100644
--- a/src/btree/bt_evict.c
+++ b/src/btree/bt_evict.c
@@ -22,9 +22,9 @@ static int  __evict_worker(WT_SESSION_IMPL *);
  * Tuning constants: I hesitate to call this tuning, but we want to review some
  * number of pages from each file's in-memory tree for each page we evict.
  */
-#define	WT_EVICT_GROUP		10	/* Evict N pages at a time */
-#define	WT_EVICT_WALK_PER_TABLE	20	/* Pages to visit per file */
-#define	WT_EVICT_WALK_BASE	100	/* Pages tracked across file visits */
+#define	WT_EVICT_GROUP		20	/* Evict N pages at a time */
+#define	WT_EVICT_WALK_PER_TABLE	25	/* Pages to visit per file */
+#define	WT_EVICT_WALK_BASE	50	/* Pages tracked across file visits */
 
 /*
  * WT_EVICT_REQ_FOREACH --
@@ -198,6 +198,7 @@ __wt_evict_page_request(WT_SESSION_IMPL *session, WT_PAGE *page)
 	 * thread will see this later.
 	 */
 	WT_VERBOSE(session, evictserver, "eviction server request table full");
+	page->ref->state = WT_REF_MEM;
 	return (WT_RESTART);
 }
 
@@ -230,7 +231,7 @@ __wt_cache_evict_server(void *arg)
 		 * whether there is work to do.  If so, evict_cond will
 		 * be signalled and the wait below won't block.
 		 */
-		__wt_eviction_check(session, NULL);
+		__wt_eviction_check(session, NULL, 1);
 
 		WT_VERBOSE(session, evictserver, "sleeping");
 		__wt_cond_wait(session, cache->evict_cond);
@@ -353,16 +354,6 @@ __evict_request_walk(WT_SESSION_IMPL *session)
 		memset(cache->evict, 0, cache->evict_allocated);
 
 		if (F_ISSET(er, WT_EVICT_REQ_PAGE)) {
-			/*
-			 * If we are pushing out a page, that page might be our
-			 * eviction location.  If so, try to move on to the
-			 * next page, or restart the walk if that fails
-			 * (evict_page will be set to NULL).
-			 */
-			if (session->btree->evict_page == er->page)
-				(void)__wt_tree_np(
-				    session, &session->btree->evict_page, 1, 1);
-
 			ref = er->page->ref;
 			WT_ASSERT(session, ref->page == er->page);
 			WT_ASSERT(session, ref->state == WT_REF_EVICTING);
@@ -379,16 +370,13 @@ __evict_request_walk(WT_SESSION_IMPL *session)
 			__wt_yield();
 
 			/*
-			 * If eviction fails, free up the page and hope it
+			 * If eviction fails, it will free up the page: hope it
 			 * works next time.  Application threads may be holding
 			 * a reference while trying to get another (e.g., if
 			 * they have two cursors open), so blocking
 			 * indefinitely leads to deadlock.
 			 */
-			if ((ret = __wt_rec_evict(session, er->page, 0)) != 0) {
-				WT_ASSERT(session, ref->page == er->page);
-				ref->state = WT_REF_MEM;
-			}
+			ret = __wt_rec_evict(session, er->page, 0);
 		} else {
 			/*
 			 * If we're about to do a walk of the file tree (and
@@ -570,7 +558,7 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp)
 	WT_BTREE *btree;
 	WT_CACHE *cache;
 	WT_PAGE *page;
-	int i, restarted_once;
+	int i, restarts, ret;
 
 	btree = session->btree;
 	cache = S2C(session)->cache;
@@ -588,36 +576,29 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp)
 	 * We can't evict the page just returned to us, it marks our place in
 	 * the tree.  So, always stay one page ahead of the page being returned.
 	 */
-	i = restarted_once = 0;
-	do {
-		if ((page = btree->evict_page) == NULL)
-			goto skip;
-
-		/*
-		 * Root and pinned pages can't be evicted.
-		 * !!!
-		 * It's still in flux if root pages are pinned or not, test for
-		 * both cases for now.
-		 */
-		if (WT_PAGE_IS_ROOT(page))
-			goto skip;
-
-		/*
-		 * Skip locked pages: we would skip them later, and they just
-		 * fill up the eviction list for no benefit.
-		 */
-		if (page->ref->state != WT_REF_MEM)
-			goto skip;
+	for (i = restarts = ret = 0;
+	    i < WT_EVICT_WALK_PER_TABLE && restarts <= 1 && ret == 0;
+	    ret = __wt_tree_np(session, &btree->evict_page, 1, 1)) {
+		if ((page = btree->evict_page) == NULL) {
+			++restarts;
+			continue;
+		}
 
 		/*
+		 * Root and pinned pages can't be evicted, nor can locked
+		 * pages: we would skip them later, and they just fill up the
+		 * eviction list for no benefit.
+		 *
 		 * Skip pages that must be merged into their parents.  Don't
 		 * skip pages marked WT_PAGE_REC_EMPTY or SPLIT: updates after
 		 * their last reconciliation may have changed their state and
 		 * only the eviction code can check whether they should really
 		 * be skipped.
 		 */
-		if (F_ISSET(page, WT_PAGE_REC_SPLIT_MERGE))
-			goto skip;
+		if (WT_PAGE_IS_ROOT(page) ||
+		    page->ref->state != WT_REF_MEM ||
+		    F_ISSET(page, WT_PAGE_REC_SPLIT_MERGE))
+			continue;
 
 		WT_VERBOSE(session, evictserver,
 		    "select: %p, size %" PRIu32, page, page->memory_footprint);
@@ -626,13 +607,9 @@ __evict_walk_file(WT_SESSION_IMPL *session, u_int *slotp)
 		cache->evict[*slotp].page = page;
 		cache->evict[*slotp].btree = btree;
 		++*slotp;
+	}
 
-skip:		WT_RET(__wt_tree_np(session, &btree->evict_page, 1, 1));
-		if (btree->evict_page == NULL && restarted_once++ == 1)
-			break;
-	} while (i < WT_EVICT_WALK_PER_TABLE);
-
-	return (0);
+	return (ret);
 }
 
 /*
@@ -664,7 +641,7 @@ __evict_dup_remove(WT_SESSION_IMPL *session)
 	 */
 	evict = cache->evict;
 	elem = cache->evict_entries;
-	qsort(evict, (size_t)elem, sizeof(WT_EVICT_LIST), __evict_page_cmp);
+	qsort(evict, elem, sizeof(WT_EVICT_LIST), __evict_page_cmp);
 	for (i = 0; i < elem; i = j) {
 		/*
 		 * Once we hit a NULL, we're done, the NULLs all sorted to the
@@ -673,17 +650,15 @@ __evict_dup_remove(WT_SESSION_IMPL *session)
 		if (evict[i].page == NULL)
 			break;
 
-		for (j = i + 1; j < elem; ++j) {
-			/* Delete the second and any subsequent duplicates. */
-			if (evict[i].page == evict[j].page)
-				__evict_clr(&evict[j]);
-			else
-				break;
-		}
+		/* Delete any subsequent duplicates. */
+		for (j = i + 1;
+		    j < elem && evict[j].page == evict[i].page;
+		    ++j)
+			__evict_clr(&evict[j]);
 	}
 
 	/* Sort the array by LRU, then evict the most promising candidates. */
-	qsort(cache->evict, elem, sizeof(WT_EVICT_LIST), __evict_lru_cmp);
+	qsort(evict, i, sizeof(WT_EVICT_LIST), __evict_lru_cmp);
 }
 
 /*
@@ -735,15 +710,6 @@ __evict_get_page(
 		*btreep = evict->btree;
 
 		/*
-		 * If we're evicting our current eviction point in the file,
-		 * try to move on to the next page, or restart the walk if that
-		 * fails (evict_page will be set to NULL).
-		 */
-		if (*pagep == evict->btree->evict_page)
-			(void)__wt_tree_np(
-			    session, &evict->btree->evict_page, 1, 1);
-
-		/*
 		 * Paranoia: remove the entry so we never try and reconcile
 		 * the same page on reconciliation error.
 		 */
@@ -768,6 +734,8 @@ __wt_evict_lru_page(WT_SESSION_IMPL *session)
 	if (page == NULL)
 		return (WT_NOTFOUND);
 
+	WT_ASSERT(session, page->ref->state == WT_REF_EVICTING);
+
 	/* Reference the correct WT_BTREE handle. */
 	saved_btree = session->btree;
 	WT_SET_BTREE_IN_SESSION(session, btree);
@@ -776,19 +744,14 @@ __wt_evict_lru_page(WT_SESSION_IMPL *session)
 	 * We don't care why eviction failed (maybe the page was dirty and we're
 	 * out of disk space, or the page had an in-memory subtree already being
 	 * evicted).  Regardless, don't pick the same page every time.
+	 *
+	 * We used to bump the page's read_gen only if eviction failed, but
+	 * that isn't safe: at that point, eviction has already unlocked the
+	 * page and some other thread may have evicted it by the time we look
+	 * at it.
 	 */
-	if (__wt_rec_evict(session, page, 0) != 0) {
-		page->read_gen = __wt_cache_read_gen(session);
-
-		/*
-		 * If the evicting state of the page was not cleared, clear it
-		 * now to make the page available again.
-		 */
-		if (page->ref->state == WT_REF_EVICTING) {
-			WT_ASSERT(session, page->ref->page == page);
-			page->ref->state = WT_REF_MEM;
-		}
-	}
+	page->read_gen = __wt_cache_read_gen(session);
+	(void)__wt_rec_evict(session, page, 0);
 
 	WT_ATOMIC_ADD(btree->lru_count, -1);
 
diff --git a/src/btree/bt_page.c b/src/btree/bt_page.c
index 97f565571e3..21cdddaa635 100644
--- a/src/btree/bt_page.c
+++ b/src/btree/bt_page.c
@@ -26,7 +26,13 @@ __wt_page_in_func(
 #endif
     )
 {
-	int read_lockout;
+	int first, read_lockout;
+
+	/*
+	 * Only wake the eviction server once: after that, we're just wasting
+	 * effort and making a busy mutex busier.
+	 */
+	first = 1;
 
 	for (;;) {
 		switch (ref->state) {
@@ -35,7 +41,8 @@ __wt_page_in_func(
 			 * The page isn't in memory, attempt to set the
 			 * state to WT_REF_READING.  If successful, read it.
 			 */
-			__wt_eviction_check(session, &read_lockout);
+			__wt_eviction_check(session, &read_lockout, first);
+			first = 0;
 			if (read_lockout || !WT_ATOMIC_CAS(
 			    ref->state, WT_REF_DISK, WT_REF_READING))
 				break;
@@ -334,8 +341,8 @@ __inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *inmem_sizep)
 
 	/*
 	 * Internal row-store page entries map one-to-two to the number of
-	 * physical entries on the page (each physical entry is a data item
-	 * and offset object).
+	 * physical entries on the page (each in-memory entry is a key item
+	 * and location cookie).
 	 */
 	nindx = dsk->u.entries / 2;
 	WT_RET((__wt_calloc_def(session, (size_t)nindx, &page->u.intl.t)));
@@ -351,9 +358,8 @@ __inmem_row_int(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *inmem_sizep)
 
 	/*
 	 * Walk the page, instantiating keys: the page contains sorted key and
-	 * offpage-reference pairs.  Keys are row store internal pages with
-	 * on-page/overflow (WT_CELL_KEY/KEY_OVFL) items, and offpage references
-	 * are WT_CELL_OFF items.
+	 * location cookie pairs.  Keys are on-page/overflow items and location
+	 * cookies are WT_CELL_ADDR items.
 	 */
 	ref = page->u.intl.t;
 	WT_CELL_FOREACH(btree, dsk, cell, unpack, i) {
@@ -455,33 +461,43 @@ __inmem_row_leaf(WT_SESSION_IMPL *session, WT_PAGE *page, size_t *inmem_sizep)
 
 	/*
 	 * Leaf row-store page entries map to a maximum of two-to-one to the
-	 * number of physical entries on the page (each physical entry might
-	 * be a key without any subsequent data item).
-	 */
-	WT_RET((__wt_calloc_def(
-	    session, (size_t)dsk->u.entries * 2, &page->u.row.d)));
-	if (inmem_sizep != NULL)
-		*inmem_sizep += 2 * dsk->u.entries * sizeof(*page->u.row.d);
-
-	/*
-	 * Walk a row-store page of WT_CELLs, building indices and finding the
-	 * end of the page.
+	 * number of physical entries on the page (each physical entry might be
+	 * a key without a subsequent data item).  To avoid over-allocation in
+	 * workloads with large numbers of empty data items, first walk the page
+	 * counting the number of keys, then allocate the indices.
 	 *
 	 * The page contains key/data pairs.  Keys are on-page (WT_CELL_KEY) or
 	 * overflow (WT_CELL_KEY_OVFL) items, data are either a single on-page
 	 * (WT_CELL_VALUE) or overflow (WT_CELL_VALUE_OVFL) item.
 	 */
 	nindx = 0;
-	rip = page->u.row.d;
 	WT_CELL_FOREACH(btree, dsk, cell, unpack, i) {
 		__wt_cell_unpack(cell, unpack);
 		switch (unpack->type) {
 		case WT_CELL_KEY:
 		case WT_CELL_KEY_OVFL:
 			++nindx;
-			if (rip->key != NULL)
-				++rip;
+			break;
+		case WT_CELL_VALUE:
+		case WT_CELL_VALUE_OVFL:
+			break;
+		WT_ILLEGAL_VALUE(session);
+		}
+	}
+
+	WT_RET((__wt_calloc_def(session, (size_t)nindx, &page->u.row.d)));
+	if (inmem_sizep != NULL)
+		*inmem_sizep += nindx * sizeof(*page->u.row.d);
+
+	/* Walk the page again, building indices. */
+	rip = page->u.row.d;
+	WT_CELL_FOREACH(btree, dsk, cell, unpack, i) {
+		__wt_cell_unpack(cell, unpack);
+		switch (unpack->type) {
+		case WT_CELL_KEY:
+		case WT_CELL_KEY_OVFL:
 			rip->key = cell;
+			++rip;
 			break;
 		case WT_CELL_VALUE:
 		case WT_CELL_VALUE_OVFL:
diff --git a/src/btree/bt_read.c b/src/btree/bt_read.c
index 4d9017ca0cd..a0057e733d4 100644
--- a/src/btree/bt_read.c
+++ b/src/btree/bt_read.c
@@ -47,7 +47,7 @@ __wt_cache_read(WT_SESSION_IMPL *session, WT_PAGE *parent, WT_REF *ref)
 
 	WT_ASSERT(session, page != NULL);
 	ref->page = page;
-	ref->state = WT_REF_MEM;
+	WT_PUBLISH(ref->state, WT_REF_MEM);
 	return (0);
 
 err:	ref->state = WT_REF_DISK;
diff --git a/src/btree/rec_evict.c b/src/btree/rec_evict.c
index 716d79b3251..9da60eda485 100644
--- a/src/btree/rec_evict.c
+++ b/src/btree/rec_evict.c
@@ -13,7 +13,7 @@ static int  __rec_discard_page(WT_SESSION_IMPL *, WT_PAGE *);
 static void __rec_excl_clear(WT_SESSION_IMPL *);
 static int  __rec_page_clean_update(WT_SESSION_IMPL *, WT_PAGE *);
 static int  __rec_page_dirty_update(WT_SESSION_IMPL *, WT_PAGE *);
-static int  __rec_review(WT_SESSION_IMPL *, WT_PAGE *, uint32_t, int);
+static int  __rec_review(WT_SESSION_IMPL *, WT_REF *, WT_PAGE *, uint32_t, int);
 static int  __rec_root_addr_update(WT_SESSION_IMPL *, uint8_t *, uint32_t);
 static int  __rec_root_clean_update(WT_SESSION_IMPL *, WT_PAGE *);
 static int  __rec_root_dirty_update(WT_SESSION_IMPL *, WT_PAGE *);
@@ -43,8 +43,12 @@ __wt_rec_evict(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags)
 	 * we're done.  We have to make this check for clean pages, too: while
 	 * unlikely eviction would choose an internal page with children, it's
 	 * not disallowed anywhere.
+	 *
+	 * Note that page->ref may be NULL in some cases (e.g., for root pages
+	 * or during salvage).  That's OK if WT_REC_SINGLE is set: we won't
+	 * check hazard references in that case.
 	 */
-	WT_ERR(__rec_review(session, page, flags, 1));
+	WT_ERR(__rec_review(session, page->ref, page, flags, 1));
 
 	/* Count evictions of internal pages during normal operation. */
 	if (!LF_ISSET(WT_REC_SINGLE) &&
@@ -325,12 +329,22 @@ __rec_discard_page(WT_SESSION_IMPL *session, WT_PAGE *page)
 		 * a split-merge page, then the reference must be cleared before
 		 * the page is discarded.
 		 */
-		if (F_ISSET(
-		    page, WT_PAGE_REC_MASK) == WT_PAGE_REC_SPLIT &&
+		if (F_ISSET(page, WT_PAGE_REC_MASK) == WT_PAGE_REC_SPLIT &&
 		    mod->u.split != NULL)
 			__wt_page_out(session, mod->u.split, 0);
 	}
 
+	/*
+	 * If we are evicting the file's current eviction point, clear it so
+	 * the walk will be restarted.
+	 *
+	 * !!!
+	 * This check would arguably be cleaner in bt_evict.c, but that level
+	 * isn't aware of all of the pages within a subtree that are evicted.
+	 */
+	if (session->btree->evict_page == page)
+		session->btree->evict_page = NULL;
+
 	/* Discard the page itself. */
 	__wt_page_out(session, page, 0);
 
@@ -341,11 +355,17 @@ __rec_discard_page(WT_SESSION_IMPL *session, WT_PAGE *page)
  * __rec_review --
  *	Get exclusive access to the page and review the page and its subtree
  *	for conditions that would block its eviction.
+ *
+ *	The ref and page arguments may appear to be redundant, because usually
+ *	ref->page == page and page->ref == ref.  However, we need both because
+ *	(a) there are cases where ref == NULL (e.g., for root page or during
+ *	salvage), and (b) we can't safely look at page->ref until we have a
+ *	hazard reference.
  */
 static int
-__rec_review(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags, int top)
+__rec_review(WT_SESSION_IMPL *session,
+    WT_REF *ref, WT_PAGE *page, uint32_t flags, int top)
 {
-	WT_REF *ref;
 	uint32_t i;
 
 	/*
@@ -353,7 +373,7 @@ __rec_review(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags, int top)
 	 * locked down.
 	 */
 	if (!LF_ISSET(WT_REC_SINGLE))
-		WT_RET(__hazard_exclusive(session, page->ref, top));
+		WT_RET(__hazard_exclusive(session, ref, top));
 
 	/*
 	 * Recurse through the page's subtree: this happens first because we
@@ -366,8 +386,8 @@ __rec_review(WT_SESSION_IMPL *session, WT_PAGE *page, uint32_t flags, int top)
 			case WT_REF_DISK:		/* On-disk */
 				break;
 			case WT_REF_MEM:		/* In-memory */
-				WT_RET(
-				    __rec_review(session, ref->page, flags, 0));
+				WT_RET(__rec_review(
+				    session, ref, ref->page, flags, 0));
 				break;
 			case WT_REF_EVICTING:		/* Being evaluated */
 			case WT_REF_LOCKED:		/* Being evicted */
diff --git a/src/include/cache.i b/src/include/cache.i
index 866f00693c2..5ac61f6cb69 100644
--- a/src/include/cache.i
+++ b/src/include/cache.i
@@ -10,7 +10,7 @@
  *	Wake the eviction server if necessary.
  */
 static inline void
-__wt_eviction_check(WT_SESSION_IMPL *session, int *read_lockoutp)
+__wt_eviction_check(WT_SESSION_IMPL *session, int *read_lockoutp, int wake)
 {
 	WT_CACHE *cache;
 	WT_CONNECTION_IMPL *conn;
@@ -31,7 +31,7 @@ __wt_eviction_check(WT_SESSION_IMPL *session, int *read_lockoutp)
 		*read_lockoutp = (bytes_inuse > bytes_max);
 
 	/* Wake eviction when we're over the trigger cache size. */
-	if (bytes_inuse > cache->eviction_trigger * (bytes_max / 100))
+	if (wake && bytes_inuse > cache->eviction_trigger * (bytes_max / 100))
 		__wt_evict_server_wake(session);
 }
 
@@ -60,7 +60,7 @@ __wt_eviction_page_check(WT_SESSION_IMPL *session, WT_PAGE *page)
 		 */
 		WT_RET(__wt_evict_page_request(session, page));
 	} else
-		__wt_eviction_check(session, NULL);
+		__wt_eviction_check(session, NULL, 1);
 
 	return (0);
 }
diff --git a/test/format/Makefile.am b/test/format/Makefile.am
index 4727e0df742..f10e8b47b49 100644
--- a/test/format/Makefile.am
+++ b/test/format/Makefile.am
@@ -3,7 +3,8 @@ INCLUDES = -I$(top_builddir) -I$(BDB)
 
 noinst_PROGRAMS = t
 noinst_DATA = s_dumpcmp
-t_SOURCES = config.h format.h bdb.c config.c t.c util.c wts.c wts_bulk.c wts_ops.c
+t_SOURCES =\
+	config.h format.h bdb.c config.c t.c util.c wts.c wts_bulk.c wts_ops.c
 t_LDADD = $(top_builddir)/libwiredtiger.la -L$(BDB)/build_unix -ldb
 t_LDFLAGS = -static
 
diff --git a/test/format/README b/test/format/README
index 5f276ad48ac..81cebc894db 100644
--- a/test/format/README
+++ b/test/format/README
@@ -1,9 +1,9 @@
-The test program format randomly generates WiredTiger files with
-different size objects and then does single-threaded operations
-on those files.  The goal is to test the WiredTiger file formats.
+The test/format program randomly generates WiredTiger files with different
+size objects and then does operations on those files.  The goal is to
+test the WiredTiger file formats.
 
-format should be compiled with a version of Berkeley DB (which it
-uses to verify format's results).  Create a link "db" in the
-build_posix directory that links to the top-level of a Berkeley DB
-distribution directory which contains a configured and compiled
+test/format should be compiled with a version of Berkeley DB (which
+it uses to verify format's results).  Create a link "db" in the
+build_posix directory that links to the top-level of a Berkeley
+DB distribution directory which contains a configured and compiled
 build_unix subdirectory.
diff --git a/test/format/bdb.c b/test/format/bdb.c
index bf21a003e17..9cbe8012583 100644
--- a/test/format/bdb.c
+++ b/test/format/bdb.c
@@ -8,6 +8,9 @@
 #define	BDB	1			/* Berkeley DB header files */
 #include "format.h"
 
+static DBT key, value;
+static uint8_t *keybuf;
+
 static int
 bdb_compare_reverse(DB *dbp, const DBT *k1, const DBT *k2)
 {
@@ -23,7 +26,7 @@ bdb_compare_reverse(DB *dbp, const DBT *k1, const DBT *k2)
 }
 
 void
-bdb_startup(void)
+bdb_open(void)
 {
 	DB *db;
 	DBC *dbc;
@@ -48,10 +51,12 @@ bdb_startup(void)
 	g.bdb = db;
 	assert(db->cursor(db, NULL, &dbc, 0) == 0);
 	g.dbc = dbc;
+
+	key_gen_setup(&keybuf);
 }
 
 void
-bdb_teardown(void)
+bdb_close(void)
 {
 	DB *db;
 	DBC *dbc;
@@ -63,6 +68,9 @@ bdb_teardown(void)
 	assert(dbc->close(dbc) == 0);
 	assert(db->close(db, 0) == 0);
 	assert(dbenv->close(dbenv, 0) == 0);
+
+	free(keybuf);
+	keybuf = NULL;
 }
 
 void
@@ -70,7 +78,6 @@ bdb_insert(
     const void *key_data, uint32_t key_size,
     const void *value_data, uint32_t value_size)
 {
-	static DBT key, value;
 	DBC *dbc;
 
 	key.data = (void *)key_data;
@@ -83,118 +90,91 @@ bdb_insert(
 	assert(dbc->put(dbc, &key, &value, DB_KEYFIRST) == 0);
 }
 
-int
+void
 bdb_np(int next,
     void *keyp, uint32_t *keysizep,
     void *valuep, uint32_t *valuesizep, int *notfoundp)
 {
-	static DBT key, value;
-	DB *db = g.bdb;
 	DBC *dbc = g.dbc;
 	int ret;
 
 	*notfoundp = 0;
-
 	if ((ret =
 	    dbc->get(dbc, &key, &value, next ? DB_NEXT : DB_PREV)) != 0) {
-		if (ret == DB_NOTFOUND) {
-			*notfoundp = 1;
-			return (0);
-		}
-		db->err(db, ret,
-		    "dbc->get: %s: {%.*s}",
-		    next ? "DB_NEXT" : "DB_PREV",
-		    (int)key.size, (char *)key.data);
-		return (1);
+		if (ret != DB_NOTFOUND)
+			die(ret, "dbc.get: %s: {%.*s}",
+			    next ? "DB_NEXT" : "DB_PREV",
+			    (int)key.size, (char *)key.data);
+		*notfoundp = 1;
+	} else {
+		*(void **)keyp = key.data;
+		*keysizep = key.size;
+		*(void **)valuep = value.data;
+		*valuesizep = value.size;
 	}
-	*(void **)keyp = key.data;
-	*keysizep = key.size;
-	*(void **)valuep = value.data;
-	*valuesizep = value.size;
-	return (0);
 }
 
-int
+void
 bdb_read(uint64_t keyno, void *valuep, uint32_t *valuesizep, int *notfoundp)
 {
-	static DBT key, value;
-	DB *db = g.bdb;
 	DBC *dbc = g.dbc;
 	int ret;
 
-	*notfoundp = 0;
-
-	key_gen(&key.data, &key.size, keyno, 0);
+	key.data = keybuf;
+	key_gen(key.data, &key.size, keyno, 0);
 
+	*notfoundp = 0;
 	if ((ret = dbc->get(dbc, &key, &value, DB_SET)) != 0) {
-		if (ret == DB_NOTFOUND) {
-			*notfoundp = 1;
-			return (0);
-		}
-		db->err(db, ret,
-		    "dbc->get: DB_SET: {%.*s}",
-		    (int)key.size, (char *)key.data);
-		return (1);
+		if (ret != DB_NOTFOUND)
+			die(ret, "dbc.get: DB_SET: {%.*s}",
+			    (int)key.size, (char *)key.data);
+		*notfoundp = 1;
+	} else {
+		*(void **)valuep = value.data;
+		*valuesizep = value.size;
 	}
-	*(void **)valuep = value.data;
-	*valuesizep = value.size;
-	return (0);
 }
 
-int
+void
 bdb_put(const void *arg_key, uint32_t arg_key_size,
     const void *arg_value, uint32_t arg_value_size, int *notfoundp)
 {
-	static DBT key, value;
-	DB *db = g.bdb;
 	DBC *dbc = g.dbc;
 	int ret;
 
-	*notfoundp = 0;
-
 	key.data = (void *)arg_key;
 	key.size = arg_key_size;
 	value.data = (void *)arg_value;
 	value.size = arg_value_size;
 
+	*notfoundp = 0;
 	if ((ret = dbc->put(dbc, &key, &value, DB_KEYFIRST)) != 0) {
-		if (ret == DB_NOTFOUND) {
-			*notfoundp = 1;
-			return (0);
+		if (ret != DB_NOTFOUND) {
+			die(ret, "dbc.put: DB_KEYFIRST: {%.*s}{%.*s}",
+			    (int)key.size, (char *)key.data,
+			    (int)value.size, (char *)value.data);
 		}
-		db->err(db, ret, "dbc->put: DB_KEYFIRST: {%.*s}{%.*s}",
-		    (int)key.size, (char *)key.data,
-		    (int)value.size, (char *)value.data);
-		return (1);
+		*notfoundp = 1;
 	}
-	return (0);
 }
 
-int
+void
 bdb_del(uint64_t keyno, int *notfoundp)
 {
-	static DBT value;
-	static DBT key;
-	DB *db = g.bdb;
 	DBC *dbc = g.dbc;
 	int ret;
 
-	*notfoundp = 0;
-
-	key_gen(&key.data, &key.size, keyno, 0);
+	key.data = keybuf;
+	key_gen(key.data, &key.size, keyno, 0);
 
-	if ((ret = bdb_read(keyno, &value.data, &value.size, notfoundp)) != 0)
-		return (1);
+	bdb_read(keyno, &value.data, &value.size, notfoundp);
 	if (*notfoundp)
-		return (0);
+		return;
+
 	if ((ret = dbc->del(dbc, 0)) != 0) {
-		if (ret == DB_NOTFOUND) {
-			*notfoundp = 1;
-			return (0);
-		}
-		db->err(db, ret,
-		    "dbc->del: {%.*s}", (int)key.size, (char *)key.data);
-		return (1);
+		if (ret != DB_NOTFOUND)
+			die(ret, "dbc.del: {%.*s}",
+			    (int)key.size, (char *)key.data);
+		*notfoundp = 1;
 	}
-	return (0);
 }
diff --git a/test/format/config.c b/test/format/config.c
index 0b08be14c54..a9bca4e7c00 100644
--- a/test/format/config.c
+++ b/test/format/config.c
@@ -103,7 +103,7 @@ config_print(int error_display)
 		fp = stdout;
 	else
 		if ((fp = fopen("__run", "w")) == NULL)
-			die("__run", errno);
+			die(errno, "fopen: __run");
 
 	fprintf(fp, "############################################\n");
 	fprintf(fp, "#  RUN PARAMETERS\n");
@@ -138,7 +138,7 @@ config_file(const char *name)
 	char *p, buf[256];
 
 	if ((fp = fopen(name, "r")) == NULL)
-		die(name, errno);
+		die(errno, "fopen: %s", name);
 	while (fgets(buf, sizeof(buf), fp) != NULL) {
 		for (p = buf; *p != '\0' && *p != '\n'; ++p)
 			;
diff --git a/test/format/format.h b/test/format/format.h
index 2f3bac9dd3b..4ee5d89b900 100644
--- a/test/format/format.h
+++ b/test/format/format.h
@@ -12,6 +12,7 @@
 #include <errno.h>
 #include <inttypes.h>
 #include <limits.h>
+#include <pthread.h>
 #include <signal.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -37,6 +38,8 @@
 
 #define	WT_TABLENAME	"file:__wt"
 
+#define	SINGLETHREADED	(g.threads == 1)
+
 typedef struct {
 	char *progname;				/* Program name */
 
@@ -44,9 +47,6 @@ typedef struct {
 	void *dbc;				/* BDB cursor handle */
 
 	void *wts_conn;				/* WT_CONNECTION handle */
-	void *wts_cursor;			/* WT_CURSOR handle */
-	void *wts_cursor_insert;		/* WT_CURSOR insert handle */
-	void *wts_session;			/* WT_SESSION handle */
 
 	FILE *rand_log;				/* Random number log */
 
@@ -56,15 +56,14 @@ typedef struct {
 	    LOG_FILE=1,				/* Use a log file */
 	    LOG_OPS=2				/* Log all operations */
 	} logging;
-	FILE *logfp;				/* Log file. */
+	FILE *logfp;				/* Log file */
 
 	int replay;				/* Replaying a run. */
 	int track;				/* Track progress */
+	int threads;				/* Threads doing operations */
 
 	char *config_open;			/* Command-line configuration */
 
-	char *key_gen_buf;
-
 	uint32_t c_bitcnt;			/* Config values */
 	uint32_t c_bzip;
 	uint32_t c_cache;
@@ -92,32 +91,49 @@ typedef struct {
 } GLOBAL;
 extern GLOBAL g;
 
-int	 bdb_del(uint64_t, int *);
+typedef struct {
+	uint64_t search;
+	uint64_t insert;
+	uint64_t update;
+	uint64_t remove;
+
+	pthread_t tid;					/* thread ID */
+
+#define	TINFO_RUNNING	1				/* Running */
+#define	TINFO_COMPLETE	2				/* Finished */
+#define	TINFO_JOINED	3				/* Resolved */
+	volatile int state;				/* state */
+} TINFO;
+
+void	 bdb_close(void);
+void	 bdb_del(uint64_t, int *);
 void	 bdb_insert(const void *, uint32_t, const void *, uint32_t);
-int	 bdb_np(int, void *, uint32_t *, void *, uint32_t *, int *);
-int	 bdb_put(const void *, uint32_t, const void *, uint32_t, int *);
-int	 bdb_read(uint64_t, void *, uint32_t *, int *);
-void	 bdb_startup(void);
-void	 bdb_teardown(void);
+void	 bdb_np(int, void *, uint32_t *, void *, uint32_t *, int *);
+void	 bdb_open(void);
+void	 bdb_put(const void *, uint32_t, const void *, uint32_t, int *);
+void	 bdb_read(uint64_t, void *, uint32_t *, int *);
+
+void	 config_error(void);
 const char *
 	 config_dtype(void);
-void	 config_error(void);
 void	 config_file(const char *);
 void	 config_print(int);
 void	 config_setup(void);
 void	 config_single(const char *, int);
-void	 die(const char *,  int);
-void	 key_gen(void *, uint32_t *, uint64_t, int);
-void	 key_gen_setup(void);
-void	 track(const char *, uint64_t);
-void	 value_gen(void *, uint32_t *, uint64_t);
-int	 wts_bulk_load(void);
-int	 wts_dump(const char *, int);
-int	 wts_ops(void);
+void	 die(int, const char *, ...);
+void	 key_len_setup(void);
+void	 key_gen_setup(uint8_t **);
+void	 key_gen(uint8_t *, uint32_t *, uint64_t, int);
+void	 track(const char *, uint64_t, TINFO *);
+void	 val_gen_setup(uint8_t **);
+void	 value_gen(uint8_t *, uint32_t *, uint64_t);
+void	 wts_close(void);
+void	 wts_dump(const char *, int);
+void	 wts_load(void);
+void	 wts_open(void);
+void	 wts_ops(void);
 uint32_t wts_rand(void);
-int	 wts_read_scan(void);
-int	 wts_salvage(void);
-int	 wts_startup(int);
-int	 wts_stats(void);
-int	 wts_teardown(void);
-int	 wts_verify(const char *);
+void	 wts_read_scan(void);
+void	 wts_salvage(void);
+void	 wts_stats(void);
+void	 wts_verify(const char *);
diff --git a/test/format/s_dumpcmp.in b/test/format/s_dumpcmp.in
index c523fcd507c..71eed7b055f 100644
--- a/test/format/s_dumpcmp.in
+++ b/test/format/s_dumpcmp.in
@@ -27,26 +27,6 @@ if test $# -ne 0; then
 	exit 1
 fi
 
-if test $dump_bdb -eq 1; then
-	if test $colflag -eq 0; then
-		$bdb/db_dump -p __bdb |
-		    sed -e '1,/HEADER=END/d' \
-			-e '/DATA=END/d' \
-			-e 's/^ //' > __bdb_dump
-	else
-		# Format stores record numbers in Berkeley DB as string keys,
-		# it's simpler that way.  Convert record numbers from strings
-		# to numbers.
-		$bdb/db_dump -p __bdb |
-		    sed -e '1,/HEADER=END/d' \
-			-e '/DATA=END/d' \
-			-e 's/^ //' |
-		    sed -e 's/^0*//' \
-			-e 's/\.00$//' \
-			-e N > __bdb_dump
-	fi
-fi
-
 ext='"../../ext/collators/reverse/.libs/reverse_collator.so"'
 bzext="../../ext/compressors/bzip2_compress/.libs/bzip2_compress.so"
 if test -e $bzext ; then
@@ -54,7 +34,30 @@ if test -e $bzext ; then
 fi
 config='extensions=['$ext']'
 
-$build_top/wt -C "$config" dump file:__wt |
-    sed -e '1,/^Data$/d' > __wt_dump
+$build_top/wt -C "$config" dump file:__wt | sed -e '1,/^Data$/d' > __wt_dump
+
+if test $dump_bdb -ne 1; then
+	exit 0
+fi
+
+if test $colflag -eq 0; then
+	$bdb/db_dump -p __bdb |
+	    sed -e '1,/HEADER=END/d' \
+		-e '/DATA=END/d' \
+		-e 's/^ //' > __bdb_dump
+else
+	# Format stores record numbers in Berkeley DB as string keys,
+	# it's simpler that way.  Convert record numbers from strings
+	# to numbers.
+	$bdb/db_dump -p __bdb |
+	    sed -e '1,/HEADER=END/d' \
+		-e '/DATA=END/d' \
+		-e 's/^ //' |
+	    sed -e 's/^0*//' \
+		-e 's/\.00$//' \
+		-e N > __bdb_dump
+fi
+
 cmp __wt_dump __bdb_dump > /dev/null
+
 exit $?
diff --git a/test/format/t.c b/test/format/t.c
index a7d7c42e62a..a0ca4dadec5 100644
--- a/test/format/t.c
+++ b/test/format/t.c
@@ -16,9 +16,7 @@ static void usage(void);
 int
 main(int argc, char *argv[])
 {
-	int ch, reps, ret;
-
-	ret = 0;
+	int ch, reps;
 
 	if ((g.progname = strrchr(argv[0], '/')) == NULL)
 		g.progname = argv[0];
@@ -34,11 +32,14 @@ main(int argc, char *argv[])
 		config_file("CONFIG");
 	}
 
+	/* Default to a single thread. */
+	g.threads = 1;
+
 	/* Track progress unless we're re-directing output to a file. */
 	g.track = isatty(STDOUT_FILENO) ? 1 : 0;
 
 	/* Set values from the command line. */
-	while ((ch = getopt(argc, argv, "1C:c:Llqr")) != EOF)
+	while ((ch = getopt(argc, argv, "1C:c:Llqrt:")) != EOF)
 		switch (ch) {
 		case '1':			/* One run */
 			g.c_runs = 1;
@@ -67,10 +68,20 @@ main(int argc, char *argv[])
 			g.replay = 1;
 			g.c_runs = 1;
 			break;
+		case 't':			/* Threads */
+			g.threads = atoi(optarg);
+			break;
 		default:
 			usage();
 		}
 
+	/* Multi-threaded runs cannot be replayed. */
+	if (g.threads != 1 && g.replay) {
+		fprintf(stderr,
+		    "%s: -r and -t are mutually exclusive\n", g.progname);
+		return (EXIT_FAILURE);
+	}
+
 	argc -= optind;
 	argv += optind;
 	for (; *argv != NULL; ++argv)
@@ -85,36 +96,28 @@ main(int argc, char *argv[])
 
 		config_setup();			/* Run configuration */
 		config_print(0);		/* Dump run configuration */
+		key_len_setup();		/* Setup keys */
+
+		if (SINGLETHREADED)
+			bdb_open();		/* Initial file config */
+		wts_open();
 
-		bdb_startup();			/* Initial file config */
-		if (wts_startup(0))
-			return (EXIT_FAILURE);
+		wts_load();			/* Load initial records */
+		wts_verify("post-bulk verify");	/* Verify */
 
-		key_gen_setup();		/* Setup keys */
-		if (wts_bulk_load())		/* Load initial records */
-			goto err;
-						/* Close, verify */
-		if (wts_teardown() || wts_verify("post-bulk verify"))
-			goto err;
 						/* Loop reading & operations */
 		for (reps = 0; reps < 3; ++reps) {
-			if (wts_startup(1))
-				goto err;
+			wts_read_scan();	/* Read scan */
 
-			if (wts_read_scan())	/* Read scan */
-				goto err;
-
-						/* Random operations */
-			if (g.c_ops != 0 && wts_ops())
-				goto err;
+			if (g.c_ops != 0)	/* Random operations */
+				wts_ops();
 
 						/* Statistics */
-			if ((g.c_ops == 0 || reps == 2) && wts_stats())
-				goto err;
+			if (g.c_ops == 0 || reps == 2)
+				wts_stats();
 
-						/* Close, verify */
-			if (wts_teardown() || wts_verify("post-ops verify"))
-				goto err;
+						/* Verify */
+			wts_verify("post-ops verify");
 
 			/*
 			 * If no operations scheduled, quit after a single
@@ -124,11 +127,14 @@ main(int argc, char *argv[])
 				break;
 		}
 
-		track("shutting down BDB", 0ULL);
-		bdb_teardown();
+		if (SINGLETHREADED) {
+			track("shutting down BDB", 0ULL, NULL);
+			bdb_close();
 
-		if (wts_dump("standard", 1))	/* Dump the file */
-			goto err;
+			wts_close();			/* Dump the file */
+			wts_dump("standard", 1);
+			wts_open();
+		}
 
 		/*
 		 * If we don't delete any records, we can salvage the file.  The
@@ -139,36 +145,27 @@ main(int argc, char *argv[])
 		 * Save a copy, salvage, verify, dump.
 		 */
 		if (g.c_delete_pct == 0) {
-			/*
-			 * Save a copy of the interesting files so we can replay
-			 * the salvage step as necessary.
-			 */
-			if (system(
-			    "rm -rf __slvg.copy && "
-			    "mkdir __slvg.copy && "
-			    "cp WiredTiger* __wt __slvg.copy/") != 0)
-				goto err;
-
-			if (wts_salvage() ||
-			    wts_verify("post-salvage verify") ||
-			    wts_dump("salvage", 0))
-				goto err;
+			wts_salvage();			/* Salvage & verify */
+			wts_verify("post-salvage verify");
+
+			wts_close();			/* Dump the file */
+			wts_dump("salvage", 0);
+			wts_open();
 		}
 
-		printf("%4d: %-40s\n", g.run_cnt, config_dtype());
-	}
+		wts_close();			/* Close */
 
-	if (0) {
-err:		ret = 1;
+		printf("%4d: %-40s\n", g.run_cnt, config_dtype());
 	}
 
+	/* Flush/close any logging information. */
 	if (g.logfp != NULL)
 		(void)fclose(g.logfp);
 	if (g.rand_log != NULL)
 		(void)fclose(g.rand_log);
 
-	config_print(ret);
-	return (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
+	config_print(0);
+	return (EXIT_SUCCESS);
 }
 
 /*
@@ -196,7 +193,7 @@ startup(void)
 	/* Open/truncate the logging file. */
 	if (g.logging != 0) {
 		if ((g.logfp = fopen("__log", "w")) == NULL)
-			die("__log", errno);
+			die(errno, "fopen: __log");
 		(void)setvbuf(g.logfp, NULL, _IOLBF, 0);
 	}
 }
@@ -222,9 +219,29 @@ onint(int signo)
  *	Report an error and quit.
  */
 void
-die(const char *m, int e)
+die(int e, const char *fmt, ...)
 {
-	fprintf(stderr, "%s: %s: %s\n", g.progname, m, wiredtiger_strerror(e));
+	va_list ap;
+
+	if (fmt != NULL) {				/* Death message. */
+		fprintf(stderr, "%s: ", g.progname);
+		va_start(ap, fmt);
+		vfprintf(stderr, fmt, ap);
+		va_end(ap);
+		if (e != 0)
+			fprintf(stderr, ": %s", wiredtiger_strerror(e));
+		fprintf(stderr, "\n");
+	}
+
+	/* Flush/close any logging information. */
+	if (g.logfp != NULL)
+		(void)fclose(g.logfp);
+	if (g.rand_log != NULL)
+		(void)fclose(g.rand_log);
+
+	/* Display the configuration that failed. */
+	config_print(1);
+
 	exit(EXIT_FAILURE);
 }
 
@@ -236,8 +253,9 @@ static void
 usage(void)
 {
 	fprintf(stderr,
-	    "usage: %s [-1Llqr] [-C wiredtiger-config] [-c config-file] "
-	    "[name=value ...]\n",
+	    "usage: %s [-1Llqr]\n    "
+	    "[-C wiredtiger-config] [-c config-file] "
+	    "[-t threads] [name=value ...]\n",
 	    g.progname);
 	fprintf(stderr, "%s",
 	    "\t-1 run once\n"
@@ -246,7 +264,8 @@ usage(void)
 	    "\t-L output to a log file\n"
 	    "\t-l log operations (implies -L)\n"
 	    "\t-q run quietly\n"
-	    "\t-r replay the last run\n");
+	    "\t-r replay the last run\n"
+	    "\t-t threads\n");
 
 	fprintf(stderr, "\n");
 
diff --git a/test/format/util.c b/test/format/util.c
index b9435b0add3..648b5d82d9c 100644
--- a/test/format/util.c
+++ b/test/format/util.c
@@ -8,86 +8,87 @@
 #include "format.h"
 
 void
-key_gen(void *keyp, uint32_t *sizep, uint64_t keyno, int insert)
+key_len_setup()
 {
-	int len;
+	size_t i;
+
+	/*
+	 * The key is a variable length item with a leading 10-digit value.
+	 * Since we have to be able to re-construct it from the record number
+	 * (when doing row lookups), we pre-load a set of random lengths in
+	 * a lookup table, and then use the record number to choose one of
+	 * the pre-loaded lengths.
+	 *
+	 * Fill in the random key lengths.
+	 */
+	for (i = 0; i < sizeof(g.key_rand_len) / sizeof(g.key_rand_len[0]); ++i)
+		g.key_rand_len[i] = (uint16_t)MMRAND(g.c_key_min, g.c_key_max);
+}
+
+void
+key_gen_setup(uint8_t **keyp)
+{
+	uint8_t *key;
+	size_t i;
+
+	if ((key = malloc(g.c_key_max)) == NULL)
+		die(errno, "malloc");
+	for (i = 0; i < g.c_key_max; ++i)
+		key[i] = "abcdefghijklmnopqrstuvwxyz"[i % 26];
+	*keyp = key;
+}
+
+void
+key_gen(uint8_t *key, uint32_t *sizep, uint64_t keyno, int insert)
+{
+	int len, suffix;
 
 	/*
 	 * The key always starts with a 10-digit string (the specified cnt)
 	 * followed by two digits, a random number between 1 and 15 if it's
 	 * an insert, otherwise 00.
 	 */
-	len = insert ?
-	    sprintf(g.key_gen_buf, "%010" PRIu64 ".%02d", keyno,
-		(int)MMRAND(1, 15)) :
-	    sprintf(g.key_gen_buf, "%010" PRIu64 ".00", keyno);
+	suffix = insert ? (int)MMRAND(1, 15) : 0;
+	len = sprintf((char *)key, "%010" PRIu64 ".%02d", keyno, suffix);
 
 	/*
 	 * In a column-store, the key is only used for BDB, and so it doesn't
 	 * need a random length.
 	 */
 	if (g.c_file_type == ROW) {
-		g.key_gen_buf[len] = '/';
+		key[len] = '/';
 		len = g.key_rand_len[keyno %
 		    (sizeof(g.key_rand_len) / sizeof(g.key_rand_len[0]))];
 	}
-	*(void **)keyp = g.key_gen_buf;
 	*sizep = (uint32_t)len;
 }
 
 void
-key_gen_setup(void)
+val_gen_setup(uint8_t **valp)
 {
-	size_t i;
+	uint8_t *val;
+	size_t i, len;
 
 	/*
-	 * The key is a variable length item with a leading 10-digit value.
-	 * Since we have to be able re-construct it from the record number
-	 * (when doing row lookups), we pre-load a set of random lengths in
-	 * a lookup table, and then use the record number to choose one of
-	 * the pre-loaded lengths.
+	 * Set initial buffer contents to recognizable text.
 	 *
-	 * Fill in the random key lengths.
+	 * Add a few extra bytes in order to guarantee we can always offset
+	 * into the buffer by a few extra bytes, used to generate different
+	 * data for column-store run-length encoded files.
 	 */
-	if (g.key_gen_buf != NULL) {
-		free(g.key_gen_buf);
-		g.key_gen_buf = NULL;
-	}
-	for (i = 0; i < sizeof(g.key_rand_len) / sizeof(g.key_rand_len[0]); ++i)
-		g.key_rand_len[i] = (uint16_t)MMRAND(g.c_key_min, g.c_key_max);
+	len = g.c_value_max + 20;
+	if ((val = malloc(len)) == NULL)
+		die(errno, "malloc");
+	for (i = 0; i < len; ++i)
+		val[i] = (u_char)"ABCDEFGHIJKLMNOPQRSTUVWXYZ"[i % 26];
 
-	if ((g.key_gen_buf = malloc(g.c_key_max)) == NULL)
-		die("malloc", errno);
-	for (i = 0; i < g.c_key_max; ++i)
-		g.key_gen_buf[i] = "abcdefghijklmnopqrstuvwxyz"[i % 26];
+	*valp = val;
 }
 
 void
-value_gen(void *valuep, uint32_t *sizep, uint64_t keyno)
+value_gen(uint8_t *val, uint32_t *sizep, uint64_t keyno)
 {
-	static size_t blen = 0;
 	static const char *dup_data = "duplicate data item";
-	static u_char *buf = NULL;
-	size_t i;
-
-	/*
-	 * Set initial buffer contents to reconizable text.
-	 *
-	 * Add a few extra bytes in order to guarantee we can always offset
-	 * into the buffer by a few extra bytes, used to generate different
-	 * data for column-store run-length encoded files.
-	 */
-	if (blen < g.c_value_max + 10) {
-		if (buf != NULL) {
-			free(buf);
-			buf = NULL;
-		}
-		blen = g.c_value_max + 10;
-		if ((buf = malloc(blen)) == NULL)
-			die("malloc", errno);
-		for (i = 0; i < blen; ++i)
-			buf[i] = (u_char)"ABCDEFGHIJKLMNOPQRSTUVWXYZ"[i % 26];
-	}
 
 	/*
 	 * Fixed-length records: take the low N bits from the last digit of
@@ -95,16 +96,15 @@ value_gen(void *valuep, uint32_t *sizep, uint64_t keyno)
 	 */
 	if (g.c_file_type == FIX) {
 		switch (g.c_bitcnt) {
-		case 8: buf[0] = MMRAND(1, 0xff); break;
-		case 7: buf[0] = MMRAND(1, 0x7f); break;
-		case 6: buf[0] = MMRAND(1, 0x3f); break;
-		case 5: buf[0] = MMRAND(1, 0x1f); break;
-		case 4: buf[0] = MMRAND(1, 0x0f); break;
-		case 3: buf[0] = MMRAND(1, 0x07); break;
-		case 2: buf[0] = MMRAND(1, 0x03); break;
-		case 1: buf[0] = 1; break;
+		case 8: val[0] = MMRAND(1, 0xff); break;
+		case 7: val[0] = MMRAND(1, 0x7f); break;
+		case 6: val[0] = MMRAND(1, 0x3f); break;
+		case 5: val[0] = MMRAND(1, 0x1f); break;
+		case 4: val[0] = MMRAND(1, 0x0f); break;
+		case 3: val[0] = MMRAND(1, 0x07); break;
+		case 2: val[0] = MMRAND(1, 0x03); break;
+		case 1: val[0] = 1; break;
 		}
-		*(void **)valuep = buf;
 		*sizep = 1;
 		return;
 	}
@@ -114,7 +114,7 @@ value_gen(void *valuep, uint32_t *sizep, uint64_t keyno)
 	 * test that by inserting a zero-length data item every so often.
 	 */
 	if (++keyno % 63 == 0) {
-		*(void **)valuep = buf;
+		val[0] = '\0';
 		*sizep = 0;
 		return;
 	}
@@ -130,33 +130,37 @@ value_gen(void *valuep, uint32_t *sizep, uint64_t keyno)
 	if (g.c_file_type == VAR &&
 	    g.c_repeat_data_pct != 0 &&
 	    (u_int)wts_rand() % 100 > g.c_repeat_data_pct) {
-		*(void **)valuep = (void *)dup_data;
+		(void)strcpy((char *)val, dup_data);
 		*sizep = (uint32_t)strlen(dup_data);
 		return;
 	}
 
-	snprintf((char *)buf, blen, "%010" PRIu64, keyno);
-	buf[10] = '/';
-	*(void **)valuep = buf;
+	sprintf((char *)val, "%010" PRIu64, keyno);
+	val[10] = '/';
 	*sizep = MMRAND(g.c_value_min, g.c_value_max);
 }
 
 void
-track(const char *s, uint64_t i)
+track(const char *tag, uint64_t cnt, TINFO *tinfo)
 {
 	static int lastlen = 0;
 	int len;
 	char msg[128];
 
-	if (!g.track || s == NULL)
+	if (!g.track || tag == NULL)
 		return;
 
-	if (i == 0)
-		len = snprintf(msg, sizeof(msg), "%4d: %s",
-		    g.run_cnt, s);
+	if (tinfo == NULL && cnt == 0)
+		len = snprintf(msg, sizeof(msg), "%4d: %s", g.run_cnt, tag);
+	else if (tinfo == NULL)
+		len = snprintf(
+		    msg, sizeof(msg), "%4d: %s: %" PRIu64, g.run_cnt, tag, cnt);
 	else
-		len = snprintf(msg, sizeof(msg), "%4d: %s %" PRIu64,
-		    g.run_cnt, s, i);
+		len = snprintf(msg, sizeof(msg),
+		    "%4d: %s: " "search %" PRIu64
+		    ", insert %" PRIu64 ", update %" PRIu64 ", remove %" PRIu64,
+		    g.run_cnt, tag,
+		    tinfo->search, tinfo->insert, tinfo->update, tinfo->remove);
 
 	if (lastlen > len) {
 		memset(msg + len, ' ', (size_t)(lastlen - len));
@@ -178,6 +182,10 @@ wts_rand(void)
 	char buf[64];
 	uint32_t r;
 
+	/* If we're threaded, it's not repeatable, ignore the log. */
+	if (!SINGLETHREADED)
+		return ((uint32_t)rand());
+
 	/*
 	 * We can entirely reproduce a run based on the random numbers used
 	 * in the initial run, plus the configuration files.  It would be
@@ -189,7 +197,7 @@ wts_rand(void)
 	if (g.rand_log == NULL) {
 		if ((g.rand_log =
 		    fopen("__rand", g.replay ? "r" : "w")) == NULL)
-			die("__rand", errno);
+			die(errno, "fopen: __rand");
 		if (!g.replay) {
 			srand((u_int)(0xdeadbeef ^ (u_int)time(NULL)));
 			(void)setvbuf(g.rand_log, NULL, _IOLBF, 0);
@@ -203,7 +211,7 @@ wts_rand(void)
 				    "exiting\n");
 				exit(EXIT_SUCCESS);
 			}
-			die("random number log", errno);
+			die(errno, "feof: random number log");
 		}
 
 		r = (uint32_t)strtoul(buf, NULL, 10);
diff --git a/test/format/wts.c b/test/format/wts.c
index 16623c9bbee..11e1f05d2e8 100644
--- a/test/format/wts.c
+++ b/test/format/wts.c
@@ -7,9 +7,7 @@
 
 #include "format.h"
 
-static int  wts_close(WT_CONNECTION *);
-static int  wts_open(WT_CONNECTION **, WT_SESSION **session);
-static int  wts_sync(void);
+static void wts_sync(void);
 
 static int
 handle_message(WT_EVENT_HANDLER *handler, const char *message)
@@ -28,12 +26,12 @@ handle_message(WT_EVENT_HANDLER *handler, const char *message)
  *	Default WT_EVENT_HANDLER->handle_progress implementation: ignore.
  */
 static int
-handle_progress(WT_EVENT_HANDLER *handler,
-     const char *operation, uint64_t progress)
+handle_progress(
+    WT_EVENT_HANDLER *handler, const char *operation, uint64_t progress)
 {
 	UNUSED(handler);
 
-	track(operation, progress);
+	track(operation, progress, NULL);
 	return (0);
 }
 
@@ -43,14 +41,15 @@ static WT_EVENT_HANDLER event_handler = {
 	handle_progress
 };
 
-static int
-wts_open(WT_CONNECTION **connp, WT_SESSION **sessionp)
+void
+wts_open(void)
 {
 	WT_CONNECTION *conn;
 	WT_SESSION *session;
+	uint32_t maxintlpage, maxintlitem, maxleafpage, maxleafitem;
 	int ret;
 	const char *ext1, *ext2;
-	char config[256];
+	char config[512], *end, *p;
 
 	/* If the bzip2 compression module has been built, use it. */
 	ext1 = "../../ext/compressors/bzip2_compress/.libs/bzip2_compress.so";
@@ -70,50 +69,11 @@ wts_open(WT_CONNECTION **connp, WT_SESSION **sessionp)
 	    g.progname, g.c_cache, ext1, ext2,
 	    g.config_open == NULL ? "" : g.config_open);
 
-	if ((ret = wiredtiger_open(NULL, &event_handler, config, &conn)) != 0) {
-		fprintf(stderr, "%s: wiredtiger_open: %s\n",
-		    g.progname, wiredtiger_strerror(ret));
-		return (1);
-	}
-
-	if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0) {
-		fprintf(stderr, "%s: conn.session: %s\n",
-		    g.progname, wiredtiger_strerror(ret));
-		(void)conn->close(conn, NULL);
-		return (1);
-	}
+	if ((ret = wiredtiger_open(NULL, &event_handler, config, &conn)) != 0)
+		die(ret, "wiredtiger_open");
 
-	*sessionp = session;
-	*connp = conn;
-	return (0);
-}
-
-static int
-wts_close(WT_CONNECTION *conn)
-{
-	int ret;
-	if ((ret = conn->close(conn, NULL)) != 0) {
-		fprintf(stderr, "%s: conn.close: %s\n",
-		    g.progname, wiredtiger_strerror(ret));
-		return (1);
-	}
-
-	return (0);
-}
-
-int
-wts_startup(int open_cursors)
-{
-	time_t now;
-	WT_CONNECTION *conn;
-	WT_CURSOR *cursor, *cursor_insert;
-	WT_SESSION *session;
-	uint32_t maxintlpage, maxintlitem, maxleafpage, maxleafitem;
-	int ret;
-	char config[512], *end, *p;
-
-	if (wts_open(&conn, &session))
-		return (1);
+	if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
+		die(ret, "connection.open_session");
 
 	maxintlpage = 1U << g.c_intl_page_max;
 	maxintlitem = MMRAND(maxintlpage / 50, maxintlpage / 40);
@@ -160,94 +120,36 @@ wts_startup(int open_cursors)
 		break;
 	}
 
-	if ((ret = session->create(session, WT_TABLENAME, config)) != 0) {
-		fprintf(stderr, "%s: create table: %s\n",
-		    g.progname, wiredtiger_strerror(ret));
-		return (1);
-	}
+	if ((ret = session->create(session, WT_TABLENAME, config)) != 0)
+		die(ret, "session.create: %s", WT_TABLENAME);
 
-	cursor = cursor_insert = NULL;
-	if (open_cursors) {
-		/*
-		 * We open two cursors: one configured for overwriting and one
-		 * configured for append if we're dealing with a column-store.
-		 *
-		 * The reason is when testing with existing records, we don't
-		 * track if a record was deleted or not, which means we need to
-		 * use cursor->insert with overwriting configured.  But, in
-		 * column-store files where we're testing with new, appended
-		 * records, we don't want to have to specify the record number,
-		 * which requires an append configuration.
-		 */
-		if ((ret = session->open_cursor(
-		    session, WT_TABLENAME, NULL, "overwrite", &cursor)) != 0) {
-			fprintf(stderr, "%s: open_cursor: %s\n",
-			    g.progname, wiredtiger_strerror(ret));
-			return (1);
-		}
-		if ((g.c_file_type == FIX || g.c_file_type == VAR) &&
-		    (ret = session->open_cursor(session,
-		    WT_TABLENAME, NULL, "append", &cursor_insert)) != 0) {
-			fprintf(stderr, "%s: open_cursor: %s\n",
-			    g.progname, wiredtiger_strerror(ret));
-			return (1);
-		}
-	}
-
-	if (g.logging == LOG_OPS) {
-		(void)time(&now);
-		(void)session->msg_printf(session,
-		    "===============\nWT start: %s===============",
-		    ctime(&now));
-	}
+	if ((ret = session->close(session, NULL)) != 0)
+		die(ret, "session.close");
 
 	g.wts_conn = conn;
-	g.wts_cursor = cursor;
-	g.wts_cursor_insert = cursor_insert;
-	g.wts_session = session;
-
-	return (0);
 }
 
-int
-wts_teardown(void)
+void
+wts_close()
 {
 	WT_CONNECTION *conn;
-	WT_CURSOR *cursor, *cursor_insert;
-	WT_SESSION *session;
-	time_t now;
 	int ret;
 
 	conn = g.wts_conn;
-	cursor = g.wts_cursor;
-	cursor_insert = g.wts_cursor_insert;
-	session = g.wts_session;
-
-	if (g.logging == LOG_OPS) {
-		(void)time(&now);
-		(void)session->msg_printf(session,
-		    "===============\nWT stop: %s===============",
-		    ctime(&now));
-	}
 
-	/*
-	 * Close the open cursors -- they will block sync.
-	 */
-	if ((cursor_insert != NULL &&
-	    (ret = cursor_insert->close(cursor_insert)) != 0) ||
-	    (cursor != NULL && (ret = cursor->close(cursor)) != 0))
-		die("cursor.close", ret);
+	wts_sync();
 
-	ret = wts_sync();
-	return (wts_close(conn) ? 1 : ret);
+	if ((ret = conn->close(conn, NULL)) != 0)
+		die(ret, "connection.close");
 }
 
-int
+void
 wts_dump(const char *tag, int dump_bdb)
 {
-	char cmd[128];
+	int ret;
+	char cmd[256];
 
-	track("dump files and compare", 0ULL);
+	track("dump files and compare", 0ULL, NULL);
 	switch (g.c_file_type) {
 	case FIX:
 	case VAR:
@@ -258,84 +160,87 @@ wts_dump(const char *tag, int dump_bdb)
 		snprintf(cmd, sizeof(cmd),
 		    "sh ./s_dumpcmp%s", dump_bdb ? " -b" : "");
 		break;
-	default:
-		return (1);
-	}
-	if (system(cmd) != 0) {
-		fprintf(stderr,
-		    "%s: %s dump comparison failed\n", g.progname, tag);
-		return (1);
 	}
-
-	return (0);
+	if ((ret = system(cmd)) != 0)
+		die(ret, "%s: dump comparison failed", tag);
 }
 
-int
+void
 wts_salvage(void)
 {
 	WT_CONNECTION *conn;
 	WT_SESSION *session;
 	int ret;
 
-	track("salvage", 0ULL);
+	conn = g.wts_conn;
 
-	if (wts_open(&conn, &session))
-		return (1);
+	track("salvage", 0ULL, NULL);
 
-	if ((ret = session->salvage(session, WT_TABLENAME, NULL)) != 0) {
-		fprintf(stderr, "%s: salvage: %s\n",
-		    g.progname, wiredtiger_strerror(ret));
-		return (1);
-	}
-
-	return (wts_close(conn));
+	/*
+	 * Save a copy of the interesting files so we can replay the salvage
+	 * step as necessary.
+	 */
+	if ((ret = system(
+	    "rm -rf __slvg.copy && "
+	    "mkdir __slvg.copy && "
+	    "cp WiredTiger* __wt __slvg.copy/")) != 0)
+		die(ret, "salvage cleanup step failed");
+
+	if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
+		die(ret, "connection.open_session");
+	if ((ret = session->salvage(session, WT_TABLENAME, NULL)) != 0)
+		die(ret, "session.salvage: %s", WT_TABLENAME);
+	if ((ret = session->close(session, NULL)) != 0)
+		die(ret, "session.close");
 }
 
-static int
+static void
 wts_sync(void)
 {
+	WT_CONNECTION *conn;
 	WT_SESSION *session;
 	int ret;
 
-	session = g.wts_session;
+	conn = g.wts_conn;
 
-	track("sync", 0ULL);
+	track("sync", 0ULL, NULL);
 
+	if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
+		die(ret, "connection.open_session");
 	if ((ret = session->sync(
-	    session, WT_TABLENAME, NULL)) != 0 && ret != EBUSY) {
-		fprintf(stderr, "%s: sync: %s\n",
-		    g.progname, wiredtiger_strerror(ret));
-		return (1);
-	}
-	return (0);
+	    session, WT_TABLENAME, NULL)) != 0 && ret != EBUSY)
+		die(ret, "session.sync: %s", WT_TABLENAME);
+	if ((ret = session->close(session, NULL)) != 0)
+		die(ret, "session.close");
 }
 
-int
+void
 wts_verify(const char *tag)
 {
 	WT_CONNECTION *conn;
 	WT_SESSION *session;
 	int ret;
 
-	track("verify", 0ULL);
+	conn = g.wts_conn;
 
-	if (wts_open(&conn, &session))
-		return (1);
+	track("verify", 0ULL, NULL);
 
+	if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
+		die(ret, "connection.open_session");
 	if ((ret = session->verify(session, WT_TABLENAME, NULL)) != 0)
-		fprintf(stderr, "%s: %s verify: %s\n",
-		    g.progname, tag, wiredtiger_strerror(ret));
-
-	return (wts_close(conn) ? 1 : ret);
+		die(ret, "session.verify: %s: %s", WT_TABLENAME, tag);
+	if ((ret = session->close(session, NULL)) != 0)
+		die(ret, "session.close");
 }
 
 /*
  * wts_stats --
  *	Dump the run's statistics.
  */
-int
+void
 wts_stats(void)
 {
+	WT_CONNECTION *conn;
 	WT_CURSOR *cursor;
 	WT_SESSION *session;
 	FILE *fp;
@@ -343,52 +248,49 @@ wts_stats(void)
 	uint64_t v;
 	int ret;
 
-	session = g.wts_session;
+	track("stat", 0ULL, NULL);
 
-	track("stat", 0ULL);
+	conn = g.wts_conn;
+	if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
+		die(ret, "connection.open_session");
 
 	if ((fp = fopen("__stats", "w")) == NULL)
-		die("__stats", errno);
+		die(errno, "fopen: __stats");
 
 	/* Connection statistics. */
 	if ((ret = session->open_cursor(session,
-	    "statistics:", NULL, NULL, &cursor)) != 0) {
-		fprintf(stderr, "%s: stat cursor open failed: %s\n",
-		    g.progname, wiredtiger_strerror(ret));
-		return (1);
-	}
+	    "statistics:", NULL, NULL, &cursor)) != 0)
+		die(ret, "session.open_cursor");
+
 	while ((ret = cursor->next(cursor)) == 0 &&
 	    (ret = cursor->get_value(cursor, &desc, &pval, &v)) == 0)
-		if (fprintf(fp, "%s=%s\n", desc, pval) < 0) {
-			ret = errno;
-			break;
-		}
+		if (fprintf(fp, "%s=%s\n", desc, pval) < 0)
+			die(errno, "fprintf");
 
 	if (ret != WT_NOTFOUND)
-		die("cursor.next", ret);
+		die(ret, "cursor.next");
 	if ((ret = cursor->close(cursor)) != 0)
-		die("cursor.close", ret);
+		die(ret, "cursor.close");
 
 	/* File statistics. */
 	if ((ret = session->open_cursor(session,
-	    "statistics:" WT_TABLENAME, NULL, NULL, &cursor)) != 0) {
-		fprintf(stderr, "%s: stat cursor open failed: %s\n",
-		    g.progname, wiredtiger_strerror(ret));
-		return (1);
-	}
+	    "statistics:" WT_TABLENAME, NULL, NULL, &cursor)) != 0)
+		die(ret, "session.open_cursor");
+
 	while ((ret = cursor->next(cursor)) == 0 &&
 	    (ret = cursor->get_value(cursor, &desc, &pval, &v)) == 0)
-		if (fprintf(fp, "%s=%s\n", desc, pval) < 0) {
-			ret = errno;
-			break;
-		}
+		if (fprintf(fp, "%s=%s\n", desc, pval) < 0)
+			die(errno, "fprintf");
 
 	if (ret != WT_NOTFOUND)
-		die("cursor.next", ret);
+		die(ret, "cursor.next");
 	if ((ret = cursor->close(cursor)) != 0)
-		die("cursor.close", ret);
+		die(ret, "cursor.close");
 
-	(void)fclose(fp);
+	/* fclose signals failure with EOF; the cause is left in errno. */
+	if (fclose(fp) != 0)
+		die(errno, "fclose");
 
-	return (0);
+	if ((ret = session->close(session, NULL)) != 0)
+		die(ret, "session.close");
 }
diff --git a/test/format/wts_bulk.c b/test/format/wts_bulk.c
index 2d514a4878b..9a1ffb55047 100644
--- a/test/format/wts_bulk.c
+++ b/test/format/wts_bulk.c
@@ -7,19 +7,20 @@
 
 #include "format.h"
 
-static int  bulk(WT_ITEM **, WT_ITEM **);
-
-int
-wts_bulk_load(void)
+void
+wts_load(void)
 {
+	WT_CONNECTION *conn;
 	WT_CURSOR *cursor;
+	WT_ITEM key, value;
 	WT_SESSION *session;
-	WT_ITEM *key, *value;
-	uint64_t insert_count;
+	uint8_t *keybuf, *valbuf;
 	int ret;
 
-	session = g.wts_session;
-	key = value = NULL;		/* -Wuninitialized */
+	conn = g.wts_conn;
+
+	if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
+		die(ret, "connection.open_session");
 
 	/*
 	 * Avoid bulk load with a custom collator, because the order of
@@ -27,90 +28,78 @@ wts_bulk_load(void)
 	 */
 	if ((ret = session->open_cursor(session, WT_TABLENAME, NULL,
 	    (g.c_file_type == ROW && g.c_reverse) ? NULL : "bulk",
-	    &cursor)) != 0) {
-		fprintf(stderr, "%s: cursor open failed: %s\n",
-		    g.progname, wiredtiger_strerror(ret));
-		return (1);
-	}
+	    &cursor)) != 0)
+		die(ret, "session.open_cursor");
 
-	insert_count = 0;
-	while (bulk(&key, &value) == 0) {
-		/* Report on progress every 100 inserts. */
-		if (++insert_count % 100 == 0)
-			track("bulk load", insert_count);
-
-		if (key != NULL)
-			cursor->set_key(cursor, key);
-		if (g.c_file_type == FIX)
-			cursor->set_value(cursor, *(uint8_t *)value->data);
-		else
-			cursor->set_value(cursor, value);
-		if ((ret = cursor->insert(cursor)) != 0) {
-			fprintf(stderr, "%s: cursor insert failed: %s\n",
-			    g.progname, wiredtiger_strerror(ret));
-			ret = 1;
-			goto err;
+	/* Set up the default key and value buffers. */
+	memset(&key, 0, sizeof(key));
+	key_gen_setup(&keybuf);
+	memset(&value, 0, sizeof(value));
+	val_gen_setup(&valbuf);
+
+	for (;;) {
+		if (++g.key_cnt > g.c_rows) {
+			g.key_cnt = g.rows = g.c_rows;
+			break;
 		}
-	}
 
-err:	(void)cursor->close(cursor);
-	return (ret);
-}
+		/* Report on progress every 100 inserts. */
+		if (g.key_cnt % 100 == 0)
+			track("bulk load", g.key_cnt, NULL);
 
-/*
- * bulk --
- *	WiredTiger bulk load routine.
- */
-static int
-bulk(WT_ITEM **keyp, WT_ITEM **valuep)
-{
-	static WT_ITEM key, value;
-	WT_SESSION *session;
+		key_gen(keybuf, &key.size, (uint64_t)g.key_cnt, 0);
+		key.data = keybuf;
+		value_gen(valbuf, &value.size, (uint64_t)g.key_cnt);
+		value.data = valbuf;
 
-	session = g.wts_session;
+		switch (g.c_file_type) {
+		case FIX:
+			if (g.logging == LOG_OPS)
+				(void)session->msg_printf(session,
+				    "%-10s %" PRIu32 " {0x%02" PRIx8 "}",
+				    "bulk V",
+				    g.key_cnt, ((uint8_t *)value.data)[0]);
+			cursor->set_value(cursor, *(uint8_t *)value.data);
+			break;
+		case VAR:
+			cursor->set_value(cursor, &value);
+			if (g.logging == LOG_OPS)
+				(void)session->msg_printf(session,
+				    "%-10s %" PRIu32 " {%.*s}", "bulk V",
+				    g.key_cnt,
+				    (int)value.size, (char *)value.data);
+			break;
+		case ROW:
+			cursor->set_key(cursor, &key);
+			if (g.logging == LOG_OPS)
+				(void)session->msg_printf(session,
+				    "%-10s %" PRIu32 " {%.*s}", "bulk K",
+				    g.key_cnt, (int)key.size, (char *)key.data);
+			cursor->set_value(cursor, &value);
+			if (g.logging == LOG_OPS)
+				(void)session->msg_printf(session,
+				    "%-10s %" PRIu32 " {%.*s}", "bulk V",
+				    g.key_cnt,
+				    (int)value.size, (char *)value.data);
+			break;
+		}
 
-	if (++g.key_cnt > g.c_rows) {
-		g.key_cnt = g.rows = g.c_rows;
-		return (1);
-	}
+		if ((ret = cursor->insert(cursor)) != 0)
+			die(ret, "cursor.insert");
+
+		if (!SINGLETHREADED)
+			continue;
 
-	key_gen(&key.data, &key.size, (uint64_t)g.key_cnt, 0);
-	value_gen(&value.data, &value.size, (uint64_t)g.key_cnt);
-
-	switch (g.c_file_type) {
-	case FIX:
-		*keyp = NULL;
-		*valuep = &value;
-		if (g.logging == LOG_OPS)
-			(void)session->msg_printf(session,
-			    "%-10s %" PRIu32 " {0x%02" PRIx8 "}",
-			    "bulk V",
-			    g.key_cnt, ((uint8_t *)value.data)[0]);
-		break;
-	case VAR:
-		*keyp = NULL;
-		*valuep = &value;
-		if (g.logging == LOG_OPS)
-			(void)session->msg_printf(session,
-			    "%-10s %" PRIu32 " {%.*s}", "bulk V",
-			    g.key_cnt, (int)value.size, (char *)value.data);
-		break;
-	case ROW:
-		*keyp = &key;
-		if (g.logging == LOG_OPS)
-			(void)session->msg_printf(session,
-			    "%-10s %" PRIu32 " {%.*s}", "bulk K",
-		    g.key_cnt, (int)key.size, (char *)key.data);
-		*valuep = &value;
-		if (g.logging == LOG_OPS)
-			(void)session->msg_printf(session,
-			    "%-10s %" PRIu32 " {%.*s}", "bulk V",
-			    g.key_cnt, (int)value.size, (char *)value.data);
-		break;
+		/* Insert the item into BDB. */
+		bdb_insert(key.data, key.size, value.data, value.size);
 	}
 
-	/* Insert the item into BDB. */
-	bdb_insert(key.data, key.size, value.data, value.size);
+	if ((ret = cursor->close(cursor)) != 0)
+		die(ret, "cursor.close");
+
+	if ((ret = session->close(session, NULL)) != 0)
+		die(ret, "session.close");
 
-	return (0);
+	free(keybuf);
+	free(valbuf);
 }
diff --git a/test/format/wts_ops.c b/test/format/wts_ops.c
index b9f4b80772e..0a2c8e343da 100644
--- a/test/format/wts_ops.c
+++ b/test/format/wts_ops.c
@@ -7,34 +7,150 @@
 
 #include "format.h"
 
-static int  wts_col_del(uint64_t, int *);
-static int  wts_col_insert(uint64_t *);
-static int  wts_col_put(uint64_t);
-static int  wts_notfound_chk(const char *, int, int, uint64_t);
-static int  wts_np(int, int, int *);
-static int  wts_read(uint64_t);
-static int  wts_row_del(uint64_t, int *);
-static int  wts_row_put(uint64_t, int);
-static void wts_stream_item(const char *, WT_ITEM *);
+static void  col_del(WT_CURSOR *, WT_ITEM *, uint64_t, int *);
+static void  col_insert(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t *);
+static void  col_put(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t);
+static void  nextprev(WT_CURSOR *, int, int *);
+static int   notfound_chk(const char *, int, int, uint64_t);
+static void *ops(void *);
+static void  read_row(WT_CURSOR *, WT_ITEM *, uint64_t);
+static void  row_del(WT_CURSOR *, WT_ITEM *, uint64_t, int *);
+static void  row_put(WT_CURSOR *, WT_ITEM *, WT_ITEM *, uint64_t, int);
+static void  print_item(const char *, WT_ITEM *);
 
 /*
  * wts_ops --
- *	Perform a number of operations.
+ *	Perform a number of operations in a set of threads.
  */
-int
+void
 wts_ops(void)
 {
+	TINFO *tinfo, total;
+	WT_CONNECTION *conn;
+	WT_SESSION *session;
+	time_t now;
+	int i, ret, running;
+
+	conn = g.wts_conn;
+
+	/* Open a session. */
+	if (g.logging == LOG_OPS) {
+		if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
+			die(ret, "connection.open_session");
+
+		(void)time(&now);
+		(void)session->msg_printf(session,
+		    "===============\nthread ops start: %s===============",
+		    ctime(&now));
+	}
+
+	if (g.threads == 1) {
+		memset(&total, 0, sizeof(total));
+		(void)ops(&total);
+	} else {
+		/* Create thread structure. */
+		if ((tinfo = calloc((size_t)g.threads, sizeof(*tinfo))) == NULL)
+			die(errno, "calloc");
+		for (i = 0; i < g.threads; ++i)
+			if ((ret = pthread_create(
+			    &tinfo[i].tid, NULL, ops, &tinfo[i])) != 0)
+				die(ret, "pthread_create");
+
+		/* Wait for the threads. */
+		for (;;) {
+			total.search =
+			    total.insert = total.remove = total.update = 0;
+			for (i = running = 0; i < g.threads; ++i) {
+				total.search += tinfo[i].search;
+				total.insert += tinfo[i].insert;
+				total.remove += tinfo[i].remove;
+				total.update += tinfo[i].update;
+				switch (tinfo[i].state) {
+				case TINFO_RUNNING:
+					running = 1;
+					break;
+				case TINFO_COMPLETE:
+					tinfo[i].state = TINFO_JOINED;
+					(void)pthread_join(tinfo[i].tid, NULL);
+					break;
+				case TINFO_JOINED:
+					break;
+				}
+			}
+			track("read/write ops", 0ULL, &total);
+			if (!running)
+				break;
+			usleep(750000);			/* 3/4 of a second */
+		}
+	}
+
+	if (g.logging == LOG_OPS) {
+		(void)time(&now);
+		(void)session->msg_printf(session,
+		    "===============\nthread ops stop: %s===============",
+		    ctime(&now));
+
+		if ((ret = session->close(session, NULL)) != 0)
+			die(ret, "session.close");
+	}
+}
+
+static void *
+ops(void *arg)
+{
+	TINFO *tinfo;
+	WT_CONNECTION *conn;
+	WT_CURSOR *cursor, *cursor_insert;
+	WT_SESSION *session;
+	WT_ITEM key, value;
 	uint64_t cnt, keyno;
 	uint32_t op;
 	u_int np;
-	int dir, insert, notfound;
+	int dir, insert, notfound, ret;
+	uint8_t *keybuf, *valbuf;
+
+	conn = g.wts_conn;
+
+	tinfo = arg;
+	tinfo->state = TINFO_RUNNING;
+
+	/* Set up the default key and value buffers. */
+	memset(&key, 0, sizeof(key));
+	key_gen_setup(&keybuf);
+	memset(&value, 0, sizeof(value));
+	val_gen_setup(&valbuf);
+
+	/* Open a session. */
+	if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
+		die(ret, "connection.open_session");
+
+	/*
+	 * Open two cursors: one configured for overwriting and one configured
+	 * for append if we're dealing with a column-store.
+	 *
+	 * The reason is when testing with existing records, we don't track if
+	 * a record was deleted or not, which means we must use cursor->insert
+	 * with overwriting configured.  But, in column-store files where we're
+	 * testing with new, appended records, we don't want to have to specify
+	 * the record number, which requires an append configuration.
+	 */
+	if ((ret = session->open_cursor(session,
+	    WT_TABLENAME, NULL, "overwrite", &cursor)) != 0)
+		die(ret, "session.open_cursor");
+	if ((g.c_file_type == FIX || g.c_file_type == VAR) &&
+	    (ret = session->open_cursor(session,
+	    WT_TABLENAME, NULL, "append", &cursor_insert)) != 0)
+		die(ret, "session.open_cursor");
 
 	for (cnt = 0; cnt < g.c_ops; ++cnt) {
-		if (cnt % 10 == 0)
-			track("read/write ops", cnt);
+		if (SINGLETHREADED && cnt % 100 == 0)
+			track("read/write ops", 0ULL, tinfo);
 
 		insert = notfound = 0;
+
 		keyno = MMRAND(1, g.rows);
+		key.data = keybuf;
+		value.data = valbuf;
 
 		/*
 		 * Perform some number of operations: the percentage of deletes,
@@ -45,50 +161,52 @@ wts_ops(void)
 		 */
 		op = (uint32_t)(wts_rand() % 100);
 		if (op < g.c_delete_pct) {
+			++tinfo->remove;
 			switch (g.c_file_type) {
 			case ROW:
 				/*
 				 * If deleting a non-existent record, the cursor
 				 * won't be positioned, and so can't do a next.
 				 */
-				if (wts_row_del(keyno, &notfound))
-					return (1);
+				row_del(cursor, &key, keyno, &notfound);
 				break;
 			case FIX:
 			case VAR:
-				if (wts_col_del(keyno, &notfound))
-					return (1);
+				col_del(cursor, &key, keyno, &notfound);
 				break;
 			}
 		} else if (op < g.c_delete_pct + g.c_insert_pct) {
+			++tinfo->insert;
 			switch (g.c_file_type) {
 			case ROW:
-				if (wts_row_put(keyno, 1))
-					return (1);
+				row_put(cursor, &key, &value, keyno, 1);
 				break;
 			case FIX:
 			case VAR:
-				if (wts_col_insert(&keyno))
-					return (1);
+				/*
+				 * Reset the standard cursor so it doesn't keep
+				 * pages pinned.
+				 */
+				cursor->reset(cursor);
+				col_insert(cursor_insert, &key, &value, &keyno);
 				insert = 1;
 				break;
 			}
 		} else if (
 		    op < g.c_delete_pct + g.c_insert_pct + g.c_write_pct) {
+			++tinfo->update;
 			switch (g.c_file_type) {
 			case ROW:
-				if (wts_row_put(keyno, 0))
-					return (1);
+				row_put(cursor, &key, &value, keyno, 0);
 				break;
 			case FIX:
 			case VAR:
-				if (wts_col_put(keyno))
-					return (1);
+				col_put(cursor, &key, &value, keyno);
 				break;
 			}
 		} else {
-			if (wts_read(keyno))
-				return (1);
+			++tinfo->search;
+			read_row(cursor, &key, keyno);
 			continue;
 		}
 
@@ -100,30 +218,54 @@ wts_ops(void)
 		for (np = 0; np < MMRAND(1, 8); ++np) {
 			if (notfound)
 				break;
-			if (wts_np(dir, insert, &notfound))
-				return (1);
+			nextprev(
+			    insert ? cursor_insert : cursor, dir, &notfound);
 		}
 
-		if (insert) {
-			WT_CURSOR *cursor = g.wts_cursor_insert;
-			cursor->reset(cursor);
-		}
+		if (insert)
+			cursor_insert->reset(cursor_insert);
 
-		/* Then read the value we modified to confirm it worked. */
-		if (wts_read(keyno))
-			return (1);
+		/* Read the value we modified to confirm the operation. */
+		read_row(cursor, &key, keyno);
 	}
-	return (0);
+
+	if ((ret = session->close(session, NULL)) != 0)
+		die(ret, "session.close");
+
+	free(keybuf);
+	free(valbuf);
+
+	tinfo->state = TINFO_COMPLETE;
+	return (NULL);
 }
 
 /*
  * wts_read_scan --
  *	Read and verify all elements in a file.
  */
-int
+void
 wts_read_scan(void)
 {
+	WT_CONNECTION *conn;
+	WT_CURSOR *cursor;
+	WT_ITEM key;
+	WT_SESSION *session;
 	uint64_t cnt, last_cnt;
+	uint8_t *keybuf;
+	int ret;
+
+	conn = g.wts_conn;
+
+	/* Set up the default key buffer. */
+	memset(&key, 0, sizeof(key));
+	key_gen_setup(&keybuf);
+
+	/* Open a session and cursor pair. */
+	if ((ret = conn->open_session(conn, NULL, NULL, &session)) != 0)
+		die(ret, "connection.open_session");
+	if ((ret = session->open_cursor(
+	    session, WT_TABLENAME, NULL, NULL, &cursor)) != 0)
+		die(ret, "session.open_cursor");
 
 	/* Check a random subset of the records using the key. */
 	for (last_cnt = cnt = 0; cnt < g.key_cnt;) {
@@ -131,14 +273,18 @@ wts_read_scan(void)
 		if (cnt > g.rows)
 			cnt = g.rows;
 		if (cnt - last_cnt > 1000) {
-			track("read row scan", cnt);
+			track("read row scan", cnt, NULL);
 			last_cnt = cnt;
 		}
 
-		if (wts_read(cnt))
-			return (1);
+		key.data = keybuf;
+		read_row(cursor, &key, cnt);
 	}
-	return (0);
+
+	if ((ret = session->close(session, NULL)) != 0)
+		die(ret, "session.close");
+
+	free(keybuf);
 }
 
 #define	NTF_CHK(a) do {							\
@@ -146,37 +292,29 @@ wts_read_scan(void)
 	case 0:								\
 		break;							\
 	case 1:								\
-		return (1);						\
-	case 2:								\
-		return (0);						\
+		return;							\
 	}								\
 } while (0)
 
 /*
- * wts_read --
+ * read_row --
  *	Read and verify a single element in a row- or column-store file.
  */
-static int
-wts_read(uint64_t keyno)
+static void
+read_row(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno)
 {
-	static WT_ITEM key, value, bdb_value;
-	WT_CURSOR *cursor;
+	WT_ITEM bdb_value, value;
 	WT_SESSION *session;
 	int notfound, ret;
 	uint8_t bitfield;
 
-	cursor = g.wts_cursor;
-	session = g.wts_session;
+	session = cursor->session;
 
 	/* Log the operation */
 	if (g.logging == LOG_OPS)
 		(void)session->msg_printf(
 		    session, "%-10s%" PRIu64, "read", keyno);
 
-	/* Retrieve the BDB value. */
-	if (bdb_read(keyno, &bdb_value.data, &bdb_value.size, &notfound))
-		return (1);
-
 	/* Retrieve the key/value pair by key. */
 	switch (g.c_file_type) {
 	case FIX:
@@ -184,8 +322,8 @@ wts_read(uint64_t keyno)
 		cursor->set_key(cursor, keyno);
 		break;
 	case ROW:
-		key_gen(&key.data, &key.size, keyno, 0);
-		cursor->set_key(cursor, &key);
+		key_gen((uint8_t *)key->data, &key->size, keyno, 0);
+		cursor->set_key(cursor, key);
 		break;
 	}
 
@@ -194,14 +332,20 @@ wts_read(uint64_t keyno)
 			ret = cursor->get_value(cursor, &bitfield);
 			value.data = &bitfield;
 			value.size = 1;
-		} else
+		} else {
+			memset(&value, 0, sizeof(value));
 			ret = cursor->get_value(cursor, &value);
+		}
 	}
-	if (ret != 0 && ret != WT_NOTFOUND) {
-		fprintf(stderr, "%s: wts_read: read row %" PRIu64 ": %s\n",
-		    g.progname, keyno, wiredtiger_strerror(ret));
-		return (1);
-	}
+	if (ret != 0 && ret != WT_NOTFOUND)
+		die(ret, "read_row: read row %" PRIu64, keyno);
+
+	if (!SINGLETHREADED)
+		return;
+
+	/* Retrieve the BDB value. */
+	memset(&bdb_value, 0, sizeof(bdb_value));
+	bdb_read(keyno, &bdb_value.data, &bdb_value.size, &notfound);
 
 	/*
 	 * Check for not-found status.
@@ -215,29 +359,27 @@ wts_read(uint64_t keyno)
 		ret = 0;
 	}
 
-	NTF_CHK(wts_notfound_chk("wts_read", ret, notfound, keyno));
+	NTF_CHK(notfound_chk("read_row", ret, notfound, keyno));
 
 	/* Compare the two. */
 	if (value.size != bdb_value.size ||
 	    memcmp(value.data, bdb_value.data, value.size) != 0) {
 		fprintf(stderr,
-		    "wts_read: read row value mismatch %" PRIu64 ":\n", keyno);
-		wts_stream_item("bdb", &bdb_value);
-		wts_stream_item(" wt", &value);
-		return (1);
+		    "read_row: read row value mismatch %" PRIu64 ":\n", keyno);
+		print_item("bdb", &bdb_value);
+		print_item(" wt", &value);
+		die(0, NULL);
 	}
-	return (0);
 }
 
 /*
- * wts_np --
+ * nextprev --
  *	Read and verify the next/prev element in a row- or column-store file.
  */
-static int
-wts_np(int next, int insert, int *notfoundp)
+static void
+nextprev(WT_CURSOR *cursor, int next, int *notfoundp)
 {
-	static WT_ITEM key, value, bdb_key, bdb_value;
-	WT_CURSOR *cursor;
+	WT_ITEM key, value, bdb_key, bdb_value;
 	WT_SESSION *session;
 	uint64_t keyno;
 	int notfound, ret;
@@ -245,16 +387,9 @@ wts_np(int next, int insert, int *notfoundp)
 	const char *which;
 	char *p;
 
-	cursor = insert ? g.wts_cursor_insert : g.wts_cursor;
-	session = g.wts_session;
+	session = cursor->session;
 	which = next ? "next" : "prev";
 
-	/* Retrieve the BDB value. */
-	if (bdb_np(next, &bdb_key.data, &bdb_key.size,
-	    &bdb_value.data, &bdb_value.size, &notfound))
-		return (1);
-	*notfoundp = notfound;
-
 	keyno = 0;
 	ret = next ? cursor->next(cursor) : cursor->prev(cursor);
 	if (ret == 0)
@@ -275,42 +410,45 @@ wts_np(int next, int insert, int *notfoundp)
 				ret = cursor->get_value(cursor, &value);
 			break;
 		}
-	if (ret != 0 && ret != WT_NOTFOUND) {
-		fprintf(stderr,
-		    "%s: wts_%s: %s\n",
-		    g.progname, which, wiredtiger_strerror(ret));
-		return (1);
-	}
+	if (ret != 0 && ret != WT_NOTFOUND)
+		die(ret, "%s", which);
+	*notfoundp = ret == WT_NOTFOUND;
 
-	NTF_CHK(wts_notfound_chk(
-	    next ? "wts_np(next)" : "wts_np(prev)", ret, notfound, keyno));
+	if (!SINGLETHREADED)
+		return;
+
+	/* Retrieve the BDB value. */
+	bdb_np(next, &bdb_key.data, &bdb_key.size,
+	    &bdb_value.data, &bdb_value.size, &notfound);
+	NTF_CHK(notfound_chk(
+	    next ? "nextprev(next)" : "nextprev(prev)", ret, notfound, keyno));
 
 	/* Compare the two. */
 	if (g.c_file_type == ROW) {
 		if (key.size != bdb_key.size ||
 		    memcmp(key.data, bdb_key.data, key.size) != 0) {
-			fprintf(stderr, "wts_np: %s key mismatch:\n", which);
-			wts_stream_item("bdb-key", &bdb_key);
-			wts_stream_item(" wt-key", &key);
-			return (1);
+			fprintf(stderr, "nextprev: %s key mismatch:\n", which);
+			print_item("bdb-key", &bdb_key);
+			print_item(" wt-key", &key);
+			die(0, NULL);
 		}
 	} else {
 		if (keyno != (uint64_t)atoll(bdb_key.data)) {
 			if ((p = strchr((char *)bdb_key.data, '.')) != NULL)
 				*p = '\0';
 			fprintf(stderr,
-			    "wts_np: %s key mismatch: %.*s != %" PRIu64 "\n",
+			    "nextprev: %s key mismatch: %.*s != %" PRIu64 "\n",
 			    which,
 			    (int)bdb_key.size, (char *)bdb_key.data, keyno);
-			return (1);
+			die(0, NULL);
 		}
 	}
 	if (value.size != bdb_value.size ||
 	    memcmp(value.data, bdb_value.data, value.size) != 0) {
-		fprintf(stderr, "wts_np: %s value mismatch:\n", which);
-		wts_stream_item("bdb-value", &bdb_value);
-		wts_stream_item(" wt-value", &value);
-		return (1);
+		fprintf(stderr, "nextprev: %s value mismatch:\n", which);
+		print_item("bdb-value", &bdb_value);
+		print_item(" wt-value", &value);
+		die(0, NULL);
 	}
 
 	if (g.logging == LOG_OPS)
@@ -332,71 +470,60 @@ wts_np(int next, int insert, int *notfoundp)
 			    which, keyno, (int)value.size, (char *)value.data);
 			break;
 		}
-
-	return (0);
 }
 
 /*
- * wts_row_put --
+ * row_put --
  *	Update an element in a row-store file.
  */
-static int
-wts_row_put(uint64_t keyno, int insert)
+static void
+row_put(
+    WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno, int insert)
 {
-	static WT_ITEM key, value;
-	WT_CURSOR *cursor;
 	WT_SESSION *session;
 	int notfound, ret;
 
-	cursor = g.wts_cursor;
-	session = g.wts_session;
+	session = cursor->session;
 
-	key_gen(&key.data, &key.size, keyno, insert);
-	value_gen(&value.data, &value.size, keyno);
+	key_gen((uint8_t *)key->data, &key->size, keyno, insert);
+	value_gen((uint8_t *)value->data, &value->size, keyno);
 
 	/* Log the operation */
 	if (g.logging == LOG_OPS)
 		(void)session->msg_printf(session, "%-10s{%.*s}\n%-10s{%.*s}",
 		    insert ? "insertK" : "putK",
-		    (int)key.size, (char *)key.data,
+		    (int)key->size, (char *)key->data,
 		    insert ? "insertV" : "putV",
-		    (int)value.size, (char *)value.data);
-
-	if (bdb_put(key.data, key.size, value.data, value.size, &notfound))
-		return (1);
+		    (int)value->size, (char *)value->data);
 
-	cursor->set_key(cursor, &key);
-	cursor->set_value(cursor, &value);
+	cursor->set_key(cursor, key);
+	cursor->set_value(cursor, value);
 	ret = cursor->insert(cursor);
-	if (ret != 0 && ret != WT_NOTFOUND) {
-		fprintf(stderr,
-		    "%s: wts_row_put: %s row %" PRIu64 " by key: %s\n",
-		    g.progname, insert ? "insert" : "update",
-		    keyno, wiredtiger_strerror(ret));
-		return (1);
-	}
+	if (ret != 0 && ret != WT_NOTFOUND)
+		die(ret,
+		    "row_put: %s row %" PRIu64 " by key",
+		    insert ? "insert" : "update", keyno);
 
-	NTF_CHK(wts_notfound_chk("wts_row_put", ret, notfound, keyno));
-	return (0);
+	if (!SINGLETHREADED)
+		return;
+
+	bdb_put(key->data, key->size, value->data, value->size, &notfound);
+	NTF_CHK(notfound_chk("row_put", ret, notfound, keyno));
 }
 
 /*
- * wts_col_put --
+ * col_put --
  *	Update an element in a column-store file.
  */
-static int
-wts_col_put(uint64_t keyno)
+static void
+col_put(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t keyno)
 {
-	static WT_ITEM key, value;
-	WT_CURSOR *cursor;
 	WT_SESSION *session;
 	int notfound, ret;
 
-	cursor = g.wts_cursor;
-	session = g.wts_session;
+	session = cursor->session;
 
-	key_gen(&key.data, &key.size, keyno, 0);
-	value_gen(&value.data, &value.size, keyno);
+	value_gen((uint8_t *)value->data, &value->size, keyno);
 
 	/* Log the operation */
 	if (g.logging == LOG_OPS) {
@@ -404,201 +531,173 @@ wts_col_put(uint64_t keyno)
 			(void)session->msg_printf(session,
 			    "%-10s%" PRIu64 " {0x%02" PRIx8 "}",
 			    "update", keyno,
-			    ((uint8_t *)value.data)[0]);
+			    ((uint8_t *)value->data)[0]);
 		else
 			(void)session->msg_printf(session,
 			    "%-10s%" PRIu64 " {%.*s}",
 			    "update", keyno,
-			    (int)value.size, (char *)value.data);
+			    (int)value->size, (char *)value->data);
 	}
 
 	cursor->set_key(cursor, keyno);
 	if (g.c_file_type == FIX)
-		cursor->set_value(cursor, *(uint8_t *)value.data);
+		cursor->set_value(cursor, *(uint8_t *)value->data);
 	else
-		cursor->set_value(cursor, &value);
+		cursor->set_value(cursor, value);
 	ret = cursor->insert(cursor);
-	if (ret != 0 && ret != WT_NOTFOUND) {
-		fprintf(stderr,
-		    "%s: wts_col_put: %" PRIu64 " : %s\n",
-		    g.progname, keyno, wiredtiger_strerror(ret));
-		return (1);
-	}
+	if (ret != 0 && ret != WT_NOTFOUND)
+		die(ret, "col_put: %" PRIu64, keyno);
 
-	if (bdb_put(key.data, key.size, value.data, value.size, &notfound))
-		return (1);
+	if (!SINGLETHREADED)
+		return;
 
-	NTF_CHK(wts_notfound_chk("wts_col_put", ret, notfound, keyno));
-	return (0);
+	key_gen((uint8_t *)key->data, &key->size, keyno, 0);
+	bdb_put(key->data, key->size, value->data, value->size, &notfound);
+	NTF_CHK(notfound_chk("col_put", ret, notfound, keyno));
 }
 
 /*
- * wts_col_insert --
+ * col_insert --
  *	Insert an element in a column-store file.
  */
-static int
-wts_col_insert(uint64_t *keynop)
+static void
+col_insert(WT_CURSOR *cursor, WT_ITEM *key, WT_ITEM *value, uint64_t *keynop)
 {
-	static WT_ITEM key, value;
-	WT_CURSOR *cursor;
 	WT_SESSION *session;
 	uint64_t keyno;
 	int notfound, ret;
 
-	/* Reset the other cursor so it doesn't keep pages pinned. */
-	cursor = g.wts_cursor;
-	cursor->reset(cursor);
+	session = cursor->session;
 
-	cursor = g.wts_cursor_insert;
-	session = g.wts_session;
-
-	value_gen(&value.data, &value.size, g.rows + 1);
+	value_gen((uint8_t *)value->data, &value->size, g.rows + 1);
 
 	if (g.c_file_type == FIX)
-		cursor->set_value(cursor, *(uint8_t *)value.data);
+		cursor->set_value(cursor, *(uint8_t *)value->data);
 	else
-		cursor->set_value(cursor, &value);
-	ret = cursor->insert(cursor);
-	if (ret != 0) {
-		fprintf(stderr, "%s: wts_col_insert: %s\n",
-		    g.progname, wiredtiger_strerror(ret));
-		return (1);
-	}
-	if ((ret = cursor->get_key(cursor, &keyno)) != 0) {
-		fprintf(stderr, "%s: cursor->get_key: %s\n",
-		    g.progname, wiredtiger_strerror(ret));
-		return (1);
-	}
-	if (keyno <= g.rows) {
-		fprintf(stderr,
-		    "%s: inserted key did not create new row\n", g.progname);
-		return (1);
-	}
-	*keynop = g.rows = (uint32_t)keyno;
+		cursor->set_value(cursor, value);
+	if ((ret = cursor->insert(cursor)) != 0)
+		die(ret, "cursor.insert");
+	if ((ret = cursor->get_key(cursor, &keyno)) != 0)
+		die(ret, "cursor.get_key");
+	*keynop = (uint32_t)keyno;
+
+	/*
+	 * Set the maximum number of rows to the returned key.  That key may
+	 * not be the current maximum if we race with another thread, but
+	 * that's OK: we just want the row count to keep increasing so we
+	 * don't ignore records at the end of the table.
+	 */
+	g.rows = (uint32_t)keyno;
 
 	if (g.logging == LOG_OPS) {
 		if (g.c_file_type == FIX)
 			(void)session->msg_printf(session,
 			    "%-10s%" PRIu64 " {0x%02" PRIx8 "}",
 			    "insert", keyno,
-			    ((uint8_t *)value.data)[0]);
+			    ((uint8_t *)value->data)[0]);
 		else
 			(void)session->msg_printf(session,
 			    "%-10s%" PRIu64 " {%.*s}",
 			    "insert", keyno,
-			    (int)value.size, (char *)value.data);
+			    (int)value->size, (char *)value->data);
 	}
 
-	key_gen(&key.data, &key.size, keyno, 0);
-	return (bdb_put(
-	    key.data, key.size, value.data, value.size, &notfound) ? 1 : 0);
+	if (!SINGLETHREADED)
+		return;
+
+	key_gen((uint8_t *)key->data, &key->size, keyno, 0);
+	bdb_put(key->data, key->size, value->data, value->size, &notfound);
 }
 
 /*
- * wts_row_del --
+ * row_del --
  *	Delete an element from a row-store file.
  */
-static int
-wts_row_del(uint64_t keyno, int *notfoundp)
+static void
+row_del(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int *notfoundp)
 {
-	static WT_ITEM key;
-	WT_CURSOR *cursor;
 	WT_SESSION *session;
 	int notfound, ret;
 
-	*notfoundp = 0;
-	cursor = g.wts_cursor;
-	session = g.wts_session;
+	session = cursor->session;
 
-	key_gen(&key.data, &key.size, keyno, 0);
+	key_gen((uint8_t *)key->data, &key->size, keyno, 0);
 
 	/* Log the operation */
 	if (g.logging == LOG_OPS)
 		(void)session->msg_printf(
 		    session, "%-10s%" PRIu64, "remove", keyno);
 
-	if (bdb_del(keyno, &notfound))
-		return (1);
-	*notfoundp = notfound;
-
-	cursor->set_key(cursor, &key);
+	cursor->set_key(cursor, key);
 	ret = cursor->remove(cursor);
-	if (ret != 0 && ret != WT_NOTFOUND) {
-		fprintf(stderr,
-		    "%s: wts_row_del: remove %" PRIu64 " by key: %s\n",
-		    g.progname, keyno, wiredtiger_strerror(ret));
-		return (1);
-	}
+	if (ret != 0 && ret != WT_NOTFOUND)
+		die(ret, "row_del: remove %" PRIu64 " by key", keyno);
+	*notfoundp = ret == WT_NOTFOUND;
 
-	NTF_CHK(wts_notfound_chk("wts_row_del", ret, notfound, keyno));
-	return (0);
+	if (!SINGLETHREADED)
+		return;
+
+	bdb_del(keyno, &notfound);
+	NTF_CHK(notfound_chk("row_del", ret, notfound, keyno));
 }
 
 /*
- * wts_col_del --
+ * col_del --
  *	Delete an element from a column-store file.
  */
-static int
-wts_col_del(uint64_t keyno, int *notfoundp)
+static void
+col_del(WT_CURSOR *cursor, WT_ITEM *key, uint64_t keyno, int *notfoundp)
 {
-	static WT_ITEM key;
-	WT_CURSOR *cursor;
 	WT_SESSION *session;
 	int notfound, ret;
 
-	cursor = g.wts_cursor;
-	session = g.wts_session;
+	session = cursor->session;
 
 	/* Log the operation */
 	if (g.logging == LOG_OPS)
 		(void)session->msg_printf(
 		    session, "%-10s%" PRIu64, "remove", keyno);
 
+	cursor->set_key(cursor, keyno);
+	ret = cursor->remove(cursor);
+	if (ret != 0 && ret != WT_NOTFOUND)
+		die(ret, "col_del: remove %" PRIu64 " by key", keyno);
+	*notfoundp = ret == WT_NOTFOUND;
+
+	if (!SINGLETHREADED)
+		return;
+
 	/*
 	 * Deleting a fixed-length item is the same as setting the bits to 0;
 	 * do the same thing for the BDB store.
 	 */
 	if (g.c_file_type == FIX) {
-		key_gen(&key.data, &key.size, keyno, 0);
-		if (bdb_put(key.data, key.size, "\0", 1, &notfound))
-			return (1);
-	} else {
-		if (bdb_del(keyno, &notfound))
-			return (1);
-		*notfoundp = notfound;
-	}
+		key_gen((uint8_t *)key->data, &key->size, keyno, 0);
+		bdb_put(key->data, key->size, "\0", 1, &notfound);
+	} else
+		bdb_del(keyno, &notfound);
 
-	cursor->set_key(cursor, keyno);
-	ret = cursor->remove(cursor);
-	if (ret != 0 && ret != WT_NOTFOUND) {
-		fprintf(stderr,
-		    "%s: wts_col_del: remove %" PRIu64 " by key: %s\n",
-		    g.progname, keyno, wiredtiger_strerror(ret));
-		return (1);
-	}
-
-	NTF_CHK(wts_notfound_chk("wts_col_del", ret, notfound, keyno));
-	return (0);
+	NTF_CHK(notfound_chk("col_del", ret, notfound, keyno));
 }
 
 /*
- * wts_notfound_chk --
+ * notfound_chk --
  *	Compare notfound returns for consistency.
  */
 static int
-wts_notfound_chk(const char *f, int wt_ret, int bdb_notfound, uint64_t keyno)
+notfound_chk(const char *f, int wt_ret, int bdb_notfound, uint64_t keyno)
 {
 	/* Check for not found status. */
-	if (bdb_notfound) {
-		if (wt_ret == WT_NOTFOUND)
-			return (2);
+	if (bdb_notfound && wt_ret == WT_NOTFOUND)
+		return (1);
 
+	if (bdb_notfound) {
 		fprintf(stderr, "%s: %s:", g.progname, f);
 		if (keyno != 0)
 			fprintf(stderr, " row %" PRIu64 ":", keyno);
 		fprintf(stderr,
 		    " not found in Berkeley DB, found in WiredTiger\n");
-		return (1);
+		die(0, NULL);
 	}
 	if (wt_ret == WT_NOTFOUND) {
 		fprintf(stderr, "%s: %s:", g.progname, f);
@@ -606,17 +705,17 @@ wts_notfound_chk(const char *f, int wt_ret, int bdb_notfound, uint64_t keyno)
 			fprintf(stderr, " row %" PRIu64 ":", keyno);
 		fprintf(stderr,
 		    " found in Berkeley DB, not found in WiredTiger\n");
-		return (1);
+		die(0, NULL);
 	}
 	return (0);
 }
 
 /*
- * wts_stream_item --
- *	Dump a single data/size pair, with a tag.
+ * print_item --
+ *	Display a single data/size pair, with a tag.
  */
 static void
-wts_stream_item(const char *tag, WT_ITEM *item)
+print_item(const char *tag, WT_ITEM *item)
 {
 	static const char hex[] = "0123456789abcdef";
 	const uint8_t *data;
author	Don Anderson <dda@ddanderson.com>	2012-03-15 14:29:22 -0400
committer	Don Anderson <dda@ddanderson.com>	2012-03-15 14:29:22 -0400
commit	b9b00694d3b7e4fcaf59fdf8e5196bcd0040b581 (patch)
tree	0e43cb64a2d788cbec61843f1d4a739081ee1344
parent	c0f8a06d6a77ea64f24b28b59c62c866c9982ae0 (diff)
parent	9e67b397abc3f7931aa708327011c0acf8e61f60 (diff)
download	mongo-b9b00694d3b7e4fcaf59fdf8e5196bcd0040b581.tar.gz